diff mbox series

[xf86-video-intel,v6] sna: Added AYUV format support for textured and sprite video adapters.

Message ID 20181102100603.2990-1-stanislav.lisovskiy@intel.com (mailing list archive)
State New, archived
Headers show
Series [xf86-video-intel,v6] sna: Added AYUV format support for textured and sprite video adapters. | expand

Commit Message

Lisovskiy, Stanislav Nov. 2, 2018, 10:06 a.m. UTC
v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
    Added comment about AYUV byte ordering in Gstreamer.

v3: Removed sna_composite_op flags related change to the separate patch.

v4: Fixed review comments, done code refactoring

v5: Fixed following review comments:
    - Fixed comment in shader code for ayuv kernel.
    - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels.
    - Removed duplicate gen9_kernel parameter, left from previous patches
    - Added colorspace handling for new AYUV kernel
    - Fixed naming of sna_copy_packed_data_ayuv to sna_copy_ayuv_data
    - Started using standard bswap_32 function for byte swapping in sna_copy_ayuv_data
    - Removed redundant code in sna_copy_ayuv_data so that it looks more neat
    - Fixed XVIMAGE_AYUV structure initialization to contain proper byte sequence for GST
    - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888
    - Fixed AYUV advertisement for all platforms
    - Removed unnecessary RGB888 declaration.

v6:
    - Fixed surface format not to use alpha as supposed
    - Now doing byte swapping always during copy
    - Changed hack, required for GST to work to be at one place
    - Fixed invalid sampling values for XVIMAGE_AYUV
    - Fixed sprite format checking order and images_ayuv definition.

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
---
 src/render_program/Makefile.am                |  2 +
 .../exa_wm_src_sample_argb_ayuv.g8a           | 76 ++++++++++++++++
 .../exa_wm_src_sample_argb_ayuv.g8b           |  8 ++
 src/sna/gen9_render.c                         | 24 ++++-
 src/sna/sna_render.h                          |  3 +
 src/sna/sna_video.c                           | 89 ++++++++++++++++++-
 src/sna/sna_video.h                           | 20 +++++
 src/sna/sna_video_sprite.c                    | 20 ++++-
 src/sna/sna_video_textured.c                  |  7 ++
 9 files changed, 244 insertions(+), 5 deletions(-)
 create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8a
 create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8b

Comments

Lisovskiy, Stanislav Nov. 6, 2018, 3:33 p.m. UTC | #1
On Fri, 2018-11-02 at 12:06 +0200, Stanislav Lisovskiy wrote:

Ping.

> v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
>     Added comment about AYUV byte ordering in Gstreamer.
> 
> v3: Removed sna_composite_op flags related change to the separate
> patch.
> 
> v4: Fixed review comments, done code refactoring
> 
> v5: Fixed following review comments:
>     - Fixed comment in shader code for ayuv kernel.
>     - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels.
>     - Removed duplicate gen9_kernel parameter, left from previous
> patches
>     - Added colorspace handling for new AYUV kernel
>     - Fixed naming of sna_copy_packed_data_ayuv to sna_copy_ayuv_data
>     - Started using standard bswap_32 function for byte swapping in
> sna_copy_ayuv_data
>     - Removed redundant code in sna_copy_ayuv_data so that it looks
> more neat
>     - Fixed XVIMAGE_AYUV structure initialization to contain proper
> byte sequence for GST
>     - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888
>     - Fixed AYUV advertisement for all platforms
>     - Removed unnecessary RGB888 declaration.
> 
> v6:
>     - Fixed surface format not to use alpha as supposed
>     - Now doing byte swapping always during copy
>     - Changed hack, required for GST to work to be at one place
>     - Fixed invalid sampling values for XVIMAGE_AYUV
>     - Fixed sprite format checking order and images_ayuv definition.
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> ---
>  src/render_program/Makefile.am                |  2 +
>  .../exa_wm_src_sample_argb_ayuv.g8a           | 76 ++++++++++++++++
>  .../exa_wm_src_sample_argb_ayuv.g8b           |  8 ++
>  src/sna/gen9_render.c                         | 24 ++++-
>  src/sna/sna_render.h                          |  3 +
>  src/sna/sna_video.c                           | 89
> ++++++++++++++++++-
>  src/sna/sna_video.h                           | 20 +++++
>  src/sna/sna_video_sprite.c                    | 20 ++++-
>  src/sna/sna_video_textured.c                  |  7 ++
>  9 files changed, 244 insertions(+), 5 deletions(-)
>  create mode 100644
> src/render_program/exa_wm_src_sample_argb_ayuv.g8a
>  create mode 100644
> src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> 
> diff --git a/src/render_program/Makefile.am
> b/src/render_program/Makefile.am
> index dc58138f..e35ffa52 100644
> --- a/src/render_program/Makefile.am
> +++ b/src/render_program/Makefile.am
> @@ -196,6 +196,7 @@ INTEL_G7B =				\
>  INTEL_G8A =				\
>  	exa_wm_src_affine.g8a 		\
>  	exa_wm_src_sample_argb.g8a 	\
> +	exa_wm_src_sample_argb_ayuv.g8a \
>  	exa_wm_src_sample_nv12.g8a 	\
>  	exa_wm_src_sample_planar.g8a 	\
>  	exa_wm_write.g8a 		\
> @@ -205,6 +206,7 @@ INTEL_G8A =				\
>  
>  INTEL_G8B =				\
>  	exa_wm_src_affine.g8b 		\
> +	exa_wm_src_sample_argb_ayuv.g8b \
>  	exa_wm_src_sample_argb.g8b 	\
>  	exa_wm_src_sample_nv12.g8b 	\
>  	exa_wm_src_sample_planar.g8b 	\
> diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> new file mode 100644
> index 00000000..c0b84c2e
> --- /dev/null
> +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> @@ -0,0 +1,76 @@
> +/*
> + * Copyright © 2006 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> the
> + * Software is furnished to do so, subject to the following
> conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the next
> + * paragraph) shall be included in all copies or substantial
> portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Wang Zhenyu <zhenyu.z.wang@intel.com>
> + *    Keith Packard <keithp@keithp.com>
> + */
> +
> +/* Sample the src surface */
> +
> +include(`exa_wm.g4i')
> +
> +undefine(`src_msg')
> +undefine(`src_msg_ind')
> +
> +define(`src_msg',       `g65')
> +define(`src_msg_ind',   `65')
> +
> +/* prepare sampler read back gX register, which would be written
> back to output */
> +
> +/* use simd16 sampler, param 0 is u, param 1 is v. */
> +/* 'payload' loading, assuming tex coord start from g4 */
> +
> +/* load argb */
> +mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
> +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start
> reg*/
> +
> +/* src_msg will be copied with g0, as it contains send desc */
> +/* emit sampler 'send' cmd */
> +send (16) src_msg_ind	/* msg reg index */
> +	src_sample_base<1>UW /* readback */
> +	null
> +	sampler (1,0,F)	/* sampler message description,
> (binding_table,sampler_index,datatype)
> +				/* here(src->dst) we should use
> src_sampler and src_surface */
> +	mlen 5 rlen 8 { align1 };   /* required message len 5,
> readback len 8 */
> +
> +/*
> + * Have to change bytes order, because the only
> + * player which supports AYUV format currently is
> + * Gstreamer and it supports in bad way, even though
> + * spec says MSB:AYUV, we get the bytes opposite way.
> + * We swap bytes both for sprite and texture modes during copy.
> + * So here we get argb which then becomes 1bgr.
> + */
> +mov (16) src_sample_a<1>UD src_sample_b<1>UD  { align1 };
> +mov (16) src_sample_b<1>UD src_sample_g<1>UD  { align1 };
> +mov (16) src_sample_g<1>UD src_sample_r<1>UD  { align1 };
> +mov (16) src_sample_r<1>UD src_sample_a<1>UD  { align1 };
> +mov (16) src_sample_a<1>F 1.0F;
> +
> +
> +
> +
> +
> +
> +
> +
> diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> new file mode 100644
> index 00000000..f3ac4959
> --- /dev/null
> +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> @@ -0,0 +1,8 @@
> +   { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
> +   { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 },
> +   { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 },
> +   { 0x00800001, 0x22800208, 0x00200240, 0x00000000 },
> +   { 0x00800001, 0x22400208, 0x00200200, 0x00000000 },
> +   { 0x00800001, 0x22000208, 0x002001c0, 0x00000000 },
> +   { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 },
> +   { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
> diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
> index eb22b642..90707b1f 100644
> --- a/src/sna/gen9_render.c
> +++ b/src/sna/gen9_render.c
> @@ -129,6 +129,20 @@ static const uint32_t
> ps_kernel_planar_bt709[][4] = {
>  #include "exa_wm_write.g8b"
>  };
>  
> +static const uint32_t ps_kernel_ayuv_bt601[][4] = {
> +#include "exa_wm_src_affine.g8b"
> +#include "exa_wm_src_sample_argb_ayuv.g8b"
> +#include "exa_wm_yuv_rgb_bt601.g8b"
> +#include "exa_wm_write.g8b"
> +};
> +
> +static const uint32_t ps_kernel_ayuv_bt709[][4] = {
> +#include "exa_wm_src_affine.g8b"
> +#include "exa_wm_src_sample_argb_ayuv.g8b"
> +#include "exa_wm_yuv_rgb_bt709.g8b"
> +#include "exa_wm_write.g8b"
> +};
> +
>  static const uint32_t ps_kernel_nv12_bt709[][4] = {
>  #include "exa_wm_src_affine.g8b"
>  #include "exa_wm_src_sample_nv12.g8b"
> @@ -177,6 +191,8 @@ static const struct wm_kernel_info {
>  	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
>  	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
>  	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
> +	KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2),
> +	KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2),
>  	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
>  #endif
>  };
> @@ -2552,7 +2568,6 @@ gen9_render_composite(struct sna *sna,
>  							     tmp-
> >mask.bo != NULL,
>  							     tmp-
> >has_component_alpha,
>  							     tmp-
> >is_affine);
> -
>  	tmp->blt   = gen9_render_composite_blt;
>  	tmp->box   = gen9_render_composite_box;
>  	tmp->boxes = gen9_render_composite_boxes__blt;
> @@ -3853,6 +3868,8 @@ static void gen9_emit_video_state(struct sna
> *sna,
>  			src_surf_format[0] =
> SURFACEFORMAT_B8G8R8X8_UNORM;
>  		else if (frame->id == FOURCC_UYVY)
>  			src_surf_format[0] =
> SURFACEFORMAT_YCRCB_SWAPY;
> +		else if (is_ayuv_fourcc(frame->id))
> +			src_surf_format[0] =
> SURFACEFORMAT_B8G8R8X8_UNORM;
>  		else
>  			src_surf_format[0] =
> SURFACEFORMAT_YCRCB_NORMAL;
>  
> @@ -3903,6 +3920,11 @@ static unsigned select_video_kernel(const
> struct sna_video *video,
>  	case FOURCC_RGB565:
>  		return GEN9_WM_KERNEL_VIDEO_RGB;
>  
> +	case FOURCC_AYUV:
> +		return video->colorspace ?
> +			GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
> +			GEN9_WM_KERNEL_VIDEO_AYUV_BT601;
> +
>  	default:
>  		return video->colorspace ?
>  			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
> diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
> index a4e5b56a..891fc905 100644
> --- a/src/sna/sna_render.h
> +++ b/src/sna/sna_render.h
> @@ -617,6 +617,9 @@ enum {
>  	GEN9_WM_KERNEL_VIDEO_NV12_BT709,
>  	GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
>  
> +	GEN9_WM_KERNEL_VIDEO_AYUV_BT601,
> +	GEN9_WM_KERNEL_VIDEO_AYUV_BT709,
> +
>  	GEN9_WM_KERNEL_VIDEO_RGB,
>  	GEN9_WM_KERNEL_COUNT
>  };
> diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
> index 55405f81..d4ed8464 100644
> --- a/src/sna/sna_video.c
> +++ b/src/sna/sna_video.c
> @@ -59,6 +59,7 @@
>  #include "intel_options.h"
>  
>  #include <xf86xv.h>
> +#include <byteswap.h>
>  
>  #ifdef SNA_XVMC
>  #define _SNA_XVMC_SERVER_
> @@ -281,6 +282,7 @@ sna_video_frame_set_rotation(struct sna_video
> *video,
>  	} else {
>  		switch (frame->id) {
>  		case FOURCC_RGB888:
> +		case FOURCC_AYUV:
>  			if (rotation & (RR_Rotate_90 |
> RR_Rotate_270)) {
>  				frame->pitch[0] = ALIGN((height <<
> 2), align);
>  				frame->size = (int)frame->pitch[0] *
> width;
> @@ -584,6 +586,89 @@ sna_copy_packed_data(struct sna_video *video,
>  	}
>  }
>  
> +static void
> +sna_copy_ayuv_data(struct sna_video *video,
> +		   const struct sna_video_frame *frame,
> +		   const uint8_t *buf,
> +		   uint8_t *dst,
> +		   bool reverse_bytes)
> +{
> +	int pitch = frame->width << 2;
> +	const uint8_t *src, *s;
> +	const uint32_t *src_dw;
> +	uint32_t *dst_dw = (uint32_t *)dst;
> +	int x, y, w, h;
> +	int i, j;
> +	uint32_t dw;
> +
> +	if (video->textured) {
> +		/* XXX support copying cropped extents */
> +		x = y = 0;
> +		w = frame->width;
> +		h = frame->height;
> +	} else {
> +		x = frame->image.x1;
> +		y = frame->image.y1;
> +		w = frame->image.x2 - frame->image.x1;
> +		h = frame->image.y2 - frame->image.y1;
> +	}
> +
> +	src = buf + (y * pitch) + (x << 2);
> +	src_dw = (uint32_t *)src;
> +
> +	if (reverse_bytes) {
> +		/*
> +		 * Have to reverse bytes order, because the only
> +		 * player which supports AYUV format currently is
> +		 * Gstreamer and it supports in bad way, even though
> +		 * spec says MSB:AYUV, we get the bytes opposite
> way.
> +		 */
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				uint32_t reverse_dw;
> +				dw = src_dw[i * w + j];
> +				reverse_dw = bswap_32(dw);
> +				dst_dw[i * w + j] = reverse_dw;
> +			}
> +		}
> +	}
> +
> +	switch (frame->rotation) {
> +	case RR_Rotate_0:
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				dw = dst_dw[i * w + j];
> +				dst_dw[i * w + j] = dw;
> +			}
> +		}
> +		break;
> +	case RR_Rotate_90:
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				dw = dst_dw[i * w + j];
> +				dst_dw[(w - j - 1) * h + i] = dw;
> +			}
> +		}
> +		break;
> +	case RR_Rotate_180:
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				dw = dst_dw[i * w + j];
> +				dst_dw[(h - i - 1) * w + w - j - 1]
> = dw;
> +			}
> +		}
> +		break;
> +	case RR_Rotate_270:
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				dw = dst_dw[i * w + j];;
> +				dst_dw[(w - j - 1) * h + i] = dw;
> +			}
> +		}
> +		break;
> +	}
> +}
> +
>  bool
>  sna_video_copy_data(struct sna_video *video,
>  		    struct sna_video_frame *frame,
> @@ -604,7 +689,7 @@ sna_video_copy_data(struct sna_video *video,
>  	assert(frame->size);
>  
>  	/* In the common case, we can simply the upload in a single
> pwrite */
> -	if (frame->rotation == RR_Rotate_0 && !video->tiled) {
> +	if (frame->rotation == RR_Rotate_0 && !video->tiled &&
> !is_ayuv_fourcc(frame->id)) {
>  		DBG(("%s: unrotated, untiled fast paths: is-
> planar?=%d\n",
>  		     __FUNCTION__, is_planar_fourcc(frame->id)));
>  		if (is_nv12_fourcc(frame->id)) {
> @@ -709,6 +794,8 @@ use_gtt: /* copy data, must use GTT so that we
> keep the overlay uncached */
>  		sna_copy_nv12_data(video, frame, buf, dst);
>  	else if (is_planar_fourcc(frame->id))
>  		sna_copy_planar_data(video, frame, buf, dst);
> +	else if (is_ayuv_fourcc(frame->id))
> +		sna_copy_ayuv_data(video, frame, buf, dst, true);
>  	else
>  		sna_copy_packed_data(video, frame, buf, dst);
>  
> diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
> index bbd3f0fd..a3ffdc0b 100644
> --- a/src/sna/sna_video.h
> +++ b/src/sna/sna_video.h
> @@ -39,6 +39,7 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) + 'R')
>  #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) + 'R')
>  #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
> +#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
>  
>  /*
>   * Below, a dummy picture type that is used in XvPutImage
> @@ -79,6 +80,15 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  	XvTopToBottom \
>  }
>  
> +#define XVIMAGE_AYUV { \
> +	FOURCC_AYUV, XvYUV, LSBFirst, \
> +	{'A', 'Y', 'U', 'V', 0x00, 0x00, 0x00, 0x10, 0x80, 0x00,
> 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71}, \
> +	32, XvPacked, 1, 0, 0, 0, 0, 8, 8, 8, 1, 1, 1, 1, 1, 1, \
> +	{'A', 'Y', 'U', 'V', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
> +	XvTopToBottom \
> +}
> +
> +
>  struct sna_video {
>  	struct sna *sna;
>  
> @@ -189,6 +199,16 @@ static inline int is_nv12_fourcc(int id)
>  	}
>  }
>  
> +static inline int is_ayuv_fourcc(int id)
> +{
> +	switch (id) {
> +	case FOURCC_AYUV:
> +		return 1;
> +	default:
> +		return 0;
> +	}
> +}
> +
>  bool
>  sna_video_clip_helper(struct sna_video *video,
>  		      struct sna_video_frame *frame,
> diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
> index 8b7ae8ae..3780dc0e 100644
> --- a/src/sna/sna_video_sprite.c
> +++ b/src/sna/sna_video_sprite.c
> @@ -47,7 +47,7 @@
>  #define DRM_FORMAT_YUYV         fourcc_code('Y', 'U', 'Y', 'V') /*
> [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
>  #define DRM_FORMAT_UYVY         fourcc_code('U', 'Y', 'V', 'Y') /*
> [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
>  #define DRM_FORMAT_NV12         fourcc_code('N', 'V', '1', '2') /*
> 2x2 subsampled Cr:Cb plane */
> -
> +#define DRM_FORMAT_XYUV8888     fourcc_code('X', 'Y', 'U', 'V') /*
> [31:0] x:Y:U:V 8:8:8:8 little endian */
>  #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
>  				    (sna)->kgem.gen >= 0110)
>  
> @@ -79,6 +79,8 @@ static const XvImageRec images_rgb565[] = {
> XVIMAGE_YUY2, XVIMAGE_UYVY,
>  					    XVMC_RGB888, XVMC_RGB565
> };
>  static const XvImageRec images_nv12[] = { XVIMAGE_YUY2,
> XVIMAGE_UYVY,
>  					  XVIMAGE_NV12, XVMC_RGB888,
> XVMC_RGB565 };
> +static const XvImageRec images_ayuv[] = { XVIMAGE_AYUV,
> XVIMAGE_YUY2, XVIMAGE_UYVY,
> +					  XVIMAGE_NV12, XVMC_RGB888,
> XVMC_RGB565 };
>  static const XvAttributeRec attribs[] = {
>  	{ XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE" },
> /* BT.601, BT.709 */
>  	{ XvSettable | XvGettable, 0, 0xffffff, (char
> *)"XV_COLORKEY" },
> @@ -364,6 +366,10 @@ sna_video_sprite_show(struct sna *sna,
>  		case FOURCC_UYVY:
>  			f.pixel_format = DRM_FORMAT_UYVY;
>  			break;
> +		case FOURCC_AYUV:
> +			/* i915 doesn't support alpha, so we use
> XYUV */
> +			f.pixel_format = DRM_FORMAT_XYUV8888;
> +			break;
>  		case FOURCC_YUY2:
>  		default:
>  			f.pixel_format = DRM_FORMAT_YUYV;
> @@ -705,7 +711,12 @@ static int
> sna_video_sprite_query(ddQueryImageAttributes_ARGS)
>  		tmp *= (*h >> 1);
>  		size += tmp;
>  		break;
> -
> +	case FOURCC_AYUV:
> +		tmp = *w << 2;
> +		if (pitches)
> +			pitches[0] = tmp;
> +		size = *h * tmp;
> +		break;
>  	default:
>  		*w = (*w + 1) & ~1;
>  		*h = (*h + 1) & ~1;
> @@ -805,7 +816,10 @@ void sna_video_sprite_setup(struct sna *sna,
> ScreenPtr screen)
>  	adaptor->nAttributes = ARRAY_SIZE(attribs);
>  	adaptor->pAttributes = (XvAttributeRec *)attribs;
>  
> -	if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
> +	if (sna_has_sprite_format(sna, DRM_FORMAT_XYUV8888)) {
> +		adaptor->pImages = (XvImageRec *)images_ayuv;
> +		adaptor->nImages = ARRAY_SIZE(images_ayuv);
> +	} else if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
>  		adaptor->pImages = (XvImageRec *)images_nv12;
>  		adaptor->nImages = ARRAY_SIZE(images_nv12);
>  	} else if (sna_has_sprite_format(sna, DRM_FORMAT_RGB565)) {
> diff --git a/src/sna/sna_video_textured.c
> b/src/sna/sna_video_textured.c
> index a784fe2e..46c213ef 100644
> --- a/src/sna/sna_video_textured.c
> +++ b/src/sna/sna_video_textured.c
> @@ -68,6 +68,7 @@ static const XvImageRec gen4_Images[] = {
>  	XVIMAGE_I420,
>  	XVIMAGE_NV12,
>  	XVIMAGE_UYVY,
> +	XVIMAGE_AYUV,
>  	XVMC_YUV,
>  };
>  
> @@ -337,6 +338,12 @@
> sna_video_textured_query(ddQueryImageAttributes_ARGS)
>  			pitches[0] = size;
>  		size *= *h;
>  		break;
> +	case FOURCC_AYUV:
> +		size = *w << 2;
> +		if (pitches)
> +			pitches[0] = size;
> +		size *= *h;
> +		break;
>  	case FOURCC_XVMC:
>  		*h = (*h + 1) & ~1;
>  		size = sizeof(uint32_t);
Chris Wilson Nov. 8, 2018, 5:17 p.m. UTC | #2
Quoting Stanislav Lisovskiy (2018-11-02 10:06:03)
> v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
>     Added comment about AYUV byte ordering in Gstreamer.
> 
> v3: Removed sna_composite_op flags related change to the separate patch.
> 
> v4: Fixed review comments, done code refactoring
> 
> v5: Fixed following review comments:
>     - Fixed comment in shader code for ayuv kernel.
>     - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels.
>     - Removed duplicate gen9_kernel parameter, left from previous patches
>     - Added colorspace handling for new AYUV kernel
>     - Fixed naming of sna_copy_packed_data_ayuv to sna_copy_ayuv_data
>     - Started using standard bswap_32 function for byte swapping in sna_copy_ayuv_data
>     - Removed redundant code in sna_copy_ayuv_data so that it looks more neat
>     - Fixed XVIMAGE_AYUV structure initialization to contain proper byte sequence for GST
>     - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888
>     - Fixed AYUV advertisement for all platforms
>     - Removed unnecessary RGB888 declaration.
> 
> v6:
>     - Fixed surface format not to use alpha as supposed
>     - Now doing byte swapping always during copy
>     - Changed hack, required for GST to work to be at one place
>     - Fixed invalid sampling values for XVIMAGE_AYUV
>     - Fixed sprite format checking order and images_ayuv definition.
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>

Ville, happy?

> +       if (reverse_bytes) {
> +               /*
> +                * Have to reverse bytes order, because the only
> +                * player which supports AYUV format currently is
> +                * Gstreamer and it supports in bad way, even though
> +                * spec says MSB:AYUV, we get the bytes opposite way.
> +                */

This worries me. Is there no room for negotiation with GStreamer so that they
use the format the kernel and HW expect? Though I presume they chose
their layout for good reason (some TV probably expects it in host
order).
-Chris
Ville Syrjälä Nov. 8, 2018, 5:47 p.m. UTC | #3
On Fri, Nov 02, 2018 at 12:06:03PM +0200, Stanislav Lisovskiy wrote:
> v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
>     Added comment about AYUV byte ordering in Gstreamer.
> 
> v3: Removed sna_composite_op flags related change to the separate patch.
> 
> v4: Fixed review comments, done code refactoring
> 
> v5: Fixed following review comments:
>     - Fixed comment in shader code for ayuv kernel.
>     - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels.
>     - Removed duplicate gen9_kernel parameter, left from previous patches
>     - Added colorspace handling for new AYUV kernel
>     - Fixed naming of sna_copy_packed_data_ayuv to sna_copy_ayuv_data
>     - Started using standard bswap_32 function for byte swapping in sna_copy_ayuv_data
>     - Removed redundant code in sna_copy_ayuv_data so that it looks more neat
>     - Fixed XVIMAGE_AYUV structure initialization to contain proper byte sequence for GST
>     - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888
>     - Fixed AYUV advertisement for all platforms
>     - Removed unnecessary RGB888 declaration.
> 
> v6:
>     - Fixed surface format not to use alpha as supposed
>     - Now doing byte swapping always during copy
>     - Changed hack, required for GST to work to be at one place
>     - Fixed invalid sampling values for XVIMAGE_AYUV
>     - Fixed sprite format checking order and images_ayuv definition.
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> ---
>  src/render_program/Makefile.am                |  2 +
>  .../exa_wm_src_sample_argb_ayuv.g8a           | 76 ++++++++++++++++
>  .../exa_wm_src_sample_argb_ayuv.g8b           |  8 ++
>  src/sna/gen9_render.c                         | 24 ++++-
>  src/sna/sna_render.h                          |  3 +
>  src/sna/sna_video.c                           | 89 ++++++++++++++++++-
>  src/sna/sna_video.h                           | 20 +++++
>  src/sna/sna_video_sprite.c                    | 20 ++++-
>  src/sna/sna_video_textured.c                  |  7 ++
>  9 files changed, 244 insertions(+), 5 deletions(-)
>  create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8a
>  create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> 
> diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
> index dc58138f..e35ffa52 100644
> --- a/src/render_program/Makefile.am
> +++ b/src/render_program/Makefile.am
> @@ -196,6 +196,7 @@ INTEL_G7B =				\
>  INTEL_G8A =				\
>  	exa_wm_src_affine.g8a 		\
>  	exa_wm_src_sample_argb.g8a 	\
> +	exa_wm_src_sample_argb_ayuv.g8a \
>  	exa_wm_src_sample_nv12.g8a 	\
>  	exa_wm_src_sample_planar.g8a 	\
>  	exa_wm_write.g8a 		\
> @@ -205,6 +206,7 @@ INTEL_G8A =				\
>  
>  INTEL_G8B =				\
>  	exa_wm_src_affine.g8b 		\
> +	exa_wm_src_sample_argb_ayuv.g8b \
>  	exa_wm_src_sample_argb.g8b 	\
>  	exa_wm_src_sample_nv12.g8b 	\
>  	exa_wm_src_sample_planar.g8b 	\
> diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> new file mode 100644
> index 00000000..c0b84c2e
> --- /dev/null
> +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> @@ -0,0 +1,76 @@
> +/*
> + * Copyright © 2006 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Wang Zhenyu <zhenyu.z.wang@intel.com>
> + *    Keith Packard <keithp@keithp.com>
> + */
> +
> +/* Sample the src surface */
> +
> +include(`exa_wm.g4i')
> +
> +undefine(`src_msg')
> +undefine(`src_msg_ind')
> +
> +define(`src_msg',       `g65')
> +define(`src_msg_ind',   `65')
> +
> +/* prepare sampler read back gX register, which would be written back to output */
> +
> +/* use simd16 sampler, param 0 is u, param 1 is v. */
> +/* 'payload' loading, assuming tex coord start from g4 */
> +
> +/* load argb */
> +mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
> +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
> +
> +/* src_msg will be copied with g0, as it contains send desc */
> +/* emit sampler 'send' cmd */
> +send (16) src_msg_ind	/* msg reg index */
> +	src_sample_base<1>UW /* readback */
> +	null
> +	sampler (1,0,F)	/* sampler message description, (binding_table,sampler_index,datatype)
> +				/* here(src->dst) we should use src_sampler and src_surface */
> +	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
> +
> +/*
> + * Have to change bytes order, because the only
> + * player which supports AYUV format currently is
> + * Gstreamer and it supports in bad way, even though
> + * spec says MSB:AYUV, we get the bytes opposite way.
> + * We swap bytes both for sprite and texture modes during copy.
> + * So here we get argb which then becomes 1bgr.
> + */
> +mov (16) src_sample_a<1>UD src_sample_b<1>UD  { align1 };
> +mov (16) src_sample_b<1>UD src_sample_g<1>UD  { align1 };
> +mov (16) src_sample_g<1>UD src_sample_r<1>UD  { align1 };
> +mov (16) src_sample_r<1>UD src_sample_a<1>UD  { align1 };
> +mov (16) src_sample_a<1>F 1.0F;
> +
> +
> +
> +
> +
> +
> +
> +
> diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> new file mode 100644
> index 00000000..f3ac4959
> --- /dev/null
> +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> @@ -0,0 +1,8 @@
> +   { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
> +   { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 },
> +   { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 },
> +   { 0x00800001, 0x22800208, 0x00200240, 0x00000000 },
> +   { 0x00800001, 0x22400208, 0x00200200, 0x00000000 },
> +   { 0x00800001, 0x22000208, 0x002001c0, 0x00000000 },
> +   { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 },
> +   { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
> diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
> index eb22b642..90707b1f 100644
> --- a/src/sna/gen9_render.c
> +++ b/src/sna/gen9_render.c
> @@ -129,6 +129,20 @@ static const uint32_t ps_kernel_planar_bt709[][4] = {
>  #include "exa_wm_write.g8b"
>  };
>  
> +static const uint32_t ps_kernel_ayuv_bt601[][4] = {
> +#include "exa_wm_src_affine.g8b"
> +#include "exa_wm_src_sample_argb_ayuv.g8b"
> +#include "exa_wm_yuv_rgb_bt601.g8b"
> +#include "exa_wm_write.g8b"
> +};
> +
> +static const uint32_t ps_kernel_ayuv_bt709[][4] = {
> +#include "exa_wm_src_affine.g8b"
> +#include "exa_wm_src_sample_argb_ayuv.g8b"
> +#include "exa_wm_yuv_rgb_bt709.g8b"
> +#include "exa_wm_write.g8b"
> +};
> +
>  static const uint32_t ps_kernel_nv12_bt709[][4] = {
>  #include "exa_wm_src_affine.g8b"
>  #include "exa_wm_src_sample_nv12.g8b"
> @@ -177,6 +191,8 @@ static const struct wm_kernel_info {
>  	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
>  	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
>  	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
> +	KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2),
> +	KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2),
>  	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
>  #endif
>  };
> @@ -2552,7 +2568,6 @@ gen9_render_composite(struct sna *sna,
>  							     tmp->mask.bo != NULL,
>  							     tmp->has_component_alpha,
>  							     tmp->is_affine);
> -
>  	tmp->blt   = gen9_render_composite_blt;
>  	tmp->box   = gen9_render_composite_box;
>  	tmp->boxes = gen9_render_composite_boxes__blt;
> @@ -3853,6 +3868,8 @@ static void gen9_emit_video_state(struct sna *sna,
>  			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
>  		else if (frame->id == FOURCC_UYVY)
>  			src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY;
> +		else if (is_ayuv_fourcc(frame->id))
> +			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
>  		else
>  			src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL;
>  
> @@ -3903,6 +3920,11 @@ static unsigned select_video_kernel(const struct sna_video *video,
>  	case FOURCC_RGB565:
>  		return GEN9_WM_KERNEL_VIDEO_RGB;
>  
> +	case FOURCC_AYUV:
> +		return video->colorspace ?
> +			GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
> +			GEN9_WM_KERNEL_VIDEO_AYUV_BT601;
> +
>  	default:
>  		return video->colorspace ?
>  			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
> diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
> index a4e5b56a..891fc905 100644
> --- a/src/sna/sna_render.h
> +++ b/src/sna/sna_render.h
> @@ -617,6 +617,9 @@ enum {
>  	GEN9_WM_KERNEL_VIDEO_NV12_BT709,
>  	GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
>  
> +	GEN9_WM_KERNEL_VIDEO_AYUV_BT601,
> +	GEN9_WM_KERNEL_VIDEO_AYUV_BT709,
> +
>  	GEN9_WM_KERNEL_VIDEO_RGB,
>  	GEN9_WM_KERNEL_COUNT
>  };
> diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
> index 55405f81..d4ed8464 100644
> --- a/src/sna/sna_video.c
> +++ b/src/sna/sna_video.c
> @@ -59,6 +59,7 @@
>  #include "intel_options.h"
>  
>  #include <xf86xv.h>
> +#include <byteswap.h>
>  
>  #ifdef SNA_XVMC
>  #define _SNA_XVMC_SERVER_
> @@ -281,6 +282,7 @@ sna_video_frame_set_rotation(struct sna_video *video,
>  	} else {
>  		switch (frame->id) {
>  		case FOURCC_RGB888:
> +		case FOURCC_AYUV:
>  			if (rotation & (RR_Rotate_90 | RR_Rotate_270)) {
>  				frame->pitch[0] = ALIGN((height << 2), align);
>  				frame->size = (int)frame->pitch[0] * width;
> @@ -584,6 +586,89 @@ sna_copy_packed_data(struct sna_video *video,
>  	}
>  }
>  
> +static void
> +sna_copy_ayuv_data(struct sna_video *video,
> +		   const struct sna_video_frame *frame,
> +		   const uint8_t *buf,
> +		   uint8_t *dst,
> +		   bool reverse_bytes)

Why this parameter? It's always true no?

> +{
> +	int pitch = frame->width << 2;
> +	const uint8_t *src, *s;
> +	const uint32_t *src_dw;
> +	uint32_t *dst_dw = (uint32_t *)dst;
> +	int x, y, w, h;
> +	int i, j;
> +	uint32_t dw;
> +
> +	if (video->textured) {
> +		/* XXX support copying cropped extents */
> +		x = y = 0;
> +		w = frame->width;
> +		h = frame->height;
> +	} else {
> +		x = frame->image.x1;
> +		y = frame->image.y1;
> +		w = frame->image.x2 - frame->image.x1;
> +		h = frame->image.y2 - frame->image.y1;
> +	}
> +
> +	src = buf + (y * pitch) + (x << 2);
> +	src_dw = (uint32_t *)src;
> +
> +	if (reverse_bytes) {
> +		/*
> +		 * Have to reverse bytes order, because the only
> +		 * player which supports AYUV format currently is
> +		 * Gstreamer and it supports in bad way, even though
> +		 * spec says MSB:AYUV, we get the bytes opposite way.
> +		 */
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				uint32_t reverse_dw;
> +				dw = src_dw[i * w + j];
> +				reverse_dw = bswap_32(dw);
> +				dst_dw[i * w + j] = reverse_dw;
> +			}
> +		}
> +	}

Two loops through the data isn't quite what I was thinking.

> +
> +	switch (frame->rotation) {
> +	case RR_Rotate_0:
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				dw = dst_dw[i * w + j];
> +				dst_dw[i * w + j] = dw;

What I had in mind was just

dst_dw[..] = bswap_32(src_dw[...]);

here.

> +			}
> +		}
> +		break;
> +	case RR_Rotate_90:
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				dw = dst_dw[i * w + j];
> +				dst_dw[(w - j - 1) * h + i] = dw;
> +			}
> +		}
> +		break;
> +	case RR_Rotate_180:
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				dw = dst_dw[i * w + j];
> +				dst_dw[(h - i - 1) * w + w - j - 1] = dw;
> +			}
> +		}
> +		break;
> +	case RR_Rotate_270:
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j++) {
> +				dw = dst_dw[i * w + j];;
> +				dst_dw[(w - j - 1) * h + i] = dw;
> +			}
> +		}
> +		break;
> +	}
> +}
> +
>  bool
>  sna_video_copy_data(struct sna_video *video,
>  		    struct sna_video_frame *frame,
> @@ -604,7 +689,7 @@ sna_video_copy_data(struct sna_video *video,
>  	assert(frame->size);
>  
>  	/* In the common case, we can simply the upload in a single pwrite */
> -	if (frame->rotation == RR_Rotate_0 && !video->tiled) {
> +	if (frame->rotation == RR_Rotate_0 && !video->tiled && !is_ayuv_fourcc(frame->id)) {
>  		DBG(("%s: unrotated, untiled fast paths: is-planar?=%d\n",
>  		     __FUNCTION__, is_planar_fourcc(frame->id)));
>  		if (is_nv12_fourcc(frame->id)) {
> @@ -709,6 +794,8 @@ use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */
>  		sna_copy_nv12_data(video, frame, buf, dst);
>  	else if (is_planar_fourcc(frame->id))
>  		sna_copy_planar_data(video, frame, buf, dst);
> +	else if (is_ayuv_fourcc(frame->id))
> +		sna_copy_ayuv_data(video, frame, buf, dst, true);
>  	else
>  		sna_copy_packed_data(video, frame, buf, dst);
>  
> diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
> index bbd3f0fd..a3ffdc0b 100644
> --- a/src/sna/sna_video.h
> +++ b/src/sna/sna_video.h
> @@ -39,6 +39,7 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) + 'R')
>  #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) + 'R')
>  #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
> +#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
>  
>  /*
>   * Below, a dummy picture type that is used in XvPutImage
> @@ -79,6 +80,15 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  	XvTopToBottom \
>  }
>  
> +#define XVIMAGE_AYUV { \
> +	FOURCC_AYUV, XvYUV, LSBFirst, \
> +	{'A', 'Y', 'U', 'V', 0x00, 0x00, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71}, \
> +	32, XvPacked, 1, 0, 0, 0, 0, 8, 8, 8, 1, 1, 1, 1, 1, 1, \
> +	{'A', 'Y', 'U', 'V', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
> +	XvTopToBottom \
> +}
> +
> +
>  struct sna_video {
>  	struct sna *sna;
>  
> @@ -189,6 +199,16 @@ static inline int is_nv12_fourcc(int id)
>  	}
>  }
>  
> +static inline int is_ayuv_fourcc(int id)
> +{
> +	switch (id) {
> +	case FOURCC_AYUV:
> +		return 1;
> +	default:
> +		return 0;
> +	}
> +}
> +
>  bool
>  sna_video_clip_helper(struct sna_video *video,
>  		      struct sna_video_frame *frame,
> diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
> index 8b7ae8ae..3780dc0e 100644
> --- a/src/sna/sna_video_sprite.c
> +++ b/src/sna/sna_video_sprite.c
> @@ -47,7 +47,7 @@
>  #define DRM_FORMAT_YUYV         fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
>  #define DRM_FORMAT_UYVY         fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
>  #define DRM_FORMAT_NV12         fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */
> -
> +#define DRM_FORMAT_XYUV8888     fourcc_code('X', 'Y', 'U', 'V') /* [31:0] x:Y:U:V 8:8:8:8 little endian */
>  #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
>  				    (sna)->kgem.gen >= 0110)
>  
> @@ -79,6 +79,8 @@ static const XvImageRec images_rgb565[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
>  					    XVMC_RGB888, XVMC_RGB565 };
>  static const XvImageRec images_nv12[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
>  					  XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 };
> +static const XvImageRec images_ayuv[] = { XVIMAGE_AYUV, XVIMAGE_YUY2, XVIMAGE_UYVY,
> +					  XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 };
>  static const XvAttributeRec attribs[] = {
>  	{ XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE" }, /* BT.601, BT.709 */
>  	{ XvSettable | XvGettable, 0, 0xffffff, (char *)"XV_COLORKEY" },
> @@ -364,6 +366,10 @@ sna_video_sprite_show(struct sna *sna,
>  		case FOURCC_UYVY:
>  			f.pixel_format = DRM_FORMAT_UYVY;
>  			break;
> +		case FOURCC_AYUV:
> +			/* i915 doesn't support alpha, so we use XYUV */
> +			f.pixel_format = DRM_FORMAT_XYUV8888;
> +			break;
>  		case FOURCC_YUY2:
>  		default:
>  			f.pixel_format = DRM_FORMAT_YUYV;
> @@ -705,7 +711,12 @@ static int sna_video_sprite_query(ddQueryImageAttributes_ARGS)
>  		tmp *= (*h >> 1);
>  		size += tmp;
>  		break;
> -
> +	case FOURCC_AYUV:
> +		tmp = *w << 2;
> +		if (pitches)
> +			pitches[0] = tmp;
> +		size = *h * tmp;
> +		break;
>  	default:
>  		*w = (*w + 1) & ~1;
>  		*h = (*h + 1) & ~1;
> @@ -805,7 +816,10 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen)
>  	adaptor->nAttributes = ARRAY_SIZE(attribs);
>  	adaptor->pAttributes = (XvAttributeRec *)attribs;
>  
> -	if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
> +	if (sna_has_sprite_format(sna, DRM_FORMAT_XYUV8888)) {
> +		adaptor->pImages = (XvImageRec *)images_ayuv;
> +		adaptor->nImages = ARRAY_SIZE(images_ayuv);
> +	} else if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
>  		adaptor->pImages = (XvImageRec *)images_nv12;
>  		adaptor->nImages = ARRAY_SIZE(images_nv12);
>  	} else if (sna_has_sprite_format(sna, DRM_FORMAT_RGB565)) {
> diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
> index a784fe2e..46c213ef 100644
> --- a/src/sna/sna_video_textured.c
> +++ b/src/sna/sna_video_textured.c
> @@ -68,6 +68,7 @@ static const XvImageRec gen4_Images[] = {
>  	XVIMAGE_I420,
>  	XVIMAGE_NV12,
>  	XVIMAGE_UYVY,
> +	XVIMAGE_AYUV,
>  	XVMC_YUV,
>  };
>  
> @@ -337,6 +338,12 @@ sna_video_textured_query(ddQueryImageAttributes_ARGS)
>  			pitches[0] = size;
>  		size *= *h;
>  		break;
> +	case FOURCC_AYUV:
> +		size = *w << 2;
> +		if (pitches)
> +			pitches[0] = size;
> +		size *= *h;
> +		break;
>  	case FOURCC_XVMC:
>  		*h = (*h + 1) & ~1;
>  		size = sizeof(uint32_t);
> -- 
> 2.17.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ville Syrjälä Nov. 8, 2018, 5:50 p.m. UTC | #4
On Thu, Nov 08, 2018 at 05:17:13PM +0000, Chris Wilson wrote:
> Quoting Stanislav Lisovskiy (2018-11-02 10:06:03)
> > v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
> >     Added comment about AYUV byte ordering in Gstreamer.
> > 
> > v3: Removed sna_composite_op flags related change to the separate patch.
> > 
> > v4: Fixed review comments, done code refactoring
> > 
> > v5: Fixed following review comments:
> >     - Fixed comment in shader code for ayuv kernel.
> >     - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels.
> >     - Removed duplicate gen9_kernel parameter, left from previous patches
> >     - Added colorspace handling for new AYUV kernel
> >     - Fixed naming of sna_copy_packed_data_ayuv to sna_copy_ayuv_data
> >     - Started using standard bswap_32 function for byte swapping in sna_copy_ayuv_data
> >     - Removed redundant code in sna_copy_ayuv_data so that it looks more neat
> >     - Fixed XVIMAGE_AYUV structure initialization to contain proper byte sequence for GST
> >     - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888
> >     - Fixed AYUV advertisement for all platforms
> >     - Removed unnecessary RGB888 declaration.
> > 
> > v6:
> >     - Fixed surface format not to use alpha as supposed
> >     - Now doing byte swapping always during copy
> >     - Changed hack, required for GST to work to be at one place
> >     - Fixed invalid sampling values for XVIMAGE_AYUV
> >     - Fixed sprite format checking order and images_ayuv definition.
> > 
> > Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> 
> Ville, happy?

Almost.

> 
> > +       if (reverse_bytes) {

This bool bugs me, replied separately.

> > +               /*
> > +                * Have to reverse bytes order, because the only
> > +                * player which supports AYUV format currently is
> > +                * Gstreamer and it supports in bad way, even though
> > +                * spec says MSB:AYUV, we get the bytes opposite way.
> > +                */
> 
> This worries me. Is there no room for negotiation with Gstreamer so they
> use the format the kernel and HW expects. Though I presume they chose
> their layout for good reason (some TV probably expects it in host
> order.)

Either that or no one ever actually tested it.

One might hope that the component_order[] in the XvImage can be used to
tell the standard AYUV and gst AYUV apart. But probably no one even
looks at that thing. Also not sure how that would work w.r.t. the
fourcc if both needed to be supported.
Lisovskiy, Stanislav Nov. 9, 2018, 8:18 a.m. UTC | #5
On Thu, 2018-11-08 at 19:47 +0200, Ville Syrjälä wrote:
> On Fri, Nov 02, 2018 at 12:06:03PM +0200, Stanislav Lisovskiy wrote:
> > v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
> >     Added comment about AYUV byte ordering in Gstreamer.
> > 
> > v3: Removed sna_composite_op flags related change to the separate
> > patch.
> > 
> > v4: Fixed review comments, done code refactoring
> > 
> > v5: Fixed following review comments:
> >     - Fixed comment in shader code for ayuv kernel.
> >     - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels.
> >     - Removed duplicate gen9_kernel parameter, left from previous
> > patches
> >     - Added colorspace handling for new AYUV kernel
> >     - Fixed naming of sna_copy_packed_data_ayuv to
> > sna_copy_ayuv_data
> >     - Started using standard bswap_32 function for byte swapping in
> > sna_copy_ayuv_data
> >     - Removed redundant code in sna_copy_ayuv_data so that it looks
> > more neat
> >     - Fixed XVIMAGE_AYUV structure initialization to contain proper
> > byte sequence for GST
> >     - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888
> >     - Fixed AYUV advertisement for all platforms
> >     - Removed unnecessary RGB888 declaration.
> > 
> > v6:
> >     - Fixed surface format not to use alpha as supposed
> >     - Now doing byte swapping always during copy
> >     - Changed hack, required for GST to work to be at one place
> >     - Fixed invalid sampling values for XVIMAGE_AYUV
> >     - Fixed sprite format checking order and images_ayuv
> > definition.
> > 
> > Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> > ---
> >  src/render_program/Makefile.am                |  2 +
> >  .../exa_wm_src_sample_argb_ayuv.g8a           | 76
> > ++++++++++++++++
> >  .../exa_wm_src_sample_argb_ayuv.g8b           |  8 ++
> >  src/sna/gen9_render.c                         | 24 ++++-
> >  src/sna/sna_render.h                          |  3 +
> >  src/sna/sna_video.c                           | 89
> > ++++++++++++++++++-
> >  src/sna/sna_video.h                           | 20 +++++
> >  src/sna/sna_video_sprite.c                    | 20 ++++-
> >  src/sna/sna_video_textured.c                  |  7 ++
> >  9 files changed, 244 insertions(+), 5 deletions(-)
> >  create mode 100644
> > src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> >  create mode 100644
> > src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> > 
> > diff --git a/src/render_program/Makefile.am
> > b/src/render_program/Makefile.am
> > index dc58138f..e35ffa52 100644
> > --- a/src/render_program/Makefile.am
> > +++ b/src/render_program/Makefile.am
> > @@ -196,6 +196,7 @@ INTEL_G7B =				\
> >  INTEL_G8A =				\
> >  	exa_wm_src_affine.g8a 		\
> >  	exa_wm_src_sample_argb.g8a 	\
> > +	exa_wm_src_sample_argb_ayuv.g8a \
> >  	exa_wm_src_sample_nv12.g8a 	\
> >  	exa_wm_src_sample_planar.g8a 	\
> >  	exa_wm_write.g8a 		\
> > @@ -205,6 +206,7 @@ INTEL_G8A =				\
> >  
> >  INTEL_G8B =				\
> >  	exa_wm_src_affine.g8b 		\
> > +	exa_wm_src_sample_argb_ayuv.g8b \
> >  	exa_wm_src_sample_argb.g8b 	\
> >  	exa_wm_src_sample_nv12.g8b 	\
> >  	exa_wm_src_sample_planar.g8b 	\
> > diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> > b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> > new file mode 100644
> > index 00000000..c0b84c2e
> > --- /dev/null
> > +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> > @@ -0,0 +1,76 @@
> > +/*
> > + * Copyright © 2006 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> > obtaining a
> > + * copy of this software and associated documentation files (the
> > "Software"),
> > + * to deal in the Software without restriction, including without
> > limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> > sublicense,
> > + * and/or sell copies of the Software, and to permit persons to
> > whom the
> > + * Software is furnished to do so, subject to the following
> > conditions:
> > + *
> > + * The above copyright notice and this permission notice
> > (including the next
> > + * paragraph) shall be included in all copies or substantial
> > portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> > EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> > EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + *
> > + * Authors:
> > + *    Wang Zhenyu <zhenyu.z.wang@intel.com>
> > + *    Keith Packard <keithp@keithp.com>
> > + */
> > +
> > +/* Sample the src surface */
> > +
> > +include(`exa_wm.g4i')
> > +
> > +undefine(`src_msg')
> > +undefine(`src_msg_ind')
> > +
> > +define(`src_msg',       `g65')
> > +define(`src_msg_ind',   `65')
> > +
> > +/* prepare sampler read back gX register, which would be written
> > back to output */
> > +
> > +/* use simd16 sampler, param 0 is u, param 1 is v. */
> > +/* 'payload' loading, assuming tex coord start from g4 */
> > +
> > +/* load argb */
> > +mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
> > +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start
> > reg*/
> > +
> > +/* src_msg will be copied with g0, as it contains send desc */
> > +/* emit sampler 'send' cmd */
> > +send (16) src_msg_ind	/* msg reg index */
> > +	src_sample_base<1>UW /* readback */
> > +	null
> > +	sampler (1,0,F)	/* sampler message description,
> > (binding_table,sampler_index,datatype)
> > +				/* here(src->dst) we should use
> > src_sampler and src_surface */
> > +	mlen 5 rlen 8 { align1 };   /* required message len 5,
> > readback len 8 */
> > +
> > +/*
> > + * Have to change bytes order, because the only
> > + * player which supports AYUV format currently is
> > + * Gstreamer and it supports in bad way, even though
> > + * spec says MSB:AYUV, we get the bytes opposite way.
> > + * We swap bytes both for sprite and texture modes during copy.
> > + * So here we get argb which then becomes 1bgr.
> > + */
> > +mov (16) src_sample_a<1>UD src_sample_b<1>UD  { align1 };
> > +mov (16) src_sample_b<1>UD src_sample_g<1>UD  { align1 };
> > +mov (16) src_sample_g<1>UD src_sample_r<1>UD  { align1 };
> > +mov (16) src_sample_r<1>UD src_sample_a<1>UD  { align1 };
> > +mov (16) src_sample_a<1>F 1.0F;
> > +
> > +
> > +
> > +
> > +
> > +
> > +
> > +
> > diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> > b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> > new file mode 100644
> > index 00000000..f3ac4959
> > --- /dev/null
> > +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> > @@ -0,0 +1,8 @@
> > +   { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
> > +   { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 },
> > +   { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 },
> > +   { 0x00800001, 0x22800208, 0x00200240, 0x00000000 },
> > +   { 0x00800001, 0x22400208, 0x00200200, 0x00000000 },
> > +   { 0x00800001, 0x22000208, 0x002001c0, 0x00000000 },
> > +   { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 },
> > +   { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
> > diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
> > index eb22b642..90707b1f 100644
> > --- a/src/sna/gen9_render.c
> > +++ b/src/sna/gen9_render.c
> > @@ -129,6 +129,20 @@ static const uint32_t
> > ps_kernel_planar_bt709[][4] = {
> >  #include "exa_wm_write.g8b"
> >  };
> >  
> > +static const uint32_t ps_kernel_ayuv_bt601[][4] = {
> > +#include "exa_wm_src_affine.g8b"
> > +#include "exa_wm_src_sample_argb_ayuv.g8b"
> > +#include "exa_wm_yuv_rgb_bt601.g8b"
> > +#include "exa_wm_write.g8b"
> > +};
> > +
> > +static const uint32_t ps_kernel_ayuv_bt709[][4] = {
> > +#include "exa_wm_src_affine.g8b"
> > +#include "exa_wm_src_sample_argb_ayuv.g8b"
> > +#include "exa_wm_yuv_rgb_bt709.g8b"
> > +#include "exa_wm_write.g6b"
> > +};
> > +
> >  static const uint32_t ps_kernel_nv12_bt709[][4] = {
> >  #include "exa_wm_src_affine.g8b"
> >  #include "exa_wm_src_sample_nv12.g8b"
> > @@ -177,6 +191,8 @@ static const struct wm_kernel_info {
> >  	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
> >  	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
> >  	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
> > +	KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2),
> > +	KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2),
> >  	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
> >  #endif
> >  };
> > @@ -2552,7 +2568,6 @@ gen9_render_composite(struct sna *sna,
> >  							     tmp-
> > >mask.bo != NULL,
> >  							     tmp-
> > >has_component_alpha,
> >  							     tmp-
> > >is_affine);
> > -
> >  	tmp->blt   = gen9_render_composite_blt;
> >  	tmp->box   = gen9_render_composite_box;
> >  	tmp->boxes = gen9_render_composite_boxes__blt;
> > @@ -3853,6 +3868,8 @@ static void gen9_emit_video_state(struct sna
> > *sna,
> >  			src_surf_format[0] =
> > SURFACEFORMAT_B8G8R8X8_UNORM;
> >  		else if (frame->id == FOURCC_UYVY)
> >  			src_surf_format[0] =
> > SURFACEFORMAT_YCRCB_SWAPY;
> > +		else if (is_ayuv_fourcc(frame->id))
> > +			src_surf_format[0] =
> > SURFACEFORMAT_B8G8R8X8_UNORM;
> >  		else
> >  			src_surf_format[0] =
> > SURFACEFORMAT_YCRCB_NORMAL;
> >  
> > @@ -3903,6 +3920,11 @@ static unsigned select_video_kernel(const
> > struct sna_video *video,
> >  	case FOURCC_RGB565:
> >  		return GEN9_WM_KERNEL_VIDEO_RGB;
> >  
> > +	case FOURCC_AYUV:
> > +		return video->colorspace ?
> > +			GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
> > +			GEN9_WM_KERNEL_VIDEO_AYUV_BT601;
> > +
> >  	default:
> >  		return video->colorspace ?
> >  			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
> > diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
> > index a4e5b56a..891fc905 100644
> > --- a/src/sna/sna_render.h
> > +++ b/src/sna/sna_render.h
> > @@ -617,6 +617,9 @@ enum {
> >  	GEN9_WM_KERNEL_VIDEO_NV12_BT709,
> >  	GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
> >  
> > +	GEN9_WM_KERNEL_VIDEO_AYUV_BT601,
> > +	GEN9_WM_KERNEL_VIDEO_AYUV_BT709,
> > +
> >  	GEN9_WM_KERNEL_VIDEO_RGB,
> >  	GEN9_WM_KERNEL_COUNT
> >  };
> > diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
> > index 55405f81..d4ed8464 100644
> > --- a/src/sna/sna_video.c
> > +++ b/src/sna/sna_video.c
> > @@ -59,6 +59,7 @@
> >  #include "intel_options.h"
> >  
> >  #include <xf86xv.h>
> > +#include <byteswap.h>
> >  
> >  #ifdef SNA_XVMC
> >  #define _SNA_XVMC_SERVER_
> > @@ -281,6 +282,7 @@ sna_video_frame_set_rotation(struct sna_video
> > *video,
> >  	} else {
> >  		switch (frame->id) {
> >  		case FOURCC_RGB888:
> > +		case FOURCC_AYUV:
> >  			if (rotation & (RR_Rotate_90 |
> > RR_Rotate_270)) {
> >  				frame->pitch[0] = ALIGN((height <<
> > 2), align);
> >  				frame->size = (int)frame->pitch[0] 
> > * width;
> > @@ -584,6 +586,89 @@ sna_copy_packed_data(struct sna_video *video,
> >  	}
> >  }
> >  
> > +static void
> > +sna_copy_ayuv_data(struct sna_video *video,
> > +		   const struct sna_video_frame *frame,
> > +		   const uint8_t *buf,
> > +		   uint8_t *dst,
> > +		   bool reverse_bytes)
> 
> Why this parameter? It's always true no?

Yes, it is currently always true. I just thought we might want
a way to control the byte order in case some player other than
Gstreamer is used and it provides the bytes in the correct order.

> 
> > +{
> > +	int pitch = frame->width << 2;
> > +	const uint8_t *src, *s;
> > +	const uint32_t *src_dw;
> > +	uint32_t *dst_dw = (uint32_t *)dst;
> > +	int x, y, w, h;
> > +	int i, j;
> > +	uint32_t dw;
> > +
> > +	if (video->textured) {
> > +		/* XXX support copying cropped extents */
> > +		x = y = 0;
> > +		w = frame->width;
> > +		h = frame->height;
> > +	} else {
> > +		x = frame->image.x1;
> > +		y = frame->image.y1;
> > +		w = frame->image.x2 - frame->image.x1;
> > +		h = frame->image.y2 - frame->image.y1;
> > +	}
> > +
> > +	src = buf + (y * pitch) + (x << 2);
> > +	src_dw = (uint32_t *)src;
> > +
> > +	if (reverse_bytes) {
> > +		/*
> > +		 * Have to reverse bytes order, because the only
> > +		 * player which supports AYUV format currently is
> > +		 * Gstreamer and it supports in bad way, even
> > though
> > +		 * spec says MSB:AYUV, we get the bytes opposite
> > way.
> > +		 */
> > +		for (i = 0; i < h; i++) {
> > +			for (j = 0; j < w; j++) {
> > +				uint32_t reverse_dw;
> > +				dw = src_dw[i * w + j];
> > +				reverse_dw = bswap_32(dw);
> > +				dst_dw[i * w + j] = reverse_dw;
> > +			}
> > +		}
> > +	}
> 
> Two loops through the data isn't quite what I was thinking.
> 
> > +
> > +	switch (frame->rotation) {
> > +	case RR_Rotate_0:
> > +		for (i = 0; i < h; i++) {
> > +			for (j = 0; j < w; j++) {
> > +				dw = dst_dw[i * w + j];
> > +				dst_dw[i * w + j] = dw;
> 
> What I had in mind was just
> 
> dst_dw[..] = bswap_32(src_dw[...]);
> 
> here.

I think this is how it was in the previous revisions: in each of
those case labels I was doing the bswap in the same loop.
I thought your idea was to do the swap separately :)

Should I then put it back so that this loop looks like this?

for (i = 0; i < h; i++) {
	for (j = 0; j < w; j++) {
		if (reverse_bytes)
			dst_dw[i * w + j] = bswap_32(src_dw[i * w +j]);
		else
			dst_dw[i * w + j] = src_dw[i * w + j];
	}
}

However, if I remove the reverse_bytes parameter from here, we will
still need to do something once some other player appears that uses
the correct AYUV byte order; otherwise only Gst will play video
properly with this format.

> 
> > +			}
> > +		}
> > +		break;
> > +	case RR_Rotate_90:
> > +		for (i = 0; i < h; i++) {
> > +			for (j = 0; j < w; j++) {
> > +				dw = dst_dw[i * w + j];
> > +				dst_dw[(w - j - 1) * h + i] = dw;
> > +			}
> > +		}
> > +		break;
> > +	case RR_Rotate_180:
> > +		for (i = 0; i < h; i++) {
> > +			for (j = 0; j < w; j++) {
> > +				dw = dst_dw[i * w + j];
> > +				dst_dw[(h - i - 1) * w + w - j -
> > 1] = dw;
> > +			}
> > +		}
> > +		break;
> > +	case RR_Rotate_270:
> > +		for (i = 0; i < h; i++) {
> > +			for (j = 0; j < w; j++) {
> > +				dw = dst_dw[i * w + j];;
> > +				dst_dw[(w - j - 1) * h + i] = dw;
> > +			}
> > +		}
> > +		break;
> > +	}
> > +}
> > +
> >  bool
> >  sna_video_copy_data(struct sna_video *video,
> >  		    struct sna_video_frame *frame,
> > @@ -604,7 +689,7 @@ sna_video_copy_data(struct sna_video *video,
> >  	assert(frame->size);
> >  
> >  	/* In the common case, we can simply the upload in a
> > single pwrite */
> > -	if (frame->rotation == RR_Rotate_0 && !video->tiled) {
> > +	if (frame->rotation == RR_Rotate_0 && !video->tiled &&
> > !is_ayuv_fourcc(frame->id)) {
> >  		DBG(("%s: unrotated, untiled fast paths: is-
> > planar?=%d\n",
> >  		     __FUNCTION__, is_planar_fourcc(frame->id)));
> >  		if (is_nv12_fourcc(frame->id)) {
> > @@ -709,6 +794,8 @@ use_gtt: /* copy data, must use GTT so that we
> > keep the overlay uncached */
> >  		sna_copy_nv12_data(video, frame, buf, dst);
> >  	else if (is_planar_fourcc(frame->id))
> >  		sna_copy_planar_data(video, frame, buf, dst);
> > +	else if (is_ayuv_fourcc(frame->id))
> > +		sna_copy_ayuv_data(video, frame, buf, dst, true);
> >  	else
> >  		sna_copy_packed_data(video, frame, buf, dst);
> >  
> > diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
> > index bbd3f0fd..a3ffdc0b 100644
> > --- a/src/sna/sna_video.h
> > +++ b/src/sna/sna_video.h
> > @@ -39,6 +39,7 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) +
> > 'R')
> >  #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) +
> > 'R')
> >  #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
> > +#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
> >  
> >  /*
> >   * Below, a dummy picture type that is used in XvPutImage
> > @@ -79,6 +80,15 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  	XvTopToBottom \
> >  }
> >  
> > +#define XVIMAGE_AYUV { \
> > +	FOURCC_AYUV, XvYUV, LSBFirst, \
> > +	{'A', 'Y', 'U', 'V', 0x00, 0x00, 0x00, 0x10, 0x80, 0x00,
> > 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71}, \
> > +	32, XvPacked, 1, 0, 0, 0, 0, 8, 8, 8, 1, 1, 1, 1, 1, 1, \
> > +	{'A', 'Y', 'U', 'V', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> > 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
> > +	XvTopToBottom \
> > +}
> > +
> > +
> >  struct sna_video {
> >  	struct sna *sna;
> >  
> > @@ -189,6 +199,16 @@ static inline int is_nv12_fourcc(int id)
> >  	}
> >  }
> >  
> > +static inline int is_ayuv_fourcc(int id)
> > +{
> > +	switch (id) {
> > +	case FOURCC_AYUV:
> > +		return 1;
> > +	default:
> > +		return 0;
> > +	}
> > +}
> > +
> >  bool
> >  sna_video_clip_helper(struct sna_video *video,
> >  		      struct sna_video_frame *frame,
> > diff --git a/src/sna/sna_video_sprite.c
> > b/src/sna/sna_video_sprite.c
> > index 8b7ae8ae..3780dc0e 100644
> > --- a/src/sna/sna_video_sprite.c
> > +++ b/src/sna/sna_video_sprite.c
> > @@ -47,7 +47,7 @@
> >  #define DRM_FORMAT_YUYV         fourcc_code('Y', 'U', 'Y', 'V') /*
> > [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
> >  #define DRM_FORMAT_UYVY         fourcc_code('U', 'Y', 'V', 'Y') /*
> > [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
> >  #define DRM_FORMAT_NV12         fourcc_code('N', 'V', '1', '2') /*
> > 2x2 subsampled Cr:Cb plane */
> > -
> > +#define DRM_FORMAT_XYUV8888     fourcc_code('X', 'Y', 'U', 'V') /*
> > [31:0] x:Y:U:V 8:8:8:8 little endian */
> >  #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
> >  				    (sna)->kgem.gen >= 0110)
> >  
> > @@ -79,6 +79,8 @@ static const XvImageRec images_rgb565[] = {
> > XVIMAGE_YUY2, XVIMAGE_UYVY,
> >  					    XVMC_RGB888,
> > XVMC_RGB565 };
> >  static const XvImageRec images_nv12[] = { XVIMAGE_YUY2,
> > XVIMAGE_UYVY,
> >  					  XVIMAGE_NV12,
> > XVMC_RGB888, XVMC_RGB565 };
> > +static const XvImageRec images_ayuv[] = { XVIMAGE_AYUV,
> > XVIMAGE_YUY2, XVIMAGE_UYVY,
> > +					  XVIMAGE_NV12,
> > XVMC_RGB888, XVMC_RGB565 };
> >  static const XvAttributeRec attribs[] = {
> >  	{ XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE"
> > }, /* BT.601, BT.709 */
> >  	{ XvSettable | XvGettable, 0, 0xffffff, (char
> > *)"XV_COLORKEY" },
> > @@ -364,6 +366,10 @@ sna_video_sprite_show(struct sna *sna,
> >  		case FOURCC_UYVY:
> >  			f.pixel_format = DRM_FORMAT_UYVY;
> >  			break;
> > +		case FOURCC_AYUV:
> > +			/* i915 doesn't support alpha, so we use
> > XYUV */
> > +			f.pixel_format = DRM_FORMAT_XYUV8888;
> > +			break;
> >  		case FOURCC_YUY2:
> >  		default:
> >  			f.pixel_format = DRM_FORMAT_YUYV;
> > @@ -705,7 +711,12 @@ static int
> > sna_video_sprite_query(ddQueryImageAttributes_ARGS)
> >  		tmp *= (*h >> 1);
> >  		size += tmp;
> >  		break;
> > -
> > +	case FOURCC_AYUV:
> > +		tmp = *w << 2;
> > +		if (pitches)
> > +			pitches[0] = tmp;
> > +		size = *h * tmp;
> > +		break;
> >  	default:
> >  		*w = (*w + 1) & ~1;
> >  		*h = (*h + 1) & ~1;
> > @@ -805,7 +816,10 @@ void sna_video_sprite_setup(struct sna *sna,
> > ScreenPtr screen)
> >  	adaptor->nAttributes = ARRAY_SIZE(attribs);
> >  	adaptor->pAttributes = (XvAttributeRec *)attribs;
> >  
> > -	if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
> > +	if (sna_has_sprite_format(sna, DRM_FORMAT_XYUV8888)) {
> > +		adaptor->pImages = (XvImageRec *)images_ayuv;
> > +		adaptor->nImages = ARRAY_SIZE(images_ayuv);
> > +	} else if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
> >  		adaptor->pImages = (XvImageRec *)images_nv12;
> >  		adaptor->nImages = ARRAY_SIZE(images_nv12);
> >  	} else if (sna_has_sprite_format(sna, DRM_FORMAT_RGB565))
> > {
> > diff --git a/src/sna/sna_video_textured.c
> > b/src/sna/sna_video_textured.c
> > index a784fe2e..46c213ef 100644
> > --- a/src/sna/sna_video_textured.c
> > +++ b/src/sna/sna_video_textured.c
> > @@ -68,6 +68,7 @@ static const XvImageRec gen4_Images[] = {
> >  	XVIMAGE_I420,
> >  	XVIMAGE_NV12,
> >  	XVIMAGE_UYVY,
> > +	XVIMAGE_AYUV,
> >  	XVMC_YUV,
> >  };
> >  
> > @@ -337,6 +338,12 @@
> > sna_video_textured_query(ddQueryImageAttributes_ARGS)
> >  			pitches[0] = size;
> >  		size *= *h;
> >  		break;
> > +	case FOURCC_AYUV:
> > +		size = *w << 2;
> > +		if (pitches)
> > +			pitches[0] = size;
> > +		size *= *h;
> > +		break;
> >  	case FOURCC_XVMC:
> >  		*h = (*h + 1) & ~1;
> >  		size = sizeof(uint32_t);
> > -- 
> > 2.17.1
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
>
Ville Syrjälä Nov. 9, 2018, 1:24 p.m. UTC | #6
On Fri, Nov 09, 2018 at 10:18:50AM +0200, Lisovskiy, Stanislav wrote:
> On Thu, 2018-11-08 at 19:47 +0200, Ville Syrjälä wrote:
> > On Fri, Nov 02, 2018 at 12:06:03PM +0200, Stanislav Lisovskiy wrote:
> > > v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
> > >     Added comment about AYUV byte ordering in Gstreamer.
> > > 
> > > v3: Removed sna_composite_op flags related change to the separate
> > > patch.
> > > 
> > > v4: Fixed review comments, done code refactoring
> > > 
> > > v5: Fixed following review comments:
> > >     - Fixed comment in shader code for ayuv kernel.
> > >     - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels.
> > >     - Removed duplicate gen9_kernel parameter, left from previous
> > > patches
> > >     - Added colorspace handling for new AYUV kernel
> > >     - Fixed naming of sna_copy_packed_data_ayuv to
> > > sna_copy_ayuv_data
> > >     - Started using standard bswap_32 function for byte swapping in
> > > sna_copy_ayuv_data
> > >     - Removed redundant code in sna_copy_ayuv_data so that it looks
> > > more neat
> > >     - Fixed XVIMAGE_AYUV structure initialization to contain proper
> > > byte sequence for GST
> > >     - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888
> > >     - Fixed AYUV advertisement for all platforms
> > >     - Removed unnecessary RGB888 declaration.
> > > 
> > > v6:
> > >     - Fixed surface format not to use alpha as supposed
> > >     - Now doing byte swapping always during copy
> > >     - Changed hack, required for GST to work to be at one place
> > >     - Fixed invalid sampling values for XVIMAGE_AYUV
> > >     - Fixed sprite format checking order and images_ayuv
> > > definition.
> > > 
> > > Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> > > ---
> > >  src/render_program/Makefile.am                |  2 +
> > >  .../exa_wm_src_sample_argb_ayuv.g8a           | 76
> > > ++++++++++++++++
> > >  .../exa_wm_src_sample_argb_ayuv.g8b           |  8 ++
> > >  src/sna/gen9_render.c                         | 24 ++++-
> > >  src/sna/sna_render.h                          |  3 +
> > >  src/sna/sna_video.c                           | 89
> > > ++++++++++++++++++-
> > >  src/sna/sna_video.h                           | 20 +++++
> > >  src/sna/sna_video_sprite.c                    | 20 ++++-
> > >  src/sna/sna_video_textured.c                  |  7 ++
> > >  9 files changed, 244 insertions(+), 5 deletions(-)
> > >  create mode 100644
> > > src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> > >  create mode 100644
> > > src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> > > 
> > > diff --git a/src/render_program/Makefile.am
> > > b/src/render_program/Makefile.am
> > > index dc58138f..e35ffa52 100644
> > > --- a/src/render_program/Makefile.am
> > > +++ b/src/render_program/Makefile.am
> > > @@ -196,6 +196,7 @@ INTEL_G7B =				\
> > >  INTEL_G8A =				\
> > >  	exa_wm_src_affine.g8a 		\
> > >  	exa_wm_src_sample_argb.g8a 	\
> > > +	exa_wm_src_sample_argb_ayuv.g8a \
> > >  	exa_wm_src_sample_nv12.g8a 	\
> > >  	exa_wm_src_sample_planar.g8a 	\
> > >  	exa_wm_write.g8a 		\
> > > @@ -205,6 +206,7 @@ INTEL_G8A =				\
> > >  
> > >  INTEL_G8B =				\
> > >  	exa_wm_src_affine.g8b 		\
> > > +	exa_wm_src_sample_argb_ayuv.g8b \
> > >  	exa_wm_src_sample_argb.g8b 	\
> > >  	exa_wm_src_sample_nv12.g8b 	\
> > >  	exa_wm_src_sample_planar.g8b 	\
> > > diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> > > b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> > > new file mode 100644
> > > index 00000000..c0b84c2e
> > > --- /dev/null
> > > +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> > > @@ -0,0 +1,76 @@
> > > +/*
> > > + * Copyright © 2006 Intel Corporation
> > > + *
> > > + * Permission is hereby granted, free of charge, to any person
> > > obtaining a
> > > + * copy of this software and associated documentation files (the
> > > "Software"),
> > > + * to deal in the Software without restriction, including without
> > > limitation
> > > + * the rights to use, copy, modify, merge, publish, distribute,
> > > sublicense,
> > > + * and/or sell copies of the Software, and to permit persons to
> > > whom the
> > > + * Software is furnished to do so, subject to the following
> > > conditions:
> > > + *
> > > + * The above copyright notice and this permission notice
> > > (including the next
> > > + * paragraph) shall be included in all copies or substantial
> > > portions of the
> > > + * Software.
> > > + *
> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> > > EXPRESS OR
> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> > > MERCHANTABILITY,
> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> > > EVENT SHALL
> > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
> > > DAMAGES OR OTHER
> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > > ARISING
> > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > > OTHER DEALINGS
> > > + * IN THE SOFTWARE.
> > > + *
> > > + * Authors:
> > > + *    Wang Zhenyu <zhenyu.z.wang@intel.com>
> > > + *    Keith Packard <keithp@keithp.com>
> > > + */
> > > +
> > > +/* Sample the src surface */
> > > +
> > > +include(`exa_wm.g4i')
> > > +
> > > +undefine(`src_msg')
> > > +undefine(`src_msg_ind')
> > > +
> > > +define(`src_msg',       `g65')
> > > +define(`src_msg_ind',   `65')
> > > +
> > > +/* prepare sampler read back gX register, which would be written
> > > back to output */
> > > +
> > > +/* use simd16 sampler, param 0 is u, param 1 is v. */
> > > +/* 'payload' loading, assuming tex coord start from g4 */
> > > +
> > > +/* load argb */
> > > +mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
> > > +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start
> > > reg*/
> > > +
> > > +/* src_msg will be copied with g0, as it contains send desc */
> > > +/* emit sampler 'send' cmd */
> > > +send (16) src_msg_ind	/* msg reg index */
> > > +	src_sample_base<1>UW /* readback */
> > > +	null
> > > +	sampler (1,0,F)	/* sampler message description,
> > > (binding_table,sampler_index,datatype)
> > > +				/* here(src->dst) we should use
> > > src_sampler and src_surface */
> > > +	mlen 5 rlen 8 { align1 };   /* required message len 5,
> > > readback len 8 */
> > > +
> > > +/*
> > > + * Have to change bytes order, because the only
> > > + * player which supports AYUV format currently is
> > > + * Gstreamer and it supports in bad way, even though
> > > + * spec says MSB:AYUV, we get the bytes opposite way.
> > > + * We swap bytes both for sprite and texture modes during copy.
> > > + * So here we get argb which then becomes 1bgr.
> > > + */
> > > +mov (16) src_sample_a<1>UD src_sample_b<1>UD  { align1 };
> > > +mov (16) src_sample_b<1>UD src_sample_g<1>UD  { align1 };
> > > +mov (16) src_sample_g<1>UD src_sample_r<1>UD  { align1 };
> > > +mov (16) src_sample_r<1>UD src_sample_a<1>UD  { align1 };
> > > +mov (16) src_sample_a<1>F 1.0F;
> > > +
> > > +
> > > +
> > > +
> > > +
> > > +
> > > +
> > > +
> > > diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> > > b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> > > new file mode 100644
> > > index 00000000..f3ac4959
> > > --- /dev/null
> > > +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> > > @@ -0,0 +1,8 @@
> > > +   { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
> > > +   { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 },
> > > +   { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 },
> > > +   { 0x00800001, 0x22800208, 0x00200240, 0x00000000 },
> > > +   { 0x00800001, 0x22400208, 0x00200200, 0x00000000 },
> > > +   { 0x00800001, 0x22000208, 0x002001c0, 0x00000000 },
> > > +   { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 },
> > > +   { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
> > > diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
> > > index eb22b642..90707b1f 100644
> > > --- a/src/sna/gen9_render.c
> > > +++ b/src/sna/gen9_render.c
> > > @@ -129,6 +129,20 @@ static const uint32_t
> > > ps_kernel_planar_bt709[][4] = {
> > >  #include "exa_wm_write.g8b"
> > >  };
> > >  
> > > +static const uint32_t ps_kernel_ayuv_bt601[][4] = {
> > > +#include "exa_wm_src_affine.g8b"
> > > +#include "exa_wm_src_sample_argb_ayuv.g8b"
> > > +#include "exa_wm_yuv_rgb_bt601.g8b"
> > > +#include "exa_wm_write.g8b"
> > > +};
> > > +
> > > +static const uint32_t ps_kernel_ayuv_bt709[][4] = {
> > > +#include "exa_wm_src_affine.g8b"
> > > +#include "exa_wm_src_sample_argb_ayuv.g8b"
> > > +#include "exa_wm_yuv_rgb_bt709.g8b"
> > > > +#include "exa_wm_write.g8b"
> > > +};
> > > +
> > >  static const uint32_t ps_kernel_nv12_bt709[][4] = {
> > >  #include "exa_wm_src_affine.g8b"
> > >  #include "exa_wm_src_sample_nv12.g8b"
> > > @@ -177,6 +191,8 @@ static const struct wm_kernel_info {
> > >  	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
> > >  	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
> > >  	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
> > > +	KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2),
> > > +	KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2),
> > >  	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
> > >  #endif
> > >  };
> > > @@ -2552,7 +2568,6 @@ gen9_render_composite(struct sna *sna,
> > >  							     tmp-
> > > >mask.bo != NULL,
> > >  							     tmp-
> > > >has_component_alpha,
> > >  							     tmp-
> > > >is_affine);
> > > -
> > >  	tmp->blt   = gen9_render_composite_blt;
> > >  	tmp->box   = gen9_render_composite_box;
> > >  	tmp->boxes = gen9_render_composite_boxes__blt;
> > > @@ -3853,6 +3868,8 @@ static void gen9_emit_video_state(struct sna
> > > *sna,
> > >  			src_surf_format[0] =
> > > SURFACEFORMAT_B8G8R8X8_UNORM;
> > >  		else if (frame->id == FOURCC_UYVY)
> > >  			src_surf_format[0] =
> > > SURFACEFORMAT_YCRCB_SWAPY;
> > > +		else if (is_ayuv_fourcc(frame->id))
> > > +			src_surf_format[0] =
> > > SURFACEFORMAT_B8G8R8X8_UNORM;
> > >  		else
> > >  			src_surf_format[0] =
> > > SURFACEFORMAT_YCRCB_NORMAL;
> > >  
> > > @@ -3903,6 +3920,11 @@ static unsigned select_video_kernel(const
> > > struct sna_video *video,
> > >  	case FOURCC_RGB565:
> > >  		return GEN9_WM_KERNEL_VIDEO_RGB;
> > >  
> > > +	case FOURCC_AYUV:
> > > +		return video->colorspace ?
> > > +			GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
> > > +			GEN9_WM_KERNEL_VIDEO_AYUV_BT601;
> > > +
> > >  	default:
> > >  		return video->colorspace ?
> > >  			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
> > > diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
> > > index a4e5b56a..891fc905 100644
> > > --- a/src/sna/sna_render.h
> > > +++ b/src/sna/sna_render.h
> > > @@ -617,6 +617,9 @@ enum {
> > >  	GEN9_WM_KERNEL_VIDEO_NV12_BT709,
> > >  	GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
> > >  
> > > +	GEN9_WM_KERNEL_VIDEO_AYUV_BT601,
> > > +	GEN9_WM_KERNEL_VIDEO_AYUV_BT709,
> > > +
> > >  	GEN9_WM_KERNEL_VIDEO_RGB,
> > >  	GEN9_WM_KERNEL_COUNT
> > >  };
> > > diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
> > > index 55405f81..d4ed8464 100644
> > > --- a/src/sna/sna_video.c
> > > +++ b/src/sna/sna_video.c
> > > @@ -59,6 +59,7 @@
> > >  #include "intel_options.h"
> > >  
> > >  #include <xf86xv.h>
> > > +#include <byteswap.h>
> > >  
> > >  #ifdef SNA_XVMC
> > >  #define _SNA_XVMC_SERVER_
> > > @@ -281,6 +282,7 @@ sna_video_frame_set_rotation(struct sna_video
> > > *video,
> > >  	} else {
> > >  		switch (frame->id) {
> > >  		case FOURCC_RGB888:
> > > +		case FOURCC_AYUV:
> > >  			if (rotation & (RR_Rotate_90 |
> > > RR_Rotate_270)) {
> > >  				frame->pitch[0] = ALIGN((height <<
> > > 2), align);
> > >  				frame->size = (int)frame->pitch[0] 
> > > * width;
> > > @@ -584,6 +586,89 @@ sna_copy_packed_data(struct sna_video *video,
> > >  	}
> > >  }
> > >  
> > > +static void
> > > +sna_copy_ayuv_data(struct sna_video *video,
> > > +		   const struct sna_video_frame *frame,
> > > +		   const uint8_t *buf,
> > > +		   uint8_t *dst,
> > > +		   bool reverse_bytes)
> > 
> > Why this parameter? It's always true no?
> 
> Yes, it is currently always true. I just thought we might want
> to have a way to control the byte order in case some player other
> than Gstreamer is used and it provides the correct order.

If that happens it's easy enough to provide a second copy routine.

> 
> > 
> > > +{
> > > +	int pitch = frame->width << 2;
> > > +	const uint8_t *src, *s;
> > > +	const uint32_t *src_dw;
> > > +	uint32_t *dst_dw = (uint32_t *)dst;
> > > +	int x, y, w, h;
> > > +	int i, j;
> > > +	uint32_t dw;
> > > +
> > > +	if (video->textured) {
> > > +		/* XXX support copying cropped extents */
> > > +		x = y = 0;
> > > +		w = frame->width;
> > > +		h = frame->height;
> > > +	} else {
> > > +		x = frame->image.x1;
> > > +		y = frame->image.y1;
> > > +		w = frame->image.x2 - frame->image.x1;
> > > +		h = frame->image.y2 - frame->image.y1;
> > > +	}
> > > +
> > > +	src = buf + (y * pitch) + (x << 2);
> > > +	src_dw = (uint32_t *)src;
> > > +
> > > +	if (reverse_bytes) {
> > > +		/*
> > > +		 * Have to reverse bytes order, because the only
> > > +		 * player which supports AYUV format currently is
> > > +		 * Gstreamer and it supports in bad way, even
> > > though
> > > +		 * spec says MSB:AYUV, we get the bytes opposite
> > > way.
> > > +		 */
> > > +		for (i = 0; i < h; i++) {
> > > +			for (j = 0; j < w; j++) {
> > > +				uint32_t reverse_dw;
> > > +				dw = src_dw[i * w + j];
> > > +				reverse_dw = bswap_32(dw);
> > > +				dst_dw[i * w + j] = reverse_dw;
> > > +			}
> > > +		}
> > > +	}
> > 
> > Two loops through the data isn't quite what I was thinking.
> > 
> > > +
> > > +	switch (frame->rotation) {
> > > +	case RR_Rotate_0:
> > > +		for (i = 0; i < h; i++) {
> > > +			for (j = 0; j < w; j++) {
> > > +				dw = dst_dw[i * w + j];
> > > +				dst_dw[i * w + j] = dw;
> > 
> > What I had in mind was just
> > 
> > dst_dw[..] = bswap_32(src_dw[...]);
> > 
> > here.
> 
> I think this is how it was in the previous revisions: in each of
> those case labels I was doing the bswap in the same loop.
> I thought your idea was to do it separately :)
> 
> Should I then put it back so that this loop looks like this?
> 
> for (i = 0; i < h; i++) {
> 	for (j = 0; j < w; j++) {
> 		if (reverse_bytes)
> 			dst_dw[i * w + j] = bswap_32(src_dw[i * w +j]);
> 		else
> 			dst_dw[i * w + j] = src_dw[i * w + j];

No. I don't think we want the extra branch here. Just swap
unconditionally.

> 	}
> }
> 
> However, if I remove the reverse_bytes parameter from here, we will
> still need to do something once some other player appears that uses
> the correct AYUV byte order; otherwise only Gst will play video
> properly with this format.
> 
> > 
> > > +			}
> > > +		}
> > > +		break;
> > > +	case RR_Rotate_90:
> > > +		for (i = 0; i < h; i++) {
> > > +			for (j = 0; j < w; j++) {
> > > +				dw = dst_dw[i * w + j];
> > > +				dst_dw[(w - j - 1) * h + i] = dw;
> > > +			}
> > > +		}
> > > +		break;
> > > +	case RR_Rotate_180:
> > > +		for (i = 0; i < h; i++) {
> > > +			for (j = 0; j < w; j++) {
> > > +				dw = dst_dw[i * w + j];
> > > +				dst_dw[(h - i - 1) * w + w - j -
> > > 1] = dw;
> > > +			}
> > > +		}
> > > +		break;
> > > +	case RR_Rotate_270:
> > > +		for (i = 0; i < h; i++) {
> > > +			for (j = 0; j < w; j++) {
> > > +				dw = dst_dw[i * w + j];;
> > > +				dst_dw[(w - j - 1) * h + i] = dw;
> > > +			}
> > > +		}
> > > +		break;
> > > +	}
> > > +}
> > > +
> > >  bool
> > >  sna_video_copy_data(struct sna_video *video,
> > >  		    struct sna_video_frame *frame,
> > > @@ -604,7 +689,7 @@ sna_video_copy_data(struct sna_video *video,
> > >  	assert(frame->size);
> > >  
> > >  	/* In the common case, we can simply the upload in a
> > > single pwrite */
> > > -	if (frame->rotation == RR_Rotate_0 && !video->tiled) {
> > > +	if (frame->rotation == RR_Rotate_0 && !video->tiled &&
> > > !is_ayuv_fourcc(frame->id)) {
> > >  		DBG(("%s: unrotated, untiled fast paths: is-
> > > planar?=%d\n",
> > >  		     __FUNCTION__, is_planar_fourcc(frame->id)));
> > >  		if (is_nv12_fourcc(frame->id)) {
> > > @@ -709,6 +794,8 @@ use_gtt: /* copy data, must use GTT so that we
> > > keep the overlay uncached */
> > >  		sna_copy_nv12_data(video, frame, buf, dst);
> > >  	else if (is_planar_fourcc(frame->id))
> > >  		sna_copy_planar_data(video, frame, buf, dst);
> > > +	else if (is_ayuv_fourcc(frame->id))
> > > +		sna_copy_ayuv_data(video, frame, buf, dst, true);
> > >  	else
> > >  		sna_copy_packed_data(video, frame, buf, dst);
> > >  
> > > diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
> > > index bbd3f0fd..a3ffdc0b 100644
> > > --- a/src/sna/sna_video.h
> > > +++ b/src/sna/sna_video.h
> > > @@ -39,6 +39,7 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > >  #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) +
> > > 'R')
> > >  #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) +
> > > 'R')
> > >  #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
> > > +#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
> > >  
> > >  /*
> > >   * Below, a dummy picture type that is used in XvPutImage
> > > @@ -79,6 +80,15 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> > >  	XvTopToBottom \
> > >  }
> > >  
> > > +#define XVIMAGE_AYUV { \
> > > +	FOURCC_AYUV, XvYUV, LSBFirst, \
> > > +	{'A', 'Y', 'U', 'V', 0x00, 0x00, 0x00, 0x10, 0x80, 0x00,
> > > 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71}, \
> > > +	32, XvPacked, 1, 0, 0, 0, 0, 8, 8, 8, 1, 1, 1, 1, 1, 1, \
> > > +	{'A', 'Y', 'U', 'V', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> > > 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
> > > +	XvTopToBottom \
> > > +}
> > > +
> > > +
> > >  struct sna_video {
> > >  	struct sna *sna;
> > >  
> > > @@ -189,6 +199,16 @@ static inline int is_nv12_fourcc(int id)
> > >  	}
> > >  }
> > >  
> > > +static inline int is_ayuv_fourcc(int id)
> > > +{
> > > +	switch (id) {
> > > +	case FOURCC_AYUV:
> > > +		return 1;
> > > +	default:
> > > +		return 0;
> > > +	}
> > > +}
> > > +
> > >  bool
> > >  sna_video_clip_helper(struct sna_video *video,
> > >  		      struct sna_video_frame *frame,
> > > diff --git a/src/sna/sna_video_sprite.c
> > > b/src/sna/sna_video_sprite.c
> > > index 8b7ae8ae..3780dc0e 100644
> > > --- a/src/sna/sna_video_sprite.c
> > > +++ b/src/sna/sna_video_sprite.c
> > > @@ -47,7 +47,7 @@
> > >  #define DRM_FORMAT_YUYV         fourcc_code('Y', 'U', 'Y', 'V') /*
> > > [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
> > >  #define DRM_FORMAT_UYVY         fourcc_code('U', 'Y', 'V', 'Y') /*
> > > [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
> > >  #define DRM_FORMAT_NV12         fourcc_code('N', 'V', '1', '2') /*
> > > 2x2 subsampled Cr:Cb plane */
> > > -
> > > +#define DRM_FORMAT_XYUV8888     fourcc_code('X', 'Y', 'U', 'V') /*
> > > [31:0] x:Y:U:V 8:8:8:8 little endian */
> > >  #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
> > >  				    (sna)->kgem.gen >= 0110)
> > >  
> > > @@ -79,6 +79,8 @@ static const XvImageRec images_rgb565[] = {
> > > XVIMAGE_YUY2, XVIMAGE_UYVY,
> > >  					    XVMC_RGB888,
> > > XVMC_RGB565 };
> > >  static const XvImageRec images_nv12[] = { XVIMAGE_YUY2,
> > > XVIMAGE_UYVY,
> > >  					  XVIMAGE_NV12,
> > > XVMC_RGB888, XVMC_RGB565 };
> > > +static const XvImageRec images_ayuv[] = { XVIMAGE_AYUV,
> > > XVIMAGE_YUY2, XVIMAGE_UYVY,
> > > +					  XVIMAGE_NV12,
> > > XVMC_RGB888, XVMC_RGB565 };
> > >  static const XvAttributeRec attribs[] = {
> > >  	{ XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE"
> > > }, /* BT.601, BT.709 */
> > >  	{ XvSettable | XvGettable, 0, 0xffffff, (char
> > > *)"XV_COLORKEY" },
> > > @@ -364,6 +366,10 @@ sna_video_sprite_show(struct sna *sna,
> > >  		case FOURCC_UYVY:
> > >  			f.pixel_format = DRM_FORMAT_UYVY;
> > >  			break;
> > > +		case FOURCC_AYUV:
> > > +			/* i915 doesn't support alpha, so we use
> > > XYUV */
> > > +			f.pixel_format = DRM_FORMAT_XYUV8888;
> > > +			break;
> > >  		case FOURCC_YUY2:
> > >  		default:
> > >  			f.pixel_format = DRM_FORMAT_YUYV;
> > > @@ -705,7 +711,12 @@ static int
> > > sna_video_sprite_query(ddQueryImageAttributes_ARGS)
> > >  		tmp *= (*h >> 1);
> > >  		size += tmp;
> > >  		break;
> > > -
> > > +	case FOURCC_AYUV:
> > > +		tmp = *w << 2;
> > > +		if (pitches)
> > > +			pitches[0] = tmp;
> > > +		size = *h * tmp;
> > > +		break;
> > >  	default:
> > >  		*w = (*w + 1) & ~1;
> > >  		*h = (*h + 1) & ~1;
> > > @@ -805,7 +816,10 @@ void sna_video_sprite_setup(struct sna *sna,
> > > ScreenPtr screen)
> > >  	adaptor->nAttributes = ARRAY_SIZE(attribs);
> > >  	adaptor->pAttributes = (XvAttributeRec *)attribs;
> > >  
> > > -	if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
> > > +	if (sna_has_sprite_format(sna, DRM_FORMAT_XYUV8888)) {
> > > +		adaptor->pImages = (XvImageRec *)images_ayuv;
> > > +		adaptor->nImages = ARRAY_SIZE(images_ayuv);
> > > +	} else if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
> > >  		adaptor->pImages = (XvImageRec *)images_nv12;
> > >  		adaptor->nImages = ARRAY_SIZE(images_nv12);
> > >  	} else if (sna_has_sprite_format(sna, DRM_FORMAT_RGB565))
> > > {
> > > diff --git a/src/sna/sna_video_textured.c
> > > b/src/sna/sna_video_textured.c
> > > index a784fe2e..46c213ef 100644
> > > --- a/src/sna/sna_video_textured.c
> > > +++ b/src/sna/sna_video_textured.c
> > > @@ -68,6 +68,7 @@ static const XvImageRec gen4_Images[] = {
> > >  	XVIMAGE_I420,
> > >  	XVIMAGE_NV12,
> > >  	XVIMAGE_UYVY,
> > > +	XVIMAGE_AYUV,
> > >  	XVMC_YUV,
> > >  };
> > >  
> > > @@ -337,6 +338,12 @@
> > > sna_video_textured_query(ddQueryImageAttributes_ARGS)
> > >  			pitches[0] = size;
> > >  		size *= *h;
> > >  		break;
> > > +	case FOURCC_AYUV:
> > > +		size = *w << 2;
> > > +		if (pitches)
> > > +			pitches[0] = size;
> > > +		size *= *h;
> > > +		break;
> > >  	case FOURCC_XVMC:
> > >  		*h = (*h + 1) & ~1;
> > >  		size = sizeof(uint32_t);
> > > -- 
> > > 2.17.1
> > > 
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > 
> > 
> -- 
> Best Regards,
> 
> Lisovskiy Stanislav
diff mbox series

Patch

diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
index dc58138f..e35ffa52 100644
--- a/src/render_program/Makefile.am
+++ b/src/render_program/Makefile.am
@@ -196,6 +196,7 @@  INTEL_G7B =				\
 INTEL_G8A =				\
 	exa_wm_src_affine.g8a 		\
 	exa_wm_src_sample_argb.g8a 	\
+	exa_wm_src_sample_argb_ayuv.g8a \
 	exa_wm_src_sample_nv12.g8a 	\
 	exa_wm_src_sample_planar.g8a 	\
 	exa_wm_write.g8a 		\
@@ -205,6 +206,7 @@  INTEL_G8A =				\
 
 INTEL_G8B =				\
 	exa_wm_src_affine.g8b 		\
+	exa_wm_src_sample_argb_ayuv.g8b \
 	exa_wm_src_sample_argb.g8b 	\
 	exa_wm_src_sample_nv12.g8b 	\
 	exa_wm_src_sample_planar.g8b 	\
diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
new file mode 100644
index 00000000..c0b84c2e
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
@@ -0,0 +1,76 @@ 
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang@intel.com>
+ *    Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+undefine(`src_msg')
+undefine(`src_msg_ind')
+
+define(`src_msg',       `g65')
+define(`src_msg_ind',   `65')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
+
+/* src_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) src_msg_ind	/* msg reg index */
+	src_sample_base<1>UW /* readback */
+	null
+	sampler (1,0,F)	/* sampler message description, (binding_table,sampler_index,datatype)
+				/* here(src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
+
+/*
+ * Have to change bytes order, because the only
+ * player which supports AYUV format currently is
+ * Gstreamer and it supports in bad way, even though
+ * spec says MSB:AYUV, we get the bytes opposite way.
+ * We swap bytes both for sprite and texture modes during copy.
+ * So here we get argb which then becomes 1bgr.
+ */
+mov (16) src_sample_a<1>UD src_sample_b<1>UD  { align1 };
+mov (16) src_sample_b<1>UD src_sample_g<1>UD  { align1 };
+mov (16) src_sample_g<1>UD src_sample_r<1>UD  { align1 };
+mov (16) src_sample_r<1>UD src_sample_a<1>UD  { align1 };
+mov (16) src_sample_a<1>F 1.0F;
+
+
+
+
+
+
+
+
diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
new file mode 100644
index 00000000..f3ac4959
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
@@ -0,0 +1,8 @@ 
+   { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 },
+   { 0x00800001, 0x22800208, 0x00200240, 0x00000000 },
+   { 0x00800001, 0x22400208, 0x00200200, 0x00000000 },
+   { 0x00800001, 0x22000208, 0x002001c0, 0x00000000 },
+   { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 },
+   { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
index eb22b642..90707b1f 100644
--- a/src/sna/gen9_render.c
+++ b/src/sna/gen9_render.c
@@ -129,6 +129,20 @@  static const uint32_t ps_kernel_planar_bt709[][4] = {
 #include "exa_wm_write.g8b"
 };
 
+static const uint32_t ps_kernel_ayuv_bt601[][4] = {
+#include "exa_wm_src_affine.g8b"
+#include "exa_wm_src_sample_argb_ayuv.g8b"
+#include "exa_wm_yuv_rgb_bt601.g8b"
+#include "exa_wm_write.g8b"
+};
+
+static const uint32_t ps_kernel_ayuv_bt709[][4] = {
+#include "exa_wm_src_affine.g8b"
+#include "exa_wm_src_sample_argb_ayuv.g8b"
+#include "exa_wm_yuv_rgb_bt709.g8b"
+#include "exa_wm_write.g8b"
+};
+
 static const uint32_t ps_kernel_nv12_bt709[][4] = {
 #include "exa_wm_src_affine.g8b"
 #include "exa_wm_src_sample_nv12.g8b"
@@ -177,6 +191,8 @@  static const struct wm_kernel_info {
 	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
 	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
 	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
+	KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2),
+	KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2),
 	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
 #endif
 };
@@ -2552,7 +2568,6 @@  gen9_render_composite(struct sna *sna,
 							     tmp->mask.bo != NULL,
 							     tmp->has_component_alpha,
 							     tmp->is_affine);
-
 	tmp->blt   = gen9_render_composite_blt;
 	tmp->box   = gen9_render_composite_box;
 	tmp->boxes = gen9_render_composite_boxes__blt;
@@ -3853,6 +3868,8 @@  static void gen9_emit_video_state(struct sna *sna,
 			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
 		else if (frame->id == FOURCC_UYVY)
 			src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY;
+		else if (is_ayuv_fourcc(frame->id))
+			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
 		else
 			src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL;
 
@@ -3903,6 +3920,11 @@  static unsigned select_video_kernel(const struct sna_video *video,
 	case FOURCC_RGB565:
 		return GEN9_WM_KERNEL_VIDEO_RGB;
 
+	case FOURCC_AYUV:
+		return video->colorspace ?
+			GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
+			GEN9_WM_KERNEL_VIDEO_AYUV_BT601;
+
 	default:
 		return video->colorspace ?
 			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index a4e5b56a..891fc905 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -617,6 +617,9 @@  enum {
 	GEN9_WM_KERNEL_VIDEO_NV12_BT709,
 	GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
 
+	GEN9_WM_KERNEL_VIDEO_AYUV_BT601,
+	GEN9_WM_KERNEL_VIDEO_AYUV_BT709,
+
 	GEN9_WM_KERNEL_VIDEO_RGB,
 	GEN9_WM_KERNEL_COUNT
 };
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index 55405f81..d4ed8464 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -59,6 +59,7 @@ 
 #include "intel_options.h"
 
 #include <xf86xv.h>
+#include <byteswap.h>
 
 #ifdef SNA_XVMC
 #define _SNA_XVMC_SERVER_
@@ -281,6 +282,7 @@  sna_video_frame_set_rotation(struct sna_video *video,
 	} else {
 		switch (frame->id) {
 		case FOURCC_RGB888:
+		case FOURCC_AYUV:
 			if (rotation & (RR_Rotate_90 | RR_Rotate_270)) {
 				frame->pitch[0] = ALIGN((height << 2), align);
 				frame->size = (int)frame->pitch[0] * width;
@@ -584,6 +586,89 @@  sna_copy_packed_data(struct sna_video *video,
 	}
 }
 
+static void
+sna_copy_ayuv_data(struct sna_video *video,
+		   const struct sna_video_frame *frame,
+		   const uint8_t *buf,
+		   uint8_t *dst,
+		   bool reverse_bytes)
+{
+	int pitch = frame->width << 2;
+	const uint8_t *src, *s;
+	const uint32_t *src_dw;
+	uint32_t *dst_dw = (uint32_t *)dst;
+	int x, y, w, h;
+	int i, j;
+	uint32_t dw;
+
+	if (video->textured) {
+		/* XXX support copying cropped extents */
+		x = y = 0;
+		w = frame->width;
+		h = frame->height;
+	} else {
+		x = frame->image.x1;
+		y = frame->image.y1;
+		w = frame->image.x2 - frame->image.x1;
+		h = frame->image.y2 - frame->image.y1;
+	}
+
+	src = buf + (y * pitch) + (x << 2);
+	src_dw = (uint32_t *)src;
+
+	if (reverse_bytes) {
+		/*
+		 * Have to reverse bytes order, because the only
+		 * player which supports AYUV format currently is
+		 * Gstreamer and it supports in bad way, even though
+		 * spec says MSB:AYUV, we get the bytes opposite way.
+		 */
+		for (i = 0; i < h; i++) {
+			for (j = 0; j < w; j++) {
+				uint32_t reverse_dw;
+				dw = src_dw[i * w + j];
+				reverse_dw = bswap_32(dw);
+				dst_dw[i * w + j] = reverse_dw;
+			}
+		}
+	}
+
+	switch (frame->rotation) {
+	case RR_Rotate_0:
+		for (i = 0; i < h; i++) {
+			for (j = 0; j < w; j++) {
+				dw = dst_dw[i * w + j];
+				dst_dw[i * w + j] = dw;
+			}
+		}
+		break;
+	case RR_Rotate_90:
+		for (i = 0; i < h; i++) {
+			for (j = 0; j < w; j++) {
+				dw = dst_dw[i * w + j];
+				dst_dw[(w - j - 1) * h + i] = dw;
+			}
+		}
+		break;
+	case RR_Rotate_180:
+		for (i = 0; i < h; i++) {
+			for (j = 0; j < w; j++) {
+				dw = dst_dw[i * w + j];
+				dst_dw[(h - i - 1) * w + w - j - 1] = dw;
+			}
+		}
+		break;
+	case RR_Rotate_270:
+		for (i = 0; i < h; i++) {
+			for (j = 0; j < w; j++) {
+				dw = dst_dw[i * w + j];;
+				dst_dw[(w - j - 1) * h + i] = dw;
+			}
+		}
+		break;
+	}
+}
+
 bool
 sna_video_copy_data(struct sna_video *video,
 		    struct sna_video_frame *frame,
@@ -604,7 +689,7 @@  sna_video_copy_data(struct sna_video *video,
 	assert(frame->size);
 
 	/* In the common case, we can simply the upload in a single pwrite */
-	if (frame->rotation == RR_Rotate_0 && !video->tiled) {
+	if (frame->rotation == RR_Rotate_0 && !video->tiled && !is_ayuv_fourcc(frame->id)) {
 		DBG(("%s: unrotated, untiled fast paths: is-planar?=%d\n",
 		     __FUNCTION__, is_planar_fourcc(frame->id)));
 		if (is_nv12_fourcc(frame->id)) {
@@ -709,6 +794,8 @@  use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */
 		sna_copy_nv12_data(video, frame, buf, dst);
 	else if (is_planar_fourcc(frame->id))
 		sna_copy_planar_data(video, frame, buf, dst);
+	else if (is_ayuv_fourcc(frame->id))
+		sna_copy_ayuv_data(video, frame, buf, dst, true);
 	else
 		sna_copy_packed_data(video, frame, buf, dst);
 
diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
index bbd3f0fd..a3ffdc0b 100644
--- a/src/sna/sna_video.h
+++ b/src/sna/sna_video.h
@@ -39,6 +39,7 @@  THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) + 'R')
 #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) + 'R')
 #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
+#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
 
 /*
  * Below, a dummy picture type that is used in XvPutImage
@@ -79,6 +80,15 @@  THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 	XvTopToBottom \
 }
 
+#define XVIMAGE_AYUV { \
+	FOURCC_AYUV, XvYUV, LSBFirst, /* id, type, byte order */ \
+	{'A', 'Y', 'U', 'V', 0x00, 0x00, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71}, /* 16-byte guid, led by the fourcc characters */ \
+	32, XvPacked, 1, 0, 0, 0, 0, 8, 8, 8, 1, 1, 1, 1, 1, 1, /* presumably bpp, format, planes, RGB depth+masks, Y/U/V sample bits, horz+vert periods -- confirm against XvImageRec */ \
+	{'A', 'Y', 'U', 'V', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* component order, zero-padded */ \
+	XvTopToBottom /* scanline order */ \
+}
+
+
 struct sna_video {
 	struct sna *sna;
 
@@ -189,6 +199,16 @@  static inline int is_nv12_fourcc(int id)
 	}
 }
 
+/*
+ * Report whether an XvImage id names the packed AYUV format.
+ *
+ * Returns 1 for FOURCC_AYUV and 0 for every other id.
+ */
+static inline int is_ayuv_fourcc(int id)
+{
+	return id == FOURCC_AYUV;
+}
+
 bool
 sna_video_clip_helper(struct sna_video *video,
 		      struct sna_video_frame *frame,
diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
index 8b7ae8ae..3780dc0e 100644
--- a/src/sna/sna_video_sprite.c
+++ b/src/sna/sna_video_sprite.c
@@ -47,7 +47,7 @@ 
 #define DRM_FORMAT_YUYV         fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
 #define DRM_FORMAT_UYVY         fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
 #define DRM_FORMAT_NV12         fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */
-
+#define DRM_FORMAT_XYUV8888     fourcc_code('X', 'Y', 'U', 'V') /* [31:0] x:Y:U:V 8:8:8:8 little endian */
 #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
 				    (sna)->kgem.gen >= 0110)
 
@@ -79,6 +79,8 @@  static const XvImageRec images_rgb565[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
 					    XVMC_RGB888, XVMC_RGB565 };
 static const XvImageRec images_nv12[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
 					  XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 };
+static const XvImageRec images_ayuv[] = { XVIMAGE_AYUV, XVIMAGE_YUY2, XVIMAGE_UYVY,
+					  XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 };
 static const XvAttributeRec attribs[] = {
 	{ XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE" }, /* BT.601, BT.709 */
 	{ XvSettable | XvGettable, 0, 0xffffff, (char *)"XV_COLORKEY" },
@@ -364,6 +366,10 @@  sna_video_sprite_show(struct sna *sna,
 		case FOURCC_UYVY:
 			f.pixel_format = DRM_FORMAT_UYVY;
 			break;
+		case FOURCC_AYUV:
+			/* i915 doesn't support alpha, so we use XYUV */
+			f.pixel_format = DRM_FORMAT_XYUV8888;
+			break;
 		case FOURCC_YUY2:
 		default:
 			f.pixel_format = DRM_FORMAT_YUYV;
@@ -705,7 +711,12 @@  static int sna_video_sprite_query(ddQueryImageAttributes_ARGS)
 		tmp *= (*h >> 1);
 		size += tmp;
 		break;
-
+	case FOURCC_AYUV:
+		tmp = *w << 2;
+		if (pitches)
+			pitches[0] = tmp;
+		size = *h * tmp;
+		break;
 	default:
 		*w = (*w + 1) & ~1;
 		*h = (*h + 1) & ~1;
@@ -805,7 +816,10 @@  void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen)
 	adaptor->nAttributes = ARRAY_SIZE(attribs);
 	adaptor->pAttributes = (XvAttributeRec *)attribs;
 
-	if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
+	if (sna_has_sprite_format(sna, DRM_FORMAT_XYUV8888)) {
+		adaptor->pImages = (XvImageRec *)images_ayuv;
+		adaptor->nImages = ARRAY_SIZE(images_ayuv);
+	} else if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
 		adaptor->pImages = (XvImageRec *)images_nv12;
 		adaptor->nImages = ARRAY_SIZE(images_nv12);
 	} else if (sna_has_sprite_format(sna, DRM_FORMAT_RGB565)) {
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index a784fe2e..46c213ef 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -68,6 +68,7 @@  static const XvImageRec gen4_Images[] = {
 	XVIMAGE_I420,
 	XVIMAGE_NV12,
 	XVIMAGE_UYVY,
+	XVIMAGE_AYUV,
 	XVMC_YUV,
 };
 
@@ -337,6 +338,12 @@  sna_video_textured_query(ddQueryImageAttributes_ARGS)
 			pitches[0] = size;
 		size *= *h;
 		break;
+	case FOURCC_AYUV:
+		size = *w << 2;
+		if (pitches)
+			pitches[0] = size;
+		size *= *h;
+		break;
 	case FOURCC_XVMC:
 		*h = (*h + 1) & ~1;
 		size = sizeof(uint32_t);