diff mbox series

[xf86-video-intel,v3,2/2] sna: Added AYUV format support for textured and sprite video adapters.

Message ID 20181009133017.23120-3-stanislav.lisovskiy@intel.com (mailing list archive)
State New, archived
Headers show
Series Added AYUV format support | expand

Commit Message

Lisovskiy, Stanislav Oct. 9, 2018, 1:30 p.m. UTC
v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
    Added comment about AYUV byte ordering in Gstreamer.

v3: Removed sna_composite_op flags related change to the separate patch.

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
---
 src/render_program/Makefile.am                |   2 +
 .../exa_wm_src_sample_argb_ayuv.g8a           |  60 +++++++
 .../exa_wm_src_sample_argb_ayuv.g8b           |   6 +
 src/sna/gen9_render.c                         |  27 ++++
 src/sna/sna_render.h                          |   4 +
 src/sna/sna_video.c                           | 147 ++++++++++++++++++
 src/sna/sna_video.h                           |  20 +++
 src/sna/sna_video_sprite.c                    |  19 ++-
 src/sna/sna_video_textured.c                  |   8 +
 9 files changed, 288 insertions(+), 5 deletions(-)
 create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8a
 create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8b

Comments

Ville Syrjälä Oct. 10, 2018, 7:47 p.m. UTC | #1
On Tue, Oct 09, 2018 at 04:30:17PM +0300, Stanislav Lisovskiy wrote:
> v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
>     Added comment about AYUV byte ordering in Gstreamer.
> 
> v3: Removed sna_composite_op flags related change to the separate patch.
> 
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
> ---
>  src/render_program/Makefile.am                |   2 +
>  .../exa_wm_src_sample_argb_ayuv.g8a           |  60 +++++++
>  .../exa_wm_src_sample_argb_ayuv.g8b           |   6 +
>  src/sna/gen9_render.c                         |  27 ++++
>  src/sna/sna_render.h                          |   4 +
>  src/sna/sna_video.c                           | 147 ++++++++++++++++++
>  src/sna/sna_video.h                           |  20 +++
>  src/sna/sna_video_sprite.c                    |  19 ++-
>  src/sna/sna_video_textured.c                  |   8 +
>  9 files changed, 288 insertions(+), 5 deletions(-)
>  create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8a
>  create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> 
> diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
> index dc58138f..e35ffa52 100644
> --- a/src/render_program/Makefile.am
> +++ b/src/render_program/Makefile.am
> @@ -196,6 +196,7 @@ INTEL_G7B =				\
>  INTEL_G8A =				\
>  	exa_wm_src_affine.g8a 		\
>  	exa_wm_src_sample_argb.g8a 	\
> +	exa_wm_src_sample_argb_ayuv.g8a \
>  	exa_wm_src_sample_nv12.g8a 	\
>  	exa_wm_src_sample_planar.g8a 	\
>  	exa_wm_write.g8a 		\
> @@ -205,6 +206,7 @@ INTEL_G8A =				\
>  
>  INTEL_G8B =				\
>  	exa_wm_src_affine.g8b 		\
> +	exa_wm_src_sample_argb_ayuv.g8b \
>  	exa_wm_src_sample_argb.g8b 	\
>  	exa_wm_src_sample_nv12.g8b 	\
>  	exa_wm_src_sample_planar.g8b 	\
> diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> new file mode 100644
> index 00000000..d79840ac
> --- /dev/null
> +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> @@ -0,0 +1,60 @@
> +/*
> + * Copyright © 2006 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Wang Zhenyu <zhenyu.z.wang@intel.com>
> + *    Keith Packard <keithp@keithp.com>
> + */
> +
> +/* Sample the src surface */
> +
> +include(`exa_wm.g4i')
> +
> +undefine(`src_msg')
> +undefine(`src_msg_ind')
> +
> +define(`src_msg',       `g65')
> +define(`src_msg_ind',   `65')
> +
> +/* prepare sampler read back gX register, which would be written back to output */
> +
> +/* use simd16 sampler, param 0 is u, param 1 is v. */
> +/* 'payload' loading, assuming tex coord start from g4 */
> +
> +/* load argb */
> +mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
> +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
> +
> +/* src_msg will be copied with g0, as it contains send desc */
> +/* emit sampler 'send' cmd */
> +send (16) src_msg_ind	/* msg reg index */
> +	src_sample_base<1>UW /* readback */
> +	null
> +	sampler (1,0,F)	/* sampler message description, (binding_table,sampler_index,datatype)
> +				/* here(src->dst) we should use src_sampler and src_surface */
> +	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
> +
> +/* Put CbCr into right places */

I guess this could use a comment that we follow the nonsense gstreamer
byte order here rather than the proper AYUV byte order. Or maybe it
would make more sense to just byte swap during the copy anyway.

> +mov (16) src_sample_b<1>UD src_sample_r<1>UD  { align1 };
> +mov (16) src_sample_r<1>UD src_sample_a<1>UD  { align1 };
> +mov (16) src_sample_a<1>F 1.0F;
> +
> diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> new file mode 100644
> index 00000000..4f439141
> --- /dev/null
> +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> @@ -0,0 +1,6 @@
> +   { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
> +   { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 },
> +   { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 },
> +   { 0x00800001, 0x22400208, 0x002001c0, 0x00000000 },
> +   { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 },
> +   { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
> diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
> index eb22b642..29447d25 100644
> --- a/src/sna/gen9_render.c
> +++ b/src/sna/gen9_render.c
> @@ -129,6 +129,20 @@ static const uint32_t ps_kernel_planar_bt709[][4] = {
>  #include "exa_wm_write.g8b"
>  };
>  
> +static const uint32_t ps_kernel_packed_ayuv_bt601[][4] = {
> +#include "exa_wm_src_affine.g8b"
> +#include "exa_wm_src_sample_argb_ayuv.g8b"
> +#include "exa_wm_yuv_rgb_bt601.g8b"
> +#include "exa_wm_write.g8b"
> +};
> +
> +static const uint32_t ps_kernel_packed_ayuv_bt709[][4] = {
> +#include "exa_wm_src_affine.g8b"
> +#include "exa_wm_src_sample_argb_ayuv.g8b"
> +#include "exa_wm_yuv_rgb_bt709.g8b"
> +#include "exa_wm_write.g8b"
> +};
> +
>  static const uint32_t ps_kernel_nv12_bt709[][4] = {
>  #include "exa_wm_src_affine.g8b"
>  #include "exa_wm_src_sample_nv12.g8b"
> @@ -177,6 +191,8 @@ static const struct wm_kernel_info {
>  	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
>  	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
>  	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
> +	KERNEL(VIDEO_PACKED_AYUV_BT601, ps_kernel_packed_ayuv_bt601, 2),
> +	KERNEL(VIDEO_PACKED_AYUV_BT709, ps_kernel_packed_ayuv_bt709, 2),

Maybe just VIDEO_AYUV_BT*. 

>  	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
>  #endif
>  };
> @@ -2553,6 +2569,10 @@ gen9_render_composite(struct sna *sna,
>  							     tmp->has_component_alpha,
>  							     tmp->is_affine);
>  
> +        tmp->gen9_kernel = gen9_choose_composite_kernel(tmp->op,
> +							    tmp->mask.bo != NULL,
> +							    tmp->has_component_alpha,
> +							    tmp->is_affine);

What's this?

>  	tmp->blt   = gen9_render_composite_blt;
>  	tmp->box   = gen9_render_composite_box;
>  	tmp->boxes = gen9_render_composite_boxes__blt;
> @@ -2784,6 +2804,7 @@ gen9_render_composite_spans(struct sna *sna,
>  	tmp->base.u.gen9.wm_kernel =
>  		GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;
>  
> +	tmp->base.gen9_kernel = GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;

And this?

>  	tmp->box   = gen9_render_composite_spans_box;
>  	tmp->boxes = gen9_render_composite_spans_boxes;
>  	if (tmp->emit_boxes)
> @@ -3853,6 +3874,8 @@ static void gen9_emit_video_state(struct sna *sna,
>  			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
>  		else if (frame->id == FOURCC_UYVY)
>  			src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY;
> +		else if (is_ayuv_fourcc(frame->id))

Just frame->id == FOURCC_AYUV?

> +			src_surf_format[0] = SURFACEFORMAT_B8G8R8A8_UNORM;
>  		else
>  			src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL;
>  
> @@ -3903,6 +3926,9 @@ static unsigned select_video_kernel(const struct sna_video *video,
>  	case FOURCC_RGB565:
>  		return GEN9_WM_KERNEL_VIDEO_RGB;
>  
> +	case FOURCC_AYUV:
> +		return GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT601;

Missing the colorspace handling.

> +
>  	default:
>  		return video->colorspace ?
>  			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
> @@ -4080,6 +4106,7 @@ static void gen9_render_fini(struct sna *sna)
>  	kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo);
>  }
>  
> +

stray whitespace

>  static bool gen9_render_setup(struct sna *sna)
>  {
>  	struct gen9_render_state *state = &sna->render_state.gen9;
> diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
> index a4e5b56a..7ae9a0b0 100644
> --- a/src/sna/sna_render.h
> +++ b/src/sna/sna_render.h
> @@ -154,6 +154,7 @@ struct sna_composite_op {
>  			uint8_t wm_kernel;
>  		} gen9;
>  	} u;
> +	unsigned long gen9_kernel;

?

>  
>  	void *priv;
>  };
> @@ -617,6 +618,9 @@ enum {
>  	GEN9_WM_KERNEL_VIDEO_NV12_BT709,
>  	GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
>  
> +	GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT601,
> +	GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT709,
> +
>  	GEN9_WM_KERNEL_VIDEO_RGB,
>  	GEN9_WM_KERNEL_COUNT
>  };
> diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
> index 55405f81..b3c2bdc6 100644
> --- a/src/sna/sna_video.c
> +++ b/src/sna/sna_video.c
> @@ -281,6 +281,7 @@ sna_video_frame_set_rotation(struct sna_video *video,
>  	} else {
>  		switch (frame->id) {
>  		case FOURCC_RGB888:
> +		case FOURCC_AYUV:
>  			if (rotation & (RR_Rotate_90 | RR_Rotate_270)) {
>  				frame->pitch[0] = ALIGN((height << 2), align);
>  				frame->size = (int)frame->pitch[0] * width;
> @@ -584,6 +585,149 @@ sna_copy_packed_data(struct sna_video *video,
>  	}
>  }
>  
> +static void
> +sna_copy_packed_data_ayuv(struct sna_video *video,

Maybe sna_copy_ayuv_data() ?

> +		     const struct sna_video_frame *frame,
> +		     const uint8_t *buf,
> +		     uint8_t *dst)
> +{
> +	int pitch = frame->width << 2;
> +	const uint8_t *src, *s;
> +	int x, y, w, h;
> +	int i, j;
> +
> +	if (video->textured) {
> +		/* XXX support copying cropped extents */
> +		x = y = 0;
> +		w = frame->width;
> +		h = frame->height;
> +	} else {
> +		x = frame->image.x1;
> +		y = frame->image.y1;
> +		w = frame->image.x2 - frame->image.x1;
> +		h = frame->image.y2 - frame->image.y1;
> +	}
> +
> +	src = buf + (y * pitch) + (x << 2);
> +	switch (frame->rotation) {
> +	case RR_Rotate_0:
> +		w <<= 2;
> +		for (i = 0; i < h; i++) {
> +			for (j = 0; j < w; j += 4) {
> +				uint32_t reverse_dw, dw = *((uint32_t*)(&src[i * frame->pitch[0] + j]));
> +				if (!video->textured) {

Maybe just byteswap always to make things work the same way for both
textured and sprites?

bswap_32() or something?

> +					/*
> +					 * For textured we do byte reversing in shader.
> +					 * Have to reverse bytes order, because the only
> +					 * player which supports AYUV format currently is
> +					 * Gstreamer and it supports in bad way, even though
> +					 * spec says MSB:AYUV, we get the bytes opposite way.
> +					 */
> +					reverse_dw = 0;
> +					reverse_dw |= ((dw & 0x000000ff) << 24);
> +					reverse_dw |= ((dw & 0x0000ff00) << 8);
> +					reverse_dw |= ((dw & 0x00ff0000) >> 8);
> +					reverse_dw |= (dw >> 24);
> +				}
> +				else
> +					reverse_dw = dw;
> +				*((uint32_t*)&dst[i * frame->pitch[0] + j]) = reverse_dw;
> +			}
> +		}
> +		break;
> +	case RR_Rotate_90:
> +		h <<= 2;
> +		for (i = 0; i < h; i += 4) {
> +			for (j = 0;j < w; j++) {
> +				uint32_t reverse_dw, dw;
> +				dw = 0;
> +				dw |= (src[i * frame->pitch[0] + j]);
> +				dw |= ((uint32_t)src[(i + 1) * frame->pitch[0] + j] << 8);
> +				dw |= ((uint32_t)src[(i + 2) * frame->pitch[0] + j] << 16);
> +				dw |= ((uint32_t)src[(i + 3) * frame->pitch[0] + j] << 24);

This looks dodgy.

> +				if (!video->textured) {
> +					/*
> +					 * For textured we do byte reversing in shader.
> +					 * Have to reverse bytes order, because the only
> +					 * player which supports AYUV format currently is
> +					 * Gstreamer and it supports in bad way, even though
> +					 * spec says MSB:AYUV, we get the bytes opposite way.
> +					 */
> +					reverse_dw = 0;
> +					reverse_dw |= ((dw & 0x000000ff) << 24);
> +					reverse_dw |= ((dw & 0x0000ff00) << 8);
> +					reverse_dw |= ((dw & 0x00ff0000) >> 8);
> +					reverse_dw |= (dw >> 24);
> +				}
> +				else
> +					reverse_dw = dw;
> +				*((uint32_t*)&dst[(w - j - 1) * h + i]) = reverse_dw;
> +			}
> +		}
> +		break;
> +	case RR_Rotate_180:
> +		w <<= 2;
> +		for (i = 0; i < h; i++) {
> +			for (j = 0;j < w; j += 4) {
> +				uint32_t reverse_dw, dw;
> +				dw = 0;
> +				dw |= (src[i * frame->pitch[0] + j + 3]);
> +				dw |= ((uint32_t)src[i * frame->pitch[0] + j + 2] << 8);
> +				dw |= ((uint32_t)src[i * frame->pitch[0] + j + 1] << 16);
> +				dw |= ((uint32_t)src[i * frame->pitch[0]] << 24);
> +				if (!video->textured) {
> +					/*
> +					 * For textured we do byte reversing in shader.
> +					 * Have to reverse bytes order, because the only
> +					 * player which supports AYUV format currently is
> +					 * Gstreamer and it supports in bad way, even though
> +					 * spec says MSB:AYUV, we get the bytes opposite way.
> +					 */
> +					reverse_dw = 0;
> +					reverse_dw |= ((dw & 0x000000ff) << 24);
> +					reverse_dw |= ((dw & 0x0000ff00) << 8);
> +					reverse_dw |= ((dw & 0x00ff0000) >> 8);
> +					reverse_dw |= (dw >> 24);
> +				}
> +				else
> +					reverse_dw = dw;
> +				*((uint32_t*)&dst[(h - i - 1) * w + (w - j - 4)]) = reverse_dw;
> +			}
> +		}
> +		break;
> +	case RR_Rotate_270:
> +		h <<= 2;
> +		for (i = 0; i < h; i += 4) {
> +			for (j = 0; j < w; j++) {
> +				uint32_t reverse_dw, dw;
> +				dw = 0;
> +				dw |= (src[(i + 3) * frame->pitch[0] + j]);
> +				dw |= ((uint32_t)src[(i + 2) * frame->pitch[0] + j] << 8);
> +				dw |= ((uint32_t)src[(i + 1) * frame->pitch[0] + j] << 16);
> +				dw |= ((uint32_t)src[i * frame->pitch[0] + j] << 24);
> +				if (!video->textured) {
> +					/*
> +					 * For textured we do byte reversing in shader.
> +					 * Have to reverse bytes order, because the only
> +					 * player which supports AYUV format currently is
> +					 * Gstreamer and it supports in bad way, even though
> +					 * spec says MSB:AYUV, we get the bytes opposite way.
> +					 */
> +					reverse_dw = 0;
> +					reverse_dw |= ((dw & 0x000000ff) << 24);
> +					reverse_dw |= ((dw & 0x0000ff00) << 8);
> +					reverse_dw |= ((dw & 0x00ff0000) >> 8);
> +					reverse_dw |= (dw >> 24);
> +				}
> +				else
> +					reverse_dw = dw;
> +				*((uint32_t*)&dst[j * h + (h - i - 4)]) = reverse_dw;
> +			}
> +		}
> +		break;
> +	}
> +}
> +
>  bool
>  sna_video_copy_data(struct sna_video *video,
>  		    struct sna_video_frame *frame,
> @@ -709,6 +853,9 @@ use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */
>  		sna_copy_nv12_data(video, frame, buf, dst);
>  	else if (is_planar_fourcc(frame->id))
>  		sna_copy_planar_data(video, frame, buf, dst);
> +	else if (is_ayuv_fourcc(frame->id))
> +		/* Some hardcoding is done in default sna_copy_packed_data, so added a specific function */
> +		sna_copy_packed_data_ayuv(video, frame, buf, dst);
>  	else
>  		sna_copy_packed_data(video, frame, buf, dst);
>  
> diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
> index bbd3f0fd..d18c79e5 100644
> --- a/src/sna/sna_video.h
> +++ b/src/sna/sna_video.h
> @@ -39,6 +39,7 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) + 'R')
>  #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) + 'R')
>  #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
> +#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
>  
>  /*
>   * Below, a dummy picture type that is used in XvPutImage
> @@ -79,6 +80,15 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>  	XvTopToBottom \
>  }
>  
> +#define XVIMAGE_AYUV { \
> +	FOURCC_AYUV, XvYUV, LSBFirst, \
> +	{'P', 'A', 'S', 'S', 'T', 'H', 'R', 'O', 'U', 'G', 'H', ' ', 'A', 'Y', 'U', 'V'}, \

That should be something more like
{'A','Y','U','V',....}

> +	32, XvPacked, 1, 24, 0xff<<16, 0xff<<8, 0xff<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \

Comments in XvImageRec suggest that we shouldn't have depth/masks
for yuv formats, and we should instead fill the yuv related things instead.

> +	{'V', 'U', 'Y', 'X', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \

'A','Y','U','V' would match the gst byte order I guess.

> +	XvTopToBottom \
> +}
> +
> +
>  struct sna_video {
>  	struct sna *sna;
>  
> @@ -189,6 +199,16 @@ static inline int is_nv12_fourcc(int id)
>  	}
>  }
>  
> +static inline int is_ayuv_fourcc(int id)
> +{
> +	switch (id) {
> +	case FOURCC_AYUV:
> +		return 1;
> +	default:
> +		return 0;
> +	}
> +}
> +
>  bool
>  sna_video_clip_helper(struct sna_video *video,
>  		      struct sna_video_frame *frame,
> diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
> index 8b7ae8ae..288f1d62 100644
> --- a/src/sna/sna_video_sprite.c
> +++ b/src/sna/sna_video_sprite.c
> @@ -47,7 +47,7 @@
>  #define DRM_FORMAT_YUYV         fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
>  #define DRM_FORMAT_UYVY         fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
>  #define DRM_FORMAT_NV12         fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */
> -
> +#define DRM_FORMAT_XYUV8888     fourcc_code('X', 'Y', 'U', 'V') /* 2x2 subsampled Cr:Cb plane */

Bogus comment about subsampling.

>  #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
>  				    (sna)->kgem.gen >= 0110)
>  
> @@ -74,11 +74,11 @@ static Atom xvColorKey, xvAlwaysOnTop, xvSyncToVblank, xvColorspace;
>  
>  static XvFormatRec formats[] = { {15}, {16}, {24} };
>  static const XvImageRec images[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
> -				     XVMC_RGB888 };
> +				     XVMC_RGB888, XVIMAGE_AYUV };
>  static const XvImageRec images_rgb565[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
> -					    XVMC_RGB888, XVMC_RGB565 };
> +					    XVMC_RGB888, XVMC_RGB565, XVIMAGE_AYUV };
>  static const XvImageRec images_nv12[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
> -					  XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 };
> +					  XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565, XVIMAGE_AYUV };

This will now advertize AYUV on every platform.

>  static const XvAttributeRec attribs[] = {
>  	{ XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE" }, /* BT.601, BT.709 */
>  	{ XvSettable | XvGettable, 0, 0xffffff, (char *)"XV_COLORKEY" },
> @@ -364,6 +364,10 @@ sna_video_sprite_show(struct sna *sna,
>  		case FOURCC_UYVY:
>  			f.pixel_format = DRM_FORMAT_UYVY;
>  			break;
> +		case FOURCC_AYUV:
> +			/* i915 doesn't support alpha, so we use XYUV */
> +			f.pixel_format = DRM_FORMAT_XYUV8888;
> +			break;
>  		case FOURCC_YUY2:
>  		default:
>  			f.pixel_format = DRM_FORMAT_YUYV;
> @@ -705,7 +709,12 @@ static int sna_video_sprite_query(ddQueryImageAttributes_ARGS)
>  		tmp *= (*h >> 1);
>  		size += tmp;
>  		break;
> -
> +	case FOURCC_AYUV:
> +		tmp = *w << 2;
> +		if (pitches)
> +			pitches[0] = tmp;
> +		size = *h * tmp;
> +		break;
>  	default:
>  		*w = (*w + 1) & ~1;
>  		*h = (*h + 1) & ~1;
> diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
> index a784fe2e..21c5f379 100644
> --- a/src/sna/sna_video_textured.c
> +++ b/src/sna/sna_video_textured.c
> @@ -68,6 +68,8 @@ static const XvImageRec gen4_Images[] = {
>  	XVIMAGE_I420,
>  	XVIMAGE_NV12,
>  	XVIMAGE_UYVY,
> +	XVIMAGE_AYUV,
> +	XVMC_RGB888,

RGB888 too?

>  	XVMC_YUV,
>  };
>  
> @@ -337,6 +339,12 @@ sna_video_textured_query(ddQueryImageAttributes_ARGS)
>  			pitches[0] = size;
>  		size *= *h;
>  		break;
> +	case FOURCC_AYUV:
> +		size = *w << 2;
> +		if (pitches)
> +			pitches[0] = size;
> +		size *= *h;
> +		break;
>  	case FOURCC_XVMC:
>  		*h = (*h + 1) & ~1;
>  		size = sizeof(uint32_t);
> -- 
> 2.17.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lisovskiy, Stanislav Oct. 11, 2018, 8:56 a.m. UTC | #2
On Wed, 2018-10-10 at 22:47 +0300, Ville Syrjälä wrote:
> What's this?
> 
> >  	tmp->blt   = gen9_render_composite_blt;
> >  	tmp->box   = gen9_render_composite_box;
> >  	tmp->boxes = gen9_render_composite_boxes__blt;
> > @@ -2784,6 +2804,7 @@ gen9_render_composite_spans(struct sna *sna,
> >  	tmp->base.u.gen9.wm_kernel =
> >  		GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;
> >  
> > +	tmp->base.gen9_kernel = GEN9_WM_KERNEL_OPACITY | !tmp-
> > >base.is_affine;
> 
> And this?

Thanks for noticing! Those are remains of my own patch when I tried to
fix the flag(running out of kernels) issue myself, then after I figured
out that you have this patch already I applied it, however looks like
for some weird reason, I didn't remove my own duplicate changes.

> 
> >  	tmp->box   = gen9_render_composite_spans_box;
> >  	tmp->boxes = gen9_render_composite_spans_boxes;
> >  	if (tmp->emit_boxes)
> > @@ -3853,6 +3874,8 @@ static void gen9_emit_video_state(struct sna
> > *sna,
> >  			src_surf_format[0] =
> > SURFACEFORMAT_B8G8R8X8_UNORM;
> >  		else if (frame->id == FOURCC_UYVY)
> >  			src_surf_format[0] =
> > SURFACEFORMAT_YCRCB_SWAPY;
> > +		else if (is_ayuv_fourcc(frame->id))
> 
> Just frame->id == FOURCC_AYUV?
> 
> > +			src_surf_format[0] =
> > SURFACEFORMAT_B8G8R8A8_UNORM;
> >  		else
> >  			src_surf_format[0] =
> > SURFACEFORMAT_YCRCB_NORMAL;
> >  
> > @@ -3903,6 +3926,9 @@ static unsigned select_video_kernel(const
> > struct sna_video *video,
> >  	case FOURCC_RGB565:
> >  		return GEN9_WM_KERNEL_VIDEO_RGB;
> >  
> > +	case FOURCC_AYUV:
> > +		return GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT601;
> 
> Missing the colorspace handling.
> 
> > +
> >  	default:
> >  		return video->colorspace ?
> >  			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
> > @@ -4080,6 +4106,7 @@ static void gen9_render_fini(struct sna *sna)
> >  	kgem_bo_destroy(&sna->kgem, sna-
> > >render_state.gen9.general_bo);
> >  }
> >  
> > +
> 
> stray whitespace
> 
> >  static bool gen9_render_setup(struct sna *sna)
> >  {
> >  	struct gen9_render_state *state = &sna->render_state.gen9;
> > diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
> > index a4e5b56a..7ae9a0b0 100644
> > --- a/src/sna/sna_render.h
> > +++ b/src/sna/sna_render.h
> > @@ -154,6 +154,7 @@ struct sna_composite_op {
> >  			uint8_t wm_kernel;
> >  		} gen9;
> >  	} u;
> > +	unsigned long gen9_kernel;
> 
> ?
> 
> >  
> >  	void *priv;
> >  };
> > @@ -617,6 +618,9 @@ enum {
> >  	GEN9_WM_KERNEL_VIDEO_NV12_BT709,
> >  	GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
> >  
> > +	GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT601,
> > +	GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT709,
> > +
> >  	GEN9_WM_KERNEL_VIDEO_RGB,
> >  	GEN9_WM_KERNEL_COUNT
> >  };
> > diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
> > index 55405f81..b3c2bdc6 100644
> > --- a/src/sna/sna_video.c
> > +++ b/src/sna/sna_video.c
> > @@ -281,6 +281,7 @@ sna_video_frame_set_rotation(struct sna_video
> > *video,
> >  	} else {
> >  		switch (frame->id) {
> >  		case FOURCC_RGB888:
> > +		case FOURCC_AYUV:
> >  			if (rotation & (RR_Rotate_90 |
> > RR_Rotate_270)) {
> >  				frame->pitch[0] = ALIGN((height <<
> > 2), align);
> >  				frame->size = (int)frame->pitch[0] 
> > * width;
> > @@ -584,6 +585,149 @@ sna_copy_packed_data(struct sna_video *video,
> >  	}
> >  }
> >  
> > +static void
> > +sna_copy_packed_data_ayuv(struct sna_video *video,
> 
> Maybe sna_copy_ayuv_data() ?
> 
> > +		     const struct sna_video_frame *frame,
> > +		     const uint8_t *buf,
> > +		     uint8_t *dst)
> > +{
> > +	int pitch = frame->width << 2;
> > +	const uint8_t *src, *s;
> > +	int x, y, w, h;
> > +	int i, j;
> > +
> > +	if (video->textured) {
> > +		/* XXX support copying cropped extents */
> > +		x = y = 0;
> > +		w = frame->width;
> > +		h = frame->height;
> > +	} else {
> > +		x = frame->image.x1;
> > +		y = frame->image.y1;
> > +		w = frame->image.x2 - frame->image.x1;
> > +		h = frame->image.y2 - frame->image.y1;
> > +	}
> > +
> > +	src = buf + (y * pitch) + (x << 2);
> > +	switch (frame->rotation) {
> > +	case RR_Rotate_0:
> > +		w <<= 2;
> > +		for (i = 0; i < h; i++) {
> > +			for (j = 0; j < w; j += 4) {
> > +				uint32_t reverse_dw, dw =
> > *((uint32_t*)(&src[i * frame->pitch[0] + j]));
> > +				if (!video->textured) {
> 
> Maybe just byteswap always to make things work the same way for both
> textured and sprites?
> 
> bswap_32() or something?

Definitely, I'll put this to separate function.

> 
> > +					/*
> > +					 * For textured we do byte
> > reversing in shader.
> > +					 * Have to reverse bytes
> > order, because the only
> > +					 * player which supports
> > AYUV format currently is
> > +					 * Gstreamer and it
> > supports in bad way, even though
> > +					 * spec says MSB:AYUV, we
> > get the bytes opposite way.
> > +					 */
> > +					reverse_dw = 0;
> > +					reverse_dw |= ((dw &
> > 0x000000ff) << 24);
> > +					reverse_dw |= ((dw &
> > 0x0000ff00) << 8);
> > +					reverse_dw |= ((dw &
> > 0x00ff0000) >> 8);
> > +					reverse_dw |= (dw >> 24);
> > +				}
> > +				else
> > +					reverse_dw = dw;
> > +				*((uint32_t*)&dst[i * frame-
> > >pitch[0] + j]) = reverse_dw;
> > +			}
> > +		}
> > +		break;
> > +	case RR_Rotate_90:
> > +		h <<= 2;
> > +		for (i = 0; i < h; i += 4) {
> > +			for (j = 0;j < w; j++) {
> > +				uint32_t reverse_dw, dw;
> > +				dw = 0;
> > +				dw |= (src[i * frame->pitch[0] +
> > j]);
> > +				dw |= ((uint32_t)src[(i + 1) *
> > frame->pitch[0] + j] << 8);
> > +				dw |= ((uint32_t)src[(i + 2) *
> > frame->pitch[0] + j] << 16);
> > +				dw |= ((uint32_t)src[(i + 3) *
> > frame->pitch[0] + j] << 24);
> 
> This looks dodgy.
> 
> > +				if (!video->textured) {
> > +					/*
> > +					 * For textured we do byte
> > reversing in shader.
> > +					 * Have to reverse bytes
> > order, because the only
> > +					 * player which supports
> > AYUV format currently is
> > +					 * Gstreamer and it
> > supports in bad way, even though
> > +					 * spec says MSB:AYUV, we
> > get the bytes opposite way.
> > +					 */
> > +					reverse_dw = 0;
> > +					reverse_dw |= ((dw &
> > 0x000000ff) << 24);
> > +					reverse_dw |= ((dw &
> > 0x0000ff00) << 8);
> > +					reverse_dw |= ((dw &
> > 0x00ff0000) >> 8);
> > +					reverse_dw |= (dw >> 24);
> > +				}
> > +				else
> > +					reverse_dw = dw;
> > +				*((uint32_t*)&dst[(w - j - 1) * h
> > + i]) = reverse_dw;
> > +			}
> > +		}
> > +		break;
> > +	case RR_Rotate_180:
> > +		w <<= 2;
> > +		for (i = 0; i < h; i++) {
> > +			for (j = 0;j < w; j += 4) {
> > +				uint32_t reverse_dw, dw;
> > +				dw = 0;
> > +				dw |= (src[i * frame->pitch[0] + j
> > + 3]);
> > +				dw |= ((uint32_t)src[i * frame-
> > >pitch[0] + j + 2] << 8);
> > +				dw |= ((uint32_t)src[i * frame-
> > >pitch[0] + j + 1] << 16);
> > +				dw |= ((uint32_t)src[i * frame-
> > >pitch[0]] << 24);
> > +				if (!video->textured) {
> > +					/*
> > +					 * For textured we do byte
> > reversing in shader.
> > +					 * Have to reverse bytes
> > order, because the only
> > +					 * player which supports
> > AYUV format currently is
> > +					 * Gstreamer and it
> > supports in bad way, even though
> > +					 * spec says MSB:AYUV, we
> > get the bytes opposite way.
> > +					 */
> > +					reverse_dw = 0;
> > +					reverse_dw |= ((dw &
> > 0x000000ff) << 24);
> > +					reverse_dw |= ((dw &
> > 0x0000ff00) << 8);
> > +					reverse_dw |= ((dw &
> > 0x00ff0000) >> 8);
> > +					reverse_dw |= (dw >> 24);
> > +				}
> > +				else
> > +					reverse_dw = dw;
> > +				*((uint32_t*)&dst[(h - i - 1) * w
> > + (w - j - 4)]) = reverse_dw;
> > +			}
> > +		}
> > +		break;
> > +	case RR_Rotate_270:
> > +		h <<= 2;
> > +		for (i = 0; i < h; i += 4) {
> > +			for (j = 0; j < w; j++) {
> > +				uint32_t reverse_dw, dw;
> > +				dw = 0;
> > +				dw |= (src[(i + 3) * frame-
> > >pitch[0] + j]);
> > +				dw |= ((uint32_t)src[(i + 2) *
> > frame->pitch[0] + j] << 8);
> > +				dw |= ((uint32_t)src[(i + 1) *
> > frame->pitch[0] + j] << 16);
> > +				dw |= ((uint32_t)src[i * frame-
> > >pitch[0] + j] << 24);
> > +				if (!video->textured) {
> > +					/*
> > +					 * For textured we do byte
> > reversing in shader.
> > +					 * Have to reverse bytes
> > order, because the only
> > +					 * player which supports
> > AYUV format currently is
> > +					 * Gstreamer and it
> > supports in bad way, even though
> > +					 * spec says MSB:AYUV, we
> > get the bytes opposite way.
> > +					 */
> > +					reverse_dw = 0;
> > +					reverse_dw |= ((dw &
> > 0x000000ff) << 24);
> > +					reverse_dw |= ((dw &
> > 0x0000ff00) << 8);
> > +					reverse_dw |= ((dw &
> > 0x00ff0000) >> 8);
> > +					reverse_dw |= (dw >> 24);
> > +				}
> > +				else
> > +					reverse_dw = dw;
> > +				*((uint32_t*)&dst[j * h + (h - i -
> > 4)]) = reverse_dw;
> > +			}
> > +		}
> > +		break;
> > +	}
> > +}
> > +
> >  bool
> >  sna_video_copy_data(struct sna_video *video,
> >  		    struct sna_video_frame *frame,
> > @@ -709,6 +853,9 @@ use_gtt: /* copy data, must use GTT so that we
> > keep the overlay uncached */
> >  		sna_copy_nv12_data(video, frame, buf, dst);
> >  	else if (is_planar_fourcc(frame->id))
> >  		sna_copy_planar_data(video, frame, buf, dst);
> > +	else if (is_ayuv_fourcc(frame->id))
> > +		/* Some hardcoding is done in default
> > sna_copy_packed_data, so added a specific function */
> > +		sna_copy_packed_data_ayuv(video, frame, buf, dst);
> >  	else
> >  		sna_copy_packed_data(video, frame, buf, dst);
> >  
> > diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
> > index bbd3f0fd..d18c79e5 100644
> > --- a/src/sna/sna_video.h
> > +++ b/src/sna/sna_video.h
> > @@ -39,6 +39,7 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) +
> > 'R')
> >  #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) +
> > 'R')
> >  #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
> > +#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
> >  
> >  /*
> >   * Below, a dummy picture type that is used in XvPutImage
> > @@ -79,6 +80,15 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  	XvTopToBottom \
> >  }
> >  
> > +#define XVIMAGE_AYUV { \
> > +	FOURCC_AYUV, XvYUV, LSBFirst, \
> > +	{'P', 'A', 'S', 'S', 'T', 'H', 'R', 'O', 'U', 'G', 'H', '
> > ', 'A', 'Y', 'U', 'V'}, \
> 
> That should be something more like
> {'A','Y','U','V',....}
> 
> > +	32, XvPacked, 1, 24, 0xff<<16, 0xff<<8, 0xff<<0, 0, 0, 0,
> > 0, 0, 0, 0, 0, 0, \
> 
> Comments in XvImageRec suggest that we shouldn't have depth/masks
> for yuv formats, and we should instead fill the yuv related things
> instead.
> 
> > +	{'V', 'U', 'Y', 'X', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> > 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
> 
> 'A','Y','U','V' would match the gst byte order I guess.
> 
> > +	XvTopToBottom \
> > +}
> > +
> > +
> >  struct sna_video {
> >  	struct sna *sna;
> >  
> > @@ -189,6 +199,16 @@ static inline int is_nv12_fourcc(int id)
> >  	}
> >  }
> >  
> > +static inline int is_ayuv_fourcc(int id)
> > +{
> > +	switch (id) {
> > +	case FOURCC_AYUV:
> > +		return 1;
> > +	default:
> > +		return 0;
> > +	}
> > +}
> > +
> >  bool
> >  sna_video_clip_helper(struct sna_video *video,
> >  		      struct sna_video_frame *frame,
> > diff --git a/src/sna/sna_video_sprite.c
> > b/src/sna/sna_video_sprite.c
> > index 8b7ae8ae..288f1d62 100644
> > --- a/src/sna/sna_video_sprite.c
> > +++ b/src/sna/sna_video_sprite.c
> > @@ -47,7 +47,7 @@
> >  #define DRM_FORMAT_YUYV         fourcc_code('Y', 'U', 'Y', 'V') /*
> > [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
> >  #define DRM_FORMAT_UYVY         fourcc_code('U', 'Y', 'V', 'Y') /*
> > [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
> >  #define DRM_FORMAT_NV12         fourcc_code('N', 'V', '1', '2') /*
> > 2x2 subsampled Cr:Cb plane */
> > -
> > +#define DRM_FORMAT_XYUV8888     fourcc_code('X', 'Y', 'U', 'V') /*
> > 2x2 subsampled Cr:Cb plane */
> 
> Bogus comment about subsampling.
> 
> >  #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
> >  				    (sna)->kgem.gen >= 0110)
> >  
> > @@ -74,11 +74,11 @@ static Atom xvColorKey, xvAlwaysOnTop,
> > xvSyncToVblank, xvColorspace;
> >  
> >  static XvFormatRec formats[] = { {15}, {16}, {24} };
> >  static const XvImageRec images[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
> > -				     XVMC_RGB888 };
> > +				     XVMC_RGB888, XVIMAGE_AYUV };
> >  static const XvImageRec images_rgb565[] = { XVIMAGE_YUY2,
> > XVIMAGE_UYVY,
> > -					    XVMC_RGB888,
> > XVMC_RGB565 };
> > +					    XVMC_RGB888,
> > XVMC_RGB565, XVIMAGE_AYUV };
> >  static const XvImageRec images_nv12[] = { XVIMAGE_YUY2,
> > XVIMAGE_UYVY,
> > -					  XVIMAGE_NV12,
> > XVMC_RGB888, XVMC_RGB565 };
> > +					  XVIMAGE_NV12,
> > XVMC_RGB888, XVMC_RGB565, XVIMAGE_AYUV };
> 
> This will now advertize AYUV on every platform.
> 
> >  static const XvAttributeRec attribs[] = {
> >  	{ XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE"
> > }, /* BT.601, BT.709 */
> >  	{ XvSettable | XvGettable, 0, 0xffffff, (char
> > *)"XV_COLORKEY" },
> > @@ -364,6 +364,10 @@ sna_video_sprite_show(struct sna *sna,
> >  		case FOURCC_UYVY:
> >  			f.pixel_format = DRM_FORMAT_UYVY;
> >  			break;
> > +		case FOURCC_AYUV:
> > +			/* i915 doesn't support alpha, so we use
> > XYUV */
> > +			f.pixel_format = DRM_FORMAT_XYUV8888;
> > +			break;
> >  		case FOURCC_YUY2:
> >  		default:
> >  			f.pixel_format = DRM_FORMAT_YUYV;
> > @@ -705,7 +709,12 @@ static int
> > sna_video_sprite_query(ddQueryImageAttributes_ARGS)
> >  		tmp *= (*h >> 1);
> >  		size += tmp;
> >  		break;
> > -
> > +	case FOURCC_AYUV:
> > +		tmp = *w << 2;
> > +		if (pitches)
> > +			pitches[0] = tmp;
> > +		size = *h * tmp;
> > +		break;
> >  	default:
> >  		*w = (*w + 1) & ~1;
> >  		*h = (*h + 1) & ~1;
> > diff --git a/src/sna/sna_video_textured.c
> > b/src/sna/sna_video_textured.c
> > index a784fe2e..21c5f379 100644
> > --- a/src/sna/sna_video_textured.c
> > +++ b/src/sna/sna_video_textured.c
> > @@ -68,6 +68,8 @@ static const XvImageRec gen4_Images[] = {
> >  	XVIMAGE_I420,
> >  	XVIMAGE_NV12,
> >  	XVIMAGE_UYVY,
> > +	XVIMAGE_AYUV,
> > +	XVMC_RGB888,
> 
> RGB888 too?
> 
> >  	XVMC_YUV,
> >  };
> >  
> > @@ -337,6 +339,12 @@
> > sna_video_textured_query(ddQueryImageAttributes_ARGS)
> >  			pitches[0] = size;
> >  		size *= *h;
> >  		break;
> > +	case FOURCC_AYUV:
> > +		size = *w << 2;
> > +		if (pitches)
> > +			pitches[0] = size;
> > +		size *= *h;
> > +		break;
> >  	case FOURCC_XVMC:
> >  		*h = (*h + 1) & ~1;
> >  		size = sizeof(uint32_t);
> > -- 
> > 2.17.1
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
>
diff mbox series

Patch

diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
index dc58138f..e35ffa52 100644
--- a/src/render_program/Makefile.am
+++ b/src/render_program/Makefile.am
@@ -196,6 +196,7 @@  INTEL_G7B =				\
 INTEL_G8A =				\
 	exa_wm_src_affine.g8a 		\
 	exa_wm_src_sample_argb.g8a 	\
+	exa_wm_src_sample_argb_ayuv.g8a \
 	exa_wm_src_sample_nv12.g8a 	\
 	exa_wm_src_sample_planar.g8a 	\
 	exa_wm_write.g8a 		\
@@ -205,6 +206,7 @@  INTEL_G8A =				\
 
 INTEL_G8B =				\
 	exa_wm_src_affine.g8b 		\
+	exa_wm_src_sample_argb_ayuv.g8b \
 	exa_wm_src_sample_argb.g8b 	\
 	exa_wm_src_sample_nv12.g8b 	\
 	exa_wm_src_sample_planar.g8b 	\
diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
new file mode 100644
index 00000000..d79840ac
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
@@ -0,0 +1,60 @@ 
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wang Zhenyu <zhenyu.z.wang@intel.com>
+ *    Keith Packard <keithp@keithp.com>
+ */
+
+/* Sample the src surface */
+
+include(`exa_wm.g4i')
+
+undefine(`src_msg')
+undefine(`src_msg_ind')
+
+define(`src_msg',       `g65')
+define(`src_msg_ind',   `65')
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+
+/* load argb */
+mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
+
+/* src_msg will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) src_msg_ind	/* msg reg index */
+	src_sample_base<1>UW /* readback */
+	null
+	sampler (1,0,F)	/* sampler message description, (binding_table,sampler_index,datatype)
+				/* here(src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
+
+/* Put CbCr into right places */
+mov (16) src_sample_b<1>UD src_sample_r<1>UD  { align1 };
+mov (16) src_sample_r<1>UD src_sample_a<1>UD  { align1 };
+mov (16) src_sample_a<1>F 1.0F;
+
diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
new file mode 100644
index 00000000..4f439141
--- /dev/null
+++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
@@ -0,0 +1,6 @@ 
+   { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 },
+   { 0x00800001, 0x22400208, 0x002001c0, 0x00000000 },
+   { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 },
+   { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
index eb22b642..29447d25 100644
--- a/src/sna/gen9_render.c
+++ b/src/sna/gen9_render.c
@@ -129,6 +129,20 @@  static const uint32_t ps_kernel_planar_bt709[][4] = {
 #include "exa_wm_write.g8b"
 };
 
+static const uint32_t ps_kernel_packed_ayuv_bt601[][4] = {
+#include "exa_wm_src_affine.g8b"
+#include "exa_wm_src_sample_argb_ayuv.g8b"
+#include "exa_wm_yuv_rgb_bt601.g8b"
+#include "exa_wm_write.g8b"
+};
+
+static const uint32_t ps_kernel_packed_ayuv_bt709[][4] = {
+#include "exa_wm_src_affine.g8b"
+#include "exa_wm_src_sample_argb_ayuv.g8b"
+#include "exa_wm_yuv_rgb_bt709.g8b"
+#include "exa_wm_write.g8b"
+};
+
 static const uint32_t ps_kernel_nv12_bt709[][4] = {
 #include "exa_wm_src_affine.g8b"
 #include "exa_wm_src_sample_nv12.g8b"
@@ -177,6 +191,8 @@  static const struct wm_kernel_info {
 	KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
 	KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
 	KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
+	KERNEL(VIDEO_PACKED_AYUV_BT601, ps_kernel_packed_ayuv_bt601, 2),
+	KERNEL(VIDEO_PACKED_AYUV_BT709, ps_kernel_packed_ayuv_bt709, 2),
 	KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
 #endif
 };
@@ -2553,6 +2569,10 @@  gen9_render_composite(struct sna *sna,
 							     tmp->has_component_alpha,
 							     tmp->is_affine);
 
+        tmp->gen9_kernel = gen9_choose_composite_kernel(tmp->op,
+							    tmp->mask.bo != NULL,
+							    tmp->has_component_alpha,
+							    tmp->is_affine);
 	tmp->blt   = gen9_render_composite_blt;
 	tmp->box   = gen9_render_composite_box;
 	tmp->boxes = gen9_render_composite_boxes__blt;
@@ -2784,6 +2804,7 @@  gen9_render_composite_spans(struct sna *sna,
 	tmp->base.u.gen9.wm_kernel =
 		GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;
 
+	tmp->base.gen9_kernel = GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;
 	tmp->box   = gen9_render_composite_spans_box;
 	tmp->boxes = gen9_render_composite_spans_boxes;
 	if (tmp->emit_boxes)
@@ -3853,6 +3874,8 @@  static void gen9_emit_video_state(struct sna *sna,
 			src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
 		else if (frame->id == FOURCC_UYVY)
 			src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY;
+		else if (is_ayuv_fourcc(frame->id))
+			src_surf_format[0] = SURFACEFORMAT_B8G8R8A8_UNORM;
 		else
 			src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL;
 
@@ -3903,6 +3926,9 @@  static unsigned select_video_kernel(const struct sna_video *video,
 	case FOURCC_RGB565:
 		return GEN9_WM_KERNEL_VIDEO_RGB;
 
+	case FOURCC_AYUV:
+		return GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT601;
+
 	default:
 		return video->colorspace ?
 			GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
@@ -4080,6 +4106,7 @@  static void gen9_render_fini(struct sna *sna)
 	kgem_bo_destroy(&sna->kgem, sna->render_state.gen9.general_bo);
 }
 
+
 static bool gen9_render_setup(struct sna *sna)
 {
 	struct gen9_render_state *state = &sna->render_state.gen9;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index a4e5b56a..7ae9a0b0 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -154,6 +154,7 @@  struct sna_composite_op {
 			uint8_t wm_kernel;
 		} gen9;
 	} u;
+	unsigned long gen9_kernel;
 
 	void *priv;
 };
@@ -617,6 +618,9 @@  enum {
 	GEN9_WM_KERNEL_VIDEO_NV12_BT709,
 	GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
 
+	GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT601,
+	GEN9_WM_KERNEL_VIDEO_PACKED_AYUV_BT709,
+
 	GEN9_WM_KERNEL_VIDEO_RGB,
 	GEN9_WM_KERNEL_COUNT
 };
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
index 55405f81..b3c2bdc6 100644
--- a/src/sna/sna_video.c
+++ b/src/sna/sna_video.c
@@ -281,6 +281,7 @@  sna_video_frame_set_rotation(struct sna_video *video,
 	} else {
 		switch (frame->id) {
 		case FOURCC_RGB888:
+		case FOURCC_AYUV:
 			if (rotation & (RR_Rotate_90 | RR_Rotate_270)) {
 				frame->pitch[0] = ALIGN((height << 2), align);
 				frame->size = (int)frame->pitch[0] * width;
@@ -584,6 +585,149 @@  sna_copy_packed_data(struct sna_video *video,
 	}
 }
 
+static void
+sna_copy_packed_data_ayuv(struct sna_video *video,
+		     const struct sna_video_frame *frame,
+		     const uint8_t *buf,
+		     uint8_t *dst)
+{
+	int pitch = frame->width << 2;
+	const uint8_t *src, *s;
+	int x, y, w, h;
+	int i, j;
+
+	if (video->textured) {
+		/* XXX support copying cropped extents */
+		x = y = 0;
+		w = frame->width;
+		h = frame->height;
+	} else {
+		x = frame->image.x1;
+		y = frame->image.y1;
+		w = frame->image.x2 - frame->image.x1;
+		h = frame->image.y2 - frame->image.y1;
+	}
+
+	src = buf + (y * pitch) + (x << 2);
+	switch (frame->rotation) {
+	case RR_Rotate_0:
+		w <<= 2;
+		for (i = 0; i < h; i++) {
+			for (j = 0; j < w; j += 4) {
+				uint32_t reverse_dw, dw = *((uint32_t*)(&src[i * frame->pitch[0] + j]));
+				if (!video->textured) {
+					/*
+					 * For textured we do byte reversing in shader.
+					 * Have to reverse bytes order, because the only
+					 * player which supports AYUV format currently is
+					 * Gstreamer and it supports in bad way, even though
+					 * spec says MSB:AYUV, we get the bytes opposite way.
+					 */
+					reverse_dw = 0;
+					reverse_dw |= ((dw & 0x000000ff) << 24);
+					reverse_dw |= ((dw & 0x0000ff00) << 8);
+					reverse_dw |= ((dw & 0x00ff0000) >> 8);
+					reverse_dw |= (dw >> 24);
+				}
+				else
+					reverse_dw = dw;
+				*((uint32_t*)&dst[i * frame->pitch[0] + j]) = reverse_dw;
+			}
+		}
+		break;
+	case RR_Rotate_90:
+		h <<= 2;
+		for (i = 0; i < h; i += 4) {
+			for (j = 0;j < w; j++) {
+				uint32_t reverse_dw, dw;
+				dw = 0;
+				dw |= (src[i * frame->pitch[0] + j]);
+				dw |= ((uint32_t)src[(i + 1) * frame->pitch[0] + j] << 8);
+				dw |= ((uint32_t)src[(i + 2) * frame->pitch[0] + j] << 16);
+				dw |= ((uint32_t)src[(i + 3) * frame->pitch[0] + j] << 24);
+				if (!video->textured) {
+					/*
+					 * For textured we do byte reversing in shader.
+					 * Have to reverse bytes order, because the only
+					 * player which supports AYUV format currently is
+					 * Gstreamer and it supports in bad way, even though
+					 * spec says MSB:AYUV, we get the bytes opposite way.
+					 */
+					reverse_dw = 0;
+					reverse_dw |= ((dw & 0x000000ff) << 24);
+					reverse_dw |= ((dw & 0x0000ff00) << 8);
+					reverse_dw |= ((dw & 0x00ff0000) >> 8);
+					reverse_dw |= (dw >> 24);
+				}
+				else
+					reverse_dw = dw;
+				*((uint32_t*)&dst[(w - j - 1) * h + i]) = reverse_dw;
+			}
+		}
+		break;
+	case RR_Rotate_180:
+		w <<= 2;
+		for (i = 0; i < h; i++) {
+			for (j = 0;j < w; j += 4) {
+				uint32_t reverse_dw, dw;
+				dw = 0;
+				dw |= (src[i * frame->pitch[0] + j + 3]);
+				dw |= ((uint32_t)src[i * frame->pitch[0] + j + 2] << 8);
+				dw |= ((uint32_t)src[i * frame->pitch[0] + j + 1] << 16);
+				dw |= ((uint32_t)src[i * frame->pitch[0]] << 24);
+				if (!video->textured) {
+					/*
+					 * For textured we do byte reversing in shader.
+					 * Have to reverse bytes order, because the only
+					 * player which supports AYUV format currently is
+					 * Gstreamer and it supports in bad way, even though
+					 * spec says MSB:AYUV, we get the bytes opposite way.
+					 */
+					reverse_dw = 0;
+					reverse_dw |= ((dw & 0x000000ff) << 24);
+					reverse_dw |= ((dw & 0x0000ff00) << 8);
+					reverse_dw |= ((dw & 0x00ff0000) >> 8);
+					reverse_dw |= (dw >> 24);
+				}
+				else
+					reverse_dw = dw;
+				*((uint32_t*)&dst[(h - i - 1) * w + (w - j - 4)]) = reverse_dw;
+			}
+		}
+		break;
+	case RR_Rotate_270:
+		h <<= 2;
+		for (i = 0; i < h; i += 4) {
+			for (j = 0; j < w; j++) {
+				uint32_t reverse_dw, dw;
+				dw = 0;
+				dw |= (src[(i + 3) * frame->pitch[0] + j]);
+				dw |= ((uint32_t)src[(i + 2) * frame->pitch[0] + j] << 8);
+				dw |= ((uint32_t)src[(i + 1) * frame->pitch[0] + j] << 16);
+				dw |= ((uint32_t)src[i * frame->pitch[0] + j] << 24);
+				if (!video->textured) {
+					/*
+					 * For textured we do byte reversing in shader.
+					 * Have to reverse bytes order, because the only
+					 * player which supports AYUV format currently is
+					 * Gstreamer and it supports in bad way, even though
+					 * spec says MSB:AYUV, we get the bytes opposite way.
+					 */
+					reverse_dw = 0;
+					reverse_dw |= ((dw & 0x000000ff) << 24);
+					reverse_dw |= ((dw & 0x0000ff00) << 8);
+					reverse_dw |= ((dw & 0x00ff0000) >> 8);
+					reverse_dw |= (dw >> 24);
+				}
+				else
+					reverse_dw = dw;
+				*((uint32_t*)&dst[j * h + (h - i - 4)]) = reverse_dw;
+			}
+		}
+		break;
+	}
+}
+
 bool
 sna_video_copy_data(struct sna_video *video,
 		    struct sna_video_frame *frame,
@@ -709,6 +853,9 @@  use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */
 		sna_copy_nv12_data(video, frame, buf, dst);
 	else if (is_planar_fourcc(frame->id))
 		sna_copy_planar_data(video, frame, buf, dst);
+	else if (is_ayuv_fourcc(frame->id))
+		/* Some hardcoding is done in default sna_copy_packed_data, so added a specific function */
+		sna_copy_packed_data_ayuv(video, frame, buf, dst);
 	else
 		sna_copy_packed_data(video, frame, buf, dst);
 
diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
index bbd3f0fd..d18c79e5 100644
--- a/src/sna/sna_video.h
+++ b/src/sna/sna_video.h
@@ -39,6 +39,7 @@  THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) + 'R')
 #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) + 'R')
 #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
+#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
 
 /*
  * Below, a dummy picture type that is used in XvPutImage
@@ -79,6 +80,15 @@  THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 	XvTopToBottom \
 }
 
+#define XVIMAGE_AYUV { \
+	FOURCC_AYUV, XvYUV, LSBFirst, \
+	{'P', 'A', 'S', 'S', 'T', 'H', 'R', 'O', 'U', 'G', 'H', ' ', 'A', 'Y', 'U', 'V'}, \
+	32, XvPacked, 1, 24, 0xff<<16, 0xff<<8, 0xff<<0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+	{'V', 'U', 'Y', 'X', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
+	XvTopToBottom \
+}
+
+
 struct sna_video {
 	struct sna *sna;
 
@@ -189,6 +199,16 @@  static inline int is_nv12_fourcc(int id)
 	}
 }
 
+static inline int is_ayuv_fourcc(int id)
+{
+	switch (id) {
+	case FOURCC_AYUV:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
 bool
 sna_video_clip_helper(struct sna_video *video,
 		      struct sna_video_frame *frame,
diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
index 8b7ae8ae..288f1d62 100644
--- a/src/sna/sna_video_sprite.c
+++ b/src/sna/sna_video_sprite.c
@@ -47,7 +47,7 @@ 
 #define DRM_FORMAT_YUYV         fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
 #define DRM_FORMAT_UYVY         fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
 #define DRM_FORMAT_NV12         fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */
-
+#define DRM_FORMAT_XYUV8888     fourcc_code('X', 'Y', 'U', 'V') /* 2x2 subsampled Cr:Cb plane */
 #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
 				    (sna)->kgem.gen >= 0110)
 
@@ -74,11 +74,11 @@  static Atom xvColorKey, xvAlwaysOnTop, xvSyncToVblank, xvColorspace;
 
 static XvFormatRec formats[] = { {15}, {16}, {24} };
 static const XvImageRec images[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
-				     XVMC_RGB888 };
+				     XVMC_RGB888, XVIMAGE_AYUV };
 static const XvImageRec images_rgb565[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
-					    XVMC_RGB888, XVMC_RGB565 };
+					    XVMC_RGB888, XVMC_RGB565, XVIMAGE_AYUV };
 static const XvImageRec images_nv12[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
-					  XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 };
+					  XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565, XVIMAGE_AYUV };
 static const XvAttributeRec attribs[] = {
 	{ XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE" }, /* BT.601, BT.709 */
 	{ XvSettable | XvGettable, 0, 0xffffff, (char *)"XV_COLORKEY" },
@@ -364,6 +364,10 @@  sna_video_sprite_show(struct sna *sna,
 		case FOURCC_UYVY:
 			f.pixel_format = DRM_FORMAT_UYVY;
 			break;
+		case FOURCC_AYUV:
+			/* i915 doesn't support alpha, so we use XYUV */
+			f.pixel_format = DRM_FORMAT_XYUV8888;
+			break;
 		case FOURCC_YUY2:
 		default:
 			f.pixel_format = DRM_FORMAT_YUYV;
@@ -705,7 +709,12 @@  static int sna_video_sprite_query(ddQueryImageAttributes_ARGS)
 		tmp *= (*h >> 1);
 		size += tmp;
 		break;
-
+	case FOURCC_AYUV:
+		tmp = *w << 2;
+		if (pitches)
+			pitches[0] = tmp;
+		size = *h * tmp;
+		break;
 	default:
 		*w = (*w + 1) & ~1;
 		*h = (*h + 1) & ~1;
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index a784fe2e..21c5f379 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -68,6 +68,8 @@  static const XvImageRec gen4_Images[] = {
 	XVIMAGE_I420,
 	XVIMAGE_NV12,
 	XVIMAGE_UYVY,
+	XVIMAGE_AYUV,
+	XVMC_RGB888,
 	XVMC_YUV,
 };
 
@@ -337,6 +339,12 @@  sna_video_textured_query(ddQueryImageAttributes_ARGS)
 			pitches[0] = size;
 		size *= *h;
 		break;
+	case FOURCC_AYUV:
+		size = *w << 2;
+		if (pitches)
+			pitches[0] = size;
+		size *= *h;
+		break;
 	case FOURCC_XVMC:
 		*h = (*h + 1) & ~1;
 		size = sizeof(uint32_t);