diff mbox

[xf86-video-intel,2/6] sna/gen6+: Split out wm_kernel from the sna_composite_op flags

Message ID 20180704185919.30946-2-ville.syrjala@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ville Syrjälä July 4, 2018, 6:59 p.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

With the extra video kernels we already ran out of bits in
the flags. To tackle that let's just split out the
wm_kernel to its own thing.

Fixes: e4f2b5d5af95 ("sna/video: Add XV_COLORSPACE attribute for the textured Xv adaptor")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 src/sna/gen6_render.c | 47 ++++++++++++++++++++++++++++-------------------
 src/sna/gen7_render.c | 35 ++++++++++++++++++++++-------------
 src/sna/gen8_render.c | 35 ++++++++++++++++++++++-------------
 src/sna/gen9_render.c | 35 ++++++++++++++++++++++-------------
 src/sna/sna_render.h  |  4 ++++
 5 files changed, 98 insertions(+), 58 deletions(-)

Comments

Chris Wilson July 4, 2018, 7:08 p.m. UTC | #1
Quoting Ville Syrjala (2018-07-04 19:59:15)
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> With the extra video kernels we already ran out of bits in
> the flags. To tackle that let's just split out the
> wm_kernel to its own thing.

Does make one wish we had used push constants or loading them from a
constant surface. Oh well, stick to the path of least resistance. Looks
fine, but I should double check the asm at the end.
-Chris
Ville Syrjälä July 4, 2018, 7:20 p.m. UTC | #2
On Wed, Jul 04, 2018 at 08:08:49PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjala (2018-07-04 19:59:15)
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > 
> > With the extra video kernels we already ran out of bits in
> > the flags. To tackle that let's just split out the
> > wm_kernel to its own thing.
> 
> Does make one wish we had used push constants or loading them from a
> constant surface. Oh well, stick to the path of least resistance. Looks
> fine, but I should double check the asm at the end.

Yeah. I should really rewrite these things to use the brw thing, at
which point I imagine it would be feasible to pass in some constants
as well. So far the motivation has escaped me.
diff mbox

Patch

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 1558750325f0..99cc4b0ecb5c 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -224,18 +224,17 @@  static const struct blendinfo {
 
 #define COPY_SAMPLER 0
 #define COPY_VERTEX VERTEX_2s2s
-#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)
+#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, COPY_VERTEX)
 
 #define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
 #define FILL_VERTEX VERTEX_2s2s
-#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
-#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
+#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), FILL_VERTEX)
+#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, FILL_VERTEX)
 
 #define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
 #define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
-#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
 #define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
-#define GEN6_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
+#define GEN6_SET_FLAGS(S, B, V)  ((S) << 16 | ((B) | (V)))
 
 #define OUT_BATCH(v) batch_emit(sna, v)
 #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
@@ -967,7 +966,7 @@  gen6_emit_state(struct sna *sna,
 	gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
 	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
 	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
-	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
+	gen6_emit_wm(sna, op->u.gen6.wm_kernel, GEN6_VERTEX(op->u.gen6.flags) >> 2);
 	gen6_emit_vertex_elements(sna, op);
 	gen6_emit_binding_table(sna, wm_binding_table);
 
@@ -1252,7 +1251,7 @@  static int gen6_get_rectangles__flush(struct sna *sna,
 			gen6_emit_pipe_stall(sna);
 			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
 			gen6_emit_wm(sna,
-				     GEN6_KERNEL(op->u.gen6.flags),
+				     op->u.gen6.wm_kernel,
 				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
 		}
 	}
@@ -1732,8 +1731,8 @@  gen6_render_video(struct sna *sna,
 		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
 					       SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
 			       NO_BLEND,
-			       select_video_kernel(video, frame),
 			       2);
+	tmp.u.gen6.wm_kernel = select_video_kernel(video, frame);
 	tmp.priv = frame;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
@@ -2417,11 +2416,11 @@  gen6_render_composite(struct sna *sna,
 			       gen6_get_blend(tmp->op,
 					      tmp->has_component_alpha,
 					      tmp->dst.format),
-			       gen6_choose_composite_kernel(tmp->op,
-							    tmp->mask.bo != NULL,
-							    tmp->has_component_alpha,
-							    tmp->is_affine),
 			       gen4_choose_composite_emitter(sna, tmp));
+	tmp->u.gen6.wm_kernel = gen6_choose_composite_kernel(tmp->op,
+							     tmp->mask.bo != NULL,
+							     tmp->has_component_alpha,
+							     tmp->is_affine);
 
 	tmp->blt   = gen6_render_composite_blt;
 	tmp->box   = gen6_render_composite_box;
@@ -2669,8 +2668,9 @@  gen6_render_composite_spans(struct sna *sna,
 					      SAMPLER_FILTER_NEAREST,
 					      SAMPLER_EXTEND_PAD),
 			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
-			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
 			       gen4_choose_spans_emitter(sna, tmp));
+	tmp->base.u.gen6.wm_kernel =
+		GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine;
 
 	tmp->box   = gen6_render_composite_spans_box;
 	tmp->boxes = gen6_render_composite_spans_boxes;
@@ -2928,7 +2928,8 @@  fallback_blt:
 	tmp.need_magic_ca_pass = 0;
 
 	tmp.u.gen6.flags = COPY_FLAGS(alu);
-	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
+	tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+	assert(tmp.u.gen6.wm_kernel == GEN6_WM_KERNEL_NOMASK);
 	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
 	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
 
@@ -3102,7 +3103,8 @@  fallback:
 	op->base.floats_per_rect = 6;
 
 	op->base.u.gen6.flags = COPY_FLAGS(alu);
-	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
+	op->base.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+	assert(op->base.u.gen6.wm_kernel == GEN6_WM_KERNEL_NOMASK);
 	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
 	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);
 
@@ -3247,7 +3249,8 @@  gen6_render_fill_boxes(struct sna *sna,
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen6.flags = FILL_FLAGS(op, format);
-	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
+	tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+	assert(tmp.u.gen6.wm_kernel == GEN6_WM_KERNEL_NOMASK);
 	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
 	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
@@ -3427,7 +3430,8 @@  gen6_render_fill(struct sna *sna, uint8_t alu,
 	op->base.floats_per_rect = 6;
 
 	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
-	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
+	op->base.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+	assert(op->base.u.gen6.wm_kernel == GEN6_WM_KERNEL_NOMASK);
 	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
 	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);
 
@@ -3509,7 +3513,8 @@  gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
-	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
+	tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+	assert(tmp.u.gen6.wm_kernel == GEN6_WM_KERNEL_NOMASK);
 	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
 	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
@@ -3596,7 +3601,8 @@  gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
-	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
+	tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK;
+	assert(tmp.u.gen6.wm_kernel == GEN6_WM_KERNEL_NOMASK);
 	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
 	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
@@ -3725,6 +3731,9 @@  static bool gen6_render_setup(struct sna *sna, int devid)
 		}
 	}
 
+	COMPILE_TIME_ASSERT(GEN6_KERNEL_COUNT <=
+			    1 << (sizeof(((struct sna_composite_op *)NULL)->u.gen6.wm_kernel) * 8));
+
 	COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0xfff);
 	ss = sna_static_stream_map(&general,
 				   2 * sizeof(*ss) *
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 671f60e07679..cffe17de6d0d 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -321,19 +321,18 @@  static const struct blendinfo {
 
 #define COPY_SAMPLER 0
 #define COPY_VERTEX VERTEX_2s2s
-#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)
+#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, COPY_VERTEX)
 
 #define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
 #define FILL_VERTEX VERTEX_2s2s
-#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
-#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
+#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), FILL_VERTEX)
+#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, FILL_VERTEX)
 
 #define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
 #define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
 #define GEN7_READS_DST(f) (((f) >> 15) & 1)
-#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
 #define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
-#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
+#define GEN7_SET_FLAGS(S, B, V)  ((S) << 16 | ((B) | (V)))
 
 #define OUT_BATCH(v) batch_emit(sna, v)
 #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
@@ -1188,7 +1187,7 @@  gen7_emit_state(struct sna *sna,
 	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
 	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
 	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
-	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
+	gen7_emit_wm(sna, op->u.gen7.wm_kernel);
 	gen7_emit_vertex_elements(sna, op);
 	gen7_emit_binding_table(sna, wm_binding_table);
 
@@ -1453,7 +1452,7 @@  static int gen7_get_rectangles__flush(struct sna *sna,
 		if (gen7_magic_ca_pass(sna, op)) {
 			gen7_emit_pipe_stall(sna);
 			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
-			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
+			gen7_emit_wm(sna, op->u.gen7.wm_kernel);
 		}
 	}
 
@@ -1949,8 +1948,8 @@  gen7_render_video(struct sna *sna,
 		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
 					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
 			       NO_BLEND,
-			       select_video_kernel(video, frame),
 			       2);
+	tmp.u.gen7.wm_kernel = select_video_kernel(video, frame);
 	tmp.priv = frame;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
@@ -2647,11 +2646,11 @@  gen7_render_composite(struct sna *sna,
 			       gen7_get_blend(tmp->op,
 					      tmp->has_component_alpha,
 					      tmp->dst.format),
-			       gen7_choose_composite_kernel(tmp->op,
-							    tmp->mask.bo != NULL,
-							    tmp->has_component_alpha,
-							    tmp->is_affine),
 			       gen4_choose_composite_emitter(sna, tmp));
+	tmp->u.gen7.wm_kernel = gen7_choose_composite_kernel(tmp->op,
+							     tmp->mask.bo != NULL,
+							     tmp->has_component_alpha,
+							     tmp->is_affine);
 
 	tmp->blt   = gen7_render_composite_blt;
 	tmp->box   = gen7_render_composite_box;
@@ -2879,8 +2878,9 @@  gen7_render_composite_spans(struct sna *sna,
 					      SAMPLER_FILTER_NEAREST,
 					      SAMPLER_EXTEND_PAD),
 			       gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
-			       GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
 			       gen4_choose_spans_emitter(sna, tmp));
+	tmp->base.u.gen7.wm_kernel =
+		GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine;
 
 	tmp->box   = gen7_render_composite_spans_box;
 	tmp->boxes = gen7_render_composite_spans_boxes;
@@ -3141,6 +3141,7 @@  fallback_blt:
 	tmp.need_magic_ca_pass = 0;
 
 	tmp.u.gen7.flags = COPY_FLAGS(alu);
+	tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
@@ -3308,6 +3309,7 @@  fallback:
 	op->base.floats_per_rect = 6;
 
 	op->base.u.gen7.flags = COPY_FLAGS(alu);
+	op->base.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
@@ -3461,6 +3463,7 @@  gen7_render_fill_boxes(struct sna *sna,
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen7.flags = FILL_FLAGS(op, format);
+	tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -3642,6 +3645,7 @@  gen7_render_fill(struct sna *sna, uint8_t alu,
 	op->base.floats_per_rect = 6;
 
 	op->base.u.gen7.flags = FILL_FLAGS_NOBLEND;
+	op->base.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -3726,6 +3730,7 @@  gen7_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;
+	tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
@@ -3811,6 +3816,7 @@  gen7_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen7.flags = FILL_FLAGS_NOBLEND;
+	tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
@@ -3958,6 +3964,9 @@  static bool gen7_render_setup(struct sna *sna, int devid)
 		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
 	}
 
+	COMPILE_TIME_ASSERT(GEN7_WM_KERNEL_COUNT <=
+			    1 << (sizeof(((struct sna_composite_op *)NULL)->u.gen7.wm_kernel) * 8));
+
 	COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0xfff);
 	ss = sna_static_stream_map(&general,
 				   2 * sizeof(*ss) *
diff --git a/src/sna/gen8_render.c b/src/sna/gen8_render.c
index 69617da561b6..9bac9b8bb6be 100644
--- a/src/sna/gen8_render.c
+++ b/src/sna/gen8_render.c
@@ -225,19 +225,18 @@  static const struct blendinfo {
 
 #define COPY_SAMPLER 0
 #define COPY_VERTEX VERTEX_2s2s
-#define COPY_FLAGS(a) GEN8_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN8_WM_KERNEL_NOMASK, COPY_VERTEX)
+#define COPY_FLAGS(a) GEN8_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, COPY_VERTEX)
 
 #define FILL_SAMPLER 1
 #define FILL_VERTEX VERTEX_2s2s
-#define FILL_FLAGS(op, format) GEN8_SET_FLAGS(FILL_SAMPLER, gen8_get_blend((op), false, (format)), GEN8_WM_KERNEL_NOMASK, FILL_VERTEX)
-#define FILL_FLAGS_NOBLEND GEN8_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN8_WM_KERNEL_NOMASK, FILL_VERTEX)
+#define FILL_FLAGS(op, format) GEN8_SET_FLAGS(FILL_SAMPLER, gen8_get_blend((op), false, (format)), FILL_VERTEX)
+#define FILL_FLAGS_NOBLEND GEN8_SET_FLAGS(FILL_SAMPLER, NO_BLEND, FILL_VERTEX)
 
 #define GEN8_SAMPLER(f) (((f) >> 20) & 0xfff)
 #define GEN8_BLEND(f) (((f) >> 4) & 0x7ff)
 #define GEN8_READS_DST(f) (((f) >> 15) & 1)
-#define GEN8_KERNEL(f) (((f) >> 16) & 0xf)
 #define GEN8_VERTEX(f) (((f) >> 0) & 0xf)
-#define GEN8_SET_FLAGS(S, B, K, V)  ((S) << 20 | (K) << 16 | (B) | (V))
+#define GEN8_SET_FLAGS(S, B, V)  ((S) << 20 | (B) | (V))
 
 #define OUT_BATCH(v) batch_emit(sna, v)
 #define OUT_BATCH64(v) batch_emit64(sna, v)
@@ -1286,7 +1285,7 @@  gen8_emit_state(struct sna *sna,
 	gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags));
 	gen8_emit_sampler(sna, GEN8_SAMPLER(op->u.gen8.flags));
 	gen8_emit_sf(sna, GEN8_VERTEX(op->u.gen8.flags) >> 2);
-	gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags));
+	gen8_emit_wm(sna, op->u.gen8.wm_kernel);
 	gen8_emit_vertex_elements(sna, op);
 	gen8_emit_binding_table(sna, wm_binding_table);
 
@@ -1557,7 +1556,7 @@  static int gen8_get_rectangles__flush(struct sna *sna,
 		if (gen8_magic_ca_pass(sna, op)) {
 			gen8_emit_pipe_invalidate(sna);
 			gen8_emit_cc(sna, GEN8_BLEND(op->u.gen8.flags));
-			gen8_emit_wm(sna, GEN8_KERNEL(op->u.gen8.flags));
+			gen8_emit_wm(sna, op->u.gen8.wm_kernel);
 		}
 	}
 
@@ -2485,11 +2484,11 @@  gen8_render_composite(struct sna *sna,
 			       gen8_get_blend(tmp->op,
 					      tmp->has_component_alpha,
 					      tmp->dst.format),
-			       gen8_choose_composite_kernel(tmp->op,
-							    tmp->mask.bo != NULL,
-							    tmp->has_component_alpha,
-							    tmp->is_affine),
 			       gen4_choose_composite_emitter(sna, tmp));
+	tmp->u.gen8.wm_kernel = gen8_choose_composite_kernel(tmp->op,
+							     tmp->mask.bo != NULL,
+							     tmp->has_component_alpha,
+							     tmp->is_affine);
 
 	tmp->blt   = gen8_render_composite_blt;
 	tmp->box   = gen8_render_composite_box;
@@ -2718,8 +2717,9 @@  gen8_render_composite_spans(struct sna *sna,
 					      SAMPLER_FILTER_NEAREST,
 					      SAMPLER_EXTEND_PAD),
 			       gen8_get_blend(tmp->base.op, false, tmp->base.dst.format),
-			       GEN8_WM_KERNEL_OPACITY | !tmp->base.is_affine,
 			       gen4_choose_spans_emitter(sna, tmp));
+	tmp->base.u.gen8.wm_kernel =
+		GEN8_WM_KERNEL_OPACITY | !tmp->base.is_affine;
 
 	tmp->box   = gen8_render_composite_spans_box;
 	tmp->boxes = gen8_render_composite_spans_boxes;
@@ -2982,6 +2982,7 @@  fallback_blt:
 	tmp.need_magic_ca_pass = 0;
 
 	tmp.u.gen8.flags = COPY_FLAGS(alu);
+	tmp.u.gen8.wm_kernel = GEN8_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
@@ -3151,6 +3152,7 @@  fallback:
 	op->base.floats_per_rect = 6;
 
 	op->base.u.gen8.flags = COPY_FLAGS(alu);
+	op->base.u.gen8.wm_kernel = GEN8_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
@@ -3303,6 +3305,7 @@  gen8_render_fill_boxes(struct sna *sna,
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen8.flags = FILL_FLAGS(op, format);
+	tmp.u.gen8.wm_kernel = GEN8_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -3489,6 +3492,7 @@  gen8_render_fill(struct sna *sna, uint8_t alu,
 	op->base.floats_per_rect = 6;
 
 	op->base.u.gen8.flags = FILL_FLAGS_NOBLEND;
+	op->base.u.gen8.wm_kernel = GEN8_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -3574,6 +3578,7 @@  gen8_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen8.flags = FILL_FLAGS_NOBLEND;
+	tmp.u.gen8.wm_kernel = GEN8_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
@@ -3660,6 +3665,7 @@  gen8_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen8.flags = FILL_FLAGS_NOBLEND;
+	tmp.u.gen8.wm_kernel = GEN8_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
@@ -3901,8 +3907,8 @@  gen8_render_video(struct sna *sna,
 		GEN8_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
 					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
 			       NO_BLEND,
-			       select_video_kernel(video, frame),
 			       2);
+	tmp.u.gen8.wm_kernel = select_video_kernel(video, frame);
 	tmp.priv = frame;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
@@ -4066,6 +4072,9 @@  static bool gen8_render_setup(struct sna *sna)
 		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
 	}
 
+	COMPILE_TIME_ASSERT(GEN8_WM_KERNEL_COUNT <=
+			    1 << (sizeof(((struct sna_composite_op *)NULL)->u.gen8.wm_kernel) * 8));
+
 	COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff);
 	ss = sna_static_stream_map(&general,
 				   2 * sizeof(*ss) *
diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
index 505b98afa38c..eb22b642ffa1 100644
--- a/src/sna/gen9_render.c
+++ b/src/sna/gen9_render.c
@@ -226,19 +226,18 @@  static const struct blendinfo {
 
 #define COPY_SAMPLER 0
 #define COPY_VERTEX VERTEX_2s2s
-#define COPY_FLAGS(a) GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN9_WM_KERNEL_NOMASK, COPY_VERTEX)
+#define COPY_FLAGS(a) GEN9_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, COPY_VERTEX)
 
 #define FILL_SAMPLER 1
 #define FILL_VERTEX VERTEX_2s2s
-#define FILL_FLAGS(op, format) GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), GEN9_WM_KERNEL_NOMASK, FILL_VERTEX)
-#define FILL_FLAGS_NOBLEND GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN9_WM_KERNEL_NOMASK, FILL_VERTEX)
+#define FILL_FLAGS(op, format) GEN9_SET_FLAGS(FILL_SAMPLER, gen9_get_blend((op), false, (format)), FILL_VERTEX)
+#define FILL_FLAGS_NOBLEND GEN9_SET_FLAGS(FILL_SAMPLER, NO_BLEND, FILL_VERTEX)
 
 #define GEN9_SAMPLER(f) (((f) >> 20) & 0xfff)
 #define GEN9_BLEND(f) (((f) >> 4) & 0x7ff)
 #define GEN9_READS_DST(f) (((f) >> 15) & 1)
-#define GEN9_KERNEL(f) (((f) >> 16) & 0xf)
 #define GEN9_VERTEX(f) (((f) >> 0) & 0xf)
-#define GEN9_SET_FLAGS(S, B, K, V)  ((S) << 20 | (K) << 16 | (B) | (V))
+#define GEN9_SET_FLAGS(S, B, V)  ((S) << 20 | (B) | (V))
 
 #define OUT_BATCH(v) batch_emit(sna, v)
 #define OUT_BATCH64(v) batch_emit64(sna, v)
@@ -1349,7 +1348,7 @@  gen9_emit_state(struct sna *sna,
 	gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags));
 	gen9_emit_sampler(sna, GEN9_SAMPLER(op->u.gen9.flags));
 	gen9_emit_sf(sna, GEN9_VERTEX(op->u.gen9.flags) >> 2);
-	gen9_emit_wm(sna, GEN9_KERNEL(op->u.gen9.flags));
+	gen9_emit_wm(sna, op->u.gen9.wm_kernel);
 	gen9_emit_vertex_elements(sna, op);
 	gen9_emit_binding_table(sna, wm_binding_table);
 
@@ -1618,7 +1617,7 @@  static int gen9_get_rectangles__flush(struct sna *sna,
 		if (gen9_magic_ca_pass(sna, op)) {
 			gen9_emit_pipe_invalidate(sna);
 			gen9_emit_cc(sna, GEN9_BLEND(op->u.gen9.flags));
-			gen9_emit_wm(sna, GEN9_KERNEL(op->u.gen9.flags));
+			gen9_emit_wm(sna, op->u.gen9.wm_kernel);
 		}
 	}
 
@@ -2548,11 +2547,11 @@  gen9_render_composite(struct sna *sna,
 			       gen9_get_blend(tmp->op,
 					      tmp->has_component_alpha,
 					      tmp->dst.format),
-			       gen9_choose_composite_kernel(tmp->op,
-							    tmp->mask.bo != NULL,
-							    tmp->has_component_alpha,
-							    tmp->is_affine),
 			       gen4_choose_composite_emitter(sna, tmp));
+	tmp->u.gen9.wm_kernel = gen9_choose_composite_kernel(tmp->op,
+							     tmp->mask.bo != NULL,
+							     tmp->has_component_alpha,
+							     tmp->is_affine);
 
 	tmp->blt   = gen9_render_composite_blt;
 	tmp->box   = gen9_render_composite_box;
@@ -2781,8 +2780,9 @@  gen9_render_composite_spans(struct sna *sna,
 					      SAMPLER_FILTER_NEAREST,
 					      SAMPLER_EXTEND_PAD),
 			       gen9_get_blend(tmp->base.op, false, tmp->base.dst.format),
-			       GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine,
 			       gen4_choose_spans_emitter(sna, tmp));
+	tmp->base.u.gen9.wm_kernel =
+		GEN9_WM_KERNEL_OPACITY | !tmp->base.is_affine;
 
 	tmp->box   = gen9_render_composite_spans_box;
 	tmp->boxes = gen9_render_composite_spans_boxes;
@@ -3045,6 +3045,7 @@  fallback_blt:
 	tmp.need_magic_ca_pass = 0;
 
 	tmp.u.gen9.flags = COPY_FLAGS(alu);
+	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
@@ -3214,6 +3215,7 @@  fallback:
 	op->base.floats_per_rect = 6;
 
 	op->base.u.gen9.flags = COPY_FLAGS(alu);
+	op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
@@ -3366,6 +3368,7 @@  gen9_render_fill_boxes(struct sna *sna,
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen9.flags = FILL_FLAGS(op, format);
+	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -3552,6 +3555,7 @@  gen9_render_fill(struct sna *sna, uint8_t alu,
 	op->base.floats_per_rect = 6;
 
 	op->base.u.gen9.flags = FILL_FLAGS_NOBLEND;
+	op->base.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
@@ -3637,6 +3641,7 @@  gen9_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen9.flags = FILL_FLAGS_NOBLEND;
+	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
@@ -3723,6 +3728,7 @@  gen9_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
 	tmp.need_magic_ca_pass = false;
 
 	tmp.u.gen9.flags = FILL_FLAGS_NOBLEND;
+	tmp.u.gen9.wm_kernel = GEN9_WM_KERNEL_NOMASK;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
 	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
@@ -3964,8 +3970,8 @@  gen9_render_video(struct sna *sna,
 		GEN9_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
 					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
 			       NO_BLEND,
-			       select_video_kernel(video, frame),
 			       2);
+	tmp.u.gen9.wm_kernel = select_video_kernel(video, frame);
 	tmp.priv = frame;
 
 	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
@@ -4135,6 +4141,9 @@  static bool gen9_render_setup(struct sna *sna)
 		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
 	}
 
+	COMPILE_TIME_ASSERT(GEN9_WM_KERNEL_COUNT <=
+			    1 << (sizeof(((struct sna_composite_op *)NULL)->u.gen9.wm_kernel) * 8));
+
 	COMPILE_TIME_ASSERT(SAMPLER_OFFSET(FILTER_COUNT, EXTEND_COUNT, FILTER_COUNT, EXTEND_COUNT) <= 0x7ff);
 	ss = sna_static_stream_map(&general,
 				   2 * sizeof(*ss) *
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 6669af9d1cd8..442a6ec50191 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -139,18 +139,22 @@  struct sna_composite_op {
 
 		struct {
 			uint32_t flags;
+			uint8_t wm_kernel;
 		} gen6;
 
 		struct {
 			uint32_t flags;
+			uint8_t wm_kernel;
 		} gen7;
 
 		struct {
 			uint32_t flags;
+			uint8_t wm_kernel;
 		} gen8;
 
 		struct {
 			uint32_t flags;
+			uint8_t wm_kernel;
 		} gen9;
 	} u;