diff mbox series

[RFC,006/162] drm/i915: split gen8+ flush and bb_start emission functions to their own file

Message ID 20201127120718.454037-7-matthew.auld@intel.com (mailing list archive)
State New, archived
Headers show
Series DG1 + LMEM enabling | expand

Commit Message

Matthew Auld Nov. 27, 2020, 12:04 p.m. UTC
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

These functions are independent from the backend used and can therefore
be split out of the exelists submission file, so they can be re-used by
the upcoming GuC submission backend.

Based on a patch by Chris Wilson.

Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris P Wilson <chris.p.wilson@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |   1 +
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c      | 393 ++++++++++++++++++
 drivers/gpu/drm/i915/gt/gen8_engine_cs.h      |  26 ++
 .../drm/i915/gt/intel_execlists_submission.c  | 385 +----------------
 4 files changed, 421 insertions(+), 384 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gt/gen8_engine_cs.c
 create mode 100644 drivers/gpu/drm/i915/gt/gen8_engine_cs.h

Comments

Chris Wilson Nov. 27, 2020, 7:58 p.m. UTC | #1
Quoting Matthew Auld (2020-11-27 12:04:42)
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> 
> These functions are independent from the backend used and can therefore
> be split out of the exelists submission file, so they can be re-used by
> the upcoming GuC submission backend.
> 
> Based on a patch by Chris Wilson.
> 
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Chris P Wilson <chris.p.wilson@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |   1 +
>  drivers/gpu/drm/i915/gt/gen8_engine_cs.c      | 393 ++++++++++++++++++
>  drivers/gpu/drm/i915/gt/gen8_engine_cs.h      |  26 ++
>  .../drm/i915/gt/intel_execlists_submission.c  | 385 +----------------
>  4 files changed, 421 insertions(+), 384 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/gt/gen8_engine_cs.c
>  create mode 100644 drivers/gpu/drm/i915/gt/gen8_engine_cs.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index aedbd8f52be8..f9ef5199b124 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -82,6 +82,7 @@ gt-y += \
>         gt/gen6_engine_cs.o \
>         gt/gen6_ppgtt.o \
>         gt/gen7_renderclear.o \
> +       gt/gen8_engine_cs.o \
>         gt/gen8_ppgtt.o \
>         gt/intel_breadcrumbs.o \
>         gt/intel_context.o \
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> new file mode 100644
> index 000000000000..a96fe108685e
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -0,0 +1,393 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2014 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_execlists_submission.h" /* XXX */
> +#include "intel_gpu_commands.h"
> +#include "intel_ring.h"
> +
> +int gen8_emit_flush_render(struct i915_request *request, u32 mode)

Refresh the names to make the recent schemes.
(rcs when specific, xcs when not)
-Chris
---------------------------------------------------------------------
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index aedbd8f52be8..f9ef5199b124 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -82,6 +82,7 @@  gt-y += \
 	gt/gen6_engine_cs.o \
 	gt/gen6_ppgtt.o \
 	gt/gen7_renderclear.o \
+	gt/gen8_engine_cs.o \
 	gt/gen8_ppgtt.o \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
new file mode 100644
index 000000000000..a96fe108685e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -0,0 +1,393 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_execlists_submission.h" /* XXX */
+#include "intel_gpu_commands.h"
+#include "intel_ring.h"
+
+int gen8_emit_flush_render(struct i915_request *request, u32 mode)
+{
+	bool vf_flush_wa = false, dc_flush_wa = false;
+	u32 *cs, flags = 0;
+	int len;
+
+	flags |= PIPE_CONTROL_CS_STALL;
+
+	if (mode & EMIT_FLUSH) {
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+	}
+
+	if (mode & EMIT_INVALIDATE) {
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+		/*
+		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
+		 * pipe control.
+		 */
+		if (IS_GEN(request->engine->i915, 9))
+			vf_flush_wa = true;
+
+		/* WaForGAMHang:kbl */
+		if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
+			dc_flush_wa = true;
+	}
+
+	len = 6;
+
+	if (vf_flush_wa)
+		len += 6;
+
+	if (dc_flush_wa)
+		len += 12;
+
+	cs = intel_ring_begin(request, len);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (vf_flush_wa)
+		cs = gen8_emit_pipe_control(cs, 0, 0);
+
+	if (dc_flush_wa)
+		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
+					    0);
+
+	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+	if (dc_flush_wa)
+		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
+
+	intel_ring_advance(request, cs);
+
+	return 0;
+}
+
+int gen8_emit_flush(struct i915_request *request, u32 mode)
+{
+	u32 cmd, *cs;
+
+	cs = intel_ring_begin(request, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	cmd = MI_FLUSH_DW + 1;
+
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+	if (mode & EMIT_INVALIDATE) {
+		cmd |= MI_INVALIDATE_TLB;
+		if (request->engine->class == VIDEO_DECODE_CLASS)
+			cmd |= MI_INVALIDATE_BSD;
+	}
+
+	*cs++ = cmd;
+	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+	*cs++ = 0; /* upper addr */
+	*cs++ = 0; /* value */
+	intel_ring_advance(request, cs);
+
+	return 0;
+}
+
+int gen11_emit_flush_render(struct i915_request *request, u32 mode)
+{
+	if (mode & EMIT_FLUSH) {
+		u32 *cs;
+		u32 flags = 0;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+		cs = intel_ring_begin(request, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+		intel_ring_advance(request, cs);
+	}
+
+	if (mode & EMIT_INVALIDATE) {
+		u32 *cs;
+		u32 flags = 0;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+		cs = intel_ring_begin(request, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+		intel_ring_advance(request, cs);
+	}
+
+	return 0;
+}
+
+static u32 preparser_disable(bool state)
+{
+	return MI_ARB_CHECK | 1 << 8 | state;
+}
+
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+	static const i915_reg_t vd[] = {
+		GEN12_VD0_AUX_NV,
+		GEN12_VD1_AUX_NV,
+		GEN12_VD2_AUX_NV,
+		GEN12_VD3_AUX_NV,
+	};
+
+	static const i915_reg_t ve[] = {
+		GEN12_VE0_AUX_NV,
+		GEN12_VE1_AUX_NV,
+	};
+
+	if (engine->class == VIDEO_DECODE_CLASS)
+		return vd[engine->instance];
+
+	if (engine->class == VIDEO_ENHANCEMENT_CLASS)
+		return ve[engine->instance];
+
+	GEM_BUG_ON("unknown aux_inv_reg\n");
+
+	return INVALID_MMIO_REG;
+}
+
+static u32 *
+gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
+{
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(inv_reg);
+	*cs++ = AUX_INV;
+	*cs++ = MI_NOOP;
+
+	return cs;
+}
+
+int gen12_emit_flush_render(struct i915_request *request, u32 mode)
+{
+	if (mode & EMIT_FLUSH) {
+		u32 flags = 0;
+		u32 *cs;
+
+		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_FLUSH_L3;
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		/* Wa_1409600907:tgl */
+		flags |= PIPE_CONTROL_DEPTH_STALL;
+		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+		flags |= PIPE_CONTROL_QW_WRITE;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		cs = intel_ring_begin(request, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		cs = gen12_emit_pipe_control(cs,
+					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
+					     flags, LRC_PPHWSP_SCRATCH_ADDR);
+		intel_ring_advance(request, cs);
+	}
+
+	if (mode & EMIT_INVALIDATE) {
+		u32 flags = 0;
+		u32 *cs;
+
+		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+
+		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+		flags |= PIPE_CONTROL_QW_WRITE;
+
+		flags |= PIPE_CONTROL_CS_STALL;
+
+		cs = intel_ring_begin(request, 8 + 4);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		/*
+		 * Prevent the pre-parser from skipping past the TLB
+		 * invalidate and loading a stale page for the batch
+		 * buffer / request payload.
+		 */
+		*cs++ = preparser_disable(true);
+
+		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+		/* hsdes: 1809175790 */
+		cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
+
+		*cs++ = preparser_disable(false);
+		intel_ring_advance(request, cs);
+	}
+
+	return 0;
+}
+
+int gen12_emit_flush(struct i915_request *request, u32 mode)
+{
+	intel_engine_mask_t aux_inv = 0;
+	u32 cmd, *cs;
+
+	cmd = 4;
+	if (mode & EMIT_INVALIDATE)
+		cmd += 2;
+	if (mode & EMIT_INVALIDATE)
+		aux_inv = request->engine->mask & ~BIT(BCS0);
+	if (aux_inv)
+		cmd += 2 * hweight8(aux_inv) + 2;
+
+	cs = intel_ring_begin(request, cmd);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (mode & EMIT_INVALIDATE)
+		*cs++ = preparser_disable(true);
+
+	cmd = MI_FLUSH_DW + 1;
+
+	/* We always require a command barrier so that subsequent
+	 * commands, such as breadcrumb interrupts, are strictly ordered
+	 * wrt the contents of the write cache being flushed to memory
+	 * (and thus being coherent from the CPU).
+	 */
+	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+	if (mode & EMIT_INVALIDATE) {
+		cmd |= MI_INVALIDATE_TLB;
+		if (request->engine->class == VIDEO_DECODE_CLASS)
+			cmd |= MI_INVALIDATE_BSD;
+	}
+
+	*cs++ = cmd;
+	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+	*cs++ = 0; /* upper addr */
+	*cs++ = 0; /* value */
+
+	if (aux_inv) { /* hsdes: 1809175790 */
+		struct intel_engine_cs *engine;
+		unsigned int tmp;
+
+		*cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
+		for_each_engine_masked(engine, request->engine->gt,
+				       aux_inv, tmp) {
+			*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
+			*cs++ = AUX_INV;
+		}
+		*cs++ = MI_NOOP;
+	}
+
+	if (mode & EMIT_INVALIDATE)
+		*cs++ = preparser_disable(false);
+
+	intel_ring_advance(request, cs);
+
+	return 0;
+}
+
+int gen8_emit_bb_start_noarb(struct i915_request *rq,
+			     u64 offset, u32 len,
+			     const unsigned int flags)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * WaDisableCtxRestoreArbitration:bdw,chv
+	 *
+	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
+	 * particular all the gen that do not need the w/a at all!), if we
+	 * took care to make sure that on every switch into this context
+	 * (both ordinary and for preemption) that arbitrartion was enabled
+	 * we would be fine.  However, for gen8 there is another w/a that
+	 * requires us to not preempt inside GPGPU execution, so we keep
+	 * arbitration disabled for gen8 batches. Arbitration will be
+	 * re-enabled before we close the request
+	 * (engine->emit_fini_breadcrumb).
+	 */
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+	/* FIXME(BDW+): Address space and security selectors. */
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
+		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int gen8_emit_bb_start(struct i915_request *rq,
+		       u64 offset, u32 len,
+		       const unsigned int flags)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 6);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
+		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
new file mode 100644
index 000000000000..c0c62284b650
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -0,0 +1,26 @@ 
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#ifndef __GEN8_ENGINE_CS_H__
+#define __GEN8_ENGINE_CS_H__
+
+#include <linux/types.h>
+
+struct i915_request;
+
+int gen8_emit_flush_render(struct i915_request *request, u32 mode);
+int gen8_emit_flush(struct i915_request *request, u32 mode);
+int gen11_emit_flush_render(struct i915_request *request, u32 mode);
+int gen12_emit_flush_render(struct i915_request *request, u32 mode);
+int gen12_emit_flush(struct i915_request *request, u32 mode);
+
+int gen8_emit_bb_start_noarb(struct i915_request *rq,
+			     u64 offset, u32 len,
+			     const unsigned int flags);
+int gen8_emit_bb_start(struct i915_request *rq,
+		       u64 offset, u32 len,
+		       const unsigned int flags);
+
+#endif /* __GEN8_ENGINE_CS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index fc330233ea20..9069a456d2f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -112,6 +112,7 @@ 
 #include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
+#include "gen8_engine_cs.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
@@ -4465,67 +4466,6 @@  static void execlists_reset_finish(struct intel_engine_cs *engine)
 		     atomic_read(&execlists->tasklet.count));
 }
 
-static int gen8_emit_bb_start_noarb(struct i915_request *rq,
-				    u64 offset, u32 len,
-				    const unsigned int flags)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	/*
-	 * WaDisableCtxRestoreArbitration:bdw,chv
-	 *
-	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
-	 * particular all the gen that do not need the w/a at all!), if we
-	 * took care to make sure that on every switch into this context
-	 * (both ordinary and for preemption) that arbitrartion was enabled
-	 * we would be fine.  However, for gen8 there is another w/a that
-	 * requires us to not preempt inside GPGPU execution, so we keep
-	 * arbitration disabled for gen8 batches. Arbitration will be
-	 * re-enabled before we close the request
-	 * (engine->emit_fini_breadcrumb).
-	 */
-	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-	/* FIXME(BDW+): Address space and security selectors. */
-	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
-		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
-	*cs++ = lower_32_bits(offset);
-	*cs++ = upper_32_bits(offset);
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
-static int gen8_emit_bb_start(struct i915_request *rq,
-			      u64 offset, u32 len,
-			      const unsigned int flags)
-{
-	u32 *cs;
-
-	cs = intel_ring_begin(rq, 6);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
-		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
-	*cs++ = lower_32_bits(offset);
-	*cs++ = upper_32_bits(offset);
-
-	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(rq, cs);
-
-	return 0;
-}
-
 static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
 {
 	ENGINE_WRITE(engine, RING_IMR,
@@ -4538,329 +4478,6 @@  static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
 	ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
 }
 
-static int gen8_emit_flush(struct i915_request *request, u32 mode)
-{
-	u32 cmd, *cs;
-
-	cs = intel_ring_begin(request, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	cmd = MI_FLUSH_DW + 1;
-
-	/* We always require a command barrier so that subsequent
-	 * commands, such as breadcrumb interrupts, are strictly ordered
-	 * wrt the contents of the write cache being flushed to memory
-	 * (and thus being coherent from the CPU).
-	 */
-	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
-	if (mode & EMIT_INVALIDATE) {
-		cmd |= MI_INVALIDATE_TLB;
-		if (request->engine->class == VIDEO_DECODE_CLASS)
-			cmd |= MI_INVALIDATE_BSD;
-	}
-
-	*cs++ = cmd;
-	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
-	*cs++ = 0; /* upper addr */
-	*cs++ = 0; /* value */
-	intel_ring_advance(request, cs);
-
-	return 0;
-}
-
-static int gen8_emit_flush_render(struct i915_request *request,
-				  u32 mode)
-{
-	bool vf_flush_wa = false, dc_flush_wa = false;
-	u32 *cs, flags = 0;
-	int len;
-
-	flags |= PIPE_CONTROL_CS_STALL;
-
-	if (mode & EMIT_FLUSH) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-	}
-
-	if (mode & EMIT_INVALIDATE) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-		/*
-		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
-		 * pipe control.
-		 */
-		if (IS_GEN(request->engine->i915, 9))
-			vf_flush_wa = true;
-
-		/* WaForGAMHang:kbl */
-		if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
-			dc_flush_wa = true;
-	}
-
-	len = 6;
-
-	if (vf_flush_wa)
-		len += 6;
-
-	if (dc_flush_wa)
-		len += 12;
-
-	cs = intel_ring_begin(request, len);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	if (vf_flush_wa)
-		cs = gen8_emit_pipe_control(cs, 0, 0);
-
-	if (dc_flush_wa)
-		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
-					    0);
-
-	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-
-	if (dc_flush_wa)
-		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
-
-	intel_ring_advance(request, cs);
-
-	return 0;
-}
-
-static int gen11_emit_flush_render(struct i915_request *request,
-				   u32 mode)
-{
-	if (mode & EMIT_FLUSH) {
-		u32 *cs;
-		u32 flags = 0;
-
-		flags |= PIPE_CONTROL_CS_STALL;
-
-		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-		cs = intel_ring_begin(request, 6);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-		intel_ring_advance(request, cs);
-	}
-
-	if (mode & EMIT_INVALIDATE) {
-		u32 *cs;
-		u32 flags = 0;
-
-		flags |= PIPE_CONTROL_CS_STALL;
-
-		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-
-		cs = intel_ring_begin(request, 6);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-		intel_ring_advance(request, cs);
-	}
-
-	return 0;
-}
-
-static u32 preparser_disable(bool state)
-{
-	return MI_ARB_CHECK | 1 << 8 | state;
-}
-
-static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
-{
-	static const i915_reg_t vd[] = {
-		GEN12_VD0_AUX_NV,
-		GEN12_VD1_AUX_NV,
-		GEN12_VD2_AUX_NV,
-		GEN12_VD3_AUX_NV,
-	};
-
-	static const i915_reg_t ve[] = {
-		GEN12_VE0_AUX_NV,
-		GEN12_VE1_AUX_NV,
-	};
-
-	if (engine->class == VIDEO_DECODE_CLASS)
-		return vd[engine->instance];
-
-	if (engine->class == VIDEO_ENHANCEMENT_CLASS)
-		return ve[engine->instance];
-
-	GEM_BUG_ON("unknown aux_inv_reg\n");
-
-	return INVALID_MMIO_REG;
-}
-
-static u32 *
-gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
-{
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
-	*cs++ = i915_mmio_reg_offset(inv_reg);
-	*cs++ = AUX_INV;
-	*cs++ = MI_NOOP;
-
-	return cs;
-}
-
-static int gen12_emit_flush_render(struct i915_request *request,
-				   u32 mode)
-{
-	if (mode & EMIT_FLUSH) {
-		u32 flags = 0;
-		u32 *cs;
-
-		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_FLUSH_L3;
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-		/* Wa_1409600907:tgl */
-		flags |= PIPE_CONTROL_DEPTH_STALL;
-		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-		flags |= PIPE_CONTROL_FLUSH_ENABLE;
-
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-		flags |= PIPE_CONTROL_QW_WRITE;
-
-		flags |= PIPE_CONTROL_CS_STALL;
-
-		cs = intel_ring_begin(request, 6);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		cs = gen12_emit_pipe_control(cs,
-					     PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
-					     flags, LRC_PPHWSP_SCRATCH_ADDR);
-		intel_ring_advance(request, cs);
-	}
-
-	if (mode & EMIT_INVALIDATE) {
-		u32 flags = 0;
-		u32 *cs;
-
-		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-
-		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-		flags |= PIPE_CONTROL_QW_WRITE;
-
-		flags |= PIPE_CONTROL_CS_STALL;
-
-		cs = intel_ring_begin(request, 8 + 4);
-		if (IS_ERR(cs))
-			return PTR_ERR(cs);
-
-		/*
-		 * Prevent the pre-parser from skipping past the TLB
-		 * invalidate and loading a stale page for the batch
-		 * buffer / request payload.
-		 */
-		*cs++ = preparser_disable(true);
-
-		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
-
-		/* hsdes: 1809175790 */
-		cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
-
-		*cs++ = preparser_disable(false);
-		intel_ring_advance(request, cs);
-	}
-
-	return 0;
-}
-
-static int gen12_emit_flush(struct i915_request *request, u32 mode)
-{
-	intel_engine_mask_t aux_inv = 0;
-	u32 cmd, *cs;
-
-	cmd = 4;
-	if (mode & EMIT_INVALIDATE)
-		cmd += 2;
-	if (mode & EMIT_INVALIDATE)
-		aux_inv = request->engine->mask & ~BIT(BCS0);
-	if (aux_inv)
-		cmd += 2 * hweight8(aux_inv) + 2;
-
-	cs = intel_ring_begin(request, cmd);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	if (mode & EMIT_INVALIDATE)
-		*cs++ = preparser_disable(true);
-
-	cmd = MI_FLUSH_DW + 1;
-
-	/* We always require a command barrier so that subsequent
-	 * commands, such as breadcrumb interrupts, are strictly ordered
-	 * wrt the contents of the write cache being flushed to memory
-	 * (and thus being coherent from the CPU).
-	 */
-	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
-	if (mode & EMIT_INVALIDATE) {
-		cmd |= MI_INVALIDATE_TLB;
-		if (request->engine->class == VIDEO_DECODE_CLASS)
-			cmd |= MI_INVALIDATE_BSD;
-	}
-
-	*cs++ = cmd;
-	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
-	*cs++ = 0; /* upper addr */
-	*cs++ = 0; /* value */
-
-	if (aux_inv) { /* hsdes: 1809175790 */
-		struct intel_engine_cs *engine;
-		unsigned int tmp;
-
-		*cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
-		for_each_engine_masked(engine, request->engine->gt,
-				       aux_inv, tmp) {
-			*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
-			*cs++ = AUX_INV;
-		}
-		*cs++ = MI_NOOP;
-	}
-
-	if (mode & EMIT_INVALIDATE)
-		*cs++ = preparser_disable(false);
-
-	intel_ring_advance(request, cs);
-
-	return 0;
-}
 
 static void assert_request_valid(struct i915_request *rq)
 {