diff mbox series

[RFC,v2,02/11] drm/i915/gt: Allow the creation of multi-mode CCS masks

Message ID 20240817210026.310645-3-andi.shyti@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series CCS static load balance | expand

Commit Message

Andi Shyti Aug. 17, 2024, 9 p.m. UTC
Until now, we have only set CCS mode balancing to 1, which means
that only one compute engine is exposed to the user. The stream
of compute commands submitted to that engine is then shared among
all the dedicated execution units.

This is done by calling the 'intel_gt_apply_ccs_mode(); function.

With this change, the aforementioned function takes an additional
parameter called 'mode' that specifies the desired mode to be set
for the CCS engines balancing. The mode parameter can have the
following values:

 - mode = 0: CCS load balancing mode 1 (1 CCS engine exposed)
 - mode = 1: CCS load balancing mode 2 (2 CCS engines exposed)
 - mode = 3: CCS load balancing mode 4 (4 CCS engines exposed)

This allows us to generate the appropriate register value to be
written to CCS_MODE, configuring how the exposed engine streams
will be submitted to the execution units.

No functional changes are intended yet, as no mode higher than
'0' is currently being set.

Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 78 ++++++++++++++++-----
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h |  4 +-
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  2 +-
 3 files changed, 65 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
index 19e0bc359861..6afd44ffc358 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -4,37 +4,83 @@ 
  */
 
 #include "i915_drv.h"
-#include "intel_gt.h"
 #include "intel_gt_ccs_mode.h"
 #include "intel_gt_regs.h"
 
-void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+void intel_gt_apply_ccs_mode(struct intel_gt *gt, u32 mode)
 {
+	unsigned long cslices_mask = gt->ccs.cslices;
+	u32 mode_val = 0;
+	u32 m = mode;
+	int ccs_id;
 	int cslice;
-	u32 mode = 0;
-	int first_ccs = __ffs(CCS_MASK(gt));
 
 	lockdep_assert_held(&gt->ccs.mutex);
 
 	if (!IS_DG2(gt->i915))
 		return;
 
-	/* Build the value for the fixed CCS load balancing */
-	for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
-		if (gt->ccs.cslices & BIT(cslice))
-			/*
-			 * If available, assign the cslice
-			 * to the first available engine...
-			 */
-			mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+	/*
+	 * The mode has two bit dedicated for each engine
+	 * that will be used for the CCS balancing algorithm:
+	 *
+	 *    BIT | CCS slice
+	 *   ------------------
+	 *     0  | CCS slice
+	 *     1  |     0
+	 *   ------------------
+	 *     2  | CCS slice
+	 *     3  |     1
+	 *   ------------------
+	 *     4  | CCS slice
+	 *     5  |     2
+	 *   ------------------
+	 *     6  | CCS slice
+	 *     7  |     3
+	 *   ------------------
+	 *
+	 * When a CCS slice is not available, then we will write 0x7,
+	 * oterwise we will write the user engine id which load will
+	 * be forwarded to that slice.
+	 *
+	 * The possible configurations are:
+	 *
+	 * 1 engine (ccs0):
+	 *   slice 0, 1, 2, 3: ccs0
+	 *
+	 * 2 engines (ccs0, ccs1):
+	 *   slice 0, 2: ccs0
+	 *   slice 1, 3: ccs1
+	 *
+	 * 4 engines (ccs0, ccs1, ccs2, ccs3):
+	 *   slice 0: ccs0
+	 *   slice 1: ccs1
+	 *   slice 2: ccs2
+	 *   slice 3: ccs3
+	 */
+	ccs_id = __ffs(cslices_mask);
 
-		else
+	for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+		if (!(cslices_mask & BIT(cslice))) {
 			/*
-			 * ... otherwise, mark the cslice as
-			 * unavailable if no CCS dispatches here
+			 * If not available, mark the slice as unavailable
+			 * and no task will be dispatched here.
 			 */
-			mode |= XEHP_CCS_MODE_CSLICE(cslice,
+			mode_val |= XEHP_CCS_MODE_CSLICE(cslice,
 						     XEHP_CCS_MODE_CSLICE_MASK);
+			continue;
+		}
+
+		mode_val |= XEHP_CCS_MODE_CSLICE(cslice, ccs_id);
+
+		if (!m) {
+			m = mode;
+			ccs_id = __ffs(cslices_mask);
+			continue;
+		}
+
+		m--;
+		ccs_id = find_next_bit(&cslices_mask, I915_MAX_CCS, ccs_id + 1);
 	}
 
 	gt->ccs.mode_reg_val = mode;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
index e646ab595ded..0e1c43ea1d54 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -6,9 +6,9 @@ 
 #ifndef __INTEL_GT_CCS_MODE_H__
 #define __INTEL_GT_CCS_MODE_H__
 
-struct intel_gt;
+#include "intel_gt.h"
 
-void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+void intel_gt_apply_ccs_mode(struct intel_gt *gt, u32 mode);
 void intel_gt_ccs_mode_init(struct intel_gt *gt);
 
 #endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index daa11e11d68f..203f2bb00e30 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2744,7 +2744,7 @@  static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_li
 	 * assign all slices to a single CCS. We will call it CCS mode 1
 	 */
 	mutex_lock(&gt->ccs.mutex);
-	intel_gt_apply_ccs_mode(gt);
+	intel_gt_apply_ccs_mode(gt, 0);
 	wa_masked_en(wal, XEHP_CCS_MODE, gt->ccs.mode_reg_val);
 	mutex_unlock(&gt->ccs.mutex);
 }