Message ID | 20220301231549.1817978-12-matthew.d.roper@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | i915: Prepare for Xe_HP compute engines | expand |
On Tue, Mar 01, 2022 at 03:15:47PM -0800, Matt Roper wrote: > From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > > HW resources are divided across the active CCS engines at the compute > slice level, with each CCS having priority on one of the cslices. > If a compute slice has no enabled DSS, its paired compute engine is not > usable in full parallel execution because the other ones already fully > saturate the HW, so consider it fused off. > > v2 (José): > - moved it to its own function > - fixed definition of ccs_mask > > v3 (Matt): > - Replace fls() condition with a simple IP version test > > Cc: Stuart Summers <stuart.summers@intel.com> > Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > Cc: Ashutosh Dixit <ashutosh.dixit@intel.com> > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Signed-off-by: Stuart Summers <stuart.summers@intel.com> > Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Matt Roper <matthew.d.roper@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 25 +++++++++++++++++++++++ > drivers/gpu/drm/i915/gt/intel_sseu.c | 17 ++++++++++++--- > drivers/gpu/drm/i915/gt/intel_sseu.h | 4 +++- > 3 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index 92f4cf9833ee..809747c20bc4 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -592,6 +592,29 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt, > return false; > } > > +static void engine_mask_apply_compute_fuses(struct intel_gt *gt) > +{ > + struct drm_i915_private *i915 = gt->i915; > + struct intel_gt_info *info = &gt->info; > + int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS; > + const unsigned long ccs_mask = > + intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), > + ss_per_ccs); > + unsigned int i; > + > + if (GRAPHICS_VER_FULL(i915) < 
IP_VER(12, 50)) > + return; > + > + /* > + * If all DSS in a quadrant are fused off, the corresponding CCS > + * engine is not available for use. > + */ > + for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) { > + info->engine_mask &= ~BIT(_CCS(i)); > + drm_dbg(&i915->drm, "ccs%u fused off\n", i); > + } > +} > + > /* > * Determine which engines are fused off in our particular hardware. > * Note that we have a catch-22 situation where we need to be able to access > @@ -673,6 +696,8 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) > vebox_mask, VEBOX_MASK(gt)); > GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); > > + engine_mask_apply_compute_fuses(gt); > + > return info->engine_mask; > } > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c > index 29118c652811..4ac0bbaf0c31 100644 > --- a/drivers/gpu/drm/i915/gt/intel_sseu.c > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c > @@ -32,7 +32,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) > return total; > } > > -u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) > +static u32 > +_intel_sseu_get_subslices(const struct sseu_dev_info *sseu, > + const u8 *subslice_mask, u8 slice) > { > int i, offset = slice * sseu->ss_stride; > u32 mask = 0; > @@ -40,12 +42,21 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) > GEM_BUG_ON(slice >= sseu->max_slices); > > for (i = 0; i < sseu->ss_stride; i++) > - mask |= (u32)sseu->subslice_mask[offset + i] << > - i * BITS_PER_BYTE; > + mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE; > > return mask; > } > > +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) > +{ > + return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice); > +} > + > +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu) > +{ > + return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0); > +} > + > void intel_sseu_set_subslices(struct 
sseu_dev_info *sseu, int slice, > u8 *subslice_mask, u32 ss_mask) > { > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h > index 60882a74741e..8a79cd8eaab4 100644 > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h > @@ -103,7 +103,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu); > unsigned int > intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); > > -u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); > +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); > + > +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu); > > void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, > u8 *subslice_mask, u32 ss_mask); > -- > 2.34.1 >
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 92f4cf9833ee..809747c20bc4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -592,6 +592,29 @@ bool gen11_vdbox_has_sfc(struct intel_gt *gt, return false; } +static void engine_mask_apply_compute_fuses(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct intel_gt_info *info = &gt->info; + int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS; + const unsigned long ccs_mask = + intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu), + ss_per_ccs); + unsigned int i; + + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) + return; + + /* + * If all DSS in a quadrant are fused off, the corresponding CCS + * engine is not available for use. + */ + for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) { + info->engine_mask &= ~BIT(_CCS(i)); + drm_dbg(&i915->drm, "ccs%u fused off\n", i); + } +} + /* * Determine which engines are fused off in our particular hardware. 
* Note that we have a catch-22 situation where we need to be able to access @@ -673,6 +696,8 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) vebox_mask, VEBOX_MASK(gt)); GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); + engine_mask_apply_compute_fuses(gt); + return info->engine_mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 29118c652811..4ac0bbaf0c31 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -32,7 +32,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu) return total; } -u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +static u32 +_intel_sseu_get_subslices(const struct sseu_dev_info *sseu, + const u8 *subslice_mask, u8 slice) { int i, offset = slice * sseu->ss_stride; u32 mask = 0; @@ -40,12 +42,21 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) GEM_BUG_ON(slice >= sseu->max_slices); for (i = 0; i < sseu->ss_stride; i++) - mask |= (u32)sseu->subslice_mask[offset + i] << - i * BITS_PER_BYTE; + mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE; return mask; } +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) +{ + return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice); +} + +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu) +{ + return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0); +} + void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 60882a74741e..8a79cd8eaab4 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -103,7 +103,9 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu); unsigned int intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); -u32 intel_sseu_get_subslices(const 
struct sseu_dev_info *sseu, u8 slice); +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); + +u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, u8 *subslice_mask, u32 ss_mask);