diff mbox series

[1/2] drm/i915/sseu: Don't overallocate subslice storage

Message ID 20220311061543.153611-1-matthew.d.roper@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/i915/sseu: Don't overallocate subslice storage | expand

Commit Message

Matt Roper March 11, 2022, 6:15 a.m. UTC
Xe_HP removed "slice" as a first-class unit in the hardware design.
Instead we now have a single pool of subslices (which are now referred
to as "DSS") that different hardware units have different ways of
grouping ("compute slices," "geometry slices," etc.).  For the purposes
of topology representation, we treat Xe_HP-based platforms as having a
single slice that contains all of the platform's DSS.  There's no need
to allocate storage space for (max legacy slices * max dss); let's
update some of our macros to minimize the storage requirement for sseu
topology.  We'll also document some of the constants to make it a little
bit more clear what they represent.

Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
 drivers/gpu/drm/i915/gt/intel_sseu.h         | 47 +++++++++++++++-----
 2 files changed, 36 insertions(+), 13 deletions(-)

Comments

Lucas De Marchi March 11, 2022, 7 p.m. UTC | #1
On Thu, Mar 10, 2022 at 10:15:42PM -0800, Matt Roper wrote:
>Xe_HP removed "slice" as a first-class unit in the hardware design.
>Instead we now have a single pool of subslices (which are now referred
>to as "DSS") that different hardware units have different ways of
>grouping ("compute slices," "geometry slices," etc.).  For the purposes
>of topology representation, we treat Xe_HP-based platforms as having a
>single slice that contains all of the platform's DSS.  There's no need
>to allocate storage space for (max legacy slices * max dss); let's
>update some of our macros to minimize the storage requirement for sseu
>topology.  We'll also document some of the constants to make it a little
>bit more clear what they represent.
>
>Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
>---
> drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
> drivers/gpu/drm/i915/gt/intel_sseu.h         | 47 +++++++++++++++-----
> 2 files changed, 36 insertions(+), 13 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>index 4fbf45a74ec0..f9e246004bc0 100644
>--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>@@ -645,7 +645,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
>
> #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
> 	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
>-	     (iter_) < GEN_MAX_SUBSLICES; \
>+	     (iter_) < GEN_SS_MASK_SIZE; \
> 	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
> 	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
> 		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
>diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
>index 8a79cd8eaab4..4f59eadbb61a 100644
>--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
>+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
>@@ -15,26 +15,49 @@ struct drm_i915_private;
> struct intel_gt;
> struct drm_printer;
>
>-#define GEN_MAX_SLICES		(3) /* SKL upper bound */
>-#define GEN_MAX_SUBSLICES	(32) /* XEHPSDV upper bound */
>-#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>-#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
>-#define GEN_MAX_EUS		(16) /* TGL upper bound */
>-#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>+/*
>+ * Maximum number of legacy slices.  Legacy slices no longer exist starting on
>+ * Xe_HP ("gslices," "cslices," etc. on Xe_HP and beyond are a different
>+ * concept and are not expressed through fusing).
>+ */
>+#define GEN_MAX_LEGACY_SLICES		3
>+
>+/*
>+ * Maximum number of subslices that can exist within a legacy slice.  This is
>+ * only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the GEN_MAX_DSS
>+ * value below).
>+ */
>+#define GEN_MAX_LEGACY_SUBSLICES	6

instead of calling the old legacy, maybe just add the XEHP_ prefix to
the new ones?

>+
>+/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */
>+#define GEN_MAX_DSS			32
>+
>+/* Maximum number of EUs that can exist within a subslice or DSS. */
>+#define GEN_MAX_EUS_PER_SS		16
>+
>+#define MAX(a, b)			((a) > (b) ? (a) : (b))

what's worse, include kernel.h in another header file or redefine MAX
everywhere? Re-defining it in headers we risk situations in which the
include order may create warnings about defining it in multiple places.


>+
>+/* The maximum number of bits needed to express each subslice/DSS independently */
>+#define GEN_SS_MASK_SIZE		MAX(GEN_MAX_DSS, \
>+					    GEN_MAX_LEGACY_SLICES * GEN_MAX_LEGACY_SUBSLICES)
>+
>+#define GEN_SSEU_STRIDE(max_entries)	DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>+#define GEN_MAX_SUBSLICE_STRIDE		GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
>+#define GEN_MAX_EU_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
>
> #define GEN_DSS_PER_GSLICE	4
> #define GEN_DSS_PER_CSLICE	8
> #define GEN_DSS_PER_MSLICE	8
>
>-#define GEN_MAX_GSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
>-#define GEN_MAX_CSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
>+#define GEN_MAX_GSLICES		(GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
>+#define GEN_MAX_CSLICES		(GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
>
> struct sseu_dev_info {
> 	u8 slice_mask;
>-	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>-	u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>-	u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>-	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
>+	u8 subslice_mask[GEN_SS_MASK_SIZE];
>+	u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
>+	u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
>+	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];


Aside the minor things above, everything look correct.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>

thanks
Lucas De Marchi

> 	u16 eu_total;
> 	u8 eu_per_subslice;
> 	u8 min_eu_in_pool;
>-- 
>2.34.1
>
Matt Roper March 11, 2022, 8:38 p.m. UTC | #2
On Fri, Mar 11, 2022 at 11:00:09AM -0800, Lucas De Marchi wrote:
> On Thu, Mar 10, 2022 at 10:15:42PM -0800, Matt Roper wrote:
> > Xe_HP removed "slice" as a first-class unit in the hardware design.
> > Instead we now have a single pool of subslices (which are now referred
> > to as "DSS") that different hardware units have different ways of
> > grouping ("compute slices," "geometry slices," etc.).  For the purposes
> > of topology representation, we treat Xe_HP-based platforms as having a
> > single slice that contains all of the platform's DSS.  There's no need
> > to allocate storage space for (max legacy slices * max dss); let's
> > update some of our macros to minimize the storage requirement for sseu
> > topology.  We'll also document some of the constants to make it a little
> > bit more clear what they represent.
> > 
> > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> > ---
> > drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
> > drivers/gpu/drm/i915/gt/intel_sseu.h         | 47 +++++++++++++++-----
> > 2 files changed, 36 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > index 4fbf45a74ec0..f9e246004bc0 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > @@ -645,7 +645,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
> > 
> > #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
> > 	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
> > -	     (iter_) < GEN_MAX_SUBSLICES; \
> > +	     (iter_) < GEN_SS_MASK_SIZE; \
> > 	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
> > 	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
> > 		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
> > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > index 8a79cd8eaab4..4f59eadbb61a 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > @@ -15,26 +15,49 @@ struct drm_i915_private;
> > struct intel_gt;
> > struct drm_printer;
> > 
> > -#define GEN_MAX_SLICES		(3) /* SKL upper bound */
> > -#define GEN_MAX_SUBSLICES	(32) /* XEHPSDV upper bound */
> > -#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
> > -#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> > -#define GEN_MAX_EUS		(16) /* TGL upper bound */
> > -#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
> > +/*
> > + * Maximum number of legacy slices.  Legacy slices no longer exist starting on
> > + * Xe_HP ("gslices," "cslices," etc. on Xe_HP and beyond are a different
> > + * concept and are not expressed through fusing).
> > + */
> > +#define GEN_MAX_LEGACY_SLICES		3
> > +
> > +/*
> > + * Maximum number of subslices that can exist within a legacy slice.  This is
> > + * only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the GEN_MAX_DSS
> > + * value below).
> > + */
> > +#define GEN_MAX_LEGACY_SUBSLICES	6
> 
> instead of calling the old legacy, maybe just add the XEHP_ prefix to
> the new ones?

Maybe a "HSW_" prefix on the old ones would be better?  People still use
the termm 'subslice' in casual discussion when talking about DSS, so I
want to somehow distinguish that what we're talking about here is a
different, older design than we have on modern platforms.


Matt

> 
> > +
> > +/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */
> > +#define GEN_MAX_DSS			32
> > +
> > +/* Maximum number of EUs that can exist within a subslice or DSS. */
> > +#define GEN_MAX_EUS_PER_SS		16
> > +
> > +#define MAX(a, b)			((a) > (b) ? (a) : (b))
> 
> what's worse, include kernel.h in another header file or redefine MAX
> everywhere? Re-defining it in headers we risk situations in which the
> include order may create warnings about defining it in multiple places.
> 
> 
> > +
> > +/* The maximum number of bits needed to express each subslice/DSS independently */
> > +#define GEN_SS_MASK_SIZE		MAX(GEN_MAX_DSS, \
> > +					    GEN_MAX_LEGACY_SLICES * GEN_MAX_LEGACY_SUBSLICES)
> > +
> > +#define GEN_SSEU_STRIDE(max_entries)	DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
> > +#define GEN_MAX_SUBSLICE_STRIDE		GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
> > +#define GEN_MAX_EU_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
> > 
> > #define GEN_DSS_PER_GSLICE	4
> > #define GEN_DSS_PER_CSLICE	8
> > #define GEN_DSS_PER_MSLICE	8
> > 
> > -#define GEN_MAX_GSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
> > -#define GEN_MAX_CSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
> > +#define GEN_MAX_GSLICES		(GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
> > +#define GEN_MAX_CSLICES		(GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
> > 
> > struct sseu_dev_info {
> > 	u8 slice_mask;
> > -	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> > -	u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> > -	u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> > -	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
> > +	u8 subslice_mask[GEN_SS_MASK_SIZE];
> > +	u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
> > +	u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
> > +	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
> 
> 
> Aside the minor things above, everything look correct.
> 
> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
> 
> thanks
> Lucas De Marchi
> 
> > 	u16 eu_total;
> > 	u8 eu_per_subslice;
> > 	u8 min_eu_in_pool;
> > -- 
> > 2.34.1
> >
Matt Roper March 11, 2022, 8:43 p.m. UTC | #3
On Fri, Mar 11, 2022 at 12:38:17PM -0800, Matt Roper wrote:
> On Fri, Mar 11, 2022 at 11:00:09AM -0800, Lucas De Marchi wrote:
> > On Thu, Mar 10, 2022 at 10:15:42PM -0800, Matt Roper wrote:
> > > Xe_HP removed "slice" as a first-class unit in the hardware design.
> > > Instead we now have a single pool of subslices (which are now referred
> > > to as "DSS") that different hardware units have different ways of
> > > grouping ("compute slices," "geometry slices," etc.).  For the purposes
> > > of topology representation, we treat Xe_HP-based platforms as having a
> > > single slice that contains all of the platform's DSS.  There's no need
> > > to allocate storage space for (max legacy slices * max dss); let's
> > > update some of our macros to minimize the storage requirement for sseu
> > > topology.  We'll also document some of the constants to make it a little
> > > bit more clear what they represent.
> > > 
> > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> > > ---
> > > drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
> > > drivers/gpu/drm/i915/gt/intel_sseu.h         | 47 +++++++++++++++-----
> > > 2 files changed, 36 insertions(+), 13 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > index 4fbf45a74ec0..f9e246004bc0 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > @@ -645,7 +645,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
> > > 
> > > #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
> > > 	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
> > > -	     (iter_) < GEN_MAX_SUBSLICES; \
> > > +	     (iter_) < GEN_SS_MASK_SIZE; \
> > > 	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
> > > 	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
> > > 		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > > index 8a79cd8eaab4..4f59eadbb61a 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > > @@ -15,26 +15,49 @@ struct drm_i915_private;
> > > struct intel_gt;
> > > struct drm_printer;
> > > 
> > > -#define GEN_MAX_SLICES		(3) /* SKL upper bound */
> > > -#define GEN_MAX_SUBSLICES	(32) /* XEHPSDV upper bound */
> > > -#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
> > > -#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> > > -#define GEN_MAX_EUS		(16) /* TGL upper bound */
> > > -#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
> > > +/*
> > > + * Maximum number of legacy slices.  Legacy slices no longer exist starting on
> > > + * Xe_HP ("gslices," "cslices," etc. on Xe_HP and beyond are a different
> > > + * concept and are not expressed through fusing).
> > > + */
> > > +#define GEN_MAX_LEGACY_SLICES		3
> > > +
> > > +/*
> > > + * Maximum number of subslices that can exist within a legacy slice.  This is
> > > + * only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the GEN_MAX_DSS
> > > + * value below).
> > > + */
> > > +#define GEN_MAX_LEGACY_SUBSLICES	6
> > 
> > instead of calling the old legacy, maybe just add the XEHP_ prefix to
> > the new ones?
> 
> Maybe a "HSW_" prefix on the old ones would be better?  People still use
> the termm 'subslice' in casual discussion when talking about DSS, so I
> want to somehow distinguish that what we're talking about here is a
> different, older design than we have on modern platforms.

Hmm, or maybe just "GEN_MAX_SUBSLICES_PER_LEGACY_SLICE" to tie it into
the slice definition above?


Matt

> 
> 
> Matt
> 
> > 
> > > +
> > > +/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */
> > > +#define GEN_MAX_DSS			32
> > > +
> > > +/* Maximum number of EUs that can exist within a subslice or DSS. */
> > > +#define GEN_MAX_EUS_PER_SS		16
> > > +
> > > +#define MAX(a, b)			((a) > (b) ? (a) : (b))
> > 
> > what's worse, include kernel.h in another header file or redefine MAX
> > everywhere? Re-defining it in headers we risk situations in which the
> > include order may create warnings about defining it in multiple places.
> > 
> > 
> > > +
> > > +/* The maximum number of bits needed to express each subslice/DSS independently */
> > > +#define GEN_SS_MASK_SIZE		MAX(GEN_MAX_DSS, \
> > > +					    GEN_MAX_LEGACY_SLICES * GEN_MAX_LEGACY_SUBSLICES)
> > > +
> > > +#define GEN_SSEU_STRIDE(max_entries)	DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
> > > +#define GEN_MAX_SUBSLICE_STRIDE		GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
> > > +#define GEN_MAX_EU_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
> > > 
> > > #define GEN_DSS_PER_GSLICE	4
> > > #define GEN_DSS_PER_CSLICE	8
> > > #define GEN_DSS_PER_MSLICE	8
> > > 
> > > -#define GEN_MAX_GSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
> > > -#define GEN_MAX_CSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
> > > +#define GEN_MAX_GSLICES		(GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
> > > +#define GEN_MAX_CSLICES		(GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
> > > 
> > > struct sseu_dev_info {
> > > 	u8 slice_mask;
> > > -	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> > > -	u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> > > -	u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> > > -	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
> > > +	u8 subslice_mask[GEN_SS_MASK_SIZE];
> > > +	u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
> > > +	u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
> > > +	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
> > 
> > 
> > Aside the minor things above, everything look correct.
> > 
> > Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
> > 
> > thanks
> > Lucas De Marchi
> > 
> > > 	u16 eu_total;
> > > 	u8 eu_per_subslice;
> > > 	u8 min_eu_in_pool;
> > > -- 
> > > 2.34.1
> > > 
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
> (916) 356-2795
Lucas De Marchi March 11, 2022, 8:52 p.m. UTC | #4
On Fri, Mar 11, 2022 at 12:43:40PM -0800, Matt Roper wrote:
>On Fri, Mar 11, 2022 at 12:38:17PM -0800, Matt Roper wrote:
>> On Fri, Mar 11, 2022 at 11:00:09AM -0800, Lucas De Marchi wrote:
>> > On Thu, Mar 10, 2022 at 10:15:42PM -0800, Matt Roper wrote:
>> > > Xe_HP removed "slice" as a first-class unit in the hardware design.
>> > > Instead we now have a single pool of subslices (which are now referred
>> > > to as "DSS") that different hardware units have different ways of
>> > > grouping ("compute slices," "geometry slices," etc.).  For the purposes
>> > > of topology representation, we treat Xe_HP-based platforms as having a
>> > > single slice that contains all of the platform's DSS.  There's no need
>> > > to allocate storage space for (max legacy slices * max dss); let's
>> > > update some of our macros to minimize the storage requirement for sseu
>> > > topology.  We'll also document some of the constants to make it a little
>> > > bit more clear what they represent.
>> > >
>> > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
>> > > ---
>> > > drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
>> > > drivers/gpu/drm/i915/gt/intel_sseu.h         | 47 +++++++++++++++-----
>> > > 2 files changed, 36 insertions(+), 13 deletions(-)
>> > >
>> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> > > index 4fbf45a74ec0..f9e246004bc0 100644
>> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>> > > @@ -645,7 +645,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
>> > >
>> > > #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
>> > > 	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
>> > > -	     (iter_) < GEN_MAX_SUBSLICES; \
>> > > +	     (iter_) < GEN_SS_MASK_SIZE; \
>> > > 	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
>> > > 	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
>> > > 		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
>> > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> > > index 8a79cd8eaab4..4f59eadbb61a 100644
>> > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
>> > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> > > @@ -15,26 +15,49 @@ struct drm_i915_private;
>> > > struct intel_gt;
>> > > struct drm_printer;
>> > >
>> > > -#define GEN_MAX_SLICES		(3) /* SKL upper bound */
>> > > -#define GEN_MAX_SUBSLICES	(32) /* XEHPSDV upper bound */
>> > > -#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>> > > -#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
>> > > -#define GEN_MAX_EUS		(16) /* TGL upper bound */
>> > > -#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>> > > +/*
>> > > + * Maximum number of legacy slices.  Legacy slices no longer exist starting on
>> > > + * Xe_HP ("gslices," "cslices," etc. on Xe_HP and beyond are a different
>> > > + * concept and are not expressed through fusing).
>> > > + */
>> > > +#define GEN_MAX_LEGACY_SLICES		3
>> > > +
>> > > +/*
>> > > + * Maximum number of subslices that can exist within a legacy slice.  This is
>> > > + * only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the GEN_MAX_DSS
>> > > + * value below).
>> > > + */
>> > > +#define GEN_MAX_LEGACY_SUBSLICES	6
>> >
>> > instead of calling the old legacy, maybe just add the XEHP_ prefix to
>> > the new ones?
>>
>> Maybe a "HSW_" prefix on the old ones would be better?  People still use
>> the termm 'subslice' in casual discussion when talking about DSS, so I
>> want to somehow distinguish that what we're talking about here is a
>> different, older design than we have on modern platforms.
>
>Hmm, or maybe just "GEN_MAX_SUBSLICES_PER_LEGACY_SLICE" to tie it into
>the slice definition above?

I'm not too fond of calling it "legacy" when everywhere else in the driver
we just use the platform as prefix/suffix. Some may see legacy as
< ver 12, others as 12.50, etc.

Well... but I will leave that up to you if you are convinced one is
better than the other.

thanks
Lucas De Marchi
Ville Syrjälä March 11, 2022, 9:01 p.m. UTC | #5
On Fri, Mar 11, 2022 at 12:52:33PM -0800, Lucas De Marchi wrote:
> On Fri, Mar 11, 2022 at 12:43:40PM -0800, Matt Roper wrote:
> >On Fri, Mar 11, 2022 at 12:38:17PM -0800, Matt Roper wrote:
> >> On Fri, Mar 11, 2022 at 11:00:09AM -0800, Lucas De Marchi wrote:
> >> > On Thu, Mar 10, 2022 at 10:15:42PM -0800, Matt Roper wrote:
> >> > > Xe_HP removed "slice" as a first-class unit in the hardware design.
> >> > > Instead we now have a single pool of subslices (which are now referred
> >> > > to as "DSS") that different hardware units have different ways of
> >> > > grouping ("compute slices," "geometry slices," etc.).  For the purposes
> >> > > of topology representation, we treat Xe_HP-based platforms as having a
> >> > > single slice that contains all of the platform's DSS.  There's no need
> >> > > to allocate storage space for (max legacy slices * max dss); let's
> >> > > update some of our macros to minimize the storage requirement for sseu
> >> > > topology.  We'll also document some of the constants to make it a little
> >> > > bit more clear what they represent.
> >> > >
> >> > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> >> > > ---
> >> > > drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
> >> > > drivers/gpu/drm/i915/gt/intel_sseu.h         | 47 +++++++++++++++-----
> >> > > 2 files changed, 36 insertions(+), 13 deletions(-)
> >> > >
> >> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> >> > > index 4fbf45a74ec0..f9e246004bc0 100644
> >> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> >> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> >> > > @@ -645,7 +645,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
> >> > >
> >> > > #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
> >> > > 	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
> >> > > -	     (iter_) < GEN_MAX_SUBSLICES; \
> >> > > +	     (iter_) < GEN_SS_MASK_SIZE; \
> >> > > 	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
> >> > > 	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
> >> > > 		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
> >> > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> >> > > index 8a79cd8eaab4..4f59eadbb61a 100644
> >> > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> >> > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> >> > > @@ -15,26 +15,49 @@ struct drm_i915_private;
> >> > > struct intel_gt;
> >> > > struct drm_printer;
> >> > >
> >> > > -#define GEN_MAX_SLICES		(3) /* SKL upper bound */
> >> > > -#define GEN_MAX_SUBSLICES	(32) /* XEHPSDV upper bound */
> >> > > -#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
> >> > > -#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> >> > > -#define GEN_MAX_EUS		(16) /* TGL upper bound */
> >> > > -#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
> >> > > +/*
> >> > > + * Maximum number of legacy slices.  Legacy slices no longer exist starting on
> >> > > + * Xe_HP ("gslices," "cslices," etc. on Xe_HP and beyond are a different
> >> > > + * concept and are not expressed through fusing).
> >> > > + */
> >> > > +#define GEN_MAX_LEGACY_SLICES		3
> >> > > +
> >> > > +/*
> >> > > + * Maximum number of subslices that can exist within a legacy slice.  This is
> >> > > + * only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the GEN_MAX_DSS
> >> > > + * value below).
> >> > > + */
> >> > > +#define GEN_MAX_LEGACY_SUBSLICES	6
> >> >
> >> > instead of calling the old legacy, maybe just add the XEHP_ prefix to
> >> > the new ones?
> >>
> >> Maybe a "HSW_" prefix on the old ones would be better?  People still use
> >> the termm 'subslice' in casual discussion when talking about DSS, so I
> >> want to somehow distinguish that what we're talking about here is a
> >> different, older design than we have on modern platforms.
> >
> >Hmm, or maybe just "GEN_MAX_SUBSLICES_PER_LEGACY_SLICE" to tie it into
> >the slice definition above?
> 
> I'm not too fond of calling it "legacy" when everywhere else in the driver
> we just use the platform as prefix/suffix. Some may see legacy as
> < ver 12, others as 12.50, etc.

Everything will become legacy at some point. This kind of naming
scheme falls apart when the next shiny new thing comes around
and we end up with multiple different leagacies. Are we going
to have ANCIENT_LEGACY, RECENT_LEGACY, NOT_YET_LEGACY etc?
Matt Roper March 11, 2022, 9:11 p.m. UTC | #6
On Fri, Mar 11, 2022 at 11:01:01PM +0200, Ville Syrjälä wrote:
> On Fri, Mar 11, 2022 at 12:52:33PM -0800, Lucas De Marchi wrote:
> > On Fri, Mar 11, 2022 at 12:43:40PM -0800, Matt Roper wrote:
> > >On Fri, Mar 11, 2022 at 12:38:17PM -0800, Matt Roper wrote:
> > >> On Fri, Mar 11, 2022 at 11:00:09AM -0800, Lucas De Marchi wrote:
> > >> > On Thu, Mar 10, 2022 at 10:15:42PM -0800, Matt Roper wrote:
> > >> > > Xe_HP removed "slice" as a first-class unit in the hardware design.
> > >> > > Instead we now have a single pool of subslices (which are now referred
> > >> > > to as "DSS") that different hardware units have different ways of
> > >> > > grouping ("compute slices," "geometry slices," etc.).  For the purposes
> > >> > > of topology representation, we treat Xe_HP-based platforms as having a
> > >> > > single slice that contains all of the platform's DSS.  There's no need
> > >> > > to allocate storage space for (max legacy slices * max dss); let's
> > >> > > update some of our macros to minimize the storage requirement for sseu
> > >> > > topology.  We'll also document some of the constants to make it a little
> > >> > > bit more clear what they represent.
> > >> > >
> > >> > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> > >> > > ---
> > >> > > drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
> > >> > > drivers/gpu/drm/i915/gt/intel_sseu.h         | 47 +++++++++++++++-----
> > >> > > 2 files changed, 36 insertions(+), 13 deletions(-)
> > >> > >
> > >> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > >> > > index 4fbf45a74ec0..f9e246004bc0 100644
> > >> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > >> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > >> > > @@ -645,7 +645,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
> > >> > >
> > >> > > #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
> > >> > > 	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
> > >> > > -	     (iter_) < GEN_MAX_SUBSLICES; \
> > >> > > +	     (iter_) < GEN_SS_MASK_SIZE; \
> > >> > > 	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
> > >> > > 	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
> > >> > > 		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
> > >> > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > >> > > index 8a79cd8eaab4..4f59eadbb61a 100644
> > >> > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> > >> > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > >> > > @@ -15,26 +15,49 @@ struct drm_i915_private;
> > >> > > struct intel_gt;
> > >> > > struct drm_printer;
> > >> > >
> > >> > > -#define GEN_MAX_SLICES		(3) /* SKL upper bound */
> > >> > > -#define GEN_MAX_SUBSLICES	(32) /* XEHPSDV upper bound */
> > >> > > -#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
> > >> > > -#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> > >> > > -#define GEN_MAX_EUS		(16) /* TGL upper bound */
> > >> > > -#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
> > >> > > +/*
> > >> > > + * Maximum number of legacy slices.  Legacy slices no longer exist starting on
> > >> > > + * Xe_HP ("gslices," "cslices," etc. on Xe_HP and beyond are a different
> > >> > > + * concept and are not expressed through fusing).
> > >> > > + */
> > >> > > +#define GEN_MAX_LEGACY_SLICES		3
> > >> > > +
> > >> > > +/*
> > >> > > + * Maximum number of subslices that can exist within a legacy slice.  This is
> > >> > > + * only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the GEN_MAX_DSS
> > >> > > + * value below).
> > >> > > + */
> > >> > > +#define GEN_MAX_LEGACY_SUBSLICES	6
> > >> >
> > >> > instead of calling the old legacy, maybe just add the XEHP_ prefix to
> > >> > the new ones?
> > >>
> > >> Maybe a "HSW_" prefix on the old ones would be better?  People still use
> > >> the termm 'subslice' in casual discussion when talking about DSS, so I
> > >> want to somehow distinguish that what we're talking about here is a
> > >> different, older design than we have on modern platforms.
> > >
> > >Hmm, or maybe just "GEN_MAX_SUBSLICES_PER_LEGACY_SLICE" to tie it into
> > >the slice definition above?
> > 
> > I'm not too fond of calling it "legacy" when everywhere else in the driver
> > we just use the platform as prefix/suffix. Some may see legacy as
> > < ver 12, others as 12.50, etc.
> 
> Everything will become legacy at some point. This kind of naming
> scheme falls apart when the next shiny new thing comes around
> and we end up with multiple different leagacies. Are we going
> to have ANCIENT_LEGACY, RECENT_LEGACY, NOT_YET_LEGACY etc?

Well that's kind of the point --- there is no shiny new thing and never
will be.  "slice" is gone for good and the places we use the term are
_not_ the same thing as similar-sounding terms like gslice, cslice,
mslice, etc.

But I'll go ahead and switch it to "HSW_" and hope people figure out
that it stops being a meaningful concept Xe_HP.


Matt

> 
> -- 
> Ville Syrjälä
> Intel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fbf45a74ec0..f9e246004bc0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -645,7 +645,7 @@  intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
 
 #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
 	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
-	     (iter_) < GEN_MAX_SUBSLICES; \
+	     (iter_) < GEN_SS_MASK_SIZE; \
 	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
 	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
 		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 8a79cd8eaab4..4f59eadbb61a 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -15,26 +15,49 @@  struct drm_i915_private;
 struct intel_gt;
 struct drm_printer;
 
-#define GEN_MAX_SLICES		(3) /* SKL upper bound */
-#define GEN_MAX_SUBSLICES	(32) /* XEHPSDV upper bound */
-#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
-#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
-#define GEN_MAX_EUS		(16) /* TGL upper bound */
-#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
+/*
+ * Maximum number of legacy slices.  Legacy slices no longer exist starting on
+ * Xe_HP ("gslices," "cslices," etc. on Xe_HP and beyond are a different
+ * concept and are not expressed through fusing).
+ */
+#define GEN_MAX_LEGACY_SLICES		3
+
+/*
+ * Maximum number of subslices that can exist within a legacy slice.  This is
+ * only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the GEN_MAX_DSS
+ * value below).
+ */
+#define GEN_MAX_LEGACY_SUBSLICES	6
+
+/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */
+#define GEN_MAX_DSS			32
+
+/* Maximum number of EUs that can exist within a subslice or DSS. */
+#define GEN_MAX_EUS_PER_SS		16
+
+#define MAX(a, b)			((a) > (b) ? (a) : (b))
+
+/* The maximum number of bits needed to express each subslice/DSS independently */
+#define GEN_SS_MASK_SIZE		MAX(GEN_MAX_DSS, \
+					    GEN_MAX_LEGACY_SLICES * GEN_MAX_LEGACY_SUBSLICES)
+
+#define GEN_SSEU_STRIDE(max_entries)	DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE		GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
+#define GEN_MAX_EU_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
 
 #define GEN_DSS_PER_GSLICE	4
 #define GEN_DSS_PER_CSLICE	8
 #define GEN_DSS_PER_MSLICE	8
 
-#define GEN_MAX_GSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
-#define GEN_MAX_CSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
+#define GEN_MAX_GSLICES		(GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
+#define GEN_MAX_CSLICES		(GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
 
 struct sseu_dev_info {
 	u8 slice_mask;
-	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
-	u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
-	u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
-	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
+	u8 subslice_mask[GEN_SS_MASK_SIZE];
+	u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
+	u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
+	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
 	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;