Message ID | 20190913075137.18476-2-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/2] drm/i915/tgl: Introduce gen12 forcewake ranges | expand |
On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson <chris@chris-wilson.co.uk> wrote: > > From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > > Gen12 has dual-subslices (DSS), which compared to gen11 subslices have > some duplicated resources/paths. Although DSS behave similarly to 2 > subslices, instead of splitting this and presenting userspace with bits > not directly representative of hardware resources, present userspace > with a subslice_mask made up of DSS bits instead. > > v2: GEM_BUG_ON on mask size (Lionel) > > Bspec: 29547 > Bspec: 12247 > Cc: Kelvin Gardiner <kelvin.gardiner@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > CC: Radhakrishna Sripada <radhakrishna.sripada@intel.com> > Cc: Michel Thierry <michel.thierry@intel.com> #v1 > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: José Roberto de Souza <jose.souza@intel.com> > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Signed-off-by: James Ausmus <james.ausmus@intel.com> > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> > Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com> > Signed-off-by: Stuart Summers <stuart.summers@intel.com> > Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- After this I get the correct values for TGL: - Available Subslice Total: 2 - Available Slice0 subslices: 2 - Available EU Total: 16 - Available EU Per Subslice: 8 + Available Subslice Total: 6 + Available Slice0 subslices: 6 + Available EU Total: 96 + Available EU Per Subslice: 16 Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com> Lucas De Marchi > drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +-- > drivers/gpu/drm/i915/i915_debugfs.c | 3 +- > drivers/gpu/drm/i915/i915_reg.h | 2 + > drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------ > include/uapi/drm/i915_drm.h | 6 +- > 5 files changed, 72 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h > index 4070f6ff1db6..d1d225204f09 100644 > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h > @@ -18,12 +18,13 @@ struct drm_i915_private; > #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ > #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) > #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) > -#define GEN_MAX_EUS (10) /* HSW upper bound */ > +#define GEN_MAX_EUS (16) /* TGL upper bound */ > #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) > > struct sseu_dev_info { > u8 slice_mask; > u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; > + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; > u16 eu_total; > u8 eu_per_subslice; > u8 min_eu_in_pool; > @@ -40,12 +41,6 @@ struct sseu_dev_info { > > u8 ss_stride; > u8 eu_stride; > - > - /* We don't have more than 8 eus per subslice at the moment and as we > - * store eus enabled using bits, no need to multiply by eus per > - * subslice. > - */ > - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; > }; > > /* > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 43db50095257..b5b449a88cf1 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, > for (ss = 0; ss < info->sseu.max_subslices; ss++) { > unsigned int eu_cnt; > > - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) > + if (info->sseu.has_subslice_pg && > + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) > /* skip disabled subslice */ > continue; > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index bf37ecebc82f..47847135a11f 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) > > #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) > > +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) > + > #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) > #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) > #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) > diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c > index 50b05a5de53b..b91a960b037f 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.c > +++ b/drivers/gpu/drm/i915/intel_device_info.c > @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) > return total; > } > > +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, > + u8 s_en, u32 ss_en, u16 eu_en) > +{ > + int s, ss; > + > + /* ss_en represents entire subslice mask across all slices */ > + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > > + sizeof(ss_en) * BITS_PER_BYTE); > + > + for (s = 0; s < sseu->max_slices; s++) { > + if ((s_en & BIT(s)) == 0) > + continue; > + > + sseu->slice_mask |= BIT(s); > + > + intel_sseu_set_subslices(sseu, s, ss_en); > + > + for (ss = 0; ss < sseu->max_subslices; ss++) > + if (intel_sseu_has_subslice(sseu, s, ss)) > + sseu_set_eus(sseu, s, ss, eu_en); > + } > + sseu->eu_per_subslice = hweight16(eu_en); > + sseu->eu_total = compute_eu_total(sseu); > +} > + > +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) > +{ > + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; > + u8 s_en; > + u32 dss_en; > + u16 eu_en = 0; > + u8 eu_en_fuse; > + int eu; > + > + /* > + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. > + * Instead of splitting these, provide userspace with an array > + * of DSS to more closely represent the hardware resource. > + */ > + intel_sseu_set_info(sseu, 1, 6, 16); > + > + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; > + > + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); > + > + /* one bit per pair of EUs */ > + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); > + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) > + if (eu_en_fuse & BIT(eu)) > + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); > + > + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); > + > + /* TGL only supports slice-level power gating */ > + sseu->has_slice_pg = 1; > +} > + > static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) > { > struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; > u8 s_en; > - u32 ss_en, ss_en_mask; > + u32 ss_en; > u8 eu_en; > - int s; > > if (IS_ELKHARTLAKE(dev_priv)) > intel_sseu_set_info(sseu, 1, 4, 8); > @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) > > s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; > ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); > - ss_en_mask = BIT(sseu->max_subslices) - 1; > eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); > > - for (s = 0; s < sseu->max_slices; s++) { > - if (s_en & BIT(s)) { > - int ss_idx = sseu->max_subslices * s; > - int ss; > - > - sseu->slice_mask |= BIT(s); > - > - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & > - ss_en_mask); > - > - for (ss = 0; ss < sseu->max_subslices; ss++) > - if (intel_sseu_has_subslice(sseu, s, ss)) > - sseu_set_eus(sseu, s, ss, eu_en); > - } > - } > - sseu->eu_per_subslice = hweight8(eu_en); > - sseu->eu_total = compute_eu_total(sseu); > + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); > > /* ICL has no power gating restrictions. */ > sseu->has_slice_pg = 1; > @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) > gen9_sseu_info_init(dev_priv); > else if (IS_GEN(dev_priv, 10)) > gen10_sseu_info_init(dev_priv); > - else if (INTEL_GEN(dev_priv) >= 11) > + else if (IS_GEN(dev_priv, 11)) > gen11_sseu_info_init(dev_priv); > + else if (INTEL_GEN(dev_priv) >= 12) > + gen12_sseu_info_init(dev_priv); > > if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { > DRM_INFO("Disabling ppGTT for VT-d support\n"); > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 469dc512cca3..30c542144016 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -2033,8 +2033,10 @@ struct drm_i915_query { > * (data[X / 8] >> (X % 8)) & 1 > * > * - the subslice mask for each slice with one bit per subslice telling > - * whether a subslice is available. The availability of subslice Y in slice > - * X can be queried with the following formula : > + * whether a subslice is available. Gen12 has dual-subslices, which are > + * similar to two gen11 subslices. For gen12, this array represents dual- > + * subslices. The availability of subslice Y in slice X can be queried > + * with the following formula : > * > * (data[subslice_offset + > * X * subslice_stride + > -- > 2.23.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 21/09/2019 03:39, Lucas De Marchi wrote: > On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson <chris@chris-wilson.co.uk> wrote: >> From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> >> >> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have >> some duplicated resources/paths. Although DSS behave similarly to 2 >> subslices, instead of splitting this and presenting userspace with bits >> not directly representative of hardware resources, present userspace >> with a subslice_mask made up of DSS bits instead. >> >> v2: GEM_BUG_ON on mask size (Lionel) >> >> Bspec: 29547 >> Bspec: 12247 >> Cc: Kelvin Gardiner <kelvin.gardiner@intel.com> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> CC: Radhakrishna Sripada <radhakrishna.sripada@intel.com> >> Cc: Michel Thierry <michel.thierry@intel.com> #v1 >> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> >> Cc: José Roberto de Souza <jose.souza@intel.com> >> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> >> Signed-off-by: James Ausmus <james.ausmus@intel.com> >> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> >> Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com> >> Signed-off-by: Stuart Summers <stuart.summers@intel.com> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> >> Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> --- > After this I get the correct values for TGL: > - Available Subslice Total: 2 > - Available Slice0 subslices: 2 > - Available EU Total: 16 > - Available EU Per Subslice: 8 > + Available Subslice Total: 6 > + Available Slice0 subslices: 6 > + Available EU Total: 96 > + Available EU Per Subslice: 16 > > Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com> > > Lucas De Marchi Btw, shouldn't we print "Dualsubslice" rather than "Subslice" for TGL? -Lionel >> drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +-- >> drivers/gpu/drm/i915/i915_debugfs.c | 3 +- >> drivers/gpu/drm/i915/i915_reg.h | 2 + >> drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------ >> include/uapi/drm/i915_drm.h | 6 +- >> 5 files changed, 72 insertions(+), 31 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h >> index 4070f6ff1db6..d1d225204f09 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h >> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h >> @@ -18,12 +18,13 @@ struct drm_i915_private; >> #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ >> #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) >> #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) >> -#define GEN_MAX_EUS (10) /* HSW upper bound */ >> +#define GEN_MAX_EUS (16) /* TGL upper bound */ >> #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) >> >> struct sseu_dev_info { >> u8 slice_mask; >> u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; >> + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; >> u16 eu_total; >> u8 eu_per_subslice; >> u8 min_eu_in_pool; >> @@ -40,12 +41,6 @@ struct sseu_dev_info { >> >> u8 ss_stride; >> u8 eu_stride; >> - >> - /* We don't have more than 8 eus per subslice at the moment and as we >> - * store eus enabled using bits, no need to multiply by eus per >> - * subslice. >> - */ >> - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; >> }; >> >> /* >> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c >> index 43db50095257..b5b449a88cf1 100644 >> --- a/drivers/gpu/drm/i915/i915_debugfs.c >> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >> @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, >> for (ss = 0; ss < info->sseu.max_subslices; ss++) { >> unsigned int eu_cnt; >> >> - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) >> + if (info->sseu.has_subslice_pg && >> + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) >> /* skip disabled subslice */ >> continue; >> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index bf37ecebc82f..47847135a11f 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) >> >> #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) >> >> +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) >> + >> #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) >> #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) >> #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) >> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c >> index 50b05a5de53b..b91a960b037f 100644 >> --- a/drivers/gpu/drm/i915/intel_device_info.c >> +++ b/drivers/gpu/drm/i915/intel_device_info.c >> @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) >> return total; >> } >> >> +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, >> + u8 s_en, u32 ss_en, u16 eu_en) >> +{ >> + int s, ss; >> + >> + /* ss_en represents entire subslice mask across all slices */ >> + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > >> + sizeof(ss_en) * BITS_PER_BYTE); >> + >> + for (s = 0; s < sseu->max_slices; s++) { >> + if ((s_en & BIT(s)) == 0) >> + continue; >> + >> + sseu->slice_mask |= BIT(s); >> + >> + intel_sseu_set_subslices(sseu, s, ss_en); >> + >> + for (ss = 0; ss < sseu->max_subslices; ss++) >> + if (intel_sseu_has_subslice(sseu, s, ss)) >> + sseu_set_eus(sseu, s, ss, eu_en); >> + } >> + sseu->eu_per_subslice = hweight16(eu_en); >> + sseu->eu_total = compute_eu_total(sseu); >> +} >> + >> +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) >> +{ >> + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; >> + u8 s_en; >> + u32 dss_en; >> + u16 eu_en = 0; >> + u8 eu_en_fuse; >> + int eu; >> + >> + /* >> + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. >> + * Instead of splitting these, provide userspace with an array >> + * of DSS to more closely represent the hardware resource. >> + */ >> + intel_sseu_set_info(sseu, 1, 6, 16); >> + >> + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; >> + >> + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); >> + >> + /* one bit per pair of EUs */ >> + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); >> + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) >> + if (eu_en_fuse & BIT(eu)) >> + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); >> + >> + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); >> + >> + /* TGL only supports slice-level power gating */ >> + sseu->has_slice_pg = 1; >> +} >> + >> static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) >> { >> struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; >> u8 s_en; >> - u32 ss_en, ss_en_mask; >> + u32 ss_en; >> u8 eu_en; >> - int s; >> >> if (IS_ELKHARTLAKE(dev_priv)) >> intel_sseu_set_info(sseu, 1, 4, 8); >> @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) >> >> s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; >> ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); >> - ss_en_mask = BIT(sseu->max_subslices) - 1; >> eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); >> >> - for (s = 0; s < sseu->max_slices; s++) { >> - if (s_en & BIT(s)) { >> - int ss_idx = sseu->max_subslices * s; >> - int ss; >> - >> - sseu->slice_mask |= BIT(s); >> - >> - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & >> - ss_en_mask); >> - >> - for (ss = 0; ss < sseu->max_subslices; ss++) >> - if (intel_sseu_has_subslice(sseu, s, ss)) >> - sseu_set_eus(sseu, s, ss, eu_en); >> - } >> - } >> - sseu->eu_per_subslice = hweight8(eu_en); >> - sseu->eu_total = compute_eu_total(sseu); >> + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); >> >> /* ICL has no power gating restrictions. */ >> sseu->has_slice_pg = 1; >> @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) >> gen9_sseu_info_init(dev_priv); >> else if (IS_GEN(dev_priv, 10)) >> gen10_sseu_info_init(dev_priv); >> - else if (INTEL_GEN(dev_priv) >= 11) >> + else if (IS_GEN(dev_priv, 11)) >> gen11_sseu_info_init(dev_priv); >> + else if (INTEL_GEN(dev_priv) >= 12) >> + gen12_sseu_info_init(dev_priv); >> >> if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { >> DRM_INFO("Disabling ppGTT for VT-d support\n"); >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index 469dc512cca3..30c542144016 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -2033,8 +2033,10 @@ struct drm_i915_query { >> * (data[X / 8] >> (X % 8)) & 1 >> * >> * - the subslice mask for each slice with one bit per subslice telling >> - * whether a subslice is available. The availability of subslice Y in slice >> - * X can be queried with the following formula : >> + * whether a subslice is available. Gen12 has dual-subslices, which are >> + * similar to two gen11 subslices. For gen12, this array represents dual- >> + * subslices. The availability of subslice Y in slice X can be queried >> + * with the following formula : >> * >> * (data[subslice_offset + >> * X * subslice_stride + >> -- >> 2.23.0 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx > >
On Sun, 2019-09-22 at 19:48 +0300, Lionel Landwerlin wrote: > On 21/09/2019 03:39, Lucas De Marchi wrote: > > On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson < > > chris@chris-wilson.co.uk> wrote: > > > From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > > > > > > Gen12 has dual-subslices (DSS), which compared to gen11 subslices > > > have > > > some duplicated resources/paths. Although DSS behave similarly to > > > 2 > > > subslices, instead of splitting this and presenting userspace > > > with bits > > > not directly representative of hardware resources, present > > > userspace > > > with a subslice_mask made up of DSS bits instead. > > > > > > v2: GEM_BUG_ON on mask size (Lionel) > > > > > > Bspec: 29547 > > > Bspec: 12247 > > > Cc: Kelvin Gardiner <kelvin.gardiner@intel.com> > > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > > CC: Radhakrishna Sripada <radhakrishna.sripada@intel.com> > > > Cc: Michel Thierry <michel.thierry@intel.com> #v1 > > > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > > > Cc: José Roberto de Souza <jose.souza@intel.com> > > > Signed-off-by: Daniele Ceraolo Spurio < > > > daniele.ceraolospurio@intel.com> > > > Signed-off-by: James Ausmus <james.ausmus@intel.com> > > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> > > > Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com> > > > Signed-off-by: Stuart Summers <stuart.summers@intel.com> > > > Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > > Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > > --- > > > > After this I get the correct values for TGL: > > - Available Subslice Total: 2 > > - Available Slice0 subslices: 2 > > - Available EU Total: 16 > > - Available EU Per Subslice: 8 > > + Available Subslice Total: 6 > > + Available Slice0 subslices: 6 > > + Available EU Total: 96 > > + Available EU Per Subslice: 16 > > > > Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com> > > > > Lucas De Marchi > > > Btw, shouldn't we print "Dualsubslice" rather than "Subslice" for > TGL? The idea here is that from the userspace perspective, there is still only one unit which can be utilized in hardware. Thanks, Stuart > > > -Lionel > > > > > drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +-- > > > drivers/gpu/drm/i915/i915_debugfs.c | 3 +- > > > drivers/gpu/drm/i915/i915_reg.h | 2 + > > > drivers/gpu/drm/i915/intel_device_info.c | 83 > > > ++++++++++++++++++------ > > > include/uapi/drm/i915_drm.h | 6 +- > > > 5 files changed, 72 insertions(+), 31 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h > > > b/drivers/gpu/drm/i915/gt/intel_sseu.h > > > index 4070f6ff1db6..d1d225204f09 100644 > > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h > > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h > > > @@ -18,12 +18,13 @@ struct drm_i915_private; > > > #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ > > > #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, > > > BITS_PER_BYTE) > > > #define GEN_MAX_SUBSLICE_STRIDE > > > GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) > > > -#define GEN_MAX_EUS (10) /* HSW upper bound */ > > > +#define GEN_MAX_EUS (16) /* TGL upper bound */ > > > #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) > > > > > > struct sseu_dev_info { > > > u8 slice_mask; > > > u8 subslice_mask[GEN_MAX_SLICES * > > > GEN_MAX_SUBSLICE_STRIDE]; > > > + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * > > > GEN_MAX_EU_STRIDE]; > > > u16 eu_total; > > > u8 eu_per_subslice; > > > u8 min_eu_in_pool; > > > @@ -40,12 +41,6 @@ struct sseu_dev_info { > > > > > > u8 ss_stride; > > > u8 eu_stride; > > > - > > > - /* We don't have more than 8 eus per subslice at the > > > moment and as we > > > - * store eus enabled using bits, no need to multiply by > > > eus per > > > - * subslice. > > > - */ > > > - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; > > > }; > > > > > > /* > > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c > > > b/drivers/gpu/drm/i915/i915_debugfs.c > > > index 43db50095257..b5b449a88cf1 100644 > > > --- a/drivers/gpu/drm/i915/i915_debugfs.c > > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > > > @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct > > > drm_i915_private *dev_priv, > > > for (ss = 0; ss < info->sseu.max_subslices; > > > ss++) { > > > unsigned int eu_cnt; > > > > > > - if (!(s_reg[s] & > > > (GEN9_PGCTL_SS_ACK(ss)))) > > > + if (info->sseu.has_subslice_pg && > > > + !(s_reg[s] & > > > (GEN9_PGCTL_SS_ACK(ss)))) > > > /* skip disabled subslice */ > > > continue; > > > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h > > > b/drivers/gpu/drm/i915/i915_reg.h > > > index bf37ecebc82f..47847135a11f 100644 > > > --- a/drivers/gpu/drm/i915/i915_reg.h > > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > > @@ -2956,6 +2956,8 @@ static inline bool > > > i915_mmio_reg_valid(i915_reg_t reg) > > > > > > #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) > > > > > > +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) > > > + > > > #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) > > > #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) > > > #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) > > > diff --git a/drivers/gpu/drm/i915/intel_device_info.c > > > b/drivers/gpu/drm/i915/intel_device_info.c > > > index 50b05a5de53b..b91a960b037f 100644 > > > --- a/drivers/gpu/drm/i915/intel_device_info.c > > > +++ b/drivers/gpu/drm/i915/intel_device_info.c > > > @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct > > > sseu_dev_info *sseu) > > > return total; > > > } > > > > > > +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, > > > + u8 s_en, u32 ss_en, u16 > > > eu_en) > > > +{ > > > + int s, ss; > > > + > > > + /* ss_en represents entire subslice mask across all > > > slices */ > > > + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > > > > + sizeof(ss_en) * BITS_PER_BYTE); > > > + > > > + for (s = 0; s < sseu->max_slices; s++) { > > > + if ((s_en & BIT(s)) == 0) > > > + continue; > > > + > > > + sseu->slice_mask |= BIT(s); > > > + > > > + intel_sseu_set_subslices(sseu, s, ss_en); > > > + > > > + for (ss = 0; ss < sseu->max_subslices; ss++) > > > + if (intel_sseu_has_subslice(sseu, s, ss)) > > > + sseu_set_eus(sseu, s, ss, eu_en); > > > + } > > > + sseu->eu_per_subslice = hweight16(eu_en); > > > + sseu->eu_total = compute_eu_total(sseu); > > > +} > > > + > > > +static void gen12_sseu_info_init(struct drm_i915_private > > > *dev_priv) > > > +{ > > > + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)- > > > >sseu; > > > + u8 s_en; > > > + u32 dss_en; > > > + u16 eu_en = 0; > > > + u8 eu_en_fuse; > > > + int eu; > > > + > > > + /* > > > + * Gen12 has Dual-Subslices, which behave similarly to 2 > > > gen11 SS. > > > + * Instead of splitting these, provide userspace with an > > > array > > > + * of DSS to more closely represent the hardware > > > resource. > > > + */ > > > + intel_sseu_set_info(sseu, 1, 6, 16); > > > + > > > + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & > > > GEN11_GT_S_ENA_MASK; > > > + > > > + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); > > > + > > > + /* one bit per pair of EUs */ > > > + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & > > > GEN11_EU_DIS_MASK); > > > + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) > > > + if (eu_en_fuse & BIT(eu)) > > > + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); > > > + > > > + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); > > > + > > > + /* TGL only supports slice-level power gating */ > > > + sseu->has_slice_pg = 1; > > > +} > > > + > > > static void gen11_sseu_info_init(struct drm_i915_private > > > *dev_priv) > > > { > > > struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)- > > > >sseu; > > > u8 s_en; > > > - u32 ss_en, ss_en_mask; > > > + u32 ss_en; > > > u8 eu_en; > > > - int s; > > > > > > if (IS_ELKHARTLAKE(dev_priv)) > > > intel_sseu_set_info(sseu, 1, 4, 8); > > > @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct > > > drm_i915_private *dev_priv) > > > > > > s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & > > > GEN11_GT_S_ENA_MASK; > > > ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); > > > - ss_en_mask = BIT(sseu->max_subslices) - 1; > > > eu_en = ~(I915_READ(GEN11_EU_DISABLE) & > > > GEN11_EU_DIS_MASK); > > > > > > - for (s = 0; s < sseu->max_slices; s++) { > > > - if (s_en & BIT(s)) { > > > - int ss_idx = sseu->max_subslices * s; > > > - int ss; > > > - > > > - sseu->slice_mask |= BIT(s); > > > - > > > - intel_sseu_set_subslices(sseu, s, (ss_en > > > >> ss_idx) & > > > - ss_en_m > > > ask); > > > - > > > - for (ss = 0; ss < sseu->max_subslices; > > > ss++) > > > - if (intel_sseu_has_subslice(sseu, > > > s, ss)) > > > - sseu_set_eus(sseu, s, ss, > > > eu_en); > > > - } > > > - } > > > - sseu->eu_per_subslice = hweight8(eu_en); > > > - sseu->eu_total = compute_eu_total(sseu); > > > + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); > > > > > > /* ICL has no power gating restrictions. */ > > > sseu->has_slice_pg = 1; > > > @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct > > > drm_i915_private *dev_priv) > > > gen9_sseu_info_init(dev_priv); > > > else if (IS_GEN(dev_priv, 10)) > > > gen10_sseu_info_init(dev_priv); > > > - else if (INTEL_GEN(dev_priv) >= 11) > > > + else if (IS_GEN(dev_priv, 11)) > > > gen11_sseu_info_init(dev_priv); > > > + else if (INTEL_GEN(dev_priv) >= 12) > > > + gen12_sseu_info_init(dev_priv); > > > > > > if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { > > > DRM_INFO("Disabling ppGTT for VT-d support\n"); > > > diff --git a/include/uapi/drm/i915_drm.h > > > b/include/uapi/drm/i915_drm.h > > > index 469dc512cca3..30c542144016 100644 > > > --- a/include/uapi/drm/i915_drm.h > > > +++ b/include/uapi/drm/i915_drm.h > > > @@ -2033,8 +2033,10 @@ struct drm_i915_query { > > > * (data[X / 8] >> (X % 8)) & 1 > > > * > > > * - the subslice mask for each slice with one bit per subslice > > > telling > > > - * whether a subslice is available. The availability of > > > subslice Y in slice > > > - * X can be queried with the following formula : > > > + * whether a subslice is available. Gen12 has dual-subslices, > > > which are > > > + * similar to two gen11 subslices. For gen12, this array > > > represents dual- > > > + * subslices. The availability of subslice Y in slice X can be > > > queried > > > + * with the following formula : > > > * > > > * (data[subslice_offset + > > > * X * subslice_stride + > > > -- > > > 2.23.0 > > > > > > _______________________________________________ > > > Intel-gfx mailing list > > > Intel-gfx@lists.freedesktop.org > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx > > > > > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 4070f6ff1db6..d1d225204f09 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -18,12 +18,13 @@ struct drm_i915_private; #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) -#define GEN_MAX_EUS (10) /* HSW upper bound */ +#define GEN_MAX_EUS (16) /* TGL upper bound */ #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -40,12 +41,6 @@ struct sseu_dev_info { u8 ss_stride; u8 eu_stride; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; /* diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 43db50095257..b5b449a88cf1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + if (info->sseu.has_subslice_pg && + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bf37ecebc82f..47847135a11f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 50b05a5de53b..b91a960b037f 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, + u8 s_en, u32 ss_en, u16 eu_en) +{ + int s, ss; + + /* ss_en represents entire subslice mask across all slices */ + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > + sizeof(ss_en) * BITS_PER_BYTE); + + for (s = 0; s < sseu->max_slices; s++) { + if ((s_en & BIT(s)) == 0) + continue; + + sseu->slice_mask |= BIT(s); + + intel_sseu_set_subslices(sseu, s, ss_en); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, s, ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + sseu->eu_per_subslice = hweight16(eu_en); + sseu->eu_total = compute_eu_total(sseu); +} + +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + u8 s_en; + u32 dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; + int eu; + + /* + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. + * Instead of splitting these, provide userspace with an array + * of DSS to more closely represent the hardware resource. + */ + intel_sseu_set_info(sseu, 1, 6, 16); + + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); + + /* one bit per pair of EUs */ + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + + /* TGL only supports slice-level power gating */ + sseu->has_slice_pg = 1; +} + static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u8 s_en; - u32 ss_en, ss_en_mask; + u32 ss_en; u8 eu_en; - int s; if (IS_ELKHARTLAKE(dev_priv)) intel_sseu_set_info(sseu, 1, 4, 8); @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); - ss_en_mask = BIT(sseu->max_subslices) - 1; eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - for (s = 0; s < sseu->max_slices; s++) { - if (s_en & BIT(s)) { - int ss_idx = sseu->max_subslices * s; - int ss; - - sseu->slice_mask |= BIT(s); - - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & - ss_en_mask); - - for (ss = 0; ss < sseu->max_subslices; ss++) - if (intel_sseu_has_subslice(sseu, s, ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } - } - sseu->eu_per_subslice = hweight8(eu_en); - sseu->eu_total = compute_eu_total(sseu); + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) gen9_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 10)) gen10_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) + else if (IS_GEN(dev_priv, 11)) gen11_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 12) + gen12_sseu_info_init(dev_priv); if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { DRM_INFO("Disabling ppGTT for VT-d support\n"); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 469dc512cca3..30c542144016 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2033,8 +2033,10 @@ struct drm_i915_query { * (data[X / 8] >> (X % 8)) & 1 * * - the subslice mask for each slice with one bit per subslice telling - * whether a subslice is available. The availability of subslice Y in slice - * X can be queried with the following formula : + * whether a subslice is available. Gen12 has dual-subslices, which are + * similar to two gen11 subslices. For gen12, this array represents dual- + * subslices. The availability of subslice Y in slice X can be queried + * with the following formula : * * (data[subslice_offset + * X * subslice_stride +