Message ID | 20180115144159.25913-2-lionel.g.landwerlin@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 15/01/2018 14:41, Lionel Landwerlin wrote: > Up to now, subslice mask was assumed to be uniform across slices. But > starting with Cannonlake, slices can be asymmetric (for example slice0 > has different number of subslices as slice1+). This change stores all > subslices masks for all slices rather than having a single mask that > applies to all slices. > > v2: Rework how we store total numbers in sseu_dev_info (Tvrtko) > Fix CHV eu masks, was reading disabled as enabled (Tvrtko) > Readability changes (Tvrtko) > Add EU index helper (Tvrtko) > > v3: Turn ALIGN(v, 8) / 8 into DIV_ROUND_UP(v, BITS_PER_BYTE) (Tvrtko) > Reuse sseu_eu_idx() for setting eu_mask on CHV (Tvrtko) > Reformat debug prints for subslices (Tvrtko) > > v4: Change eu_mask helper into sseu_set_eus() (Tvrtko) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 25 ++-- > drivers/gpu/drm/i915/i915_drv.c | 2 +- > drivers/gpu/drm/i915/intel_device_info.c | 201 +++++++++++++++++++++++-------- > drivers/gpu/drm/i915/intel_device_info.h | 47 +++++++- > drivers/gpu/drm/i915/intel_lrc.c | 2 +- > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- > 6 files changed, 216 insertions(+), 63 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index cc659b4b2a45..684551114965 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -4289,7 +4289,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, > continue; > > sseu->slice_mask = BIT(0); > - sseu->subslice_mask |= BIT(ss); > + sseu->subslice_mask[0] |= BIT(ss); > eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + > ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + > ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 
0 : 2) + > @@ -4336,7 +4336,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, > continue; > > sseu->slice_mask |= BIT(s); > - sseu->subslice_mask = info->sseu.subslice_mask; > + sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; > > for (ss = 0; ss < ss_max; ss++) { > unsigned int eu_cnt; > @@ -4391,8 +4391,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, > sseu->slice_mask |= BIT(s); > > if (IS_GEN9_BC(dev_priv)) > - sseu->subslice_mask = > - INTEL_INFO(dev_priv)->sseu.subslice_mask; > + sseu->subslice_mask[s] = > + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; > > for (ss = 0; ss < ss_max; ss++) { > unsigned int eu_cnt; > @@ -4402,7 +4402,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, > /* skip disabled subslice */ > continue; > > - sseu->subslice_mask |= BIT(ss); > + sseu->subslice_mask[s] |= BIT(ss); > } > > eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & > @@ -4424,9 +4424,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, > sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; > > if (sseu->slice_mask) { > - sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; > sseu->eu_per_subslice = > INTEL_INFO(dev_priv)->sseu.eu_per_subslice; > + for (s = 0; s < fls(sseu->slice_mask); s++) { > + sseu->subslice_mask[s] = > + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; > + } > sseu->eu_total = sseu->eu_per_subslice * > sseu_subslice_total(sseu); > > @@ -4445,6 +4448,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, > { > struct drm_i915_private *dev_priv = node_to_i915(m->private); > const char *type = is_available_info ? 
"Available" : "Enabled"; > + int s; > > seq_printf(m, " %s Slice Mask: %04x\n", type, > sseu->slice_mask); > @@ -4452,10 +4456,11 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, > hweight8(sseu->slice_mask)); > seq_printf(m, " %s Subslice Total: %u\n", type, > sseu_subslice_total(sseu)); > - seq_printf(m, " %s Subslice Mask: %04x\n", type, > - sseu->subslice_mask); > - seq_printf(m, " %s Subslice Per Slice: %u\n", type, > - hweight8(sseu->subslice_mask)); > + for (s = 0; s < fls(sseu->slice_mask); s++) { > + seq_printf(m, " %s Slice%i %u subslices, mask=%04x\n", type, > + s, hweight8(sseu->subslice_mask[s]), > + sseu->subslice_mask[s]); > + } > seq_printf(m, " %s EU Total: %u\n", type, > sseu->eu_total); > seq_printf(m, " %s EU Per Subslice: %u\n", type, > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 6c8da9d20c33..969835d3cbcd 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, void *data, > return -ENODEV; > break; > case I915_PARAM_SUBSLICE_MASK: > - value = INTEL_INFO(dev_priv)->sseu.subslice_mask; > + value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0]; > if (!value) > return -ENODEV; > break; > diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c > index d28592e43512..ed14994527fc 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.c > +++ b/drivers/gpu/drm/i915/intel_device_info.c > @@ -80,12 +80,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info, > > static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) > { > + int s; > + > drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); > drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); > drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); > - drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); > - 
drm_printf(p, "subslice per slice: %u\n", > - hweight8(sseu->subslice_mask)); > + for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) { > + drm_printf(p, "slice%d %u subslices mask=%04x\n", > + s, hweight8(sseu->subslice_mask[s]), > + sseu->subslice_mask[s]); > + } > drm_printf(p, "EU total: %u\n", sseu->eu_total); > drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); > drm_printf(p, "has slice power gating: %s\n", > @@ -119,22 +123,87 @@ void intel_device_info_dump(const struct intel_device_info *info, > intel_device_info_dump_flags(info, p); > } > > +static u16 compute_eu_total(const struct sseu_dev_info *sseu) > +{ > + u16 i, total = 0; > + > + for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) > + total += hweight8(sseu->eu_mask[i]); > + > + return total; > +} > + > +static u16 compute_subslice_total(const struct sseu_dev_info *sseu) > +{ > + u16 i, total = 0; > + > + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) > + total += hweight8(sseu->subslice_mask[i]); > + > + return total; > +} > + > static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) > { > struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; > const u32 fuse2 = I915_READ(GEN8_FUSE2); > + int s, ss; > + const int eu_mask = 0xff; > + u32 subslice_mask, eu_en; > > sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> > GEN10_F2_S_ENA_SHIFT; > - sseu->subslice_mask = (1 << 4) - 1; > - sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> > - GEN10_F2_SS_DIS_SHIFT); > + sseu->max_slices = 6; > + sseu->max_subslices = 4; > + sseu->max_eus_per_subslice = 8; > > - sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); > - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); > - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); > - sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & > - GEN10_EU_DIS_SS_MASK)); > + subslice_mask = (1 << 4) - 1; > + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> > + GEN10_F2_SS_DIS_SHIFT); > + > + 
/* > + * Slice0 can have up to 3 subslices, but there are only 2 in > + * slice1/2. > + */ > + sseu->subslice_mask[0] = subslice_mask; > + for (s = 1; s < sseu->max_slices; s++) > + sseu->subslice_mask[s] = subslice_mask & 0x3; > + > + /* Slice0 */ > + eu_en = ~I915_READ(GEN8_EU_DISABLE0); > + for (ss = 0; ss < sseu->max_subslices; ss++) > + sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); > + /* Slice1 */ > + sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); > + eu_en = ~I915_READ(GEN8_EU_DISABLE1); > + sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); > + /* Slice2 */ > + sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); > + sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); > + /* Slice3 */ > + sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); > + eu_en = ~I915_READ(GEN8_EU_DISABLE2); > + sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); > + /* Slice4 */ > + sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); > + sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); > + /* Slice5 */ > + sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); > + eu_en = ~I915_READ(GEN10_EU_DISABLE3); > + sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); > + > + /* Do a second pass where we mark the subslices disabled if all their > + * eus are off. 
> + */ > + for (s = 0; s < sseu->max_slices; s++) { > + for (ss = 0; ss < sseu->max_subslices; ss++) { > + if (sseu_get_eus(sseu, s, ss) == 0) > + sseu->subslice_mask[s] &= ~BIT(ss); > + } > + } > + > + sseu->subslice_total = compute_subslice_total(sseu); > + sseu->eu_total = compute_eu_total(sseu); > > /* > * CNL is expected to always have a uniform distribution > @@ -155,26 +224,40 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) > static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) > { > struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; > - u32 fuse, eu_dis; > + u32 fuse; > > fuse = I915_READ(CHV_FUSE_GT); > > sseu->slice_mask = BIT(0); > + sseu->max_slices = 1; > + sseu->max_subslices = 2; > + sseu->max_eus_per_subslice = 8; > > if (!(fuse & CHV_FGT_DISABLE_SS0)) { > - sseu->subslice_mask |= BIT(0); > - eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | > - CHV_FGT_EU_DIS_SS0_R1_MASK); > - sseu->eu_total += 8 - hweight32(eu_dis); > + u8 disabled_mask = > + ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> > + CHV_FGT_EU_DIS_SS0_R0_SHIFT) | > + (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> > + CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); > + > + sseu->subslice_mask[0] |= BIT(0); > + sseu_set_eus(sseu, 0, 0, ~disabled_mask); > } > > if (!(fuse & CHV_FGT_DISABLE_SS1)) { > - sseu->subslice_mask |= BIT(1); > - eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | > - CHV_FGT_EU_DIS_SS1_R1_MASK); > - sseu->eu_total += 8 - hweight32(eu_dis); > + u8 disabled_mask = > + ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> > + CHV_FGT_EU_DIS_SS1_R0_SHIFT) | > + (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> > + CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); > + > + sseu->subslice_mask[0] |= BIT(1); > + sseu_set_eus(sseu, 0, 1, ~disabled_mask); > } > > + sseu->subslice_total = compute_subslice_total(sseu); > + sseu->eu_total = compute_eu_total(sseu); > + > /* > * CHV expected to always have a uniform distribution of EU > * across subslices. 
> @@ -196,41 +279,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) > { > struct intel_device_info *info = mkwrite_device_info(dev_priv); > struct sseu_dev_info *sseu = &info->sseu; > - int s_max = 3, ss_max = 4, eu_max = 8; > int s, ss; > - u32 fuse2, eu_disable; > - u8 eu_mask = 0xff; > + u32 fuse2, eu_disable, subslice_mask; > + const u8 eu_mask = 0xff; > > fuse2 = I915_READ(GEN8_FUSE2); > sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; > > + /* BXT has a single slice and at most 3 subslices. */ > + sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; > + sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; > + sseu->max_eus_per_subslice = 8; > + > /* > * The subslice disable field is global, i.e. it applies > * to each of the enabled slices. > */ > - sseu->subslice_mask = (1 << ss_max) - 1; > - sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> > - GEN9_F2_SS_DIS_SHIFT); > + subslice_mask = (1 << sseu->max_subslices) - 1; > + subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> > + GEN9_F2_SS_DIS_SHIFT); > > /* > * Iterate through enabled slices and subslices to > * count the total enabled EU. 
> */ > - for (s = 0; s < s_max; s++) { > + for (s = 0; s < sseu->max_slices; s++) { > if (!(sseu->slice_mask & BIT(s))) > /* skip disabled slice */ > continue; > > + sseu->subslice_mask[s] = subslice_mask; > + > eu_disable = I915_READ(GEN9_EU_DISABLE(s)); > - for (ss = 0; ss < ss_max; ss++) { > + for (ss = 0; ss < sseu->max_subslices; ss++) { > int eu_per_ss; > + u8 eu_disabled_mask; > > - if (!(sseu->subslice_mask & BIT(ss))) > + if (!(sseu->subslice_mask[s] & BIT(ss))) > /* skip disabled subslice */ > continue; > > - eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & > - eu_mask); > + eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask; > + > + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); > + > + eu_per_ss = sseu->max_eus_per_subslice - > + hweight8(eu_disabled_mask); > > /* > * Record which subslice(s) has(have) 7 EUs. we > @@ -239,11 +333,12 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) > */ > if (eu_per_ss == 7) > sseu->subslice_7eu[s] |= BIT(ss); > - > - sseu->eu_total += eu_per_ss; > } > } > > + sseu->subslice_total = compute_subslice_total(sseu); > + sseu->eu_total = compute_eu_total(sseu); > + > /* > * SKL is expected to always have a uniform distribution > * of EU across subslices with the exception that any one > @@ -269,8 +364,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) > sseu->has_eu_pg = sseu->eu_per_subslice > 2; > > if (IS_GEN9_LP(dev_priv)) { > -#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) > - info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; > +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss))) > + info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3; > > sseu->min_eu_in_pool = 0; > if (info->has_pooled_eu) { > @@ -288,19 +383,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) > static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) > { > struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; > - 
const int s_max = 3, ss_max = 3, eu_max = 8; > int s, ss; > - u32 fuse2, eu_disable[3]; /* s_max */ > + u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */ > > fuse2 = I915_READ(GEN8_FUSE2); > sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; > + sseu->max_slices = 3; > + sseu->max_subslices = 3; > + sseu->max_eus_per_subslice = 8; > + > /* > * The subslice disable field is global, i.e. it applies > * to each of the enabled slices. > */ > - sseu->subslice_mask = GENMASK(ss_max - 1, 0); > - sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> > - GEN8_F2_SS_DIS_SHIFT); > + subslice_mask = GENMASK(sseu->max_subslices - 1, 0); > + subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> > + GEN8_F2_SS_DIS_SHIFT); > > eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK; > eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) | > @@ -314,30 +412,39 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) > * Iterate through enabled slices and subslices to > * count the total enabled EU. > */ > - for (s = 0; s < s_max; s++) { > + for (s = 0; s < sseu->max_slices; s++) { > if (!(sseu->slice_mask & BIT(s))) > /* skip disabled slice */ > continue; > > - for (ss = 0; ss < ss_max; ss++) { > + sseu->subslice_mask[s] = subslice_mask; > + > + for (ss = 0; ss < sseu->max_subslices; ss++) { > + u8 eu_disabled_mask; > u32 n_disabled; > > - if (!(sseu->subslice_mask & BIT(ss))) > + if (!(sseu->subslice_mask[ss] & BIT(ss))) > /* skip disabled subslice */ > continue; > > - n_disabled = hweight8(eu_disable[s] >> (ss * eu_max)); > + eu_disabled_mask = > + eu_disable[s] >> (ss * sseu->max_eus_per_subslice); > + > + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); > + > + n_disabled = hweight8(eu_disabled_mask); > > /* > * Record which subslices have 7 EUs. 
> */ > - if (eu_max - n_disabled == 7) > + if (sseu->max_eus_per_subslice - n_disabled == 7) > sseu->subslice_7eu[s] |= 1 << ss; > - > - sseu->eu_total += eu_max - n_disabled; > } > } > > + sseu->subslice_total = compute_subslice_total(sseu); > + sseu->eu_total = compute_eu_total(sseu); > + > /* > * BDW is expected to always have a uniform distribution of EU across > * subslices with the exception that any one EU in any one subslice may > diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h > index 49cb27bd04c1..36e0df87862d 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -110,10 +110,14 @@ enum intel_platform { > func(supports_tv); \ > func(has_ipc); > > +#define GEN_MAX_SLICES (6) /* CNL upper bound */ > +#define GEN_MAX_SUBSLICES (7) > + > struct sseu_dev_info { > u8 slice_mask; > - u8 subslice_mask; > - u8 eu_total; > + u8 subslice_mask[GEN_MAX_SUBSLICES]; > + u16 subslice_total; > + u16 eu_total; > u8 eu_per_subslice; > u8 min_eu_in_pool; > /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ > @@ -121,6 +125,17 @@ struct sseu_dev_info { > u8 has_slice_pg:1; > u8 has_subslice_pg:1; > u8 has_eu_pg:1; > + > + /* Topology fields */ > + u8 max_slices; > + u8 max_subslices; > + u8 max_eus_per_subslice; > + > + /* We don't have more than 8 eus per subslice at the moment and as we > + * store eus enabled using bits, no need to multiply by eus per > + * subslice. 
> + */ > + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; > }; > > struct intel_device_info { > @@ -167,7 +182,33 @@ struct intel_device_info { > > static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) > { > - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); > + return sseu->subslice_total; > +} > + > +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, > + int slice, int subslice) > +{ > + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, > + BITS_PER_BYTE); > + int slice_stride = sseu->max_subslices * subslice_stride; > + > + return slice * slice_stride + subslice * subslice_stride; > +} > + > +/* > + * The following functions prototypes should be updated with a larger type > + * than u8 if we ever have more than 8 EUs per subslice. > + */ > +static inline u8 sseu_get_eus(const struct sseu_dev_info *sseu, > + int slice, int subslice) > +{ > + return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)]; > +} > + > +static inline void sseu_set_eus(struct sseu_dev_info *sseu, > + int slice, int subslice, u8 eu_mask) > +{ > + sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)] = eu_mask; > } > > const char *intel_platform_name(enum intel_platform platform); > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index ff25f209d0a5..ac7896031b8d 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -2098,7 +2098,7 @@ make_rpcs(struct drm_i915_private *dev_priv) > > if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { > rpcs |= GEN8_RPCS_SS_CNT_ENABLE; > - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << > + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << > GEN8_RPCS_SS_CNT_SHIFT; > rpcs |= GEN8_RPCS_ENABLE; > } > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index c5ff203e42d6..23ae9a957fab 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ 
b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) > > #define instdone_subslice_mask(dev_priv__) \ > (INTEL_GEN(dev_priv__) == 7 ? \ > - 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) > + 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0]) > > #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ > for ((slice__) = 0, (subslice__) = 0; \ > I am happy with this version and I did not spot any mistakes. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Do you know if we have any test coverage which would be able to tell us if the reported numbers before and after match? Regards, Tvrtko
On 15/01/18 17:37, Tvrtko Ursulin wrote: > > On 15/01/2018 14:41, Lionel Landwerlin wrote: >> Up to now, subslice mask was assumed to be uniform across slices. But >> starting with Cannonlake, slices can be asymmetric (for example slice0 >> has different number of subslices as slice1+). This change stores all >> subslices masks for all slices rather than having a single mask that >> applies to all slices. >> >> v2: Rework how we store total numbers in sseu_dev_info (Tvrtko) >> Fix CHV eu masks, was reading disabled as enabled (Tvrtko) >> Readability changes (Tvrtko) >> Add EU index helper (Tvrtko) >> >> v3: Turn ALIGN(v, 8) / 8 into DIV_ROUND_UP(v, BITS_PER_BYTE) (Tvrtko) >> Reuse sseu_eu_idx() for setting eu_mask on CHV (Tvrtko) >> Reformat debug prints for subslices (Tvrtko) >> >> v4: Change eu_mask helper into sseu_set_eus() (Tvrtko) >> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> --- >> drivers/gpu/drm/i915/i915_debugfs.c | 25 ++-- >> drivers/gpu/drm/i915/i915_drv.c | 2 +- >> drivers/gpu/drm/i915/intel_device_info.c | 201 >> +++++++++++++++++++++++-------- >> drivers/gpu/drm/i915/intel_device_info.h | 47 +++++++- >> drivers/gpu/drm/i915/intel_lrc.c | 2 +- >> drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- >> 6 files changed, 216 insertions(+), 63 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c >> b/drivers/gpu/drm/i915/i915_debugfs.c >> index cc659b4b2a45..684551114965 100644 >> --- a/drivers/gpu/drm/i915/i915_debugfs.c >> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >> @@ -4289,7 +4289,7 @@ static void >> cherryview_sseu_device_status(struct drm_i915_private *dev_priv, >> continue; >> sseu->slice_mask = BIT(0); >> - sseu->subslice_mask |= BIT(ss); >> + sseu->subslice_mask[0] |= BIT(ss); >> eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + >> ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + >> ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 
0 : 2) + >> @@ -4336,7 +4336,7 @@ static void gen10_sseu_device_status(struct >> drm_i915_private *dev_priv, >> continue; >> sseu->slice_mask |= BIT(s); >> - sseu->subslice_mask = info->sseu.subslice_mask; >> + sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; >> for (ss = 0; ss < ss_max; ss++) { >> unsigned int eu_cnt; >> @@ -4391,8 +4391,8 @@ static void gen9_sseu_device_status(struct >> drm_i915_private *dev_priv, >> sseu->slice_mask |= BIT(s); >> if (IS_GEN9_BC(dev_priv)) >> - sseu->subslice_mask = >> - INTEL_INFO(dev_priv)->sseu.subslice_mask; >> + sseu->subslice_mask[s] = >> + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; >> for (ss = 0; ss < ss_max; ss++) { >> unsigned int eu_cnt; >> @@ -4402,7 +4402,7 @@ static void gen9_sseu_device_status(struct >> drm_i915_private *dev_priv, >> /* skip disabled subslice */ >> continue; >> - sseu->subslice_mask |= BIT(ss); >> + sseu->subslice_mask[s] |= BIT(ss); >> } >> eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & >> @@ -4424,9 +4424,12 @@ static void >> broadwell_sseu_device_status(struct drm_i915_private *dev_priv, >> sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; >> if (sseu->slice_mask) { >> - sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; >> sseu->eu_per_subslice = >> INTEL_INFO(dev_priv)->sseu.eu_per_subslice; >> + for (s = 0; s < fls(sseu->slice_mask); s++) { >> + sseu->subslice_mask[s] = >> + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; >> + } >> sseu->eu_total = sseu->eu_per_subslice * >> sseu_subslice_total(sseu); >> @@ -4445,6 +4448,7 @@ static void i915_print_sseu_info(struct >> seq_file *m, bool is_available_info, >> { >> struct drm_i915_private *dev_priv = node_to_i915(m->private); >> const char *type = is_available_info ? 
"Available" : "Enabled"; >> + int s; >> seq_printf(m, " %s Slice Mask: %04x\n", type, >> sseu->slice_mask); >> @@ -4452,10 +4456,11 @@ static void i915_print_sseu_info(struct >> seq_file *m, bool is_available_info, >> hweight8(sseu->slice_mask)); >> seq_printf(m, " %s Subslice Total: %u\n", type, >> sseu_subslice_total(sseu)); >> - seq_printf(m, " %s Subslice Mask: %04x\n", type, >> - sseu->subslice_mask); >> - seq_printf(m, " %s Subslice Per Slice: %u\n", type, >> - hweight8(sseu->subslice_mask)); >> + for (s = 0; s < fls(sseu->slice_mask); s++) { >> + seq_printf(m, " %s Slice%i %u subslices, mask=%04x\n", type, >> + s, hweight8(sseu->subslice_mask[s]), >> + sseu->subslice_mask[s]); >> + } >> seq_printf(m, " %s EU Total: %u\n", type, >> sseu->eu_total); >> seq_printf(m, " %s EU Per Subslice: %u\n", type, >> diff --git a/drivers/gpu/drm/i915/i915_drv.c >> b/drivers/gpu/drm/i915/i915_drv.c >> index 6c8da9d20c33..969835d3cbcd 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.c >> +++ b/drivers/gpu/drm/i915/i915_drv.c >> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, >> void *data, >> return -ENODEV; >> break; >> case I915_PARAM_SUBSLICE_MASK: >> - value = INTEL_INFO(dev_priv)->sseu.subslice_mask; >> + value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0]; >> if (!value) >> return -ENODEV; >> break; >> diff --git a/drivers/gpu/drm/i915/intel_device_info.c >> b/drivers/gpu/drm/i915/intel_device_info.c >> index d28592e43512..ed14994527fc 100644 >> --- a/drivers/gpu/drm/i915/intel_device_info.c >> +++ b/drivers/gpu/drm/i915/intel_device_info.c >> @@ -80,12 +80,16 @@ void intel_device_info_dump_flags(const struct >> intel_device_info *info, >> static void sseu_dump(const struct sseu_dev_info *sseu, struct >> drm_printer *p) >> { >> + int s; >> + >> drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); >> drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); >> drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); >> - drm_printf(p, 
"subslice mask %04x\n", sseu->subslice_mask); >> - drm_printf(p, "subslice per slice: %u\n", >> - hweight8(sseu->subslice_mask)); >> + for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) { >> + drm_printf(p, "slice%d %u subslices mask=%04x\n", >> + s, hweight8(sseu->subslice_mask[s]), >> + sseu->subslice_mask[s]); >> + } >> drm_printf(p, "EU total: %u\n", sseu->eu_total); >> drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); >> drm_printf(p, "has slice power gating: %s\n", >> @@ -119,22 +123,87 @@ void intel_device_info_dump(const struct >> intel_device_info *info, >> intel_device_info_dump_flags(info, p); >> } >> +static u16 compute_eu_total(const struct sseu_dev_info *sseu) >> +{ >> + u16 i, total = 0; >> + >> + for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) >> + total += hweight8(sseu->eu_mask[i]); >> + >> + return total; >> +} >> + >> +static u16 compute_subslice_total(const struct sseu_dev_info *sseu) >> +{ >> + u16 i, total = 0; >> + >> + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) >> + total += hweight8(sseu->subslice_mask[i]); >> + >> + return total; >> +} >> + >> static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) >> { >> struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; >> const u32 fuse2 = I915_READ(GEN8_FUSE2); >> + int s, ss; >> + const int eu_mask = 0xff; >> + u32 subslice_mask, eu_en; >> sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> >> GEN10_F2_S_ENA_SHIFT; >> - sseu->subslice_mask = (1 << 4) - 1; >> - sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> >> - GEN10_F2_SS_DIS_SHIFT); >> + sseu->max_slices = 6; >> + sseu->max_subslices = 4; >> + sseu->max_eus_per_subslice = 8; >> - sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); >> - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); >> - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); >> - sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & >> - GEN10_EU_DIS_SS_MASK)); >> + subslice_mask = (1 << 
4) - 1; >> + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> >> + GEN10_F2_SS_DIS_SHIFT); >> + >> + /* >> + * Slice0 can have up to 3 subslices, but there are only 2 in >> + * slice1/2. >> + */ >> + sseu->subslice_mask[0] = subslice_mask; >> + for (s = 1; s < sseu->max_slices; s++) >> + sseu->subslice_mask[s] = subslice_mask & 0x3; >> + >> + /* Slice0 */ >> + eu_en = ~I915_READ(GEN8_EU_DISABLE0); >> + for (ss = 0; ss < sseu->max_subslices; ss++) >> + sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); >> + /* Slice1 */ >> + sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); >> + eu_en = ~I915_READ(GEN8_EU_DISABLE1); >> + sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); >> + /* Slice2 */ >> + sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); >> + sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); >> + /* Slice3 */ >> + sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); >> + eu_en = ~I915_READ(GEN8_EU_DISABLE2); >> + sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); >> + /* Slice4 */ >> + sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); >> + sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); >> + /* Slice5 */ >> + sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); >> + eu_en = ~I915_READ(GEN10_EU_DISABLE3); >> + sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); >> + >> + /* Do a second pass where we mark the subslices disabled if all >> their >> + * eus are off. 
>> + */ >> + for (s = 0; s < sseu->max_slices; s++) { >> + for (ss = 0; ss < sseu->max_subslices; ss++) { >> + if (sseu_get_eus(sseu, s, ss) == 0) >> + sseu->subslice_mask[s] &= ~BIT(ss); >> + } >> + } >> + >> + sseu->subslice_total = compute_subslice_total(sseu); >> + sseu->eu_total = compute_eu_total(sseu); >> /* >> * CNL is expected to always have a uniform distribution >> @@ -155,26 +224,40 @@ static void gen10_sseu_info_init(struct >> drm_i915_private *dev_priv) >> static void cherryview_sseu_info_init(struct drm_i915_private >> *dev_priv) >> { >> struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; >> - u32 fuse, eu_dis; >> + u32 fuse; >> fuse = I915_READ(CHV_FUSE_GT); >> sseu->slice_mask = BIT(0); >> + sseu->max_slices = 1; >> + sseu->max_subslices = 2; >> + sseu->max_eus_per_subslice = 8; >> if (!(fuse & CHV_FGT_DISABLE_SS0)) { >> - sseu->subslice_mask |= BIT(0); >> - eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | >> - CHV_FGT_EU_DIS_SS0_R1_MASK); >> - sseu->eu_total += 8 - hweight32(eu_dis); >> + u8 disabled_mask = >> + ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> >> + CHV_FGT_EU_DIS_SS0_R0_SHIFT) | >> + (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> >> + CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); >> + >> + sseu->subslice_mask[0] |= BIT(0); >> + sseu_set_eus(sseu, 0, 0, ~disabled_mask); >> } >> if (!(fuse & CHV_FGT_DISABLE_SS1)) { >> - sseu->subslice_mask |= BIT(1); >> - eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | >> - CHV_FGT_EU_DIS_SS1_R1_MASK); >> - sseu->eu_total += 8 - hweight32(eu_dis); >> + u8 disabled_mask = >> + ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> >> + CHV_FGT_EU_DIS_SS1_R0_SHIFT) | >> + (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> >> + CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); >> + >> + sseu->subslice_mask[0] |= BIT(1); >> + sseu_set_eus(sseu, 0, 1, ~disabled_mask); >> } >> + sseu->subslice_total = compute_subslice_total(sseu); >> + sseu->eu_total = compute_eu_total(sseu); >> + >> /* >> * CHV expected to always have a uniform distribution of EU >> * 
across subslices. >> @@ -196,41 +279,52 @@ static void gen9_sseu_info_init(struct >> drm_i915_private *dev_priv) >> { >> struct intel_device_info *info = mkwrite_device_info(dev_priv); >> struct sseu_dev_info *sseu = &info->sseu; >> - int s_max = 3, ss_max = 4, eu_max = 8; >> int s, ss; >> - u32 fuse2, eu_disable; >> - u8 eu_mask = 0xff; >> + u32 fuse2, eu_disable, subslice_mask; >> + const u8 eu_mask = 0xff; >> fuse2 = I915_READ(GEN8_FUSE2); >> sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> >> GEN8_F2_S_ENA_SHIFT; >> + /* BXT has a single slice and at most 3 subslices. */ >> + sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; >> + sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; >> + sseu->max_eus_per_subslice = 8; >> + >> /* >> * The subslice disable field is global, i.e. it applies >> * to each of the enabled slices. >> */ >> - sseu->subslice_mask = (1 << ss_max) - 1; >> - sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> >> - GEN9_F2_SS_DIS_SHIFT); >> + subslice_mask = (1 << sseu->max_subslices) - 1; >> + subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> >> + GEN9_F2_SS_DIS_SHIFT); >> /* >> * Iterate through enabled slices and subslices to >> * count the total enabled EU. 
>> */ >> - for (s = 0; s < s_max; s++) { >> + for (s = 0; s < sseu->max_slices; s++) { >> if (!(sseu->slice_mask & BIT(s))) >> /* skip disabled slice */ >> continue; >> + sseu->subslice_mask[s] = subslice_mask; >> + >> eu_disable = I915_READ(GEN9_EU_DISABLE(s)); >> - for (ss = 0; ss < ss_max; ss++) { >> + for (ss = 0; ss < sseu->max_subslices; ss++) { >> int eu_per_ss; >> + u8 eu_disabled_mask; >> - if (!(sseu->subslice_mask & BIT(ss))) >> + if (!(sseu->subslice_mask[s] & BIT(ss))) >> /* skip disabled subslice */ >> continue; >> - eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & >> - eu_mask); >> + eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask; >> + >> + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); >> + >> + eu_per_ss = sseu->max_eus_per_subslice - >> + hweight8(eu_disabled_mask); >> /* >> * Record which subslice(s) has(have) 7 EUs. we >> @@ -239,11 +333,12 @@ static void gen9_sseu_info_init(struct >> drm_i915_private *dev_priv) >> */ >> if (eu_per_ss == 7) >> sseu->subslice_7eu[s] |= BIT(ss); >> - >> - sseu->eu_total += eu_per_ss; >> } >> } >> + sseu->subslice_total = compute_subslice_total(sseu); >> + sseu->eu_total = compute_eu_total(sseu); >> + >> /* >> * SKL is expected to always have a uniform distribution >> * of EU across subslices with the exception that any one >> @@ -269,8 +364,8 @@ static void gen9_sseu_info_init(struct >> drm_i915_private *dev_priv) >> sseu->has_eu_pg = sseu->eu_per_subslice > 2; >> if (IS_GEN9_LP(dev_priv)) { >> -#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) >> - info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; >> +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss))) >> + info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3; >> sseu->min_eu_in_pool = 0; >> if (info->has_pooled_eu) { >> @@ -288,19 +383,22 @@ static void gen9_sseu_info_init(struct >> drm_i915_private *dev_priv) >> static void broadwell_sseu_info_init(struct drm_i915_private >> *dev_priv) >> { >> struct 
sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; >> - const int s_max = 3, ss_max = 3, eu_max = 8; >> int s, ss; >> - u32 fuse2, eu_disable[3]; /* s_max */ >> + u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */ >> fuse2 = I915_READ(GEN8_FUSE2); >> sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> >> GEN8_F2_S_ENA_SHIFT; >> + sseu->max_slices = 3; >> + sseu->max_subslices = 3; >> + sseu->max_eus_per_subslice = 8; >> + >> /* >> * The subslice disable field is global, i.e. it applies >> * to each of the enabled slices. >> */ >> - sseu->subslice_mask = GENMASK(ss_max - 1, 0); >> - sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> >> - GEN8_F2_SS_DIS_SHIFT); >> + subslice_mask = GENMASK(sseu->max_subslices - 1, 0); >> + subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> >> + GEN8_F2_SS_DIS_SHIFT); >> eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & >> GEN8_EU_DIS0_S0_MASK; >> eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> >> GEN8_EU_DIS0_S1_SHIFT) | >> @@ -314,30 +412,39 @@ static void broadwell_sseu_info_init(struct >> drm_i915_private *dev_priv) >> * Iterate through enabled slices and subslices to >> * count the total enabled EU. >> */ >> - for (s = 0; s < s_max; s++) { >> + for (s = 0; s < sseu->max_slices; s++) { >> if (!(sseu->slice_mask & BIT(s))) >> /* skip disabled slice */ >> continue; >> - for (ss = 0; ss < ss_max; ss++) { >> + sseu->subslice_mask[s] = subslice_mask; >> + >> + for (ss = 0; ss < sseu->max_subslices; ss++) { >> + u8 eu_disabled_mask; >> u32 n_disabled; >> - if (!(sseu->subslice_mask & BIT(ss))) >> + if (!(sseu->subslice_mask[ss] & BIT(ss))) >> /* skip disabled subslice */ >> continue; >> - n_disabled = hweight8(eu_disable[s] >> (ss * eu_max)); >> + eu_disabled_mask = >> + eu_disable[s] >> (ss * sseu->max_eus_per_subslice); >> + >> + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); >> + >> + n_disabled = hweight8(eu_disabled_mask); >> /* >> * Record which subslices have 7 EUs. 
>> */ >> - if (eu_max - n_disabled == 7) >> + if (sseu->max_eus_per_subslice - n_disabled == 7) >> sseu->subslice_7eu[s] |= 1 << ss; >> - >> - sseu->eu_total += eu_max - n_disabled; >> } >> } >> + sseu->subslice_total = compute_subslice_total(sseu); >> + sseu->eu_total = compute_eu_total(sseu); >> + >> /* >> * BDW is expected to always have a uniform distribution of EU >> across >> * subslices with the exception that any one EU in any one >> subslice may >> diff --git a/drivers/gpu/drm/i915/intel_device_info.h >> b/drivers/gpu/drm/i915/intel_device_info.h >> index 49cb27bd04c1..36e0df87862d 100644 >> --- a/drivers/gpu/drm/i915/intel_device_info.h >> +++ b/drivers/gpu/drm/i915/intel_device_info.h >> @@ -110,10 +110,14 @@ enum intel_platform { >> func(supports_tv); \ >> func(has_ipc); >> +#define GEN_MAX_SLICES (6) /* CNL upper bound */ >> +#define GEN_MAX_SUBSLICES (7) >> + >> struct sseu_dev_info { >> u8 slice_mask; >> - u8 subslice_mask; >> - u8 eu_total; >> + u8 subslice_mask[GEN_MAX_SUBSLICES]; >> + u16 subslice_total; >> + u16 eu_total; >> u8 eu_per_subslice; >> u8 min_eu_in_pool; >> /* For each slice, which subslice(s) has(have) 7 EUs >> (bitfield)? */ >> @@ -121,6 +125,17 @@ struct sseu_dev_info { >> u8 has_slice_pg:1; >> u8 has_subslice_pg:1; >> u8 has_eu_pg:1; >> + >> + /* Topology fields */ >> + u8 max_slices; >> + u8 max_subslices; >> + u8 max_eus_per_subslice; >> + >> + /* We don't have more than 8 eus per subslice at the moment and >> as we >> + * store eus enabled using bits, no need to multiply by eus per >> + * subslice. 
>> + */ >> + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; >> }; >> struct intel_device_info { >> @@ -167,7 +182,33 @@ struct intel_device_info { >> static inline unsigned int sseu_subslice_total(const struct >> sseu_dev_info *sseu) >> { >> - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); >> + return sseu->subslice_total; >> +} >> + >> +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, >> + int slice, int subslice) >> +{ >> + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, >> + BITS_PER_BYTE); >> + int slice_stride = sseu->max_subslices * subslice_stride; >> + >> + return slice * slice_stride + subslice * subslice_stride; >> +} >> + >> +/* >> + * The following functions prototypes should be updated with a >> larger type >> + * than u8 if we ever have more than 8 EUs per subslice. >> + */ >> +static inline u8 sseu_get_eus(const struct sseu_dev_info *sseu, >> + int slice, int subslice) >> +{ >> + return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)]; >> +} >> + >> +static inline void sseu_set_eus(struct sseu_dev_info *sseu, >> + int slice, int subslice, u8 eu_mask) >> +{ >> + sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)] = eu_mask; >> } >> const char *intel_platform_name(enum intel_platform platform); >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c >> b/drivers/gpu/drm/i915/intel_lrc.c >> index ff25f209d0a5..ac7896031b8d 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -2098,7 +2098,7 @@ make_rpcs(struct drm_i915_private *dev_priv) >> if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { >> rpcs |= GEN8_RPCS_SS_CNT_ENABLE; >> - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << >> + rpcs |= >> hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << >> GEN8_RPCS_SS_CNT_SHIFT; >> rpcs |= GEN8_RPCS_ENABLE; >> } >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h >> b/drivers/gpu/drm/i915/intel_ringbuffer.h >> index c5ff203e42d6..23ae9a957fab 100644 >> 
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h >> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum >> intel_engine_hangcheck_action a) >> #define instdone_subslice_mask(dev_priv__) \ >> (INTEL_GEN(dev_priv__) == 7 ? \ >> - 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) >> + 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0]) >> #define for_each_instdone_slice_subslice(dev_priv__, slice__, >> subslice__) \ >> for ((slice__) = 0, (subslice__) = 0; \ >> > > I am happy with this version and I did not spot any mistakes. > > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Do you know if we have any test coverage which would be able to tell > us if the reported numbers before and after match? > I have some IGT tests (which I should resend) that verify that the old GET_PARAM values match the new uapi. But because this refactors code, the old behavior is lost :( Thanks for the review.
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cc659b4b2a45..684551114965 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4289,7 +4289,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask = BIT(0); - sseu->subslice_mask |= BIT(ss); + sseu->subslice_mask[0] |= BIT(ss); eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) + @@ -4336,7 +4336,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask |= BIT(s); - sseu->subslice_mask = info->sseu.subslice_mask; + sseu->subslice_mask[s] = info->sseu.subslice_mask[s]; for (ss = 0; ss < ss_max; ss++) { unsigned int eu_cnt; @@ -4391,8 +4391,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask = - INTEL_INFO(dev_priv)->sseu.subslice_mask; + sseu->subslice_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; for (ss = 0; ss < ss_max; ss++) { unsigned int eu_cnt; @@ -4402,7 +4402,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, /* skip disabled subslice */ continue; - sseu->subslice_mask |= BIT(ss); + sseu->subslice_mask[s] |= BIT(ss); } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & @@ -4424,9 +4424,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; if (sseu->slice_mask) { - sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; sseu->eu_per_subslice = INTEL_INFO(dev_priv)->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) { + sseu->subslice_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslice_mask[s]; + } sseu->eu_total = sseu->eu_per_subslice * sseu_subslice_total(sseu); @@ -4445,6 +4448,7 @@ static void 
i915_print_sseu_info(struct seq_file *m, bool is_available_info, { struct drm_i915_private *dev_priv = node_to_i915(m->private); const char *type = is_available_info ? "Available" : "Enabled"; + int s; seq_printf(m, " %s Slice Mask: %04x\n", type, sseu->slice_mask); @@ -4452,10 +4456,11 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, hweight8(sseu->slice_mask)); seq_printf(m, " %s Subslice Total: %u\n", type, sseu_subslice_total(sseu)); - seq_printf(m, " %s Subslice Mask: %04x\n", type, - sseu->subslice_mask); - seq_printf(m, " %s Subslice Per Slice: %u\n", type, - hweight8(sseu->subslice_mask)); + for (s = 0; s < fls(sseu->slice_mask); s++) { + seq_printf(m, " %s Slice%i %u subslices, mask=%04x\n", type, + s, hweight8(sseu->subslice_mask[s]), + sseu->subslice_mask[s]); + } seq_printf(m, " %s EU Total: %u\n", type, sseu->eu_total); seq_printf(m, " %s EU Per Subslice: %u\n", type, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6c8da9d20c33..969835d3cbcd 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: - value = INTEL_INFO(dev_priv)->sseu.subslice_mask; + value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0]; if (!value) return -ENODEV; break; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d28592e43512..ed14994527fc 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -80,12 +80,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info, static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) { + int s; + drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); - 
drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); - drm_printf(p, "subslice per slice: %u\n", - hweight8(sseu->subslice_mask)); + for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) { + drm_printf(p, "slice%d %u subslices mask=%04x\n", + s, hweight8(sseu->subslice_mask[s]), + sseu->subslice_mask[s]); + } drm_printf(p, "EU total: %u\n", sseu->eu_total); drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); drm_printf(p, "has slice power gating: %s\n", @@ -119,22 +123,87 @@ void intel_device_info_dump(const struct intel_device_info *info, intel_device_info_dump_flags(info, p); } +static u16 compute_eu_total(const struct sseu_dev_info *sseu) +{ + u16 i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) + total += hweight8(sseu->eu_mask[i]); + + return total; +} + +static u16 compute_subslice_total(const struct sseu_dev_info *sseu) +{ + u16 i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) + total += hweight8(sseu->subslice_mask[i]); + + return total; +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; const u32 fuse2 = I915_READ(GEN8_FUSE2); + int s, ss; + const int eu_mask = 0xff; + u32 subslice_mask, eu_en; sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - sseu->subslice_mask = (1 << 4) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); + sseu->max_slices = 6; + sseu->max_subslices = 4; + sseu->max_eus_per_subslice = 8; - sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); - sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & - GEN10_EU_DIS_SS_MASK)); + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + + /* + * Slice0 can have up to 3 subslices, but there are 
only 2 in + * slice1/2. + */ + sseu->subslice_mask[0] = subslice_mask; + for (s = 1; s < sseu->max_slices; s++) + sseu->subslice_mask[s] = subslice_mask & 0x3; + + /* Slice0 */ + eu_en = ~I915_READ(GEN8_EU_DISABLE0); + for (ss = 0; ss < sseu->max_subslices; ss++) + sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); + /* Slice1 */ + sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN8_EU_DISABLE1); + sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); + /* Slice2 */ + sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); + /* Slice3 */ + sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN8_EU_DISABLE2); + sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); + /* Slice4 */ + sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); + sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); + /* Slice5 */ + sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); + eu_en = ~I915_READ(GEN10_EU_DISABLE3); + sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); + + /* Do a second pass where we mark the subslices disabled if all their + * eus are off. 
+ */ + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + if (sseu_get_eus(sseu, s, ss) == 0) + sseu->subslice_mask[s] &= ~BIT(ss); + } + } + + sseu->subslice_total = compute_subslice_total(sseu); + sseu->eu_total = compute_eu_total(sseu); /* * CNL is expected to always have a uniform distribution @@ -155,26 +224,40 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - u32 fuse, eu_dis; + u32 fuse; fuse = I915_READ(CHV_FUSE_GT); sseu->slice_mask = BIT(0); + sseu->max_slices = 1; + sseu->max_subslices = 2; + sseu->max_eus_per_subslice = 8; if (!(fuse & CHV_FGT_DISABLE_SS0)) { - sseu->subslice_mask |= BIT(0); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | - CHV_FGT_EU_DIS_SS0_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> + CHV_FGT_EU_DIS_SS0_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> + CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); + + sseu->subslice_mask[0] |= BIT(0); + sseu_set_eus(sseu, 0, 0, ~disabled_mask); } if (!(fuse & CHV_FGT_DISABLE_SS1)) { - sseu->subslice_mask |= BIT(1); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | - CHV_FGT_EU_DIS_SS1_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + u8 disabled_mask = + ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> + CHV_FGT_EU_DIS_SS1_R0_SHIFT) | + (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> + CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); + + sseu->subslice_mask[0] |= BIT(1); + sseu_set_eus(sseu, 0, 1, ~disabled_mask); } + sseu->subslice_total = compute_subslice_total(sseu); + sseu->eu_total = compute_eu_total(sseu); + /* * CHV expected to always have a uniform distribution of EU * across subslices. 
@@ -196,41 +279,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) { struct intel_device_info *info = mkwrite_device_info(dev_priv); struct sseu_dev_info *sseu = &info->sseu; - int s_max = 3, ss_max = 4, eu_max = 8; int s, ss; - u32 fuse2, eu_disable; - u8 eu_mask = 0xff; + u32 fuse2, eu_disable, subslice_mask; + const u8 eu_mask = 0xff; fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + /* BXT has a single slice and at most 3 subslices. */ + sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; + sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = (1 << ss_max) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> - GEN9_F2_SS_DIS_SHIFT); + subslice_mask = (1 << sseu->max_subslices) - 1; + subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> + GEN9_F2_SS_DIS_SHIFT); /* * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; + sseu->subslice_mask[s] = subslice_mask; + eu_disable = I915_READ(GEN9_EU_DISABLE(s)); - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { int eu_per_ss; + u8 eu_disabled_mask; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslice_mask[s] & BIT(ss))) /* skip disabled subslice */ continue; - eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & - eu_mask); + eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask; + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + eu_per_ss = sseu->max_eus_per_subslice - + hweight8(eu_disabled_mask); /* * Record which subslice(s) has(have) 7 EUs. 
we @@ -239,11 +333,12 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) */ if (eu_per_ss == 7) sseu->subslice_7eu[s] |= BIT(ss); - - sseu->eu_total += eu_per_ss; } } + sseu->subslice_total = compute_subslice_total(sseu); + sseu->eu_total = compute_eu_total(sseu); + /* * SKL is expected to always have a uniform distribution * of EU across subslices with the exception that any one @@ -269,8 +364,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_GEN9_LP(dev_priv)) { -#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) - info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; +#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss))) + info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3; sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { @@ -288,19 +383,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - const int s_max = 3, ss_max = 3, eu_max = 8; int s, ss; - u32 fuse2, eu_disable[3]; /* s_max */ + u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */ fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->max_slices = 3; + sseu->max_subslices = 3; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. 
*/ - sseu->subslice_mask = GENMASK(ss_max - 1, 0); - sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> - GEN8_F2_SS_DIS_SHIFT); + subslice_mask = GENMASK(sseu->max_subslices - 1, 0); + subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> + GEN8_F2_SS_DIS_SHIFT); eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK; eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) | @@ -314,30 +412,39 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; - for (ss = 0; ss < ss_max; ss++) { + sseu->subslice_mask[s] = subslice_mask; + + for (ss = 0; ss < sseu->max_subslices; ss++) { + u8 eu_disabled_mask; u32 n_disabled; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslice_mask[ss] & BIT(ss))) /* skip disabled subslice */ continue; - n_disabled = hweight8(eu_disable[s] >> (ss * eu_max)); + eu_disabled_mask = + eu_disable[s] >> (ss * sseu->max_eus_per_subslice); + + sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + + n_disabled = hweight8(eu_disabled_mask); /* * Record which subslices have 7 EUs. 
*/ - if (eu_max - n_disabled == 7) + if (sseu->max_eus_per_subslice - n_disabled == 7) sseu->subslice_7eu[s] |= 1 << ss; - - sseu->eu_total += eu_max - n_disabled; } } + sseu->subslice_total = compute_subslice_total(sseu); + sseu->eu_total = compute_eu_total(sseu); + /* * BDW is expected to always have a uniform distribution of EU across * subslices with the exception that any one EU in any one subslice may diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 49cb27bd04c1..36e0df87862d 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -110,10 +110,14 @@ enum intel_platform { func(supports_tv); \ func(has_ipc); +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (7) + struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask; - u8 eu_total; + u8 subslice_mask[GEN_MAX_SUBSLICES]; + u16 subslice_total; + u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ @@ -121,6 +125,17 @@ struct sseu_dev_info { u8 has_slice_pg:1; u8 has_subslice_pg:1; u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. 
+ */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; struct intel_device_info { @@ -167,7 +182,33 @@ struct intel_device_info { static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) { - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); + return sseu->subslice_total; +} + +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, + int slice, int subslice) +{ + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, + BITS_PER_BYTE); + int slice_stride = sseu->max_subslices * subslice_stride; + + return slice * slice_stride + subslice * subslice_stride; +} + +/* + * The following functions prototypes should be updated with a larger type + * than u8 if we ever have more than 8 EUs per subslice. + */ +static inline u8 sseu_get_eus(const struct sseu_dev_info *sseu, + int slice, int subslice) +{ + return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)]; +} + +static inline void sseu_set_eus(struct sseu_dev_info *sseu, + int slice, int subslice, u8 eu_mask) +{ + sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)] = eu_mask; } const char *intel_platform_name(enum intel_platform platform); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ff25f209d0a5..ac7896031b8d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2098,7 +2098,7 @@ make_rpcs(struct drm_i915_private *dev_priv) if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c5ff203e42d6..23ae9a957fab 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum 
intel_engine_hangcheck_action a) #define instdone_subslice_mask(dev_priv__) \ (INTEL_GEN(dev_priv__) == 7 ? \ - 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) + 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0]) #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ for ((slice__) = 0, (subslice__) = 0; \
Up to now, the subslice mask was assumed to be uniform across slices. But starting with Cannonlake, slices can be asymmetric (for example, slice0 has a different number of subslices than slice1+). This change stores a subslice mask for each slice rather than having a single mask that applies to all slices. v2: Rework how we store total numbers in sseu_dev_info (Tvrtko) Fix CHV eu masks, was reading disabled as enabled (Tvrtko) Readability changes (Tvrtko) Add EU index helper (Tvrtko) v3: Turn ALIGN(v, 8) / 8 into DIV_ROUND_UP(v, BITS_PER_BYTE) (Tvrtko) Reuse sseu_eu_idx() for setting eu_mask on CHV (Tvrtko) Reformat debug prints for subslices (Tvrtko) v4: Change eu_mask helper into sseu_set_eus() (Tvrtko) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/i915/i915_debugfs.c | 25 ++-- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 201 +++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_device_info.h | 47 +++++++- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 6 files changed, 216 insertions(+), 63 deletions(-)