diff mbox

[v2,1/6] drm/i915: store all subslice masks

Message ID 20180111195352.27193-2-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin Jan. 11, 2018, 7:53 p.m. UTC
Up to now, subslice mask was assumed to be uniform across slices. But
starting with Cannonlake, slices can be asymmetric (for example slice0
has different number of subslices as slice1+). This change stores all
subslices masks for all slices rather than having a single mask that
applies to all slices.

v2: Rework how we store total numbers in sseu_dev_info (Tvrtko)
    Fix CHV eu masks, was reading disabled as enabled (Tvrtko)
    Readability changes (Tvrtko)
    Add EU index helper (Tvrtko)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  25 ++--
 drivers/gpu/drm/i915/i915_drv.c          |   2 +-
 drivers/gpu/drm/i915/intel_device_info.c | 196 +++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_device_info.h |  36 +++++-
 drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
 6 files changed, 200 insertions(+), 63 deletions(-)

Comments

Tvrtko Ursulin Jan. 12, 2018, 10:15 a.m. UTC | #1
On 11/01/2018 19:53, Lionel Landwerlin wrote:
> Up to now, subslice mask was assumed to be uniform across slices. But
> starting with Cannonlake, slices can be asymmetric (for example slice0
> has different number of subslices as slice1+). This change stores all
> subslices masks for all slices rather than having a single mask that
> applies to all slices.
> 
> v2: Rework how we store total numbers in sseu_dev_info (Tvrtko)
>      Fix CHV eu masks, was reading disabled as enabled (Tvrtko)
>      Readability changes (Tvrtko)
>      Add EU index helper (Tvrtko)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      |  25 ++--
>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>   drivers/gpu/drm/i915/intel_device_info.c | 196 +++++++++++++++++++++++--------
>   drivers/gpu/drm/i915/intel_device_info.h |  36 +++++-
>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>   6 files changed, 200 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 2bb63073d73f..463029f72a0b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4285,7 +4285,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>   			continue;
>   
>   		sseu->slice_mask = BIT(0);
> -		sseu->subslice_mask |= BIT(ss);
> +		sseu->subslice_mask[0] |= BIT(ss);
>   		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>   			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>   			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
> @@ -4332,7 +4332,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>   			continue;
>   
>   		sseu->slice_mask |= BIT(s);
> -		sseu->subslice_mask = info->sseu.subslice_mask;
> +		sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
>   
>   		for (ss = 0; ss < ss_max; ss++) {
>   			unsigned int eu_cnt;
> @@ -4387,8 +4387,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   		sseu->slice_mask |= BIT(s);
>   
>   		if (IS_GEN9_BC(dev_priv))
> -			sseu->subslice_mask =
> -				INTEL_INFO(dev_priv)->sseu.subslice_mask;
> +			sseu->subslice_mask[s] =
> +				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
>   
>   		for (ss = 0; ss < ss_max; ss++) {
>   			unsigned int eu_cnt;
> @@ -4398,7 +4398,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   					/* skip disabled subslice */
>   					continue;
>   
> -				sseu->subslice_mask |= BIT(ss);
> +				sseu->subslice_mask[s] |= BIT(ss);
>   			}
>   
>   			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
> @@ -4420,9 +4420,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>   	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>   
>   	if (sseu->slice_mask) {
> -		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>   		sseu->eu_per_subslice =
>   				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
> +		for (s = 0; s < fls(sseu->slice_mask); s++) {
> +			sseu->subslice_mask[s] =
> +				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
> +		}
>   		sseu->eu_total = sseu->eu_per_subslice *
>   				 sseu_subslice_total(sseu);
>   
> @@ -4441,6 +4444,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
>   {
>   	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>   	const char *type = is_available_info ? "Available" : "Enabled";
> +	int s;
>   
>   	seq_printf(m, "  %s Slice Mask: %04x\n", type,
>   		   sseu->slice_mask);
> @@ -4448,10 +4452,11 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
>   		   hweight8(sseu->slice_mask));
>   	seq_printf(m, "  %s Subslice Total: %u\n", type,
>   		   sseu_subslice_total(sseu));
> -	seq_printf(m, "  %s Subslice Mask: %04x\n", type,
> -		   sseu->subslice_mask);
> -	seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
> -		   hweight8(sseu->subslice_mask));
> +	for (s = 0; s < fls(sseu->slice_mask); s++) {
> +		seq_printf(m, "  %s Slice%i %u subslices, mask=%04x\n", type,
> +			   s, hweight8(sseu->subslice_mask[s]),
> +			   sseu->subslice_mask[s]);
> +	}
>   	seq_printf(m, "  %s EU Total: %u\n", type,
>   		   sseu->eu_total);
>   	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 6c8da9d20c33..969835d3cbcd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_SUBSLICE_MASK:
> -		value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
> +		value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
>   		if (!value)
>   			return -ENODEV;
>   		break;
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index d28592e43512..0bf6ef51cdb7 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -80,12 +80,17 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
>   
>   static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
>   {
> +	int s;
> +
>   	drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
>   	drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
>   	drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
> -	drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
> -	drm_printf(p, "subslice per slice: %u\n",
> -		   hweight8(sseu->subslice_mask));
> +	for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
> +		drm_printf(p, "slice%d subslice mask %04x\n",
> +			   s, sseu->subslice_mask[s]);
> +		drm_printf(p, "slice%d subslice per slice: %u\n",
> +			   s, hweight8(sseu->subslice_mask[s]));

Cosmetic only but consider condensing this into one line if you don't 
have a preference to either.

> +	}
>   	drm_printf(p, "EU total: %u\n", sseu->eu_total);
>   	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
>   	drm_printf(p, "has slice power gating: %s\n",
> @@ -119,22 +124,88 @@ void intel_device_info_dump(const struct intel_device_info *info,
>   	intel_device_info_dump_flags(info, p);
>   }
>   
> +static u16 compute_eu_total(const struct sseu_dev_info *sseu)
> +{
> +	u16 i, total = 0;
> +
> +	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
> +		total += hweight8(sseu->eu_mask[i]);
> +
> +	return total;
> +}
> +
> +static u16 compute_subslice_total(const struct sseu_dev_info *sseu)
> +{
> +	u16 i, total = 0;
> +
> +	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
> +		total += hweight8(sseu->subslice_mask[i]);
> +
> +	return total;
> +}
> +
>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>   	const u32 fuse2 = I915_READ(GEN8_FUSE2);
> +	int s, ss;
> +	const int eu_mask = 0xff;
> +	u32 subslice_mask, eu_en;
>   
>   	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>   			    GEN10_F2_S_ENA_SHIFT;
> -	sseu->subslice_mask = (1 << 4) - 1;
> -	sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> -				 GEN10_F2_SS_DIS_SHIFT);
> +	sseu->max_slices = 6;
> +	sseu->max_subslices = 4;
> +	sseu->max_eus_per_subslice = 8;
>   
> -	sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
> -	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
> -	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
> -	sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
> -				     GEN10_EU_DIS_SS_MASK));
> +	subslice_mask = (1 << 4) - 1;
> +	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> +			   GEN10_F2_SS_DIS_SHIFT);
> +
> +	/* Slice0 can have up to 3 subslices, but there are only 2 in
> +	 * slice1/2.
> +	 */

Sorry for not spotting this earlier - this comment style is frowned 
upon. Instead for multi-line please use:

/*
  * Blah blah...
  */

> +	sseu->subslice_mask[0] = subslice_mask;
> +	for (s = 1; s < sseu->max_slices; s++)
> +		sseu->subslice_mask[s] = subslice_mask & 0x3;
> +
> +	/* Slice0 */
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
> +	for (ss = 0; ss < sseu->max_subslices; ss++) {
> +		sseu->eu_mask[sseu_eu_idx(sseu, 0, ss, 0)] =
> +			(eu_en >> (8 * ss)) & eu_mask;
> +	}
> +	/* Slice1 */
> +	sseu->eu_mask[sseu_eu_idx(sseu, 1, 0, 0)] = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
> +	sseu->eu_mask[sseu_eu_idx(sseu, 1, 1, 0)] = eu_en & eu_mask;
> +	/* Slice2 */
> +	sseu->eu_mask[sseu_eu_idx(sseu, 2, 0, 0)] = (eu_en >> 8) & eu_mask;
> +	sseu->eu_mask[sseu_eu_idx(sseu, 2, 1, 0)] = (eu_en >> 16) & eu_mask;
> +	/* Slice3 */
> +	sseu->eu_mask[sseu_eu_idx(sseu, 3, 0, 0)] = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
> +	sseu->eu_mask[sseu_eu_idx(sseu, 3, 1, 0)] = eu_en & eu_mask;
> +	/* Slice4 */
> +	sseu->eu_mask[sseu_eu_idx(sseu, 4, 0, 0)] = (eu_en >> 8) & eu_mask;
> +	sseu->eu_mask[sseu_eu_idx(sseu, 4, 1, 0)] = (eu_en >> 16) & eu_mask;
> +	/* Slice5 */
> +	sseu->eu_mask[sseu_eu_idx(sseu, 5, 0, 0)] = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
> +	sseu->eu_mask[sseu_eu_idx(sseu, 5, 1, 0)] = eu_en & eu_mask;

Indices are guaranteed to be unique in these operations? No need to RMW?

> +
> +	/* Do a second pass where we marked the subslices disabled if all
> +	 * their eus are off.
> +	 */
> +	for (s = 0; s < sseu->max_slices; s++) {
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
> +			if (sseu_eu_mask(sseu, s, ss, 0) == 0)
> +				sseu->subslice_mask[s] &= ~BIT(ss);
> +		}
> +	}
> +
> +	sseu->subslice_total = compute_subslice_total(sseu);
> +	sseu->eu_total = compute_eu_total(sseu);
>   
>   	/*
>   	 * CNL is expected to always have a uniform distribution
> @@ -155,26 +226,31 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>   static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
> -	u32 fuse, eu_dis;
> +	u32 fuse;
>   
>   	fuse = I915_READ(CHV_FUSE_GT);
>   
>   	sseu->slice_mask = BIT(0);
> +	sseu->max_slices = 1;
> +	sseu->max_subslices = 2;
> +	sseu->max_eus_per_subslice = 8;
>   
>   	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
> -		sseu->subslice_mask |= BIT(0);
> -		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
> -				 CHV_FGT_EU_DIS_SS0_R1_MASK);
> -		sseu->eu_total += 8 - hweight32(eu_dis);
> +		sseu->subslice_mask[0] |= BIT(0);
> +		sseu->eu_mask[0] = ~((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT);
> +		sseu->eu_mask[0] |= ~(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

Use sseu_eu_idx?

> +		sseu->subslice_mask[0] = 1;
>   	}
>   
>   	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
> -		sseu->subslice_mask |= BIT(1);
> -		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
> -				 CHV_FGT_EU_DIS_SS1_R1_MASK);
> -		sseu->eu_total += 8 - hweight32(eu_dis);
> +		sseu->subslice_mask[0] |= BIT(1);
> +		sseu->eu_mask[1] = ~((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT);
> +		sseu->eu_mask[1] |= ~(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
>   	}
>   
> +	sseu->subslice_total = compute_subslice_total(sseu);
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * CHV expected to always have a uniform distribution of EU
>   	 * across subslices.
> @@ -196,41 +272,53 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_device_info *info = mkwrite_device_info(dev_priv);
>   	struct sseu_dev_info *sseu = &info->sseu;
> -	int s_max = 3, ss_max = 4, eu_max = 8;
>   	int s, ss;
> -	u32 fuse2, eu_disable;
> -	u8 eu_mask = 0xff;
> +	u32 fuse2, eu_disable, subslice_mask;
> +	const u8 eu_mask = 0xff;
>   
>   	fuse2 = I915_READ(GEN8_FUSE2);
>   	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
>   
> +	/* BXT has a single slice and at most 3 subslices. */
> +	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
> +	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
> +	sseu->max_eus_per_subslice = 8;
> +
>   	/*
>   	 * The subslice disable field is global, i.e. it applies
>   	 * to each of the enabled slices.
>   	*/
> -	sseu->subslice_mask = (1 << ss_max) - 1;
> -	sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
> -				 GEN9_F2_SS_DIS_SHIFT);
> +	subslice_mask = (1 << sseu->max_subslices) - 1;
> +	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
> +			   GEN9_F2_SS_DIS_SHIFT);
>   
>   	/*
>   	 * Iterate through enabled slices and subslices to
>   	 * count the total enabled EU.
>   	*/
> -	for (s = 0; s < s_max; s++) {
> +	for (s = 0; s < sseu->max_slices; s++) {
>   		if (!(sseu->slice_mask & BIT(s)))
>   			/* skip disabled slice */
>   			continue;
>   
> +		sseu->subslice_mask[s] = subslice_mask;
> +
>   		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
> -		for (ss = 0; ss < ss_max; ss++) {
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
>   			int eu_per_ss;
> +			u8 eu_disabled_mask;
>   
> -			if (!(sseu->subslice_mask & BIT(ss)))
> +			if (!(sseu->subslice_mask[s] & BIT(ss)))
>   				/* skip disabled subslice */
>   				continue;
>   
> -			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
> -						      eu_mask);
> +			eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask; > +
> +			sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] =
> +				~eu_disabled_mask;
> +
> +			eu_per_ss = sseu->max_eus_per_subslice -
> +				hweight8(eu_disabled_mask);
>   
>   			/*
>   			 * Record which subslice(s) has(have) 7 EUs. we
> @@ -239,11 +327,12 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   			 */
>   			if (eu_per_ss == 7)
>   				sseu->subslice_7eu[s] |= BIT(ss);
> -
> -			sseu->eu_total += eu_per_ss;
>   		}
>   	}
>   
> +	sseu->subslice_total = compute_subslice_total(sseu);
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * SKL is expected to always have a uniform distribution
>   	 * of EU across subslices with the exception that any one
> @@ -269,8 +358,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>   
>   	if (IS_GEN9_LP(dev_priv)) {
> -#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
> -		info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
> +#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
> +		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
>   
>   		sseu->min_eu_in_pool = 0;
>   		if (info->has_pooled_eu) {
> @@ -288,19 +377,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
> -	const int s_max = 3, ss_max = 3, eu_max = 8;
>   	int s, ss;
> -	u32 fuse2, eu_disable[3]; /* s_max */
> +	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>   
>   	fuse2 = I915_READ(GEN8_FUSE2);
>   	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
> +	sseu->max_slices = 3;
> +	sseu->max_subslices = 3;
> +	sseu->max_eus_per_subslice = 8;
> +
>   	/*
>   	 * The subslice disable field is global, i.e. it applies
>   	 * to each of the enabled slices.
>   	 */
> -	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
> -	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
> -				 GEN8_F2_SS_DIS_SHIFT);
> +	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
> +	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
> +			   GEN8_F2_SS_DIS_SHIFT);
>   
>   	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
>   	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
> @@ -314,30 +406,40 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   	 * Iterate through enabled slices and subslices to
>   	 * count the total enabled EU.
>   	 */
> -	for (s = 0; s < s_max; s++) {
> +	for (s = 0; s < sseu->max_slices; s++) {
>   		if (!(sseu->slice_mask & BIT(s)))
>   			/* skip disabled slice */
>   			continue;
>   
> -		for (ss = 0; ss < ss_max; ss++) {
> +		sseu->subslice_mask[s] = subslice_mask;
> +
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
> +			u8 eu_disabled_mask;
>   			u32 n_disabled;
>   
> -			if (!(sseu->subslice_mask & BIT(ss)))
> +			if (!(sseu->subslice_mask[ss] & BIT(ss)))
>   				/* skip disabled subslice */
>   				continue;
>   
> -			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
> +			eu_disabled_mask =
> +				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
> +
> +			sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] =
> +				~eu_disabled_mask;
> +
> +			n_disabled = hweight8(eu_disabled_mask);
>   
>   			/*
>   			 * Record which subslices have 7 EUs.
>   			 */
> -			if (eu_max - n_disabled == 7)
> +			if (sseu->max_eus_per_subslice - n_disabled == 7)
>   				sseu->subslice_7eu[s] |= 1 << ss;
> -
> -			sseu->eu_total += eu_max - n_disabled;
>   		}
>   	}
>   
> +	sseu->subslice_total = compute_subslice_total(sseu);
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * BDW is expected to always have a uniform distribution of EU across
>   	 * subslices with the exception that any one EU in any one subslice may
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 49cb27bd04c1..6eb8bcf5a213 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -110,10 +110,14 @@ enum intel_platform {
>   	func(supports_tv); \
>   	func(has_ipc);
>   
> +#define GEN_MAX_SLICES		(6) /* CNL upper bound */
> +#define GEN_MAX_SUBSLICES	(7)
> +
>   struct sseu_dev_info {
>   	u8 slice_mask;
> -	u8 subslice_mask;
> -	u8 eu_total;
> +	u8 subslice_mask[GEN_MAX_SUBSLICES];
> +	u16 subslice_total;
> +	u16 eu_total;
>   	u8 eu_per_subslice;
>   	u8 min_eu_in_pool;
>   	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
> @@ -121,6 +125,17 @@ struct sseu_dev_info {
>   	u8 has_slice_pg:1;
>   	u8 has_subslice_pg:1;
>   	u8 has_eu_pg:1;
> +
> +	/* Topology fields */
> +	u8 max_slices;
> +	u8 max_subslices;
> +	u8 max_eus_per_subslice;
> +
> +	/* We don't have more than 8 eus per subslice at the moment and as we
> +	 * store eus enabled using bits, no need to multiply by eus per
> +	 * subslice.
> +	 */
> +	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>   };
>   
>   struct intel_device_info {
> @@ -167,7 +182,22 @@ struct intel_device_info {
>   
>   static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
>   {
> -	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
> +	return sseu->subslice_total;
> +}
> +
> +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
> +			      int slice, int subslice, int eu_group)

What is eu_group for? Will it be used at some point?

> +{
> +	int subslice_stride = ALIGN(sseu->max_eus_per_subslice, 8) / 8;

What are these eights? sizeof(eu_mask[0]) ? Is this whole statement 
DIV_ROUND_UP also?

> +	int slice_stride = sseu->max_subslices * subslice_stride;
> +
> +	return slice * slice_stride + subslice * subslice_stride + eu_group;
> +}
> +
> +static inline int sseu_eu_mask(const struct sseu_dev_info *sseu,
> +			       int slice, int subslice, int eu_group)
> +{
> +	return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice, eu_group)];
>   }
>   
>   const char *intel_platform_name(enum intel_platform platform);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ff25f209d0a5..ac7896031b8d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2098,7 +2098,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>   
>   	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>   		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
> -		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
> +		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
>   			GEN8_RPCS_SS_CNT_SHIFT;
>   		rpcs |= GEN8_RPCS_ENABLE;
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c5ff203e42d6..23ae9a957fab 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
>   
>   #define instdone_subslice_mask(dev_priv__) \
>   	(INTEL_GEN(dev_priv__) == 7 ? \
> -	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
> +	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
>   
>   #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
>   	for ((slice__) = 0, (subslice__) = 0; \
> 

Regards,

Tvrtko
Lionel Landwerlin Jan. 12, 2018, 10:58 a.m. UTC | #2
On 12/01/18 10:15, Tvrtko Ursulin wrote:
>
> On 11/01/2018 19:53, Lionel Landwerlin wrote:
>> Up to now, subslice mask was assumed to be uniform across slices. But
>> starting with Cannonlake, slices can be asymmetric (for example slice0
>> has different number of subslices as slice1+). This change stores all
>> subslices masks for all slices rather than having a single mask that
>> applies to all slices.
>>
>> v2: Rework how we store total numbers in sseu_dev_info (Tvrtko)
>>      Fix CHV eu masks, was reading disabled as enabled (Tvrtko)
>>      Readability changes (Tvrtko)
>>      Add EU index helper (Tvrtko)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c      |  25 ++--
>>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>>   drivers/gpu/drm/i915/intel_device_info.c | 196 
>> +++++++++++++++++++++++--------
>>   drivers/gpu/drm/i915/intel_device_info.h |  36 +++++-
>>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>>   6 files changed, 200 insertions(+), 63 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 2bb63073d73f..463029f72a0b 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -4285,7 +4285,7 @@ static void 
>> cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>>               continue;
>>             sseu->slice_mask = BIT(0);
>> -        sseu->subslice_mask |= BIT(ss);
>> +        sseu->subslice_mask[0] |= BIT(ss);
>>           eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>>                ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>>                ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
>> @@ -4332,7 +4332,7 @@ static void gen10_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>               continue;
>>             sseu->slice_mask |= BIT(s);
>> -        sseu->subslice_mask = info->sseu.subslice_mask;
>> +        sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
>>             for (ss = 0; ss < ss_max; ss++) {
>>               unsigned int eu_cnt;
>> @@ -4387,8 +4387,8 @@ static void gen9_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>           sseu->slice_mask |= BIT(s);
>>             if (IS_GEN9_BC(dev_priv))
>> -            sseu->subslice_mask =
>> -                INTEL_INFO(dev_priv)->sseu.subslice_mask;
>> +            sseu->subslice_mask[s] =
>> +                INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
>>             for (ss = 0; ss < ss_max; ss++) {
>>               unsigned int eu_cnt;
>> @@ -4398,7 +4398,7 @@ static void gen9_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>                       /* skip disabled subslice */
>>                       continue;
>>   -                sseu->subslice_mask |= BIT(ss);
>> +                sseu->subslice_mask[s] |= BIT(ss);
>>               }
>>                 eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
>> @@ -4420,9 +4420,12 @@ static void 
>> broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>>       sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>>         if (sseu->slice_mask) {
>> -        sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>           sseu->eu_per_subslice =
>>                   INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
>> +        for (s = 0; s < fls(sseu->slice_mask); s++) {
>> +            sseu->subslice_mask[s] =
>> +                INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
>> +        }
>>           sseu->eu_total = sseu->eu_per_subslice *
>>                    sseu_subslice_total(sseu);
>>   @@ -4441,6 +4444,7 @@ static void i915_print_sseu_info(struct 
>> seq_file *m, bool is_available_info,
>>   {
>>       struct drm_i915_private *dev_priv = node_to_i915(m->private);
>>       const char *type = is_available_info ? "Available" : "Enabled";
>> +    int s;
>>         seq_printf(m, "  %s Slice Mask: %04x\n", type,
>>              sseu->slice_mask);
>> @@ -4448,10 +4452,11 @@ static void i915_print_sseu_info(struct 
>> seq_file *m, bool is_available_info,
>>              hweight8(sseu->slice_mask));
>>       seq_printf(m, "  %s Subslice Total: %u\n", type,
>>              sseu_subslice_total(sseu));
>> -    seq_printf(m, "  %s Subslice Mask: %04x\n", type,
>> -           sseu->subslice_mask);
>> -    seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
>> -           hweight8(sseu->subslice_mask));
>> +    for (s = 0; s < fls(sseu->slice_mask); s++) {
>> +        seq_printf(m, "  %s Slice%i %u subslices, mask=%04x\n", type,
>> +               s, hweight8(sseu->subslice_mask[s]),
>> +               sseu->subslice_mask[s]);
>> +    }
>>       seq_printf(m, "  %s EU Total: %u\n", type,
>>              sseu->eu_total);
>>       seq_printf(m, "  %s EU Per Subslice: %u\n", type,
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index 6c8da9d20c33..969835d3cbcd 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, 
>> void *data,
>>               return -ENODEV;
>>           break;
>>       case I915_PARAM_SUBSLICE_MASK:
>> -        value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>> +        value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
>>           if (!value)
>>               return -ENODEV;
>>           break;
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index d28592e43512..0bf6ef51cdb7 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -80,12 +80,17 @@ void intel_device_info_dump_flags(const struct 
>> intel_device_info *info,
>>     static void sseu_dump(const struct sseu_dev_info *sseu, struct 
>> drm_printer *p)
>>   {
>> +    int s;
>> +
>>       drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
>>       drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
>>       drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
>> -    drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
>> -    drm_printf(p, "subslice per slice: %u\n",
>> -           hweight8(sseu->subslice_mask));
>> +    for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
>> +        drm_printf(p, "slice%d subslice mask %04x\n",
>> +               s, sseu->subslice_mask[s]);
>> +        drm_printf(p, "slice%d subslice per slice: %u\n",
>> +               s, hweight8(sseu->subslice_mask[s]));
>
> Cosmetic only but consider condensing this into one line if you don't 
> have a preference to either.

Sure, just conscious about the 80characters.

>
>> +    }
>>       drm_printf(p, "EU total: %u\n", sseu->eu_total);
>>       drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
>>       drm_printf(p, "has slice power gating: %s\n",
>> @@ -119,22 +124,88 @@ void intel_device_info_dump(const struct 
>> intel_device_info *info,
>>       intel_device_info_dump_flags(info, p);
>>   }
>>   +static u16 compute_eu_total(const struct sseu_dev_info *sseu)
>> +{
>> +    u16 i, total = 0;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
>> +        total += hweight8(sseu->eu_mask[i]);
>> +
>> +    return total;
>> +}
>> +
>> +static u16 compute_subslice_total(const struct sseu_dev_info *sseu)
>> +{
>> +    u16 i, total = 0;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
>> +        total += hweight8(sseu->subslice_mask[i]);
>> +
>> +    return total;
>> +}
>> +
>>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>       const u32 fuse2 = I915_READ(GEN8_FUSE2);
>> +    int s, ss;
>> +    const int eu_mask = 0xff;
>> +    u32 subslice_mask, eu_en;
>>         sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>>                   GEN10_F2_S_ENA_SHIFT;
>> -    sseu->subslice_mask = (1 << 4) - 1;
>> -    sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>> -                 GEN10_F2_SS_DIS_SHIFT);
>> +    sseu->max_slices = 6;
>> +    sseu->max_subslices = 4;
>> +    sseu->max_eus_per_subslice = 8;
>>   -    sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
>> -    sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
>> -                     GEN10_EU_DIS_SS_MASK));
>> +    subslice_mask = (1 << 4) - 1;
>> +    subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>> +               GEN10_F2_SS_DIS_SHIFT);
>> +
>> +    /* Slice0 can have up to 3 subslices, but there are only 2 in
>> +     * slice1/2.
>> +     */
>
> Sorry for not spotting this earlier - this comment style is frowned 
> upon. Instead for multi-line please use:
>
> /*
>  * Blah blah...
>  */

Done.

>
>> +    sseu->subslice_mask[0] = subslice_mask;
>> +    for (s = 1; s < sseu->max_slices; s++)
>> +        sseu->subslice_mask[s] = subslice_mask & 0x3;
>> +
>> +    /* Slice0 */
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE0);
>> +    for (ss = 0; ss < sseu->max_subslices; ss++) {
>> +        sseu->eu_mask[sseu_eu_idx(sseu, 0, ss, 0)] =
>> +            (eu_en >> (8 * ss)) & eu_mask;
>> +    }
>> +    /* Slice1 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 1, 0, 0)] = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE1);
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 1, 1, 0)] = eu_en & eu_mask;
>> +    /* Slice2 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 2, 0, 0)] = (eu_en >> 8) & eu_mask;
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 2, 1, 0)] = (eu_en >> 16) & 
>> eu_mask;
>> +    /* Slice3 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 3, 0, 0)] = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE2);
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 3, 1, 0)] = eu_en & eu_mask;
>> +    /* Slice4 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 4, 0, 0)] = (eu_en >> 8) & eu_mask;
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 4, 1, 0)] = (eu_en >> 16) & 
>> eu_mask;
>> +    /* Slice5 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 5, 0, 0)] = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN10_EU_DISABLE3);
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 5, 1, 0)] = eu_en & eu_mask;
>
> Indices are guaranteed to be unique in these operations? No need to RMW?

For a given max_slices/max_subslices/DIV_ROUND_UP(max_eus_per_subslice, 
BITS_PER_BYTE) indices are unique.

>
>> +
>> +    /* Do a second pass where we marked the subslices disabled if all
>> +     * their eus are off.
>> +     */
>> +    for (s = 0; s < sseu->max_slices; s++) {
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>> +            if (sseu_eu_mask(sseu, s, ss, 0) == 0)
>> +                sseu->subslice_mask[s] &= ~BIT(ss);
>> +        }
>> +    }
>> +
>> +    sseu->subslice_total = compute_subslice_total(sseu);
>> +    sseu->eu_total = compute_eu_total(sseu);
>>         /*
>>        * CNL is expected to always have a uniform distribution
>> @@ -155,26 +226,31 @@ static void gen10_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   static void cherryview_sseu_info_init(struct drm_i915_private 
>> *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>> -    u32 fuse, eu_dis;
>> +    u32 fuse;
>>         fuse = I915_READ(CHV_FUSE_GT);
>>         sseu->slice_mask = BIT(0);
>> +    sseu->max_slices = 1;
>> +    sseu->max_subslices = 2;
>> +    sseu->max_eus_per_subslice = 8;
>>         if (!(fuse & CHV_FGT_DISABLE_SS0)) {
>> -        sseu->subslice_mask |= BIT(0);
>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
>> -                 CHV_FGT_EU_DIS_SS0_R1_MASK);
>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>> +        sseu->subslice_mask[0] |= BIT(0);
>> +        sseu->eu_mask[0] = ~((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> 
>> CHV_FGT_EU_DIS_SS0_R0_SHIFT);
>> +        sseu->eu_mask[0] |= ~(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) 
>> >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
>
> Use sseu_eu_idx?

Done.

>
>> +        sseu->subslice_mask[0] = 1;
>>       }
>>         if (!(fuse & CHV_FGT_DISABLE_SS1)) {
>> -        sseu->subslice_mask |= BIT(1);
>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
>> -                 CHV_FGT_EU_DIS_SS1_R1_MASK);
>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>> +        sseu->subslice_mask[0] |= BIT(1);
>> +        sseu->eu_mask[1] = ~((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> 
>> CHV_FGT_EU_DIS_SS0_R0_SHIFT);
>> +        sseu->eu_mask[1] |= ~(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) 
>> >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
>>       }
>>   +    sseu->subslice_total = compute_subslice_total(sseu);
>> +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * CHV expected to always have a uniform distribution of EU
>>        * across subslices.
>> @@ -196,41 +272,53 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   {
>>       struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>       struct sseu_dev_info *sseu = &info->sseu;
>> -    int s_max = 3, ss_max = 4, eu_max = 8;
>>       int s, ss;
>> -    u32 fuse2, eu_disable;
>> -    u8 eu_mask = 0xff;
>> +    u32 fuse2, eu_disable, subslice_mask;
>> +    const u8 eu_mask = 0xff;
>>         fuse2 = I915_READ(GEN8_FUSE2);
>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>> GEN8_F2_S_ENA_SHIFT;
>>   +    /* BXT has a single slice and at most 3 subslices. */
>> +    sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
>> +    sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
>> +    sseu->max_eus_per_subslice = 8;
>> +
>>       /*
>>        * The subslice disable field is global, i.e. it applies
>>        * to each of the enabled slices.
>>       */
>> -    sseu->subslice_mask = (1 << ss_max) - 1;
>> -    sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>> -                 GEN9_F2_SS_DIS_SHIFT);
>> +    subslice_mask = (1 << sseu->max_subslices) - 1;
>> +    subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>> +               GEN9_F2_SS_DIS_SHIFT);
>>         /*
>>        * Iterate through enabled slices and subslices to
>>        * count the total enabled EU.
>>       */
>> -    for (s = 0; s < s_max; s++) {
>> +    for (s = 0; s < sseu->max_slices; s++) {
>>           if (!(sseu->slice_mask & BIT(s)))
>>               /* skip disabled slice */
>>               continue;
>>   +        sseu->subslice_mask[s] = subslice_mask;
>> +
>>           eu_disable = I915_READ(GEN9_EU_DISABLE(s));
>> -        for (ss = 0; ss < ss_max; ss++) {
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>               int eu_per_ss;
>> +            u8 eu_disabled_mask;
>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>> +            if (!(sseu->subslice_mask[s] & BIT(ss)))
>>                   /* skip disabled subslice */
>>                   continue;
>>   -            eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
>> -                              eu_mask);
>> +            eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask; > +
>> +            sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] =
>> +                ~eu_disabled_mask;
>> +
>> +            eu_per_ss = sseu->max_eus_per_subslice -
>> +                hweight8(eu_disabled_mask);
>>                 /*
>>                * Record which subslice(s) has(have) 7 EUs. we
>> @@ -239,11 +327,12 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>                */
>>               if (eu_per_ss == 7)
>>                   sseu->subslice_7eu[s] |= BIT(ss);
>> -
>> -            sseu->eu_total += eu_per_ss;
>>           }
>>       }
>>   +    sseu->subslice_total = compute_subslice_total(sseu);
>> +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * SKL is expected to always have a uniform distribution
>>        * of EU across subslices with the exception that any one
>> @@ -269,8 +358,8 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>       sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>>         if (IS_GEN9_LP(dev_priv)) {
>> -#define IS_SS_DISABLED(ss)    (!(sseu->subslice_mask & BIT(ss)))
>> -        info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
>> +#define IS_SS_DISABLED(ss)    (!(sseu->subslice_mask[0] & BIT(ss)))
>> +        info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
>>             sseu->min_eu_in_pool = 0;
>>           if (info->has_pooled_eu) {
>> @@ -288,19 +377,22 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   static void broadwell_sseu_info_init(struct drm_i915_private 
>> *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>> -    const int s_max = 3, ss_max = 3, eu_max = 8;
>>       int s, ss;
>> -    u32 fuse2, eu_disable[3]; /* s_max */
>> +    u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>>         fuse2 = I915_READ(GEN8_FUSE2);
>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>> GEN8_F2_S_ENA_SHIFT;
>> +    sseu->max_slices = 3;
>> +    sseu->max_subslices = 3;
>> +    sseu->max_eus_per_subslice = 8;
>> +
>>       /*
>>        * The subslice disable field is global, i.e. it applies
>>        * to each of the enabled slices.
>>        */
>> -    sseu->subslice_mask = GENMASK(ss_max - 1, 0);
>> -    sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>> -                 GEN8_F2_SS_DIS_SHIFT);
>> +    subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
>> +    subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>> +               GEN8_F2_SS_DIS_SHIFT);
>>         eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & 
>> GEN8_EU_DIS0_S0_MASK;
>>       eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> 
>> GEN8_EU_DIS0_S1_SHIFT) |
>> @@ -314,30 +406,40 @@ static void broadwell_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>        * Iterate through enabled slices and subslices to
>>        * count the total enabled EU.
>>        */
>> -    for (s = 0; s < s_max; s++) {
>> +    for (s = 0; s < sseu->max_slices; s++) {
>>           if (!(sseu->slice_mask & BIT(s)))
>>               /* skip disabled slice */
>>               continue;
>>   -        for (ss = 0; ss < ss_max; ss++) {
>> +        sseu->subslice_mask[s] = subslice_mask;
>> +
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>> +            u8 eu_disabled_mask;
>>               u32 n_disabled;
>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>> +            if (!(sseu->subslice_mask[ss] & BIT(ss)))
>>                   /* skip disabled subslice */
>>                   continue;
>>   -            n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
>> +            eu_disabled_mask =
>> +                eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
>> +
>> +            sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] =
>> +                ~eu_disabled_mask;
>> +
>> +            n_disabled = hweight8(eu_disabled_mask);
>>                 /*
>>                * Record which subslices have 7 EUs.
>>                */
>> -            if (eu_max - n_disabled == 7)
>> +            if (sseu->max_eus_per_subslice - n_disabled == 7)
>>                   sseu->subslice_7eu[s] |= 1 << ss;
>> -
>> -            sseu->eu_total += eu_max - n_disabled;
>>           }
>>       }
>>   +    sseu->subslice_total = compute_subslice_total(sseu);
>> +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * BDW is expected to always have a uniform distribution of EU 
>> across
>>        * subslices with the exception that any one EU in any one 
>> subslice may
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>> b/drivers/gpu/drm/i915/intel_device_info.h
>> index 49cb27bd04c1..6eb8bcf5a213 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>> @@ -110,10 +110,14 @@ enum intel_platform {
>>       func(supports_tv); \
>>       func(has_ipc);
>>   +#define GEN_MAX_SLICES        (6) /* CNL upper bound */
>> +#define GEN_MAX_SUBSLICES    (7)
>> +
>>   struct sseu_dev_info {
>>       u8 slice_mask;
>> -    u8 subslice_mask;
>> -    u8 eu_total;
>> +    u8 subslice_mask[GEN_MAX_SUBSLICES];
>> +    u16 subslice_total;
>> +    u16 eu_total;
>>       u8 eu_per_subslice;
>>       u8 min_eu_in_pool;
>>       /* For each slice, which subslice(s) has(have) 7 EUs 
>> (bitfield)? */
>> @@ -121,6 +125,17 @@ struct sseu_dev_info {
>>       u8 has_slice_pg:1;
>>       u8 has_subslice_pg:1;
>>       u8 has_eu_pg:1;
>> +
>> +    /* Topology fields */
>> +    u8 max_slices;
>> +    u8 max_subslices;
>> +    u8 max_eus_per_subslice;
>> +
>> +    /* We don't have more than 8 eus per subslice at the moment and 
>> as we
>> +     * store eus enabled using bits, no need to multiply by eus per
>> +     * subslice.
>> +     */
>> +    u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>>   };
>>     struct intel_device_info {
>> @@ -167,7 +182,22 @@ struct intel_device_info {
>>     static inline unsigned int sseu_subslice_total(const struct 
>> sseu_dev_info *sseu)
>>   {
>> -    return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
>> +    return sseu->subslice_total;
>> +}
>> +
>> +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
>> +                  int slice, int subslice, int eu_group)
>
> What is eu_group for? Will it be used at some point?

In case we ever have more than 8 EUs per subslice.

>
>> +{
>> +    int subslice_stride = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
>
> What are these eights? sizeof(eu_mask[0]) ? Is this whole statement 
> DIV_ROUND_UP also?

Sorry, forgot to replace some of them. Done.

>
>> +    int slice_stride = sseu->max_subslices * subslice_stride;
>> +
>> +    return slice * slice_stride + subslice * subslice_stride + 
>> eu_group;
>> +}
>> +
>> +static inline int sseu_eu_mask(const struct sseu_dev_info *sseu,
>> +                   int slice, int subslice, int eu_group)
>> +{
>> +    return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice, eu_group)];
>>   }
>>     const char *intel_platform_name(enum intel_platform platform);
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index ff25f209d0a5..ac7896031b8d 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2098,7 +2098,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>>         if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>>           rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
>> -        rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
>> +        rpcs |= 
>> hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
>>               GEN8_RPCS_SS_CNT_SHIFT;
>>           rpcs |= GEN8_RPCS_ENABLE;
>>       }
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index c5ff203e42d6..23ae9a957fab 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum 
>> intel_engine_hangcheck_action a)
>>     #define instdone_subslice_mask(dev_priv__) \
>>       (INTEL_GEN(dev_priv__) == 7 ? \
>> -     1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
>> +     1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
>>     #define for_each_instdone_slice_subslice(dev_priv__, slice__, 
>> subslice__) \
>>       for ((slice__) = 0, (subslice__) = 0; \
>>
>
> Regards,
>
> Tvrtko
>
Tvrtko Ursulin Jan. 12, 2018, 11:05 a.m. UTC | #3
On 12/01/2018 10:58, Lionel Landwerlin wrote:
> On 12/01/18 10:15, Tvrtko Ursulin wrote:

[snip]

>>
>>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>>> @@ -80,12 +80,17 @@ void intel_device_info_dump_flags(const struct 
>>> intel_device_info *info,
>>>     static void sseu_dump(const struct sseu_dev_info *sseu, struct 
>>> drm_printer *p)
>>>   {
>>> +    int s;
>>> +
>>>       drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
>>>       drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
>>>       drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
>>> -    drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
>>> -    drm_printf(p, "subslice per slice: %u\n",
>>> -           hweight8(sseu->subslice_mask));
>>> +    for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
>>> +        drm_printf(p, "slice%d subslice mask %04x\n",
>>> +               s, sseu->subslice_mask[s]);
>>> +        drm_printf(p, "slice%d subslice per slice: %u\n",
>>> +               s, hweight8(sseu->subslice_mask[s]));
>>
>> Cosmetic only but consider condensing this into one line if you don't 
>> have a preference to either.
> 
> Sure, just conscious about the 80characters.

I wasn't 100% clear here - I meant the kernel log messages, not the 
source code. I think there is no point in logging two lines per slice 
where one only contains a mask, and second a count.

Regards,

Tvrtko
Lionel Landwerlin Jan. 12, 2018, 11:31 a.m. UTC | #4
On 12/01/18 11:05, Tvrtko Ursulin wrote:
>
> On 12/01/2018 10:58, Lionel Landwerlin wrote:
>> On 12/01/18 10:15, Tvrtko Ursulin wrote:
>
> [snip]
>
>>>
>>>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>>>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>>>> @@ -80,12 +80,17 @@ void intel_device_info_dump_flags(const struct 
>>>> intel_device_info *info,
>>>>     static void sseu_dump(const struct sseu_dev_info *sseu, struct 
>>>> drm_printer *p)
>>>>   {
>>>> +    int s;
>>>> +
>>>>       drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
>>>>       drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
>>>>       drm_printf(p, "subslice total: %u\n", 
>>>> sseu_subslice_total(sseu));
>>>> -    drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
>>>> -    drm_printf(p, "subslice per slice: %u\n",
>>>> -           hweight8(sseu->subslice_mask));
>>>> +    for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
>>>> +        drm_printf(p, "slice%d subslice mask %04x\n",
>>>> +               s, sseu->subslice_mask[s]);
>>>> +        drm_printf(p, "slice%d subslice per slice: %u\n",
>>>> +               s, hweight8(sseu->subslice_mask[s]));
>>>
>>> Cosmetic only but consider condensing this into one line if you 
>>> don't have a preference to either.
>>
>> Sure, just conscious about the 80characters.
>
> I wasn't 100% clear here - I meant the kernel log messages, not the 
> source code. I think there is no point in logging two lines per slice 
> where one only contains a mask, and second a count.
>
> Regards,
>
> Tvrtko
>

Ahaha :)

Okay, done.
Tvrtko Ursulin Jan. 12, 2018, 12:01 p.m. UTC | #5
On 12/01/2018 10:58, Lionel Landwerlin wrote:
> On 12/01/18 10:15, Tvrtko Ursulin wrote:
>>

[snip]

>>> +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
>>> +                  int slice, int subslice, int eu_group)
>>
>> What is eu_group for? Will it be used at some point?
> 
> In case we ever have more than 8 EUs per subslice.

I am thinking if we could hide that from the call sites, to avoid it 
being passed as zeros, and to avoid having to write loops in other 
patches which reference eu_groups, when it is not immediately obvious 
what that means.

Could we for instance have a helper which would clear/set numbered EUs 
in sseu_dev_info, and so hide all the implementation details?

sseu_enable_eus(sseu, slice, subslice, start, end);

Then when you have code like:

  sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] = ~eu_disabled_mask;

You would write it as:

  /* On this slice/subslice mark EUs 0 to N as enabled. */
  sseu_enable_eus(sseu, s, ss, 0, fls(~eu_disabled_mask));

Helper would internally know the size of the underlying storage and 
dtrt. There would be no need to manually manage eu_groups. In the 
initial implementation you could simply GEM_BUG_ON if the EU range does 
not fit into the current storage. Later u8 could be turned into u16 or 
similar. You also wouldn't have any iteration over eu_groups in this 
version.

I think that would be cleaner and easier to extend in the future. Unless 
I overlooked some important detail?

Or even simplify it by passing bitmask instead of start/end, and just 
have no support for more than 8 EUs in this version? No eu_group etc. 
When the need arises to have more, bump the eu_mask type to u16. That 
would require you to put back the stride parameter in the uAPI I think.

Regards,

Tvrtko
Lionel Landwerlin Jan. 12, 2018, 1:53 p.m. UTC | #6
On 12/01/18 12:01, Tvrtko Ursulin wrote:
>
> On 12/01/2018 10:58, Lionel Landwerlin wrote:
>> On 12/01/18 10:15, Tvrtko Ursulin wrote:
>>>
>
> [snip]
>
>>>> +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
>>>> +                  int slice, int subslice, int eu_group)
>>>
>>> What is eu_group for? Will it be used at some point?
>>
>> In case we ever have more than 8 EUs per subslice.
>
> I am thinking if we could hide that from the call sites, to avoid it 
> being passed as zeros, and to avoid having to write loops in other 
> patches which reference eu_groups, when it is not immediately obvious 
> what that means.
>
> Could we for instance have a helper which would clear/set numbered EUs 
> in sseu_dev_info, and so hide all the implementation details?
>
> sseu_enable_eus(sseu, slice, subslice, start, end);
>
> Then when you have code like:
>
>  sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] = ~eu_disabled_mask;
>
> You would write it as:
>
>  /* On this slice/subslice mark EUs 0 to N as enabled. */
>  sseu_enable_eus(sseu, s, ss, 0, fls(~eu_disabled_mask));

Hmm... I don't think that works if you have gaps, right?
Like a BXT 2x6 where a row of EUs has been fused off. It would be 
something like 0b01110111 or 0b10111011.

>
> Helper would internally know the size of the underlying storage and 
> dtrt. There would be no need to manually manage eu_groups. In the 
> initial implementation you could simply GEM_BUG_ON if the EU range 
> does not fit into the current storage. Later u8 could be turned into 
> u16 or similar. You also wouldn't have any iteration over eu_groups in 
> this version.
>
> I think that would be cleaner and easier to extend in the future. 
> Unless I overlooked some important detail?
>
> Or even simplify it by passing bitmask instead of start/end, and just 
> have no support for more than 8 EUs in this version? No eu_group etc. 
> When the need arises to have more, bump the eu_mask type to u16. That 
> would require you to put back the stride parameter in the uAPI I think.

I'm not really a fan of having the data field in userspace be 
reinterpreted (as u16 or u32) based on one of the other field.
It might be easier on the kernel side, but complicates userspace.

I would prefer to stick to u8 and have everybody think of 
slice/subslice/eus availability as array of u8 bit fields which you 
might need to iterate more than one if there are more than 8 elements.

>
> Regards,
>
> Tvrtko
>
Tvrtko Ursulin Jan. 12, 2018, 5:37 p.m. UTC | #7
On 12/01/2018 13:53, Lionel Landwerlin wrote:
> On 12/01/18 12:01, Tvrtko Ursulin wrote:
>>
>> On 12/01/2018 10:58, Lionel Landwerlin wrote:
>>> On 12/01/18 10:15, Tvrtko Ursulin wrote:
>>>>
>>
>> [snip]
>>
>>>>> +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
>>>>> +                  int slice, int subslice, int eu_group)
>>>>
>>>> What is eu_group for? Will it be used at some point?
>>>
>>> In case we ever have more than 8 EUs per subslice.
>>
>> I am thinking if we could hide that from the call sites, to avoid it 
>> being passed as zeros, and to avoid having to write loops in other 
>> patches which reference eu_groups, when it is not immediately obvious 
>> what that means.
>>
>> Could we for instance have a helper which would clear/set numbered EUs 
>> in sseu_dev_info, and so hide all the implementation details?
>>
>> sseu_enable_eus(sseu, slice, subslice, start, end);
>>
>> Then when you have code like:
>>
>>  sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] = ~eu_disabled_mask;
>>
>> You would write it as:
>>
>>  /* On this slice/subslice mark EUs 0 to N as enabled. */
>>  sseu_enable_eus(sseu, s, ss, 0, fls(~eu_disabled_mask));
> 
> Hmm... I don't think that works if you have gaps, right?
> Like a BXT 2x6 where a row of EUs has been fused off. It would be 
> something like 0b01110111 or 0b10111011.

Oops, you are right. I just don't like having this eu_group field which 
is internal data storage detail, leaked out, is always zero, and even 
unused today. So I am trying to come up with a nicer api.

What about sseu_set_eus(sseu, slice, subslice, mask)?

Then you can replace call sites like:

  sseu->eu_mask[sseu_eu_idx(sseu, 0, 0, 0)] =
	~((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
	CHV_FGT_EU_DIS_SS0_R0_SHIFT);
  sseu->eu_mask[sseu_eu_idx(sseu, 0, 0, 0)] |=
	~(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
	CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

With:

  mask = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
	CHV_FGT_EU_DIS_SS0_R0_SHIFT;
  mask |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
	CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

  sseu_set_eus(sseu, 0, 0, ~mask);

sseu_set_eus can then be the only place which understand the storage 
format. You can even later extend the mask size past u8 and it can still 
do the magic inside it.

>>
>> Helper would internally know the size of the underlying storage and 
>> dtrt. There would be no need to manually manage eu_groups. In the 
>> initial implementation you could simply GEM_BUG_ON if the EU range 
>> does not fit into the current storage. Later u8 could be turned into 
>> u16 or similar. You also wouldn't have any iteration over eu_groups in 
>> this version.
>>
>> I think that would be cleaner and easier to extend in the future. 
>> Unless I overlooked some important detail?
>>
>> Or even simplify it by passing bitmask instead of start/end, and just 
>> have no support for more than 8 EUs in this version? No eu_group etc. 
>> When the need arises to have more, bump the eu_mask type to u16. That 
>> would require you to put back the stride parameter in the uAPI I think.
> 
> I'm not really a fan of having the data field in userspace be 
> reinterpreted (as u16 or u32) based on one of the other field.
> It might be easier on the kernel side, but complicates userspace.
> 
> I would prefer to stick to u8 and have everybody think of 
> slice/subslice/eus availability as array of u8 bit fields which you 
> might need to iterate more than one if there are more than 8 elements.

You are thinking about bit-ordering? Yeah, if it is easier keep the 
formats the same. Even though strictly speaking internal and ABI data 
representation do not need to be the same. As said above, I am just 
looking for a solution which hides the eu_group parameter from the 
callers and also is a bit shorter to read.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2bb63073d73f..463029f72a0b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4285,7 +4285,7 @@  static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask |= BIT(ss);
+		sseu->subslice_mask[0] |= BIT(ss);
 		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -4332,7 +4332,7 @@  static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask |= BIT(s);
-		sseu->subslice_mask = info->sseu.subslice_mask;
+		sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
 
 		for (ss = 0; ss < ss_max; ss++) {
 			unsigned int eu_cnt;
@@ -4387,8 +4387,8 @@  static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		sseu->slice_mask |= BIT(s);
 
 		if (IS_GEN9_BC(dev_priv))
-			sseu->subslice_mask =
-				INTEL_INFO(dev_priv)->sseu.subslice_mask;
+			sseu->subslice_mask[s] =
+				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
 
 		for (ss = 0; ss < ss_max; ss++) {
 			unsigned int eu_cnt;
@@ -4398,7 +4398,7 @@  static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 					/* skip disabled subslice */
 					continue;
 
-				sseu->subslice_mask |= BIT(ss);
+				sseu->subslice_mask[s] |= BIT(ss);
 			}
 
 			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@@ -4420,9 +4420,12 @@  static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
 	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
 
 	if (sseu->slice_mask) {
-		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
 		sseu->eu_per_subslice =
 				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+		for (s = 0; s < fls(sseu->slice_mask); s++) {
+			sseu->subslice_mask[s] =
+				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
+		}
 		sseu->eu_total = sseu->eu_per_subslice *
 				 sseu_subslice_total(sseu);
 
@@ -4441,6 +4444,7 @@  static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	const char *type = is_available_info ? "Available" : "Enabled";
+	int s;
 
 	seq_printf(m, "  %s Slice Mask: %04x\n", type,
 		   sseu->slice_mask);
@@ -4448,10 +4452,11 @@  static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 		   hweight8(sseu->slice_mask));
 	seq_printf(m, "  %s Subslice Total: %u\n", type,
 		   sseu_subslice_total(sseu));
-	seq_printf(m, "  %s Subslice Mask: %04x\n", type,
-		   sseu->subslice_mask);
-	seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
-		   hweight8(sseu->subslice_mask));
+	for (s = 0; s < fls(sseu->slice_mask); s++) {
+		seq_printf(m, "  %s Slice%i %u subslices, mask=%04x\n", type,
+			   s, hweight8(sseu->subslice_mask[s]),
+			   sseu->subslice_mask[s]);
+	}
 	seq_printf(m, "  %s EU Total: %u\n", type,
 		   sseu->eu_total);
 	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6c8da9d20c33..969835d3cbcd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -414,7 +414,7 @@  static int i915_getparam(struct drm_device *dev, void *data,
 			return -ENODEV;
 		break;
 	case I915_PARAM_SUBSLICE_MASK:
-		value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
+		value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
 		if (!value)
 			return -ENODEV;
 		break;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index d28592e43512..0bf6ef51cdb7 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -80,12 +80,17 @@  void intel_device_info_dump_flags(const struct intel_device_info *info,
 
 static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 {
+	int s;
+
 	drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
 	drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
 	drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
-	drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
-	drm_printf(p, "subslice per slice: %u\n",
-		   hweight8(sseu->subslice_mask));
+	for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
+		drm_printf(p, "slice%d subslice mask %04x\n",
+			   s, sseu->subslice_mask[s]);
+		drm_printf(p, "slice%d subslice per slice: %u\n",
+			   s, hweight8(sseu->subslice_mask[s]));
+	}
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
 	drm_printf(p, "has slice power gating: %s\n",
@@ -119,22 +124,88 @@  void intel_device_info_dump(const struct intel_device_info *info,
 	intel_device_info_dump_flags(info, p);
 }
 
+static u16 compute_eu_total(const struct sseu_dev_info *sseu)
+{
+	u16 i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
+		total += hweight8(sseu->eu_mask[i]);
+
+	return total;
+}
+
+static u16 compute_subslice_total(const struct sseu_dev_info *sseu)
+{
+	u16 i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
+		total += hweight8(sseu->subslice_mask[i]);
+
+	return total;
+}
+
 static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
 	const u32 fuse2 = I915_READ(GEN8_FUSE2);
+	int s, ss;
+	const int eu_mask = 0xff;
+	u32 subslice_mask, eu_en;
 
 	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
 			    GEN10_F2_S_ENA_SHIFT;
-	sseu->subslice_mask = (1 << 4) - 1;
-	sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
-				 GEN10_F2_SS_DIS_SHIFT);
+	sseu->max_slices = 6;
+	sseu->max_subslices = 4;
+	sseu->max_eus_per_subslice = 8;
 
-	sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
-	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
-	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
-	sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
-				     GEN10_EU_DIS_SS_MASK));
+	subslice_mask = (1 << 4) - 1;
+	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
+			   GEN10_F2_SS_DIS_SHIFT);
+
+	/* Slice0 can have up to 3 subslices, but there are only 2 in
+	 * slice1/2.
+	 */
+	sseu->subslice_mask[0] = subslice_mask;
+	for (s = 1; s < sseu->max_slices; s++)
+		sseu->subslice_mask[s] = subslice_mask & 0x3;
+
+	/* Slice0 */
+	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
+	for (ss = 0; ss < sseu->max_subslices; ss++) {
+		sseu->eu_mask[sseu_eu_idx(sseu, 0, ss, 0)] =
+			(eu_en >> (8 * ss)) & eu_mask;
+	}
+	/* Slice1 */
+	sseu->eu_mask[sseu_eu_idx(sseu, 1, 0, 0)] = (eu_en >> 24) & eu_mask;
+	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
+	sseu->eu_mask[sseu_eu_idx(sseu, 1, 1, 0)] = eu_en & eu_mask;
+	/* Slice2 */
+	sseu->eu_mask[sseu_eu_idx(sseu, 2, 0, 0)] = (eu_en >> 8) & eu_mask;
+	sseu->eu_mask[sseu_eu_idx(sseu, 2, 1, 0)] = (eu_en >> 16) & eu_mask;
+	/* Slice3 */
+	sseu->eu_mask[sseu_eu_idx(sseu, 3, 0, 0)] = (eu_en >> 24) & eu_mask;
+	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
+	sseu->eu_mask[sseu_eu_idx(sseu, 3, 1, 0)] = eu_en & eu_mask;
+	/* Slice4 */
+	sseu->eu_mask[sseu_eu_idx(sseu, 4, 0, 0)] = (eu_en >> 8) & eu_mask;
+	sseu->eu_mask[sseu_eu_idx(sseu, 4, 1, 0)] = (eu_en >> 16) & eu_mask;
+	/* Slice5 */
+	sseu->eu_mask[sseu_eu_idx(sseu, 5, 0, 0)] = (eu_en >> 24) & eu_mask;
+	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
+	sseu->eu_mask[sseu_eu_idx(sseu, 5, 1, 0)] = eu_en & eu_mask;
+
+	/* Do a second pass where we marked the subslices disabled if all
+	 * their eus are off.
+	 */
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			if (sseu_eu_mask(sseu, s, ss, 0) == 0)
+				sseu->subslice_mask[s] &= ~BIT(ss);
+		}
+	}
+
+	sseu->subslice_total = compute_subslice_total(sseu);
+	sseu->eu_total = compute_eu_total(sseu);
 
 	/*
 	 * CNL is expected to always have a uniform distribution
@@ -155,26 +226,31 @@  static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
-	u32 fuse, eu_dis;
+	u32 fuse;
 
 	fuse = I915_READ(CHV_FUSE_GT);
 
 	sseu->slice_mask = BIT(0);
+	sseu->max_slices = 1;
+	sseu->max_subslices = 2;
+	sseu->max_eus_per_subslice = 8;
 
 	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
-		sseu->subslice_mask |= BIT(0);
-		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
-				 CHV_FGT_EU_DIS_SS0_R1_MASK);
-		sseu->eu_total += 8 - hweight32(eu_dis);
+		sseu->subslice_mask[0] |= BIT(0);
+		sseu->eu_mask[0] = ~((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT);
+		sseu->eu_mask[0] |= ~(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
+		sseu->subslice_mask[0] = 1;
 	}
 
 	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
-		sseu->subslice_mask |= BIT(1);
-		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
-				 CHV_FGT_EU_DIS_SS1_R1_MASK);
-		sseu->eu_total += 8 - hweight32(eu_dis);
+		sseu->subslice_mask[0] |= BIT(1);
+		sseu->eu_mask[1] = ~((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT);
+		sseu->eu_mask[1] |= ~(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
 	}
 
+	sseu->subslice_total = compute_subslice_total(sseu);
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * CHV expected to always have a uniform distribution of EU
 	 * across subslices.
@@ -196,41 +272,53 @@  static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_device_info *info = mkwrite_device_info(dev_priv);
 	struct sseu_dev_info *sseu = &info->sseu;
-	int s_max = 3, ss_max = 4, eu_max = 8;
 	int s, ss;
-	u32 fuse2, eu_disable;
-	u8 eu_mask = 0xff;
+	u32 fuse2, eu_disable, subslice_mask;
+	const u8 eu_mask = 0xff;
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
 
+	/* BXT has a single slice and at most 3 subslices. */
+	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
+	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
+	sseu->max_eus_per_subslice = 8;
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
 	*/
-	sseu->subslice_mask = (1 << ss_max) - 1;
-	sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
-				 GEN9_F2_SS_DIS_SHIFT);
+	subslice_mask = (1 << sseu->max_subslices) - 1;
+	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
+			   GEN9_F2_SS_DIS_SHIFT);
 
 	/*
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	*/
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
+		sseu->subslice_mask[s] = subslice_mask;
+
 		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
-		for (ss = 0; ss < ss_max; ss++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			int eu_per_ss;
+			u8 eu_disabled_mask;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslice_mask[s] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
-						      eu_mask);
+			eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask;
+
+			sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] =
+				~eu_disabled_mask;
+
+			eu_per_ss = sseu->max_eus_per_subslice -
+				hweight8(eu_disabled_mask);
 
 			/*
 			 * Record which subslice(s) has(have) 7 EUs. we
@@ -239,11 +327,12 @@  static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 			 */
 			if (eu_per_ss == 7)
 				sseu->subslice_7eu[s] |= BIT(ss);
-
-			sseu->eu_total += eu_per_ss;
 		}
 	}
 
+	sseu->subslice_total = compute_subslice_total(sseu);
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * SKL is expected to always have a uniform distribution
 	 * of EU across subslices with the exception that any one
@@ -269,8 +358,8 @@  static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(dev_priv)) {
-#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
-		info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
+#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
+		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
 
 		sseu->min_eu_in_pool = 0;
 		if (info->has_pooled_eu) {
@@ -288,19 +377,22 @@  static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
-	const int s_max = 3, ss_max = 3, eu_max = 8;
 	int s, ss;
-	u32 fuse2, eu_disable[3]; /* s_max */
+	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+	sseu->max_slices = 3;
+	sseu->max_subslices = 3;
+	sseu->max_eus_per_subslice = 8;
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
 	 */
-	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
-	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
-				 GEN8_F2_SS_DIS_SHIFT);
+	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
+	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
+			   GEN8_F2_SS_DIS_SHIFT);
 
 	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
 	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
@@ -314,30 +406,40 @@  static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	 */
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
-		for (ss = 0; ss < ss_max; ss++) {
+		sseu->subslice_mask[s] = subslice_mask;
+
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			u8 eu_disabled_mask;
 			u32 n_disabled;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslice_mask[ss] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
+			eu_disabled_mask =
+				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
+
+			sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] =
+				~eu_disabled_mask;
+
+			n_disabled = hweight8(eu_disabled_mask);
 
 			/*
 			 * Record which subslices have 7 EUs.
 			 */
-			if (eu_max - n_disabled == 7)
+			if (sseu->max_eus_per_subslice - n_disabled == 7)
 				sseu->subslice_7eu[s] |= 1 << ss;
-
-			sseu->eu_total += eu_max - n_disabled;
 		}
 	}
 
+	sseu->subslice_total = compute_subslice_total(sseu);
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * BDW is expected to always have a uniform distribution of EU across
 	 * subslices with the exception that any one EU in any one subslice may
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 49cb27bd04c1..6eb8bcf5a213 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -110,10 +110,14 @@  enum intel_platform {
 	func(supports_tv); \
 	func(has_ipc);
 
+#define GEN_MAX_SLICES		(6) /* CNL upper bound */
+#define GEN_MAX_SUBSLICES	(7)
+
 struct sseu_dev_info {
 	u8 slice_mask;
-	u8 subslice_mask;
-	u8 eu_total;
+	u8 subslice_mask[GEN_MAX_SUBSLICES];
+	u16 subslice_total;
+	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
 	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
@@ -121,6 +125,17 @@  struct sseu_dev_info {
 	u8 has_slice_pg:1;
 	u8 has_subslice_pg:1;
 	u8 has_eu_pg:1;
+
+	/* Topology fields */
+	u8 max_slices;
+	u8 max_subslices;
+	u8 max_eus_per_subslice;
+
+	/* We don't have more than 8 eus per subslice at the moment and as we
+	 * store eus enabled using bits, no need to multiply by eus per
+	 * subslice.
+	 */
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
 };
 
 struct intel_device_info {
@@ -167,7 +182,22 @@  struct intel_device_info {
 
 static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
-	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
+	return sseu->subslice_total;
+}
+
+static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
+			      int slice, int subslice, int eu_group)
+{
+	int subslice_stride = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
+	int slice_stride = sseu->max_subslices * subslice_stride;
+
+	return slice * slice_stride + subslice * subslice_stride + eu_group;
+}
+
+static inline int sseu_eu_mask(const struct sseu_dev_info *sseu,
+			       int slice, int subslice, int eu_group)
+{
+	return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice, eu_group)];
 }
 
 const char *intel_platform_name(enum intel_platform platform);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ff25f209d0a5..ac7896031b8d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2098,7 +2098,7 @@  make_rpcs(struct drm_i915_private *dev_priv)
 
 	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
 		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
-		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
+		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
 			GEN8_RPCS_SS_CNT_SHIFT;
 		rpcs |= GEN8_RPCS_ENABLE;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..23ae9a957fab 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,7 +90,7 @@  hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 #define instdone_subslice_mask(dev_priv__) \
 	(INTEL_GEN(dev_priv__) == 7 ? \
-	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
+	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
 
 #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
 	for ((slice__) = 0, (subslice__) = 0; \