diff mbox

[1/6] drm/i915: store all subslice masks

Message ID 20171218153520.14181-2-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin Dec. 18, 2017, 3:35 p.m. UTC
Up to now, subslice mask was assumed to be uniform across slices. But
starting with Cannonlake, slices can be asymetric (for example slice0
has different number of subslices as slice1+). This change stores all
subslices masks for all slices rather than having a single mask that
applies to all slices.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  24 +++--
 drivers/gpu/drm/i915/i915_drv.c          |   2 +-
 drivers/gpu/drm/i915/i915_drv.h          |  23 ++++-
 drivers/gpu/drm/i915/intel_device_info.c | 169 ++++++++++++++++++++++---------
 drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
 6 files changed, 161 insertions(+), 61 deletions(-)

Comments

Tvrtko Ursulin Jan. 11, 2018, 11:12 a.m. UTC | #1
On 18/12/2017 15:35, Lionel Landwerlin wrote:
> Up to now, subslice mask was assumed to be uniform across slices. But
> starting with Cannonlake, slices can be asymetric (for example slice0

asymmetric, thanks auto spell checker. :)

> has different number of subslices as slice1+). This change stores all
> subslices masks for all slices rather than having a single mask that
> applies to all slices.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      |  24 +++--
>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>   drivers/gpu/drm/i915/i915_drv.h          |  23 ++++-
>   drivers/gpu/drm/i915/intel_device_info.c | 169 ++++++++++++++++++++++---------
>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>   6 files changed, 161 insertions(+), 61 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 0ddce72552bf..0c7890b695c5 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4293,7 +4293,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>   			continue;
>   
>   		sseu->slice_mask = BIT(0);
> -		sseu->subslice_mask |= BIT(ss);
> +		sseu->subslices_mask[0] |= BIT(ss);
>   		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>   			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>   			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
> @@ -4340,7 +4340,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>   			continue;
>   
>   		sseu->slice_mask |= BIT(s);
> -		sseu->subslice_mask = info->sseu.subslice_mask;
> +		sseu->subslices_mask[s] = info->sseu.subslices_mask[s];
>   
>   		for (ss = 0; ss < ss_max; ss++) {
>   			unsigned int eu_cnt;
> @@ -4395,8 +4395,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   		sseu->slice_mask |= BIT(s);
>   
>   		if (IS_GEN9_BC(dev_priv))
> -			sseu->subslice_mask =
> -				INTEL_INFO(dev_priv)->sseu.subslice_mask;
> +			sseu->subslices_mask[s] =
> +				INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
>   
>   		for (ss = 0; ss < ss_max; ss++) {
>   			unsigned int eu_cnt;
> @@ -4406,7 +4406,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   					/* skip disabled subslice */
>   					continue;
>   
> -				sseu->subslice_mask |= BIT(ss);
> +				sseu->subslices_mask[s] |= BIT(ss);
>   			}
>   
>   			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
> @@ -4428,9 +4428,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>   	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>   
>   	if (sseu->slice_mask) {
> -		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>   		sseu->eu_per_subslice =
>   				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
> +		for (s = 0; s < fls(sseu->slice_mask); s++) {
> +			sseu->subslices_mask[s] =
> +				INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
> +		}
>   		sseu->eu_total = sseu->eu_per_subslice *
>   				 sseu_subslice_total(sseu);
>   
> @@ -4449,6 +4452,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
>   {
>   	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>   	const char *type = is_available_info ? "Available" : "Enabled";
> +	int s;
>   
>   	seq_printf(m, "  %s Slice Mask: %04x\n", type,
>   		   sseu->slice_mask);
> @@ -4456,10 +4460,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
>   		   hweight8(sseu->slice_mask));
>   	seq_printf(m, "  %s Subslice Total: %u\n", type,
>   		   sseu_subslice_total(sseu));
> -	seq_printf(m, "  %s Subslice Mask: %04x\n", type,
> -		   sseu->subslice_mask);
> -	seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
> -		   hweight8(sseu->subslice_mask));
> +	for (s = 0; s < fls(sseu->slice_mask); s++) {

Slice mask is always contiguous bits?

> +		seq_printf(m, "  %s Slice%i Subslice Mask: %04x\n", type,
> +			   s, sseu->subslices_mask[s]);

Don't want to keep printing the count? Like maybe " %s Slice%u %u 
sublices, mask=%04x\n" ?

> +	}
>   	seq_printf(m, "  %s EU Total: %u\n", type,
>   		   sseu->eu_total);
>   	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 72bea281edb7..8b99e415c345 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_SUBSLICE_MASK:
> -		value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
> +		value = INTEL_INFO(dev_priv)->sseu.subslices_mask[0];
>   		if (!value)
>   			return -ENODEV;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1aba5657f5f0..82fc59078c6a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -802,9 +802,12 @@ struct intel_csr {
>   	func(supports_tv); \
>   	func(has_ipc);
>   
> +#define GEN_MAX_SLICES		(6) /* CNL upper bound */
> +#define GEN_MAX_SUBSLICES	(7)
> +
>   struct sseu_dev_info {
>   	u8 slice_mask;
> -	u8 subslice_mask;
> +	u8 subslices_mask[GEN_MAX_SLICES];

Personally I would probably kept the subslice_mask name and just turned 
it into an array, but that is bike-shed territory.

>   	u8 eu_total;
>   	u8 eu_per_subslice;
>   	u8 min_eu_in_pool;
> @@ -813,11 +816,27 @@ struct sseu_dev_info {
>   	u8 has_slice_pg:1;
>   	u8 has_subslice_pg:1;
>   	u8 has_eu_pg:1;
> +
> +	/* Topology fields */
> +	u8 max_slices;
> +	u8 max_subslices;
> +	u8 max_eus_per_subslice;
> +
> +	/* We don't have more than 8 eus per subslice at the moment and as we
> +	 * store eus enabled using bits, no need to multiply by eus per
> +	 * subslice.
> +	 */
> +	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>   };
>   
>   static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
>   {
> -	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
> +	unsigned s, total = 0;
> +
> +	for (s = 0; s < ARRAY_SIZE(sseu->subslices_mask); s++)
> +		total += hweight8(sseu->subslices_mask[s]);
> +
> +	return total;

The function is bigger now so maybe it needs stops being inline, but 
either moved to the .c file, or even the total stored in struct 
sseu_dev_info, and then this becomes just a getter?

>   }
>   
>   /* Keep in gen based order, and chronological order within a gen */
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index f478be3ae0ba..6a3c40439e83 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -82,22 +82,74 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv)
>   #undef PRINT_FLAG
>   }
>   
> +static u8 compute_eu_total(const struct sseu_dev_info *sseu)
> +{
> +	u8 i, total = 0;

sseu_subslice_total returns an unsinged int, while this is u8, 
suggesting there can be more sublices and eus. ;) Jokes aside, I'd 
probably make them consistent. The index variable is probably best as 
unsigned int, but the total and return can be u8 if big enough for total 
eus? Either way best it matches the types in the struct they get 
assigned to.

> +
> +	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
> +		total += hweight8(sseu->eu_mask[i]);
> +
> +	return total;
> +}
> +
>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>   	const u32 fuse2 = I915_READ(GEN8_FUSE2);
> +	int s, ss, eu_mask = 0xff;

const eu_mask ?

> +	u32 subslice_mask, eu_en;
>   
>   	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>   			    GEN10_F2_S_ENA_SHIFT;
> -	sseu->subslice_mask = (1 << 4) - 1;
> -	sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> -				 GEN10_F2_SS_DIS_SHIFT);
> +	sseu->max_slices = 6;
> +	sseu->max_subslices = 4;
> +	sseu->max_eus_per_subslice = 8;
> +
> +	subslice_mask = (1 << 4) - 1;
> +	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> +			   GEN10_F2_SS_DIS_SHIFT);
> +
> +	/* Slice0 can have up to 3 subslices, but there are only 2 in
> +	 * slice1/2.
> +	 */
> +	sseu->subslices_mask[0] = subslice_mask;
> +	for (s = 1; s < sseu->max_slices; s++)
> +		sseu->subslices_mask[s] = subslice_mask & 0x3;
> +
> +	/* Slice0 */
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
> +	for (ss = 0; ss < sseu->max_subslices; ss++)
> +		sseu->eu_mask[ss]     = (eu_en >> (8 * ss)) & eu_mask;
> +	/* Slice1 */
> +	sseu->eu_mask[sseu->max_subslices]         = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
> +	sseu->eu_mask[sseu->max_subslices + 1]     = eu_en & eu_mask;

I suggest a helper to index into sse->eu_mask, like 
sseu->eu_mask[_eu_mask_idx(slice, subslice)] or something, so it is more 
readable what is happening here. Or even:

  _eu_mask(sseu, slice, subslice) = mask;

Doable? I am not 100% I picked up exactly on the layout of the eu_mask 
array.. each element is one subslice? Consecutive for slice0, 
subslice0..N, slice1... sliceN ?

> +	/* Slice2 */
> +	sseu->eu_mask[2 * sseu->max_subslices]     = (eu_en >> 8) & eu_mask;
> +	sseu->eu_mask[2 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask;
> +	/* Slice3 */
> +	sseu->eu_mask[3 * sseu->max_subslices]     = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
> +	sseu->eu_mask[3 * sseu->max_subslices + 1] = eu_en & eu_mask;
> +	/* Slice4 */
> +	sseu->eu_mask[4 * sseu->max_subslices]     = (eu_en >> 8) & eu_mask;
> +	sseu->eu_mask[4 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask;
> +	/* Slice5 */
> +	sseu->eu_mask[5 * sseu->max_subslices]     = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
> +	sseu->eu_mask[5 * sseu->max_subslices + 1] = eu_en & eu_mask;
> +
> +	/* Do a second pass where we marked the subslices disabled if all
> +	 * their eus are off.
> +	 */
> +	for (s = 0; s < sseu->max_slices; s++) {
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
> +			if (sseu->eu_mask[s * sseu->max_subslices + ss] == 0)
> +				sseu->subslices_mask[s] &= ~BIT(ss);
> +		}
> +	}
>   
> -	sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
> -	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
> -	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
> -	sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
> -				     GEN10_EU_DIS_SS_MASK));
> +	sseu->eu_total = compute_eu_total(sseu);
>   
>   	/*
>   	 * CNL is expected to always have a uniform distribution
> @@ -118,26 +170,30 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>   static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
> -	u32 fuse, eu_dis;
> +	u32 fuse;
>   
>   	fuse = I915_READ(CHV_FUSE_GT);
>   
>   	sseu->slice_mask = BIT(0);
> +	sseu->max_slices = 1;
> +	sseu->max_subslices = 2;
> +	sseu->max_eus_per_subslice = 8;
>   
>   	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
> -		sseu->subslice_mask |= BIT(0);
> -		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
> -				 CHV_FGT_EU_DIS_SS0_R1_MASK);
> -		sseu->eu_total += 8 - hweight32(eu_dis);
> +		sseu->subslices_mask[0] |= BIT(0);
> +		sseu->eu_mask[0] = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
> +		sseu->eu_mask[0] |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;

Do you need to invert the bits here?

> +		sseu->subslices_mask[0] = 1;
>   	}
>   
>   	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
> -		sseu->subslice_mask |= BIT(1);
> -		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
> -				 CHV_FGT_EU_DIS_SS1_R1_MASK);
> -		sseu->eu_total += 8 - hweight32(eu_dis);
> +		sseu->subslices_mask[0] |= BIT(1);
> +		sseu->eu_mask[1] = (fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
> +		sseu->eu_mask[2] |= ((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;

Why eu_mask indices 1 and 2 here, while for subslice 0 you merged them 
into index 0?

>   	}
>   
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * CHV expected to always have a uniform distribution of EU
>   	 * across subslices.
> @@ -159,41 +215,50 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_device_info *info = mkwrite_device_info(dev_priv);
>   	struct sseu_dev_info *sseu = &info->sseu;
> -	int s_max = 3, ss_max = 4, eu_max = 8;
>   	int s, ss;
> -	u32 fuse2, eu_disable;
> +	u32 fuse2, eu_disable, subslice_mask;
>   	u8 eu_mask = 0xff;
>   
>   	fuse2 = I915_READ(GEN8_FUSE2);
>   	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
>   
> +	/* BXT has a single slice and at most 3 subslices. */
> +	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
> +	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
> +	sseu->max_eus_per_subslice = 8;
> +
>   	/*
>   	 * The subslice disable field is global, i.e. it applies
>   	 * to each of the enabled slices.
>   	*/
> -	sseu->subslice_mask = (1 << ss_max) - 1;
> -	sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
> -				 GEN9_F2_SS_DIS_SHIFT);
> +	subslice_mask = (1 << sseu->max_subslices) - 1;
> +	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
> +			   GEN9_F2_SS_DIS_SHIFT);
>   
>   	/*
>   	 * Iterate through enabled slices and subslices to
>   	 * count the total enabled EU.
>   	*/
> -	for (s = 0; s < s_max; s++) {
> +	for (s = 0; s < sseu->max_slices; s++) {
>   		if (!(sseu->slice_mask & BIT(s)))
>   			/* skip disabled slice */
>   			continue;
>   
> +		sseu->subslices_mask[s] = subslice_mask;
> +
>   		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
> -		for (ss = 0; ss < ss_max; ss++) {
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
>   			int eu_per_ss;
>   
> -			if (!(sseu->subslice_mask & BIT(ss)))
> +			if (!(sseu->subslices_mask[s] & BIT(ss)))
>   				/* skip disabled subslice */
>   				continue;
>   
> -			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
> -						      eu_mask);
> +			sseu->eu_mask[ss + s * sseu->max_subslices] =
> +				~((eu_disable >> (ss*8)) & eu_mask);
> +
> +			eu_per_ss = sseu->max_eus_per_subslice -
> +				hweight8((eu_disable >> (ss*8)) & eu_mask);

Store "eu_disable >> (ss*8)) & eu_mask" in a local for readability since 
it is calculated twice?

>   
>   			/*
>   			 * Record which subslice(s) has(have) 7 EUs. we
> @@ -202,11 +267,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   			 */
>   			if (eu_per_ss == 7)
>   				sseu->subslice_7eu[s] |= BIT(ss);
> -
> -			sseu->eu_total += eu_per_ss;

You could theoretically keep this running total and then at the end 
compare against compute_eu_total under a GEM_BUG_ON, just to check 
everyting is working as expected.

>   		}
>   	}
>   
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * SKL is expected to always have a uniform distribution
>   	 * of EU across subslices with the exception that any one
> @@ -232,8 +297,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>   
>   	if (IS_GEN9_LP(dev_priv)) {
> -#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
> -		info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
> +#define IS_SS_DISABLED(ss)	(!(sseu->subslices_mask[0] & BIT(ss)))
> +		info->has_pooled_eu = hweight8(sseu->subslices_mask[0]) == 3;
>   
>   		sseu->min_eu_in_pool = 0;
>   		if (info->has_pooled_eu) {
> @@ -251,19 +316,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
> -	const int s_max = 3, ss_max = 3, eu_max = 8;
>   	int s, ss;
> -	u32 fuse2, eu_disable[3]; /* s_max */
> +	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>   
>   	fuse2 = I915_READ(GEN8_FUSE2);
>   	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
> +	sseu->max_slices = 3;
> +	sseu->max_subslices = 3;
> +	sseu->max_eus_per_subslice = 8;
> +
>   	/*
>   	 * The subslice disable field is global, i.e. it applies
>   	 * to each of the enabled slices.
>   	 */
> -	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
> -	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
> -				 GEN8_F2_SS_DIS_SHIFT);
> +	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
> +	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
> +			   GEN8_F2_SS_DIS_SHIFT);
>   
>   	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
>   	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
> @@ -277,30 +345,36 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   	 * Iterate through enabled slices and subslices to
>   	 * count the total enabled EU.
>   	 */
> -	for (s = 0; s < s_max; s++) {
> +	for (s = 0; s < sseu->max_slices; s++) {
>   		if (!(sseu->slice_mask & BIT(s)))
>   			/* skip disabled slice */
>   			continue;
>   
> -		for (ss = 0; ss < ss_max; ss++) {
> +		sseu->subslices_mask[s] = subslice_mask;
> +
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
>   			u32 n_disabled;
>   
> -			if (!(sseu->subslice_mask & BIT(ss)))
> +			if (!(sseu->subslices_mask[ss] & BIT(ss)))
>   				/* skip disabled subslice */
>   				continue;
>   
> -			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
> +			sseu->eu_mask[ss + s * sseu->max_subslices] =
> +				~(eu_disable[s] >>
> +				  (ss * sseu->max_eus_per_subslice));
> +			n_disabled = hweight8(eu_disable[s] >>
> +					      (ss * sseu->max_eus_per_subslice));

As above could use a local for "eu_disable[s] >> (ss * 
sseu->max_eus_per_subslice)"

>   
>   			/*
>   			 * Record which subslices have 7 EUs.
>   			 */
> -			if (eu_max - n_disabled == 7)
> +			if (sseu->max_eus_per_subslice - n_disabled == 7)
>   				sseu->subslice_7eu[s] |= 1 << ss;
> -
> -			sseu->eu_total += eu_max - n_disabled;
>   		}
>   	}
>   
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * BDW is expected to always have a uniform distribution of EU across
>   	 * subslices with the exception that any one EU in any one subslice may
> @@ -437,6 +511,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_device_info *info = mkwrite_device_info(dev_priv);
>   	enum pipe pipe;
> +	int s;
>   
>   	if (INTEL_GEN(dev_priv) >= 10) {
>   		for_each_pipe(dev_priv, pipe)
> @@ -548,9 +623,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
>   	DRM_DEBUG_DRIVER("subslice total: %u\n",
>   			 sseu_subslice_total(&info->sseu));
> -	DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask);
> -	DRM_DEBUG_DRIVER("subslice per slice: %u\n",
> -			 hweight8(info->sseu.subslice_mask));
> +	for (s = 0; s < ARRAY_SIZE(info->sseu.subslices_mask); s++) {
> +		DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslices_mask[s]);
> +		DRM_DEBUG_DRIVER("subslice per slice: %u\n",
> +				 hweight8(info->sseu.subslices_mask[s]));

Put a subslice index into messages?

> +	}
>   	DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
>   	DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice);
>   	DRM_DEBUG_DRIVER("has slice power gating: %s\n",
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 2e38fbfdf08f..6e55b842c21d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2072,7 +2072,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>   
>   	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>   		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
> -		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
> +		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslices_mask[0]) <<
>   			GEN8_RPCS_SS_CNT_SHIFT;
>   		rpcs |= GEN8_RPCS_ENABLE;
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c5ff203e42d6..a94bc1b5c502 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
>   
>   #define instdone_subslice_mask(dev_priv__) \
>   	(INTEL_GEN(dev_priv__) == 7 ? \
> -	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
> +	 1 : INTEL_INFO(dev_priv__)->sseu.subslices_mask[0])
>   
>   #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
>   	for ((slice__) = 0, (subslice__) = 0; \
> 

Regards,

Tvrtko
Lionel Landwerlin Jan. 11, 2018, 3:48 p.m. UTC | #2
On 11/01/18 11:12, Tvrtko Ursulin wrote:
>
> On 18/12/2017 15:35, Lionel Landwerlin wrote:
>> Up to now, subslice mask was assumed to be uniform across slices. But
>> starting with Cannonlake, slices can be asymetric (for example slice0
>
> asymmetric, thanks auto spell checker. :)

Done, thanks.

>
>> has different number of subslices as slice1+). This change stores all
>> subslices masks for all slices rather than having a single mask that
>> applies to all slices.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c      |  24 +++--
>>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>>   drivers/gpu/drm/i915/i915_drv.h          |  23 ++++-
>>   drivers/gpu/drm/i915/intel_device_info.c | 169 
>> ++++++++++++++++++++++---------
>>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>>   6 files changed, 161 insertions(+), 61 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 0ddce72552bf..0c7890b695c5 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -4293,7 +4293,7 @@ static void 
>> cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>>               continue;
>>             sseu->slice_mask = BIT(0);
>> -        sseu->subslice_mask |= BIT(ss);
>> +        sseu->subslices_mask[0] |= BIT(ss);
>>           eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>>                ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>>                ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
>> @@ -4340,7 +4340,7 @@ static void gen10_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>               continue;
>>             sseu->slice_mask |= BIT(s);
>> -        sseu->subslice_mask = info->sseu.subslice_mask;
>> +        sseu->subslices_mask[s] = info->sseu.subslices_mask[s];
>>             for (ss = 0; ss < ss_max; ss++) {
>>               unsigned int eu_cnt;
>> @@ -4395,8 +4395,8 @@ static void gen9_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>           sseu->slice_mask |= BIT(s);
>>             if (IS_GEN9_BC(dev_priv))
>> -            sseu->subslice_mask =
>> -                INTEL_INFO(dev_priv)->sseu.subslice_mask;
>> +            sseu->subslices_mask[s] =
>> + INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
>>             for (ss = 0; ss < ss_max; ss++) {
>>               unsigned int eu_cnt;
>> @@ -4406,7 +4406,7 @@ static void gen9_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>                       /* skip disabled subslice */
>>                       continue;
>>   -                sseu->subslice_mask |= BIT(ss);
>> +                sseu->subslices_mask[s] |= BIT(ss);
>>               }
>>                 eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
>> @@ -4428,9 +4428,12 @@ static void 
>> broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>>       sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>>         if (sseu->slice_mask) {
>> -        sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>           sseu->eu_per_subslice =
>>                   INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
>> +        for (s = 0; s < fls(sseu->slice_mask); s++) {
>> +            sseu->subslices_mask[s] =
>> + INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
>> +        }
>>           sseu->eu_total = sseu->eu_per_subslice *
>>                    sseu_subslice_total(sseu);
>>   @@ -4449,6 +4452,7 @@ static void i915_print_sseu_info(struct 
>> seq_file *m, bool is_available_info,
>>   {
>>       struct drm_i915_private *dev_priv = node_to_i915(m->private);
>>       const char *type = is_available_info ? "Available" : "Enabled";
>> +    int s;
>>         seq_printf(m, "  %s Slice Mask: %04x\n", type,
>>              sseu->slice_mask);
>> @@ -4456,10 +4460,10 @@ static void i915_print_sseu_info(struct 
>> seq_file *m, bool is_available_info,
>>              hweight8(sseu->slice_mask));
>>       seq_printf(m, "  %s Subslice Total: %u\n", type,
>>              sseu_subslice_total(sseu));
>> -    seq_printf(m, "  %s Subslice Mask: %04x\n", type,
>> -           sseu->subslice_mask);
>> -    seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
>> -           hweight8(sseu->subslice_mask));
>> +    for (s = 0; s < fls(sseu->slice_mask); s++) {
>
> Slice mask is always contiguous bits?

I have a 2x6 BXT on my desk where the subslice0 appears to be fused-off.
I assumed same could be true for slices.
fls() should make us iterate through all the slices (even the fused off 
ones).

>
>> +        seq_printf(m, "  %s Slice%i Subslice Mask: %04x\n", type,
>> +               s, sseu->subslices_mask[s]);
>
> Don't want to keep printing the count? Like maybe " %s Slice%u %u 
> sublices, mask=%04x\n" ?

Sure, done.

>
>> +    }
>>       seq_printf(m, "  %s EU Total: %u\n", type,
>>              sseu->eu_total);
>>       seq_printf(m, "  %s EU Per Subslice: %u\n", type,
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index 72bea281edb7..8b99e415c345 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, 
>> void *data,
>>               return -ENODEV;
>>           break;
>>       case I915_PARAM_SUBSLICE_MASK:
>> -        value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>> +        value = INTEL_INFO(dev_priv)->sseu.subslices_mask[0];
>>           if (!value)
>>               return -ENODEV;
>>           break;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 1aba5657f5f0..82fc59078c6a 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -802,9 +802,12 @@ struct intel_csr {
>>       func(supports_tv); \
>>       func(has_ipc);
>>   +#define GEN_MAX_SLICES        (6) /* CNL upper bound */
>> +#define GEN_MAX_SUBSLICES    (7)
>> +
>>   struct sseu_dev_info {
>>       u8 slice_mask;
>> -    u8 subslice_mask;
>> +    u8 subslices_mask[GEN_MAX_SLICES];
>
> Personally I would probably kept the subslice_mask name and just 
> turned it into an array, but that is bike-shed territory.

It was easier to spot all the places to modify. Will go back.

>
>>       u8 eu_total;
>>       u8 eu_per_subslice;
>>       u8 min_eu_in_pool;
>> @@ -813,11 +816,27 @@ struct sseu_dev_info {
>>       u8 has_slice_pg:1;
>>       u8 has_subslice_pg:1;
>>       u8 has_eu_pg:1;
>> +
>> +    /* Topology fields */
>> +    u8 max_slices;
>> +    u8 max_subslices;
>> +    u8 max_eus_per_subslice;
>> +
>> +    /* We don't have more than 8 eus per subslice at the moment and 
>> as we
>> +     * store eus enabled using bits, no need to multiply by eus per
>> +     * subslice.
>> +     */
>> +    u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>>   };
>>     static inline unsigned int sseu_subslice_total(const struct 
>> sseu_dev_info *sseu)
>>   {
>> -    return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
>> +    unsigned s, total = 0;
>> +
>> +    for (s = 0; s < ARRAY_SIZE(sseu->subslices_mask); s++)
>> +        total += hweight8(sseu->subslices_mask[s]);
>> +
>> +    return total;
>
> The function is bigger now so maybe it needs stops being inline, but 
> either moved to the .c file, or even the total stored in struct 
> sseu_dev_info, and then this becomes just a getter?

Let's go with a subslice_total field.

>
>>   }
>>     /* Keep in gen based order, and chronological order within a gen */
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index f478be3ae0ba..6a3c40439e83 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -82,22 +82,74 @@ void intel_device_info_dump(struct 
>> drm_i915_private *dev_priv)
>>   #undef PRINT_FLAG
>>   }
>>   +static u8 compute_eu_total(const struct sseu_dev_info *sseu)
>> +{
>> +    u8 i, total = 0;
>
> sseu_subslice_total returns an unsinged int, while this is u8, 
> suggesting there can be more sublices and eus. ;) Jokes aside, I'd 
> probably make them consistent. The index variable is probably best as 
> unsigned int, but the total and return can be u8 if big enough for 
> total eus? Either way best it matches the types in the struct they get 
> assigned to.

You're right!
u16 should be fine though.

>
>> +
>> +    for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
>> +        total += hweight8(sseu->eu_mask[i]);
>> +
>> +    return total;
>> +}
>> +
>>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>       const u32 fuse2 = I915_READ(GEN8_FUSE2);
>> +    int s, ss, eu_mask = 0xff;
>
> const eu_mask ?

Done.

>
>> +    u32 subslice_mask, eu_en;
>>         sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>>                   GEN10_F2_S_ENA_SHIFT;
>> -    sseu->subslice_mask = (1 << 4) - 1;
>> -    sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>> -                 GEN10_F2_SS_DIS_SHIFT);
>> +    sseu->max_slices = 6;
>> +    sseu->max_subslices = 4;
>> +    sseu->max_eus_per_subslice = 8;
>> +
>> +    subslice_mask = (1 << 4) - 1;
>> +    subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>> +               GEN10_F2_SS_DIS_SHIFT);
>> +
>> +    /* Slice0 can have up to 3 subslices, but there are only 2 in
>> +     * slice1/2.
>> +     */
>> +    sseu->subslices_mask[0] = subslice_mask;
>> +    for (s = 1; s < sseu->max_slices; s++)
>> +        sseu->subslices_mask[s] = subslice_mask & 0x3;
>> +
>> +    /* Slice0 */
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE0);
>> +    for (ss = 0; ss < sseu->max_subslices; ss++)
>> +        sseu->eu_mask[ss]     = (eu_en >> (8 * ss)) & eu_mask;
>> +    /* Slice1 */
>> +    sseu->eu_mask[sseu->max_subslices]         = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE1);
>> +    sseu->eu_mask[sseu->max_subslices + 1]     = eu_en & eu_mask;
>
> I suggest a helper to index into sse->eu_mask, like 
> sseu->eu_mask[_eu_mask_idx(slice, subslice)] or something, so it is 
> more readable what is happening here. Or even:
>
>  _eu_mask(sseu, slice, subslice) = mask;
>
> Doable? I am not 100% I picked up exactly on the layout of the eu_mask 
> array.. each element is one subslice? Consecutive for slice0, 
> subslice0..N, slice1... sliceN ?

Unfortunately that's not possible because of the asymmetry thing...

>
>> +    /* Slice2 */
>> +    sseu->eu_mask[2 * sseu->max_subslices]     = (eu_en >> 8) & 
>> eu_mask;
>> +    sseu->eu_mask[2 * sseu->max_subslices + 1] = (eu_en >> 16) & 
>> eu_mask;
>> +    /* Slice3 */
>> +    sseu->eu_mask[3 * sseu->max_subslices]     = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE2);
>> +    sseu->eu_mask[3 * sseu->max_subslices + 1] = eu_en & eu_mask;
>> +    /* Slice4 */
>> +    sseu->eu_mask[4 * sseu->max_subslices]     = (eu_en >> 8) & 
>> eu_mask;
>> +    sseu->eu_mask[4 * sseu->max_subslices + 1] = (eu_en >> 16) & 
>> eu_mask;
>> +    /* Slice5 */
>> +    sseu->eu_mask[5 * sseu->max_subslices]     = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN10_EU_DISABLE3);
>> +    sseu->eu_mask[5 * sseu->max_subslices + 1] = eu_en & eu_mask;
>> +
>> +    /* Do a second pass where we marked the subslices disabled if all
>> +     * their eus are off.
>> +     */
>> +    for (s = 0; s < sseu->max_slices; s++) {
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>> +            if (sseu->eu_mask[s * sseu->max_subslices + ss] == 0)
>> +                sseu->subslices_mask[s] &= ~BIT(ss);
>> +        }
>> +    }
>>   -    sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
>> -    sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
>> -                     GEN10_EU_DIS_SS_MASK));
>> +    sseu->eu_total = compute_eu_total(sseu);
>>         /*
>>        * CNL is expected to always have a uniform distribution
>> @@ -118,26 +170,30 @@ static void gen10_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   static void cherryview_sseu_info_init(struct drm_i915_private 
>> *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>> -    u32 fuse, eu_dis;
>> +    u32 fuse;
>>         fuse = I915_READ(CHV_FUSE_GT);
>>         sseu->slice_mask = BIT(0);
>> +    sseu->max_slices = 1;
>> +    sseu->max_subslices = 2;
>> +    sseu->max_eus_per_subslice = 8;
>>         if (!(fuse & CHV_FGT_DISABLE_SS0)) {
>> -        sseu->subslice_mask |= BIT(0);
>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
>> -                 CHV_FGT_EU_DIS_SS0_R1_MASK);
>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>> +        sseu->subslices_mask[0] |= BIT(0);
>> +        sseu->eu_mask[0] = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> 
>> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
>> +        sseu->eu_mask[0] |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> 
>> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
>
> Do you need to invert the bits here?

Oh dear... Indeed!

>
>> +        sseu->subslices_mask[0] = 1;
>>       }
>>         if (!(fuse & CHV_FGT_DISABLE_SS1)) {
>> -        sseu->subslice_mask |= BIT(1);
>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
>> -                 CHV_FGT_EU_DIS_SS1_R1_MASK);
>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>> +        sseu->subslices_mask[0] |= BIT(1);
>> +        sseu->eu_mask[1] = (fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> 
>> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
>> +        sseu->eu_mask[2] |= ((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> 
>> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
>
> Why eu_mask indices 1 and 2 here, while for subslice 0 you merged them 
> into index 0?

Again, you're right.
Thanks!

>
>>       }
>>   +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * CHV expected to always have a uniform distribution of EU
>>        * across subslices.
>> @@ -159,41 +215,50 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   {
>>       struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>       struct sseu_dev_info *sseu = &info->sseu;
>> -    int s_max = 3, ss_max = 4, eu_max = 8;
>>       int s, ss;
>> -    u32 fuse2, eu_disable;
>> +    u32 fuse2, eu_disable, subslice_mask;
>>       u8 eu_mask = 0xff;
>>         fuse2 = I915_READ(GEN8_FUSE2);
>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>> GEN8_F2_S_ENA_SHIFT;
>>   +    /* BXT has a single slice and at most 3 subslices. */
>> +    sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
>> +    sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
>> +    sseu->max_eus_per_subslice = 8;
>> +
>>       /*
>>        * The subslice disable field is global, i.e. it applies
>>        * to each of the enabled slices.
>>       */
>> -    sseu->subslice_mask = (1 << ss_max) - 1;
>> -    sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>> -                 GEN9_F2_SS_DIS_SHIFT);
>> +    subslice_mask = (1 << sseu->max_subslices) - 1;
>> +    subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>> +               GEN9_F2_SS_DIS_SHIFT);
>>         /*
>>        * Iterate through enabled slices and subslices to
>>        * count the total enabled EU.
>>       */
>> -    for (s = 0; s < s_max; s++) {
>> +    for (s = 0; s < sseu->max_slices; s++) {
>>           if (!(sseu->slice_mask & BIT(s)))
>>               /* skip disabled slice */
>>               continue;
>>   +        sseu->subslices_mask[s] = subslice_mask;
>> +
>>           eu_disable = I915_READ(GEN9_EU_DISABLE(s));
>> -        for (ss = 0; ss < ss_max; ss++) {
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>               int eu_per_ss;
>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>> +            if (!(sseu->subslices_mask[s] & BIT(ss)))
>>                   /* skip disabled subslice */
>>                   continue;
>>   -            eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
>> -                              eu_mask);
>> +            sseu->eu_mask[ss + s * sseu->max_subslices] =
>> +                ~((eu_disable >> (ss*8)) & eu_mask);
>> +
>> +            eu_per_ss = sseu->max_eus_per_subslice -
>> +                hweight8((eu_disable >> (ss*8)) & eu_mask);
>
> Store "eu_disable >> (ss*8)) & eu_mask" in a local for readability 
> since it is calculated twice?

Done.

>
>>                 /*
>>                * Record which subslice(s) has(have) 7 EUs. we
>> @@ -202,11 +267,11 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>                */
>>               if (eu_per_ss == 7)
>>                   sseu->subslice_7eu[s] |= BIT(ss);
>> -
>> -            sseu->eu_total += eu_per_ss;
>
> You could theoretically keep this running total and then at the end 
> compare against compute_eu_total under a GEM_BUG_ON, just to check 
> everyting is working as expected.
>
>>           }
>>       }
>>   +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * SKL is expected to always have a uniform distribution
>>        * of EU across subslices with the exception that any one
>> @@ -232,8 +297,8 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>       sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>>         if (IS_GEN9_LP(dev_priv)) {
>> -#define IS_SS_DISABLED(ss)    (!(sseu->subslice_mask & BIT(ss)))
>> -        info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
>> +#define IS_SS_DISABLED(ss)    (!(sseu->subslices_mask[0] & BIT(ss)))
>> +        info->has_pooled_eu = hweight8(sseu->subslices_mask[0]) == 3;
>>             sseu->min_eu_in_pool = 0;
>>           if (info->has_pooled_eu) {
>> @@ -251,19 +316,22 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   static void broadwell_sseu_info_init(struct drm_i915_private 
>> *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>> -    const int s_max = 3, ss_max = 3, eu_max = 8;
>>       int s, ss;
>> -    u32 fuse2, eu_disable[3]; /* s_max */
>> +    u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>>         fuse2 = I915_READ(GEN8_FUSE2);
>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>> GEN8_F2_S_ENA_SHIFT;
>> +    sseu->max_slices = 3;
>> +    sseu->max_subslices = 3;
>> +    sseu->max_eus_per_subslice = 8;
>> +
>>       /*
>>        * The subslice disable field is global, i.e. it applies
>>        * to each of the enabled slices.
>>        */
>> -    sseu->subslice_mask = GENMASK(ss_max - 1, 0);
>> -    sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>> -                 GEN8_F2_SS_DIS_SHIFT);
>> +    subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
>> +    subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>> +               GEN8_F2_SS_DIS_SHIFT);
>>         eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & 
>> GEN8_EU_DIS0_S0_MASK;
>>       eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> 
>> GEN8_EU_DIS0_S1_SHIFT) |
>> @@ -277,30 +345,36 @@ static void broadwell_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>        * Iterate through enabled slices and subslices to
>>        * count the total enabled EU.
>>        */
>> -    for (s = 0; s < s_max; s++) {
>> +    for (s = 0; s < sseu->max_slices; s++) {
>>           if (!(sseu->slice_mask & BIT(s)))
>>               /* skip disabled slice */
>>               continue;
>>   -        for (ss = 0; ss < ss_max; ss++) {
>> +        sseu->subslices_mask[s] = subslice_mask;
>> +
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>               u32 n_disabled;
>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>> +            if (!(sseu->subslices_mask[ss] & BIT(ss)))
>>                   /* skip disabled subslice */
>>                   continue;
>>   -            n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
>> +            sseu->eu_mask[ss + s * sseu->max_subslices] =
>> +                ~(eu_disable[s] >>
>> +                  (ss * sseu->max_eus_per_subslice));
>> +            n_disabled = hweight8(eu_disable[s] >>
>> +                          (ss * sseu->max_eus_per_subslice));
>
> As above could use a local for "eu_disable[s] >> (ss * 
> sseu->max_eus_per_subslice)"

Done.

>
>>                 /*
>>                * Record which subslices have 7 EUs.
>>                */
>> -            if (eu_max - n_disabled == 7)
>> +            if (sseu->max_eus_per_subslice - n_disabled == 7)
>>                   sseu->subslice_7eu[s] |= 1 << ss;
>> -
>> -            sseu->eu_total += eu_max - n_disabled;
>>           }
>>       }
>>   +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * BDW is expected to always have a uniform distribution of EU 
>> across
>>        * subslices with the exception that any one EU in any one 
>> subslice may
>> @@ -437,6 +511,7 @@ void intel_device_info_runtime_init(struct 
>> drm_i915_private *dev_priv)
>>   {
>>       struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>       enum pipe pipe;
>> +    int s;
>>         if (INTEL_GEN(dev_priv) >= 10) {
>>           for_each_pipe(dev_priv, pipe)
>> @@ -548,9 +623,11 @@ void intel_device_info_runtime_init(struct 
>> drm_i915_private *dev_priv)
>>       DRM_DEBUG_DRIVER("slice total: %u\n", 
>> hweight8(info->sseu.slice_mask));
>>       DRM_DEBUG_DRIVER("subslice total: %u\n",
>>                sseu_subslice_total(&info->sseu));
>> -    DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask);
>> -    DRM_DEBUG_DRIVER("subslice per slice: %u\n",
>> -             hweight8(info->sseu.subslice_mask));
>> +    for (s = 0; s < ARRAY_SIZE(info->sseu.subslices_mask); s++) {
>> +        DRM_DEBUG_DRIVER("subslice mask %04x\n", 
>> info->sseu.subslices_mask[s]);
>> +        DRM_DEBUG_DRIVER("subslice per slice: %u\n",
>> +                 hweight8(info->sseu.subslices_mask[s]));
>
> Put a subslice index into messages?

Done.

>
>> +    }
>>       DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
>>       DRM_DEBUG_DRIVER("EU per subslice: %u\n", 
>> info->sseu.eu_per_subslice);
>>       DRM_DEBUG_DRIVER("has slice power gating: %s\n",
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 2e38fbfdf08f..6e55b842c21d 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2072,7 +2072,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>>         if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>>           rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
>> -        rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
>> +        rpcs |= 
>> hweight8(INTEL_INFO(dev_priv)->sseu.subslices_mask[0]) <<
>>               GEN8_RPCS_SS_CNT_SHIFT;
>>           rpcs |= GEN8_RPCS_ENABLE;
>>       }
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index c5ff203e42d6..a94bc1b5c502 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum 
>> intel_engine_hangcheck_action a)
>>     #define instdone_subslice_mask(dev_priv__) \
>>       (INTEL_GEN(dev_priv__) == 7 ? \
>> -     1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
>> +     1 : INTEL_INFO(dev_priv__)->sseu.subslices_mask[0])
>>     #define for_each_instdone_slice_subslice(dev_priv__, slice__, 
>> subslice__) \
>>       for ((slice__) = 0, (subslice__) = 0; \
>>
>
> Regards,
>
> Tvrtko
>
Tvrtko Ursulin Jan. 11, 2018, 3:57 p.m. UTC | #3
On 11/01/2018 15:48, Lionel Landwerlin wrote:
> On 11/01/18 11:12, Tvrtko Ursulin wrote:
>>
>> On 18/12/2017 15:35, Lionel Landwerlin wrote:
>>> Up to now, subslice mask was assumed to be uniform across slices. But
>>> starting with Cannonlake, slices can be asymetric (for example slice0
>>
>> asymmetric, thanks auto spell checker. :)
> 
> Done, thanks.
> 
>>
>>> has different number of subslices as slice1+). This change stores all
>>> subslices masks for all slices rather than having a single mask that
>>> applies to all slices.
>>>
>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c      |  24 +++--
>>>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>>>   drivers/gpu/drm/i915/i915_drv.h          |  23 ++++-
>>>   drivers/gpu/drm/i915/intel_device_info.c | 169 
>>> ++++++++++++++++++++++---------
>>>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>>>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>>>   6 files changed, 161 insertions(+), 61 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 0ddce72552bf..0c7890b695c5 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -4293,7 +4293,7 @@ static void 
>>> cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>>>               continue;
>>>             sseu->slice_mask = BIT(0);
>>> -        sseu->subslice_mask |= BIT(ss);
>>> +        sseu->subslices_mask[0] |= BIT(ss);
>>>           eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>>>                ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>>>                ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
>>> @@ -4340,7 +4340,7 @@ static void gen10_sseu_device_status(struct 
>>> drm_i915_private *dev_priv,
>>>               continue;
>>>             sseu->slice_mask |= BIT(s);
>>> -        sseu->subslice_mask = info->sseu.subslice_mask;
>>> +        sseu->subslices_mask[s] = info->sseu.subslices_mask[s];
>>>             for (ss = 0; ss < ss_max; ss++) {
>>>               unsigned int eu_cnt;
>>> @@ -4395,8 +4395,8 @@ static void gen9_sseu_device_status(struct 
>>> drm_i915_private *dev_priv,
>>>           sseu->slice_mask |= BIT(s);
>>>             if (IS_GEN9_BC(dev_priv))
>>> -            sseu->subslice_mask =
>>> -                INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>> +            sseu->subslices_mask[s] =
>>> + INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
>>>             for (ss = 0; ss < ss_max; ss++) {
>>>               unsigned int eu_cnt;
>>> @@ -4406,7 +4406,7 @@ static void gen9_sseu_device_status(struct 
>>> drm_i915_private *dev_priv,
>>>                       /* skip disabled subslice */
>>>                       continue;
>>>   -                sseu->subslice_mask |= BIT(ss);
>>> +                sseu->subslices_mask[s] |= BIT(ss);
>>>               }
>>>                 eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
>>> @@ -4428,9 +4428,12 @@ static void 
>>> broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>>>       sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>>>         if (sseu->slice_mask) {
>>> -        sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>>           sseu->eu_per_subslice =
>>>                   INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
>>> +        for (s = 0; s < fls(sseu->slice_mask); s++) {
>>> +            sseu->subslices_mask[s] =
>>> + INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
>>> +        }
>>>           sseu->eu_total = sseu->eu_per_subslice *
>>>                    sseu_subslice_total(sseu);
>>>   @@ -4449,6 +4452,7 @@ static void i915_print_sseu_info(struct 
>>> seq_file *m, bool is_available_info,
>>>   {
>>>       struct drm_i915_private *dev_priv = node_to_i915(m->private);
>>>       const char *type = is_available_info ? "Available" : "Enabled";
>>> +    int s;
>>>         seq_printf(m, "  %s Slice Mask: %04x\n", type,
>>>              sseu->slice_mask);
>>> @@ -4456,10 +4460,10 @@ static void i915_print_sseu_info(struct 
>>> seq_file *m, bool is_available_info,
>>>              hweight8(sseu->slice_mask));
>>>       seq_printf(m, "  %s Subslice Total: %u\n", type,
>>>              sseu_subslice_total(sseu));
>>> -    seq_printf(m, "  %s Subslice Mask: %04x\n", type,
>>> -           sseu->subslice_mask);
>>> -    seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
>>> -           hweight8(sseu->subslice_mask));
>>> +    for (s = 0; s < fls(sseu->slice_mask); s++) {
>>
>> Slice mask is always contiguous bits?
> 
> I have a 2x6 BXT on my desk where the subslice0 appears to be fused-off.
> I assumed same could be true for slices.
> fls() should make us iterate through all the slices (even the fused off 
> ones).

Do you then want to skip printing the fused off ones? Just asking, don't 
think it matters hugely.

>>
>>> +        seq_printf(m, "  %s Slice%i Subslice Mask: %04x\n", type,
>>> +               s, sseu->subslices_mask[s]);
>>
>> Don't want to keep printing the count? Like maybe " %s Slice%u %u 
>> sublices, mask=%04x\n" ?
> 
> Sure, done.
> 
>>
>>> +    }
>>>       seq_printf(m, "  %s EU Total: %u\n", type,
>>>              sseu->eu_total);
>>>       seq_printf(m, "  %s EU Per Subslice: %u\n", type,
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>>> b/drivers/gpu/drm/i915/i915_drv.c
>>> index 72bea281edb7..8b99e415c345 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.c
>>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>>> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, 
>>> void *data,
>>>               return -ENODEV;
>>>           break;
>>>       case I915_PARAM_SUBSLICE_MASK:
>>> -        value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>> +        value = INTEL_INFO(dev_priv)->sseu.subslices_mask[0];
>>>           if (!value)
>>>               return -ENODEV;
>>>           break;
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 1aba5657f5f0..82fc59078c6a 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -802,9 +802,12 @@ struct intel_csr {
>>>       func(supports_tv); \
>>>       func(has_ipc);
>>>   +#define GEN_MAX_SLICES        (6) /* CNL upper bound */
>>> +#define GEN_MAX_SUBSLICES    (7)
>>> +
>>>   struct sseu_dev_info {
>>>       u8 slice_mask;
>>> -    u8 subslice_mask;
>>> +    u8 subslices_mask[GEN_MAX_SLICES];
>>
>> Personally I would probably kept the subslice_mask name and just 
>> turned it into an array, but that is bike-shed territory.
> 
> It was easier to spot all the places to modify. Will go back.
> 
>>
>>>       u8 eu_total;
>>>       u8 eu_per_subslice;
>>>       u8 min_eu_in_pool;
>>> @@ -813,11 +816,27 @@ struct sseu_dev_info {
>>>       u8 has_slice_pg:1;
>>>       u8 has_subslice_pg:1;
>>>       u8 has_eu_pg:1;
>>> +
>>> +    /* Topology fields */
>>> +    u8 max_slices;
>>> +    u8 max_subslices;
>>> +    u8 max_eus_per_subslice;
>>> +
>>> +    /* We don't have more than 8 eus per subslice at the moment and 
>>> as we
>>> +     * store eus enabled using bits, no need to multiply by eus per
>>> +     * subslice.
>>> +     */
>>> +    u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>>>   };
>>>     static inline unsigned int sseu_subslice_total(const struct 
>>> sseu_dev_info *sseu)
>>>   {
>>> -    return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
>>> +    unsigned s, total = 0;
>>> +
>>> +    for (s = 0; s < ARRAY_SIZE(sseu->subslices_mask); s++)
>>> +        total += hweight8(sseu->subslices_mask[s]);
>>> +
>>> +    return total;
>>
>> The function is bigger now so maybe it needs stops being inline, but 
>> either moved to the .c file, or even the total stored in struct 
>> sseu_dev_info, and then this becomes just a getter?
> 
> Let's go with a subslice_total field.
> 
>>
>>>   }
>>>     /* Keep in gen based order, and chronological order within a gen */
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>>> b/drivers/gpu/drm/i915/intel_device_info.c
>>> index f478be3ae0ba..6a3c40439e83 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>>> @@ -82,22 +82,74 @@ void intel_device_info_dump(struct 
>>> drm_i915_private *dev_priv)
>>>   #undef PRINT_FLAG
>>>   }
>>>   +static u8 compute_eu_total(const struct sseu_dev_info *sseu)
>>> +{
>>> +    u8 i, total = 0;
>>
>> sseu_subslice_total returns an unsinged int, while this is u8, 
>> suggesting there can be more sublices and eus. ;) Jokes aside, I'd 
>> probably make them consistent. The index variable is probably best as 
>> unsigned int, but the total and return can be u8 if big enough for 
>> total eus? Either way best it matches the types in the struct they get 
>> assigned to.
> 
> You're right!
> u16 should be fine though.
> 
>>
>>> +
>>> +    for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
>>> +        total += hweight8(sseu->eu_mask[i]);
>>> +
>>> +    return total;
>>> +}
>>> +
>>>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>>>   {
>>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>>       const u32 fuse2 = I915_READ(GEN8_FUSE2);
>>> +    int s, ss, eu_mask = 0xff;
>>
>> const eu_mask ?
> 
> Done.
> 
>>
>>> +    u32 subslice_mask, eu_en;
>>>         sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>>>                   GEN10_F2_S_ENA_SHIFT;
>>> -    sseu->subslice_mask = (1 << 4) - 1;
>>> -    sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>>> -                 GEN10_F2_SS_DIS_SHIFT);
>>> +    sseu->max_slices = 6;
>>> +    sseu->max_subslices = 4;
>>> +    sseu->max_eus_per_subslice = 8;
>>> +
>>> +    subslice_mask = (1 << 4) - 1;
>>> +    subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>>> +               GEN10_F2_SS_DIS_SHIFT);
>>> +
>>> +    /* Slice0 can have up to 3 subslices, but there are only 2 in
>>> +     * slice1/2.
>>> +     */
>>> +    sseu->subslices_mask[0] = subslice_mask;
>>> +    for (s = 1; s < sseu->max_slices; s++)
>>> +        sseu->subslices_mask[s] = subslice_mask & 0x3;
>>> +
>>> +    /* Slice0 */
>>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE0);
>>> +    for (ss = 0; ss < sseu->max_subslices; ss++)
>>> +        sseu->eu_mask[ss]     = (eu_en >> (8 * ss)) & eu_mask;
>>> +    /* Slice1 */
>>> +    sseu->eu_mask[sseu->max_subslices]         = (eu_en >> 24) & 
>>> eu_mask;
>>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE1);
>>> +    sseu->eu_mask[sseu->max_subslices + 1]     = eu_en & eu_mask;
>>
>> I suggest a helper to index into sse->eu_mask, like 
>> sseu->eu_mask[_eu_mask_idx(slice, subslice)] or something, so it is 
>> more readable what is happening here. Or even:
>>
>>  _eu_mask(sseu, slice, subslice) = mask;
>>
>> Doable? I am not 100% I picked up exactly on the layout of the eu_mask 
>> array.. each element is one subslice? Consecutive for slice0, 
>> subslice0..N, slice1... sliceN ?
> 
> Unfortunately that's not possible because of the asymmetry thing...

Hm so on a given SKU you are saying the eu_mask you are generating is 
also asymmetrical? Meaning different number of total allocated bits 
per-slice? Could we have it symmetrical in the internal representation 
and uAPI for simpler code?

Regards,

Tvrtko

>>
>>> +    /* Slice2 */
>>> +    sseu->eu_mask[2 * sseu->max_subslices]     = (eu_en >> 8) & 
>>> eu_mask;
>>> +    sseu->eu_mask[2 * sseu->max_subslices + 1] = (eu_en >> 16) & 
>>> eu_mask;
>>> +    /* Slice3 */
>>> +    sseu->eu_mask[3 * sseu->max_subslices]     = (eu_en >> 24) & 
>>> eu_mask;
>>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE2);
>>> +    sseu->eu_mask[3 * sseu->max_subslices + 1] = eu_en & eu_mask;
>>> +    /* Slice4 */
>>> +    sseu->eu_mask[4 * sseu->max_subslices]     = (eu_en >> 8) & 
>>> eu_mask;
>>> +    sseu->eu_mask[4 * sseu->max_subslices + 1] = (eu_en >> 16) & 
>>> eu_mask;
>>> +    /* Slice5 */
>>> +    sseu->eu_mask[5 * sseu->max_subslices]     = (eu_en >> 24) & 
>>> eu_mask;
>>> +    eu_en = ~I915_READ(GEN10_EU_DISABLE3);
>>> +    sseu->eu_mask[5 * sseu->max_subslices + 1] = eu_en & eu_mask;
>>> +
>>> +    /* Do a second pass where we marked the subslices disabled if all
>>> +     * their eus are off.
>>> +     */
>>> +    for (s = 0; s < sseu->max_slices; s++) {
>>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>> +            if (sseu->eu_mask[s * sseu->max_subslices + ss] == 0)
>>> +                sseu->subslices_mask[s] &= ~BIT(ss);
>>> +        }
>>> +    }
>>>   -    sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
>>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
>>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
>>> -    sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
>>> -                     GEN10_EU_DIS_SS_MASK));
>>> +    sseu->eu_total = compute_eu_total(sseu);
>>>         /*
>>>        * CNL is expected to always have a uniform distribution
>>> @@ -118,26 +170,30 @@ static void gen10_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>   static void cherryview_sseu_info_init(struct drm_i915_private 
>>> *dev_priv)
>>>   {
>>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>> -    u32 fuse, eu_dis;
>>> +    u32 fuse;
>>>         fuse = I915_READ(CHV_FUSE_GT);
>>>         sseu->slice_mask = BIT(0);
>>> +    sseu->max_slices = 1;
>>> +    sseu->max_subslices = 2;
>>> +    sseu->max_eus_per_subslice = 8;
>>>         if (!(fuse & CHV_FGT_DISABLE_SS0)) {
>>> -        sseu->subslice_mask |= BIT(0);
>>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
>>> -                 CHV_FGT_EU_DIS_SS0_R1_MASK);
>>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>>> +        sseu->subslices_mask[0] |= BIT(0);
>>> +        sseu->eu_mask[0] = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> 
>>> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
>>> +        sseu->eu_mask[0] |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> 
>>> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
>>
>> Do you need to invert the bits here?
> 
> Oh dear... Indeed!
> 
>>
>>> +        sseu->subslices_mask[0] = 1;
>>>       }
>>>         if (!(fuse & CHV_FGT_DISABLE_SS1)) {
>>> -        sseu->subslice_mask |= BIT(1);
>>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
>>> -                 CHV_FGT_EU_DIS_SS1_R1_MASK);
>>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>>> +        sseu->subslices_mask[0] |= BIT(1);
>>> +        sseu->eu_mask[1] = (fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> 
>>> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
>>> +        sseu->eu_mask[2] |= ((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> 
>>> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
>>
>> Why eu_mask indices 1 and 2 here, while for subslice 0 you merged them 
>> into index 0?
> 
> Again, you're right.
> Thanks!
> 
>>
>>>       }
>>>   +    sseu->eu_total = compute_eu_total(sseu);
>>> +
>>>       /*
>>>        * CHV expected to always have a uniform distribution of EU
>>>        * across subslices.
>>> @@ -159,41 +215,50 @@ static void gen9_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>   {
>>>       struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>>       struct sseu_dev_info *sseu = &info->sseu;
>>> -    int s_max = 3, ss_max = 4, eu_max = 8;
>>>       int s, ss;
>>> -    u32 fuse2, eu_disable;
>>> +    u32 fuse2, eu_disable, subslice_mask;
>>>       u8 eu_mask = 0xff;
>>>         fuse2 = I915_READ(GEN8_FUSE2);
>>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>>> GEN8_F2_S_ENA_SHIFT;
>>>   +    /* BXT has a single slice and at most 3 subslices. */
>>> +    sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
>>> +    sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
>>> +    sseu->max_eus_per_subslice = 8;
>>> +
>>>       /*
>>>        * The subslice disable field is global, i.e. it applies
>>>        * to each of the enabled slices.
>>>       */
>>> -    sseu->subslice_mask = (1 << ss_max) - 1;
>>> -    sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>>> -                 GEN9_F2_SS_DIS_SHIFT);
>>> +    subslice_mask = (1 << sseu->max_subslices) - 1;
>>> +    subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>>> +               GEN9_F2_SS_DIS_SHIFT);
>>>         /*
>>>        * Iterate through enabled slices and subslices to
>>>        * count the total enabled EU.
>>>       */
>>> -    for (s = 0; s < s_max; s++) {
>>> +    for (s = 0; s < sseu->max_slices; s++) {
>>>           if (!(sseu->slice_mask & BIT(s)))
>>>               /* skip disabled slice */
>>>               continue;
>>>   +        sseu->subslices_mask[s] = subslice_mask;
>>> +
>>>           eu_disable = I915_READ(GEN9_EU_DISABLE(s));
>>> -        for (ss = 0; ss < ss_max; ss++) {
>>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>>               int eu_per_ss;
>>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>>> +            if (!(sseu->subslices_mask[s] & BIT(ss)))
>>>                   /* skip disabled subslice */
>>>                   continue;
>>>   -            eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
>>> -                              eu_mask);
>>> +            sseu->eu_mask[ss + s * sseu->max_subslices] =
>>> +                ~((eu_disable >> (ss*8)) & eu_mask);
>>> +
>>> +            eu_per_ss = sseu->max_eus_per_subslice -
>>> +                hweight8((eu_disable >> (ss*8)) & eu_mask);
>>
>> Store "eu_disable >> (ss*8)) & eu_mask" in a local for readability 
>> since it is calculated twice?
> 
> Done.
> 
>>
>>>                 /*
>>>                * Record which subslice(s) has(have) 7 EUs. we
>>> @@ -202,11 +267,11 @@ static void gen9_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>                */
>>>               if (eu_per_ss == 7)
>>>                   sseu->subslice_7eu[s] |= BIT(ss);
>>> -
>>> -            sseu->eu_total += eu_per_ss;
>>
>> You could theoretically keep this running total and then at the end 
>> compare against compute_eu_total under a GEM_BUG_ON, just to check 
>> everyting is working as expected.
>>
>>>           }
>>>       }
>>>   +    sseu->eu_total = compute_eu_total(sseu);
>>> +
>>>       /*
>>>        * SKL is expected to always have a uniform distribution
>>>        * of EU across subslices with the exception that any one
>>> @@ -232,8 +297,8 @@ static void gen9_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>       sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>>>         if (IS_GEN9_LP(dev_priv)) {
>>> -#define IS_SS_DISABLED(ss)    (!(sseu->subslice_mask & BIT(ss)))
>>> -        info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
>>> +#define IS_SS_DISABLED(ss)    (!(sseu->subslices_mask[0] & BIT(ss)))
>>> +        info->has_pooled_eu = hweight8(sseu->subslices_mask[0]) == 3;
>>>             sseu->min_eu_in_pool = 0;
>>>           if (info->has_pooled_eu) {
>>> @@ -251,19 +316,22 @@ static void gen9_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>   static void broadwell_sseu_info_init(struct drm_i915_private 
>>> *dev_priv)
>>>   {
>>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>> -    const int s_max = 3, ss_max = 3, eu_max = 8;
>>>       int s, ss;
>>> -    u32 fuse2, eu_disable[3]; /* s_max */
>>> +    u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>>>         fuse2 = I915_READ(GEN8_FUSE2);
>>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>>> GEN8_F2_S_ENA_SHIFT;
>>> +    sseu->max_slices = 3;
>>> +    sseu->max_subslices = 3;
>>> +    sseu->max_eus_per_subslice = 8;
>>> +
>>>       /*
>>>        * The subslice disable field is global, i.e. it applies
>>>        * to each of the enabled slices.
>>>        */
>>> -    sseu->subslice_mask = GENMASK(ss_max - 1, 0);
>>> -    sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>>> -                 GEN8_F2_SS_DIS_SHIFT);
>>> +    subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
>>> +    subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>>> +               GEN8_F2_SS_DIS_SHIFT);
>>>         eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & 
>>> GEN8_EU_DIS0_S0_MASK;
>>>       eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> 
>>> GEN8_EU_DIS0_S1_SHIFT) |
>>> @@ -277,30 +345,36 @@ static void broadwell_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>        * Iterate through enabled slices and subslices to
>>>        * count the total enabled EU.
>>>        */
>>> -    for (s = 0; s < s_max; s++) {
>>> +    for (s = 0; s < sseu->max_slices; s++) {
>>>           if (!(sseu->slice_mask & BIT(s)))
>>>               /* skip disabled slice */
>>>               continue;
>>>   -        for (ss = 0; ss < ss_max; ss++) {
>>> +        sseu->subslices_mask[s] = subslice_mask;
>>> +
>>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>>               u32 n_disabled;
>>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>>> +            if (!(sseu->subslices_mask[ss] & BIT(ss)))
>>>                   /* skip disabled subslice */
>>>                   continue;
>>>   -            n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
>>> +            sseu->eu_mask[ss + s * sseu->max_subslices] =
>>> +                ~(eu_disable[s] >>
>>> +                  (ss * sseu->max_eus_per_subslice));
>>> +            n_disabled = hweight8(eu_disable[s] >>
>>> +                          (ss * sseu->max_eus_per_subslice));
>>
>> As above could use a local for "eu_disable[s] >> (ss * 
>> sseu->max_eus_per_subslice)"
> 
> Done.
> 
>>
>>>                 /*
>>>                * Record which subslices have 7 EUs.
>>>                */
>>> -            if (eu_max - n_disabled == 7)
>>> +            if (sseu->max_eus_per_subslice - n_disabled == 7)
>>>                   sseu->subslice_7eu[s] |= 1 << ss;
>>> -
>>> -            sseu->eu_total += eu_max - n_disabled;
>>>           }
>>>       }
>>>   +    sseu->eu_total = compute_eu_total(sseu);
>>> +
>>>       /*
>>>        * BDW is expected to always have a uniform distribution of EU 
>>> across
>>>        * subslices with the exception that any one EU in any one 
>>> subslice may
>>> @@ -437,6 +511,7 @@ void intel_device_info_runtime_init(struct 
>>> drm_i915_private *dev_priv)
>>>   {
>>>       struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>>       enum pipe pipe;
>>> +    int s;
>>>         if (INTEL_GEN(dev_priv) >= 10) {
>>>           for_each_pipe(dev_priv, pipe)
>>> @@ -548,9 +623,11 @@ void intel_device_info_runtime_init(struct 
>>> drm_i915_private *dev_priv)
>>>       DRM_DEBUG_DRIVER("slice total: %u\n", 
>>> hweight8(info->sseu.slice_mask));
>>>       DRM_DEBUG_DRIVER("subslice total: %u\n",
>>>                sseu_subslice_total(&info->sseu));
>>> -    DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask);
>>> -    DRM_DEBUG_DRIVER("subslice per slice: %u\n",
>>> -             hweight8(info->sseu.subslice_mask));
>>> +    for (s = 0; s < ARRAY_SIZE(info->sseu.subslices_mask); s++) {
>>> +        DRM_DEBUG_DRIVER("subslice mask %04x\n", 
>>> info->sseu.subslices_mask[s]);
>>> +        DRM_DEBUG_DRIVER("subslice per slice: %u\n",
>>> +                 hweight8(info->sseu.subslices_mask[s]));
>>
>> Put a subslice index into messages?
> 
> Done.
> 
>>
>>> +    }
>>>       DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
>>>       DRM_DEBUG_DRIVER("EU per subslice: %u\n", 
>>> info->sseu.eu_per_subslice);
>>>       DRM_DEBUG_DRIVER("has slice power gating: %s\n",
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 2e38fbfdf08f..6e55b842c21d 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -2072,7 +2072,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>>>         if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>>>           rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
>>> -        rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
>>> +        rpcs |= 
>>> hweight8(INTEL_INFO(dev_priv)->sseu.subslices_mask[0]) <<
>>>               GEN8_RPCS_SS_CNT_SHIFT;
>>>           rpcs |= GEN8_RPCS_ENABLE;
>>>       }
>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> index c5ff203e42d6..a94bc1b5c502 100644
>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum 
>>> intel_engine_hangcheck_action a)
>>>     #define instdone_subslice_mask(dev_priv__) \
>>>       (INTEL_GEN(dev_priv__) == 7 ? \
>>> -     1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
>>> +     1 : INTEL_INFO(dev_priv__)->sseu.subslices_mask[0])
>>>     #define for_each_instdone_slice_subslice(dev_priv__, slice__, 
>>> subslice__) \
>>>       for ((slice__) = 0, (subslice__) = 0; \
>>>
>>
>> Regards,
>>
>> Tvrtko
>>
> 
>
Lionel Landwerlin Jan. 11, 2018, 4 p.m. UTC | #4
On 11/01/18 11:12, Tvrtko Ursulin wrote:
>> */
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE0);
>> +    for (ss = 0; ss < sseu->max_subslices; ss++)
>> +        sseu->eu_mask[ss]     = (eu_en >> (8 * ss)) & eu_mask;
>> +    /* Slice1 */
>> +    sseu->eu_mask[sseu->max_subslices]         = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE1);
>> +    sseu->eu_mask[sseu->max_subslices + 1]     = eu_en & eu_mask;
>
> I suggest a helper to index into sse->eu_mask, like 
> sseu->eu_mask[_eu_mask_idx(slice, subslice)] or something, so it is 
> more readable what is happening here. Or even:
>
>  _eu_mask(sseu, slice, subslice) = mask;
>
> Doable? I am not 100% I picked up exactly on the layout of the eu_mask 
> array.. each element is one subslice? Consecutive for slice0, 
> subslice0..N, slice1... sliceN ? 
Oops, I misread that part.
I thought you wanted a helper for accessing the registers.
I'll add a helper for what you would like.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0ddce72552bf..0c7890b695c5 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4293,7 +4293,7 @@  static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask |= BIT(ss);
+		sseu->subslices_mask[0] |= BIT(ss);
 		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -4340,7 +4340,7 @@  static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask |= BIT(s);
-		sseu->subslice_mask = info->sseu.subslice_mask;
+		sseu->subslices_mask[s] = info->sseu.subslices_mask[s];
 
 		for (ss = 0; ss < ss_max; ss++) {
 			unsigned int eu_cnt;
@@ -4395,8 +4395,8 @@  static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		sseu->slice_mask |= BIT(s);
 
 		if (IS_GEN9_BC(dev_priv))
-			sseu->subslice_mask =
-				INTEL_INFO(dev_priv)->sseu.subslice_mask;
+			sseu->subslices_mask[s] =
+				INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
 
 		for (ss = 0; ss < ss_max; ss++) {
 			unsigned int eu_cnt;
@@ -4406,7 +4406,7 @@  static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 					/* skip disabled subslice */
 					continue;
 
-				sseu->subslice_mask |= BIT(ss);
+				sseu->subslices_mask[s] |= BIT(ss);
 			}
 
 			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@@ -4428,9 +4428,12 @@  static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
 	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
 
 	if (sseu->slice_mask) {
-		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
 		sseu->eu_per_subslice =
 				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+		for (s = 0; s < fls(sseu->slice_mask); s++) {
+			sseu->subslices_mask[s] =
+				INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
+		}
 		sseu->eu_total = sseu->eu_per_subslice *
 				 sseu_subslice_total(sseu);
 
@@ -4449,6 +4452,7 @@  static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	const char *type = is_available_info ? "Available" : "Enabled";
+	int s;
 
 	seq_printf(m, "  %s Slice Mask: %04x\n", type,
 		   sseu->slice_mask);
@@ -4456,10 +4460,10 @@  static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 		   hweight8(sseu->slice_mask));
 	seq_printf(m, "  %s Subslice Total: %u\n", type,
 		   sseu_subslice_total(sseu));
-	seq_printf(m, "  %s Subslice Mask: %04x\n", type,
-		   sseu->subslice_mask);
-	seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
-		   hweight8(sseu->subslice_mask));
+	for (s = 0; s < fls(sseu->slice_mask); s++) {
+		seq_printf(m, "  %s Slice%i Subslice Mask: %04x\n", type,
+			   s, sseu->subslices_mask[s]);
+	}
 	seq_printf(m, "  %s EU Total: %u\n", type,
 		   sseu->eu_total);
 	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 72bea281edb7..8b99e415c345 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -414,7 +414,7 @@  static int i915_getparam(struct drm_device *dev, void *data,
 			return -ENODEV;
 		break;
 	case I915_PARAM_SUBSLICE_MASK:
-		value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
+		value = INTEL_INFO(dev_priv)->sseu.subslices_mask[0];
 		if (!value)
 			return -ENODEV;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1aba5657f5f0..82fc59078c6a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -802,9 +802,12 @@  struct intel_csr {
 	func(supports_tv); \
 	func(has_ipc);
 
+#define GEN_MAX_SLICES		(6) /* CNL upper bound */
+#define GEN_MAX_SUBSLICES	(7)
+
 struct sseu_dev_info {
 	u8 slice_mask;
-	u8 subslice_mask;
+	u8 subslices_mask[GEN_MAX_SLICES];
 	u8 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
@@ -813,11 +816,27 @@  struct sseu_dev_info {
 	u8 has_slice_pg:1;
 	u8 has_subslice_pg:1;
 	u8 has_eu_pg:1;
+
+	/* Topology fields */
+	u8 max_slices;
+	u8 max_subslices;
+	u8 max_eus_per_subslice;
+
+	/* We don't have more than 8 eus per subslice at the moment and as we
+	 * store eus enabled using bits, no need to multiply by eus per
+	 * subslice.
+	 */
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
 };
 
 static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
-	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
+	unsigned s, total = 0;
+
+	for (s = 0; s < ARRAY_SIZE(sseu->subslices_mask); s++)
+		total += hweight8(sseu->subslices_mask[s]);
+
+	return total;
 }
 
 /* Keep in gen based order, and chronological order within a gen */
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index f478be3ae0ba..6a3c40439e83 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -82,22 +82,74 @@  void intel_device_info_dump(struct drm_i915_private *dev_priv)
 #undef PRINT_FLAG
 }
 
+static u8 compute_eu_total(const struct sseu_dev_info *sseu)
+{
+	u8 i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
+		total += hweight8(sseu->eu_mask[i]);
+
+	return total;
+}
+
 static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
 	const u32 fuse2 = I915_READ(GEN8_FUSE2);
+	int s, ss, eu_mask = 0xff;
+	u32 subslice_mask, eu_en;
 
 	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
 			    GEN10_F2_S_ENA_SHIFT;
-	sseu->subslice_mask = (1 << 4) - 1;
-	sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
-				 GEN10_F2_SS_DIS_SHIFT);
+	sseu->max_slices = 6;
+	sseu->max_subslices = 4;
+	sseu->max_eus_per_subslice = 8;
+
+	subslice_mask = (1 << 4) - 1;
+	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
+			   GEN10_F2_SS_DIS_SHIFT);
+
+	/* Slice0 can have up to 3 subslices, but there are only 2 in
+	 * slice1/2.
+	 */
+	sseu->subslices_mask[0] = subslice_mask;
+	for (s = 1; s < sseu->max_slices; s++)
+		sseu->subslices_mask[s] = subslice_mask & 0x3;
+
+	/* Slice0 */
+	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
+	for (ss = 0; ss < sseu->max_subslices; ss++)
+		sseu->eu_mask[ss]     = (eu_en >> (8 * ss)) & eu_mask;
+	/* Slice1 */
+	sseu->eu_mask[sseu->max_subslices]         = (eu_en >> 24) & eu_mask;
+	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
+	sseu->eu_mask[sseu->max_subslices + 1]     = eu_en & eu_mask;
+	/* Slice2 */
+	sseu->eu_mask[2 * sseu->max_subslices]     = (eu_en >> 8) & eu_mask;
+	sseu->eu_mask[2 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask;
+	/* Slice3 */
+	sseu->eu_mask[3 * sseu->max_subslices]     = (eu_en >> 24) & eu_mask;
+	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
+	sseu->eu_mask[3 * sseu->max_subslices + 1] = eu_en & eu_mask;
+	/* Slice4 */
+	sseu->eu_mask[4 * sseu->max_subslices]     = (eu_en >> 8) & eu_mask;
+	sseu->eu_mask[4 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask;
+	/* Slice5 */
+	sseu->eu_mask[5 * sseu->max_subslices]     = (eu_en >> 24) & eu_mask;
+	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
+	sseu->eu_mask[5 * sseu->max_subslices + 1] = eu_en & eu_mask;
+
+	/* Do a second pass where we marked the subslices disabled if all
+	 * their eus are off.
+	 */
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			if (sseu->eu_mask[s * sseu->max_subslices + ss] == 0)
+				sseu->subslices_mask[s] &= ~BIT(ss);
+		}
+	}
 
-	sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
-	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
-	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
-	sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
-				     GEN10_EU_DIS_SS_MASK));
+	sseu->eu_total = compute_eu_total(sseu);
 
 	/*
 	 * CNL is expected to always have a uniform distribution
@@ -118,26 +170,30 @@  static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
-	u32 fuse, eu_dis;
+	u32 fuse;
 
 	fuse = I915_READ(CHV_FUSE_GT);
 
 	sseu->slice_mask = BIT(0);
+	sseu->max_slices = 1;
+	sseu->max_subslices = 2;
+	sseu->max_eus_per_subslice = 8;
 
 	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
-		sseu->subslice_mask |= BIT(0);
-		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
-				 CHV_FGT_EU_DIS_SS0_R1_MASK);
-		sseu->eu_total += 8 - hweight32(eu_dis);
+		sseu->subslices_mask[0] |= BIT(0);
+		sseu->eu_mask[0] = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
+		sseu->eu_mask[0] |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
+		sseu->subslices_mask[0] = 1;
 	}
 
 	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
-		sseu->subslice_mask |= BIT(1);
-		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
-				 CHV_FGT_EU_DIS_SS1_R1_MASK);
-		sseu->eu_total += 8 - hweight32(eu_dis);
+		sseu->subslices_mask[0] |= BIT(1);
+		sseu->eu_mask[1] = (fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
+		sseu->eu_mask[2] |= ((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
 	}
 
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * CHV expected to always have a uniform distribution of EU
 	 * across subslices.
@@ -159,41 +215,50 @@  static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_device_info *info = mkwrite_device_info(dev_priv);
 	struct sseu_dev_info *sseu = &info->sseu;
-	int s_max = 3, ss_max = 4, eu_max = 8;
 	int s, ss;
-	u32 fuse2, eu_disable;
+	u32 fuse2, eu_disable, subslice_mask;
 	u8 eu_mask = 0xff;
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
 
+	/* BXT has a single slice and at most 3 subslices. */
+	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
+	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
+	sseu->max_eus_per_subslice = 8;
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
 	*/
-	sseu->subslice_mask = (1 << ss_max) - 1;
-	sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
-				 GEN9_F2_SS_DIS_SHIFT);
+	subslice_mask = (1 << sseu->max_subslices) - 1;
+	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
+			   GEN9_F2_SS_DIS_SHIFT);
 
 	/*
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	*/
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
+		sseu->subslices_mask[s] = subslice_mask;
+
 		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
-		for (ss = 0; ss < ss_max; ss++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			int eu_per_ss;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslices_mask[s] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
-						      eu_mask);
+			sseu->eu_mask[ss + s * sseu->max_subslices] =
+				~((eu_disable >> (ss*8)) & eu_mask);
+
+			eu_per_ss = sseu->max_eus_per_subslice -
+				hweight8((eu_disable >> (ss*8)) & eu_mask);
 
 			/*
 			 * Record which subslice(s) has(have) 7 EUs. we
@@ -202,11 +267,11 @@  static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 			 */
 			if (eu_per_ss == 7)
 				sseu->subslice_7eu[s] |= BIT(ss);
-
-			sseu->eu_total += eu_per_ss;
 		}
 	}
 
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * SKL is expected to always have a uniform distribution
 	 * of EU across subslices with the exception that any one
@@ -232,8 +297,8 @@  static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(dev_priv)) {
-#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
-		info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
+#define IS_SS_DISABLED(ss)	(!(sseu->subslices_mask[0] & BIT(ss)))
+		info->has_pooled_eu = hweight8(sseu->subslices_mask[0]) == 3;
 
 		sseu->min_eu_in_pool = 0;
 		if (info->has_pooled_eu) {
@@ -251,19 +316,22 @@  static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
-	const int s_max = 3, ss_max = 3, eu_max = 8;
 	int s, ss;
-	u32 fuse2, eu_disable[3]; /* s_max */
+	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+	sseu->max_slices = 3;
+	sseu->max_subslices = 3;
+	sseu->max_eus_per_subslice = 8;
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
 	 */
-	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
-	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
-				 GEN8_F2_SS_DIS_SHIFT);
+	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
+	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
+			   GEN8_F2_SS_DIS_SHIFT);
 
 	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
 	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
@@ -277,30 +345,36 @@  static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	 */
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
-		for (ss = 0; ss < ss_max; ss++) {
+		sseu->subslices_mask[s] = subslice_mask;
+
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u32 n_disabled;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslices_mask[ss] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
+			sseu->eu_mask[ss + s * sseu->max_subslices] =
+				~(eu_disable[s] >>
+				  (ss * sseu->max_eus_per_subslice));
+			n_disabled = hweight8(eu_disable[s] >>
+					      (ss * sseu->max_eus_per_subslice));
 
 			/*
 			 * Record which subslices have 7 EUs.
 			 */
-			if (eu_max - n_disabled == 7)
+			if (sseu->max_eus_per_subslice - n_disabled == 7)
 				sseu->subslice_7eu[s] |= 1 << ss;
-
-			sseu->eu_total += eu_max - n_disabled;
 		}
 	}
 
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * BDW is expected to always have a uniform distribution of EU across
 	 * subslices with the exception that any one EU in any one subslice may
@@ -437,6 +511,7 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_device_info *info = mkwrite_device_info(dev_priv);
 	enum pipe pipe;
+	int s;
 
 	if (INTEL_GEN(dev_priv) >= 10) {
 		for_each_pipe(dev_priv, pipe)
@@ -548,9 +623,11 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
 	DRM_DEBUG_DRIVER("subslice total: %u\n",
 			 sseu_subslice_total(&info->sseu));
-	DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask);
-	DRM_DEBUG_DRIVER("subslice per slice: %u\n",
-			 hweight8(info->sseu.subslice_mask));
+	for (s = 0; s < ARRAY_SIZE(info->sseu.subslices_mask); s++) {
+		DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslices_mask[s]);
+		DRM_DEBUG_DRIVER("subslice per slice: %u\n",
+				 hweight8(info->sseu.subslices_mask[s]));
+	}
 	DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
 	DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice);
 	DRM_DEBUG_DRIVER("has slice power gating: %s\n",
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2e38fbfdf08f..6e55b842c21d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2072,7 +2072,7 @@  make_rpcs(struct drm_i915_private *dev_priv)
 
 	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
 		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
-		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
+		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslices_mask[0]) <<
 			GEN8_RPCS_SS_CNT_SHIFT;
 		rpcs |= GEN8_RPCS_ENABLE;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c5ff203e42d6..a94bc1b5c502 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,7 +90,7 @@  hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 #define instdone_subslice_mask(dev_priv__) \
 	(INTEL_GEN(dev_priv__) == 7 ? \
-	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
+	 1 : INTEL_INFO(dev_priv__)->sseu.subslices_mask[0])
 
 #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
 	for ((slice__) = 0, (subslice__) = 0; \