diff mbox series

[9/9] drm/i915: Expand subslice mask

Message ID 20190723154934.26967-10-stuart.summers@intel.com (mailing list archive)
State New, archived
Headers show
Series Refactor to expand subslice mask (rev 2) | expand

Commit Message

Summers, Stuart July 23, 2019, 3:49 p.m. UTC
Currently, the subslice_mask runtime parameter is stored as an
array of subslices per slice. Expand the subslice mask array to
better match what is presented to userspace through the
I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
then calculated:
  slice * subslice stride + subslice index / 8

Signed-off-by: Stuart Summers <stuart.summers@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_sseu.c        | 26 ++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_sseu.h        |  5 +++-
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  3 +--
 drivers/gpu/drm/i915/i915_debugfs.c         |  5 +++-
 drivers/gpu/drm/i915/intel_device_info.c    |  8 +++----
 5 files changed, 38 insertions(+), 9 deletions(-)

Comments

Tvrtko Ursulin July 24, 2019, 1:05 p.m. UTC | #1
On 23/07/2019 16:49, Stuart Summers wrote:
> Currently, the subslice_mask runtime parameter is stored as an
> array of subslices per slice. Expand the subslice mask array to
> better match what is presented to userspace through the
> I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
> then calculated:
>    slice * subslice stride + subslice index / 8
> 
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_sseu.c        | 26 ++++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/intel_sseu.h        |  5 +++-
>   drivers/gpu/drm/i915/gt/intel_workarounds.c |  3 +--
>   drivers/gpu/drm/i915/i915_debugfs.c         |  5 +++-
>   drivers/gpu/drm/i915/intel_device_info.c    |  8 +++----
>   5 files changed, 38 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> index 607c1447287c..7abc2487b994 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
> @@ -30,6 +30,30 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
>   	return total;
>   }
>   
> +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
> +{
> +	int i, offset = slice * sseu->ss_stride;
> +	u32 mask = 0;
> +
> +	if (slice >= sseu->max_slices) {
> +		DRM_ERROR("%s: invalid slice %d, max: %d\n",
> +			  __func__, slice, sseu->max_slices);
> +		return 0;
> +	}
> +
> +	if (sseu->ss_stride > sizeof(mask)) {
> +		DRM_ERROR("%s: invalid subslice stride %d, max: %lu\n",
> +			  __func__, sseu->ss_stride, sizeof(mask));
> +		return 0;
> +	}
> +
> +	for (i = 0; i < sseu->ss_stride; i++)
> +		mask |= (u32)sseu->subslice_mask[offset + i] <<
> +			i * BITS_PER_BYTE;
> +
> +	return mask;
> +}

Why do you actually need these complications when the plan from the 
start was that the driver and user sseu representation structures can be 
different?

I only gave it a quick look so I might be wrong, but why not just expand 
the driver representations of subslice mask up from u8? Userspace API 
should be able to cope with strides already.

Regards,

Tvrtko

> +
>   void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>   			      u32 ss_mask)
>   {
> @@ -43,7 +67,7 @@ void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>   unsigned int
>   intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
>   {
> -	return hweight8(sseu->subslice_mask[slice]);
> +	return hweight32(intel_sseu_get_subslices(sseu, slice));
>   }
>   
>   u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> index 0ecc1c35a7a1..2291764b7db5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> @@ -15,10 +15,11 @@ struct drm_i915_private;
>   #define GEN_MAX_SLICES		(6) /* CNL upper bound */
>   #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
>   #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
> +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
>   
>   struct sseu_dev_info {
>   	u8 slice_mask;
> -	u8 subslice_mask[GEN_MAX_SLICES];
> +	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>   	u16 eu_total;
>   	u8 eu_per_subslice;
>   	u8 min_eu_in_pool;
> @@ -85,6 +86,8 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
>   unsigned int
>   intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
>   
> +u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
> +
>   void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>   			      u32 ss_mask);
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 704ace01e7f5..7ec60435d871 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -794,8 +794,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
>   	}
>   
>   	slice = fls(sseu->slice_mask) - 1;
> -	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
> -	subslice = fls(l3_en & sseu->subslice_mask[slice]);
> +	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
>   	if (!subslice) {
>   		DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
>   			 sseu->subslice_mask[slice], l3_en);
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 7f842506b9ea..96a25a770ade 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3944,13 +3944,16 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   
>   		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>   			unsigned int eu_cnt;
> +			u8 ss_idx = s * info->sseu.ss_stride +
> +				    ss / BITS_PER_BYTE;
>   
>   			if (IS_GEN9_LP(dev_priv)) {
>   				if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>   					/* skip disabled subslice */
>   					continue;
>   
> -				sseu->subslice_mask[s] |= BIT(ss);
> +				sseu->subslice_mask[ss_idx] |=
> +					BIT(ss % BITS_PER_BYTE);
>   			}
>   
>   			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 723b1fde5fc4..04dde4f204c3 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -93,9 +93,9 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
>   		   hweight8(sseu->slice_mask), sseu->slice_mask);
>   	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
>   	for (s = 0; s < sseu->max_slices; s++) {
> -		drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
> +		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
>   			   s, intel_sseu_subslices_per_slice(sseu, s),
> -			   sseu->subslice_mask[s]);
> +			   intel_sseu_get_subslices(sseu, s));
>   	}
>   	drm_printf(p, "EU total: %u\n", sseu->eu_total);
>   	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
> @@ -159,9 +159,9 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
>   	}
>   
>   	for (s = 0; s < sseu->max_slices; s++) {
> -		drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
> +		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
>   			   s, intel_sseu_subslices_per_slice(sseu, s),
> -			   sseu->subslice_mask[s]);
> +			   intel_sseu_get_subslices(sseu, s));
>   
>   		for (ss = 0; ss < sseu->max_subslices; ss++) {
>   			u16 enabled_eus = sseu_get_eus(sseu, s, ss);
>
Tvrtko Ursulin Sept. 2, 2019, 1:42 p.m. UTC | #2
On 24/07/2019 14:05, Tvrtko Ursulin wrote:
> 
> On 23/07/2019 16:49, Stuart Summers wrote:
>> Currently, the subslice_mask runtime parameter is stored as an
>> array of subslices per slice. Expand the subslice mask array to
>> better match what is presented to userspace through the
>> I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
>> then calculated:
>>    slice * subslice stride + subslice index / 8
>>
>> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_sseu.c        | 26 ++++++++++++++++++++-
>>   drivers/gpu/drm/i915/gt/intel_sseu.h        |  5 +++-
>>   drivers/gpu/drm/i915/gt/intel_workarounds.c |  3 +--
>>   drivers/gpu/drm/i915/i915_debugfs.c         |  5 +++-
>>   drivers/gpu/drm/i915/intel_device_info.c    |  8 +++----
>>   5 files changed, 38 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
>> b/drivers/gpu/drm/i915/gt/intel_sseu.c
>> index 607c1447287c..7abc2487b994 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
>> @@ -30,6 +30,30 @@ intel_sseu_subslice_total(const struct 
>> sseu_dev_info *sseu)
>>       return total;
>>   }
>> +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
>> +{
>> +    int i, offset = slice * sseu->ss_stride;
>> +    u32 mask = 0;
>> +
>> +    if (slice >= sseu->max_slices) {
>> +        DRM_ERROR("%s: invalid slice %d, max: %d\n",
>> +              __func__, slice, sseu->max_slices);
>> +        return 0;
>> +    }
>> +
>> +    if (sseu->ss_stride > sizeof(mask)) {
>> +        DRM_ERROR("%s: invalid subslice stride %d, max: %lu\n",
>> +              __func__, sseu->ss_stride, sizeof(mask));
>> +        return 0;
>> +    }
>> +
>> +    for (i = 0; i < sseu->ss_stride; i++)
>> +        mask |= (u32)sseu->subslice_mask[offset + i] <<
>> +            i * BITS_PER_BYTE;
>> +
>> +    return mask;
>> +}
> 
> Why do you actually need these complications when the plan from the 
> start was that the driver and user sseu representation structures can be 
> different?
> 
> I only gave it a quick look so I might be wrong, but why not just expand 
> the driver representations of subslice mask up from u8? Userspace API 
> should be able to cope with strides already.

I never got an answer to this and the series was merged in the meantime.

Maybe not much harm but I still don't understand why all the 
complications seemingly just to avoid bumping the *internal* ss mask up 
from u8. As long as the internal and abi sseu info struct are well 
separated and access point few and well controlled (I think they are) 
then I don't see why the internal side had to be converted to u8 and 
strides. But maybe I am missing something.

Regards,

Tvrtko

> 
> Regards,
> 
> Tvrtko
> 
>> +
>>   void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>>                     u32 ss_mask)
>>   {
>> @@ -43,7 +67,7 @@ void intel_sseu_set_subslices(struct sseu_dev_info 
>> *sseu, int slice,
>>   unsigned int
>>   intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 
>> slice)
>>   {
>> -    return hweight8(sseu->subslice_mask[slice]);
>> +    return hweight32(intel_sseu_get_subslices(sseu, slice));
>>   }
>>   u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h 
>> b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> index 0ecc1c35a7a1..2291764b7db5 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> @@ -15,10 +15,11 @@ struct drm_i915_private;
>>   #define GEN_MAX_SLICES        (6) /* CNL upper bound */
>>   #define GEN_MAX_SUBSLICES    (8) /* ICL upper bound */
>>   #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, 
>> BITS_PER_BYTE)
>> +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
>>   struct sseu_dev_info {
>>       u8 slice_mask;
>> -    u8 subslice_mask[GEN_MAX_SLICES];
>> +    u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>>       u16 eu_total;
>>       u8 eu_per_subslice;
>>       u8 min_eu_in_pool;
>> @@ -85,6 +86,8 @@ intel_sseu_subslice_total(const struct sseu_dev_info 
>> *sseu);
>>   unsigned int
>>   intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 
>> slice);
>> +u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 
>> slice);
>> +
>>   void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>>                     u32 ss_mask);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
>> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> index 704ace01e7f5..7ec60435d871 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> @@ -794,8 +794,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct 
>> i915_wa_list *wal)
>>       }
>>       slice = fls(sseu->slice_mask) - 1;
>> -    GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
>> -    subslice = fls(l3_en & sseu->subslice_mask[slice]);
>> +    subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
>>       if (!subslice) {
>>           DRM_WARN("No common index found between subslice mask %x and 
>> L3 bank mask %x!\n",
>>                sseu->subslice_mask[slice], l3_en);
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 7f842506b9ea..96a25a770ade 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3944,13 +3944,16 @@ static void gen9_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>           for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>>               unsigned int eu_cnt;
>> +            u8 ss_idx = s * info->sseu.ss_stride +
>> +                    ss / BITS_PER_BYTE;
>>               if (IS_GEN9_LP(dev_priv)) {
>>                   if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>>                       /* skip disabled subslice */
>>                       continue;
>> -                sseu->subslice_mask[s] |= BIT(ss);
>> +                sseu->subslice_mask[ss_idx] |=
>> +                    BIT(ss % BITS_PER_BYTE);
>>               }
>>               eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index 723b1fde5fc4..04dde4f204c3 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -93,9 +93,9 @@ static void sseu_dump(const struct sseu_dev_info 
>> *sseu, struct drm_printer *p)
>>              hweight8(sseu->slice_mask), sseu->slice_mask);
>>       drm_printf(p, "subslice total: %u\n", 
>> intel_sseu_subslice_total(sseu));
>>       for (s = 0; s < sseu->max_slices; s++) {
>> -        drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
>> +        drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
>>                  s, intel_sseu_subslices_per_slice(sseu, s),
>> -               sseu->subslice_mask[s]);
>> +               intel_sseu_get_subslices(sseu, s));
>>       }
>>       drm_printf(p, "EU total: %u\n", sseu->eu_total);
>>       drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
>> @@ -159,9 +159,9 @@ void intel_device_info_dump_topology(const struct 
>> sseu_dev_info *sseu,
>>       }
>>       for (s = 0; s < sseu->max_slices; s++) {
>> -        drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
>> +        drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
>>                  s, intel_sseu_subslices_per_slice(sseu, s),
>> -               sseu->subslice_mask[s]);
>> +               intel_sseu_get_subslices(sseu, s));
>>           for (ss = 0; ss < sseu->max_subslices; ss++) {
>>               u16 enabled_eus = sseu_get_eus(sseu, s, ss);
>>
Chris Wilson Sept. 6, 2019, 6:13 p.m. UTC | #3
Quoting Tvrtko Ursulin (2019-09-02 14:42:44)
> 
> On 24/07/2019 14:05, Tvrtko Ursulin wrote:
> > 
> > On 23/07/2019 16:49, Stuart Summers wrote:
> >> +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
> >> +{
> >> +    int i, offset = slice * sseu->ss_stride;
> >> +    u32 mask = 0;
> >> +
> >> +    if (slice >= sseu->max_slices) {
> >> +        DRM_ERROR("%s: invalid slice %d, max: %d\n",
> >> +              __func__, slice, sseu->max_slices);
> >> +        return 0;
> >> +    }
> >> +
> >> +    if (sseu->ss_stride > sizeof(mask)) {
> >> +        DRM_ERROR("%s: invalid subslice stride %d, max: %lu\n",
> >> +              __func__, sseu->ss_stride, sizeof(mask));
> >> +        return 0;
> >> +    }
> >> +
> >> +    for (i = 0; i < sseu->ss_stride; i++)
> >> +        mask |= (u32)sseu->subslice_mask[offset + i] <<
> >> +            i * BITS_PER_BYTE;
> >> +
> >> +    return mask;
> >> +}
> > 
> > Why do you actually need these complications when the plan from the 
> > start was that the driver and user sseu representation structures can be 
> > different?
> > 
> > I only gave it a quick look so I might be wrong, but why not just expand 
> > the driver representations of subslice mask up from u8? Userspace API 
> > should be able to cope with strides already.
> 
> I never got an answer to this and the series was merged in the meantime.
> 
> Maybe not much harm but I still don't understand why all the 
> complications seemingly just to avoid bumping the *internal* ss mask up 
> from u8. As long as the internal and abi sseu info struct are well 
> separated and access point few and well controlled (I think they are) 
> then I don't see why the internal side had to be converted to u8 and 
> strides. But maybe I am missing something.

I looked at it and thought it was open-coding bitmap.h as well. I
accepted it in good faith that it improved certain use cases and should
even make tidying up the code without regressing those easier.
-Chris
Summers, Stuart Sept. 10, 2019, 4:53 a.m. UTC | #4
On Fri, 2019-09-06 at 19:13 +0100, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-09-02 14:42:44)
> > 
> > On 24/07/2019 14:05, Tvrtko Ursulin wrote:
> > > 
> > > On 23/07/2019 16:49, Stuart Summers wrote:
> > > > +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
> > > > u8 slice)
> > > > +{
> > > > +    int i, offset = slice * sseu->ss_stride;
> > > > +    u32 mask = 0;
> > > > +
> > > > +    if (slice >= sseu->max_slices) {
> > > > +        DRM_ERROR("%s: invalid slice %d, max: %d\n",
> > > > +              __func__, slice, sseu->max_slices);
> > > > +        return 0;
> > > > +    }
> > > > +
> > > > +    if (sseu->ss_stride > sizeof(mask)) {
> > > > +        DRM_ERROR("%s: invalid subslice stride %d, max:
> > > > %lu\n",
> > > > +              __func__, sseu->ss_stride, sizeof(mask));
> > > > +        return 0;
> > > > +    }
> > > > +
> > > > +    for (i = 0; i < sseu->ss_stride; i++)
> > > > +        mask |= (u32)sseu->subslice_mask[offset + i] <<
> > > > +            i * BITS_PER_BYTE;
> > > > +
> > > > +    return mask;
> > > > +}
> > > 
> > > Why do you actually need these complications when the plan from
> > > the 
> > > start was that the driver and user sseu representation structures
> > > can be 
> > > different?
> > > 
> > > I only gave it a quick look so I might be wrong, but why not just
> > > expand 
> > > the driver representations of subslice mask up from u8? Userspace
> > > API 
> > > should be able to cope with strides already.
> > 
> > I never got an answer to this and the series was merged in the
> > meantime.

Thanks for the note here Tvrtko and sorry for the missed response! For
some reason I hadn't caught this comment earlier :(

> > 
> > Maybe not much harm but I still don't understand why all the 
> > complications seemingly just to avoid bumping the *internal* ss
> > mask up 
> > from u8. As long as the internal and abi sseu info struct are well 
> > separated and access point few and well controlled (I think they
> > are) 
> > then I don't see why the internal side had to be converted to u8
> > and 
> > strides. But maybe I am missing something.
> 
> I looked at it and thought it was open-coding bitmap.h as well. I
> accepted it in good faith that it improved certain use cases and
> should
> even make tidying up the code without regressing those easier.

The goal here is to make sure we have an infrastructure in place that
always provides a consistent bit layout to userspace regardless of
underlying architecture endianness. Perhaps this could have been made
more clear in the commit message here.

Thanks,
Stuart

> -Chris
Tvrtko Ursulin Sept. 10, 2019, 8:13 a.m. UTC | #5
On 10/09/2019 05:53, Summers, Stuart wrote:
> On Fri, 2019-09-06 at 19:13 +0100, Chris Wilson wrote:
>> Quoting Tvrtko Ursulin (2019-09-02 14:42:44)
>>>
>>> On 24/07/2019 14:05, Tvrtko Ursulin wrote:
>>>>
>>>> On 23/07/2019 16:49, Stuart Summers wrote:
>>>>> +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
>>>>> u8 slice)
>>>>> +{
>>>>> +    int i, offset = slice * sseu->ss_stride;
>>>>> +    u32 mask = 0;
>>>>> +
>>>>> +    if (slice >= sseu->max_slices) {
>>>>> +        DRM_ERROR("%s: invalid slice %d, max: %d\n",
>>>>> +              __func__, slice, sseu->max_slices);
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>> +    if (sseu->ss_stride > sizeof(mask)) {
>>>>> +        DRM_ERROR("%s: invalid subslice stride %d, max:
>>>>> %lu\n",
>>>>> +              __func__, sseu->ss_stride, sizeof(mask));
>>>>> +        return 0;
>>>>> +    }
>>>>> +
>>>>> +    for (i = 0; i < sseu->ss_stride; i++)
>>>>> +        mask |= (u32)sseu->subslice_mask[offset + i] <<
>>>>> +            i * BITS_PER_BYTE;
>>>>> +
>>>>> +    return mask;
>>>>> +}
>>>>
>>>> Why do you actually need these complications when the plan from
>>>> the
>>>> start was that the driver and user sseu representation structures
>>>> can be
>>>> different?
>>>>
>>>> I only gave it a quick look so I might be wrong, but why not just
>>>> expand
>>>> the driver representations of subslice mask up from u8? Userspace
>>>> API
>>>> should be able to cope with strides already.
>>>
>>> I never got an answer to this and the series was merged in the
>>> meantime.
> 
> Thanks for the note here Tvrtko and sorry for the missed response! For
> some reason I hadn't caught this comment earlier :(

Ok no worries.

>>>
>>> Maybe not much harm but I still don't understand why all the
>>> complications seemingly just to avoid bumping the *internal* ss
>>> mask up
>>> from u8. As long as the internal and abi sseu info struct are well
>>> separated and access point few and well controlled (I think they
>>> are)
>>> then I don't see why the internal side had to be converted to u8
>>> and
>>> strides. But maybe I am missing something.
>>
>> I looked at it and thought it was open-coding bitmap.h as well. I
>> accepted it in good faith that it improved certain use cases and
>> should
>> even make tidying up the code without regressing those easier.
> 
> The goal here is to make sure we have an infrastructure in place that
> always provides a consistent bit layout to userspace regardless of
> underlying architecture endianness. Perhaps this could have been made
> more clear in the commit message here.

My point was that internal and userspace representation do not have to 
match and that it probably would have been much simpler code if that 
principle remained. We already had a split between internal and ABI sseu 
structs and whereas the latter understands concept of stride already, 
the former could have just had it's subslice mask field expended from u8 
to u16, or whatever. But anyway, at this point I don't even remember all 
the details your series did, and given it's merged I won't be going 
re-reading it.

Regards,

Tvrtko
Summers, Stuart Sept. 18, 2019, 8:25 p.m. UTC | #6
On Tue, 2019-09-10 at 09:13 +0100, Tvrtko Ursulin wrote:
> On 10/09/2019 05:53, Summers, Stuart wrote:
> > On Fri, 2019-09-06 at 19:13 +0100, Chris Wilson wrote:
> > > Quoting Tvrtko Ursulin (2019-09-02 14:42:44)
> > > > 
> > > > On 24/07/2019 14:05, Tvrtko Ursulin wrote:
> > > > > 
> > > > > On 23/07/2019 16:49, Stuart Summers wrote:
> > > > > > +u32 intel_sseu_get_subslices(const struct sseu_dev_info
> > > > > > *sseu,
> > > > > > u8 slice)
> > > > > > +{
> > > > > > +    int i, offset = slice * sseu->ss_stride;
> > > > > > +    u32 mask = 0;
> > > > > > +
> > > > > > +    if (slice >= sseu->max_slices) {
> > > > > > +        DRM_ERROR("%s: invalid slice %d, max: %d\n",
> > > > > > +              __func__, slice, sseu->max_slices);
> > > > > > +        return 0;
> > > > > > +    }
> > > > > > +
> > > > > > +    if (sseu->ss_stride > sizeof(mask)) {
> > > > > > +        DRM_ERROR("%s: invalid subslice stride %d, max:
> > > > > > %lu\n",
> > > > > > +              __func__, sseu->ss_stride, sizeof(mask));
> > > > > > +        return 0;
> > > > > > +    }
> > > > > > +
> > > > > > +    for (i = 0; i < sseu->ss_stride; i++)
> > > > > > +        mask |= (u32)sseu->subslice_mask[offset + i] <<
> > > > > > +            i * BITS_PER_BYTE;
> > > > > > +
> > > > > > +    return mask;
> > > > > > +}
> > > > > 
> > > > > Why do you actually need these complications when the plan
> > > > > from
> > > > > the
> > > > > start was that the driver and user sseu representation
> > > > > structures
> > > > > can be
> > > > > different?
> > > > > 
> > > > > I only gave it a quick look so I might be wrong, but why not
> > > > > just
> > > > > expand
> > > > > the driver representations of subslice mask up from u8?
> > > > > Userspace
> > > > > API
> > > > > should be able to cope with strides already.
> > > > 
> > > > I never got an answer to this and the series was merged in the
> > > > meantime.
> > 
> > Thanks for the note here Tvrtko and sorry for the missed response!
> > For
> > some reason I hadn't caught this comment earlier :(
> 
> Ok no worries.
> 
> > > > 
> > > > Maybe not much harm but I still don't understand why all the
> > > > complications seemingly just to avoid bumping the *internal* ss
> > > > mask up
> > > > from u8. As long as the internal and abi sseu info struct are
> > > > well
> > > > separated and access point few and well controlled (I think
> > > > they
> > > > are)
> > > > then I don't see why the internal side had to be converted to
> > > > u8
> > > > and
> > > > strides. But maybe I am missing something.
> > > 
> > > I looked at it and thought it was open-coding bitmap.h as well. I
> > > accepted it in good faith that it improved certain use cases and
> > > should
> > > even make tidying up the code without regressing those easier.
> > 
> > The goal here is to make sure we have an infrastructure in place
> > that
> > always provides a consistent bit layout to userspace regardless of
> > underlying architecture endianness. Perhaps this could have been
> > made
> > more clear in the commit message here.
> 
> My point was that internal and userspace representation do not have
> to 
> match and that it probably would have been much simpler code if that 
> principle remained. We already had a split between internal and ABI
> sseu 
> structs and whereas the latter understands concept of stride
> already, 
> the former could have just had it's subslice mask field expended from
> u8 
> to u16, or whatever. But anyway, at this point I don't even remember
> all 
> the details your series did, and given it's merged I won't be going 
> re-reading it.

Thanks Tvrtko, I'll keep this in mind for future changes.

-Stuart

> 
> Regards,
> 
> Tvrtko
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 607c1447287c..7abc2487b994 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -30,6 +30,30 @@  intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
 	return total;
 }
 
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+{
+	int i, offset = slice * sseu->ss_stride;
+	u32 mask = 0;
+
+	if (slice >= sseu->max_slices) {
+		DRM_ERROR("%s: invalid slice %d, max: %d\n",
+			  __func__, slice, sseu->max_slices);
+		return 0;
+	}
+
+	if (sseu->ss_stride > sizeof(mask)) {
+		DRM_ERROR("%s: invalid subslice stride %d, max: %lu\n",
+			  __func__, sseu->ss_stride, sizeof(mask));
+		return 0;
+	}
+
+	for (i = 0; i < sseu->ss_stride; i++)
+		mask |= (u32)sseu->subslice_mask[offset + i] <<
+			i * BITS_PER_BYTE;
+
+	return mask;
+}
+
 void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
 			      u32 ss_mask)
 {
@@ -43,7 +67,7 @@  void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
 unsigned int
 intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
 {
-	return hweight8(sseu->subslice_mask[slice]);
+	return hweight32(intel_sseu_get_subslices(sseu, slice));
 }
 
 u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 0ecc1c35a7a1..2291764b7db5 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -15,10 +15,11 @@  struct drm_i915_private;
 #define GEN_MAX_SLICES		(6) /* CNL upper bound */
 #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
 #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
 
 struct sseu_dev_info {
 	u8 slice_mask;
-	u8 subslice_mask[GEN_MAX_SLICES];
+	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
 	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
@@ -85,6 +86,8 @@  intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
 unsigned int
 intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
 
+u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+
 void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
 			      u32 ss_mask);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 704ace01e7f5..7ec60435d871 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -794,8 +794,7 @@  wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 	}
 
 	slice = fls(sseu->slice_mask) - 1;
-	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
-	subslice = fls(l3_en & sseu->subslice_mask[slice]);
+	subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
 	if (!subslice) {
 		DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
 			 sseu->subslice_mask[slice], l3_en);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 7f842506b9ea..96a25a770ade 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3944,13 +3944,16 @@  static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
+			u8 ss_idx = s * info->sseu.ss_stride +
+				    ss / BITS_PER_BYTE;
 
 			if (IS_GEN9_LP(dev_priv)) {
 				if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 					/* skip disabled subslice */
 					continue;
 
-				sseu->subslice_mask[s] |= BIT(ss);
+				sseu->subslice_mask[ss_idx] |=
+					BIT(ss % BITS_PER_BYTE);
 			}
 
 			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 723b1fde5fc4..04dde4f204c3 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -93,9 +93,9 @@  static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 		   hweight8(sseu->slice_mask), sseu->slice_mask);
 	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
 	for (s = 0; s < sseu->max_slices; s++) {
-		drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
+		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
 			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   sseu->subslice_mask[s]);
+			   intel_sseu_get_subslices(sseu, s));
 	}
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
@@ -159,9 +159,9 @@  void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
 	}
 
 	for (s = 0; s < sseu->max_slices; s++) {
-		drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
+		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
 			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   sseu->subslice_mask[s]);
+			   intel_sseu_get_subslices(sseu, s));
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u16 enabled_eus = sseu_get_eus(sseu, s, ss);