diff mbox

drm/i915: read EU powergating status from command streamer

Message ID 20180621112712.15174-1-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin June 21, 2018, 11:27 a.m. UTC
Powergating of the EU array is configured as part of the context
image. This seems to imply we need a context to have run before we can
read the slice/subslice/EU powergating status.

This change captures the values of the powergating status registers
from the command streamer to ensure a valid we get valid values. This
is currently used only on gen9+ but someone could rightfully argue to
go as far as gen8.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103484
---
 drivers/gpu/drm/i915/i915_debugfs.c | 177 ++++++++++++++++++++++++----
 1 file changed, 155 insertions(+), 22 deletions(-)

Comments

Chris Wilson June 21, 2018, 11:54 a.m. UTC | #1
Quoting Lionel Landwerlin (2018-06-21 12:27:12)
> Powergating of the EU array is configured as part of the context
> image. This seems to imply we need a context to have run before we can
> read the slice/subslice/EU powergating status.
> 
> This change captures the values of the powergating status registers
> from the command streamer to ensure a valid we get valid values. This
> is currently used only on gen9+ but someone could rightfully argue to
> go as far as gen8.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103484
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 177 ++++++++++++++++++++++++----
>  1 file changed, 155 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c400f42a54ec..0da8ce8acbcb 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4310,14 +4310,120 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>  #undef SS_MAX
>  }
>  
> -static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
> -                                    struct sseu_dev_info *sseu)
> +static struct drm_i915_gem_object *
> +gen9_read_registers(struct drm_i915_private *i915,
> +                   u32 *register_offsets,
> +                   u32 n_registers)

unsigned int n_register (or even unsigned long for total pedantry). I
don't see why you want to specify a bitwidth.

> +{
> +       struct drm_i915_gem_object *bo;
> +       struct i915_request *rq;
> +       struct i915_vma *vma;
> +       u32 *cs, bo_size = PAGE_SIZE * DIV_ROUND_UP(n_registers * 4, PAGE_SIZE);
> +       int ret, r;
> +
> +       ret = i915_mutex_lock_interruptible(&i915->drm);
> +       if (ret)
> +               return ERR_PTR(ret);
> +
> +       bo = i915_gem_object_create(i915, bo_size);

create_internal() (just be sure you don't read back uninitialised data)

bo_size = PAGE_ALIGN(sizeof(u32) * n_registers);

> +       if (IS_ERR(bo)) {
> +               ret = PTR_ERR(bo);
> +               goto unlock;
> +       }
> +
> +       ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> +       if (ret)
> +               goto put_bo;
> +
> +
> +       vma = i915_vma_instance(bo,
> +                               &i915->kernel_context->ppgtt->vm,
> +                               NULL);
> +       if (IS_ERR(vma)) {
> +               ret = PTR_ERR(vma);
> +               goto put_bo;
> +       }
> +
> +       ret = i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, PIN_USER);

CONTEXT_ALIGN?

> +       if (ret)
> +               goto vma_unpin;
> +
> +
> +       rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
> +       if (IS_ERR(rq)) {
> +               ret = PTR_ERR(rq);
> +               goto vma_unpin;
> +       }
> +
> +       cs = intel_ring_begin(rq, n_registers * 4);
> +       if (IS_ERR(cs)) {
> +               i915_request_add(rq);
> +               ret = PTR_ERR(cs);
> +               goto vma_unpin;
> +       }
> +
> +       for (r = 0; r < n_registers; r++) {
> +               u64 offset = vma->node.start + r * 4;
> +
> +               *cs++ = MI_STORE_REGISTER_MEM_GEN8;
> +               *cs++ = register_offsets[r];
> +               *cs++ = lower_32_bits(offset);
> +               *cs++ = upper_32_bits(offset);
> +       }

I think you should i915_vma_move_to_active() for safety and give it
an active reference.

> +       intel_ring_advance(rq, cs);
> +
> +       i915_request_add(rq);
> +
> +       mutex_unlock(&i915->drm.struct_mutex);
> +
> +       i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);

For sanity, check for an error. (Hence why using vma_move_to_active
becomes relevant.)

> +       return bo;
> +
> +vma_unpin:
> +       i915_vma_unpin(vma);
> +put_bo:
> +       i915_gem_object_put(bo);
> +unlock:
> +       mutex_unlock(&i915->drm.struct_mutex);
> +       return bo;

Report the ERR_PTR(ret) (otherwise, a neat use-after-free).

> +}
> +
> +
> +static int gen10_sseu_device_status(struct drm_i915_private *dev_priv,
> +                                   struct sseu_dev_info *sseu)
>  {
>  #define SS_MAX 6
>         const struct intel_device_info *info = INTEL_INFO(dev_priv);
> -       u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
> +       struct drm_i915_gem_object *bo;
> +       struct {
> +               u32 s_reg[SS_MAX];
> +               u32 eu_reg[2 * SS_MAX];
> +       } reg_offsets, *reg_data;
> +       u32 eu_mask[2];
>         int s, ss;
>  
> +       for (s = 0; s < info->sseu.max_slices; s++) {
> +               reg_offsets.s_reg[s] =
> +                       i915_mmio_reg_offset(GEN10_SLICE_PGCTL_ACK(s));
> +               reg_offsets.eu_reg[2 * s] =
> +                       i915_mmio_reg_offset(GEN10_SS01_EU_PGCTL_ACK(s));
> +               reg_offsets.eu_reg[2 * s] =
> +                       i915_mmio_reg_offset(GEN10_SS23_EU_PGCTL_ACK(s));

I started by wishing you used i915_mmio_t, but I can appreciate the
logic of having offset/data tied together in the same layout.

> +       }
> +
> +       bo = gen9_read_registers(dev_priv, reg_offsets.s_reg,
> +                                sizeof(reg_offsets) / sizeof(u32));
> +       if (IS_ERR(bo))
> +               return PTR_ERR(bo);

I'd prefer this to use i915_gem_object_to_cpu_domain() but there's no
advantage to that, and you've set it up to just work. So leave it.

> +       reg_data = i915_gem_object_pin_map(bo, I915_MAP_WB);
> +       if (IS_ERR(reg_data)) {
> +               i915_gem_object_put(bo);
> +               return PTR_ERR(reg_data);
> +       }

Other than questioning your sanity here at doing this rather than just
deleting the debugfs, there's nothing inherently broken. I would do it
for gen8 as well, no point leaving the odd one out.

How about if we just hexdumped the kernel_context image and let the
debugger inspect any and all registers at their discretion?
-Chris
Lionel Landwerlin June 21, 2018, 1:14 p.m. UTC | #2
On 21/06/18 12:54, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2018-06-21 12:27:12)
>> Powergating of the EU array is configured as part of the context
>> image. This seems to imply we need a context to have run before we can
>> read the slice/subslice/EU powergating status.
>>
>> This change captures the values of the powergating status registers
>> from the command streamer to ensure a valid we get valid values. This
>> is currently used only on gen9+ but someone could rightfully argue to
>> go as far as gen8.
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103484
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c | 177 ++++++++++++++++++++++++----
>>   1 file changed, 155 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index c400f42a54ec..0da8ce8acbcb 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -4310,14 +4310,120 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>>   #undef SS_MAX
>>   }
>>   
>> -static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>> -                                    struct sseu_dev_info *sseu)
>> +static struct drm_i915_gem_object *
>> +gen9_read_registers(struct drm_i915_private *i915,
>> +                   u32 *register_offsets,
>> +                   u32 n_registers)
> unsigned int n_register (or even unsigned long for total pedantry). I
> don't see why you want to specify a bitwidth.

Sure.

>
>> +{
>> +       struct drm_i915_gem_object *bo;
>> +       struct i915_request *rq;
>> +       struct i915_vma *vma;
>> +       u32 *cs, bo_size = PAGE_SIZE * DIV_ROUND_UP(n_registers * 4, PAGE_SIZE);
>> +       int ret, r;
>> +
>> +       ret = i915_mutex_lock_interruptible(&i915->drm);
>> +       if (ret)
>> +               return ERR_PTR(ret);
>> +
>> +       bo = i915_gem_object_create(i915, bo_size);
> create_internal() (just be sure you don't read back uninitialised data)
>
> bo_size = PAGE_ALIGN(sizeof(u32) * n_registers);

Thanks.

>
>> +       if (IS_ERR(bo)) {
>> +               ret = PTR_ERR(bo);
>> +               goto unlock;
>> +       }
>> +
>> +       ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
>> +       if (ret)
>> +               goto put_bo;
>> +
>> +
>> +       vma = i915_vma_instance(bo,
>> +                               &i915->kernel_context->ppgtt->vm,
>> +                               NULL);
>> +       if (IS_ERR(vma)) {
>> +               ret = PTR_ERR(vma);
>> +               goto put_bo;
>> +       }
>> +
>> +       ret = i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, PIN_USER);
> CONTEXT_ALIGN?

Dammit... copy-paste...

>
>> +       if (ret)
>> +               goto vma_unpin;
>> +
>> +
>> +       rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
>> +       if (IS_ERR(rq)) {
>> +               ret = PTR_ERR(rq);
>> +               goto vma_unpin;
>> +       }
>> +
>> +       cs = intel_ring_begin(rq, n_registers * 4);
>> +       if (IS_ERR(cs)) {
>> +               i915_request_add(rq);
>> +               ret = PTR_ERR(cs);
>> +               goto vma_unpin;
>> +       }
>> +
>> +       for (r = 0; r < n_registers; r++) {
>> +               u64 offset = vma->node.start + r * 4;
>> +
>> +               *cs++ = MI_STORE_REGISTER_MEM_GEN8;
>> +               *cs++ = register_offsets[r];
>> +               *cs++ = lower_32_bits(offset);
>> +               *cs++ = upper_32_bits(offset);
>> +       }
> I think you should i915_vma_move_to_active() for safety and give it
> an active reference.

Indeed.

>
>> +       intel_ring_advance(rq, cs);
>> +
>> +       i915_request_add(rq);
>> +
>> +       mutex_unlock(&i915->drm.struct_mutex);
>> +
>> +       i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
> For sanity, check for an error. (Hence why using vma_move_to_active
> becomes relevant.)

Thanks!

>
>> +       return bo;
>> +
>> +vma_unpin:
>> +       i915_vma_unpin(vma);
>> +put_bo:
>> +       i915_gem_object_put(bo);
>> +unlock:
>> +       mutex_unlock(&i915->drm.struct_mutex);
>> +       return bo;
> Report the ERR_PTR(ret) (otherwise, a neat use-after-free).

Oops my bad.

>
>> +}
>> +
>> +
>> +static int gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>> +                                   struct sseu_dev_info *sseu)
>>   {
>>   #define SS_MAX 6
>>          const struct intel_device_info *info = INTEL_INFO(dev_priv);
>> -       u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
>> +       struct drm_i915_gem_object *bo;
>> +       struct {
>> +               u32 s_reg[SS_MAX];
>> +               u32 eu_reg[2 * SS_MAX];
>> +       } reg_offsets, *reg_data;
>> +       u32 eu_mask[2];
>>          int s, ss;
>>   
>> +       for (s = 0; s < info->sseu.max_slices; s++) {
>> +               reg_offsets.s_reg[s] =
>> +                       i915_mmio_reg_offset(GEN10_SLICE_PGCTL_ACK(s));
>> +               reg_offsets.eu_reg[2 * s] =
>> +                       i915_mmio_reg_offset(GEN10_SS01_EU_PGCTL_ACK(s));
>> +               reg_offsets.eu_reg[2 * s] =
>> +                       i915_mmio_reg_offset(GEN10_SS23_EU_PGCTL_ACK(s));
> I started by wishing you used i915_mmio_t, but I can appreciate the
> logic of having offset/data tied together in the same layout.
>
>> +       }
>> +
>> +       bo = gen9_read_registers(dev_priv, reg_offsets.s_reg,
>> +                                sizeof(reg_offsets) / sizeof(u32));
>> +       if (IS_ERR(bo))
>> +               return PTR_ERR(bo);
> I'd prefer this to use i915_gem_object_to_cpu_domain() but there's no
> advantage to that, and you've set it up to just work. So leave it.
>
>> +       reg_data = i915_gem_object_pin_map(bo, I915_MAP_WB);
>> +       if (IS_ERR(reg_data)) {
>> +               i915_gem_object_put(bo);
>> +               return PTR_ERR(reg_data);
>> +       }
> Other than questioning your sanity here at doing this rather than just
> deleting the debugfs, there's nothing inherently broken. I would do it
> for gen8 as well, no point leaving the odd one out.

We have an igt tests checking these values and because you haven't 
brought up deleting the test before, I assumed we still wanted it fixed.
But now that you brought it up, I'm equally fine deleting this debugfs 
code and removing the igt test.

>
> How about if we just hexdumped the kernel_context image and let the
> debugger inspect any and all registers at their discretion?

Hmm... these Ack registers aren't saved in the context image I think.

> -Chris
>
Chris Wilson June 21, 2018, 1:20 p.m. UTC | #3
Quoting Lionel Landwerlin (2018-06-21 14:14:52)
> On 21/06/18 12:54, Chris Wilson wrote:
> > Other than questioning your sanity here at doing this rather than just
> > deleting the debugfs, there's nothing inherently broken. I would do it
> > for gen8 as well, no point leaving the odd one out.
> 
> We have an igt tests checking these values and because you haven't 
> brought up deleting the test before, I assumed we still wanted it fixed.
> But now that you brought it up, I'm equally fine deleting this debugfs 
> code and removing the igt test.

I presume as it exists someone found it useful. If not, wave goodbye :)

Checking over the bug again, it has only been ourselves discussing it,
no one else has noticed the bug so one presumes it is only igt that
cares.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c400f42a54ec..0da8ce8acbcb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4310,14 +4310,120 @@  static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 #undef SS_MAX
 }
 
-static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
-				     struct sseu_dev_info *sseu)
+static struct drm_i915_gem_object *
+gen9_read_registers(struct drm_i915_private *i915,
+		    u32 *register_offsets,
+		    u32 n_registers)
+{
+	struct drm_i915_gem_object *bo;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	u32 *cs, bo_size = PAGE_SIZE * DIV_ROUND_UP(n_registers * 4, PAGE_SIZE);
+	int ret, r;
+
+	ret = i915_mutex_lock_interruptible(&i915->drm);
+	if (ret)
+		return ERR_PTR(ret);
+
+	bo = i915_gem_object_create(i915, bo_size);
+	if (IS_ERR(bo)) {
+		ret = PTR_ERR(bo);
+		goto unlock;
+	}
+
+	ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
+	if (ret)
+		goto put_bo;
+
+
+	vma = i915_vma_instance(bo,
+				&i915->kernel_context->ppgtt->vm,
+				NULL);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto put_bo;
+	}
+
+	ret = i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, PIN_USER);
+	if (ret)
+		goto vma_unpin;
+
+
+	rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto vma_unpin;
+	}
+
+	cs = intel_ring_begin(rq, n_registers * 4);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		ret = PTR_ERR(cs);
+		goto vma_unpin;
+	}
+
+	for (r = 0; r < n_registers; r++) {
+		u64 offset = vma->node.start + r * 4;
+
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8;
+		*cs++ = register_offsets[r];
+		*cs++ = lower_32_bits(offset);
+		*cs++ = upper_32_bits(offset);
+	}
+
+	intel_ring_advance(rq, cs);
+
+	i915_request_add(rq);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+
+	return bo;
+
+vma_unpin:
+	i915_vma_unpin(vma);
+put_bo:
+	i915_gem_object_put(bo);
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return bo;
+}
+
+
+static int gen10_sseu_device_status(struct drm_i915_private *dev_priv,
+				    struct sseu_dev_info *sseu)
 {
 #define SS_MAX 6
 	const struct intel_device_info *info = INTEL_INFO(dev_priv);
-	u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
+	struct drm_i915_gem_object *bo;
+	struct {
+		u32 s_reg[SS_MAX];
+		u32 eu_reg[2 * SS_MAX];
+	} reg_offsets, *reg_data;
+	u32 eu_mask[2];
 	int s, ss;
 
+	for (s = 0; s < info->sseu.max_slices; s++) {
+		reg_offsets.s_reg[s] =
+			i915_mmio_reg_offset(GEN10_SLICE_PGCTL_ACK(s));
+		reg_offsets.eu_reg[2 * s] =
+			i915_mmio_reg_offset(GEN10_SS01_EU_PGCTL_ACK(s));
+		reg_offsets.eu_reg[2 * s] =
+			i915_mmio_reg_offset(GEN10_SS23_EU_PGCTL_ACK(s));
+	}
+
+	bo = gen9_read_registers(dev_priv, reg_offsets.s_reg,
+				 sizeof(reg_offsets) / sizeof(u32));
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	reg_data = i915_gem_object_pin_map(bo, I915_MAP_WB);
+	if (IS_ERR(reg_data)) {
+		i915_gem_object_put(bo);
+		return PTR_ERR(reg_data);
+	}
+
 	for (s = 0; s < info->sseu.max_slices; s++) {
 		/*
 		 * FIXME: Valid SS Mask respects the spec and read
@@ -4325,10 +4431,7 @@  static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 		 * although this seems wrong because it would leave many
 		 * subslices without ACK.
 		 */
-		s_reg[s] = I915_READ(GEN10_SLICE_PGCTL_ACK(s)) &
-			GEN10_PGCTL_VALID_SS_MASK(s);
-		eu_reg[2 * s] = I915_READ(GEN10_SS01_EU_PGCTL_ACK(s));
-		eu_reg[2 * s + 1] = I915_READ(GEN10_SS23_EU_PGCTL_ACK(s));
+		reg_data->s_reg[s] &= GEN10_PGCTL_VALID_SS_MASK(s);
 	}
 
 	eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
@@ -4341,7 +4444,7 @@  static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 		     GEN9_PGCTL_SSB_EU311_ACK;
 
 	for (s = 0; s < info->sseu.max_slices; s++) {
-		if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
+		if ((reg_data->s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
 			/* skip disabled slice */
 			continue;
 
@@ -4351,11 +4454,11 @@  static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
 
-			if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
+			if (!(reg_data->s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 				/* skip disabled subslice */
 				continue;
 
-			eu_cnt = 2 * hweight32(eu_reg[2 * s + ss / 2] &
+			eu_cnt = 2 * hweight32(reg_data->eu_reg[2 * s + ss / 2] &
 					       eu_mask[ss % 2]);
 			sseu->eu_total += eu_cnt;
 			sseu->eu_per_subslice = max_t(unsigned int,
@@ -4364,20 +4467,44 @@  static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 		}
 	}
 #undef SS_MAX
+
+	i915_gem_object_unpin_map(bo);
+	i915_gem_object_put(bo);
+
+	return 0;
 }
 
-static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
-				    struct sseu_dev_info *sseu)
+static int gen9_sseu_device_status(struct drm_i915_private *dev_priv,
+				   struct sseu_dev_info *sseu)
 {
 #define SS_MAX 3
 	const struct intel_device_info *info = INTEL_INFO(dev_priv);
-	u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
+	struct drm_i915_gem_object *bo;
+	struct {
+		u32 s_reg[SS_MAX];
+		u32 eu_reg[2 * SS_MAX];
+	} reg_offsets, *reg_data;
+	u32 eu_mask[2];
 	int s, ss;
 
 	for (s = 0; s < info->sseu.max_slices; s++) {
-		s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s));
-		eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s));
-		eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s));
+		reg_offsets.s_reg[s] =
+			i915_mmio_reg_offset(GEN9_SLICE_PGCTL_ACK(s));
+		reg_offsets.eu_reg[2*s] =
+			i915_mmio_reg_offset(GEN9_SS01_EU_PGCTL_ACK(s));
+		reg_offsets.eu_reg[2*s + 1] =
+			i915_mmio_reg_offset(GEN9_SS23_EU_PGCTL_ACK(s));
+	}
+
+	bo = gen9_read_registers(dev_priv, reg_offsets.s_reg,
+				 sizeof(reg_offsets) / sizeof(u32));
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	reg_data = i915_gem_object_pin_map(bo, I915_MAP_WB);
+	if (IS_ERR(reg_data)) {
+		i915_gem_object_put(bo);
+		return PTR_ERR(reg_data);
 	}
 
 	eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
@@ -4390,7 +4517,7 @@  static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		     GEN9_PGCTL_SSB_EU311_ACK;
 
 	for (s = 0; s < info->sseu.max_slices; s++) {
-		if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
+		if ((reg_data->s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
 			/* skip disabled slice */
 			continue;
 
@@ -4404,14 +4531,14 @@  static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 			unsigned int eu_cnt;
 
 			if (IS_GEN9_LP(dev_priv)) {
-				if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
+				if (!(reg_data->s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 					/* skip disabled subslice */
 					continue;
 
 				sseu->subslice_mask[s] |= BIT(ss);
 			}
 
-			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
+			eu_cnt = 2 * hweight32(reg_data->eu_reg[2*s + ss/2] &
 					       eu_mask[ss%2]);
 			sseu->eu_total += eu_cnt;
 			sseu->eu_per_subslice = max_t(unsigned int,
@@ -4420,6 +4547,11 @@  static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		}
 	}
 #undef SS_MAX
+
+	i915_gem_object_unpin_map(bo);
+	i915_gem_object_put(bo);
+
+	return 0;
 }
 
 static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
@@ -4491,6 +4623,7 @@  static int i915_sseu_status(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct sseu_dev_info sseu;
+	int ret = 0;
 
 	if (INTEL_GEN(dev_priv) < 8)
 		return -ENODEV;
@@ -4512,16 +4645,16 @@  static int i915_sseu_status(struct seq_file *m, void *unused)
 	} else if (IS_BROADWELL(dev_priv)) {
 		broadwell_sseu_device_status(dev_priv, &sseu);
 	} else if (IS_GEN9(dev_priv)) {
-		gen9_sseu_device_status(dev_priv, &sseu);
+		ret = gen9_sseu_device_status(dev_priv, &sseu);
 	} else if (INTEL_GEN(dev_priv) >= 10) {
-		gen10_sseu_device_status(dev_priv, &sseu);
+		ret = gen10_sseu_device_status(dev_priv, &sseu);
 	}
 
 	intel_runtime_pm_put(dev_priv);
 
 	i915_print_sseu_info(m, false, &sseu);
 
-	return 0;
+	return ret;
 }
 
 static int i915_forcewake_open(struct inode *inode, struct file *file)