diff mbox series

drm/i915/gt: Manual rc6 entry upon parking

Message ID 20191126100435.2636304-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series drm/i915/gt: Manual rc6 entry upon parking | expand

Commit Message

Chris Wilson Nov. 26, 2019, 10:04 a.m. UTC
Now that we rapidly park the GT when the GPU idles, we often find
ourselves idling faster than the RC6 promotion timer. Thus if we tell
the GPU to enter RC6 manually as we park, we can do so quicker (by
around 50ms, half an EI on average) and marginally increase our
powersaving across all execlists platforms.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Andi Shyti <andi.shyti@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_pm.c     |  2 +
 drivers/gpu/drm/i915/gt/intel_rc6.c       | 53 ++++++++++++++++-------
 drivers/gpu/drm/i915/gt/intel_rc6.h       |  3 ++
 drivers/gpu/drm/i915/gt/intel_rc6_types.h |  2 +
 4 files changed, 45 insertions(+), 15 deletions(-)

Comments

Imre Deak Nov. 26, 2019, 12:22 p.m. UTC | #1
Hi,

On Tue, Nov 26, 2019 at 10:04:35AM +0000, Chris Wilson wrote:
> Now that we rapidly park the GT when the GPU idles, we often find
> ourselves idling faster than the RC6 promotion timer. Thus if we tell
> the GPU to enter RC6 manually as we park, we can do so quicker (by
> around 50ms, half an EI on average) and marginally increase our
> powersaving across all execlists platforms.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Andi Shyti <andi.shyti@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Imre Deak <imre.deak@intel.com>

Looks ok:
Acked-by: Imre Deak <imre.deak@intel.com>

Does intel_rc6_park() work on VLV/CHV too? I can't see at least that
we'd enable RC6 on those with GEN6_RC_CTL_RC6_ENABLE.

Also what is the value written to GEN6_RC_STATE? Is it ok to use the
same value after unpark()?

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c     |  2 +
>  drivers/gpu/drm/i915/gt/intel_rc6.c       | 53 ++++++++++++++++-------
>  drivers/gpu/drm/i915/gt/intel_rc6.h       |  3 ++
>  drivers/gpu/drm/i915/gt/intel_rc6_types.h |  2 +
>  4 files changed, 45 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index 9d3045096e37..2000018b6464 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -64,6 +64,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
>  	if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
>  		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
>  
> +	intel_rc6_unpark(&gt->rc6);
>  	intel_rps_unpark(&gt->rps);
>  	i915_pmu_gt_unparked(i915);
>  
> @@ -85,6 +86,7 @@ static int __gt_park(struct intel_wakeref *wf)
>  	i915_vma_parked(gt);
>  	i915_pmu_gt_parked(i915);
>  	intel_rps_park(&gt->rps);
> +	intel_rc6_park(&gt->rc6);
>  
>  	/* Everything switched off, flush any residual interrupt just in case */
>  	intel_synchronize_irq(i915);
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
> index 7a0bc6dde009..c4379c49c20d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
> @@ -96,10 +96,10 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
>  	set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
>  
>  	/* 3a: Enable RC6 */
> -	set(uncore, GEN6_RC_CONTROL,
> -	    GEN6_RC_CTL_HW_ENABLE |
> -	    GEN6_RC_CTL_RC6_ENABLE |
> -	    GEN6_RC_CTL_EI_MODE(1));
> +	rc6->ctl_enable =
> +		GEN6_RC_CTL_HW_ENABLE |
> +		GEN6_RC_CTL_RC6_ENABLE |
> +		GEN6_RC_CTL_EI_MODE(1);
>  
>  	set(uncore, GEN9_PG_ENABLE,
>  	    GEN9_RENDER_PG_ENABLE |
> @@ -170,10 +170,10 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
>  	else
>  		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
>  
> -	set(uncore, GEN6_RC_CONTROL,
> -	    GEN6_RC_CTL_HW_ENABLE |
> -	    GEN6_RC_CTL_RC6_ENABLE |
> -	    rc6_mode);
> +	rc6->ctl_enable =
> +		GEN6_RC_CTL_HW_ENABLE |
> +		GEN6_RC_CTL_RC6_ENABLE |
> +		rc6_mode;
>  
>  	/*
>  	 * WaRsDisableCoarsePowerGating:skl,cnl
> @@ -200,10 +200,10 @@ static void gen8_rc6_enable(struct intel_rc6 *rc6)
>  	set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
>  
>  	/* 3: Enable RC6 */
> -	set(uncore, GEN6_RC_CONTROL,
> +	rc6->ctl_enable =
>  	    GEN6_RC_CTL_HW_ENABLE |
>  	    GEN7_RC_CTL_TO_MODE |
> -	    GEN6_RC_CTL_RC6_ENABLE);
> +	    GEN6_RC_CTL_RC6_ENABLE;
>  }
>  
>  static void gen6_rc6_enable(struct intel_rc6 *rc6)
> @@ -239,10 +239,10 @@ static void gen6_rc6_enable(struct intel_rc6 *rc6)
>  		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
>  	if (HAS_RC6pp(i915))
>  		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
> -	set(uncore, GEN6_RC_CONTROL,
> +	rc6->ctl_enable =
>  	    rc6_mask |
>  	    GEN6_RC_CTL_EI_MODE(1) |
> -	    GEN6_RC_CTL_HW_ENABLE);
> +	    GEN6_RC_CTL_HW_ENABLE;
>  
>  	rc6vids = 0;
>  	ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
> @@ -360,7 +360,7 @@ static void chv_rc6_enable(struct intel_rc6 *rc6)
>  			       VLV_RENDER_RC6_COUNT_EN));
>  
>  	/* 3: Enable RC6 */
> -	set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE);
> +	rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
>  }
>  
>  static void vlv_rc6_enable(struct intel_rc6 *rc6)
> @@ -386,8 +386,8 @@ static void vlv_rc6_enable(struct intel_rc6 *rc6)
>  			       VLV_MEDIA_RC6_COUNT_EN |
>  			       VLV_RENDER_RC6_COUNT_EN));
>  
> -	set(uncore, GEN6_RC_CONTROL,
> -	    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
> +	rc6->ctl_enable =
> +	    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>  }
>  
>  static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
> @@ -634,6 +634,29 @@ void intel_rc6_enable(struct intel_rc6 *rc6)
>  	rc6->enabled = true;
>  }
>  
> +void intel_rc6_unpark(struct intel_rc6 *rc6)
> +{
> +	struct intel_uncore *uncore = rc6_to_uncore(rc6);
> +
> +	if (!rc6->enabled)
> +		return;
> +
> +	/* Restore HW timers for automatic RC6 entry while busy */
> +	set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
> +}
> +
> +void intel_rc6_park(struct intel_rc6 *rc6)
> +{
> +	struct intel_uncore *uncore = rc6_to_uncore(rc6);
> +
> +	if (!rc6->enabled)
> +		return;
> +
> +	/* Turn off the HW timers and go directly to rc6 */
> +	set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
> +	set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
> +}
> +
>  void intel_rc6_disable(struct intel_rc6 *rc6)
>  {
>  	if (!rc6->enabled)
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
> index 1370f6834a4c..20d740ff7e55 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6.h
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
> @@ -15,6 +15,9 @@ struct intel_rc6;
>  void intel_rc6_init(struct intel_rc6 *rc6);
>  void intel_rc6_fini(struct intel_rc6 *rc6);
>  
> +void intel_rc6_unpark(struct intel_rc6 *rc6);
> +void intel_rc6_park(struct intel_rc6 *rc6);
> +
>  void intel_rc6_sanitize(struct intel_rc6 *rc6);
>  void intel_rc6_enable(struct intel_rc6 *rc6);
>  void intel_rc6_disable(struct intel_rc6 *rc6);
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> index 89ad5697a8d4..9f3d56817c97 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
> @@ -18,6 +18,8 @@ struct intel_rc6 {
>  	u64 prev_hw_residency[4];
>  	u64 cur_residency[4];
>  
> +	u32 ctl_enable;
> +
>  	struct drm_i915_gem_object *pctx;
>  
>  	bool supported : 1;
> -- 
> 2.24.0
>
Chris Wilson Nov. 26, 2019, 12:32 p.m. UTC | #2
Quoting Imre Deak (2019-11-26 12:22:48)
> Hi,
> 
> On Tue, Nov 26, 2019 at 10:04:35AM +0000, Chris Wilson wrote:
> > Now that we rapidly park the GT when the GPU idles, we often find
> > ourselves idling faster than the RC6 promotion timer. Thus if we tell
> > the GPU to enter RC6 manually as we park, we can do so quicker (by
> > around 50ms, half an EI on average) and marginally increase our
> > powersaving across all execlists platforms.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Andi Shyti <andi.shyti@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Imre Deak <imre.deak@intel.com>
> 
> Looks ok:
> Acked-by: Imre Deak <imre.deak@intel.com>
> 
> Does intel_rc6_park() work on VLV/CHV too? I can't see at least that
> we'd enable RC6 on those with GEN6_RC_CTL_RC6_ENABLE.

I have not yet observed any ill effects, but I also haven't completed a
pm run on bsw/byt, so I can not say if it's simply ignored by the PCU.

I'll do a selftest to confirm that by disabling the HW timer and setting
those bits, we do enter rc6 (and vice versa on returning control to HW).
 
> Also what is the value written to GEN6_RC_STATE? Is it ok to use the
> same value after unpark()?

Magic :)

Aiui (based mostly on my own hypothesis and watching the HW) it acts like
a request and will be written over by the HW at the end of its EI (or
whenever exactly it decides on what mode to be in). So as we enable the
HW timers, we lose ownership of that field and control over when to
enter rc6.
-Chris
Imre Deak Nov. 26, 2019, 12:42 p.m. UTC | #3
On Tue, Nov 26, 2019 at 12:32:13PM +0000, Chris Wilson wrote:
> Quoting Imre Deak (2019-11-26 12:22:48)
> > Hi,
> > 
> > On Tue, Nov 26, 2019 at 10:04:35AM +0000, Chris Wilson wrote:
> > > Now that we rapidly park the GT when the GPU idles, we often find
> > > ourselves idling faster than the RC6 promotion timer. Thus if we tell
> > > the GPU to enter RC6 manually as we park, we can do so quicker (by
> > > around 50ms, half an EI on average) and marginally increase our
> > > powersaving across all execlists platforms.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Andi Shyti <andi.shyti@intel.com>
> > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > > Cc: Imre Deak <imre.deak@intel.com>
> > 
> > Looks ok:
> > Acked-by: Imre Deak <imre.deak@intel.com>
> > 
> > Does intel_rc6_park() work on VLV/CHV too? I can't see at least that
> > we'd enable RC6 on those with GEN6_RC_CTL_RC6_ENABLE.
> 
> I have not yet observed any ill effects, but I also haven't completed a
> pm run on bsw/byt, so I can not say if it's simply ignored by the PCU.
> 
> I'll do a selftest to confirm that by disabling the HW timer and setting
> those bits, we do enter rc6 (and vice versa on returning control to HW).

Ok, I try to find the docs for this too.

>  
> > Also what is the value written to GEN6_RC_STATE? Is it ok to use the
> > same value after unpark()?
> 
> Magic :)
> 
> Aiui (based mostly on my own hypothesis and watching the HW) it acts like
> a request and will be written over by the HW at the end of its EI (or
> whenever exactly it decides on what mode to be in). So as we enable the
> HW timers, we lose ownership of that field and control over when to
> enter rc6.

Okay, and then I suppose the value written is needed to immediately
enter RC6.

> -Chris
Andi Shyti Nov. 26, 2019, 1:40 p.m. UTC | #4
Hi Chris,

On Tue, Nov 26, 2019 at 10:04:35AM +0000, Chris Wilson wrote:
> Now that we rapidly park the GT when the GPU idles, we often find
> ourselves idling faster than the RC6 promotion timer. Thus if we tell
> the GPU to enter RC6 manually as we park, we can do so quicker (by
> around 50ms, half an EI on average) and marginally increase our
> powersaving across all execlists platforms.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Andi Shyti <andi.shyti@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Imre Deak <imre.deak@intel.com>

looks good to me,

Reveiwed-by: Andi Shyti <andi.shyti@intel.com>

Thanks,
Andi
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 9d3045096e37..2000018b6464 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -64,6 +64,7 @@  static int __gt_unpark(struct intel_wakeref *wf)
 	if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
 		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 
+	intel_rc6_unpark(&gt->rc6);
 	intel_rps_unpark(&gt->rps);
 	i915_pmu_gt_unparked(i915);
 
@@ -85,6 +86,7 @@  static int __gt_park(struct intel_wakeref *wf)
 	i915_vma_parked(gt);
 	i915_pmu_gt_parked(i915);
 	intel_rps_park(&gt->rps);
+	intel_rc6_park(&gt->rc6);
 
 	/* Everything switched off, flush any residual interrupt just in case */
 	intel_synchronize_irq(i915);
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 7a0bc6dde009..c4379c49c20d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -96,10 +96,10 @@  static void gen11_rc6_enable(struct intel_rc6 *rc6)
 	set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
 
 	/* 3a: Enable RC6 */
-	set(uncore, GEN6_RC_CONTROL,
-	    GEN6_RC_CTL_HW_ENABLE |
-	    GEN6_RC_CTL_RC6_ENABLE |
-	    GEN6_RC_CTL_EI_MODE(1));
+	rc6->ctl_enable =
+		GEN6_RC_CTL_HW_ENABLE |
+		GEN6_RC_CTL_RC6_ENABLE |
+		GEN6_RC_CTL_EI_MODE(1);
 
 	set(uncore, GEN9_PG_ENABLE,
 	    GEN9_RENDER_PG_ENABLE |
@@ -170,10 +170,10 @@  static void gen9_rc6_enable(struct intel_rc6 *rc6)
 	else
 		rc6_mode = GEN6_RC_CTL_EI_MODE(1);
 
-	set(uncore, GEN6_RC_CONTROL,
-	    GEN6_RC_CTL_HW_ENABLE |
-	    GEN6_RC_CTL_RC6_ENABLE |
-	    rc6_mode);
+	rc6->ctl_enable =
+		GEN6_RC_CTL_HW_ENABLE |
+		GEN6_RC_CTL_RC6_ENABLE |
+		rc6_mode;
 
 	/*
 	 * WaRsDisableCoarsePowerGating:skl,cnl
@@ -200,10 +200,10 @@  static void gen8_rc6_enable(struct intel_rc6 *rc6)
 	set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
 
 	/* 3: Enable RC6 */
-	set(uncore, GEN6_RC_CONTROL,
+	rc6->ctl_enable =
 	    GEN6_RC_CTL_HW_ENABLE |
 	    GEN7_RC_CTL_TO_MODE |
-	    GEN6_RC_CTL_RC6_ENABLE);
+	    GEN6_RC_CTL_RC6_ENABLE;
 }
 
 static void gen6_rc6_enable(struct intel_rc6 *rc6)
@@ -239,10 +239,10 @@  static void gen6_rc6_enable(struct intel_rc6 *rc6)
 		rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
 	if (HAS_RC6pp(i915))
 		rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
-	set(uncore, GEN6_RC_CONTROL,
+	rc6->ctl_enable =
 	    rc6_mask |
 	    GEN6_RC_CTL_EI_MODE(1) |
-	    GEN6_RC_CTL_HW_ENABLE);
+	    GEN6_RC_CTL_HW_ENABLE;
 
 	rc6vids = 0;
 	ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
@@ -360,7 +360,7 @@  static void chv_rc6_enable(struct intel_rc6 *rc6)
 			       VLV_RENDER_RC6_COUNT_EN));
 
 	/* 3: Enable RC6 */
-	set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE);
+	rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
 }
 
 static void vlv_rc6_enable(struct intel_rc6 *rc6)
@@ -386,8 +386,8 @@  static void vlv_rc6_enable(struct intel_rc6 *rc6)
 			       VLV_MEDIA_RC6_COUNT_EN |
 			       VLV_RENDER_RC6_COUNT_EN));
 
-	set(uncore, GEN6_RC_CONTROL,
-	    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
+	rc6->ctl_enable =
+	    GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
 }
 
 static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
@@ -634,6 +634,29 @@  void intel_rc6_enable(struct intel_rc6 *rc6)
 	rc6->enabled = true;
 }
 
+void intel_rc6_unpark(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+	if (!rc6->enabled)
+		return;
+
+	/* Restore HW timers for automatic RC6 entry while busy */
+	set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
+}
+
+void intel_rc6_park(struct intel_rc6 *rc6)
+{
+	struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+	if (!rc6->enabled)
+		return;
+
+	/* Turn off the HW timers and go directly to rc6 */
+	set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
+	set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
+}
+
 void intel_rc6_disable(struct intel_rc6 *rc6)
 {
 	if (!rc6->enabled)
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
index 1370f6834a4c..20d740ff7e55 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -15,6 +15,9 @@  struct intel_rc6;
 void intel_rc6_init(struct intel_rc6 *rc6);
 void intel_rc6_fini(struct intel_rc6 *rc6);
 
+void intel_rc6_unpark(struct intel_rc6 *rc6);
+void intel_rc6_park(struct intel_rc6 *rc6);
+
 void intel_rc6_sanitize(struct intel_rc6 *rc6);
 void intel_rc6_enable(struct intel_rc6 *rc6);
 void intel_rc6_disable(struct intel_rc6 *rc6);
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
index 89ad5697a8d4..9f3d56817c97 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -18,6 +18,8 @@  struct intel_rc6 {
 	u64 prev_hw_residency[4];
 	u64 cur_residency[4];
 
+	u32 ctl_enable;
+
 	struct drm_i915_gem_object *pctx;
 
 	bool supported : 1;