diff mbox series

[2/2] drm/i915/gt: Warn if not in RC6 when GT is parked

Message ID da81208882ba7fb24a8051c5edfbf6a735127f88.1665458640.git.ashutosh.dixit@intel.com (mailing list archive)
State New, archived
Headers show
Series Firm up gt park/unpark | expand

Commit Message

Dixit, Ashutosh Oct. 11, 2022, 3:29 a.m. UTC
Some i915 modules implicitly assume that there is no user, kernel or
firmware activity after GT is parked. For example, PMU calculations are
incorrect if GT is not in RC6 when GT is parked (outside of the GT
wakeref). Therefore check and warn if GT is not in RC6 at the time of
parking the GT.

Bug: https://gitlab.freedesktop.org/drm/intel/-/issues/7025
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_pm.c |  6 ++++++
 drivers/gpu/drm/i915/gt/intel_rc6.c   | 15 +++++++++++++++
 drivers/gpu/drm/i915/gt/intel_rc6.h   |  1 +
 3 files changed, 22 insertions(+)

Comments

Dixit, Ashutosh Oct. 11, 2022, 5:53 a.m. UTC | #1
On Mon, 10 Oct 2022 20:29:23 -0700, Ashutosh Dixit wrote:
>
> Some i915 modules implicitly assume that there is no user, kernel or
> firmware activity after GT is parked. For example, PMU calculations are
> incorrect if GT is not in RC6 when GT is parked (outside of the GT
> wakeref). Therefore check and warn if GT is not in RC6 at the time of
> parking the GT.

This patch has cause widespread dmesg_warn's in premerge CI so there is no
intention of merging this. It just proves that these assumptions in PMU
(for quantities such as frequency and RC6 residency) are incorrect and need
to be addressed.

Thanks.
--
Ashutosh


> @@ -123,6 +125,10 @@ static int __gt_park(struct intel_wakeref *wf)
>		intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
>	}
>
> +	ret = intel_rc6_in_rc6(&gt->rc6, &in_rc6);
> +	if (!ret && !in_rc6)
> +		drm_warn_once(&i915->drm, "Parking, but GT is not in RC6!\n");
> +
>	return 0;
>  }
Tvrtko Ursulin Oct. 11, 2022, 8:35 a.m. UTC | #2
On 11/10/2022 06:53, Dixit, Ashutosh wrote:
> On Mon, 10 Oct 2022 20:29:23 -0700, Ashutosh Dixit wrote:
>>
>> Some i915 modules implicitly assume that there is no user, kernel or
>> firmware activity after GT is parked. For example, PMU calculations are
>> incorrect if GT is not in RC6 when GT is parked (outside of the GT
>> wakeref). Therefore check and warn if GT is not in RC6 at the time of
>> parking the GT.
> 
> This patch has cause widespread dmesg_warn's in premerge CI so there is no
> intention of merging this. It just proves that these assumptions in PMU
> (for quantities such as frequency and RC6 residency) are incorrect and need
> to be addressed.

Yeah this probably does not make sense since I am not sure if driver 
actually controls when hardware goes into RC6.

Regards,

Tvrtko

> 
> Thanks.
> --
> Ashutosh
> 
> 
>> @@ -123,6 +125,10 @@ static int __gt_park(struct intel_wakeref *wf)
>> 		intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
>> 	}
>>
>> +	ret = intel_rc6_in_rc6(&gt->rc6, &in_rc6);
>> +	if (!ret && !in_rc6)
>> +		drm_warn_once(&i915->drm, "Parking, but GT is not in RC6!\n");
>> +
>> 	return 0;
>>   }
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 26aa2e979a148..5da81e22d0980 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -102,6 +102,8 @@  static int __gt_park(struct intel_wakeref *wf)
 	struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
 	intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
 	struct drm_i915_private *i915 = gt->i915;
+	bool in_rc6;
+	int ret;
 
 	GT_TRACE(gt, "\n");
 
@@ -123,6 +125,10 @@  static int __gt_park(struct intel_wakeref *wf)
 		intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
 	}
 
+	ret = intel_rc6_in_rc6(&gt->rc6, &in_rc6);
+	if (!ret && !in_rc6)
+		drm_warn_once(&i915->drm, "Parking, but GT is not in RC6!\n");
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index f8d0523f4c18e..4517988e704a4 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -666,6 +666,21 @@  void intel_rc6_park(struct intel_rc6 *rc6)
 	set(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
 }
 
+int intel_rc6_in_rc6(struct intel_rc6 *rc6, bool *in_rc6)
+{
+	u32 gt_core_status;
+
+	/* GEN6_GT_CORE_STATUS exists only for Gen11+ */
+	if (GRAPHICS_VER(rc6_to_i915(rc6)) < 11)
+		return -ENODEV;
+
+	gt_core_status = intel_uncore_read_fw(rc6_to_uncore(rc6), GEN6_GT_CORE_STATUS)
+				& GEN6_RCn_MASK;
+	*in_rc6 = gt_core_status == GEN6_RC6 || gt_core_status == GEN6_RC7;
+
+	return 0;
+}
+
 void intel_rc6_disable(struct intel_rc6 *rc6)
 {
 	if (!rc6->enabled)
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
index b6fea71afc223..30da10399028d 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -16,6 +16,7 @@  void intel_rc6_fini(struct intel_rc6 *rc6);
 
 void intel_rc6_unpark(struct intel_rc6 *rc6);
 void intel_rc6_park(struct intel_rc6 *rc6);
+int intel_rc6_in_rc6(struct intel_rc6 *rc6, bool *in_rc6);
 
 void intel_rc6_sanitize(struct intel_rc6 *rc6);
 void intel_rc6_enable(struct intel_rc6 *rc6);