
drm/i915: Take runtime pm reference on hangcheck_info

Message ID 1423131392-23071-1-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived

Commit Message

Mika Kuoppala Feb. 5, 2015, 10:16 a.m. UTC
We read the coherent current seqno and the actual head from the ring.
Hardware access requires taking a runtime_pm reference, which brings in
locking. As this debugfs entry is for debugging hangcheck behaviour,
including locking problems, we need to be flexible about taking locks.

Try to take the lock and, if we get it, read the seqno and the actual
head from the hardware. If we can't get exclusive access, fall back to
the lazy coherent seqno and print a token acthd value, from which the
user can see that the seqno is of a different nature.

Testcase: igt/pm_rpm/debugfs-read
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88910
Tested-by: Ding Heng <hengx.ding@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)
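
How the fallback shows up in the debugfs output: acthd[] is a u64 array in
the patch, so the token value of -1 prints as an all-ones hex number with the
existing format string. A minimal stand-alone illustration (not part of the
patch, just the cast/printf behaviour):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Token value the patch stores when exclusive hw access is unavailable. */
	uint64_t acthd = -1;

	/* Same format string the debugfs entry uses for the "current" column. */
	printf("\tACTHD = 0x%08llx [current 0x%08llx]\n",
	       (unsigned long long)0, (unsigned long long)acthd);
	/* Prints: ACTHD = 0x00000000 [current 0xffffffffffffffff] */
	return 0;
}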

Comments

Chris Wilson Feb. 5, 2015, 10:25 a.m. UTC | #1
On Thu, Feb 05, 2015 at 12:16:32PM +0200, Mika Kuoppala wrote:
> We read the coherent current seqno and the actual head from the ring.
> Hardware access requires taking a runtime_pm reference, which brings in
> locking. As this debugfs entry is for debugging hangcheck behaviour,
> including locking problems, we need to be flexible about taking locks.
> 
> Try to take the lock and, if we get it, read the seqno and the actual
> head from the hardware. If we can't get exclusive access, fall back to
> the lazy coherent seqno and print a token acthd value, from which the
> user can see that the seqno is of a different nature.
> 
> Testcase: igt/pm_rpm/debugfs-read
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88910
> Tested-by: Ding Heng <hengx.ding@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>

It also has the nice effect of collating all the hw access into a single
block, which makes it easier to spot the hw details vs the bookkeeping. It
probably suggests we should reorder the output similarly?

Anyway,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
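
One way to read the reordering Chris suggests: group the hardware-sampled
"current" values in one block and the hangcheck bookkeeping in another. The
snippet below is only an illustration of that idea, reusing the seqno[]/acthd[]
arrays and hangcheck fields from the patch; it is not code from this series.

	/* Hardware-sampled state, all in one block... */
	for_each_ring(ring, dev_priv, i)
		seq_printf(m, "%s: current seqno %x, current ACTHD 0x%08llx\n",
			   ring->name, seqno[i], (long long)acthd[i]);

	/* ...and the hangcheck bookkeeping in another. */
	for_each_ring(ring, dev_priv, i)
		seq_printf(m, "%s: seqno %x, action %d, score %d, ACTHD 0x%08llx\n",
			   ring->name, ring->hangcheck.seqno,
			   ring->hangcheck.action, ring->hangcheck.score,
			   (long long)ring->hangcheck.acthd);
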
Daniel Vetter Feb. 5, 2015, 2:29 p.m. UTC | #2
On Thu, Feb 05, 2015 at 12:16:32PM +0200, Mika Kuoppala wrote:
> We read the coherent current seqno and the actual head from the ring.
> Hardware access requires taking a runtime_pm reference, which brings in
> locking. As this debugfs entry is for debugging hangcheck behaviour,
> including locking problems, we need to be flexible about taking locks.
> 
> Try to take the lock and, if we get it, read the seqno and the actual
> head from the hardware. If we can't get exclusive access, fall back to
> the lazy coherent seqno and print a token acthd value, from which the
> user can see that the seqno is of a different nature.
> 
> Testcase: igt/pm_rpm/debugfs-read
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88910
> Tested-by: Ding Heng <hengx.ding@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 26 +++++++++++++++++++++++---
>  1 file changed, 23 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 9af17fb..5a6b0e2 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1223,8 +1223,11 @@ out:
>  static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  {
>  	struct drm_info_node *node = m->private;
> -	struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
> +	struct drm_device *dev = node->minor->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring;
> +	u64 acthd[I915_NUM_RINGS];
> +	u32 seqno[I915_NUM_RINGS];
>  	int i;
>  
>  	if (!i915.enable_hangcheck) {
> @@ -1232,6 +1235,23 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  		return 0;
>  	}
>  
> +	if (mutex_trylock(&dev->struct_mutex)) {

Why do we need dev->struct_mutex here?
-Daniel

> +		intel_runtime_pm_get(dev_priv);
> +
> +		for_each_ring(ring, dev_priv, i) {
> +			seqno[i] = ring->get_seqno(ring, false);
> +			acthd[i] = intel_ring_get_active_head(ring);
> +		}
> +
> +		intel_runtime_pm_put(dev_priv);
> +		mutex_unlock(&dev->struct_mutex);
> +	} else {
> +		for_each_ring(ring, dev_priv, i) {
> +			seqno[i] = ring->get_seqno(ring, true);
> +			acthd[i] = -1;
> +		}
> +	}
> +
>  	if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
>  		seq_printf(m, "Hangcheck active, fires in %dms\n",
>  			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
> @@ -1242,12 +1262,12 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  	for_each_ring(ring, dev_priv, i) {
>  		seq_printf(m, "%s:\n", ring->name);
>  		seq_printf(m, "\tseqno = %x [current %x]\n",
> -			   ring->hangcheck.seqno, ring->get_seqno(ring, false));
> +			   ring->hangcheck.seqno, seqno[i]);
>  		seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
>  		seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
>  		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
>  			   (long long)ring->hangcheck.acthd,
> -			   (long long)intel_ring_get_active_head(ring));
> +			   (long long)acthd[i]);
>  		seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
>  			   (long long)ring->hangcheck.max_acthd);
>  	}
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Shuang He Feb. 5, 2015, 3:33 p.m. UTC | #3
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 5715
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                  -6          282/283                    276/283
ILK                  +1          316/319                    317/319
SNB                              322/346                    322/346
IVB                  -2          382/384                    380/384
BYT                              296/296                    296/296
HSW                +1-2          425/428                    424/428
BDW                              318/333                    318/333
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
 PNV  igt_gem_userptr_blits_coherency-sync      CRASH(1, M7)PASS(1, M23)      CRASH(1, M7)
 PNV  igt_gem_userptr_blits_coherency-unsync      CRASH(1, M7)PASS(1, M23)      CRASH(1, M7)
 PNV  igt_gen3_render_linear_blits      FAIL(1, M7)CRASH(1, M23)PASS(3, M25M23)      FAIL(1, M7)
 PNV  igt_gen3_render_mixed_blits      FAIL(1, M7)PASS(1, M23)      FAIL(1, M7)
 PNV  igt_gen3_render_tiledx_blits      FAIL(1, M7)PASS(1, M23)      FAIL(1, M7)
 PNV  igt_gen3_render_tiledy_blits      FAIL(1, M7)PASS(1, M23)      FAIL(1, M7)
*PNV  igt_gem_fence_thrash_bo-write-verify-threaded-none      PASS(2, M23M7)      CRASH(1, M7)
*ILK  igt_drv_suspend_forcewake      DMESG_WARN(2, M26)      PASS(1, M26)
*IVB  igt_gem_pwrite_pread_display-copy-performance      PASS(2, M21M4)      DMESG_WARN(1, M4)
 IVB  igt_gem_storedw_batches_loop_secure-dispatch      DMESG_WARN(1, M34)PASS(4, M21M34M4)      DMESG_WARN(1, M4)
 HSW  igt_gem_storedw_loop_blt      DMESG_WARN(1, M20)PASS(1, M20)      DMESG_WARN(1, M20)
 HSW  igt_gem_storedw_loop_vebox      DMESG_WARN(1, M20)PASS(1, M20)      DMESG_WARN(1, M20)
*HSW  igt_pm_rpm_debugfs-read      DMESG_WARN(2, M20)      PASS(1, M20)
Note: You need to pay more attention to lines starting with '*'
Mika Kuoppala Feb. 5, 2015, 3:54 p.m. UTC | #4
Daniel Vetter <daniel@ffwll.ch> writes:

> On Thu, Feb 05, 2015 at 12:16:32PM +0200, Mika Kuoppala wrote:
>> We read the coherent current seqno and the actual head from the ring.
>> Hardware access requires taking a runtime_pm reference, which brings in
>> locking. As this debugfs entry is for debugging hangcheck behaviour,
>> including locking problems, we need to be flexible about taking locks.
>> 
>> Try to take the lock and, if we get it, read the seqno and the actual
>> head from the hardware. If we can't get exclusive access, fall back to
>> the lazy coherent seqno and print a token acthd value, from which the
>> user can see that the seqno is of a different nature.
>> 
>> Testcase: igt/pm_rpm/debugfs-read
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88910
>> Tested-by: Ding Heng <hengx.ding@intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_debugfs.c | 26 +++++++++++++++++++++++---
>>  1 file changed, 23 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 9af17fb..5a6b0e2 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -1223,8 +1223,11 @@ out:
>>  static int i915_hangcheck_info(struct seq_file *m, void *unused)
>>  {
>>  	struct drm_info_node *node = m->private;
>> -	struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
>> +	struct drm_device *dev = node->minor->dev;
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>>  	struct intel_engine_cs *ring;
>> +	u64 acthd[I915_NUM_RINGS];
>> +	u32 seqno[I915_NUM_RINGS];
>>  	int i;
>>  
>>  	if (!i915.enable_hangcheck) {
>> @@ -1232,6 +1235,23 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>>  		return 0;
>>  	}
>>  
>> +	if (mutex_trylock(&dev->struct_mutex)) {
>
> Why do we need dev->struct_mutex here?

Apparently intel_runtime_pm_get is fine without it. Cargo-culted
from the neighbouring debugfs entry.

The only thing I can think of is that it reveals whether we had
exclusive access, depending on the acthd token value printed. But
whether that information is then of any use, I don't know.

I'll respin

-Mika

> -Daniel
>
>> +		intel_runtime_pm_get(dev_priv);
>> +
>> +		for_each_ring(ring, dev_priv, i) {
>> +			seqno[i] = ring->get_seqno(ring, false);
>> +			acthd[i] = intel_ring_get_active_head(ring);
>> +		}
>> +
>> +		intel_runtime_pm_put(dev_priv);
>> +		mutex_unlock(&dev->struct_mutex);
>> +	} else {
>> +		for_each_ring(ring, dev_priv, i) {
>> +			seqno[i] = ring->get_seqno(ring, true);
>> +			acthd[i] = -1;
>> +		}
>> +	}
>> +
>>  	if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
>>  		seq_printf(m, "Hangcheck active, fires in %dms\n",
>>  			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
>> @@ -1242,12 +1262,12 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>>  	for_each_ring(ring, dev_priv, i) {
>>  		seq_printf(m, "%s:\n", ring->name);
>>  		seq_printf(m, "\tseqno = %x [current %x]\n",
>> -			   ring->hangcheck.seqno, ring->get_seqno(ring, false));
>> +			   ring->hangcheck.seqno, seqno[i]);
>>  		seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
>>  		seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
>>  		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
>>  			   (long long)ring->hangcheck.acthd,
>> -			   (long long)intel_ring_get_active_head(ring));
>> +			   (long long)acthd[i]);
>>  		seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
>>  			   (long long)ring->hangcheck.max_acthd);
>>  	}
>> -- 
>> 1.9.1
>> 
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
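
For reference, a minimal sketch of the respin direction discussed above: drop
dev->struct_mutex and take only the runtime pm reference around the hardware
reads. This is an illustration based on the review comments, not the actual
follow-up patch.

	/* Wake the hardware only for the duration of the register reads;
	 * no struct_mutex is needed just to sample seqno/ACTHD. */
	intel_runtime_pm_get(dev_priv);

	for_each_ring(ring, dev_priv, i) {
		seqno[i] = ring->get_seqno(ring, false);
		acthd[i] = intel_ring_get_active_head(ring);
	}

	intel_runtime_pm_put(dev_priv);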

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9af17fb..5a6b0e2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1223,8 +1223,11 @@  out:
 static int i915_hangcheck_info(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = m->private;
-	struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
+	u64 acthd[I915_NUM_RINGS];
+	u32 seqno[I915_NUM_RINGS];
 	int i;
 
 	if (!i915.enable_hangcheck) {
@@ -1232,6 +1235,23 @@  static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		return 0;
 	}
 
+	if (mutex_trylock(&dev->struct_mutex)) {
+		intel_runtime_pm_get(dev_priv);
+
+		for_each_ring(ring, dev_priv, i) {
+			seqno[i] = ring->get_seqno(ring, false);
+			acthd[i] = intel_ring_get_active_head(ring);
+		}
+
+		intel_runtime_pm_put(dev_priv);
+		mutex_unlock(&dev->struct_mutex);
+	} else {
+		for_each_ring(ring, dev_priv, i) {
+			seqno[i] = ring->get_seqno(ring, true);
+			acthd[i] = -1;
+		}
+	}
+
 	if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
 		seq_printf(m, "Hangcheck active, fires in %dms\n",
 			   jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
@@ -1242,12 +1262,12 @@  static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	for_each_ring(ring, dev_priv, i) {
 		seq_printf(m, "%s:\n", ring->name);
 		seq_printf(m, "\tseqno = %x [current %x]\n",
-			   ring->hangcheck.seqno, ring->get_seqno(ring, false));
+			   ring->hangcheck.seqno, seqno[i]);
 		seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
 		seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
 		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
 			   (long long)ring->hangcheck.acthd,
-			   (long long)intel_ring_get_active_head(ring));
+			   (long long)acthd[i]);
 		seq_printf(m, "\tmax ACTHD = 0x%08llx\n",
 			   (long long)ring->hangcheck.max_acthd);
 	}