diff mbox

drm/i915: hangcheck parameter

Message ID 1308763970-2989-1-git-send-email-ben@bwidawsk.net (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky June 22, 2011, 5:32 p.m. UTC
The hangcheck is undesirable when doing shader debugging. The debugger
interacts with the EU, and these may cause the hangcheck to fire at most
unfortunate times.

This provides a way to let the user disable the hangcheck when they want
to do shader debugging.

Not Signed-off-by, for review only
I will be resubmitting this with full series is ready
---
 drivers/gpu/drm/i915/i915_dma.c |   13 +++++++++----
 drivers/gpu/drm/i915/i915_drv.c |    3 +++
 drivers/gpu/drm/i915/i915_drv.h |    1 +
 drivers/gpu/drm/i915/i915_gem.c |   10 +++++++---
 drivers/gpu/drm/i915/i915_irq.c |   16 +++++++++++-----
 5 files changed, 31 insertions(+), 12 deletions(-)

Comments

Chris Wilson June 22, 2011, 5:45 p.m. UTC | #1
On Wed, 22 Jun 2011 10:32:50 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> The hangcheck is undesirable when doing shader debugging. The debugger
> interacts with the EU, and these may cause the hangcheck to fire at most
> unfortunate times.
> 
> This provides a way to let the user disable the hangcheck when they want
> to do shader debugging.

Looks like a reasonable idea, I'm tempted to suggest to make it the period
in ms rather a boolean. But for actual use, either a new ioctl for
enable/disable or another wakelock? Just in case we have more than one
program trying to debug at the same time. (Not so far fetched, as the user
may be snooping on the results whilst debugging in another process?)
-Chris
Ben Widawsky June 22, 2011, 5:55 p.m. UTC | #2
On Wed, Jun 22, 2011 at 06:45:56PM +0100, Chris Wilson wrote:
> On Wed, 22 Jun 2011 10:32:50 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> > The hangcheck is undesirable when doing shader debugging. The debugger
> > interacts with the EU, and these may cause the hangcheck to fire at most
> > unfortunate times.
> > 
> > This provides a way to let the user disable the hangcheck when they want
> > to do shader debugging.
> 
> Looks like a reasonable idea, I'm tempted to suggest to make it the period
> in ms rather a boolean. But for actual use, either a new ioctl for
> enable/disable or another wakelock? Just in case we have more than one
> program trying to debug at the same time. (Not so far fetched, as the user
> may be snooping on the results whilst debugging in another process?)
> 
> -Chris

Ah, Chris! You reminded me what I had planned to do a month ago, but it
got lost in all the other stuff floating around in my brain.

Yes, actually I will make a debugfs file, which notifies the kernel
we're debugging from userspace, and it should disable the hangcheck
timer. This will allow us to read/write potentially valuable information
in the future, and like you said acts as a lock to prevent concurrent
debuggers.

Thanks! Very helpful response/reminder.

I was planning a debugfs entry for it. Do you see any advantages to
using an IOCTL?

Ben
Chris Wilson June 22, 2011, 6:05 p.m. UTC | #3
On Wed, 22 Jun 2011 10:55:33 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> I was planning a debugfs entry for it. Do you see any advantages to
> using an IOCTL?

The only danger is that we developing a useful api and hiding it debugfs.
We may be caught and forced to maintain such even if we do declare it as a
work-in-progress...

I'm not that worried about such just yet, we are a long way from having a
mature enough hw and gfx stack to support a ptrace-like interface for
generic GPU debugging.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0239e99..b10249a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -43,6 +43,8 @@ 
 #include <linux/slab.h>
 #include <acpi/video.h>
 
+extern unsigned int i915_enable_hangcheck;
+
 static void i915_write_hws_pga(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2089,9 +2091,11 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	/* Must be done after probing outputs */
 	intel_opregion_init(dev);
 	acpi_video_register();
-
-	setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
-		    (unsigned long) dev);
+	if (i915_enable_hangcheck) {
+		dev_priv->enable_hangcheck = true;
+		setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
+			    (unsigned long) dev);
+	}
 
 	spin_lock(&mchdev_lock);
 	i915_mch_dev = dev_priv;
@@ -2169,7 +2173,8 @@  int i915_driver_unload(struct drm_device *dev)
 	}
 
 	/* Free error state after interrupts are fully disabled. */
-	del_timer_sync(&dev_priv->hangcheck_timer);
+	if (dev_priv->enable_hangcheck)
+		del_timer_sync(&dev_priv->hangcheck_timer);
 	cancel_work_sync(&dev_priv->error_work);
 	i915_destroy_error_state(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0defd42..a38ce4f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -70,6 +70,9 @@  module_param_named(vbt_sdvo_panel_type, i915_vbt_sdvo_panel_type, int, 0600);
 static bool i915_try_reset = true;
 module_param_named(reset, i915_try_reset, bool, 0600);
 
+unsigned int i915_enable_hangcheck = 1;
+module_param_named(enable_hangcheck, i915_enable_hangcheck, int, 0600);
+
 static struct drm_driver driver;
 extern int intel_agp_enabled;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8a9fd91..bfced70 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -320,6 +320,7 @@  typedef struct drm_i915_private {
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
 	struct timer_list hangcheck_timer;
 	int hangcheck_count;
+	bool enable_hangcheck;
 	uint32_t last_acthd;
 	uint32_t last_instdone;
 	uint32_t last_instdone1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cb1f61d..71377b1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1777,8 +1777,11 @@  i915_add_request(struct intel_ring_buffer *ring,
 	ring->outstanding_lazy_request = false;
 
 	if (!dev_priv->mm.suspended) {
-		mod_timer(&dev_priv->hangcheck_timer,
-			  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+		if (dev_priv->enable_hangcheck) {
+			mod_timer(&dev_priv->hangcheck_timer,
+				  jiffies +
+				  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+		}
 		if (was_empty)
 			queue_delayed_work(dev_priv->wq,
 					   &dev_priv->mm.retire_work, HZ);
@@ -3813,7 +3816,8 @@  i915_gem_idle(struct drm_device *dev)
 	 * And not confound mm.suspended!
 	 */
 	dev_priv->mm.suspended = 1;
-	del_timer_sync(&dev_priv->hangcheck_timer);
+	if (dev_priv->enable_hangcheck)
+		del_timer_sync(&dev_priv->hangcheck_timer);
 
 	i915_kernel_lost_context(dev);
 	i915_gem_cleanup_ringbuffer(dev);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b9fafe3..73b7071 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -362,9 +362,12 @@  static void notify_ring(struct drm_device *dev,
 	ring->irq_seqno = seqno;
 	wake_up_all(&ring->irq_queue);
 
-	dev_priv->hangcheck_count = 0;
-	mod_timer(&dev_priv->hangcheck_timer,
-		  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+	if (dev_priv->enable_hangcheck) {
+		dev_priv->hangcheck_count = 0;
+		mod_timer(&dev_priv->hangcheck_timer,
+			  jiffies +
+			  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+	}
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
@@ -1722,8 +1725,11 @@  void i915_hangcheck_elapsed(unsigned long data)
 
 repeat:
 	/* Reset timer case chip hangs without another request being added */
-	mod_timer(&dev_priv->hangcheck_timer,
-		  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+	if (dev_priv->enable_hangcheck) {
+		mod_timer(&dev_priv->hangcheck_timer,
+			  jiffies +
+			  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
+	}
 }
 
 /* drm_dma.h hooks