drm/i915: Report to userspace if we have a (presumed) working GPU reset
diff mbox

Message ID 1434367428-23048-1-git-send-email-chris@chris-wilson.co.uk
State New
Headers show

Commit Message

Chris Wilson June 15, 2015, 11:23 a.m. UTC
In igt, we want to test handling of GPU hangs, both for recovery
purposes and for reporting. However, we don't want to inject a genuine
GPU hang onto a machine that cannot recover and so be permanently
wedged. Rather than embed heuristics into igt, have the kernel report
exactly when it expects the GPU reset to work.

This can also be usefully extended in future to indicate different
levels of fine-grained resets.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tim Gore <tim.gore@intel.com>
Cc: Tomas Elf <tomas.elf@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c     |  5 +++++
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 drivers/gpu/drm/i915/intel_uncore.c | 28 ++++++++++++++++++++++------
 include/uapi/drm/i915_drm.h         |  1 +
 4 files changed, 29 insertions(+), 6 deletions(-)

Comments

Daniel Vetter June 15, 2015, 1:45 p.m. UTC | #1
On Mon, Jun 15, 2015 at 12:23:48PM +0100, Chris Wilson wrote:
> In igt, we want to test handling of GPU hangs, both for recovery
> purposes and for reporting. However, we don't want to inject a genuine
> GPU hang onto a machine that cannot recover and so be permanently
> wedged. Rather than embed heuristics into igt, have the kernel report
> exactly when it expects the GPU reset to work.
> 
> This can also be usefully extended in future to indicate different
> levels of fine-grained resets.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Tim Gore <tim.gore@intel.com>
> Cc: Tomas Elf <tomas.elf@intel.com>

Yeah makes sense. Will merge as soon as someone smashes a t-b with a few
igt patches using this on top.
-Daniel
> ---
>  drivers/gpu/drm/i915/i915_dma.c     |  5 +++++
>  drivers/gpu/drm/i915/i915_drv.h     |  1 +
>  drivers/gpu/drm/i915/intel_uncore.c | 28 ++++++++++++++++++++++------
>  include/uapi/drm/i915_drm.h         |  1 +
>  4 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 34248635c36c..88795d2f1819 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -163,6 +163,11 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  		if (!value)
>  			return -ENODEV;
>  		break;
> +	case I915_PARAM_HAS_GPU_RESET:
> +		value = i915.enable_hangcheck &&
> +			i915.reset &&
> +			intel_has_gpu_reset(dev);
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1110d492ec01..85da0dc3c0e6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2599,6 +2599,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
>  			      unsigned long arg);
>  #endif
>  extern int intel_gpu_reset(struct drm_device *dev);
> +extern bool intel_has_gpu_reset(struct drm_device *dev);
>  extern int i915_reset(struct drm_device *dev);
>  extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
>  extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 40382bff5ca0..a61de6e944d2 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1489,20 +1489,36 @@ static int gen6_do_reset(struct drm_device *dev)
>  	return ret;
>  }
>  
> -int intel_gpu_reset(struct drm_device *dev)
> +static int (*intel_get_gpu_reset(struct drm_device *dev))(struct drm_device *)
>  {
>  	if (INTEL_INFO(dev)->gen >= 6)
> -		return gen6_do_reset(dev);
> +		return gen6_do_reset;
>  	else if (IS_GEN5(dev))
> -		return ironlake_do_reset(dev);
> +		return ironlake_do_reset;
>  	else if (IS_G4X(dev))
> -		return g4x_do_reset(dev);
> +		return g4x_do_reset;
>  	else if (IS_G33(dev))
> -		return g33_do_reset(dev);
> +		return g33_do_reset;
>  	else if (INTEL_INFO(dev)->gen >= 3)
> -		return i915_do_reset(dev);
> +		return i915_do_reset;
>  	else
> +		return NULL;
> +}
> +
> +int intel_gpu_reset(struct drm_device *dev)
> +{
> +	int (*reset)(struct drm_device *);
> +
> +	reset = intel_get_gpu_reset(dev);
> +	if (reset == NULL)
>  		return -ENODEV;
> +
> +	return reset(dev);
> +}
> +
> +bool intel_has_gpu_reset(struct drm_device *dev)
> +{
> +	return intel_get_gpu_reset(dev) != NULL;
>  }
>  
>  void intel_uncore_check_errors(struct drm_device *dev)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 4c3420f932a5..312adbeb4eec 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -354,6 +354,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_REVISION              32
>  #define I915_PARAM_SUBSLICE_TOTAL	 33
>  #define I915_PARAM_EU_TOTAL		 34
> +#define I915_PARAM_HAS_GPU_RESET	 35
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> -- 
> 2.1.4
>

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 34248635c36c..88795d2f1819 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -163,6 +163,11 @@  static int i915_getparam(struct drm_device *dev, void *data,
 		if (!value)
 			return -ENODEV;
 		break;
+	case I915_PARAM_HAS_GPU_RESET:
+		value = i915.enable_hangcheck &&
+			i915.reset &&
+			intel_has_gpu_reset(dev);
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1110d492ec01..85da0dc3c0e6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2599,6 +2599,7 @@  extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
 			      unsigned long arg);
 #endif
 extern int intel_gpu_reset(struct drm_device *dev);
+extern bool intel_has_gpu_reset(struct drm_device *dev);
 extern int i915_reset(struct drm_device *dev);
 extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 40382bff5ca0..a61de6e944d2 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1489,20 +1489,36 @@  static int gen6_do_reset(struct drm_device *dev)
 	return ret;
 }
 
-int intel_gpu_reset(struct drm_device *dev)
+static int (*intel_get_gpu_reset(struct drm_device *dev))(struct drm_device *)
 {
 	if (INTEL_INFO(dev)->gen >= 6)
-		return gen6_do_reset(dev);
+		return gen6_do_reset;
 	else if (IS_GEN5(dev))
-		return ironlake_do_reset(dev);
+		return ironlake_do_reset;
 	else if (IS_G4X(dev))
-		return g4x_do_reset(dev);
+		return g4x_do_reset;
 	else if (IS_G33(dev))
-		return g33_do_reset(dev);
+		return g33_do_reset;
 	else if (INTEL_INFO(dev)->gen >= 3)
-		return i915_do_reset(dev);
+		return i915_do_reset;
 	else
+		return NULL;
+}
+
+int intel_gpu_reset(struct drm_device *dev)
+{
+	int (*reset)(struct drm_device *);
+
+	reset = intel_get_gpu_reset(dev);
+	if (reset == NULL)
 		return -ENODEV;
+
+	return reset(dev);
+}
+
+bool intel_has_gpu_reset(struct drm_device *dev)
+{
+	return intel_get_gpu_reset(dev) != NULL;
 }
 
 void intel_uncore_check_errors(struct drm_device *dev)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 4c3420f932a5..312adbeb4eec 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -354,6 +354,7 @@  typedef struct drm_i915_irq_wait {
 #define I915_PARAM_REVISION              32
 #define I915_PARAM_SUBSLICE_TOTAL	 33
 #define I915_PARAM_EU_TOTAL		 34
+#define I915_PARAM_HAS_GPU_RESET	 35
 
 typedef struct drm_i915_getparam {
 	int param;