RFC drm/i915: Stop the machine whilst capturing the GPU crash dump

Message ID 1444393305-20977-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Oct. 9, 2015, 12:21 p.m. UTC
The error state is purposefully racy as we expect it to be called at any
time and so have avoided any locking whilst capturing the crash dump.
However, with multi-engine GPUs and multiple CPUs, those races can
manifest into OOPSes as we attempt to chase dangling pointers freed on
other CPUs. Under discussion are lots of ways to slow down normal
operation in order to protect the post-mortem error capture, but what if
we take the opposite approach and freeze the machine whilst the error
capture runs (note the GPU may still be running, but as long as we don't
process any of the results the driver's bookkeeping will be static).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig          |  1 +
 drivers/gpu/drm/i915/i915_drv.h       |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 31 +++++++++++++++++++++----------
 3 files changed, 24 insertions(+), 10 deletions(-)
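For background, stop_machine(fn, data, cpus) funnels every online CPU into
its highest-priority stopper thread, disables interrupts on all of them, and
then runs fn(data) on one CPU while the rest spin - effectively a
machine-wide critical section. A rough sketch of the usage pattern (the real
callback is in the patch below; error handling elided):

#include <linux/stop_machine.h>

/* Runs with every other CPU spinning, interrupts off: nothing the driver
 * tracks can change while this executes. */
static int capture(void *data)
{
        struct drm_i915_error_state *error = data;

        /* ... walk the bookkeeping and copy it into *error ... */
        return 0;
}

/* ... from the error handler; NULL cpumask = run on any one online CPU ... */
stop_machine(capture, error, NULL);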

Comments

Daniel Vetter Oct. 9, 2015, 5:33 p.m. UTC | #1
On Fri, Oct 09, 2015 at 01:21:45PM +0100, Chris Wilson wrote:
> The error state is purposefully racy as we expect it to be called at any
> time and so have avoided any locking whilst capturing the crash dump.
> However, with multi-engine GPUs and multiple CPUs, those races can
> manifest into OOPSes as we attempt to chase dangling pointers freed on
> other CPUs. Under discussion are lots of ways to slow down normal
> operation in order to protect the post-mortem error capture, but what if
> we take the opposite approach and freeze the machine whilst the error
> capture runs (note the GPU may still be running, but as long as we don't
> process any of the results the driver's bookkeeping will be static).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

One risk I see is that the list walking might still go off the rails when
we stop the machine right in the middle of a list_move. With that we might
start scanning the active list (of objects) on one engine and then midway
through get to another engine and so never see the list_head again,
looping forever. No idea yet what to do with that.
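For reference, a list_move() is an unlink followed by a relink, roughly
(simplified, not the exact include/linux/list.h code):

static inline void list_move(struct list_head *entry, struct list_head *head)
{
        /* unlink: the old list is consistent again without entry ... */
        entry->next->prev = entry->prev;
        entry->prev->next = entry->next;

        /* ... relink: entry is published on the new list last */
        entry->next = head->next;
        entry->prev = head;
        head->next->prev = entry;
        head->next = entry;
}

In source order the old list is whole again before entry becomes reachable
from the new head, but these are plain stores, so which half-finished state a
stopped CPU leaves behind is up to the compiler and the CPU.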

Intriguing approach nevertheless.
-Daniel

Chris Wilson Oct. 9, 2015, 5:55 p.m. UTC | #2
On Fri, Oct 09, 2015 at 07:33:23PM +0200, Daniel Vetter wrote:
> One risk I see is that the list walking might still go off the rails when
> we stop the machine right in the middle of a list_move. With that we might
> start scanning the active list (of objects) on one engine and then midway
> through get to another engine and so never see the list_head again,
> looping forever. No idea yet what to do with that.

A move is a del followed by an insertion; you cannot step into an entry
that is in the middle of moving lists - don't forget that stop_machine() is
a very heavy memory barrier. Similarly, the list_add() should ensure we
can't step forward into an element that will not lead back to the list.
So I am not convinced that it would be susceptible to that style of
hijacking.

The only alternative I see to list walking is not to do any from the
error capture and rely on attaching enough information to the request
(along with register state) to be able to do postmortems.
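i.e. something along these lines (entirely made-up field names, just to show
the shape of the idea - snapshot what we need at request construction and
retirement so the capture never has to walk the object lists):

/* Illustrative only, not fields the driver has today. */
struct i915_request_postmortem {
        u32 seqno;
        u32 ring_head, ring_tail;
        u64 batch_gtt_offset;
        u32 ctx_hw_id;
};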
-Chris
Daniel Vetter Oct. 13, 2015, 12:09 p.m. UTC | #3
On Fri, Oct 09, 2015 at 06:55:23PM +0100, Chris Wilson wrote:
> On Fri, Oct 09, 2015 at 07:33:23PM +0200, Daniel Vetter wrote:
> > One risk I see is that the list walking might still go off the rails when
> > we stop the machine right in the middle of a list_move. With that we might
> > start scanning the active list (of objects) on one engine and then midway
> > through get to another engine and so never see the list_head again,
> > looping forever. No idea yet what to do with that.
> 
> A move is a del followed by an insertion, you cannot step into an entry
> that is the middle of moving lists - don't forget that stop_machine() is
> a very heavy memory barrier. Similarly, the list_add() should ensure we
> can't step forward into an element that will not lead back to the list.
> So I am not convinced that it would be suspectible to that style of
> hijacking.

The compiler could wreak havoc, so I think we need at least somewhat ordered
list updates. Using the rcu list primitives but stop_machine instead of
kfree_rcu might do the trick.
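The _rcu list helpers publish the new entry with rcu_assign_pointer(), so a
concurrent forward walker never sees a half-linked node; roughly:

static inline void __list_add_rcu(struct list_head *new,
                                  struct list_head *prev,
                                  struct list_head *next)
{
        new->next = next;
        new->prev = prev;
        /* ordered publication: the lines above are visible before prev->next */
        rcu_assign_pointer(list_next_rcu(prev), new);
        next->prev = new;
}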

> The only alternative I see to list walking, is not to do any from the
> error capture and rely on attaching enough information to the request
> (along with register state) to be able to do postmortems.

That still means we need to at least protect the request lists to get at
said request. And it sounded like you wouldn't like a kfree_rcu in there
that much.
-Daniel
Chris Wilson Oct. 13, 2015, 12:24 p.m. UTC | #4
On Tue, Oct 13, 2015 at 02:09:59PM +0200, Daniel Vetter wrote:
> On Fri, Oct 09, 2015 at 06:55:23PM +0100, Chris Wilson wrote:
> > On Fri, Oct 09, 2015 at 07:33:23PM +0200, Daniel Vetter wrote:
> > > One risk I see is that the list walking might still go off the rails when
> > > we stop the machine right in the middle of a list_move. With that we might
> > > start scanning the active list (of objects) on one engine and then midway
> > > through get to another engine and so never see the list_head again,
> > > looping forever. No idea yet what to do with that.
> > 
> > A move is a del followed by an insertion, you cannot step into an entry
> > that is the middle of moving lists - don't forget that stop_machine() is
> > a very heavy memory barrier. Similarly, the list_add() should ensure we
> > can't step forward into an element that will not lead back to the list.
> > So I am not convinced that it would be suspectible to that style of
> > hijacking.
> 
> The compiler could do havoc, so I think we need at least somewhat ordered
> lists updates. Using rcu lists primitives but stop_machine instead of
> kfree_rcu might do the trick.

I'd take the compiler barriers, but I don't want the mb() inside every
list update. And with a compiler barrier, only walking the lists forwards
in the error capture, and the error capture being inside a stop_machine()
(so mb() and no concurrent access), it is safe. (Quite a list of brittle
caveats.)
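For scale, the three levels being traded off here (x86 flavour, roughly):

barrier();      /* compiler-only: no reordering by the compiler, free at runtime */
smp_wmb();      /* orders stores against other CPUs; on x86 still just a
                 * compiler barrier, as stores are not reordered with stores */
mb();           /* full fence (mfence on x86-64) - the cost I don't want in
                 * every list update */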
 
> > The only alternative I see to list walking, is not to do any from the
> > error capture and rely on attaching enough information to the request
> > (along with register state) to be able to do postmortems.
> 
> That still means we need to at least protect the request lists to get at
> said request. And it sounded like you wouldn't like a kfree_rcu in there
> that much.

The burden has to be on the error capture side, as having to do any atomic
operations whilst processing the requests quickly shows up in the
profiles (at the moment the profiles here are dominated by the memory
accesses required to update the lists, whereas those accesses were once
dwarfed by the locked operations), so I don't even relish the prospect
of adding atomic operations around list walking in the normal case.
-Chris
Daniel Vetter Oct. 13, 2015, 1:52 p.m. UTC | #5
On Tue, Oct 13, 2015 at 01:24:53PM +0100, Chris Wilson wrote:
> On Tue, Oct 13, 2015 at 02:09:59PM +0200, Daniel Vetter wrote:
> > On Fri, Oct 09, 2015 at 06:55:23PM +0100, Chris Wilson wrote:
> > > On Fri, Oct 09, 2015 at 07:33:23PM +0200, Daniel Vetter wrote:
> > > > One risk I see is that the list walking might still go off the rails when
> > > > we stop the machine right in the middle of a list_move. With that we might
> > > > start scanning the active list (of objects) on one engine and then midway
> > > > through get to another engine and so never see the list_head again,
> > > > looping forever. No idea yet what to do with that.
> > > 
> > > A move is a del followed by an insertion, you cannot step into an entry
> > > that is the middle of moving lists - don't forget that stop_machine() is
> > > a very heavy memory barrier. Similarly, the list_add() should ensure we
> > > can't step forward into an element that will not lead back to the list.
> > > So I am not convinced that it would be suspectible to that style of
> > > hijacking.
> > 
> > The compiler could do havoc, so I think we need at least somewhat ordered
> > lists updates. Using rcu lists primitives but stop_machine instead of
> > kfree_rcu might do the trick.
> 
> I'd take the compiler barriers, but I don't want the mb() inside every
> list update. And with a barrier, only walking the lists forwards in the
> error capture, and the error capture being inside a stop_machine (so
> mb() and no concurrent access) is safe. (Quite a list of brittle
> caveats.)

Yeah, hence using _rcu list macros. They have the relevant barriers
already and should work. The only difference is that instead of
synchronize_rcu on the write side before kfree, we'll use stop_machine on
the read side. It's still RCU, but with all the cost moved to the read
side while still keeping the benefit that the read side can be done
locklessly.
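In sketch form (record_request() is an illustrative stand-in): the walker
needs neither rcu_read_lock() nor struct_mutex because stop_machine() has
already parked every writer, and the write side can free immediately because
that walker is the only lockless reader:

static int capture(void *data)
{
        struct drm_i915_error_state *error = data;
        struct drm_i915_gem_request *req;
        struct intel_engine_cs *ring;
        int i;

        for_each_ring(ring, error->i915, i)
                list_for_each_entry(req, &ring->request_list, list)
                        record_request(error, req);

        return 0;
}

/* write side: _rcu list updates for the ordering, but a plain kfree() -
 * the stop_machine() on the read side stands in for the grace period. */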

> > > The only alternative I see to list walking, is not to do any from the
> > > error capture and rely on attaching enough information to the request
> > > (along with register state) to be able to do postmortems.
> > 
> > That still means we need to at least protect the request lists to get at
> > said request. And it sounded like you wouldn't like a kfree_rcu in there
> > that much.
> 
> The burden has to be on the error capture side as having to do any atomic
> operations whilst processing the requests quickly show up in the
> profiles (at the moment here those profiles are dominated by the memory
> access required to update the lists, where once those accesses were
> dwarfed by the locked operations.) so I don't even relish the prospect
> of adding atomic operations around list walking in the normal case.

Yeah, spin_lock_irq would be the horror, and that's the only other solid
plan we have really. One caveat of stop_machine is that we can only use it
in the error capture, not in the hangcheck itself. But at most we'd need
to rcu-protect the requests properly, and using an engine-local buffer (to avoid the
risk of jumping off the rails onto another list) for that would fully
mitigate any rcu costs for freeing. But I didn't check the code to see whether we
even need that ;-)
-Daniel
Chris Wilson Oct. 13, 2015, 1:52 p.m. UTC | #6
On Tue, Oct 13, 2015 at 03:52:08PM +0200, Daniel Vetter wrote:
> On Tue, Oct 13, 2015 at 01:24:53PM +0100, Chris Wilson wrote:
> > On Tue, Oct 13, 2015 at 02:09:59PM +0200, Daniel Vetter wrote:
> > > On Fri, Oct 09, 2015 at 06:55:23PM +0100, Chris Wilson wrote:
> > > > On Fri, Oct 09, 2015 at 07:33:23PM +0200, Daniel Vetter wrote:
> > > > > One risk I see is that the list walking might still go off the rails when
> > > > > we stop the machine right in the middle of a list_move. With that we might
> > > > > start scanning the active list (of objects) on one engine and then midway
> > > > > through get to another engine and so never see the list_head again,
> > > > > looping forever. No idea yet what to do with that.
> > > > 
> > > > A move is a del followed by an insertion, you cannot step into an entry
> > > > that is the middle of moving lists - don't forget that stop_machine() is
> > > > a very heavy memory barrier. Similarly, the list_add() should ensure we
> > > > can't step forward into an element that will not lead back to the list.
> > > > So I am not convinced that it would be suspectible to that style of
> > > > hijacking.
> > > 
> > > The compiler could do havoc, so I think we need at least somewhat ordered
> > > lists updates. Using rcu lists primitives but stop_machine instead of
> > > kfree_rcu might do the trick.
> > 
> > I'd take the compiler barriers, but I don't want the mb() inside every
> > list update. And with a barrier, only walking the lists forwards in the
> > error capture, and the error capture being inside a stop_machine (so
> > mb() and no concurrent access) is safe. (Quite a list of brittle
> > caveats.)
> 
> Yeah, hence using _rcu list macros. They have the relevant barriers
> already and should work. The only difference is that instead of
> synchronize_rcu on the write side before kfree, we'll use stop_machine on
> the read side. It's still RCU, but with all the cost moved to the read
> side while still keeping the benefit that the read side can be done
> locklessly.

They imply an mb() on every write, not just a barrier(), and we do a fair few
list updates on each buffer.

> > > > The only alternative I see to list walking, is not to do any from the
> > > > error capture and rely on attaching enough information to the request
> > > > (along with register state) to be able to do postmortems.
> > > 
> > > That still means we need to at least protect the request lists to get at
> > > said request. And it sounded like you wouldn't like a kfree_rcu in there
> > > that much.
> > 
> > The burden has to be on the error capture side as having to do any atomic
> > operations whilst processing the requests quickly show up in the
> > profiles (at the moment here those profiles are dominated by the memory
> > access required to update the lists, where once those accesses were
> > dwarfed by the locked operations.) so I don't even relish the prospect
> > of adding atomic operations around list walking in the normal case.
> 
> Yeah, spin_lock_irq would be the horror, and that's the only other solid
> plan we have really. One caveat of stop_machine is that we can only use it
> in the error capture, not in the hangcheck itself. But at most we'd need
> to rcu requests properly, and using a engine-local buffer (to avoid the
> risk of jumping off the rails onto another list) for that would fully
> mitigate any rcu costs for freeing. But I didn't check the code whether we
> even need that ;-)

So far we have successfully devised strategies for keeping hangcheck nice
and racy; let's keep believing we can do so in the future.
-Chris
Chris Wilson Oct. 13, 2015, 2:02 p.m. UTC | #7
On Tue, Oct 13, 2015 at 02:52:46PM +0100, Chris Wilson wrote:
> On Tue, Oct 13, 2015 at 03:52:08PM +0200, Daniel Vetter wrote:
> > Yeah, hence using _rcu list macros. They have the relevant barriers
> > already and should work. The only difference is that instead of
> > synchronize_rcu on the write side before kfree, we'll use stop_machine on
> > the read side. It's still RCU, but with all the cost moved to the read
> > side while still keeping the benefit that the read side can be done
> > locklessly.
> 
> They imply a mb() on every write not just a barrier(), and we do a fair few
> list updates on each buffer.

Daniel pointed out that I was reading the version for the Pentium Pro. But
the next question will be: when does that actually get set?
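(For reference, the mb() behind rcu_assign_pointer()/smp_store_release()
only appears under CONFIG_X86_PPRO_FENCE, the PentiumPro store-ordering
erratum workaround, which is not set unless that workaround is explicitly
selected; paraphrased from arch/x86/include/asm/barrier.h of roughly that
vintage, so check the tree for the exact form:)

#ifdef CONFIG_X86_PPRO_FENCE
#define smp_store_release(p, v) do { smp_mb();  WRITE_ONCE(*p, v); } while (0)
#else  /* regular x86 TSO ordering */
#define smp_store_release(p, v) do { barrier(); WRITE_ONCE(*p, v); } while (0)
#endif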
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 99e819767204..63df28910c8f 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -7,6 +7,7 @@  config DRM_I915
 	select AGP_INTEL if AGP
 	select INTERVAL_TREE
 	select ZLIB_DEFLATE
+	select STOP_MACHINE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9d16fc1189d6..14a882fe486c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -488,6 +488,8 @@  struct drm_i915_error_state {
 	struct kref ref;
 	struct timeval time;
 
+	struct drm_i915_private *i915;
+
 	char error_msg[128];
 	int iommu;
 	u32 reset_count;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 64bdffcffb50..29cbec67bcfc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -29,6 +29,7 @@ 
 
 #include <generated/utsrelease.h>
 #include <linux/zlib.h>
+#include <linux/stop_machine.h>
 #include "i915_drv.h"
 
 static const char *yesno(int v)
@@ -1352,6 +1353,24 @@  static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
 	error->suspend_count = dev_priv->suspend_count;
 }
 
+static int capture(void *data)
+{
+	struct drm_i915_error_state *error = data;
+
+	i915_capture_gen_state(error->i915, error);
+	i915_capture_reg_state(error->i915, error);
+	i915_gem_capture_buffers(error->i915, error);
+	i915_gem_record_fences(error->i915->dev, error);
+	i915_gem_record_rings(error->i915->dev, error);
+
+	do_gettimeofday(&error->time);
+
+	error->overlay = intel_overlay_capture_error_state(error->i915->dev);
+	error->display = intel_display_capture_error_state(error->i915->dev);
+
+	return 0;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -1377,17 +1396,9 @@  void i915_capture_error_state(struct drm_device *dev, bool wedged,
 	}
 
 	kref_init(&error->ref);
+	error->i915 = dev_priv;
 
-	i915_capture_gen_state(dev_priv, error);
-	i915_capture_reg_state(dev_priv, error);
-	i915_gem_capture_buffers(dev_priv, error);
-	i915_gem_record_fences(dev, error);
-	i915_gem_record_rings(dev, error);
-
-	do_gettimeofday(&error->time);
-
-	error->overlay = intel_overlay_capture_error_state(dev);
-	error->display = intel_display_capture_error_state(dev);
+	stop_machine(capture, error, NULL);
 
 	i915_error_capture_msg(dev, error, wedged, error_msg);
 	DRM_INFO("%s\n", error->error_msg);