diff mbox

[1/5] drm/i915: downgrade non-lethal BUG_ONs

Message ID 1303245964-3022-2-git-send-email-daniel.vetter@ffwll.ch (mailing list archive)
State New, archived
Headers show

Commit Message

Daniel Vetter April 19, 2011, 8:46 p.m. UTC
If it's a simple gem accounting error that won't lead to immediate harm
(like a NULL-deref) or is a simple violation of a required invariant
that the caller should always check/ensure, downgrade the BUG_ON to
a WARN_ON and hope the system survives long enough to grab the dmesg.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c            |    6 +++---
 drivers/gpu/drm/i915/i915_gem_evict.c      |    4 ++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

Comments

Chris Wilson April 20, 2011, 8:18 a.m. UTC | #1
On Tue, 19 Apr 2011 22:46:00 +0200, Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
> If it's a simple gem accounting error that won't lead to immediate harm
> (like a NULL-deref) or is a simple violation of a required invariant
> that the caller should always check/ensure, downgrade the BUG_ON to
> a WARN_ON and hope the system survives long enough to grab the dmesg.

When converting to a WARN we should include the error checking and
propagate the failure back up rather than continuing on with inconsistent
state.

> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_gem.c            |    6 +++---
>  drivers/gpu/drm/i915/i915_gem_evict.c      |    4 ++--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    4 ++--
>  3 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ae40272..1ef0b91 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1748,7 +1748,7 @@ i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
>  	struct drm_device *dev = obj->base.dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  
> -	BUG_ON(!obj->active);
> +	WARN_ON(!obj->active);

This warning can be moved up into the caller and so share the warning
between move_to_flushing and move_to_inactive.

>  	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
>  
>  	i915_gem_object_move_off_active(obj);
> @@ -1765,8 +1765,8 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
>  	else
>  		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
>  
> -	BUG_ON(!list_empty(&obj->gpu_write_list));
> -	BUG_ON(!obj->active);
> +	WARN_ON(!list_empty(&obj->gpu_write_list));
> +	WARN_ON(!obj->active);
>  	obj->ring = NULL;
>  
>  	i915_gem_object_move_off_active(obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index da05a26..db62fae 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -136,7 +136,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
>  				       exec_list);
>  
>  		ret = drm_mm_scan_remove_block(obj->gtt_space);
> -		BUG_ON(ret);
> +		WARN_ON(ret);

drm_mm_scan_remove_block() should be returning a bool so that we (I!) don't
confuse it with an error code.

if (WARN_ON(ret)) return -EIO; /* or whatever is safe. */

If there is no safe way to handle the error, it is a BUG.

>  		list_del_init(&obj->exec_list);
>  		drm_gem_object_unreference(&obj->base);
> @@ -199,7 +199,7 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
>  	if (ret)
>  		return ret;
>  
> -	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
> +	WARN_ON(!list_empty(&dev_priv->mm.flushing_list));

if (WARN_ON(!list_empty()) return -EIO;
 
>  	return i915_gem_evict_inactive(dev, purgeable_only);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 316603e..8cac87c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1093,7 +1093,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  								&objects, eb,
>  								exec,
>  								args->buffer_count);
> -			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> +			WARN_ON(!mutex_is_locked(&dev->struct_mutex));

I think this can be dropped after close inspection of the call path.

>  		}
>  		if (ret)
>  			goto err;
> @@ -1122,7 +1122,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  			if (ret)
>  				goto err;
>  
> -			BUG_ON(ring->sync_seqno[i]);
> +			WARN_ON(ring->sync_seqno[i]);

if (WARN_ON()) { ret = -EIO; goto err; }
-Chris
Ben Widawsky April 20, 2011, 2:27 p.m. UTC | #2
On Wed, Apr 20, 2011 at 09:18:03AM +0100, Chris Wilson wrote:

> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 316603e..8cac87c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -1093,7 +1093,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> >  								&objects, eb,
> >  								exec,
> >  								args->buffer_count);
> > -			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> > +			WARN_ON(!mutex_is_locked(&dev->struct_mutex));
> 
> I think this can be dropped after close inspection of the call path.
> 

Is that right? There are definitely cases where the mutex is released
and not reacquired. You would know better than I if those cases can
occur in a normal system. Assuming they can, Won't we just BUG_ON when
we try to release struct_mutex?

> -Chris
> 

Ben
Chris Wilson April 20, 2011, 2:36 p.m. UTC | #3
On Wed, 20 Apr 2011 07:27:24 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> On Wed, Apr 20, 2011 at 09:18:03AM +0100, Chris Wilson wrote:
> 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > index 316603e..8cac87c 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > > @@ -1093,7 +1093,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> > >  								&objects, eb,
> > >  								exec,
> > >  								args->buffer_count);
> > > -			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> > > +			WARN_ON(!mutex_is_locked(&dev->struct_mutex));
> > 
> > I think this can be dropped after close inspection of the call path.
> > 
> 
> Is that right? There are definitely cases where the mutex is released
> and not reacquired. You would know better than I if those cases can
> occur in a normal system. Assuming they can, Won't we just BUG_ON when
> we try to release struct_mutex?

This particular BUG_ON() I added at Daniel's request to clarify the
reservation fallback logic. Code inspection should be sufficient to
verify that the BUG_ON() is not required, and by now we should be happy
that we didn't miss anything.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ae40272..1ef0b91 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1748,7 +1748,7 @@  i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
-	BUG_ON(!obj->active);
+	WARN_ON(!obj->active);
 	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
 
 	i915_gem_object_move_off_active(obj);
@@ -1765,8 +1765,8 @@  i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	else
 		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	BUG_ON(!list_empty(&obj->gpu_write_list));
-	BUG_ON(!obj->active);
+	WARN_ON(!list_empty(&obj->gpu_write_list));
+	WARN_ON(!obj->active);
 	obj->ring = NULL;
 
 	i915_gem_object_move_off_active(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index da05a26..db62fae 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -136,7 +136,7 @@  i915_gem_evict_something(struct drm_device *dev, int min_size,
 				       exec_list);
 
 		ret = drm_mm_scan_remove_block(obj->gtt_space);
-		BUG_ON(ret);
+		WARN_ON(ret);
 
 		list_del_init(&obj->exec_list);
 		drm_gem_object_unreference(&obj->base);
@@ -199,7 +199,7 @@  i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
 	if (ret)
 		return ret;
 
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
+	WARN_ON(!list_empty(&dev_priv->mm.flushing_list));
 
 	return i915_gem_evict_inactive(dev, purgeable_only);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 316603e..8cac87c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1093,7 +1093,7 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 								&objects, eb,
 								exec,
 								args->buffer_count);
-			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+			WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 		}
 		if (ret)
 			goto err;
@@ -1122,7 +1122,7 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			if (ret)
 				goto err;
 
-			BUG_ON(ring->sync_seqno[i]);
+			WARN_ON(ring->sync_seqno[i]);
 		}
 	}