drm/i915: Fix failure paths around initial fbdev allocation
diff mbox

Message ID 1434365392-19808-1-git-send-email-tvrtko.ursulin@linux.intel.com
State New
Headers show

Commit Message

Tvrtko Ursulin June 15, 2015, 10:49 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We had two failure modes here:

1.
Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove,
which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was
already holding it.

2.
Double unreference on the object if __intel_framebuffer_create fails since
both it and the caller (intelfb_alloc) do the unreference.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_fbdev.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

Comments

Ville Syrjälä June 15, 2015, 11:22 a.m. UTC | #1
On Mon, Jun 15, 2015 at 11:49:52AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We had two failure modes here:
> 
> 1.
> Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove,
> which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was
> already holding it.
> 
> 2.
> Double unreference on the object if __intel_framebuffer_create fails since
> both it and the caller (intelfb_alloc) do the unreference.

I would suggest removing the unref from __intel_framebuffer_create().

> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_fbdev.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index 6372cfc..b998f69 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -141,7 +141,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  {
>  	struct intel_fbdev *ifbdev =
>  		container_of(helper, struct intel_fbdev, helper);
> -	struct drm_framebuffer *fb;
> +	struct drm_framebuffer *fb = NULL;
>  	struct drm_device *dev = helper->dev;
>  	struct drm_mode_fb_cmd2 mode_cmd = {};
>  	struct drm_i915_gem_object *obj;
> @@ -159,6 +159,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
>  							  sizes->surface_depth);
>  
> +	mutex_lock(&dev->struct_mutex);
> +
>  	size = mode_cmd.pitches[0] * mode_cmd.height;
>  	size = PAGE_ALIGN(size);
>  	obj = i915_gem_object_create_stolen(dev, size);
> @@ -172,8 +174,9 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  
>  	fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
>  	if (IS_ERR(fb)) {
> +		/* Drops object reference on failure. */
>  		ret = PTR_ERR(fb);
> -		goto out_unref;
> +		goto out;
>  	}
>  
>  	/* Flush everything out, we'll be doing GTT only from now on */
> @@ -183,15 +186,18 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  		goto out_fb;
>  	}
>  
> +	mutex_unlock(&dev->struct_mutex);
> +
>  	ifbdev->fb = to_intel_framebuffer(fb);
>  
>  	return 0;
>  
>  out_fb:
> -	drm_framebuffer_remove(fb);
> -out_unref:
>  	drm_gem_object_unreference(&obj->base);
>  out:
> +	mutex_unlock(&dev->struct_mutex);
> +	if (fb)
> +		drm_framebuffer_remove(fb);
>  	return ret;
>  }
>  
> @@ -209,8 +215,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	int size, ret;
>  	bool prealloc = false;
>  
> -	mutex_lock(&dev->struct_mutex);
> -
>  	if (intel_fb &&
>  	    (sizes->fb_width > intel_fb->base.width ||
>  	     sizes->fb_height > intel_fb->base.height)) {
> @@ -225,7 +229,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
>  		ret = intelfb_alloc(helper, sizes);
>  		if (ret)
> -			goto out_unlock;
> +			return ret;
>  		intel_fb = ifbdev->fb;
>  	} else {
>  		DRM_DEBUG_KMS("re-using BIOS fb\n");
> @@ -237,6 +241,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	obj = intel_fb->obj;
>  	size = obj->base.size;
>  
> +	mutex_lock(&dev->struct_mutex);
> +
>  	info = framebuffer_alloc(0, &dev->pdev->dev);
>  	if (!info) {
>  		ret = -ENOMEM;
> @@ -307,7 +313,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  out_unpin:
>  	i915_gem_object_ggtt_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
> -out_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  	return ret;
>  }
> -- 
> 2.4.2
Shuang He June 29, 2015, 7:03 a.m. UTC | #2
Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6642
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
ILK                                  302/302              302/302
SNB                                  312/316              312/316
IVB                                  343/343              343/343
BYT                 -1              287/287              286/287
HSW                                  380/380              380/380
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*BYT  igt@gem_tiled_partial_pwrite_pread@reads      PASS(1)      FAIL(1)
Note: You need to pay more attention to line start with '*'
Lukas Wunner June 29, 2015, 2:39 p.m. UTC | #3
Hi,

On Mon, Jun 15, 2015 at 11:49:52AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We had two failure modes here:
> 
> 1.
> Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove,
> which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was
> already holding it.

s/intelfb_alloc/intelfb_create/
s/(caller of intelfb_alloc)//


> 
> 2.
> Double unreference on the object if __intel_framebuffer_create fails since
> both it and the caller (intelfb_alloc) do the unreference.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>

Fixes: 60a5ca015ffd2aacfe5674b5a401cd2a37159e07
Reported-By: Lukas Wunner <lukas@wunner.de>


> ---
>  drivers/gpu/drm/i915/intel_fbdev.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index 6372cfc..b998f69 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -141,7 +141,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  {
>  	struct intel_fbdev *ifbdev =
>  		container_of(helper, struct intel_fbdev, helper);
> -	struct drm_framebuffer *fb;
> +	struct drm_framebuffer *fb = NULL;
>  	struct drm_device *dev = helper->dev;
>  	struct drm_mode_fb_cmd2 mode_cmd = {};
>  	struct drm_i915_gem_object *obj;
> @@ -159,6 +159,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
>  							  sizes->surface_depth);
>  
> +	mutex_lock(&dev->struct_mutex);
> +
>  	size = mode_cmd.pitches[0] * mode_cmd.height;
>  	size = PAGE_ALIGN(size);
>  	obj = i915_gem_object_create_stolen(dev, size);
> @@ -172,8 +174,9 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  
>  	fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
>  	if (IS_ERR(fb)) {
> +		/* Drops object reference on failure. */
>  		ret = PTR_ERR(fb);
> -		goto out_unref;
> +		goto out;
>  	}
>  
>  	/* Flush everything out, we'll be doing GTT only from now on */
> @@ -183,15 +186,18 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  		goto out_fb;
>  	}
>  
> +	mutex_unlock(&dev->struct_mutex);
> +
>  	ifbdev->fb = to_intel_framebuffer(fb);
>  
>  	return 0;
>  
>  out_fb:
> -	drm_framebuffer_remove(fb);
> -out_unref:
>  	drm_gem_object_unreference(&obj->base);
>  out:
> +	mutex_unlock(&dev->struct_mutex);
> +	if (fb)
> +		drm_framebuffer_remove(fb);

Hm, the order of drm_gem_object_unreference() and drm_framebuffer_remove()
is reversed now, could this cause any issues?

Apart from that,

Reviewed-By: Lukas Wunner <lukas@wunner.de>

I will also test the patch and report back in a bit.

Thanks a lot,

Lukas


>  	return ret;
>  }
>  
> @@ -209,8 +215,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	int size, ret;
>  	bool prealloc = false;
>  
> -	mutex_lock(&dev->struct_mutex);
> -
>  	if (intel_fb &&
>  	    (sizes->fb_width > intel_fb->base.width ||
>  	     sizes->fb_height > intel_fb->base.height)) {
> @@ -225,7 +229,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
>  		ret = intelfb_alloc(helper, sizes);
>  		if (ret)
> -			goto out_unlock;
> +			return ret;
>  		intel_fb = ifbdev->fb;
>  	} else {
>  		DRM_DEBUG_KMS("re-using BIOS fb\n");
> @@ -237,6 +241,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	obj = intel_fb->obj;
>  	size = obj->base.size;
>  
> +	mutex_lock(&dev->struct_mutex);
> +
>  	info = framebuffer_alloc(0, &dev->pdev->dev);
>  	if (!info) {
>  		ret = -ENOMEM;
> @@ -307,7 +313,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  out_unpin:
>  	i915_gem_object_ggtt_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);
> -out_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  	return ret;
>  }
> -- 
> 2.4.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tvrtko Ursulin June 29, 2015, 3:15 p.m. UTC | #4
Hi,

On 06/29/2015 03:39 PM, Lukas Wunner wrote:
> Hi,
>
> On Mon, Jun 15, 2015 at 11:49:52AM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> We had two failure modes here:
>>
>> 1.
>> Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove,
>> which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was
>> already holding it.
>
> s/intelfb_alloc/intelfb_create/
> s/(caller of intelfb_alloc)//

I looked again and don't see that I made a mistake with regards to this?

>> 2.
>> Double unreference on the object if __intel_framebuffer_create fails since
>> both it and the caller (intelfb_alloc) do the unreference.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> Fixes: 60a5ca015ffd2aacfe5674b5a401cd2a37159e07
> Reported-By: Lukas Wunner <lukas@wunner.de>

Sorry probably my oversight, don't remember now how it all came about.

>> @@ -183,15 +186,18 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>>   		goto out_fb;
>>   	}
>>
>> +	mutex_unlock(&dev->struct_mutex);
>> +
>>   	ifbdev->fb = to_intel_framebuffer(fb);
>>
>>   	return 0;
>>
>>   out_fb:
>> -	drm_framebuffer_remove(fb);
>> -out_unref:
>>   	drm_gem_object_unreference(&obj->base);
>>   out:
>> +	mutex_unlock(&dev->struct_mutex);
>> +	if (fb)
>> +		drm_framebuffer_remove(fb);
>
> Hm, the order of drm_gem_object_unreference() and drm_framebuffer_remove()
> is reversed now, could this cause any issues?

No, that is fine.

> Apart from that,
>
> Reviewed-By: Lukas Wunner <lukas@wunner.de>
>
> I will also test the patch and report back in a bit.

That will be very useful. I did not test it extensively myself, just 
fired it off quickly since I was briefly hitting this failure path myself.

Regards,

Tvrtko
Lukas Wunner June 29, 2015, 5:48 p.m. UTC | #5
Hi Tvrtko,

On Mon, Jun 29, 2015 at 04:15:08PM +0100, Tvrtko Ursulin wrote:
> On 06/29/2015 03:39 PM, Lukas Wunner wrote:
> >On Mon, Jun 15, 2015 at 11:49:52AM +0100, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>We had two failure modes here:
> >>
> >>1.
> >>Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove,
> >>which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was
> >>already holding it.
> >
> >s/intelfb_alloc/intelfb_create/
> >s/(caller of intelfb_alloc)//
> 
> I looked again and don't see that I made a mistake with regards to this?

Sorry, I was confused myself:

Apart from the two issues you mentioned in the commit message there's
a third one fixed by this patch which could briefly be described as:

"Deadlock in intelfb_create failure path where it calls
drm_framebuffer_unreference, which grabs the struct mutex
and intelfb_create was already holding it."

Only that single issue was reported by me, the others were subsequently
discovered by you or your colleagues.

A somewhat more elaborate description is in my original report:
http://lists.freedesktop.org/archives/intel-gfx/2015-June/067965.html


> >I will also test the patch and report back in a bit.
> 
> That will be very useful. I did not test it extensively myself, just fired
> it off quickly since I was briefly hitting this failure path myself.

Okay the patch works for me (in the specific scenario on the
MacBook Pro where the existing fb is discarded because it is
too small and a new one is allocated).

PRTS did report some issue on Baytrail though:
http://lists.freedesktop.org/archives/intel-gfx/2015-June/069978.html

One nitpick is that your commit message exceeds 72 characters per line.

Also, Ville had posted a suggestion which was not discussed further,
removing the drm_gem_object_unreference() from __intel_framebuffer_create()
in favor of the one you now removed from intelfb_alloc(). (Personally
I'm totally indifferent on that.)

Best regards,

Lukas

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 6372cfc..b998f69 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -141,7 +141,7 @@  static int intelfb_alloc(struct drm_fb_helper *helper,
 {
 	struct intel_fbdev *ifbdev =
 		container_of(helper, struct intel_fbdev, helper);
-	struct drm_framebuffer *fb;
+	struct drm_framebuffer *fb = NULL;
 	struct drm_device *dev = helper->dev;
 	struct drm_mode_fb_cmd2 mode_cmd = {};
 	struct drm_i915_gem_object *obj;
@@ -159,6 +159,8 @@  static int intelfb_alloc(struct drm_fb_helper *helper,
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
 							  sizes->surface_depth);
 
+	mutex_lock(&dev->struct_mutex);
+
 	size = mode_cmd.pitches[0] * mode_cmd.height;
 	size = PAGE_ALIGN(size);
 	obj = i915_gem_object_create_stolen(dev, size);
@@ -172,8 +174,9 @@  static int intelfb_alloc(struct drm_fb_helper *helper,
 
 	fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
 	if (IS_ERR(fb)) {
+		/* Drops object reference on failure. */
 		ret = PTR_ERR(fb);
-		goto out_unref;
+		goto out;
 	}
 
 	/* Flush everything out, we'll be doing GTT only from now on */
@@ -183,15 +186,18 @@  static int intelfb_alloc(struct drm_fb_helper *helper,
 		goto out_fb;
 	}
 
+	mutex_unlock(&dev->struct_mutex);
+
 	ifbdev->fb = to_intel_framebuffer(fb);
 
 	return 0;
 
 out_fb:
-	drm_framebuffer_remove(fb);
-out_unref:
 	drm_gem_object_unreference(&obj->base);
 out:
+	mutex_unlock(&dev->struct_mutex);
+	if (fb)
+		drm_framebuffer_remove(fb);
 	return ret;
 }
 
@@ -209,8 +215,6 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	int size, ret;
 	bool prealloc = false;
 
-	mutex_lock(&dev->struct_mutex);
-
 	if (intel_fb &&
 	    (sizes->fb_width > intel_fb->base.width ||
 	     sizes->fb_height > intel_fb->base.height)) {
@@ -225,7 +229,7 @@  static int intelfb_create(struct drm_fb_helper *helper,
 		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
 		ret = intelfb_alloc(helper, sizes);
 		if (ret)
-			goto out_unlock;
+			return ret;
 		intel_fb = ifbdev->fb;
 	} else {
 		DRM_DEBUG_KMS("re-using BIOS fb\n");
@@ -237,6 +241,8 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	obj = intel_fb->obj;
 	size = obj->base.size;
 
+	mutex_lock(&dev->struct_mutex);
+
 	info = framebuffer_alloc(0, &dev->pdev->dev);
 	if (!info) {
 		ret = -ENOMEM;
@@ -307,7 +313,6 @@  static int intelfb_create(struct drm_fb_helper *helper,
 out_unpin:
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
-out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }