diff mbox

[v5,1/2] drm/i915: Fix failure paths around initial fbdev allocation

Message ID f0cd66427039ce1bdc61460a9d833e6d858cff3e.1441363361.git.lukas@wunner.de (mailing list archive)
State New, archived
Headers show

Commit Message

Lukas Wunner June 30, 2015, 9:06 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We had two failure modes here:

1.
Deadlock in intelfb_alloc failure path where it calls
drm_framebuffer_remove, which grabs the struct mutex and intelfb_create
(caller of intelfb_alloc) was already holding it.

2.
Deadlock in intelfb_create failure path where it calls
drm_framebuffer_unreference, which grabs the struct mutex and
intelfb_create was already holding it.

v2:
   * Reformat commit msg to 72 chars. (Lukas Wunner)
   * Add third failure mode. (Lukas Wunner)

v3:
   * On fb alloc failure, unref gem object where it gets refed,
     fix double unref in separate commit. (Ville Syrjälä)

v4:
   * Lock struct mutex on unref. (Chris Wilson)

v5:
   * Rebase on drm-intel-nightly 2015y-09m-04d-08h-19m-35s UTC,
     rephrase commit message. (Jani Nicula)

Tested-by: Pierre Moreau <pierre.morrow@free.fr>
    [MBP  5,3 2009  nvidia 9400M + 9600M GT   pre-retina]
Tested-by: Paul Hordiienko <pvt.gord@gmail.com>
    [MBP  6,2 2010  intel ILK + nvidia GT216  pre-retina]
Tested-by: William Brown <william@blackhats.net.au>
    [MBP  8,2 2011  intel SNB + amd turks     pre-retina]
Tested-by: Lukas Wunner <lukas@wunner.de>
    [MBP  9,1 2012  intel IVB + nvidia GK107  pre-retina]
Tested-by: Bruno Bierbaumer <bruno@bierbaumer.net>
    [MBP 11,3 2013  intel HSW + nvidia GK107  retina]

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: 60a5ca015ffd ("drm/i915: Add locking around
    framebuffer_references--")
Reported-by: Lukas Wunner <lukas@wunner.de>
[Lukas: Create v3 + v4 + v5 based on Tvrtko's v2]
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_fbdev.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

Comments

Ville Syrjälä Oct. 13, 2015, 3:04 p.m. UTC | #1
On Tue, Jun 30, 2015 at 10:06:27AM +0100, Lukas Wunner wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We had two failure modes here:
> 
> 1.
> Deadlock in intelfb_alloc failure path where it calls
> drm_framebuffer_remove, which grabs the struct mutex and intelfb_create
> (caller of intelfb_alloc) was already holding it.
> 
> 2.
> Deadlock in intelfb_create failure path where it calls
> drm_framebuffer_unreference, which grabs the struct mutex and
> intelfb_create was already holding it.
> 
> v2:
>    * Reformat commit msg to 72 chars. (Lukas Wunner)
>    * Add third failure mode. (Lukas Wunner)
> 
> v3:
>    * On fb alloc failure, unref gem object where it gets refed,
>      fix double unref in separate commit. (Ville Syrjälä)
> 
> v4:
>    * Lock struct mutex on unref. (Chris Wilson)
> 
> v5:
>    * Rebase on drm-intel-nightly 2015y-09m-04d-08h-19m-35s UTC,
>      rephrase commit message. (Jani Nicula)
> 
> Tested-by: Pierre Moreau <pierre.morrow@free.fr>
>     [MBP  5,3 2009  nvidia 9400M + 9600M GT   pre-retina]
> Tested-by: Paul Hordiienko <pvt.gord@gmail.com>
>     [MBP  6,2 2010  intel ILK + nvidia GT216  pre-retina]
> Tested-by: William Brown <william@blackhats.net.au>
>     [MBP  8,2 2011  intel SNB + amd turks     pre-retina]
> Tested-by: Lukas Wunner <lukas@wunner.de>
>     [MBP  9,1 2012  intel IVB + nvidia GK107  pre-retina]
> Tested-by: Bruno Bierbaumer <bruno@bierbaumer.net>
>     [MBP 11,3 2013  intel HSW + nvidia GK107  retina]
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Fixes: 60a5ca015ffd ("drm/i915: Add locking around
>     framebuffer_references--")
> Reported-by: Lukas Wunner <lukas@wunner.de>
> [Lukas: Create v3 + v4 + v5 based on Tvrtko's v2]
> Signed-off-by: Lukas Wunner <lukas@wunner.de>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Jani Nikula <jani.nikula@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_fbdev.c | 20 ++++++++++++--------
>  1 file changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> index 96476d7..eee3306 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -119,7 +119,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  {
>  	struct intel_fbdev *ifbdev =
>  		container_of(helper, struct intel_fbdev, helper);
> -	struct drm_framebuffer *fb;
> +	struct drm_framebuffer *fb = NULL;
>  	struct drm_device *dev = helper->dev;
>  	struct drm_mode_fb_cmd2 mode_cmd = {};
>  	struct drm_i915_gem_object *obj;
> @@ -137,6 +137,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
>  							  sizes->surface_depth);
>  
> +	mutex_lock(&dev->struct_mutex);
> +
>  	size = mode_cmd.pitches[0] * mode_cmd.height;
>  	size = PAGE_ALIGN(size);
>  	obj = i915_gem_object_create_stolen(dev, size);
> @@ -158,18 +160,21 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>  	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
>  	if (ret) {
>  		DRM_ERROR("failed to pin obj: %d\n", ret);
> -		goto out_fb;
> +		goto out_unref;
>  	}
>  
> +	mutex_unlock(&dev->struct_mutex);
> +
>  	ifbdev->fb = to_intel_framebuffer(fb);
>  
>  	return 0;
>  
> -out_fb:
> -	drm_framebuffer_remove(fb);
>  out_unref:
>  	drm_gem_object_unreference(&obj->base);

If fb init succeeded it took over the ref, no? So drm_framebuffer_remove()
will now attempt to unref one too many times.

This taking over refs stuff is confusing. Maybe it would be better if
everyone just took an extra ref when they stash the obj pointer
somewhere, and everyone would then always release whatever ref they own
and no longer need.

>  out:
> +	mutex_unlock(&dev->struct_mutex);
> +	if (fb)
> +		drm_framebuffer_remove(fb);
>  	return ret;
>  }
>  
> @@ -187,8 +192,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	int size, ret;
>  	bool prealloc = false;
>  
> -	mutex_lock(&dev->struct_mutex);
> -
>  	if (intel_fb &&
>  	    (sizes->fb_width > intel_fb->base.width ||
>  	     sizes->fb_height > intel_fb->base.height)) {
> @@ -203,7 +206,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
>  		ret = intelfb_alloc(helper, sizes);
>  		if (ret)
> -			goto out_unlock;
> +			return ret;
>  		intel_fb = ifbdev->fb;
>  	} else {
>  		DRM_DEBUG_KMS("re-using BIOS fb\n");
> @@ -215,6 +218,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
>  	obj = intel_fb->obj;
>  	size = obj->base.size;
>  
> +	mutex_lock(&dev->struct_mutex);
> +

I'm thinking we won't even need the lock here anymore. But maybe I'm
missing something.

>  	info = drm_fb_helper_alloc_fbi(helper);
>  	if (IS_ERR(info)) {
>  		ret = PTR_ERR(info);
> @@ -276,7 +281,6 @@ out_destroy_fbi:
>  out_unpin:
>  	i915_gem_object_ggtt_unpin(obj);
>  	drm_gem_object_unreference(&obj->base);

And this ref we don't own either AFAICS.

> -out_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  	return ret;
>  }
> -- 
> 2.1.0
Chris Wilson Oct. 14, 2015, 9:35 a.m. UTC | #2
On Tue, Oct 13, 2015 at 06:04:40PM +0300, Ville Syrjälä wrote:
> > +	mutex_lock(&dev->struct_mutex);
> > +
> 
> I'm thinking we won't even need the lock here anymore. But maybe I'm
> missing something.

Yeah, not so much here atm, but there is a bug here where we don't take an
explicit pin on the VMA we setup for the fbdev which requires the lock.
-chris
Lukas Wunner Oct. 15, 2015, 5:14 p.m. UTC | #3
Hi Ville,

On Tue, Oct 13, 2015 at 06:04:40PM +0300, Ville Syrjälä wrote:
> On Tue, Jun 30, 2015 at 10:06:27AM +0100, Lukas Wunner wrote:
> > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> > We had two failure modes here:
> > 
> > 1.
> > Deadlock in intelfb_alloc failure path where it calls
> > drm_framebuffer_remove, which grabs the struct mutex and intelfb_create
> > (caller of intelfb_alloc) was already holding it.
> > 
> > 2.
> > Deadlock in intelfb_create failure path where it calls
> > drm_framebuffer_unreference, which grabs the struct mutex and
> > intelfb_create was already holding it.
> > 
> > v2:
> >    * Reformat commit msg to 72 chars. (Lukas Wunner)
> >    * Add third failure mode. (Lukas Wunner)
> > 
> > v3:
> >    * On fb alloc failure, unref gem object where it gets refed,
> >      fix double unref in separate commit. (Ville Syrjälä)
> > 
> > v4:
> >    * Lock struct mutex on unref. (Chris Wilson)
> > 
> > v5:
> >    * Rebase on drm-intel-nightly 2015y-09m-04d-08h-19m-35s UTC,
> >      rephrase commit message. (Jani Nicula)
> > 
> > Tested-by: Pierre Moreau <pierre.morrow@free.fr>
> >     [MBP  5,3 2009  nvidia 9400M + 9600M GT   pre-retina]
> > Tested-by: Paul Hordiienko <pvt.gord@gmail.com>
> >     [MBP  6,2 2010  intel ILK + nvidia GT216  pre-retina]
> > Tested-by: William Brown <william@blackhats.net.au>
> >     [MBP  8,2 2011  intel SNB + amd turks     pre-retina]
> > Tested-by: Lukas Wunner <lukas@wunner.de>
> >     [MBP  9,1 2012  intel IVB + nvidia GK107  pre-retina]
> > Tested-by: Bruno Bierbaumer <bruno@bierbaumer.net>
> >     [MBP 11,3 2013  intel HSW + nvidia GK107  retina]
> > 
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Fixes: 60a5ca015ffd ("drm/i915: Add locking around
> >     framebuffer_references--")
> > Reported-by: Lukas Wunner <lukas@wunner.de>
> > [Lukas: Create v3 + v4 + v5 based on Tvrtko's v2]
> > Signed-off-by: Lukas Wunner <lukas@wunner.de>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Cc: Jani Nikula <jani.nikula@intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_fbdev.c | 20 ++++++++++++--------
> >  1 file changed, 12 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> > index 96476d7..eee3306 100644
> > --- a/drivers/gpu/drm/i915/intel_fbdev.c
> > +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> > @@ -119,7 +119,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> >  {
> >  	struct intel_fbdev *ifbdev =
> >  		container_of(helper, struct intel_fbdev, helper);
> > -	struct drm_framebuffer *fb;
> > +	struct drm_framebuffer *fb = NULL;
> >  	struct drm_device *dev = helper->dev;
> >  	struct drm_mode_fb_cmd2 mode_cmd = {};
> >  	struct drm_i915_gem_object *obj;
> > @@ -137,6 +137,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> >  	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
> >  							  sizes->surface_depth);
> >  
> > +	mutex_lock(&dev->struct_mutex);
> > +
> >  	size = mode_cmd.pitches[0] * mode_cmd.height;
> >  	size = PAGE_ALIGN(size);
> >  	obj = i915_gem_object_create_stolen(dev, size);
> > @@ -158,18 +160,21 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> >  	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
> >  	if (ret) {
> >  		DRM_ERROR("failed to pin obj: %d\n", ret);
> > -		goto out_fb;
> > +		goto out_unref;
> >  	}
> >  
> > +	mutex_unlock(&dev->struct_mutex);
> > +
> >  	ifbdev->fb = to_intel_framebuffer(fb);
> >  
> >  	return 0;
> >  
> > -out_fb:
> > -	drm_framebuffer_remove(fb);
> >  out_unref:
> >  	drm_gem_object_unreference(&obj->base);
> 
> If fb init succeeded it took over the ref, no? So drm_framebuffer_remove()
> will now attempt to unref one too many times.
> 
> This taking over refs stuff is confusing. Maybe it would be better if
> everyone just took an extra ref when they stash the obj pointer
> somewhere, and everyone would then always release whatever ref they own
> and no longer need.
> 
> >  out:
> > +	mutex_unlock(&dev->struct_mutex);
> > +	if (fb)
> > +		drm_framebuffer_remove(fb);
> >  	return ret;
> >  }
> >  

Hm, why do you think we unref one too many times?

A bit further up in this function we call __intel_framebuffer_create()
which sets the refcount to 1. (It calls intel_framebuffer_init(), which
calls drm_framebuffer_init(), which calls kref_init(&fb->refcount).)

So if intel_pin_and_fence_fb_obj() fails, we do need to unreference and
tear down the fb. Thus, drm_framebuffer_remove() seems right here to me.

However, because of your objection I've noticed now that "if (fb)" seems
to be wrong, I think this should be "if (!IS_ERR_OR_NULL(fb))".

Because if __intel_framebuffer_create() failed, fb will be a PTR_ERR(),
so not null, and we'd call drm_framebuffer_remove() on this. Is that
what you meant?


> > @@ -187,8 +192,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
> >  	int size, ret;
> >  	bool prealloc = false;
> >  
> > -	mutex_lock(&dev->struct_mutex);
> > -
> >  	if (intel_fb &&
> >  	    (sizes->fb_width > intel_fb->base.width ||
> >  	     sizes->fb_height > intel_fb->base.height)) {
> > @@ -203,7 +206,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
> >  		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
> >  		ret = intelfb_alloc(helper, sizes);
> >  		if (ret)
> > -			goto out_unlock;
> > +			return ret;
> >  		intel_fb = ifbdev->fb;
> >  	} else {
> >  		DRM_DEBUG_KMS("re-using BIOS fb\n");
> > @@ -215,6 +218,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
> >  	obj = intel_fb->obj;
> >  	size = obj->base.size;
> >  
> > +	mutex_lock(&dev->struct_mutex);
> > +
> 
> I'm thinking we won't even need the lock here anymore. But maybe I'm
> missing something.
> 
> >  	info = drm_fb_helper_alloc_fbi(helper);
> >  	if (IS_ERR(info)) {
> >  		ret = PTR_ERR(info);
> > @@ -276,7 +281,6 @@ out_destroy_fbi:
> >  out_unpin:
> >  	i915_gem_object_ggtt_unpin(obj);
> >  	drm_gem_object_unreference(&obj->base);
> 
> And this ref we don't own either AFAICS.

Why? We did call intelfb_alloc() above, so if something subsequently
goes wrong, we need to revert the steps that intelfb_alloc() carried
out. The drm_gem_object_unreference() therefore seems right here to me.

However I'm puzzled why we don't call drm_framebuffer_remove() under
the out_unpin: label. Aren't we leaking a framebuffer here without that?

Maybe you're referring to the fact that this function either inherits
the BIOS fb or creates a new fb with intelfb_alloc(). I'm not sure if
the cleanup on error is identical in these two cases. Maybe you meant
that we don't own the ref in the case that the fb was inherited from
BIOS?


Best regards,

Lukas

> 
> > -out_unlock:
> >  	mutex_unlock(&dev->struct_mutex);
> >  	return ret;
> >  }
> > -- 
> > 2.1.0
> 
> -- 
> Ville Syrjälä
> Intel OTC
Daniel Vetter Oct. 15, 2015, 5:22 p.m. UTC | #4
On Thu, Oct 15, 2015 at 07:14:35PM +0200, Lukas Wunner wrote:
> Hi Ville,
> 
> On Tue, Oct 13, 2015 at 06:04:40PM +0300, Ville Syrjälä wrote:
> > On Tue, Jun 30, 2015 at 10:06:27AM +0100, Lukas Wunner wrote:
> > > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > 
> > > We had two failure modes here:
> > > 
> > > 1.
> > > Deadlock in intelfb_alloc failure path where it calls
> > > drm_framebuffer_remove, which grabs the struct mutex and intelfb_create
> > > (caller of intelfb_alloc) was already holding it.
> > > 
> > > 2.
> > > Deadlock in intelfb_create failure path where it calls
> > > drm_framebuffer_unreference, which grabs the struct mutex and
> > > intelfb_create was already holding it.
> > > 
> > > v2:
> > >    * Reformat commit msg to 72 chars. (Lukas Wunner)
> > >    * Add third failure mode. (Lukas Wunner)
> > > 
> > > v3:
> > >    * On fb alloc failure, unref gem object where it gets refed,
> > >      fix double unref in separate commit. (Ville Syrjälä)
> > > 
> > > v4:
> > >    * Lock struct mutex on unref. (Chris Wilson)
> > > 
> > > v5:
> > >    * Rebase on drm-intel-nightly 2015y-09m-04d-08h-19m-35s UTC,
> > >      rephrase commit message. (Jani Nicula)
> > > 
> > > Tested-by: Pierre Moreau <pierre.morrow@free.fr>
> > >     [MBP  5,3 2009  nvidia 9400M + 9600M GT   pre-retina]
> > > Tested-by: Paul Hordiienko <pvt.gord@gmail.com>
> > >     [MBP  6,2 2010  intel ILK + nvidia GT216  pre-retina]
> > > Tested-by: William Brown <william@blackhats.net.au>
> > >     [MBP  8,2 2011  intel SNB + amd turks     pre-retina]
> > > Tested-by: Lukas Wunner <lukas@wunner.de>
> > >     [MBP  9,1 2012  intel IVB + nvidia GK107  pre-retina]
> > > Tested-by: Bruno Bierbaumer <bruno@bierbaumer.net>
> > >     [MBP 11,3 2013  intel HSW + nvidia GK107  retina]
> > > 
> > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > Fixes: 60a5ca015ffd ("drm/i915: Add locking around
> > >     framebuffer_references--")
> > > Reported-by: Lukas Wunner <lukas@wunner.de>
> > > [Lukas: Create v3 + v4 + v5 based on Tvrtko's v2]
> > > Signed-off-by: Lukas Wunner <lukas@wunner.de>
> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/intel_fbdev.c | 20 ++++++++++++--------
> > >  1 file changed, 12 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> > > index 96476d7..eee3306 100644
> > > --- a/drivers/gpu/drm/i915/intel_fbdev.c
> > > +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> > > @@ -119,7 +119,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > >  {
> > >  	struct intel_fbdev *ifbdev =
> > >  		container_of(helper, struct intel_fbdev, helper);
> > > -	struct drm_framebuffer *fb;
> > > +	struct drm_framebuffer *fb = NULL;
> > >  	struct drm_device *dev = helper->dev;
> > >  	struct drm_mode_fb_cmd2 mode_cmd = {};
> > >  	struct drm_i915_gem_object *obj;
> > > @@ -137,6 +137,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > >  	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
> > >  							  sizes->surface_depth);
> > >  
> > > +	mutex_lock(&dev->struct_mutex);
> > > +
> > >  	size = mode_cmd.pitches[0] * mode_cmd.height;
> > >  	size = PAGE_ALIGN(size);
> > >  	obj = i915_gem_object_create_stolen(dev, size);
> > > @@ -158,18 +160,21 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > >  	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
> > >  	if (ret) {
> > >  		DRM_ERROR("failed to pin obj: %d\n", ret);
> > > -		goto out_fb;
> > > +		goto out_unref;
> > >  	}
> > >  
> > > +	mutex_unlock(&dev->struct_mutex);
> > > +
> > >  	ifbdev->fb = to_intel_framebuffer(fb);
> > >  
> > >  	return 0;
> > >  
> > > -out_fb:
> > > -	drm_framebuffer_remove(fb);
> > >  out_unref:
> > >  	drm_gem_object_unreference(&obj->base);
> > 
> > If fb init succeeded it took over the ref, no? So drm_framebuffer_remove()
> > will now attempt to unref one too many times.
> > 
> > This taking over refs stuff is confusing. Maybe it would be better if
> > everyone just took an extra ref when they stash the obj pointer
> > somewhere, and everyone would then always release whatever ref they own
> > and no longer need.
> > 
> > >  out:
> > > +	mutex_unlock(&dev->struct_mutex);
> > > +	if (fb)
> > > +		drm_framebuffer_remove(fb);
> > >  	return ret;
> > >  }
> > >  
> 
> Hm, why do you think we unref one too many times?
> 
> A bit further up in this function we call __intel_framebuffer_create()
> which sets the refcount to 1. (It calls intel_framebuffer_init(), which
> calls drm_framebuffer_init(), which calls kref_init(&fb->refcount).)
> 
> So if intel_pin_and_fence_fb_obj() fails, we do need to unreference and
> tear down the fb. Thus, drm_framebuffer_remove() seems right here to me.
> 
> However, because of your objection I've noticed now that "if (fb)" seems
> to be wrong, I think this should be "if (!IS_ERR_OR_NULL(fb))".
> 
> Because if __intel_framebuffer_create() failed, fb will be a PTR_ERR(),
> so not null, and we'd call drm_framebuffer_remove() on this. Is that
> what you meant?

Quick aside: drm_framebuffer_remove is for framebuffers that userspace
has created. Since this is init code a drm_framebuffer_unreference should
be all we need - and drm_framebuffer_remove is getting somewhat
defeatured.
-Daniel

> 
> 
> > > @@ -187,8 +192,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > >  	int size, ret;
> > >  	bool prealloc = false;
> > >  
> > > -	mutex_lock(&dev->struct_mutex);
> > > -
> > >  	if (intel_fb &&
> > >  	    (sizes->fb_width > intel_fb->base.width ||
> > >  	     sizes->fb_height > intel_fb->base.height)) {
> > > @@ -203,7 +206,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > >  		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
> > >  		ret = intelfb_alloc(helper, sizes);
> > >  		if (ret)
> > > -			goto out_unlock;
> > > +			return ret;
> > >  		intel_fb = ifbdev->fb;
> > >  	} else {
> > >  		DRM_DEBUG_KMS("re-using BIOS fb\n");
> > > @@ -215,6 +218,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > >  	obj = intel_fb->obj;
> > >  	size = obj->base.size;
> > >  
> > > +	mutex_lock(&dev->struct_mutex);
> > > +
> > 
> > I'm thinking we won't even need the lock here anymore. But maybe I'm
> > missing something.
> > 
> > >  	info = drm_fb_helper_alloc_fbi(helper);
> > >  	if (IS_ERR(info)) {
> > >  		ret = PTR_ERR(info);
> > > @@ -276,7 +281,6 @@ out_destroy_fbi:
> > >  out_unpin:
> > >  	i915_gem_object_ggtt_unpin(obj);
> > >  	drm_gem_object_unreference(&obj->base);
> > 
> > And this ref we don't own either AFAICS.
> 
> Why? We did call intelfb_alloc() above, so if something subsequently
> goes wrong, we need to revert the steps that intelfb_alloc() carried
> out. The drm_gem_object_unreference() therefore seems right here to me.
> 
> However I'm puzzled why we don't call drm_framebuffer_remove() under
> the out_unpin: label. Aren't we leaking a framebuffer here without that?
> 
> Maybe you're referring to the fact that this function either inherits
> the BIOS fb or creates a new fb with intelfb_alloc(). I'm not sure if
> the cleanup on error is identical in these two cases. Maybe you meant
> that we don't own the ref in the case that the fb was inherited from
> BIOS?
> 
> 
> Best regards,
> 
> Lukas
> 
> > 
> > > -out_unlock:
> > >  	mutex_unlock(&dev->struct_mutex);
> > >  	return ret;
> > >  }
> > > -- 
> > > 2.1.0
> > 
> > -- 
> > Ville Syrjälä
> > Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ville Syrjälä Oct. 15, 2015, 5:34 p.m. UTC | #5
On Thu, Oct 15, 2015 at 07:14:35PM +0200, Lukas Wunner wrote:
> Hi Ville,
> 
> On Tue, Oct 13, 2015 at 06:04:40PM +0300, Ville Syrjälä wrote:
> > On Tue, Jun 30, 2015 at 10:06:27AM +0100, Lukas Wunner wrote:
> > > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > 
> > > We had two failure modes here:
> > > 
> > > 1.
> > > Deadlock in intelfb_alloc failure path where it calls
> > > drm_framebuffer_remove, which grabs the struct mutex and intelfb_create
> > > (caller of intelfb_alloc) was already holding it.
> > > 
> > > 2.
> > > Deadlock in intelfb_create failure path where it calls
> > > drm_framebuffer_unreference, which grabs the struct mutex and
> > > intelfb_create was already holding it.
> > > 
> > > v2:
> > >    * Reformat commit msg to 72 chars. (Lukas Wunner)
> > >    * Add third failure mode. (Lukas Wunner)
> > > 
> > > v3:
> > >    * On fb alloc failure, unref gem object where it gets refed,
> > >      fix double unref in separate commit. (Ville Syrjälä)
> > > 
> > > v4:
> > >    * Lock struct mutex on unref. (Chris Wilson)
> > > 
> > > v5:
> > >    * Rebase on drm-intel-nightly 2015y-09m-04d-08h-19m-35s UTC,
> > >      rephrase commit message. (Jani Nicula)
> > > 
> > > Tested-by: Pierre Moreau <pierre.morrow@free.fr>
> > >     [MBP  5,3 2009  nvidia 9400M + 9600M GT   pre-retina]
> > > Tested-by: Paul Hordiienko <pvt.gord@gmail.com>
> > >     [MBP  6,2 2010  intel ILK + nvidia GT216  pre-retina]
> > > Tested-by: William Brown <william@blackhats.net.au>
> > >     [MBP  8,2 2011  intel SNB + amd turks     pre-retina]
> > > Tested-by: Lukas Wunner <lukas@wunner.de>
> > >     [MBP  9,1 2012  intel IVB + nvidia GK107  pre-retina]
> > > Tested-by: Bruno Bierbaumer <bruno@bierbaumer.net>
> > >     [MBP 11,3 2013  intel HSW + nvidia GK107  retina]
> > > 
> > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > Fixes: 60a5ca015ffd ("drm/i915: Add locking around
> > >     framebuffer_references--")
> > > Reported-by: Lukas Wunner <lukas@wunner.de>
> > > [Lukas: Create v3 + v4 + v5 based on Tvrtko's v2]
> > > Signed-off-by: Lukas Wunner <lukas@wunner.de>
> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/intel_fbdev.c | 20 ++++++++++++--------
> > >  1 file changed, 12 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> > > index 96476d7..eee3306 100644
> > > --- a/drivers/gpu/drm/i915/intel_fbdev.c
> > > +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> > > @@ -119,7 +119,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > >  {
> > >  	struct intel_fbdev *ifbdev =
> > >  		container_of(helper, struct intel_fbdev, helper);
> > > -	struct drm_framebuffer *fb;
> > > +	struct drm_framebuffer *fb = NULL;
> > >  	struct drm_device *dev = helper->dev;
> > >  	struct drm_mode_fb_cmd2 mode_cmd = {};
> > >  	struct drm_i915_gem_object *obj;
> > > @@ -137,6 +137,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > >  	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
> > >  							  sizes->surface_depth);
> > >  
> > > +	mutex_lock(&dev->struct_mutex);
> > > +
> > >  	size = mode_cmd.pitches[0] * mode_cmd.height;
> > >  	size = PAGE_ALIGN(size);
> > >  	obj = i915_gem_object_create_stolen(dev, size);
> > > @@ -158,18 +160,21 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > >  	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
> > >  	if (ret) {
> > >  		DRM_ERROR("failed to pin obj: %d\n", ret);
> > > -		goto out_fb;
> > > +		goto out_unref;
> > >  	}
> > >  
> > > +	mutex_unlock(&dev->struct_mutex);
> > > +
> > >  	ifbdev->fb = to_intel_framebuffer(fb);
> > >  
> > >  	return 0;
> > >  
> > > -out_fb:
> > > -	drm_framebuffer_remove(fb);
> > >  out_unref:
> > >  	drm_gem_object_unreference(&obj->base);
> > 
> > If fb init succeeded it took over the ref, no? So drm_framebuffer_remove()
> > will now attempt to unref one too many times.
> > 
> > This taking over refs stuff is confusing. Maybe it would be better if
> > everyone just took an extra ref when they stash the obj pointer
> > somewhere, and everyone would then always release whatever ref they own
> > and no longer need.
> > 
> > >  out:
> > > +	mutex_unlock(&dev->struct_mutex);
> > > +	if (fb)
> > > +		drm_framebuffer_remove(fb);
> > >  	return ret;
> > >  }
> > >  
> 
> Hm, why do you think we unref one too many times?

Because the fb now owns the reference, so it gets unreffed by the fb
.destroy() hook... I think.

> 
> A bit further up in this function we call __intel_framebuffer_create()
> which sets the refcount to 1. (It calls intel_framebuffer_init(), which
> calls drm_framebuffer_init(), which calls kref_init(&fb->refcount).)
> 
> So if intel_pin_and_fence_fb_obj() fails, we do need to unreference and
> tear down the fb. Thus, drm_framebuffer_remove() seems right here to me.

I wasn't complaining about the fb unref, but the bo unref.

> 
> However, because of your objection I've noticed now that "if (fb)" seems
> to be wrong, I think this should be "if (!IS_ERR_OR_NULL(fb))".
> 
> Because if __intel_framebuffer_create() failed, fb will be a PTR_ERR(),
> so not null, and we'd call drm_framebuffer_remove() on this. Is that
> what you meant?

No, but that's a good observation too.

> 
> 
> > > @@ -187,8 +192,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > >  	int size, ret;
> > >  	bool prealloc = false;
> > >  
> > > -	mutex_lock(&dev->struct_mutex);
> > > -
> > >  	if (intel_fb &&
> > >  	    (sizes->fb_width > intel_fb->base.width ||
> > >  	     sizes->fb_height > intel_fb->base.height)) {
> > > @@ -203,7 +206,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > >  		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
> > >  		ret = intelfb_alloc(helper, sizes);
> > >  		if (ret)
> > > -			goto out_unlock;
> > > +			return ret;
> > >  		intel_fb = ifbdev->fb;
> > >  	} else {
> > >  		DRM_DEBUG_KMS("re-using BIOS fb\n");
> > > @@ -215,6 +218,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > >  	obj = intel_fb->obj;
> > >  	size = obj->base.size;
> > >  
> > > +	mutex_lock(&dev->struct_mutex);
> > > +
> > 
> > I'm thinking we won't even need the lock here anymore. But maybe I'm
> > missing something.
> > 
> > >  	info = drm_fb_helper_alloc_fbi(helper);
> > >  	if (IS_ERR(info)) {
> > >  		ret = PTR_ERR(info);
> > > @@ -276,7 +281,6 @@ out_destroy_fbi:
> > >  out_unpin:
> > >  	i915_gem_object_ggtt_unpin(obj);
> > >  	drm_gem_object_unreference(&obj->base);
> > 
> > And this ref we don't own either AFAICS.
> 
> Why? We did call intelfb_alloc() above, so if something subsequently
> goes wrong, we need to revert the steps that intelfb_alloc() carried
> out. The drm_gem_object_unreference() therefore seems right here to me.

Here too the fb (if succesfully created) now owns that reference.

> 
> However I'm puzzled why we don't call drm_framebuffer_remove() under
> the out_unpin: label. Aren't we leaking a framebuffer here without that?
> 
> Maybe you're referring to the fact that this function either inherits
> the BIOS fb or creates a new fb with intelfb_alloc(). I'm not sure if
> the cleanup on error is identical in these two cases. Maybe you meant
> that we don't own the ref in the case that the fb was inherited from
> BIOS?
> 
> 
> Best regards,
> 
> Lukas
> 
> > 
> > > -out_unlock:
> > >  	mutex_unlock(&dev->struct_mutex);
> > >  	return ret;
> > >  }
> > > -- 
> > > 2.1.0
> > 
> > -- 
> > Ville Syrjälä
> > Intel OTC
Lukas Wunner Oct. 18, 2015, 6:03 p.m. UTC | #6
Hi Ville,

On Thu, Oct 15, 2015 at 08:34:23PM +0300, Ville Syrjälä wrote:
> On Thu, Oct 15, 2015 at 07:14:35PM +0200, Lukas Wunner wrote:
> > Hi Ville,
> > 
> > On Tue, Oct 13, 2015 at 06:04:40PM +0300, Ville Syrjälä wrote:
> > > On Tue, Jun 30, 2015 at 10:06:27AM +0100, Lukas Wunner wrote:
> > > > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > > 
> > > > We had two failure modes here:
> > > > 
> > > > 1.
> > > > Deadlock in intelfb_alloc failure path where it calls
> > > > drm_framebuffer_remove, which grabs the struct mutex and intelfb_create
> > > > (caller of intelfb_alloc) was already holding it.
> > > > 
> > > > 2.
> > > > Deadlock in intelfb_create failure path where it calls
> > > > drm_framebuffer_unreference, which grabs the struct mutex and
> > > > intelfb_create was already holding it.
> > > > 
> > > > v2:
> > > >    * Reformat commit msg to 72 chars. (Lukas Wunner)
> > > >    * Add third failure mode. (Lukas Wunner)
> > > > 
> > > > v3:
> > > >    * On fb alloc failure, unref gem object where it gets refed,
> > > >      fix double unref in separate commit. (Ville Syrjälä)
> > > > 
> > > > v4:
> > > >    * Lock struct mutex on unref. (Chris Wilson)
> > > > 
> > > > v5:
> > > >    * Rebase on drm-intel-nightly 2015y-09m-04d-08h-19m-35s UTC,
> > > >      rephrase commit message. (Jani Nicula)
> > > > 
> > > > Tested-by: Pierre Moreau <pierre.morrow@free.fr>
> > > >     [MBP  5,3 2009  nvidia 9400M + 9600M GT   pre-retina]
> > > > Tested-by: Paul Hordiienko <pvt.gord@gmail.com>
> > > >     [MBP  6,2 2010  intel ILK + nvidia GT216  pre-retina]
> > > > Tested-by: William Brown <william@blackhats.net.au>
> > > >     [MBP  8,2 2011  intel SNB + amd turks     pre-retina]
> > > > Tested-by: Lukas Wunner <lukas@wunner.de>
> > > >     [MBP  9,1 2012  intel IVB + nvidia GK107  pre-retina]
> > > > Tested-by: Bruno Bierbaumer <bruno@bierbaumer.net>
> > > >     [MBP 11,3 2013  intel HSW + nvidia GK107  retina]
> > > > 
> > > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > > Fixes: 60a5ca015ffd ("drm/i915: Add locking around
> > > >     framebuffer_references--")
> > > > Reported-by: Lukas Wunner <lukas@wunner.de>
> > > > [Lukas: Create v3 + v4 + v5 based on Tvrtko's v2]
> > > > Signed-off-by: Lukas Wunner <lukas@wunner.de>
> > > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > Cc: Jani Nikula <jani.nikula@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/intel_fbdev.c | 20 ++++++++++++--------
> > > >  1 file changed, 12 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
> > > > index 96476d7..eee3306 100644
> > > > --- a/drivers/gpu/drm/i915/intel_fbdev.c
> > > > +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> > > > @@ -119,7 +119,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > > >  {
> > > >  	struct intel_fbdev *ifbdev =
> > > >  		container_of(helper, struct intel_fbdev, helper);
> > > > -	struct drm_framebuffer *fb;
> > > > +	struct drm_framebuffer *fb = NULL;
> > > >  	struct drm_device *dev = helper->dev;
> > > >  	struct drm_mode_fb_cmd2 mode_cmd = {};
> > > >  	struct drm_i915_gem_object *obj;
> > > > @@ -137,6 +137,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > > >  	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
> > > >  							  sizes->surface_depth);
> > > >  
> > > > +	mutex_lock(&dev->struct_mutex);
> > > > +
> > > >  	size = mode_cmd.pitches[0] * mode_cmd.height;
> > > >  	size = PAGE_ALIGN(size);
> > > >  	obj = i915_gem_object_create_stolen(dev, size);
> > > > @@ -158,18 +160,21 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
> > > >  	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
> > > >  	if (ret) {
> > > >  		DRM_ERROR("failed to pin obj: %d\n", ret);
> > > > -		goto out_fb;
> > > > +		goto out_unref;
> > > >  	}
> > > >  
> > > > +	mutex_unlock(&dev->struct_mutex);
> > > > +
> > > >  	ifbdev->fb = to_intel_framebuffer(fb);
> > > >  
> > > >  	return 0;
> > > >  
> > > > -out_fb:
> > > > -	drm_framebuffer_remove(fb);
> > > >  out_unref:
> > > >  	drm_gem_object_unreference(&obj->base);
> > > 
> > > If fb init succeeded it took over the ref, no? So drm_framebuffer_remove()
> > > will now attempt to unref one too many times.
> > > 
> > > This taking over refs stuff is confusing. Maybe it would be better if
> > > everyone just took an extra ref when they stash the obj pointer
> > > somewhere, and everyone would then always release whatever ref they own
> > > and no longer need.
> > > 
> > > >  out:
> > > > +	mutex_unlock(&dev->struct_mutex);
> > > > +	if (fb)
> > > > +		drm_framebuffer_remove(fb);
> > > >  	return ret;
> > > >  }
> > > >  
> > 
> > Hm, why do you think we unref one too many times?
> 
> Because the fb now owns the reference, so it gets unreffed by the fb
> .destroy() hook... I think.

You're right. drm_framebuffer_remove() calls drm_framebuffer_unreference(),
if this was the last ref then drm_framebuffer_free() gets called,
which invokes the ->destroy callback intel_user_framebuffer_destroy(),
which in turn calls drm_gem_object_unreference().

So indeed it gets unrefed twice here.


> > 
> > A bit further up in this function we call __intel_framebuffer_create()
> > which sets the refcount to 1. (It calls intel_framebuffer_init(), which
> > calls drm_framebuffer_init(), which calls kref_init(&fb->refcount).)
> > 
> > So if intel_pin_and_fence_fb_obj() fails, we do need to unreference and
> > tear down the fb. Thus, drm_framebuffer_remove() seems right here to me.
> 
> I wasn't complaining about the fb unref, but the bo unref.
> 
> > 
> > However, because of your objection I've noticed now that "if (fb)" seems
> > to be wrong, I think this should be "if (!IS_ERR_OR_NULL(fb))".
> > 
> > Because if __intel_framebuffer_create() failed, fb will be a PTR_ERR(),
> > so not null, and we'd call drm_framebuffer_remove() on this. Is that
> > what you meant?
> 
> No, but that's a good observation too.
> 
> > 
> > 
> > > > @@ -187,8 +192,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > > >  	int size, ret;
> > > >  	bool prealloc = false;
> > > >  
> > > > -	mutex_lock(&dev->struct_mutex);
> > > > -
> > > >  	if (intel_fb &&
> > > >  	    (sizes->fb_width > intel_fb->base.width ||
> > > >  	     sizes->fb_height > intel_fb->base.height)) {
> > > > @@ -203,7 +206,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > > >  		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
> > > >  		ret = intelfb_alloc(helper, sizes);
> > > >  		if (ret)
> > > > -			goto out_unlock;
> > > > +			return ret;
> > > >  		intel_fb = ifbdev->fb;
> > > >  	} else {
> > > >  		DRM_DEBUG_KMS("re-using BIOS fb\n");
> > > > @@ -215,6 +218,8 @@ static int intelfb_create(struct drm_fb_helper *helper,
> > > >  	obj = intel_fb->obj;
> > > >  	size = obj->base.size;
> > > >  
> > > > +	mutex_lock(&dev->struct_mutex);
> > > > +
> > > 
> > > I'm thinking we won't even need the lock here anymore. But maybe I'm
> > > missing something.
> > > 
> > > >  	info = drm_fb_helper_alloc_fbi(helper);
> > > >  	if (IS_ERR(info)) {
> > > >  		ret = PTR_ERR(info);
> > > > @@ -276,7 +281,6 @@ out_destroy_fbi:
> > > >  out_unpin:
> > > >  	i915_gem_object_ggtt_unpin(obj);
> > > >  	drm_gem_object_unreference(&obj->base);
> > > 
> > > And this ref we don't own either AFAICS.
> > 
> > Why? We did call intelfb_alloc() above, so if something subsequently
> > goes wrong, we need to revert the steps that intelfb_alloc() carried
> > out. The drm_gem_object_unreference() therefore seems right here to me.
> 
> Here too the fb (if succesfully created) now owns that reference.

But here the ->destroy callback isn't invoked because the fb isn't unrefed.
There's no call to drm_framebuffer_unreference() / drm_framebuffer_remove(),
so the gem object isn't unrefed twice but we seem to leak the fb.

That's exactly what I find confusing here, if intelfb_alloc() above was
successful and something else subsequently goes awry, we need to unref
the fb, right? Or am I missing something?

In the case when we've inherited the fb from BIOS (instead of creating it
with intelfb_alloc()), is it okay to unref the fb as well?


Best regards,

Lukas


> > However I'm puzzled why we don't call drm_framebuffer_remove() under
> > the out_unpin: label. Aren't we leaking a framebuffer here without that?
> > 
> > Maybe you're referring to the fact that this function either inherits
> > the BIOS fb or creates a new fb with intelfb_alloc(). I'm not sure if
> > the cleanup on error is identical in these two cases. Maybe you meant
> > that we don't own the ref in the case that the fb was inherited from
> > BIOS?
> > 
> > 
> > Best regards,
> > 
> > Lukas
> > 
> > > 
> > > > -out_unlock:
> > > >  	mutex_unlock(&dev->struct_mutex);
> > > >  	return ret;
> > > >  }
> > > > -- 
> > > > 2.1.0
> > > 
> > > -- 
> > > Ville Syrjälä
> > > Intel OTC
> 
> -- 
> Ville Syrjälä
> Intel OTC
Lukas Wunner Oct. 25, 2015, 11:14 a.m. UTC | #7
Next iteration of these fixes, taking into account the additional
issues discovered in the recent exchange with Ville (this series
is in-reply-to that thread):

* Patch 1 now with Reviewed-by tag by Ville (thanks!), moved to the
  front of the series.

* Patch 2 is new, fixes a double unref of the bo in a failure path
  of intelfb_alloc, spotted by Ville.

* Patch 3 is another attempt to eliminate the deadlocks in
  intelfb_alloc and intelfb_create, patch originally by Tvrtko.
  New in v6: Avoid unrefing the fb if it's an ERR_PTR,
  replace drm_framebuffer_remove with drm_framebuffer_unreference
  (Daniel).

* Patch 4 is new, plugs leak of a struct intel_fbdev in
  intelfb_create. Not sure if I made the right call in the case
  where the fb was inherited from BIOS, comments welcome.
  (I decided to leave the fb as is and limp on. Alternative would
  be to call drm_framebuffer_remove to free the memory occupied
  by the fb and disable the crtc; the fbcon will be unusable anyway
  at this point and if X11 is able to start up without errors,
  it should be able to reinitialize the crtc.)

Browsable on GitHub:
https://github.com/l1k/linux/commits/intel_fbdev_fixes


Lukas Wunner (3):
  drm/i915: On fb alloc failure, unref gem object where it gets refed
  drm/i915: Fix double unref in intelfb_alloc failure path
  drm/i915: Fix error handling in intelfb_create

Tvrtko Ursulin (1):
  drm/i915: Fix failure paths around initial fbdev allocation

 drivers/gpu/drm/i915/intel_display.c | 21 +++++++++++++-------
 drivers/gpu/drm/i915/intel_fbdev.c   | 38 +++++++++++++++++++++++-------------
 2 files changed, 38 insertions(+), 21 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 96476d7..eee3306 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -119,7 +119,7 @@  static int intelfb_alloc(struct drm_fb_helper *helper,
 {
 	struct intel_fbdev *ifbdev =
 		container_of(helper, struct intel_fbdev, helper);
-	struct drm_framebuffer *fb;
+	struct drm_framebuffer *fb = NULL;
 	struct drm_device *dev = helper->dev;
 	struct drm_mode_fb_cmd2 mode_cmd = {};
 	struct drm_i915_gem_object *obj;
@@ -137,6 +137,8 @@  static int intelfb_alloc(struct drm_fb_helper *helper,
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
 							  sizes->surface_depth);
 
+	mutex_lock(&dev->struct_mutex);
+
 	size = mode_cmd.pitches[0] * mode_cmd.height;
 	size = PAGE_ALIGN(size);
 	obj = i915_gem_object_create_stolen(dev, size);
@@ -158,18 +160,21 @@  static int intelfb_alloc(struct drm_fb_helper *helper,
 	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
 	if (ret) {
 		DRM_ERROR("failed to pin obj: %d\n", ret);
-		goto out_fb;
+		goto out_unref;
 	}
 
+	mutex_unlock(&dev->struct_mutex);
+
 	ifbdev->fb = to_intel_framebuffer(fb);
 
 	return 0;
 
-out_fb:
-	drm_framebuffer_remove(fb);
 out_unref:
 	drm_gem_object_unreference(&obj->base);
 out:
+	mutex_unlock(&dev->struct_mutex);
+	if (fb)
+		drm_framebuffer_remove(fb);
 	return ret;
 }
 
@@ -187,8 +192,6 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	int size, ret;
 	bool prealloc = false;
 
-	mutex_lock(&dev->struct_mutex);
-
 	if (intel_fb &&
 	    (sizes->fb_width > intel_fb->base.width ||
 	     sizes->fb_height > intel_fb->base.height)) {
@@ -203,7 +206,7 @@  static int intelfb_create(struct drm_fb_helper *helper,
 		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
 		ret = intelfb_alloc(helper, sizes);
 		if (ret)
-			goto out_unlock;
+			return ret;
 		intel_fb = ifbdev->fb;
 	} else {
 		DRM_DEBUG_KMS("re-using BIOS fb\n");
@@ -215,6 +218,8 @@  static int intelfb_create(struct drm_fb_helper *helper,
 	obj = intel_fb->obj;
 	size = obj->base.size;
 
+	mutex_lock(&dev->struct_mutex);
+
 	info = drm_fb_helper_alloc_fbi(helper);
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
@@ -276,7 +281,6 @@  out_destroy_fbi:
 out_unpin:
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
-out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }