Message ID | 1434365392-19808-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Jun 15, 2015 at 11:49:52AM +0100, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > We had two failure modes here: > > 1. > Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove, > which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was > already holding it. > > 2. > Double unreference on the object if __intel_framebuffer_create fails since > both it and the caller (intelfb_alloc) do the unreference. I would suggest removing the unref from __intel_framebuffer_create(). > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > --- > drivers/gpu/drm/i915/intel_fbdev.c | 21 +++++++++++++-------- > 1 file changed, 13 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c > index 6372cfc..b998f69 100644 > --- a/drivers/gpu/drm/i915/intel_fbdev.c > +++ b/drivers/gpu/drm/i915/intel_fbdev.c > @@ -141,7 +141,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > { > struct intel_fbdev *ifbdev = > container_of(helper, struct intel_fbdev, helper); > - struct drm_framebuffer *fb; > + struct drm_framebuffer *fb = NULL; > struct drm_device *dev = helper->dev; > struct drm_mode_fb_cmd2 mode_cmd = {}; > struct drm_i915_gem_object *obj; > @@ -159,6 +159,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, > sizes->surface_depth); > > + mutex_lock(&dev->struct_mutex); > + > size = mode_cmd.pitches[0] * mode_cmd.height; > size = PAGE_ALIGN(size); > obj = i915_gem_object_create_stolen(dev, size); > @@ -172,8 +174,9 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > > fb = __intel_framebuffer_create(dev, &mode_cmd, obj); > if (IS_ERR(fb)) { > + /* Drops object reference on failure. */ > ret = PTR_ERR(fb); > - goto out_unref; > + goto out; > } > > /* Flush everything out, we'll be doing GTT only from now on */ > @@ -183,15 +186,18 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > goto out_fb; > } > > + mutex_unlock(&dev->struct_mutex); > + > ifbdev->fb = to_intel_framebuffer(fb); > > return 0; > > out_fb: > - drm_framebuffer_remove(fb); > -out_unref: > drm_gem_object_unreference(&obj->base); > out: > + mutex_unlock(&dev->struct_mutex); > + if (fb) > + drm_framebuffer_remove(fb); > return ret; > } > > @@ -209,8 +215,6 @@ static int intelfb_create(struct drm_fb_helper *helper, > int size, ret; > bool prealloc = false; > > - mutex_lock(&dev->struct_mutex); > - > if (intel_fb && > (sizes->fb_width > intel_fb->base.width || > sizes->fb_height > intel_fb->base.height)) { > @@ -225,7 +229,7 @@ static int intelfb_create(struct drm_fb_helper *helper, > DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n"); > ret = intelfb_alloc(helper, sizes); > if (ret) > - goto out_unlock; > + return ret; > intel_fb = ifbdev->fb; > } else { > DRM_DEBUG_KMS("re-using BIOS fb\n"); > @@ -237,6 +241,8 @@ static int intelfb_create(struct drm_fb_helper *helper, > obj = intel_fb->obj; > size = obj->base.size; > > + mutex_lock(&dev->struct_mutex); > + > info = framebuffer_alloc(0, &dev->pdev->dev); > if (!info) { > ret = -ENOMEM; > @@ -307,7 +313,6 @@ static int intelfb_create(struct drm_fb_helper *helper, > out_unpin: > i915_gem_object_ggtt_unpin(obj); > drm_gem_object_unreference(&obj->base); > -out_unlock: > mutex_unlock(&dev->struct_mutex); > return ret; > } > -- > 2.4.2
Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6642
-------------------------------------Summary-------------------------------------
Platform Delta drm-intel-nightly Series Applied
ILK 302/302 302/302
SNB 312/316 312/316
IVB 343/343 343/343
BYT -1 287/287 286/287
HSW 380/380 380/380
-------------------------------------Detailed-------------------------------------
Platform Test drm-intel-nightly Series Applied
*BYT igt@gem_tiled_partial_pwrite_pread@reads PASS(1) FAIL(1)
Note: You need to pay more attention to line start with '*'
Hi, On Mon, Jun 15, 2015 at 11:49:52AM +0100, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > We had two failure modes here: > > 1. > Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove, > which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was > already holding it. s/intelfb_alloc/intelfb_create/ s/(caller of intelfb_alloc)// > > 2. > Double unreference on the object if __intel_framebuffer_create fails since > both it and the caller (intelfb_alloc) do the unreference. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Fixes: 60a5ca015ffd2aacfe5674b5a401cd2a37159e07 Reported-By: Lukas Wunner <lukas@wunner.de> > --- > drivers/gpu/drm/i915/intel_fbdev.c | 21 +++++++++++++-------- > 1 file changed, 13 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c > index 6372cfc..b998f69 100644 > --- a/drivers/gpu/drm/i915/intel_fbdev.c > +++ b/drivers/gpu/drm/i915/intel_fbdev.c > @@ -141,7 +141,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > { > struct intel_fbdev *ifbdev = > container_of(helper, struct intel_fbdev, helper); > - struct drm_framebuffer *fb; > + struct drm_framebuffer *fb = NULL; > struct drm_device *dev = helper->dev; > struct drm_mode_fb_cmd2 mode_cmd = {}; > struct drm_i915_gem_object *obj; > @@ -159,6 +159,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, > sizes->surface_depth); > > + mutex_lock(&dev->struct_mutex); > + > size = mode_cmd.pitches[0] * mode_cmd.height; > size = PAGE_ALIGN(size); > obj = i915_gem_object_create_stolen(dev, size); > @@ -172,8 +174,9 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > > fb = __intel_framebuffer_create(dev, &mode_cmd, obj); > if (IS_ERR(fb)) { > + /* Drops object reference on failure. */ > ret = PTR_ERR(fb); > - goto out_unref; > + goto out; > } > > /* Flush everything out, we'll be doing GTT only from now on */ > @@ -183,15 +186,18 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > goto out_fb; > } > > + mutex_unlock(&dev->struct_mutex); > + > ifbdev->fb = to_intel_framebuffer(fb); > > return 0; > > out_fb: > - drm_framebuffer_remove(fb); > -out_unref: > drm_gem_object_unreference(&obj->base); > out: > + mutex_unlock(&dev->struct_mutex); > + if (fb) > + drm_framebuffer_remove(fb); Hm, the order of drm_gem_object_unreference() and drm_framebuffer_remove() is reversed now, could this cause any issues? Apart from that, Reviewed-By: Lukas Wunner <lukas@wunner.de> I will also test the patch and report back in a bit. Thanks a lot, Lukas > return ret; > } > > @@ -209,8 +215,6 @@ static int intelfb_create(struct drm_fb_helper *helper, > int size, ret; > bool prealloc = false; > > - mutex_lock(&dev->struct_mutex); > - > if (intel_fb && > (sizes->fb_width > intel_fb->base.width || > sizes->fb_height > intel_fb->base.height)) { > @@ -225,7 +229,7 @@ static int intelfb_create(struct drm_fb_helper *helper, > DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n"); > ret = intelfb_alloc(helper, sizes); > if (ret) > - goto out_unlock; > + return ret; > intel_fb = ifbdev->fb; > } else { > DRM_DEBUG_KMS("re-using BIOS fb\n"); > @@ -237,6 +241,8 @@ static int intelfb_create(struct drm_fb_helper *helper, > obj = intel_fb->obj; > size = obj->base.size; > > + mutex_lock(&dev->struct_mutex); > + > info = framebuffer_alloc(0, &dev->pdev->dev); > if (!info) { > ret = -ENOMEM; > @@ -307,7 +313,6 @@ static int intelfb_create(struct drm_fb_helper *helper, > out_unpin: > i915_gem_object_ggtt_unpin(obj); > drm_gem_object_unreference(&obj->base); > -out_unlock: > mutex_unlock(&dev->struct_mutex); > return ret; > } > -- > 2.4.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Hi, On 06/29/2015 03:39 PM, Lukas Wunner wrote: > Hi, > > On Mon, Jun 15, 2015 at 11:49:52AM +0100, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> We had two failure modes here: >> >> 1. >> Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove, >> which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was >> already holding it. > > s/intelfb_alloc/intelfb_create/ > s/(caller of intelfb_alloc)// I looked again and don't see that I made a mistake with regards to this? >> 2. >> Double unreference on the object if __intel_framebuffer_create fails since >> both it and the caller (intelfb_alloc) do the unreference. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > > Fixes: 60a5ca015ffd2aacfe5674b5a401cd2a37159e07 > Reported-By: Lukas Wunner <lukas@wunner.de> Sorry probably my oversight, don't remember now how it all came about. >> @@ -183,15 +186,18 @@ static int intelfb_alloc(struct drm_fb_helper *helper, >> goto out_fb; >> } >> >> + mutex_unlock(&dev->struct_mutex); >> + >> ifbdev->fb = to_intel_framebuffer(fb); >> >> return 0; >> >> out_fb: >> - drm_framebuffer_remove(fb); >> -out_unref: >> drm_gem_object_unreference(&obj->base); >> out: >> + mutex_unlock(&dev->struct_mutex); >> + if (fb) >> + drm_framebuffer_remove(fb); > > Hm, the order of drm_gem_object_unreference() and drm_framebuffer_remove() > is reversed now, could this cause any issues? No, that is fine. > Apart from that, > > Reviewed-By: Lukas Wunner <lukas@wunner.de> > > I will also test the patch and report back in a bit. That will be very useful. I did not test it extensively myself, just fired it off quickly since I was briefly hitting this failure path myself. Regards, Tvrtko
Hi Tvrtko, On Mon, Jun 29, 2015 at 04:15:08PM +0100, Tvrtko Ursulin wrote: > On 06/29/2015 03:39 PM, Lukas Wunner wrote: > >On Mon, Jun 15, 2015 at 11:49:52AM +0100, Tvrtko Ursulin wrote: > >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> > >>We had two failure modes here: > >> > >>1. > >>Deadlock in intelfb_alloc failure path where it calls drm_framebuffer_remove, > >>which grabs the struct mutex and intelfb_create (caller of intelfb_alloc) was > >>already holding it. > > > >s/intelfb_alloc/intelfb_create/ > >s/(caller of intelfb_alloc)// > > I looked again and don't see that I made a mistake with regards to this? Sorry, I was confused myself: Apart from the two issues you mentioned in the commit message there's a third one fixed by this patch which could briefly be described as: "Deadlock in intelfb_create failure path where it calls drm_framebuffer_unreference, which grabs the struct mutex and intelfb_create was already holding it." Only that single issue was reported by me, the others were subsequently discovered by you or your colleagues. A somewhat more elaborate description is in my original report: http://lists.freedesktop.org/archives/intel-gfx/2015-June/067965.html > >I will also test the patch and report back in a bit. > > That will be very useful. I did not test it extensively myself, just fired > it off quickly since I was briefly hitting this failure path myself. Okay the patch works for me (in the specific scenario on the MacBook Pro where the existing fb is discarded because it is too small and a new one is allocated). PRTS did report some issue on Baytrail though: http://lists.freedesktop.org/archives/intel-gfx/2015-June/069978.html One nitpick is that your commit message exceeds 72 characters per line. Also, Ville had posted a suggestion which was not discussed further, removing the drm_gem_object_unreference() from __intel_framebuffer_create() in favor of the one you now removed from intelfb_alloc(). (Personally I'm totally indifferent on that.) Best regards, Lukas
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 6372cfc..b998f69 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -141,7 +141,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, { struct intel_fbdev *ifbdev = container_of(helper, struct intel_fbdev, helper); - struct drm_framebuffer *fb; + struct drm_framebuffer *fb = NULL; struct drm_device *dev = helper->dev; struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; @@ -159,6 +159,8 @@ static int intelfb_alloc(struct drm_fb_helper *helper, mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); + mutex_lock(&dev->struct_mutex); + size = mode_cmd.pitches[0] * mode_cmd.height; size = PAGE_ALIGN(size); obj = i915_gem_object_create_stolen(dev, size); @@ -172,8 +174,9 @@ static int intelfb_alloc(struct drm_fb_helper *helper, fb = __intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) { + /* Drops object reference on failure. */ ret = PTR_ERR(fb); - goto out_unref; + goto out; } /* Flush everything out, we'll be doing GTT only from now on */ @@ -183,15 +186,18 @@ static int intelfb_alloc(struct drm_fb_helper *helper, goto out_fb; } + mutex_unlock(&dev->struct_mutex); + ifbdev->fb = to_intel_framebuffer(fb); return 0; out_fb: - drm_framebuffer_remove(fb); -out_unref: drm_gem_object_unreference(&obj->base); out: + mutex_unlock(&dev->struct_mutex); + if (fb) + drm_framebuffer_remove(fb); return ret; } @@ -209,8 +215,6 @@ static int intelfb_create(struct drm_fb_helper *helper, int size, ret; bool prealloc = false; - mutex_lock(&dev->struct_mutex); - if (intel_fb && (sizes->fb_width > intel_fb->base.width || sizes->fb_height > intel_fb->base.height)) { @@ -225,7 +229,7 @@ static int intelfb_create(struct drm_fb_helper *helper, DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n"); ret = intelfb_alloc(helper, sizes); if (ret) - goto out_unlock; + return ret; intel_fb = ifbdev->fb; } else { DRM_DEBUG_KMS("re-using BIOS fb\n"); @@ -237,6 +241,8 @@ static int intelfb_create(struct drm_fb_helper *helper, obj = intel_fb->obj; size = obj->base.size; + mutex_lock(&dev->struct_mutex); + info = framebuffer_alloc(0, &dev->pdev->dev); if (!info) { ret = -ENOMEM; @@ -307,7 +313,6 @@ static int intelfb_create(struct drm_fb_helper *helper, out_unpin: i915_gem_object_ggtt_unpin(obj); drm_gem_object_unreference(&obj->base); -out_unlock: mutex_unlock(&dev->struct_mutex); return ret; }