Message ID | 20170307205851.32578-3-chris@chris-wilson.co.uk (mailing list archive)
---|---
State | New, archived
On 07/03/2017 20:58, Chris Wilson wrote:
> If we allow the user to convert a GTT mmap address into a userptr, we
> may end up in recursion hell, where currently we hit a mutex deadlock
> but other possibilities include use-after-free during the
> unbind/cancel_userptr.
>
> [hang trace snipped; quoted in full in the commit message below]
>
> To prevent the possibility of a deadlock, we defer scheduling the worker
> until after we have proven that given the current mm, the userptr range
> does not overlap a GGTT mmapping. If another thread tries to remap the
> GGTT over the userptr before the worker is scheduled, it will be stopped
> by its invalidate-range flushing the current work, before the deadlock
> can occur.
>
> v2: Improve discussion of how we end up in the deadlock.
>
> [tags and diffstat snipped]
>
> +static bool overlaps_ggtt(struct drm_i915_gem_object *obj, struct mm_struct *mm)
> +{
> +	const struct vm_operations_struct *gem_vm_ops =
> +		obj->base.dev->driver->gem_vm_ops;
> +	unsigned long addr = obj->userptr.ptr;
> +	const unsigned long end = addr + obj->base.size;
> +	struct vm_area_struct *vma;
> +
> +	/* Check for a contiguous set of vma covering the userptr; if any
> +	 * are absent, they will EFAULT. More importantly, if any point back
> +	 * to a drm_i915_gem_object GTT mmapping, we may trigger a deadlock
> +	 * between the deferred gup of this userptr and the object being
> +	 * unbound calling invalidate_range -> cancel_userptr.
> +	 */
> +	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
> +		if (vma->vm_start > addr) /* gap */
> +			break;

If I understand this correctly it is checking for more than GGTT, so I
would be tempted to either remove those parts of the checks or rename
the helper to something like invalid_backing_store.

Also, again if I understand it correctly, if the backing store was made
out of two VMAs then wouldn't this return a false positive? In which
case the first option from above sounds better to me. Just check for
any overlap with the GTT mapped areas in this function and leave the
holes etc. to get_user_pages.

> [snip]
>
> -	active = false;
>  	if (pinned < 0)
>  		pages = ERR_PTR(pinned), pinned = 0;
>  	else if (pinned < num_pages)
> -		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
> +		pages = __i915_gem_userptr_get_pages_schedule(obj);
>  	else
>  		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);

I don't see _set_active on the fast gup path, maybe it is just too
early? :)

> [rest of the patch snipped]

Regards,

Tvrtko
On Wed, Mar 08, 2017 at 08:25:12AM +0000, Tvrtko Ursulin wrote:
> On 07/03/2017 20:58, Chris Wilson wrote:
> [commit message and patch snipped]
>
> >+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
> >+		if (vma->vm_start > addr) /* gap */
> >+			break;
>
> If I understand this correctly it is checking for more than GGTT, so I
> would be tempted to either remove those parts of the checks or rename
> the helper to something like invalid_backing_store.
>
> Also, again if I understand it correctly, if the backing store was made
> out of two VMAs then wouldn't this return a false positive?

No. The vmas have to be contiguous or else the range covers an absent
page (which will EFAULT).

> In which case the first option from above sounds better to me. Just
> check for any overlap with the GTT mapped areas in this function and
> leave the holes etc. to get_user_pages.

It's just that whilst I was here the contiguity check falls out :)
since we also have to have the !vma check.

> >-	active = false;
> > 	if (pinned < 0)
> > 		pages = ERR_PTR(pinned), pinned = 0;
> > 	else if (pinned < num_pages)
> >-		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
> >+		pages = __i915_gem_userptr_get_pages_schedule(obj);
> > 	else
> > 		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
>
> I don't see _set_active on the fast gup path, maybe it is just too
> early? :)

Drat. No, I was just thinking about the worker.
-Chris
On 08/03/2017 10:01, Chris Wilson wrote:
> On Wed, Mar 08, 2017 at 08:25:12AM +0000, Tvrtko Ursulin wrote:
> [snip]
>
>> Also, again if I understand it correctly, if the backing store was
>> made out of two VMAs then wouldn't this return a false positive?
>
> No. The vmas have to be contiguous or else the range covers an absent
> page (which will EFAULT).

I assume you imply there cannot be two adjacent VMAs, that the core
would merge them? But I am thinking there could be two non-mergeable
ones if the backing store was different. Not 100% sure, this is just my
intuition talking. But it would still be a valid userptr object, one
half backed with anon pages, the other say file backed.

Regards,

Tvrtko
On Wed, Mar 08, 2017 at 01:04:29PM +0000, Tvrtko Ursulin wrote:
> On 08/03/2017 10:01, Chris Wilson wrote:
> [snip]
>
> >No. The vmas have to be contiguous or else the range covers an absent
> >page (which will EFAULT).
>
> I assume you imply there cannot be two adjacent VMAs, that the core
> would merge them? But I am thinking there could be two non-mergeable
> ones if the backing store was different. Not 100% sure, this is just my
> intuition talking. But it would still be a valid userptr object, one
> half backed with anon pages, the other say file backed.

There can be two adjacent vmas: vma->vm_next->vm_start == vma->vm_end.
There can't be overlapping ones.

We are testing whether vma->vm_next->vm_start > vma->vm_end, which
implies a hole, for which there can be no struct pages.
-Chris
On 08/03/2017 13:10, Chris Wilson wrote:
> On Wed, Mar 08, 2017 at 01:04:29PM +0000, Tvrtko Ursulin wrote:
> [snip]
>
> There can be two adjacent vmas: vma->vm_next->vm_start == vma->vm_end.
> There can't be overlapping ones.
>
> We are testing whether vma->vm_next->vm_start > vma->vm_end, which
> implies a hole, for which there can be no struct pages.

Bah sorry, I'm jumping between things and did not register the addr
assignment at the end of the loop.

Regards,

Tvrtko
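For readers following the exchange above: the walk only reports a hole when
the next vma starts beyond the end of the previous one, because addr is
advanced to vma->vm_end on each pass, so two back-to-back vmas with different
backing stores are still accepted. A minimal userspace simulation of that
logic (struct range and the sample addresses are invented for illustration,
standing in for the kernel's vma list):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for a vma list: [start, end) ranges sorted by
 * start, plus a flag marking ranges that would be i915 GTT mmappings. */
struct range {
	unsigned long start, end;
	bool is_gtt;
};

/* Same shape as the overlaps_ggtt() walk: reject the userptr range if it
 * crosses a hole (no struct pages, so gup would EFAULT anyway) or touches
 * a GTT mmapping (the deadlock case). Adjacent ranges pass because addr
 * is advanced to the end of each range before the next gap test. */
static bool rejects(const struct range *vmas, int n,
		    unsigned long addr, unsigned long end)
{
	for (int i = 0; i < n; i++) {
		if (vmas[i].end <= addr)
			continue; /* stand-in for find_vma() skipping */
		if (vmas[i].start > addr) /* gap */
			break;
		if (vmas[i].end >= end)
			return false; /* fully covered, no GTT hit */
		if (vmas[i].is_gtt) /* GTT mmapping */
			break;
		addr = vmas[i].end;
	}
	return true;
}

int main(void)
{
	/* Two adjacent anon/file vmas covering [0x1000, 0x3000): allowed. */
	const struct range ok[] = {
		{ 0x1000, 0x2000, false },
		{ 0x2000, 0x3000, false },
	};
	/* Same span but with a hole at [0x2000, 0x2800): rejected. */
	const struct range hole[] = {
		{ 0x1000, 0x2000, false },
		{ 0x2800, 0x3000, false },
	};

	printf("adjacent vmas rejected? %d\n", rejects(ok, 2, 0x1000, 0x3000));
	printf("holey vmas rejected? %d\n", rejects(hole, 2, 0x1000, 0x3000));
	return 0;
}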
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index dc9bf5282071..7addbf08bcb9 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -488,6 +488,36 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
 	return ret;
 }
 
+static bool overlaps_ggtt(struct drm_i915_gem_object *obj, struct mm_struct *mm)
+{
+	const struct vm_operations_struct *gem_vm_ops =
+		obj->base.dev->driver->gem_vm_ops;
+	unsigned long addr = obj->userptr.ptr;
+	const unsigned long end = addr + obj->base.size;
+	struct vm_area_struct *vma;
+
+	/* Check for a contiguous set of vma covering the userptr; if any
+	 * are absent, they will EFAULT. More importantly, if any point back
+	 * to a drm_i915_gem_object GTT mmapping, we may trigger a deadlock
+	 * between the deferred gup of this userptr and the object being
+	 * unbound calling invalidate_range -> cancel_userptr.
+	 */
+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+		if (vma->vm_start > addr) /* gap */
+			break;
+
+		if (vma->vm_end >= end)
+			return false;
+
+		if (vma->vm_ops == gem_vm_ops) /* GTT mmapping */
+			break;
+
+		addr = vma->vm_end;
+	}
+
+	return true;
+}
+
 static void
 __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 {
@@ -556,8 +586,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 }
 
 static struct sg_table *
-__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
-				      bool *active)
+__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 {
 	struct get_pages_work *work;
 
@@ -594,7 +623,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
 	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
 	schedule_work(&work->work);
 
-	*active = true;
+	__i915_gem_userptr_set_active(obj, true);
 	return ERR_PTR(-EAGAIN);
 }
 
@@ -602,10 +631,10 @@ static struct sg_table *
 i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 {
 	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct mm_struct *mm = obj->userptr.mm->mm;
 	struct page **pvec;
 	struct sg_table *pages;
-	int pinned, ret;
-	bool active;
+	int pinned;
 
 	/* If userspace should engineer that these pages are replaced in
 	 * the vma between us binding this page into the GTT and completion
@@ -632,37 +661,36 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 		return ERR_PTR(-EAGAIN);
 	}
 
-	/* Let the mmu-notifier know that we have begun and need cancellation */
-	ret = __i915_gem_userptr_set_active(obj, true);
-	if (ret)
-		return ERR_PTR(ret);
-
 	pvec = NULL;
 	pinned = 0;
-	if (obj->userptr.mm->mm == current->mm) {
-		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
-				      GFP_TEMPORARY);
-		if (pvec == NULL) {
-			__i915_gem_userptr_set_active(obj, false);
-			return ERR_PTR(-ENOMEM);
-		}
 
-		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
-					       !obj->userptr.read_only, pvec);
+	down_read(&mm->mmap_sem);
+	if (unlikely(overlaps_ggtt(obj, mm))) {
+		pinned = -EFAULT;
+	} else if (mm == current->mm) {
+		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
+				      GFP_TEMPORARY |
+				      __GFP_NORETRY |
+				      __GFP_NOWARN);
+		if (pvec) /* defer to worker if malloc fails */
+			pinned = __get_user_pages_fast(obj->userptr.ptr,
						       num_pages,
						       !obj->userptr.read_only,
						       pvec);
 	}
 
-	active = false;
 	if (pinned < 0)
 		pages = ERR_PTR(pinned), pinned = 0;
 	else if (pinned < num_pages)
-		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
+		pages = __i915_gem_userptr_get_pages_schedule(obj);
 	else
 		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
 
-	if (IS_ERR(pages)) {
-		__i915_gem_userptr_set_active(obj, active);
+	up_read(&mm->mmap_sem);
+
+	if (IS_ERR(pages))
 		release_pages(pvec, pinned, 0);
-	}
 	drm_free_large(pvec);
+
 	return pages;
 }
If we allow the user to convert a GTT mmap address into a userptr, we
may end up in recursion hell, where currently we hit a mutex deadlock
but other possibilities include use-after-free during the
unbind/cancel_userptr.

[ 143.203989] gem_userptr_bli D 0 902 898 0x00000000
[ 143.204054] Call Trace:
[ 143.204137] __schedule+0x511/0x1180
[ 143.204195] ? pci_mmcfg_check_reserved+0xc0/0xc0
[ 143.204274] schedule+0x57/0xe0
[ 143.204327] schedule_timeout+0x383/0x670
[ 143.204374] ? trace_hardirqs_on_caller+0x187/0x280
[ 143.204457] ? trace_hardirqs_on_thunk+0x1a/0x1c
[ 143.204507] ? usleep_range+0x110/0x110
[ 143.204657] ? irq_exit+0x89/0x100
[ 143.204710] ? retint_kernel+0x2d/0x2d
[ 143.204794] ? trace_hardirqs_on_caller+0x187/0x280
[ 143.204857] ? _raw_spin_unlock_irq+0x33/0x60
[ 143.204944] wait_for_common+0x1f0/0x2f0
[ 143.205006] ? out_of_line_wait_on_atomic_t+0x170/0x170
[ 143.205103] ? wake_up_q+0xa0/0xa0
[ 143.205159] ? flush_workqueue_prep_pwqs+0x15a/0x2c0
[ 143.205237] wait_for_completion+0x1d/0x20
[ 143.205292] flush_workqueue+0x2e9/0xbb0
[ 143.205339] ? flush_workqueue+0x163/0xbb0
[ 143.205418] ? __schedule+0x533/0x1180
[ 143.205498] ? check_flush_dependency+0x1a0/0x1a0
[ 143.205681] i915_gem_userptr_mn_invalidate_range_start+0x1c7/0x270 [i915]
[ 143.205865] ? i915_gem_userptr_dmabuf_export+0x40/0x40 [i915]
[ 143.205955] __mmu_notifier_invalidate_range_start+0xc6/0x120
[ 143.206044] ? __mmu_notifier_invalidate_range_start+0x51/0x120
[ 143.206123] zap_page_range_single+0x1c7/0x1f0
[ 143.206171] ? unmap_single_vma+0x160/0x160
[ 143.206260] ? unmap_mapping_range+0xa9/0x1b0
[ 143.206308] ? vma_interval_tree_subtree_search+0x75/0xd0
[ 143.206397] unmap_mapping_range+0x18f/0x1b0
[ 143.206444] ? zap_vma_ptes+0x70/0x70
[ 143.206524] ? __pm_runtime_resume+0x67/0xa0
[ 143.206723] i915_gem_release_mmap+0x1ba/0x1c0 [i915]
[ 143.206846] i915_vma_unbind+0x5c2/0x690 [i915]
[ 143.206925] ? __lock_is_held+0x52/0x100
[ 143.207076] i915_gem_object_set_tiling+0x1db/0x650 [i915]
[ 143.207236] i915_gem_set_tiling_ioctl+0x1d3/0x3b0 [i915]
[ 143.207377] ? i915_gem_set_tiling_ioctl+0x5/0x3b0 [i915]
[ 143.207457] drm_ioctl+0x36c/0x670
[ 143.207535] ? debug_lockdep_rcu_enabled.part.0+0x1a/0x30
[ 143.207730] ? i915_gem_object_set_tiling+0x650/0x650 [i915]
[ 143.207793] ? drm_getunique+0x120/0x120
[ 143.207875] ? __handle_mm_fault+0x996/0x14a0
[ 143.207939] ? vm_insert_page+0x340/0x340
[ 143.208028] ? up_write+0x28/0x50
[ 143.208086] ? vm_mmap_pgoff+0x160/0x190
[ 143.208163] do_vfs_ioctl+0x12c/0xa60
[ 143.208218] ? debug_lockdep_rcu_enabled+0x35/0x40
[ 143.208267] ? ioctl_preallocate+0x150/0x150
[ 143.208353] ? __do_page_fault+0x36a/0x6e0
[ 143.208400] ? mark_held_locks+0x23/0xc0
[ 143.208479] ? up_read+0x1f/0x40
[ 143.208526] ? entry_SYSCALL_64_fastpath+0x5/0xc6
[ 143.208669] ? __fget_light+0xa7/0xc0
[ 143.208747] SyS_ioctl+0x41/0x70

To prevent the possibility of a deadlock, we defer scheduling the worker
until after we have proven that given the current mm, the userptr range
does not overlap a GGTT mmapping. If another thread tries to remap the
GGTT over the userptr before the worker is scheduled, it will be stopped
by its invalidate-range flushing the current work, before the deadlock
can occur.

v2: Improve discussion of how we end up in the deadlock.
Reported-by: Michał Winiarski <michal.winiarski@intel.com>
Testcase: igt/gem_userptr_blits/map-fixed-invalidate-gup
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 76 ++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 24 deletions(-)
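The pattern the new check rejects, and which igt/gem_userptr_blits/map-fixed-invalidate-gup
exercises, is a userptr wrapped around a GTT mmap of a GEM object. A rough
userspace sketch follows; the ioctls and structs are the stock i915 uapi,
but the device path, object size and absence of error handling are
simplifications, and with this patch the userptr's page lookup should come
back -EFAULT rather than deadlocking:

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h> /* from libdrm; needs the drm include path */

/* Sketch of the self-referential mapping this patch defends against: a
 * GTT mmap of a GEM object is handed back to the kernel as a userptr.
 * Error handling is elided for brevity; /dev/dri/card0 is an assumption. */
int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);

	/* Create a 4KiB GEM object. */
	struct drm_i915_gem_create create = { .size = 4096 };
	ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);

	/* Get a GTT mmap offset for it and map it. */
	struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = create.handle };
	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
	void *gtt = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, mmap_arg.offset);

	/* The problematic step: a userptr whose backing pages are the
	 * GTT mmapping of a GEM object. */
	struct drm_i915_gem_userptr userptr;
	memset(&userptr, 0, sizeof(userptr));
	userptr.user_ptr = (uintptr_t)gtt;
	userptr.user_size = 4096;
	ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);

	/* Using userptr.handle in an execbuffer would previously recurse
	 * into i915_gem_release_mmap via the mmu-notifier; see the trace
	 * in the commit message above. */
	return 0;
}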