[v2,3/3] drm/i915/userptr: Disallow wrapping GTT into a userptr

Message ID 20170307205851.32578-3-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson March 7, 2017, 8:58 p.m. UTC
If we allow the user to convert a GTT mmap address into a userptr, we
may end up in recursion hell: currently we hit a mutex deadlock, but
other possibilities include a use-after-free during the
unbind/cancel_userptr path.

[  143.203989] gem_userptr_bli D    0   902    898 0x00000000
[  143.204054] Call Trace:
[  143.204137]  __schedule+0x511/0x1180
[  143.204195]  ? pci_mmcfg_check_reserved+0xc0/0xc0
[  143.204274]  schedule+0x57/0xe0
[  143.204327]  schedule_timeout+0x383/0x670
[  143.204374]  ? trace_hardirqs_on_caller+0x187/0x280
[  143.204457]  ? trace_hardirqs_on_thunk+0x1a/0x1c
[  143.204507]  ? usleep_range+0x110/0x110
[  143.204657]  ? irq_exit+0x89/0x100
[  143.204710]  ? retint_kernel+0x2d/0x2d
[  143.204794]  ? trace_hardirqs_on_caller+0x187/0x280
[  143.204857]  ? _raw_spin_unlock_irq+0x33/0x60
[  143.204944]  wait_for_common+0x1f0/0x2f0
[  143.205006]  ? out_of_line_wait_on_atomic_t+0x170/0x170
[  143.205103]  ? wake_up_q+0xa0/0xa0
[  143.205159]  ? flush_workqueue_prep_pwqs+0x15a/0x2c0
[  143.205237]  wait_for_completion+0x1d/0x20
[  143.205292]  flush_workqueue+0x2e9/0xbb0
[  143.205339]  ? flush_workqueue+0x163/0xbb0
[  143.205418]  ? __schedule+0x533/0x1180
[  143.205498]  ? check_flush_dependency+0x1a0/0x1a0
[  143.205681]  i915_gem_userptr_mn_invalidate_range_start+0x1c7/0x270 [i915]
[  143.205865]  ? i915_gem_userptr_dmabuf_export+0x40/0x40 [i915]
[  143.205955]  __mmu_notifier_invalidate_range_start+0xc6/0x120
[  143.206044]  ? __mmu_notifier_invalidate_range_start+0x51/0x120
[  143.206123]  zap_page_range_single+0x1c7/0x1f0
[  143.206171]  ? unmap_single_vma+0x160/0x160
[  143.206260]  ? unmap_mapping_range+0xa9/0x1b0
[  143.206308]  ? vma_interval_tree_subtree_search+0x75/0xd0
[  143.206397]  unmap_mapping_range+0x18f/0x1b0
[  143.206444]  ? zap_vma_ptes+0x70/0x70
[  143.206524]  ? __pm_runtime_resume+0x67/0xa0
[  143.206723]  i915_gem_release_mmap+0x1ba/0x1c0 [i915]
[  143.206846]  i915_vma_unbind+0x5c2/0x690 [i915]
[  143.206925]  ? __lock_is_held+0x52/0x100
[  143.207076]  i915_gem_object_set_tiling+0x1db/0x650 [i915]
[  143.207236]  i915_gem_set_tiling_ioctl+0x1d3/0x3b0 [i915]
[  143.207377]  ? i915_gem_set_tiling_ioctl+0x5/0x3b0 [i915]
[  143.207457]  drm_ioctl+0x36c/0x670
[  143.207535]  ? debug_lockdep_rcu_enabled.part.0+0x1a/0x30
[  143.207730]  ? i915_gem_object_set_tiling+0x650/0x650 [i915]
[  143.207793]  ? drm_getunique+0x120/0x120
[  143.207875]  ? __handle_mm_fault+0x996/0x14a0
[  143.207939]  ? vm_insert_page+0x340/0x340
[  143.208028]  ? up_write+0x28/0x50
[  143.208086]  ? vm_mmap_pgoff+0x160/0x190
[  143.208163]  do_vfs_ioctl+0x12c/0xa60
[  143.208218]  ? debug_lockdep_rcu_enabled+0x35/0x40
[  143.208267]  ? ioctl_preallocate+0x150/0x150
[  143.208353]  ? __do_page_fault+0x36a/0x6e0
[  143.208400]  ? mark_held_locks+0x23/0xc0
[  143.208479]  ? up_read+0x1f/0x40
[  143.208526]  ? entry_SYSCALL_64_fastpath+0x5/0xc6
[  143.208669]  ? __fget_light+0xa7/0xc0
[  143.208747]  SyS_ioctl+0x41/0x70

To prevent the possibility of a deadlock, we defer scheduling the worker
until after we have proven that, given the current mm, the userptr range
does not overlap a GGTT mmapping. If another thread tries to remap the
GGTT over the userptr before the worker is scheduled, it will be stopped
by its invalidate-range flushing the current work before the deadlock
can occur.
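
In outline (a condensed sketch of the patch below, not the complete
code): the vma walk and the pin-or-defer decision both run under
mmap_sem, so a concurrent remap must wait for us, and its
invalidate-range will then flush any worker scheduled here.

	down_read(&mm->mmap_sem);
	if (unlikely(overlaps_ggtt(obj, mm))) {
		/* userptr wraps a GTT mmapping: reject outright */
		pinned = -EFAULT;
	} else if (mm == current->mm) {
		/* try the fast gup path; a short pin defers to the worker */
		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
					       !obj->userptr.read_only, pvec);
	}
	/* pinned < num_pages => __i915_gem_userptr_get_pages_schedule() */
	up_read(&mm->mmap_sem);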

v2: Improve discussion of how we end up in the deadlock.

Reported-by: Michał Winiarski <michal.winiarski@intel.com>
Testcase: igt/gem_userptr_blits/map-fixed-invalidate-gup
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 76 ++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 24 deletions(-)

Comments

Tvrtko Ursulin March 8, 2017, 8:25 a.m. UTC | #1
On 07/03/2017 20:58, Chris Wilson wrote:
> If we allow the user to convert a GTT mmap address into a userptr, we
> may end up in recursion hell: currently we hit a mutex deadlock, but
> other possibilities include a use-after-free during the
> unbind/cancel_userptr path.
>
> [snip]
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index dc9bf5282071..7addbf08bcb9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -488,6 +488,36 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
>  	return ret;
>  }
>
> +static bool overlaps_ggtt(struct drm_i915_gem_object *obj, struct mm_struct *mm)
> +{
> +	const struct vm_operations_struct *gem_vm_ops =
> +		obj->base.dev->driver->gem_vm_ops;
> +	unsigned long addr = obj->userptr.ptr;
> +	const unsigned long end = addr + obj->base.size;
> +	struct vm_area_struct *vma;
> +
> +	/* Check for a contiguous set of vma covering the userptr, if any
> +	 * are absent, they will EFAULT. More importantly if any point back
> +	 * to a drm_i915_gem_object GTT mmaping, we may trigger a deadlock
> +	 * between the deferred gup of this userptr and the object being
> +	 * unbound calling invalidate_range -> cancel_userptr.
> +	 */
> +	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
> +		if (vma->vm_start > addr) /* gap */
> +			break;

If I understand this correctly it is checking for more than just GGTT
mappings, so I would be tempted to either remove those parts of the
checks or rename the helper to something like invalid_backing_store.

Also, again if I understand it correctly, if the backing store was made 
out of two VMAs then wouldn't this return a false positive?

In which case the first option from above sounds better to me: just 
check whether there is any overlap with the GTT mapped areas in this 
function and leave the holes etc. to get_user_pages.

> +
> +		if (vma->vm_end >= end)
> +			return false;
> +
> +		if (vma->vm_ops == gem_vm_ops) /* GTT mmapping */
> +			break;
> +
> +		addr = vma->vm_end;
> +	}
> +
> +	return true;
> +}
> +
>  static void
>  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>  {
> @@ -556,8 +586,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>  }
>
>  static struct sg_table *
> -__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
> -				      bool *active)
> +__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
>  {
>  	struct get_pages_work *work;
>
> @@ -594,7 +623,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
>  	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
>  	schedule_work(&work->work);
>
> -	*active = true;
> +	__i915_gem_userptr_set_active(obj, true);
>  	return ERR_PTR(-EAGAIN);
>  }
>
> @@ -602,10 +631,10 @@ static struct sg_table *
>  i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  {
>  	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct mm_struct *mm = obj->userptr.mm->mm;
>  	struct page **pvec;
>  	struct sg_table *pages;
> -	int pinned, ret;
> -	bool active;
> +	int pinned;
>
>  	/* If userspace should engineer that these pages are replaced in
>  	 * the vma between us binding this page into the GTT and completion
> @@ -632,37 +661,36 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  			return ERR_PTR(-EAGAIN);
>  	}
>
> -	/* Let the mmu-notifier know that we have begun and need cancellation */
> -	ret = __i915_gem_userptr_set_active(obj, true);
> -	if (ret)
> -		return ERR_PTR(ret);
> -
>  	pvec = NULL;
>  	pinned = 0;
> -	if (obj->userptr.mm->mm == current->mm) {
> -		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
> -				      GFP_TEMPORARY);
> -		if (pvec == NULL) {
> -			__i915_gem_userptr_set_active(obj, false);
> -			return ERR_PTR(-ENOMEM);
> -		}
>
> -		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> -					       !obj->userptr.read_only, pvec);
> +	down_read(&mm->mmap_sem);
> +	if (unlikely(overlaps_ggtt(obj, mm))) {
> +		pinned = -EFAULT;
> +	} else if (mm == current->mm) {
> +		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
> +				      GFP_TEMPORARY |
> +				      __GFP_NORETRY |
> +				      __GFP_NOWARN);
> +		if (pvec) /* defer to worker if malloc fails */
> +			pinned = __get_user_pages_fast(obj->userptr.ptr,
> +						       num_pages,
> +						       !obj->userptr.read_only,
> +						       pvec);
>  	}
>
> -	active = false;
>  	if (pinned < 0)
>  		pages = ERR_PTR(pinned), pinned = 0;
>  	else if (pinned < num_pages)
> -		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
> +		pages = __i915_gem_userptr_get_pages_schedule(obj);
>  	else
>  		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);

I don't see _set_active on the fast gup path, maybe it is just too early? :)

> -	if (IS_ERR(pages)) {
> -		__i915_gem_userptr_set_active(obj, active);
> +	up_read(&mm->mmap_sem);
> +
> +	if (IS_ERR(pages))
>  		release_pages(pvec, pinned, 0);
> -	}
>  	drm_free_large(pvec);
> +
>  	return pages;
>  }
>
>

Regards,

Tvrtko
Chris Wilson March 8, 2017, 10:01 a.m. UTC | #2
On Wed, Mar 08, 2017 at 08:25:12AM +0000, Tvrtko Ursulin wrote:
> 
> On 07/03/2017 20:58, Chris Wilson wrote:
> >[snip]
> >
> >+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
> >+		if (vma->vm_start > addr) /* gap */
> >+			break;
> 
> If I understand this correctly it is checking for more than just GGTT
> mappings, so I would be tempted to either remove those parts of the
> checks or rename the helper to something like invalid_backing_store.
> 
> Also, again if I understand it correctly, if the backing store was
> made out of two VMAs then wouldn't this return a false positive?

No. The vmas have to be contiguous or else the range covers an absent
page (which will EFAULT).
 
> In which case the first option from above sounds better to me: just
> check whether there is any overlap with the GTT mapped areas in this
> function and leave the holes etc. to get_user_pages.

It's just that whilst I was here, the contiguity check falls out
naturally :) since we also have to have the !vma check.

> >-	active = false;
> > 	if (pinned < 0)
> > 		pages = ERR_PTR(pinned), pinned = 0;
> > 	else if (pinned < num_pages)
> >-		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
> >+		pages = __i915_gem_userptr_get_pages_schedule(obj);
> > 	else
> > 		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
> 
> I don't see _set_active on the fast gup path, maybe it is just too early? :)

Drat. No, I was just thinking about the worker.
-Chris
Tvrtko Ursulin March 8, 2017, 1:04 p.m. UTC | #3
On 08/03/2017 10:01, Chris Wilson wrote:
> On Wed, Mar 08, 2017 at 08:25:12AM +0000, Tvrtko Ursulin wrote:
>>
>> [snip]
>>
>> Also, again if I understand it correctly, if the backing store was
>> made out of two VMAs then wouldn't this return a false positive?
>
> No. The vmas have to be contiguous or else the range covers an absent
> page (which will EFAULT).

I assume you imply there cannot be two adjacent VMAs, that the core 
would merge them? But I am thinking there could be two non-mergeable 
ones if the backing store was different. Not 100% sure, this is just my 
intuition talking. But it would still be a valid userptr object, one 
half backed with anon pages, another say file backed.

Regards,

Tvrtko
Chris Wilson March 8, 2017, 1:10 p.m. UTC | #4
On Wed, Mar 08, 2017 at 01:04:29PM +0000, Tvrtko Ursulin wrote:
> 
> [snip]
> 
> I assume you imply there cannot be two adjacent VMAs, that the core
> would merge them? But I am thinking there could be two non-mergeable
> ones if the backing store was different. Not 100% sure, this is just
> my intuition talking. But it would still be a valid userptr object,
> one half backed with anon pages, another say file backed.

There can be two adjacent vmas: vma->vm_next->vm_start == vma->vm_end.
There can't be overlapping ones.

We are testing whether vma->vm_next->vm_start > vma->vm_end, which
implies a hole, for which there can be no struct pages.
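
As a toy userspace model of that walk (mock structures only; mock_vma
and has_hole are invented names, and find_vma is replaced by starting
at the first vma):

#include <stdbool.h>
#include <stdio.h>

/* Mock of the fields the walk uses; not the kernel's vm_area_struct. */
struct mock_vma {
	unsigned long vm_start, vm_end;
	struct mock_vma *vm_next;
};

/* Same shape as the loop in overlaps_ggtt(), minus the vm_ops check:
 * returns true if [addr, end) is not covered by a contiguous run of
 * vmas.
 */
static bool has_hole(struct mock_vma *vma, unsigned long addr,
		     unsigned long end)
{
	for (; vma; vma = vma->vm_next) {
		if (vma->vm_start > addr) /* gap */
			return true;
		if (vma->vm_end >= end)
			return false;
		addr = vma->vm_end; /* the addr assignment closing the seam */
	}
	return true; /* ran off the end of the vma list */
}

int main(void)
{
	/* Two adjacent vmas: b.vm_start == a.vm_end, so no hole. */
	struct mock_vma b = { 0x2000, 0x3000, NULL };
	struct mock_vma a = { 0x1000, 0x2000, &b };

	printf("%d\n", has_hole(&a, 0x1000, 0x3000)); /* prints 0 */

	b.vm_start = 0x2800; /* open a gap between the two vmas */
	printf("%d\n", has_hole(&a, 0x1000, 0x3000)); /* prints 1 */
	return 0;
}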
-Chris
Tvrtko Ursulin March 8, 2017, 1:12 p.m. UTC | #5
On 08/03/2017 13:10, Chris Wilson wrote:
> On Wed, Mar 08, 2017 at 01:04:29PM +0000, Tvrtko Ursulin wrote:
> [snip]
>
> There can be two adjacent vmas: vma->vm_next->vm_start == vma->vm_end.
> There can't be overlapping ones.
>
> We are testing whether vma->vm_next->vm_start > vma->vm_end, which
> implies a hole, for which there can be no struct pages.

Bah sorry, I'm jumping between things and did not register the addr 
assignment at the end of the loop.

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index dc9bf5282071..7addbf08bcb9 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -488,6 +488,36 @@  __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
 	return ret;
 }
 
+static bool overlaps_ggtt(struct drm_i915_gem_object *obj, struct mm_struct *mm)
+{
+	const struct vm_operations_struct *gem_vm_ops =
+		obj->base.dev->driver->gem_vm_ops;
+	unsigned long addr = obj->userptr.ptr;
+	const unsigned long end = addr + obj->base.size;
+	struct vm_area_struct *vma;
+
+	/* Check for a contiguous set of vma covering the userptr, if any
+	 * are absent, they will EFAULT. More importantly if any point back
+	 * to a drm_i915_gem_object GTT mmaping, we may trigger a deadlock
+	 * between the deferred gup of this userptr and the object being
+	 * unbound calling invalidate_range -> cancel_userptr.
+	 */
+	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+		if (vma->vm_start > addr) /* gap */
+			break;
+
+		if (vma->vm_end >= end)
+			return false;
+
+		if (vma->vm_ops == gem_vm_ops) /* GTT mmapping */
+			break;
+
+		addr = vma->vm_end;
+	}
+
+	return true;
+}
+
 static void
 __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 {
@@ -556,8 +586,7 @@  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 }
 
 static struct sg_table *
-__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
-				      bool *active)
+__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
 {
 	struct get_pages_work *work;
 
@@ -594,7 +623,7 @@  __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
 	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
 	schedule_work(&work->work);
 
-	*active = true;
+	__i915_gem_userptr_set_active(obj, true);
 	return ERR_PTR(-EAGAIN);
 }
 
@@ -602,10 +631,10 @@  static struct sg_table *
 i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 {
 	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct mm_struct *mm = obj->userptr.mm->mm;
 	struct page **pvec;
 	struct sg_table *pages;
-	int pinned, ret;
-	bool active;
+	int pinned;
 
 	/* If userspace should engineer that these pages are replaced in
 	 * the vma between us binding this page into the GTT and completion
@@ -632,37 +661,36 @@  i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 			return ERR_PTR(-EAGAIN);
 	}
 
-	/* Let the mmu-notifier know that we have begun and need cancellation */
-	ret = __i915_gem_userptr_set_active(obj, true);
-	if (ret)
-		return ERR_PTR(ret);
-
 	pvec = NULL;
 	pinned = 0;
-	if (obj->userptr.mm->mm == current->mm) {
-		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
-				      GFP_TEMPORARY);
-		if (pvec == NULL) {
-			__i915_gem_userptr_set_active(obj, false);
-			return ERR_PTR(-ENOMEM);
-		}
 
-		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
-					       !obj->userptr.read_only, pvec);
+	down_read(&mm->mmap_sem);
+	if (unlikely(overlaps_ggtt(obj, mm))) {
+		pinned = -EFAULT;
+	} else if (mm == current->mm) {
+		pvec = drm_malloc_gfp(num_pages, sizeof(struct page *),
+				      GFP_TEMPORARY |
+				      __GFP_NORETRY |
+				      __GFP_NOWARN);
+		if (pvec) /* defer to worker if malloc fails */
+			pinned = __get_user_pages_fast(obj->userptr.ptr,
+						       num_pages,
+						       !obj->userptr.read_only,
+						       pvec);
 	}
 
-	active = false;
 	if (pinned < 0)
 		pages = ERR_PTR(pinned), pinned = 0;
 	else if (pinned < num_pages)
-		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
+		pages = __i915_gem_userptr_get_pages_schedule(obj);
 	else
 		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
-	if (IS_ERR(pages)) {
-		__i915_gem_userptr_set_active(obj, active);
+	up_read(&mm->mmap_sem);
+
+	if (IS_ERR(pages))
 		release_pages(pvec, pinned, 0);
-	}
 	drm_free_large(pvec);
+
 	return pages;
 }