diff mbox series

[02/46] drm/i915: Revoke mmaps and prevent access to fence registers across reset

Message ID 20190206130356.18771-3-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/46] drm/i915: Hack and slash, throttle execbuffer hogs | expand

Commit Message

Chris Wilson Feb. 6, 2019, 1:03 p.m. UTC
Previously, we were able to rely on the recursive properties of
struct_mutex to allow us to serialise revoking mmaps and reacquiring the
FENCE registers with them being clobbered over a global device reset.
I then proceeded to throw out the baby with the bath water in order to
pursue a struct_mutex-less reset.

Perusing LWN for alternative strategies, the dilemma on how to serialise
access to a global resource on one side was answered by
https://lwn.net/Articles/202847/ -- Sleepable RCU:

    1  int readside(void) {
    2      int idx;
    3      rcu_read_lock();
    4	   if (nomoresrcu) {
    5          rcu_read_unlock();
    6	       return -EINVAL;
    7      }
    8	   idx = srcu_read_lock(&ss);
    9	   rcu_read_unlock();
    10	   /* SRCU read-side critical section. */
    11	   srcu_read_unlock(&ss, idx);
    12	   return 0;
    13 }
    14
    15 void cleanup(void)
    16 {
    17     nomoresrcu = 1;
    18     synchronize_rcu();
    19     synchronize_srcu(&ss);
    20     cleanup_srcu_struct(&ss);
    21 }

No more worrying about stop_machine, just an uber-complex mutex,
optimised for reads, with the overhead pushed to the rare reset path.

However, we do run the risk of a deadlock as we allocate underneath the
SRCU read lock, and the allocation may require a GPU reset, causing a
dependency cycle via the in-flight requests. We resolve that by declaring
the driver wedged and cancelling all in-flight rendering.

v2: Use expedited rcu barriers to match our earlier timing
characteristics.
v3: Try to annotate locking contexts for sparse
v4: Reduce selftest lock duration to avoid a reset deadlock with fences
v5: s/srcu/reset_backoff_srcu/

Testcase: igt/gem_mmap_gtt/hang
Fixes: eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  12 +-
 drivers/gpu/drm/i915/i915_drv.h               |  18 +--
 drivers/gpu/drm/i915/i915_gem.c               |  56 +++------
 drivers/gpu/drm/i915/i915_gem_fence_reg.c     |  31 +----
 drivers/gpu/drm/i915/i915_gpu_error.h         |  12 +-
 drivers/gpu/drm/i915/i915_reset.c             | 109 +++++++++++-------
 drivers/gpu/drm/i915/i915_reset.h             |   4 +
 .../gpu/drm/i915/selftests/intel_hangcheck.c  |   5 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |   1 +
 9 files changed, 109 insertions(+), 139 deletions(-)

Comments

Mika Kuoppala Feb. 6, 2019, 3:56 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Previously, we were able to rely on the recursive properties of
> struct_mutex to allow us to serialise revoking mmaps and reacquiring the
> FENCE registers with them being clobbered over a global device reset.
> I then proceeded to throw out the baby with the bath water in order to
> pursue a struct_mutex-less reset.
>
> Perusing LWN for alternative strategies, the dilemma on how to serialise
> access to a global resource on one side was answered by
> https://lwn.net/Articles/202847/ -- Sleepable RCU:
>
>     1  int readside(void) {
>     2      int idx;
>     3      rcu_read_lock();
>     4	   if (nomoresrcu) {
>     5          rcu_read_unlock();
>     6	       return -EINVAL;
>     7      }
>     8	   idx = srcu_read_lock(&ss);
>     9	   rcu_read_unlock();
>     10	   /* SRCU read-side critical section. */
>     11	   srcu_read_unlock(&ss, idx);
>     12	   return 0;
>     13 }
>     14
>     15 void cleanup(void)
>     16 {
>     17     nomoresrcu = 1;
>     18     synchronize_rcu();
>     19     synchronize_srcu(&ss);
>     20     cleanup_srcu_struct(&ss);
>     21 }
>
> No more worrying about stop_machine, just an uber-complex mutex,
> optimised for reads, with the overhead pushed to the rare reset path.
>
> However, we do run the risk of a deadlock as we allocate underneath the
> SRCU read lock, and the allocation may require a GPU reset, causing a
> dependency cycle via the in-flight requests. We resolve that by declaring
> the driver wedged and cancelling all in-flight rendering.
>
> v2: Use expedited rcu barriers to match our earlier timing
> characteristics.
> v3: Try to annotate locking contexts for sparse
> v4: Reduce selftest lock duration to avoid a reset deadlock with fences
> v5: s/srcu/reset_backoff_srcu/
>
> Testcase: igt/gem_mmap_gtt/hang
> Fixes: eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c           |  12 +-
>  drivers/gpu/drm/i915/i915_drv.h               |  18 +--
>  drivers/gpu/drm/i915/i915_gem.c               |  56 +++------
>  drivers/gpu/drm/i915/i915_gem_fence_reg.c     |  31 +----
>  drivers/gpu/drm/i915/i915_gpu_error.h         |  12 +-
>  drivers/gpu/drm/i915/i915_reset.c             | 109 +++++++++++-------
>  drivers/gpu/drm/i915/i915_reset.h             |   4 +
>  .../gpu/drm/i915/selftests/intel_hangcheck.c  |   5 +-
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |   1 +
>  9 files changed, 109 insertions(+), 139 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 0bd890c04fe4..a6fd157b1637 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1281,14 +1281,11 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  	intel_wakeref_t wakeref;
>  	enum intel_engine_id id;
>  
> +	seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags);
>  	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
> -		seq_puts(m, "Wedged\n");
> +		seq_puts(m, "\tWedged\n");
>  	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
> -		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
> -	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
> -		seq_puts(m, "Waiter holding struct mutex\n");
> -	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
> -		seq_puts(m, "struct_mutex blocked for reset\n");
> +		seq_puts(m, "\tDevice (global) reset in progress\n");
>  
>  	if (!i915_modparams.enable_hangcheck) {
>  		seq_puts(m, "Hangcheck disabled\n");
> @@ -3885,9 +3882,6 @@ i915_wedged_set(void *data, u64 val)
>  	 * while it is writing to 'i915_wedged'
>  	 */
>  
> -	if (i915_reset_backoff(&i915->gpu_error))
> -		return -EAGAIN;
> -
>  	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
>  			  "Manually set wedged engine mask = %llx", val);
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a2293152cb6a..37230ae7fbe6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2989,7 +2989,12 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
>  	i915_gem_object_unpin_pages(obj);
>  }
>  
> -int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> +static inline int __must_check
> +i915_mutex_lock_interruptible(struct drm_device *dev)
> +{
> +	return mutex_lock_interruptible(&dev->struct_mutex);
> +}
> +
>  int i915_gem_dumb_create(struct drm_file *file_priv,
>  			 struct drm_device *dev,
>  			 struct drm_mode_create_dumb *args);
> @@ -3006,21 +3011,11 @@ int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
>  struct i915_request *
>  i915_gem_find_active_request(struct intel_engine_cs *engine);
>  
> -static inline bool i915_reset_backoff(struct i915_gpu_error *error)
> -{
> -	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
> -}
> -
>  static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
>  {
>  	return unlikely(test_bit(I915_WEDGED, &error->flags));
>  }
>  
> -static inline bool i915_reset_backoff_or_wedged(struct i915_gpu_error *error)
> -{
> -	return i915_reset_backoff(error) | i915_terminally_wedged(error);
> -}
> -
>  static inline u32 i915_reset_count(struct i915_gpu_error *error)
>  {
>  	return READ_ONCE(error->reset_count);
> @@ -3093,7 +3088,6 @@ struct drm_i915_fence_reg *
>  i915_reserve_fence(struct drm_i915_private *dev_priv);
>  void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
>  
> -void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
>  void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
>  
>  void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 05ce9176ac4e..1eb3a5f8654c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -100,47 +100,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
>  	spin_unlock(&dev_priv->mm.object_stat_lock);
>  }
>  
> -static int
> -i915_gem_wait_for_error(struct i915_gpu_error *error)
> -{
> -	int ret;
> -
> -	might_sleep();
> -
> -	/*
> -	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
> -	 * userspace. If it takes that long something really bad is going on and
> -	 * we should simply try to bail out and fail as gracefully as possible.
> -	 */
> -	ret = wait_event_interruptible_timeout(error->reset_queue,
> -					       !i915_reset_backoff(error),
> -					       I915_RESET_TIMEOUT);
> -	if (ret == 0) {
> -		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
> -		return -EIO;
> -	} else if (ret < 0) {
> -		return ret;
> -	} else {
> -		return 0;
> -	}
> -}
> -
> -int i915_mutex_lock_interruptible(struct drm_device *dev)
> -{
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	int ret;
> -
> -	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
> -	if (ret)
> -		return ret;
> -
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
> -
> -	return 0;
> -}
> -
>  static u32 __i915_gem_park(struct drm_i915_private *i915)
>  {
>  	intel_wakeref_t wakeref;
> @@ -1869,6 +1828,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  	intel_wakeref_t wakeref;
>  	struct i915_vma *vma;
>  	pgoff_t page_offset;
> +	int srcu;
>  	int ret;
>  
>  	/* Sanity check that we allow writing into this object */
> @@ -1908,7 +1868,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  		goto err_unlock;
>  	}
>  
> -
>  	/* Now pin it into the GTT as needed */
>  	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
>  				       PIN_MAPPABLE |
> @@ -1946,9 +1905,15 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  	if (ret)
>  		goto err_unpin;
>  
> +	srcu = i915_reset_trylock(dev_priv);
> +	if (srcu < 0) {
> +		ret = srcu;
> +		goto err_unpin;
> +	}
> +
>  	ret = i915_vma_pin_fence(vma);
>  	if (ret)
> -		goto err_unpin;
> +		goto err_reset;
>  
>  	/* Finally, remap it using the new GTT offset */
>  	ret = remap_io_mapping(area,
> @@ -1969,6 +1934,8 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  
>  err_fence:
>  	i915_vma_unpin_fence(vma);
> +err_reset:
> +	i915_reset_unlock(dev_priv, srcu);
>  err_unpin:
>  	__i915_vma_unpin(vma);
>  err_unlock:
> @@ -5326,6 +5293,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>  	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
>  	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
>  	mutex_init(&dev_priv->gpu_error.wedge_mutex);
> +	init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
>  
>  	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
>  
> @@ -5358,6 +5326,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
>  	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
>  	WARN_ON(dev_priv->mm.object_count);
>

synchronize_rcu and srcu and GEM_BUG_ON for reset backoff?

> +	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
> +
>  	kmem_cache_destroy(dev_priv->priorities);
>  	kmem_cache_destroy(dev_priv->dependencies);
>  	kmem_cache_destroy(dev_priv->requests);
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index e037e94792f3..36d548fa3aa2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -240,6 +240,10 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  		i915_vma_flush_writes(old);
>  	}
>  
> +	ret = i915_reset_trylock(fence->i915);
> +	if (ret < 0)
> +		return ret;
> +
>  	if (fence->vma && fence->vma != vma) {
>  		/* Ensure that all userspace CPU access is completed before
>  		 * stealing the fence.
> @@ -272,6 +276,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
>  	}
>  
> +	i915_reset_unlock(fence->i915, ret);
>  	return 0;
>  }
>  
> @@ -435,32 +440,6 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
>  	list_add(&fence->link, &fence->i915->mm.fence_list);
>  }
>  
> -/**
> - * i915_gem_revoke_fences - revoke fence state
> - * @dev_priv: i915 device private
> - *
> - * Removes all GTT mmappings via the fence registers. This forces any user
> - * of the fence to reacquire that fence before continuing with their access.
> - * One use is during GPU reset where the fence register is lost and we need to
> - * revoke concurrent userspace access via GTT mmaps until the hardware has been
> - * reset and the fence registers have been restored.
> - */
> -void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
> -{
> -	int i;
> -
> -	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
> -	for (i = 0; i < dev_priv->num_fence_regs; i++) {
> -		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
> -
> -		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
> -
> -		if (fence->vma)
> -			i915_vma_revoke_mmap(fence->vma);
> -	}
> -}
> -
>  /**
>   * i915_gem_restore_fences - restore fence state
>   * @dev_priv: i915 device private
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 53b1f22dd365..d5c58e82508b 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -231,12 +231,10 @@ struct i915_gpu_error {
>  	/**
>  	 * flags: Control various stages of the GPU reset
>  	 *
> -	 * #I915_RESET_BACKOFF - When we start a reset, we want to stop any
> -	 * other users acquiring the struct_mutex. To do this we set the
> -	 * #I915_RESET_BACKOFF bit in the error flags when we detect a reset
> -	 * and then check for that bit before acquiring the struct_mutex (in
> -	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
> -	 * secondary role in preventing two concurrent global reset attempts.
> +	 * #I915_RESET_BACKOFF - When we start a global reset, we need to
> +	 * serialise with any other users attempting to do the same, and
> +	 * any global resources that may be clobber by the reset (such as
> +	 * FENCE registers).
>  	 *
>  	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
>  	 * acquire the struct_mutex to reset an engine, we need an explicit
> @@ -272,6 +270,8 @@ struct i915_gpu_error {
>  	 */
>  	wait_queue_head_t reset_queue;
>  
> +	struct srcu_struct reset_backoff_srcu;
> +
>  	struct i915_gpu_restart *restart;
>  };
>  
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
> index 0e0ddf2e6815..272d00d4b8a3 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/i915_reset.c
> @@ -639,6 +639,31 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
>  	engine->reset.prepare(engine);
>  }
>  
> +static void revoke_mmaps(struct drm_i915_private *i915)
> +{
> +	int i;
> +
> +	for (i = 0; i < i915->num_fence_regs; i++) {
> +		struct i915_vma *vma = i915->fence_regs[i].vma;
> +		struct drm_vma_offset_node *node;
> +		u64 vma_offset;
> +
> +		if (!vma)
> +			continue;
> +
> +		GEM_BUG_ON(vma->fence != &i915->fence_regs[i]);
> +		if (!i915_vma_has_userfault(vma))
> +			continue;
> +
> +		node = &vma->obj->base.vma_node;
> +		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
> +		unmap_mapping_range(i915->drm.anon_inode->i_mapping,
> +				    drm_vma_node_offset_addr(node) + vma_offset,
> +				    vma->size,
> +				    1);
> +	}
> +}
> +
>  static void reset_prepare(struct drm_i915_private *i915)
>  {
>  	struct intel_engine_cs *engine;
> @@ -648,6 +673,7 @@ static void reset_prepare(struct drm_i915_private *i915)
>  		reset_prepare_engine(engine);
>  
>  	intel_uc_sanitize(i915);
> +	revoke_mmaps(i915);
>  }
>  
>  static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
> @@ -911,50 +937,22 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	return ret;
>  }
>  
> -struct __i915_reset {
> -	struct drm_i915_private *i915;
> -	unsigned int stalled_mask;
> -};
> -
> -static int __i915_reset__BKL(void *data)
> -{
> -	struct __i915_reset *arg = data;
> -	int err;
> -
> -	err = intel_gpu_reset(arg->i915, ALL_ENGINES);
> -	if (err)
> -		return err;
> -
> -	return gt_reset(arg->i915, arg->stalled_mask);
> -}
> -
> -#if RESET_UNDER_STOP_MACHINE
> -/*
> - * XXX An alternative to using stop_machine would be to park only the
> - * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP)
> - * we should be able to prevent their memmory accesses via the lost fence
> - * registers over the course of the reset without the potential recursive
> - * of mutexes between the pagefault handler and reset.
> - *
> - * See igt/gem_mmap_gtt/hang
> - */
> -#define __do_reset(fn, arg) stop_machine(fn, arg, NULL)
> -#else
> -#define __do_reset(fn, arg) fn(arg)
> -#endif
> -
>  static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
>  {
> -	struct __i915_reset arg = { i915, stalled_mask };
>  	int err, i;
>  
> -	err = __do_reset(__i915_reset__BKL, &arg);
> +	/* Flush everyone currently using a resource about to be clobbered */
> +	synchronize_srcu(&i915->gpu_error.reset_backoff_srcu);
> +
> +	err = intel_gpu_reset(i915, ALL_ENGINES);
>  	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
> -		msleep(100);
> -		err = __do_reset(__i915_reset__BKL, &arg);
> +		msleep(10 * (i + 1));
> +		err = intel_gpu_reset(i915, ALL_ENGINES);
>  	}
> +	if (err)
> +		return err;
>  
> -	return err;
> +	return gt_reset(i915, stalled_mask);
>  }
>  
>  /**
> @@ -966,8 +964,6 @@ static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
>   * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
>   * on failure.
>   *
> - * Caller must hold the struct_mutex.
> - *
>   * Procedure is fairly simple:
>   *   - reset the chip using the reset reg
>   *   - re-init context state
> @@ -1274,9 +1270,12 @@ void i915_handle_error(struct drm_i915_private *i915,
>  		wait_event(i915->gpu_error.reset_queue,
>  			   !test_bit(I915_RESET_BACKOFF,
>  				     &i915->gpu_error.flags));
> -		goto out;
> +		goto out; /* piggy-back on the other reset */
>  	}
>

I should have been more specific in last run, sorry. Still errorneous
comment above test_and_set_bit(I915_RESET_BACKOFF).

Also as I understood from RCU/checklist.txt the rcu provides
no help for read ordering here. The test_and_set_bit provides
the mb on this side. But add smp_mb__before_atomic before
sampling the the bit in trylock?

-Mika


> +	/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
> +	synchronize_rcu_expedited();
> +
>  	/* Prevent any other reset-engine attempt. */
>  	for_each_engine(engine, i915, tmp) {
>  		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
> @@ -1300,6 +1299,36 @@ void i915_handle_error(struct drm_i915_private *i915,
>  	intel_runtime_pm_put(i915, wakeref);
>  }
>  
> +int i915_reset_trylock(struct drm_i915_private *i915)
> +{
> +	struct i915_gpu_error *error = &i915->gpu_error;
> +	int srcu;
> +
> +	rcu_read_lock();
> +	while (test_bit(I915_RESET_BACKOFF, &error->flags)) {
> +		rcu_read_unlock();
> +
> +		if (wait_event_interruptible(error->reset_queue,
> +					     !test_bit(I915_RESET_BACKOFF,
> +						       &error->flags)))
> +			return -EINTR;
> +
> +		rcu_read_lock();
> +	}
> +	srcu = srcu_read_lock(&error->reset_backoff_srcu);
> +	rcu_read_unlock();
> +
> +	return srcu;
> +}
> +
> +void i915_reset_unlock(struct drm_i915_private *i915, int tag)
> +__releases(&i915->gpu_error.reset_backoff_srcu)
> +{
> +	struct i915_gpu_error *error = &i915->gpu_error;
> +
> +	srcu_read_unlock(&error->reset_backoff_srcu, tag);
> +}
> +
>  bool i915_reset_flush(struct drm_i915_private *i915)
>  {
>  	int err;
> diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
> index f2d347f319df..893c5d1c2eb8 100644
> --- a/drivers/gpu/drm/i915/i915_reset.h
> +++ b/drivers/gpu/drm/i915/i915_reset.h
> @@ -9,6 +9,7 @@
>  
>  #include <linux/compiler.h>
>  #include <linux/types.h>
> +#include <linux/srcu.h>
>  
>  struct drm_i915_private;
>  struct intel_engine_cs;
> @@ -32,6 +33,9 @@ int i915_reset_engine(struct intel_engine_cs *engine,
>  void i915_reset_request(struct i915_request *rq, bool guilty);
>  bool i915_reset_flush(struct drm_i915_private *i915);
>  
> +int __must_check i915_reset_trylock(struct drm_i915_private *i915);
> +void i915_reset_unlock(struct drm_i915_private *i915, int tag);
> +
>  bool intel_has_gpu_reset(struct drm_i915_private *i915);
>  bool intel_has_reset_engine(struct drm_i915_private *i915);
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 7b6f3bea9ef8..4886fac12628 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -1039,8 +1039,6 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
>  
>  	/* Check that we can recover an unbind stuck on a hanging request */
>  
> -	igt_global_reset_lock(i915);
> -
>  	mutex_lock(&i915->drm.struct_mutex);
>  	err = hang_init(&h, i915);
>  	if (err)
> @@ -1138,7 +1136,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
>  	}
>  
>  out_reset:
> +	igt_global_reset_lock(i915);
>  	fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
> +	igt_global_reset_unlock(i915);
>  
>  	if (tsk) {
>  		struct igt_wedge_me w;
> @@ -1159,7 +1159,6 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
>  	hang_fini(&h);
>  unlock:
>  	mutex_unlock(&i915->drm.struct_mutex);
> -	igt_global_reset_unlock(i915);
>  
>  	if (i915_terminally_wedged(&i915->gpu_error))
>  		return -EIO;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 14ae46fda49f..fc516a2970f4 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -189,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void)
>  
>  	init_waitqueue_head(&i915->gpu_error.wait_queue);
>  	init_waitqueue_head(&i915->gpu_error.reset_queue);
> +	init_srcu_struct(&i915->gpu_error.reset_backoff_srcu);
>  	mutex_init(&i915->gpu_error.wedge_mutex);
>  
>  	i915->wq = alloc_ordered_workqueue("mock", 0);
> -- 
> 2.20.1
Chris Wilson Feb. 6, 2019, 4:08 p.m. UTC | #2
Quoting Mika Kuoppala (2019-02-06 15:56:28)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > @@ -5358,6 +5326,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
> >       GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
> >       WARN_ON(dev_priv->mm.object_count);
> >
> 
> synchronize_rcu and srcu and GEM_BUG_ON for reset backoff?

There can't be any readers by this point, and we don't use call_[s]rcu
so no deferred tasks to worry about.

I don't see anything that we can bug on to prove that though.

> > @@ -1274,9 +1270,12 @@ void i915_handle_error(struct drm_i915_private *i915,
> >               wait_event(i915->gpu_error.reset_queue,
> >                          !test_bit(I915_RESET_BACKOFF,
> >                                    &i915->gpu_error.flags));
> > -             goto out;
> > +             goto out; /* piggy-back on the other reset */
> >       }
> >
> 
> I should have been more specific in last run, sorry. Still errorneous
> comment above test_and_set_bit(I915_RESET_BACKOFF).
> 
> Also as I understood from RCU/checklist.txt the rcu provides
> no help for read ordering here. The test_and_set_bit provides
> the mb on this side. But add smp_mb__before_atomic before
> sampling the the bit in trylock?

Wrong side, you apply that to the writer and we have the implicit mb in
the wakeup already for the write. The atomic reads are just fine in
conjunction with wait event serialisation.
-Chris
Chris Wilson Feb. 6, 2019, 4:18 p.m. UTC | #3
Quoting Chris Wilson (2019-02-06 16:08:23)
> Quoting Mika Kuoppala (2019-02-06 15:56:28)
> > Chris Wilson <chris@chris-wilson.co.uk> writes:
> > > @@ -5358,6 +5326,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
> > >       GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
> > >       WARN_ON(dev_priv->mm.object_count);
> > >
> > 
> > synchronize_rcu and srcu and GEM_BUG_ON for reset backoff?
> 
> There can't be any readers by this point, and we don't use call_[s]rcu
> so no deferred tasks to worry about.
> 
> I don't see anything that we can bug on to prove that though.
> 
> > > @@ -1274,9 +1270,12 @@ void i915_handle_error(struct drm_i915_private *i915,
> > >               wait_event(i915->gpu_error.reset_queue,
> > >                          !test_bit(I915_RESET_BACKOFF,
> > >                                    &i915->gpu_error.flags));
> > > -             goto out;
> > > +             goto out; /* piggy-back on the other reset */
> > >       }
> > >
> > 
> > I should have been more specific in last run, sorry. Still errorneous
> > comment above test_and_set_bit(I915_RESET_BACKOFF).
> > 
> > Also as I understood from RCU/checklist.txt the rcu provides
> > no help for read ordering here. The test_and_set_bit provides
> > the mb on this side. But add smp_mb__before_atomic before
> > sampling the the bit in trylock?
> 
> Wrong side, you apply that to the writer and we have the implicit mb in
> the wakeup already for the write. The atomic reads are just fine in
> conjunction with wait event serialisation.

It's even clearer than that since the write to the bit has an explicit
barrier.
-Chris
Rodrigo Vivi Feb. 26, 2019, 7:53 p.m. UTC | #4
On Wed, Feb 06, 2019 at 01:03:12PM +0000, Chris Wilson wrote:
> Previously, we were able to rely on the recursive properties of
> struct_mutex to allow us to serialise revoking mmaps and reacquiring the
> FENCE registers with them being clobbered over a global device reset.
> I then proceeded to throw out the baby with the bath water in order to
> pursue a struct_mutex-less reset.
> 
> Perusing LWN for alternative strategies, the dilemma on how to serialise
> access to a global resource on one side was answered by
> https://lwn.net/Articles/202847/ -- Sleepable RCU:
> 
>     1  int readside(void) {
>     2      int idx;
>     3      rcu_read_lock();
>     4	   if (nomoresrcu) {
>     5          rcu_read_unlock();
>     6	       return -EINVAL;
>     7      }
>     8	   idx = srcu_read_lock(&ss);
>     9	   rcu_read_unlock();
>     10	   /* SRCU read-side critical section. */
>     11	   srcu_read_unlock(&ss, idx);
>     12	   return 0;
>     13 }
>     14
>     15 void cleanup(void)
>     16 {
>     17     nomoresrcu = 1;
>     18     synchronize_rcu();
>     19     synchronize_srcu(&ss);
>     20     cleanup_srcu_struct(&ss);
>     21 }
> 
> No more worrying about stop_machine, just an uber-complex mutex,
> optimised for reads, with the overhead pushed to the rare reset path.
> 
> However, we do run the risk of a deadlock as we allocate underneath the
> SRCU read lock, and the allocation may require a GPU reset, causing a
> dependency cycle via the in-flight requests. We resolve that by declaring
> the driver wedged and cancelling all in-flight rendering.
> 
> v2: Use expedited rcu barriers to match our earlier timing
> characteristics.
> v3: Try to annotate locking contexts for sparse
> v4: Reduce selftest lock duration to avoid a reset deadlock with fences
> v5: s/srcu/reset_backoff_srcu/
> 
> Testcase: igt/gem_mmap_gtt/hang
> Fixes: eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex")

Hi Chris,

this patch didn't applied cleanly on dinf
so I noticed that I could also get

115ff80a97cf ("drm/i915: Defer removing fence register tracking to rpm wakeup")

so that applies, but then I noticed that there's a Fixes of
this patch here:

de3a87cf6352 ("drm/i915: Recursive i915_reset_trylock() verboten")

It seems sane to pick 3 of them, but I'd like to confirm with you first.

Thoughts?

Thanks in advance,
Rodrigo.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c           |  12 +-
>  drivers/gpu/drm/i915/i915_drv.h               |  18 +--
>  drivers/gpu/drm/i915/i915_gem.c               |  56 +++------
>  drivers/gpu/drm/i915/i915_gem_fence_reg.c     |  31 +----
>  drivers/gpu/drm/i915/i915_gpu_error.h         |  12 +-
>  drivers/gpu/drm/i915/i915_reset.c             | 109 +++++++++++-------
>  drivers/gpu/drm/i915/i915_reset.h             |   4 +
>  .../gpu/drm/i915/selftests/intel_hangcheck.c  |   5 +-
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |   1 +
>  9 files changed, 109 insertions(+), 139 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 0bd890c04fe4..a6fd157b1637 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1281,14 +1281,11 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  	intel_wakeref_t wakeref;
>  	enum intel_engine_id id;
>  
> +	seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags);
>  	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
> -		seq_puts(m, "Wedged\n");
> +		seq_puts(m, "\tWedged\n");
>  	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
> -		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
> -	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
> -		seq_puts(m, "Waiter holding struct mutex\n");
> -	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
> -		seq_puts(m, "struct_mutex blocked for reset\n");
> +		seq_puts(m, "\tDevice (global) reset in progress\n");
>  
>  	if (!i915_modparams.enable_hangcheck) {
>  		seq_puts(m, "Hangcheck disabled\n");
> @@ -3885,9 +3882,6 @@ i915_wedged_set(void *data, u64 val)
>  	 * while it is writing to 'i915_wedged'
>  	 */
>  
> -	if (i915_reset_backoff(&i915->gpu_error))
> -		return -EAGAIN;
> -
>  	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
>  			  "Manually set wedged engine mask = %llx", val);
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a2293152cb6a..37230ae7fbe6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2989,7 +2989,12 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
>  	i915_gem_object_unpin_pages(obj);
>  }
>  
> -int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
> +static inline int __must_check
> +i915_mutex_lock_interruptible(struct drm_device *dev)
> +{
> +	return mutex_lock_interruptible(&dev->struct_mutex);
> +}
> +
>  int i915_gem_dumb_create(struct drm_file *file_priv,
>  			 struct drm_device *dev,
>  			 struct drm_mode_create_dumb *args);
> @@ -3006,21 +3011,11 @@ int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
>  struct i915_request *
>  i915_gem_find_active_request(struct intel_engine_cs *engine);
>  
> -static inline bool i915_reset_backoff(struct i915_gpu_error *error)
> -{
> -	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
> -}
> -
>  static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
>  {
>  	return unlikely(test_bit(I915_WEDGED, &error->flags));
>  }
>  
> -static inline bool i915_reset_backoff_or_wedged(struct i915_gpu_error *error)
> -{
> -	return i915_reset_backoff(error) | i915_terminally_wedged(error);
> -}
> -
>  static inline u32 i915_reset_count(struct i915_gpu_error *error)
>  {
>  	return READ_ONCE(error->reset_count);
> @@ -3093,7 +3088,6 @@ struct drm_i915_fence_reg *
>  i915_reserve_fence(struct drm_i915_private *dev_priv);
>  void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
>  
> -void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
>  void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
>  
>  void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 05ce9176ac4e..1eb3a5f8654c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -100,47 +100,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
>  	spin_unlock(&dev_priv->mm.object_stat_lock);
>  }
>  
> -static int
> -i915_gem_wait_for_error(struct i915_gpu_error *error)
> -{
> -	int ret;
> -
> -	might_sleep();
> -
> -	/*
> -	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
> -	 * userspace. If it takes that long something really bad is going on and
> -	 * we should simply try to bail out and fail as gracefully as possible.
> -	 */
> -	ret = wait_event_interruptible_timeout(error->reset_queue,
> -					       !i915_reset_backoff(error),
> -					       I915_RESET_TIMEOUT);
> -	if (ret == 0) {
> -		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
> -		return -EIO;
> -	} else if (ret < 0) {
> -		return ret;
> -	} else {
> -		return 0;
> -	}
> -}
> -
> -int i915_mutex_lock_interruptible(struct drm_device *dev)
> -{
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	int ret;
> -
> -	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
> -	if (ret)
> -		return ret;
> -
> -	ret = mutex_lock_interruptible(&dev->struct_mutex);
> -	if (ret)
> -		return ret;
> -
> -	return 0;
> -}
> -
>  static u32 __i915_gem_park(struct drm_i915_private *i915)
>  {
>  	intel_wakeref_t wakeref;
> @@ -1869,6 +1828,7 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  	intel_wakeref_t wakeref;
>  	struct i915_vma *vma;
>  	pgoff_t page_offset;
> +	int srcu;
>  	int ret;
>  
>  	/* Sanity check that we allow writing into this object */
> @@ -1908,7 +1868,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  		goto err_unlock;
>  	}
>  
> -
>  	/* Now pin it into the GTT as needed */
>  	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
>  				       PIN_MAPPABLE |
> @@ -1946,9 +1905,15 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  	if (ret)
>  		goto err_unpin;
>  
> +	srcu = i915_reset_trylock(dev_priv);
> +	if (srcu < 0) {
> +		ret = srcu;
> +		goto err_unpin;
> +	}
> +
>  	ret = i915_vma_pin_fence(vma);
>  	if (ret)
> -		goto err_unpin;
> +		goto err_reset;
>  
>  	/* Finally, remap it using the new GTT offset */
>  	ret = remap_io_mapping(area,
> @@ -1969,6 +1934,8 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>  
>  err_fence:
>  	i915_vma_unpin_fence(vma);
> +err_reset:
> +	i915_reset_unlock(dev_priv, srcu);
>  err_unpin:
>  	__i915_vma_unpin(vma);
>  err_unlock:
> @@ -5326,6 +5293,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>  	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
>  	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
>  	mutex_init(&dev_priv->gpu_error.wedge_mutex);
> +	init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
>  
>  	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
>  
> @@ -5358,6 +5326,8 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
>  	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
>  	WARN_ON(dev_priv->mm.object_count);
>  
> +	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
> +
>  	kmem_cache_destroy(dev_priv->priorities);
>  	kmem_cache_destroy(dev_priv->dependencies);
>  	kmem_cache_destroy(dev_priv->requests);
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> index e037e94792f3..36d548fa3aa2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
> @@ -240,6 +240,10 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  		i915_vma_flush_writes(old);
>  	}
>  
> +	ret = i915_reset_trylock(fence->i915);
> +	if (ret < 0)
> +		return ret;
> +
>  	if (fence->vma && fence->vma != vma) {
>  		/* Ensure that all userspace CPU access is completed before
>  		 * stealing the fence.
> @@ -272,6 +276,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
>  		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
>  	}
>  
> +	i915_reset_unlock(fence->i915, ret);
>  	return 0;
>  }
>  
> @@ -435,32 +440,6 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
>  	list_add(&fence->link, &fence->i915->mm.fence_list);
>  }
>  
> -/**
> - * i915_gem_revoke_fences - revoke fence state
> - * @dev_priv: i915 device private
> - *
> - * Removes all GTT mmappings via the fence registers. This forces any user
> - * of the fence to reacquire that fence before continuing with their access.
> - * One use is during GPU reset where the fence register is lost and we need to
> - * revoke concurrent userspace access via GTT mmaps until the hardware has been
> - * reset and the fence registers have been restored.
> - */
> -void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
> -{
> -	int i;
> -
> -	lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
> -	for (i = 0; i < dev_priv->num_fence_regs; i++) {
> -		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
> -
> -		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
> -
> -		if (fence->vma)
> -			i915_vma_revoke_mmap(fence->vma);
> -	}
> -}
> -
>  /**
>   * i915_gem_restore_fences - restore fence state
>   * @dev_priv: i915 device private
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 53b1f22dd365..d5c58e82508b 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -231,12 +231,10 @@ struct i915_gpu_error {
>  	/**
>  	 * flags: Control various stages of the GPU reset
>  	 *
> -	 * #I915_RESET_BACKOFF - When we start a reset, we want to stop any
> -	 * other users acquiring the struct_mutex. To do this we set the
> -	 * #I915_RESET_BACKOFF bit in the error flags when we detect a reset
> -	 * and then check for that bit before acquiring the struct_mutex (in
> -	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
> -	 * secondary role in preventing two concurrent global reset attempts.
> +	 * #I915_RESET_BACKOFF - When we start a global reset, we need to
> +	 * serialise with any other users attempting to do the same, and
> +	 * any global resources that may be clobber by the reset (such as
> +	 * FENCE registers).
>  	 *
>  	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
>  	 * acquire the struct_mutex to reset an engine, we need an explicit
> @@ -272,6 +270,8 @@ struct i915_gpu_error {
>  	 */
>  	wait_queue_head_t reset_queue;
>  
> +	struct srcu_struct reset_backoff_srcu;
> +
>  	struct i915_gpu_restart *restart;
>  };
>  
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
> index 0e0ddf2e6815..272d00d4b8a3 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/i915_reset.c
> @@ -639,6 +639,31 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
>  	engine->reset.prepare(engine);
>  }
>  
> +static void revoke_mmaps(struct drm_i915_private *i915)
> +{
> +	int i;
> +
> +	for (i = 0; i < i915->num_fence_regs; i++) {
> +		struct i915_vma *vma = i915->fence_regs[i].vma;
> +		struct drm_vma_offset_node *node;
> +		u64 vma_offset;
> +
> +		if (!vma)
> +			continue;
> +
> +		GEM_BUG_ON(vma->fence != &i915->fence_regs[i]);
> +		if (!i915_vma_has_userfault(vma))
> +			continue;
> +
> +		node = &vma->obj->base.vma_node;
> +		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
> +		unmap_mapping_range(i915->drm.anon_inode->i_mapping,
> +				    drm_vma_node_offset_addr(node) + vma_offset,
> +				    vma->size,
> +				    1);
> +	}
> +}
> +
>  static void reset_prepare(struct drm_i915_private *i915)
>  {
>  	struct intel_engine_cs *engine;
> @@ -648,6 +673,7 @@ static void reset_prepare(struct drm_i915_private *i915)
>  		reset_prepare_engine(engine);
>  
>  	intel_uc_sanitize(i915);
> +	revoke_mmaps(i915);
>  }
>  
>  static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
> @@ -911,50 +937,22 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	return ret;
>  }
>  
> -struct __i915_reset {
> -	struct drm_i915_private *i915;
> -	unsigned int stalled_mask;
> -};
> -
> -static int __i915_reset__BKL(void *data)
> -{
> -	struct __i915_reset *arg = data;
> -	int err;
> -
> -	err = intel_gpu_reset(arg->i915, ALL_ENGINES);
> -	if (err)
> -		return err;
> -
> -	return gt_reset(arg->i915, arg->stalled_mask);
> -}
> -
> -#if RESET_UNDER_STOP_MACHINE
> -/*
> - * XXX An alternative to using stop_machine would be to park only the
> - * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP)
> - * we should be able to prevent their memmory accesses via the lost fence
> - * registers over the course of the reset without the potential recursive
> - * of mutexes between the pagefault handler and reset.
> - *
> - * See igt/gem_mmap_gtt/hang
> - */
> -#define __do_reset(fn, arg) stop_machine(fn, arg, NULL)
> -#else
> -#define __do_reset(fn, arg) fn(arg)
> -#endif
> -
>  static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
>  {
> -	struct __i915_reset arg = { i915, stalled_mask };
>  	int err, i;
>  
> -	err = __do_reset(__i915_reset__BKL, &arg);
> +	/* Flush everyone currently using a resource about to be clobbered */
> +	synchronize_srcu(&i915->gpu_error.reset_backoff_srcu);
> +
> +	err = intel_gpu_reset(i915, ALL_ENGINES);
>  	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
> -		msleep(100);
> -		err = __do_reset(__i915_reset__BKL, &arg);
> +		msleep(10 * (i + 1));
> +		err = intel_gpu_reset(i915, ALL_ENGINES);
>  	}
> +	if (err)
> +		return err;
>  
> -	return err;
> +	return gt_reset(i915, stalled_mask);
>  }
>  
>  /**
> @@ -966,8 +964,6 @@ static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
>   * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
>   * on failure.
>   *
> - * Caller must hold the struct_mutex.
> - *
>   * Procedure is fairly simple:
>   *   - reset the chip using the reset reg
>   *   - re-init context state
> @@ -1274,9 +1270,12 @@ void i915_handle_error(struct drm_i915_private *i915,
>  		wait_event(i915->gpu_error.reset_queue,
>  			   !test_bit(I915_RESET_BACKOFF,
>  				     &i915->gpu_error.flags));
> -		goto out;
> +		goto out; /* piggy-back on the other reset */
>  	}
>  
> +	/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
> +	synchronize_rcu_expedited();
> +
>  	/* Prevent any other reset-engine attempt. */
>  	for_each_engine(engine, i915, tmp) {
>  		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
> @@ -1300,6 +1299,36 @@ void i915_handle_error(struct drm_i915_private *i915,
>  	intel_runtime_pm_put(i915, wakeref);
>  }
>  
> +int i915_reset_trylock(struct drm_i915_private *i915)
> +{
> +	struct i915_gpu_error *error = &i915->gpu_error;
> +	int srcu;
> +
> +	rcu_read_lock();
> +	while (test_bit(I915_RESET_BACKOFF, &error->flags)) {
> +		rcu_read_unlock();
> +
> +		if (wait_event_interruptible(error->reset_queue,
> +					     !test_bit(I915_RESET_BACKOFF,
> +						       &error->flags)))
> +			return -EINTR;
> +
> +		rcu_read_lock();
> +	}
> +	srcu = srcu_read_lock(&error->reset_backoff_srcu);
> +	rcu_read_unlock();
> +
> +	return srcu;
> +}
> +
> +void i915_reset_unlock(struct drm_i915_private *i915, int tag)
> +__releases(&i915->gpu_error.reset_backoff_srcu)
> +{
> +	struct i915_gpu_error *error = &i915->gpu_error;
> +
> +	srcu_read_unlock(&error->reset_backoff_srcu, tag);
> +}
> +
>  bool i915_reset_flush(struct drm_i915_private *i915)
>  {
>  	int err;
> diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
> index f2d347f319df..893c5d1c2eb8 100644
> --- a/drivers/gpu/drm/i915/i915_reset.h
> +++ b/drivers/gpu/drm/i915/i915_reset.h
> @@ -9,6 +9,7 @@
>  
>  #include <linux/compiler.h>
>  #include <linux/types.h>
> +#include <linux/srcu.h>
>  
>  struct drm_i915_private;
>  struct intel_engine_cs;
> @@ -32,6 +33,9 @@ int i915_reset_engine(struct intel_engine_cs *engine,
>  void i915_reset_request(struct i915_request *rq, bool guilty);
>  bool i915_reset_flush(struct drm_i915_private *i915);
>  
> +int __must_check i915_reset_trylock(struct drm_i915_private *i915);
> +void i915_reset_unlock(struct drm_i915_private *i915, int tag);
> +
>  bool intel_has_gpu_reset(struct drm_i915_private *i915);
>  bool intel_has_reset_engine(struct drm_i915_private *i915);
>  
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 7b6f3bea9ef8..4886fac12628 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -1039,8 +1039,6 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
>  
>  	/* Check that we can recover an unbind stuck on a hanging request */
>  
> -	igt_global_reset_lock(i915);
> -
>  	mutex_lock(&i915->drm.struct_mutex);
>  	err = hang_init(&h, i915);
>  	if (err)
> @@ -1138,7 +1136,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
>  	}
>  
>  out_reset:
> +	igt_global_reset_lock(i915);
>  	fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
> +	igt_global_reset_unlock(i915);
>  
>  	if (tsk) {
>  		struct igt_wedge_me w;
> @@ -1159,7 +1159,6 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
>  	hang_fini(&h);
>  unlock:
>  	mutex_unlock(&i915->drm.struct_mutex);
> -	igt_global_reset_unlock(i915);
>  
>  	if (i915_terminally_wedged(&i915->gpu_error))
>  		return -EIO;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 14ae46fda49f..fc516a2970f4 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -189,6 +189,7 @@ struct drm_i915_private *mock_gem_device(void)
>  
>  	init_waitqueue_head(&i915->gpu_error.wait_queue);
>  	init_waitqueue_head(&i915->gpu_error.reset_queue);
> +	init_srcu_struct(&i915->gpu_error.reset_backoff_srcu);
>  	mutex_init(&i915->gpu_error.wedge_mutex);
>  
>  	i915->wq = alloc_ordered_workqueue("mock", 0);
> -- 
> 2.20.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Feb. 26, 2019, 8:27 p.m. UTC | #5
Quoting Rodrigo Vivi (2019-02-26 19:53:47)
> On Wed, Feb 06, 2019 at 01:03:12PM +0000, Chris Wilson wrote:
> > Previously, we were able to rely on the recursive properties of
> > struct_mutex to allow us to serialise revoking mmaps and reacquiring the
> > FENCE registers with them being clobbered over a global device reset.
> > I then proceeded to throw out the baby with the bath water in order to
> > pursue a struct_mutex-less reset.
> > 
> > Perusing LWN for alternative strategies, the dilemma on how to serialise
> > access to a global resource on one side was answered by
> > https://lwn.net/Articles/202847/ -- Sleepable RCU:
> > 
> >     1  int readside(void) {
> >     2      int idx;
> >     3      rcu_read_lock();
> >     4    if (nomoresrcu) {
> >     5          rcu_read_unlock();
> >     6        return -EINVAL;
> >     7      }
> >     8    idx = srcu_read_lock(&ss);
> >     9    rcu_read_unlock();
> >     10           /* SRCU read-side critical section. */
> >     11           srcu_read_unlock(&ss, idx);
> >     12           return 0;
> >     13 }
> >     14
> >     15 void cleanup(void)
> >     16 {
> >     17     nomoresrcu = 1;
> >     18     synchronize_rcu();
> >     19     synchronize_srcu(&ss);
> >     20     cleanup_srcu_struct(&ss);
> >     21 }
> > 
> > No more worrying about stop_machine, just an uber-complex mutex,
> > optimised for reads, with the overhead pushed to the rare reset path.
> > 
> > However, we do run the risk of a deadlock as we allocate underneath the
> > SRCU read lock, and the allocation may require a GPU reset, causing a
> > dependency cycle via the in-flight requests. We resolve that by declaring
> > the driver wedged and cancelling all in-flight rendering.
> > 
> > v2: Use expedited rcu barriers to match our earlier timing
> > characteristics.
> > v3: Try to annotate locking contexts for sparse
> > v4: Reduce selftest lock duration to avoid a reset deadlock with fences
> > v5: s/srcu/reset_backoff_srcu/
> > 
> > Testcase: igt/gem_mmap_gtt/hang
> > Fixes: eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex")
> 
> Hi Chris,
> 
> this patch didn't applied cleanly on dinf
> so I noticed that I could also get
> 
> 115ff80a97cf ("drm/i915: Defer removing fence register tracking to rpm wakeup")
> 
> so that applies, but then I noticed that there's a Fixes of
> this patch here:
> 
> de3a87cf6352 ("drm/i915: Recursive i915_reset_trylock() verboten")
> 
> It seems sane to pick 3 of them, but I'd like to confirm with you first.
> 
> Thoughts?

There's a at least one later patch as well. I think the safest course of
option is to ignore these fixups, as the glitch is very rare (after a
gpu hang as well), only temporary and does not impact stability or security
(they can only see their own buffers/data in a slightly different order).
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0bd890c04fe4..a6fd157b1637 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1281,14 +1281,11 @@  static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	intel_wakeref_t wakeref;
 	enum intel_engine_id id;
 
+	seq_printf(m, "Reset flags: %lx\n", dev_priv->gpu_error.flags);
 	if (test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
-		seq_puts(m, "Wedged\n");
+		seq_puts(m, "\tWedged\n");
 	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
-		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
-	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
-		seq_puts(m, "Waiter holding struct mutex\n");
-	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
-		seq_puts(m, "struct_mutex blocked for reset\n");
+		seq_puts(m, "\tDevice (global) reset in progress\n");
 
 	if (!i915_modparams.enable_hangcheck) {
 		seq_puts(m, "Hangcheck disabled\n");
@@ -3885,9 +3882,6 @@  i915_wedged_set(void *data, u64 val)
 	 * while it is writing to 'i915_wedged'
 	 */
 
-	if (i915_reset_backoff(&i915->gpu_error))
-		return -EAGAIN;
-
 	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
 			  "Manually set wedged engine mask = %llx", val);
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a2293152cb6a..37230ae7fbe6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2989,7 +2989,12 @@  i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
-int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
+static inline int __must_check
+i915_mutex_lock_interruptible(struct drm_device *dev)
+{
+	return mutex_lock_interruptible(&dev->struct_mutex);
+}
+
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
@@ -3006,21 +3011,11 @@  int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
 struct i915_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine);
 
-static inline bool i915_reset_backoff(struct i915_gpu_error *error)
-{
-	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
-}
-
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
 	return unlikely(test_bit(I915_WEDGED, &error->flags));
 }
 
-static inline bool i915_reset_backoff_or_wedged(struct i915_gpu_error *error)
-{
-	return i915_reset_backoff(error) | i915_terminally_wedged(error);
-}
-
 static inline u32 i915_reset_count(struct i915_gpu_error *error)
 {
 	return READ_ONCE(error->reset_count);
@@ -3093,7 +3088,6 @@  struct drm_i915_fence_reg *
 i915_reserve_fence(struct drm_i915_private *dev_priv);
 void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
 
-void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
 void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
 
 void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 05ce9176ac4e..1eb3a5f8654c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -100,47 +100,6 @@  static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
-static int
-i915_gem_wait_for_error(struct i915_gpu_error *error)
-{
-	int ret;
-
-	might_sleep();
-
-	/*
-	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
-	 * userspace. If it takes that long something really bad is going on and
-	 * we should simply try to bail out and fail as gracefully as possible.
-	 */
-	ret = wait_event_interruptible_timeout(error->reset_queue,
-					       !i915_reset_backoff(error),
-					       I915_RESET_TIMEOUT);
-	if (ret == 0) {
-		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
-		return -EIO;
-	} else if (ret < 0) {
-		return ret;
-	} else {
-		return 0;
-	}
-}
-
-int i915_mutex_lock_interruptible(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret;
-
-	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
-	if (ret)
-		return ret;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static u32 __i915_gem_park(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
@@ -1869,6 +1828,7 @@  vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	intel_wakeref_t wakeref;
 	struct i915_vma *vma;
 	pgoff_t page_offset;
+	int srcu;
 	int ret;
 
 	/* Sanity check that we allow writing into this object */
@@ -1908,7 +1868,6 @@  vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 		goto err_unlock;
 	}
 
-
 	/* Now pin it into the GTT as needed */
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
@@ -1946,9 +1905,15 @@  vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	if (ret)
 		goto err_unpin;
 
+	srcu = i915_reset_trylock(dev_priv);
+	if (srcu < 0) {
+		ret = srcu;
+		goto err_unpin;
+	}
+
 	ret = i915_vma_pin_fence(vma);
 	if (ret)
-		goto err_unpin;
+		goto err_reset;
 
 	/* Finally, remap it using the new GTT offset */
 	ret = remap_io_mapping(area,
@@ -1969,6 +1934,8 @@  vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 
 err_fence:
 	i915_vma_unpin_fence(vma);
+err_reset:
+	i915_reset_unlock(dev_priv, srcu);
 err_unpin:
 	__i915_vma_unpin(vma);
 err_unlock:
@@ -5326,6 +5293,7 @@  int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 	mutex_init(&dev_priv->gpu_error.wedge_mutex);
+	init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
 
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
@@ -5358,6 +5326,8 @@  void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
 	WARN_ON(dev_priv->mm.object_count);
 
+	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
+
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index e037e94792f3..36d548fa3aa2 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -240,6 +240,10 @@  static int fence_update(struct drm_i915_fence_reg *fence,
 		i915_vma_flush_writes(old);
 	}
 
+	ret = i915_reset_trylock(fence->i915);
+	if (ret < 0)
+		return ret;
+
 	if (fence->vma && fence->vma != vma) {
 		/* Ensure that all userspace CPU access is completed before
 		 * stealing the fence.
@@ -272,6 +276,7 @@  static int fence_update(struct drm_i915_fence_reg *fence,
 		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
 	}
 
+	i915_reset_unlock(fence->i915, ret);
 	return 0;
 }
 
@@ -435,32 +440,6 @@  void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
 	list_add(&fence->link, &fence->i915->mm.fence_list);
 }
 
-/**
- * i915_gem_revoke_fences - revoke fence state
- * @dev_priv: i915 device private
- *
- * Removes all GTT mmappings via the fence registers. This forces any user
- * of the fence to reacquire that fence before continuing with their access.
- * One use is during GPU reset where the fence register is lost and we need to
- * revoke concurrent userspace access via GTT mmaps until the hardware has been
- * reset and the fence registers have been restored.
- */
-void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
-{
-	int i;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
-
-		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
-
-		if (fence->vma)
-			i915_vma_revoke_mmap(fence->vma);
-	}
-}
-
 /**
  * i915_gem_restore_fences - restore fence state
  * @dev_priv: i915 device private
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 53b1f22dd365..d5c58e82508b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -231,12 +231,10 @@  struct i915_gpu_error {
 	/**
 	 * flags: Control various stages of the GPU reset
 	 *
-	 * #I915_RESET_BACKOFF - When we start a reset, we want to stop any
-	 * other users acquiring the struct_mutex. To do this we set the
-	 * #I915_RESET_BACKOFF bit in the error flags when we detect a reset
-	 * and then check for that bit before acquiring the struct_mutex (in
-	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
-	 * secondary role in preventing two concurrent global reset attempts.
+	 * #I915_RESET_BACKOFF - When we start a global reset, we need to
+	 * serialise with any other users attempting to do the same, and
+	 * any global resources that may be clobber by the reset (such as
+	 * FENCE registers).
 	 *
 	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
 	 * acquire the struct_mutex to reset an engine, we need an explicit
@@ -272,6 +270,8 @@  struct i915_gpu_error {
 	 */
 	wait_queue_head_t reset_queue;
 
+	struct srcu_struct reset_backoff_srcu;
+
 	struct i915_gpu_restart *restart;
 };
 
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 0e0ddf2e6815..272d00d4b8a3 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -639,6 +639,31 @@  static void reset_prepare_engine(struct intel_engine_cs *engine)
 	engine->reset.prepare(engine);
 }
 
+static void revoke_mmaps(struct drm_i915_private *i915)
+{
+	int i;
+
+	for (i = 0; i < i915->num_fence_regs; i++) {
+		struct i915_vma *vma = i915->fence_regs[i].vma;
+		struct drm_vma_offset_node *node;
+		u64 vma_offset;
+
+		if (!vma)
+			continue;
+
+		GEM_BUG_ON(vma->fence != &i915->fence_regs[i]);
+		if (!i915_vma_has_userfault(vma))
+			continue;
+
+		node = &vma->obj->base.vma_node;
+		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
+		unmap_mapping_range(i915->drm.anon_inode->i_mapping,
+				    drm_vma_node_offset_addr(node) + vma_offset,
+				    vma->size,
+				    1);
+	}
+}
+
 static void reset_prepare(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -648,6 +673,7 @@  static void reset_prepare(struct drm_i915_private *i915)
 		reset_prepare_engine(engine);
 
 	intel_uc_sanitize(i915);
+	revoke_mmaps(i915);
 }
 
 static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
@@ -911,50 +937,22 @@  bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	return ret;
 }
 
-struct __i915_reset {
-	struct drm_i915_private *i915;
-	unsigned int stalled_mask;
-};
-
-static int __i915_reset__BKL(void *data)
-{
-	struct __i915_reset *arg = data;
-	int err;
-
-	err = intel_gpu_reset(arg->i915, ALL_ENGINES);
-	if (err)
-		return err;
-
-	return gt_reset(arg->i915, arg->stalled_mask);
-}
-
-#if RESET_UNDER_STOP_MACHINE
-/*
- * XXX An alternative to using stop_machine would be to park only the
- * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP)
- * we should be able to prevent their memmory accesses via the lost fence
- * registers over the course of the reset without the potential recursive
- * of mutexes between the pagefault handler and reset.
- *
- * See igt/gem_mmap_gtt/hang
- */
-#define __do_reset(fn, arg) stop_machine(fn, arg, NULL)
-#else
-#define __do_reset(fn, arg) fn(arg)
-#endif
-
 static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
 {
-	struct __i915_reset arg = { i915, stalled_mask };
 	int err, i;
 
-	err = __do_reset(__i915_reset__BKL, &arg);
+	/* Flush everyone currently using a resource about to be clobbered */
+	synchronize_srcu(&i915->gpu_error.reset_backoff_srcu);
+
+	err = intel_gpu_reset(i915, ALL_ENGINES);
 	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
-		msleep(100);
-		err = __do_reset(__i915_reset__BKL, &arg);
+		msleep(10 * (i + 1));
+		err = intel_gpu_reset(i915, ALL_ENGINES);
 	}
+	if (err)
+		return err;
 
-	return err;
+	return gt_reset(i915, stalled_mask);
 }
 
 /**
@@ -966,8 +964,6 @@  static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
  * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
  * on failure.
  *
- * Caller must hold the struct_mutex.
- *
  * Procedure is fairly simple:
  *   - reset the chip using the reset reg
  *   - re-init context state
@@ -1274,9 +1270,12 @@  void i915_handle_error(struct drm_i915_private *i915,
 		wait_event(i915->gpu_error.reset_queue,
 			   !test_bit(I915_RESET_BACKOFF,
 				     &i915->gpu_error.flags));
-		goto out;
+		goto out; /* piggy-back on the other reset */
 	}
 
+	/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
+	synchronize_rcu_expedited();
+
 	/* Prevent any other reset-engine attempt. */
 	for_each_engine(engine, i915, tmp) {
 		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
@@ -1300,6 +1299,36 @@  void i915_handle_error(struct drm_i915_private *i915,
 	intel_runtime_pm_put(i915, wakeref);
 }
 
+int i915_reset_trylock(struct drm_i915_private *i915)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+	int srcu;
+
+	rcu_read_lock();
+	while (test_bit(I915_RESET_BACKOFF, &error->flags)) {
+		rcu_read_unlock();
+
+		if (wait_event_interruptible(error->reset_queue,
+					     !test_bit(I915_RESET_BACKOFF,
+						       &error->flags)))
+			return -EINTR;
+
+		rcu_read_lock();
+	}
+	srcu = srcu_read_lock(&error->reset_backoff_srcu);
+	rcu_read_unlock();
+
+	return srcu;
+}
+
+void i915_reset_unlock(struct drm_i915_private *i915, int tag)
+__releases(&i915->gpu_error.reset_backoff_srcu)
+{
+	struct i915_gpu_error *error = &i915->gpu_error;
+
+	srcu_read_unlock(&error->reset_backoff_srcu, tag);
+}
+
 bool i915_reset_flush(struct drm_i915_private *i915)
 {
 	int err;
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
index f2d347f319df..893c5d1c2eb8 100644
--- a/drivers/gpu/drm/i915/i915_reset.h
+++ b/drivers/gpu/drm/i915/i915_reset.h
@@ -9,6 +9,7 @@ 
 
 #include <linux/compiler.h>
 #include <linux/types.h>
+#include <linux/srcu.h>
 
 struct drm_i915_private;
 struct intel_engine_cs;
@@ -32,6 +33,9 @@  int i915_reset_engine(struct intel_engine_cs *engine,
 void i915_reset_request(struct i915_request *rq, bool guilty);
 bool i915_reset_flush(struct drm_i915_private *i915);
 
+int __must_check i915_reset_trylock(struct drm_i915_private *i915);
+void i915_reset_unlock(struct drm_i915_private *i915, int tag);
+
 bool intel_has_gpu_reset(struct drm_i915_private *i915);
 bool intel_has_reset_engine(struct drm_i915_private *i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 7b6f3bea9ef8..4886fac12628 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -1039,8 +1039,6 @@  static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 
 	/* Check that we can recover an unbind stuck on a hanging request */
 
-	igt_global_reset_lock(i915);
-
 	mutex_lock(&i915->drm.struct_mutex);
 	err = hang_init(&h, i915);
 	if (err)
@@ -1138,7 +1136,9 @@  static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 	}
 
 out_reset:
+	igt_global_reset_lock(i915);
 	fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
+	igt_global_reset_unlock(i915);
 
 	if (tsk) {
 		struct igt_wedge_me w;
@@ -1159,7 +1159,6 @@  static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 	hang_fini(&h);
 unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
-	igt_global_reset_unlock(i915);
 
 	if (i915_terminally_wedged(&i915->gpu_error))
 		return -EIO;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 14ae46fda49f..fc516a2970f4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -189,6 +189,7 @@  struct drm_i915_private *mock_gem_device(void)
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
+	init_srcu_struct(&i915->gpu_error.reset_backoff_srcu);
 	mutex_init(&i915->gpu_error.wedge_mutex);
 
 	i915->wq = alloc_ordered_workqueue("mock", 0);