diff mbox series

[25/33] drm/i915: Move object close under its own lock

Message ID 20190520080127.18255-25-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/33] drm/i915: Restore control over ppgtt for context creation ABI | expand

Commit Message

Chris Wilson May 20, 2019, 8:01 a.m. UTC
Use i915_gem_object_lock() to guard the LUT and active reference to
allow us to break free of struct_mutex for handling GEM_CLOSE.

Testcase: igt/gem_close_race
Testcase: igt/gem_exec_parallel
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   | 76 ++++++++++---------
 .../gpu/drm/i915/gem/i915_gem_context_types.h | 12 +--
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 23 ++++--
 drivers/gpu/drm/i915/gem/i915_gem_object.c    | 38 ++++++----
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 -
 .../gpu/drm/i915/gem/selftests/mock_context.c |  1 -
 drivers/gpu/drm/i915/i915_drv.h               |  4 +-
 drivers/gpu/drm/i915/i915_gem.c               |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c           |  1 +
 drivers/gpu/drm/i915/i915_timeline.c          | 13 ++--
 drivers/gpu/drm/i915/i915_vma.c               | 42 ++++++----
 drivers/gpu/drm/i915/i915_vma.h               | 17 ++---
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
 13 files changed, 132 insertions(+), 98 deletions(-)

Comments

Mika Kuoppala May 22, 2019, 2:32 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Use i915_gem_object_lock() to guard the LUT and active reference to
> allow us to break free of struct_mutex for handling GEM_CLOSE.
>
> Testcase: igt/gem_close_race
> Testcase: igt/gem_exec_parallel
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c   | 76 ++++++++++---------
>  .../gpu/drm/i915/gem/i915_gem_context_types.h | 12 +--
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 23 ++++--
>  drivers/gpu/drm/i915/gem/i915_gem_object.c    | 38 ++++++----
>  .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 -
>  .../gpu/drm/i915/gem/selftests/mock_context.c |  1 -
>  drivers/gpu/drm/i915/i915_drv.h               |  4 +-
>  drivers/gpu/drm/i915/i915_gem.c               |  1 +
>  drivers/gpu/drm/i915/i915_gem_gtt.c           |  1 +
>  drivers/gpu/drm/i915/i915_timeline.c          | 13 ++--
>  drivers/gpu/drm/i915/i915_vma.c               | 42 ++++++----
>  drivers/gpu/drm/i915/i915_vma.h               | 17 ++---
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
>  13 files changed, 132 insertions(+), 98 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 5016a3e1f863..a9608d9ced6a 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -95,24 +95,42 @@ void i915_lut_handle_free(struct i915_lut_handle *lut)
>  
>  static void lut_close(struct i915_gem_context *ctx)
>  {
> -	struct i915_lut_handle *lut, *ln;
>  	struct radix_tree_iter iter;
>  	void __rcu **slot;
>  
> -	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
> -		list_del(&lut->obj_link);
> -		i915_lut_handle_free(lut);
> -	}
> -	INIT_LIST_HEAD(&ctx->handles_list);
> +	lockdep_assert_held(&ctx->mutex);
>  
>  	rcu_read_lock();
>  	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
>  		struct i915_vma *vma = rcu_dereference_raw(*slot);
> -
> -		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> -
> -		vma->open_count--;
> -		i915_vma_put(vma);
> +		struct drm_i915_gem_object *obj = vma->obj;
> +		struct i915_lut_handle *lut;
> +		bool found = false;
> +
> +		rcu_read_unlock();
> +		i915_gem_object_lock(obj);
> +		list_for_each_entry(lut, &obj->lut_list, obj_link) {
> +			if (lut->ctx != ctx)
> +				continue;
> +
> +			if (lut->handle != iter.index)
> +				continue;
> +
> +			list_del(&lut->obj_link);
> +			i915_lut_handle_free(lut);

You could free this after object_unlock if you want. Shrug.

> +			found = true;
> +			break;
> +		}
> +		i915_gem_object_unlock(obj);
> +		rcu_read_lock();
> +
> +		if (found) {
> +			radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> +			if (atomic_dec_and_test(&vma->open_count) &&
> +			    !i915_vma_is_ggtt(vma))
> +				i915_vma_close(vma);
> +			i915_gem_object_put(obj);

I am struggling to pair this with get.

> +		}
>  	}
>  	rcu_read_unlock();
>  }
> @@ -250,15 +268,9 @@ static void free_engines(struct i915_gem_engines *e)
>  	__free_engines(e, e->num_engines);
>  }
>  
> -static void free_engines_rcu(struct work_struct *wrk)
> +static void free_engines_rcu(struct rcu_head *rcu)
>  {
> -	struct i915_gem_engines *e =
> -		container_of(wrk, struct i915_gem_engines, rcu.work);
> -	struct drm_i915_private *i915 = e->i915;
> -
> -	mutex_lock(&i915->drm.struct_mutex);
> -	free_engines(e);
> -	mutex_unlock(&i915->drm.struct_mutex);
> +	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
>  }
>  
>  static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
> @@ -271,7 +283,7 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
>  	if (!e)
>  		return ERR_PTR(-ENOMEM);
>  
> -	e->i915 = ctx->i915;
> +	init_rcu_head(&e->rcu);
>  	for_each_engine(engine, ctx->i915, id) {
>  		struct intel_context *ce;
>  
> @@ -359,7 +371,10 @@ void i915_gem_context_release(struct kref *ref)
>  
>  static void context_close(struct i915_gem_context *ctx)
>  {
> +	mutex_lock(&ctx->mutex);
> +
>  	i915_gem_context_set_closed(ctx);
> +	ctx->file_priv = ERR_PTR(-EBADF);
>  
>  	/*
>  	 * This context will never again be assigned to HW, so we can
> @@ -374,7 +389,7 @@ static void context_close(struct i915_gem_context *ctx)
>  	 */
>  	lut_close(ctx);
>  
> -	ctx->file_priv = ERR_PTR(-EBADF);
> +	mutex_unlock(&ctx->mutex);
>  	i915_gem_context_put(ctx);
>  }
>  
> @@ -429,7 +444,6 @@ __create_context(struct drm_i915_private *dev_priv)
>  	RCU_INIT_POINTER(ctx->engines, e);
>  
>  	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> -	INIT_LIST_HEAD(&ctx->handles_list);
>  	INIT_LIST_HEAD(&ctx->hw_id_link);
>  
>  	/* NB: Mark all slices as needing a remap so that when the context first
> @@ -772,9 +786,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
>  	return 0;
>  
>  err_ctx:
> -	mutex_lock(&i915->drm.struct_mutex);
>  	context_close(ctx);
> -	mutex_unlock(&i915->drm.struct_mutex);
>  err:
>  	idr_destroy(&file_priv->vm_idr);
>  	idr_destroy(&file_priv->context_idr);
> @@ -787,8 +799,6 @@ void i915_gem_context_close(struct drm_file *file)
>  {
>  	struct drm_i915_file_private *file_priv = file->driver_priv;
>  
> -	lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex);
> -
>  	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
>  	idr_destroy(&file_priv->context_idr);
>  	mutex_destroy(&file_priv->context_idr_lock);
> @@ -1093,7 +1103,9 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
>  		goto unlock;
>  
>  	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
> +	mutex_lock(&ctx->mutex);
>  	lut_close(ctx);
> +	mutex_unlock(&ctx->mutex);
>  
>  	old = __set_ppgtt(ctx, ppgtt);
>  
> @@ -1612,7 +1624,7 @@ set_engines(struct i915_gem_context *ctx,
>  	if (!set.engines)
>  		return -ENOMEM;
>  
> -	set.engines->i915 = ctx->i915;
> +	init_rcu_head(&set.engines->rcu);
>  	for (n = 0; n < num_engines; n++) {
>  		struct i915_engine_class_instance ci;
>  		struct intel_engine_cs *engine;
> @@ -1666,8 +1678,7 @@ set_engines(struct i915_gem_context *ctx,
>  	rcu_swap_protected(ctx->engines, set.engines, 1);
>  	mutex_unlock(&ctx->engines_mutex);
>  
> -	INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu);
> -	queue_rcu_work(system_wq, &set.engines->rcu);
> +	call_rcu(&set.engines->rcu, free_engines_rcu);

Why can we omit the queue now?

>  
>  	return 0;
>  }
> @@ -1865,7 +1876,7 @@ static int clone_engines(struct i915_gem_context *dst,
>  	if (!clone)
>  		goto err_unlock;
>  
> -	clone->i915 = dst->i915;
> +	init_rcu_head(&clone->rcu);
>  	for (n = 0; n < e->num_engines; n++) {
>  		struct intel_engine_cs *engine;
>  
> @@ -2137,9 +2148,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  
>  err_ctx:
> -	mutex_lock(&dev->struct_mutex);
>  	context_close(ext_data.ctx);
> -	mutex_unlock(&dev->struct_mutex);
>  	return ret;
>  }
>  
> @@ -2164,10 +2173,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>  	if (!ctx)
>  		return -ENOENT;
>  
> -	mutex_lock(&dev->struct_mutex);
>  	context_close(ctx);
> -	mutex_unlock(&dev->struct_mutex);
> -
>  	return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> index fb965ded2508..3db7448b9732 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
> @@ -30,8 +30,7 @@ struct i915_timeline;
>  struct intel_ring;
>  
>  struct i915_gem_engines {
> -	struct rcu_work rcu;
> -	struct drm_i915_private *i915;
> +	struct rcu_head rcu;
>  	unsigned int num_engines;
>  	struct intel_context *engines[];
>  };
> @@ -192,17 +191,12 @@ struct i915_gem_context {
>  	/** remap_slice: Bitmask of cache lines that need remapping */
>  	u8 remap_slice;
>  
> -	/** handles_vma: rbtree to look up our context specific obj/vma for
> +	/**
> +	 * handles_vma: rbtree to look up our context specific obj/vma for
>  	 * the user handle. (user handles are per fd, but the binding is
>  	 * per vm, which may be one per context or shared with the global GTT)
>  	 */
>  	struct radix_tree_root handles_vma;
> -
> -	/** handles_list: reverse list of all the rbtree entries in use for
> -	 * this context, which allows us to free all the allocations on
> -	 * context close.
> -	 */
> -	struct list_head handles_list;
>  };
>  
>  #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 13ab2a2e0099..fa0a880ca4de 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -801,9 +801,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  	unsigned int i, batch;
>  	int err;
>  
> -	if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
> -		return -ENOENT;
> -
>  	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
>  		return -EIO;
>  
> @@ -812,6 +809,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  
>  	batch = eb_batch_index(eb);
>  
> +	mutex_lock(&eb->gem_context->mutex);
> +	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
> +		err = -ENOENT;
> +		goto err_ctx;
> +	}
> +
>  	for (i = 0; i < eb->buffer_count; i++) {
>  		u32 handle = eb->exec[i].handle;
>  		struct i915_lut_handle *lut;
> @@ -846,12 +849,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  		}
>  
>  		/* transfer ref to ctx */
> -		if (!vma->open_count++)
> +		if (!atomic_fetch_inc(&vma->open_count))
>  			i915_vma_reopen(vma);
> -		list_add(&lut->obj_link, &obj->lut_list);
> -		list_add(&lut->ctx_link, &eb->gem_context->handles_list);
> -		lut->ctx = eb->gem_context;
>  		lut->handle = handle;
> +		lut->ctx = eb->gem_context;
> +
> +		i915_gem_object_lock(obj);
> +		list_add(&lut->obj_link, &obj->lut_list);
> +		i915_gem_object_unlock(obj);
>  
>  add_vma:
>  		err = eb_add_vma(eb, i, batch, vma);
> @@ -864,6 +869,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
>  	}
>  
> +	mutex_unlock(&eb->gem_context->mutex);
> +
>  	eb->args->flags |= __EXEC_VALIDATED;
>  	return eb_reserve(eb);
>  
> @@ -871,6 +878,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  	i915_gem_object_put(obj);
>  err_vma:
>  	eb->vma[i] = NULL;
> +err_ctx:
> +	mutex_unlock(&eb->gem_context->mutex);
>  	return err;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 9b3bd9387b70..98dce0210154 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -126,39 +126,47 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
>  
>  void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>  {
> -	struct drm_i915_private *i915 = to_i915(gem->dev);
>  	struct drm_i915_gem_object *obj = to_intel_bo(gem);
>  	struct drm_i915_file_private *fpriv = file->driver_priv;
>  	struct i915_lut_handle *lut, *ln;
> +	LIST_HEAD(close);
>  
> -	mutex_lock(&i915->drm.struct_mutex);
> -
> +	i915_gem_object_lock(obj);
>  	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
>  		struct i915_gem_context *ctx = lut->ctx;
> -		struct i915_vma *vma;
>  
> -		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
>  		if (ctx->file_priv != fpriv)
>  			continue;
>  
> -		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
> -		GEM_BUG_ON(vma->obj != obj);
> +		i915_gem_context_get(ctx);
> +		list_move(&lut->obj_link, &close);
> +	}
> +	i915_gem_object_unlock(obj);
> +
> +	list_for_each_entry_safe(lut, ln, &close, obj_link) {
> +		struct i915_gem_context *ctx = lut->ctx;
> +		struct i915_vma *vma;
>  
> -		/* We allow the process to have multiple handles to the same
> +		/*
> +		 * We allow the process to have multiple handles to the same
>  		 * vma, in the same fd namespace, by virtue of flink/open.
>  		 */
> -		GEM_BUG_ON(!vma->open_count);
> -		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
> -			i915_vma_close(vma);
>  
> -		list_del(&lut->obj_link);
> -		list_del(&lut->ctx_link);
> +		mutex_lock(&ctx->mutex);
> +		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
> +		if (vma) {
> +			GEM_BUG_ON(vma->obj != obj);
> +			GEM_BUG_ON(!atomic_read(&vma->open_count));
> +			if (atomic_dec_and_test(&vma->open_count) &&
> +			    !i915_vma_is_ggtt(vma))
> +				i915_vma_close(vma);
> +		}
> +		mutex_unlock(&ctx->mutex);
>  
> +		i915_gem_context_put(lut->ctx);
>  		i915_lut_handle_free(lut);
>  		i915_gem_object_put(obj);
>  	}
> -
> -	mutex_unlock(&i915->drm.struct_mutex);
>  }
>  
>  static bool discard_backing_storage(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 67a992d6ee0c..9c161ba73558 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -24,7 +24,6 @@ struct drm_i915_gem_object;
>   */
>  struct i915_lut_handle {
>  	struct list_head obj_link;
> -	struct list_head ctx_link;
>  	struct i915_gem_context *ctx;
>  	u32 handle;
>  };
> diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> index 68d50da035e6..6578f2f6c3f8 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
> @@ -30,7 +30,6 @@ mock_context(struct drm_i915_private *i915,
>  	RCU_INIT_POINTER(ctx->engines, e);
>  
>  	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> -	INIT_LIST_HEAD(&ctx->handles_list);
>  	INIT_LIST_HEAD(&ctx->hw_id_link);
>  	mutex_init(&ctx->mutex);
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 25e29ee03935..8443e58dc160 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1991,10 +1991,12 @@ struct drm_i915_private {
>  		} timelines;
>  
>  		struct list_head active_rings;
> -		struct list_head closed_vma;
>  
>  		struct intel_wakeref wakeref;
>  
> +		struct list_head closed_vma;
> +		spinlock_t closed_lock; /* guards the list of closed_vma */
> +
>  		/**
>  		 * Is the GPU currently considered idle, or busy executing
>  		 * userspace requests? Whilst idle, we allow runtime power
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 2a9e8ecf2926..90923fa6603d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1732,6 +1732,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
>  
>  	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
>  	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
> +	spin_lock_init(&dev_priv->gt.closed_lock);
>  
>  	i915_gem_init__mm(dev_priv);
>  	i915_gem_init__pm(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 56be4b091bc7..f26eaa81e7fa 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1929,6 +1929,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
>  	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
>  
>  	INIT_LIST_HEAD(&vma->obj_link);
> +	INIT_LIST_HEAD(&vma->closed_link);
>  
>  	mutex_lock(&vma->vm->mutex);
>  	list_add(&vma->vm_link, &vma->vm->unbound_list);
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 5fbea0892f33..000e1a9b6750 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -61,7 +61,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
>  
>  	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
>  
> -	spin_lock(&gt->hwsp_lock);
> +	spin_lock_irq(&gt->hwsp_lock);

Why do we need this?

>  
>  	/* hwsp_free_list only contains HWSP that have available cachelines */
>  	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
> @@ -69,7 +69,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
>  	if (!hwsp) {
>  		struct i915_vma *vma;
>  
> -		spin_unlock(&gt->hwsp_lock);
> +		spin_unlock_irq(&gt->hwsp_lock);
>  
>  		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
>  		if (!hwsp)
> @@ -86,7 +86,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
>  		hwsp->free_bitmap = ~0ull;
>  		hwsp->gt = gt;
>  
> -		spin_lock(&gt->hwsp_lock);
> +		spin_lock_irq(&gt->hwsp_lock);
>  		list_add(&hwsp->free_link, &gt->hwsp_free_list);
>  	}
>  
> @@ -96,7 +96,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
>  	if (!hwsp->free_bitmap)
>  		list_del(&hwsp->free_link);
>  
> -	spin_unlock(&gt->hwsp_lock);
> +	spin_unlock_irq(&gt->hwsp_lock);
>  
>  	GEM_BUG_ON(hwsp->vma->private != hwsp);
>  	return hwsp->vma;
> @@ -105,8 +105,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
>  static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
>  {
>  	struct i915_gt_timelines *gt = hwsp->gt;
> +	unsigned long flags;
>  
> -	spin_lock(&gt->hwsp_lock);
> +	spin_lock_irqsave(&gt->hwsp_lock, flags);
>  
>  	/* As a cacheline becomes available, publish the HWSP on the freelist */
>  	if (!hwsp->free_bitmap)
> @@ -122,7 +123,7 @@ static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
>  		kfree(hwsp);
>  	}
>  
> -	spin_unlock(&gt->hwsp_lock);
> +	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
>  }
>  
>  static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 16d47f1f645a..392c2757c217 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -143,6 +143,8 @@ vma_create(struct drm_i915_gem_object *obj,
>  	vma->size = obj->base.size;
>  	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
>  
> +	INIT_LIST_HEAD(&vma->closed_link);
> +
>  	if (view && view->type != I915_GGTT_VIEW_NORMAL) {
>  		vma->ggtt_view = *view;
>  		if (view->type == I915_GGTT_VIEW_PARTIAL) {
> @@ -785,10 +787,10 @@ int __i915_vma_do_pin(struct i915_vma *vma,
>  
>  void i915_vma_close(struct i915_vma *vma)
>  {
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> +	struct drm_i915_private *i915 = vma->vm->i915;
> +	unsigned long flags;
>  
>  	GEM_BUG_ON(i915_vma_is_closed(vma));
> -	vma->flags |= I915_VMA_CLOSED;
>  
>  	/*
>  	 * We defer actually closing, unbinding and destroying the VMA until
> @@ -802,17 +804,26 @@ void i915_vma_close(struct i915_vma *vma)
>  	 * causing us to rebind the VMA once more. This ends up being a lot
>  	 * of wasted work for the steady state.
>  	 */
> -	list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma);
> +	spin_lock_irqsave(&i915->gt.closed_lock, flags);
> +	list_add(&vma->closed_link, &i915->gt.closed_vma);
> +	spin_unlock_irqrestore(&i915->gt.closed_lock, flags);
>  }
>  
> -void i915_vma_reopen(struct i915_vma *vma)
> +static void __i915_vma_remove_closed(struct i915_vma *vma)
>  {
> -	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
> +	struct drm_i915_private *i915 = vma->vm->i915;
>  
> -	if (vma->flags & I915_VMA_CLOSED) {
> -		vma->flags &= ~I915_VMA_CLOSED;
> -		list_del(&vma->closed_link);
> -	}
> +	if (!i915_vma_is_closed(vma))
> +		return;

After initial raciness alarm, it seems that list_empty
has READ_ONCE so this should work.

-Mika

> +
> +	spin_lock_irq(&i915->gt.closed_lock);
> +	list_del_init(&vma->closed_link);
> +	spin_unlock_irq(&i915->gt.closed_lock);
> +}
> +
> +void i915_vma_reopen(struct i915_vma *vma)
> +{
> +	__i915_vma_remove_closed(vma);
>  }
>  
>  static void __i915_vma_destroy(struct i915_vma *vma)
> @@ -846,8 +857,7 @@ void i915_vma_destroy(struct i915_vma *vma)
>  
>  	GEM_BUG_ON(i915_vma_is_pinned(vma));
>  
> -	if (i915_vma_is_closed(vma))
> -		list_del(&vma->closed_link);
> +	__i915_vma_remove_closed(vma);
>  
>  	WARN_ON(i915_vma_unbind(vma));
>  	GEM_BUG_ON(i915_vma_is_active(vma));
> @@ -859,12 +869,16 @@ void i915_vma_parked(struct drm_i915_private *i915)
>  {
>  	struct i915_vma *vma, *next;
>  
> +	spin_lock_irq(&i915->gt.closed_lock);
>  	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
> -		GEM_BUG_ON(!i915_vma_is_closed(vma));
> +		list_del_init(&vma->closed_link);
> +		spin_unlock_irq(&i915->gt.closed_lock);
> +
>  		i915_vma_destroy(vma);
> -	}
>  
> -	GEM_BUG_ON(!list_empty(&i915->gt.closed_vma));
> +		spin_lock_irq(&i915->gt.closed_lock);
> +	}
> +	spin_unlock_irq(&i915->gt.closed_lock);
>  }
>  
>  static void __i915_vma_iounmap(struct i915_vma *vma)
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 71ac7ee8620a..f0884cc8b2d3 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -69,7 +69,7 @@ struct i915_vma {
>  	 * handles (but same file) for execbuf, i.e. the number of aliases
>  	 * that exist in the ctx->handle_vmas LUT for this vma.
>  	 */
> -	unsigned int open_count;
> +	atomic_t open_count;
>  	unsigned long flags;
>  	/**
>  	 * How many users have pinned this object in GTT space.
> @@ -104,10 +104,9 @@ struct i915_vma {
>  
>  #define I915_VMA_GGTT		BIT(11)
>  #define I915_VMA_CAN_FENCE	BIT(12)
> -#define I915_VMA_CLOSED		BIT(13)
> -#define I915_VMA_USERFAULT_BIT	14
> +#define I915_VMA_USERFAULT_BIT	13
>  #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
> -#define I915_VMA_GGTT_WRITE	BIT(15)
> +#define I915_VMA_GGTT_WRITE	BIT(14)
>  
>  	struct i915_active active;
>  	struct i915_active_request last_fence;
> @@ -190,11 +189,6 @@ static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
>  	return vma->flags & I915_VMA_CAN_FENCE;
>  }
>  
> -static inline bool i915_vma_is_closed(const struct i915_vma *vma)
> -{
> -	return vma->flags & I915_VMA_CLOSED;
> -}
> -
>  static inline bool i915_vma_set_userfault(struct i915_vma *vma)
>  {
>  	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
> @@ -211,6 +205,11 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
>  	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
>  }
>  
> +static inline bool i915_vma_is_closed(const struct i915_vma *vma)
> +{
> +	return !list_empty(&vma->closed_link);
> +}
> +
>  static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
>  {
>  	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index e25b74a27f83..b7f3fbb4ae89 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -203,6 +203,7 @@ struct drm_i915_private *mock_gem_device(void)
>  
>  	INIT_LIST_HEAD(&i915->gt.active_rings);
>  	INIT_LIST_HEAD(&i915->gt.closed_vma);
> +	spin_lock_init(&i915->gt.closed_lock);
>  
>  	mutex_lock(&i915->drm.struct_mutex);
>  
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson May 22, 2019, 2:47 p.m. UTC | #2
Quoting Mika Kuoppala (2019-05-22 15:32:34)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > -static void free_engines_rcu(struct work_struct *wrk)
> > +static void free_engines_rcu(struct rcu_head *rcu)
> >  {
> > -     struct i915_gem_engines *e =
> > -             container_of(wrk, struct i915_gem_engines, rcu.work);
> > -     struct drm_i915_private *i915 = e->i915;
> > -
> > -     mutex_lock(&i915->drm.struct_mutex);
> > -     free_engines(e);
> > -     mutex_unlock(&i915->drm.struct_mutex);
> > +     free_engines(container_of(rcu, struct i915_gem_engines, rcu));
> >  }

> > @@ -1666,8 +1678,7 @@ set_engines(struct i915_gem_context *ctx,
> >       rcu_swap_protected(ctx->engines, set.engines, 1);
> >       mutex_unlock(&ctx->engines_mutex);
> >  
> > -     INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu);
> > -     queue_rcu_work(system_wq, &set.engines->rcu);
> > +     call_rcu(&set.engines->rcu, free_engines_rcu);
> 
> Why can we omit the queue now?

We only required the worker for acquiring struct_mutex. After the
removal, we can do the kref_put and intel_context destroy from softirq
context...

> > diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> > index 5fbea0892f33..000e1a9b6750 100644
> > --- a/drivers/gpu/drm/i915/i915_timeline.c
> > +++ b/drivers/gpu/drm/i915/i915_timeline.c
> > @@ -61,7 +61,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
> >  
> >       BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
> >  
> > -     spin_lock(&gt->hwsp_lock);
> > +     spin_lock_irq(&gt->hwsp_lock);
> 
> Why do we need this?

Because we can now reach other hwsp_lock callsites from softirq context.
We might be able to get away with _bh -- I may have overreacted to the
lockdep warning.
-Chris
Chris Wilson May 22, 2019, 2:52 p.m. UTC | #3
Quoting Mika Kuoppala (2019-05-22 15:32:34)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index 5016a3e1f863..a9608d9ced6a 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -95,24 +95,42 @@ void i915_lut_handle_free(struct i915_lut_handle *lut)
> >  
> >  static void lut_close(struct i915_gem_context *ctx)
> >  {
> > -     struct i915_lut_handle *lut, *ln;
> >       struct radix_tree_iter iter;
> >       void __rcu **slot;
> >  
> > -     list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
> > -             list_del(&lut->obj_link);
> > -             i915_lut_handle_free(lut);
> > -     }
> > -     INIT_LIST_HEAD(&ctx->handles_list);
> > +     lockdep_assert_held(&ctx->mutex);
> >  
> >       rcu_read_lock();
> >       radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
> >               struct i915_vma *vma = rcu_dereference_raw(*slot);
> > -
> > -             radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> > -
> > -             vma->open_count--;
> > -             i915_vma_put(vma);
> > +             struct drm_i915_gem_object *obj = vma->obj;
> > +             struct i915_lut_handle *lut;
> > +             bool found = false;
> > +
> > +             rcu_read_unlock();
> > +             i915_gem_object_lock(obj);
> > +             list_for_each_entry(lut, &obj->lut_list, obj_link) {
> > +                     if (lut->ctx != ctx)
> > +                             continue;
> > +
> > +                     if (lut->handle != iter.index)
> > +                             continue;
> > +
> > +                     list_del(&lut->obj_link);
> > +                     i915_lut_handle_free(lut);
> 
> You could free this after object_unlock if you want. Shrug.
> 
> > +                     found = true;
> > +                     break;
> > +             }
> > +             i915_gem_object_unlock(obj);
> > +             rcu_read_lock();
> > +
> > +             if (found) {
> > +                     radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> > +                     if (atomic_dec_and_test(&vma->open_count) &&
> > +                         !i915_vma_is_ggtt(vma))
> > +                             i915_vma_close(vma);
> > +                     i915_gem_object_put(obj);
> 
> I am strugging to pair this with get.

[snip]

> >  #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > index 13ab2a2e0099..fa0a880ca4de 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > @@ -801,9 +801,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> >       unsigned int i, batch;
> >       int err;
> >  
> > -     if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
> > -             return -ENOENT;
> > -
> >       if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
> >               return -EIO;
> >  
> > @@ -812,6 +809,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> >  
> >       batch = eb_batch_index(eb);
> >  
> > +     mutex_lock(&eb->gem_context->mutex);
> > +     if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
> > +             err = -ENOENT;
> > +             goto err_ctx;
> > +     }
> > +
> >       for (i = 0; i < eb->buffer_count; i++) {
> >               u32 handle = eb->exec[i].handle;
> >               struct i915_lut_handle *lut;
> > @@ -846,12 +849,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> >               }
> >  
> >               /* transfer ref to ctx */

It is this reference that we transfer from the object lookup into the
lut, that we must release if we throwaway the lut.

> > -             if (!vma->open_count++)
> > +             if (!atomic_fetch_inc(&vma->open_count))
> >                       i915_vma_reopen(vma);
> > -             list_add(&lut->obj_link, &obj->lut_list);
> > -             list_add(&lut->ctx_link, &eb->gem_context->handles_list);
> > -             lut->ctx = eb->gem_context;
> >               lut->handle = handle;
> > +             lut->ctx = eb->gem_context;
> > +
> > +             i915_gem_object_lock(obj);
> > +             list_add(&lut->obj_link, &obj->lut_list);
> > +             i915_gem_object_unlock(obj);
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 5016a3e1f863..a9608d9ced6a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -95,24 +95,42 @@  void i915_lut_handle_free(struct i915_lut_handle *lut)
 
 static void lut_close(struct i915_gem_context *ctx)
 {
-	struct i915_lut_handle *lut, *ln;
 	struct radix_tree_iter iter;
 	void __rcu **slot;
 
-	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
-		list_del(&lut->obj_link);
-		i915_lut_handle_free(lut);
-	}
-	INIT_LIST_HEAD(&ctx->handles_list);
+	lockdep_assert_held(&ctx->mutex);
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
-
-		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
-
-		vma->open_count--;
-		i915_vma_put(vma);
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_lut_handle *lut;
+		bool found = false;
+
+		rcu_read_unlock();
+		i915_gem_object_lock(obj);
+		list_for_each_entry(lut, &obj->lut_list, obj_link) {
+			if (lut->ctx != ctx)
+				continue;
+
+			if (lut->handle != iter.index)
+				continue;
+
+			list_del(&lut->obj_link);
+			i915_lut_handle_free(lut);
+			found = true;
+			break;
+		}
+		i915_gem_object_unlock(obj);
+		rcu_read_lock();
+
+		if (found) {
+			radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+			if (atomic_dec_and_test(&vma->open_count) &&
+			    !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+			i915_gem_object_put(obj);
+		}
 	}
 	rcu_read_unlock();
 }
@@ -250,15 +268,9 @@  static void free_engines(struct i915_gem_engines *e)
 	__free_engines(e, e->num_engines);
 }
 
-static void free_engines_rcu(struct work_struct *wrk)
+static void free_engines_rcu(struct rcu_head *rcu)
 {
-	struct i915_gem_engines *e =
-		container_of(wrk, struct i915_gem_engines, rcu.work);
-	struct drm_i915_private *i915 = e->i915;
-
-	mutex_lock(&i915->drm.struct_mutex);
-	free_engines(e);
-	mutex_unlock(&i915->drm.struct_mutex);
+	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
 }
 
 static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
@@ -271,7 +283,7 @@  static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
 	if (!e)
 		return ERR_PTR(-ENOMEM);
 
-	e->i915 = ctx->i915;
+	init_rcu_head(&e->rcu);
 	for_each_engine(engine, ctx->i915, id) {
 		struct intel_context *ce;
 
@@ -359,7 +371,10 @@  void i915_gem_context_release(struct kref *ref)
 
 static void context_close(struct i915_gem_context *ctx)
 {
+	mutex_lock(&ctx->mutex);
+
 	i915_gem_context_set_closed(ctx);
+	ctx->file_priv = ERR_PTR(-EBADF);
 
 	/*
 	 * This context will never again be assinged to HW, so we can
@@ -374,7 +389,7 @@  static void context_close(struct i915_gem_context *ctx)
 	 */
 	lut_close(ctx);
 
-	ctx->file_priv = ERR_PTR(-EBADF);
+	mutex_unlock(&ctx->mutex);
 	i915_gem_context_put(ctx);
 }
 
@@ -429,7 +444,6 @@  __create_context(struct drm_i915_private *dev_priv)
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* NB: Mark all slices as needing a remap so that when the context first
@@ -772,9 +786,7 @@  int i915_gem_context_open(struct drm_i915_private *i915,
 	return 0;
 
 err_ctx:
-	mutex_lock(&i915->drm.struct_mutex);
 	context_close(ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 err:
 	idr_destroy(&file_priv->vm_idr);
 	idr_destroy(&file_priv->context_idr);
@@ -787,8 +799,6 @@  void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex);
-
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
 	mutex_destroy(&file_priv->context_idr_lock);
@@ -1093,7 +1103,9 @@  static int set_ppgtt(struct drm_i915_file_private *file_priv,
 		goto unlock;
 
 	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+	mutex_lock(&ctx->mutex);
 	lut_close(ctx);
+	mutex_unlock(&ctx->mutex);
 
 	old = __set_ppgtt(ctx, ppgtt);
 
@@ -1612,7 +1624,7 @@  set_engines(struct i915_gem_context *ctx,
 	if (!set.engines)
 		return -ENOMEM;
 
-	set.engines->i915 = ctx->i915;
+	init_rcu_head(&set.engines->rcu);
 	for (n = 0; n < num_engines; n++) {
 		struct i915_engine_class_instance ci;
 		struct intel_engine_cs *engine;
@@ -1666,8 +1678,7 @@  set_engines(struct i915_gem_context *ctx,
 	rcu_swap_protected(ctx->engines, set.engines, 1);
 	mutex_unlock(&ctx->engines_mutex);
 
-	INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu);
-	queue_rcu_work(system_wq, &set.engines->rcu);
+	call_rcu(&set.engines->rcu, free_engines_rcu);
 
 	return 0;
 }
@@ -1865,7 +1876,7 @@  static int clone_engines(struct i915_gem_context *dst,
 	if (!clone)
 		goto err_unlock;
 
-	clone->i915 = dst->i915;
+	init_rcu_head(&clone->rcu);
 	for (n = 0; n < e->num_engines; n++) {
 		struct intel_engine_cs *engine;
 
@@ -2137,9 +2148,7 @@  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 err_ctx:
-	mutex_lock(&dev->struct_mutex);
 	context_close(ext_data.ctx);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
@@ -2164,10 +2173,7 @@  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!ctx)
 		return -ENOENT;
 
-	mutex_lock(&dev->struct_mutex);
 	context_close(ctx);
-	mutex_unlock(&dev->struct_mutex);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index fb965ded2508..3db7448b9732 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -30,8 +30,7 @@  struct i915_timeline;
 struct intel_ring;
 
 struct i915_gem_engines {
-	struct rcu_work rcu;
-	struct drm_i915_private *i915;
+	struct rcu_head rcu;
 	unsigned int num_engines;
 	struct intel_context *engines[];
 };
@@ -192,17 +191,12 @@  struct i915_gem_context {
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
 
-	/** handles_vma: rbtree to look up our context specific obj/vma for
+	/**
+	 * handles_vma: rbtree to look up our context specific obj/vma for
 	 * the user handle. (user handles are per fd, but the binding is
 	 * per vm, which may be one per context or shared with the global GTT)
 	 */
 	struct radix_tree_root handles_vma;
-
-	/** handles_list: reverse list of all the rbtree entries in use for
-	 * this context, which allows us to free all the allocations on
-	 * context close.
-	 */
-	struct list_head handles_list;
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 13ab2a2e0099..fa0a880ca4de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -801,9 +801,6 @@  static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	unsigned int i, batch;
 	int err;
 
-	if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
-		return -ENOENT;
-
 	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
 		return -EIO;
 
@@ -812,6 +809,12 @@  static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
 	batch = eb_batch_index(eb);
 
+	mutex_lock(&eb->gem_context->mutex);
+	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
+		err = -ENOENT;
+		goto err_ctx;
+	}
+
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
 		struct i915_lut_handle *lut;
@@ -846,12 +849,14 @@  static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		}
 
 		/* transfer ref to ctx */
-		if (!vma->open_count++)
+		if (!atomic_fetch_inc(&vma->open_count))
 			i915_vma_reopen(vma);
-		list_add(&lut->obj_link, &obj->lut_list);
-		list_add(&lut->ctx_link, &eb->gem_context->handles_list);
-		lut->ctx = eb->gem_context;
 		lut->handle = handle;
+		lut->ctx = eb->gem_context;
+
+		i915_gem_object_lock(obj);
+		list_add(&lut->obj_link, &obj->lut_list);
+		i915_gem_object_unlock(obj);
 
 add_vma:
 		err = eb_add_vma(eb, i, batch, vma);
@@ -864,6 +869,8 @@  static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
 	}
 
+	mutex_unlock(&eb->gem_context->mutex);
+
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
@@ -871,6 +878,8 @@  static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
+err_ctx:
+	mutex_unlock(&eb->gem_context->mutex);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 9b3bd9387b70..98dce0210154 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -126,39 +126,47 @@  void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
-	struct drm_i915_private *i915 = to_i915(gem->dev);
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
 	struct i915_lut_handle *lut, *ln;
+	LIST_HEAD(close);
 
-	mutex_lock(&i915->drm.struct_mutex);
-
+	i915_gem_object_lock(obj);
 	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
 		struct i915_gem_context *ctx = lut->ctx;
-		struct i915_vma *vma;
 
-		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
 		if (ctx->file_priv != fpriv)
 			continue;
 
-		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
-		GEM_BUG_ON(vma->obj != obj);
+		i915_gem_context_get(ctx);
+		list_move(&lut->obj_link, &close);
+	}
+	i915_gem_object_unlock(obj);
+
+	list_for_each_entry_safe(lut, ln, &close, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
 
-		/* We allow the process to have multiple handles to the same
+		/*
+		 * We allow the process to have multiple handles to the same
 		 * vma, in the same fd namespace, by virtue of flink/open.
 		 */
-		GEM_BUG_ON(!vma->open_count);
-		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
-			i915_vma_close(vma);
 
-		list_del(&lut->obj_link);
-		list_del(&lut->ctx_link);
+		mutex_lock(&ctx->mutex);
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		if (vma) {
+			GEM_BUG_ON(vma->obj != obj);
+			GEM_BUG_ON(!atomic_read(&vma->open_count));
+			if (atomic_dec_and_test(&vma->open_count) &&
+			    !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+		}
+		mutex_unlock(&ctx->mutex);
 
+		i915_gem_context_put(lut->ctx);
 		i915_lut_handle_free(lut);
 		i915_gem_object_put(obj);
 	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 static bool discard_backing_storage(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 67a992d6ee0c..9c161ba73558 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -24,7 +24,6 @@  struct drm_i915_gem_object;
  */
 struct i915_lut_handle {
 	struct list_head obj_link;
-	struct list_head ctx_link;
 	struct i915_gem_context *ctx;
 	u32 handle;
 };
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 68d50da035e6..6578f2f6c3f8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -30,7 +30,6 @@  mock_context(struct drm_i915_private *i915,
 	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 	mutex_init(&ctx->mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 25e29ee03935..8443e58dc160 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1991,10 +1991,12 @@  struct drm_i915_private {
 		} timelines;
 
 		struct list_head active_rings;
-		struct list_head closed_vma;
 
 		struct intel_wakeref wakeref;
 
+		struct list_head closed_vma;
+		spinlock_t closed_lock; /* guards the list of closed_vma */
+
 		/**
 		 * Is the GPU currently considered idle, or busy executing
 		 * userspace requests? Whilst idle, we allow runtime power
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2a9e8ecf2926..90923fa6603d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1732,6 +1732,7 @@  int i915_gem_init_early(struct drm_i915_private *dev_priv)
 
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
+	spin_lock_init(&dev_priv->gt.closed_lock);
 
 	i915_gem_init__mm(dev_priv);
 	i915_gem_init__pm(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 56be4b091bc7..f26eaa81e7fa 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1929,6 +1929,7 @@  static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
 
 	INIT_LIST_HEAD(&vma->obj_link);
+	INIT_LIST_HEAD(&vma->closed_link);
 
 	mutex_lock(&vma->vm->mutex);
 	list_add(&vma->vm_link, &vma->vm->unbound_list);
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 5fbea0892f33..000e1a9b6750 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -61,7 +61,7 @@  hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 
 	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
 
-	spin_lock(&gt->hwsp_lock);
+	spin_lock_irq(&gt->hwsp_lock);
 
 	/* hwsp_free_list only contains HWSP that have available cachelines */
 	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
@@ -69,7 +69,7 @@  hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 	if (!hwsp) {
 		struct i915_vma *vma;
 
-		spin_unlock(&gt->hwsp_lock);
+		spin_unlock_irq(&gt->hwsp_lock);
 
 		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
 		if (!hwsp)
@@ -86,7 +86,7 @@  hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 		hwsp->free_bitmap = ~0ull;
 		hwsp->gt = gt;
 
-		spin_lock(&gt->hwsp_lock);
+		spin_lock_irq(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
 	}
 
@@ -96,7 +96,7 @@  hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 	if (!hwsp->free_bitmap)
 		list_del(&hwsp->free_link);
 
-	spin_unlock(&gt->hwsp_lock);
+	spin_unlock_irq(&gt->hwsp_lock);
 
 	GEM_BUG_ON(hwsp->vma->private != hwsp);
 	return hwsp->vma;
@@ -105,8 +105,9 @@  hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
 {
 	struct i915_gt_timelines *gt = hwsp->gt;
+	unsigned long flags;
 
-	spin_lock(&gt->hwsp_lock);
+	spin_lock_irqsave(&gt->hwsp_lock, flags);
 
 	/* As a cacheline becomes available, publish the HWSP on the freelist */
 	if (!hwsp->free_bitmap)
@@ -122,7 +123,7 @@  static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
 		kfree(hwsp);
 	}
 
-	spin_unlock(&gt->hwsp_lock);
+	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
 }
 
 static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 16d47f1f645a..392c2757c217 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -143,6 +143,8 @@  vma_create(struct drm_i915_gem_object *obj,
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
+	INIT_LIST_HEAD(&vma->closed_link);
+
 	if (view && view->type != I915_GGTT_VIEW_NORMAL) {
 		vma->ggtt_view = *view;
 		if (view->type == I915_GGTT_VIEW_PARTIAL) {
@@ -785,10 +787,10 @@  int __i915_vma_do_pin(struct i915_vma *vma,
 
 void i915_vma_close(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_private *i915 = vma->vm->i915;
+	unsigned long flags;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
-	vma->flags |= I915_VMA_CLOSED;
 
 	/*
 	 * We defer actually closing, unbinding and destroying the VMA until
@@ -802,17 +804,26 @@  void i915_vma_close(struct i915_vma *vma)
 	 * causing us to rebind the VMA once more. This ends up being a lot
 	 * of wasted work for the steady state.
 	 */
-	list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma);
+	spin_lock_irqsave(&i915->gt.closed_lock, flags);
+	list_add(&vma->closed_link, &i915->gt.closed_vma);
+	spin_unlock_irqrestore(&i915->gt.closed_lock, flags);
 }
 
-void i915_vma_reopen(struct i915_vma *vma)
+static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_private *i915 = vma->vm->i915;
 
-	if (vma->flags & I915_VMA_CLOSED) {
-		vma->flags &= ~I915_VMA_CLOSED;
-		list_del(&vma->closed_link);
-	}
+	if (!i915_vma_is_closed(vma))
+		return;
+
+	spin_lock_irq(&i915->gt.closed_lock);
+	list_del_init(&vma->closed_link);
+	spin_unlock_irq(&i915->gt.closed_lock);
+}
+
+void i915_vma_reopen(struct i915_vma *vma)
+{
+	__i915_vma_remove_closed(vma);
 }
 
 static void __i915_vma_destroy(struct i915_vma *vma)
@@ -846,8 +857,7 @@  void i915_vma_destroy(struct i915_vma *vma)
 
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
-	if (i915_vma_is_closed(vma))
-		list_del(&vma->closed_link);
+	__i915_vma_remove_closed(vma);
 
 	WARN_ON(i915_vma_unbind(vma));
 	GEM_BUG_ON(i915_vma_is_active(vma));
@@ -859,12 +869,16 @@  void i915_vma_parked(struct drm_i915_private *i915)
 {
 	struct i915_vma *vma, *next;
 
+	spin_lock_irq(&i915->gt.closed_lock);
 	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
-		GEM_BUG_ON(!i915_vma_is_closed(vma));
+		list_del_init(&vma->closed_link);
+		spin_unlock_irq(&i915->gt.closed_lock);
+
 		i915_vma_destroy(vma);
-	}
 
-	GEM_BUG_ON(!list_empty(&i915->gt.closed_vma));
+		spin_lock_irq(&i915->gt.closed_lock);
+	}
+	spin_unlock_irq(&i915->gt.closed_lock);
 }
 
 static void __i915_vma_iounmap(struct i915_vma *vma)
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 71ac7ee8620a..f0884cc8b2d3 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -69,7 +69,7 @@  struct i915_vma {
 	 * handles (but same file) for execbuf, i.e. the number of aliases
 	 * that exist in the ctx->handle_vmas LUT for this vma.
 	 */
-	unsigned int open_count;
+	atomic_t open_count;
 	unsigned long flags;
 	/**
 	 * How many users have pinned this object in GTT space.
@@ -104,10 +104,9 @@  struct i915_vma {
 
 #define I915_VMA_GGTT		BIT(11)
 #define I915_VMA_CAN_FENCE	BIT(12)
-#define I915_VMA_CLOSED		BIT(13)
-#define I915_VMA_USERFAULT_BIT	14
+#define I915_VMA_USERFAULT_BIT	13
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
-#define I915_VMA_GGTT_WRITE	BIT(15)
+#define I915_VMA_GGTT_WRITE	BIT(14)
 
 	struct i915_active active;
 	struct i915_active_request last_fence;
@@ -190,11 +189,6 @@  static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
 	return vma->flags & I915_VMA_CAN_FENCE;
 }
 
-static inline bool i915_vma_is_closed(const struct i915_vma *vma)
-{
-	return vma->flags & I915_VMA_CLOSED;
-}
-
 static inline bool i915_vma_set_userfault(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
@@ -211,6 +205,11 @@  static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
+static inline bool i915_vma_is_closed(const struct i915_vma *vma)
+{
+	return !list_empty(&vma->closed_link);
+}
+
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index e25b74a27f83..b7f3fbb4ae89 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -203,6 +203,7 @@  struct drm_i915_private *mock_gem_device(void)
 
 	INIT_LIST_HEAD(&i915->gt.active_rings);
 	INIT_LIST_HEAD(&i915->gt.closed_vma);
+	spin_lock_init(&i915->gt.closed_lock);
 
 	mutex_lock(&i915->drm.struct_mutex);