[3/3] drm/i915: Replace execbuf vma ht with an idr

Message ID	20170616160251.6163-3-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 16 Jun 2017 17:02:51 +0100 Message-Id: <20170616160251.6163-3-chris@chris-wilson.co.uk> In-Reply-To: <20170616160251.6163-1-chris@chris-wilson.co.uk> References: <20170616160251.6163-1-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Message ID

20170616160251.6163-3-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Fri, 16 Jun 2017 17:02:51 +0100
Message-Id: <20170616160251.6163-3-chris@chris-wilson.co.uk>
In-Reply-To: <20170616160251.6163-1-chris@chris-wilson.co.uk>
References: <20170616160251.6163-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 3/3] drm/i915: Replace execbuf vma ht with an idr
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Commit Message

Chris Wilson June 16, 2017, 4:02 p.m. UTC

This was the competing idea long ago, but it was only with the rewrite
of the idr as an radixtree and using the radixtree directly ourselves,
and the realisation that we can store the vma directly in the radixtree
and only need a list for the reverse mapping, that made the patch
performant enough to displace using a hashtable.  Though the vma ht is
fast and doesn't require and extra allocation (as we can embed the node
inside the vma), it does require a thread for resizing and serialization.
That is hairy enough to investigate alternative and favour them if
equivalent in performance. One advantage of allocating an indirection
entry is that we can support a single shared bo between many clients,
something that was done on a first-come first-serve basis for shared GGTT
vma previously. To offset the extra allocations, we create yet another
kmem_cache for them.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  6 --
 drivers/gpu/drm/i915/i915_drv.h               |  1 +
 drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++--
 drivers/gpu/drm/i915/i915_gem_context.c       | 86 +++++----------------------
 drivers/gpu/drm/i915/i915_gem_context.h       | 30 ++--------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 70 +++++++---------------
 drivers/gpu/drm/i915/i915_gem_object.h        | 10 +++-
 drivers/gpu/drm/i915/i915_vma.c               | 22 -------
 drivers/gpu/drm/i915/i915_vma.h               |  4 --
 drivers/gpu/drm/i915/selftests/mock_context.c | 15 ++---
 lib/radix-tree.c                              |  1 +
 11 files changed, 81 insertions(+), 196 deletions(-)

Comments

Tvrtko Ursulin June 20, 2017, 11:26 a.m. UTC | #1

On 16/06/2017 17:02, Chris Wilson wrote:
> This was the competing idea long ago, but it was only with the rewrite
> of the idr as an radixtree and using the radixtree directly ourselves,
> and the realisation that we can store the vma directly in the radixtree
> and only need a list for the reverse mapping, that made the patch
> performant enough to displace using a hashtable.  Though the vma ht is
> fast and doesn't require and extra allocation (as we can embed the node
> inside the vma), it does require a thread for resizing and serialization.
> That is hairy enough to investigate alternative and favour them if
> equivalent in performance. One advantage of allocating an indirection
> entry is that we can support a single shared bo between many clients,
> something that was done on a first-come first-serve basis for shared GGTT
> vma previously. To offset the extra allocations, we create yet another
> kmem_cache for them.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c           |  6 --
>   drivers/gpu/drm/i915/i915_drv.h               |  1 +
>   drivers/gpu/drm/i915/i915_gem.c               | 32 ++++++++--
>   drivers/gpu/drm/i915/i915_gem_context.c       | 86 +++++----------------------
>   drivers/gpu/drm/i915/i915_gem_context.h       | 30 ++--------
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c    | 70 +++++++---------------
>   drivers/gpu/drm/i915/i915_gem_object.h        | 10 +++-
>   drivers/gpu/drm/i915/i915_vma.c               | 22 -------
>   drivers/gpu/drm/i915/i915_vma.h               |  4 --
>   drivers/gpu/drm/i915/selftests/mock_context.c | 15 ++---
>   lib/radix-tree.c                              |  1 +
>   11 files changed, 81 insertions(+), 196 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 4577b0af6886..a6ba2100bb88 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1998,12 +1998,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
>   			seq_putc(m, '\n');
>   		}
>   
> -		seq_printf(m,
> -			   "\tvma hashtable size=%u (actual %lu), count=%u\n",
> -			   ctx->vma_lut.ht_size,
> -			   BIT(ctx->vma_lut.ht_bits),
> -			   ctx->vma_lut.ht_count);
> -
>   		seq_putc(m, '\n');
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cd15697c1ca4..de8dd0c06ce6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2083,6 +2083,7 @@ struct drm_i915_private {
>   
>   	struct kmem_cache *objects;
>   	struct kmem_cache *vmas;
> +	struct kmem_cache *luts;
>   	struct kmem_cache *requests;
>   	struct kmem_cache *dependencies;
>   	struct kmem_cache *priorities;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7dcac3bfb771..3388c65b54a3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3252,19 +3252,33 @@ i915_gem_idle_work_handler(struct work_struct *work)
>   
>   void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>   {
> +	struct drm_i915_private *i915 = to_i915(gem->dev);
>   	struct drm_i915_gem_object *obj = to_intel_bo(gem);
>   	struct drm_i915_file_private *fpriv = file->driver_priv;
>   	struct i915_vma *vma, *vn;
> +	struct i915_lut_handle *lut, *ln;
>   
>   	mutex_lock(&obj->base.dev->struct_mutex);
> +
> +	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
> +		struct i915_gem_context *ctx = lut->ctx;
> +
> +		if (ctx->file_priv != fpriv)
> +			continue;
> +
> +		radix_tree_delete(&ctx->handles_vma, lut->handle);
> +		i915_gem_object_put(obj);
> +
> +		list_del(&lut->obj_link);
> +		list_del(&lut->ctx_link);
> +
> +		kmem_cache_free(i915->luts, lut);
> +	}
> +
>   	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
>   		if (vma->vm->file == fpriv)
>   			i915_vma_close(vma);
>   
> -	vma = obj->vma_hashed;
> -	if (vma && vma->ctx->file_priv == fpriv)
> -		i915_vma_unlink_ctx(vma);
> -
>   	if (i915_gem_object_is_active(obj) &&
>   	    !i915_gem_object_has_active_reference(obj)) {
>   		i915_gem_object_set_active_reference(obj);
> @@ -4259,6 +4273,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   	INIT_LIST_HEAD(&obj->global_link);
>   	INIT_LIST_HEAD(&obj->userfault_link);
>   	INIT_LIST_HEAD(&obj->vma_list);
> +	INIT_LIST_HEAD(&obj->lut_list);
>   	INIT_LIST_HEAD(&obj->batch_pool_link);
>   
>   	obj->ops = ops;
> @@ -4897,12 +4912,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>   	if (!dev_priv->vmas)
>   		goto err_objects;
>   
> +	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
> +	if (!dev_priv->luts)
> +		goto err_vmas;
> +
>   	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
>   					SLAB_HWCACHE_ALIGN |
>   					SLAB_RECLAIM_ACCOUNT |
>   					SLAB_TYPESAFE_BY_RCU);
>   	if (!dev_priv->requests)
> -		goto err_vmas;
> +		goto err_luts;
>   
>   	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
>   					    SLAB_HWCACHE_ALIGN |
> @@ -4949,6 +4968,8 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
>   	kmem_cache_destroy(dev_priv->dependencies);
>   err_requests:
>   	kmem_cache_destroy(dev_priv->requests);
> +err_luts:
> +	kmem_cache_destroy(dev_priv->luts);
>   err_vmas:
>   	kmem_cache_destroy(dev_priv->vmas);
>   err_objects:
> @@ -4971,6 +4992,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
>   	kmem_cache_destroy(dev_priv->priorities);
>   	kmem_cache_destroy(dev_priv->dependencies);
>   	kmem_cache_destroy(dev_priv->requests);
> +	kmem_cache_destroy(dev_priv->luts);
>   	kmem_cache_destroy(dev_priv->vmas);
>   	kmem_cache_destroy(dev_priv->objects);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 39ed58a21fc1..dc182a8dd4e8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -93,69 +93,21 @@
>   
>   #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>   
> -/* Initial size (as log2) to preallocate the handle->object hashtable */
> -#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
> -
> -static void resize_vma_ht(struct work_struct *work)
> +static void lut_close(struct i915_gem_context *ctx)
>   {
> -	struct i915_gem_context_vma_lut *lut =
> -		container_of(work, typeof(*lut), resize);
> -	unsigned int bits, new_bits, size, i;
> -	struct hlist_head *new_ht;
> -
> -	GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
> -
> -	bits = 1 + ilog2(4*lut->ht_count/3 + 1);
> -	new_bits = min_t(unsigned int,
> -			 max(bits, VMA_HT_BITS),
> -			 sizeof(unsigned int) * BITS_PER_BYTE - 1);
> -	if (new_bits == lut->ht_bits)
> -		goto out;
> -
> -	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
> -	if (!new_ht)
> -		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
> -	if (!new_ht)
> -		/* Pretend resize succeeded and stop calling us for a bit! */
> -		goto out;
> +	struct i915_lut_handle *lut, *ln;
> +	struct radix_tree_iter iter;
> +	void __rcu **slot;
>   
> -	size = BIT(lut->ht_bits);
> -	for (i = 0; i < size; i++) {
> -		struct i915_vma *vma;
> -		struct hlist_node *tmp;
> -
> -		hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
> -			hlist_add_head(&vma->ctx_node,
> -				       &new_ht[hash_32(vma->ctx_handle,
> -						       new_bits)]);
> +	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
> +		list_del(&lut->obj_link);
I assume you deliberately did not bother unlinking from the ctx list 
since all that is getting zapped anyway?

> +		kmem_cache_free(ctx->i915->luts, lut);
>   	}
> -	kvfree(lut->ht);
> -	lut->ht = new_ht;
> -	lut->ht_bits = new_bits;
> -out:
> -	smp_store_release(&lut->ht_size, BIT(bits));
> -	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
> -}
>   
> -static void vma_lut_free(struct i915_gem_context *ctx)
> -{
> -	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
> -	unsigned int i, size;
> -
> -	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
> -		cancel_work_sync(&lut->resize);
> -
> -	size = BIT(lut->ht_bits);
> -	for (i = 0; i < size; i++) {
> -		struct i915_vma *vma;
> -
> -		hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
> -			vma->obj->vma_hashed = NULL;
> -			vma->ctx = NULL;
> -			i915_vma_put(vma);
> -		}
> +	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
> +		i915_vma_put(rcu_dereference_raw(*slot));
> +		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
>   	}
> -	kvfree(lut->ht);
>   }
>   
>   void i915_gem_context_free(struct kref *ctx_ref)
> @@ -167,7 +119,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
>   	trace_i915_context_free(ctx);
>   	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
>   
> -	vma_lut_free(ctx);
>   	i915_ppgtt_put(ctx->ppgtt);
>   
>   	for (i = 0; i < I915_NUM_ENGINES; i++) {
> @@ -195,8 +146,11 @@ void i915_gem_context_free(struct kref *ctx_ref)
>   static void context_close(struct i915_gem_context *ctx)
>   {
>   	i915_gem_context_set_closed(ctx);
> +
> +	lut_close(ctx);
>   	if (ctx->ppgtt)
>   		i915_ppgtt_close(&ctx->ppgtt->base);
> +
>   	ctx->file_priv = ERR_PTR(-EBADF);
>   	i915_gem_context_put(ctx);
>   }
> @@ -269,16 +223,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   	ctx->i915 = dev_priv;
>   	ctx->priority = I915_PRIORITY_NORMAL;
>   
> -	ctx->vma_lut.ht_bits = VMA_HT_BITS;
> -	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
> -	BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
> -	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
> -				  sizeof(*ctx->vma_lut.ht),
> -				  GFP_KERNEL);
> -	if (!ctx->vma_lut.ht)
> -		goto err_out;
> -
> -	INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
> +	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> +	INIT_LIST_HEAD(&ctx->handles_list);
>   
>   	/* Default context will never have a file_priv */
>   	ret = DEFAULT_CONTEXT_HANDLE;
> @@ -328,8 +274,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
>   	put_pid(ctx->pid);
>   	idr_remove(&file_priv->context_idr, ctx->user_handle);
>   err_lut:
> -	kvfree(ctx->vma_lut.ht);
> -err_out:
>   	context_close(ctx);
>   	return ERR_PTR(ret);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 82c99ba92ad3..cd0c5d8e0db7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -27,6 +27,7 @@
>   
>   #include <linux/bitops.h>
>   #include <linux/list.h>
> +#include <linux/radix-tree.h>
>   
>   struct pid;
>   
> @@ -143,32 +144,6 @@ struct i915_gem_context {
>   	/** ggtt_offset_bias: placement restriction for context objects */
>   	u32 ggtt_offset_bias;
>   
> -	struct i915_gem_context_vma_lut {
> -		/** ht_size: last request size to allocate the hashtable for. */
> -		unsigned int ht_size;
> -#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
> -		/** ht_bits: real log2(size) of hashtable. */
> -		unsigned int ht_bits;
> -		/** ht_count: current number of entries inside the hashtable */
> -		unsigned int ht_count;
> -
> -		/** ht: the array of buckets comprising the simple hashtable */
> -		struct hlist_head *ht;
> -
> -		/**
> -		 * resize: After an execbuf completes, we check the load factor
> -		 * of the hashtable. If the hashtable is too full, or too empty,
> -		 * we schedule a task to resize the hashtable. During the
> -		 * resize, the entries are moved between different buckets and
> -		 * so we cannot simultaneously read the hashtable as it is
> -		 * being resized (unlike rhashtable). Therefore we treat the
> -		 * active work as a strong barrier, pausing a subsequent
> -		 * execbuf to wait for the resize worker to complete, if
> -		 * required.
> -		 */
> -		struct work_struct resize;
> -	} vma_lut;
> -
>   	/** engine: per-engine logical HW state */
>   	struct intel_context {
>   		struct i915_vma *state;
> @@ -199,6 +174,9 @@ struct i915_gem_context {
>   
>   	/** remap_slice: Bitmask of cache lines that need remapping */
>   	u8 remap_slice;
> +
> +	struct radix_tree_root handles_vma;
> +	struct list_head handles_list;

Add kernel doc for the two please.

>   };
>   
>   static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 18d6581bc6c5..adcea065985c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -637,19 +637,6 @@ static int eb_reserve(struct i915_execbuffer *eb)
>   	} while (1);
>   }
>   
> -static inline struct hlist_head *
> -ht_head(const  struct i915_gem_context_vma_lut *lut, u32 handle)
> -{
> -	return &lut->ht[hash_32(handle, lut->ht_bits)];
> -}
> -
> -static inline bool
> -ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
> -{
> -	return (4*lut->ht_count > 3*lut->ht_size ||
> -		4*lut->ht_count + 1 < lut->ht_size);
> -}
> -
>   static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
>   {
>   	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
> @@ -684,31 +671,22 @@ static int eb_select_context(struct i915_execbuffer *eb)
>   
>   static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
>   {
> -	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
> -	struct drm_i915_gem_object *uninitialized_var(obj);
> +	struct drm_i915_gem_object *obj;
> +	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
>   	unsigned int i;
>   	int err;
>   
>   	INIT_LIST_HEAD(&eb->relocs);
>   	INIT_LIST_HEAD(&eb->unbound);
>   
> -	if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
> -		flush_work(&lut->resize);
> -	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
> -
>   	for (i = 0; i < eb->buffer_count; i++) {
>   		u32 handle = eb->exec[i].handle;
> -		struct hlist_head *hl = ht_head(lut, handle);
>   		struct i915_vma *vma;
> +		struct i915_lut_handle *lut;
>   
> -		hlist_for_each_entry(vma, hl, ctx_node) {
> -			GEM_BUG_ON(vma->ctx != eb->ctx);
> -
> -			if (vma->ctx_handle != handle)
> -				continue;
> -
> +		vma = radix_tree_lookup(handles_vma, handle);
> +		if (likely(vma))
>   			goto add_vma;
> -		}
>   
>   		obj = i915_gem_object_lookup(eb->file, handle);
>   		if (unlikely(!obj)) {
> @@ -716,43 +694,36 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
>   			goto err_vma;
>   		}
>   
> -		flags |= __EXEC_OBJECT_HAS_REF;
> -
>   		vma = i915_vma_instance(obj, eb->vm, NULL);
>   		if (unlikely(IS_ERR(vma))) {
>   			err = PTR_ERR(vma);
>   			goto err_obj;
>   		}
>   
> -		/* First come, first served */
> -		if (!vma->ctx) {
> -			vma->ctx = eb->ctx;
> -			vma->ctx_handle = handle;
> -			hlist_add_head(&vma->ctx_node, hl);
> -			lut->ht_count++;
> -			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
> -			if (i915_vma_is_ggtt(vma)) {
> -				GEM_BUG_ON(obj->vma_hashed);
> -				obj->vma_hashed = vma;
> -			}
> +		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
> +		if (unlikely(!lut)) {
> +			err = -ENOMEM;
> +			goto err_obj;
> +		}
>   
> -			/* transfer ref to ctx */
> -			flags &= ~__EXEC_OBJECT_HAS_REF;
> +		/* transfer ref to ctx */
> +		err = radix_tree_insert(handles_vma, handle, vma);
> +		if (unlikely(err)) {
> +			kfree(lut);
> +			goto err_obj;
>   		}
>   
> +		list_add(&lut->obj_link, &obj->lut_list);
> +		list_add(&lut->ctx_link, &eb->ctx->handles_list);
> +		lut->ctx = eb->ctx;
> +		lut->handle = handle;
> +
>   add_vma:
>   		err = eb_add_vma(eb, i, vma, flags);
>   		if (unlikely(err))
>   			goto err_vma;
>   	}
>   
> -	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
> -		if (ht_needs_resize(lut))
> -			queue_work(system_highpri_wq, &lut->resize);
> -		else
> -			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
> -	}
> -
>   	/* take note of the batch buffer before we might reorder the lists */
>   	i = eb_batch_index(eb);
>   	eb->batch = eb->vma[i];
> @@ -778,7 +749,6 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
>   	i915_gem_object_put(obj);
>   err_vma:
>   	eb->vma[i] = NULL;
> -	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
>   	return err;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 5b19a4916a4d..8188d3cf5e11 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -35,6 +35,13 @@
>   
>   #include "i915_selftest.h"
>   
> +struct i915_lut_handle {
> +	struct list_head obj_link;
> +	struct list_head ctx_link;
> +	struct i915_gem_context *ctx;
> +	u32 handle;
> +};
> +
>   struct drm_i915_gem_object_ops {
>   	unsigned int flags;
>   #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
> @@ -86,7 +93,8 @@ struct drm_i915_gem_object {
>   	 * They are also added to @vma_list for easy iteration.
>   	 */
>   	struct rb_root vma_tree;
> -	struct i915_vma *vma_hashed;
> +
> +	struct list_head lut_list;

And some kernel doc here please.

>   
>   	/** Stolen memory for this object, instead of being backed by shmem. */
>   	struct drm_mm_node *stolen;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 532c709febbd..93329a809764 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -591,33 +591,11 @@ static void i915_vma_destroy(struct i915_vma *vma)
>   	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
>   }
>   
> -void i915_vma_unlink_ctx(struct i915_vma *vma)
> -{
> -	struct i915_gem_context *ctx = vma->ctx;
> -
> -	if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
> -		cancel_work_sync(&ctx->vma_lut.resize);
> -		ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
> -	}
> -
> -	__hlist_del(&vma->ctx_node);
> -	ctx->vma_lut.ht_count--;
> -
> -	if (i915_vma_is_ggtt(vma))
> -		vma->obj->vma_hashed = NULL;
> -	vma->ctx = NULL;
> -
> -	i915_vma_put(vma);
> -}
> -
>   void i915_vma_close(struct i915_vma *vma)
>   {
>   	GEM_BUG_ON(i915_vma_is_closed(vma));
>   	vma->flags |= I915_VMA_CLOSED;
>   
> -	if (vma->ctx)
> -		i915_vma_unlink_ctx(vma);
> -
>   	list_del(&vma->obj_link);
>   	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
>   
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 5f1356e93c6c..6d35cf7ddcfb 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -115,10 +115,6 @@ struct i915_vma {
>   	unsigned int *exec_flags;
>   	struct hlist_node exec_node;
>   	u32 exec_handle;
> -
> -	struct i915_gem_context *ctx;
> -	struct hlist_node ctx_node;
> -	u32 ctx_handle;
>   };
>   
>   struct i915_vma *
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index f8b9cc212b02..3ee31ef84607 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -40,18 +40,13 @@ mock_context(struct drm_i915_private *i915,
>   	INIT_LIST_HEAD(&ctx->link);
>   	ctx->i915 = i915;
>   
> -	ctx->vma_lut.ht_bits = VMA_HT_BITS;
> -	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
> -	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
> -				  sizeof(*ctx->vma_lut.ht),
> -				  GFP_KERNEL);
> -	if (!ctx->vma_lut.ht)
> -		goto err_free;
> +	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> +	INIT_LIST_HEAD(&ctx->handles_list);
>   
>   	ret = ida_simple_get(&i915->context_hw_ida,
>   			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
>   	if (ret < 0)
> -		goto err_vma_ht;
> +		goto err_handles;
>   	ctx->hw_id = ret;
>   
>   	if (name) {
> @@ -66,9 +61,7 @@ mock_context(struct drm_i915_private *i915,
>   
>   	return ctx;
>   
> -err_vma_ht:
> -	kvfree(ctx->vma_lut.ht);
> -err_free:
> +err_handles:
>   	kfree(ctx);
>   	return NULL;
>   
> diff --git a/lib/radix-tree.c b/lib/radix-tree.c
> index 898e87998417..3527eb364964 100644
> --- a/lib/radix-tree.c
> +++ b/lib/radix-tree.c
> @@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
>   	if (__radix_tree_delete(root, iter->node, slot))
>   		iter->index = iter->next_index;
>   }
> +EXPORT_SYMBOL(radix_tree_iter_delete);
>   
>   /**
>    * radix_tree_delete_item - delete an item from a radix tree
> 

Very nice! Much shorter and more elegant (and so readable) than the 
previous state of affairs.

With the kerneldoc added:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Chris Wilson June 21, 2017, 10:30 a.m. UTC | #2

Quoting Tvrtko Ursulin (2017-06-20 12:26:44)
> 
> On 16/06/2017 17:02, Chris Wilson wrote:
> > -static void resize_vma_ht(struct work_struct *work)
> > +static void lut_close(struct i915_gem_context *ctx)
> >   {
> > -     struct i915_gem_context_vma_lut *lut =
> > -             container_of(work, typeof(*lut), resize);
> > -     unsigned int bits, new_bits, size, i;
> > -     struct hlist_head *new_ht;
> > -
> > -     GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
> > -
> > -     bits = 1 + ilog2(4*lut->ht_count/3 + 1);
> > -     new_bits = min_t(unsigned int,
> > -                      max(bits, VMA_HT_BITS),
> > -                      sizeof(unsigned int) * BITS_PER_BYTE - 1);
> > -     if (new_bits == lut->ht_bits)
> > -             goto out;
> > -
> > -     new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
> > -     if (!new_ht)
> > -             new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
> > -     if (!new_ht)
> > -             /* Pretend resize succeeded and stop calling us for a bit! */
> > -             goto out;
> > +     struct i915_lut_handle *lut, *ln;
> > +     struct radix_tree_iter iter;
> > +     void __rcu **slot;
> >   
> > -     size = BIT(lut->ht_bits);
> > -     for (i = 0; i < size; i++) {
> > -             struct i915_vma *vma;
> > -             struct hlist_node *tmp;
> > -
> > -             hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
> > -                     hlist_add_head(&vma->ctx_node,
> > -                                    &new_ht[hash_32(vma->ctx_handle,
> > -                                                    new_bits)]);
> > +     list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
> > +             list_del(&lut->obj_link);
> I assume you deliberately did not bother unlinking from the ctx list 
> since all that is getting zapped anyway?

Yes, it is not obvious though. The context is marked as closed so we
should thereafter prevent further usage of the ctx->handles_list.

At the moment, this is governed by struct_mutex but in the future it is
likely to be replaced by file->mutex or ctx->mutex.

If I add a GEM_BUG_ON(eb->ctx->closed) to eb_lookup_vmas that should
help explain the relationship.
-Chris

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4577b0af6886..a6ba2100bb88 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1998,12 +1998,6 @@  static int i915_context_status(struct seq_file *m, void *unused)
 			seq_putc(m, '\n');
 		}
 
-		seq_printf(m,
-			   "\tvma hashtable size=%u (actual %lu), count=%u\n",
-			   ctx->vma_lut.ht_size,
-			   BIT(ctx->vma_lut.ht_bits),
-			   ctx->vma_lut.ht_count);
-
 		seq_putc(m, '\n');
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cd15697c1ca4..de8dd0c06ce6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2083,6 +2083,7 @@  struct drm_i915_private {
 
 	struct kmem_cache *objects;
 	struct kmem_cache *vmas;
+	struct kmem_cache *luts;
 	struct kmem_cache *requests;
 	struct kmem_cache *dependencies;
 	struct kmem_cache *priorities;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7dcac3bfb771..3388c65b54a3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3252,19 +3252,33 @@  i915_gem_idle_work_handler(struct work_struct *work)
 
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 {
+	struct drm_i915_private *i915 = to_i915(gem->dev);
 	struct drm_i915_gem_object *obj = to_intel_bo(gem);
 	struct drm_i915_file_private *fpriv = file->driver_priv;
 	struct i915_vma *vma, *vn;
+	struct i915_lut_handle *lut, *ln;
 
 	mutex_lock(&obj->base.dev->struct_mutex);
+
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		radix_tree_delete(&ctx->handles_vma, lut->handle);
+		i915_gem_object_put(obj);
+
+		list_del(&lut->obj_link);
+		list_del(&lut->ctx_link);
+
+		kmem_cache_free(i915->luts, lut);
+	}
+
 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
 		if (vma->vm->file == fpriv)
 			i915_vma_close(vma);
 
-	vma = obj->vma_hashed;
-	if (vma && vma->ctx->file_priv == fpriv)
-		i915_vma_unlink_ctx(vma);
-
 	if (i915_gem_object_is_active(obj) &&
 	    !i915_gem_object_has_active_reference(obj)) {
 		i915_gem_object_set_active_reference(obj);
@@ -4259,6 +4273,7 @@  void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&obj->global_link);
 	INIT_LIST_HEAD(&obj->userfault_link);
 	INIT_LIST_HEAD(&obj->vma_list);
+	INIT_LIST_HEAD(&obj->lut_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
@@ -4897,12 +4912,16 @@  i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (!dev_priv->vmas)
 		goto err_objects;
 
+	dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0);
+	if (!dev_priv->luts)
+		goto err_vmas;
+
 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request,
 					SLAB_HWCACHE_ALIGN |
 					SLAB_RECLAIM_ACCOUNT |
 					SLAB_TYPESAFE_BY_RCU);
 	if (!dev_priv->requests)
-		goto err_vmas;
+		goto err_luts;
 
 	dev_priv->dependencies = KMEM_CACHE(i915_dependency,
 					    SLAB_HWCACHE_ALIGN |
@@ -4949,6 +4968,8 @@  i915_gem_load_init(struct drm_i915_private *dev_priv)
 	kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
 	kmem_cache_destroy(dev_priv->requests);
+err_luts:
+	kmem_cache_destroy(dev_priv->luts);
 err_vmas:
 	kmem_cache_destroy(dev_priv->vmas);
 err_objects:
@@ -4971,6 +4992,7 @@  void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
+	kmem_cache_destroy(dev_priv->luts);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 39ed58a21fc1..dc182a8dd4e8 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -93,69 +93,21 @@ 
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
-/* Initial size (as log2) to preallocate the handle->object hashtable */
-#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
-
-static void resize_vma_ht(struct work_struct *work)
+static void lut_close(struct i915_gem_context *ctx)
 {
-	struct i915_gem_context_vma_lut *lut =
-		container_of(work, typeof(*lut), resize);
-	unsigned int bits, new_bits, size, i;
-	struct hlist_head *new_ht;
-
-	GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
-
-	bits = 1 + ilog2(4*lut->ht_count/3 + 1);
-	new_bits = min_t(unsigned int,
-			 max(bits, VMA_HT_BITS),
-			 sizeof(unsigned int) * BITS_PER_BYTE - 1);
-	if (new_bits == lut->ht_bits)
-		goto out;
-
-	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
-	if (!new_ht)
-		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
-	if (!new_ht)
-		/* Pretend resize succeeded and stop calling us for a bit! */
-		goto out;
+	struct i915_lut_handle *lut, *ln;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
 
-	size = BIT(lut->ht_bits);
-	for (i = 0; i < size; i++) {
-		struct i915_vma *vma;
-		struct hlist_node *tmp;
-
-		hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
-			hlist_add_head(&vma->ctx_node,
-				       &new_ht[hash_32(vma->ctx_handle,
-						       new_bits)]);
+	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
+		list_del(&lut->obj_link);
+		kmem_cache_free(ctx->i915->luts, lut);
 	}
-	kvfree(lut->ht);
-	lut->ht = new_ht;
-	lut->ht_bits = new_bits;
-out:
-	smp_store_release(&lut->ht_size, BIT(bits));
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-}
 
-static void vma_lut_free(struct i915_gem_context *ctx)
-{
-	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
-	unsigned int i, size;
-
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
-		cancel_work_sync(&lut->resize);
-
-	size = BIT(lut->ht_bits);
-	for (i = 0; i < size; i++) {
-		struct i915_vma *vma;
-
-		hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
-			vma->obj->vma_hashed = NULL;
-			vma->ctx = NULL;
-			i915_vma_put(vma);
-		}
+	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
+		i915_vma_put(rcu_dereference_raw(*slot));
+		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
 	}
-	kvfree(lut->ht);
 }
 
 void i915_gem_context_free(struct kref *ctx_ref)
@@ -167,7 +119,6 @@  void i915_gem_context_free(struct kref *ctx_ref)
 	trace_i915_context_free(ctx);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
-	vma_lut_free(ctx);
 	i915_ppgtt_put(ctx->ppgtt);
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -195,8 +146,11 @@  void i915_gem_context_free(struct kref *ctx_ref)
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
+
+	lut_close(ctx);
 	if (ctx->ppgtt)
 		i915_ppgtt_close(&ctx->ppgtt->base);
+
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
 }
@@ -269,16 +223,8 @@  __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->i915 = dev_priv;
 	ctx->priority = I915_PRIORITY_NORMAL;
 
-	ctx->vma_lut.ht_bits = VMA_HT_BITS;
-	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
-	BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
-	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
-				  sizeof(*ctx->vma_lut.ht),
-				  GFP_KERNEL);
-	if (!ctx->vma_lut.ht)
-		goto err_out;
-
-	INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
@@ -328,8 +274,6 @@  __create_hw_context(struct drm_i915_private *dev_priv,
 	put_pid(ctx->pid);
 	idr_remove(&file_priv->context_idr, ctx->user_handle);
 err_lut:
-	kvfree(ctx->vma_lut.ht);
-err_out:
 	context_close(ctx);
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 82c99ba92ad3..cd0c5d8e0db7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -27,6 +27,7 @@ 
 
 #include <linux/bitops.h>
 #include <linux/list.h>
+#include <linux/radix-tree.h>
 
 struct pid;
 
@@ -143,32 +144,6 @@  struct i915_gem_context {
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
 
-	struct i915_gem_context_vma_lut {
-		/** ht_size: last request size to allocate the hashtable for. */
-		unsigned int ht_size;
-#define I915_CTX_RESIZE_IN_PROGRESS BIT(0)
-		/** ht_bits: real log2(size) of hashtable. */
-		unsigned int ht_bits;
-		/** ht_count: current number of entries inside the hashtable */
-		unsigned int ht_count;
-
-		/** ht: the array of buckets comprising the simple hashtable */
-		struct hlist_head *ht;
-
-		/**
-		 * resize: After an execbuf completes, we check the load factor
-		 * of the hashtable. If the hashtable is too full, or too empty,
-		 * we schedule a task to resize the hashtable. During the
-		 * resize, the entries are moved between different buckets and
-		 * so we cannot simultaneously read the hashtable as it is
-		 * being resized (unlike rhashtable). Therefore we treat the
-		 * active work as a strong barrier, pausing a subsequent
-		 * execbuf to wait for the resize worker to complete, if
-		 * required.
-		 */
-		struct work_struct resize;
-	} vma_lut;
-
 	/** engine: per-engine logical HW state */
 	struct intel_context {
 		struct i915_vma *state;
@@ -199,6 +174,9 @@  struct i915_gem_context {
 
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
+
+	struct radix_tree_root handles_vma;
+	struct list_head handles_list;
 };
 
 static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 18d6581bc6c5..adcea065985c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -637,19 +637,6 @@  static int eb_reserve(struct i915_execbuffer *eb)
 	} while (1);
 }
 
-static inline struct hlist_head *
-ht_head(const  struct i915_gem_context_vma_lut *lut, u32 handle)
-{
-	return &lut->ht[hash_32(handle, lut->ht_bits)];
-}
-
-static inline bool
-ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
-{
-	return (4*lut->ht_count > 3*lut->ht_size ||
-		4*lut->ht_count + 1 < lut->ht_size);
-}
-
 static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 {
 	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
@@ -684,31 +671,22 @@  static int eb_select_context(struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
 {
-	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
-	struct drm_i915_gem_object *uninitialized_var(obj);
+	struct drm_i915_gem_object *obj;
+	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 	unsigned int i;
 	int err;
 
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
-	if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
-		flush_work(&lut->resize);
-	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
-
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
-		struct hlist_head *hl = ht_head(lut, handle);
 		struct i915_vma *vma;
+		struct i915_lut_handle *lut;
 
-		hlist_for_each_entry(vma, hl, ctx_node) {
-			GEM_BUG_ON(vma->ctx != eb->ctx);
-
-			if (vma->ctx_handle != handle)
-				continue;
-
+		vma = radix_tree_lookup(handles_vma, handle);
+		if (likely(vma))
 			goto add_vma;
-		}
 
 		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
@@ -716,43 +694,36 @@  static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
 			goto err_vma;
 		}
 
-		flags |= __EXEC_OBJECT_HAS_REF;
-
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			err = PTR_ERR(vma);
 			goto err_obj;
 		}
 
-		/* First come, first served */
-		if (!vma->ctx) {
-			vma->ctx = eb->ctx;
-			vma->ctx_handle = handle;
-			hlist_add_head(&vma->ctx_node, hl);
-			lut->ht_count++;
-			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
-			if (i915_vma_is_ggtt(vma)) {
-				GEM_BUG_ON(obj->vma_hashed);
-				obj->vma_hashed = vma;
-			}
+		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
+		if (unlikely(!lut)) {
+			err = -ENOMEM;
+			goto err_obj;
+		}
 
-			/* transfer ref to ctx */
-			flags &= ~__EXEC_OBJECT_HAS_REF;
+		/* transfer ref to ctx */
+		err = radix_tree_insert(handles_vma, handle, vma);
+		if (unlikely(err)) {
+			kfree(lut);
+			goto err_obj;
 		}
 
+		list_add(&lut->obj_link, &obj->lut_list);
+		list_add(&lut->ctx_link, &eb->ctx->handles_list);
+		lut->ctx = eb->ctx;
+		lut->handle = handle;
+
 add_vma:
 		err = eb_add_vma(eb, i, vma, flags);
 		if (unlikely(err))
 			goto err_vma;
 	}
 
-	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		if (ht_needs_resize(lut))
-			queue_work(system_highpri_wq, &lut->resize);
-		else
-			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
 	eb->batch = eb->vma[i];
@@ -778,7 +749,6 @@  static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
-	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 5b19a4916a4d..8188d3cf5e11 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -35,6 +35,13 @@ 
 
 #include "i915_selftest.h"
 
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct list_head ctx_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
 struct drm_i915_gem_object_ops {
 	unsigned int flags;
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
@@ -86,7 +93,8 @@  struct drm_i915_gem_object {
 	 * They are also added to @vma_list for easy iteration.
 	 */
 	struct rb_root vma_tree;
-	struct i915_vma *vma_hashed;
+
+	struct list_head lut_list;
 
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 532c709febbd..93329a809764 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -591,33 +591,11 @@  static void i915_vma_destroy(struct i915_vma *vma)
 	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
 
-void i915_vma_unlink_ctx(struct i915_vma *vma)
-{
-	struct i915_gem_context *ctx = vma->ctx;
-
-	if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
-		cancel_work_sync(&ctx->vma_lut.resize);
-		ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
-	}
-
-	__hlist_del(&vma->ctx_node);
-	ctx->vma_lut.ht_count--;
-
-	if (i915_vma_is_ggtt(vma))
-		vma->obj->vma_hashed = NULL;
-	vma->ctx = NULL;
-
-	i915_vma_put(vma);
-}
-
 void i915_vma_close(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 	vma->flags |= I915_VMA_CLOSED;
 
-	if (vma->ctx)
-		i915_vma_unlink_ctx(vma);
-
 	list_del(&vma->obj_link);
 	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 5f1356e93c6c..6d35cf7ddcfb 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -115,10 +115,6 @@  struct i915_vma {
 	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
-
-	struct i915_gem_context *ctx;
-	struct hlist_node ctx_node;
-	u32 ctx_handle;
 };
 
 struct i915_vma *
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index f8b9cc212b02..3ee31ef84607 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -40,18 +40,13 @@  mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
-	ctx->vma_lut.ht_bits = VMA_HT_BITS;
-	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
-	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
-				  sizeof(*ctx->vma_lut.ht),
-				  GFP_KERNEL);
-	if (!ctx->vma_lut.ht)
-		goto err_free;
+	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	ret = ida_simple_get(&i915->context_hw_ida,
 			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
 	if (ret < 0)
-		goto err_vma_ht;
+		goto err_handles;
 	ctx->hw_id = ret;
 
 	if (name) {
@@ -66,9 +61,7 @@  mock_context(struct drm_i915_private *i915,
 
 	return ctx;
 
-err_vma_ht:
-	kvfree(ctx->vma_lut.ht);
-err_free:
+err_handles:
 	kfree(ctx);
 	return NULL;
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 898e87998417..3527eb364964 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2022,6 +2022,7 @@  void radix_tree_iter_delete(struct radix_tree_root *root,
 	if (__radix_tree_delete(root, iter->node, slot))
 		iter->index = iter->next_index;
 }
+EXPORT_SYMBOL(radix_tree_iter_delete);
 
 /**
  * radix_tree_delete_item - delete an item from a radix tree

[3/3] drm/i915: Replace execbuf vma ht with an idr

Commit Message

Comments

Patch