From patchwork Fri Jan 25 02:29:47 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 10780427 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3FADC13B4 for ; Fri, 25 Jan 2019 02:49:54 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 2D31D2FA8F for ; Fri, 25 Jan 2019 02:49:54 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 20F8F2FA94; Fri, 25 Jan 2019 02:49:54 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 5F45F2FA8F for ; Fri, 25 Jan 2019 02:49:53 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id CA1C06F255; Fri, 25 Jan 2019 02:49:52 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id AA9506F255 for ; Fri, 25 Jan 2019 02:49:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 15342838-1500050 for multiple; Fri, 25 Jan 2019 02:30:12 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 25 Jan 2019 02:29:47 +0000 Message-Id: <20190125023005.1007-15-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190125023005.1007-1-chris@chris-wilson.co.uk> References: <20190125023005.1007-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 15/33] drm/i915: Move vma lookup to its own lock X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP Remove the struct_mutex requirement for looking up the vma for an object. v2: Highlight how the race for duplicate vma creation is resolved on reacquiring the lock with a short comment. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- drivers/gpu/drm/i915/i915_gem.c | 33 +++++++----- drivers/gpu/drm/i915/i915_gem_object.h | 45 +++++++++------- drivers/gpu/drm/i915/i915_vma.c | 66 ++++++++++++++++------- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 4 +- 6 files changed, 98 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3b995f9fdc06..b64f758803f3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -160,14 +160,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_global) seq_printf(m, " (global)"); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -323,7 +323,7 @@ static int per_file_stats(int id, void *ptr, void *data) if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 111a047a45b7..653c7ba4c69f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -439,15 +439,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (ret) return ret; - while ((vma = list_first_entry_or_null(&obj->vma_list, - struct i915_vma, - obj_link))) { + spin_lock(&obj->vma.lock); + while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { list_move_tail(&vma->obj_link, &still_in_list); + spin_unlock(&obj->vma.lock); + ret = i915_vma_unbind(vma); - if (ret) - break; + + spin_lock(&obj->vma.lock); } - list_splice(&still_in_list, &obj->vma_list); + list_splice(&still_in_list, &obj->vma.list); + spin_unlock(&obj->vma.lock); return ret; } @@ -3491,7 +3495,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * reading an invalid PTE on older architectures. */ restart: - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3569,7 +3573,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, */ } - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3579,7 +3583,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, } } - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) vma->node.color = cache_level; i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4155,7 +4159,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->vma_list); + spin_lock_init(&obj->vma.lock); + INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4321,14 +4327,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); GEM_BUG_ON(i915_gem_object_is_active(obj)); - list_for_each_entry_safe(vma, vn, - &obj->vma_list, obj_link) { + list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_destroy(vma); } - GEM_BUG_ON(!list_empty(&obj->vma_list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); + GEM_BUG_ON(!list_empty(&obj->vma.list)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); /* This serializes freeing with the shrinker. Since the free * is delayed, first by RCU then by the workqueue, we want the diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index cb1b0144d274..5a33b6d9f942 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -87,24 +87,33 @@ struct drm_i915_gem_object { const struct drm_i915_gem_object_ops *ops; - /** - * @vma_list: List of VMAs backed by this object - * - * The VMA on this list are ordered by type, all GGTT vma are placed - * at the head and all ppGTT vma are placed at the tail. The different - * types of GGTT vma are unordered between themselves, use the - * @vma_tree (which has a defined order between all VMA) to find an - * exact match. - */ - struct list_head vma_list; - /** - * @vma_tree: Ordered tree of VMAs backed by this object - * - * All VMA created for this object are placed in the @vma_tree for - * fast retrieval via a binary search in i915_vma_instance(). - * They are also added to @vma_list for easy iteration. - */ - struct rb_root vma_tree; + struct { + /** + * @vma.lock: protect the list/tree of vmas + */ + struct spinlock lock; + + /** + * @vma.list: List of VMAs backed by this object + * + * The VMA on this list are ordered by type, all GGTT vma are + * placed at the head and all ppGTT vma are placed at the tail. + * The different types of GGTT vma are unordered between + * themselves, use the @vma.tree (which has a defined order + * between all VMA) to quickly find an exact match. + */ + struct list_head list; + + /** + * @vma.tree: Ordered tree of VMAs backed by this object + * + * All VMA created for this object are placed in the @vma.tree + * for fast retrieval via a binary search in + * i915_vma_instance(). They are also added to @vma.list for + * easy iteration. + */ + struct rb_root tree; + } vma; /** * @lut_list: List of vma lookup entries in use for this object. diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index dcbd0d345c72..d83b8ad5f859 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -187,32 +187,52 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); - /* - * We put the GGTT vma at the start of the vma-list, followed - * by the ppGGTT vma. This allows us to break early when - * iterating over only the GGTT vma for an object, see - * for_each_ggtt_vma() - */ vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - list_add_tail(&vma->obj_link, &obj->vma_list); } + spin_lock(&obj->vma.lock); + rb = NULL; - p = &obj->vma_tree.rb_node; + p = &obj->vma.tree.rb_node; while (*p) { struct i915_vma *pos; + long cmp; rb = *p; pos = rb_entry(rb, struct i915_vma, obj_node); - if (i915_vma_compare(pos, vm, view) < 0) + + /* + * If the view already exists in the tree, another thread + * already created a matching vma, so return the older instance + * and dispose of ours. + */ + cmp = i915_vma_compare(pos, vm, view); + if (cmp == 0) { + spin_unlock(&obj->vma.lock); + kmem_cache_free(vm->i915->vmas, vma); + return pos; + } + + if (cmp < 0) p = &rb->rb_right; else p = &rb->rb_left; } rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); + rb_insert_color(&vma->obj_node, &obj->vma.tree); + + if (i915_vma_is_ggtt(vma)) + /* + * We put the GGTT vma at the start of the vma-list, followed + * by the ppGGTT vma. This allows us to break early when + * iterating over only the GGTT vma for an object, see + * for_each_ggtt_vma() + */ + list_add(&vma->obj_link, &obj->vma.list); + else + list_add_tail(&vma->obj_link, &obj->vma.list); + + spin_unlock(&obj->vma.lock); mutex_lock(&vm->mutex); list_add(&vma->vm_link, &vm->unbound_list); @@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj, { struct rb_node *rb; - rb = obj->vma_tree.rb_node; + rb = obj->vma.tree.rb_node; while (rb) { struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; @@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj, { struct i915_vma *vma; - lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); GEM_BUG_ON(vm->closed); + spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); - if (!vma) + spin_unlock(&obj->vma.lock); + + /* vma_create() will resolve the race if another creates the vma */ + if (unlikely(!vma)) vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); - GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; } @@ -808,14 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); - list_del(&vma->obj_link); - mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); mutex_unlock(&vma->vm->mutex); - if (vma->obj) - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + if (vma->obj) { + struct drm_i915_gem_object *obj = vma->obj; + + spin_lock(&obj->vma.lock); + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma.tree); + spin_unlock(&obj->vma.lock); + } rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { GEM_BUG_ON(i915_gem_active_isset(&iter->base)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4f7c1c7599f4..7252abc73d3e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915); * or the list is empty ofc. */ #define for_each_ggtt_vma(V, OBJ) \ - list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \ for_each_until(!i915_vma_is_ggtt(V)) #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index f0a32edfb9b1..cf1de82741fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg) } count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", @@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) allocated an extra full vma!\n", p->name);