From patchwork Fri Oct 4 13:39:56 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174537 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7AFCE16B1 for ; Fri, 4 Oct 2019 13:40:50 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 6327D20700 for ; Fri, 4 Oct 2019 13:40:50 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 6327D20700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 3AA6C6EB8B; Fri, 4 Oct 2019 13:40:42 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 448986EB80 for ; Fri, 4 Oct 2019 13:40:41 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723951-1500050 for multiple; Fri, 04 Oct 2019 14:40:17 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:39:56 +0100 Message-Id: <20191004134015.13204-2-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 
Subject: [Intel-gfx] [PATCH 01/20] drm/i915: Only track bound elements of the GTT X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Matthew Auld Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" The premise here is simply to avoid having to acquire the vm->mutex inside vma create/destroy to update the vm->unbound_list, to avoid some nasty lock recursions later. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 ++++--------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ---- drivers/gpu/drm/i915/i915_vma.c | 12 ++-------- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 5 files changed, 8 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index bfbc3e3daf92..e45eb8721850 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -692,7 +692,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv __i915_vma_set_map_and_fenceable(vma); mutex_lock(&ggtt->vm.mutex); - list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); + list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); GEM_BUG_ON(i915_gem_object_is_shrinkable(obj)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e62e9d1a1307..ad9eb2d68f3f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -505,19 +505,12 @@ static void i915_address_space_fini(struct i915_address_space *vm) static void ppgtt_destroy_vma(struct i915_address_space *vm) { - struct list_head *phases[] = { - &vm->bound_list, - &vm->unbound_list, - NULL, - }, **phase; + struct 
i915_vma *vma, *vn; mutex_lock(&vm->i915->drm.struct_mutex); - for (phase = phases; *phase; phase++) { - struct i915_vma *vma, *vn; - - list_for_each_entry_safe(vma, vn, *phase, vm_link) - i915_vma_destroy(vma); - } + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) + i915_vma_destroy(vma); + GEM_BUG_ON(!list_empty(&vm->bound_list)); mutex_unlock(&vm->i915->drm.struct_mutex); } @@ -528,9 +521,6 @@ static void __i915_vm_release(struct work_struct *work) ppgtt_destroy_vma(vm); - GEM_BUG_ON(!list_empty(&vm->bound_list)); - GEM_BUG_ON(!list_empty(&vm->unbound_list)); - vm->cleanup(vm); i915_address_space_fini(vm); @@ -569,7 +559,6 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) stash_init(&vm->free_pages); - INIT_LIST_HEAD(&vm->unbound_list); INIT_LIST_HEAD(&vm->bound_list); } @@ -1887,10 +1876,6 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) INIT_LIST_HEAD(&vma->obj_link); INIT_LIST_HEAD(&vma->closed_link); - mutex_lock(&vma->vm->mutex); - list_add(&vma->vm_link, &vma->vm->unbound_list); - mutex_unlock(&vma->vm->mutex); - return vma; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 8fd2234ba0bf..bbdc735466c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -320,11 +320,6 @@ struct i915_address_space { */ struct list_head bound_list; - /** - * List of vma that are not unbound. 
- */ - struct list_head unbound_list; - struct pagestash free_pages; /* Global GTT */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 68c34b1a20e4..d097f77890ba 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -218,10 +218,6 @@ vma_create(struct drm_i915_gem_object *obj, spin_unlock(&obj->vma.lock); - mutex_lock(&vm->mutex); - list_add(&vma->vm_link, &vm->unbound_list); - mutex_unlock(&vm->mutex); - return vma; err_vma: @@ -657,7 +653,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); mutex_lock(&vma->vm->mutex); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); + list_add_tail(&vma->vm_link, &vma->vm->bound_list); mutex_unlock(&vma->vm->mutex); if (vma->obj) { @@ -685,7 +681,7 @@ i915_vma_remove(struct i915_vma *vma) mutex_lock(&vma->vm->mutex); drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + list_del(&vma->vm_link); mutex_unlock(&vma->vm->mutex); /* @@ -798,10 +794,6 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(vma->fence); - mutex_lock(&vma->vm->mutex); - list_del(&vma->vm_link); - mutex_unlock(&vma->vm->mutex); - if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f901bbb9586e..0945d6e978a2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1242,7 +1242,7 @@ static void track_vma_bind(struct i915_vma *vma) vma->pages = obj->mm.pages; mutex_lock(&vma->vm->mutex); - list_move_tail(&vma->vm_link, &vma->vm->bound_list); + list_add_tail(&vma->vm_link, &vma->vm->bound_list); mutex_unlock(&vma->vm->mutex); } From patchwork Fri Oct 4 13:39:57 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 
Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174539 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id AB6E916B1 for ; Fri, 4 Oct 2019 13:40:51 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 93D23222C0 for ; Fri, 4 Oct 2019 13:40:51 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 93D23222C0 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 5DB216EB86; Fri, 4 Oct 2019 13:40:44 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 3D3716EB80 for ; Fri, 4 Oct 2019 13:40:40 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723952-1500050 for multiple; Fri, 04 Oct 2019 14:40:17 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:39:57 +0100 Message-Id: <20191004134015.13204-3-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 02/20] drm/i915: Mark up address spaces that may need to allocate 
X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Since we cannot allocate underneath the vm->mutex (it is used in the direct-reclaim paths), we need to shift the allocations off into a mutexless worker with fence recursion prevention. To know when we need this protection, we mark up the address spaces that do allocate before insertion. In the future, we may wish to extend the async bind scheme to more than just allocations. v2: s/vm->bind_alloc/vm->bind_async_flags/ Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++ drivers/gpu/drm/i915/i915_gem_gtt.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad9eb2d68f3f..8eba63ecdb03 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1502,6 +1502,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) goto err_free_pd; } + ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->vm.insert_entries = gen8_ppgtt_insert; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; @@ -1950,6 +1951,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt_init(&ppgtt->base, &i915->gt); ppgtt->base.vm.top = 1; + ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND; ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries; @@ -2581,6 +2583,7 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) goto err_ppgtt; ggtt->alias = ppgtt; + ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; 
GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma); ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index bbdc735466c1..3502b9c85a8e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -305,6 +305,8 @@ struct i915_address_space { u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ + unsigned int bind_async_flags; + bool closed; struct mutex mutex; /* protects vma and our lists */ From patchwork Fri Oct 4 13:39:58 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174547 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 91C5616B1 for ; Fri, 4 Oct 2019 13:40:55 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 795EB222C0 for ; Fri, 4 Oct 2019 13:40:55 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 795EB222C0 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 2D72B6EB91; Fri, 4 Oct 2019 13:40:50 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 51DFE6EB86 for ; Fri, 4 Oct 2019 13:40:41 +0000 (UTC) X-Default-Received-SPF: pass 
(skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723953-1500050 for multiple; Fri, 04 Oct 2019 14:40:17 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:39:58 +0100 Message-Id: <20191004134015.13204-4-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 03/20] drm/i915: Pull i915_vma_pin under the vm->mutex X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Replace the struct_mutex requirement for pinning the i915_vma with the local vm->mutex instead. Note that the vm->mutex is tainted by the shrinker (we require unbinding from inside fs-reclaim) and so we cannot allocate while holding that mutex. Instead we have to preallocate workers to do the allocation and apply the PTE updates after we have reserved their slot in the drm_mm (using fences to order the PTE writes with the GPU work and with later unbind). In adding the asynchronous vma binding, one subtle requirement is to avoid coupling the binding fence into the backing object->resv. That is the asynchronous binding only applies to the vma timeline itself and not to the pages as that is a more global timeline (the binding of one vma does not need to be ordered with another vma, nor does the implicit GEM fencing depend on a vma, only on writes to the backing store). Keeping the vma binding distinct from the backing store timelines is verified by a number of async gem_exec_fence and gem_exec_schedule tests. 
The way we do this is quite simple, we keep the fence for the vma binding separate and only wait on it as required, and never add it to the obj->resv itself. Another consequence in reducing the locking around the vma is the destruction of the vma is no longer globally serialised by struct_mutex. A natural solution would be to add a kref to i915_vma, but that requires decoupling the reference cycles, possibly by introducing a new i915_mm_pages object that is owned by both obj->mm and vma->pages. However, we have not taken that route due to the overshadowing lmem/ttm discussions, and instead play a series of complicated games with trylocks to (hopefully) ensure that only one destruction path is called! v2: Add some commentary, and some helpers to reduce patch churn. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display.c | 29 +- drivers/gpu/drm/i915/display/intel_dsb.c | 7 +- drivers/gpu/drm/i915/display/intel_fbdev.c | 8 +- drivers/gpu/drm/i915/display/intel_overlay.c | 11 +- .../gpu/drm/i915/gem/i915_gem_client_blt.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 20 +- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 13 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 43 +- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 20 +- drivers/gpu/drm/i915/gem/i915_gem_object.c | 33 +- drivers/gpu/drm/i915/gem/i915_gem_object.h | 5 + drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 73 +-- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 8 +- drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 27 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 27 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 9 +- .../drm/i915/gem/selftests/i915_gem_context.c | 12 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 4 - .../drm/i915/gem/selftests/igt_gem_utils.c | 7 +- drivers/gpu/drm/i915/gt/intel_gt.c | 5 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 19 +- 
drivers/gpu/drm/i915/gvt/aperture_gm.c | 12 +- drivers/gpu/drm/i915/i915_active.c | 95 +++- drivers/gpu/drm/i915/i915_active.h | 7 + drivers/gpu/drm/i915/i915_active_types.h | 5 + drivers/gpu/drm/i915/i915_drv.c | 2 - drivers/gpu/drm/i915/i915_gem.c | 83 ++- drivers/gpu/drm/i915/i915_gem_evict.c | 28 +- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 9 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 109 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 45 +- drivers/gpu/drm/i915/i915_perf.c | 32 +- drivers/gpu/drm/i915/i915_vma.c | 524 ++++++++++++------ drivers/gpu/drm/i915/i915_vma.h | 84 +-- drivers/gpu/drm/i915/selftests/i915_gem.c | 2 - .../gpu/drm/i915/selftests/i915_gem_evict.c | 36 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 58 +- drivers/gpu/drm/i915/selftests/i915_request.c | 7 + drivers/gpu/drm/i915/selftests/i915_vma.c | 6 +- 40 files changed, 824 insertions(+), 706 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index c3ac5a5c5185..8f7365b8dffb 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2079,7 +2079,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int pinctl; u32 alignment; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); if (WARN_ON(!i915_gem_object_is_framebuffer(obj))) return ERR_PTR(-EINVAL); @@ -2163,8 +2162,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - i915_gem_object_lock(vma->obj); if (flags & PLANE_HAS_FENCE) i915_vma_unpin_fence(vma); @@ -3065,12 +3062,10 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; } - mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - mutex_unlock(&dev->struct_mutex); if (!obj) return false; @@ -3232,13 +3227,11 @@ 
intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, intel_state->color_plane[0].stride = intel_fb_pitch(fb, 0, intel_state->base.rotation); - mutex_lock(&dev->struct_mutex); intel_state->vma = intel_pin_and_fence_fb_obj(fb, &intel_state->view, intel_plane_uses_fence(intel_state), &intel_state->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(intel_state->vma)) { DRM_ERROR("failed to pin boot fb on pipe %d: %li\n", intel_crtc->pipe, PTR_ERR(intel_state->vma)); @@ -14365,8 +14358,6 @@ static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) * bits. Some older platforms need special physical address handling for * cursor planes. * - * Must be called with struct_mutex held. - * * Returns 0 on success, negative error code on failure. */ int @@ -14423,15 +14414,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) return ret; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) { - i915_gem_object_unpin_pages(obj); - return ret; - } - ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); if (ret) return ret; @@ -14480,8 +14464,6 @@ intel_prepare_plane_fb(struct drm_plane *plane, * @old_state: the state from the previous modeset * * Cleans up a framebuffer that has just been removed from a plane. - * - * Must be called with struct_mutex held. */ void intel_cleanup_plane_fb(struct drm_plane *plane, @@ -14497,9 +14479,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, } /* Should only be called after a successful intel_prepare_plane_fb()! 
*/ - mutex_lock(&dev_priv->drm.struct_mutex); intel_plane_unpin_fb(to_intel_plane_state(old_state)); - mutex_unlock(&dev_priv->drm.struct_mutex); } int @@ -14702,7 +14682,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, u32 src_w, u32 src_h, struct drm_modeset_acquire_ctx *ctx) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct drm_plane_state *old_plane_state, *new_plane_state; struct intel_plane *intel_plane = to_intel_plane(plane); struct intel_crtc_state *crtc_state = @@ -14768,13 +14747,9 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) - goto out_free; - ret = intel_plane_pin_fb(to_intel_plane_state(new_plane_state)); if (ret) - goto out_unlock; + goto out_free; intel_frontbuffer_flush(to_intel_frontbuffer(fb), ORIGIN_FLIP); intel_frontbuffer_track(to_intel_frontbuffer(old_plane_state->fb), @@ -14804,8 +14779,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, intel_plane_unpin_fb(to_intel_plane_state(old_plane_state)); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); out_free: if (new_crtc_state) intel_crtc_destroy_state(crtc, &new_crtc_state->base); diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c index 0a0a1536ac96..bb5a0e91b370 100644 --- a/drivers/gpu/drm/i915/display/intel_dsb.c +++ b/drivers/gpu/drm/i915/display/intel_dsb.c @@ -119,9 +119,7 @@ intel_dsb_get(struct intel_crtc *crtc) goto err; } - mutex_lock(&i915->drm.struct_mutex); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); - mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(vma)) { DRM_ERROR("Vma creation failed\n"); i915_gem_object_put(obj); @@ -164,10 +162,7 @@ void intel_dsb_put(struct intel_dsb *dsb) return; if (atomic_dec_and_test(&dsb->refcount)) { - mutex_lock(&i915->drm.struct_mutex); - i915_gem_object_unpin_map(dsb->vma->obj); - i915_vma_unpin_and_release(&dsb->vma, 0); - 
mutex_unlock(&i915->drm.struct_mutex); + i915_vma_unpin_and_release(&dsb->vma, I915_VMA_RELEASE_MAP); dsb->cmd_buf = NULL; dsb->free_pos = 0; dsb->ins_start_offset = 0; diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 68338669f054..97cde017670a 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -204,7 +204,6 @@ static int intelfb_create(struct drm_fb_helper *helper, sizes->fb_height = intel_fb->base.height; } - mutex_lock(&dev->struct_mutex); wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); /* Pin the GGTT vma for our access via info->screen_base. @@ -266,7 +265,6 @@ static int intelfb_create(struct drm_fb_helper *helper, ifbdev->vma_flags = flags; intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(pdev, info); return 0; @@ -274,7 +272,6 @@ static int intelfb_create(struct drm_fb_helper *helper, intel_unpin_fb_vma(vma, flags); out_unlock: intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev->struct_mutex); return ret; } @@ -291,11 +288,8 @@ static void intel_fbdev_destroy(struct intel_fbdev *ifbdev) drm_fb_helper_fini(&ifbdev->helper); - if (ifbdev->vma) { - mutex_lock(&ifbdev->helper.dev->struct_mutex); + if (ifbdev->vma) intel_unpin_fb_vma(ifbdev->vma, ifbdev->vma_flags); - mutex_unlock(&ifbdev->helper.dev->struct_mutex); - } if (ifbdev->fb) drm_framebuffer_remove(&ifbdev->fb->base); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 5efef9babadb..3f4ac1ee7668 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -1303,15 +1303,11 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) struct i915_vma *vma; int err; - mutex_lock(&i915->drm.struct_mutex); - obj = i915_gem_object_create_stolen(i915, PAGE_SIZE); if (obj == 
NULL) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (IS_ERR(vma)) { @@ -1332,13 +1328,10 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys) } overlay->reg_bo = obj; - mutex_unlock(&i915->drm.struct_mutex); return 0; err_put_bo: i915_gem_object_put(obj); -err_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 7f61a8024133..c1fca5728e6e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work) * keep track of the GPU activity within this vma/request, and * propagate the signal from the request to w->dma. */ - err = i915_active_add_request(&vma->active, rq); + err = __i915_vma_move_to_active(vma, rq); if (err) goto out_request; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index f7ba0935ed67..95f8e66e45db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -313,8 +313,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); release_hw_id(ctx); - if (ctx->vm) - i915_vm_put(ctx->vm); free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); @@ -379,9 +377,13 @@ void i915_gem_context_release(struct kref *ref) static void context_close(struct i915_gem_context *ctx) { + i915_gem_context_set_closed(ctx); + + if (ctx->vm) + i915_vm_close(ctx->vm); + mutex_lock(&ctx->mutex); - i915_gem_context_set_closed(ctx); ctx->file_priv = ERR_PTR(-EBADF); /* @@ -474,7 +476,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct 
i915_address_space *vm) GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - ctx->vm = i915_vm_get(vm); + ctx->vm = i915_vm_open(vm); context_apply_all(ctx, __apply_ppgtt, vm); return old; @@ -488,7 +490,7 @@ static void __assign_ppgtt(struct i915_gem_context *ctx, vm = __set_ppgtt(ctx, vm); if (vm) - i915_vm_put(vm); + i915_vm_close(vm); } static void __set_timeline(struct intel_timeline **dst, @@ -953,7 +955,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret < 0) goto err_unlock; - i915_vm_get(vm); + i915_vm_open(vm); args->size = 0; args->value = ret; @@ -973,7 +975,7 @@ static void set_ppgtt_barrier(void *data) if (INTEL_GEN(old->i915) < 8) gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old)); - i915_vm_put(old); + i915_vm_close(old); } static int emit_ppgtt_update(struct i915_request *rq, void *data) @@ -1090,8 +1092,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, set_ppgtt_barrier, old); if (err) { - i915_vm_put(__set_ppgtt(ctx, old)); - i915_vm_put(old); + i915_vm_close(__set_ppgtt(ctx, old)); + i915_vm_close(old); } unlock: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 55c3ab59e3aa..9937b4c341f1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -288,7 +288,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (!drm_mm_node_allocated(&vma->node)) continue; - ret = i915_vma_bind(vma, cache_level, PIN_UPDATE); + /* Wait for an earlier async bind, need to rewrite it */ + ret = i915_vma_sync(vma); + if (ret) + return ret; + + ret = i915_vma_bind(vma, cache_level, PIN_UPDATE, NULL); if (ret) return ret; } @@ -391,16 +396,11 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, if (ret) goto out; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - goto out; - ret = i915_gem_object_lock_interruptible(obj); if (ret == 0) { ret = 
i915_gem_object_set_cache_level(obj, level); i915_gem_object_unlock(obj); } - mutex_unlock(&i915->drm.struct_mutex); out: i915_gem_object_put(obj); @@ -485,6 +485,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) if (!drm_mm_node_allocated(&vma->node)) continue; + GEM_BUG_ON(vma->vm != &i915->ggtt.vm); list_move_tail(&vma->vm_link, &vma->vm->bound_list); } mutex_unlock(&i915->ggtt.vm.mutex); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index c049199a1df5..88a881be12ec 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -698,7 +698,9 @@ static int eb_reserve(struct i915_execbuffer *eb) case 1: /* Too fragmented, unbind everything and retry */ + mutex_lock(&eb->context->vm->mutex); err = i915_gem_evict_vm(eb->context->vm); + mutex_unlock(&eb->context->vm->mutex); if (err) return err; break; @@ -972,7 +974,9 @@ static void reloc_cache_reset(struct reloc_cache *cache) ggtt->vm.clear_range(&ggtt->vm, cache->node.start, cache->node.size); + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(&cache->node); + mutex_unlock(&ggtt->vm.mutex); } else { i915_vma_unpin((struct i915_vma *)cache->node.mm); } @@ -1047,11 +1051,13 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, PIN_NOEVICT); if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); + mutex_lock(&ggtt->vm.mutex); err = drm_mm_insert_node_in_range (&ggtt->vm.mm, &cache->node, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); + mutex_unlock(&ggtt->vm.mutex); if (err) /* no inactive aperture space, use cpu reloc */ return NULL; } else { @@ -1416,7 +1422,7 @@ eb_relocate_entry(struct i915_execbuffer *eb, if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && IS_GEN(eb->i915, 6)) { err = i915_vma_bind(target, target->obj->cache_level, - PIN_GLOBAL); + PIN_GLOBAL, NULL); if (WARN_ONCE(err, "Unexpected failure to 
bind target VMA!")) return err; @@ -2140,35 +2146,6 @@ static struct i915_request *eb_throttle(struct intel_context *ce) return i915_request_get(rq); } -static int -__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - int err; - - if (likely(atomic_inc_not_zero(&ce->pin_count))) - return 0; - - err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex); - if (err) - return err; - - err = __intel_context_do_pin(ce); - mutex_unlock(&eb->i915->drm.struct_mutex); - - return err; -} - -static void -__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce) -{ - if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) - return; - - mutex_lock(&eb->i915->drm.struct_mutex); - intel_context_unpin(ce); - mutex_unlock(&eb->i915->drm.struct_mutex); -} - static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) { struct intel_timeline *tl; @@ -2188,7 +2165,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) * GGTT space, so do this first before we reserve a seqno for * ourselves. 
*/ - err = __eb_pin_context(eb, ce); + err = intel_context_pin(ce); if (err) return err; @@ -2232,7 +2209,7 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) intel_context_exit(ce); intel_context_timeline_unlock(tl); err_unpin: - __eb_unpin_context(eb, ce); + intel_context_unpin(ce); return err; } @@ -2245,7 +2222,7 @@ static void eb_unpin_engine(struct i915_execbuffer *eb) intel_context_exit(ce); mutex_unlock(&tl->mutex); - __eb_unpin_context(eb, ce); + intel_context_unpin(ce); } static unsigned int diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index dd0c2840ba4d..c19431d609fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -249,16 +249,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_rpm; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto err_reset; - - /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { - ret = -EFAULT; - goto err_unlock; - } - /* Now pin it into the GTT as needed */ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | @@ -291,7 +281,13 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) } if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err_unlock; + goto err_reset; + } + + /* Access to snoopable pages through the GTT is incoherent. 
*/ + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) { + ret = -EFAULT; + goto err_unpin; } ret = i915_vma_pin_fence(vma); @@ -329,8 +325,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf) i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); -err_unlock: - mutex_unlock(&dev->struct_mutex); err_reset: intel_gt_reset_unlock(ggtt->vm.gt, srcu); err_rpm: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 0ef60dae23a7..dbf9be9a79f4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, wakeref = intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { - struct i915_vma *vma, *vn; - trace_i915_gem_object_destroy(obj); - mutex_lock(&i915->drm.struct_mutex); - - list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { - GEM_BUG_ON(i915_vma_is_active(vma)); - atomic_and(~I915_VMA_PIN_MASK, &vma->flags); - i915_vma_destroy(vma); + if (!list_empty(&obj->vma.list)) { + struct i915_vma *vma; + + /* + * Note that the vma keeps an object reference while + * it is active, so it *should* not sleep while we + * destroy it. Our debug code errs and insists it *might*. + * For the moment, play along.
+ */ + spin_lock(&obj->vma.lock); + while ((vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { + GEM_BUG_ON(vma->obj != obj); + spin_unlock(&obj->vma.lock); + + i915_vma_destroy(vma); + + spin_lock(&obj->vma.lock); + } + spin_unlock(&obj->vma.lock); } - GEM_BUG_ON(!list_empty(&obj->vma.list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); - - mutex_unlock(&i915->drm.struct_mutex); GEM_BUG_ON(atomic_read(&obj->bind_count)); GEM_BUG_ON(obj->userfault_count); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 53c7069ba3e8..086a9bf5adcc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -106,6 +106,11 @@ static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) dma_resv_lock(obj->base.resv, NULL); } +static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj) +{ + return dma_resv_trylock(obj->base.resv); +} + static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index d2c05d752909..fd3ce6da8497 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -16,40 +16,6 @@ #include "i915_trace.h" -static bool shrinker_lock(struct drm_i915_private *i915, - unsigned int flags, - bool *unlock) -{ - struct mutex *m = &i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(m)) { - case MUTEX_TRYLOCK_RECURSIVE: - *unlock = false; - return true; - - case MUTEX_TRYLOCK_FAILED: - *unlock = false; - if (flags & I915_SHRINK_ACTIVE && - mutex_lock_killable_nested(m, I915_MM_SHRINKER) == 0) - *unlock = true; - return *unlock; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - } - - BUG(); -} - -static void shrinker_unlock(struct drm_i915_private *i915, bool unlock) -{ - if (!unlock) - return; - - 
mutex_unlock(&i915->drm.struct_mutex); -} - static bool swap_available(void) { return get_nr_swap_pages() > 0; @@ -155,10 +121,6 @@ i915_gem_shrink(struct drm_i915_private *i915, intel_wakeref_t wakeref = 0; unsigned long count = 0; unsigned long scanned = 0; - bool unlock; - - if (!shrinker_lock(i915, shrink, &unlock)) - return 0; /* * When shrinking the active list, we should also consider active @@ -268,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *i915, if (shrink & I915_SHRINK_BOUND) intel_runtime_pm_put(&i915->runtime_pm, wakeref); - shrinker_unlock(i915, unlock); - if (nr_scanned) *nr_scanned += scanned; return count; @@ -339,19 +299,14 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private *i915 = container_of(shrinker, struct drm_i915_private, mm.shrinker); unsigned long freed; - bool unlock; sc->nr_scanned = 0; - if (!shrinker_lock(i915, 0, &unlock)) - return SHRINK_STOP; - freed = i915_gem_shrink(i915, sc->nr_to_scan, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_WRITEBACK); + I915_SHRINK_UNBOUND); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -366,8 +321,6 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) } } - shrinker_unlock(i915, unlock); - return sc->nr_scanned ? 
freed : SHRINK_STOP; } @@ -384,6 +337,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, + I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); @@ -419,10 +373,6 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr struct i915_vma *vma, *next; unsigned long freed_pages = 0; intel_wakeref_t wakeref; - bool unlock; - - if (!shrinker_lock(i915, 0, &unlock)) - return NOTIFY_DONE; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, @@ -439,15 +389,11 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr if (!vma->iomap || i915_vma_is_active(vma)) continue; - mutex_unlock(&i915->ggtt.vm.mutex); - if (i915_vma_unbind(vma) == 0) + if (__i915_vma_unbind(vma) == 0) freed_pages += count; - mutex_lock(&i915->ggtt.vm.mutex); } mutex_unlock(&i915->ggtt.vm.mutex); - shrinker_unlock(i915, unlock); - *(unsigned long *)ptr += freed_pages; return NOTIFY_DONE; } @@ -490,22 +436,9 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915, fs_reclaim_acquire(GFP_KERNEL); - /* - * As we invariably rely on the struct_mutex within the shrinker, - * but have a complicated recursion dance, taint all the mutexes used - * within the shrinker with the struct_mutex. For completeness, we - * taint with all subclass of struct_mutex, even though we should - * only need tainting by I915_MM_NORMAL to catch possible ABBA - * deadlocks from using struct_mutex inside @mutex. 
- */ - mutex_acquire(&i915->drm.struct_mutex.dep_map, - I915_MM_SHRINKER, 0, _RET_IP_); - mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_); mutex_release(&mutex->dep_map, 0, _RET_IP_); - mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_); - fs_reclaim_release(GFP_KERNEL); if (unlock) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index e45eb8721850..fad98a921cde 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -624,8 +624,6 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (!drm_mm_initialized(&dev_priv->mm.stolen)) return NULL; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", &stolen_offset, &gtt_offset, &size); @@ -677,21 +675,25 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later.
*/ + mutex_lock(&ggtt->vm.mutex); ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, size, gtt_offset, obj->cache_level, 0); if (ret) { DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); + mutex_unlock(&ggtt->vm.mutex); goto err_pages; } GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->pages); vma->pages = obj->mm.pages; + atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE); + set_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); __i915_vma_set_map_and_fenceable(vma); - mutex_lock(&ggtt->vm.mutex); list_add_tail(&vma->vm_link, &ggtt->vm.bound_list); mutex_unlock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c index e5d1ae8d4dba..dc2a83ce44d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c @@ -181,22 +181,25 @@ static int i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode, unsigned int stride) { + struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt; struct i915_vma *vma; - int ret; + int ret = 0; if (tiling_mode == I915_TILING_NONE) return 0; + mutex_lock(&ggtt->vm.mutex); for_each_ggtt_vma(vma, obj) { if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); if (ret) - return ret; + break; } + mutex_unlock(&ggtt->vm.mutex); - return 0; + return ret; } int @@ -212,7 +215,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride)); GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE)); - lockdep_assert_held(&i915->drm.struct_mutex); if ((tiling | stride) == obj->tiling_and_stride) return 0; @@ -233,16 +235,18 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, * whilst executing a fenced command for an untiled object. 
*/ - err = i915_gem_object_fence_prepare(obj, tiling, stride); - if (err) - return err; - i915_gem_object_lock(obj); if (i915_gem_object_is_framebuffer(obj)) { i915_gem_object_unlock(obj); return -EBUSY; } + err = i915_gem_object_fence_prepare(obj, tiling, stride); + if (err) { + i915_gem_object_unlock(obj); + return err; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -368,12 +372,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data, } } - err = mutex_lock_interruptible(&dev->struct_mutex); - if (err) - goto err; - err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride); - mutex_unlock(&dev->struct_mutex); /* We have to maintain this existing ABI... */ args->stride = i915_gem_object_get_stride(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 6b3b50f0f6d9..1738a15eb911 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -92,7 +92,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn); struct interval_tree_node *it; - struct mutex *unlock = NULL; unsigned long end; int ret = 0; @@ -129,33 +128,13 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); - if (!unlock) { - unlock = &mn->mm->i915->drm.struct_mutex; - - switch (mutex_trylock_recursive(unlock)) { - default: - case MUTEX_TRYLOCK_FAILED: - if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) { - i915_gem_object_put(obj); - return -EINTR; - } - /* fall through */ - case MUTEX_TRYLOCK_SUCCESS: - break; - - case MUTEX_TRYLOCK_RECURSIVE: - unlock = ERR_PTR(-EEXIST); - break; - } - } - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); if (ret == 0) ret = __i915_gem_object_put_pages(obj, 
I915_MM_SHRINKER); i915_gem_object_put(obj); if (ret) - goto unlock; + return ret; spin_lock(&mn->lock); @@ -168,10 +147,6 @@ userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, } spin_unlock(&mn->lock); -unlock: - if (!IS_ERR_OR_NULL(unlock)) - mutex_unlock(unlock); - return ret; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index c5cea4379216..98b2a6ccfcc1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -333,7 +333,12 @@ static int igt_check_page_sizes(struct i915_vma *vma) struct drm_i915_private *i915 = vma->vm->i915; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj = vma->obj; - int err = 0; + int err; + + /* We have to wait for the async bind to complete before our asserts */ + err = i915_vma_sync(vma); + if (err) + return err; if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) { pr_err("unsupported page_sizes.sg=%u, supported=%u\n", @@ -1390,7 +1395,7 @@ static int igt_ppgtt_pin_update(void *arg) goto out_unpin; } - err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE); + err = i915_vma_bind(vma, I915_CACHE_NONE, PIN_UPDATE, NULL); if (err) goto out_unpin; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 0f4d0644a480..8eba0d3a31de 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -971,10 +971,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); - + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); *rq_out = i915_request_get(rq); @@ -988,8 +985,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, err_request: i915_request_add(rq); err_batch: - i915_vma_unpin(batch); - i915_vma_put(batch); + 
i915_vma_unpin_and_release(&batch, 0); err_vma: i915_vma_unpin(vma); @@ -1533,9 +1529,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; - i915_vma_unpin(vma); - i915_vma_close(vma); - i915_vma_put(vma); + i915_vma_unpin_and_release(&vma, 0); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index aefe557527f8..36aca1c172e7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -322,7 +322,6 @@ static int igt_partial_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); if (1) { @@ -415,7 +414,6 @@ next_tiling: ; out_unlock: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); @@ -458,7 +456,6 @@ static int igt_smoke_tiling(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); count = 0; @@ -508,7 +505,6 @@ static int igt_smoke_tiling(void *arg) pr_info("%s: Completed %lu trials\n", __func__, count); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_unpin_pages(obj); out: i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index ee5dc13a30b3..6718da20f35d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -154,9 +154,7 @@ int igt_gpu_fill_dw(struct intel_context *ce, i915_request_add(rq); - i915_vma_unpin(batch); - i915_vma_close(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return 0; @@ -165,7 +163,6 @@ int igt_gpu_fill_dw(struct intel_context *ce, err_request: i915_request_add(rq); err_batch: - 
i915_vma_unpin(batch); - i915_vma_put(batch); + i915_vma_unpin_and_release(&batch, 0); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 0e5909ee0657..7205595369be 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -302,11 +302,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { struct intel_uncore *uncore = gt->uncore; + unsigned long flags; - spin_lock_irq(&uncore->lock); + spin_lock_irqsave(&uncore->lock, flags); intel_uncore_posting_read_fw(uncore, RING_HEAD(RENDER_RING_BASE)); - spin_unlock_irq(&uncore->lock); + spin_unlock_irqrestore(&uncore->lock, flags); } } diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 0747b8c9f768..ec32996254c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1338,15 +1338,13 @@ void intel_ring_free(struct kref *ref) { struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - i915_vma_close(ring->vma); i915_vma_put(ring->vma); - kfree(ring); } static void __ring_context_fini(struct intel_context *ce) { - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void ring_context_destroy(struct kref *ref) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 9c0c8441c22a..d3bee9f88008 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1127,15 +1127,14 @@ static int evict_vma(void *data) { struct evict_vma *arg = data; struct i915_address_space *vm = arg->vma->vm; - struct drm_i915_private *i915 = vm->i915; struct drm_mm_node evict = arg->vma->node; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&vm->mutex); err = i915_gem_evict_for_node(vm, &evict, 0); - 
mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&vm->mutex); return err; } @@ -1143,39 +1142,33 @@ static int evict_vma(void *data) static int evict_fence(void *data) { struct evict_vma *arg = data; - struct drm_i915_private *i915 = arg->vma->vm->i915; int err; complete(&arg->completion); - mutex_lock(&i915->drm.struct_mutex); - /* Mark the fence register as dirty to force the mmio update. */ err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); if (err) { pr_err("Invalid Y-tiling settings; err:%d\n", err); - goto out_unlock; + return err; } err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); if (err) { pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } err = i915_vma_pin_fence(arg->vma); i915_vma_unpin(arg->vma); if (err) { pr_err("Unable to pin Y-tiled fence; err:%d\n", err); - goto out_unlock; + return err; } i915_vma_unpin_fence(arg->vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return 0; } static int __igt_reset_evict_vma(struct intel_gt *gt, diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 5ff2437b2998..d996bbc7ea59 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -61,14 +61,14 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) flags = PIN_MAPPABLE; } - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); mmio_hw_access_pre(dev_priv); ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node, size, I915_GTT_PAGE_SIZE, I915_COLOR_UNEVICTABLE, start, end, flags); mmio_hw_access_post(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); if (ret) gvt_err("fail to alloc %s gm space from host\n", high_gm ? 
"high" : "low"); @@ -98,9 +98,9 @@ static int alloc_vgpu_gm(struct intel_vgpu *vgpu) return 0; out_free_aperture: - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); return ret; } @@ -108,10 +108,10 @@ static void free_vgpu_gm(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - mutex_lock(&dev_priv->drm.struct_mutex); + mutex_lock(&dev_priv->ggtt.vm.mutex); drm_mm_remove_node(&vgpu->gm.low_gm_node); drm_mm_remove_node(&vgpu->gm.high_gm_node); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_unlock(&dev_priv->ggtt.vm.mutex); } /** diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index d5aac6ff803a..0791736a08fd 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -146,6 +146,7 @@ __active_retire(struct i915_active *ref) if (!retire) return; + GEM_BUG_ON(rcu_access_pointer(ref->excl)); rbtree_postorder_for_each_entry_safe(it, n, &root, node) { GEM_BUG_ON(i915_active_request_isset(&it->base)); kmem_cache_free(global.slab_cache, it); @@ -245,6 +246,8 @@ void __i915_active_init(struct drm_i915_private *i915, ref->flags = 0; ref->active = active; ref->retire = retire; + + ref->excl = NULL; ref->tree = RB_ROOT; ref->cache = NULL; init_llist_head(&ref->preallocated_barriers); @@ -341,6 +344,46 @@ int i915_active_ref(struct i915_active *ref, return err; } +static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb); + + RCU_INIT_POINTER(ref->excl, NULL); + dma_fence_put(f); + + active_retire(ref); +} + +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) +{ + /* We expect the caller to manage the exclusive timeline ordering */ + GEM_BUG_ON(i915_active_is_idle(ref)); + + dma_fence_get(f); + + 
rcu_read_lock(); + if (rcu_access_pointer(ref->excl)) { + struct dma_fence *old; + + old = dma_fence_get_rcu_safe(&ref->excl); + if (old) { + if (dma_fence_remove_callback(old, &ref->excl_cb)) + atomic_dec(&ref->count); + dma_fence_put(old); + } + } + rcu_read_unlock(); + + atomic_inc(&ref->count); + rcu_assign_pointer(ref->excl, f); + + if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) { + RCU_INIT_POINTER(ref->excl, NULL); + atomic_dec(&ref->count); + dma_fence_put(f); + } +} + int i915_active_acquire(struct i915_active *ref) { int err; @@ -399,6 +442,25 @@ void i915_active_ungrab(struct i915_active *ref) __active_ungrab(ref); } +static int excl_wait(struct i915_active *ref) +{ + struct dma_fence *old; + int err = 0; + + if (!rcu_access_pointer(ref->excl)) + return 0; + + rcu_read_lock(); + old = dma_fence_get_rcu_safe(&ref->excl); + rcu_read_unlock(); + if (old) { + err = dma_fence_wait(old, true); + dma_fence_put(old); + } + + return err; +} + int i915_active_wait(struct i915_active *ref) { struct active_node *it, *n; @@ -419,6 +481,10 @@ int i915_active_wait(struct i915_active *ref) return 0; } + err = excl_wait(ref); + if (err) + goto out; + rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { if (is_barrier(&it->base)) { /* unconnected idle-barrier */ err = -EBUSY; @@ -430,6 +496,7 @@ int i915_active_wait(struct i915_active *ref) break; } +out: __active_retire(ref); if (err) return err; @@ -454,26 +521,22 @@ int i915_request_await_active_request(struct i915_request *rq, int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) { - struct active_node *it, *n; - int err; - - if (RB_EMPTY_ROOT(&ref->tree)) - return 0; + int err = 0; - /* await allocates and so we need to avoid hitting the shrinker */ - err = i915_active_acquire(ref); - if (err) - return err; + if (rcu_access_pointer(ref->excl)) { + struct dma_fence *fence; - mutex_lock(&ref->mutex); - rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - err = 
i915_request_await_active_request(rq, &it->base); - if (err) - break; + rcu_read_lock(); + fence = dma_fence_get_rcu_safe(&ref->excl); + rcu_read_unlock(); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } } - mutex_unlock(&ref->mutex); - i915_active_release(ref); + /* In the future we may choose to await on all fences */ + return err; } diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 949c6835335b..90034f61b7c2 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -379,6 +379,13 @@ i915_active_add_request(struct i915_active *ref, struct i915_request *rq) return i915_active_ref(ref, i915_request_timeline(rq), rq); } +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f); + +static inline bool i915_active_has_exclusive(struct i915_active *ref) +{ + return rcu_access_pointer(ref->excl); +} + int i915_active_wait(struct i915_active *ref); int i915_request_await_active(struct i915_request *rq, diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 1854e7d168c1..86e7a232ea3c 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -8,6 +8,7 @@ #define _I915_ACTIVE_TYPES_H_ #include +#include #include #include #include @@ -51,6 +52,10 @@ struct i915_active { struct mutex mutex; atomic_t count; + /* Preallocated "exclusive" node */ + struct dma_fence __rcu *excl; + struct dma_fence_cb excl_cb; + unsigned long flags; #define I915_ACTIVE_GRAB_BIT 0 diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3306c6bb515a..5323e4fa55d9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1858,10 +1858,8 @@ static int i915_drm_resume(struct drm_device *dev) if (ret) DRM_ERROR("failed to re-enable GGTT\n"); - mutex_lock(&dev_priv->drm.struct_mutex); 
i915_gem_restore_gtt_mappings(dev_priv); i915_gem_restore_fences(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); intel_csr_ucode_resume(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7046067f70c1..f50058cf8ab8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -62,20 +62,31 @@ #include "intel_pm.h" static int -insert_mappable_node(struct i915_ggtt *ggtt, - struct drm_mm_node *node, u32 size) +insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { + int err; + + err = mutex_lock_interruptible(&ggtt->vm.mutex); + if (err) + return err; + memset(node, 0, sizeof(*node)); - return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, - size, 0, I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); + err = drm_mm_insert_node_in_range(&ggtt->vm.mm, node, + size, 0, I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + + mutex_unlock(&ggtt->vm.mutex); + + return err; } static void -remove_mappable_node(struct drm_mm_node *node) +remove_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node) { + mutex_lock(&ggtt->vm.mutex); drm_mm_remove_node(node); + mutex_unlock(&ggtt->vm.mutex); } int @@ -87,7 +98,8 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct i915_vma *vma; u64 pinned; - mutex_lock(&ggtt->vm.mutex); + if (mutex_lock_interruptible(&ggtt->vm.mutex)) + return -EINTR; pinned = ggtt->vm.reserved; list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) @@ -109,20 +121,24 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, LIST_HEAD(still_in_list); int ret = 0; - lockdep_assert_held(&obj->base.dev->struct_mutex); - spin_lock(&obj->vma.lock); while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, obj_link))) { + struct i915_address_space *vm = vma->vm; + + ret = -EBUSY; + if (!i915_vm_tryopen(vm)) + break; + list_move_tail(&vma->obj_link, &still_in_list); 
spin_unlock(&obj->vma.lock); - ret = -EBUSY; if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || !i915_vma_is_active(vma)) ret = i915_vma_unbind(vma); + i915_vm_close(vm); spin_lock(&obj->vma.lock); } list_splice(&still_in_list, &obj->vma.list); @@ -338,10 +354,6 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, u64 remain, offset; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); vma = ERR_PTR(-ENODEV); if (!i915_gem_object_is_tiled(obj)) @@ -355,12 +367,10 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, } else { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out_unlock; + goto out_rpm; GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -414,17 +424,14 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } -out_unlock: +out_rpm: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return ret; } @@ -531,10 +538,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, void __user *user_data; int ret; - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - if (i915_gem_object_has_struct_page(obj)) { /* * Avoid waking the device up if we can fallback, as @@ -544,10 +547,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * using the cache bypass of indirect GGTT access. 
*/ wakeref = intel_runtime_pm_get_if_in_use(rpm); - if (!wakeref) { - ret = -EFAULT; - goto out_unlock; - } + if (!wakeref) + return -EFAULT; } else { /* No backing pages, no fallback, we must force GGTT access */ wakeref = intel_runtime_pm_get(rpm); @@ -569,8 +570,6 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, GEM_BUG_ON(!drm_mm_node_allocated(&node)); } - mutex_unlock(&i915->drm.struct_mutex); - ret = i915_gem_object_lock_interruptible(obj); if (ret) goto out_unpin; @@ -634,18 +633,15 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, i915_gem_object_unlock_fence(obj, fence); out_unpin: - mutex_lock(&i915->drm.struct_mutex); intel_gt_flush_ggtt_writes(ggtt->vm.gt); if (drm_mm_node_allocated(&node)) { ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); - remove_mappable_node(&node); + remove_mappable_node(ggtt, &node); } else { i915_vma_unpin(vma); } out_rpm: intel_runtime_pm_put(rpm, wakeref); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -968,8 +964,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); - if (i915_gem_object_never_bind_ggtt(obj)) return ERR_PTR(-ENODEV); @@ -1019,13 +1013,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOSPC); } - WARN(i915_vma_is_pinned(vma), - "bo is already pinned in ggtt with incorrect alignment:" - " offset=%08x, req.alignment=%llx," - " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", - i915_ggtt_offset(vma), alignment, - !!(flags & PIN_MAPPABLE), - i915_vma_is_map_and_fenceable(vma)); ret = i915_vma_unbind(vma); if (ret) return ERR_PTR(ret); @@ -1444,8 +1431,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) } if (ret == -EIO) { - mutex_lock(&dev_priv->drm.struct_mutex); - /* * Allow engines or uC initialisation to fail by marking the GPU * as wedged. 
But we only want to do this when the GPU is angry, @@ -1462,8 +1447,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) i915_gem_restore_gtt_mappings(dev_priv); i915_gem_restore_fences(dev_priv); intel_init_clock_gating(dev_priv); - - mutex_unlock(&dev_priv->drm.struct_mutex); } i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 8c1e04f402bc..0552bf93eea3 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -47,8 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * bound by their active reference. */ return i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, + I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } @@ -104,7 +103,7 @@ i915_gem_evict_something(struct i915_address_space *vm, struct i915_vma *active; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* @@ -127,15 +126,6 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, color, start, end, mode); - /* - * Retire before we search the active list. Although we have - * reasonable accuracy in our retirement lists, we may have - * a stray pin (preventing eviction) that can only be resolved by - * retiring. 
- */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(dev_priv); - search_again: active = NULL; INIT_LIST_HEAD(&eviction_list); @@ -235,12 +225,12 @@ i915_gem_evict_something(struct i915_address_space *vm, list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) { vma = container_of(node, struct i915_vma, node); - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; @@ -268,7 +258,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, struct i915_vma *vma, *next; int ret = 0; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); @@ -349,7 +339,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; @@ -373,7 +363,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) struct i915_vma *vma, *next; int ret; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); trace_i915_gem_evict_vm(vm); /* Switch back to the default context in order to unpin @@ -388,7 +378,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm) } INIT_LIST_HEAD(&eviction_list); - mutex_lock(&vm->mutex); list_for_each_entry(vma, &vm->bound_list, vm_link) { if (i915_vma_is_pinned(vma)) continue; @@ -396,13 +385,12 @@ int i915_gem_evict_vm(struct i915_address_space *vm) __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); } - mutex_unlock(&vm->mutex); ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { __i915_vma_unpin(vma); if (ret == 0) - ret = i915_vma_unbind(vma); + ret = __i915_vma_unbind(vma); } return ret; 
} diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 615a9f4ef30c..487b7261f7ed 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -230,14 +230,15 @@ static int fence_update(struct i915_fence_reg *fence, i915_gem_object_get_tiling(vma->obj))) return -EINVAL; - ret = i915_active_wait(&vma->active); + ret = i915_vma_sync(vma); if (ret) return ret; } old = xchg(&fence->vma, NULL); if (old) { - ret = i915_active_wait(&old->active); + /* XXX Ideally we would move the waiting to outside the mutex */ + ret = i915_vma_sync(old); if (ret) { fence->vma = old; return ret; @@ -331,13 +332,15 @@ static struct i915_fence_reg *fence_find(struct drm_i915_private *i915) return ERR_PTR(-EDEADLK); } -static int __i915_vma_pin_fence(struct i915_vma *vma) +int __i915_vma_pin_fence(struct i915_vma *vma) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm); struct i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; int err; + lockdep_assert_held(&vma->vm->mutex); + /* Just update our place in the LRU if our fence is getting reused. 
*/ if (vma->fence) { fence = vma->fence; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8eba63ecdb03..55cebf256d03 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -150,16 +150,18 @@ static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt) static int ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 unused) + u32 flags) { u32 pte_flags; int err; - if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { + if (flags & I915_VMA_ALLOC) { err = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); if (err) return err; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); } /* Applicable to VLV, and gen8+ */ @@ -167,6 +169,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))); vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); wmb(); @@ -175,7 +178,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma, static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static int ppgtt_set_pages(struct i915_vma *vma) @@ -503,15 +507,26 @@ static void i915_address_space_fini(struct i915_address_space *vm) mutex_destroy(&vm->mutex); } -static void ppgtt_destroy_vma(struct i915_address_space *vm) +void __i915_vm_close(struct i915_address_space *vm) { struct i915_vma *vma, *vn; - mutex_lock(&vm->i915->drm.struct_mutex); - list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) + mutex_lock(&vm->mutex); + list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + + /* Keep the obj (and hence the vma) alive as _we_ destroy it */ + if (!kref_get_unless_zero(&obj->base.refcount)) 
+ continue; + + atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); i915_vma_destroy(vma); + + i915_gem_object_put(obj); + } GEM_BUG_ON(!list_empty(&vm->bound_list)); - mutex_unlock(&vm->i915->drm.struct_mutex); + mutex_unlock(&vm->mutex); } static void __i915_vm_release(struct work_struct *work) @@ -519,8 +534,6 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, rcu.work); - ppgtt_destroy_vma(vm); - vm->cleanup(vm); i915_address_space_fini(vm); @@ -535,7 +548,6 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm); - vm->closed = true; queue_rcu_work(vm->i915->wq, &vm->rcu); } @@ -543,6 +555,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) { kref_init(&vm->ref); INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + atomic_set(&vm->open, 1); /* * The vm->mutex must be reclaim safe (for use in the shrinker). 
@@ -1771,12 +1784,8 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - struct drm_i915_private *i915 = vm->i915; - /* FIXME remove the struct_mutex to bring the locking under control */ - mutex_lock(&i915->drm.struct_mutex); i915_vma_destroy(ppgtt->vma); - mutex_unlock(&i915->drm.struct_mutex); gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); @@ -1865,7 +1874,8 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) i915_active_init(i915, &vma->active, NULL, NULL); - vma->vm = &ggtt->vm; + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(&ggtt->vm); vma->ops = &pd_vma_ops; vma->private = ppgtt; @@ -1885,7 +1895,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base) struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base); int err = 0; - GEM_BUG_ON(ppgtt->base.vm.closed); + GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open)); /* * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt @@ -2463,14 +2473,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_LOCAL_BIND) { struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias; - if (!i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { + if (flags & I915_VMA_ALLOC) { ret = alias->vm.allocate_va_range(&alias->vm, vma->node.start, vma->size); if (ret) return ret; + + set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)); } + GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, + __i915_vma_flags(vma))); alias->vm.insert_entries(&alias->vm, vma, cache_level, pte_flags); } @@ -2499,7 +2513,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) vm->clear_range(vm, vma->node.start, vma->size); } - if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND)) { + if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) { struct i915_address_space *vm = &i915_vm_to_ggtt(vma->vm)->alias->vm; @@ -2602,22 +2616,16 @@ static int init_aliasing_ppgtt(struct 
i915_ggtt *ggtt) static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) { - struct drm_i915_private *i915 = ggtt->vm.i915; struct i915_ppgtt *ppgtt; - mutex_lock(&i915->drm.struct_mutex); - ppgtt = fetch_and_zero(&ggtt->alias); if (!ppgtt) - goto out; + return; i915_vm_put(&ppgtt->vm); ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma; ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma; - -out: - mutex_unlock(&i915->drm.struct_mutex); } static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) @@ -2734,32 +2742,28 @@ int i915_init_ggtt(struct drm_i915_private *i915) static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) { - struct drm_i915_private *i915 = ggtt->vm.i915; struct i915_vma *vma, *vn; - ggtt->vm.closed = true; + atomic_set(&ggtt->vm.open, 0); rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ - flush_workqueue(i915->wq); + flush_workqueue(ggtt->vm.i915->wq); - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&ggtt->vm.mutex); list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) - WARN_ON(i915_vma_unbind(vma)); + WARN_ON(__i915_vma_unbind(vma)); if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); ggtt_release_guc_top(ggtt); - - if (drm_mm_initialized(&ggtt->vm.mm)) { - intel_vgt_deballoon(ggtt); - i915_address_space_fini(&ggtt->vm); - } + intel_vgt_deballoon(ggtt); ggtt->vm.cleanup(&ggtt->vm); - mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&ggtt->vm.mutex); + i915_address_space_fini(&ggtt->vm); arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->iomap); @@ -3188,9 +3192,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915) static int ggtt_init_hw(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; - int ret = 0; - - mutex_lock(&i915->drm.struct_mutex); i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT); @@ -3206,18 +3207,14 @@ static int ggtt_init_hw(struct i915_ggtt *ggtt) ggtt->gmadr.start, ggtt->mappable_end)) { ggtt->vm.cleanup(&ggtt->vm); - ret = -EIO; - goto out; + return 
-EIO; } ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); i915_ggtt_init_fences(ggtt); -out: - mutex_unlock(&i915->drm.struct_mutex); - - return ret; + return 0; } /** @@ -3289,6 +3286,7 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) { struct i915_vma *vma, *vn; bool flush = false; + int open; intel_gt_check_and_clear_faults(ggtt->vm.gt); @@ -3296,7 +3294,9 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ + + /* Skip rewriting PTE on VMA unbind. */ + open = atomic_xchg(&ggtt->vm.open, 0); /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { @@ -3305,24 +3305,20 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) continue; - mutex_unlock(&ggtt->vm.mutex); - - if (!i915_vma_unbind(vma)) - goto lock; + if (!__i915_vma_unbind(vma)) + continue; + clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma)); WARN_ON(i915_vma_bind(vma, obj ? 
obj->cache_level : 0, - PIN_UPDATE)); + PIN_GLOBAL, NULL)); if (obj) { /* only used during resume => exclusive access */ flush |= fetch_and_zero(&obj->write_domain); obj->read_domains |= I915_GEM_DOMAIN_GTT; } - -lock: - mutex_lock(&ggtt->vm.mutex); } - ggtt->vm.closed = false; + atomic_set(&ggtt->vm.open, open); ggtt->invalidate(ggtt); mutex_unlock(&ggtt->vm.mutex); @@ -3714,7 +3710,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, u64 offset; int err; - lockdep_assert_held(&vm->i915->drm.struct_mutex); + lockdep_assert_held(&vm->mutex); + GEM_BUG_ON(!size); GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(alignment && !is_power_of_2(alignment)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3502b9c85a8e..0a18fdfe63ff 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -307,7 +307,14 @@ struct i915_address_space { unsigned int bind_async_flags; - bool closed; + /* + * Each active user context has its own address space (in full-ppgtt). + * Since the vm may be shared between multiple contexts, we count how + * many contexts keep us "open". Once open hits zero, we are closed + * and do not allow any new attachments, and proceed to shutdown our + * vma and page directories. 
+ */ + atomic_t open; struct mutex mutex; /* protects vma and our lists */ #define VM_CLASS_GGTT 0 @@ -581,6 +588,35 @@ static inline void i915_vm_put(struct i915_address_space *vm) kref_put(&vm->ref, i915_vm_release); } +static inline struct i915_address_space * +i915_vm_open(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + atomic_inc(&vm->open); + return i915_vm_get(vm); +} + +static inline bool +i915_vm_tryopen(struct i915_address_space *vm) +{ + if (atomic_add_unless(&vm->open, 1, 0)) + return i915_vm_get(vm); + + return false; +} + +void __i915_vm_close(struct i915_address_space *vm); + +static inline void +i915_vm_close(struct i915_address_space *vm) +{ + GEM_BUG_ON(!atomic_read(&vm->open)); + if (atomic_dec_and_test(&vm->open)) + __i915_vm_close(vm); + + i915_vm_put(vm); +} + int gen6_ppgtt_pin(struct i915_ppgtt *base); void gen6_ppgtt_unpin(struct i915_ppgtt *base); void gen6_ppgtt_unpin_all(struct i915_ppgtt *base); @@ -613,10 +649,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #define PIN_OFFSET_BIAS BIT_ULL(6) #define PIN_OFFSET_FIXED BIT_ULL(7) -#define PIN_MBZ BIT_ULL(8) /* I915_VMA_PIN_OVERFLOW */ -#define PIN_GLOBAL BIT_ULL(9) /* I915_VMA_GLOBAL_BIND */ -#define PIN_USER BIT_ULL(10) /* I915_VMA_LOCAL_BIND */ -#define PIN_UPDATE BIT_ULL(11) +#define PIN_UPDATE BIT_ULL(9) +#define PIN_GLOBAL BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */ +#define PIN_USER BIT_ULL(11) /* I915_VMA_LOCAL_BIND */ #define PIN_OFFSET_MASK (-I915_GTT_PAGE_SIZE) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 524f6710b7aa..80055501eccb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1204,15 +1204,10 @@ static int i915_oa_read(struct i915_perf_stream *stream, static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) { struct i915_gem_engines_iter it; - struct drm_i915_private *i915 = stream->dev_priv; struct i915_gem_context *ctx = stream->ctx; 
struct intel_context *ce; int err; - err = i915_mutex_lock_interruptible(&i915->drm); - if (err) - return ERR_PTR(err); - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { if (ce->engine->class != RENDER_CLASS) continue; @@ -1229,10 +1224,6 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) } i915_gem_context_unlock_engines(ctx); - mutex_unlock(&i915->drm.struct_mutex); - if (err) - return ERR_PTR(err); - return stream->pinned_ctx; } @@ -1331,32 +1322,22 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) */ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { - struct drm_i915_private *dev_priv = stream->dev_priv; struct intel_context *ce; stream->specific_ctx_id = INVALID_CTX_ID; stream->specific_ctx_id_mask = 0; ce = fetch_and_zero(&stream->pinned_ctx); - if (ce) { - mutex_lock(&dev_priv->drm.struct_mutex); + if (ce) intel_context_unpin(ce); - mutex_unlock(&dev_priv->drm.struct_mutex); - } } static void free_oa_buffer(struct i915_perf_stream *stream) { - struct drm_i915_private *i915 = stream->dev_priv; - - mutex_lock(&i915->drm.struct_mutex); - i915_vma_unpin_and_release(&stream->oa_buffer.vma, I915_VMA_RELEASE_MAP); - mutex_unlock(&i915->drm.struct_mutex); - stream->oa_buffer.vaddr = NULL; } @@ -1511,18 +1492,13 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) if (WARN_ON(stream->oa_buffer.vma)) return -ENODEV; - ret = i915_mutex_lock_interruptible(&dev_priv->drm); - if (ret) - return ret; - BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE); BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M); bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE); if (IS_ERR(bo)) { DRM_ERROR("Failed to allocate OA buffer\n"); - ret = PTR_ERR(bo); - goto unlock; + return PTR_ERR(bo); } i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); @@ -1546,7 +1522,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) i915_ggtt_offset(stream->oa_buffer.vma), 
stream->oa_buffer.vaddr); - goto unlock; + return 0; err_unpin: __i915_vma_unpin(vma); @@ -1557,8 +1533,6 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) stream->oa_buffer.vaddr = NULL; stream->oa_buffer.vma = NULL; -unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index d097f77890ba..fe91a0e47b88 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -32,6 +32,7 @@ #include "i915_drv.h" #include "i915_globals.h" +#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_vma.h" @@ -110,7 +111,8 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); - vma->vm = vm; + mutex_init(&vma->pages_mutex); + vma->vm = i915_vm_get(vm); vma->ops = &vm->vma_ops; vma->obj = obj; vma->resv = obj->base.resv; @@ -261,8 +263,6 @@ vma_lookup(struct drm_i915_gem_object *obj, * Once created, the VMA is kept until either the object is freed, or the * address space is closed. * - * Must be called with struct_mutex held. - * * Returns the vma, or an error pointer. 
*/ struct i915_vma * @@ -273,7 +273,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj, struct i915_vma *vma; GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(vm->closed); + GEM_BUG_ON(!atomic_read(&vm->open)); spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); @@ -287,18 +287,63 @@ i915_vma_instance(struct drm_i915_gem_object *obj, return vma; } +struct i915_vma_work { + struct dma_fence_work base; + struct i915_vma *vma; + enum i915_cache_level cache_level; + unsigned int flags; +}; + +static int __vma_bind(struct dma_fence_work *work) +{ + struct i915_vma_work *vw = container_of(work, typeof(*vw), base); + struct i915_vma *vma = vw->vma; + int err; + + err = vma->ops->bind_vma(vma, vw->cache_level, vw->flags); + if (err) + atomic_or(I915_VMA_ERROR, &vma->flags); + + if (vma->obj) + __i915_gem_object_unpin_pages(vma->obj); + + return err; +} + +static const struct dma_fence_work_ops bind_ops = { + .name = "bind", + .work = __vma_bind, +}; + +struct i915_vma_work *i915_vma_work(void) +{ + struct i915_vma_work *vw; + + vw = kzalloc(sizeof(*vw), GFP_KERNEL); + if (!vw) + return NULL; + + dma_fence_work_init(&vw->base, &bind_ops); + vw->base.dma.error = -EAGAIN; /* disable the worker by default */ + + return vw; +} + /** * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. * @vma: VMA to map * @cache_level: mapping cache level * @flags: flags like global or local mapping + * @work: preallocated worker for allocating and binding the PTE * * DMA addresses are taken from the scatter-gather table of this object (or of * this VMA in case of non-default GGTT views) and PTE entries set up. * Note that DMA addresses are also the only part of the SG table we care about. 
*/ -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags) +int i915_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags, + struct i915_vma_work *work) { u32 bind_flags; u32 vma_flags; @@ -315,11 +360,8 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (GEM_DEBUG_WARN_ON(!flags)) return -EINVAL; - bind_flags = 0; - if (flags & PIN_GLOBAL) - bind_flags |= I915_VMA_GLOBAL_BIND; - if (flags & PIN_USER) - bind_flags |= I915_VMA_LOCAL_BIND; + bind_flags = flags; + bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; vma_flags = atomic_read(&vma->flags); vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; @@ -333,9 +375,32 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, GEM_BUG_ON(!vma->pages); trace_i915_vma_bind(vma, bind_flags); - ret = vma->ops->bind_vma(vma, cache_level, bind_flags); - if (ret) - return ret; + if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) { + work->vma = vma; + work->cache_level = cache_level; + work->flags = bind_flags | I915_VMA_ALLOC; + + /* + * Note we only want to chain up to the migration fence on + * the pages (not the object itself). As we don't track that, + * yet, we have to use the exclusive fence instead. + * + * Also note that we do not want to track the async vma as + * part of the obj->resv->excl_fence as it only affects + * execution and not content or object's backing store lifetime. 
+ */ + GEM_BUG_ON(i915_active_has_exclusive(&vma->active)); + i915_active_set_exclusive(&vma->active, &work->base.dma); + work->base.dma.error = 0; /* enable the queue_work() */ + + if (vma->obj) + __i915_gem_object_pin_pages(vma->obj); + } else { + GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags); + ret = vma->ops->bind_vma(vma, cache_level, bind_flags); + if (ret) + return ret; + } atomic_or(bind_flags, &vma->flags); return 0; @@ -348,9 +413,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) /* Access through the GTT requires the device to be awake. */ assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm); - - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { + if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) { err = -ENODEV; goto err; } @@ -358,7 +421,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)); - ptr = vma->iomap; + ptr = READ_ONCE(vma->iomap); if (ptr == NULL) { ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, vma->node.start, @@ -368,7 +431,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) goto err; } - vma->iomap = ptr; + if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) { + io_mapping_unmap(ptr); + ptr = vma->iomap; + } } __i915_vma_pin(vma); @@ -388,18 +454,12 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) void i915_vma_flush_writes(struct i915_vma *vma) { - if (!i915_vma_has_ggtt_write(vma)) - return; - - intel_gt_flush_ggtt_writes(vma->vm->gt); - - i915_vma_unset_ggtt_write(vma); + if (i915_vma_unset_ggtt_write(vma)) + intel_gt_flush_ggtt_writes(vma->vm->gt); } void i915_vma_unpin_iomap(struct i915_vma *vma) { - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); i915_vma_flush_writes(vma); @@ -435,6 +495,9 @@ bool i915_vma_misplaced(const struct i915_vma *vma, if 
(!drm_mm_node_allocated(&vma->node)) return false; + if (test_bit(I915_VMA_ERROR_BIT, __i915_vma_flags(vma))) + return true; + if (vma->node.size < size) return true; @@ -535,7 +598,6 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj) static int i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_i915_private *dev_priv = vma->vm->i915; unsigned long color; u64 start, end; int ret; @@ -561,7 +623,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) end = vma->vm->total; if (flags & PIN_MAPPABLE) - end = min_t(u64, end, dev_priv->ggtt.mappable_end); + end = min_t(u64, end, i915_vm_to_ggtt(vma->vm)->mappable_end); if (flags & PIN_ZONE_4G) end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE); GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); @@ -578,34 +640,20 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } color = 0; - if (vma->obj) { - ret = i915_gem_object_pin_pages(vma->obj); - if (ret) - return ret; - - if (i915_vm_has_cache_coloring(vma->vm)) - color = vma->obj->cache_level; - } - - GEM_BUG_ON(vma->pages); - - ret = vma->ops->set_pages(vma); - if (ret) - goto err_unpin; + if (vma->obj && i915_vm_has_cache_coloring(vma->vm)) + color = vma->obj->cache_level; if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (!IS_ALIGNED(offset, alignment) || - range_overflows(offset, size, end)) { - ret = -EINVAL; - goto err_clear; - } + range_overflows(offset, size, end)) + return -EINVAL; ret = i915_gem_gtt_reserve(vma->vm, &vma->node, size, offset, color, flags); if (ret) - goto err_clear; + return ret; } else { /* * We only support huge gtt pages through the 48b PPGTT, @@ -644,7 +692,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) size, alignment, color, start, end, flags); if (ret) - goto err_clear; + return ret; GEM_BUG_ON(vma->node.start < start); GEM_BUG_ON(vma->node.start + vma->node.size > end); @@ 
-652,23 +700,15 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color)); - mutex_lock(&vma->vm->mutex); list_add_tail(&vma->vm_link, &vma->vm->bound_list); - mutex_unlock(&vma->vm->mutex); if (vma->obj) { + atomic_inc(&vma->obj->mm.pages_pin_count); atomic_inc(&vma->obj->bind_count); assert_bind_count(vma->obj); } return 0; - -err_clear: - vma->ops->clear_pages(vma); -err_unpin: - if (vma->obj) - i915_gem_object_unpin_pages(vma->obj); - return ret; } static void @@ -677,12 +717,7 @@ i915_vma_remove(struct i915_vma *vma) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - vma->ops->clear_pages(vma); - - mutex_lock(&vma->vm->mutex); - drm_mm_remove_node(&vma->node); list_del(&vma->vm_link); - mutex_unlock(&vma->vm->mutex); /* * Since the unbound list is global, only move to that list if @@ -701,51 +736,211 @@ i915_vma_remove(struct i915_vma *vma) i915_gem_object_unpin_pages(obj); assert_bind_count(obj); } + + drm_mm_remove_node(&vma->node); } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags) +static bool try_qad_pin(struct i915_vma *vma, unsigned int flags) { - const unsigned int bound = atomic_read(&vma->flags); - int ret; + unsigned int bound; + bool pinned = true; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); + bound = atomic_read(&vma->flags); + do { + if (unlikely(flags & ~bound)) + return false; - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { - ret = -EBUSY; - goto err_unpin; + if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) + return false; + + if (!(bound & I915_VMA_PIN_MASK)) + goto unpinned; + + GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0); + } while (!atomic_try_cmpxchg(&vma->flags, 
&bound, bound + 1)); + + return true; + +unpinned: + /* + * If pin_count==0, but we are bound, check under the lock to avoid + * racing with a concurrent i915_vma_unbind(). + */ + mutex_lock(&vma->vm->mutex); + do { + if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) { + pinned = false; + break; + } + + if (unlikely(flags & ~bound)) { + pinned = false; + break; + } + } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1)); + mutex_unlock(&vma->vm->mutex); + + return pinned; +} + +static int vma_get_pages(struct i915_vma *vma) +{ + int err = 0; + + if (atomic_add_unless(&vma->pages_count, 1, 0)) + return 0; + + /* Allocations ahoy! */ + if (mutex_lock_interruptible(&vma->pages_mutex)) + return -EINTR; + + if (!atomic_read(&vma->pages_count)) { + if (vma->obj) { + err = i915_gem_object_pin_pages(vma->obj); + if (err) + goto unlock; + } + + err = vma->ops->set_pages(vma); + if (err) + goto unlock; } + atomic_inc(&vma->pages_count); - if ((bound & I915_VMA_BIND_MASK) == 0) { - ret = i915_vma_insert(vma, size, alignment, flags); - if (ret) - goto err_unpin; +unlock: + mutex_unlock(&vma->pages_mutex); + + return err; +} + +static void __vma_put_pages(struct i915_vma *vma, unsigned int count) +{ + /* We allocate under vma_get_pages, so beware the shrinker */ + mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING); + GEM_BUG_ON(atomic_read(&vma->pages_count) < count); + if (atomic_sub_return(count, &vma->pages_count) == 0) { + vma->ops->clear_pages(vma); + GEM_BUG_ON(vma->pages); + if (vma->obj) + i915_gem_object_unpin_pages(vma->obj); } - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + mutex_unlock(&vma->pages_mutex); +} - ret = i915_vma_bind(vma, vma->obj ? 
vma->obj->cache_level : 0, flags); - if (ret) - goto err_remove; +static void vma_put_pages(struct i915_vma *vma) +{ + if (atomic_add_unless(&vma->pages_count, -1, 1)) + return; + + __vma_put_pages(vma, 1); +} + +static void vma_unbind_pages(struct i915_vma *vma) +{ + unsigned int count; + + lockdep_assert_held(&vma->vm->mutex); + + /* The upper portion of pages_count is the number of bindings */ + count = atomic_read(&vma->pages_count); + count >>= I915_VMA_PAGES_BIAS; + GEM_BUG_ON(!count); + + __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS); +} + +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct i915_vma_work *work = NULL; + unsigned int bound; + int err; + + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + GEM_BUG_ON(flags & PIN_UPDATE); + GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL))); + + /* First try and grab the pin without rebinding the vma */ + if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK)) + return 0; + + err = vma_get_pages(vma); + if (err) + return err; + + if (flags & vma->vm->bind_async_flags) { + work = i915_vma_work(); + if (!work) { + err = -ENOMEM; + goto err_pages; + } + } + + /* No more allocations allowed once we hold vm->mutex */ + err = mutex_lock_interruptible(&vma->vm->mutex); + if (err) + goto err_fence; + + bound = atomic_read(&vma->flags); + if (unlikely(bound & I915_VMA_ERROR)) { + err = -ENOMEM; + goto err_unlock; + } + + if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) { + err = -EAGAIN; /* pins are meant to be fairly temporary */ + goto err_unlock; + } + + if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) { + __i915_vma_pin(vma); + goto err_unlock; + } + + err = i915_active_acquire(&vma->active); + if (err) + goto err_unlock; + + if (!(bound & I915_VMA_BIND_MASK)) { + err = i915_vma_insert(vma, size, alignment, flags); + if (err) + goto err_active; + + if (i915_is_ggtt(vma->vm)) + __i915_vma_set_map_and_fenceable(vma); + } - 
GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)); + GEM_BUG_ON(!vma->pages); + err = i915_vma_bind(vma, + vma->obj ? vma->obj->cache_level : 0, + flags, work); + if (err) + goto err_remove; - if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND) - __i915_vma_set_map_and_fenceable(vma); + /* There should only be at most 2 active bindings (user, global) */ + GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound); + atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count); + list_move_tail(&vma->vm_link, &vma->vm->bound_list); + __i915_vma_pin(vma); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + GEM_BUG_ON(!i915_vma_is_bound(vma, flags)); GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; err_remove: - if ((bound & I915_VMA_BIND_MASK) == 0) { + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) i915_vma_remove(vma); - GEM_BUG_ON(vma->pages); - GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK); - } -err_unpin: - __i915_vma_unpin(vma); - return ret; +err_active: + i915_active_release(&vma->active); +err_unlock: + mutex_unlock(&vma->vm->mutex); +err_fence: + if (work) + dma_fence_work_commit(&work->base); +err_pages: + vma_put_pages(vma); + return err; } void i915_vma_close(struct i915_vma *vma) @@ -776,9 +971,6 @@ static void __i915_vma_remove_closed(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - if (!i915_vma_is_closed(vma)) - return; - spin_lock_irq(&i915->gt.closed_lock); list_del_init(&vma->closed_link); spin_unlock_irq(&i915->gt.closed_lock); @@ -786,40 +978,35 @@ static void __i915_vma_remove_closed(struct i915_vma *vma) void i915_vma_reopen(struct i915_vma *vma) { - __i915_vma_remove_closed(vma); + if (i915_vma_is_closed(vma)) + __i915_vma_remove_closed(vma); } -static void __i915_vma_destroy(struct i915_vma *vma) +void i915_vma_destroy(struct i915_vma *vma) { - GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(vma->fence); + if (drm_mm_node_allocated(&vma->node)) { + mutex_lock(&vma->vm->mutex); + 
atomic_and(~I915_VMA_PIN_MASK, &vma->flags); + WARN_ON(__i915_vma_unbind(vma)); + mutex_unlock(&vma->vm->mutex); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + } + GEM_BUG_ON(i915_vma_is_active(vma)); if (vma->obj) { struct drm_i915_gem_object *obj = vma->obj; spin_lock(&obj->vma.lock); list_del(&vma->obj_link); - rb_erase(&vma->obj_node, &vma->obj->vma.tree); + rb_erase(&vma->obj_node, &obj->vma.tree); spin_unlock(&obj->vma.lock); } - i915_active_fini(&vma->active); - - i915_vma_free(vma); -} - -void i915_vma_destroy(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - - GEM_BUG_ON(i915_vma_is_pinned(vma)); - __i915_vma_remove_closed(vma); + i915_vm_put(vma->vm); - WARN_ON(i915_vma_unbind(vma)); - GEM_BUG_ON(i915_vma_is_active(vma)); - - __i915_vma_destroy(vma); + i915_active_fini(&vma->active); + i915_vma_free(vma); } void i915_vma_parked(struct drm_i915_private *i915) @@ -828,12 +1015,32 @@ void i915_vma_parked(struct drm_i915_private *i915) spin_lock_irq(&i915->gt.closed_lock); list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { - list_del_init(&vma->closed_link); + struct drm_i915_gem_object *obj = vma->obj; + struct i915_address_space *vm = vma->vm; + + /* XXX All to avoid keeping a reference on i915_vma itself */ + + if (!kref_get_unless_zero(&obj->base.refcount)) + continue; + + if (!i915_vm_tryopen(vm)) { + i915_gem_object_put(obj); + obj = NULL; + } + spin_unlock_irq(&i915->gt.closed_lock); - i915_vma_destroy(vma); + if (obj) { + i915_vma_destroy(vma); + i915_gem_object_put(obj); + } + i915_vm_close(vm); + + /* Restart after dropping lock */ spin_lock_irq(&i915->gt.closed_lock); + next = list_first_entry(&i915->gt.closed_vma, + typeof(*next), closed_link); } spin_unlock_irq(&i915->gt.closed_lock); } @@ -873,6 +1080,20 @@ void i915_vma_revoke_mmap(struct i915_vma *vma) list_del(&vma->obj->userfault_link); } +int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq) +{ + int err; + + 
GEM_BUG_ON(!i915_vma_is_pinned(vma)); + + /* Wait for the vma to be bound before we start! */ + err = i915_request_await_active(rq, &vma->active); + if (err) + return err; + + return i915_active_add_request(&vma->active, rq); +} + int i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags) @@ -880,19 +1101,9 @@ int i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; int err; - assert_vma_held(vma); assert_object_held(obj); - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - /* - * Add a reference if we're newly entering the active list. - * The order in which we add operations to the retirement queue is - * vital here: mark_active adds to the start of the callback list, - * such that subsequent callbacks are called first. Therefore we - * add the active reference first and queue for it to be dropped - * *last*. - */ - err = i915_active_add_request(&vma->active, rq); + err = __i915_vma_move_to_active(vma, rq); if (unlikely(err)) return err; @@ -918,38 +1129,23 @@ int i915_vma_move_to_active(struct i915_vma *vma, return 0; } -int i915_vma_unbind(struct i915_vma *vma) +int __i915_vma_unbind(struct i915_vma *vma) { int ret; - lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + lockdep_assert_held(&vma->vm->mutex); /* * First wait upon any activity as retiring the request may * have side-effects such as unpinning or even unbinding this vma. + * + * XXX Actually waiting under the vm->mutex is a hindrance and + * should be pipelined wherever possible. In cases where that is + * unavoidable, we should lift the wait to before the mutex. */ - might_sleep(); - if (i915_vma_is_active(vma)) { - /* - * When a closed VMA is retired, it is unbound - eek. - * In order to prevent it from being recursively closed, - * take a pin on the vma so that the second unbind is - * aborted. - * - * Even more scary is that the retire callback may free - * the object (last active vma).
To prevent the explosion - * we defer the actual object free to a worker that can - * only proceed once it acquires the struct_mutex (which - * we currently hold, therefore it cannot free this object - * before we are finished). - */ - __i915_vma_pin(vma); - ret = i915_active_wait(&vma->active); - __i915_vma_unpin(vma); - if (ret) - return ret; - } - GEM_BUG_ON(i915_vma_is_active(vma)); + ret = i915_vma_sync(vma); + if (ret) + return ret; if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); @@ -970,16 +1166,12 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(i915_vma_has_ggtt_write(vma)); /* release the fence reg _after_ flushing */ - mutex_lock(&vma->vm->mutex); ret = i915_vma_revoke_fence(vma); - mutex_unlock(&vma->vm->mutex); if (ret) return ret; /* Force a pagefault for domain tracking on next user access */ - mutex_lock(&vma->vm->mutex); i915_vma_revoke_mmap(vma); - mutex_unlock(&vma->vm->mutex); __i915_vma_iounmap(vma); clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(vma)); @@ -987,17 +1179,33 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); - if (likely(!vma->vm->closed)) { + if (likely(atomic_read(&vma->vm->open))) { trace_i915_vma_unbind(vma); vma->ops->unbind_vma(vma); } - atomic_and(~I915_VMA_BIND_MASK, &vma->flags); + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR), &vma->flags); + vma_unbind_pages(vma); i915_vma_remove(vma); return 0; } +int i915_vma_unbind(struct i915_vma *vma) +{ + struct i915_address_space *vm = vma->vm; + int err; + + err = mutex_lock_interruptible(&vm->mutex); + if (err) + return err; + + err = __i915_vma_unbind(vma); + mutex_unlock(&vm->mutex); + + return err; +} + struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) { i915_gem_object_make_unshrinkable(vma->obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e49b199f7de7..858908e3d1cc 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ 
b/drivers/gpu/drm/i915/i915_vma.h @@ -96,25 +96,28 @@ struct i915_vma { * exclusive cachelines of a single page, so a maximum of 64 possible * users. */ -#define I915_VMA_PIN_MASK 0xff -#define I915_VMA_PIN_OVERFLOW_BIT 8 -#define I915_VMA_PIN_OVERFLOW ((int)BIT(I915_VMA_PIN_OVERFLOW_BIT)) +#define I915_VMA_PIN_MASK 0x3ff +#define I915_VMA_OVERFLOW 0x200 /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND_BIT 9 -#define I915_VMA_LOCAL_BIND_BIT 10 +#define I915_VMA_GLOBAL_BIND_BIT 10 +#define I915_VMA_LOCAL_BIND_BIT 11 #define I915_VMA_GLOBAL_BIND ((int)BIT(I915_VMA_GLOBAL_BIND_BIT)) #define I915_VMA_LOCAL_BIND ((int)BIT(I915_VMA_LOCAL_BIND_BIT)) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | \ - I915_VMA_LOCAL_BIND | \ - I915_VMA_PIN_OVERFLOW) +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND) -#define I915_VMA_GGTT_BIT 11 -#define I915_VMA_CAN_FENCE_BIT 12 -#define I915_VMA_USERFAULT_BIT 13 -#define I915_VMA_GGTT_WRITE_BIT 14 +#define I915_VMA_ALLOC_BIT 12 +#define I915_VMA_ALLOC ((int)BIT(I915_VMA_ALLOC_BIT)) + +#define I915_VMA_ERROR_BIT 13 +#define I915_VMA_ERROR ((int)BIT(I915_VMA_ERROR_BIT)) + +#define I915_VMA_GGTT_BIT 14 +#define I915_VMA_CAN_FENCE_BIT 15 +#define I915_VMA_USERFAULT_BIT 16 +#define I915_VMA_GGTT_WRITE_BIT 17 #define I915_VMA_GGTT ((int)BIT(I915_VMA_GGTT_BIT)) #define I915_VMA_CAN_FENCE ((int)BIT(I915_VMA_CAN_FENCE_BIT)) @@ -123,6 +126,11 @@ struct i915_vma { struct i915_active active; +#define I915_VMA_PAGES_BIAS 24 +#define I915_VMA_PAGES_ACTIVE (BIT(24) | 1) + atomic_t pages_count; /* number of active binds to the pages */ + struct mutex pages_mutex; /* protect acquire/release of backing pages */ + /** * Support different GGTT views into the same object. * This means there can be multiple VMA mappings per object and per VM. 
@@ -169,6 +177,8 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma) return !i915_active_is_idle(&vma->active); } +int __must_check __i915_vma_move_to_active(struct i915_vma *vma, + struct i915_request *rq); int __must_check i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq, unsigned int flags); @@ -307,13 +317,18 @@ i915_vma_compare(struct i915_vma *vma, return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); } -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags); +struct i915_vma_work *i915_vma_work(void); +int i915_vma_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags, + struct i915_vma_work *work); + bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color); bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); +int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); @@ -332,26 +347,8 @@ static inline void i915_vma_unlock(struct i915_vma *vma) dma_resv_unlock(vma->resv); } -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); - - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. 
- */ - if (likely(((atomic_inc_return(&vma->flags) ^ flags) & I915_VMA_BIND_MASK) == 0)) { - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; - } - - return __i915_vma_do_pin(vma, size, alignment, flags); -} +int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); static inline int i915_vma_pin_count(const struct i915_vma *vma) { @@ -366,17 +363,17 @@ static inline bool i915_vma_is_pinned(const struct i915_vma *vma) static inline void __i915_vma_pin(struct i915_vma *vma) { atomic_inc(&vma->flags); - GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_PIN_OVERFLOW); + GEM_BUG_ON(!i915_vma_is_pinned(vma)); } static inline void __i915_vma_unpin(struct i915_vma *vma) { + GEM_BUG_ON(!i915_vma_is_pinned(vma)); atomic_dec(&vma->flags); } static inline void i915_vma_unpin(struct i915_vma *vma) { - GEM_BUG_ON(!i915_vma_is_pinned(vma)); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); __i915_vma_unpin(vma); } @@ -402,8 +399,6 @@ static inline bool i915_node_color_differs(const struct drm_mm_node *node, * the caller must call i915_vma_unpin_iomap to relinquish the pinning * after the iomapping is no longer required. * - * Callers must hold the struct_mutex. - * * Returns a valid iomapped pointer or ERR_PTR. */ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); @@ -415,8 +410,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); * * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). * - * Callers must hold the struct_mutex. This function is only valid to be - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). + * This function is only valid to be called on a VMA previously + * iomapped by the caller with i915_vma_pin_iomap(). 
*/ void i915_vma_unpin_iomap(struct i915_vma *vma); @@ -444,6 +439,8 @@ static inline struct page *i915_vma_first_page(struct i915_vma *vma) int __must_check i915_vma_pin_fence(struct i915_vma *vma); int __must_check i915_vma_revoke_fence(struct i915_vma *vma); +int __i915_vma_pin_fence(struct i915_vma *vma); + static inline void __i915_vma_unpin_fence(struct i915_vma *vma) { GEM_BUG_ON(atomic_read(&vma->fence->pin_count) <= 0); @@ -461,7 +458,6 @@ static inline void __i915_vma_unpin_fence(struct i915_vma *vma) static inline void i915_vma_unpin_fence(struct i915_vma *vma) { - /* lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); */ if (vma->fence) __i915_vma_unpin_fence(vma); } @@ -490,4 +486,10 @@ struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma); void i915_vma_make_shrinkable(struct i915_vma *vma); void i915_vma_make_purgeable(struct i915_vma *vma); +static inline int i915_vma_sync(struct i915_vma *vma) +{ + /* Wait for the asynchronous bindings and pending GPU reads */ + return i915_active_wait(&vma->active); +} + #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 37593831b539..0346c3e5b6b6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -119,10 +119,8 @@ static void pm_resume(struct drm_i915_private *i915) intel_gt_sanitize(&i915->gt, false); i915_gem_sanitize(i915); - mutex_lock(&i915->drm.struct_mutex); i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_resume(i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 2905fb21d866..75a4695b82bb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -106,14 +106,11 @@ static int populate_ggtt(struct drm_i915_private *i915, static void unpin_ggtt(struct drm_i915_private *i915) { - 
struct i915_ggtt *ggtt = &i915->ggtt; struct i915_vma *vma; - mutex_lock(&ggtt->vm.mutex); list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) if (vma->obj->mm.quirked) i915_vma_unpin(vma); - mutex_unlock(&ggtt->vm.mutex); } static void cleanup_objects(struct drm_i915_private *i915, @@ -127,11 +124,7 @@ static void cleanup_objects(struct drm_i915_private *i915, i915_gem_object_put(obj); } - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - - mutex_lock(&i915->drm.struct_mutex); } static int igt_evict_something(void *arg) @@ -148,10 +141,12 @@ static int igt_evict_something(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", err); @@ -161,10 +156,12 @@ static int igt_evict_something(void *arg) unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict something */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", err); @@ -230,7 +227,9 @@ static int igt_evict_for_vma(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", err); @@ -240,7 +239,9 @@ static int igt_evict_for_vma(void *arg) unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict the node */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_for_node returned err=%d\n", err); @@ 
-319,7 +320,9 @@ static int igt_evict_for_cache_color(void *arg) i915_vma_unpin(vma); /* Remove just the second vma */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err); goto cleanup; @@ -330,7 +333,9 @@ static int igt_evict_for_cache_color(void *arg) */ target.color = I915_CACHE_L3_LLC; + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); + mutex_unlock(&ggtt->vm.mutex); if (!err) { pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err); err = -EINVAL; @@ -360,7 +365,9 @@ static int igt_evict_vm(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -369,7 +376,9 @@ static int igt_evict_vm(void *arg) unpin_ggtt(i915); + mutex_lock(&ggtt->vm.mutex); err = i915_gem_evict_vm(&ggtt->vm); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -410,11 +419,11 @@ static int igt_evict_contexts(void *arg) if (!HAS_FULL_PPGTT(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); wakeref = intel_runtime_pm_get(&i915->runtime_pm); /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); + mutex_lock(&i915->ggtt.vm.mutex); err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole, PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, i915->ggtt.vm.total, @@ -427,7 +436,9 @@ static int igt_evict_contexts(void *arg) do { struct reserved *r; + mutex_unlock(&i915->ggtt.vm.mutex); r = kcalloc(1, sizeof(*r), GFP_KERNEL); + mutex_lock(&i915->ggtt.vm.mutex); if (!r) { err = -ENOMEM; goto out_locked; @@ -447,7 +458,7 @@ static int igt_evict_contexts(void *arg) count++; } while (1); drm_mm_remove_node(&hole); - 
mutex_unlock(&i915->drm.struct_mutex); + mutex_unlock(&i915->ggtt.vm.mutex); pr_info("Filled GGTT with %lu 1MiB nodes\n", count); /* Overfill the GGTT with context objects and so try to evict one. */ @@ -510,7 +521,7 @@ static int igt_evict_contexts(void *arg) break; } - mutex_lock(&i915->drm.struct_mutex); + mutex_lock(&i915->ggtt.vm.mutex); out_locked: if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; @@ -524,8 +535,8 @@ static int igt_evict_contexts(void *arg) } if (drm_mm_node_allocated(&hole)) drm_mm_remove_node(&hole); + mutex_unlock(&i915->ggtt.vm.mutex); intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -547,12 +558,9 @@ int i915_gem_evict_mock_selftests(void) if (!i915) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); with_intel_runtime_pm(&i915->runtime_pm, wakeref) err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - drm_dev_put(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 0945d6e978a2..02749bbfd0cf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -38,16 +38,7 @@ static void cleanup_freed_objects(struct drm_i915_private *i915) { - /* - * As we may hold onto the struct_mutex for inordinate lengths of - * time, the NMI khungtaskd detector may fire for the free objects - * worker. - */ - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - - mutex_lock(&i915->drm.struct_mutex); } static void fake_free_pages(struct drm_i915_gem_object *obj, @@ -880,6 +871,15 @@ static int __shrink_hole(struct drm_i915_private *i915, i915_vma_unpin(vma); addr += size; + /* + * Since we are injecting allocation faults at random intervals, + * wait for this allocation to complete before we change the + * faultinjection. 
+ */ + err = i915_vma_sync(vma); + if (err) + break; + if (igt_timeout(end_time, "%s timed out at ofset %llx [%llx - %llx]\n", __func__, addr, hole_start, hole_end)) { @@ -1013,21 +1013,19 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = i915_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); - goto out_unlock; + goto out_free; } GEM_BUG_ON(offset_in_page(ppgtt->vm.total)); - GEM_BUG_ON(ppgtt->vm.closed); + GEM_BUG_ON(!atomic_read(&ppgtt->vm.open)); err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time); i915_vm_put(&ppgtt->vm); -out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); +out_free: mock_file_free(dev_priv, file); return err; } @@ -1090,7 +1088,6 @@ static int exercise_ggtt(struct drm_i915_private *i915, IGT_TIMEOUT(end_time); int err = 0; - mutex_lock(&i915->drm.struct_mutex); restart: list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes); drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) { @@ -1111,7 +1108,6 @@ static int exercise_ggtt(struct drm_i915_private *i915, last = hole_end; goto restart; } - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1153,13 +1149,9 @@ static int igt_ggtt_page(void *arg) unsigned int *order, n; int err; - mutex_lock(&i915->drm.struct_mutex); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto out_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); err = i915_gem_object_pin_pages(obj); if (err) @@ -1227,8 +1219,6 @@ static int igt_ggtt_page(void *arg) i915_gem_object_unpin_pages(obj); out_free: i915_gem_object_put(obj); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1239,6 +1229,9 @@ static void track_vma_bind(struct i915_vma *vma) atomic_inc(&obj->bind_count); /* track for eviction later */ __i915_gem_object_pin_pages(obj); + GEM_BUG_ON(vma->pages); + atomic_set(&vma->pages_count, 
I915_VMA_PAGES_ACTIVE); + __i915_gem_object_pin_pages(obj); vma->pages = obj->mm.pages; mutex_lock(&vma->vm->mutex); @@ -1336,11 +1329,13 @@ static int igt_gtt_reserve(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1386,11 +1381,13 @@ static int igt_gtt_reserve(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, total, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1431,11 +1428,13 @@ static int igt_gtt_reserve(void *arg) 2 * I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT); + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, obj->base.size, offset, obj->cache_level, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1504,11 +1503,13 @@ static int igt_gtt_insert(void *arg) /* Check a couple of obviously invalid requests */ for (ii = invalid_insert; ii->size; ii++) { + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &tmp, ii->size, ii->alignment, I915_COLOR_UNEVICTABLE, ii->start, ii->end, 0); + mutex_unlock(&ggtt->vm.mutex); if (err != -ENOSPC) { pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", ii->size, ii->alignment, ii->start, ii->end, @@ -1544,10 +1545,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err == -ENOSPC) { /* maxed out the 
GGTT space */ i915_gem_object_put(obj); @@ -1602,10 +1605,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1649,10 +1654,12 @@ static int igt_gtt_insert(void *arg) goto out; } + mutex_lock(&ggtt->vm.mutex); err = i915_gem_gtt_insert(&ggtt->vm, &vma->node, obj->base.size, 0, obj->cache_level, 0, ggtt->vm.total, 0); + mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", total, ggtt->vm.total, err); @@ -1696,8 +1703,9 @@ int i915_gem_gtt_mock_selftests(void) } mock_init_ggtt(i915, ggtt); - mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests, ggtt); + + mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); mutex_unlock(&i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 57cd4180d06c..eb175da48547 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -647,8 +647,15 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) if (err) goto err; + /* Force the wait now to avoid including it in the benchmark */ + err = i915_vma_sync(vma); + if (err) + goto err_pin; + return vma; +err_pin: + i915_vma_unpin(vma); err: i915_gem_object_put(obj); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 97752deecccb..0e4f66312b39 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -831,8 +831,9 @@ int i915_vma_mock_selftests(void) } mock_init_ggtt(i915, ggtt); - mutex_lock(&i915->drm.struct_mutex); err = i915_subtests(tests,
ggtt); + + mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); mutex_unlock(&i915->drm.struct_mutex); @@ -879,8 +880,6 @@ static int igt_vma_remapped_gtt(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); for (t = types; *t; t++) { @@ -976,7 +975,6 @@ static int igt_vma_remapped_gtt(void *arg) out: intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); i915_gem_object_put(obj); return err; From patchwork Fri Oct 4 13:39:59 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174549 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5291114DB for ; Fri, 4 Oct 2019 13:40:56 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 3AE412084D for ; Fri, 4 Oct 2019 13:40:56 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 3AE412084D Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0E5026EB95; Fri, 4 Oct 2019 13:40:51 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id A8C126EB83 for ; Fri, 4 Oct 2019 13:40:39 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) 
x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723954-1500050 for multiple; Fri, 04 Oct 2019 14:40:18 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:39:59 +0100 Message-Id: <20191004134015.13204-5-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 04/20] drm/i915: Push the i915_active.retire into a worker X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Matthew Auld Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" As we need to use a mutex to serialise i915_active activation (because we want to allow the callback to sleep), we need to push the i915_active.retire into a worker callback in case we need to retire from an atomic context.
Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld --- .../gpu/drm/i915/display/intel_frontbuffer.c | 4 ++- drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 + drivers/gpu/drm/i915/gt/intel_context.c | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_pool.c | 1 + drivers/gpu/drm/i915/gt/intel_timeline.c | 1 + drivers/gpu/drm/i915/i915_active.c | 34 ++++++++++++++++--- drivers/gpu/drm/i915/i915_active_types.h | 13 ++++++- drivers/gpu/drm/i915/i915_vma.c | 2 ++ drivers/gpu/drm/i915/selftests/i915_active.c | 6 ++-- 9 files changed, 55 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index fc40dc1fdbcc..6428b8dd70d3 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -206,6 +206,7 @@ static int frontbuffer_active(struct i915_active *ref) return 0; } +__i915_active_call static void frontbuffer_retire(struct i915_active *ref) { struct intel_frontbuffer *front = @@ -257,7 +258,8 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj) kref_init(&front->ref); atomic_set(&front->bits, 0); i915_active_init(i915, &front->write, - frontbuffer_active, frontbuffer_retire); + frontbuffer_active, + i915_active_may_sleep(frontbuffer_retire)); spin_lock(&i915->fb_tracking.lock); if (obj->frontbuffer) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 95f8e66e45db..4cd7d2ecf1d5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -848,6 +848,7 @@ struct context_barrier_task { void *data; }; +__i915_active_call static void cb_retire(struct i915_active *base) { struct context_barrier_task *cb = container_of(base, typeof(*cb), base); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 26cb838c272c..06fabdf205cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ 
b/drivers/gpu/drm/i915/gt/intel_context.c @@ -138,6 +138,7 @@ static void __context_unpin_state(struct i915_vma *vma) __i915_vma_unpin(vma); } +__i915_active_call static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); @@ -150,6 +151,7 @@ static void __intel_context_retire(struct i915_active *active) intel_timeline_unpin(ce->timeline); intel_ring_unpin(ce->ring); + intel_context_put(ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c index 97d36cca8ded..81fab101fdb4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref) return 0; } +__i915_active_call static void pool_retire(struct i915_active *ref) { struct intel_engine_pool_node *node = diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 9d436e14ea8d..653f60e78392 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) kfree(cl); } +__i915_active_call static void __cacheline_retire(struct i915_active *active) { struct intel_timeline_cacheline *cl = diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 0791736a08fd..7ca066688b98 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -132,6 +132,7 @@ __active_retire(struct i915_active *ref) bool retire = false; lockdep_assert_held(&ref->mutex); + GEM_BUG_ON(i915_active_is_idle(ref)); /* return the unused nodes to our slabcache -- flushing the allocator */ if (atomic_dec_and_test(&ref->count)) { @@ -157,6 +158,19 @@ __active_retire(struct i915_active *ref) ref->retire(ref); } +static void +active_work(struct work_struct *wrk) +{ + struct i915_active *ref = 
container_of(wrk, typeof(*ref), work); + + GEM_BUG_ON(!atomic_read(&ref->count)); + if (atomic_add_unless(&ref->count, -1, 1)) + return; + + mutex_lock(&ref->mutex); + __active_retire(ref); +} + static void active_retire(struct i915_active *ref) { @@ -164,8 +178,13 @@ active_retire(struct i915_active *ref) if (atomic_add_unless(&ref->count, -1, 1)) return; - /* One active may be flushed from inside the acquire of another */ - mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); + /* If we are inside interrupt context (fence signaling), defer */ + if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS || + !mutex_trylock(&ref->mutex)) { + queue_work(system_unbound_wq, &ref->work); + return; + } + __active_retire(ref); } @@ -240,12 +259,16 @@ void __i915_active_init(struct drm_i915_private *i915, void (*retire)(struct i915_active *ref), struct lock_class_key *key) { + unsigned long bits; + debug_active_init(ref); ref->i915 = i915; ref->flags = 0; ref->active = active; - ref->retire = retire; + ref->retire = ptr_unpack_bits(retire, &bits, 2); + if (bits & I915_ACTIVE_MAY_SLEEP) + ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; ref->excl = NULL; ref->tree = RB_ROOT; @@ -253,6 +276,7 @@ void __i915_active_init(struct drm_i915_private *i915, init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); + INIT_WORK(&ref->work, active_work); } static bool ____active_del_barrier(struct i915_active *ref, @@ -504,6 +528,7 @@ int i915_active_wait(struct i915_active *ref) if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE)) return -EINTR; + flush_work(&ref->work); if (!i915_active_is_idle(ref)) return -EBUSY; @@ -544,8 +569,9 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) void i915_active_fini(struct i915_active *ref) { debug_active_fini(ref); - GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); GEM_BUG_ON(atomic_read(&ref->count)); + GEM_BUG_ON(work_pending(&ref->work)); + 
GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); mutex_destroy(&ref->mutex); } #endif diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 86e7a232ea3c..021167f0004d 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -13,6 +13,9 @@ #include #include #include +#include + +#include "i915_utils.h" struct drm_i915_private; struct i915_active_request; @@ -44,6 +47,11 @@ struct i915_active_request { struct active_node; +#define I915_ACTIVE_MAY_SLEEP BIT(0) + +#define __i915_active_call __aligned(4) +#define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2) + struct i915_active { struct drm_i915_private *i915; @@ -57,11 +65,14 @@ struct i915_active { struct dma_fence_cb excl_cb; unsigned long flags; -#define I915_ACTIVE_GRAB_BIT 0 +#define I915_ACTIVE_RETIRE_SLEEPS BIT(0) +#define I915_ACTIVE_GRAB_BIT 1 int (*active)(struct i915_active *ref); void (*retire)(struct i915_active *ref); + struct work_struct work; + struct llist_head preallocated_barriers; }; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index fe91a0e47b88..e191247c7c5f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -91,6 +91,7 @@ static int __i915_vma_active(struct i915_active *ref) return i915_vma_tryget(active_to_vma(ref)) ? 
0 : -ENOENT; } +__i915_active_call static void __i915_vma_retire(struct i915_active *ref) { i915_vma_put(active_to_vma(ref)); @@ -1152,6 +1153,7 @@ int __i915_vma_unbind(struct i915_vma *vma) return -EBUSY; } + GEM_BUG_ON(i915_vma_is_active(vma)); if (!drm_mm_node_allocated(&vma->node)) return 0; diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index afecfa081ff4..a41785822ed9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -121,7 +121,7 @@ __live_active_setup(struct drm_i915_private *i915) } i915_active_release(&active->base); - if (active->retired && count) { + if (READ_ONCE(active->retired) && count) { pr_err("i915_active retired before submission!\n"); err = -EINVAL; } @@ -161,7 +161,7 @@ static int live_active_wait(void *arg) } i915_active_wait(&active->base); - if (!active->retired) { + if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after waiting!\n"); err = -EINVAL; } @@ -200,7 +200,7 @@ static int live_active_retire(void *arg) if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; - if (!active->retired) { + if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after flushing!\n"); err = -EINVAL; } From patchwork Fri Oct 4 13:40:00 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174579 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7CE6913B1 for ; Fri, 4 Oct 2019 13:58:22 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 3977D20700 for ; Fri, 4 Oct 2019 13:58:20 +0000 (UTC) DMARC-Filter: OpenDMARC Filter 
v1.3.2 mail.kernel.org 3977D20700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 329BF6EB9E; Fri, 4 Oct 2019 13:58:17 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 062F06EB9B for ; Fri, 4 Oct 2019 13:58:14 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723955-1500050 for multiple; Fri, 04 Oct 2019 14:40:18 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:00 +0100 Message-Id: <20191004134015.13204-6-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 05/20] drm/i915: Coordinate i915_active with its own mutex X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Forgo the struct_mutex serialisation for i915_active, and interpose its own mutex handling for active/retire. This is a multi-layered sleight-of-hand. First, we had to ensure that no active/retire callbacks accidentally inverted the mutex ordering rules, nor assumed that they were themselves serialised by struct_mutex. 
More challenging, though, is the rule over updating elements of the active rbtree. Instead of the whole i915_active now being serialised by struct_mutex, allocations/rotations of the tree are serialised by the i915_active.mutex and individual nodes are serialised by the caller using the i915_timeline.mutex (we need to use nested spinlocks to interact with the dma_fence callback lists). The pain point here is that instead of a single mutex around execbuf, we now have to take a mutex for each active tracker (one for each vma, context, etc) and a couple of spinlocks for each fence update. The improvement in fine-grained locking allowing for multiple concurrent clients (eventually!) should be worth it in typical loads. v2: Add some comments that barely elucidate anything :( Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- .../gpu/drm/i915/display/intel_frontbuffer.c | 2 +- drivers/gpu/drm/i915/display/intel_overlay.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 +- .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_pm.c | 9 +- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine_pool.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 10 +- drivers/gpu/drm/i915/gt/intel_timeline.c | 7 +- .../gpu/drm/i915/gt/intel_timeline_types.h | 9 +- drivers/gpu/drm/i915/gt/selftest_context.c | 16 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 10 +- .../gpu/drm/i915/gt/selftests/mock_timeline.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 3 - drivers/gpu/drm/i915/i915_active.c | 322 +++++++++--------- drivers/gpu/drm/i915/i915_active.h | 319 ++++------------- drivers/gpu/drm/i915/i915_active_types.h | 23 +- drivers/gpu/drm/i915/i915_gem.c | 42 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 38 +-- drivers/gpu/drm/i915/i915_request.h | 1 - drivers/gpu/drm/i915/i915_vma.c | 4 +- drivers/gpu/drm/i915/selftests/i915_active.c | 32 +-
24 files changed, 298 insertions(+), 572 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c index 6428b8dd70d3..84b164f31895 100644 --- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c @@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj) front->obj = obj; kref_init(&front->ref); atomic_set(&front->bits, 0); - i915_active_init(i915, &front->write, + i915_active_init(&front->write, frontbuffer_active, i915_active_may_sleep(frontbuffer_retire)); diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 3f4ac1ee7668..e12e1a753af0 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) overlay->contrast = 75; overlay->saturation = 146; - i915_active_init(dev_priv, - &overlay->last_flip, + i915_active_init(&overlay->last_flip, NULL, intel_overlay_last_flip_retire); ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4cd7d2ecf1d5..9d85aab68d34 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -868,20 +868,18 @@ static int context_barrier_task(struct i915_gem_context *ctx, void (*task)(void *data), void *data) { - struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; struct i915_gem_engines_iter it; struct intel_context *ce; int err = 0; - lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!task); cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (!cb) return -ENOMEM; - i915_active_init(i915, &cb->base, NULL, cb_retire); + i915_active_init(&cb->base, NULL, cb_retire); err = i915_active_acquire(&cb->base); if (err) { kfree(cb); diff --git 
a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index e1aab2fd1cd9..c00b4f077f9e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -8,6 +8,7 @@ #define __I915_GEM_OBJECT_TYPES_H__ #include +#include #include "i915_active.h" #include "i915_selftest.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index cca192ecff8b..0a4115c6c275 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine) struct llist_node *node, *next; llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - struct i915_active_request *active = + struct dma_fence_cb *cb = container_of((struct list_head *)node, - typeof(*active), link); + typeof(*cb), node); - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, NULL); + cb->func(NULL, cb); } } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 06fabdf205cf..35a40c2820a2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce, mutex_init(&ce->pin_mutex); - i915_active_init(ctx->i915, &ce->active, + i915_active_init(&ce->active, __intel_context_active, __intel_context_retire); } @@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce, return err; /* Queue this switch after current activity by this context. 
*/ - err = i915_active_request_set(&tl->last_request, rq); + err = i915_active_fence_set(&tl->last_request, rq); mutex_unlock(&tl->mutex); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c index 81fab101fdb4..3cdbd5f8b5be 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c @@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz) return ERR_PTR(-ENOMEM); node->pool = pool; - i915_active_init(engine->i915, &node->active, pool_active, pool_retire); + i915_active_init(&node->active, pool_active, pool_retire); obj = i915_gem_object_create_internal(engine->i915, sz); if (IS_ERR(obj)) { diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index e189897e8797..055496f0825f 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -844,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) */ spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { - struct i915_request *rq; + struct dma_fence *fence; - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) continue; spin_unlock_irqrestore(&timelines->lock, flags); @@ -859,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt) * (I915_FENCE_TIMEOUT) so this wait should not be unbounded * in the worst case. 
*/ - dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT); + dma_fence_put(fence); /* Restart iteration after droping lock */ spin_lock_irqsave(&timelines->lock, flags); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 653f60e78392..0f959694303c 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) cl->hwsp = hwsp; cl->vaddr = page_pack_bits(vaddr, cacheline); - i915_active_init(hwsp->gt->i915, &cl->active, - __cacheline_active, __cacheline_retire); + i915_active_init(&cl->active, __cacheline_active, __cacheline_retire); return cl; } @@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline, mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); @@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, * free it after the current request is retired, which ensures that * all writes into the cacheline from previous requests are complete. */ - err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq); + err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence); if (err) goto err_cacheline; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index c668c4c50e75..98d9ee166379 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -58,12 +58,13 @@ struct intel_timeline { */ struct list_head requests; - /* Contains an RCU guarded pointer to the last request. No reference is + /* + * Contains an RCU guarded pointer to the last request. 
No reference is * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. + * the request using i915_active_fence_get(), or manage the RCU + * protection themselves (cf the i915_active_fence API). */ - struct i915_active_request last_request; + struct i915_active_fence last_request; /** * We track the most recent seqno that we wait on in every context so diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 4ce1e25433d2..e6bcbe7ab5e1 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -47,24 +47,20 @@ static int context_sync(struct intel_context *ce) mutex_lock(&tl->mutex); do { - struct i915_request *rq; + struct dma_fence *fence; long timeout; - rcu_read_lock(); - rq = rcu_dereference(tl->last_request.request); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) break; - timeout = i915_request_wait(rq, 0, HZ / 10); + timeout = dma_fence_wait_timeout(fence, false, HZ / 10); if (timeout < 0) err = timeout; else - i915_request_retire_upto(rq); + i915_request_retire_upto(to_request(fence)); - i915_request_put(rq); + dma_fence_put(fence); } while (!err); mutex_unlock(&tl->mutex); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index e7bc2dbbb2a5..dd25636abc5b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1172,9 +1172,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) if (!rq) return NULL; - INIT_LIST_HEAD(&rq->active_list); rq->engine = engine; + spin_lock_init(&rq->lock); + INIT_LIST_HEAD(&rq->fence.cb_list); + rq->fence.lock = &rq->lock; + rq->fence.ops = &i915_fence_ops; + i915_sched_node_init(&rq->sched); /* mark this request as permanently 
incomplete */ @@ -1267,8 +1271,8 @@ static int live_suppress_wait_preempt(void *arg) } /* Disable NEWCLIENT promotion */ - __i915_active_request_set(&i915_request_timeline(rq[i])->last_request, - dummy); + __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request, + &dummy->fence); i915_request_add(rq[i]); } diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c index 598170efcaf6..2a77c051f36a 100644 --- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context) mutex_init(&timeline->mutex); - INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex); + INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex); INIT_LIST_HEAD(&timeline->requests); i915_syncmap_init(&timeline->sync); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 6c79d16b381e..03f567084548 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct i915_request *rq; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->req) return 0; diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 7ca066688b98..023652ded4be 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -12,8 +12,6 @@ #include "i915_active.h" #include "i915_globals.h" -#define BKL(ref) (&(ref)->i915->drm.struct_mutex) - /* * Active refs memory management * @@ -27,35 +25,35 @@ static struct i915_global_active { } global; struct active_node { - struct i915_active_request base; + struct i915_active_fence base; 
struct i915_active *ref; struct rb_node node; u64 timeline; }; static inline struct active_node * -node_from_active(struct i915_active_request *active) +node_from_active(struct i915_active_fence *active) { return container_of(active, struct active_node, base); } #define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) -static inline bool is_barrier(const struct i915_active_request *active) +static inline bool is_barrier(const struct i915_active_fence *active) { - return IS_ERR(rcu_access_pointer(active->request)); + return IS_ERR(rcu_access_pointer(active->fence)); } static inline struct llist_node *barrier_to_ll(struct active_node *node) { GEM_BUG_ON(!is_barrier(&node->base)); - return (struct llist_node *)&node->base.link; + return (struct llist_node *)&node->base.cb.node; } static inline struct intel_engine_cs * __barrier_to_engine(struct active_node *node) { - return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev); + return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev); } static inline struct intel_engine_cs * @@ -68,7 +66,7 @@ barrier_to_engine(struct active_node *node) static inline struct active_node *barrier_from_ll(struct llist_node *x) { return container_of((struct list_head *)x, - struct active_node, base.link); + struct active_node, base.cb.node); } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) @@ -147,15 +145,18 @@ __active_retire(struct i915_active *ref) if (!retire) return; - GEM_BUG_ON(rcu_access_pointer(ref->excl)); + GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); rbtree_postorder_for_each_entry_safe(it, n, &root, node) { - GEM_BUG_ON(i915_active_request_isset(&it->base)); + GEM_BUG_ON(i915_active_fence_isset(&it->base)); kmem_cache_free(global.slab_cache, it); } /* After the final retire, the entire struct may be freed */ if (ref->retire) ref->retire(ref); + + /* ... except if you wait on it, you must manage your own references! 
*/ + wake_up_var(ref); } static void @@ -189,12 +190,20 @@ active_retire(struct i915_active *ref) } static void -node_retire(struct i915_active_request *base, struct i915_request *rq) +node_retire(struct dma_fence *fence, struct dma_fence_cb *cb) { - active_retire(node_from_active(base)->ref); + i915_active_fence_cb(fence, cb); + active_retire(container_of(cb, struct active_node, base.cb)->ref); } -static struct i915_active_request * +static void +excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + i915_active_fence_cb(fence, cb); + active_retire(container_of(cb, struct i915_active, excl.cb)); +} + +static struct i915_active_fence * active_instance(struct i915_active *ref, struct intel_timeline *tl) { struct active_node *node, *prealloc; @@ -238,7 +247,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) } node = prealloc; - i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire); + __i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire); node->ref = ref; node->timeline = idx; @@ -253,8 +262,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) return &node->base; } -void __i915_active_init(struct drm_i915_private *i915, - struct i915_active *ref, +void __i915_active_init(struct i915_active *ref, int (*active)(struct i915_active *ref), void (*retire)(struct i915_active *ref), struct lock_class_key *key) @@ -263,19 +271,18 @@ void __i915_active_init(struct drm_i915_private *i915, debug_active_init(ref); - ref->i915 = i915; ref->flags = 0; ref->active = active; ref->retire = ptr_unpack_bits(retire, &bits, 2); if (bits & I915_ACTIVE_MAY_SLEEP) ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; - ref->excl = NULL; ref->tree = RB_ROOT; ref->cache = NULL; init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); + __i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire); INIT_WORK(&ref->work, active_work); } @@ -329,9 +336,9 @@ 
__active_del_barrier(struct i915_active *ref, struct active_node *node) int i915_active_ref(struct i915_active *ref, struct intel_timeline *tl, - struct i915_request *rq) + struct dma_fence *fence) { - struct i915_active_request *active; + struct i915_active_fence *active; int err; lockdep_assert_held(&tl->mutex); @@ -354,66 +361,44 @@ int i915_active_ref(struct i915_active *ref, * request that we want to emit on the kernel_context. */ __active_del_barrier(ref, node_from_active(active)); - RCU_INIT_POINTER(active->request, NULL); - INIT_LIST_HEAD(&active->link); - } else { - if (!i915_active_request_isset(active)) - atomic_inc(&ref->count); + RCU_INIT_POINTER(active->fence, NULL); + atomic_dec(&ref->count); } - GEM_BUG_ON(!atomic_read(&ref->count)); - __i915_active_request_set(active, rq); + if (!__i915_active_fence_set(active, fence)) + atomic_inc(&ref->count); out: i915_active_release(ref); return err; } -static void excl_cb(struct dma_fence *f, struct dma_fence_cb *cb) -{ - struct i915_active *ref = container_of(cb, typeof(*ref), excl_cb); - - RCU_INIT_POINTER(ref->excl, NULL); - dma_fence_put(f); - - active_retire(ref); -} - void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f) { /* We expect the caller to manage the exclusive timeline ordering */ GEM_BUG_ON(i915_active_is_idle(ref)); - dma_fence_get(f); - - rcu_read_lock(); - if (rcu_access_pointer(ref->excl)) { - struct dma_fence *old; - - old = dma_fence_get_rcu_safe(&ref->excl); - if (old) { - if (dma_fence_remove_callback(old, &ref->excl_cb)) - atomic_dec(&ref->count); - dma_fence_put(old); - } - } - rcu_read_unlock(); - - atomic_inc(&ref->count); - rcu_assign_pointer(ref->excl, f); + /* + * As we don't know which mutex the caller is using, we told a small + * lie to the debug code that it is using the i915_active.mutex; + * and now we must stick to that lie. 
+ */ + mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_); + if (!__i915_active_fence_set(&ref->excl, f)) + atomic_inc(&ref->count); + mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_); +} - if (dma_fence_add_callback(f, &ref->excl_cb, excl_cb)) { - RCU_INIT_POINTER(ref->excl, NULL); - atomic_dec(&ref->count); - dma_fence_put(f); - } +bool i915_active_acquire_if_busy(struct i915_active *ref) +{ + debug_active_assert(ref); + return atomic_add_unless(&ref->count, 1, 0); } int i915_active_acquire(struct i915_active *ref) { int err; - debug_active_assert(ref); - if (atomic_add_unless(&ref->count, 1, 0)) + if (i915_active_acquire_if_busy(ref)) return 0; err = mutex_lock_interruptible(&ref->mutex); @@ -438,121 +423,57 @@ void i915_active_release(struct i915_active *ref) active_retire(ref); } -static void __active_ungrab(struct i915_active *ref) -{ - clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags); -} - -bool i915_active_trygrab(struct i915_active *ref) +static void enable_signaling(struct i915_active_fence *active) { - debug_active_assert(ref); - - if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)) - return false; - - if (!atomic_add_unless(&ref->count, 1, 0)) { - __active_ungrab(ref); - return false; - } + struct dma_fence *fence; - return true; -} - -void i915_active_ungrab(struct i915_active *ref) -{ - GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)); - - active_retire(ref); - __active_ungrab(ref); -} - -static int excl_wait(struct i915_active *ref) -{ - struct dma_fence *old; - int err = 0; - - if (!rcu_access_pointer(ref->excl)) - return 0; - - rcu_read_lock(); - old = dma_fence_get_rcu_safe(&ref->excl); - rcu_read_unlock(); - if (old) { - err = dma_fence_wait(old, true); - dma_fence_put(old); - } + fence = i915_active_fence_get(active); + if (!fence) + return; - return err; + dma_fence_enable_sw_signaling(fence); + dma_fence_put(fence); } int i915_active_wait(struct i915_active *ref) { struct active_node *it, *n; - int err; + int err = 0; 
might_sleep(); - might_lock(&ref->mutex); - - if (i915_active_is_idle(ref)) - return 0; - - err = mutex_lock_interruptible(&ref->mutex); - if (err) - return err; - if (!atomic_add_unless(&ref->count, 1, 0)) { - mutex_unlock(&ref->mutex); + if (!i915_active_acquire_if_busy(ref)) return 0; - } - - err = excl_wait(ref); - if (err) - goto out; + /* Flush lazy signals */ + enable_signaling(&ref->excl); rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { - if (is_barrier(&it->base)) { /* unconnected idle-barrier */ - err = -EBUSY; - break; - } + if (is_barrier(&it->base)) /* unconnected idle barrier */ + continue; - err = i915_active_request_retire(&it->base, BKL(ref)); - if (err) - break; + enable_signaling(&it->base); } + /* Any fence added after the wait begins will not be auto-signaled */ -out: - __active_retire(ref); + i915_active_release(ref); if (err) return err; - if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE)) + if (wait_var_event_interruptible(ref, i915_active_is_idle(ref))) return -EINTR; - flush_work(&ref->work); - if (!i915_active_is_idle(ref)) - return -EBUSY; - return 0; } -int i915_request_await_active_request(struct i915_request *rq, - struct i915_active_request *active) -{ - struct i915_request *barrier = - i915_active_request_raw(active, &rq->i915->drm.struct_mutex); - - return barrier ? 
i915_request_await_dma_fence(rq, &barrier->fence) : 0; -} - int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) { int err = 0; - if (rcu_access_pointer(ref->excl)) { + if (rcu_access_pointer(ref->excl.fence)) { struct dma_fence *fence; rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&ref->excl); + fence = dma_fence_get_rcu_safe(&ref->excl.fence); rcu_read_unlock(); if (fence) { err = i915_request_await_dma_fence(rq, fence); @@ -578,7 +499,7 @@ void i915_active_fini(struct i915_active *ref) static inline bool is_idle_barrier(struct active_node *node, u64 idx) { - return node->timeline == idx && !i915_active_request_isset(&node->base); + return node->timeline == idx && !i915_active_fence_isset(&node->base); } static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) @@ -698,13 +619,13 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, node->base.lock = &engine->kernel_context->timeline->mutex; #endif - RCU_INIT_POINTER(node->base.request, NULL); - node->base.retire = node_retire; + RCU_INIT_POINTER(node->base.fence, NULL); + node->base.cb.func = node_retire; node->timeline = idx; node->ref = ref; } - if (!i915_active_request_isset(&node->base)) { + if (!i915_active_fence_isset(&node->base)) { /* * Mark this as being *our* unconnected proto-node. * @@ -714,8 +635,8 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref, * and then we can use the rb_node and list pointers * for our tracking of the pending barrier. 
*/ - RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); - node->base.link.prev = (void *)engine; + RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN)); + node->base.cb.node.prev = (void *)engine; atomic_inc(&ref->count); } @@ -782,44 +703,113 @@ void i915_request_add_active_barriers(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct llist_node *node, *next; + unsigned long flags; GEM_BUG_ON(intel_engine_is_virtual(engine)); GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline); + node = llist_del_all(&engine->barrier_tasks); + if (!node) + return; /* * Attach the list of proto-fences to the in-flight request such * that the parent i915_active will be released when this request * is retired. */ - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq); + spin_lock_irqsave(&rq->lock, flags); + llist_for_each_safe(node, next, node) { + RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence); smp_wmb(); /* serialise with reuse_idle_barrier */ - list_add_tail((struct list_head *)node, &rq->active_list); + list_add_tail((struct list_head *)node, &rq->fence.cb_list); + } + spin_unlock_irqrestore(&rq->lock, flags); +} + +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) +#define active_is_held(active) lockdep_is_held((active)->lock) +#else +#define active_is_held(active) true +#endif + +/* + * __i915_active_fence_set: Update the last active fence along its timeline + * @active: the active tracker + * @fence: the new fence (under construction) + * + * Records the new @fence as the last active fence along its timeline in + * this active tracker, moving the tracking callbacks from the previous + * fence onto this one. Returns the previous fence (if not already completed), + * which the caller must ensure is executed before the new fence. 
To ensure + * that the order of fences within the timeline of the i915_active_fence is + * maintained, it must be locked by the caller. + */ +struct dma_fence * +__i915_active_fence_set(struct i915_active_fence *active, + struct dma_fence *fence) +{ + struct dma_fence *prev; + unsigned long flags; + + /* NB: must be serialised by an outer timeline mutex (active->lock) */ + spin_lock_irqsave(fence->lock, flags); + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)); + + prev = rcu_dereference_protected(active->fence, active_is_held(active)); + if (prev) { + GEM_BUG_ON(prev == fence); + spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING); + __list_del_entry(&active->cb.node); + spin_unlock(prev->lock); /* serialise with prev->cb_list */ + + /* + * active->fence is reset by the callback from inside + * interrupt context. We need to serialise our list + * manipulation with the fence->lock to prevent the prev + * being lost inside an interrupt (it can't be replaced as + * no other caller is allowed to enter __i915_active_fence_set + * as we hold the timeline lock). After serialising with + * the callback, we need to double check which ran first, + * our list_del() [decoupling prev from the callback] or + * the callback... 
+ */ + prev = rcu_access_pointer(active->fence); } + + rcu_assign_pointer(active->fence, fence); + list_add_tail(&active->cb.node, &fence->cb_list); + + spin_unlock_irqrestore(fence->lock, flags); + + return prev; } -int i915_active_request_set(struct i915_active_request *active, - struct i915_request *rq) +int i915_active_fence_set(struct i915_active_fence *active, + struct i915_request *rq) { - int err; + struct dma_fence *fence; + int err = 0; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) lockdep_assert_held(active->lock); #endif - /* Must maintain ordering wrt previous active requests */ - err = i915_request_await_active_request(rq, active); - if (err) - return err; + /* Must maintain timeline ordering wrt previous active requests */ + rcu_read_lock(); + fence = __i915_active_fence_set(active, &rq->fence); + if (fence) /* but the previous fence may not belong to that timeline! */ + fence = dma_fence_get_rcu(fence); + rcu_read_unlock(); + if (fence) { + err = i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } - __i915_active_request_set(active, rq); - return 0; + return err; } -void i915_active_retire_noop(struct i915_active_request *active, - struct i915_request *request) +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) { - /* Space left intentionally blank */ + i915_active_fence_cb(fence, cb); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index 90034f61b7c2..4f52fe6146d2 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -12,6 +12,10 @@ #include "i915_active_types.h" #include "i915_request.h" +struct i915_request; +struct intel_engine_cs; +struct intel_timeline; + /* * We treat requests as fences. This is not be to confused with our * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. 
@@ -28,308 +32,108 @@ * write access so that we can perform concurrent read operations between * the CPU and GPU engines, as well as waiting for all rendering to * complete, or waiting for the last GPU user of a "fence register". The - * object then embeds a #i915_active_request to track the most recent (in + * object then embeds a #i915_active_fence to track the most recent (in * retirement order) request relevant for the desired mode of access. - * The #i915_active_request is updated with i915_active_request_set() to + * The #i915_active_fence is updated with i915_active_fence_set() to * track the most recent fence request, typically this is done as part of * i915_vma_move_to_active(). * - * When the #i915_active_request completes (is retired), it will + * When the #i915_active_fence completes (is retired), it will * signal its completion to the owner through a callback as well as mark - * itself as idle (i915_active_request.request == NULL). The owner + * itself as idle (i915_active_fence.fence == NULL). The owner * can then perform any action, such as delayed freeing of an active * resource including itself. */ -void i915_active_retire_noop(struct i915_active_request *active, - struct i915_request *request); +void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb); /** - * i915_active_request_init - prepares the activity tracker for use + * __i915_active_fence_init - prepares the activity tracker for use * @active - the active tracker - * @rq - initial request to track, can be NULL + * @fence - initial fence to track, can be NULL * @func - a callback when then the tracker is retired (becomes idle), * can be NULL * - * i915_active_request_init() prepares the embedded @active struct for use as - * an activity tracker, that is for tracking the last known active request - * associated with it. 
When the last request becomes idle, when it is retired + * i915_active_fence_init() prepares the embedded @active struct for use as + * an activity tracker, that is for tracking the last known active fence + * associated with it. When the last fence becomes idle, when it is retired * after completion, the optional callback @func is invoked. */ static inline void -i915_active_request_init(struct i915_active_request *active, +__i915_active_fence_init(struct i915_active_fence *active, struct mutex *lock, - struct i915_request *rq, - i915_active_retire_fn retire) + void *fence, + dma_fence_func_t fn) { - RCU_INIT_POINTER(active->request, rq); - INIT_LIST_HEAD(&active->link); - active->retire = retire ?: i915_active_retire_noop; + RCU_INIT_POINTER(active->fence, fence); + active->cb.func = fn ?: i915_active_noop; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) active->lock = lock; #endif } -#define INIT_ACTIVE_REQUEST(name, lock) \ - i915_active_request_init((name), (lock), NULL, NULL) - -/** - * i915_active_request_set - updates the tracker to watch the current request - * @active - the active tracker - * @request - the request to watch - * - * __i915_active_request_set() watches the given @request for completion. Whilst - * that @request is busy, the @active reports busy. When that @request is - * retired, the @active tracker is updated to report idle. 
- */ -static inline void -__i915_active_request_set(struct i915_active_request *active, - struct i915_request *request) -{ -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) - lockdep_assert_held(active->lock); -#endif - list_move(&active->link, &request->active_list); - rcu_assign_pointer(active->request, request); -} +#define INIT_ACTIVE_FENCE(A, LOCK) \ + __i915_active_fence_init((A), (LOCK), NULL, NULL) -int __must_check -i915_active_request_set(struct i915_active_request *active, - struct i915_request *rq); +struct dma_fence * +__i915_active_fence_set(struct i915_active_fence *active, + struct dma_fence *fence); /** - * i915_active_request_raw - return the active request + * i915_active_fence_set - updates the tracker to watch the current fence * @active - the active tracker + * @rq - the request to watch * - * i915_active_request_raw() returns the current request being tracked, or NULL. - * It does not obtain a reference on the request for the caller, so the caller - * must hold struct_mutex. + * i915_active_fence_set() watches the given @rq for completion. While + * that @rq is busy, the @active reports busy. When that @rq is signaled + * (or else retired) the @active tracker is updated to report idle. */ -static inline struct i915_request * -i915_active_request_raw(const struct i915_active_request *active, - struct mutex *mutex) -{ - return rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); -} - -/** - * i915_active_request_peek - report the active request being monitored - * @active - the active tracker - * - * i915_active_request_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. 
- */ -static inline struct i915_request * -i915_active_request_peek(const struct i915_active_request *active, - struct mutex *mutex) -{ - struct i915_request *request; - - request = i915_active_request_raw(active, mutex); - if (!request || i915_request_completed(request)) - return NULL; - - return request; -} - -/** - * i915_active_request_get - return a reference to the active request - * @active - the active tracker - * - * i915_active_request_get() returns a reference to the active request, or NULL - * if the active tracker is idle. The caller must hold struct_mutex. - */ -static inline struct i915_request * -i915_active_request_get(const struct i915_active_request *active, - struct mutex *mutex) -{ - return i915_request_get(i915_active_request_peek(active, mutex)); -} - -/** - * __i915_active_request_get_rcu - return a reference to the active request - * @active - the active tracker - * - * __i915_active_request_get() returns a reference to the active request, - * or NULL if the active tracker is idle. The caller must hold the RCU read - * lock, but the returned pointer is safe to use outside of RCU. - */ -static inline struct i915_request * -__i915_active_request_get_rcu(const struct i915_active_request *active) -{ - /* - * Performing a lockless retrieval of the active request is super - * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing - * slab of request objects will not be freed whilst we hold the - * RCU read lock. It does not guarantee that the request itself - * will not be freed and then *reused*. Viz, - * - * Thread A Thread B - * - * rq = active.request - * retire(rq) -> free(rq); - * (rq is now first on the slab freelist) - * active.request = NULL - * - * rq = new submission on a new object - * ref(rq) - * - * To prevent the request from being reused whilst the caller - * uses it, we take a reference like normal. Whilst acquiring - * the reference we check that it is not in a destroyed state - * (refcnt == 0). 
That prevents the request being reallocated - * whilst the caller holds on to it. To check that the request - * was not reallocated as we acquired the reference we have to - * check that our request remains the active request across - * the lookup, in the same manner as a seqlock. The visibility - * of the pointer versus the reference counting is controlled - * by using RCU barriers (rcu_dereference and rcu_assign_pointer). - * - * In the middle of all that, we inspect whether the request is - * complete. Retiring is lazy so the request may be completed long - * before the active tracker is updated. Querying whether the - * request is complete is far cheaper (as it involves no locked - * instructions setting cachelines to exclusive) than acquiring - * the reference, so we do it first. The RCU read lock ensures the - * pointer dereference is valid, but does not ensure that the - * seqno nor HWS is the right one! However, if the request was - * reallocated, that means the active tracker's request was complete. - * If the new request is also complete, then both are and we can - * just report the active tracker is idle. If the new request is - * incomplete, then we acquire a reference on it and check that - * it remained the active request. - * - * It is then imperative that we do not zero the request on - * reallocation, so that we can chase the dangling pointers! - * See i915_request_alloc(). - */ - do { - struct i915_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_request_completed(request)) - return NULL; - - /* - * An especially silly compiler could decide to recompute the - * result of i915_request_completed, more specifically - * re-emit the load for request->fence.seqno. A race would catch - * a later seqno value, which could flip the result from true to - * false. Which means part of the instructions below might not - * be executed, while later on instructions are executed. 
Due to - * barriers within the refcounting the inconsistency can't reach - * past the call to i915_request_get_rcu, but not executing - * that while still executing i915_request_put() creates - * havoc enough. Prevent this with a compiler barrier. - */ - barrier(); - - request = i915_request_get_rcu(request); - - /* - * What stops the following rcu_access_pointer() from occurring - * before the above i915_request_get_rcu()? If we were - * to read the value before pausing to get the reference to - * the request, we may not notice a change in the active - * tracker. - * - * The rcu_access_pointer() is a mere compiler barrier, which - * means both the CPU and compiler are free to perform the - * memory read without constraint. The compiler only has to - * ensure that any operations after the rcu_access_pointer() - * occur afterwards in program order. This means the read may - * be performed earlier by an out-of-order CPU, or adventurous - * compiler. - * - * The atomic operation at the heart of - * i915_request_get_rcu(), see dma_fence_get_rcu(), is - * atomic_inc_not_zero() which is only a full memory barrier - * when successful. That is, if i915_request_get_rcu() - * returns the request (and so with the reference counted - * incremented) then the following read for rcu_access_pointer() - * must occur after the atomic operation and so confirm - * that this request is the one currently being tracked. - * - * The corresponding write barrier is part of - * rcu_assign_pointer(). 
- */ - if (!request || request == rcu_access_pointer(active->request)) - return rcu_pointer_handoff(request); - - i915_request_put(request); - } while (1); -} - +int __must_check +i915_active_fence_set(struct i915_active_fence *active, + struct i915_request *rq); /** - * i915_active_request_get_unlocked - return a reference to the active request + * i915_active_fence_get - return a reference to the active fence * @active - the active tracker * - * i915_active_request_get_unlocked() returns a reference to the active request, + * i915_active_fence_get() returns a reference to the active fence, * or NULL if the active tracker is idle. The reference is obtained under RCU, * so no locking is required by the caller. * - * The reference should be freed with i915_request_put(). + * The reference should be freed with dma_fence_put(). */ -static inline struct i915_request * -i915_active_request_get_unlocked(const struct i915_active_request *active) +static inline struct dma_fence * +i915_active_fence_get(struct i915_active_fence *active) { - struct i915_request *request; + struct dma_fence *fence; rcu_read_lock(); - request = __i915_active_request_get_rcu(active); + fence = dma_fence_get_rcu_safe(&active->fence); rcu_read_unlock(); - return request; + return fence; } /** - * i915_active_request_isset - report whether the active tracker is assigned + * i915_active_fence_isset - report whether the active tracker is assigned * @active - the active tracker * - * i915_active_request_isset() returns true if the active tracker is currently - * assigned to a request. Due to the lazy retiring, that request may be idle + * i915_active_fence_isset() returns true if the active tracker is currently + * assigned to a fence. Due to the lazy retiring, that fence may be idle * and this may report stale information. 
*/ static inline bool -i915_active_request_isset(const struct i915_active_request *active) +i915_active_fence_isset(const struct i915_active_fence *active) { - return rcu_access_pointer(active->request); + return rcu_access_pointer(active->fence); } -/** - * i915_active_request_retire - waits until the request is retired - * @active - the active request on which to wait - * - * i915_active_request_retire() waits until the request is completed, - * and then ensures that at least the retirement handler for this - * @active tracker is called before returning. If the @active - * tracker is idle, the function returns immediately. - */ -static inline int __must_check -i915_active_request_retire(struct i915_active_request *active, - struct mutex *mutex) +static inline void +i915_active_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct i915_request *request; - long ret; - - request = i915_active_request_raw(active, mutex); - if (!request) - return 0; - - ret = i915_request_wait(request, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (ret < 0) - return ret; + struct i915_active_fence *active = + container_of(cb, typeof(*active), cb); - list_del_init(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, request); - - return 0; + RCU_INIT_POINTER(active->fence, NULL); } /* @@ -358,47 +162,40 @@ i915_active_request_retire(struct i915_active_request *active, * synchronisation. 
*/ -void __i915_active_init(struct drm_i915_private *i915, - struct i915_active *ref, +void __i915_active_init(struct i915_active *ref, int (*active)(struct i915_active *ref), void (*retire)(struct i915_active *ref), struct lock_class_key *key); -#define i915_active_init(i915, ref, active, retire) do { \ +#define i915_active_init(ref, active, retire) do { \ static struct lock_class_key __key; \ \ - __i915_active_init(i915, ref, active, retire, &__key); \ + __i915_active_init(ref, active, retire, &__key); \ } while (0) int i915_active_ref(struct i915_active *ref, struct intel_timeline *tl, - struct i915_request *rq); + struct dma_fence *fence); static inline int i915_active_add_request(struct i915_active *ref, struct i915_request *rq) { - return i915_active_ref(ref, i915_request_timeline(rq), rq); + return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence); } void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f); static inline bool i915_active_has_exclusive(struct i915_active *ref) { - return rcu_access_pointer(ref->excl); + return rcu_access_pointer(ref->excl.fence); } int i915_active_wait(struct i915_active *ref); -int i915_request_await_active(struct i915_request *rq, - struct i915_active *ref); -int i915_request_await_active_request(struct i915_request *rq, - struct i915_active_request *active); +int i915_request_await_active(struct i915_request *rq, struct i915_active *ref); int i915_active_acquire(struct i915_active *ref); +bool i915_active_acquire_if_busy(struct i915_active *ref); void i915_active_release(struct i915_active *ref); -void __i915_active_release_nested(struct i915_active *ref, int subclass); - -bool i915_active_trygrab(struct i915_active *ref); -void i915_active_ungrab(struct i915_active *ref); static inline bool i915_active_is_idle(const struct i915_active *ref) diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index 021167f0004d..d89a74c142c6 100644 --- 
a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -17,17 +17,9 @@ #include "i915_utils.h" -struct drm_i915_private; -struct i915_active_request; -struct i915_request; - -typedef void (*i915_active_retire_fn)(struct i915_active_request *, - struct i915_request *); - -struct i915_active_request { - struct i915_request __rcu *request; - struct list_head link; - i915_active_retire_fn retire; +struct i915_active_fence { + struct dma_fence __rcu *fence; + struct dma_fence_cb cb; #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) /* * Incorporeal! @@ -53,20 +45,17 @@ struct active_node; #define i915_active_may_sleep(fn) ptr_pack_bits(&(fn), I915_ACTIVE_MAY_SLEEP, 2) struct i915_active { - struct drm_i915_private *i915; + atomic_t count; + struct mutex mutex; struct active_node *cache; struct rb_root tree; - struct mutex mutex; - atomic_t count; /* Preallocated "exclusive" node */ - struct dma_fence __rcu *excl; - struct dma_fence_cb excl_cb; + struct i915_active_fence excl; unsigned long flags; #define I915_ACTIVE_RETIRE_SLEEPS BIT(0) -#define I915_ACTIVE_GRAB_BIT 1 int (*active)(struct i915_active *ref); void (*retire)(struct i915_active *ref); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f50058cf8ab8..64890627d638 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -892,28 +892,38 @@ wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout) spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry(tl, &timelines->active_list, link) { - struct i915_request *rq; + struct dma_fence *fence; - rq = i915_active_request_get_unlocked(&tl->last_request); - if (!rq) + fence = i915_active_fence_get(&tl->last_request); + if (!fence) continue; spin_unlock_irqrestore(&timelines->lock, flags); - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used a synchronous - * step prior to idling, e.g. 
in suspend for flushing all - * current operations to memory before sleeping. These we - * want to complete as quickly as possible to avoid prolonged - * stalls, so allow the gpu to boost to maximum clocks. - */ - if (wait & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq); + if (!dma_fence_is_i915(fence)) { + timeout = dma_fence_wait_timeout(fence, + flags & I915_WAIT_INTERRUPTIBLE, + timeout); + } else { + struct i915_request *rq = to_request(fence); + + /* + * "Race-to-idle". + * + * Switching to the kernel context is often used as + * a synchronous step prior to idling, e.g. in suspend + * for flushing all current operations to memory before + * sleeping. These we want to complete as quickly as + * possible to avoid prolonged stalls, so allow the gpu + * to boost to maximum clocks. + */ + if (flags & I915_WAIT_FOR_IDLE_BOOST) + gen6_rps_boost(rq); + + timeout = i915_request_wait(rq, flags, timeout); + } - timeout = i915_request_wait(rq, wait, timeout); - i915_request_put(rq); + dma_fence_put(fence); if (timeout < 0) return timeout; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 55cebf256d03..7462d87f7a48 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1861,7 +1861,6 @@ static const struct i915_vma_ops pd_vma_ops = { static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) { - struct drm_i915_private *i915 = ppgtt->base.vm.i915; struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt; struct i915_vma *vma; @@ -1872,7 +1871,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) if (!vma) return ERR_PTR(-ENOMEM); - i915_active_init(i915, &vma->active, NULL, NULL); + i915_active_init(&vma->active, NULL, NULL); mutex_init(&vma->pages_mutex); vma->vm = i915_vm_get(&ggtt->vm); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 6384a06aa5bf..a28ee754b7b4 100644 --- 
a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1299,7 +1299,7 @@ capture_vma(struct capture_vma *next, if (!c) return next; - if (!i915_active_trygrab(&vma->active)) { + if (!i915_active_acquire_if_busy(&vma->active)) { kfree(c); return next; } @@ -1439,7 +1439,7 @@ gem_record_rings(struct i915_gpu_state *error, struct compress *compress) *this->slot = i915_error_object_create(i915, vma, compress); - i915_active_ungrab(&vma->active); + i915_active_release(&vma->active); i915_vma_put(vma); capture = this->next; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a8916412759b..4ffe62a42186 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -218,8 +218,6 @@ static void remove_from_engine(struct i915_request *rq) static bool i915_request_retire(struct i915_request *rq) { - struct i915_active_request *active, *next; - if (!i915_request_completed(rq)) return false; @@ -244,35 +242,6 @@ static bool i915_request_retire(struct i915_request *rq) &i915_request_timeline(rq)->requests)); rq->ring->head = rq->postfix; - /* - * Walk through the active list, calling retire on each. This allows - * objects to track their GPU activity and mark themselves as idle - * when their *last* active request is completed (updating state - * tracking lists for eviction, active references for GEM, etc). - * - * As the ->retire() may free the node, we decouple it first and - * pass along the auxiliary information (to avoid dereferencing - * the node after the callback). - */ - list_for_each_entry_safe(active, next, &rq->active_list, link) { - /* - * In microbenchmarks or focusing upon time inside the kernel, - * we may spend an inordinate amount of time simply handling - * the retirement of requests and processing their callbacks. - * Of which, this loop itself is particularly hot due to the - * cache misses when jumping around the list of - * i915_active_request. 
So we try to keep this loop as - * streamlined as possible and also prefetch the next - * i915_active_request to try and hide the likely cache miss. - */ - prefetchw(next); - - INIT_LIST_HEAD(&active->link); - RCU_INIT_POINTER(active->request, NULL); - - active->retire(active, rq); - } - local_irq_disable(); /* @@ -704,7 +673,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->flags = 0; rq->execution_mask = ALL_ENGINES; - INIT_LIST_HEAD(&rq->active_list); INIT_LIST_HEAD(&rq->execute_cb); /* @@ -743,7 +711,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) ce->ring->emit = rq->head; /* Make sure we didn't add ourselves to external state before freeing */ - GEM_BUG_ON(!list_empty(&rq->active_list)); GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); @@ -1174,8 +1141,8 @@ __i915_request_add_to_timeline(struct i915_request *rq) * precludes optimising to use semaphores serialisation of a single * timeline across engines. */ - prev = rcu_dereference_protected(timeline->last_request.request, - lockdep_is_held(&timeline->mutex)); + prev = to_request(__i915_active_fence_set(&timeline->last_request, + &rq->fence)); if (prev && !i915_request_completed(prev)) { if (is_power_of_2(prev->engine->mask | rq->engine->mask)) i915_sw_fence_await_sw_fence(&rq->submit, @@ -1200,7 +1167,6 @@ __i915_request_add_to_timeline(struct i915_request *rq) * us, the timeline will hold its seqno which is later than ours. */ GEM_BUG_ON(timeline->seqno != rq->fence.seqno); - __i915_active_request_set(&timeline->last_request, rq); return prev; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index ec5bb4c2e5ae..91a885c36c6b 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -211,7 +211,6 @@ struct i915_request { * on the active_list (of their final request). 
*/ struct i915_capture_list *capture_list; - struct list_head active_list; /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e191247c7c5f..9fdcd4e2c799 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -120,8 +120,7 @@ vma_create(struct drm_i915_gem_object *obj, vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - i915_active_init(vm->i915, &vma->active, - __i915_vma_active, __i915_vma_retire); + i915_active_init(&vma->active, __i915_vma_active, __i915_vma_retire); /* Declare ourselves safe for use inside shrinkers */ if (IS_ENABLED(CONFIG_LOCKDEP)) { @@ -1148,6 +1147,7 @@ int __i915_vma_unbind(struct i915_vma *vma) if (ret) return ret; + GEM_BUG_ON(i915_vma_is_active(vma)); if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); return -EBUSY; diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index a41785822ed9..2cc71bcf884f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -68,7 +68,7 @@ static struct live_active *__live_alloc(struct drm_i915_private *i915) return NULL; kref_init(&active->ref); - i915_active_init(i915, &active->base, __live_active, __live_retire); + i915_active_init(&active->base, __live_active, __live_retire); return active; } @@ -146,19 +146,13 @@ static int live_active_wait(void *arg) { struct drm_i915_private *i915 = arg; struct live_active *active; - intel_wakeref_t wakeref; int err = 0; /* Check that we get a callback when requests retire upon waiting */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - active = __live_active_setup(i915); - if (IS_ERR(active)) { - err = PTR_ERR(active); - goto err; - } + if (IS_ERR(active)) + return PTR_ERR(active); i915_active_wait(&active->base); if 
(!READ_ONCE(active->retired)) { @@ -168,11 +162,9 @@ static int live_active_wait(void *arg) __live_put(active); + mutex_lock(&i915->drm.struct_mutex); if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; - -err: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -182,23 +174,19 @@ static int live_active_retire(void *arg) { struct drm_i915_private *i915 = arg; struct live_active *active; - intel_wakeref_t wakeref; int err = 0; /* Check that we get a callback when requests are indirectly retired */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - active = __live_active_setup(i915); - if (IS_ERR(active)) { - err = PTR_ERR(active); - goto err; - } + if (IS_ERR(active)) + return PTR_ERR(active); /* waits for & retires all requests */ + mutex_lock(&i915->drm.struct_mutex); if (igt_flush_test(i915, I915_WAIT_LOCKED)) err = -EIO; + mutex_unlock(&i915->drm.struct_mutex); if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after flushing!\n"); @@ -207,10 +195,6 @@ static int live_active_retire(void *arg) __live_put(active); -err: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } From patchwork Fri Oct 4 13:40:01 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174535 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C7A1814DB for ; Fri, 4 Oct 2019 13:40:49 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id B005120700 for ; Fri, 4 Oct 2019 13:40:49 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 
mail.kernel.org B005120700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 878FC6EB8D; Fri, 4 Oct 2019 13:40:42 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id C126B6EB89 for ; Fri, 4 Oct 2019 13:40:39 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723956-1500050 for multiple; Fri, 04 Oct 2019 14:40:18 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:01 +0100 Message-Id: <20191004134015.13204-7-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 06/20] drm/i915: Move idle barrier cleanup into engine-pm X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Now that we no longer need to guarantee that the active callback is under the struct_mutex, we can lift it out of the i915_gem_park() and into the engine parking itself. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 19 ------------------- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_active.c | 1 + 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 0a4115c6c275..5180b2ee1cb7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -11,29 +11,10 @@ #include "i915_drv.h" #include "i915_globals.h" -static void call_idle_barriers(struct intel_engine_cs *engine) -{ - struct llist_node *node, *next; - - llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { - struct dma_fence_cb *cb = - container_of((struct list_head *)node, - typeof(*cb), node); - - cb->func(NULL, cb); - } -} - static void i915_gem_park(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) - call_idle_barriers(engine); /* cleanup after wedging */ - i915_vma_parked(i915); i915_globals_park(); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 66fc49b76ea8..8e5e513eddc9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -124,6 +124,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) return result; } +static void call_idle_barriers(struct intel_engine_cs *engine) +{ + struct llist_node *node, *next; + + llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { + struct dma_fence_cb *cb = + container_of((struct list_head *)node, + typeof(*cb), node); + + cb->func(NULL, cb); + } +} + static int __engine_park(struct intel_wakeref *wf) { struct intel_engine_cs *engine = @@ -143,6 +156,8 @@ static int __engine_park(struct intel_wakeref *wf) GEM_TRACE("%s\n", 
engine->name); + call_idle_barriers(engine); /* cleanup after wedging */ + intel_engine_disarm_breadcrumbs(engine); intel_engine_pool_park(&engine->pool); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 023652ded4be..aa37c07004b9 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -693,6 +693,7 @@ void i915_active_acquire_barrier(struct i915_active *ref) rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } From patchwork Fri Oct 4 13:40:02 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174529 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8DCD514DB for ; Fri, 4 Oct 2019 13:40:46 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 75B5420700 for ; Fri, 4 Oct 2019 13:40:46 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 75B5420700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 455356EB8C; Fri, 4 Oct 2019 13:40:42 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 
B0A206EB87 for ; Fri, 4 Oct 2019 13:40:39 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723957-1500050 for multiple; Fri, 04 Oct 2019 14:40:18 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:02 +0100 Message-Id: <20191004134015.13204-8-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 07/20] drm/i915: Drop struct_mutex from around i915_retire_requests() X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" We don't need to hold struct_mutex now for retiring requests, so drop it from i915_retire_requests() and i915_gem_wait_for_idle(), finally removing I915_WAIT_LOCKED for good. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- .../gpu/drm/i915/gem/i915_gem_client_blt.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 20 +-- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 45 ++---- .../i915/gem/selftests/i915_gem_coherency.c | 40 +++-- .../drm/i915/gem/selftests/i915_gem_context.c | 18 +-- .../drm/i915/gem/selftests/i915_gem_mman.c | 6 +- .../i915/gem/selftests/i915_gem_object_blt.c | 4 - drivers/gpu/drm/i915/gt/intel_gt_pm.c | 28 +--- drivers/gpu/drm/i915/gt/selftest_context.c | 4 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 89 ++--------- drivers/gpu/drm/i915/gt/selftest_lrc.c | 23 ++- drivers/gpu/drm/i915/gt/selftest_timeline.c | 91 +++++------ .../gpu/drm/i915/gt/selftest_workarounds.c | 6 +- drivers/gpu/drm/i915/i915_debugfs.c | 42 ++--- drivers/gpu/drm/i915/i915_gem.c | 19 +-- drivers/gpu/drm/i915/i915_request.h | 7 +- drivers/gpu/drm/i915/selftests/i915_active.c | 8 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 6 +- drivers/gpu/drm/i915/selftests/i915_request.c | 151 +++++------------- .../gpu/drm/i915/selftests/i915_selftest.c | 8 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 4 - .../gpu/drm/i915/selftests/igt_flush_test.c | 30 ++-- .../gpu/drm/i915/selftests/igt_flush_test.h | 2 +- .../gpu/drm/i915/selftests/igt_live_test.c | 9 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 4 - 26 files changed, 213 insertions(+), 460 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index c1fca5728e6e..81366aa4812b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -155,7 +155,6 @@ static void clear_pages_dma_fence_cb(struct dma_fence *fence, static void clear_pages_worker(struct work_struct *work) { struct clear_pages_work *w = container_of(work, typeof(*w), work); - struct drm_i915_private *i915 = w->ce->engine->i915; struct 
drm_i915_gem_object *obj = w->sleeve->vma->obj; struct i915_vma *vma = w->sleeve->vma; struct i915_request *rq; @@ -173,11 +172,9 @@ static void clear_pages_worker(struct work_struct *work) obj->read_domains = I915_GEM_GPU_DOMAINS; obj->write_domain = 0; - /* XXX: we need to kill this */ - mutex_lock(&i915->drm.struct_mutex); err = i915_vma_pin(vma, 0, 0, PIN_USER); if (unlikely(err)) - goto out_unlock; + goto out_signal; batch = intel_emit_vma_fill_blt(w->ce, vma, w->value); if (IS_ERR(batch)) { @@ -229,8 +226,6 @@ static void clear_pages_worker(struct work_struct *work) intel_emit_vma_release(w->ce, batch); out_unpin: i915_vma_unpin(vma); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); out_signal: if (unlikely(err)) { dma_fence_set_error(&w->dma, err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 9d85aab68d34..0ab416887fc2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1159,8 +1159,7 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) } static int -__intel_context_reconfigure_sseu(struct intel_context *ce, - struct intel_sseu sseu) +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) { int ret; @@ -1183,23 +1182,6 @@ __intel_context_reconfigure_sseu(struct intel_context *ce, return ret; } -static int -intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) -{ - struct drm_i915_private *i915 = ce->engine->i915; - int ret; - - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - - ret = __intel_context_reconfigure_sseu(ce, sseu); - - mutex_unlock(&i915->drm.struct_mutex); - - return ret; -} - static int user_to_context_sseu(struct drm_i915_private *i915, const struct drm_i915_gem_context_param_sseu *user, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 5180b2ee1cb7..2ddc3aeaac9d 
100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -48,11 +48,7 @@ static void retire_work_handler(struct work_struct *work) struct drm_i915_private *i915 = container_of(work, typeof(*i915), gem.retire_work.work); - /* Come back later if the device is busy... */ - if (mutex_trylock(&i915->drm.struct_mutex)) { - i915_retire_requests(i915); - mutex_unlock(&i915->drm.struct_mutex); - } + i915_retire_requests(i915); queue_delayed_work(i915->wq, &i915->gem.retire_work, @@ -86,26 +82,23 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); - do { - if (i915_gem_wait_for_idle(gt->i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(gt->i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - intel_gt_set_wedged(gt); - result = false; + if (i915_gem_wait_for_idle(gt->i915, + I915_WAIT_FOR_IDLE_BOOST, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(gt->i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); } - } while (i915_retire_requests(gt->i915) && result); + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + intel_gt_set_wedged(gt); + result = false; + } if (intel_gt_pm_wait_for_idle(gt)) result = false; @@ -145,8 +138,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) user_forcewake(&i915->gt, true); - mutex_lock(&i915->drm.struct_mutex); - /* * We have to flush all the executing contexts to main memory so * that they can saved in the hibernation image. 
To ensure the last @@ -158,8 +149,6 @@ void i915_gem_suspend(struct drm_i915_private *i915) */ switch_to_kernel_context_sync(&i915->gt); - mutex_unlock(&i915->drm.struct_mutex); - cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 0ff7a89aadca..549810f70aeb 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -7,6 +7,7 @@ #include #include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" #include "i915_selftest.h" #include "selftests/i915_random.h" @@ -78,7 +79,7 @@ static int gtt_set(struct drm_i915_gem_object *obj, { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, true); @@ -90,15 +91,21 @@ static int gtt_set(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } iowrite32(v, &map[offset / sizeof(*map)]); i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } static int gtt_get(struct drm_i915_gem_object *obj, @@ -107,7 +114,7 @@ static int gtt_get(struct drm_i915_gem_object *obj, { struct i915_vma *vma; u32 __iomem *map; - int err; + int err = 0; i915_gem_object_lock(obj); err = i915_gem_object_set_to_gtt_domain(obj, false); @@ -119,15 +126,21 @@ static int gtt_get(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) return PTR_ERR(vma); + intel_gt_pm_get(vma->vm->gt); + map = i915_vma_pin_iomap(vma); i915_vma_unpin(vma); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto out_rpm; + } *v = ioread32(&map[offset / sizeof(*map)]); 
i915_vma_unpin_iomap(vma); - return 0; +out_rpm: + intel_gt_pm_put(vma->vm->gt); + return err; } static int wc_set(struct drm_i915_gem_object *obj, @@ -280,7 +293,6 @@ static int igt_gem_coherency(void *arg) struct drm_i915_private *i915 = arg; const struct igt_coherency_mode *read, *write, *over; struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; unsigned long count, n; u32 *offsets, *values; int err = 0; @@ -299,8 +311,6 @@ static int igt_gem_coherency(void *arg) values = offsets + ncachelines; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); for (over = igt_coherency_mode; over->name; over++) { if (!over->set) continue; @@ -326,7 +336,7 @@ static int igt_gem_coherency(void *arg) obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto unlock; + goto free; } i915_random_reorder(offsets, ncachelines, &prng); @@ -377,15 +387,13 @@ static int igt_gem_coherency(void *arg) } } } -unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); +free: kfree(offsets); return err; put_object: i915_gem_object_put(obj); - goto unlock; + goto free; } int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 8eba0d3a31de..f5402aad9b5a 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -164,7 +164,6 @@ struct parallel_switch { static int __live_parallel_switch1(void *data) { struct parallel_switch *arg = data; - struct drm_i915_private *i915 = arg->ce[0]->engine->i915; IGT_TIMEOUT(end_time); unsigned long count; @@ -176,16 +175,12 @@ static int __live_parallel_switch1(void *data) for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { i915_request_put(rq); - mutex_lock(&i915->drm.struct_mutex); rq = i915_request_create(arg->ce[n]); - if 
(IS_ERR(rq)) { - mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); } err = 0; @@ -205,7 +200,6 @@ static int __live_parallel_switch1(void *data) static int __live_parallel_switchN(void *data) { struct parallel_switch *arg = data; - struct drm_i915_private *i915 = arg->ce[0]->engine->i915; IGT_TIMEOUT(end_time); unsigned long count; int n; @@ -215,15 +209,11 @@ static int __live_parallel_switchN(void *data) for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { struct i915_request *rq; - mutex_lock(&i915->drm.struct_mutex); rq = i915_request_create(arg->ce[n]); - if (IS_ERR(rq)) { - mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_add(rq); - mutex_unlock(&i915->drm.struct_mutex); } count++; @@ -1173,7 +1163,7 @@ __sseu_test(const char *name, if (ret) return ret; - ret = __intel_context_reconfigure_sseu(ce, sseu); + ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) goto out_spin; @@ -1277,7 +1267,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915, goto out_fail; out_fail: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) ret = -EIO; intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 36aca1c172e7..856b8e467ee8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -581,12 +581,8 @@ static void disable_retire_worker(struct drm_i915_private *i915) static void restore_retire_worker(struct drm_i915_private *i915) { + igt_flush_test(i915); intel_gt_pm_put(&i915->gt); - - mutex_lock(&i915->drm.struct_mutex); - igt_flush_test(i915, I915_WAIT_LOCKED); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_driver_register__shrinker(i915); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index c21d747e7d05..9ec55b3a3815 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -65,9 +65,7 @@ static int igt_fill_blt(void *arg) if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) obj->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_fill_blt(obj, ce, val); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; @@ -166,9 +164,7 @@ static int igt_copy_blt(void *arg) if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) dst->cache_dirty = true; - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_copy_blt(src, dst, ce); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 29fa1dabbc2e..d4cefdd38431 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -196,26 +196,14 @@ int intel_gt_resume(struct intel_gt *gt) static void wait_for_idle(struct intel_gt *gt) { - mutex_lock(&gt->i915->drm.struct_mutex); /* XXX */ - do { - if (i915_gem_wait_for_idle(gt->i915, - I915_WAIT_LOCKED, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { - /* XXX hide warning from gem_eio */ - if (i915_modparams.reset) { - dev_err(gt->i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* - * Forcibly cancel outstanding work and leave - * the gpu quiet. - */ - intel_gt_set_wedged(gt); - } - } while (i915_retire_requests(gt->i915)); - mutex_unlock(&gt->i915->drm.struct_mutex); + if (i915_gem_wait_for_idle(gt->i915, 0, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. 
+ */ + intel_gt_set_wedged(gt); + } intel_gt_pm_wait_for_idle(gt); } diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index e6bcbe7ab5e1..86cffbb0a9cb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -318,7 +318,7 @@ static int live_active_context(void *arg) if (err) break; - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } @@ -431,7 +431,7 @@ static int live_remote_context(void *arg) if (err) break; - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index d3bee9f88008..ffbb3d23b887 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -58,7 +58,9 @@ static int hang_init(struct hang *h, struct intel_gt *gt) memset(h, 0, sizeof(*h)); h->gt = gt; + mutex_lock(&gt->i915->drm.struct_mutex); h->ctx = kernel_context(gt->i915); + mutex_unlock(&gt->i915->drm.struct_mutex); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); @@ -285,7 +287,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - igt_flush_test(h->gt->i915, I915_WAIT_LOCKED); + igt_flush_test(h->gt->i915); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -309,10 +311,9 @@ static int igt_hang_sanitycheck(void *arg) /* Basic check that we can execute our hanging batch */ - mutex_lock(&gt->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) - goto unlock; + return err; for_each_engine(engine, gt->i915, id) { struct intel_wedge_me w; @@ -355,8 +356,6 @@ static int igt_hang_sanitycheck(void *arg) fini: hang_fini(&h); -unlock: - mutex_unlock(&gt->i915->drm.struct_mutex); return err; } @@ -395,8 +394,6 @@ static int igt_reset_nop(void *arg) reset_count = i915_reset_count(global); count = 0; do { - 
mutex_lock(>->i915->drm.struct_mutex); - for_each_engine(engine, gt->i915, id) { int i; @@ -417,7 +414,6 @@ static int igt_reset_nop(void *arg) intel_gt_reset(gt, ALL_ENGINES, NULL); igt_global_reset_unlock(gt); - mutex_unlock(>->i915->drm.struct_mutex); if (intel_gt_is_wedged(gt)) { err = -EIO; break; @@ -429,16 +425,13 @@ static int igt_reset_nop(void *arg) break; } - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } while (time_before(jiffies, end_time)); pr_info("%s: %d resets\n", __func__, count); - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - + err = igt_flush_test(gt->i915); out: mock_file_free(gt->i915, file); if (intel_gt_is_wedged(gt)) @@ -494,7 +487,6 @@ static int igt_reset_nop_engine(void *arg) break; } - mutex_lock(>->i915->drm.struct_mutex); for (i = 0; i < 16; i++) { struct i915_request *rq; @@ -507,7 +499,6 @@ static int igt_reset_nop_engine(void *arg) i915_request_add(rq); } err = intel_engine_reset(engine, NULL); - mutex_unlock(>->i915->drm.struct_mutex); if (err) { pr_err("i915_reset_engine failed\n"); break; @@ -533,15 +524,12 @@ static int igt_reset_nop_engine(void *arg) if (err) break; - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); - + err = igt_flush_test(gt->i915); out: mock_file_free(gt->i915, file); if (intel_gt_is_wedged(gt)) @@ -563,9 +551,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) return 0; if (active) { - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); - mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; } @@ -593,17 +579,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (active) { struct i915_request *rq; - mutex_lock(>->i915->drm.struct_mutex); rq = 
hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -647,7 +630,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) break; - err = igt_flush_test(gt->i915, 0); + err = igt_flush_test(gt->i915); if (err) break; } @@ -655,11 +638,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (intel_gt_is_wedged(gt)) err = -EIO; - if (active) { - mutex_lock(>->i915->drm.struct_mutex); + if (active) hang_fini(&h); - mutex_unlock(>->i915->drm.struct_mutex); - } return err; } @@ -741,10 +721,8 @@ static int active_engine(void *data) struct i915_request *old = rq[idx]; struct i915_request *new; - mutex_lock(&engine->i915->drm.struct_mutex); new = igt_request_alloc(ctx[idx], engine); if (IS_ERR(new)) { - mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); break; } @@ -755,7 +733,6 @@ static int active_engine(void *data) rq[idx] = i915_request_get(new); i915_request_add(new); - mutex_unlock(&engine->i915->drm.struct_mutex); err = active_request_put(old); if (err) @@ -795,9 +772,7 @@ static int __igt_reset_engines(struct intel_gt *gt, return 0; if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); - mutex_unlock(>->i915->drm.struct_mutex); if (err) return err; @@ -855,17 +830,14 @@ static int __igt_reset_engines(struct intel_gt *gt, struct i915_request *rq = NULL; if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - mutex_unlock(>->i915->drm.struct_mutex); break; } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(>->i915->drm.struct_mutex); if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ 
-977,9 +949,7 @@ static int __igt_reset_engines(struct intel_gt *gt, if (err) break; - mutex_lock(>->i915->drm.struct_mutex); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(>->i915->drm.struct_mutex); + err = igt_flush_test(gt->i915); if (err) break; } @@ -987,11 +957,8 @@ static int __igt_reset_engines(struct intel_gt *gt, if (intel_gt_is_wedged(gt)) err = -EIO; - if (flags & TEST_ACTIVE) { - mutex_lock(>->i915->drm.struct_mutex); + if (flags & TEST_ACTIVE) hang_fini(&h); - mutex_unlock(>->i915->drm.struct_mutex); - } return err; } @@ -1061,7 +1028,6 @@ static int igt_reset_wait(void *arg) igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) goto unlock; @@ -1109,7 +1075,6 @@ static int igt_reset_wait(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -1189,10 +1154,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, /* Check that we can recover an unbind stuck on a hanging request */ - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) - goto unlock; + return err; obj = i915_gem_object_create_internal(gt->i915, SZ_1M); if (IS_ERR(obj)) { @@ -1255,8 +1219,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, if (err) goto out_rq; - mutex_unlock(>->i915->drm.struct_mutex); - if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); @@ -1305,16 +1267,12 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, put_task_struct(tsk); } - mutex_lock(>->i915->drm.struct_mutex); out_rq: i915_request_put(rq); out_obj: i915_gem_object_put(obj); fini: hang_fini(&h); -unlock: - mutex_unlock(>->i915->drm.struct_mutex); - if (intel_gt_is_wedged(gt)) return -EIO; @@ -1396,7 +1354,6 @@ static int igt_reset_queue(void *arg) igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); err = hang_init(&h, gt); if (err) goto unlock; @@ -1511,7 
+1468,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = igt_flush_test(gt->i915, I915_WAIT_LOCKED); + err = igt_flush_test(gt->i915); if (err) break; } @@ -1519,7 +1476,6 @@ static int igt_reset_queue(void *arg) fini: hang_fini(&h); unlock: - mutex_unlock(>->i915->drm.struct_mutex); igt_global_reset_unlock(gt); if (intel_gt_is_wedged(gt)) @@ -1546,11 +1502,9 @@ static int igt_handle_error(void *arg) if (!engine || !intel_engine_can_store_dword(engine)) return 0; - mutex_lock(>->i915->drm.struct_mutex); - err = hang_init(&h, gt); if (err) - goto err_unlock; + return err; rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { @@ -1574,8 +1528,6 @@ static int igt_handle_error(void *arg) goto err_request; } - mutex_unlock(>->i915->drm.struct_mutex); - /* Temporarily disable error capture */ error = xchg(&global->first_error, (void *)-1); @@ -1583,8 +1535,6 @@ static int igt_handle_error(void *arg) xchg(&global->first_error, error); - mutex_lock(>->i915->drm.struct_mutex); - if (rq->fence.error != -EIO) { pr_err("Guilty request not identified!\n"); err = -EINVAL; @@ -1595,8 +1545,6 @@ static int igt_handle_error(void *arg) i915_request_put(rq); err_fini: hang_fini(&h); -err_unlock: - mutex_unlock(>->i915->drm.struct_mutex); return err; } @@ -1689,7 +1637,6 @@ static int igt_reset_engines_atomic(void *arg) return 0; igt_global_reset_lock(gt); - mutex_lock(>->i915->drm.struct_mutex); /* Flush any requests before we get started and check basics */ if (!igt_force_reset(gt)) @@ -1709,9 +1656,7 @@ static int igt_reset_engines_atomic(void *arg) out: /* As we poke around the guts, do a full reset before continuing. 
*/ igt_force_reset(gt); - unlock: - mutex_unlock(&gt->i915->drm.struct_mutex); igt_global_reset_unlock(gt); return err; @@ -1751,10 +1696,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) err = intel_gt_live_subtests(tests, gt); - mutex_lock(&gt->i915->drm.struct_mutex); - igt_flush_test(gt->i915, I915_WAIT_LOCKED); - mutex_unlock(&gt->i915->drm.struct_mutex); - i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index dd25636abc5b..04c1cf573642 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -61,7 +61,7 @@ static int live_sanitycheck(void *arg) } igt_spinner_end(&spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915)) { err = -EIO; goto err_ctx; } @@ -384,8 +384,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, if (err) goto out; - if (i915_request_wait(head, - I915_WAIT_LOCKED, + if (i915_request_wait(head, 0, 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) { pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", count, n); @@ -457,7 +456,7 @@ static int live_timeslice_preempt(void *arg) if (err) goto err_pin; - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915)) { err = -EIO; goto err_pin; } @@ -1010,7 +1009,7 @@ static int live_nopreempt(void *arg) goto err_wedged; } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) goto err_wedged; } @@ -1075,7 +1074,7 @@ static int live_suppress_self_preempt(void *arg) if (!intel_engine_has_preemption(engine)) continue; - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) goto err_wedged; intel_engine_pm_get(engine); @@ -1136,7 +1135,7 @@ static int live_suppress_self_preempt(void *arg) } intel_engine_pm_put(engine); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if 
(igt_flush_test(i915)) goto err_wedged; } @@ -1297,7 +1296,7 @@ static int live_suppress_wait_preempt(void *arg) for (i = 0; i < ARRAY_SIZE(client); i++) igt_spinner_end(&client[i].spin); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) goto err_wedged; if (engine->execlists.preempt_hang.count) { @@ -1576,7 +1575,7 @@ static int live_preempt_hang(void *arg) igt_spinner_end(&spin_hi); igt_spinner_end(&spin_lo); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915)) { err = -EIO; goto err_ctx_lo; } @@ -1973,7 +1972,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915, prime, div64_u64(ktime_to_ns(times[1]), prime)); out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; for (nc = 0; nc < nctx; nc++) { @@ -2118,7 +2117,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915, goto out; out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; for (n = 0; n < nsibling; n++) @@ -2296,7 +2295,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915, out: for (n = 0; !IS_ERR(rq[n]); n++) i915_request_put(rq[n]); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; kernel_context_close(ctx); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 321481403165..16abfabf08c7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -6,7 +6,7 @@ #include -#include "gem/i915_gem_pm.h" +#include "intel_engine_pm.h" #include "intel_gt.h" #include "../selftests/i915_random.h" @@ -136,7 +136,6 @@ static int mock_hwsp_freelist(void *arg) goto err_put; } - mutex_lock(&state.i915->drm.struct_mutex); for (p = phases; p->name; p++) { pr_debug("%s(%s)\n", __func__, p->name); for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { @@ -149,7 +148,6 @@ static int mock_hwsp_freelist(void *arg) out: for (na = 
0; na < state.max; na++) __mock_hwsp_record(&state, na, NULL); - mutex_unlock(&state.i915->drm.struct_mutex); kfree(state.history); err_put: drm_dev_put(&state.i915->drm); @@ -449,8 +447,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) struct i915_request *rq; int err; - lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ - err = intel_timeline_pin(tl); if (err) { rq = ERR_PTR(err); @@ -461,10 +457,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) if (IS_ERR(rq)) goto out_unpin; + i915_request_get(rq); + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); i915_request_add(rq); - if (err) + if (err) { + i915_request_put(rq); rq = ERR_PTR(err); + } out_unpin: intel_timeline_unpin(tl); @@ -500,7 +500,6 @@ static int live_hwsp_engine(void *arg) struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -515,14 +514,13 @@ static int live_hwsp_engine(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for_each_engine(engine, i915, id) { if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + for (n = 0; n < NUM_TIMELINES; n++) { struct intel_timeline *tl; struct i915_request *rq; @@ -530,22 +528,26 @@ static int live_hwsp_engine(void *arg) tl = checked_intel_timeline_create(i915); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } timelines[count++] = tl; + i915_request_put(rq); } + + intel_engine_pm_put(engine); + if (err) + break; } -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -559,11 +561,7 @@ static int live_hwsp_engine(void *arg) 
intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } @@ -575,7 +573,6 @@ static int live_hwsp_alternate(void *arg) struct intel_timeline **timelines; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count, n; int err = 0; @@ -591,9 +588,6 @@ static int live_hwsp_alternate(void *arg) if (!timelines) return -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for (n = 0; n < NUM_TIMELINES; n++) { for_each_engine(engine, i915, id) { @@ -605,11 +599,14 @@ static int live_hwsp_alternate(void *arg) tl = checked_intel_timeline_create(i915); if (IS_ERR(tl)) { + intel_engine_pm_put(engine); err = PTR_ERR(tl); goto out; } + intel_engine_pm_get(engine); rq = tl_write(tl, engine, count); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); @@ -617,11 +614,12 @@ static int live_hwsp_alternate(void *arg) } timelines[count++] = tl; + i915_request_put(rq); } } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; for (n = 0; n < count; n++) { @@ -635,11 +633,7 @@ static int live_hwsp_alternate(void *arg) intel_timeline_put(tl); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - kvfree(timelines); - return err; #undef NUM_TIMELINES } @@ -650,7 +644,6 @@ static int live_hwsp_wrap(void *arg) struct intel_engine_cs *engine; struct intel_timeline *tl; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = 0; /* @@ -658,14 +651,10 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - tl = intel_timeline_create(&i915->gt, NULL); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out_rpm; - } + if (IS_ERR(tl)) + return PTR_ERR(tl); + if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) goto out_free; @@ -681,7 +670,9 @@ static int live_hwsp_wrap(void *arg) if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); rq = i915_request_create(engine->kernel_context); + intel_engine_pm_put(engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out; @@ -747,16 +738,12 @@ static int live_hwsp_wrap(void *arg) } out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; intel_timeline_unpin(tl); out_free: intel_timeline_put(tl); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } @@ -765,7 +752,6 @@ static int live_hwsp_recycle(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; unsigned long count; int err = 0; @@ -775,9 +761,6 @@ static int live_hwsp_recycle(void *arg) * want to confuse ourselves or the GPU. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - count = 0; for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time); @@ -785,6 +768,8 @@ static int live_hwsp_recycle(void *arg) if (!intel_engine_can_store_dword(engine)) continue; + intel_engine_pm_get(engine); + do { struct intel_timeline *tl; struct i915_request *rq; @@ -792,21 +777,22 @@ static int live_hwsp_recycle(void *arg) tl = checked_intel_timeline_create(i915); if (IS_ERR(tl)) { err = PTR_ERR(tl); - goto out; + break; } rq = tl_write(tl, engine, count); if (IS_ERR(rq)) { intel_timeline_put(tl); err = PTR_ERR(rq); - goto out; + break; } if (i915_request_wait(rq, 0, HZ / 5) < 0) { pr_err("Wait for timeline writes timed out!\n"); + i915_request_put(rq); intel_timeline_put(tl); err = -EIO; - goto out; + break; } if (*tl->hwsp_seqno != count) { @@ -815,17 +801,18 @@ static int live_hwsp_recycle(void *arg) err = -EINVAL; } + i915_request_put(rq); intel_timeline_put(tl); count++; if (err) - goto out; + break; } while (!__igt_timeout(end_time, NULL)); - } -out: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); + intel_engine_pm_put(engine); + if (err) + break; + } return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index d40ce0709bff..4ee2e2babd0d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -676,7 +676,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, break; } - if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED)) + if (igt_flush_test(ctx->i915)) err = -EIO; out_batch: i915_vma_unpin_and_release(&batch, 0); @@ -1090,7 +1090,7 @@ static int live_isolated_whitelist(void *arg) kernel_context_close(client[i].ctx); } - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; return err; @@ -1248,7 +1248,7 @@ live_engine_reset_workarounds(void 
*arg) igt_global_reset_unlock(&i915->gt); kernel_context_close(ctx); - igt_flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915); return ret; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fec9fb7cc384..385289895107 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3621,6 +3621,7 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + int ret; DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); @@ -3630,40 +3631,21 @@ i915_drop_caches_set(void *data, u64 val) I915_IDLE_ENGINES_TIMEOUT)) intel_gt_set_wedged(&i915->gt); - /* No need to check and wait for gpu resets, only libdrm auto-restarts - * on ioctls on -EAGAIN. */ - if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) { - int ret; + if (val & DROP_RETIRE) + i915_retire_requests(i915); - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (val & (DROP_IDLE | DROP_ACTIVE)) { + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; + } - /* - * To finish the flush of the idle_worker, we must complete - * the switch-to-kernel-context, which requires a double - * pass through wait_for_idle: first queues the switch, - * second waits for the switch. 
- */ - if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE)) - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - if (ret == 0 && val & DROP_IDLE) - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - - if (val & DROP_RETIRE) - i915_retire_requests(i915); - - mutex_unlock(&i915->drm.struct_mutex); - - if (ret == 0 && val & DROP_IDLE) - ret = intel_gt_pm_wait_for_idle(&i915->gt); + if (val & DROP_IDLE) { + ret = intel_gt_pm_wait_for_idle(&i915->gt); + if (ret) + return ret; } if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt)) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 64890627d638..e4c553d9aa07 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -945,19 +945,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!intel_gt_pm_is_awake(gt)) return 0; - GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", - flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", - timeout, timeout == MAX_SCHEDULE_TIMEOUT ? 
" (forever)" : ""); - - timeout = wait_for_timelines(gt, flags, timeout); - if (timeout < 0) - return timeout; + do { + timeout = wait_for_timelines(gt, flags, timeout); + if (timeout < 0) + return timeout; - if (flags & I915_WAIT_LOCKED) { - lockdep_assert_held(&i915->drm.struct_mutex); + cond_resched(); + if (signal_pending(current)) + return -EINTR; - i915_retire_requests(i915); - } + } while (i915_retire_requests(i915)); return 0; } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 91a885c36c6b..621fb33cda30 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -308,10 +308,9 @@ long i915_request_wait(struct i915_request *rq, long timeout) __attribute__((nonnull(1))); #define I915_WAIT_INTERRUPTIBLE BIT(0) -#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ -#define I915_WAIT_PRIORITY BIT(2) /* small priority bump for the request */ -#define I915_WAIT_ALL BIT(3) /* used by i915_gem_object_wait() */ -#define I915_WAIT_FOR_IDLE_BOOST BIT(4) +#define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ +#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ +#define I915_WAIT_FOR_IDLE_BOOST BIT(3) static inline bool i915_request_signaled(const struct i915_request *rq) { diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 2cc71bcf884f..268192b5613b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -162,10 +162,8 @@ static int live_active_wait(void *arg) __live_put(active); - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -183,10 +181,8 @@ static int live_active_retire(void *arg) return PTR_ERR(active); /* waits for & retires all requests */ - mutex_lock(&i915->drm.struct_mutex); - if 
(igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); if (!READ_ONCE(active->retired)) { pr_err("i915_active not retired after flushing!\n"); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 75a4695b82bb..52d2df843148 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -523,7 +523,7 @@ static int igt_evict_contexts(void *arg) mutex_lock(&i915->ggtt.vm.mutex); out_locked: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; while (reserved) { struct reserved *next = reserved->next; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 02749bbfd0cf..e40e6cfa51f1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1705,12 +1705,8 @@ int i915_gem_gtt_mock_selftests(void) err = i915_subtests(tests, ggtt); - mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); kfree(ggtt); out_put: @@ -2006,7 +2002,7 @@ static int igt_cs_tlb(void *arg) } } end: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; i915_gem_context_unlock_engines(ctx); i915_gem_object_unpin_map(out); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index eb175da48547..d7d68c6a6bd5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -41,21 +41,16 @@ static int igt_add_request(void *arg) { struct drm_i915_private *i915 = arg; struct i915_request *request; - int err = -ENOMEM; /* Basic preliminary test to create a request and let it loose! 
*/ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10); if (!request) - goto out_unlock; + return -ENOMEM; i915_request_add(request); - err = 0; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } static int igt_wait_request(void *arg) @@ -67,12 +62,10 @@ static int igt_wait_request(void *arg) /* Submit a request, then wait upon it */ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_unlock; - } + if (!request) + return -ENOMEM; + i915_request_get(request); if (i915_request_wait(request, 0, 0) != -ETIME) { @@ -125,9 +118,7 @@ static int igt_wait_request(void *arg) err = 0; out_request: i915_request_put(request); -out_unlock: mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -140,52 +131,45 @@ static int igt_fence_wait(void *arg) /* Submit a request, treat it as a fence and wait upon it */ - mutex_lock(&i915->drm.struct_mutex); request = mock_request(i915->engine[RCS0]->kernel_context, T); - if (!request) { - err = -ENOMEM; - goto out_locked; - } + if (!request) + return -ENOMEM; if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { pr_err("fence wait success before submit (expected timeout)!\n"); - goto out_locked; + goto out; } i915_request_add(request); - mutex_unlock(&i915->drm.struct_mutex); if (dma_fence_is_signaled(&request->fence)) { pr_err("fence signaled immediately!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { pr_err("fence wait success after submit (expected timeout)!\n"); - goto out_device; + goto out; } if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { pr_err("fence wait timed out (expected success)!\n"); - goto out_device; + goto out; } if (!dma_fence_is_signaled(&request->fence)) { pr_err("fence unsignaled after waiting!\n"); - goto out_device; + goto 
out; } if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { pr_err("fence wait timed out when complete (expected success)!\n"); - goto out_device; + goto out; } err = 0; -out_device: - mutex_lock(&i915->drm.struct_mutex); -out_locked: +out: mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -199,6 +183,8 @@ static int igt_request_rewind(void *arg) mutex_lock(&i915->drm.struct_mutex); ctx[0] = mock_context(i915, "A"); + mutex_unlock(&i915->drm.struct_mutex); + ce = i915_gem_context_get_engine(ctx[0], RCS0); GEM_BUG_ON(IS_ERR(ce)); request = mock_request(ce, 2 * HZ); @@ -211,7 +197,10 @@ static int igt_request_rewind(void *arg) i915_request_get(request); i915_request_add(request); + mutex_lock(&i915->drm.struct_mutex); ctx[1] = mock_context(i915, "B"); + mutex_unlock(&i915->drm.struct_mutex); + ce = i915_gem_context_get_engine(ctx[1], RCS0); GEM_BUG_ON(IS_ERR(ce)); vip = mock_request(ce, 0); @@ -233,7 +222,6 @@ static int igt_request_rewind(void *arg) request->engine->submit_request(request); rcu_read_unlock(); - mutex_unlock(&i915->drm.struct_mutex); if (i915_request_wait(vip, 0, HZ) == -ETIME) { pr_err("timed out waiting for high priority request\n"); @@ -248,14 +236,12 @@ static int igt_request_rewind(void *arg) err = 0; err: i915_request_put(vip); - mutex_lock(&i915->drm.struct_mutex); err_context_1: mock_context_close(ctx[1]); i915_request_put(request); err_context_0: mock_context_close(ctx[0]); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -282,7 +268,6 @@ __live_request_alloc(struct intel_context *ce) static int __igt_breadcrumbs_smoketest(void *arg) { struct smoketest *t = arg; - struct mutex * const BKL = &t->engine->i915->drm.struct_mutex; const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; const unsigned int total = 4 * t->ncontexts + 1; unsigned int num_waits = 0, num_fences = 0; @@ -337,14 +322,11 @@ static int __igt_breadcrumbs_smoketest(void *arg) struct 
i915_request *rq; struct intel_context *ce; - mutex_lock(BKL); - ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); rq = t->request_alloc(ce); intel_context_put(ce); if (IS_ERR(rq)) { - mutex_unlock(BKL); err = PTR_ERR(rq); count = n; break; @@ -357,8 +339,6 @@ static int __igt_breadcrumbs_smoketest(void *arg) requests[n] = i915_request_get(rq); i915_request_add(rq); - mutex_unlock(BKL); - if (err >= 0) err = i915_sw_fence_await_dma_fence(wait, &rq->fence, @@ -457,15 +437,15 @@ static int mock_breadcrumbs_smoketest(void *arg) goto out_threads; } - mutex_lock(&t.engine->i915->drm.struct_mutex); for (n = 0; n < t.ncontexts; n++) { + mutex_lock(&t.engine->i915->drm.struct_mutex); t.contexts[n] = mock_context(t.engine->i915, "mock"); + mutex_unlock(&t.engine->i915->drm.struct_mutex); if (!t.contexts[n]) { ret = -ENOMEM; goto out_contexts; } } - mutex_unlock(&t.engine->i915->drm.struct_mutex); for (n = 0; n < ncpus; n++) { threads[n] = kthread_run(__igt_breadcrumbs_smoketest, @@ -495,18 +475,15 @@ static int mock_breadcrumbs_smoketest(void *arg) atomic_long_read(&t.num_fences), ncpus); - mutex_lock(&t.engine->i915->drm.struct_mutex); out_contexts: for (n = 0; n < t.ncontexts; n++) { if (!t.contexts[n]) break; mock_context_close(t.contexts[n]); } - mutex_unlock(&t.engine->i915->drm.struct_mutex); kfree(t.contexts); out_threads: kfree(threads); - return ret; } @@ -539,7 +516,6 @@ static int live_nop_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; unsigned int id; int err = -ENODEV; @@ -549,28 +525,25 @@ static int live_nop_request(void *arg) * the overhead of submitting requests to the hardware. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - for_each_engine(engine, i915, id) { - struct i915_request *request = NULL; unsigned long n, prime; IGT_TIMEOUT(end_time); ktime_t times[2] = {}; err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + return err; for_each_prime_number_from(prime, 1, 8192) { + struct i915_request *request = NULL; + times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { + i915_request_put(request); request = i915_request_create(engine->kernel_context); - if (IS_ERR(request)) { - err = PTR_ERR(request); - goto out_unlock; - } + if (IS_ERR(request)) + return PTR_ERR(request); /* This space is left intentionally blank. * @@ -585,9 +558,11 @@ static int live_nop_request(void *arg) * for latency. */ + i915_request_get(request); i915_request_add(request); } i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(request); times[1] = ktime_sub(ktime_get_raw(), times[1]); if (prime == 1) @@ -599,7 +574,7 @@ static int live_nop_request(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + return err; pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -607,9 +582,6 @@ static int live_nop_request(void *arg) prime, div64_u64(ktime_to_ns(times[1]), prime)); } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -679,6 +651,7 @@ empty_request(struct intel_engine_cs *engine, if (err) goto out_request; + i915_request_get(request); out_request: i915_request_add(request); return err ? 
ERR_PTR(err) : request; @@ -688,7 +661,6 @@ static int live_empty_request(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; struct i915_vma *batch; unsigned int id; @@ -699,14 +671,9 @@ static int live_empty_request(void *arg) * the overhead of submitting requests to the hardware. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - batch = empty_batch(i915); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_unlock; - } + if (IS_ERR(batch)) + return PTR_ERR(batch); for_each_engine(engine, i915, id) { IGT_TIMEOUT(end_time); @@ -730,6 +697,7 @@ static int live_empty_request(void *arg) times[1] = ktime_get_raw(); for (n = 0; n < prime; n++) { + i915_request_put(request); request = empty_request(engine, batch); if (IS_ERR(request)) { err = PTR_ERR(request); @@ -745,6 +713,7 @@ static int live_empty_request(void *arg) if (__igt_timeout(end_time, NULL)) break; } + i915_request_put(request); err = igt_live_test_end(&t); if (err) @@ -759,9 +728,6 @@ static int live_empty_request(void *arg) out_batch: i915_vma_unpin(batch); i915_vma_put(batch); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -841,7 +807,6 @@ static int live_all_engines(void *arg) struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; struct i915_request *request[I915_NUM_ENGINES]; - intel_wakeref_t wakeref; struct igt_live_test t; struct i915_vma *batch; unsigned int id; @@ -852,18 +817,15 @@ static int live_all_engines(void *arg) * block doing so, and that they don't complete too soon. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + return err; batch = recursive_batch(i915); if (IS_ERR(batch)) { err = PTR_ERR(batch); pr_err("%s: Unable to create batch, err=%d\n", __func__, err); - goto out_unlock; + return err; } for_each_engine(engine, i915, id) { @@ -933,9 +895,6 @@ static int live_all_engines(void *arg) i915_request_put(request[id]); i915_vma_unpin(batch); i915_vma_put(batch); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -945,7 +904,6 @@ static int live_sequential_engines(void *arg) struct i915_request *request[I915_NUM_ENGINES] = {}; struct i915_request *prev = NULL; struct intel_engine_cs *engine; - intel_wakeref_t wakeref; struct igt_live_test t; unsigned int id; int err; @@ -956,12 +914,9 @@ static int live_sequential_engines(void *arg) * they are running on independent engines. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + return err; for_each_engine(engine, i915, id) { struct i915_vma *batch; @@ -971,7 +926,7 @@ static int live_sequential_engines(void *arg) err = PTR_ERR(batch); pr_err("%s: Unable to create batch for %s, err=%d\n", __func__, engine->name, err); - goto out_unlock; + return err; } request[id] = i915_request_create(engine->kernel_context); @@ -1063,9 +1018,6 @@ static int live_sequential_engines(void *arg) i915_vma_put(request[id]->batch); i915_request_put(request[id]); } -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1080,16 +1032,12 @@ static int __live_parallel_engine1(void *arg) struct i915_request *rq; int err; - mutex_lock(&engine->i915->drm.struct_mutex); rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) { - mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_get(rq); i915_request_add(rq); - mutex_unlock(&engine->i915->drm.struct_mutex); err = 0; if (i915_request_wait(rq, 0, HZ / 5) < 0) @@ -1115,16 +1063,11 @@ static int __live_parallel_engineN(void *arg) do { struct i915_request *rq; - mutex_lock(&engine->i915->drm.struct_mutex); rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) { - mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(rq)) return PTR_ERR(rq); - } i915_request_add(rq); - mutex_unlock(&engine->i915->drm.struct_mutex); - count++; } while (!__igt_timeout(end_time, NULL)); @@ -1154,9 +1097,7 @@ static int live_parallel_engines(void *arg) struct task_struct *tsk[I915_NUM_ENGINES] = {}; struct igt_live_test t; - mutex_lock(&i915->drm.struct_mutex); err = igt_live_test_begin(&t, i915, __func__, ""); - mutex_unlock(&i915->drm.struct_mutex); if (err) break; @@ -1184,10 +1125,8 @@ static int 
live_parallel_engines(void *arg) put_task_struct(tsk[id]); } - mutex_lock(&i915->drm.struct_mutex); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); } return err; @@ -1280,9 +1219,10 @@ static int live_breadcrumbs_smoketest(void *arg) goto out_threads; } - mutex_lock(&i915->drm.struct_mutex); for (n = 0; n < t[0].ncontexts; n++) { + mutex_lock(&i915->drm.struct_mutex); t[0].contexts[n] = live_context(i915, file); + mutex_unlock(&i915->drm.struct_mutex); if (!t[0].contexts[n]) { ret = -ENOMEM; goto out_contexts; @@ -1299,7 +1239,6 @@ static int live_breadcrumbs_smoketest(void *arg) t[id].max_batch = max_batches(t[0].contexts[0], engine); if (t[id].max_batch < 0) { ret = t[id].max_batch; - mutex_unlock(&i915->drm.struct_mutex); goto out_flush; } /* One ring interleaved between requests from all cpus */ @@ -1314,7 +1253,6 @@ static int live_breadcrumbs_smoketest(void *arg) &t[id], "igt/%d.%d", id, n); if (IS_ERR(tsk)) { ret = PTR_ERR(tsk); - mutex_unlock(&i915->drm.struct_mutex); goto out_flush; } @@ -1322,7 +1260,6 @@ static int live_breadcrumbs_smoketest(void *arg) threads[id * ncpus + n] = tsk; } } - mutex_unlock(&i915->drm.struct_mutex); msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); @@ -1350,10 +1287,8 @@ static int live_breadcrumbs_smoketest(void *arg) pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); - mutex_lock(&i915->drm.struct_mutex); ret = igt_live_test_end(&live) ?: ret; out_contexts: - mutex_unlock(&i915->drm.struct_mutex); kfree(t[0].contexts); out_threads: kfree(threads); diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index 438ea0eaa416..825a8286cbe8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -263,10 +263,8 @@ int __i915_live_teardown(int err, void *data) { struct drm_i915_private *i915 = 
data; - mutex_lock(&i915->drm.struct_mutex); - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); i915_gem_drain_freed_objects(i915); @@ -284,10 +282,8 @@ int __intel_gt_live_teardown(int err, void *data) { struct intel_gt *gt = data; - mutex_lock(&gt->i915->drm.struct_mutex); - if (igt_flush_test(gt->i915, I915_WAIT_LOCKED)) + if (igt_flush_test(gt->i915)) err = -EIO; - mutex_unlock(&gt->i915->drm.struct_mutex); i915_gem_drain_freed_objects(gt->i915); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 0e4f66312b39..1c9db08f7c28 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -833,12 +833,8 @@ int i915_vma_mock_selftests(void) err = i915_subtests(tests, ggtt); - mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); - i915_gem_drain_freed_objects(i915); - mock_fini_ggtt(ggtt); kfree(ggtt); out_put: diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index d3b5eb402d33..2a5fbe46ea9f 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -12,31 +12,25 @@ #include "igt_flush_test.h" -int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) +int igt_flush_test(struct drm_i915_private *i915) { int ret = intel_gt_is_wedged(&i915->gt) ?
-EIO : 0; - int repeat = !!(flags & I915_WAIT_LOCKED); cond_resched(); - do { - if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) { - pr_err("%pS timed out, cancelling all further testing.\n", - __builtin_return_address(0)); + i915_retire_requests(i915); + if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) { + pr_err("%pS timed out, cancelling all further testing.\n", + __builtin_return_address(0)); - GEM_TRACE("%pS timed out.\n", - __builtin_return_address(0)); - GEM_TRACE_DUMP(); + GEM_TRACE("%pS timed out.\n", + __builtin_return_address(0)); + GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); - repeat = 0; - ret = -EIO; - } - - /* Ensure we also flush after wedging. */ - if (flags & I915_WAIT_LOCKED) - i915_retire_requests(i915); - } while (repeat--); + intel_gt_set_wedged(&i915->gt); + ret = -EIO; + } + i915_retire_requests(i915); return ret; } diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h index 63e009927c43..7541fa74e641 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.h +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h @@ -9,6 +9,6 @@ struct drm_i915_private; -int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); +int igt_flush_test(struct drm_i915_private *i915); #endif /* IGT_FLUSH_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 3e902761cd16..04a6f88fdf64 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -19,15 +19,12 @@ int igt_live_test_begin(struct igt_live_test *t, enum intel_engine_id id; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - t->i915 = i915; t->func = func; t->name = name; err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, + I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", @@ -50,9 +47,7 
@@ int igt_live_test_end(struct igt_live_test *t) struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - - if (igt_flush_test(i915, I915_WAIT_LOCKED)) + if (igt_flush_test(i915)) return -EIO; if (t->reset_global != i915_reset_count(&i915->gpu_error)) { diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 2448067822af..622bb2127453 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -41,8 +41,6 @@ void mock_device_flush(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; - lockdep_assert_held(&i915->drm.struct_mutex); - do { for_each_engine(engine, i915, id) mock_engine_flush(engine); @@ -55,9 +53,7 @@ static void mock_device_release(struct drm_device *dev) struct intel_engine_cs *engine; enum intel_engine_id id; - mutex_lock(&i915->drm.struct_mutex); mock_device_flush(i915); - mutex_unlock(&i915->drm.struct_mutex); flush_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915); From patchwork Fri Oct 4 13:40:03 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174541 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A83C414DB for ; Fri, 4 Oct 2019 13:40:52 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 90A19222C4 for ; Fri, 4 Oct 2019 13:40:52 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 90A19222C4 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: 
mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 833EE6EB90; Fri, 4 Oct 2019 13:40:45 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 210CC6EB84 for ; Fri, 4 Oct 2019 13:40:38 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723958-1500050 for multiple; Fri, 04 Oct 2019 14:40:18 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:03 +0100 Message-Id: <20191004134015.13204-9-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 08/20] drm/i915: Remove the GEM idle worker X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Nothing inside the idle worker now requires struct_mutex, so we can remove the indirection of using our own worker. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 28 ++----------------- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 -- drivers/gpu/drm/i915/i915_debugfs.c | 5 ---- drivers/gpu/drm/i915/i915_drv.h | 9 ------ .../gpu/drm/i915/selftests/mock_gem_device.c | 6 ---- 5 files changed, 2 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 2ddc3aeaac9d..26f325bbfe4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -13,36 +13,13 @@ static void i915_gem_park(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->drm.struct_mutex); + cancel_delayed_work(&i915->gem.retire_work); i915_vma_parked(i915); i915_globals_park(); } -static void idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.idle_work); - bool park; - - cancel_delayed_work_sync(&i915->gem.retire_work); - mutex_lock(&i915->drm.struct_mutex); - - intel_wakeref_lock(&i915->gt.wakeref); - park = (!intel_wakeref_is_active(&i915->gt.wakeref) && - !work_pending(work)); - intel_wakeref_unlock(&i915->gt.wakeref); - if (park) - i915_gem_park(i915); - else - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); - - mutex_unlock(&i915->drm.struct_mutex); -} - static void retire_work_handler(struct work_struct *work) { struct drm_i915_private *i915 = @@ -71,7 +48,7 @@ static int pm_notifier(struct notifier_block *nb, break; case INTEL_GT_PARK: - queue_work(i915->wq, &i915->gem.idle_work); + i915_gem_park(i915); break; } @@ -264,7 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915) void i915_gem_init__pm(struct drm_i915_private *i915) { - INIT_WORK(&i915->gem.idle_work, idle_work_handler); INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); i915->gem.pm_notifier.notifier_call = pm_notifier; diff --git 
a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 856b8e467ee8..4ba6ed5c8313 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -572,11 +572,8 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, static void disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); - intel_gt_pm_get(&i915->gt); - cancel_delayed_work_sync(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); } static void restore_retire_worker(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 385289895107..7c4bba21adcd 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3662,11 +3662,6 @@ i915_drop_caches_set(void *data, u64 val) i915_gem_shrink_all(i915); fs_reclaim_release(GFP_KERNEL); - if (val & DROP_IDLE) { - flush_delayed_work(&i915->gem.retire_work); - flush_work(&i915->gem.idle_work); - } - if (val & DROP_FREED) i915_gem_drain_freed_objects(i915); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 337d8306416a..ad31852e4309 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1719,15 +1719,6 @@ struct drm_i915_private { * fires, go retire requests. */ struct delayed_work retire_work; - - /** - * When we detect an idle GPU, we want to turn on - * powersaving features. So once we see that there - * are no more requests outstanding and no more - * arrive within a small period of time, we fire - * off the idle_work. - */ - struct work_struct idle_work; } gem; /* For i945gm vblank irq vs. 
C3 workaround */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 622bb2127453..a8be5da2b3cf 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -55,7 +55,6 @@ static void mock_device_release(struct drm_device *dev) mock_device_flush(i915); - flush_work(&i915->gem.idle_work); i915_gem_drain_workqueue(i915); mutex_lock(&i915->drm.struct_mutex); @@ -103,10 +102,6 @@ static void mock_retire_work_handler(struct work_struct *work) { } -static void mock_idle_work_handler(struct work_struct *work) -{ -} - static int pm_domain_resume(struct device *dev) { return pm_generic_runtime_resume(dev); @@ -187,7 +182,6 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); - INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler); intel_timelines_init(i915); From patchwork Fri Oct 4 13:40:04 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174533 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C733E16B1 for ; Fri, 4 Oct 2019 13:40:48 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id AF9BD20700 for ; Fri, 4 Oct 2019 13:40:48 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org AF9BD20700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) 
by gabe.freedesktop.org (Postfix) with ESMTP id 1FD126EB80; Fri, 4 Oct 2019 13:40:42 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id B6B8D6EB84 for ; Fri, 4 Oct 2019 13:40:40 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723959-1500050 for multiple; Fri, 04 Oct 2019 14:40:19 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:04 +0100 Message-Id: <20191004134015.13204-10-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 09/20] drm/i915: Merge wait_for_timelines with retire_request X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" wait_for_timelines is essentially the same loop as retiring requests (with an extra timeout), so merge the two into one routine. 
v2: i915_retire_requests_timeout and keep VT'd w/a as !interruptible Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 4 +- .../drm/i915/gem/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 3 +- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 67 ++----------------- drivers/gpu/drm/i915/i915_gem_evict.c | 12 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +- drivers/gpu/drm/i915/i915_request.c | 26 ++++++- drivers/gpu/drm/i915/i915_request.h | 7 +- .../gpu/drm/i915/selftests/igt_flush_test.c | 4 +- .../gpu/drm/i915/selftests/igt_live_test.c | 4 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 14 files changed, 50 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c19431d609fc..418d0d2b5fa9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -432,9 +432,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj) /* Attempt to reap some mmap space from dead objects */ do { - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); + err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); if (err) break; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 26f325bbfe4d..90b211257f2d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -59,9 +59,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); - if (i915_gem_wait_for_idle(gt->i915, - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* XXX hide warning from gem_eio */ if (i915_modparams.reset) { 
dev_err(gt->i915->drm.dev, diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index f5402aad9b5a..f902aeee1755 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1137,7 +1137,7 @@ __sseu_finish(const char *name, if ((flags & TEST_IDLE) && ret == 0) { ret = i915_gem_wait_for_idle(ce->engine->i915, - 0, MAX_SCHEDULE_TIMEOUT); + MAX_SCHEDULE_TIMEOUT); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index d4cefdd38431..bdb34f03ec47 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -196,8 +196,7 @@ int intel_gt_resume(struct intel_gt *gt) static void wait_for_idle(struct intel_gt *gt) { - if (i915_gem_wait_for_idle(gt->i915, 0, - I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* * Forcibly cancel outstanding work and leave * the gpu quiet. 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7c4bba21adcd..5888a658e2b7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3635,9 +3635,7 @@ i915_drop_caches_set(void *data, u64 val) i915_retire_requests(i915); if (val & (DROP_IDLE | DROP_ACTIVE)) { - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); + ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ad31852e4309..44f3463ff9f1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2321,8 +2321,7 @@ void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); void i915_gem_driver_remove(struct drm_i915_private *dev_priv); void i915_gem_driver_release(struct drm_i915_private *dev_priv); -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags, long timeout); +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout); void i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e4c553d9aa07..7c82fc39f655 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -883,61 +883,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } -static long -wait_for_timelines(struct intel_gt *gt, unsigned int wait, long timeout) -{ - struct intel_gt_timelines *timelines = >->timelines; - struct intel_timeline *tl; - unsigned long flags; - - spin_lock_irqsave(&timelines->lock, flags); - list_for_each_entry(tl, &timelines->active_list, link) { - struct dma_fence *fence; - - fence = 
i915_active_fence_get(&tl->last_request); - if (!fence) - continue; - - spin_unlock_irqrestore(&timelines->lock, flags); - - if (!dma_fence_is_i915(fence)) { - timeout = dma_fence_wait_timeout(fence, - flags & I915_WAIT_INTERRUPTIBLE, - timeout); - } else { - struct i915_request *rq = to_request(fence); - - /* - * "Race-to-idle". - * - * Switching to the kernel context is often used as - * a synchronous step prior to idling, e.g. in suspend - * for flushing all current operations to memory before - * sleeping. These we want to complete as quickly as - * possible to avoid prolonged stalls, so allow the gpu - * to boost to maximum clocks. - */ - if (flags & I915_WAIT_FOR_IDLE_BOOST) - gen6_rps_boost(rq); - - timeout = i915_request_wait(rq, flags, timeout); - } - - dma_fence_put(fence); - if (timeout < 0) - return timeout; - - /* restart after reacquiring the lock */ - spin_lock_irqsave(&timelines->lock, flags); - tl = list_entry(&timelines->active_list, typeof(*tl), link); - } - spin_unlock_irqrestore(&timelines->lock, flags); - - return timeout; -} - -int i915_gem_wait_for_idle(struct drm_i915_private *i915, - unsigned int flags, long timeout) +int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout) { struct intel_gt *gt = &i915->gt; @@ -945,18 +891,13 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, if (!intel_gt_pm_is_awake(gt)) return 0; - do { - timeout = wait_for_timelines(gt, flags, timeout); - if (timeout < 0) - return timeout; - + while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) { cond_resched(); if (signal_pending(current)) return -EINTR; + } - } while (i915_retire_requests(i915)); - - return 0; + return timeout; } struct i915_vma * diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 0552bf93eea3..0a412f6d01d7 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -46,9 +46,7 @@ static int ggtt_flush(struct drm_i915_private 
*i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. */ - return i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); + return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); } static bool @@ -126,6 +124,8 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, color, start, end, mode); + i915_retire_requests(vm->i915); + search_again: active = NULL; INIT_LIST_HEAD(&eviction_list); @@ -264,13 +264,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, trace_i915_gem_evict_node(vm, target, flags); - /* Retire before we search the active list. Although we have + /* + * Retire before we search the active list. Although we have * reasonable accuracy in our retirement lists, we may have * a stray pin (preventing eviction) that can only be resolved by * retiring. */ - if (!(flags & PIN_NONBLOCK)) - i915_retire_requests(vm->i915); + i915_retire_requests(vm->i915); if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7462d87f7a48..082fcf9085a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2528,7 +2528,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) { + /* XXX This does not prevent more requests being submitted! 
*/ + if (i915_retire_requests_timeout(dev_priv, + -MAX_SCHEDULE_TIMEOUT)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 4ffe62a42186..52f7c4e5b644 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1508,13 +1508,19 @@ long i915_request_wait(struct i915_request *rq, return timeout; } -bool i915_retire_requests(struct drm_i915_private *i915) +long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout) { struct intel_gt_timelines *timelines = &i915->gt.timelines; struct intel_timeline *tl, *tn; + unsigned long active_count = 0; unsigned long flags; + bool interruptible; LIST_HEAD(free); + interruptible = true; + if (timeout < 0) + timeout = -timeout, interruptible = false; + spin_lock_irqsave(&timelines->lock, flags); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { if (!mutex_trylock(&tl->mutex)) @@ -1525,13 +1531,27 @@ bool i915_retire_requests(struct drm_i915_private *i915) tl->active_count++; /* pin the list element */ spin_unlock_irqrestore(&timelines->lock, flags); + if (timeout > 0) { + struct dma_fence *fence; + + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + timeout = dma_fence_wait_timeout(fence, + interruptible, + timeout); + dma_fence_put(fence); + } + } + retire_requests(tl); spin_lock_irqsave(&timelines->lock, flags); /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (!--tl->active_count) + if (--tl->active_count) + active_count += !!rcu_access_pointer(tl->last_request.fence); + else list_del(&tl->link); mutex_unlock(&tl->mutex); @@ -1547,7 +1567,7 @@ bool i915_retire_requests(struct drm_i915_private *i915) list_for_each_entry_safe(tl, tn, &free, link) __intel_timeline_free(&tl->kref); - return !list_empty(&timelines->active_list); + return active_count ? 
timeout : 0; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 621fb33cda30..256b0715180f 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -310,7 +310,6 @@ long i915_request_wait(struct i915_request *rq, #define I915_WAIT_INTERRUPTIBLE BIT(0) #define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ #define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ -#define I915_WAIT_FOR_IDLE_BOOST BIT(3) static inline bool i915_request_signaled(const struct i915_request *rq) { @@ -460,6 +459,10 @@ i915_request_active_timeline(struct i915_request *rq) lockdep_is_held(&rq->engine->active.lock)); } -bool i915_retire_requests(struct drm_i915_private *i915); +long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout); +static inline void i915_retire_requests(struct drm_i915_private *i915) +{ + i915_retire_requests_timeout(i915, 0); +} #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index 2a5fbe46ea9f..ed496bd6d84f 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -18,8 +18,7 @@ int igt_flush_test(struct drm_i915_private *i915) cond_resched(); - i915_retire_requests(i915); - if (i915_gem_wait_for_idle(i915, 0, HZ / 5) == -ETIME) { + if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) { pr_err("%pS timed out, cancelling all further testing.\n", __builtin_return_address(0)); @@ -30,7 +29,6 @@ int igt_flush_test(struct drm_i915_private *i915) intel_gt_set_wedged(&i915->gt); ret = -EIO; } - i915_retire_requests(i915); return ret; } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index 04a6f88fdf64..eae90f97df6c 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ 
b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -23,9 +23,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->func = func; t->name = name; - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); + err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index a8be5da2b3cf..3b589bbb2c2d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -44,7 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915) do { for_each_engine(engine, i915, id) mock_engine_flush(engine); - } while (i915_retire_requests(i915)); + } while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT)); } static void mock_device_release(struct drm_device *dev) From patchwork Fri Oct 4 13:40:05 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174525 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 815B614DB for ; Fri, 4 Oct 2019 13:40:41 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 698B820700 for ; Fri, 4 Oct 2019 13:40:41 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 698B820700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org 
(Postfix) with ESMTP id 819BC6EB83; Fri, 4 Oct 2019 13:40:40 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 05CDE6EB80 for ; Fri, 4 Oct 2019 13:40:37 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723960-1500050 for multiple; Fri, 04 Oct 2019 14:40:19 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:05 +0100 Message-Id: <20191004134015.13204-11-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 10/20] drm/i915/gem: Retire directly for mmap-offset shrinking X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Matthew Auld Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Now that we can retire without taking struct_mutex, we can do so to handle shrinking the mmap-offset space after an allocation failure. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 418d0d2b5fa9..45bbd22c14f1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -431,19 +431,12 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj) return 0; /* Attempt to reap some mmap space from dead objects */ - do { - err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); - if (err) - break; + err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT); + if (err) + return err; - i915_gem_drain_freed_objects(i915); - err = drm_gem_create_mmap_offset(&obj->base); - if (!err) - break; - - } while (flush_delayed_work(&i915->gem.retire_work)); - - return err; + i915_gem_drain_freed_objects(i915); + return drm_gem_create_mmap_offset(&obj->base); } int From patchwork Fri Oct 4 13:40:06 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174545 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8F0DC16B1 for ; Fri, 4 Oct 2019 13:40:54 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 777AA20867 for ; Fri, 4 Oct 2019 13:40:54 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 777AA20867 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none 
smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 1F13D6EB88; Fri, 4 Oct 2019 13:40:49 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id B5D436EB88 for ; Fri, 4 Oct 2019 13:40:39 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723961-1500050 for multiple; Fri, 04 Oct 2019 14:40:19 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:06 +0100 Message-Id: <20191004134015.13204-12-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 11/20] drm/i915: Move request runtime management onto gt X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Requests are run from the gt and are tied into the gt runtime power management, so pull the runtime request management under gt/ Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gem/i915_gem_mman.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 28 +--- .../drm/i915/gem/selftests/i915_gem_context.c | 5 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt.c | 2 + drivers/gpu/drm/i915/gt/intel_gt_pm.c | 5 +-
drivers/gpu/drm/i915/gt/intel_gt_requests.c | 123 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_requests.h | 24 ++++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 11 ++ drivers/gpu/drm/i915/gt/selftest_timeline.c | 8 +- drivers/gpu/drm/i915/i915_debugfs.c | 17 +-- drivers/gpu/drm/i915/i915_drv.h | 10 -- drivers/gpu/drm/i915/i915_gem.c | 17 --- drivers/gpu/drm/i915/i915_gem_evict.c | 14 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +- drivers/gpu/drm/i915/i915_request.c | 64 +-------- drivers/gpu/drm/i915/i915_request.h | 7 +- .../gpu/drm/i915/selftests/igt_flush_test.c | 9 +- .../gpu/drm/i915/selftests/igt_live_test.c | 5 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 10 +- 21 files changed, 213 insertions(+), 158 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_requests.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d2b53b5add81..06e1876d0250 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -83,6 +83,7 @@ gt-y += \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ gt/intel_gt_pm_irq.o \ + gt/intel_gt_requests.o \ gt/intel_hangcheck.o \ gt/intel_lrc.o \ gt/intel_rc6.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 45bbd22c14f1..fd4122d8c0a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -8,6 +8,7 @@ #include #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_gem_gtt.h" @@ -424,6 +425,7 @@ void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) static int create_mmap_offset(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_gt *gt = &i915->gt; int err; err = drm_gem_create_mmap_offset(&obj->base); @@ -431,7 +433,7 @@ static int create_mmap_offset(struct drm_i915_gem_object *obj) return 0; /* Attempt to 
reap some mmap space from dead objects */ - err = i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_retire_requests_timeout(gt, MAX_SCHEDULE_TIMEOUT); if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 90b211257f2d..9194d8464bf7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -7,31 +7,18 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_globals.h" static void i915_gem_park(struct drm_i915_private *i915) { - cancel_delayed_work(&i915->gem.retire_work); - i915_vma_parked(i915); i915_globals_park(); } -static void retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gem.retire_work.work); - - i915_retire_requests(i915); - - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); -} - static int pm_notifier(struct notifier_block *nb, unsigned long action, void *data) @@ -42,9 +29,6 @@ static int pm_notifier(struct notifier_block *nb, switch (action) { case INTEL_GT_UNPARK: i915_globals_unpark(); - queue_delayed_work(i915->wq, - &i915->gem.retire_work, - round_jiffies_up_relative(HZ)); break; case INTEL_GT_PARK: @@ -59,7 +43,7 @@ static bool switch_to_kernel_context_sync(struct intel_gt *gt) { bool result = !intel_gt_is_wedged(gt); - if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* XXX hide warning from gem_eio */ if (i915_modparams.reset) { dev_err(gt->i915->drm.dev, @@ -122,14 +106,12 @@ void i915_gem_suspend(struct drm_i915_private *i915) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. 
*/ - switch_to_kernel_context_sync(&i915->gt); + intel_gt_suspend(&i915->gt); + intel_uc_suspend(&i915->gt.uc); cancel_delayed_work_sync(&i915->gt.hangcheck.work); i915_gem_drain_freed_objects(i915); - - intel_uc_suspend(&i915->gt.uc); - intel_gt_suspend(&i915->gt); } static struct drm_i915_gem_object *first_mm_object(struct list_head *list) @@ -239,8 +221,6 @@ void i915_gem_resume(struct drm_i915_private *i915) void i915_gem_init__pm(struct drm_i915_private *i915) { - INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); - i915->gem.pm_notifier.notifier_call = pm_notifier; blocking_notifier_chain_register(&i915->gt.pm_notifications, &i915->gem.pm_notifier); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index f902aeee1755..2288757808ae 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -8,6 +8,7 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -518,7 +519,7 @@ create_test_object(struct i915_address_space *vm, int err; /* Keep in GEM's good graces */ - i915_retire_requests(vm->i915); + intel_gt_retire_requests(vm->gt); size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); size = round_down(size, DW_PER_PAGE * PAGE_SIZE); @@ -1136,7 +1137,7 @@ __sseu_finish(const char *name, igt_spinner_end(spin); if ((flags & TEST_IDLE) && ret == 0) { - ret = i915_gem_wait_for_idle(ce->engine->i915, + ret = intel_gt_wait_for_idle(ce->engine->gt, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 4ba6ed5c8313..1cd25cfd0246 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -573,7 +573,7 @@ static void 
disable_retire_worker(struct drm_i915_private *i915) { i915_gem_driver_unregister__shrinker(i915); intel_gt_pm_get(&i915->gt); - cancel_delayed_work_sync(&i915->gem.retire_work); + cancel_delayed_work_sync(&i915->gt.requests.retire_work); } static void restore_retire_worker(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 7205595369be..53220741e49e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_mocs.h" #include "intel_rc6.h" #include "intel_uncore.h" @@ -23,6 +24,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) intel_gt_init_hangcheck(gt); intel_gt_init_reset(gt); + intel_gt_init_requests(gt); intel_gt_pm_init_early(gt); intel_uc_init_early(&gt->uc); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index bdb34f03ec47..d2e80ba64d69 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -10,6 +10,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_pm.h" #include "intel_rc6.h" #include "intel_wakeref.h" @@ -49,6 +50,7 @@ static int __gt_unpark(struct intel_wakeref *wf) i915_pmu_gt_unparked(i915); intel_gt_queue_hangcheck(gt); + intel_gt_unpark_requests(gt); pm_notify(gt, INTEL_GT_UNPARK); @@ -64,6 +66,7 @@ static int __gt_park(struct intel_wakeref *wf) GEM_TRACE("\n"); pm_notify(gt, INTEL_GT_PARK); + intel_gt_park_requests(gt); i915_pmu_gt_parked(i915); if (INTEL_GEN(i915) >= 6) @@ -196,7 +199,7 @@ int intel_gt_resume(struct intel_gt *gt) static void wait_for_idle(struct intel_gt *gt) { - if (i915_gem_wait_for_idle(gt->i915, I915_GEM_IDLE_TIMEOUT) == -ETIME) { + if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) { /* * 
Forcibly cancel outstanding work and leave * the gpu quiet. diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c new file mode 100644 index 000000000000..8aed89fd2cdc --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -0,0 +1,123 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_request.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_timeline.h" + +static void retire_requests(struct intel_timeline *tl) +{ + struct i915_request *rq, *rn; + + list_for_each_entry_safe(rq, rn, &tl->requests, link) + if (!i915_request_retire(rq)) + break; +} + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) +{ + struct intel_gt_timelines *timelines = &gt->timelines; + struct intel_timeline *tl, *tn; + unsigned long active_count = 0; + unsigned long flags; + bool interruptible; + LIST_HEAD(free); + + interruptible = true; + if (unlikely(timeout < 0)) + timeout = -timeout, interruptible = false; + + spin_lock_irqsave(&timelines->lock, flags); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + if (!mutex_trylock(&tl->mutex)) + continue; + + intel_timeline_get(tl); + GEM_BUG_ON(!tl->active_count); + tl->active_count++; /* pin the list element */ + spin_unlock_irqrestore(&timelines->lock, flags); + + if (timeout > 0) { + struct dma_fence *fence; + + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + timeout = dma_fence_wait_timeout(fence, + true, + timeout); + dma_fence_put(fence); + } + } + + retire_requests(tl); + + spin_lock_irqsave(&timelines->lock, flags); + + /* Resume iteration after dropping lock */ + list_safe_reset_next(tl, tn, link); + if (--tl->active_count) + active_count += !!rcu_access_pointer(tl->last_request.fence); + else + list_del(&tl->link); + + mutex_unlock(&tl->mutex); + + /* Defer the final release to after the spinlock */ + if 
(refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(tl->active_count); + list_add(&tl->link, &free); + } + } + spin_unlock_irqrestore(&timelines->lock, flags); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); + + return active_count ? timeout : 0; +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) +{ + /* If the device is asleep, we have no requests outstanding */ + if (!intel_gt_pm_is_awake(gt)) + return 0; + + while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + } + + return timeout; +} + +static void retire_work_handler(struct work_struct *work) +{ + struct intel_gt *gt = + container_of(work, typeof(*gt), requests.retire_work.work); + + intel_gt_retire_requests(gt); + schedule_delayed_work(&gt->requests.retire_work, + round_jiffies_up_relative(HZ)); +} + +void intel_gt_init_requests(struct intel_gt *gt) +{ + INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler); +} + +void intel_gt_park_requests(struct intel_gt *gt) +{ + cancel_delayed_work(&gt->requests.retire_work); +} + +void intel_gt_unpark_requests(struct intel_gt *gt) +{ + schedule_delayed_work(&gt->requests.retire_work, + round_jiffies_up_relative(HZ)); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h new file mode 100644 index 000000000000..bd31cbce47e0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_REQUESTS_H +#define INTEL_GT_REQUESTS_H + +struct intel_gt; + +long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); +static inline void intel_gt_retire_requests(struct intel_gt *gt) +{ + intel_gt_retire_requests_timeout(gt, 0); +} + +int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); + +void intel_gt_init_requests(struct intel_gt *gt); 
+void intel_gt_park_requests(struct intel_gt *gt); +void intel_gt_unpark_requests(struct intel_gt *gt); + +#endif /* INTEL_GT_REQUESTS_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 7134f1319bbe..802f516a3430 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -50,6 +50,17 @@ struct intel_gt { struct list_head hwsp_free_list; } timelines; + struct intel_gt_requests { + /** + * We leave the user IRQ off as much as possible, + * but this means that requests will finish and never + * be retired once the system goes idle. Set a timer to + * fire periodically while the ring is running. When it + * fires, go retire requests. + */ + struct delayed_work retire_work; + } requests; + struct intel_wakeref wakeref; atomic_t user_wakeref; diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 16abfabf08c7..d6df40cdc8a6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -8,6 +8,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" +#include "intel_gt_requests.h" #include "../selftests/i915_random.h" #include "../i915_selftest.h" @@ -641,6 +642,7 @@ static int live_hwsp_alternate(void *arg) static int live_hwsp_wrap(void *arg) { struct drm_i915_private *i915 = arg; + struct intel_gt *gt = &i915->gt; struct intel_engine_cs *engine; struct intel_timeline *tl; enum intel_engine_id id; @@ -651,7 +653,7 @@ static int live_hwsp_wrap(void *arg) * foreign GPU references. 
*/ - tl = intel_timeline_create(&i915->gt, NULL); + tl = intel_timeline_create(gt, NULL); if (IS_ERR(tl)) return PTR_ERR(tl); @@ -662,7 +664,7 @@ static int live_hwsp_wrap(void *arg) if (err) goto out_free; - for_each_engine(engine, i915, id) { + for_each_engine(engine, gt->i915, id) { const u32 *hwsp_seqno[2]; struct i915_request *rq; u32 seqno[2]; @@ -734,7 +736,7 @@ static int live_hwsp_wrap(void *arg) goto out; } - i915_retire_requests(i915); /* recycle HWSP */ + intel_gt_retire_requests(gt); /* recycle HWSP */ } out: diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5888a658e2b7..2afc41e43b6e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -41,6 +41,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_gt_requests.h" #include "gt/intel_reset.h" #include "gt/intel_rc6.h" #include "gt/uc/intel_guc_submission.h" @@ -3621,33 +3622,33 @@ static int i915_drop_caches_set(void *data, u64 val) { struct drm_i915_private *i915 = data; + struct intel_gt *gt = &i915->gt; int ret; DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n", val, val & DROP_ALL); if (val & DROP_RESET_ACTIVE && - wait_for(intel_engines_are_idle(&i915->gt), - I915_IDLE_ENGINES_TIMEOUT)) - intel_gt_set_wedged(&i915->gt); + wait_for(intel_engines_are_idle(gt), I915_IDLE_ENGINES_TIMEOUT)) + intel_gt_set_wedged(gt); if (val & DROP_RETIRE) - i915_retire_requests(i915); + intel_gt_retire_requests(gt); if (val & (DROP_IDLE | DROP_ACTIVE)) { - ret = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); + ret = intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); if (ret) return ret; } if (val & DROP_IDLE) { - ret = intel_gt_pm_wait_for_idle(&i915->gt); + ret = intel_gt_pm_wait_for_idle(gt); if (ret) return ret; } - if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(&i915->gt)) - intel_gt_handle_error(&i915->gt, ALL_ENGINES, 0, NULL); + if (val & DROP_RESET_ACTIVE && 
intel_gt_terminally_wedged(gt)) + intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL); fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 44f3463ff9f1..cb63b2bd0ce8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1710,15 +1710,6 @@ struct drm_i915_private { struct { struct notifier_block pm_notifier; - - /** - * We leave the user IRQ off as much as possible, - * but this means that requests will finish and never - * be retired once the system goes idle. Set a timer to - * fire periodically while the ring is running. When it - * fires, go retire requests. - */ - struct delayed_work retire_work; } gem; /* For i945gm vblank irq vs. C3 workaround */ @@ -2321,7 +2312,6 @@ void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); void i915_gem_driver_remove(struct drm_i915_private *dev_priv); void i915_gem_driver_release(struct drm_i915_private *dev_priv); -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, long timeout); void i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_suspend_late(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7c82fc39f655..5a664bdead8c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -883,23 +883,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) } } -int i915_gem_wait_for_idle(struct drm_i915_private *i915, long timeout) -{ - struct intel_gt *gt = &i915->gt; - - /* If the device is asleep, we have no requests outstanding */ - if (!intel_gt_pm_is_awake(gt)) - return 0; - - while ((timeout = i915_retire_requests_timeout(i915, timeout)) > 0) { - cond_resched(); - if (signal_pending(current)) - return -EINTR; - } - - return timeout; -} - struct i915_vma 
* i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 0a412f6d01d7..7e62c310290f 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -29,6 +29,7 @@ #include #include "gem/i915_gem_context.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_trace.h" @@ -37,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { bool fail_if_busy:1; } igt_evict_ctl;) -static int ggtt_flush(struct drm_i915_private *i915) +static int ggtt_flush(struct intel_gt *gt) { /* * Not everything in the GGTT is tracked via vma (otherwise we @@ -46,7 +47,7 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. */ - return i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); + return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT); } static bool @@ -92,7 +93,6 @@ i915_gem_evict_something(struct i915_address_space *vm, u64 start, u64 end, unsigned flags) { - struct drm_i915_private *dev_priv = vm->i915; struct drm_mm_scan scan; struct list_head eviction_list; struct i915_vma *vma, *next; @@ -124,7 +124,7 @@ i915_gem_evict_something(struct i915_address_space *vm, min_size, alignment, color, start, end, mode); - i915_retire_requests(vm->i915); + intel_gt_retire_requests(vm->gt); search_again: active = NULL; @@ -197,7 +197,7 @@ i915_gem_evict_something(struct i915_address_space *vm, if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) return -EBUSY; - ret = ggtt_flush(dev_priv); + ret = ggtt_flush(vm->gt); if (ret) return ret; @@ -270,7 +270,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * a stray pin (preventing eviction) that can only be resolved by * retiring. 
*/ - i915_retire_requests(vm->i915); + intel_gt_retire_requests(vm->gt); if (i915_vm_has_cache_coloring(vm)) { /* Expand search to cover neighbouring guard pages (or lack!) */ @@ -372,7 +372,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm) * switch otherwise is ineffective. */ if (i915_is_ggtt(vm)) { - ret = ggtt_flush(vm->i915); + ret = ggtt_flush(vm->gt); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 082fcf9085a6..1d26634ca597 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -38,6 +38,7 @@ #include "display/intel_frontbuffer.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_scatterlist.h" @@ -2529,8 +2530,8 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, if (unlikely(ggtt->do_idle_maps)) { /* XXX This does not prevent more requests being submitted! */ - if (i915_retire_requests_timeout(dev_priv, - -MAX_SCHEDULE_TIMEOUT)) { + if (intel_gt_retire_requests_timeout(ggtt->vm.gt, + -MAX_SCHEDULE_TIMEOUT)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 52f7c4e5b644..437f9fc6282e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -216,7 +216,7 @@ static void remove_from_engine(struct i915_request *rq) spin_unlock(&locked->active.lock); } -static bool i915_request_retire(struct i915_request *rq) +bool i915_request_retire(struct i915_request *rq) { if (!i915_request_completed(rq)) return false; @@ -1508,68 +1508,6 @@ long i915_request_wait(struct i915_request *rq, return timeout; } -long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout) -{ - struct intel_gt_timelines *timelines = &i915->gt.timelines; - struct intel_timeline *tl, *tn; - unsigned long 
active_count = 0; - unsigned long flags; - bool interruptible; - LIST_HEAD(free); - - interruptible = true; - if (timeout < 0) - timeout = -timeout, interruptible = false; - - spin_lock_irqsave(&timelines->lock, flags); - list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { - if (!mutex_trylock(&tl->mutex)) - continue; - - intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ - spin_unlock_irqrestore(&timelines->lock, flags); - - if (timeout > 0) { - struct dma_fence *fence; - - fence = i915_active_fence_get(&tl->last_request); - if (fence) { - timeout = dma_fence_wait_timeout(fence, - interruptible, - timeout); - dma_fence_put(fence); - } - } - - retire_requests(tl); - - spin_lock_irqsave(&timelines->lock, flags); - - /* Resume iteration after dropping lock */ - list_safe_reset_next(tl, tn, link); - if (--tl->active_count) - active_count += !!rcu_access_pointer(tl->last_request.fence); - else - list_del(&tl->link); - - mutex_unlock(&tl->mutex); - - /* Defer the final release to after the spinlock */ - if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); - list_add(&tl->link, &free); - } - } - spin_unlock_irqrestore(&timelines->lock, flags); - - list_for_each_entry_safe(tl, tn, &free, link) - __intel_timeline_free(&tl->kref); - - return active_count ? 
timeout : 0; -} - #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" #include "selftests/i915_request.c" diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 256b0715180f..6a95242b280d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -250,6 +250,7 @@ struct i915_request *__i915_request_commit(struct i915_request *request); void __i915_request_queue(struct i915_request *rq, const struct i915_sched_attr *attr); +bool i915_request_retire(struct i915_request *rq); void i915_request_retire_upto(struct i915_request *rq); static inline struct i915_request * @@ -459,10 +460,4 @@ i915_request_active_timeline(struct i915_request *rq) lockdep_is_held(&rq->engine->active.lock)); } -long i915_retire_requests_timeout(struct drm_i915_private *i915, long timeout); -static inline void i915_retire_requests(struct drm_i915_private *i915) -{ - i915_retire_requests_timeout(i915, 0); -} - #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c index ed496bd6d84f..7b0939e3f007 100644 --- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -4,8 +4,8 @@ * Copyright © 2018 Intel Corporation */ -#include "gem/i915_gem_context.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "i915_drv.h" #include "i915_selftest.h" @@ -14,11 +14,12 @@ int igt_flush_test(struct drm_i915_private *i915) { - int ret = intel_gt_is_wedged(&i915->gt) ? -EIO : 0; + struct intel_gt *gt = &i915->gt; + int ret = intel_gt_is_wedged(gt) ? 
-EIO : 0; cond_resched(); - if (i915_gem_wait_for_idle(i915, HZ / 5) == -ETIME) { + if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) { pr_err("%pS timed out, cancelling all further testing.\n", __builtin_return_address(0)); @@ -26,7 +27,7 @@ int igt_flush_test(struct drm_i915_private *i915) __builtin_return_address(0)); GEM_TRACE_DUMP(); - intel_gt_set_wedged(&i915->gt); + intel_gt_set_wedged(gt); ret = -EIO; } diff --git a/drivers/gpu/drm/i915/selftests/igt_live_test.c b/drivers/gpu/drm/i915/selftests/igt_live_test.c index eae90f97df6c..810b60100c2c 100644 --- a/drivers/gpu/drm/i915/selftests/igt_live_test.c +++ b/drivers/gpu/drm/i915/selftests/igt_live_test.c @@ -4,7 +4,8 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_drv.h" +#include "i915_drv.h" +#include "gt/intel_gt_requests.h" #include "../i915_selftest.h" #include "igt_flush_test.h" @@ -23,7 +24,7 @@ int igt_live_test_begin(struct igt_live_test *t, t->func = func; t->name = name; - err = i915_gem_wait_for_idle(i915, MAX_SCHEDULE_TIMEOUT); + err = intel_gt_wait_for_idle(&i915->gt, MAX_SCHEDULE_TIMEOUT); if (err) { pr_err("%s(%s): failed to idle before, with err=%d!", func, name, err); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 3b589bbb2c2d..4e6cde0d4859 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -26,6 +26,7 @@ #include #include "gt/intel_gt.h" +#include "gt/intel_gt_requests.h" #include "gt/mock_engine.h" #include "mock_request.h" @@ -44,7 +45,8 @@ void mock_device_flush(struct drm_i915_private *i915) do { for_each_engine(engine, i915, id) mock_engine_flush(engine); - } while (i915_retire_requests_timeout(i915, MAX_SCHEDULE_TIMEOUT)); + } while (intel_gt_retire_requests_timeout(&i915->gt, + MAX_SCHEDULE_TIMEOUT)); } static void mock_device_release(struct drm_device *dev) @@ -98,10 +100,6 @@ static void release_dev(struct device *dev) 
kfree(pdev); } -static void mock_retire_work_handler(struct work_struct *work) -{ -} - static int pm_domain_resume(struct device *dev) { return pm_generic_runtime_resume(dev); @@ -181,8 +179,6 @@ struct drm_i915_private *mock_gem_device(void) mock_init_contexts(i915); - INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler); - intel_timelines_init(i915); mutex_lock(&i915->drm.struct_mutex); From patchwork Fri Oct 4 13:40:07 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174577 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 777C913B1 for ; Fri, 4 Oct 2019 13:58:21 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 600102077B for ; Fri, 4 Oct 2019 13:58:21 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 600102077B Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id A2D5C6EB9D; Fri, 4 Oct 2019 13:58:18 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 019156EB98 for ; Fri, 4 Oct 2019 13:58:14 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) 
with ESMTP id 18723962-1500050 for multiple; Fri, 04 Oct 2019 14:40:19 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:07 +0100 Message-Id: <20191004134015.13204-13-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 12/20] drm/i915: Move global activity tracking from GEM to GT X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" As our global unpark/park keep track of the number of active users, we can simply move the accounting from the GEM layer to the base GT layer. It was placed originally inside GEM to benefit from the 100ms extra delay on idleness, but that has been eliminated and now there is no substantive difference between the layers. In moving it, we move another piece of the puzzle out from underneath struct_mutex. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 11 +---------- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 5 +++++ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 9194d8464bf7..7c316d4633db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -10,14 +10,6 @@ #include "gt/intel_gt_requests.h" #include "i915_drv.h" -#include "i915_globals.h" - -static void i915_gem_park(struct drm_i915_private *i915) -{ - i915_vma_parked(i915); - - i915_globals_park(); -} static int pm_notifier(struct notifier_block *nb, unsigned long action, @@ -28,11 +20,10 @@ static int pm_notifier(struct notifier_block *nb, switch (action) { case INTEL_GT_UNPARK: - i915_globals_unpark(); break; case INTEL_GT_PARK: - i915_gem_park(i915); + i915_vma_parked(i915); break; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index d2e80ba64d69..b52e2ba3d092 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "i915_globals.h" #include "i915_params.h" #include "intel_context.h" #include "intel_engine_pm.h" @@ -27,6 +28,8 @@ static int __gt_unpark(struct intel_wakeref *wf) GEM_TRACE("\n"); + i915_globals_unpark(); + /* * It seems that the DMC likes to transition between the DC states a lot * when there are no connected displays (no active power domains) during @@ -78,6 +81,8 @@ static int __gt_park(struct intel_wakeref *wf) GEM_BUG_ON(!wakeref); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + i915_globals_park(); + return 0; } From patchwork Fri Oct 4 13:40:08 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174527 Return-Path: Received: from 
mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 44BC114DB for ; Fri, 4 Oct 2019 13:40:43 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 2CFA920700 for ; Fri, 4 Oct 2019 13:40:43 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 2CFA920700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8CD206EB89; Fri, 4 Oct 2019 13:40:41 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id A98116EB80 for ; Fri, 4 Oct 2019 13:40:38 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723963-1500050 for multiple; Fri, 04 Oct 2019 14:40:20 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:08 +0100 Message-Id: <20191004134015.13204-14-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 13/20] drm/i915: Remove logical HW ID X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development 
List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" With the introduction of ctx->engines[] we allow multiple logical contexts to be used on the same engine (e.g. with virtual engines). According to bspec, each logical context requires a unique tag in order for context-switching to occur correctly between them. [Simple experiments show that it is not so easy to trick the HW into performing a lite-restore with matching logical IDs, though my memory from early Broadwell experiments does suggest that it should be generating lite-restores.] We only need to keep a unique tag for the active lifetime of the context, and for as long as we need to identify that context. The HW uses the tag to determine if it should use a lite-restore (why not the LRCA?) and passes the tag back for various status identifiers. The only status we need to track is for OA, so when using perf, we assign the specific context a unique tag. v2: Calculate required number of tags to fill ELSP. 
Fixes: 976b55f0e1db ("drm/i915: Allow a context to define its set of engines") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111895 Signed-off-by: Chris Wilson Acked-by: Daniele Ceraolo Spurio Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 153 ------------------ drivers/gpu/drm/i915/gem/i915_gem_context.h | 15 -- .../gpu/drm/i915/gem/i915_gem_context_types.h | 18 --- .../drm/i915/gem/selftests/i915_gem_context.c | 13 +- .../gpu/drm/i915/gem/selftests/mock_context.c | 8 - drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 32 ++-- drivers/gpu/drm/i915/gt/intel_lrc.h | 6 + drivers/gpu/drm/i915/gvt/kvmgt.c | 17 -- drivers/gpu/drm/i915/i915_debugfs.c | 3 - drivers/gpu/drm/i915/i915_drv.h | 12 -- drivers/gpu/drm/i915/i915_gpu_error.c | 7 +- drivers/gpu/drm/i915/i915_gpu_error.h | 1 - drivers/gpu/drm/i915/i915_perf.c | 25 ++- drivers/gpu/drm/i915/i915_trace.h | 38 ++--- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 18 files changed, 57 insertions(+), 302 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 0ab416887fc2..cd4f327b23bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -167,97 +167,6 @@ lookup_user_engine(struct i915_gem_context *ctx, return i915_gem_context_get_engine(ctx, idx); } -static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) -{ - unsigned int max; - - lockdep_assert_held(&i915->contexts.mutex); - - if (INTEL_GEN(i915) >= 12) - max = GEN12_MAX_CONTEXT_HW_ID; - else if (INTEL_GEN(i915) >= 11) - max = GEN11_MAX_CONTEXT_HW_ID; - else if (USES_GUC_SUBMISSION(i915)) - /* - * When using GuC in proxy submission, GuC consumes the - * highest bit in the context id to indicate proxy submission. 
- */ - max = MAX_GUC_CONTEXT_HW_ID; - else - max = MAX_CONTEXT_HW_ID; - - return ida_simple_get(&i915->contexts.hw_ida, 0, max, gfp); -} - -static int steal_hw_id(struct drm_i915_private *i915) -{ - struct i915_gem_context *ctx, *cn; - LIST_HEAD(pinned); - int id = -ENOSPC; - - lockdep_assert_held(&i915->contexts.mutex); - - list_for_each_entry_safe(ctx, cn, - &i915->contexts.hw_id_list, hw_id_link) { - if (atomic_read(&ctx->hw_id_pin_count)) { - list_move_tail(&ctx->hw_id_link, &pinned); - continue; - } - - GEM_BUG_ON(!ctx->hw_id); /* perma-pinned kernel context */ - list_del_init(&ctx->hw_id_link); - id = ctx->hw_id; - break; - } - - /* - * Remember how far we got up on the last repossesion scan, so the - * list is kept in a "least recently scanned" order. - */ - list_splice_tail(&pinned, &i915->contexts.hw_id_list); - return id; -} - -static int assign_hw_id(struct drm_i915_private *i915, unsigned int *out) -{ - int ret; - - lockdep_assert_held(&i915->contexts.mutex); - - /* - * We prefer to steal/stall ourselves and our users over that of the - * entire system. That may be a little unfair to our users, and - * even hurt high priority clients. The choice is whether to oomkill - * something else, or steal a context id. 
- */ - ret = new_hw_id(i915, GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); - if (unlikely(ret < 0)) { - ret = steal_hw_id(i915); - if (ret < 0) /* once again for the correct errno code */ - ret = new_hw_id(i915, GFP_KERNEL); - if (ret < 0) - return ret; - } - - *out = ret; - return 0; -} - -static void release_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - - if (list_empty(&ctx->hw_id_link)) - return; - - mutex_lock(&i915->contexts.mutex); - if (!list_empty(&ctx->hw_id_link)) { - ida_simple_remove(&i915->contexts.hw_ida, ctx->hw_id); - list_del_init(&ctx->hw_id_link); - } - mutex_unlock(&i915->contexts.mutex); -} - static void __free_engines(struct i915_gem_engines *e, unsigned int count) { while (count--) { @@ -312,8 +221,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - release_hw_id(ctx); - free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); @@ -386,12 +293,6 @@ static void context_close(struct i915_gem_context *ctx) ctx->file_priv = ERR_PTR(-EBADF); - /* - * This context will never again be assinged to HW, so we can - * reuse its ID for the next context. - */ - release_hw_id(ctx); - /* * The LUT uses the VMA as a backpointer to unref the object, * so we need to clear the LUT before we close all the VMA (inside @@ -430,7 +331,6 @@ __create_context(struct drm_i915_private *i915) RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. 
If there @@ -584,18 +484,11 @@ struct i915_gem_context * i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) { struct i915_gem_context *ctx; - int err; ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; - err = i915_gem_context_pin_hw_id(ctx); - if (err) { - destroy_kernel_context(&ctx); - return ERR_PTR(err); - } - i915_gem_context_clear_bannable(ctx); ctx->sched.priority = I915_USER_PRIORITY(prio); @@ -609,12 +502,6 @@ static void init_contexts(struct drm_i915_private *i915) mutex_init(&i915->contexts.mutex); INIT_LIST_HEAD(&i915->contexts.list); - /* Using the simple ida interface, the max is limited by sizeof(int) */ - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > INT_MAX); - ida_init(&i915->contexts.hw_ida); - INIT_LIST_HEAD(&i915->contexts.hw_id_list); - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); init_llist_head(&i915->contexts.free_list); } @@ -634,15 +521,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) DRM_ERROR("Failed to create default global context\n"); return PTR_ERR(ctx); } - /* - * For easy recognisablity, we want the kernel context to be 0 and then - * all user contexts will have non-zero hw_id. Kernel contexts are - * permanently pinned, so that we never suffer a stall and can - * use them from any allocation context (e.g. for evicting other - * contexts and from inside the shrinker). 
- */ - GEM_BUG_ON(ctx->hw_id); - GEM_BUG_ON(!atomic_read(&ctx->hw_id_pin_count)); dev_priv->kernel_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", @@ -656,10 +534,6 @@ void i915_gem_contexts_fini(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); destroy_kernel_context(&i915->kernel_context); - - /* Must free all deferred contexts (via flush_workqueue) first */ - GEM_BUG_ON(!list_empty(&i915->contexts.hw_id_list)); - ida_destroy(&i915->contexts.hw_ida); } static int context_idr_cleanup(int id, void *p, void *data) @@ -2316,33 +2190,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return ret; } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - struct drm_i915_private *i915 = ctx->i915; - int err = 0; - - mutex_lock(&i915->contexts.mutex); - - GEM_BUG_ON(i915_gem_context_is_closed(ctx)); - - if (list_empty(&ctx->hw_id_link)) { - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count)); - - err = assign_hw_id(i915, &ctx->hw_id); - if (err) - goto out_unlock; - - list_add_tail(&ctx->hw_id_link, &i915->contexts.hw_id_list); - } - - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == ~0u); - atomic_inc(&ctx->hw_id_pin_count); - -out_unlock: - mutex_unlock(&i915->contexts.mutex); - return err; -} - /* GEM context-engines iterator: for_each_gem_engine() */ struct intel_context * i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 176978608b6f..50bc27d30c03 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -112,21 +112,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); } -int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); -static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) -{ - if (atomic_inc_not_zero(&ctx->hw_id_pin_count)) - 
return 0; - - return __i915_gem_context_pin_hw_id(ctx); -} - -static inline void i915_gem_context_unpin_hw_id(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(atomic_read(&ctx->hw_id_pin_count) == 0u); - atomic_dec(&ctx->hw_id_pin_count); -} - static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) { return !ctx->file_priv; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 260d59cc3de8..87be27877e22 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -147,24 +147,6 @@ struct i915_gem_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 #define CONTEXT_USER_ENGINES 3 - /** - * @hw_id: - unique identifier for the context - * - * The hardware needs to uniquely identify the context for a few - * functions like fault reporting, PASID, scheduling. The - * &drm_i915_private.context_hw_ida is used to assign a unqiue - * id for the lifetime of the context. - * - * @hw_id_pin_count: - number of times this context had been pinned - * for use (should be, at most, once per engine). - * - * @hw_id_link: - all contexts with an assigned id are tracked - * for possible repossession. - */ - unsigned int hw_id; - atomic_t hw_id_pin_count; - struct list_head hw_id_link; - struct mutex mutex; struct i915_sched_attr sched; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 2288757808ae..2fb31ada2fa7 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -660,9 +660,9 @@ static int igt_ctx_exec(void *arg) err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, + engine->name, yesno(!!ctx->vm), err); intel_context_put(ce); kernel_context_close(ctx); @@ -798,9 +798,9 @@ static int igt_shared_ctx_exec(void *arg) err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - engine->name, ctx->hw_id, + engine->name, yesno(!!ctx->vm), err); intel_context_put(ce); kernel_context_close(ctx); @@ -1382,10 +1382,9 @@ static int igt_ctx_readonly(void *arg) err = gpu_fill(ce, obj, dw); if (err) { - pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - ce->engine->name, ctx->hw_id, - yesno(!!ctx->vm), err); + ce->engine->name, yesno(!!ctx->vm), err); i915_gem_context_unlock_engines(ctx); goto out_unlock; } diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index be8974ccff24..0104f16b1327 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -13,7 +13,6 @@ mock_context(struct drm_i915_private *i915, { struct i915_gem_context *ctx; struct i915_gem_engines *e; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -30,13 +29,8 @@ mock_context(struct drm_i915_private *i915, RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); - INIT_LIST_HEAD(&ctx->hw_id_link); mutex_init(&ctx->mutex); - ret = i915_gem_context_pin_hw_id(ctx); - if (ret < 0) - goto err_engines; - if (name) { struct i915_ppgtt *ppgtt; @@ -54,8 +48,6 @@ mock_context(struct drm_i915_private *i915, return ctx; -err_engines: - free_engines(rcu_access_pointer(ctx->engines)); err_free: 
kfree(ctx); return NULL; diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index bf9cedfccbf0..6959b05ae5f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -58,6 +58,7 @@ struct intel_context { u32 *lrc_reg_state; u64 lrc_desc; + u32 tag; /* cookie passed to HW to track this context on submission */ unsigned int active_count; /* protected by timeline->mutex */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 943f0663837e..6199064f332b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -303,10 +303,12 @@ struct intel_engine_cs { u8 uabi_class; u8 uabi_instance; + u32 uabi_capabilities; u32 context_size; u32 mmio_base; - u32 uabi_capabilities; + unsigned int context_tag; +#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) struct rb_node uabi_node; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3cfea1758fd2..468438fb47af 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -440,12 +440,8 @@ assert_priority_queue(const struct i915_request *prev, static u64 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - struct i915_gem_context *ctx = ce->gem_context; u64 desc; - BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); - BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH))); - desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) desc = INTEL_LEGACY_64B_CONTEXT; @@ -463,20 +459,11 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) * anything below. 
*/ if (INTEL_GEN(engine->i915) >= 11) { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; - /* bits 37-47 */ - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; /* bits 48-53 */ - /* TODO: decide what to do with SW counter (bits 55-60) */ - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; /* bits 61-63 */ - } else { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ } return desc; @@ -985,6 +972,18 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (ce->tag) { + /* Use a fixed tag for OA and friends */ + ce->lrc_desc |= (u64)ce->tag << 32; + } else { + /* We don't need a strict matching tag, just different values */ + ce->lrc_desc &= ~GENMASK_ULL(47, 37); + ce->lrc_desc |= + (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) << + GEN11_SW_CTX_ID_SHIFT; + BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + } + intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -2114,7 +2113,6 @@ static void execlists_context_unpin(struct intel_context *ce) check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE, ce->engine); - i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); intel_ring_reset(ce->ring, ce->ring->tail); } @@ -2164,18 +2162,12 @@ __execlists_context_pin(struct intel_context *ce, goto unpin_active; } - ret = i915_gem_context_pin_hw_id(ce->gem_context); - if (ret) - goto unpin_map; - ce->lrc_desc = lrc_descriptor(ce, engine); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine); return 0; -unpin_map: - i915_gem_object_unpin_map(ce->state->obj); unpin_active: intel_context_active_release(ce); err: diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 66ac616361c1..99dc576a4e25 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -66,6 +66,12 @@ struct intel_engine_cs; #define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) #define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) +#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ +/* in Gen12 ID 0x7FF is reserved to indicate idle */ +#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) + enum { INTEL_CONTEXT_SCHEDULE_IN = 0, INTEL_CONTEXT_SCHEDULE_OUT, diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 343d79c1cb7e..04a5a0d90823 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1564,27 +1564,10 @@ vgpu_id_show(struct device *dev, struct device_attribute *attr, return sprintf(buf, "\n"); } -static ssize_t -hw_id_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mdev_device *mdev = mdev_from_dev(dev); - - if (mdev) { - struct intel_vgpu *vgpu = (struct intel_vgpu *) - mdev_get_drvdata(mdev); - return sprintf(buf, "%u\n", - vgpu->submission.shadow[0]->gem_context->hw_id); - } - return sprintf(buf, "\n"); -} - static DEVICE_ATTR_RO(vgpu_id); -static DEVICE_ATTR_RO(hw_id); static struct attribute *intel_vgpu_attrs[] = { &dev_attr_vgpu_id.attr, - &dev_attr_hw_id.attr, NULL }; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2afc41e43b6e..0e90ac608e07 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1593,9 +1593,6 @@ static int i915_context_status(struct seq_file *m, void *unused) struct intel_context *ce; seq_puts(m, "HW context "); - if (!list_empty(&ctx->hw_id_link)) - seq_printf(m, "%x [pin %u]", ctx->hw_id, - atomic_read(&ctx->hw_id_pin_count)); if (ctx->pid) { struct task_struct *task; diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h index cb63b2bd0ce8..6bdcffbf1b9b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1541,18 +1541,6 @@ struct drm_i915_private { struct list_head list; struct llist_head free_list; struct work_struct free_work; - - /* The hw wants to have a stable context identifier for the - * lifetime of the context (for OA, PASID, faults, etc). - * This is limited in execlists to 21 bits. - */ - struct ida hw_ida; -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ -/* in Gen12 ID 0x7FF is reserved to indicate idle */ -#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) - struct list_head hw_id_list; } contexts; u32 fdi_rx_config; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a28ee754b7b4..5cf4eed5add8 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -471,9 +471,9 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct drm_i915_error_context *ctx) { - err_printf(m, "%s%s[%d] hw_id %d, prio %d, guilty %d active %d\n", - header, ctx->comm, ctx->pid, ctx->hw_id, - ctx->sched_attr.priority, ctx->guilty, ctx->active); + err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->sched_attr.priority, + ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, @@ -1271,7 +1271,6 @@ static bool record_context(struct drm_i915_error_context *e, rcu_read_unlock(); } - e->hw_id = ctx->hw_id; e->sched_attr = ctx->sched; e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 63cf387411e0..7f1cd0b1fef7 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ 
b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -119,7 +119,6 @@ struct i915_gpu_state { struct drm_i915_error_context { char comm[TASK_COMM_LEN]; pid_t pid; - u32 hw_id; int active; int guilty; struct i915_sched_attr sched_attr; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 80055501eccb..ecfbc37b738b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1283,22 +1283,15 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) } else { stream->specific_ctx_id_mask = (1U << GEN8_CTX_ID_WIDTH) - 1; - stream->specific_ctx_id = - upper_32_bits(ce->lrc_desc); - stream->specific_ctx_id &= - stream->specific_ctx_id_mask; + stream->specific_ctx_id = stream->specific_ctx_id_mask; } break; case 11: case 12: { stream->specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) | - ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) | - ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32); - stream->specific_ctx_id = upper_32_bits(ce->lrc_desc); - stream->specific_ctx_id &= - stream->specific_ctx_id_mask; + ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); + stream->specific_ctx_id = stream->specific_ctx_id_mask; break; } @@ -1306,6 +1299,8 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) MISSING_CASE(INTEL_GEN(i915)); } + ce->tag = stream->specific_ctx_id_mask; + DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", stream->specific_ctx_id, stream->specific_ctx_id_mask); @@ -1324,12 +1319,14 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct intel_context *ce; - stream->specific_ctx_id = INVALID_CTX_ID; - stream->specific_ctx_id_mask = 0; - ce = fetch_and_zero(&stream->pinned_ctx); - if (ce) + if (ce) { + ce->tag = 0; /* recomputed on next submission after parking */ intel_context_unpin(ce); + } + + stream->specific_ctx_id = INVALID_CTX_ID; + 
stream->specific_ctx_id_mask = 0; } static void diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 24f2944da09d..1f2cf6cfafb5 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -665,7 +665,6 @@ TRACE_EVENT(i915_request_queue, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -675,7 +674,6 @@ TRACE_EVENT(i915_request_queue, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -683,10 +681,9 @@ TRACE_EVENT(i915_request_queue, __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->flags) + __entry->ctx, __entry->seqno, __entry->flags) ); DECLARE_EVENT_CLASS(i915_request, @@ -695,7 +692,6 @@ DECLARE_EVENT_CLASS(i915_request, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -704,16 +700,15 @@ DECLARE_EVENT_CLASS(i915_request, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno) + __entry->ctx, __entry->seqno) ); DEFINE_EVENT(i915_request, i915_request_add, @@ -738,7 +733,6 @@ TRACE_EVENT(i915_request_in, TP_STRUCT__entry( __field(u32, 
dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -749,7 +743,6 @@ TRACE_EVENT(i915_request_in, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -758,9 +751,9 @@ TRACE_EVENT(i915_request_in, __entry->port = port; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, prio=%u, port=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, prio=%u, port=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, + __entry->ctx, __entry->seqno, __entry->prio, __entry->port) ); @@ -770,7 +763,6 @@ TRACE_EVENT(i915_request_out, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -780,7 +772,6 @@ TRACE_EVENT(i915_request_out, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -788,10 +779,9 @@ TRACE_EVENT(i915_request_out, __entry->completed = i915_request_completed(rq); ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, completed?=%u", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, completed?=%u", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, - __entry->completed) + __entry->ctx, __entry->seqno, __entry->completed) ); #else @@ -829,7 +819,6 @@ TRACE_EVENT(i915_request_wait_begin, TP_STRUCT__entry( __field(u32, dev) - __field(u32, hw_id) __field(u64, ctx) __field(u16, class) __field(u16, instance) @@ -845,7 +834,6 @@ TRACE_EVENT(i915_request_wait_begin, */ TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->gem_context->hw_id; __entry->class = 
rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; __entry->ctx = rq->fence.context; @@ -853,9 +841,9 @@ TRACE_EVENT(i915_request_wait_begin, __entry->flags = flags; ), - TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x", + TP_printk("dev=%u, engine=%u:%u, ctx=%llu, seqno=%u, flags=0x%x", __entry->dev, __entry->class, __entry->instance, - __entry->hw_id, __entry->ctx, __entry->seqno, + __entry->ctx, __entry->seqno, __entry->flags) ); @@ -958,19 +946,17 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) - __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->hw_id = ctx->hw_id; __entry->vm = ctx->vm; ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", - __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p", + __entry->dev, __entry->ctx, __entry->vm) ) DEFINE_EVENT(i915_context, i915_context_create, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 52d2df843148..f39f0282e78c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -491,8 +491,8 @@ static int igt_evict_contexts(void *arg) if (IS_ERR(rq)) { /* When full, fail_if_busy will trigger EBUSY */ if (PTR_ERR(rq) != -EBUSY) { - pr_err("Unexpected error from request alloc (ctx hw id %u, on %s): %d\n", - ctx->hw_id, engine->name, + pr_err("Unexpected error from request alloc (on %s): %d\n", + engine->name, (int)PTR_ERR(rq)); err = PTR_ERR(rq); } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 1c9db08f7c28..ac1ff558eb90 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -170,7 +170,7 @@ static int igt_vma_create(void *arg) } nc 
= 0; - for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) { for (; nc < num_ctx; nc++) { ctx = mock_context(i915, "mock"); if (!ctx) From patchwork Fri Oct 4 13:40:09 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174553 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D8CAF1709 for ; Fri, 4 Oct 2019 13:40:57 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id C163E20867 for ; Fri, 4 Oct 2019 13:40:57 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org C163E20867 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 13E236EB9C; Fri, 4 Oct 2019 13:40:52 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 463356EB87 for ; Fri, 4 Oct 2019 13:40:41 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723964-1500050 for multiple; Fri, 04 Oct 2019 14:40:20 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:09 +0100 Message-Id: 
<20191004134015.13204-15-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 14/20] drm/i915: Move context management under GEM X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Keep track of the GEM contexts underneath i915->gem.contexts and assign them their own lock for the purposes of list management. v2: Focus on lock tracking; ctx->vm is protected by ctx->mutex v3: Correct split with removal of logical HW ID Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 177 +++++++++--------- drivers/gpu/drm/i915/gem/i915_gem_context.h | 27 ++- .../gpu/drm/i915/gem/i915_gem_context_types.h | 2 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 3 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 36 ++-- .../drm/i915/gem/selftests/i915_gem_context.c | 168 ++++++++--------- .../gpu/drm/i915/gem/selftests/mock_context.c | 7 +- drivers/gpu/drm/i915/gt/intel_context.c | 10 +- drivers/gpu/drm/i915/gt/selftest_context.c | 24 +-- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 39 ++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 6 +- .../gpu/drm/i915/gt/selftest_workarounds.c | 22 ++- drivers/gpu/drm/i915/gvt/scheduler.c | 24 +-- drivers/gpu/drm/i915/i915_debugfs.c | 50 +++-- drivers/gpu/drm/i915/i915_drv.c | 2 - drivers/gpu/drm/i915/i915_drv.h | 15 +- drivers/gpu/drm/i915/i915_gem.c | 10 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 24 ++- drivers/gpu/drm/i915/i915_sysfs.c | 43 ++--- 
drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/selftests/i915_gem.c | 8 - .../gpu/drm/i915/selftests/i915_gem_evict.c | 3 - drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 +- drivers/gpu/drm/i915/selftests/i915_request.c | 12 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 7 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 6 +- 28 files changed, 394 insertions(+), 354 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cd4f327b23bd..5d8221c7ba83 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -218,9 +218,12 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + spin_lock(&ctx->i915->gem.contexts.lock); + list_del(&ctx->link); + spin_unlock(&ctx->i915->gem.contexts.lock); + free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); @@ -230,67 +233,54 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) kfree(ctx->name); put_pid(ctx->pid); - list_del(&ctx->link); mutex_destroy(&ctx->mutex); kfree_rcu(ctx, rcu); } -static void contexts_free(struct drm_i915_private *i915) +static void contexts_free_all(struct llist_node *list) { - struct llist_node *freed = llist_del_all(&i915->contexts.free_list); struct i915_gem_context *ctx, *cn; - lockdep_assert_held(&i915->drm.struct_mutex); - - llist_for_each_entry_safe(ctx, cn, freed, free_link) + llist_for_each_entry_safe(ctx, cn, list, free_link) i915_gem_context_free(ctx); } -static void contexts_free_first(struct drm_i915_private *i915) +static void contexts_flush_free(struct i915_gem_contexts *gc) { - struct i915_gem_context *ctx; - struct llist_node *freed; - - lockdep_assert_held(&i915->drm.struct_mutex); - - freed = 
llist_del_first(&i915->contexts.free_list); - if (!freed) - return; - - ctx = container_of(freed, typeof(*ctx), free_link); - i915_gem_context_free(ctx); + contexts_free_all(llist_del_all(&gc->free_list)); } static void contexts_free_worker(struct work_struct *work) { - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), contexts.free_work); + struct i915_gem_contexts *gc = + container_of(work, typeof(*gc), free_work); - mutex_lock(&i915->drm.struct_mutex); - contexts_free(i915); - mutex_unlock(&i915->drm.struct_mutex); + contexts_flush_free(gc); } void i915_gem_context_release(struct kref *ref) { struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct drm_i915_private *i915 = ctx->i915; + struct i915_gem_contexts *gc = &ctx->i915->gem.contexts; trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, &i915->contexts.free_list)) - queue_work(i915->wq, &i915->contexts.free_work); + if (llist_add(&ctx->free_link, &gc->free_list)) + schedule_work(&gc->free_work); } static void context_close(struct i915_gem_context *ctx) { - i915_gem_context_set_closed(ctx); + struct i915_address_space *vm; - if (ctx->vm) - i915_vm_close(ctx->vm); + i915_gem_context_set_closed(ctx); mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_close(vm); + ctx->file_priv = ERR_PTR(-EBADF); /* @@ -317,7 +307,6 @@ __create_context(struct drm_i915_private *i915) return ERR_PTR(-ENOMEM); kref_init(&ctx->ref); - list_add_tail(&ctx->link, &i915->contexts.list); ctx->i915 = i915; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); mutex_init(&ctx->mutex); @@ -343,6 +332,10 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + spin_lock(&i915->gem.contexts.lock); + list_add_tail(&ctx->link, &i915->gem.contexts.list); + spin_unlock(&i915->gem.contexts.lock); + return ctx; err_free: @@ -372,11 +365,11 @@ static 
void __apply_ppgtt(struct intel_context *ce, void *vm) static struct i915_address_space * __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - struct i915_address_space *old = ctx->vm; + struct i915_address_space *old = i915_gem_context_vm(ctx); GEM_BUG_ON(old && i915_vm_is_4lvl(vm) != i915_vm_is_4lvl(old)); - ctx->vm = i915_vm_open(vm); + rcu_assign_pointer(ctx->vm, i915_vm_open(vm)); context_apply_all(ctx, __apply_ppgtt, vm); return old; @@ -385,7 +378,7 @@ __set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) static void __assign_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm) { - if (vm == ctx->vm) + if (vm == rcu_access_pointer(ctx->vm)) return; vm = __set_ppgtt(ctx, vm); @@ -417,27 +410,25 @@ static void __assign_timeline(struct i915_gem_context *ctx, } static struct i915_gem_context * -i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) +i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) { struct i915_gem_context *ctx; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && - !HAS_EXECLISTS(dev_priv)) + !HAS_EXECLISTS(i915)) return ERR_PTR(-EINVAL); - /* Reap the most stale context */ - contexts_free_first(dev_priv); + /* Reap the stale contexts */ + contexts_flush_free(&i915->gem.contexts); - ctx = __create_context(dev_priv); + ctx = __create_context(i915); if (IS_ERR(ctx)) return ctx; - if (HAS_FULL_PPGTT(dev_priv)) { + if (HAS_FULL_PPGTT(i915)) { struct i915_ppgtt *ppgtt; - ppgtt = i915_ppgtt_create(dev_priv); + ppgtt = i915_ppgtt_create(i915); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); @@ -445,14 +436,17 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) return ERR_CAST(ppgtt); } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } if (flags & 
I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { struct intel_timeline *timeline; - timeline = intel_timeline_create(&dev_priv->gt, NULL); + timeline = intel_timeline_create(&i915->gt, NULL); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -497,42 +491,40 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; } -static void init_contexts(struct drm_i915_private *i915) +static void init_contexts(struct i915_gem_contexts *gc) { - mutex_init(&i915->contexts.mutex); - INIT_LIST_HEAD(&i915->contexts.list); + spin_lock_init(&gc->lock); + INIT_LIST_HEAD(&gc->list); - INIT_WORK(&i915->contexts.free_work, contexts_free_worker); - init_llist_head(&i915->contexts.free_list); + INIT_WORK(&gc->free_work, contexts_free_worker); + init_llist_head(&gc->free_list); } -int i915_gem_contexts_init(struct drm_i915_private *dev_priv) +int i915_gem_init_contexts(struct drm_i915_private *i915) { struct i915_gem_context *ctx; /* Reassure ourselves we are only called once */ - GEM_BUG_ON(dev_priv->kernel_context); + GEM_BUG_ON(i915->kernel_context); - init_contexts(dev_priv); + init_contexts(&i915->gem.contexts); /* lowest priority; idle task */ - ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN); + ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MIN); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context\n"); return PTR_ERR(ctx); } - dev_priv->kernel_context = ctx; + i915->kernel_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", - DRIVER_CAPS(dev_priv)->has_logical_contexts ? + DRIVER_CAPS(i915)->has_logical_contexts ? 
"logical" : "fake"); return 0; } -void i915_gem_contexts_fini(struct drm_i915_private *i915) +void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { - lockdep_assert_held(&i915->drm.struct_mutex); - destroy_kernel_context(&i915->kernel_context); } @@ -551,11 +543,16 @@ static int vm_idr_cleanup(int id, void *p, void *data) static int gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv) { + struct i915_address_space *vm; int ret; ctx->file_priv = fpriv; - if (ctx->vm) - ctx->vm->file = fpriv; + + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->file, fpriv); /* XXX */ + mutex_unlock(&ctx->mutex); ctx->pid = get_task_pid(current, PIDTYPE_PID); ctx->name = kasprintf(GFP_KERNEL, "%s[%d]", @@ -592,9 +589,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, idr_init(&file_priv->context_idr); idr_init_base(&file_priv->vm_idr, 1); - mutex_lock(&i915->drm.struct_mutex); ctx = i915_gem_create_context(i915, 0); - mutex_unlock(&i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto err; @@ -622,6 +617,7 @@ int i915_gem_context_open(struct drm_i915_private *i915, void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_private *i915 = file_priv->dev_priv; idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); idr_destroy(&file_priv->context_idr); @@ -630,6 +626,8 @@ void i915_gem_context_close(struct drm_file *file) idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL); idr_destroy(&file_priv->vm_idr); mutex_destroy(&file_priv->vm_idr_lock); + + contexts_flush_free(&i915->gem.contexts); } int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, @@ -808,16 +806,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_address_space *vm; int ret; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; - /* XXX rcu acquire? 
*/ - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); - if (ret) - return ret; - + rcu_read_lock(); vm = i915_vm_get(ctx->vm); - mutex_unlock(&ctx->i915->drm.struct_mutex); + rcu_read_unlock(); ret = mutex_lock_interruptible(&file_priv->vm_idr_lock); if (ret) @@ -926,7 +920,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (args->size) return -EINVAL; - if (!ctx->vm) + if (!rcu_access_pointer(ctx->vm)) return -ENODEV; if (upper_32_bits(args->value)) @@ -940,17 +934,20 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (!vm) return -ENOENT; - err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + err = mutex_lock_interruptible(&ctx->mutex); if (err) goto out; - if (vm == ctx->vm) + if (i915_gem_context_is_closed(ctx)) { + err = -ENOENT; + goto out; + } + + if (vm == rcu_access_pointer(ctx->vm)) goto unlock; /* Teardown the existing obj:vma cache, it will have to be rebuilt. */ - mutex_lock(&ctx->mutex); lut_close(ctx); - mutex_unlock(&ctx->mutex); old = __set_ppgtt(ctx, vm); @@ -970,8 +967,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, } unlock: - mutex_unlock(&ctx->i915->drm.struct_mutex); - + mutex_unlock(&ctx->mutex); out: i915_vm_put(vm); return err; @@ -1827,10 +1823,11 @@ static int clone_vm(struct i915_gem_context *dst, struct i915_gem_context *src) { struct i915_address_space *vm; + int err = 0; rcu_read_lock(); do { - vm = READ_ONCE(src->vm); + vm = rcu_dereference(src->vm); if (!vm) break; @@ -1852,7 +1849,7 @@ static int clone_vm(struct i915_gem_context *dst, * it cannot be reallocated elsewhere. 
*/ - if (vm == READ_ONCE(src->vm)) + if (vm == rcu_access_pointer(src->vm)) break; i915_vm_put(vm); @@ -1860,11 +1857,16 @@ static int clone_vm(struct i915_gem_context *dst, rcu_read_unlock(); if (vm) { - __assign_ppgtt(dst, vm); + if (!mutex_lock_interruptible(&dst->mutex)) { + __assign_ppgtt(dst, vm); + mutex_unlock(&dst->mutex); + } else { + err = -EINTR; + } i915_vm_put(vm); } - return 0; + return err; } static int create_clone(struct i915_user_extension __user *ext, void *data) @@ -1954,12 +1956,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, return -EIO; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - ext_data.ctx = i915_gem_create_context(i915, args->flags); - mutex_unlock(&dev->struct_mutex); if (IS_ERR(ext_data.ctx)) return PTR_ERR(ext_data.ctx); @@ -2086,10 +2083,12 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - if (ctx->vm) - args->value = ctx->vm->total; + rcu_read_lock(); + if (rcu_access_pointer(ctx->vm)) + args->value = rcu_dereference(ctx->vm)->total; else args->value = to_i915(dev)->ggtt.vm.total; + rcu_read_unlock(); break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: @@ -2155,7 +2154,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_reset_stats *args = data; struct i915_gem_context *ctx; int ret; @@ -2177,7 +2176,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, */ if (capable(CAP_SYS_ADMIN)) - args->reset_count = i915_reset_count(&dev_priv->gpu_error); + args->reset_count = i915_reset_count(&i915->gpu_error); else args->reset_count = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 
50bc27d30c03..9234586830d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -11,7 +11,9 @@ #include "gt/intel_context.h" +#include "i915_drv.h" #include "i915_gem.h" +#include "i915_gem_gtt.h" #include "i915_scheduler.h" #include "intel_device_info.h" @@ -118,8 +120,8 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) } /* i915_gem_context.c */ -int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); -void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); +int __must_check i915_gem_init_contexts(struct drm_i915_private *i915); +void i915_gem_driver_release__contexts(struct drm_i915_private *i915); int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); @@ -158,6 +160,27 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_address_space * +i915_gem_context_vm(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); +} + +static inline struct i915_address_space * +i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +{ + struct i915_address_space *vm; + + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (!vm) + vm = &ctx->i915->ggtt.vm; + vm = i915_vm_get(vm); + rcu_read_unlock(); + + return vm; +} + static inline struct i915_gem_engines * i915_gem_context_engines(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 87be27877e22..ab8e1367dfc8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -88,7 +88,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. 
*/ - struct i915_address_space *vm; + struct i915_address_space __rcu *vm; /** * @pid: process id of creator diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 88a881be12ec..98816c35ffc3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -728,7 +728,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return -ENOENT; eb->gem_context = ctx; - if (ctx->vm) + if (rcu_access_pointer(ctx->vm)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; eb->context_flags = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1738a15eb911..4f970474013f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -758,7 +758,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, * On almost all of the older hw, we cannot tell the GPU that * a page is readonly. */ - vm = dev_priv->kernel_context->vm; + vm = rcu_dereference_protected(dev_priv->kernel_context->vm, + true); /* static vm */ if (!vm || !vm->has_read_only) return -ENODEV; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 98b2a6ccfcc1..3314858f3046 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1322,15 +1322,15 @@ static int igt_ppgtt_pin_update(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *dev_priv = ctx->i915; unsigned long supported = INTEL_INFO(dev_priv)->page_sizes; - struct i915_address_space *vm = ctx->vm; struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; + struct i915_address_space *vm; struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; unsigned int n; int first, last; - int err; + int err = 0; /* * Make sure there's no funny business when doing a PIN_UPDATE 
-- in the @@ -1340,9 +1340,10 @@ static int igt_ppgtt_pin_update(void *arg) * huge-gtt-pages. */ - if (!vm || !i915_vm_is_4lvl(vm)) { + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_vm_is_4lvl(vm)) { pr_info("48b PPGTT not supported, skipping\n"); - return 0; + goto out_vm; } first = ilog2(I915_GTT_PAGE_SIZE_64K); @@ -1451,6 +1452,8 @@ static int igt_ppgtt_pin_update(void *arg) i915_vma_close(vma); out_put: i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1460,7 +1463,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1510,6 +1513,7 @@ static int igt_tmpfs_fallback(void *arg) out_restore: i915->mm.gemfs = gemfs; + i915_vm_put(vm); return err; } @@ -1517,14 +1521,14 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; - int err; + int err = 0; /* * Sanity check shrinking huge-paged object -- make sure nothing blows @@ -1533,12 +1537,14 @@ static int igt_shrink_thp(void *arg) if (!igt_can_allocate_thp(i915)) { pr_info("missing THP support, skipping\n"); - return 0; + goto out_vm; } obj = i915_gem_object_create_shmem(i915, SZ_2M); - if (IS_ERR(obj)) - return PTR_ERR(obj); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { @@ -1607,6 +1613,8 @@ static int igt_shrink_thp(void *arg) i915_vma_close(vma); out_put: 
i915_gem_object_put(obj); +out_vm: + i915_vm_put(vm); return err; } @@ -1675,6 +1683,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) }; struct drm_file *file; struct i915_gem_context *ctx; + struct i915_address_space *vm; intel_wakeref_t wakeref; int err; @@ -1699,8 +1708,11 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) goto out_unlock; } - if (ctx->vm) - ctx->vm->scrub_64K = true; + mutex_lock(&ctx->mutex); + vm = i915_gem_context_vm(ctx); + if (vm) + WRITE_ONCE(vm->scrub_64K, true); + mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 2fb31ada2fa7..d44fa9d356f1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -53,19 +53,17 @@ static int live_nop_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); if (!ctx) { err = -ENOMEM; - goto out_unlock; + goto out_file; } for (n = 0; n < nctx; n++) { ctx[n] = live_context(i915, file); if (IS_ERR(ctx[n])) { err = PTR_ERR(ctx[n]); - goto out_unlock; + goto out_file; } } @@ -79,7 +77,7 @@ static int live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } i915_request_add(rq); } @@ -87,7 +85,7 @@ static int live_nop_switch(void *arg) pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); err = -EIO; - goto out_unlock; + goto out_file; } times[1] = ktime_get_raw(); @@ -97,7 +95,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; end_time = jiffies + i915_selftest.timeout_jiffies; for_each_prime_number_from(prime, 2, 8192) { @@ -107,7 +105,7 @@ static int 
live_nop_switch(void *arg) rq = igt_request_alloc(ctx[n % nctx], engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_unlock; + goto out_file; } /* @@ -143,7 +141,7 @@ static int live_nop_switch(void *arg) err = igt_live_test_end(&t); if (err) - goto out_unlock; + goto out_file; pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", engine->name, @@ -151,8 +149,7 @@ static int live_nop_switch(void *arg) prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); +out_file: mock_file_free(i915, file); return err; } @@ -253,12 +250,10 @@ static int live_parallel_switch(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_locked; + goto out_file; } engines = i915_gem_context_lock_engines(ctx); @@ -268,7 +263,7 @@ static int live_parallel_switch(void *arg) if (!data) { i915_gem_context_unlock_engines(ctx); err = -ENOMEM; - goto out_locked; + goto out; } m = 0; /* Use the first context as our template for the engines */ @@ -276,7 +271,7 @@ static int live_parallel_switch(void *arg) err = intel_context_pin(ce); if (err) { i915_gem_context_unlock_engines(ctx); - goto out_locked; + goto out; } data[m++].ce[0] = intel_context_get(ce); } @@ -287,7 +282,7 @@ static int live_parallel_switch(void *arg) ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_locked; + goto out; } for (m = 0; m < count; m++) { @@ -296,20 +291,18 @@ static int live_parallel_switch(void *arg) ce = intel_context_create(ctx, data[m].ce[0]->engine); if (IS_ERR(ce)) - goto out_locked; + goto out; err = intel_context_pin(ce); if (err) { intel_context_put(ce); - goto out_locked; + goto out; } data[m].ce[n] = ce; } } - mutex_unlock(&i915->drm.struct_mutex); - for (fn = func; !err && *fn; fn++) { struct igt_live_test t; int n; @@ -354,8 +347,7 @@ static int live_parallel_switch(void *arg) 
mutex_unlock(&i915->drm.struct_mutex); } - mutex_lock(&i915->drm.struct_mutex); -out_locked: +out: for (n = 0; n < count; n++) { for (m = 0; m < ARRAY_SIZE(data->ce); m++) { if (!data[n].ce[m]) @@ -365,8 +357,8 @@ static int live_parallel_switch(void *arg) intel_context_put(data[n].ce[m]); } } - mutex_unlock(&i915->drm.struct_mutex); kfree(data); +out_file: mock_file_free(i915, file); return err; } @@ -626,11 +618,9 @@ static int igt_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, engine->name); if (err) - goto out_unlock; + goto out_file; ncontexts = 0; ndwords = 0; @@ -642,7 +632,7 @@ static int igt_ctx_exec(void *arg) ctx = kernel_context(i915); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); @@ -654,7 +644,7 @@ static int igt_ctx_exec(void *arg) err = PTR_ERR(obj); intel_context_put(ce); kernel_context_close(ctx); - goto out_unlock; + goto out_file; } } @@ -663,17 +653,18 @@ static int igt_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!ctx->vm), err); + yesno(!!rcu_access_pointer(ctx->vm)), + err); intel_context_put(ce); kernel_context_close(ctx); - goto out_unlock; + goto out_file; } err = throttle(ce, tq, ARRAY_SIZE(tq)); if (err) { intel_context_put(ce); kernel_context_close(ctx); - goto out_unlock; + goto out_file; } if (++dw == max_dwords(obj)) { @@ -703,11 +694,10 @@ static int igt_ctx_exec(void *arg) dw += rem; } -out_unlock: +out_file: throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); if (err) @@ -742,22 +732,20 @@ static int igt_shared_ctx_exec(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - parent = live_context(i915, file); if (IS_ERR(parent)) { err = PTR_ERR(parent); - goto out_unlock; + goto out_file; } if (!parent->vm) { /* not full-ppgtt; nothing to share */ err = 0; - goto out_unlock; + goto out_file; } err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; for_each_engine(engine, i915, id) { unsigned long ncontexts, ndwords, dw; @@ -781,7 +769,9 @@ static int igt_shared_ctx_exec(void *arg) goto out_test; } + mutex_lock(&ctx->mutex); __assign_ppgtt(ctx, parent->vm); + mutex_unlock(&ctx->mutex); ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); GEM_BUG_ON(IS_ERR(ce)); @@ -801,7 +791,8 @@ static int igt_shared_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? 
%s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!ctx->vm), err); + yesno(!!rcu_access_pointer(ctx->vm)), + err); intel_context_put(ce); kernel_context_close(ctx); goto out_test; @@ -840,17 +831,13 @@ static int igt_shared_ctx_exec(void *arg) dw += rem; } - mutex_unlock(&i915->drm.struct_mutex); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); } out_test: throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); return err; } @@ -1222,8 +1209,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915, if (flags & TEST_RESET) igt_global_reset_lock(&i915->gt); - mutex_lock(&i915->drm.struct_mutex); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); @@ -1278,8 +1263,6 @@ __igt_ctx_sseu(struct drm_i915_private *i915, i915_gem_object_put(obj); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - if (flags & TEST_RESET) igt_global_reset_unlock(&i915->gt); @@ -1339,23 +1322,24 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } - vm = ctx->vm ?: &i915->ggtt.alias->vm; + rcu_read_lock(); + vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { + rcu_read_unlock(); err = 0; - goto out_unlock; + goto out_file; } + rcu_read_unlock(); ndwords = 0; dw = 0; @@ -1373,7 +1357,7 @@ static int igt_ctx_readonly(void *arg) if (IS_ERR(obj)) { err = PTR_ERR(obj); i915_gem_context_unlock_engines(ctx); - goto out_unlock; + goto out_file; } if (prandom_u32_state(&prng) & 1) @@ -1384,15 +1368,17 @@ static int igt_ctx_readonly(void *arg) if (err) { pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) 
[full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), - ce->engine->name, yesno(!!ctx->vm), err); + ce->engine->name, + yesno(!!rcu_access_pointer(ctx->vm)), + err); i915_gem_context_unlock_engines(ctx); - goto out_unlock; + goto out_file; } err = throttle(ce, tq, ARRAY_SIZE(tq)); if (err) { i915_gem_context_unlock_engines(ctx); - goto out_unlock; + goto out_file; } if (++dw == max_dwords(obj)) { @@ -1424,20 +1410,19 @@ static int igt_ctx_readonly(void *arg) dw += rem; } -out_unlock: +out_file: throttle_release(tq, ARRAY_SIZE(tq)); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } -static int check_scratch(struct i915_gem_context *ctx, u64 offset) +static int check_scratch(struct i915_address_space *vm, u64 offset) { struct drm_mm_node *node = - __drm_mm_interval_first(&ctx->vm->mm, + __drm_mm_interval_first(&vm->mm, offset, offset + sizeof(u32) - 1); if (!node || node->start > offset) return 0; @@ -1455,6 +1440,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_request *rq; struct i915_vma *vma; u32 *cmd; @@ -1487,17 +1473,18 @@ static int write_to_scratch(struct i915_gem_context *ctx, intel_gt_chipset_flush(engine->gt); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1523,6 +1510,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, i915_request_add(rq); + i915_vm_put(vm); return 0; skip_request: @@ -1531,6 +1519,8 @@ static int write_to_scratch(struct i915_gem_context *ctx, i915_request_add(rq); err_unpin: 
i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1542,6 +1532,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_object *obj; + struct i915_address_space *vm; const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */ const u32 result = 0x100; struct i915_request *rq; @@ -1586,17 +1577,18 @@ static int read_from_scratch(struct i915_gem_context *ctx, intel_gt_chipset_flush(engine->gt); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto err; + goto err_vm; } err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); if (err) - goto err; + goto err_vm; - err = check_scratch(ctx, offset); + err = check_scratch(vm, offset); if (err) goto err_unpin; @@ -1627,12 +1619,12 @@ static int read_from_scratch(struct i915_gem_context *ctx, err = i915_gem_object_set_to_cpu_domain(obj, false); i915_gem_object_unlock(obj); if (err) - goto err; + goto err_vm; cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(cmd)) { err = PTR_ERR(cmd); - goto err; + goto err_vm; } *value = cmd[result / sizeof(*cmd)]; @@ -1647,6 +1639,8 @@ static int read_from_scratch(struct i915_gem_context *ctx, i915_request_add(rq); err_unpin: i915_vma_unpin(vma); +err_vm: + i915_vm_put(vm); err: i915_gem_object_put(obj); return err; @@ -1677,27 +1671,25 @@ static int igt_vm_isolation(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - err = igt_live_test_begin(&t, i915, __func__, ""); if (err) - goto out_unlock; + goto out_file; ctx_a = live_context(i915, file); if (IS_ERR(ctx_a)) { err = PTR_ERR(ctx_a); - goto out_unlock; + goto out_file; } ctx_b = live_context(i915, file); if (IS_ERR(ctx_b)) { err = PTR_ERR(ctx_b); - goto out_unlock; + goto out_file; } /* We can only test vm isolation, if the vm are 
distinct */ if (ctx_a->vm == ctx_b->vm) - goto out_unlock; + goto out_file; vm_total = ctx_a->vm->total; GEM_BUG_ON(ctx_b->vm->total != vm_total); @@ -1726,7 +1718,7 @@ static int igt_vm_isolation(void *arg) err = read_from_scratch(ctx_b, engine, offset, &value); if (err) - goto out_unlock; + goto out_file; if (value) { pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", @@ -1735,7 +1727,7 @@ static int igt_vm_isolation(void *arg) lower_32_bits(offset), this); err = -EINVAL; - goto out_unlock; + goto out_file; } this++; @@ -1745,11 +1737,9 @@ static int igt_vm_isolation(void *arg) pr_info("Checked %lu scratch offsets across %d engines\n", count, RUNTIME_INFO(i915)->num_engines); -out_unlock: +out_file: if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); - mock_file_free(i915, file); return err; } @@ -1781,13 +1771,9 @@ static int mock_context_barrier(void *arg) * a request; useful for retiring old state after loading new. */ - mutex_lock(&i915->drm.struct_mutex); - ctx = mock_context(i915, "mock"); - if (!ctx) { - err = -ENOMEM; - goto unlock; - } + if (!ctx) + return -ENOMEM; counter = 0; err = context_barrier_task(ctx, 0, @@ -1860,8 +1846,6 @@ static int mock_context_barrier(void *arg) out: mock_context_close(ctx); -unlock: - mutex_unlock(&i915->drm.struct_mutex); return err; #undef pr_fmt #define pr_fmt(x) x diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index 0104f16b1327..74ddd682c9cd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -42,7 +42,10 @@ mock_context(struct drm_i915_private *i915, if (!ppgtt) goto err_put; + mutex_lock(&ctx->mutex); __set_ppgtt(ctx, &ppgtt->vm); + mutex_unlock(&ctx->mutex); + i915_vm_put(&ppgtt->vm); } @@ -65,7 +68,7 @@ void mock_context_close(struct i915_gem_context *ctx) void mock_init_contexts(struct drm_i915_private *i915) { - 
init_contexts(i915); + init_contexts(&i915->gem.contexts); } struct i915_gem_context * @@ -74,8 +77,6 @@ live_context(struct drm_i915_private *i915, struct drm_file *file) struct i915_gem_context *ctx; int err; - lockdep_assert_held(&i915->drm.struct_mutex); - ctx = i915_gem_create_context(i915, 0); if (IS_ERR(ctx)) return ctx; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 35a40c2820a2..be34d97ac18f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -221,12 +221,20 @@ intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { + struct i915_address_space *vm; + GEM_BUG_ON(!engine->cops); kref_init(&ce->ref); ce->gem_context = ctx; - ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm); + rcu_read_lock(); + vm = rcu_dereference(ctx->vm); + if (vm) + ce->vm = i915_vm_get(vm); + else + ce->vm = i915_vm_get(&engine->gt->ggtt->vm); + rcu_read_unlock(); if (ctx->timeline) ce->timeline = intel_timeline_get(ctx->timeline); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 86cffbb0a9cb..7c838a57e174 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -155,13 +155,9 @@ static int live_context_size(void *arg) * HW tries to write past the end of one. 
*/ - mutex_lock(&gt->i915->drm.struct_mutex); - fixme = kernel_context(gt->i915); - if (IS_ERR(fixme)) { - err = PTR_ERR(fixme); - goto unlock; - } + if (IS_ERR(fixme)) + return PTR_ERR(fixme); for_each_engine(engine, gt->i915, id) { struct { @@ -201,8 +197,6 @@ static int live_context_size(void *arg) } kernel_context_close(fixme); -unlock: - mutex_unlock(&gt->i915->drm.struct_mutex); return err; } @@ -305,12 +299,10 @@ static int live_active_context(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&gt->i915->drm.struct_mutex); - fixme = live_context(gt->i915, file); if (IS_ERR(fixme)) { err = PTR_ERR(fixme); - goto unlock; + goto out_file; } for_each_engine(engine, gt->i915, id) { @@ -323,8 +315,7 @@ static int live_active_context(void *arg) break; } -unlock: - mutex_unlock(&gt->i915->drm.struct_mutex); +out_file: mock_file_free(gt->i915, file); return err; } @@ -418,12 +409,10 @@ static int live_remote_context(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&gt->i915->drm.struct_mutex); - fixme = live_context(gt->i915, file); if (IS_ERR(fixme)) { err = PTR_ERR(fixme); - goto unlock; + goto out_file; } for_each_engine(engine, gt->i915, id) { @@ -436,8 +425,7 @@ static int live_remote_context(void *arg) break; } -unlock: - mutex_unlock(&gt->i915->drm.struct_mutex); +out_file: mock_file_free(gt->i915, file); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index ffbb3d23b887..e8a40df79bd0 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -58,9 +58,7 @@ static int hang_init(struct hang *h, struct intel_gt *gt) memset(h, 0, sizeof(*h)); h->gt = gt; - mutex_lock(&gt->i915->drm.struct_mutex); h->ctx = kernel_context(gt->i915); - mutex_unlock(&gt->i915->drm.struct_mutex); if (IS_ERR(h->ctx)) return PTR_ERR(h->ctx); @@ -133,7 +131,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs
*engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm; + struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; @@ -143,12 +141,15 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) int err; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { + i915_vm_put(vm); return ERR_CAST(obj); + } vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915)); if (IS_ERR(vaddr)) { i915_gem_object_put(obj); + i915_vm_put(vm); return ERR_CAST(vaddr); } @@ -159,16 +160,22 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) h->batch = vaddr; vma = i915_vma_instance(h->obj, vm, NULL); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { + i915_vm_put(vm); return ERR_CAST(vma); + } hws = i915_vma_instance(h->hws, vm, NULL); - if (IS_ERR(hws)) + if (IS_ERR(hws)) { + i915_vm_put(vm); return ERR_CAST(hws); + } err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) + if (err) { + i915_vm_put(vm); return ERR_PTR(err); + } err = i915_vma_pin(hws, 0, 0, PIN_USER); if (err) @@ -266,6 +273,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) i915_vma_unpin(hws); unpin_vma: i915_vma_unpin(vma); + i915_vm_put(vm); return err ? 
ERR_PTR(err) : rq; } @@ -382,9 +390,7 @@ static int igt_reset_nop(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&gt->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(&gt->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; @@ -458,9 +464,7 @@ static int igt_reset_nop_engine(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&gt->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(&gt->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; @@ -705,9 +709,7 @@ static int active_engine(void *data) return PTR_ERR(file); for (count = 0; count < ARRAY_SIZE(ctx); count++) { - mutex_lock(&engine->i915->drm.struct_mutex); ctx[count] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); if (IS_ERR(ctx[count])) { err = PTR_ERR(ctx[count]); while (--count) @@ -1291,6 +1293,7 @@ static int igt_reset_evict_ppgtt(void *arg) { struct intel_gt *gt = arg; struct i915_gem_context *ctx; + struct i915_address_space *vm; struct drm_file *file; int err; @@ -1298,18 +1301,20 @@ static int igt_reset_evict_ppgtt(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&gt->i915->drm.struct_mutex); ctx = live_context(gt->i915, file); - mutex_unlock(&gt->i915->drm.struct_mutex); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out; } err = 0; - if (ctx->vm) /* aliasing == global gtt locking, covered above */ - err = __igt_reset_evict_vma(gt, ctx->vm, + vm = i915_gem_context_get_vm_rcu(ctx); + if (!i915_is_ggtt(vm)) { + /* aliasing == global gtt locking, covered above */ + err = __igt_reset_evict_vma(gt, vm, evict_vma, EXEC_OBJECT_WRITE); + } + i915_vm_put(vm); out: mock_file_free(gt->i915, file); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 04c1cf573642..8dc42c5c7569 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1631,7 +1631,11 @@ static int 
smoke_submit(struct preempt_smoke *smoke, int err = 0; if (batch) { - vma = i915_vma_instance(batch, ctx->vm, NULL); + struct i915_address_space *vm; + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(batch, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 4ee2e2babd0d..7c7aceb85a74 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -260,7 +260,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine, rq = igt_spinner_create_request(spin, ce, MI_NOOP); intel_context_put(ce); - kernel_context_close(ctx); if (IS_ERR(rq)) { spin = NULL; @@ -279,6 +278,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine, if (err && spin) igt_spinner_end(spin); + kernel_context_close(ctx); return err; } @@ -355,6 +355,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, static struct i915_vma *create_batch(struct i915_gem_context *ctx) { struct drm_i915_gem_object *obj; + struct i915_address_space *vm; struct i915_vma *vma; int err; @@ -362,7 +363,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, ctx->vm, NULL); + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; @@ -463,12 +466,15 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, 0xffff00ff, 0xffffffff, }; + struct i915_address_space *vm; struct i915_vma *scratch; struct i915_vma *batch; int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1); + vm = i915_gem_context_get_vm_rcu(ctx); + scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1); + i915_vm_put(vm); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -1010,6 +1016,7 @@ 
static int live_isolated_whitelist(void *arg) return 0; for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_address_space *vm; struct i915_gem_context *c; c = kernel_context(i915); @@ -1018,22 +1025,27 @@ static int live_isolated_whitelist(void *arg) goto err; } - client[i].scratch[0] = create_scratch(c->vm, 1024); + vm = i915_gem_context_get_vm_rcu(c); + + client[i].scratch[0] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[0])) { err = PTR_ERR(client[i].scratch[0]); + i915_vm_put(vm); kernel_context_close(c); goto err; } - client[i].scratch[1] = create_scratch(c->vm, 1024); + client[i].scratch[1] = create_scratch(vm, 1024); if (IS_ERR(client[i].scratch[1])) { err = PTR_ERR(client[i].scratch[1]); i915_vma_unpin_and_release(&client[i].scratch[0], 0); + i915_vm_put(vm); kernel_context_close(c); goto err; } client[i].ctx = c; + i915_vm_put(vm); } for_each_engine(engine, i915, id) { diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 03f567084548..6850f1f40241 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -365,7 +365,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, struct i915_gem_context *ctx) { struct intel_vgpu_mm *mm = workload->shadow_mm; - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm); + struct i915_ppgtt *ppgtt = + i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); int i = 0; if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { @@ -378,6 +379,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } + + i915_vm_put(&ppgtt->vm); } static int @@ -1230,20 +1233,18 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) struct intel_vgpu_submission *s = &vgpu->submission; struct intel_engine_cs *engine; struct i915_gem_context *ctx; + struct i915_ppgtt *ppgtt; enum intel_engine_id i; int ret; - 
mutex_lock(&i915->drm.struct_mutex); - ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto out_unlock; - } + if (IS_ERR(ctx)) + return PTR_ERR(ctx); i915_gem_context_set_force_single_submission(ctx); - i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm)); + ppgtt = i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); + i915_context_ppgtt_root_save(s, ppgtt); for_each_engine(engine, i915, i) { struct intel_context *ce; @@ -1288,12 +1289,12 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) atomic_set(&s->running_workload_num, 0); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); + i915_vm_put(&ppgtt->vm); i915_gem_context_put(ctx); - mutex_unlock(&i915->drm.struct_mutex); return 0; out_shadow_ctx: - i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm)); + i915_context_ppgtt_root_restore(s, ppgtt); for_each_engine(engine, i915, i) { if (IS_ERR(s->shadow[i])) break; @@ -1301,9 +1302,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) intel_context_unpin(s->shadow[i]); intel_context_put(s->shadow[i]); } + i915_vm_put(&ppgtt->vm); i915_gem_context_put(ctx); -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0e90ac608e07..b04cebc26eca 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -316,12 +316,18 @@ static void print_context_stats(struct seq_file *m, struct drm_i915_private *i915) { struct file_stats kstats = {}; - struct i915_gem_context *ctx; + struct i915_gem_context *ctx, *cn; - list_for_each_entry(ctx, &i915->contexts.list, link) { + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { struct i915_gem_engines_iter it; struct intel_context *ce; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + for_each_gem_engine(ce, 
i915_gem_context_lock_engines(ctx), it) { intel_context_lock_pinned(ce); @@ -338,7 +344,9 @@ static void print_context_stats(struct seq_file *m, i915_gem_context_unlock_engines(ctx); if (!IS_ERR_OR_NULL(ctx->file_priv)) { - struct file_stats stats = { .vm = ctx->vm, }; + struct file_stats stats = { + .vm = rcu_access_pointer(ctx->vm), + }; struct drm_file *file = ctx->file_priv->file; struct task_struct *task; char name[80]; @@ -355,7 +363,12 @@ static void print_context_stats(struct seq_file *m, print_file_stats(m, name, stats); } + + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); } + spin_unlock(&i915->gem.contexts.lock); print_file_stats(m, "[k]contexts", kstats); } @@ -363,7 +376,6 @@ static void print_context_stats(struct seq_file *m, static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); - int ret; seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n", i915->mm.shrink_count, @@ -372,12 +384,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) seq_putc(m, '\n'); - ret = mutex_lock_interruptible(&i915->drm.struct_mutex); - if (ret) - return ret; - print_context_stats(m, i915); - mutex_unlock(&i915->drm.struct_mutex); return 0; } @@ -1579,19 +1586,19 @@ static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) static int i915_context_status(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct i915_gem_context *ctx; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + struct drm_i915_private *i915 = node_to_i915(m->private); + struct i915_gem_context *ctx, *cn; - list_for_each_entry(ctx, &dev_priv->contexts.list, link) { + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { struct i915_gem_engines_iter it; 
struct intel_context *ce; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + seq_puts(m, "HW context "); if (ctx->pid) { struct task_struct *task; @@ -1626,9 +1633,12 @@ static int i915_context_status(struct seq_file *m, void *unused) i915_gem_context_unlock_engines(ctx); seq_putc(m, '\n'); - } - mutex_unlock(&dev->struct_mutex); + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); + } + spin_unlock(&i915->gem.contexts.lock); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5323e4fa55d9..024da582ba0f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1665,10 +1665,8 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - mutex_lock(&dev->struct_mutex); i915_gem_context_close(file); i915_gem_release(dev, file); - mutex_unlock(&dev->struct_mutex); kfree_rcu(file_priv, rcu); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6bdcffbf1b9b..35b610d52379 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1536,13 +1536,6 @@ struct drm_i915_private { int audio_power_refcount; u32 audio_freq_cntrl; - struct { - struct mutex mutex; - struct list_head list; - struct llist_head free_list; - struct work_struct free_work; - } contexts; - u32 fdi_rx_config; /* Shadow for DISPLAY_PHY_CONTROL which can't be safely read */ @@ -1698,6 +1691,14 @@ struct drm_i915_private { struct { struct notifier_block pm_notifier; + + struct i915_gem_contexts { + spinlock_t lock; /* locks list */ + struct list_head list; + + struct llist_head free_list; + struct work_struct free_work; + } contexts; } gem; /* For i945gm vblank irq vs. 
C3 workaround */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5a664bdead8c..f6db415985d5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1266,7 +1266,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_unlock; } - ret = i915_gem_contexts_init(dev_priv); + ret = i915_gem_init_contexts(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); goto err_scratch; @@ -1348,7 +1348,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) } err_context: if (ret != -EIO) - i915_gem_contexts_fini(dev_priv); + i915_gem_driver_release__contexts(dev_priv); err_scratch: intel_gt_driver_release(&dev_priv->gt); err_unlock: @@ -1416,11 +1416,9 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->drm.struct_mutex); intel_engines_cleanup(dev_priv); - i915_gem_contexts_fini(dev_priv); + i915_gem_driver_release__contexts(dev_priv); intel_gt_driver_release(&dev_priv->gt); - mutex_unlock(&dev_priv->drm.struct_mutex); intel_wa_list_free(&dev_priv->gt_wa_list); @@ -1430,7 +1428,7 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) i915_gem_drain_freed_objects(dev_priv); - WARN_ON(!list_empty(&dev_priv->contexts.list)); + WARN_ON(!list_empty(&dev_priv->gem.contexts.list)); } void i915_gem_init_mmio(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1d26634ca597..7b15bb891970 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1366,7 +1366,9 @@ static int gen8_init_scratch(struct i915_address_space *vm) if (vm->has_read_only && vm->i915->kernel_context && vm->i915->kernel_context->vm) { - struct i915_address_space *clone = vm->i915->kernel_context->vm; + struct i915_address_space *clone = + rcu_dereference_protected(vm->i915->kernel_context->vm, + true); /* static */ 
GEM_BUG_ON(!clone->has_read_only); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index ecfbc37b738b..231388d06c82 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1853,8 +1853,8 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, }; #undef ctx_flexeuN struct intel_engine_cs *engine; - struct i915_gem_context *ctx; - int i; + struct i915_gem_context *ctx, *cn; + int i, err; for (i = 2; i < ARRAY_SIZE(regs); i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); @@ -1877,16 +1877,27 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, * context. Contexts idle at the time of reconfiguration are not * trapped behind the barrier. */ - list_for_each_entry(ctx, &i915->contexts.list, link) { - int err; - + spin_lock(&i915->gem.contexts.lock); + list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { if (ctx == i915->kernel_context) continue; + if (!kref_get_unless_zero(&ctx->ref)) + continue; + + spin_unlock(&i915->gem.contexts.lock); + err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); - if (err) + if (err) { + i915_gem_context_put(ctx); return err; + } + + spin_lock(&i915->gem.contexts.lock); + list_safe_reset_next(ctx, cn, link); + i915_gem_context_put(ctx); } + spin_unlock(&i915->gem.contexts.lock); /* * After updating all other contexts, we need to modify ourselves. 
@@ -1895,7 +1906,6 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, */ for_each_uabi_engine(engine, i915) { struct intel_context *ce = engine->kernel_context; - int err; if (engine->class != RENDER_CLASS) continue; diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 1e08c5961535..bf039b8ba593 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -176,16 +176,12 @@ i915_l3_read(struct file *filp, struct kobject *kobj, count = min_t(size_t, GEN7_L3LOG_SIZE - offset, count); memset(buf, 0, count); - ret = i915_mutex_lock_interruptible(&i915->drm); - if (ret) - return ret; - + spin_lock(&i915->gem.contexts.lock); if (i915->l3_parity.remap_info[slice]) memcpy(buf, i915->l3_parity.remap_info[slice] + offset / sizeof(u32), count); - - mutex_unlock(&i915->drm.struct_mutex); + spin_unlock(&i915->gem.contexts.lock); return count; } @@ -198,8 +194,8 @@ i915_l3_write(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); int slice = (int)(uintptr_t)attr->private; + u32 *remap_info, *freeme = NULL; struct i915_gem_context *ctx; - u32 **remap_info; int ret; ret = l3_access_valid(i915, offset); @@ -209,37 +205,36 @@ i915_l3_write(struct file *filp, struct kobject *kobj, if (count < sizeof(u32)) return -EINVAL; - ret = i915_mutex_lock_interruptible(&i915->drm); - if (ret) - return ret; + remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); + if (!remap_info) + return -ENOMEM; - remap_info = &i915->l3_parity.remap_info[slice]; - if (!*remap_info) { - *remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL); - if (!*remap_info) { - ret = -ENOMEM; - goto out; - } + spin_lock(&i915->gem.contexts.lock); + + if (i915->l3_parity.remap_info[slice]) { + freeme = remap_info; + remap_info = i915->l3_parity.remap_info[slice]; + } else { + i915->l3_parity.remap_info[slice] = remap_info; } count = round_down(count, 
sizeof(u32)); - memcpy(*remap_info + offset / sizeof(u32), buf, count); + memcpy(remap_info + offset / sizeof(u32), buf, count); /* NB: We defer the remapping until we switch to the context */ - list_for_each_entry(ctx, &i915->contexts.list, link) + list_for_each_entry(ctx, &i915->gem.contexts.list, link) ctx->remap_slice |= BIT(slice); + spin_unlock(&i915->gem.contexts.lock); + kfree(freeme); + /* * TODO: Ideally we really want a GPU reset here to make sure errors * aren't propagated. Since I cannot find a stable way to reset the GPU * at this point it is left as a TODO. */ - ret = count; -out: - mutex_unlock(&i915->drm.struct_mutex); - - return ret; + return count; } static const struct bin_attribute dpf_attrs = { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 1f2cf6cfafb5..7ef7a1e1664c 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -952,7 +952,7 @@ DECLARE_EVENT_CLASS(i915_context, TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->vm = ctx->vm; + __entry->vm = rcu_access_pointer(ctx->vm); ), TP_printk("dev=%u, ctx=%p, ctx_vm=%p", diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c index 0346c3e5b6b6..bfa40a5b6d98 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c @@ -138,11 +138,9 @@ static int igt_gem_suspend(void *arg) return PTR_ERR(file); err = -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (!IS_ERR(ctx)) err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto out; @@ -157,9 +155,7 @@ static int igt_gem_suspend(void *arg) pm_resume(i915); - mutex_lock(&i915->drm.struct_mutex); err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); out: mock_file_free(i915, file); return err; @@ -177,11 +173,9 @@ static int igt_gem_hibernate(void *arg) return 
PTR_ERR(file); err = -ENOMEM; - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (!IS_ERR(ctx)) err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); if (err) goto out; @@ -196,9 +190,7 @@ static int igt_gem_hibernate(void *arg) pm_resume(i915); - mutex_lock(&i915->drm.struct_mutex); err = switch_to_context(i915, ctx); - mutex_unlock(&i915->drm.struct_mutex); out: mock_file_free(i915, file); return err; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index f39f0282e78c..0af9a58d011d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -473,7 +473,6 @@ static int igt_evict_contexts(void *arg) } count = 0; - mutex_lock(&i915->drm.struct_mutex); onstack_fence_init(&fence); do { struct i915_request *rq; @@ -510,8 +509,6 @@ static int igt_evict_contexts(void *arg) count++; err = 0; } while(1); - mutex_unlock(&i915->drm.struct_mutex); - onstack_fence_fini(&fence); pr_info("Submitted %lu contexts/requests on %s\n", count, engine->name); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index e40e6cfa51f1..8d8121c02161 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1246,6 +1246,7 @@ static int exercise_mock(struct drm_i915_private *i915, unsigned long end_time)) { const u64 limit = totalram_pages() << PAGE_SHIFT; + struct i915_address_space *vm; struct i915_gem_context *ctx; IGT_TIMEOUT(end_time); int err; @@ -1254,7 +1255,9 @@ static int exercise_mock(struct drm_i915_private *i915, if (!ctx) return -ENOMEM; - err = func(i915, ctx->vm, 0, min(ctx->vm->total, limit), end_time); + vm = i915_gem_context_get_vm_rcu(ctx); + err = func(i915, vm, 0, min(vm->total, limit), end_time); + i915_vm_put(vm); mock_context_close(ctx); return err; @@ -1801,15 +1804,15 @@ static int 
igt_cs_tlb(void *arg) goto out_unlock; } - vm = ctx->vm; - if (!vm) - goto out_unlock; + vm = i915_gem_context_get_vm_rcu(ctx); + if (i915_is_ggtt(vm)) + goto out_vm; /* Create two pages; dummy we prefill the TLB, and intended */ bbe = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(bbe)) { err = PTR_ERR(bbe); - goto out_unlock; + goto out_vm; } batch = i915_gem_object_pin_map(bbe, I915_MAP_WC); @@ -2014,6 +2017,8 @@ static int igt_cs_tlb(void *arg) i915_gem_object_put(act); out_put_bbe: i915_gem_object_put(bbe); +out_vm: + i915_vm_put(vm); out_unlock: mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index d7d68c6a6bd5..0897a7b04944 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -181,9 +181,7 @@ static int igt_request_rewind(void *arg) struct intel_context *ce; int err = -EINVAL; - mutex_lock(&i915->drm.struct_mutex); ctx[0] = mock_context(i915, "A"); - mutex_unlock(&i915->drm.struct_mutex); ce = i915_gem_context_get_engine(ctx[0], RCS0); GEM_BUG_ON(IS_ERR(ce)); @@ -197,9 +195,7 @@ static int igt_request_rewind(void *arg) i915_request_get(request); i915_request_add(request); - mutex_lock(&i915->drm.struct_mutex); ctx[1] = mock_context(i915, "B"); - mutex_unlock(&i915->drm.struct_mutex); ce = i915_gem_context_get_engine(ctx[1], RCS0); GEM_BUG_ON(IS_ERR(ce)); @@ -438,9 +434,7 @@ static int mock_breadcrumbs_smoketest(void *arg) } for (n = 0; n < t.ncontexts; n++) { - mutex_lock(&t.engine->i915->drm.struct_mutex); t.contexts[n] = mock_context(t.engine->i915, "mock"); - mutex_unlock(&t.engine->i915->drm.struct_mutex); if (!t.contexts[n]) { ret = -ENOMEM; goto out_contexts; @@ -734,9 +728,9 @@ static int live_empty_request(void *arg) static struct i915_vma *recursive_batch(struct drm_i915_private *i915) { struct i915_gem_context *ctx = i915->kernel_context; - struct 
i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm; struct drm_i915_gem_object *obj; const int gen = INTEL_GEN(i915); + struct i915_address_space *vm; struct i915_vma *vma; u32 *cmd; int err; @@ -745,7 +739,9 @@ static struct i915_vma *recursive_batch(struct drm_i915_private *i915) if (IS_ERR(obj)) return ERR_CAST(obj); + vm = i915_gem_context_get_vm_rcu(ctx); vma = i915_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -1220,9 +1216,7 @@ static int live_breadcrumbs_smoketest(void *arg) } for (n = 0; n < t[0].ncontexts; n++) { - mutex_lock(&i915->drm.struct_mutex); t[0].contexts[n] = live_context(i915, file); - mutex_unlock(&i915->drm.struct_mutex); if (!t[0].contexts[n]) { ret = -ENOMEM; goto out_contexts; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index ac1ff558eb90..58b5f40a07dd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -24,6 +24,7 @@ #include +#include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" #include "i915_scatterlist.h" @@ -38,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true; - if (vma->vm != ctx->vm) { + if (vma->vm != rcu_access_pointer(ctx->vm)) { pr_err("VMA created with wrong VM\n"); ok = false; } @@ -113,11 +114,13 @@ static int create_vmas(struct drm_i915_private *i915, list_for_each_entry(obj, objects, st_link) { for (pinned = 0; pinned <= 1; pinned++) { list_for_each_entry(ctx, contexts, link) { - struct i915_address_space *vm = ctx->vm; + struct i915_address_space *vm; struct i915_vma *vma; int err; + vm = i915_gem_context_get_vm_rcu(ctx); vma = checked_vma_instance(obj, vm, NULL); + i915_vm_put(vm); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 4e6cde0d4859..335f37ba98de 100644 --- 
a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -59,11 +59,9 @@ static void mock_device_release(struct drm_device *dev) i915_gem_drain_workqueue(i915); - mutex_lock(&i915->drm.struct_mutex); for_each_engine(engine, i915, id) mock_engine_free(engine); - i915_gem_contexts_fini(i915); - mutex_unlock(&i915->drm.struct_mutex); + i915_gem_driver_release__contexts(i915); intel_timelines_fini(i915); @@ -206,7 +204,7 @@ struct drm_i915_private *mock_gem_device(void) return i915; err_context: - i915_gem_contexts_fini(i915); + i915_gem_driver_release__contexts(i915); err_engine: mock_engine_free(i915->engine[RCS0]); err_unlock: From patchwork Fri Oct 4 13:40:10 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174551 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 43C5A16B1 for ; Fri, 4 Oct 2019 13:40:57 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 2C91A2084D for ; Fri, 4 Oct 2019 13:40:57 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 2C91A2084D Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0E59E6EB96; Fri, 4 Oct 2019 13:40:51 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org 
(Postfix) with ESMTPS id 4680F6EB88 for ; Fri, 4 Oct 2019 13:40:41 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723965-1500050 for multiple; Fri, 04 Oct 2019 14:40:20 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:10 +0100 Message-Id: <20191004134015.13204-16-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 15/20] drm/i915/overlay: Drop struct_mutex guard X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" The overlay uses the modeset mutex to control itself and only required the struct_mutex for requests, which is now obsolete. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display.c | 7 +------ drivers/gpu/drm/i915/display/intel_overlay.c | 13 ------------- drivers/gpu/drm/i915/gt/intel_reset.c | 4 ---- 3 files changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 8f7365b8dffb..f3a94a9cff16 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5738,13 +5738,8 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) { - if (intel_crtc->overlay) { - struct drm_device *dev = intel_crtc->base.dev; - - mutex_lock(&dev->struct_mutex); + if (intel_crtc->overlay) (void) intel_overlay_switch_off(intel_crtc->overlay); - mutex_unlock(&dev->struct_mutex); - } /* Let userspace switch the overlay on again. In most cases userspace * has to recompute where to put it anyway. diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index e12e1a753af0..daea112cbb87 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -439,8 +439,6 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) struct i915_request *rq; u32 *cs; - lockdep_assert_held(&dev_priv->drm.struct_mutex); - /* * Only wait if there is actually an old frame to release to * guarantee forward progress. 
@@ -751,7 +749,6 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct i915_vma *vma; int ret, tmp_width; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_release_old_vid(overlay); @@ -852,7 +849,6 @@ int intel_overlay_switch_off(struct intel_overlay *overlay) struct drm_i915_private *dev_priv = overlay->i915; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); ret = intel_overlay_recover_from_interrupt(overlay); @@ -1068,11 +1064,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (!(params->flags & I915_OVERLAY_ENABLE)) { drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); - ret = intel_overlay_switch_off(overlay); - - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); return ret; @@ -1088,7 +1080,6 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, return -ENOENT; drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); if (i915_gem_object_is_tiled(new_bo)) { DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n"); @@ -1152,14 +1143,12 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, if (ret != 0) goto out_unlock; - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); i915_gem_object_put(new_bo); return 0; out_unlock: - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); i915_gem_object_put(new_bo); @@ -1233,7 +1222,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, } drm_modeset_lock_all(dev); - mutex_lock(&dev->struct_mutex); ret = -EINVAL; if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) { @@ -1290,7 +1278,6 @@ int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, ret = 0; out_unlock: - mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); return ret; diff --git 
a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 055496f0825f..7b3d9d4517a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1288,10 +1288,6 @@ int intel_gt_terminally_wedged(struct intel_gt *gt) if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) return -EIO; - /* XXX intel_reset_finish() still takes struct_mutex!!! */ - if (mutex_is_locked(&gt->i915->drm.struct_mutex)) - return -EAGAIN; - if (wait_event_interruptible(gt->reset.queue, !test_bit(I915_RESET_BACKOFF, &gt->reset.flags))) From patchwork Fri Oct 4 13:40:11 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174571 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A421313BD for ; Fri, 4 Oct 2019 13:58:16 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 8C12B20700 for ; Fri, 4 Oct 2019 13:58:16 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 8C12B20700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id F11876EB8F; Fri, 4 Oct 2019 13:58:15 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 00C1C6EB8F for ; Fri, 4 Oct 2019 13:58:14 +0000 (UTC) X-Default-Received-SPF: pass 
(skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723966-1500050 for multiple; Fri, 04 Oct 2019 14:40:20 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:11 +0100 Message-Id: <20191004134015.13204-17-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 16/20] drm/i915: Drop struct_mutex guard from debugfs/framebuffer_info X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" It protects nothing being accessed for the intel_framebuffer, so its own locking had better be sufficient. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b04cebc26eca..77933b23070e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1534,11 +1534,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) struct drm_device *dev = &dev_priv->drm; struct intel_framebuffer *fbdev_fb = NULL; struct drm_framebuffer *drm_fb; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; #ifdef CONFIG_DRM_FBDEV_EMULATION if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) { @@ -1573,7 +1568,6 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) seq_putc(m, '\n'); } mutex_unlock(&dev->mode_config.fb_lock); - mutex_unlock(&dev->struct_mutex); return 0; } From patchwork Fri Oct 4 13:40:12 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174575 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 66BB714DB for ; Fri, 4 Oct 2019 13:58:19 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 4F3C320700 for ; Fri, 4 Oct 2019 13:58:19 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 4F3C320700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with 
ESMTP id B53F26EB94; Fri, 4 Oct 2019 13:58:16 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 00E156EB94 for ; Fri, 4 Oct 2019 13:58:14 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723967-1500050 for multiple; Fri, 04 Oct 2019 14:40:20 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:12 +0100 Message-Id: <20191004134015.13204-18-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 17/20] drm/i915: Remove struct_mutex guard for debugfs/opregion X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Having a struct_mutex around the read of a BIOS blob serves no purpose. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 77933b23070e..298a3e879e65 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1500,21 +1500,11 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) static int i915_opregion(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_opregion *opregion = &dev_priv->opregion; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; + struct intel_opregion *opregion = &node_to_i915(m->private)->opregion; if (opregion->header) seq_write(m, opregion->header, OPREGION_SIZE); - mutex_unlock(&dev->struct_mutex); - -out: return 0; } From patchwork Fri Oct 4 13:40:13 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174543 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id AE2BC14DB for ; Fri, 4 Oct 2019 13:40:53 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 964FB20867 for ; Fri, 4 Oct 2019 13:40:53 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 964FB20867 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by 
gabe.freedesktop.org (Postfix) with ESMTP id 1132D6EB8E; Fri, 4 Oct 2019 13:40:43 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id ABBF26EB86 for ; Fri, 4 Oct 2019 13:40:39 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723968-1500050 for multiple; Fri, 04 Oct 2019 14:40:21 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:13 +0100 Message-Id: <20191004134015.13204-19-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 18/20] drm/i915: Drop struct_mutex from suspend state save/restore X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" struct_mutex provides no serialisation of the registers and data structures being saved and restored across suspend/resume. It is completely superfluous here. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_suspend.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 2b2086def0f1..8812cdd9007f 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -66,8 +66,6 @@ int i915_save_state(struct drm_i915_private *dev_priv) struct pci_dev *pdev = dev_priv->drm.pdev; int i; - mutex_lock(&dev_priv->drm.struct_mutex); - i915_save_display(dev_priv); if (IS_GEN(dev_priv, 4)) @@ -101,8 +99,6 @@ int i915_save_state(struct drm_i915_private *dev_priv) dev_priv->regfile.saveSWF3[i] = I915_READ(SWF3(i)); } - mutex_unlock(&dev_priv->drm.struct_mutex); - return 0; } @@ -111,8 +107,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) struct pci_dev *pdev = dev_priv->drm.pdev; int i; - mutex_lock(&dev_priv->drm.struct_mutex); - if (IS_GEN(dev_priv, 4)) pci_write_config_word(pdev, GCDGMBUS, dev_priv->regfile.saveGCDGMBUS); @@ -146,8 +140,6 @@ int i915_restore_state(struct drm_i915_private *dev_priv) I915_WRITE(SWF3(i), dev_priv->regfile.saveSWF3[i]); } - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_gmbus_reset(dev_priv); return 0; From patchwork Fri Oct 4 13:40:14 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174573 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B05F913B1 for ; Fri, 4 Oct 2019 13:58:18 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 04D4A20700 for ; Fri, 4 Oct 2019 13:58:17 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 
04D4A20700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 7DFCB6EB97; Fri, 4 Oct 2019 13:58:16 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 011F76EB97 for ; Fri, 4 Oct 2019 13:58:14 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723969-1500050 for multiple; Fri, 04 Oct 2019 14:40:21 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:14 +0100 Message-Id: <20191004134015.13204-20-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 19/20] drm/i915/selftests: Drop vestigial struct_mutex guards X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" We no longer need struct_mutex to serialise request emission, so remove it from the gt selftests. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 15 +- .../drm/i915/gem/selftests/i915_gem_context.c | 4 - .../drm/i915/gem/selftests/i915_gem_mman.c | 2 - .../drm/i915/gem/selftests/i915_gem_phys.c | 2 - drivers/gpu/drm/i915/gt/selftest_lrc.c | 148 +++--------------- .../gpu/drm/i915/gt/selftest_workarounds.c | 11 +- drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 4 - drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 - 8 files changed, 27 insertions(+), 161 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 3314858f3046..e42abddd4a36 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1639,7 +1639,6 @@ int i915_gem_huge_page_mock_selftests(void) mkwrite_device_info(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL; mkwrite_device_info(dev_priv)->ppgtt_size = 48; - mutex_lock(&dev_priv->drm.struct_mutex); ppgtt = i915_ppgtt_create(dev_priv); if (IS_ERR(ppgtt)) { err = PTR_ERR(ppgtt); @@ -1665,9 +1664,7 @@ int i915_gem_huge_page_mock_selftests(void) i915_vm_put(&ppgtt->vm); out_unlock: - mutex_unlock(&dev_priv->drm.struct_mutex); drm_dev_put(&dev_priv->drm); - return err; } @@ -1684,7 +1681,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) struct drm_file *file; struct i915_gem_context *ctx; struct i915_address_space *vm; - intel_wakeref_t wakeref; int err; if (!HAS_PPGTT(i915)) { @@ -1699,13 +1695,10 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); - goto out_unlock; + goto out_file; } mutex_lock(&ctx->mutex); @@ -1716,11 +1709,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) err = i915_subtests(tests, 
ctx); -out_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - +out_file: mock_file_free(i915, file); - return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index d44fa9d356f1..fb58c0919ea1 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -307,9 +307,7 @@ static int live_parallel_switch(void *arg) struct igt_live_test t; int n; - mutex_lock(&i915->drm.struct_mutex); err = igt_live_test_begin(&t, i915, __func__, ""); - mutex_unlock(&i915->drm.struct_mutex); if (err) break; @@ -341,10 +339,8 @@ static int live_parallel_switch(void *arg) data[n].tsk = NULL; } - mutex_lock(&i915->drm.struct_mutex); if (igt_live_test_end(&t)) err = -EIO; - mutex_unlock(&i915->drm.struct_mutex); } out: diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 1cd25cfd0246..cfa52c525691 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -669,9 +669,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = make_obj_busy(obj); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c index 94a15e3f6db8..34932871b3a5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c @@ -25,9 +25,7 @@ static int mock_phys_object(void *arg) goto out; } - mutex_lock(&i915->drm.struct_mutex); err = i915_gem_object_attach_phys(obj, PAGE_SIZE); - mutex_unlock(&i915->drm.struct_mutex); if (err) { pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); goto out_obj; 
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8dc42c5c7569..393ae5321e1d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -26,17 +26,13 @@ static int live_sanitycheck(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; struct igt_spinner spin; - intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin, &i915->gt)) - goto err_unlock; + return -ENOMEM; ctx = kernel_context(i915); if (!ctx) @@ -73,9 +69,6 @@ static int live_sanitycheck(void *arg) kernel_context_close(ctx); err_spin: igt_spinner_fini(&spin); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -402,7 +395,6 @@ static int live_timeslice_preempt(void *arg) { struct drm_i915_private *i915 = arg; struct drm_i915_gem_object *obj; - intel_wakeref_t wakeref; struct i915_vma *vma; void *vaddr; int err = 0; @@ -417,14 +409,9 @@ static int live_timeslice_preempt(void *arg) * ready task. 
*/ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_unlock; - } + if (IS_ERR(obj)) + return PTR_ERR(obj); vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { @@ -469,10 +456,6 @@ static int live_timeslice_preempt(void *arg) i915_gem_object_unpin_map(obj); err_obj: i915_gem_object_put(obj); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - return err; } @@ -484,7 +467,6 @@ static int live_busywait_preempt(void *arg) struct drm_i915_gem_object *obj; struct i915_vma *vma; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; u32 *map; @@ -493,12 +475,9 @@ static int live_busywait_preempt(void *arg) * preempt the busywaits used to synchronise between rings. */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - ctx_hi = kernel_context(i915); if (!ctx_hi) - goto err_unlock; + return -ENOMEM; ctx_hi->sched.priority = I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); @@ -652,9 +631,6 @@ static int live_busywait_preempt(void *arg) kernel_context_close(ctx_lo); err_ctx_hi: kernel_context_close(ctx_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -683,7 +659,6 @@ static int live_preempt(void *arg) struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) @@ -692,11 +667,8 @@ static int live_preempt(void *arg) if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) pr_err("Logical preemption supported, but not exposed\n"); - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + return 
-ENOMEM; if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; @@ -776,9 +748,6 @@ static int live_preempt(void *arg) igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -790,17 +759,13 @@ static int live_late_preempt(void *arg) struct intel_engine_cs *engine; struct i915_sched_attr attr = {}; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + return -ENOMEM; if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; @@ -882,9 +847,6 @@ static int live_late_preempt(void *arg) igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -929,7 +891,6 @@ static int live_nopreempt(void *arg) struct intel_engine_cs *engine; struct preempt_client a, b; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -940,11 +901,8 @@ static int live_nopreempt(void *arg) if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (preempt_client_init(i915, &a)) - goto err_unlock; + return -ENOMEM; if (preempt_client_init(i915, &b)) goto err_client_a; b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); @@ -1018,9 +976,6 @@ static int live_nopreempt(void *arg) preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -1040,7 +995,6 @@ static int live_suppress_self_preempt(void *arg) }; struct preempt_client a, b; enum 
intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -1059,11 +1013,8 @@ static int live_suppress_self_preempt(void *arg) if (intel_vgpu_active(i915)) return 0; /* GVT forces single port & request submission */ - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (preempt_client_init(i915, &a)) - goto err_unlock; + return -ENOMEM; if (preempt_client_init(i915, &b)) goto err_client_a; @@ -1144,9 +1095,6 @@ static int live_suppress_self_preempt(void *arg) preempt_client_fini(&b); err_client_a: preempt_client_fini(&a); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -1216,7 +1164,6 @@ static int live_suppress_wait_preempt(void *arg) struct preempt_client client[4]; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; int i; @@ -1229,11 +1176,8 @@ static int live_suppress_wait_preempt(void *arg) if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (preempt_client_init(i915, &client[0])) /* ELSP[0] */ - goto err_unlock; + return -ENOMEM; if (preempt_client_init(i915, &client[1])) /* ELSP[1] */ goto err_client_0; if (preempt_client_init(i915, &client[2])) /* head of queue */ @@ -1319,9 +1263,6 @@ static int live_suppress_wait_preempt(void *arg) preempt_client_fini(&client[1]); err_client_0: preempt_client_fini(&client[0]); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -1338,7 +1279,6 @@ static int live_chain_preempt(void *arg) struct intel_engine_cs *engine; struct preempt_client hi, lo; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; /* @@ -1350,11 +1290,8 @@ static int live_chain_preempt(void *arg) if (!HAS_LOGICAL_RING_PREEMPTION(i915)) return 0; - 
mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (preempt_client_init(i915, &hi)) - goto err_unlock; + return -ENOMEM; if (preempt_client_init(i915, &lo)) goto err_client_hi; @@ -1465,9 +1402,6 @@ static int live_chain_preempt(void *arg) preempt_client_fini(&lo); err_client_hi: preempt_client_fini(&hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; err_wedged: @@ -1485,7 +1419,6 @@ static int live_preempt_hang(void *arg) struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; enum intel_engine_id id; - intel_wakeref_t wakeref; int err = -ENOMEM; if (!HAS_LOGICAL_RING_PREEMPTION(i915)) @@ -1494,11 +1427,8 @@ static int live_preempt_hang(void *arg) if (!intel_has_reset_engine(&i915->gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - if (igt_spinner_init(&spin_hi, &i915->gt)) - goto err_unlock; + return -ENOMEM; if (igt_spinner_init(&spin_lo, &i915->gt)) goto err_spin_hi; @@ -1590,9 +1520,6 @@ static int live_preempt_hang(void *arg) igt_spinner_fini(&spin_lo); err_spin_hi: igt_spinner_fini(&spin_hi); -err_unlock: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -1684,11 +1611,9 @@ static int smoke_crescendo_thread(void *arg) struct i915_gem_context *ctx = smoke_context(smoke); int err; - mutex_lock(&smoke->i915->drm.struct_mutex); err = smoke_submit(smoke, ctx, count % I915_PRIORITY_MAX, smoke->batch); - mutex_unlock(&smoke->i915->drm.struct_mutex); if (err) return err; @@ -1709,8 +1634,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) unsigned long count; int err = 0; - mutex_unlock(&smoke->i915->drm.struct_mutex); - for_each_engine(engine, smoke->i915, id) { arg[id] = *smoke; arg[id].engine = engine; @@ -1743,8 +1666,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int 
flags) put_task_struct(tsk[id]); } - mutex_lock(&smoke->i915->drm.struct_mutex); - pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", count, flags, RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext); @@ -1787,7 +1708,6 @@ static int live_preempt_smoke(void *arg) .ncontext = 1024, }; const unsigned int phase[] = { 0, BATCH }; - intel_wakeref_t wakeref; struct igt_live_test t; int err = -ENOMEM; u32 *cs; @@ -1802,13 +1722,10 @@ static int live_preempt_smoke(void *arg) if (!smoke.contexts) return -ENOMEM; - mutex_lock(&smoke.i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm); - smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE); if (IS_ERR(smoke.batch)) { err = PTR_ERR(smoke.batch); - goto err_unlock; + goto err_free; } cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); @@ -1855,9 +1772,7 @@ static int live_preempt_smoke(void *arg) err_batch: i915_gem_object_put(smoke.batch); -err_unlock: - intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref); - mutex_unlock(&smoke.i915->drm.struct_mutex); +err_free: kfree(smoke.contexts); return err; @@ -1995,19 +1910,17 @@ static int live_virtual_engine(void *arg) struct intel_gt *gt = &i915->gt; enum intel_engine_id id; unsigned int class, inst; - int err = -ENODEV; + int err; if (USES_GUC_SUBMISSION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for_each_engine(engine, i915, id) { err = nop_virtual_engine(i915, &engine, 1, 1, 0); if (err) { pr_err("Failed to wrap engine %s: err=%d\n", engine->name, err); - goto out_unlock; + return err; } } @@ -2028,17 +1941,15 @@ static int live_virtual_engine(void *arg) err = nop_virtual_engine(i915, siblings, nsibling, n, 0); if (err) - goto out_unlock; + return err; } err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN); if (err) - goto out_unlock; + return err; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } static int mask_virtual_engine(struct 
drm_i915_private *i915, @@ -2117,9 +2028,6 @@ static int mask_virtual_engine(struct drm_i915_private *i915, } err = igt_live_test_end(&t); - if (err) - goto out; - out: if (igt_flush_test(i915)) err = -EIO; @@ -2142,13 +2050,11 @@ static int live_virtual_mask(void *arg) struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_gt *gt = &i915->gt; unsigned int class, inst; - int err = 0; + int err; if (USES_GUC_SUBMISSION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { unsigned int nsibling; @@ -2164,12 +2070,10 @@ static int live_virtual_mask(void *arg) err = mask_virtual_engine(i915, siblings, nsibling); if (err) - goto out_unlock; + return err; } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } static int bond_virtual_engine(struct drm_i915_private *i915, @@ -2320,13 +2224,11 @@ static int live_virtual_bond(void *arg) struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; struct intel_gt *gt = &i915->gt; unsigned int class, inst; - int err = 0; + int err; if (USES_GUC_SUBMISSION(i915)) return 0; - mutex_lock(&i915->drm.struct_mutex); - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { const struct phase *p; int nsibling; @@ -2349,14 +2251,12 @@ static int live_virtual_bond(void *arg) if (err) { pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", __func__, p->name, class, nsibling, err); - goto out_unlock; + return err; } } } -out_unlock: - mutex_unlock(&i915->drm.struct_mutex); - return err; + return 0; } int intel_execlists_live_selftests(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 7c7aceb85a74..95627e80f246 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -708,9 +708,7 @@ static int live_dirty_whitelist(void *arg) wakeref = intel_runtime_pm_get(&i915->runtime_pm); - 
mutex_unlock(&i915->drm.struct_mutex); file = mock_file(i915); - mutex_lock(&i915->drm.struct_mutex); if (IS_ERR(file)) { err = PTR_ERR(file); goto out_rpm; @@ -732,9 +730,7 @@ static int live_dirty_whitelist(void *arg) } out_file: - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); - mutex_lock(&i915->drm.struct_mutex); out_rpm: intel_runtime_pm_put(&i915->runtime_pm, wakeref); return err; @@ -1274,14 +1270,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) SUBTEST(live_gpu_reset_workarounds), SUBTEST(live_engine_reset_workarounds), }; - int err; if (intel_gt_is_wedged(&i915->gt)) return 0; - mutex_lock(&i915->drm.struct_mutex); - err = i915_subtests(tests, i915); - mutex_unlock(&i915->drm.struct_mutex); - - return err; + return i915_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index bba0eafe1cdb..f927f851aadf 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -116,7 +116,6 @@ static int igt_guc_clients(void *args) int err = 0; GEM_BUG_ON(!HAS_GT_UC(dev_priv)); - mutex_lock(&dev_priv->drm.struct_mutex); wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); guc = &dev_priv->gt.uc.guc; @@ -190,7 +189,6 @@ static int igt_guc_clients(void *args) guc_clients_enable(guc); unlock: intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); return err; } @@ -208,7 +206,6 @@ static int igt_guc_doorbells(void *arg) u16 db_id; GEM_BUG_ON(!HAS_GT_UC(dev_priv)); - mutex_lock(&dev_priv->drm.struct_mutex); wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); guc = &dev_priv->gt.uc.guc; @@ -299,7 +296,6 @@ static int igt_guc_doorbells(void *arg) } unlock: intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); - mutex_unlock(&dev_priv->drm.struct_mutex); return err; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8d8121c02161..165b3a7f9744 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1797,7 +1797,6 @@ static int igt_cs_tlb(void *arg) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&i915->drm.struct_mutex); ctx = live_context(i915, file); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); @@ -2020,7 +2019,6 @@ static int igt_cs_tlb(void *arg) out_vm: i915_vm_put(vm); out_unlock: - mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); return err; } From patchwork Fri Oct 4 13:40:15 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11174531 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BF24316B1 for ; Fri, 4 Oct 2019 13:40:47 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id A7CC820700 for ; Fri, 4 Oct 2019 13:40:47 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org A7CC820700 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id BF6836EB8A; Fri, 4 Oct 2019 13:40:42 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id DD1B76EB83 for ; Fri, 4 Oct 2019 13:40:38 +0000 (UTC) X-Default-Received-SPF: pass 
(skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 18723970-1500050 for multiple; Fri, 04 Oct 2019 14:40:21 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Fri, 4 Oct 2019 14:40:15 +0100 Message-Id: <20191004134015.13204-21-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.23.0 In-Reply-To: <20191004134015.13204-1-chris@chris-wilson.co.uk> References: <20191004134015.13204-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 20/20] drm/i915: Drop struct_mutex from around GEM initialisation X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" We no longer need to placate lockdep by holding struct_mutex for our initialisation, so don't. 
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2 -- drivers/gpu/drm/i915/i915_gem.c | 9 --------- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 7 ------- 3 files changed, 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 7c316d4633db..7987b54fb1f5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -174,7 +174,6 @@ void i915_gem_resume(struct drm_i915_private *i915) { GEM_TRACE("\n"); - mutex_lock(&i915->drm.struct_mutex); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); if (intel_gt_init_hw(&i915->gt)) @@ -198,7 +197,6 @@ void i915_gem_resume(struct drm_i915_private *i915) out_unlock: intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); return; err_wedged: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f6db415985d5..0ddbd3a5fb8d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1249,7 +1249,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * we hold the forcewake during initialisation these problems * just magically go away. */ - mutex_lock(&dev_priv->drm.struct_mutex); intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); ret = i915_init_ggtt(dev_priv); @@ -1319,7 +1318,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_gt; intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); return 0; @@ -1330,15 +1328,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) * driver doesn't explode during runtime. 
*/ err_gt: - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_gt_set_wedged_on_init(&dev_priv->gt); i915_gem_suspend(dev_priv); i915_gem_suspend_late(dev_priv); i915_gem_drain_workqueue(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); err_init_hw: intel_uc_fini_hw(&dev_priv->gt.uc); err_uc_init: @@ -1353,7 +1347,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_gt_driver_release(&dev_priv->gt); err_unlock: intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); if (ret != -EIO) { intel_uc_cleanup_firmwares(&dev_priv->gt.uc); @@ -1406,10 +1399,8 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) /* Flush any outstanding unpin_work. */ i915_gem_drain_workqueue(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); intel_uc_fini_hw(&dev_priv->gt.uc); intel_uc_fini(&dev_priv->gt.uc); - mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_drain_freed_objects(dev_priv); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 335f37ba98de..70a7026db08d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -68,10 +68,7 @@ static void mock_device_release(struct drm_device *dev) drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); - mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(&i915->ggtt); - mutex_unlock(&i915->drm.struct_mutex); - destroy_workqueue(i915->wq); i915_gemfs_fini(i915); @@ -179,8 +176,6 @@ struct drm_i915_private *mock_gem_device(void) intel_timelines_init(i915); - mutex_lock(&i915->drm.struct_mutex); - mock_init_ggtt(i915, &i915->ggtt); mkwrite_device_info(i915)->engine_mask = BIT(0); @@ -197,7 +192,6 @@ struct drm_i915_private *mock_gem_device(void) goto err_context; intel_engines_driver_register(i915); - mutex_unlock(&i915->drm.struct_mutex); WARN_ON(i915_gemfs_init(i915)); @@ -208,7 +202,6 @@ struct drm_i915_private 
*mock_gem_device(void) err_engine: mock_engine_free(i915->engine[RCS0]); err_unlock: - mutex_unlock(&i915->drm.struct_mutex); intel_timelines_fini(i915); destroy_workqueue(i915->wq); err_drv: