From patchwork Thu Nov 21 13:51:27 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Chris Wilson <chris@chris-wilson.co.uk>
X-Patchwork-Id: 11256257
Return-Path: <SRS0=Py3T=ZN=lists.freedesktop.org=intel-gfx-bounces@kernel.org>
Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org
 [172.30.200.123])
	by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 54A3B186D
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:52 +0000 (UTC)
Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by mail.kernel.org (Postfix) with ESMTPS id 3CA9520679
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:52 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 3CA9520679
Authentication-Results: mail.kernel.org;
 dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk
Authentication-Results: mail.kernel.org;
 spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org
Received: from gabe.freedesktop.org (localhost [127.0.0.1])
	by gabe.freedesktop.org (Postfix) with ESMTP id 413516EF32;
	Thu, 21 Nov 2019 13:51:51 +0000 (UTC)
X-Original-To: intel-gfx@lists.freedesktop.org
Delivered-To: intel-gfx@lists.freedesktop.org
Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192])
 by gabe.freedesktop.org (Postfix) with ESMTPS id 8A7436EF30
 for <intel-gfx@lists.freedesktop.org>; Thu, 21 Nov 2019 13:51:49 +0000 (UTC)
X-Default-Received-SPF: pass (skip=forwardok (res=PASS))
 x-ip-name=78.156.65.138;
Received: from haswell.alporthouse.com (unverified [78.156.65.138])
 by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19286322-1500050
 for multiple; Thu, 21 Nov 2019 13:51:35 +0000
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Thu, 21 Nov 2019 13:51:27 +0000
Message-Id: <20191121135131.338984-1-chris@chris-wilson.co.uk>
X-Mailer: git-send-email 2.24.0
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH 1/5] drm/i915: Use a ctor for TYPESAFE_BY_RCU
 i915_request
X-BeenThere: intel-gfx@lists.freedesktop.org
X-Mailman-Version: 2.1.23
Precedence: list
List-Id: Intel graphics driver community testing & development
 <intel-gfx.lists.freedesktop.org>
List-Unsubscribe: <https://lists.freedesktop.org/mailman/options/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=unsubscribe>
List-Archive: <https://lists.freedesktop.org/archives/intel-gfx>
List-Post: <mailto:intel-gfx@lists.freedesktop.org>
List-Help: <mailto:intel-gfx-request@lists.freedesktop.org?subject=help>
List-Subscribe: <https://lists.freedesktop.org/mailman/listinfo/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=subscribe>
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

As we start peeking into requests for longer and longer, e.g.
incorporating use of spinlocks when only protected by an
rcu_read_lock(), we need to be careful in how we reset the request when
recycling and need to preserve any barriers that may still be in use as
the request is reset for reuse.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_request.c   | 37 ++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_scheduler.c |  6 +++++
 drivers/gpu/drm/i915/i915_scheduler.h |  1 +
 drivers/gpu/drm/i915/i915_sw_fence.c  |  8 ++++++
 drivers/gpu/drm/i915/i915_sw_fence.h  |  2 ++
 5 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 00011f9533b6..5e5bd7d57134 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -214,7 +214,7 @@ static void remove_from_engine(struct i915_request *rq)
 		spin_lock(&engine->active.lock);
 		locked = engine;
 	}
-	list_del(&rq->sched.link);
+	list_del_init(&rq->sched.link);
 	spin_unlock_irq(&locked->active.lock);
 }
 
@@ -586,6 +586,19 @@ request_alloc_slow(struct intel_timeline *tl, gfp_t gfp)
 	return kmem_cache_alloc(global.slab_requests, gfp);
 }
 
+static void __i915_request_ctor(void *arg)
+{
+	struct i915_request *rq = arg;
+
+	spin_lock_init(&rq->lock);
+	i915_sched_node_init(&rq->sched);
+	i915_sw_fence_init(&rq->submit, submit_notify);
+	i915_sw_fence_init(&rq->semaphore, semaphore_notify);
+
+	INIT_LIST_HEAD(&rq->execute_cb);
+	rq->file_priv = NULL;
+}
+
 struct i915_request *
 __i915_request_create(struct intel_context *ce, gfp_t gfp)
 {
@@ -655,24 +668,20 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
-	spin_lock_init(&rq->lock);
 	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
 		       tl->fence_context, seqno);
 
 	/* We bump the ref for the fence chain */
-	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
-	i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
+	i915_sw_fence_reinit(&i915_request_get(rq)->submit);
+	i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);
 
-	i915_sched_node_init(&rq->sched);
+	i915_sched_node_reinit(&rq->sched);
 
 	/* No zalloc, must clear what we need by hand */
-	rq->file_priv = NULL;
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->flags = 0;
 
-	INIT_LIST_HEAD(&rq->execute_cb);
-
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
 	 * eventually emit this request. This is to guarantee that the
@@ -1533,10 +1542,14 @@ static struct i915_global_request global = { {
 
 int __init i915_global_request_init(void)
 {
-	global.slab_requests = KMEM_CACHE(i915_request,
-					  SLAB_HWCACHE_ALIGN |
-					  SLAB_RECLAIM_ACCOUNT |
-					  SLAB_TYPESAFE_BY_RCU);
+	global.slab_requests =
+		kmem_cache_create("i915_request",
+				  sizeof(struct i915_request),
+				  __alignof__(struct i915_request),
+				  SLAB_HWCACHE_ALIGN |
+				  SLAB_RECLAIM_ACCOUNT |
+				  SLAB_TYPESAFE_BY_RCU,
+				  __i915_request_ctor);
 	if (!global.slab_requests)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 194246548c4d..a5a6dbe6a53c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -387,6 +387,10 @@ void i915_sched_node_init(struct i915_sched_node *node)
 	INIT_LIST_HEAD(&node->signalers_list);
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
+}
+
+void i915_sched_node_reinit(struct i915_sched_node *node)
+{
 	node->attr.priority = I915_PRIORITY_INVALID;
 	node->semaphores = 0;
 	node->flags = 0;
@@ -481,6 +485,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
 			i915_dependency_free(dep);
 	}
+	INIT_LIST_HEAD(&node->signalers_list);
 
 	/* Remove ourselves from everyone who depends upon us */
 	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
@@ -491,6 +496,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 		if (dep->flags & I915_DEPENDENCY_ALLOC)
 			i915_dependency_free(dep);
 	}
+	INIT_LIST_HEAD(&node->waiters_list);
 
 	spin_unlock_irq(&schedule_lock);
 }
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 07d243acf553..d1dc4efef77b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -26,6 +26,7 @@
 					 sched.link)
 
 void i915_sched_node_init(struct i915_sched_node *node);
+void i915_sched_node_reinit(struct i915_sched_node *node);
 
 bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 				      struct i915_sched_node *signal,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 6a88db291252..eacc6c5ce0fd 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -227,6 +227,14 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
 	fence->flags = (unsigned long)fn;
 }
 
+void i915_sw_fence_reinit(struct i915_sw_fence *fence)
+{
+	debug_fence_init(fence);
+
+	atomic_set(&fence->pending, 1);
+	fence->error = 0;
+}
+
 void i915_sw_fence_commit(struct i915_sw_fence *fence)
 {
 	debug_fence_activate(fence);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index ab7d58bd0b9d..1e90d9a51bd2 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -54,6 +54,8 @@ do {								\
 	__i915_sw_fence_init((fence), (fn), NULL, NULL)
 #endif
 
+void i915_sw_fence_reinit(struct i915_sw_fence *fence);
+
 #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
 void i915_sw_fence_fini(struct i915_sw_fence *fence);
 #else

From patchwork Thu Nov 21 13:51:28 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Chris Wilson <chris@chris-wilson.co.uk>
X-Patchwork-Id: 11256253
Return-Path: <SRS0=Py3T=ZN=lists.freedesktop.org=intel-gfx-bounces@kernel.org>
Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org
 [172.30.200.123])
	by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E5EAB112B
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:46 +0000 (UTC)
Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by mail.kernel.org (Postfix) with ESMTPS id CE35420679
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:46 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org CE35420679
Authentication-Results: mail.kernel.org;
 dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk
Authentication-Results: mail.kernel.org;
 spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org
Received: from gabe.freedesktop.org (localhost [127.0.0.1])
	by gabe.freedesktop.org (Postfix) with ESMTP id 2581A6EF2D;
	Thu, 21 Nov 2019 13:51:45 +0000 (UTC)
X-Original-To: intel-gfx@lists.freedesktop.org
Delivered-To: intel-gfx@lists.freedesktop.org
Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192])
 by gabe.freedesktop.org (Postfix) with ESMTPS id 8DEBF6E10D
 for <intel-gfx@lists.freedesktop.org>; Thu, 21 Nov 2019 13:51:42 +0000 (UTC)
X-Default-Received-SPF: pass (skip=forwardok (res=PASS))
 x-ip-name=78.156.65.138;
Received: from haswell.alporthouse.com (unverified [78.156.65.138])
 by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19286323-1500050
 for multiple; Thu, 21 Nov 2019 13:51:35 +0000
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Thu, 21 Nov 2019 13:51:28 +0000
Message-Id: <20191121135131.338984-2-chris@chris-wilson.co.uk>
X-Mailer: git-send-email 2.24.0
In-Reply-To: <20191121135131.338984-1-chris@chris-wilson.co.uk>
References: <20191121135131.338984-1-chris@chris-wilson.co.uk>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH 2/5] drm/i915/selftests: Force bonded submission
 to overlap
X-BeenThere: intel-gfx@lists.freedesktop.org
X-Mailman-Version: 2.1.23
Precedence: list
List-Id: Intel graphics driver community testing & development
 <intel-gfx.lists.freedesktop.org>
List-Unsubscribe: <https://lists.freedesktop.org/mailman/options/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=unsubscribe>
List-Archive: <https://lists.freedesktop.org/archives/intel-gfx>
List-Post: <mailto:intel-gfx@lists.freedesktop.org>
List-Help: <mailto:intel-gfx-request@lists.freedesktop.org?subject=help>
List-Subscribe: <https://lists.freedesktop.org/mailman/listinfo/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=subscribe>
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Bonded request submission is designed to allow requests to execute in
parallel as laid out by the user. If the master request is already
finished before its bonded pair is submitted, the pair were not destined
to run in parallel and we lose the information about the master engine
to dictate selection of the secondary. If the second request was
required to be run on a particular engine in a virtual set, that should
have been specified, rather than left to the whims of a random
unconnected request!

In the selftest, I made the mistake of not ensuring the master would
overlap with its bonded pairs, meaning that it could indeed complete
before we submitted the bonds. Those bonds were then free to select any
available engine in their virtual set, and not the one expected by the
test.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 16ebe4d2308e..f3b0610d1f10 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -3036,15 +3036,21 @@ static int bond_virtual_engine(struct intel_gt *gt,
 	struct i915_gem_context *ctx;
 	struct i915_request *rq[16];
 	enum intel_engine_id id;
+	struct igt_spinner spin;
 	unsigned long n;
 	int err;
 
 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
 
-	ctx = kernel_context(gt->i915);
-	if (!ctx)
+	if (igt_spinner_init(&spin, gt))
 		return -ENOMEM;
 
+	ctx = kernel_context(gt->i915);
+	if (!ctx) {
+		err = -ENOMEM;
+		goto err_spin;
+	}
+
 	err = 0;
 	rq[0] = ERR_PTR(-ENOMEM);
 	for_each_engine(master, gt, id) {
@@ -3055,7 +3061,7 @@ static int bond_virtual_engine(struct intel_gt *gt,
 
 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
 
-		rq[0] = igt_request_alloc(ctx, master);
+		rq[0] = spinner_create_request(&spin, ctx, master, MI_NOOP);
 		if (IS_ERR(rq[0])) {
 			err = PTR_ERR(rq[0]);
 			goto out;
@@ -3068,10 +3074,17 @@ static int bond_virtual_engine(struct intel_gt *gt,
 							       &fence,
 							       GFP_KERNEL);
 		}
+
 		i915_request_add(rq[0]);
 		if (err < 0)
 			goto out;
 
+		if (!(flags & BOND_SCHEDULE) &&
+		    !igt_wait_for_spinner(&spin, rq[0])) {
+			err = -EIO;
+			goto out;
+		}
+
 		for (n = 0; n < nsibling; n++) {
 			struct intel_context *ve;
 
@@ -3119,6 +3132,8 @@ static int bond_virtual_engine(struct intel_gt *gt,
 			}
 		}
 		onstack_fence_fini(&fence);
+		intel_engine_flush_submission(master);
+		igt_spinner_end(&spin);
 
 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
 			pr_err("Master request did not execute (on %s)!\n",
@@ -3156,6 +3171,8 @@ static int bond_virtual_engine(struct intel_gt *gt,
 		err = -EIO;
 
 	kernel_context_close(ctx);
+err_spin:
+	igt_spinner_fini(&spin);
 	return err;
 }
 

From patchwork Thu Nov 21 13:51:29 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Chris Wilson <chris@chris-wilson.co.uk>
X-Patchwork-Id: 11256247
Return-Path: <SRS0=Py3T=ZN=lists.freedesktop.org=intel-gfx-bounces@kernel.org>
Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org
 [172.30.200.123])
	by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1D06D13A4
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:44 +0000 (UTC)
Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by mail.kernel.org (Postfix) with ESMTPS id EE7932070B
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:43 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org EE7932070B
Authentication-Results: mail.kernel.org;
 dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk
Authentication-Results: mail.kernel.org;
 spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org
Received: from gabe.freedesktop.org (localhost [127.0.0.1])
	by gabe.freedesktop.org (Postfix) with ESMTP id AA12B6EF25;
	Thu, 21 Nov 2019 13:51:42 +0000 (UTC)
X-Original-To: intel-gfx@lists.freedesktop.org
Delivered-To: intel-gfx@lists.freedesktop.org
Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192])
 by gabe.freedesktop.org (Postfix) with ESMTPS id AC41C6E10D
 for <intel-gfx@lists.freedesktop.org>; Thu, 21 Nov 2019 13:51:40 +0000 (UTC)
X-Default-Received-SPF: pass (skip=forwardok (res=PASS))
 x-ip-name=78.156.65.138;
Received: from haswell.alporthouse.com (unverified [78.156.65.138])
 by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19286324-1500050
 for multiple; Thu, 21 Nov 2019 13:51:35 +0000
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Thu, 21 Nov 2019 13:51:29 +0000
Message-Id: <20191121135131.338984-3-chris@chris-wilson.co.uk>
X-Mailer: git-send-email 2.24.0
In-Reply-To: <20191121135131.338984-1-chris@chris-wilson.co.uk>
References: <20191121135131.338984-1-chris@chris-wilson.co.uk>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH 3/5] drm/i915/selftests: Always hold a reference
 on a waited upon request
X-BeenThere: intel-gfx@lists.freedesktop.org
X-Mailman-Version: 2.1.23
Precedence: list
List-Id: Intel graphics driver community testing & development
 <intel-gfx.lists.freedesktop.org>
List-Unsubscribe: <https://lists.freedesktop.org/mailman/options/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=unsubscribe>
List-Archive: <https://lists.freedesktop.org/archives/intel-gfx>
List-Post: <mailto:intel-gfx@lists.freedesktop.org>
List-Help: <mailto:intel-gfx-request@lists.freedesktop.org?subject=help>
List-Subscribe: <https://lists.freedesktop.org/mailman/listinfo/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=subscribe>
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Whenever we wait on a request, make sure we actually hold a reference to
it and that it cannot be retired/freed on another CPU!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 87 +++++++++++++++++++-------
 1 file changed, 66 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index f3b0610d1f10..f1b38f39e7a7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -748,15 +748,19 @@ static int live_busywait_preempt(void *arg)
 		*cs++ = 0;
 
 		intel_ring_advance(lo, cs);
+
+		i915_request_get(lo);
 		i915_request_add(lo);
 
 		if (wait_for(READ_ONCE(*map), 10)) {
+			i915_request_put(lo);
 			err = -ETIMEDOUT;
 			goto err_vma;
 		}
 
 		/* Low priority request should be busywaiting now */
 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
+			i915_request_put(lo);
 			pr_err("%s: Busywaiting request did not!\n",
 			       engine->name);
 			err = -EIO;
@@ -766,6 +770,7 @@ static int live_busywait_preempt(void *arg)
 		hi = igt_request_alloc(ctx_hi, engine);
 		if (IS_ERR(hi)) {
 			err = PTR_ERR(hi);
+			i915_request_put(lo);
 			goto err_vma;
 		}
 
@@ -773,6 +778,7 @@ static int live_busywait_preempt(void *arg)
 		if (IS_ERR(cs)) {
 			err = PTR_ERR(cs);
 			i915_request_add(hi);
+			i915_request_put(lo);
 			goto err_vma;
 		}
 
@@ -793,11 +799,13 @@ static int live_busywait_preempt(void *arg)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 			GEM_TRACE_DUMP();
 
+			i915_request_put(lo);
 			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_vma;
 		}
 		GEM_BUG_ON(READ_ONCE(*map));
+		i915_request_put(lo);
 
 		if (igt_live_test_end(&t)) {
 			err = -EIO;
@@ -1665,6 +1673,7 @@ static int live_suppress_wait_preempt(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct preempt_client client[4];
+	struct i915_request *rq[ARRAY_SIZE(client)] = {};
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	int err = -ENOMEM;
@@ -1698,7 +1707,6 @@ static int live_suppress_wait_preempt(void *arg)
 			continue;
 
 		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
-			struct i915_request *rq[ARRAY_SIZE(client)];
 			struct i915_request *dummy;
 
 			engine->execlists.preempt_hang.count = 0;
@@ -1708,18 +1716,22 @@ static int live_suppress_wait_preempt(void *arg)
 				goto err_client_3;
 
 			for (i = 0; i < ARRAY_SIZE(client); i++) {
-				rq[i] = spinner_create_request(&client[i].spin,
-							       client[i].ctx, engine,
-							       MI_NOOP);
-				if (IS_ERR(rq[i])) {
-					err = PTR_ERR(rq[i]);
+				struct i915_request *this;
+
+				this = spinner_create_request(&client[i].spin,
+							      client[i].ctx, engine,
+							      MI_NOOP);
+				if (IS_ERR(this)) {
+					err = PTR_ERR(this);
 					goto err_wedged;
 				}
 
 				/* Disable NEWCLIENT promotion */
-				__i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
 							&dummy->fence);
-				i915_request_add(rq[i]);
+
+				rq[i] = i915_request_get(this);
+				i915_request_add(this);
 			}
 
 			dummy_request_free(dummy);
@@ -1740,8 +1752,11 @@ static int live_suppress_wait_preempt(void *arg)
 				goto err_wedged;
 			}
 
-			for (i = 0; i < ARRAY_SIZE(client); i++)
+			for (i = 0; i < ARRAY_SIZE(client); i++) {
 				igt_spinner_end(&client[i].spin);
+				i915_request_put(rq[i]);
+				rq[i] = NULL;
+			}
 
 			if (igt_flush_test(gt->i915))
 				goto err_wedged;
@@ -1769,8 +1784,10 @@ static int live_suppress_wait_preempt(void *arg)
 	return err;
 
 err_wedged:
-	for (i = 0; i < ARRAY_SIZE(client); i++)
+	for (i = 0; i < ARRAY_SIZE(client); i++) {
 		igt_spinner_end(&client[i].spin);
+		i915_request_put(rq[i]);
+	}
 	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_3;
@@ -1815,6 +1832,8 @@ static int live_chain_preempt(void *arg)
 					    MI_ARB_CHECK);
 		if (IS_ERR(rq))
 			goto err_wedged;
+
+		i915_request_get(rq);
 		i915_request_add(rq);
 
 		ring_size = rq->wa_tail - rq->head;
@@ -1827,8 +1846,10 @@ static int live_chain_preempt(void *arg)
 		igt_spinner_end(&lo.spin);
 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
 			pr_err("Timed out waiting to flush %s\n", engine->name);
+			i915_request_put(rq);
 			goto err_wedged;
 		}
+		i915_request_put(rq);
 
 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
@@ -1862,6 +1883,8 @@ static int live_chain_preempt(void *arg)
 			rq = igt_request_alloc(hi.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
+
+			i915_request_get(rq);
 			i915_request_add(rq);
 			engine->schedule(rq, &attr);
 
@@ -1874,14 +1897,19 @@ static int live_chain_preempt(void *arg)
 				       count);
 				intel_engine_dump(engine, &p,
 						  "%s\n", engine->name);
+				i915_request_put(rq);
 				goto err_wedged;
 			}
 			igt_spinner_end(&lo.spin);
+			i915_request_put(rq);
 
 			rq = igt_request_alloc(lo.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
+
+			i915_request_get(rq);
 			i915_request_add(rq);
+
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
 					drm_info_printer(gt->i915->drm.dev);
@@ -1890,8 +1918,11 @@ static int live_chain_preempt(void *arg)
 				       count);
 				intel_engine_dump(engine, &p,
 						  "%s\n", engine->name);
+
+				i915_request_put(rq);
 				goto err_wedged;
 			}
+			i915_request_put(rq);
 		}
 
 		if (igt_live_test_end(&t)) {
@@ -2586,7 +2617,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
 #define CHAIN BIT(0)
 {
 	IGT_TIMEOUT(end_time);
-	struct i915_request *request[16];
+	struct i915_request *request[16] = {};
 	struct i915_gem_context *ctx[16];
 	struct intel_context *ve[16];
 	unsigned long n, prime, nc;
@@ -2632,27 +2663,35 @@ static int nop_virtual_engine(struct intel_gt *gt,
 		if (flags & CHAIN) {
 			for (nc = 0; nc < nctx; nc++) {
 				for (n = 0; n < prime; n++) {
-					request[nc] =
-						i915_request_create(ve[nc]);
-					if (IS_ERR(request[nc])) {
-						err = PTR_ERR(request[nc]);
+					struct i915_request *rq;
+
+					rq = i915_request_create(ve[nc]);
+					if (IS_ERR(rq)) {
+						err = PTR_ERR(rq);
 						goto out;
 					}
 
-					i915_request_add(request[nc]);
+					if (request[nc])
+						i915_request_put(request[nc]);
+					request[nc] = i915_request_get(rq);
+					i915_request_add(rq);
 				}
 			}
 		} else {
 			for (n = 0; n < prime; n++) {
 				for (nc = 0; nc < nctx; nc++) {
-					request[nc] =
-						i915_request_create(ve[nc]);
-					if (IS_ERR(request[nc])) {
-						err = PTR_ERR(request[nc]);
+					struct i915_request *rq;
+
+					rq = i915_request_create(ve[nc]);
+					if (IS_ERR(rq)) {
+						err = PTR_ERR(rq);
 						goto out;
 					}
 
-					i915_request_add(request[nc]);
+					if (request[nc])
+						i915_request_put(request[nc]);
+					request[nc] = i915_request_get(rq);
+					i915_request_add(rq);
 				}
 			}
 		}
@@ -2678,6 +2717,11 @@ static int nop_virtual_engine(struct intel_gt *gt,
 		if (prime == 1)
 			times[0] = times[1];
 
+		for (nc = 0; nc < nctx; nc++) {
+			i915_request_put(request[nc]);
+			request[nc] = NULL;
+		}
+
 		if (__igt_timeout(end_time, NULL))
 			break;
 	}
@@ -2695,6 +2739,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
 		err = -EIO;
 
 	for (nc = 0; nc < nctx; nc++) {
+		i915_request_put(request[nc]);
 		intel_context_unpin(ve[nc]);
 		intel_context_put(ve[nc]);
 		kernel_context_close(ctx[nc]);

From patchwork Thu Nov 21 13:51:30 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Chris Wilson <chris@chris-wilson.co.uk>
X-Patchwork-Id: 11256251
Return-Path: <SRS0=Py3T=ZN=lists.freedesktop.org=intel-gfx-bounces@kernel.org>
Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org
 [172.30.200.123])
	by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id F13D1112B
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:45 +0000 (UTC)
Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by mail.kernel.org (Postfix) with ESMTPS id D9B6020679
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:45 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org D9B6020679
Authentication-Results: mail.kernel.org;
 dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk
Authentication-Results: mail.kernel.org;
 spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org
Received: from gabe.freedesktop.org (localhost [127.0.0.1])
	by gabe.freedesktop.org (Postfix) with ESMTP id 24E366E10D;
	Thu, 21 Nov 2019 13:51:45 +0000 (UTC)
X-Original-To: intel-gfx@lists.freedesktop.org
Delivered-To: intel-gfx@lists.freedesktop.org
Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192])
 by gabe.freedesktop.org (Postfix) with ESMTPS id 9D6B86E10D
 for <intel-gfx@lists.freedesktop.org>; Thu, 21 Nov 2019 13:51:41 +0000 (UTC)
X-Default-Received-SPF: pass (skip=forwardok (res=PASS))
 x-ip-name=78.156.65.138;
Received: from haswell.alporthouse.com (unverified [78.156.65.138])
 by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19286325-1500050
 for multiple; Thu, 21 Nov 2019 13:51:35 +0000
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Thu, 21 Nov 2019 13:51:30 +0000
Message-Id: <20191121135131.338984-4-chris@chris-wilson.co.uk>
X-Mailer: git-send-email 2.24.0
In-Reply-To: <20191121135131.338984-1-chris@chris-wilson.co.uk>
References: <20191121135131.338984-1-chris@chris-wilson.co.uk>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH 4/5] drm/i915/gt: Adopt engine_park
 synchronisation rules for engine_retire
X-BeenThere: intel-gfx@lists.freedesktop.org
X-Mailman-Version: 2.1.23
Precedence: list
List-Id: Intel graphics driver community testing & development
 <intel-gfx.lists.freedesktop.org>
List-Unsubscribe: <https://lists.freedesktop.org/mailman/options/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=unsubscribe>
List-Archive: <https://lists.freedesktop.org/archives/intel-gfx>
List-Post: <mailto:intel-gfx@lists.freedesktop.org>
List-Help: <mailto:intel-gfx-request@lists.freedesktop.org?subject=help>
List-Subscribe: <https://lists.freedesktop.org/mailman/listinfo/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=subscribe>
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

In the next patch, we will introduce a new asynchronous retirement
worker, fed by execlists CS events. Here we may queue a retirement as
soon as a request is submitted to HW (and completes instantly), and we
also want to process that retirement as early as possible and cannot
afford to postpone it (as there may not be another opportunity to retire it
for a few seconds). To allow the new async retirer to run in parallel
with our submission, pull the __i915_request_queue (that passes the
request to HW) inside the timelines spinlock so that the retirement
cannot release the timeline before we have completed the submission.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 29 ++++++++++++++++-------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 373a4b9f159c..bd0af02bea16 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -74,18 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
 #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
 
 static void
-__intel_timeline_enter_and_release_pm(struct intel_timeline *tl,
-				      struct intel_engine_cs *engine)
+__queue_and_release_pm(struct i915_request *rq,
+		       struct intel_timeline *tl,
+		       struct intel_engine_cs *engine)
 {
 	struct intel_gt_timelines *timelines = &engine->gt->timelines;
 
+	/*
+	 * We have to serialise all potential retirement paths with our
+	 * submission, as we don't want to underflow either the
+	 * engine->wakeref.counter or our timeline->active_count.
+	 *
+	 * Equally, we cannot allow a new submission to start until
+	 * after we finish queueing, nor could we allow that submitter
+	 * to retire us before we are ready!
+	 */
 	spin_lock(&timelines->lock);
 
-	if (!atomic_fetch_inc(&tl->active_count))
-		list_add_tail(&tl->link, &timelines->active_list);
+	/* Hand the request over to HW and so engine_retire() */
+	__i915_request_queue(rq, NULL);
 
+	/* Let new submissions commence (and maybe retire this timeline) */
 	__intel_wakeref_defer_park(&engine->wakeref);
 
+	/* Let intel_gt_retire_requests() retire us */
+	if (!atomic_fetch_inc(&tl->active_count))
+		list_add_tail(&tl->link, &timelines->active_list);
+
 	spin_unlock(&timelines->lock);
 }
 
@@ -148,10 +163,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);
 
-	__i915_request_queue(rq, NULL);
-
-	/* Expose ourselves to intel_gt_retire_requests() and new submission */
-	__intel_timeline_enter_and_release_pm(ce->timeline, engine);
+	/* Expose ourselves to the world */
+	__queue_and_release_pm(rq, ce->timeline, engine);
 
 	result = false;
 out_unlock:

From patchwork Thu Nov 21 13:51:31 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Chris Wilson <chris@chris-wilson.co.uk>
X-Patchwork-Id: 11256255
Return-Path: <SRS0=Py3T=ZN=lists.freedesktop.org=intel-gfx-bounces@kernel.org>
Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org
 [172.30.200.123])
	by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D287913A4
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:51 +0000 (UTC)
Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by mail.kernel.org (Postfix) with ESMTPS id BA7052070A
	for <patchwork-intel-gfx@patchwork.kernel.org>;
 Thu, 21 Nov 2019 13:51:51 +0000 (UTC)
DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org BA7052070A
Authentication-Results: mail.kernel.org;
 dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk
Authentication-Results: mail.kernel.org;
 spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org
Received: from gabe.freedesktop.org (localhost [127.0.0.1])
	by gabe.freedesktop.org (Postfix) with ESMTP id 3AB606EF30;
	Thu, 21 Nov 2019 13:51:51 +0000 (UTC)
X-Original-To: intel-gfx@lists.freedesktop.org
Delivered-To: intel-gfx@lists.freedesktop.org
Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192])
 by gabe.freedesktop.org (Postfix) with ESMTPS id 8F5E36EF32
 for <intel-gfx@lists.freedesktop.org>; Thu, 21 Nov 2019 13:51:49 +0000 (UTC)
X-Default-Received-SPF: pass (skip=forwardok (res=PASS))
 x-ip-name=78.156.65.138;
Received: from haswell.alporthouse.com (unverified [78.156.65.138])
 by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19286326-1500050
 for multiple; Thu, 21 Nov 2019 13:51:36 +0000
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Thu, 21 Nov 2019 13:51:31 +0000
Message-Id: <20191121135131.338984-5-chris@chris-wilson.co.uk>
X-Mailer: git-send-email 2.24.0
In-Reply-To: <20191121135131.338984-1-chris@chris-wilson.co.uk>
References: <20191121135131.338984-1-chris@chris-wilson.co.uk>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH 5/5] drm/i915/gt: Schedule request retirement
 when timeline idles
X-BeenThere: intel-gfx@lists.freedesktop.org
X-Mailman-Version: 2.1.23
Precedence: list
List-Id: Intel graphics driver community testing & development
 <intel-gfx.lists.freedesktop.org>
List-Unsubscribe: <https://lists.freedesktop.org/mailman/options/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=unsubscribe>
List-Archive: <https://lists.freedesktop.org/archives/intel-gfx>
List-Post: <mailto:intel-gfx@lists.freedesktop.org>
List-Help: <mailto:intel-gfx-request@lists.freedesktop.org?subject=help>
List-Subscribe: <https://lists.freedesktop.org/mailman/listinfo/intel-gfx>,
 <mailto:intel-gfx-request@lists.freedesktop.org?subject=subscribe>
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

The major drawback of commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX
corruption WA") is that it disables RC6 while Skylake (and friends) is
active, and we do not consider the GPU idle until all outstanding
requests have been retired and the engine switched over to the kernel
context. If userspace is idle, this task falls onto our background idle
worker, which only runs roughly once a second, meaning that userspace has
to have been idle for a couple of seconds before we enable RC6 again.
Naturally, this causes us to consume considerably more energy than
before as powersaving is effectively disabled while a display server
(here's looking at you Xorg) is running.

As execlists will get a completion event as each context is completed,
we can use this interrupt to queue a retire worker bound to this engine
to clean up idle timelines. We will then immediately notice the idle
engine (without userspace intervention or the aid of the background
retire worker) and start parking the GPU. Thus during light workloads,
we will do much more work to idle the GPU faster...  Hopefully with
commensurate power saving!

v2: Watch context completions and only look at those local to the engine
when retiring to reduce the amount of excess work we perform.

References: https://bugs.freedesktop.org/show_bug.cgi?id=112315
References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA")
References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  8 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  8 ++
 drivers/gpu/drm/i915/gt/intel_gt_requests.c   | 73 +++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_requests.h   | 12 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c           |  9 +++
 drivers/gpu/drm/i915/gt/intel_timeline.c      |  1 +
 .../gpu/drm/i915/gt/intel_timeline_types.h    |  3 +
 7 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b9613d044393..8f6e353caa66 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,13 +28,13 @@
 
 #include "i915_drv.h"
 
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_pool.h"
 #include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc.h"
 #include "intel_reset.h"
 #include "intel_ring.h"
@@ -617,6 +617,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
 	intel_engine_init_execlists(engine);
 	intel_engine_init_cmd_parser(engine);
 	intel_engine_init__pm(engine);
+	intel_engine_init_retire(engine);
 
 	intel_engine_pool_init(&engine->pool);
 
@@ -839,6 +840,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 
 	cleanup_status_page(engine);
 
+	intel_engine_fini_retire(engine);
 	intel_engine_pool_fini(&engine->pool);
 	intel_engine_fini_breadcrumbs(engine);
 	intel_engine_cleanup_cmd_parser(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 758f0e8ec672..17f1f1441efc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -451,6 +451,14 @@ struct intel_engine_cs {
 
 	struct intel_engine_execlists execlists;
 
+	/*
+	 * Keep track of completed timelines on this engine for early
+	 * retirement with the goal of quickly enabling powersaving as
+	 * soon as the engine is idle.
+	 */
+	struct intel_timeline *retire;
+	struct work_struct retire_work;
+
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 819a266a8f29..5639398e9f60 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -29,6 +29,79 @@ static void flush_submission(struct intel_gt *gt)
 		intel_engine_flush_submission(engine);
 }
 
+static void engine_retire(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), retire_work);
+	struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+	do {
+		struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+		/*
+		 * Our goal here is to retire _idle_ timelines as soon as
+		 * possible (as they are idle, we do not expect userspace
+		 * to be cleaning up anytime soon).
+		 *
+		 * If the timeline is currently locked, either it is being
+		 * retired elsewhere or about to be!
+		 */
+		if (mutex_trylock(&tl->mutex)) {
+			retire_requests(tl);
+			mutex_unlock(&tl->mutex);
+		}
+		intel_timeline_put(tl);
+
+		GEM_BUG_ON(!next);
+		tl = ptr_mask_bits(next, 1);
+	} while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+		       struct intel_timeline *tl)
+{
+	struct intel_timeline *first;
+
+	/*
+	 * We open-code a llist here to include the additional tag [BIT(0)]
+	 * so that we know when the timeline is already on a
+	 * retirement queue: either this engine or another.
+	 *
+	 * However, we rely on a timeline only being active on a single engine
+	 * at any one time, and on add_retire() being called before the engine
+	 * releases the timeline and it is transferred to another to retire.
+	 */
+
+	if (READ_ONCE(tl->retire)) /* already queued */
+		return false;
+
+	intel_timeline_get(tl);
+	first = READ_ONCE(engine->retire);
+	do
+		tl->retire = ptr_pack_bits(first, 1, 1);
+	while (!try_cmpxchg(&engine->retire, &first, tl));
+
+	return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+			     struct intel_timeline *tl)
+{
+	if (add_retire(engine, tl))
+		schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+	INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+	flush_work(&engine->retire_work);
+	GEM_BUG_ON(engine->retire);
+}
+
 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 {
 	struct intel_gt_timelines *timelines = &gt->timelines;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index fde546424c63..252c6064989c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -7,7 +7,12 @@
 #ifndef INTEL_GT_REQUESTS_H
 #define INTEL_GT_REQUESTS_H
 
-struct intel_gt;
+#include <linux/workqueue.h>
+
+#include "intel_gt_types.h"
+
+struct intel_engine_cs;
+struct intel_timeline;
 
 long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
 static inline void intel_gt_retire_requests(struct intel_gt *gt)
@@ -15,6 +20,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
 	intel_gt_retire_requests_timeout(gt, 0);
 }
 
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+			     struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
 
 void intel_gt_init_requests(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0e2065a13f24..062dd8ac472a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -142,6 +142,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_reset.h"
@@ -1170,6 +1171,14 @@ __execlists_schedule_out(struct i915_request *rq,
 	 * refrain from doing non-trivial work here.
 	 */
 
+	/*
+	 * If we have just completed this context, the engine may now be
+	 * idle and we want to re-enter powersaving.
+	 */
+	if (list_is_last(&rq->link, &ce->timeline->requests) &&
+	    i915_request_completed(rq))
+		intel_engine_add_retire(engine, ce->timeline);
+
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put_async(engine->gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index b190a5d9ab02..c1d2419444f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -277,6 +277,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
 {
 	GEM_BUG_ON(atomic_read(&timeline->pin_count));
 	GEM_BUG_ON(!list_empty(&timeline->requests));
+	GEM_BUG_ON(timeline->retire);
 
 	if (timeline->hwsp_cacheline)
 		cacheline_free(timeline->hwsp_cacheline);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 5244615ed1cb..aaf15cbe1ce1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -66,6 +66,9 @@ struct intel_timeline {
 	 */
 	struct i915_active_fence last_request;
 
+	/** A chain of completed timelines ready for early retirement. */
+	struct intel_timeline *retire;
+
 	/**
 	 * We track the most recent seqno that we wait on in every context so
 	 * that we only have to emit a new await and dependency on a more