From patchwork Mon Dec 28 15:51:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991437 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id CFEABC433DB for ; Mon, 28 Dec 2020 15:54:23 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 8EF5B20829 for ; Mon, 28 Dec 2020 15:54:23 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 8EF5B20829 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 1D83B89BF5; Mon, 28 Dec 2020 15:54:23 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id A620089A0F for ; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448178-1500050 for multiple; Mon, 28 Dec 2020 15:52:32 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:36 +0000 Message-Id: 
<20201228155229.9516-1-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 01/54] drm/i915/gt: Cancel submitted requests upon context reset X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Since we process schedule-in of a context after submitting the request, if we decide to reset the context at that time, we also have to cancel the requests we have marked for submission. Signed-off-by: Chris Wilson --- .../drm/i915/gt/intel_execlists_submission.c | 22 ++++++++++++++----- drivers/gpu/drm/i915/i915_request.c | 2 ++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 1fae6c6f3868..eb2c086dbce6 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -215,22 +215,32 @@ static void mark_eio(struct i915_request *rq) } static struct i915_request * -active_request(const struct intel_timeline * const tl, struct i915_request *rq) +__active_request(const struct intel_timeline * const tl, + struct i915_request *rq, + int error) { struct i915_request *active = rq; - rcu_read_lock(); - list_for_each_entry_continue_reverse(rq, &tl->requests, link) { + list_for_each_entry_from_reverse(rq, &tl->requests, link) { if (__i915_request_is_complete(rq)) break; + if (error) { + i915_request_set_error_once(rq, error); + __i915_request_skip(rq); + } active = rq; } - rcu_read_unlock(); return active; } +static struct i915_request * +active_request(const struct intel_timeline * const tl, struct i915_request *rq) +{ + return __active_request(tl, rq, 0); +} + static inline void 
ring_set_paused(const struct intel_engine_cs *engine, int state) { @@ -487,14 +497,14 @@ static void reset_active(struct i915_request *rq, * remain correctly ordered. And we defer to __i915_request_submit() * so that all asynchronous waits are correctly handled. */ - ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", + ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n", rq->fence.context, rq->fence.seqno); /* On resubmission of the active request, payload will be scrubbed */ if (__i915_request_is_complete(rq)) head = rq->tail; else - head = active_request(ce->timeline, rq)->head; + head = __active_request(ce->timeline, rq, -EIO)->head; head = intel_ring_wrap(ce->ring, head); /* Scrub the context image to prevent replaying the previous batch */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index de434697dccd..03ac6eead4db 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -490,6 +490,8 @@ void __i915_request_skip(struct i915_request *rq) if (rq->infix == rq->postfix) return; + RQ_TRACE(rq, "error: %d\n", rq->fence.error); + /* * As this request likely depends on state from the lost * context, clear out all the user operations leaving the From patchwork Mon Dec 28 15:51:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991427 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 02DA0C4361A for ; Mon, 28 Dec 2020 15:53:30 +0000 (UTC) Received: from gabe.freedesktop.org 
(gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id B91C2206E5 for ; Mon, 28 Dec 2020 15:53:29 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org B91C2206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id D913989B48; Mon, 28 Dec 2020 15:53:01 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id AF22989A14 for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448179-1500050 for multiple; Mon, 28 Dec 2020 15:52:32 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:37 +0000 Message-Id: <20201228155229.9516-2-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 02/54] drm/i915/gt: Pull context closure check from request submit to schedule-in X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" We only need to evaluate the current status of the context when it is scheduled in, we will force a reschedule when the context is closed 
propagating the change to inflight contexts. Signed-off-by: Chris Wilson Cc: Matthew Brost --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 ++++ drivers/gpu/drm/i915/i915_request.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index eb2c086dbce6..cdd7606a65d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -522,6 +522,10 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (unlikely(intel_context_is_closed(ce) && + !intel_engine_has_heartbeat(engine))) + intel_context_set_banned(ce); + if (unlikely(intel_context_is_banned(ce))) reset_active(rq, engine); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 03ac6eead4db..3290e743ba43 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -546,10 +546,6 @@ bool __i915_request_submit(struct i915_request *request) if (i915_request_completed(request)) goto xfer; - if (unlikely(intel_context_is_closed(request->context) && - !intel_engine_has_heartbeat(engine))) - intel_context_set_banned(request->context); - if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); From patchwork Mon Dec 28 15:51:38 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991379 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org 
[198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 44A91C433E0 for ; Mon, 28 Dec 2020 15:53:16 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id E625D206E5 for ; Mon, 28 Dec 2020 15:53:15 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org E625D206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id EA80689AAD; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 93E43899DC for ; Mon, 28 Dec 2020 15:52:53 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448180-1500050 for multiple; Mon, 28 Dec 2020 15:52:32 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:38 +0000 Message-Id: <20201228155229.9516-3-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 03/54] drm/i915/gem: Peek at the inflight context X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" If supported by 
the backend, we can quickly look at the context's inflight engine rather than search along the active list to confirm. Signed-off-by: Chris Wilson Reviewed-by: Andi Shyti --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +++ drivers/gpu/drm/i915/gt/intel_context.h | 10 ++++++++++ drivers/gpu/drm/i915/gt/intel_context_types.h | 9 +++++---- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 1 + 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index c7363036765a..68f58762d5e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -423,6 +423,9 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) struct intel_engine_cs *engine = NULL; struct i915_request *rq; + if (intel_context_has_inflight(ce)) + return intel_context_inflight(ce); + if (!ce->timeline) return NULL; diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 2ce2ec639ba2..ecab3a7e4d1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -191,6 +191,16 @@ static inline bool intel_context_is_closed(const struct intel_context *ce) return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); } +static inline void intel_context_set_inflight(struct intel_context *ce) +{ + return __set_bit(CONTEXT_HAS_INFLIGHT, &ce->flags); +} + +static inline bool intel_context_has_inflight(const struct intel_context *ce) +{ + return test_bit(CONTEXT_HAS_INFLIGHT, &ce->flags); +} + static inline bool intel_context_use_semaphores(const struct intel_context *ce) { return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index f7a0fb6f3a2e..679b268f0911 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ 
-87,10 +87,11 @@ struct intel_context { #define CONTEXT_ALLOC_BIT 1 #define CONTEXT_VALID_BIT 2 #define CONTEXT_CLOSED_BIT 3 -#define CONTEXT_USE_SEMAPHORES 4 -#define CONTEXT_BANNED 5 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 6 -#define CONTEXT_NOPREEMPT 7 +#define CONTEXT_HAS_INFLIGHT 4 +#define CONTEXT_USE_SEMAPHORES 5 +#define CONTEXT_BANNED 6 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 7 +#define CONTEXT_NOPREEMPT 8 u32 *lrc_reg_state; union { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index cdd7606a65d4..5969e688f78e 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2532,6 +2532,7 @@ static int __execlists_context_alloc(struct intel_context *ce, if (err) goto err_lrc; + intel_context_set_inflight(ce); return 0; err_lrc: From patchwork Mon Dec 28 15:51:39 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991389 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 09245C4332D for ; Mon, 28 Dec 2020 15:53:24 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id C6171206E5 for ; Mon, 28 Dec 2020 15:53:23 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org C6171206E5 Authentication-Results: mail.kernel.org; 
dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id C86E689B46; Mon, 28 Dec 2020 15:53:01 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id D3DF28999C for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448181-1500050 for multiple; Mon, 28 Dec 2020 15:52:32 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:39 +0000 Message-Id: <20201228155229.9516-4-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 04/54] drm/i915: Mark up protected uses of 'i915_request_completed' X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" When we know that we are inside the timeline mutex, or inside the submission flow (under active.lock or the holder's rcu lock), we know that the rq->hwsp is stable and we can use the simpler direct version. 
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 3 +-- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 4 +++- drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c | 15 +++++++-------- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 68f58762d5e3..cac0c52fc681 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -408,7 +408,7 @@ __active_engine(struct i915_request *rq, struct intel_engine_cs **active) } if (i915_request_is_active(rq)) { - if (!i915_request_completed(rq)) + if (!__i915_request_is_complete(rq)) *active = locked; ret = true; } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 2eabb9ab5d47..b7af8d9cc4dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -484,8 +484,8 @@ static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p) list_for_each_entry_rcu(rq, &ce->signals, signal_link) drm_printf(p, "\t[%llx:%llx%s] @ %dms\n", rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : + __i915_request_is_complete(rq) ? "!" : + __i915_request_has_started(rq) ? 
"*" : "", jiffies_to_msecs(jiffies - rq->emitted_jiffies)); } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1847d3c2ea99..e325d559f29a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1792,7 +1792,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) struct intel_timeline *tl = request->context->timeline; list_for_each_entry_from_reverse(request, &tl->requests, link) { - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) break; active = request; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b85b6f3dcd60..e0b4291393ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -151,8 +151,7 @@ static void mark_innocent(struct i915_request *rq) void __i915_request_reset(struct i915_request *rq, bool guilty) { RQ_TRACE(rq, "guilty? %s\n", yesno(guilty)); - - GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(__i915_request_is_complete(rq)); rcu_read_lock(); /* protect the GEM context */ if (guilty) { diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 4ea741f488a8..0737a2608d0b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -404,12 +404,14 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) rq = NULL; spin_lock_irqsave(&engine->active.lock, flags); + rcu_read_lock(); list_for_each_entry(pos, &engine->active.requests, sched.link) { - if (!i915_request_completed(pos)) { + if (!__i915_request_is_complete(pos)) { rq = pos; break; } } + rcu_read_unlock(); /* * The guilty request will get skipped on a hung engine. 
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 7fe05918a76e..037b0e3ccbed 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -582,11 +582,11 @@ int intel_timeline_read_hwsp(struct i915_request *from, rcu_read_lock(); cl = rcu_dereference(from->hwsp_cacheline); - if (i915_request_completed(from)) /* confirm cacheline is valid */ + if (i915_request_signaled(from)) /* confirm cacheline is valid */ goto unlock; if (unlikely(!i915_active_acquire_if_busy(&cl->active))) goto unlock; /* seqno wrapped and completed! */ - if (unlikely(i915_request_completed(from))) + if (unlikely(__i915_request_is_complete(from))) goto release; rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3290e743ba43..f5fe69c20b90 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -276,7 +276,7 @@ static void remove_from_engine(struct i915_request *rq) bool i915_request_retire(struct i915_request *rq) { - if (!i915_request_completed(rq)) + if (!__i915_request_is_complete(rq)) return false; RQ_TRACE(rq, "\n"); @@ -344,8 +344,7 @@ void i915_request_retire_upto(struct i915_request *rq) struct i915_request *tmp; RQ_TRACE(rq, "\n"); - - GEM_BUG_ON(!i915_request_completed(rq)); + GEM_BUG_ON(!__i915_request_is_complete(rq)); do { tmp = list_first_entry(&tl->requests, typeof(*tmp), link); @@ -543,7 +542,7 @@ bool __i915_request_submit(struct i915_request *request) * dropped upon retiring. (Otherwise if resubmit a *retired* * request, this would be a horrible use-after-free.) */ - if (i915_request_completed(request)) + if (__i915_request_is_complete(request)) goto xfer; if (unlikely(intel_context_is_banned(request->context))) @@ -643,7 +642,7 @@ void __i915_request_unsubmit(struct i915_request *request) i915_request_cancel_breadcrumb(request); /* We've already spun, don't charge on resubmitting. 
*/ - if (request->sched.semaphores && i915_request_started(request)) + if (request->sched.semaphores && __i915_request_has_started(request)) request->sched.semaphores = 0; /* @@ -855,7 +854,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; - GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(__i915_request_is_complete(rq)); rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ @@ -969,7 +968,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) struct i915_request *prev; /* Confirm signal has not been retired, the link is valid */ - if (unlikely(i915_request_started(signal))) + if (unlikely(__i915_request_has_started(signal))) break; /* Is signal the earliest request on its timeline? */ @@ -1511,7 +1510,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) */ prev = to_request(__i915_active_fence_set(&timeline->last_request, &rq->fence)); - if (prev && !i915_request_completed(prev)) { + if (prev && !__i915_request_is_complete(prev)) { /* * The requests are supposed to be kept in order. 
However, * we need to be wary in case the timeline->last_request From patchwork Mon Dec 28 15:51:40 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991377 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4B632C432C3 for ; Mon, 28 Dec 2020 15:53:17 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 0EDB0224D2 for ; Mon, 28 Dec 2020 15:53:17 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 0EDB0224D2 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8DCB7899E7; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 976EB899F2 for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448182-1500050 for multiple; Mon, 28 Dec 2020 15:52:33 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 
28 Dec 2020 15:51:40 +0000 Message-Id: <20201228155229.9516-5-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 05/54] drm/i915: Drop i915_request.lock serialisation around await_start X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Originally, we used the signal->lock as a means of following the previous link in its timeline and peeking at the previous fence. However, we have replaced the explicit serialisation with a series of very careful probes that anticipate the links being deleted and the fences recycled before we are able to acquire a strong reference to it. We do not need the signal->lock crutch anymore, nor want the contention. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_request.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f5fe69c20b90..2d2882344e40 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -960,9 +960,16 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) if (i915_request_started(signal)) return 0; + /* + * The caller holds a reference on @signal, but we do not serialise + * against it being retired and removed from the lists. + * + * We do not hold a reference to the request before @signal, and + * so must be very careful to ensure that it is not _recycled_ as + * we follow the link backwards. 
+ */ fence = NULL; rcu_read_lock(); - spin_lock_irq(&signal->lock); do { struct list_head *pos = READ_ONCE(signal->link.prev); struct i915_request *prev; @@ -993,7 +1000,6 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) fence = &prev->fence; } while (0); - spin_unlock_irq(&signal->lock); rcu_read_unlock(); if (!fence) return 0; From patchwork Mon Dec 28 15:51:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991441 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id E6C64C433DB for ; Mon, 28 Dec 2020 15:54:28 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id A4946206E5 for ; Mon, 28 Dec 2020 15:54:28 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org A4946206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 2FB8C89BFB; Mon, 28 Dec 2020 15:54:28 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id A37EA89A0F for ; Mon, 28 Dec 2020 15:52:57 +0000 (UTC) X-Default-Received-SPF: pass 
(skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448183-1500050 for multiple; Mon, 28 Dec 2020 15:52:33 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:41 +0000 Message-Id: <20201228155229.9516-6-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 06/54] drm/i915: Drop i915_request.lock requirement for intel_rps_boost() X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Since we use a flag within i915_request.flags to indicate when we have boosted the request (so that we only apply the boost once), this can be used as the serialisation with i915_request_retire() to avoid having to explicitly take the i915_request.lock, which is more heavily contended. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/gt/intel_rps.c | 15 ++++++--------- drivers/gpu/drm/i915/i915_request.c | 4 +--- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index f74d5e09e176..e1397b8d3586 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -917,17 +917,15 @@ void intel_rps_park(struct intel_rps *rps) void intel_rps_boost(struct i915_request *rq) { - struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; - unsigned long flags; - - if (i915_request_signaled(rq) || !intel_rps_is_active(rps)) + if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; /* Serializes with i915_request_retire() */ - spin_lock_irqsave(&rq->lock, flags); - if (!i915_request_has_waitboost(rq) && - !dma_fence_is_signaled_locked(&rq->fence)) { - set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); + if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { + struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; + + if (!intel_rps_is_active(rps)) + return; GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); @@ -938,7 +936,6 @@ void intel_rps_boost(struct i915_request *rq) atomic_inc(&rps->boosts); } - spin_unlock_irqrestore(&rq->lock, flags); } int intel_rps_set(struct intel_rps *rps, u8 val) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2d2882344e40..2a7bad88038b 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -307,10 +307,8 @@ bool i915_request_retire(struct i915_request *rq) spin_unlock_irq(&rq->lock); } - if (i915_request_has_waitboost(rq)) { - GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); + if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) atomic_dec(&rq->engine->gt->rps.num_waiters); - } /* * We only loosely track inflight 
requests across preemption, From patchwork Mon Dec 28 15:51:42 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991395 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id D7441C433DB for ; Mon, 28 Dec 2020 15:53:15 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 3C893206E5 for ; Mon, 28 Dec 2020 15:53:15 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 3C893206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 76B5E89A35; Mon, 28 Dec 2020 15:52:57 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 51D87899DB for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448184-1500050 for multiple; Mon, 28 Dec 2020 15:52:33 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:42 +0000 
Message-Id: <20201228155229.9516-7-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 07/54] drm/i915/gem: Reduce ctx->engine_mutex for reading the clone source X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" When cloning the engines from the source context, we need to ensure that the engines are not freed as we copy them, and that the flags we clone from the source correspond with the engines we copy across. To do this we need only take a reference to the src->engines, rather than hold the src->engines_mutex, so long as we verify that nothing changed under the read.
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cac0c52fc681..4a709c625ccb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -717,7 +717,8 @@ __create_context(struct drm_i915_private *i915) } static inline struct i915_gem_engines * -__context_engines_await(const struct i915_gem_context *ctx) +__context_engines_await(const struct i915_gem_context *ctx, + bool *user_engines) { struct i915_gem_engines *engines; @@ -726,6 +727,10 @@ __context_engines_await(const struct i915_gem_context *ctx) engines = rcu_dereference(ctx->engines); GEM_BUG_ON(!engines); + if (user_engines) + *user_engines = i915_gem_context_user_engines(ctx); + + /* successful await => strong mb */ if (unlikely(!i915_sw_fence_await(&engines->fence))) continue; @@ -749,7 +754,7 @@ context_apply_all(struct i915_gem_context *ctx, struct intel_context *ce; int err = 0; - e = __context_engines_await(ctx); + e = __context_engines_await(ctx, NULL); for_each_gem_engine(ce, e, it) { err = fn(ce, data); if (err) @@ -1075,7 +1080,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, return err; } - e = __context_engines_await(ctx); + e = __context_engines_await(ctx, NULL); if (!e) { i915_active_release(&cb->base); return -ENOENT; @@ -2095,11 +2100,14 @@ static int copy_ring_size(struct intel_context *dst, static int clone_engines(struct i915_gem_context *dst, struct i915_gem_context *src) { - struct i915_gem_engines *e = i915_gem_context_lock_engines(src); - struct i915_gem_engines *clone; + struct i915_gem_engines *clone, *e; bool user_engines; unsigned long n; + e = __context_engines_await(src, &user_engines); + if (!e) + return -ENOENT; + clone = alloc_engines(e->num_engines); if (!clone) goto err_unlock; @@ -2141,9 +2149,7 @@ 
static int clone_engines(struct i915_gem_context *dst, } } clone->num_engines = n; - - user_engines = i915_gem_context_user_engines(src); - i915_gem_context_unlock_engines(src); + i915_sw_fence_complete(&e->fence); /* Serialised by constructor */ engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1)); @@ -2154,7 +2160,7 @@ static int clone_engines(struct i915_gem_context *dst, return 0; err_unlock: - i915_gem_context_unlock_engines(src); + i915_sw_fence_complete(&e->fence); return -ENOMEM; } From patchwork Mon Dec 28 15:51:43 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991407 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id DD6BCC43219 for ; Mon, 28 Dec 2020 15:53:22 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 89B6B206E5 for ; Mon, 28 Dec 2020 15:53:22 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 89B6B206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 7892489AB9; Mon, 28 Dec 2020 15:52:59 +0000 (UTC) Received: from fireflyinternet.com (unknown 
[77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id E93C2899D4 for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448185-1500050 for multiple; Mon, 28 Dec 2020 15:52:33 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:43 +0000 Message-Id: <20201228155229.9516-8-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 08/54] drm/i915/gem: Reduce ctx->engines_mutex for get_engines() X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Take a snapshot of the ctx->engines, so we can avoid taking the ctx->engines_mutex for a mere read in get_engines(). 
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 39 +++++---------------- 1 file changed, 8 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4a709c625ccb..4d2f40cf237b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1843,27 +1843,6 @@ set_engines(struct i915_gem_context *ctx, return 0; } -static struct i915_gem_engines * -__copy_engines(struct i915_gem_engines *e) -{ - struct i915_gem_engines *copy; - unsigned int n; - - copy = alloc_engines(e->num_engines); - if (!copy) - return ERR_PTR(-ENOMEM); - - for (n = 0; n < e->num_engines; n++) { - if (e->engines[n]) - copy->engines[n] = intel_context_get(e->engines[n]); - else - copy->engines[n] = NULL; - } - copy->num_engines = n; - - return copy; -} - static int get_engines(struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1871,19 +1850,17 @@ get_engines(struct i915_gem_context *ctx, struct i915_context_param_engines __user *user; struct i915_gem_engines *e; size_t n, count, size; + bool user_engines; int err = 0; - err = mutex_lock_interruptible(&ctx->engines_mutex); - if (err) - return err; + e = __context_engines_await(ctx, &user_engines); + if (!e) + return -ENOENT; - e = NULL; - if (i915_gem_context_user_engines(ctx)) - e = __copy_engines(i915_gem_context_engines(ctx)); - mutex_unlock(&ctx->engines_mutex); - if (IS_ERR_OR_NULL(e)) { + if (!user_engines) { + i915_sw_fence_complete(&e->fence); args->size = 0; - return PTR_ERR_OR_ZERO(e); + return 0; } count = e->num_engines; @@ -1934,7 +1911,7 @@ get_engines(struct i915_gem_context *ctx, args->size = size; err_free: - free_engines(e); + i915_sw_fence_complete(&e->fence); return err; } From patchwork Mon Dec 28 15:51:44 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson 
X-Patchwork-Id: 11991345 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id E2D6EC4332B for ; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 9590820829 for ; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 9590820829 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0CB5D8999C; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 82858899DC for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448186-1500050 for multiple; Mon, 28 Dec 2020 15:52:33 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:44 +0000 Message-Id: <20201228155229.9516-9-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: 
<20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 09/54] drm/i915: Reduce test_and_set_bit to set_bit in i915_request_submit() X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Avoid the full blown memory barrier of test_and_set_bit() by noting the completed request and removing it from the lists. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_request.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2a7bad88038b..7c5eec2fd631 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -540,8 +540,10 @@ bool __i915_request_submit(struct i915_request *request) * dropped upon retiring. (Otherwise if resubmit a *retired* * request, this would be a horrible use-after-free.) 
*/ - if (__i915_request_is_complete(request)) - goto xfer; + if (__i915_request_is_complete(request)) { + list_del_init(&request->sched.link); + goto active; + } if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); @@ -576,11 +578,11 @@ bool __i915_request_submit(struct i915_request *request) engine->serial++; result = true; -xfer: - if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { - list_move_tail(&request->sched.link, &engine->active.requests); - clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); - } + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); + list_move_tail(&request->sched.link, &engine->active.requests); +active: + clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); + set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); /* * XXX Rollback bonded-execution on __i915_request_unsubmit()? From patchwork Mon Dec 28 15:51:45 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991419 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id C6935C433E9 for ; Mon, 28 Dec 2020 15:53:33 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 987F022B2C for ; Mon, 28 Dec 2020 15:53:33 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 987F022B2C 
Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 763FC89B69; Mon, 28 Dec 2020 15:53:02 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 95E46899E7 for ; Mon, 28 Dec 2020 15:52:53 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448187-1500050 for multiple; Mon, 28 Dec 2020 15:52:33 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:45 +0000 Message-Id: <20201228155229.9516-10-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 10/54] drm/i915/gt: Drop atomic for engine->fw_active tracking X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Since schedule-in/out is now entirely serialised by the tasklet bitlock, we do not need to worry about concurrent in/out operations and so reduce the atomic operations to plain instructions. 
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e325d559f29a..5ead72c1b7d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1657,7 +1657,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, ktime_to_ms(intel_engine_get_busy_time(engine, &dummy))); drm_printf(m, "\tForcewake: %x domains, %d active\n", - engine->fw_domain, atomic_read(&engine->fw_active)); + engine->fw_domain, READ_ONCE(engine->fw_active)); rcu_read_lock(); rq = READ_ONCE(engine->heartbeat.systole); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c28f4e190fe6..1fbee35cb5ad 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -329,7 +329,7 @@ struct intel_engine_cs { * as possible. 
*/ enum forcewake_domains fw_domain; - atomic_t fw_active; + unsigned int fw_active; unsigned long context_tag; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 5969e688f78e..782c4af9c250 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -550,7 +550,7 @@ __execlists_schedule_in(struct i915_request *rq) ce->lrc.ccid |= engine->execlists.ccid; __intel_gt_pm_get(engine->gt); - if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) + if (engine->fw_domain && !engine->fw_active++) intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -661,7 +661,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq) lrc_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) + if (engine->fw_domain && !--engine->fw_active) intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); intel_gt_pm_put_async(engine->gt); From patchwork Mon Dec 28 15:51:46 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991351 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0D0ADC433DB for ; Mon, 28 Dec 2020 15:53:05 +0000 (UTC) Received: from gabe.freedesktop.org 
(gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id C5389206E5 for ; Mon, 28 Dec 2020 15:53:04 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org C5389206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id C9ABA89A1F; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 7C3F9899DB for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448188-1500050 for multiple; Mon, 28 Dec 2020 15:52:34 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:46 +0000 Message-Id: <20201228155229.9516-11-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 11/54] drm/i915/gt: Extract busy-stats for ring-scheduler X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Lift the busy-stats context-in/out implementation out of intel_lrc, so that we can reuse it for other scheduler implementations. 
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_stats.h | 49 +++++++++++++++++++ .../drm/i915/gt/intel_execlists_submission.c | 34 +------------ 2 files changed, 50 insertions(+), 33 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_engine_stats.h diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h new file mode 100644 index 000000000000..58491eae3482 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_ENGINE_STATS_H__ +#define __INTEL_ENGINE_STATS_H__ + +#include +#include +#include + +#include "i915_gem.h" /* GEM_BUG_ON */ +#include "intel_engine.h" + +static inline void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (atomic_add_unless(&engine->stats.active, 1, 0)) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + if (!atomic_add_unless(&engine->stats.active, 1, 0)) { + engine->stats.start = ktime_get(); + atomic_inc(&engine->stats.active); + } + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static inline void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + GEM_BUG_ON(!atomic_read(&engine->stats.active)); + + if (atomic_add_unless(&engine->stats.active, -1, 1)) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + if (atomic_dec_and_test(&engine->stats.active)) { + engine->stats.total = + ktime_add(engine->stats.total, + ktime_sub(ktime_get(), engine->stats.start)); + } + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +#endif /* __INTEL_ENGINE_STATS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 782c4af9c250..96852894e6f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -115,6 +115,7 @@ #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_engine_stats.h" #include "intel_execlists_submission.h" #include "intel_gt.h" #include "intel_gt_pm.h" @@ -443,39 +444,6 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } -static void intel_engine_context_in(struct intel_engine_cs *engine) -{ - unsigned long flags; - - if (atomic_add_unless(&engine->stats.active, 1, 0)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (!atomic_add_unless(&engine->stats.active, 1, 0)) { - engine->stats.start = ktime_get(); - atomic_inc(&engine->stats.active); - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - -static void intel_engine_context_out(struct intel_engine_cs *engine) -{ - unsigned long flags; - - GEM_BUG_ON(!atomic_read(&engine->stats.active)); - - if (atomic_add_unless(&engine->stats.active, -1, 1)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (atomic_dec_and_test(&engine->stats.active)) { - engine->stats.total = - ktime_add(engine->stats.total, - ktime_sub(ktime_get(), engine->stats.start)); - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); -} - static void reset_active(struct i915_request *rq, struct intel_engine_cs *engine) { From patchwork Mon Dec 28 15:51:47 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991397 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org 
[198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id AB1E2C433E0 for ; Mon, 28 Dec 2020 15:53:30 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 6AD93206E5 for ; Mon, 28 Dec 2020 15:53:30 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 6AD93206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8222789A75; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 931B18925D for ; Mon, 28 Dec 2020 15:52:53 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448189-1500050 for multiple; Mon, 28 Dec 2020 15:52:34 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:47 +0000 Message-Id: <20201228155229.9516-12-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 12/54] drm/i915/gt: Convert stats.active to plain unsigned int X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" 
As context-in/out is now always serialised, we do not have to worry about concurrent enabling/disabling of the busy-stats and can reduce the atomic_t active to a plain unsigned int, and the seqlock to a seqcount. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 8 ++-- drivers/gpu/drm/i915/gt/intel_engine_stats.h | 45 ++++++++++++-------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +- 3 files changed, 34 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5ead72c1b7d2..d2a20e53b080 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -341,7 +341,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->schedule = NULL; ewma__engine_latency_init(&engine->latency); - seqlock_init(&engine->stats.lock); + seqcount_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -1735,7 +1735,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine, * add it to the total.
*/ *now = ktime_get(); - if (atomic_read(&engine->stats.active)) + if (READ_ONCE(engine->stats.active)) total = ktime_add(total, ktime_sub(*now, engine->stats.start)); return total; @@ -1754,9 +1754,9 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) ktime_t total; do { - seq = read_seqbegin(&engine->stats.lock); + seq = read_seqcount_begin(&engine->stats.lock); total = __intel_engine_get_busy_time(engine, now); - } while (read_seqretry(&engine->stats.lock, seq)); + } while (read_seqcount_retry(&engine->stats.lock, seq)); return total; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h index 58491eae3482..24fbdd94351a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h @@ -17,33 +17,44 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) { unsigned long flags; - if (atomic_add_unless(&engine->stats.active, 1, 0)) + if (engine->stats.active) { + engine->stats.active++; return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (!atomic_add_unless(&engine->stats.active, 1, 0)) { - engine->stats.start = ktime_get(); - atomic_inc(&engine->stats.active); } - write_sequnlock_irqrestore(&engine->stats.lock, flags); + + /* The writer is serialised; but the pmu reader may be from hardirq */ + local_irq_save(flags); + write_seqcount_begin(&engine->stats.lock); + + engine->stats.start = ktime_get(); + engine->stats.active++; + + write_seqcount_end(&engine->stats.lock); + local_irq_restore(flags); + + GEM_BUG_ON(!engine->stats.active); } static inline void intel_engine_context_out(struct intel_engine_cs *engine) { unsigned long flags; - GEM_BUG_ON(!atomic_read(&engine->stats.active)); - - if (atomic_add_unless(&engine->stats.active, -1, 1)) + GEM_BUG_ON(!engine->stats.active); + if (engine->stats.active > 1) { + engine->stats.active--; return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - 
if (atomic_dec_and_test(&engine->stats.active)) { - engine->stats.total = - ktime_add(engine->stats.total, - ktime_sub(ktime_get(), engine->stats.start)); } - write_sequnlock_irqrestore(&engine->stats.lock, flags); + + local_irq_save(flags); + write_seqcount_begin(&engine->stats.lock); + + engine->stats.active--; + engine->stats.total = + ktime_add(engine->stats.total, + ktime_sub(ktime_get(), engine->stats.start)); + + write_seqcount_end(&engine->stats.lock); + local_irq_restore(flags); } #endif /* __INTEL_ENGINE_STATS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 1fbee35cb5ad..fdec129a6317 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -526,12 +526,12 @@ struct intel_engine_cs { /** * @active: Number of contexts currently scheduled in. */ - atomic_t active; + unsigned int active; /** * @lock: Lock protecting the below fields. */ - seqlock_t lock; + seqcount_t lock; /** * @total: Total time this engine was busy. 
From patchwork Mon Dec 28 15:51:48 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991409 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1151CC43142 for ; Mon, 28 Dec 2020 15:53:32 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id CF274206E5 for ; Mon, 28 Dec 2020 15:53:31 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org CF274206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id BEADF89B06; Mon, 28 Dec 2020 15:53:00 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 924DD899E7 for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448190-1500050 for multiple; Mon, 28 Dec 2020 15:52:34 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:48 +0000 Message-Id: 
<20201228155229.9516-13-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 13/54] drm/i915/gt: Do not suspend bonded requests if one hangs X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Treat the dependency between bonded requests as weak and leave the remainder of the pair on the GPU if one hangs. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 96852894e6f2..17c0c9d210b2 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1995,6 +1995,9 @@ static void __execlists_hold(struct i915_request *rq) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; @@ -2093,6 +2096,9 @@ static void __execlists_unhold(struct i915_request *rq) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Propagate any change in error status */ if (rq->fence.error) i915_request_set_error_once(w, rq->fence.error); From patchwork Mon Dec 28 15:51:49 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991371 Return-Path: X-Spam-Checker-Version: SpamAssassin 
3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 45A61C433E9 for ; Mon, 28 Dec 2020 15:53:14 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id F2678224D2 for ; Mon, 28 Dec 2020 15:53:13 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org F2678224D2 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 045C389A5E; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 61B23899DC for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448191-1500050 for multiple; Mon, 28 Dec 2020 15:52:34 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:49 +0000 Message-Id: <20201228155229.9516-14-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 14/54] 
drm/i915/gt: Remove timeslice suppression X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" In the next patch, we remove the strict priority system and continuously re-evaluate the relative priority of tasks. As such we need to enable the timeslice whenever there is more than one context in the pipeline. This simplifies the decision and removes some of the tweaks to suppress timeslicing, allowing us to lift the timeslice enabling to a common spot at the end of running the submission tasklet. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 10 -- .../drm/i915/gt/intel_execlists_submission.c | 159 +++++++----------- 2 files changed, 61 insertions(+), 108 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index fdec129a6317..f850179a583f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -238,16 +238,6 @@ struct intel_engine_execlists { */ unsigned int port_mask; - /** - * @switch_priority_hint: Second context priority. - * - * We submit multiple contexts to the HW simultaneously and would - * like to occasionally switch between them to emulate timeslicing. - * To know when timeslicing is suitable, we track the priority of - * the context submitted second. - */ - int switch_priority_hint; - /** * @queue_priority_hint: Highest pending priority. 
* diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 17c0c9d210b2..325da35a3238 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1113,25 +1113,6 @@ static void defer_active(struct intel_engine_cs *engine) defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); } -static bool -need_timeslice(const struct intel_engine_cs *engine, - const struct i915_request *rq) -{ - int hint; - - if (!intel_engine_has_timeslices(engine)) - return false; - - hint = max(engine->execlists.queue_priority_hint, - virtual_prio(&engine->execlists)); - - if (!list_is_last(&rq->sched.link, &engine->active.requests)) - hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); - - GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE); - return hint >= effective_prio(rq); -} - static bool timeslice_yield(const struct intel_engine_execlists *el, const struct i915_request *rq) @@ -1151,76 +1132,68 @@ timeslice_yield(const struct intel_engine_execlists *el, return rq->context->lrc.ccid == READ_ONCE(el->yield); } -static bool -timeslice_expired(const struct intel_engine_execlists *el, - const struct i915_request *rq) +static bool needs_timeslice(const struct intel_engine_cs *engine, + const struct i915_request *rq) { - return timer_expired(&el->timer) || timeslice_yield(el, rq); -} + if (!intel_engine_has_timeslices(engine)) + return false; -static int -switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) -{ - if (list_is_last(&rq->sched.link, &engine->active.requests)) - return engine->execlists.queue_priority_hint; + /* If not currently active, or about to switch, wait for next event */ + if (!rq || __i915_request_is_complete(rq)) + return false; - return rq_prio(list_next_entry(rq, sched.link)); -} + /* We do not need to start the timeslice until after the ACK */ + if (READ_ONCE(engine->execlists.pending[0])) + 
return false; -static inline unsigned long -timeslice(const struct intel_engine_cs *engine) -{ - return READ_ONCE(engine->props.timeslice_duration_ms); + /* If ELSP[1] is occupied, always check to see if worth slicing */ + if (!list_is_last_rcu(&rq->sched.link, &engine->active.requests)) + return true; + + /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ + if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) + return true; + + return !RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root); } -static unsigned long active_timeslice(const struct intel_engine_cs *engine) +static bool +timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq) { - const struct intel_engine_execlists *execlists = &engine->execlists; - const struct i915_request *rq = *execlists->active; + const struct intel_engine_execlists *el = &engine->execlists; - if (!rq || __i915_request_is_complete(rq)) - return 0; + if (i915_request_has_nopreempt(rq) && __i915_request_has_started(rq)) + return false; - if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq)) - return 0; + if (!needs_timeslice(engine, rq)) + return false; - return timeslice(engine); + return timer_expired(&el->timer) || timeslice_yield(el, rq); } -static void set_timeslice(struct intel_engine_cs *engine) +static unsigned long timeslice(const struct intel_engine_cs *engine) { - unsigned long duration; - - if (!intel_engine_has_timeslices(engine)) - return; - - duration = active_timeslice(engine); - ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration); - - set_timer_ms(&engine->execlists.timer, duration); + return READ_ONCE(engine->props.timeslice_duration_ms); } -static void start_timeslice(struct intel_engine_cs *engine, int prio) +static void start_timeslice(struct intel_engine_cs *engine) { - struct intel_engine_execlists *execlists = &engine->execlists; + struct intel_engine_execlists *el = &engine->execlists; unsigned long duration; - if 
(!intel_engine_has_timeslices(engine)) - return; - - WRITE_ONCE(execlists->switch_priority_hint, prio); - if (prio == INT_MIN) - return; - - if (timer_pending(&execlists->timer)) - return; + /* Disable the timer if there is nothing to switch to */ + duration = 0; + if (needs_timeslice(engine, *el->active)) { + if (el->timer.expires) { + if (!timer_pending(&el->timer)) + tasklet_hi_schedule(&engine->execlists.tasklet); + return; + } - duration = timeslice(engine); - ENGINE_TRACE(engine, - "start timeslicing, prio:%d, interval:%lu", - prio, duration); + duration = timeslice(engine); + } - set_timer_ms(&execlists->timer, duration); + set_timer_ms(&el->timer, duration); } static void record_preemption(struct intel_engine_execlists *execlists) @@ -1333,16 +1306,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine) __unwind_incomplete_requests(engine); last = NULL; - } else if (need_timeslice(engine, last) && - timeslice_expired(execlists, last)) { + } else if (timeslice_expired(engine, last)) { ENGINE_TRACE(engine, - "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", - last->fence.context, - last->fence.seqno, - last->sched.attr.priority, + "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", + yesno(timer_expired(&execlists->timer)), + last->fence.context, last->fence.seqno, + rq_prio(last), execlists->queue_priority_hint, yesno(timeslice_yield(execlists, last))); + cancel_timer(&execlists->timer); ring_set_paused(engine, 1); defer_active(engine); @@ -1378,7 +1351,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * of timeslices, our queue might be. 
*/ spin_unlock(&engine->active.lock); - start_timeslice(engine, queue_prio(execlists)); return; } } @@ -1407,7 +1379,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); spin_unlock(&engine->active.lock); - start_timeslice(engine, rq_prio(rq)); return; /* leave this for another sibling */ } @@ -1571,29 +1542,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine) execlists->queue_priority_hint = queue_prio(execlists); spin_unlock(&engine->active.lock); - if (submit) { - /* - * Skip if we ended up with exactly the same set of requests, - * e.g. trying to timeslice a pair of ordered contexts - */ - if (!memcmp(execlists->active, - execlists->pending, - (port - execlists->pending) * sizeof(*port))) - goto skip_submit; - + /* + * We can skip poking the HW if we ended up with exactly the same set + * of requests as currently running, e.g. trying to timeslice a pair + * of ordered contexts. + */ + if (submit && + memcmp(execlists->active, + execlists->pending, + (port - execlists->pending) * sizeof(*port))) { *port = NULL; while (port-- != execlists->pending) execlists_schedule_in(*port, port - execlists->pending); - execlists->switch_priority_hint = - switch_prio(engine, *execlists->pending); - WRITE_ONCE(execlists->yield, -1); set_preempt_timeout(engine, *execlists->active); execlists_submit_ports(engine); } else { - start_timeslice(engine, execlists->queue_priority_hint); -skip_submit: ring_set_paused(engine, 0); while (port-- != execlists->pending) i915_request_put(*port); @@ -1951,8 +1916,6 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) } } while (head != tail); - set_timeslice(engine); - /* * Gen11 has proven to fail wrt global observation point between * entry and tail update, failing on the ordering and thus @@ -1965,6 +1928,7 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * invalidation before. 
*/ invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); + cancel_timer(&execlists->timer); return inactive; } @@ -2383,8 +2347,10 @@ static void execlists_submission_tasklet(unsigned long data) execlists_reset(engine, msg); } - if (!engine->execlists.pending[0]) + if (!engine->execlists.pending[0]) { execlists_dequeue_irq(engine); + start_timeslice(engine); + } post_process_csb(post, inactive); rcu_read_unlock(); @@ -3861,9 +3827,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 0); } - if (execlists->switch_priority_hint != INT_MIN) - drm_printf(m, "\t\tSwitch priority hint: %d\n", - READ_ONCE(execlists->switch_priority_hint)); if (execlists->queue_priority_hint != INT_MIN) drm_printf(m, "\t\tQueue priority hint: %d\n", READ_ONCE(execlists->queue_priority_hint)); From patchwork Mon Dec 28 15:51:50 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991365 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1C911C43381 for ; Mon, 28 Dec 2020 15:53:11 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id DA5EC206E5 for ; Mon, 28 Dec 2020 15:53:10 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org DA5EC206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk 
Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 1BA99899DB; Mon, 28 Dec 2020 15:52:57 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 795028999C for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448192-1500050 for multiple; Mon, 28 Dec 2020 15:52:34 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:50 +0000 Message-Id: <20201228155229.9516-15-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 15/54] drm/i915/gt: Skip over completed active execlists, again X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Now that we are careful to always force-restore contexts upon rewinding (where necessary), we can restore our optimisation to skip over completed active execlists when dequeuing. 
References: 35f3fd8182ba ("drm/i915/execlists: Workaround switching back to a completed context") References: 8ab3a3812aa9 ("drm/i915/gt: Incrementally check for rewinding") Signed-off-by: Chris Wilson Cc: Mika Kuoppala --- .../drm/i915/gt/intel_execlists_submission.c | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 325da35a3238..dff2fad8d5e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1224,12 +1224,20 @@ static void set_preempt_timeout(struct intel_engine_cs *engine, active_preempt_timeout(engine, rq)); } +static bool completed(const struct i915_request *rq) +{ + if (i915_request_has_sentinel(rq)) + return false; + + return __i915_request_is_complete(rq); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; - struct i915_request *last = *execlists->active; + struct i915_request *last, * const *active; struct virtual_engine *ve; struct rb_node *rb; bool submit = false; @@ -1266,21 +1274,15 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * i.e. we will retrigger preemption following the ack in case * of trouble. * - * In theory we can skip over completed contexts that have not - * yet been processed by events (as those events are in flight): - * - * while ((last = *active) && i915_request_completed(last)) - * active++; - * - * However, the GPU cannot handle this as it will ultimately - * find itself trying to jump back into a context it has just - * completed and barf. 
*/ + active = execlists->active; + while ((last = *active) && completed(last)) + active++; if (last) { - if (__i915_request_is_complete(last)) { - goto check_secondary; - } else if (need_preempt(engine, last)) { + GEM_BUG_ON(!i915_request_is_active(last)); + + if (need_preempt(engine, last)) { ENGINE_TRACE(engine, "preempting last=%llx:%lld, prio=%d, hint=%d\n", last->fence.context, @@ -1343,7 +1345,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * we hopefully coalesce several updates into a single * submission. */ -check_secondary: if (!list_is_last(&last->sched.link, &engine->active.requests)) { /* @@ -1548,7 +1549,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * of ordered contexts. */ if (submit && - memcmp(execlists->active, + memcmp(active, execlists->pending, (port - execlists->pending) * sizeof(*port))) { *port = NULL; @@ -1556,7 +1557,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) execlists_schedule_in(*port, port - execlists->pending); WRITE_ONCE(execlists->yield, -1); - set_preempt_timeout(engine, *execlists->active); + set_preempt_timeout(engine, *active); execlists_submit_ports(engine); } else { ring_set_paused(engine, 0); From patchwork Mon Dec 28 15:51:51 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991385 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 51BBDC43332 for ; Mon, 28 Dec 2020 15:53:22 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org 
[131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id EBF08206E5 for ; Mon, 28 Dec 2020 15:53:21 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org EBF08206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id D5A8689A9F; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 4B41F899D4 for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448193-1500050 for multiple; Mon, 28 Dec 2020 15:52:35 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:51 +0000 Message-Id: <20201228155229.9516-16-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 16/54] drm/i915: Strip out internal priorities X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Since we are not using any internal priority levels, and in the next few patches will introduce a new index for which the optimisation is not so clear cut, discard the small table within the 
priolist. Signed-off-by: Chris Wilson --- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +- .../drm/i915/gt/intel_execlists_submission.c | 22 ++------ drivers/gpu/drm/i915/gt/selftest_execlists.c | 1 - drivers/gpu/drm/i915/gt/selftest_lrc.c | 1 - .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 +-- drivers/gpu/drm/i915/i915_priolist_types.h | 8 +-- drivers/gpu/drm/i915/i915_scheduler.c | 51 +++---------------- drivers/gpu/drm/i915/i915_scheduler.h | 16 ++---- 8 files changed, 20 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index d7be2b9339f9..1732a42e9075 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -125,7 +125,7 @@ static void heartbeat(struct work_struct *wrk) * low latency and no jitter] the chance to naturally * complete before being preempted. */ - attr.priority = I915_PRIORITY_MASK; + attr.priority = 0; if (rq->sched.attr.priority >= attr.priority) attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT); if (rq->sched.attr.priority >= attr.priority) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index dff2fad8d5e3..1c30107e2c1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -286,22 +286,13 @@ static int effective_prio(const struct i915_request *rq) static int queue_prio(const struct intel_engine_execlists *execlists) { - struct i915_priolist *p; struct rb_node *rb; rb = rb_first_cached(&execlists->queue); if (!rb) return INT_MIN; - /* - * As the priolist[] are inverted, with the highest priority in [0], - * we have to flip the index value to become priority. 
- */ - p = to_priolist(rb); - if (!I915_USER_PRIORITY_SHIFT) - return p->priority; - - return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); + return to_priolist(rb)->priority; } static int virtual_prio(const struct intel_engine_execlists *el) @@ -1441,9 +1432,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - int i; - priolist_for_each_request_consume(rq, rn, p, i) { + priolist_for_each_request_consume(rq, rn, p) { bool merge = true; /* @@ -2969,9 +2959,8 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* Flush the queued requests to the timeline list (for retiring). */ while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); - int i; - priolist_for_each_request_consume(rq, rn, p, i) { + priolist_for_each_request_consume(rq, rn, p) { mark_eio(rq); __i915_request_submit(rq); } @@ -3242,7 +3231,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) static struct list_head *virtual_queue(struct virtual_engine *ve) { - return &ve->base.execlists.default_priolist.requests[0]; + return &ve->base.execlists.default_priolist.requests; } static void rcu_virtual_context_destroy(struct work_struct *wrk) @@ -3836,9 +3825,8 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, count = 0; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - int i; - priolist_for_each_request(rq, p, i) { + priolist_for_each_request(rq, p) { if (count++ < max - 1) show_request(m, rq, "\t\t", 0); else diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 080b63000a4e..2e49b31be96f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -1078,7 +1078,6 @@ 
create_rewinder(struct intel_context *ce, intel_ring_advance(rq, cs); - rq->sched.attr.priority = I915_PRIORITY_MASK; err = 0; err: i915_request_get(rq); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index ba6c2be5c8ff..f449c56e0946 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -730,7 +730,6 @@ create_timestamp(struct intel_context *ce, void *slot, int idx) intel_ring_advance(rq, cs); - rq->sched.attr.priority = I915_PRIORITY_MASK; err = 0; err: i915_request_get(rq); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 694ee424b4ee..6a70f3a2c002 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -313,9 +313,8 @@ static void __guc_dequeue(struct intel_engine_cs *engine) while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - int i; - priolist_for_each_request_consume(rq, rn, p, i) { + priolist_for_each_request_consume(rq, rn, p) { if (last && rq->context != last->context) { if (port == last_port) goto done; @@ -486,9 +485,8 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) /* Flush the queued requests to the timeline list (for retiring). 
*/ while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); - int i; - priolist_for_each_request_consume(rq, rn, p, i) { + priolist_for_each_request_consume(rq, rn, p) { list_del_init(&rq->sched.link); __i915_request_submit(rq); dma_fence_set_error(&rq->fence, -EIO); diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 8aa7866ec6b6..9a7657bb002e 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -27,11 +27,8 @@ enum { #define I915_USER_PRIORITY_SHIFT 0 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) -#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) -#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) - /* Smallest priority value that cannot be bumped. */ -#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK) +#define I915_PRIORITY_INVALID (INT_MIN) /* * Requests containing performance queries must not be preempted by @@ -45,9 +42,8 @@ enum { #define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) struct i915_priolist { - struct list_head requests[I915_PRIORITY_COUNT]; + struct list_head requests; struct rb_node node; - unsigned long used; int priority; }; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 318e359bf5c3..1f033eab9a1c 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -43,7 +43,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static void assert_priolists(struct intel_engine_execlists * const execlists) { struct rb_node *rb; - long last_prio, i; + long last_prio; if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; @@ -57,14 +57,6 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) GEM_BUG_ON(p->priority > last_prio); last_prio = p->priority; - - GEM_BUG_ON(!p->used); - for (i = 0; i < ARRAY_SIZE(p->requests); i++) { - 
if (list_empty(&p->requests[i])) - continue; - - GEM_BUG_ON(!(p->used & BIT(i))); - } } } @@ -75,13 +67,10 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) struct i915_priolist *p; struct rb_node **parent, *rb; bool first = true; - int idx, i; lockdep_assert_held(&engine->active.lock); assert_priolists(execlists); - /* buckets sorted from highest [in slot 0] to lowest priority */ - idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; prio >>= I915_USER_PRIORITY_SHIFT; if (unlikely(execlists->no_priolist)) prio = I915_PRIORITY_NORMAL; @@ -99,7 +88,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) parent = &rb->rb_right; first = false; } else { - goto out; + return &p->requests; } } @@ -125,15 +114,12 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) } p->priority = prio; - for (i = 0; i < ARRAY_SIZE(p->requests); i++) - INIT_LIST_HEAD(&p->requests[i]); + INIT_LIST_HEAD(&p->requests); + rb_link_node(&p->node, rb, parent); rb_insert_color_cached(&p->node, &execlists->queue, first); - p->used = 0; -out: - p->used |= BIT(idx); - return &p->requests[idx]; + return &p->requests; } void __i915_priolist_free(struct i915_priolist *p) @@ -363,30 +349,6 @@ void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) spin_unlock_irq(&schedule_lock); } -static void __bump_priority(struct i915_sched_node *node, unsigned int bump) -{ - struct i915_sched_attr attr = node->attr; - - if (attr.priority & bump) - return; - - attr.priority |= bump; - __i915_schedule(node, &attr); -} - -void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) -{ - unsigned long flags; - - GEM_BUG_ON(bump & ~I915_PRIORITY_MASK); - if (READ_ONCE(rq->sched.attr.priority) & bump) - return; - - spin_lock_irqsave(&schedule_lock, flags); - __bump_priority(&rq->sched, bump); - spin_unlock_irqrestore(&schedule_lock, flags); -} - void i915_sched_node_init(struct i915_sched_node *node) { 
INIT_LIST_HEAD(&node->signalers_list); @@ -553,8 +515,7 @@ int __init i915_global_scheduler_init(void) if (!global.slab_dependencies) return -ENOMEM; - global.slab_priorities = KMEM_CACHE(i915_priolist, - SLAB_HWCACHE_ALIGN); + global.slab_priorities = KMEM_CACHE(i915_priolist, 0); if (!global.slab_priorities) goto err_priorities; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 4501e5ac2637..858a0938f47a 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -15,17 +15,11 @@ struct drm_printer; -#define priolist_for_each_request(it, plist, idx) \ - for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \ - list_for_each_entry(it, &(plist)->requests[idx], sched.link) +#define priolist_for_each_request(it, plist) \ + list_for_each_entry(it, &(plist)->requests, sched.link) -#define priolist_for_each_request_consume(it, n, plist, idx) \ - for (; \ - (plist)->used ? (idx = __ffs((plist)->used)), 1 : 0; \ - (plist)->used &= ~BIT(idx)) \ - list_for_each_entry_safe(it, n, \ - &(plist)->requests[idx], \ - sched.link) +#define priolist_for_each_request_consume(it, n, plist) \ + list_for_each_entry_safe(it, n, &(plist)->requests, sched.link) void i915_sched_node_init(struct i915_sched_node *node); void i915_sched_node_reinit(struct i915_sched_node *node); @@ -44,8 +38,6 @@ void i915_sched_node_fini(struct i915_sched_node *node); void i915_schedule(struct i915_request *request, const struct i915_sched_attr *attr); -void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump); - struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); From patchwork Mon Dec 28 15:51:52 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991417 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on 
aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id C0727C432C3 for ; Mon, 28 Dec 2020 15:53:28 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 7B0FF206E5 for ; Mon, 28 Dec 2020 15:53:28 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 7B0FF206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id E2D1689AA6; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 399838925D for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448194-1500050 for multiple; Mon, 28 Dec 2020 15:52:35 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:52 +0000 Message-Id: <20201228155229.9516-17-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 17/54] drm/i915: Remove 
I915_USER_PRIORITY_SHIFT X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" As we do not have any internal priority levels, the priority can be set directly from the user values. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_display.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +-- .../i915/gem/selftests/i915_gem_object_blt.c | 4 +- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 10 ++--- drivers/gpu/drm/i915/gt/selftest_execlists.c | 44 +++++++------------ drivers/gpu/drm/i915/i915_priolist_types.h | 3 -- drivers/gpu/drm/i915/i915_scheduler.c | 1 - 7 files changed, 24 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f2c48e5cdb43..112e3a3d2dab 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15613,9 +15613,7 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_DISPLAY), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY }; i915_gem_object_wait_priority(obj, 0, &attr); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4d2f40cf237b..61a7360c4d9a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -679,7 +679,7 @@ __create_context(struct drm_i915_private *i915) kref_init(&ctx->ref); ctx->i915 = i915; - ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); + ctx->sched.priority = I915_PRIORITY_NORMAL; 
mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); @@ -1959,7 +1959,7 @@ static int set_priority(struct i915_gem_context *ctx, !capable(CAP_SYS_NICE)) return -EPERM; - ctx->sched.priority = I915_USER_PRIORITY(priority); + ctx->sched.priority = priority; context_apply_all(ctx, __apply_priority, ctx); return 0; @@ -2463,7 +2463,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_PRIORITY: args->size = 0; - args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT; + args->value = ctx->sched.priority; break; case I915_CONTEXT_PARAM_SSEU: diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c index 23b6e11bbc3e..c4c04fb97d14 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c @@ -220,7 +220,7 @@ static int igt_fill_blt_thread(void *arg) return PTR_ERR(ctx); prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = I915_USER_PRIORITY(prio); + ctx->sched.priority = prio; } ce = i915_gem_context_get_engine(ctx, 0); @@ -338,7 +338,7 @@ static int igt_copy_blt_thread(void *arg) return PTR_ERR(ctx); prio = i915_prandom_u32_max_state(I915_PRIORITY_MAX, prng); - ctx->sched.priority = I915_USER_PRIORITY(prio); + ctx->sched.priority = prio; } ce = i915_gem_context_get_engine(ctx, 0); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 1732a42e9075..ed03c08737f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -81,9 +81,7 @@ static void show_heartbeat(const struct i915_request *rq, static void heartbeat(struct work_struct *wrk) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN }; struct intel_engine_cs 
*engine = container_of(wrk, typeof(*engine), heartbeat.work.work); struct intel_context *ce = engine->kernel_context; @@ -127,7 +125,7 @@ static void heartbeat(struct work_struct *wrk) */ attr.priority = 0; if (rq->sched.attr.priority >= attr.priority) - attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT); + attr.priority = I915_PRIORITY_HEARTBEAT; if (rq->sched.attr.priority >= attr.priority) attr.priority = I915_PRIORITY_BARRIER; @@ -285,9 +283,7 @@ int intel_engine_pulse(struct intel_engine_cs *engine) int intel_engine_flush_barriers(struct intel_engine_cs *engine) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN }; struct intel_context *ce = engine->kernel_context; struct i915_request *rq; int err; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 2e49b31be96f..5152cf3f59a7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -321,7 +321,7 @@ static int live_unlite_switch(void *arg) static int live_unlite_preempt(void *arg) { - return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); + return live_unlite_restore(arg, I915_PRIORITY_MAX); } static int live_unlite_ring(void *arg) @@ -1308,9 +1308,7 @@ static int live_timeslice_queue(void *arg) goto err_pin; for_each_engine(engine, gt, id) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct i915_request *rq, *nop; if (!intel_engine_has_preemption(engine)) @@ -1525,14 +1523,12 @@ static int live_busywait_preempt(void *arg) ctx_hi = kernel_context(gt->i915); if (!ctx_hi) return -ENOMEM; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = 
kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); if (IS_ERR(obj)) { @@ -1735,14 +1731,12 @@ static int live_preempt(void *arg) ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; for_each_engine(engine, gt, id) { struct igt_live_test t; @@ -1838,7 +1832,7 @@ static int live_late_preempt(void *arg) goto err_ctx_hi; /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ - ctx_lo->sched.priority = I915_USER_PRIORITY(1); + ctx_lo->sched.priority = 1; for_each_engine(engine, gt, id) { struct igt_live_test t; @@ -1879,7 +1873,7 @@ static int live_late_preempt(void *arg) goto err_wedged; } - attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + attr.priority = I915_PRIORITY_MAX; engine->schedule(rq, &attr); if (!igt_wait_for_spinner(&spin_hi, rq)) { @@ -1963,7 +1957,7 @@ static int live_nopreempt(void *arg) return -ENOMEM; if (preempt_client_init(gt, &b)) goto err_client_a; - b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + b.ctx->sched.priority = I915_PRIORITY_MAX; for_each_engine(engine, gt, id) { struct i915_request *rq_a, *rq_b; @@ -2356,11 +2350,9 @@ static int live_preempt_cancel(void *arg) static int live_suppress_self_preempt(void *arg) { + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct intel_gt *gt = arg; struct intel_engine_cs *engine; - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) - }; struct preempt_client 
a, b; enum intel_engine_id id; int err = -ENOMEM; @@ -2497,9 +2489,7 @@ static int live_chain_preempt(void *arg) goto err_client_hi; for_each_engine(engine, gt, id) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), - }; + struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct igt_live_test t; struct i915_request *rq; int ring_size, count, i; @@ -2917,9 +2907,7 @@ static int live_preempt_gang(void *arg) return -EIO; do { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(prio++), - }; + struct i915_sched_attr attr = { .priority = prio++ }; err = create_gang(engine, &rq); if (err) @@ -2955,7 +2943,7 @@ static int live_preempt_gang(void *arg) drm_info_printer(engine->i915->drm.dev); pr_err("Failed to flush chain of %d requests, at %d\n", - prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); + prio, rq_prio(rq)); intel_engine_dump(engine, &p, "%s\n", engine->name); @@ -3329,14 +3317,12 @@ static int live_preempt_timeout(void *arg) ctx_hi = kernel_context(gt->i915); if (!ctx_hi) goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(gt->i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; for_each_engine(engine, gt, id) { unsigned long saved_timeout; diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 9a7657bb002e..bc2fa84f98a8 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -24,9 +24,6 @@ enum { I915_PRIORITY_DISPLAY, }; -#define I915_USER_PRIORITY_SHIFT 0 -#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) - /* Smallest priority value that cannot be bumped. 
*/ #define I915_PRIORITY_INVALID (INT_MIN) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 1f033eab9a1c..a57353191d12 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -71,7 +71,6 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) lockdep_assert_held(&engine->active.lock); assert_priolists(execlists); - prio >>= I915_USER_PRIORITY_SHIFT; if (unlikely(execlists->no_priolist)) prio = I915_PRIORITY_NORMAL; From patchwork Mon Dec 28 15:51:53 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991429 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7F94FC4321A for ; Mon, 28 Dec 2020 15:53:25 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 29687206E5 for ; Mon, 28 Dec 2020 15:53:25 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 29687206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id B42E589B03; Mon, 28 Dec 2020 15:53:00 +0000 (UTC) Received: from fireflyinternet.com 
(unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 39CA9899C7 for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448196-1500050 for multiple; Mon, 28 Dec 2020 15:52:35 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:53 +0000 Message-Id: <20201228155229.9516-18-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 18/54] drm/i915: Replace engine->schedule() with a known request operation X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Looking to the future, we want to set the scheduling attributes explicitly and so replace the generic engine->schedule() with the more direct i915_request_set_priority(). What it loses in removing the 'schedule' name from the function, it gains in having an explicit entry point with a stated goal. 
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_display.c | 9 +---- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 - drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 27 +++++---------- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 -- .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 4 +-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 29 ++++++++-------- drivers/gpu/drm/i915/gt/intel_engine_user.c | 2 +- .../drm/i915/gt/intel_execlists_submission.c | 3 +- drivers/gpu/drm/i915/gt/selftest_execlists.c | 33 +++++-------------- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 11 +++---- drivers/gpu/drm/i915/i915_request.c | 10 +++--- drivers/gpu/drm/i915/i915_scheduler.c | 15 +++++---- drivers/gpu/drm/i915/i915_scheduler.h | 3 +- 14 files changed, 56 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 112e3a3d2dab..f25ab2b15ca1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15611,13 +15611,6 @@ void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_unpin_fb_vma(vma, old_plane_state->flags); } -static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) -{ - struct i915_sched_attr attr = { .priority = I915_PRIORITY_DISPLAY }; - - i915_gem_object_wait_priority(obj, 0, &attr); -} - /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @_plane: drm plane to prepare for @@ -15694,7 +15687,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret) return ret; - fb_obj_bump_render_priority(obj); + i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB); if (!new_plane_state->uapi.fence) { /* implicit fencing */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 19eeb3f8c5e8..dee0e2cbc71c 100644 --- 
a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2653,7 +2653,6 @@ static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool throt int err; GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED); - if (unlikely(intel_context_is_banned(ce))) return ERR_PTR(-EIO); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index be14486f63a7..b106bc81c303 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -517,7 +517,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, long timeout); int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - const struct i915_sched_attr *attr); + int prio); void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, enum fb_op_origin origin); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index c1b13ac50d0f..a5d7efe67021 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -91,28 +91,17 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, return timeout; } -static void __fence_set_priority(struct dma_fence *fence, - const struct i915_sched_attr *attr) +static void __fence_set_priority(struct dma_fence *fence, int prio) { - struct i915_request *rq; - struct intel_engine_cs *engine; - if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) return; - rq = to_request(fence); - engine = rq->engine; - local_bh_disable(); - rcu_read_lock(); /* RCU serialisation for set-wedged protection */ - if (engine->schedule) - engine->schedule(rq, attr); - rcu_read_unlock(); + i915_request_set_priority(to_request(fence), prio); local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } -static void fence_set_priority(struct dma_fence *fence, - const struct i915_sched_attr *attr) +static void 
fence_set_priority(struct dma_fence *fence, int prio) { /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { @@ -120,16 +109,16 @@ static void fence_set_priority(struct dma_fence *fence, int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], attr); + __fence_set_priority(array->fences[i], prio); } else { - __fence_set_priority(fence, attr); + __fence_set_priority(fence, prio); } } int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - const struct i915_sched_attr *attr) + int prio) { struct dma_fence *excl; @@ -144,7 +133,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], attr); + fence_set_priority(shared[i], prio); dma_fence_put(shared[i]); } @@ -154,7 +143,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, attr); + fence_set_priority(excl, prio); dma_fence_put(excl); } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index d2a20e53b080..2f630b47783a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -337,9 +337,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) if (engine->context_size) DRIVER_CAPS(i915)->has_logical_contexts = true; - /* Nothing to do here, execute in order of dependencies */ - engine->schedule = NULL; - ewma__engine_latency_init(&engine->latency); seqcount_init(&engine->stats.lock); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index ed03c08737f5..bccbb932a315 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -115,7 +115,7 @@ static void heartbeat(struct work_struct *wrk) * but all other contexts, including the kernel * context are stuck 
waiting for the signal. */ - } else if (engine->schedule && + } else if (intel_engine_has_scheduler(engine) && rq->sched.attr.priority < I915_PRIORITY_BARRIER) { /* * Gradually raise the priority of the heartbeat to @@ -130,7 +130,7 @@ static void heartbeat(struct work_struct *wrk) attr.priority = I915_PRIORITY_BARRIER; local_bh_disable(); - engine->schedule(rq, &attr); + i915_request_set_priority(rq, attr.priority); local_bh_enable(); } else { if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index f850179a583f..08ed958292af 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -454,14 +454,6 @@ struct intel_engine_cs { void (*bond_execute)(struct i915_request *rq, struct dma_fence *signal); - /* - * Call when the priority on a request has changed and it and its - * dependencies may need rescheduling. Note the request itself may - * not be ready to run! 
- */ - void (*schedule)(struct i915_request *request, - const struct i915_sched_attr *attr); - void (*release)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; @@ -479,13 +471,14 @@ struct intel_engine_cs { #define I915_ENGINE_USING_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) -#define I915_ENGINE_HAS_PREEMPTION BIT(2) -#define I915_ENGINE_HAS_SEMAPHORES BIT(3) -#define I915_ENGINE_HAS_TIMESLICES BIT(4) -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) -#define I915_ENGINE_IS_VIRTUAL BIT(6) -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) +#define I915_ENGINE_HAS_SCHEDULER BIT(2) +#define I915_ENGINE_HAS_PREEMPTION BIT(3) +#define I915_ENGINE_HAS_SEMAPHORES BIT(4) +#define I915_ENGINE_HAS_TIMESLICES BIT(5) +#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(6) +#define I915_ENGINE_IS_VIRTUAL BIT(7) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(8) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(9) unsigned int flags; /* @@ -571,6 +564,12 @@ intel_engine_supports_stats(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_SUPPORTS_STATS; } +static inline bool +intel_engine_has_scheduler(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_SCHEDULER; +} + static inline bool intel_engine_has_preemption(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 34e6096f196e..6b5a4fdc14a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -108,7 +108,7 @@ static void set_scheduler_caps(struct drm_i915_private *i915) for_each_uabi_engine(engine, i915) { /* all engines must agree! 
*/ int i; - if (engine->schedule) + if (intel_engine_has_scheduler(engine)) enabled |= (I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY); else diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 1c30107e2c1d..9eaa3dc47af3 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3048,7 +3048,6 @@ static void execlists_park(struct intel_engine_cs *engine) void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; - engine->schedule = i915_schedule; engine->execlists.tasklet.func = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; @@ -3059,6 +3058,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->park = execlists_park; engine->unpark = NULL; + engine->flags |= I915_ENGINE_HAS_SCHEDULER; engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; @@ -3629,7 +3629,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; - ve->base.schedule = i915_schedule; ve->base.submit_request = virtual_submit_request; ve->base.bond_execute = virtual_bond_execute; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 5152cf3f59a7..0a2952bf1dcb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -269,12 +269,8 @@ static int live_unlite_restore(struct intel_gt *gt, int prio) i915_request_put(rq[0]); if (prio) { - struct i915_sched_attr attr = { - .priority = prio, - }; - /* Alternatively preempt the spinner with ce[1] */ - engine->schedule(rq[1], &attr); + i915_request_set_priority(rq[1], prio); } /* And switch 
back to ce[0] for good measure */ @@ -874,9 +870,6 @@ release_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx, int prio) { - struct i915_sched_attr attr = { - .priority = prio, - }; struct i915_request *rq; u32 *cs; @@ -901,7 +894,7 @@ release_queue(struct intel_engine_cs *engine, i915_request_add(rq); local_bh_disable(); - engine->schedule(rq, &attr); + i915_request_set_priority(rq, prio); local_bh_enable(); /* kick tasklet */ i915_request_put(rq); @@ -1308,7 +1301,6 @@ static int live_timeslice_queue(void *arg) goto err_pin; for_each_engine(engine, gt, id) { - struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct i915_request *rq, *nop; if (!intel_engine_has_preemption(engine)) @@ -1323,7 +1315,7 @@ static int live_timeslice_queue(void *arg) err = PTR_ERR(rq); goto err_heartbeat; } - engine->schedule(rq, &attr); + i915_request_set_priority(rq, I915_PRIORITY_MAX); err = wait_for_submit(engine, rq, HZ / 2); if (err) { pr_err("%s: Timed out trying to submit semaphores\n", @@ -1810,7 +1802,6 @@ static int live_late_preempt(void *arg) struct i915_gem_context *ctx_hi, *ctx_lo; struct igt_spinner spin_hi, spin_lo; struct intel_engine_cs *engine; - struct i915_sched_attr attr = {}; enum intel_engine_id id; int err = -ENOMEM; @@ -1873,8 +1864,7 @@ static int live_late_preempt(void *arg) goto err_wedged; } - attr.priority = I915_PRIORITY_MAX; - engine->schedule(rq, &attr); + i915_request_set_priority(rq, I915_PRIORITY_MAX); if (!igt_wait_for_spinner(&spin_hi, rq)) { pr_err("High priority context failed to preempt the low priority context\n"); @@ -2350,7 +2340,6 @@ static int live_preempt_cancel(void *arg) static int live_suppress_self_preempt(void *arg) { - struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct intel_gt *gt = arg; struct intel_engine_cs *engine; struct preempt_client a, b; @@ -2421,7 +2410,7 @@ static int live_suppress_self_preempt(void *arg) i915_request_add(rq_b); 
GEM_BUG_ON(i915_request_completed(rq_a)); - engine->schedule(rq_a, &attr); + i915_request_set_priority(rq_a, I915_PRIORITY_MAX); igt_spinner_end(&a.spin); if (!igt_wait_for_spinner(&b.spin, rq_b)) { @@ -2489,7 +2478,6 @@ static int live_chain_preempt(void *arg) goto err_client_hi; for_each_engine(engine, gt, id) { - struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX }; struct igt_live_test t; struct i915_request *rq; int ring_size, count, i; @@ -2556,7 +2544,7 @@ static int live_chain_preempt(void *arg) i915_request_get(rq); i915_request_add(rq); - engine->schedule(rq, &attr); + i915_request_set_priority(rq, I915_PRIORITY_MAX); igt_spinner_end(&hi.spin); if (i915_request_wait(rq, 0, HZ / 5) < 0) { @@ -2907,14 +2895,12 @@ static int live_preempt_gang(void *arg) return -EIO; do { - struct i915_sched_attr attr = { .priority = prio++ }; - err = create_gang(engine, &rq); if (err) break; /* Submit each spinner at increasing priority */ - engine->schedule(rq, &attr); + i915_request_set_priority(rq, prio++); } while (prio <= I915_PRIORITY_MAX && !__igt_timeout(end_time, NULL)); pr_debug("%s: Preempt chain of %d requests\n", @@ -3135,9 +3121,6 @@ static int preempt_user(struct intel_engine_cs *engine, struct i915_vma *global, int id) { - struct i915_sched_attr attr = { - .priority = I915_PRIORITY_MAX - }; struct i915_request *rq; int err = 0; u32 *cs; @@ -3162,7 +3145,7 @@ static int preempt_user(struct intel_engine_cs *engine, i915_request_get(rq); i915_request_add(rq); - engine->schedule(rq, &attr); + i915_request_set_priority(rq, I915_PRIORITY_MAX); if (i915_request_wait(rq, 0, HZ / 2) < 0) err = -ETIME; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index c28d1fcad673..927d54c702f4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -726,12 +726,11 @@ static int active_engine(void *data) rq[idx] = i915_request_get(new); i915_request_add(new); 
- if (engine->schedule && arg->flags & TEST_PRIORITY) { - struct i915_sched_attr attr = { - .priority = - i915_prandom_u32_max_state(512, &prng), - }; - engine->schedule(rq[idx], &attr); + if (intel_engine_has_scheduler(engine) && + arg->flags & TEST_PRIORITY) { + int prio = i915_prandom_u32_max_state(512, &prng); + + i915_request_set_priority(rq[idx], prio); } err = active_request_put(old); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7c5eec2fd631..5c3b575855f1 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1207,7 +1207,7 @@ __i915_request_await_execution(struct i915_request *to, } /* Couple the dependency tree for PI on this exposed to->fence */ - if (to->engine->schedule) { + if (intel_engine_has_scheduler(to->engine)) { err = i915_sched_node_add_dependency(&to->sched, &from->sched, I915_DEPENDENCY_WEAK); @@ -1348,7 +1348,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; } - if (to->engine->schedule) { + if (intel_engine_has_scheduler(to->engine)) { ret = i915_sched_node_add_dependency(&to->sched, &from->sched, I915_DEPENDENCY_EXTERNAL); @@ -1535,7 +1535,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) __i915_sw_fence_await_dma_fence(&rq->submit, &prev->fence, &rq->dmaq); - if (rq->engine->schedule) + if (intel_engine_has_scheduler(rq->engine)) __i915_sched_node_add_dependency(&rq->sched, &prev->sched, &rq->dep, @@ -1607,8 +1607,8 @@ void __i915_request_queue(struct i915_request *rq, * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. 
*/ - if (attr && rq->engine->schedule) - rq->engine->schedule(rq, attr); + if (attr) + i915_request_set_priority(rq, attr->priority); local_bh_disable(); __i915_request_queue_bh(rq); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index a57353191d12..264ed53821d8 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -216,10 +216,8 @@ static void kick_submission(struct intel_engine_cs *engine, rcu_read_unlock(); } -static void __i915_schedule(struct i915_sched_node *node, - const struct i915_sched_attr *attr) +static void __i915_schedule(struct i915_sched_node *node, int prio) { - const int prio = max(attr->priority, node->attr.priority); struct intel_engine_cs *engine; struct i915_dependency *dep, *p; struct i915_dependency stack; @@ -233,6 +231,8 @@ static void __i915_schedule(struct i915_sched_node *node, if (node_signaled(node)) return; + prio = max(prio, node->attr.priority); + stack.signaler = node; list_add(&stack.dfs_link, &dfs); @@ -286,7 +286,7 @@ static void __i915_schedule(struct i915_sched_node *node, */ if (node->attr.priority == I915_PRIORITY_INVALID) { GEM_BUG_ON(!list_empty(&node->link)); - node->attr = *attr; + node->attr.priority = prio; if (stack.dfs_link.next == stack.dfs_link.prev) return; @@ -341,10 +341,13 @@ static void __i915_schedule(struct i915_sched_node *node, spin_unlock(&engine->active.lock); } -void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) +void i915_request_set_priority(struct i915_request *rq, int prio) { + if (!intel_engine_has_scheduler(rq->engine)) + return; + spin_lock_irq(&schedule_lock); - __i915_schedule(&rq->sched, attr); + __i915_schedule(&rq->sched, prio); spin_unlock_irq(&schedule_lock); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 858a0938f47a..ccee506c9a26 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ 
b/drivers/gpu/drm/i915/i915_scheduler.h @@ -35,8 +35,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, void i915_sched_node_fini(struct i915_sched_node *node); -void i915_schedule(struct i915_request *request, - const struct i915_sched_attr *attr); +void i915_request_set_priority(struct i915_request *request, int prio); struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); From patchwork Mon Dec 28 15:51:54 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991391 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id B3396C43333 for ; Mon, 28 Dec 2020 15:53:24 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 7032A206E5 for ; Mon, 28 Dec 2020 15:53:24 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 7032A206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 72B9689AEB; Mon, 28 Dec 2020 15:53:00 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id C0B8D899C7 for ; Mon, 28 
Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448197-1500050 for multiple; Mon, 28 Dec 2020 15:52:35 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:54 +0000 Message-Id: <20201228155229.9516-19-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 19/54] drm/i915: Teach the i915_dependency to use a double-lock X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Currently, we construct and tear down the i915_dependency chains using a global spinlock. As the lists are entirely local, it should be possible to use a double-lock with an explicit nesting [signaler -> waiter, always] and so avoid the costly convenience of a global spinlock.
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/i915_scheduler.c | 63 ++++++++++++++------- drivers/gpu/drm/i915/i915_scheduler.h | 2 +- drivers/gpu/drm/i915/i915_scheduler_types.h | 2 + 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 5c3b575855f1..600e681e4894 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -330,7 +330,7 @@ bool i915_request_retire(struct i915_request *rq) intel_context_unpin(rq->context); free_capture_list(rq); - i915_sched_node_fini(&rq->sched); + i915_sched_node_retire(&rq->sched); i915_request_put(rq); return true; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 264ed53821d8..8391841d927d 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -19,6 +19,17 @@ static struct i915_global_scheduler { static DEFINE_SPINLOCK(schedule_lock); +static struct i915_sched_node *node_get(struct i915_sched_node *node) +{ + i915_request_get(container_of(node, struct i915_request, sched)); + return node; +} + +static void node_put(struct i915_sched_node *node) +{ + i915_request_put(container_of(node, struct i915_request, sched)); +} + static const struct i915_request * node_to_request(const struct i915_sched_node *node) { @@ -353,6 +364,8 @@ void i915_request_set_priority(struct i915_request *rq, int prio) void i915_sched_node_init(struct i915_sched_node *node) { + spin_lock_init(&node->lock); + INIT_LIST_HEAD(&node->signalers_list); INIT_LIST_HEAD(&node->waiters_list); INIT_LIST_HEAD(&node->link); @@ -377,10 +390,17 @@ i915_dependency_alloc(void) return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL); } +static void +rcu_dependency_free(struct rcu_head *rcu) +{ + kmem_cache_free(global.slab_dependencies, + container_of(rcu, typeof(struct i915_dependency), rcu)); +} + static 
void i915_dependency_free(struct i915_dependency *dep) { - kmem_cache_free(global.slab_dependencies, dep); + call_rcu(&dep->rcu, rcu_dependency_free); } bool __i915_sched_node_add_dependency(struct i915_sched_node *node, @@ -390,24 +410,27 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, { bool ret = false; - spin_lock_irq(&schedule_lock); + /* The signal->lock is always the outer lock in this double-lock. */ + spin_lock(&signal->lock); if (!node_signaled(signal)) { INIT_LIST_HEAD(&dep->dfs_link); dep->signaler = signal; - dep->waiter = node; + dep->waiter = node_get(node); dep->flags = flags; /* All set, now publish. Beware the lockless walkers. */ + spin_lock_nested(&node->lock, SINGLE_DEPTH_NESTING); list_add_rcu(&dep->signal_link, &node->signalers_list); list_add_rcu(&dep->wait_link, &signal->waiters_list); + spin_unlock(&node->lock); /* Propagate the chains */ node->flags |= signal->flags; ret = true; } - spin_unlock_irq(&schedule_lock); + spin_unlock(&signal->lock); return ret; } @@ -429,39 +452,36 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, return 0; } -void i915_sched_node_fini(struct i915_sched_node *node) +void i915_sched_node_retire(struct i915_sched_node *node) { struct i915_dependency *dep, *tmp; - spin_lock_irq(&schedule_lock); - /* * Everyone we depended upon (the fences we wait to be signaled) * should retire before us and remove themselves from our list. * However, retirement is run independently on each timeline and - * so we may be called out-of-order. + * so we may be called out-of-order. As we need to avoid taking + * the signaler's lock, just mark up our completion and be wary + * in traversing the signalers->waiters_list. 
*/ - list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del_rcu(&dep->wait_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(dep); - } - INIT_LIST_HEAD(&node->signalers_list); /* Remove ourselves from everyone who depends upon us */ + spin_lock(&node->lock); list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + struct i915_sched_node *w = dep->waiter; + GEM_BUG_ON(dep->signaler != node); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); + spin_lock_nested(&w->lock, SINGLE_DEPTH_NESTING); list_del_rcu(&dep->signal_link); + spin_unlock(&w->lock); + node_put(w); + if (dep->flags & I915_DEPENDENCY_ALLOC) i915_dependency_free(dep); } - INIT_LIST_HEAD(&node->waiters_list); - - spin_unlock_irq(&schedule_lock); + INIT_LIST_HEAD_RCU(&node->waiters_list); + spin_unlock(&node->lock); } void i915_request_show_with_schedule(struct drm_printer *m, @@ -512,8 +532,7 @@ static struct i915_global_scheduler global = { { int __init i915_global_scheduler_init(void) { global.slab_dependencies = KMEM_CACHE(i915_dependency, - SLAB_HWCACHE_ALIGN | - SLAB_TYPESAFE_BY_RCU); + SLAB_HWCACHE_ALIGN); if (!global.slab_dependencies) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index ccee506c9a26..a045be784c67 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -33,7 +33,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, struct i915_sched_node *signal, unsigned long flags); -void i915_sched_node_fini(struct i915_sched_node *node); +void i915_sched_node_retire(struct i915_sched_node *node); void i915_request_set_priority(struct i915_request *request, int prio); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 343ed44d5ed4..623bf41fcf35 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ 
b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -60,6 +60,7 @@ struct i915_sched_attr { * others. */ struct i915_sched_node { + spinlock_t lock; /* protect the lists */ struct list_head signalers_list; /* those before us, we depend upon */ struct list_head waiters_list; /* those after us, they depend upon us */ struct list_head link; @@ -75,6 +76,7 @@ struct i915_dependency { struct list_head signal_link; struct list_head wait_link; struct list_head dfs_link; + struct rcu_head rcu; unsigned long flags; #define I915_DEPENDENCY_ALLOC BIT(0) #define I915_DEPENDENCY_EXTERNAL BIT(1) From patchwork Mon Dec 28 15:51:55 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991421 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id D0461C43331 for ; Mon, 28 Dec 2020 15:53:20 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 8A0B2206E5 for ; Mon, 28 Dec 2020 15:53:20 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 8A0B2206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id F2A9D89AC2; Mon, 28 Dec 2020 
15:52:59 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 9397C899C7 for ; Mon, 28 Dec 2020 15:52:53 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448198-1500050 for multiple; Mon, 28 Dec 2020 15:52:35 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:55 +0000 Message-Id: <20201228155229.9516-20-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 20/54] drm/i915: Restructure priority inheritance X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" In anticipation of wanting to be able to call PI from underneath an engine's active.lock, rework the priority inheritance to primarily work along an engine's priority queue, delegating any other engine that the chain may traverse to a worker. This reduces the global spinlock from governing the entire multi-engine priority inheritance depth-first search, to a smaller lock on each engine around a single list on that engine.
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 + drivers/gpu/drm/i915/i915_scheduler.c | 340 ++++++++++++------- drivers/gpu/drm/i915/i915_scheduler.h | 2 + drivers/gpu/drm/i915/i915_scheduler_types.h | 19 +- 5 files changed, 230 insertions(+), 136 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 2f630b47783a..72a58604d4c4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -594,6 +594,8 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; + + i915_sched_init_ipi(&execlists->ipi); } static void cleanup_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 08ed958292af..d19710191690 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -21,6 +21,7 @@ #include "i915_gem.h" #include "i915_pmu.h" #include "i915_priolist_types.h" +#include "i915_scheduler_types.h" #include "i915_selftest.h" #include "intel_breadcrumbs_types.h" #include "intel_sseu.h" @@ -258,6 +259,8 @@ struct intel_engine_execlists { struct rb_root_cached queue; struct rb_root_cached virtual; + struct i915_sched_ipi ipi; + /** * @csb_write: control register for Context Switch buffer * diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 8391841d927d..d3d60fb8caf5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -17,8 +17,6 @@ static struct i915_global_scheduler { struct kmem_cache *slab_priorities; } global; -static DEFINE_SPINLOCK(schedule_lock); - static struct i915_sched_node *node_get(struct i915_sched_node *node) { i915_request_get(container_of(node, struct 
i915_request, sched)); @@ -30,15 +28,114 @@ static void node_put(struct i915_sched_node *node) i915_request_put(container_of(node, struct i915_request, sched)); } -static const struct i915_request * -node_to_request(const struct i915_sched_node *node) +static inline int rq_prio(const struct i915_request *rq) { - return container_of(node, const struct i915_request, sched); + return READ_ONCE(rq->sched.attr.priority); } -static inline bool node_started(const struct i915_sched_node *node) +static int ipi_get_prio(struct i915_request *rq) { - return i915_request_started(node_to_request(node)); + if (READ_ONCE(rq->sched.ipi_priority) == I915_PRIORITY_INVALID) + return I915_PRIORITY_INVALID; + + return xchg(&rq->sched.ipi_priority, I915_PRIORITY_INVALID); +} + +static void ipi_schedule(struct work_struct *wrk) +{ + struct i915_sched_ipi *ipi = container_of(wrk, typeof(*ipi), work); + struct i915_request *rq = xchg(&ipi->list, NULL); + + do { + struct i915_request *rn = xchg(&rq->sched.ipi_link, NULL); + int prio; + + prio = ipi_get_prio(rq); + + /* + * For cross-engine scheduling to work we rely on one of two + * things: + * + * a) The requests are using dma-fence fences and so will not + * be scheduled until the previous engine is completed, and + * so we cannot cross back onto the original engine and end up + * queuing an earlier request after the first (due to the + * interrupted DFS). + * + * b) The requests are using semaphores and so may already + * be in flight, in which case if we cross back onto the same + * engine, we will already have put the interrupted DFS into + * the priolist, and the continuation will now be queued + * afterwards [out-of-order]. However, since we are using + * semaphores in this case, we also perform yield on semaphore + * waits and so will reorder the requests back into the correct + * sequence.
This occurrence (of promoting a request chain + * that crosses the engines using semaphores back unto itself) + * should be unlikely enough that it probably does not matter... + */ + local_bh_disable(); + i915_request_set_priority(rq, prio); + local_bh_enable(); + + i915_request_put(rq); + rq = ptr_mask_bits(rn, 1); + } while (rq); +} + +void i915_sched_init_ipi(struct i915_sched_ipi *ipi) +{ + INIT_WORK(&ipi->work, ipi_schedule); + ipi->list = NULL; +} + +static void __ipi_add(struct i915_request *rq) +{ +#define STUB ((struct i915_request *)1) + struct intel_engine_cs *engine = READ_ONCE(rq->engine); + struct i915_request *first; + + if (!i915_request_get_rcu(rq)) + return; + + if (__i915_request_is_complete(rq) || + cmpxchg(&rq->sched.ipi_link, NULL, STUB)) { /* already queued */ + i915_request_put(rq); + return; + } + + first = READ_ONCE(engine->execlists.ipi.list); + do + rq->sched.ipi_link = ptr_pack_bits(first, 1, 1); + while (!try_cmpxchg(&engine->execlists.ipi.list, &first, rq)); + + if (!first) + queue_work(system_unbound_wq, &engine->execlists.ipi.work); +} + +/* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. 
+ */ +#define lock_engine_irqsave(rq, flags) ({ \ + struct i915_request * const rq__ = (rq); \ + struct intel_engine_cs *engine__ = READ_ONCE(rq__->engine); \ +\ + spin_lock_irqsave(&engine__->active.lock, (flags)); \ + while (engine__ != READ_ONCE((rq__)->engine)) { \ + spin_unlock(&engine__->active.lock); \ + engine__ = READ_ONCE(rq__->engine); \ + spin_lock(&engine__->active.lock); \ + } \ +\ + engine__; \ +}) + +static const struct i915_request * +node_to_request(const struct i915_sched_node *node) +{ + return container_of(node, const struct i915_request, sched); } static inline bool node_signaled(const struct i915_sched_node *node) @@ -137,42 +234,6 @@ void __i915_priolist_free(struct i915_priolist *p) kmem_cache_free(global.slab_priorities, p); } -struct sched_cache { - struct list_head *priolist; -}; - -static struct intel_engine_cs * -sched_lock_engine(const struct i915_sched_node *node, - struct intel_engine_cs *locked, - struct sched_cache *cache) -{ - const struct i915_request *rq = node_to_request(node); - struct intel_engine_cs *engine; - - GEM_BUG_ON(!locked); - - /* - * Virtual engines complicate acquiring the engine timeline lock, - * as their rq->engine pointer is not stable until under that - * engine lock. The simple ploy we use is to take the lock then - * check that the rq still belongs to the newly locked engine. - */ - while (locked != (engine = READ_ONCE(rq->engine))) { - spin_unlock(&locked->active.lock); - memset(cache, 0, sizeof(*cache)); - spin_lock(&engine->active.lock); - locked = engine; - } - - GEM_BUG_ON(locked != engine); - return locked; -} - -static inline int rq_prio(const struct i915_request *rq) -{ - return rq->sched.attr.priority; -} - static inline bool need_preempt(int prio, int active) { /* @@ -198,19 +259,17 @@ static void kick_submission(struct intel_engine_cs *engine, if (prio <= engine->execlists.queue_priority_hint) return; - rcu_read_lock(); - /* Nothing currently active? We're overdue for a submission! 
*/ inflight = execlists_active(&engine->execlists); if (!inflight) - goto unlock; + return; /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. */ if (inflight->context == rq->context) - goto unlock; + return; ENGINE_TRACE(engine, "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", @@ -222,30 +281,28 @@ static void kick_submission(struct intel_engine_cs *engine, engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); - -unlock: - rcu_read_unlock(); } -static void __i915_schedule(struct i915_sched_node *node, int prio) +static void ipi_priority(struct i915_request *rq, int prio) { - struct intel_engine_cs *engine; - struct i915_dependency *dep, *p; - struct i915_dependency stack; - struct sched_cache cache; - LIST_HEAD(dfs); + int old = READ_ONCE(rq->sched.ipi_priority); - /* Needed in order to use the temporary link inside i915_dependency */ - lockdep_assert_held(&schedule_lock); - GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + do { + if (prio <= old) + return; + } while (!try_cmpxchg(&rq->sched.ipi_priority, &old, prio)); - if (node_signaled(node)) - return; + __ipi_add(rq); +} - prio = max(prio, node->attr.priority); +static void __i915_request_set_priority(struct i915_request *rq, int prio) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_request *rn; + struct list_head *plist; + LIST_HEAD(dfs); - stack.signaler = node; - list_add(&stack.dfs_link, &dfs); + list_add(&rq->sched.dfs, &dfs); /* * Recursively bump all dependent priorities to match the new request. @@ -265,66 +322,41 @@ static void __i915_schedule(struct i915_sched_node *node, int prio) * end result is a topological list of requests in reverse order, the * last element in the list is the request we must execute first. 
*/ - list_for_each_entry(dep, &dfs, dfs_link) { - struct i915_sched_node *node = dep->signaler; + list_for_each_entry(rq, &dfs, sched.dfs) { + struct i915_dependency *p; - /* If we are already flying, we know we have no signalers */ - if (node_started(node)) - continue; + /* Also release any children on this engine that are ready */ + GEM_BUG_ON(rq->engine != engine); - /* - * Within an engine, there can be no cycle, but we may - * refer to the same dependency chain multiple times - * (redundant dependencies are not eliminated) and across - * engines. - */ - list_for_each_entry(p, &node->signalers_list, signal_link) { - GEM_BUG_ON(p == dep); /* no cycles! */ + for_each_signaler(p, rq) { + struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); - if (node_signaled(p->signaler)) - continue; + GEM_BUG_ON(s == rq); - if (prio > READ_ONCE(p->signaler->attr.priority)) - list_move_tail(&p->dfs_link, &dfs); - } - } + if (rq_prio(s) >= prio) + continue; - /* - * If we didn't need to bump any existing priorities, and we haven't - * yet submitted this request (i.e. there is no potential race with - * execlists_submit_request()), we can set our own priority and skip - * acquiring the engine locks. 
- */ - if (node->attr.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&node->link)); - node->attr.priority = prio; + if (__i915_request_is_complete(s)) + continue; - if (stack.dfs_link.next == stack.dfs_link.prev) - return; + if (s->engine != rq->engine) { + ipi_priority(s, prio); + continue; + } - __list_del_entry(&stack.dfs_link); + list_move_tail(&s->sched.dfs, &dfs); + } } - memset(&cache, 0, sizeof(cache)); - engine = node_to_request(node)->engine; - spin_lock(&engine->active.lock); + plist = i915_sched_lookup_priolist(engine, prio); - /* Fifo and depth-first replacement ensure our deps execute before us */ - engine = sched_lock_engine(node, engine, &cache); - list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - INIT_LIST_HEAD(&dep->dfs_link); + /* Fifo and depth-first replacement ensure our deps execute first */ + list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) { + GEM_BUG_ON(rq->engine != engine); - node = dep->signaler; - engine = sched_lock_engine(node, engine, &cache); - lockdep_assert_held(&engine->active.lock); - - /* Recheck after acquiring the engine->timeline.lock */ - if (prio <= node->attr.priority || node_signaled(node)) - continue; - - GEM_BUG_ON(node_to_request(node)->engine != engine); - - WRITE_ONCE(node->attr.priority, prio); + INIT_LIST_HEAD(&rq->sched.dfs); + WRITE_ONCE(rq->sched.attr.priority, prio); /* * Once the request is ready, it will be placed into the @@ -334,32 +366,73 @@ static void __i915_schedule(struct i915_sched_node *node, int prio) * any preemption required, be dealt with upon submission. 
* See engine->submit_request() */ - if (list_empty(&node->link)) + if (!i915_request_is_ready(rq)) continue; - if (i915_request_in_priority_queue(node_to_request(node))) { - if (!cache.priolist) - cache.priolist = - i915_sched_lookup_priolist(engine, - prio); - list_move_tail(&node->link, cache.priolist); - } + if (i915_request_in_priority_queue(rq)) + list_move_tail(&rq->sched.link, plist); - /* Defer (tasklet) submission until after all of our updates. */ - kick_submission(engine, node_to_request(node), prio); + /* Defer (tasklet) submission until after all updates. */ + kick_submission(engine, rq, prio); } - - spin_unlock(&engine->active.lock); } void i915_request_set_priority(struct i915_request *rq, int prio) { - if (!intel_engine_has_scheduler(rq->engine)) + struct intel_engine_cs *engine; + unsigned long flags; + + if (prio <= rq_prio(rq)) return; - spin_lock_irq(&schedule_lock); - __i915_schedule(&rq->sched, prio); - spin_unlock_irq(&schedule_lock); + /* + * If we are setting the priority before being submitted, see if we + * can quickly adjust our own priority in-situ and avoid taking + * the contended engine->active.lock. If we need priority inheritance, + * take the slow route. 
+ */ + if (rq_prio(rq) == I915_PRIORITY_INVALID) { + struct i915_dependency *p; + + rcu_read_lock(); + for_each_signaler(p, rq) { + struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (rq_prio(s) >= prio) + continue; + + if (__i915_request_is_complete(s)) + continue; + + break; + } + rcu_read_unlock(); + + if (&p->signal_link == &rq->sched.signalers_list && + cmpxchg(&rq->sched.attr.priority, + I915_PRIORITY_INVALID, + prio) == I915_PRIORITY_INVALID) + return; + } + + engine = lock_engine_irqsave(rq, flags); + if (!intel_engine_has_scheduler(engine)) + goto unlock; + + if (prio <= rq_prio(rq)) + goto unlock; + + if (__i915_request_is_complete(rq)) + goto unlock; + + rcu_read_lock(); + __i915_request_set_priority(rq, prio); + rcu_read_unlock(); + GEM_BUG_ON(rq_prio(rq) != prio); + +unlock: + spin_unlock_irqrestore(&engine->active.lock, flags); } void i915_sched_node_init(struct i915_sched_node *node) @@ -369,6 +442,9 @@ void i915_sched_node_init(struct i915_sched_node *node) INIT_LIST_HEAD(&node->signalers_list); INIT_LIST_HEAD(&node->waiters_list); INIT_LIST_HEAD(&node->link); + INIT_LIST_HEAD(&node->dfs); + + node->ipi_link = NULL; i915_sched_node_reinit(node); } @@ -379,6 +455,9 @@ void i915_sched_node_reinit(struct i915_sched_node *node) node->semaphores = 0; node->flags = 0; + GEM_BUG_ON(node->ipi_link); + node->ipi_priority = I915_PRIORITY_INVALID; + GEM_BUG_ON(!list_empty(&node->signalers_list)); GEM_BUG_ON(!list_empty(&node->waiters_list)); GEM_BUG_ON(!list_empty(&node->link)); @@ -414,7 +493,6 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, spin_lock(&signal->lock); if (!node_signaled(signal)) { - INIT_LIST_HEAD(&dep->dfs_link); dep->signaler = signal; dep->waiter = node_get(node); dep->flags = flags; @@ -504,7 +582,7 @@ void i915_request_show_with_schedule(struct drm_printer *m, if (signaler->timeline == rq->timeline) continue; - if (i915_request_completed(signaler)) + if 
(__i915_request_is_complete(signaler)) continue; i915_request_show(m, signaler, prefix, indent + 2); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index a045be784c67..5be7f90e7896 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -35,6 +35,8 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, void i915_sched_node_retire(struct i915_sched_node *node); +void i915_sched_init_ipi(struct i915_sched_ipi *ipi); + void i915_request_set_priority(struct i915_request *request, int prio); struct list_head * diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 623bf41fcf35..5a84d59134ee 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -8,8 +8,8 @@ #define _I915_SCHEDULER_TYPES_H_ #include +#include -#include "gt/intel_engine_types.h" #include "i915_priolist_types.h" struct drm_i915_private; @@ -61,13 +61,23 @@ struct i915_sched_attr { */ struct i915_sched_node { spinlock_t lock; /* protect the lists */ + struct list_head signalers_list; /* those before us, we depend upon */ struct list_head waiters_list; /* those after us, they depend upon us */ - struct list_head link; + struct list_head link; /* guarded by engine->active.lock */ + struct list_head dfs; /* guarded by engine->active.lock */ struct i915_sched_attr attr; - unsigned int flags; + unsigned long flags; #define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0) - intel_engine_mask_t semaphores; + unsigned long semaphores; + + struct i915_request *ipi_link; + int ipi_priority; +}; + +struct i915_sched_ipi { + struct i915_request *list; + struct work_struct work; }; struct i915_dependency { @@ -75,7 +85,6 @@ struct i915_dependency { struct i915_sched_node *waiter; struct list_head signal_link; struct list_head wait_link; - struct list_head dfs_link; struct rcu_head rcu; unsigned long flags; #define 
I915_DEPENDENCY_ALLOC BIT(0) From patchwork Mon Dec 28 15:51:56 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991435 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 82D92C4361B for ; Mon, 28 Dec 2020 15:53:32 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 518C220829 for ; Mon, 28 Dec 2020 15:53:32 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 518C220829 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 2088589ACD; Mon, 28 Dec 2020 15:53:00 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id C983189A0E for ; Mon, 28 Dec 2020 15:52:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448199-1500050 for multiple; Mon, 28 Dec 2020 15:52:36 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:56 +0000 
Message-Id: <20201228155229.9516-21-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 21/54] drm/i915/selftests: Measure set-priority duration X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" As a topological sort, we expect it to run in linear graph time, O(V+E). In removing the recursion, it is no longer a DFS but rather a BFS, and performs as O(VE). Let's demonstrate how bad this is with a few examples, and build a few test cases to verify a potential fix. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_scheduler.c | 4 + .../drm/i915/selftests/i915_live_selftests.h | 1 + .../drm/i915/selftests/i915_perf_selftests.h | 1 + .../gpu/drm/i915/selftests/i915_scheduler.c | 679 ++++++++++++++++++ 4 files changed, 685 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_scheduler.c diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index d3d60fb8caf5..2cc25923dde7 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -590,6 +590,10 @@ void i915_request_show_with_schedule(struct drm_printer *m, rcu_read_unlock(); } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_scheduler.c" +#endif + static void i915_global_scheduler_shrink(void) { kmem_cache_shrink(global.slab_dependencies); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index a92c0e9b7e6b..2200a5baa68e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ 
b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -26,6 +26,7 @@ selftest(gt_mocs, intel_mocs_live_selftests) selftest(gt_pm, intel_gt_pm_live_selftests) selftest(gt_heartbeat, intel_heartbeat_live_selftests) selftest(requests, i915_request_live_selftests) +selftest(scheduler, i915_scheduler_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(mman, i915_gem_mman_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index c2389f8a257d..137e35283fee 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -17,5 +17,6 @@ */ selftest(engine_cs, intel_engine_cs_perf_selftests) selftest(request, i915_request_perf_selftests) +selftest(scheduler, i915_scheduler_perf_selftests) selftest(blt, i915_gem_object_blt_perf_selftests) selftest(region, intel_memory_region_perf_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c new file mode 100644 index 000000000000..cb67de304aeb --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c @@ -0,0 +1,679 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_selftest.h" + +#include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" +#include "gt/selftest_engine_heartbeat.h" +#include "selftests/igt_spinner.h" +#include "selftests/i915_random.h" + +static void scheduling_disable(struct intel_engine_cs *engine) +{ + engine->props.preempt_timeout_ms = 0; + engine->props.timeslice_duration_ms = 0; + + st_engine_heartbeat_disable(engine); +} + +static void scheduling_enable(struct intel_engine_cs *engine) +{ + st_engine_heartbeat_enable(engine); + + engine->props.preempt_timeout_ms = + engine->defaults.preempt_timeout_ms; + engine->props.timeslice_duration_ms = + 
engine->defaults.timeslice_duration_ms; +} + +static int first_engine(struct drm_i915_private *i915, + int (*chain)(struct intel_engine_cs *engine, + unsigned long param, + bool (*fn)(struct i915_request *rq, + unsigned long v, + unsigned long e)), + unsigned long param, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + struct intel_engine_cs *engine; + + for_each_uabi_engine(engine, i915) { + if (!intel_engine_has_scheduler(engine)) + continue; + + return chain(engine, param, fn); + } + + return 0; +} + +static int all_engines(struct drm_i915_private *i915, + int (*chain)(struct intel_engine_cs *engine, + unsigned long param, + bool (*fn)(struct i915_request *rq, + unsigned long v, + unsigned long e)), + unsigned long param, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + struct intel_engine_cs *engine; + int err; + + for_each_uabi_engine(engine, i915) { + if (!intel_engine_has_scheduler(engine)) + continue; + + err = chain(engine, param, fn); + if (err) + return err; + } + + return 0; +} + +static bool check_context_order(struct intel_engine_cs *engine) +{ + u64 last_seqno, last_context; + unsigned long count; + bool result = false; + struct rb_node *rb; + int last_prio; + + /* We expect the execution order to follow ascending fence-context */ + spin_lock_irq(&engine->active.lock); + + count = 0; + last_context = 0; + last_seqno = 0; + last_prio = 0; + for (rb = rb_first_cached(&engine->execlists.queue); rb; rb = rb_next(rb)) { + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + struct i915_request *rq; + + priolist_for_each_request(rq, p) { + if (rq->fence.context < last_context || + (rq->fence.context == last_context && + rq->fence.seqno < last_seqno)) { + pr_err("[%lu] %llx:%lld [prio:%d] after %llx:%lld [prio:%d]\n", + count, + rq->fence.context, + rq->fence.seqno, + rq_prio(rq), + last_context, + last_seqno, + last_prio); + goto out_unlock; + } + + last_context = rq->fence.context; + 
last_seqno = rq->fence.seqno; + last_prio = rq_prio(rq); + count++; + } + } + result = true; +out_unlock: + spin_unlock_irq(&engine->active.lock); + + return result; +} + +static int __single_chain(struct intel_engine_cs *engine, unsigned long length, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + unsigned long count; + unsigned long min; + int err = 0; + + if (!intel_engine_can_store_dword(engine)) + return 0; + + scheduling_disable(engine); + + if (igt_spinner_init(&spin, engine->gt)) { + err = -ENOMEM; + goto err_heartbeat; + } + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err_spin; + } + ce->ring = __intel_context_ring_size(SZ_512K); + + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_context; + } + i915_request_add(rq); + min = ce->ring->size - ce->ring->space; + + count = 1; + while (count < length && ce->ring->space > min) { + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + i915_request_add(rq); + count++; + } + intel_engine_flush_submission(engine); + + tasklet_disable(&engine->execlists.tasklet); + local_bh_disable(); + if (fn(rq, count, count - 1) && !check_context_order(engine)) + err = -EINVAL; + local_bh_enable(); + tasklet_enable(&engine->execlists.tasklet); + + igt_spinner_end(&spin); +err_context: + intel_context_put(ce); +err_spin: + igt_spinner_fini(&spin); +err_heartbeat: + scheduling_enable(engine); + return err; +} + +static int __wide_chain(struct intel_engine_cs *engine, unsigned long width, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + struct intel_context **ce; + struct i915_request **rq; + struct igt_spinner spin; + unsigned long count; + unsigned long i, j; + int err = 0; + + if (!intel_engine_can_store_dword(engine)) + return 0; + + 
scheduling_disable(engine); + + if (igt_spinner_init(&spin, engine->gt)) { + err = -ENOMEM; + goto err_heartbeat; + } + + ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL); + if (!ce) { + err = -ENOMEM; + goto err_spin; + } + + for (i = 0; i < width; i++) { + ce[i] = intel_context_create(engine); + if (IS_ERR(ce[i])) { + err = PTR_ERR(ce[i]); + width = i; + goto err_context; + } + } + + rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL); + if (!rq) { + err = -ENOMEM; + goto err_context; + } + + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto err_free; + } + i915_request_add(rq[0]); + + count = 0; + for (i = 1; i < width; i++) { + GEM_BUG_ON(i915_request_completed(rq[0])); + + rq[i] = intel_context_create_request(ce[i]); + if (IS_ERR(rq[i])) { + err = PTR_ERR(rq[i]); + break; + } + for (j = 0; j < i; j++) { + err = i915_request_await_dma_fence(rq[i], + &rq[j]->fence); + if (err) + break; + count++; + } + i915_request_add(rq[i]); + } + intel_engine_flush_submission(engine); + + tasklet_disable(&engine->execlists.tasklet); + local_bh_disable(); + if (fn(rq[i - 1], i, count) && !check_context_order(engine)) + err = -EINVAL; + local_bh_enable(); + tasklet_enable(&engine->execlists.tasklet); + + igt_spinner_end(&spin); +err_free: + kfree(rq); +err_context: + for (i = 0; i < width; i++) + intel_context_put(ce[i]); + kfree(ce); +err_spin: + igt_spinner_fini(&spin); +err_heartbeat: + scheduling_enable(engine); + return err; +} + +static int __inv_chain(struct intel_engine_cs *engine, unsigned long width, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + struct intel_context **ce; + struct i915_request **rq; + struct igt_spinner spin; + unsigned long count; + unsigned long i, j; + int err = 0; + + if (!intel_engine_can_store_dword(engine)) + return 0; + + scheduling_disable(engine); + + if (igt_spinner_init(&spin, engine->gt)) { + err = -ENOMEM; + goto err_heartbeat; + } + + ce = 
kmalloc_array(width, sizeof(*ce), GFP_KERNEL); + if (!ce) { + err = -ENOMEM; + goto err_spin; + } + + for (i = 0; i < width; i++) { + ce[i] = intel_context_create(engine); + if (IS_ERR(ce[i])) { + err = PTR_ERR(ce[i]); + width = i; + goto err_context; + } + } + + rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL); + if (!rq) { + err = -ENOMEM; + goto err_context; + } + + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto err_free; + } + i915_request_add(rq[0]); + + count = 0; + for (i = 1; i < width; i++) { + GEM_BUG_ON(i915_request_completed(rq[0])); + + rq[i] = intel_context_create_request(ce[i]); + if (IS_ERR(rq[i])) { + err = PTR_ERR(rq[i]); + break; + } + for (j = i; j > 0; j--) { + err = i915_request_await_dma_fence(rq[i], + &rq[j - 1]->fence); + if (err) + break; + count++; + } + i915_request_add(rq[i]); + } + intel_engine_flush_submission(engine); + + tasklet_disable(&engine->execlists.tasklet); + local_bh_disable(); + if (fn(rq[i - 1], i, count) && !check_context_order(engine)) + err = -EINVAL; + local_bh_enable(); + tasklet_enable(&engine->execlists.tasklet); + + igt_spinner_end(&spin); +err_free: + kfree(rq); +err_context: + for (i = 0; i < width; i++) + intel_context_put(ce[i]); + kfree(ce); +err_spin: + igt_spinner_fini(&spin); +err_heartbeat: + scheduling_enable(engine); + return err; +} + +static int __sparse_chain(struct intel_engine_cs *engine, unsigned long width, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + struct intel_context **ce; + struct i915_request **rq; + struct igt_spinner spin; + I915_RND_STATE(prng); + unsigned long count; + unsigned long i, j; + int err = 0; + + if (!intel_engine_can_store_dword(engine)) + return 0; + + scheduling_disable(engine); + + if (igt_spinner_init(&spin, engine->gt)) { + err = -ENOMEM; + goto err_heartbeat; + } + + ce = kmalloc_array(width, sizeof(*ce), GFP_KERNEL); + if (!ce) { + err = -ENOMEM; + goto err_spin; + 
} + + for (i = 0; i < width; i++) { + ce[i] = intel_context_create(engine); + if (IS_ERR(ce[i])) { + err = PTR_ERR(ce[i]); + width = i; + goto err_context; + } + } + + rq = kmalloc_array(width, sizeof(*rq), GFP_KERNEL); + if (!rq) { + err = -ENOMEM; + goto err_context; + } + + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_NOOP); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto err_free; + } + i915_request_add(rq[0]); + + count = 0; + for (i = 1; i < width; i++) { + GEM_BUG_ON(i915_request_completed(rq[0])); + + rq[i] = intel_context_create_request(ce[i]); + if (IS_ERR(rq[i])) { + err = PTR_ERR(rq[i]); + break; + } + + if (err == 0 && i > 1) { + j = i915_prandom_u32_max_state(i - 1, &prng); + err = i915_request_await_dma_fence(rq[i], + &rq[j]->fence); + count++; + } + + if (err == 0) { + err = i915_request_await_dma_fence(rq[i], + &rq[i - 1]->fence); + count++; + } + + if (err == 0 && i > 2) { + j = i915_prandom_u32_max_state(i - 2, &prng); + err = i915_request_await_dma_fence(rq[i], + &rq[j]->fence); + count++; + } + + i915_request_add(rq[i]); + if (err) + break; + } + intel_engine_flush_submission(engine); + + tasklet_disable(&engine->execlists.tasklet); + local_bh_disable(); + if (fn(rq[i - 1], i, count) && !check_context_order(engine)) + err = -EINVAL; + local_bh_enable(); + tasklet_enable(&engine->execlists.tasklet); + + igt_spinner_end(&spin); +err_free: + kfree(rq); +err_context: + for (i = 0; i < width; i++) + intel_context_put(ce[i]); + kfree(ce); +err_spin: + igt_spinner_fini(&spin); +err_heartbeat: + scheduling_enable(engine); + return err; +} + +static int igt_schedule_chains(struct drm_i915_private *i915, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + static int (* const chains[])(struct intel_engine_cs *engine, + unsigned long length, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) = { + __single_chain, + __wide_chain, + __inv_chain, + __sparse_chain, + }; + int n, err; + + for (n = 
0; n < ARRAY_SIZE(chains); n++) { + err = all_engines(i915, chains[n], 17, fn); + if (err) + return err; + } + + return 0; +} + +static bool igt_priority(struct i915_request *rq, + unsigned long v, unsigned long e) +{ + i915_request_set_priority(rq, I915_PRIORITY_BARRIER); + GEM_BUG_ON(rq_prio(rq) != I915_PRIORITY_BARRIER); + return true; +} + +static int igt_priority_chains(void *arg) +{ + return igt_schedule_chains(arg, igt_priority); +} + +int i915_scheduler_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_priority_chains), + }; + + return i915_subtests(tests, i915); +} + +static int chains(struct drm_i915_private *i915, + int (*chain)(struct drm_i915_private *i915, + unsigned long length, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)), + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + unsigned long x[] = { 1, 4, 16, 64, 128, 256, 512, 1024, 4096 }; + int i, err; + + for (i = 0; i < ARRAY_SIZE(x); i++) { + IGT_TIMEOUT(end_time); + + err = chain(i915, x[i], fn); + if (err) + return err; + + if (__igt_timeout(end_time, NULL)) + break; + } + + return 0; +} + +static int single_chain(struct drm_i915_private *i915, + unsigned long length, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + return first_engine(i915, __single_chain, length, fn); +} + +static int single(struct drm_i915_private *i915, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + return chains(i915, single_chain, fn); +} + +static int wide_chain(struct drm_i915_private *i915, + unsigned long width, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + return first_engine(i915, __wide_chain, width, fn); +} + +static int wide(struct drm_i915_private *i915, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + return chains(i915, wide_chain, fn); +} + +static int 
inv_chain(struct drm_i915_private *i915, + unsigned long width, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + return first_engine(i915, __inv_chain, width, fn); +} + +static int inv(struct drm_i915_private *i915, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + return chains(i915, inv_chain, fn); +} + +static int sparse_chain(struct drm_i915_private *i915, + unsigned long width, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + return first_engine(i915, __sparse_chain, width, fn); +} + +static int sparse(struct drm_i915_private *i915, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + return chains(i915, sparse_chain, fn); +} + +static void report(const char *what, unsigned long v, unsigned long e, u64 dt) +{ + pr_info("(%4lu, %7lu), %s:%10lluns\n", v, e, what, dt); +} + +static u64 __set_priority(struct i915_request *rq, int prio) +{ + u64 dt; + + preempt_disable(); + dt = ktime_get_raw_fast_ns(); + i915_request_set_priority(rq, prio); + dt = ktime_get_raw_fast_ns() - dt; + preempt_enable(); + + return dt; +} + +static bool set_priority(struct i915_request *rq, + unsigned long v, unsigned long e) +{ + report("set-priority", v, e, __set_priority(rq, I915_PRIORITY_BARRIER)); + return true; +} + +static int single_priority(void *arg) +{ + return single(arg, set_priority); +} + +static int wide_priority(void *arg) +{ + return wide(arg, set_priority); +} + +static int inv_priority(void *arg) +{ + return inv(arg, set_priority); +} + +static int sparse_priority(void *arg) +{ + return sparse(arg, set_priority); +} + +int i915_scheduler_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(single_priority), + SUBTEST(wide_priority), + SUBTEST(inv_priority), + SUBTEST(sparse_priority), + }; + static const struct { + const char *name; + size_t sz; + } types[] = { +#define T(t) { #t, sizeof(struct t) } 
+ T(i915_priolist), + T(i915_sched_attr), + T(i915_sched_node), +#undef T + {} + }; + typeof(*types) *t; + + for (t = types; t->name; t++) + pr_info("sizeof(%s): %zd\n", t->name, t->sz); + + return i915_subtests(tests, i915); +} From patchwork Mon Dec 28 15:51:57 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991359 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id C41FBC433E6 for ; Mon, 28 Dec 2020 15:53:08 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 9119C206E5 for ; Mon, 28 Dec 2020 15:53:08 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 9119C206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id C715E89A14; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id A443A899C7 for ; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com 
(Firefly Internet (M1)) with ESMTP id 23448200-1500050 for multiple; Mon, 28 Dec 2020 15:52:36 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:57 +0000 Message-Id: <20201228155229.9516-22-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 22/54] drm/i915/selftests: Exercise priority inheritance around an engine loop X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Exercise rescheduling priority inheritance around a sequence of requests that wrap around all the engines. Signed-off-by: Chris Wilson --- .../gpu/drm/i915/selftests/i915_scheduler.c | 219 ++++++++++++++++++ 1 file changed, 219 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c index cb67de304aeb..e6910f4c429d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c @@ -7,6 +7,7 @@ #include "gt/intel_context.h" #include "gt/intel_gpu_commands.h" +#include "gt/intel_ring.h" #include "gt/selftest_engine_heartbeat.h" #include "selftests/igt_spinner.h" #include "selftests/i915_random.h" @@ -512,10 +513,228 @@ static int igt_priority_chains(void *arg) return igt_schedule_chains(arg, igt_priority); } +static struct i915_request * +__write_timestamp(struct intel_engine_cs *engine, + struct drm_i915_gem_object *obj, + int slot, + struct i915_request *prev) +{ + struct i915_request *rq = ERR_PTR(-EINVAL); + struct intel_context *ce; + struct i915_vma *vma; + int err = 0; + u32 *cs; + + ce = 
intel_context_create(engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_ce; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_ce; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_unpin; + } + + i915_vma_lock(vma); + err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unlock(vma); + if (err) + goto out_request; + + if (prev) { + err = i915_request_await_dma_fence(rq, &prev->fence); + if (err) + goto out_request; + } + + if (engine->emit_init_breadcrumb) { + err = engine->emit_init_breadcrumb(rq); + if (err) + goto out_request; + } + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_request; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8; + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(engine->mmio_base)); + *cs++ = lower_32_bits(vma->node.start) + sizeof(u32) * slot; + *cs++ = upper_32_bits(vma->node.start); + intel_ring_advance(rq, cs); + + i915_request_get(rq); +out_request: + i915_request_add(rq); +out_unpin: + i915_vma_unpin(vma); +out_ce: + intel_context_put(ce); + i915_request_put(prev); + return err ? 
ERR_PTR(err) : rq; +} + +static struct i915_request *create_spinner(struct drm_i915_private *i915, + struct igt_spinner *spin) +{ + struct intel_engine_cs *engine; + + for_each_uabi_engine(engine, i915) { + struct intel_context *ce; + struct i915_request *rq; + + if (igt_spinner_init(spin, engine->gt)) + return ERR_PTR(-ENOMEM); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = igt_spinner_create_request(spin, ce, MI_NOOP); + intel_context_put(ce); + if (rq == ERR_PTR(-ENODEV)) + continue; + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + return rq; + } + + return ERR_PTR(-ENODEV); +} + +static int __igt_schedule_cycle(struct drm_i915_private *i915, + bool (*fn)(struct i915_request *rq, + unsigned long v, unsigned long e)) +{ + struct intel_engine_cs *engine; + struct drm_i915_gem_object *obj; + struct igt_spinner spin; + struct i915_request *rq; + unsigned long count, n; + u32 *time, last; + int err; + + /* + * Queue a bunch of ordered requests (each waiting on the previous) + * around the engines a couple of times. Each request will write + * the timestamp it executes at into the scratch, with the expectation + * that the timestamp will be in our desired execution order. 
+ */ + + if (INTEL_GEN(i915) < 8) + return 0; + + obj = i915_gem_object_create_internal(i915, SZ_64K); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + time = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(time)) { + err = PTR_ERR(time); + goto out_obj; + } + + rq = create_spinner(i915, &spin); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_obj; + } + + err = 0; + count = 0; + for_each_uabi_engine(engine, i915) { + if (!intel_engine_has_scheduler(engine)) + continue; + + rq = __write_timestamp(engine, obj, count, rq); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + count++; + } + for_each_uabi_engine(engine, i915) { + if (!intel_engine_has_scheduler(engine)) + continue; + + rq = __write_timestamp(engine, obj, count, rq); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + count++; + } + GEM_BUG_ON(count * sizeof(u32) > obj->base.size); + if (err || !count) + goto out_spin; + + fn(rq, count + 1, count); + igt_spinner_end(&spin); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + err = -ETIME; + goto out_request; + } + + last = time[0]; + for (n = 1; n < count; n++) { + if (i915_seqno_passed(last, time[n])) { + pr_err("Timestamp[%lu] %x before previous %x\n", + n, time[n], last); + err = -EINVAL; + break; + } + last = time[n]; + } + +out_request: + i915_request_put(rq); +out_spin: + igt_spinner_fini(&spin); +out_obj: + i915_gem_object_put(obj); + return 0; +} + +static bool noop(struct i915_request *rq, unsigned long v, unsigned long e) +{ + return true; +} + +static int igt_schedule_cycle(void *arg) +{ + return __igt_schedule_cycle(arg, noop); +} + +static int igt_priority_cycle(void *arg) +{ + return __igt_schedule_cycle(arg, igt_priority); +} + int i915_scheduler_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_priority_chains), + + SUBTEST(igt_schedule_cycle), + SUBTEST(igt_priority_cycle), }; return i915_subtests(tests, i915); From patchwork Mon Dec 28 15:51:58 2020 Content-Type: 
text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991343 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id EBEE5C433E9 for ; Mon, 28 Dec 2020 15:52:54 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 9509620791 for ; Mon, 28 Dec 2020 15:52:54 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 9509620791 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id F38BD89A08; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 90C0A89255 for ; Mon, 28 Dec 2020 15:52:48 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448201-1500050 for multiple; Mon, 28 Dec 2020 15:52:36 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:58 +0000 Message-Id: <20201228155229.9516-23-chris@chris-wilson.co.uk> X-Mailer: git-send-email 
2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 23/54] drm/i915: Improve DFS for priority inheritance X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" The core of the scheduling algorithm is that we compute the topological order of the fence DAG. Knowing that we have a DAG, we should be able to use a DFS to compute the topological sort in linear time. However, during the conversion of the recursive algorithm into an iterative one, the memorization of how far we had progressed down a branch was forgotten. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_scheduler.c | 58 ++++++++++++++++----------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 2cc25923dde7..71edc867e500 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -234,6 +234,26 @@ void __i915_priolist_free(struct i915_priolist *p) kmem_cache_free(global.slab_priorities, p); } +static struct i915_request * +stack_push(struct i915_request *rq, + struct i915_request *stack, + struct list_head *pos) +{ + stack->sched.dfs.prev = pos; + rq->sched.dfs.next = (struct list_head *)stack; + return rq; +} + +static struct i915_request * +stack_pop(struct i915_request *rq, + struct list_head **pos) +{ + rq = (struct i915_request *)rq->sched.dfs.next; + if (rq) + *pos = rq->sched.dfs.prev; + return rq; +} + static inline bool need_preempt(int prio, int active) { /* @@ -298,11 +318,10 @@ static void ipi_priority(struct i915_request *rq, int prio) static void 
__i915_request_set_priority(struct i915_request *rq, int prio) { struct intel_engine_cs *engine = rq->engine; - struct i915_request *rn; + struct list_head *pos = &rq->sched.signalers_list; struct list_head *plist; - LIST_HEAD(dfs); - list_add(&rq->sched.dfs, &dfs); + plist = i915_sched_lookup_priolist(engine, prio); /* * Recursively bump all dependent priorities to match the new request. @@ -322,40 +341,31 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio) * end result is a topological list of requests in reverse order, the * last element in the list is the request we must execute first. */ - list_for_each_entry(rq, &dfs, sched.dfs) { - struct i915_dependency *p; - - /* Also release any children on this engine that are ready */ - GEM_BUG_ON(rq->engine != engine); - - for_each_signaler(p, rq) { + rq->sched.dfs.next = NULL; + do { + list_for_each_continue(pos, &rq->sched.signalers_list) { + struct i915_dependency *p = + list_entry(pos, typeof(*p), signal_link); struct i915_request *s = container_of(p->signaler, typeof(*s), sched); - GEM_BUG_ON(s == rq); - if (rq_prio(s) >= prio) continue; if (__i915_request_is_complete(s)) continue; - if (s->engine != rq->engine) { + if (s->engine != engine) { ipi_priority(s, prio); continue; } - list_move_tail(&s->sched.dfs, &dfs); + /* Remember our position along this branch */ + rq = stack_push(s, rq, pos); + pos = &rq->sched.signalers_list; } - } - - plist = i915_sched_lookup_priolist(engine, prio); - - /* Fifo and depth-first replacement ensure our deps execute first */ - list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) { - GEM_BUG_ON(rq->engine != engine); - INIT_LIST_HEAD(&rq->sched.dfs); + RQ_TRACE(rq, "set-priority:%d\n", prio); WRITE_ONCE(rq->sched.attr.priority, prio); /* @@ -369,12 +379,13 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio) if (!i915_request_is_ready(rq)) continue; + GEM_BUG_ON(rq->engine != engine); if (i915_request_in_priority_queue(rq)) 
list_move_tail(&rq->sched.link, plist); /* Defer (tasklet) submission until after all updates. */ kick_submission(engine, rq, prio); - } + } while ((rq = stack_pop(rq, &pos))); } void i915_request_set_priority(struct i915_request *rq, int prio) @@ -442,7 +453,6 @@ void i915_sched_node_init(struct i915_sched_node *node) INIT_LIST_HEAD(&node->signalers_list); INIT_LIST_HEAD(&node->waiters_list); INIT_LIST_HEAD(&node->link); - INIT_LIST_HEAD(&node->dfs); node->ipi_link = NULL; From patchwork Mon Dec 28 15:51:59 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991393 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 80173C4332B for ; Mon, 28 Dec 2020 15:53:21 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 35134206E5 for ; Mon, 28 Dec 2020 15:53:21 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 35134206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id E746589B55; Mon, 28 Dec 2020 15:53:01 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org 
(Postfix) with ESMTPS id 7B20E899F2 for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448202-1500050 for multiple; Mon, 28 Dec 2020 15:52:36 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:51:59 +0000 Message-Id: <20201228155229.9516-24-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 24/54] drm/i915: Extract request submission from execlists X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" In the process of preparing to reuse the request submission logic for other backends, lift it out of the execlists backend. It already operates on the common structs, so it is just a matter of moving and renaming. 
Signed-off-by: Chris Wilson --- .../drm/i915/gt/intel_execlists_submission.c | 55 +------------ drivers/gpu/drm/i915/i915_scheduler.c | 82 +++++++++++++++++++ drivers/gpu/drm/i915/i915_scheduler.h | 2 + 3 files changed, 85 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 9eaa3dc47af3..198e5a9eb6fd 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2366,59 +2366,6 @@ static void execlists_preempt(struct timer_list *timer) execlists_kick(timer, preempt); } -static void queue_request(struct intel_engine_cs *engine, - struct i915_request *rq) -{ - GEM_BUG_ON(!list_empty(&rq->sched.link)); - list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, rq_prio(rq))); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); -} - -static bool submit_queue(struct intel_engine_cs *engine, - const struct i915_request *rq) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - - if (rq_prio(rq) <= execlists->queue_priority_hint) - return false; - - execlists->queue_priority_hint = rq_prio(rq); - return true; -} - -static bool ancestor_on_hold(const struct intel_engine_cs *engine, - const struct i915_request *rq) -{ - GEM_BUG_ON(i915_request_on_hold(rq)); - return !list_empty(&engine->active.hold) && hold_request(rq); -} - -static void execlists_submit_request(struct i915_request *request) -{ - struct intel_engine_cs *engine = request->engine; - unsigned long flags; - - /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->active.lock, flags); - - if (unlikely(ancestor_on_hold(engine, request))) { - RQ_TRACE(request, "ancestor on hold\n"); - list_add_tail(&request->sched.link, &engine->active.hold); - i915_request_set_hold(request); - } else { - queue_request(engine, request); - - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&request->sched.link)); - - if (submit_queue(engine, request)) - __execlists_kick(&engine->execlists); - } - - spin_unlock_irqrestore(&engine->active.lock, flags); -} - static int execlists_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr) @@ -3047,7 +2994,7 @@ static void execlists_park(struct intel_engine_cs *engine) void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { - engine->submit_request = execlists_submit_request; + engine->submit_request = i915_request_enqueue; engine->execlists.tasklet.func = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 71edc867e500..5a32c82b1136 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -446,6 +446,88 @@ void i915_request_set_priority(struct i915_request *rq, int prio) spin_unlock_irqrestore(&engine->active.lock, flags); } +static void queue_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(engine, rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + +static bool submit_queue(struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + + if (rq_prio(rq) <= execlists->queue_priority_hint) + return false; + + execlists->queue_priority_hint = rq_prio(rq); + return true; +} + +static bool hold_request(const 
struct i915_request *rq) +{ + struct i915_dependency *p; + bool result = false; + + /* + * If one of our ancestors is on hold, we must also be put on hold, + * otherwise we will bypass it and execute before it. + */ + rcu_read_lock(); + for_each_signaler(p, rq) { + const struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (s->engine != rq->engine) + continue; + + result = i915_request_on_hold(s); + if (result) + break; + } + rcu_read_unlock(); + + return result; +} + +static bool ancestor_on_hold(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + GEM_BUG_ON(i915_request_on_hold(rq)); + return unlikely(!list_empty(&engine->active.hold)) && hold_request(rq); +} + +void i915_request_enqueue(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + unsigned long flags; + bool kick = false; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->active.lock, flags); + GEM_BUG_ON(test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags)); + + if (unlikely(ancestor_on_hold(engine, rq))) { + RQ_TRACE(rq, "ancestor on hold\n"); + list_add_tail(&rq->sched.link, &engine->active.hold); + i915_request_set_hold(rq); + } else { + queue_request(engine, rq); + + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + + kick = submit_queue(engine, rq); + } + + GEM_BUG_ON(list_empty(&rq->sched.link)); + spin_unlock_irqrestore(&engine->active.lock, flags); + if (kick) + tasklet_hi_schedule(&engine->execlists.tasklet); +} + void i915_sched_node_init(struct i915_sched_node *node) { spin_lock_init(&node->lock); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 5be7f90e7896..c4c086d56f81 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -39,6 +39,8 @@ void i915_sched_init_ipi(struct i915_sched_ipi *ipi); void i915_request_set_priority(struct i915_request *request, int prio); +void 
i915_request_enqueue(struct i915_request *request); + struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); From patchwork Mon Dec 28 15:52:00 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991415 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 29C3EC43332 for ; Mon, 28 Dec 2020 15:53:33 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id EC22A206E5 for ; Mon, 28 Dec 2020 15:53:32 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org EC22A206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8CE3489B3C; Mon, 28 Dec 2020 15:53:01 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id C114A89A0E for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448203-1500050 for multiple; Mon, 28 Dec 2020 
15:52:36 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:00 +0000 Message-Id: <20201228155229.9516-25-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 25/54] drm/i915: Extract request rewinding from execlists X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" In the process of preparing to reuse the request submission logic for other backends, lift it out of the execlists backend. While this operates on the common structs, we do have a bit of backend knowledge, which is harmless for !lrc but still unsightly. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine.h | 3 - .../drm/i915/gt/intel_execlists_submission.c | 58 ++----------------- drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 3 + drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_scheduler.c | 44 ++++++++++++++ drivers/gpu/drm/i915/i915_scheduler.h | 3 + 7 files changed, 56 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 47ee8578e511..20974415e7d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -136,9 +136,6 @@ execlists_active_unlock_bh(struct intel_engine_execlists *execlists) local_bh_enable(); /* restore softirq, and kick ksoftirqd! 
*/ } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 198e5a9eb6fd..e6df16a3811c 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -371,56 +371,6 @@ assert_priority_queue(const struct i915_request *prev, return rq_prio(prev) >= rq_prio(next); } -static struct i915_request * -__unwind_incomplete_requests(struct intel_engine_cs *engine) -{ - struct i915_request *rq, *rn, *active = NULL; - struct list_head *pl; - int prio = I915_PRIORITY_INVALID; - - lockdep_assert_held(&engine->active.lock); - - list_for_each_entry_safe_reverse(rq, rn, - &engine->active.requests, - sched.link) { - if (__i915_request_is_complete(rq)) { - list_del_init(&rq->sched.link); - continue; - } - - __i915_request_unsubmit(rq); - - GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); - if (rq_prio(rq) != prio) { - prio = rq_prio(rq); - pl = i915_sched_lookup_priolist(engine, prio); - } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - - list_move(&rq->sched.link, pl); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - - /* Check in case we rollback so far we wrap [size/2] */ - if (intel_ring_direction(rq->ring, - rq->tail, - rq->ring->tail + 8) > 0) - rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; - - active = rq; - } - - return active; -} - -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) -{ - struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - - return __unwind_incomplete_requests(engine); -} - static inline void execlists_context_status_change(struct i915_request *rq, unsigned long status) { @@ -1097,7 +1047,7 @@ static void 
defer_active(struct intel_engine_cs *engine) { struct i915_request *rq; - rq = __unwind_incomplete_requests(engine); + rq = __intel_engine_rewind_requests(engine); if (!rq) return; @@ -1296,7 +1246,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the preemption, some of the unwound requests may * complete! */ - __unwind_incomplete_requests(engine); + __intel_engine_rewind_requests(engine); last = NULL; } else if (timeslice_expired(engine, last)) { @@ -2238,7 +2188,7 @@ static void execlists_capture(struct intel_engine_cs *engine) * which we return it to the queue for signaling. * * By removing them from the execlists queue, we also remove the - * requests from being processed by __unwind_incomplete_requests() + * requests from being processed by __intel_engine_rewind_requests() * during the intel_engine_reset(), and so they will *not* be replayed * afterwards. * @@ -2857,7 +2807,7 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) /* Push back any incomplete requests for replay after the reset. 
*/ rcu_read_lock(); spin_lock_irqsave(&engine->active.lock, flags); - __unwind_incomplete_requests(engine); + __intel_engine_rewind_requests(engine); spin_unlock_irqrestore(&engine->active.lock, flags); rcu_read_unlock(); } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 65fe76738335..9eda2ef8a497 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -93,4 +93,7 @@ /* in Gen12 ID 0x7FF is reserved to indicate idle */ #define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) +#define CTX_DESC_RELOAD_PD BIT_ULL(1) +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) + #endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 0a2952bf1dcb..276743d553a9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -4532,7 +4532,7 @@ static int reset_virtual_engine(struct intel_gt *gt, /* Fake a preemption event; failed of course */ spin_lock_irq(&engine->active.lock); - __unwind_incomplete_requests(engine); + __intel_engine_rewind_requests(engine); spin_unlock_irq(&engine->active.lock); GEM_BUG_ON(rq->engine != engine); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 6a70f3a2c002..7097aece016a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -434,7 +434,7 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) cancel_port_requests(execlists); /* Push back any incomplete requests for replay after the reset. 
*/ - rq = execlists_unwind_incomplete_requests(execlists); + rq = __intel_engine_rewind_requests(engine); if (!rq) goto out_unlock; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 5a32c82b1136..6b92523b9259 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -6,6 +6,9 @@ #include +#include "gt/intel_ring.h" +#include "gt/intel_lrc_reg.h" + #include "i915_drv.h" #include "i915_globals.h" #include "i915_request.h" @@ -528,6 +531,47 @@ void i915_request_enqueue(struct i915_request *rq) tasklet_hi_schedule(&engine->execlists.tasklet); } +struct i915_request * +__intel_engine_rewind_requests(struct intel_engine_cs *engine) +{ + struct i915_request *rq, *rn, *active = NULL; + struct list_head *pl; + int prio = I915_PRIORITY_INVALID; + + lockdep_assert_held(&engine->active.lock); + + list_for_each_entry_safe_reverse(rq, rn, + &engine->active.requests, + sched.link) { + if (__i915_request_is_complete(rq)) { + list_del_init(&rq->sched.link); + continue; + } + + __i915_request_unsubmit(rq); + + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != prio) { + prio = rq_prio(rq); + pl = i915_sched_lookup_priolist(engine, prio); + } + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + + list_move(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + /* Check in case we rollback so far we wrap [size/2] */ + if (intel_ring_direction(rq->ring, + rq->tail, + rq->ring->tail + 8) > 0) + rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; + + active = rq; + } + + return active; +} + void i915_sched_node_init(struct i915_sched_node *node) { spin_lock_init(&node->lock); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index c4c086d56f81..50fdc7168d38 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -41,6 +41,9 @@ void i915_request_set_priority(struct 
i915_request *request, int prio); void i915_request_enqueue(struct i915_request *request); +struct i915_request * +__intel_engine_rewind_requests(struct intel_engine_cs *engine); + struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); From patchwork Mon Dec 28 15:52:01 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991363 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4FF7CC4332B for ; Mon, 28 Dec 2020 15:53:10 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 0B98F206E5 for ; Mon, 28 Dec 2020 15:53:10 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 0B98F206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id E95E789AA7; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 00ADE8925D for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified 
[78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448205-1500050 for multiple; Mon, 28 Dec 2020 15:52:36 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:01 +0000 Message-Id: <20201228155229.9516-26-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 26/54] drm/i915: Extract request suspension from the execlists backend X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Make the ability to suspend and resume a request and its dependents generic. Signed-off-by: Chris Wilson --- .../drm/i915/gt/intel_execlists_submission.c | 148 +----------------- drivers/gpu/drm/i915/i915_scheduler.c | 120 ++++++++++++++ drivers/gpu/drm/i915/i915_scheduler.h | 5 + 3 files changed, 129 insertions(+), 144 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index e6df16a3811c..778aa21523d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1881,166 +1881,26 @@ static void post_process_csb(struct i915_request **port, execlists_schedule_out(*port++); } -static void __execlists_hold(struct i915_request *rq) -{ - LIST_HEAD(list); - - do { - struct i915_dependency *p; - - if (i915_request_is_active(rq)) - __i915_request_unsubmit(rq); - - clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - list_move_tail(&rq->sched.link, &rq->engine->active.hold); - i915_request_set_hold(rq); - RQ_TRACE(rq, "on hold\n"); - - for_each_waiter(p, rq) { 
- struct i915_request *w = - container_of(p->waiter, typeof(*w), sched); - - if (p->flags & I915_DEPENDENCY_WEAK) - continue; - - /* Leave semaphores spinning on the other engines */ - if (w->engine != rq->engine) - continue; - - if (!i915_request_is_ready(w)) - continue; - - if (__i915_request_is_complete(w)) - continue; - - if (i915_request_on_hold(w)) - continue; - - list_move_tail(&w->sched.link, &list); - } - - rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); - } while (rq); -} - static bool execlists_hold(struct intel_engine_cs *engine, struct i915_request *rq) { + bool result; + if (i915_request_on_hold(rq)) return false; spin_lock_irq(&engine->active.lock); - - if (__i915_request_is_complete(rq)) { /* too late! */ - rq = NULL; - goto unlock; - } - - /* - * Transfer this request onto the hold queue to prevent it - * being resumbitted to HW (and potentially completed) before we have - * released it. Since we may have already submitted following - * requests, we need to remove those as well. - */ - GEM_BUG_ON(i915_request_on_hold(rq)); - GEM_BUG_ON(rq->engine != engine); - __execlists_hold(rq); - GEM_BUG_ON(list_empty(&engine->active.hold)); - -unlock: + result = __intel_engine_hold_request(engine, rq); spin_unlock_irq(&engine->active.lock); - return rq; -} - -static bool hold_request(const struct i915_request *rq) -{ - struct i915_dependency *p; - bool result = false; - - /* - * If one of our ancestors is on hold, we must also be on hold, - * otherwise we will bypass it and execute before it. 
- */ - rcu_read_lock(); - for_each_signaler(p, rq) { - const struct i915_request *s = - container_of(p->signaler, typeof(*s), sched); - - if (s->engine != rq->engine) - continue; - - result = i915_request_on_hold(s); - if (result) - break; - } - rcu_read_unlock(); return result; } -static void __execlists_unhold(struct i915_request *rq) -{ - LIST_HEAD(list); - - do { - struct i915_dependency *p; - - RQ_TRACE(rq, "hold release\n"); - - GEM_BUG_ON(!i915_request_on_hold(rq)); - GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); - - i915_request_clear_hold(rq); - list_move_tail(&rq->sched.link, - i915_sched_lookup_priolist(rq->engine, - rq_prio(rq))); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - - /* Also release any children on this engine that are ready */ - for_each_waiter(p, rq) { - struct i915_request *w = - container_of(p->waiter, typeof(*w), sched); - - if (p->flags & I915_DEPENDENCY_WEAK) - continue; - - /* Propagate any change in error status */ - if (rq->fence.error) - i915_request_set_error_once(w, rq->fence.error); - - if (w->engine != rq->engine) - continue; - - if (!i915_request_on_hold(w)) - continue; - - /* Check that no other parents are also on hold */ - if (hold_request(w)) - continue; - - list_move_tail(&w->sched.link, &list); - } - - rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); - } while (rq); -} - static void execlists_unhold(struct intel_engine_cs *engine, struct i915_request *rq) { spin_lock_irq(&engine->active.lock); - - /* - * Move this request back to the priority queue, and all of its - * children and grandchildren that were suspended along with it. 
- */ - __execlists_unhold(rq); - - if (rq_prio(rq) > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = rq_prio(rq); - tasklet_hi_schedule(&engine->execlists.tasklet); - } - + __intel_engine_unhold_request(engine, rq); spin_unlock_irq(&engine->active.lock); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 6b92523b9259..db31906ad9ec 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -572,6 +572,126 @@ __intel_engine_rewind_requests(struct intel_engine_cs *engine) return active; } +bool __intel_engine_hold_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + LIST_HEAD(list); + + lockdep_assert_held(&engine->active.lock); + GEM_BUG_ON(i915_request_on_hold(rq)); + GEM_BUG_ON(rq->engine != engine); + + if (__i915_request_is_complete(rq)) /* too late! */ + return false; + + /* + * Transfer this request onto the hold queue to prevent it + * being resubmitted to HW (and potentially completed) before we have + * released it. Since we may have already submitted following + * requests, we need to remove those as well. 
+ */ + do { + struct i915_dependency *p; + + if (i915_request_is_active(rq)) + __i915_request_unsubmit(rq); + + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_move_tail(&rq->sched.link, &rq->engine->active.hold); + i915_request_set_hold(rq); + RQ_TRACE(rq, "on hold\n"); + + for_each_waiter(p, rq) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + + /* Leave semaphores spinning on the other engines */ + if (w->engine != engine) + continue; + + if (!i915_request_is_ready(w)) + continue; + + if (__i915_request_is_complete(w)) + continue; + + if (i915_request_on_hold(w)) /* acts as a visited bit */ + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); + + GEM_BUG_ON(list_empty(&engine->active.hold)); + + return true; +} + +void __intel_engine_unhold_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + LIST_HEAD(list); + + lockdep_assert_held(&engine->active.lock); + + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + /* + * Move this request back to the priority queue, and all of its + * children and grandchildren that were suspended along with it. 
+ */ + do { + struct i915_dependency *p; + + RQ_TRACE(rq, "hold release\n"); + + GEM_BUG_ON(!i915_request_on_hold(rq)); + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); + + i915_request_clear_hold(rq); + list_move_tail(&rq->sched.link, + i915_sched_lookup_priolist(rq->engine, + rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + /* Also release any children on this engine that are ready */ + for_each_waiter(p, rq) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + + /* Propagate any change in error status */ + if (rq->fence.error) + i915_request_set_error_once(w, rq->fence.error); + + if (w->engine != engine) + continue; + + /* We also treat the on-hold status as a visited bit */ + if (!i915_request_on_hold(w)) + continue; + + /* Check that no other parents are also on hold [BFS] */ + if (hold_request(w)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + void i915_sched_node_init(struct i915_sched_node *node) { spin_lock_init(&node->lock); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 50fdc7168d38..b4b722982870 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -44,6 +44,11 @@ void i915_request_enqueue(struct i915_request *request); struct i915_request * __intel_engine_rewind_requests(struct intel_engine_cs *engine); +bool __intel_engine_hold_request(struct intel_engine_cs *engine, + struct i915_request *request); +void __intel_engine_unhold_request(struct intel_engine_cs *engine, + struct i915_request *request); + struct list_head * i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); From patchwork Mon Dec 28 15:52:02 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson 
X-Patchwork-Id: 11991347 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 61DB7C433DB for ; Mon, 28 Dec 2020 15:52:59 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 2E2E120829 for ; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 2E2E120829 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 5DC888925D; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 16FFB8999C for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448206-1500050 for multiple; Mon, 28 Dec 2020 15:52:37 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:02 +0000 Message-Id: <20201228155229.9516-27-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: 
<20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 27/54] drm/i915: Extract the ability to defer and rerun a request later X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Lift the ability to defer a request until later from execlists into the common layer. Signed-off-by: Chris Wilson --- .../drm/i915/gt/intel_execlists_submission.c | 55 ++-------------- drivers/gpu/drm/i915/i915_scheduler.c | 66 ++++++++++++++++--- drivers/gpu/drm/i915/i915_scheduler.h | 5 +- 3 files changed, 67 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 778aa21523d9..ce65997508f9 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -995,9 +995,13 @@ static void virtual_xfer_context(struct virtual_engine *ve, } } -static void defer_request(struct i915_request *rq, struct list_head * const pl) +static void defer_active(struct intel_engine_cs *engine) { - LIST_HEAD(list); + struct i915_request *rq; + + rq = __intel_engine_rewind_requests(engine); + if (!rq) + return; /* * We want to move the interrupted request to the back of @@ -1006,52 +1010,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) * flight and were waiting for the interrupted request to * be run after it again. 
*/ - do { - struct i915_dependency *p; - - GEM_BUG_ON(i915_request_is_active(rq)); - list_move_tail(&rq->sched.link, pl); - - for_each_waiter(p, rq) { - struct i915_request *w = - container_of(p->waiter, typeof(*w), sched); - - if (p->flags & I915_DEPENDENCY_WEAK) - continue; - - /* Leave semaphores spinning on the other engines */ - if (w->engine != rq->engine) - continue; - - /* No waiter should start before its signaler */ - GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && - __i915_request_has_started(w) && - !__i915_request_is_complete(rq)); - - GEM_BUG_ON(i915_request_is_active(w)); - if (!i915_request_is_ready(w)) - continue; - - if (rq_prio(w) < rq_prio(rq)) - continue; - - GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); - list_move_tail(&w->sched.link, &list); - } - - rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); - } while (rq); -} - -static void defer_active(struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - rq = __intel_engine_rewind_requests(engine); - if (!rq) - return; - - defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); + __intel_engine_defer_request(engine, rq); } static bool diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index db31906ad9ec..e2ba5d63a8cb 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -171,8 +171,8 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) } } -struct list_head * -i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) +static struct list_head * +lookup_priolist(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_priolist *p; @@ -324,7 +324,7 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio) struct list_head *pos = &rq->sched.signalers_list; struct list_head *plist; - plist = i915_sched_lookup_priolist(engine, prio); + plist = 
lookup_priolist(engine, prio); /* * Recursively bump all dependent priorities to match the new request. @@ -449,12 +449,63 @@ void i915_request_set_priority(struct i915_request *rq, int prio) spin_unlock_irqrestore(&engine->active.lock, flags); } +void __intel_engine_defer_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + struct list_head *pl; + LIST_HEAD(list); + + lockdep_assert_held(&engine->active.lock); + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags)); + + /* + * When we defer a request, we must maintain its order with respect + * to those that are waiting upon it. So we traverse its chain of + * waiters and move any that are earlier than the request to after it. + */ + pl = lookup_priolist(engine, rq_prio(rq)); + do { + struct i915_dependency *p; + + GEM_BUG_ON(i915_request_is_active(rq)); + list_move_tail(&rq->sched.link, pl); + + for_each_waiter(p, rq) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + + /* Leave semaphores spinning on the other engines */ + if (w->engine != engine) + continue; + + /* No waiter should start before its signaler */ + GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && + __i915_request_has_started(w) && + !__i915_request_is_complete(rq)); + + GEM_BUG_ON(i915_request_is_active(w)); + if (!i915_request_is_ready(w)) + continue; + + if (rq_prio(w) < rq_prio(rq)) + continue; + + GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + static void queue_request(struct intel_engine_cs *engine, struct i915_request *rq) { GEM_BUG_ON(!list_empty(&rq->sched.link)); - list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, rq_prio(rq))); + list_add_tail(&rq->sched.link, lookup_priolist(engine, rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } @@ -553,7 +604,7 @@ 
__intel_engine_rewind_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); - pl = i915_sched_lookup_priolist(engine, prio); + pl = lookup_priolist(engine, prio); } GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); @@ -658,8 +709,7 @@ void __intel_engine_unhold_request(struct intel_engine_cs *engine, i915_request_clear_hold(rq); list_move_tail(&rq->sched.link, - i915_sched_lookup_priolist(rq->engine, - rq_prio(rq))); + lookup_priolist(rq->engine, rq_prio(rq))); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); /* Also release any children on this engine that are ready */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index b4b722982870..152faac61468 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -43,15 +43,14 @@ void i915_request_enqueue(struct i915_request *request); struct i915_request * __intel_engine_rewind_requests(struct intel_engine_cs *engine); +void __intel_engine_defer_request(struct intel_engine_cs *engine, + struct i915_request *request); bool __intel_engine_hold_request(struct intel_engine_cs *engine, struct i915_request *request); void __intel_engine_unhold_request(struct intel_engine_cs *engine, struct i915_request *request); -struct list_head * -i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); - void __i915_priolist_free(struct i915_priolist *p); static inline void i915_priolist_free(struct i915_priolist *p) { From patchwork Mon Dec 28 15:52:03 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991381 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, 
HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id C197CC4332E for ; Mon, 28 Dec 2020 15:53:16 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 8799220829 for ; Mon, 28 Dec 2020 15:53:16 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 8799220829 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 6D3F0899D4; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id AFB618925D for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448207-1500050 for multiple; Mon, 28 Dec 2020 15:52:37 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:03 +0000 Message-Id: <20201228155229.9516-28-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 28/54] drm/i915: Fix the iterative dfs for defering requests X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: 
Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" The current implementation of walking the children of a deferred request lacks the backtracking required to reduce the dfs to linear time. Having pulled it from execlists into the common layer, we can reuse the dfs code for priority inheritance. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_scheduler.c | 58 +++++++++++++++++++-------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index e2ba5d63a8cb..f76e8960d382 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -452,25 +452,26 @@ void i915_request_set_priority(struct i915_request *rq, int prio) void __intel_engine_defer_request(struct intel_engine_cs *engine, struct i915_request *rq) { - struct list_head *pl; - LIST_HEAD(list); + struct list_head *pos = &rq->sched.waiters_list; + struct i915_request *rn; + LIST_HEAD(dfs); + int prio; lockdep_assert_held(&engine->active.lock); GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags)); + prio = rq_prio(rq); + /* * When we defer a request, we must maintain its order with respect * to those that are waiting upon it. So we traverse its chain of * waiters and move any that are earlier than the request to after it. 
*/ - pl = lookup_priolist(engine, rq_prio(rq)); + rq->sched.dfs.next = NULL; do { - struct i915_dependency *p; - - GEM_BUG_ON(i915_request_is_active(rq)); - list_move_tail(&rq->sched.link, pl); - - for_each_waiter(p, rq) { + list_for_each_continue(pos, &rq->sched.waiters_list) { + struct i915_dependency *p = + list_entry(pos, typeof(*p), wait_link); struct i915_request *w = container_of(p->waiter, typeof(*w), sched); @@ -486,19 +487,44 @@ void __intel_engine_defer_request(struct intel_engine_cs *engine, __i915_request_has_started(w) && !__i915_request_is_complete(rq)); - GEM_BUG_ON(i915_request_is_active(w)); - if (!i915_request_is_ready(w)) + if (!i915_request_in_priority_queue(w)) continue; - if (rq_prio(w) < rq_prio(rq)) + /* + * We also need to reorder within the same priority. + * + * This is unlike priority-inheritance, where if the + * signaler already has a higher priority [earlier + * deadline] than us, we can ignore as it will be + * scheduled first. If a waiter already has the + * same priority, we still have to push it to the end + * of the list. This unfortunately means we cannot + * use the rq_deadline() itself as a 'visited' bit. 
+ */ + if (rq_prio(w) < prio) continue; - GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); - list_move_tail(&w->sched.link, &list); + GEM_BUG_ON(rq_prio(w) != prio); + + /* Remember our position along this branch */ + rq = stack_push(w, rq, pos); + pos = &rq->sched.waiters_list; } - rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); - } while (rq); + /* Note list is reversed for waiters wrt signal hierarchy */ + GEM_BUG_ON(rq->engine != engine); + GEM_BUG_ON(!i915_request_in_priority_queue(rq)); + list_move(&rq->sched.link, &dfs); + + /* Track our visit, and prevent duplicate processing */ + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + } while ((rq = stack_pop(rq, &pos))); + + pos = lookup_priolist(engine, prio); + list_for_each_entry_safe(rq, rn, &dfs, sched.link) { + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_add_tail(&rq->sched.link, pos); + } } static void queue_request(struct intel_engine_cs *engine, From patchwork Mon Dec 28 15:52:04 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991373 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id C4F3AC43381 for ; Mon, 28 Dec 2020 15:53:14 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 90046206E5 for ; Mon, 28 Dec 2020 15:53:14 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 
mail.kernel.org 90046206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0721E89AB2; Mon, 28 Dec 2020 15:52:59 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 109D0899C7 for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448208-1500050 for multiple; Mon, 28 Dec 2020 15:52:37 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:04 +0000 Message-Id: <20201228155229.9516-29-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 29/54] drm/i915: Move common active lists from engine to i915_scheduler X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Extract the scheduler lists into a related structure, stop sprawling over struct intel_engine_cs Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 26 +------------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 8 +---- .../drm/i915/gt/intel_execlists_submission.c | 2 +- drivers/gpu/drm/i915/gt/mock_engine.c | 2 +- drivers/gpu/drm/i915/i915_scheduler.c | 34 ++++++++++++++++--- 
drivers/gpu/drm/i915/i915_scheduler.h | 3 +- drivers/gpu/drm/i915/i915_scheduler_types.h | 8 +++++ 7 files changed, 43 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 72a58604d4c4..007c5e228452 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -594,8 +594,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; execlists->queue = RB_ROOT_CACHED; - - i915_sched_init_ipi(&execlists->ipi); } static void cleanup_status_page(struct intel_engine_cs *engine) @@ -711,7 +709,7 @@ static int engine_setup_common(struct intel_engine_cs *engine) goto err_status; } - intel_engine_init_active(engine, ENGINE_PHYSICAL); + i915_sched_init_engine(&engine->active, ENGINE_PHYSICAL); intel_engine_init_execlists(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); @@ -776,28 +774,6 @@ static int measure_breadcrumb_dw(struct intel_context *ce) return dw; } -void -intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass) -{ - INIT_LIST_HEAD(&engine->active.requests); - INIT_LIST_HEAD(&engine->active.hold); - - spin_lock_init(&engine->active.lock); - lockdep_set_subclass(&engine->active.lock, subclass); - - /* - * Due to an interesting quirk in lockdep's internal debug tracking, - * after setting a subclass we must ensure the lock is used. Otherwise, - * nr_unused_locks is incremented once too often. 
- */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - local_irq_disable(); - lock_map_acquire(&engine->active.lock.dep_map); - lock_map_release(&engine->active.lock.dep_map); - local_irq_enable(); -#endif -} - static struct intel_context * create_pinned_context(struct intel_engine_cs *engine, unsigned int hwsp, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index d19710191690..6379b1e0b7ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -259,8 +259,6 @@ struct intel_engine_execlists { struct rb_root_cached queue; struct rb_root_cached virtual; - struct i915_sched_ipi ipi; - /** * @csb_write: control register for Context Switch buffer * @@ -330,11 +328,7 @@ struct intel_engine_cs { struct intel_sseu sseu; - struct { - spinlock_t lock; - struct list_head requests; - struct list_head hold; /* ready requests, but on hold */ - } active; + struct i915_sched_engine active; /* keep a request in reserve for a [pm] barrier under oom */ struct i915_request *request_pool; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index ce65997508f9..b3bbcf76a6b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3339,7 +3339,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); - intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); + i915_sched_init_engine(&ve->base.active, ENGINE_VIRTUAL); intel_engine_init_execlists(&ve->base); ve->base.cops = &virtual_context_ops; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2f830017c51d..c00bc0f4afec 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -355,7 +355,7 @@ int mock_engine_init(struct intel_engine_cs *engine) { 
struct intel_context *ce; - intel_engine_init_active(engine, ENGINE_MOCK); + i915_sched_init_engine(&engine->active, ENGINE_MOCK); intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index f76e8960d382..61b110ff33d4 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -85,12 +85,36 @@ static void ipi_schedule(struct work_struct *wrk) } while (rq); } -void i915_sched_init_ipi(struct i915_sched_ipi *ipi) +static void i915_sched_init_ipi(struct i915_sched_ipi *ipi) { INIT_WORK(&ipi->work, ipi_schedule); ipi->list = NULL; } +void i915_sched_init_engine(struct i915_sched_engine *se, + unsigned int subclass) +{ + spin_lock_init(&se->lock); + lockdep_set_subclass(&se->lock, subclass); + + INIT_LIST_HEAD(&se->requests); + INIT_LIST_HEAD(&se->hold); + + i915_sched_init_ipi(&se->ipi); + + /* + * Due to an interesting quirk in lockdep's internal debug tracking, + * after setting a subclass we must ensure the lock is used. Otherwise, + * nr_unused_locks is incremented once too often. 
+ */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + local_irq_disable(); + lock_map_acquire(&se->lock.dep_map); + lock_map_release(&se->lock.dep_map); + local_irq_enable(); +#endif +} + static void __ipi_add(struct i915_request *rq) { #define STUB ((struct i915_request *)1) @@ -106,13 +130,13 @@ static void __ipi_add(struct i915_request *rq) return; } - first = READ_ONCE(engine->execlists.ipi.list); - do + first = READ_ONCE(engine->active.ipi.list); + do { rq->sched.ipi_link = ptr_pack_bits(first, 1, 1); - while (!try_cmpxchg(&engine->execlists.ipi.list, &first, rq)); + } while (!try_cmpxchg(&engine->active.ipi.list, &first, rq)); if (!first) - queue_work(system_unbound_wq, &engine->execlists.ipi.work); + queue_work(system_unbound_wq, &engine->active.ipi.work); } /* diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 152faac61468..ea5595188f8e 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -35,7 +35,8 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, void i915_sched_node_retire(struct i915_sched_node *node); -void i915_sched_init_ipi(struct i915_sched_ipi *ipi); +void i915_sched_init_engine(struct i915_sched_engine *se, + unsigned int subclass); void i915_request_set_priority(struct i915_request *request, int prio); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 5a84d59134ee..5f21f5ac6dd9 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -80,6 +80,14 @@ struct i915_sched_ipi { struct work_struct work; }; +struct i915_sched_engine { + spinlock_t lock; + struct list_head requests; + struct list_head hold; /* ready requests, but on hold */ + + struct i915_sched_ipi ipi; +}; + struct i915_dependency { struct i915_sched_node *signaler; struct i915_sched_node *waiter; From patchwork Mon Dec 28 15:52:05 2020 Content-Type: text/plain; charset="utf-8" 
MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991413 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 86B9FC43217 for ; Mon, 28 Dec 2020 15:53:27 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 315A2206E5 for ; Mon, 28 Dec 2020 15:53:27 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 315A2206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id D961B89B4D; Mon, 28 Dec 2020 15:53:01 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 1E9DA899D4 for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448209-1500050 for multiple; Mon, 28 Dec 2020 15:52:37 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:05 +0000 Message-Id: <20201228155229.9516-30-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: 
<20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 30/54] drm/i915: Move scheduler queue X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Extract the scheduling queue from "execlists" into the per-engine scheduling structs, for reuse by other backends. Signed-off-by: Chris Wilson --- .../gpu/drm/i915/gem/i915_gem_context_types.h | 2 +- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 5 +-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 14 ------- .../drm/i915/gt/intel_execlists_submission.c | 29 +++++++------- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +++--- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_request.h | 2 +- drivers/gpu/drm/i915/i915_scheduler.c | 38 ++++++++++++------- drivers/gpu/drm/i915/i915_scheduler.h | 15 ++++++++ drivers/gpu/drm/i915/i915_scheduler_types.h | 15 ++++++++ .../gpu/drm/i915/selftests/i915_scheduler.c | 2 +- 13 files changed, 84 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 1449f54924e0..99bd7b4f4ffe 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -19,7 +19,7 @@ #include "gt/intel_context_types.h" -#include "i915_scheduler.h" +#include "i915_scheduler_types.h" #include "i915_sw_fence.h" struct pid; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index a5d7efe67021..0d9dea4b0b65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c 
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -12,6 +12,7 @@ #include "dma_resv_utils.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" +#include "i915_scheduler.h" static long i915_gem_object_wait_fence(struct dma_fence *fence, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 007c5e228452..db17d9fe3333 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -593,7 +593,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) memset(execlists->inflight, 0, sizeof(execlists->inflight)); execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; } static void cleanup_status_page(struct intel_engine_cs *engine) @@ -915,7 +914,7 @@ int intel_engines_init(struct intel_gt *gt) */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { - GEM_BUG_ON(!list_empty(&engine->active.requests)); + i915_sched_fini_engine(&engine->active); tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ intel_breadcrumbs_free(engine->breadcrumbs); @@ -1236,7 +1235,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) } /* ELSP is empty, but there are ready requests? E.g. after reset */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) + if (!i915_sched_is_idle(&engine->active)) return false; /* Ring stopped? 
*/ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 2843db731b7d..14378e3a7a50 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -5,6 +5,7 @@ */ #include "i915_drv.h" +#include "i915_scheduler.h" #include "intel_breadcrumbs.h" #include "intel_context.h" @@ -277,7 +278,7 @@ static int __engine_park(struct intel_wakeref *wf) if (engine->park) engine->park(engine); - engine->execlists.no_priolist = false; + i915_sched_park_engine(&engine->active); /* While gt calls i915_vma_parked(), we have to break the lock cycle */ intel_gt_pm_put_async(engine->gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 6379b1e0b7ea..a2a49e51b92d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -154,11 +154,6 @@ struct intel_engine_execlists { */ struct timer_list preempt; - /** - * @default_priolist: priority list for I915_PRIORITY_NORMAL - */ - struct i915_priolist default_priolist; - /** * @ccid: identifier for contexts submitted to this engine */ @@ -193,11 +188,6 @@ struct intel_engine_execlists { */ u32 reset_ccid; - /** - * @no_priolist: priority lists disabled - */ - bool no_priolist; - /** * @submit_reg: gen-specific execlist submission register * set to the ExecList Submission Port (elsp) register pre-Gen11 and to @@ -253,10 +243,6 @@ struct intel_engine_execlists { */ int queue_priority_hint; - /** - * @queue: queue of requests, in priority lists - */ - struct rb_root_cached queue; struct rb_root_cached virtual; /** diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index b3bbcf76a6b1..5d76a02d945e 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -284,11 +284,11 @@ static int effective_prio(const 
struct i915_request *rq) return prio; } -static int queue_prio(const struct intel_engine_execlists *execlists) +static int queue_prio(const struct i915_sched_engine *se) { struct rb_node *rb; - rb = rb_first_cached(&execlists->queue); + rb = rb_first_cached(&se->queue); if (!rb) return INT_MIN; @@ -351,7 +351,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * context, it's priority would not exceed ELSP[0] aka last_prio. */ return max(virtual_prio(&engine->execlists), - queue_prio(&engine->execlists)) > last_prio; + queue_prio(&engine->active)) > last_prio; } __maybe_unused static inline bool @@ -1047,11 +1047,11 @@ static bool needs_timeslice(const struct intel_engine_cs *engine, return false; /* If ELSP[1] is occupied, always check to see if worth slicing */ - if (!list_is_last_rcu(&rq->sched.link, &engine->active.requests)) + if (!i915_sched_is_last_request(&engine->active, rq)) return true; /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ - if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) + if (!i915_sched_is_idle(&engine->active)) return true; return !RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root); @@ -1270,7 +1270,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq->engine != &ve->base); GEM_BUG_ON(rq->context != &ve->context); - if (unlikely(rq_prio(rq) < queue_prio(execlists))) { + if (unlikely(rq_prio(rq) < queue_prio(&engine->active))) { spin_unlock(&ve->base.active.lock); break; } @@ -1338,7 +1338,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) break; } - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&engine->active.queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -1417,7 +1417,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &engine->active.queue); i915_priolist_free(p); } done: @@ 
-1439,7 +1439,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * request triggering preemption on the next dequeue (or subsequent * interrupt for secondary ports). */ - execlists->queue_priority_hint = queue_prio(execlists); + execlists->queue_priority_hint = queue_prio(&engine->active); spin_unlock(&engine->active.lock); /* @@ -2673,7 +2673,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&engine->active.queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { @@ -2681,9 +2681,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) __i915_request_submit(rq); } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &engine->active.queue); i915_priolist_free(p); } + GEM_BUG_ON(!i915_sched_is_idle(&engine->active)); /* On-hold requests will be flushed to timeline upon their release */ list_for_each_entry(rq, &engine->active.hold, sched.link) @@ -2714,7 +2715,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + engine->active.queue = RB_ROOT_CACHED; GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); execlists->tasklet.func = nop_submission_tasklet; @@ -2947,7 +2948,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) static struct list_head *virtual_queue(struct virtual_engine *ve) { - return &ve->base.execlists.default_priolist.requests; + return &ve->base.active.default_priolist.requests; } static void rcu_virtual_context_destroy(struct work_struct *wrk) @@ -3538,7 +3539,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, last = NULL; count = 0; - 
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&engine->active.queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); priolist_for_each_request(rq, p) { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7097aece016a..6d97b5ad09b5 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -310,7 +310,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine) * event. */ port = first; - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&engine->active.queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -330,7 +330,7 @@ static void __guc_dequeue(struct intel_engine_cs *engine) last = rq; } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &engine->active.queue); i915_priolist_free(p); } done: @@ -483,7 +483,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) } /* Flush the queued requests to the timeline list (for retiring). 
*/ - while ((rb = rb_first_cached(&execlists->queue))) { + while ((rb = rb_first_cached(&engine->active.queue))) { struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { @@ -493,14 +493,15 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) i915_request_mark_complete(rq); } - rb_erase_cached(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &engine->active.queue); i915_priolist_free(p); } + GEM_BUG_ON(!i915_sched_is_idle(&engine->active)); /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; + engine->active.queue = RB_ROOT_CACHED; spin_unlock_irqrestore(&engine->active.lock, flags); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e38a10d5c128..795be17ce239 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -97,7 +97,6 @@ #include "i915_gpu_error.h" #include "i915_perf_types.h" #include "i915_request.h" -#include "i915_scheduler.h" #include "gt/intel_timeline.h" #include "i915_vma.h" #include "i915_irq.h" diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a8c413203f72..adfe863f778e 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -35,7 +35,7 @@ #include "gt/intel_timeline_types.h" #include "i915_gem.h" -#include "i915_scheduler.h" +#include "i915_scheduler_types.h" #include "i915_selftest.h" #include "i915_sw_fence.h" diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 61b110ff33d4..61150ba22733 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -99,6 +99,7 @@ void i915_sched_init_engine(struct i915_sched_engine *se, INIT_LIST_HEAD(&se->requests); INIT_LIST_HEAD(&se->hold); + se->queue = RB_ROOT_CACHED; i915_sched_init_ipi(&se->ipi); @@ -115,6 +116,17 @@ void 
i915_sched_init_engine(struct i915_sched_engine *se, #endif } +void i915_sched_park_engine(struct i915_sched_engine *se) +{ + GEM_BUG_ON(!i915_sched_is_idle(se)); + se->no_priolist = false; +} + +void i915_sched_fini_engine(struct i915_sched_engine *se) +{ + GEM_BUG_ON(!list_empty(&se->requests)); +} + static void __ipi_add(struct i915_request *rq) { #define STUB ((struct i915_request *)1) @@ -175,7 +187,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) return rb_entry(rb, struct i915_priolist, node); } -static void assert_priolists(struct intel_engine_execlists * const execlists) +static void assert_priolists(struct i915_sched_engine * const se) { struct rb_node *rb; long last_prio; @@ -183,11 +195,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - GEM_BUG_ON(rb_first_cached(&execlists->queue) != - rb_first(&execlists->queue.rb_root)); + GEM_BUG_ON(rb_first_cached(&se->queue) != + rb_first(&se->queue.rb_root)); last_prio = INT_MAX; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); GEM_BUG_ON(p->priority > last_prio); @@ -198,21 +210,21 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) static struct list_head * lookup_priolist(struct intel_engine_cs *engine, int prio) { - struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_sched_engine * const se = &engine->active; struct i915_priolist *p; struct rb_node **parent, *rb; bool first = true; lockdep_assert_held(&engine->active.lock); - assert_priolists(execlists); + assert_priolists(se); - if (unlikely(execlists->no_priolist)) + if (unlikely(se->no_priolist)) prio = I915_PRIORITY_NORMAL; find_priolist: /* most positive priority is scheduled first, equal priorities fifo */ rb = NULL; - parent = 
&execlists->queue.rb_root.rb_node; + parent = &se->queue.rb_root.rb_node; while (*parent) { rb = *parent; p = to_priolist(rb); @@ -227,7 +239,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) } if (prio == I915_PRIORITY_NORMAL) { - p = &execlists->default_priolist; + p = &se->default_priolist; } else { p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ @@ -242,7 +254,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) * requests, so if userspace lied about their * dependencies that reordering may be visible. */ - execlists->no_priolist = true; + se->no_priolist = true; goto find_priolist; } } @@ -251,7 +263,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) INIT_LIST_HEAD(&p->requests); rb_link_node(&p->node, rb, parent); - rb_insert_color_cached(&p->node, &execlists->queue, first); + rb_insert_color_cached(&p->node, &se->queue, first); return &p->requests; } @@ -621,7 +633,7 @@ void i915_request_enqueue(struct i915_request *rq) } else { queue_request(engine, rq); - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(i915_sched_is_idle(&engine->active)); kick = submit_queue(engine, rq); } @@ -656,7 +668,7 @@ __intel_engine_rewind_requests(struct intel_engine_cs *engine) prio = rq_prio(rq); pl = lookup_priolist(engine, prio); } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(i915_sched_is_idle(&engine->active)); list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index ea5595188f8e..6bb9d8c3519a 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -12,6 +12,7 @@ #include #include "i915_scheduler_types.h" +#include "i915_request.h" struct drm_printer; @@ -37,6 +38,8 @@ void i915_sched_node_retire(struct i915_sched_node *node); void 
i915_sched_init_engine(struct i915_sched_engine *se, unsigned int subclass); +void i915_sched_park_engine(struct i915_sched_engine *se); +void i915_sched_fini_engine(struct i915_sched_engine *se); void i915_request_set_priority(struct i915_request *request, int prio); @@ -59,6 +62,18 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +static inline bool i915_sched_is_idle(const struct i915_sched_engine *se) +{ + return RB_EMPTY_ROOT(&se->queue.rb_root); +} + +static inline bool +i915_sched_is_last_request(const struct i915_sched_engine *se, + const struct i915_request *rq) +{ + return list_is_last_rcu(&rq->sched.link, &se->requests); +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 5f21f5ac6dd9..1a4cf7e52186 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -82,10 +82,25 @@ struct i915_sched_ipi { struct i915_sched_engine { spinlock_t lock; + struct list_head requests; struct list_head hold; /* ready requests, but on hold */ + /** + * @queue: queue of requests, in priority lists + */ + struct rb_root_cached queue; struct i915_sched_ipi ipi; + + /** + * @default_priolist: priority list for I915_PRIORITY_NORMAL + */ + struct i915_priolist default_priolist; + + /** + * @no_priolist: priority lists disabled + */ + bool no_priolist; }; struct i915_dependency { diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c index e6910f4c429d..5c5b574af253 100644 --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c @@ -92,7 +92,7 @@ static bool check_context_order(struct intel_engine_cs *engine) last_context = 0; last_seqno = 0; last_prio = 0; - for (rb = rb_first_cached(&engine->execlists.queue); 
rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&engine->active.queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); struct i915_request *rq; From patchwork Mon Dec 28 15:52:06 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991431 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id AA3D1C43603 for ; Mon, 28 Dec 2020 15:53:26 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 6CA77206E5 for ; Mon, 28 Dec 2020 15:53:26 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 6CA77206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 24BB489B18; Mon, 28 Dec 2020 15:53:01 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id B05A489A14 for ; Mon, 28 Dec 2020 15:52:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 
23448210-1500050 for multiple; Mon, 28 Dec 2020 15:52:37 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:06 +0000 Message-Id: <20201228155229.9516-31-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 31/54] drm/i915: Move tasklet from execlists to sched X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Move the scheduling tasklets out of the execlists backend into the per-engine scheduling bookkeeping. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine.h | 14 ---- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 11 ++-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 -- .../drm/i915/gt/intel_execlists_submission.c | 64 +++++++++---------- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 2 +- drivers/gpu/drm/i915/gt/selftest_execlists.c | 16 ++--- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 6 +- drivers/gpu/drm/i915/gt/selftest_reset.c | 6 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 14 ++-- drivers/gpu/drm/i915/i915_scheduler.c | 14 ++-- drivers/gpu/drm/i915/i915_scheduler.h | 20 ++++++ drivers/gpu/drm/i915/i915_scheduler_types.h | 6 ++ .../gpu/drm/i915/selftests/i915_scheduler.c | 16 ++--- 14 files changed, 99 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 20974415e7d8..801ae54cf60d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -122,20 +122,6 @@ execlists_active(const struct 
intel_engine_execlists *execlists) return active; } -static inline void -execlists_active_lock_bh(struct intel_engine_execlists *execlists) -{ - local_bh_disable(); /* prevent local softirq and lock recursion */ - tasklet_lock(&execlists->tasklet); -} - -static inline void -execlists_active_unlock_bh(struct intel_engine_execlists *execlists) -{ - tasklet_unlock(&execlists->tasklet); - local_bh_enable(); /* restore softirq, and kick ksoftirqd! */ -} - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index db17d9fe3333..eb429a90518e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -915,7 +915,6 @@ int intel_engines_init(struct intel_gt *gt) void intel_engine_cleanup_common(struct intel_engine_cs *engine) { i915_sched_fini_engine(&engine->active); - tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ intel_breadcrumbs_free(engine->breadcrumbs); @@ -1189,7 +1188,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine) void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync) { - struct tasklet_struct *t = &engine->execlists.tasklet; + struct tasklet_struct *t = &engine->active.tasklet; if (!t->func) return; @@ -1454,8 +1453,8 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? 
%s\n", yesno(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state)), - enableddisabled(!atomic_read(&engine->execlists.tasklet.count)), + &engine->active.tasklet.state)), + enableddisabled(!atomic_read(&engine->active.tasklet.count)), repr_timer(&engine->execlists.preempt), repr_timer(&engine->execlists.timer)); @@ -1479,7 +1478,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, idx, hws[idx * 2], hws[idx * 2 + 1]); } - execlists_active_lock_bh(execlists); + i915_sched_lock_bh(&engine->active); rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[160]; @@ -1510,7 +1509,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, i915_request_show(m, rq, hdr, 0); } rcu_read_unlock(); - execlists_active_unlock_bh(execlists); + i915_sched_unlock_bh(&engine->active); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", ENGINE_READ(engine, RING_PP_DIR_BASE)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a2a49e51b92d..694d2e04ba6a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -139,11 +139,6 @@ struct st_preempt_hang { * driver and the hardware state for execlist mode of submission. 
*/ struct intel_engine_execlists { - /** - * @tasklet: softirq tasklet for bottom handler - */ - struct tasklet_struct tasklet; - /** * @timer: kick the current context if its timeslice expires */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 5d76a02d945e..2277958aefd9 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -530,7 +530,7 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) resubmit_virtual_request(rq, ve); if (READ_ONCE(ve->request)) - tasklet_hi_schedule(&ve->base.execlists.tasklet); + i915_sched_kick(&ve->base.active); } static inline void __execlists_schedule_out(struct i915_request *rq) @@ -700,9 +700,9 @@ trace_ports(const struct intel_engine_execlists *execlists, } static inline bool -reset_in_progress(const struct intel_engine_execlists *execlists) +reset_in_progress(const struct intel_engine_cs *engine) { - return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); + return unlikely(!__tasklet_is_enabled(&engine->active.tasklet)); } static __maybe_unused bool @@ -719,7 +719,7 @@ assert_pending_valid(const struct intel_engine_execlists *execlists, trace_ports(execlists, msg, execlists->pending); /* We may be messing around with the lists during reset, lalala */ - if (reset_in_progress(execlists)) + if (reset_in_progress(engine)) return true; if (!execlists->pending[0]) { @@ -1086,7 +1086,7 @@ static void start_timeslice(struct intel_engine_cs *engine) if (needs_timeslice(engine, *el->active)) { if (el->timer.expires) { if (!timer_pending(&el->timer)) - tasklet_hi_schedule(&engine->execlists.tasklet); + i915_sched_kick(&engine->active); return; } @@ -1650,8 +1650,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * access. 
Either we are inside the tasklet, or the tasklet is disabled * and we assume that is only inside the reset paths and so serialised. */ - GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && - !reset_in_progress(execlists)); + GEM_BUG_ON(!tasklet_is_locked(&engine->active.tasklet) && + !reset_in_progress(engine)); GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); /* @@ -2048,13 +2048,13 @@ static void execlists_reset(struct intel_engine_cs *engine, const char *msg) ENGINE_TRACE(engine, "reset for %s\n", msg); /* Mark this tasklet as disabled to avoid waiting for it to complete */ - tasklet_disable_nosync(&engine->execlists.tasklet); + tasklet_disable_nosync(&engine->active.tasklet); ring_set_paused(engine, 1); /* Freeze the current request in place */ execlists_capture(engine); intel_engine_reset(engine, msg); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); clear_and_wake_up_bit(bit, lock); } @@ -2118,8 +2118,10 @@ static void execlists_submission_tasklet(unsigned long data) static void __execlists_kick(struct intel_engine_execlists *execlists) { - /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&execlists->tasklet); + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + i915_sched_kick(&engine->active); } #define execlists_kick(t, member) \ @@ -2468,10 +2470,8 @@ static int execlists_resume(struct intel_engine_cs *engine) static void execlists_reset_prepare(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - ENGINE_TRACE(engine, "depth<-%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->active.tasklet.count)); /* * Prevent request submission to the hardware until we have @@ -2482,8 +2482,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Turning off the execlists->tasklet until the reset is over * prevents the race. 
*/ - __tasklet_disable_sync_once(&execlists->tasklet); - GEM_BUG_ON(!reset_in_progress(execlists)); + __tasklet_disable_sync_once(&engine->active.tasklet); + GEM_BUG_ON(!reset_in_progress(engine)); /* * We stop engines, otherwise we might get failed reset and a @@ -2717,8 +2717,8 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) execlists->queue_priority_hint = INT_MIN; engine->active.queue = RB_ROOT_CACHED; - GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); - execlists->tasklet.func = nop_submission_tasklet; + GEM_BUG_ON(__tasklet_is_enabled(&engine->active.tasklet)); + engine->active.tasklet.func = nop_submission_tasklet; spin_unlock_irqrestore(&engine->active.lock, flags); rcu_read_unlock(); @@ -2726,22 +2726,20 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) static void execlists_reset_finish(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - /* * After a GPU reset, we may have requests to replay. Do so now while * we still have the forcewake to be sure that the GPU is not allowed * to sleep before we restart and reload a context. */ - GEM_BUG_ON(!reset_in_progress(execlists)); + GEM_BUG_ON(!reset_in_progress(engine)); GEM_BUG_ON(engine->execlists.pending[0]); /* And kick in case we missed a new request submission. 
*/ - if (__tasklet_enable(&execlists->tasklet)) - __execlists_kick(execlists); + if (__tasklet_enable(&engine->active.tasklet)) + i915_sched_kick(&engine->active); ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->active.tasklet.count)); } static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) @@ -2765,7 +2763,7 @@ static void execlists_park(struct intel_engine_cs *engine) void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i915_request_enqueue; - engine->execlists.tasklet.func = execlists_submission_tasklet; + engine->active.tasklet.func = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; engine->reset.rewind = execlists_reset_rewind; @@ -2800,7 +2798,7 @@ static void execlists_shutdown(struct intel_engine_cs *engine) /* Synchronise with residual timers and any softirq they raise */ del_timer_sync(&engine->execlists.timer); del_timer_sync(&engine->execlists.preempt); - tasklet_kill(&engine->execlists.tasklet); + tasklet_kill(&engine->active.tasklet); } static void execlists_release(struct intel_engine_cs *engine) @@ -2893,7 +2891,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) struct intel_uncore *uncore = engine->uncore; u32 base = engine->mmio_base; - tasklet_init(&engine->execlists.tasklet, + tasklet_init(&engine->active.tasklet, execlists_submission_tasklet, (unsigned long)engine); timer_setup(&engine->execlists.timer, execlists_timeslice, 0); timer_setup(&engine->execlists.preempt, execlists_preempt, 0); @@ -2982,7 +2980,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) * rbtrees as in the case it is running in parallel, it may reinsert * the rb_node into a sibling. */ - tasklet_kill(&ve->base.execlists.tasklet); + tasklet_kill(&ve->base.active.tasklet); /* Decouple ourselves from the siblings, no more access allowed. 
*/ for (n = 0; n < ve->num_siblings; n++) { @@ -3000,7 +2998,7 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) spin_unlock_irq(&sibling->active.lock); } - GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); + GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.active.tasklet)); GEM_BUG_ON(!list_empty(virtual_queue(ve))); lrc_fini(&ve->context); @@ -3212,7 +3210,7 @@ static void virtual_submission_tasklet(unsigned long data) GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); node->prio = prio; if (first && prio > sibling->execlists.queue_priority_hint) - tasklet_hi_schedule(&sibling->execlists.tasklet); + i915_sched_kick(&sibling->active); unlock_engine: spin_unlock_irq(&sibling->active.lock); @@ -3253,7 +3251,7 @@ static void virtual_submit_request(struct i915_request *rq) GEM_BUG_ON(!list_empty(virtual_queue(ve))); list_move_tail(&rq->sched.link, virtual_queue(ve)); - tasklet_hi_schedule(&ve->base.execlists.tasklet); + i915_sched_kick(&ve->base.active); unlock: spin_unlock_irqrestore(&ve->base.active.lock, flags); @@ -3351,7 +3349,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, INIT_LIST_HEAD(virtual_queue(ve)); ve->base.execlists.queue_priority_hint = INT_MIN; - tasklet_init(&ve->base.execlists.tasklet, + tasklet_init(&ve->base.active.tasklet, virtual_submission_tasklet, (unsigned long)ve); @@ -3381,7 +3379,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, * layering if we handle cloning of the requests and * submitting a copy into each backend. 
*/ - if (sibling->execlists.tasklet.func != + if (sibling->active.tasklet.func != execlists_submission_tasklet) { err = -ENODEV; goto err_put; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 9830342aa6f4..2106fb403c3e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -60,7 +60,7 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) } if (tasklet) - tasklet_hi_schedule(&engine->execlists.tasklet); + i915_sched_kick(&engine->active); } static u32 diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 276743d553a9..291fb04807b9 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -44,7 +44,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, unsigned long timeout) { /* Ignore our own attempts to suppress excess tasklets */ - tasklet_hi_schedule(&engine->execlists.tasklet); + i915_sched_kick(&engine->active); timeout += jiffies; do { @@ -603,9 +603,9 @@ static int live_hold_reset(void *arg) err = -EBUSY; goto out; } - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->active.tasklet); - engine->execlists.tasklet.func(engine->execlists.tasklet.data); + engine->active.tasklet.func(engine->active.tasklet.data); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); i915_request_get(rq); @@ -615,7 +615,7 @@ static int live_hold_reset(void *arg) __intel_engine_reset_bh(engine, NULL); GEM_BUG_ON(rq->fence.error != -EIO); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); local_bh_enable(); @@ -1174,7 +1174,7 @@ static int live_timeslice_rewind(void *arg) while (i915_request_is_active(rq[A2])) { /* semaphore yield! 
*/ /* Wait for the timeslice to kick in */ del_timer(&engine->execlists.timer); - tasklet_hi_schedule(&engine->execlists.tasklet); + i915_sched_kick(&engine->active); intel_engine_flush_submission(engine); } /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ @@ -4525,9 +4525,9 @@ static int reset_virtual_engine(struct intel_gt *gt, err = -EBUSY; goto out_heartbeat; } - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->active.tasklet); - engine->execlists.tasklet.func(engine->execlists.tasklet.data); + engine->active.tasklet.func(engine->active.tasklet.data); GEM_BUG_ON(execlists_active(&engine->execlists) != rq); /* Fake a preemption event; failed of course */ @@ -4544,7 +4544,7 @@ static int reset_virtual_engine(struct intel_gt *gt, GEM_BUG_ON(rq->fence.error != -EIO); /* Release our grasp on the engine, letting CS flow again */ - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); local_bh_enable(); diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 927d54c702f4..cc0120411d62 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1568,7 +1568,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, const struct igt_atomic_section *p, const char *mode) { - struct tasklet_struct * const t = &engine->execlists.tasklet; + struct tasklet_struct * const t = &engine->active.tasklet; int err; GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index f449c56e0946..e26f7f957468 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -49,7 +49,7 @@ static int wait_for_submit(struct intel_engine_cs *engine, unsigned long timeout) { /* Ignore our own attempts to suppress excess tasklets */ - 
tasklet_hi_schedule(&engine->execlists.tasklet); + i915_sched_kick(&engine->active); timeout += jiffies; do { @@ -1608,12 +1608,12 @@ static void garbage_reset(struct intel_engine_cs *engine, local_bh_disable(); if (!test_and_set_bit(bit, lock)) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->active.tasklet); if (!rq->fence.error) __intel_engine_reset_bh(engine, NULL); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); clear_and_wake_up_bit(bit, lock); } local_bh_enable(); diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 5ec8d4e9983f..2a7059923a01 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -321,7 +321,7 @@ static int igt_atomic_engine_reset(void *arg) goto out_unlock; for_each_engine(engine, gt, id) { - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->active.tasklet); intel_engine_pm_get(engine); for (p = igt_atomic_phases; p->name; p++) { @@ -345,8 +345,8 @@ static int igt_atomic_engine_reset(void *arg) } intel_engine_pm_put(engine); - tasklet_enable(&engine->execlists.tasklet); - tasklet_hi_schedule(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); + tasklet_hi_schedule(&engine->active.tasklet); if (err) break; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 6d97b5ad09b5..b9cb6807d101 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -372,8 +372,6 @@ static void guc_submission_tasklet(unsigned long data) static void guc_reset_prepare(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - ENGINE_TRACE(engine, "\n"); /* @@ -385,7 +383,7 @@ static void guc_reset_prepare(struct intel_engine_cs *engine) * Turning off the execlists->tasklet until the 
reset is over * prevents the race. */ - __tasklet_disable_sync_once(&execlists->tasklet); + __tasklet_disable_sync_once(&engine->active.tasklet); } static void @@ -508,14 +506,12 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) static void guc_reset_finish(struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - - if (__tasklet_enable(&execlists->tasklet)) + if (__tasklet_enable(&engine->active.tasklet)) /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&execlists->tasklet); + tasklet_hi_schedule(&engine->active.tasklet); ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); + atomic_read(&engine->active.tasklet.count)); } /* @@ -613,7 +609,7 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) */ intel_execlists_set_default_submission(engine); - engine->execlists.tasklet.func = guc_submission_tasklet; + engine->active.tasklet.func = guc_submission_tasklet; /* do not use execlists park/unpark */ engine->park = engine->unpark = NULL; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 61150ba22733..f99d757d4d5c 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -125,6 +125,7 @@ void i915_sched_park_engine(struct i915_sched_engine *se) void i915_sched_fini_engine(struct i915_sched_engine *se) { GEM_BUG_ON(!list_empty(&se->requests)); + tasklet_kill(&se->tasklet); /* flush the callback */ } static void __ipi_add(struct i915_request *rq) @@ -339,7 +340,7 @@ static void kick_submission(struct intel_engine_cs *engine, engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) - tasklet_hi_schedule(&engine->execlists.tasklet); + i915_sched_kick(&engine->active); } static void ipi_priority(struct i915_request *rq, int prio) @@ -619,16 +620,17 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine, 
void i915_request_enqueue(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; + struct i915_sched_engine *se = &engine->active; unsigned long flags; bool kick = false; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->active.lock, flags); + spin_lock_irqsave(&se->lock, flags); GEM_BUG_ON(test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags)); if (unlikely(ancestor_on_hold(engine, rq))) { RQ_TRACE(rq, "ancestor on hold\n"); - list_add_tail(&rq->sched.link, &engine->active.hold); + list_add_tail(&rq->sched.link, &se->hold); i915_request_set_hold(rq); } else { queue_request(engine, rq); @@ -639,9 +641,9 @@ void i915_request_enqueue(struct i915_request *rq) } GEM_BUG_ON(list_empty(&rq->sched.link)); - spin_unlock_irqrestore(&engine->active.lock, flags); + spin_unlock_irqrestore(&se->lock, flags); if (kick) - tasklet_hi_schedule(&engine->execlists.tasklet); + i915_sched_kick(se); } struct i915_request * @@ -754,7 +756,7 @@ void __intel_engine_unhold_request(struct intel_engine_cs *engine, if (rq_prio(rq) > engine->execlists.queue_priority_hint) { engine->execlists.queue_priority_hint = rq_prio(rq); - tasklet_hi_schedule(&engine->execlists.tasklet); + i915_sched_kick(&engine->active); } /* diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 6bb9d8c3519a..a68747d682d2 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -74,6 +74,26 @@ i915_sched_is_last_request(const struct i915_sched_engine *se, return list_is_last_rcu(&rq->sched.link, &se->requests); } +static inline void +i915_sched_lock_bh(struct i915_sched_engine *se) +{ + local_bh_disable(); /* prevent local softirq and lock recursion */ + tasklet_lock(&se->tasklet); +} + +static inline void +i915_sched_unlock_bh(struct i915_sched_engine *se) +{ + tasklet_unlock(&se->tasklet); + local_bh_enable(); /* restore softirq, and kick ksoftirqd! 
*/ +} + +static inline void i915_sched_kick(struct i915_sched_engine *se) +{ + /* Kick the tasklet for some interrupt coalescing and reset handling */ + tasklet_hi_schedule(&se->tasklet); +} + void i915_request_show_with_schedule(struct drm_printer *m, const struct i915_request *rq, const char *prefix, diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 1a4cf7e52186..92332fb71b14 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -7,6 +7,7 @@ #ifndef _I915_SCHEDULER_TYPES_H_ #define _I915_SCHEDULER_TYPES_H_ +#include #include #include @@ -101,6 +102,11 @@ struct i915_sched_engine { * @no_priolist: priority lists disabled */ bool no_priolist; + + /** + * @tasklet: softirq tasklet for bottom half + */ + struct tasklet_struct tasklet; }; struct i915_dependency { diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c index 5c5b574af253..5b1dd227e24e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c @@ -172,12 +172,12 @@ static int __single_chain(struct intel_engine_cs *engine, unsigned long length, } intel_engine_flush_submission(engine); - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->active.tasklet); local_bh_disable(); if (fn(rq, count, count - 1) && !check_context_order(engine)) err = -EINVAL; local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); igt_spinner_end(&spin); err_context: @@ -258,12 +258,12 @@ static int __wide_chain(struct intel_engine_cs *engine, unsigned long width, } intel_engine_flush_submission(engine); - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->active.tasklet); local_bh_disable(); if (fn(rq[i - 1], i, count) && !check_context_order(engine)) err = -EINVAL; local_bh_enable(); - 
tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); igt_spinner_end(&spin); err_free: @@ -348,12 +348,12 @@ static int __inv_chain(struct intel_engine_cs *engine, unsigned long width, } intel_engine_flush_submission(engine); - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->active.tasklet); local_bh_disable(); if (fn(rq[i - 1], i, count) && !check_context_order(engine)) err = -EINVAL; local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); igt_spinner_end(&spin); err_free: @@ -455,12 +455,12 @@ static int __sparse_chain(struct intel_engine_cs *engine, unsigned long width, } intel_engine_flush_submission(engine); - tasklet_disable(&engine->execlists.tasklet); + tasklet_disable(&engine->active.tasklet); local_bh_disable(); if (fn(rq[i - 1], i, count) && !check_context_order(engine)) err = -EINVAL; local_bh_enable(); - tasklet_enable(&engine->execlists.tasklet); + tasklet_enable(&engine->active.tasklet); igt_spinner_end(&spin); err_free: From patchwork Mon Dec 28 15:52:07 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991439 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 26D3EC433E0 for ; Mon, 28 Dec 2020 15:54:26 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with 
ESMTPS id D6B2F206E5 for ; Mon, 28 Dec 2020 15:54:25 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org D6B2F206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 651E289BAF; Mon, 28 Dec 2020 15:54:25 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 9F7C38925D for ; Mon, 28 Dec 2020 15:52:55 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448211-1500050 for multiple; Mon, 28 Dec 2020 15:52:38 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:07 +0000 Message-Id: <20201228155229.9516-32-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 32/54] drm/i915: Replace priolist rbtree with a skiplist X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Replace the priolist rbtree with a skiplist. The crucial difference is that walking and removing the first element of a skiplist is O(1), but O(lgN) for an rbtree, as we need to rebalance on remove. 
This is a hindrance for submission latency as it occurs between picking a request for the priolist and submitting it to hardware, as well as effectively tripling the number of O(lgN) operations required under the irqoff lock. This is critical to reducing the latency jitter with multiple clients. The downsides to skiplists are that lookup/insertion is only probabilistically O(lgN) and there is a significant memory penalty as each skip node is larger than the rbtree equivalent. Furthermore, we don't use dynamic arrays for the skiplist, so the allocation is fixed, and imposes an upper bound on the scalability with respect to the number of inflight requests. Signed-off-by: Chris Wilson --- .../drm/i915/gt/intel_execlists_submission.c | 63 ++--- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 30 +-- drivers/gpu/drm/i915/i915_priolist_types.h | 29 ++- drivers/gpu/drm/i915/i915_scheduler.c | 233 +++++++++++++----- drivers/gpu/drm/i915/i915_scheduler.h | 11 +- drivers/gpu/drm/i915/i915_scheduler_types.h | 2 +- .../drm/i915/selftests/i915_mock_selftests.h | 1 + .../gpu/drm/i915/selftests/i915_scheduler.c | 42 +++- 8 files changed, 294 insertions(+), 117 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 2277958aefd9..27c26fde3021 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -256,11 +256,6 @@ ring_set_paused(const struct intel_engine_cs *engine, int state) wmb(); } -static inline struct i915_priolist *to_priolist(struct rb_node *rb) -{ - return rb_entry(rb, struct i915_priolist, node); -} - static inline int rq_prio(const struct i915_request *rq) { return READ_ONCE(rq->sched.attr.priority); @@ -284,15 +279,31 @@ static int effective_prio(const struct i915_request *rq) return prio; } -static int queue_prio(const struct i915_sched_engine *se) +static struct i915_request *first_request(struct i915_sched_engine *se) {
- struct rb_node *rb; + struct i915_priolist *pl; + + for_each_priolist(pl, &se->queue) { + if (likely(!list_empty(&pl->requests))) + return list_first_entry(&pl->requests, + struct i915_request, + sched.link); + + i915_priolist_advance(&se->queue, pl); + } + + return NULL; +} - rb = rb_first_cached(&se->queue); - if (!rb) +static int queue_prio(struct i915_sched_engine *se) +{ + struct i915_request *rq; + + rq = first_request(se); + if (!rq) return INT_MIN; - return to_priolist(rb)->priority; + return rq_prio(rq); } static int virtual_prio(const struct intel_engine_execlists *el) @@ -302,7 +313,7 @@ static int virtual_prio(const struct intel_engine_execlists *el) return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; } -static inline bool need_preempt(const struct intel_engine_cs *engine, +static inline bool need_preempt(struct intel_engine_cs *engine, const struct i915_request *rq) { int last_prio; @@ -1139,6 +1150,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct i915_request ** const last_port = port + execlists->port_mask; struct i915_request *last, * const *active; struct virtual_engine *ve; + struct i915_priolist *pl; struct rb_node *rb; bool submit = false; @@ -1338,11 +1350,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) break; } - while ((rb = rb_first_cached(&engine->active.queue))) { - struct i915_priolist *p = to_priolist(rb); + for_each_priolist(pl, &engine->active.queue) { struct i915_request *rq, *rn; - priolist_for_each_request_consume(rq, rn, p) { + priolist_for_each_request_safe(rq, rn, pl) { bool merge = true; /* @@ -1417,8 +1428,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } } - rb_erase_cached(&p->node, &engine->active.queue); - i915_priolist_free(p); + i915_priolist_advance(&engine->active.queue, pl); } done: *port++ = i915_request_get(last); @@ -2643,6 +2653,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const 
execlists = &engine->execlists; struct i915_request *rq, *rn; + struct i915_priolist *pl; struct rb_node *rb; unsigned long flags; @@ -2673,16 +2684,12 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&engine->active.queue))) { - struct i915_priolist *p = to_priolist(rb); - - priolist_for_each_request_consume(rq, rn, p) { + for_each_priolist(pl, &engine->active.queue) { + priolist_for_each_request_safe(rq, rn, pl) { mark_eio(rq); __i915_request_submit(rq); } - - rb_erase_cached(&p->node, &engine->active.queue); - i915_priolist_free(p); + i915_priolist_advance(&engine->active.queue, pl); } GEM_BUG_ON(!i915_sched_is_idle(&engine->active)); @@ -2715,7 +2722,6 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority_hint = INT_MIN; - engine->active.queue = RB_ROOT_CACHED; GEM_BUG_ON(__tasklet_is_enabled(&engine->active.tasklet)); engine->active.tasklet.func = nop_submission_tasklet; @@ -3098,6 +3104,8 @@ static void virtual_context_exit(struct intel_context *ce) for (n = 0; n < ve->num_siblings; n++) intel_engine_pm_put(ve->siblings[n]); + + i915_sched_park_engine(&ve->base.active); } static const struct intel_context_ops virtual_context_ops = { @@ -3508,6 +3516,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, { const struct intel_engine_execlists *execlists = &engine->execlists; struct i915_request *rq, *last; + struct i915_priolist *pl; unsigned long flags; unsigned int count; struct rb_node *rb; @@ -3537,10 +3546,8 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, last = NULL; count = 0; - for (rb = rb_first_cached(&engine->active.queue); rb; rb = rb_next(rb)) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - - priolist_for_each_request(rq, p) { + 
for_each_priolist(pl, &engine->active.queue) { + priolist_for_each_request(rq, pl) { if (count++ < max - 1) show_request(m, rq, "\t\t", 0); else diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index b9cb6807d101..34be8dc8949e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -55,11 +55,6 @@ * */ -static inline struct i915_priolist *to_priolist(struct rb_node *rb) -{ - return rb_entry(rb, struct i915_priolist, node); -} - static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) { struct guc_stage_desc *base = guc->stage_desc_pool_vaddr; @@ -292,8 +287,8 @@ static void __guc_dequeue(struct intel_engine_cs *engine) struct i915_request ** const last_port = first + execlists->port_mask; struct i915_request *last = first[0]; struct i915_request **port; + struct i915_priolist *pl; bool submit = false; - struct rb_node *rb; lockdep_assert_held(&engine->active.lock); @@ -310,11 +305,10 @@ static void __guc_dequeue(struct intel_engine_cs *engine) * event. */ port = first; - while ((rb = rb_first_cached(&engine->active.queue))) { - struct i915_priolist *p = to_priolist(rb); + for_each_priolist(pl, &engine->active.queue) { struct i915_request *rq, *rn; - priolist_for_each_request_consume(rq, rn, p) { + priolist_for_each_request_safe(rq, rn, pl) { if (last && rq->context != last->context) { if (port == last_port) goto done; @@ -330,12 +324,11 @@ static void __guc_dequeue(struct intel_engine_cs *engine) last = rq; } - rb_erase_cached(&p->node, &engine->active.queue); - i915_priolist_free(p); + i915_priolist_advance(&engine->active.queue, pl); } done: execlists->queue_priority_hint = - rb ? to_priolist(rb)->priority : INT_MIN; + pl != &engine->active.queue.sentinel ? 
pl->priority : INT_MIN; if (submit) { *port = schedule_in(last, port - execlists->inflight); *++port = NULL; @@ -450,7 +443,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *rq, *rn; - struct rb_node *rb; + struct i915_priolist *p; unsigned long flags; ENGINE_TRACE(engine, "\n"); @@ -481,25 +474,20 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) } /* Flush the queued requests to the timeline list (for retiring). */ - while ((rb = rb_first_cached(&engine->active.queue))) { - struct i915_priolist *p = to_priolist(rb); - - priolist_for_each_request_consume(rq, rn, p) { + for_each_priolist(p, &engine->active.queue) { + priolist_for_each_request_safe(rq, rn, p) { list_del_init(&rq->sched.link); __i915_request_submit(rq); dma_fence_set_error(&rq->fence, -EIO); i915_request_mark_complete(rq); } - - rb_erase_cached(&p->node, &engine->active.queue); - i915_priolist_free(p); + i915_priolist_advance(&engine->active.queue, p); } GEM_BUG_ON(!i915_sched_is_idle(&engine->active)); /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority_hint = INT_MIN; - engine->active.queue = RB_ROOT_CACHED; spin_unlock_irqrestore(&engine->active.lock, flags); } diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index bc2fa84f98a8..807cd8f65481 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -38,10 +38,37 @@ enum { #define I915_PRIORITY_UNPREEMPTABLE INT_MAX #define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) +#ifdef CONFIG_64BIT +#define I915_PRIOLIST_HEIGHT 12 +#else +#define I915_PRIOLIST_HEIGHT 11 +#endif + struct i915_priolist { struct list_head requests; - struct rb_node node; int priority; + + int level; + struct i915_priolist *next[I915_PRIOLIST_HEIGHT]; +}; + +struct i915_priolist_root { + struct i915_priolist 
sentinel; + u32 prng; }; +#define priolist_first(root) ((root)->sentinel.next[0]) +#define i915_priolist_is_empty(root) (priolist_first(root) == &(root)->sentinel) + +#define for_each_priolist(p, root) \ + for ((p) = priolist_first(root); \ + (p) != &(root)->sentinel; \ + (p) = (p)->next[0]) + +#define priolist_for_each_request(it, plist) \ + list_for_each_entry(it, &(plist)->requests, sched.link) + +#define priolist_for_each_request_safe(it, n, plist) \ + list_for_each_entry_safe(it, n, &(plist)->requests, sched.link) + #endif /* _I915_PRIOLIST_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index f99d757d4d5c..fa6262f92afd 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -4,7 +4,9 @@ * Copyright © 2018 Intel Corporation */ +#include #include +#include #include "gt/intel_ring.h" #include "gt/intel_lrc_reg.h" @@ -91,15 +93,23 @@ static void i915_sched_init_ipi(struct i915_sched_ipi *ipi) ipi->list = NULL; } +static void init_priolist(struct i915_priolist_root *const root) +{ + struct i915_priolist *pl = &root->sentinel; + + memset_p((void **)pl->next, pl, ARRAY_SIZE(pl->next)); + pl->priority = INT_MIN; +} + void i915_sched_init_engine(struct i915_sched_engine *se, unsigned int subclass) { spin_lock_init(&se->lock); lockdep_set_subclass(&se->lock, subclass); + init_priolist(&se->queue); INIT_LIST_HEAD(&se->requests); INIT_LIST_HEAD(&se->hold); - se->queue = RB_ROOT_CACHED; i915_sched_init_ipi(&se->ipi); @@ -116,8 +126,57 @@ void i915_sched_init_engine(struct i915_sched_engine *se, #endif } +__maybe_unused static bool priolist_idle(struct i915_priolist_root *root) +{ + struct i915_priolist *pl = &root->sentinel; + int lvl; + + for (lvl = 0; lvl < ARRAY_SIZE(pl->next); lvl++) { + if (pl->next[lvl] != pl) { + GEM_TRACE_ERR("root[%d] is not empty\n", lvl); + return false; + } + } + + if (pl->level) { + GEM_TRACE_ERR("root is not clear: %d\n", pl->level); + return 
false; + } + + return true; +} + +static void pl_push(struct i915_priolist *pl, struct list_head *head) +{ + pl->requests.next = head->next; + head->next = &pl->requests; +} + +static struct i915_priolist *pl_pop(struct list_head *head) +{ + struct i915_priolist *pl; + + pl = container_of(head->next, typeof(*pl), requests); + head->next = pl->requests.next; + + return pl; +} + +static bool pl_empty(struct list_head *head) +{ + return !head->next; +} + void i915_sched_park_engine(struct i915_sched_engine *se) { + struct i915_priolist_root *root = &se->queue; + struct list_head *list = &root->sentinel.requests; + + GEM_BUG_ON(!priolist_idle(root)); + + while (!pl_empty(list)) + kmem_cache_free(global.slab_priorities, pl_pop(list)); + GEM_BUG_ON(!i915_sched_is_idle(se)); se->no_priolist = false; } @@ -183,71 +242,55 @@ static inline bool node_signaled(const struct i915_sched_node *node) return i915_request_completed(node_to_request(node)); } -static inline struct i915_priolist *to_priolist(struct rb_node *rb) -{ - return rb_entry(rb, struct i915_priolist, node); -} - -static void assert_priolists(struct i915_sched_engine * const se) +static inline unsigned int random_level(struct i915_priolist_root *root) { - struct rb_node *rb; - long last_prio; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return; - - GEM_BUG_ON(rb_first_cached(&se->queue) != - rb_first(&se->queue.rb_root)); - - last_prio = INT_MAX; - for (rb = rb_first_cached(&se->queue); rb; rb = rb_next(rb)) { - const struct i915_priolist *p = to_priolist(rb); - - GEM_BUG_ON(p->priority > last_prio); - last_prio = p->priority; - } + root->prng = next_pseudo_random32(root->prng); + return min_t(int, __ffs(root->prng) / 2, I915_PRIOLIST_HEIGHT - 1); } static struct list_head * lookup_priolist(struct intel_engine_cs *engine, int prio) { + struct i915_priolist *update[I915_PRIOLIST_HEIGHT]; struct i915_sched_engine * const se = &engine->active; - struct i915_priolist *p; - struct rb_node **parent, *rb; - bool 
first = true; - - lockdep_assert_held(&engine->active.lock); - assert_priolists(se); + struct i915_priolist_root *root = &se->queue; + struct i915_priolist *pl, *tmp; + int lvl; + lockdep_assert_held(&se->lock); if (unlikely(se->no_priolist)) prio = I915_PRIORITY_NORMAL; -find_priolist: - /* most positive priority is scheduled first, equal priorities fifo */ - rb = NULL; - parent = &se->queue.rb_root.rb_node; - while (*parent) { - rb = *parent; - p = to_priolist(rb); - if (prio > p->priority) { - parent = &rb->rb_left; - } else if (prio < p->priority) { - parent = &rb->rb_right; - first = false; - } else { - return &p->requests; - } + for_each_priolist(pl, root) { /* recycle any empty elements before us */ + if (pl->priority >= prio || !list_empty(&pl->requests)) + break; + + i915_priolist_advance(root, pl); } +find_priolist: + pl = &root->sentinel; + lvl = pl->level; + do { + while (tmp = pl->next[lvl], tmp->priority >= prio) + pl = tmp; + if (pl->priority == prio) + goto out; + update[lvl] = pl; + } while (--lvl >= 0); + if (prio == I915_PRIORITY_NORMAL) { - p = &se->default_priolist; + pl = &se->default_priolist; + } else if (!pl_empty(&root->sentinel.requests)) { + pl = pl_pop(&root->sentinel.requests); } else { - p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); + pl = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ - if (unlikely(!p)) { + if (unlikely(!pl)) { prio = I915_PRIORITY_NORMAL; /* recurses just once */ - /* To maintain ordering with all rendering, after an + /* + * To maintain ordering with all rendering, after an * allocation failure we have to disable all scheduling. * Requests will then be executed in fifo, and schedule * will ensure that dependencies are emitted in fifo. 
@@ -260,18 +303,91 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) } } - p->priority = prio; - INIT_LIST_HEAD(&p->requests); + pl->priority = prio; + INIT_LIST_HEAD(&pl->requests); + + lvl = random_level(root); + if (lvl > root->sentinel.level) { + lvl = ++root->sentinel.level; + update[lvl] = &root->sentinel; + } + + pl->level = lvl; + do { + tmp = update[lvl]; + pl->next[lvl] = update[lvl]->next[lvl]; + tmp->next[lvl] = pl; + } while (--lvl >= 0); + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) { + struct i915_priolist *chk; + + chk = &root->sentinel; + lvl = chk->level; + do { + while (tmp = chk->next[lvl], tmp->priority >= prio) + chk = tmp; + } while (--lvl >= 0); + + GEM_BUG_ON(chk != pl); + } - rb_link_node(&p->node, rb, parent); - rb_insert_color_cached(&p->node, &se->queue, first); +out: + GEM_BUG_ON(pl == &root->sentinel); + return &pl->requests; +} - return &p->requests; +static void remove_priolist(struct intel_engine_cs *engine, + struct list_head *plist) +{ + struct i915_sched_engine * const se = &engine->active; + struct i915_priolist_root *root = &se->queue; + struct i915_priolist *pl, *tmp; + struct i915_priolist *old = + container_of(plist, struct i915_priolist, requests); + int prio = old->priority; + int lvl; + + lockdep_assert_held(&se->lock); + GEM_BUG_ON(!list_empty(plist)); + + pl_push(old, &root->sentinel.requests); + + pl = &root->sentinel; + lvl = pl->level; + do { + while (tmp = pl->next[lvl], tmp->priority > prio) + pl = tmp; + if (lvl <= old->level) { + pl->next[lvl] = old->next[lvl]; + if (pl == &root->sentinel && old->next[lvl] == pl) + pl->level--; + } + } while (--lvl >= 0); + GEM_BUG_ON(&tmp->requests != plist); } -void __i915_priolist_free(struct i915_priolist *p) +void i915_priolist_advance(struct i915_priolist_root *root, + struct i915_priolist *pl) { - kmem_cache_free(global.slab_priorities, p); + int lvl; + + GEM_BUG_ON(!list_empty(&pl->requests)); + GEM_BUG_ON(pl == &root->sentinel); + + if (pl->priority != 
I915_PRIORITY_NORMAL) + pl_push(pl, &root->sentinel.requests); + + for (lvl = 0; lvl <= pl->level; lvl++) + root->sentinel.next[lvl] = pl->next[lvl]; + + if (pl->level > 0 && pl->level == root->sentinel.level) { + pl = &root->sentinel; + lvl = pl->level; + while (lvl > 0 && pl->next[lvl] == pl) + lvl--; + pl->level = lvl; + } } static struct i915_request * @@ -420,8 +536,13 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio) continue; GEM_BUG_ON(rq->engine != engine); - if (i915_request_in_priority_queue(rq)) + if (i915_request_in_priority_queue(rq)) { + struct list_head *prev = rq->sched.link.prev; + list_move_tail(&rq->sched.link, plist); + if (list_empty(prev)) + remove_priolist(engine, prev); + } /* Defer (tasklet) submission until after all updates. */ kick_submission(engine, rq, prio); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index a68747d682d2..4432aab230b6 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -16,12 +16,6 @@ struct drm_printer; -#define priolist_for_each_request(it, plist) \ - list_for_each_entry(it, &(plist)->requests, sched.link) - -#define priolist_for_each_request_consume(it, n, plist) \ - list_for_each_entry_safe(it, n, &(plist)->requests, sched.link) - void i915_sched_node_init(struct i915_sched_node *node); void i915_sched_node_reinit(struct i915_sched_node *node); @@ -64,7 +58,7 @@ static inline void i915_priolist_free(struct i915_priolist *p) static inline bool i915_sched_is_idle(const struct i915_sched_engine *se) { - return RB_EMPTY_ROOT(&se->queue.rb_root); + return i915_priolist_is_empty(&se->queue); } static inline bool @@ -94,6 +88,9 @@ static inline void i915_sched_kick(struct i915_sched_engine *se) tasklet_hi_schedule(&se->tasklet); } +void i915_priolist_advance(struct i915_priolist_root *root, + struct i915_priolist *old); + void i915_request_show_with_schedule(struct drm_printer *m, const struct 
i915_request *rq, const char *prefix, diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 92332fb71b14..c155e4faa30b 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -89,7 +89,7 @@ struct i915_sched_engine { /** * @queue: queue of requests, in priority lists */ - struct rb_root_cached queue; + struct i915_priolist_root queue; struct i915_sched_ipi ipi; diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 3db34d3eea58..946c93441c1f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -25,6 +25,7 @@ selftest(ring, intel_ring_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) selftest(timelines, intel_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) +selftest(scheduler, i915_scheduler_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(phys, i915_gem_phys_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c index 5b1dd227e24e..a851a156d460 100644 --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c @@ -12,6 +12,43 @@ #include "selftests/igt_spinner.h" #include "selftests/i915_random.h" +static int mock_skiplist_levels(void *dummy) +{ + struct i915_priolist_root root = {}; + struct i915_priolist *pl = &root.sentinel; + int count, lvl; + + for (count = 0; count < 16384; count++) { + lvl = random_level(&root); + if (lvl > pl->level) + lvl = ++pl->level; + + pl->next[lvl] = ptr_inc(pl->next[lvl]); + } + + for (lvl = 0; lvl <= pl->level; lvl++) { + int x = ilog2((unsigned long)pl->next[lvl]); + char row[80]; + + memset(row, '*', x); + row[x] = '\0'; + + pr_info("[%2d] %5lu 
%s\n", + lvl, (unsigned long)pl->next[lvl], row); + } + + return 0; +} + +int i915_scheduler_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(mock_skiplist_levels), + }; + + return i915_subtests(tests, NULL); +} + static void scheduling_disable(struct intel_engine_cs *engine) { engine->props.preempt_timeout_ms = 0; @@ -80,9 +117,9 @@ static int all_engines(struct drm_i915_private *i915, static bool check_context_order(struct intel_engine_cs *engine) { u64 last_seqno, last_context; + struct i915_priolist *p; unsigned long count; bool result = false; - struct rb_node *rb; int last_prio; /* We expect the execution order to follow ascending fence-context */ @@ -92,8 +129,7 @@ static bool check_context_order(struct intel_engine_cs *engine) last_context = 0; last_seqno = 0; last_prio = 0; - for (rb = rb_first_cached(&engine->active.queue); rb; rb = rb_next(rb)) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + for_each_priolist(p, &engine->active.queue) { struct i915_request *rq; priolist_for_each_request(rq, p) { From patchwork Mon Dec 28 15:52:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991361 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id A06B9C433DB for ; Mon, 28 Dec 2020 15:53:09 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org 
(Postfix) with ESMTPS id 61529206E5 for ; Mon, 28 Dec 2020 15:53:09 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 61529206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 44A4B89A60; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 88E648999C for ; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448212-1500050 for multiple; Mon, 28 Dec 2020 15:52:38 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:08 +0000 Message-Id: <20201228155229.9516-33-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 33/54] drm/i915: Wrap cmpxchg64 with try_cmpxchg64() helper X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Wrap cmpxchg64 with a try_cmpxchg()-esque helper. Hiding the old-value dance in the helper allows for cleaner code. 
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_utils.h | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 54773371e6bd..f8addd6d5afa 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -456,4 +456,36 @@ static inline bool timer_expired(const struct timer_list *t) */ #define IS_ACTIVE(config) ((config) != 0) +#ifndef try_cmpxchg64 +#if IS_ENABLED(CONFIG_64BIT) +#define try_cmpxchg64(_ptr, _pold, _new) try_cmpxchg(_ptr, _pold, _new) +#else +#define try_cmpxchg64(_ptr, _pold, _new) \ +({ \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __cur = cmpxchg64(_ptr, __old, _new); \ + bool success = __cur == __old; \ + if (unlikely(!success)) \ + *_old = __cur; \ + likely(success); \ +}) +#endif +#endif + +#ifndef xchg64 +#if IS_ENABLED(CONFIG_64BIT) +#define xchg64(_ptr, _new) xchg(_ptr, _new) +#else +#define xchg64(_ptr, _new) \ +({ \ + __typeof__(_ptr) __ptr = (_ptr); \ + __typeof__(*(_ptr)) __old = *__ptr; \ + while (!try_cmpxchg64(__ptr, &__old, (_new))) \ + ; \ + __old; \ +}) +#endif +#endif + #endif /* !__I915_UTILS_H */ From patchwork Mon Dec 28 15:52:09 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991375 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id BD371C433E6 for ; Mon, 28 Dec 2020 15:53:11 +0000 (UTC) Received: from 
gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 8ABE922B2A for ; Mon, 28 Dec 2020 15:53:11 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 8ABE922B2A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 9165F899F2; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id E006A899E7 for ; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448213-1500050 for multiple; Mon, 28 Dec 2020 15:52:38 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:09 +0000 Message-Id: <20201228155229.9516-34-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 34/54] drm/i915: Fair low-latency scheduling X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" The first "scheduler" was a topological sorting of requests into priority order.
The execution order was deterministic: the earliest submitted, highest priority request would be executed first. Priority inheritance ensured that inversions were kept at bay, and allowed us to dynamically boost priorities (e.g. for interactive pageflips). The minimalistic timeslicing scheme was an attempt to introduce fairness between long running requests, by evicting the active request at the end of a timeslice and moving it to the back of its priority queue (while ensuring that dependencies were kept in order). For short running requests from many clients of equal priority, the scheme is still very much FIFO submission ordering, and as unfair as before. To impose fairness, we need an external metric that ensures that clients are interspersed, so we don't execute one long chain from client A before executing any of client B. This could be imposed by the clients themselves by using fences based on an external clock, that is they only submit work for a "frame" at frame-intervals, instead of submitting as much work as they are able to. The standard SwapBuffers approach is akin to double buffering, where, as one frame is being executed, the next is being submitted, such that there is always a maximum of two frames per client in the pipeline and so ideally maintains consistent input-output latency. Even this scheme exhibits unfairness under load as a single client will execute two frames back to back before the next, and with enough clients, deadlines will be missed. The idea introduced by BFS/MuQSS is that fairness is introduced by metering with an external clock. Every request, when it becomes ready to execute, is assigned a virtual deadline, and execution order is then determined by earliest deadline. Priority is used as a hint, rather than strict ordering, where high priority requests have earlier deadlines, but not necessarily earlier than outstanding work. Thus work is executed in order of 'readiness', with timeslicing to demote long running work.
The Achilles' heel of this scheduler is its strong preference for low-latency and favouring of new queues. Whereas it was easy to dominate the old scheduler by flooding it with many requests over a short period of time, the new scheduler can be dominated by a 'synchronous' client that waits for each of its requests to complete before submitting the next. As such a client has no history, it is always considered ready-to-run and receives an earlier deadline than the long running requests. This is compensated for by refreshing the current execution's deadline and by disallowing preemption for timeslice shuffling. To check the impact on throughput (often the downfall of latency sensitive schedulers), we used gem_wsim to simulate various transcode workloads with different load balancers, and varying the number of competing [heterogeneous] clients. +delta%------------------------------------------------------------------+ | a | | a | | aa | | aa | | aa | | aa | | aaa | | aaaa | | a aaaaa | | a aaaaaa | |a aa a aaaaaaaaaa aa a a| | A_| | +------------------------------------------------------------------------+ N Min Max Median Avg Stddev 108 -23.982194 28.421527 -0.077474828 -0.072650418 0.16179718 The impact was on average 0.1% under contention due to the change in context execution order and number of context switches. The biggest swings are due to the execution ordering favouring one client or another, and there may be room for improvement.
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 - .../gpu/drm/i915/gt/intel_engine_heartbeat.c | 1 + drivers/gpu/drm/i915/gt/intel_engine_pm.c | 4 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 14 - .../drm/i915/gt/intel_execlists_submission.c | 205 ++++----- drivers/gpu/drm/i915/gt/selftest_execlists.c | 30 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 5 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 1 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 - drivers/gpu/drm/i915/i915_priolist_types.h | 7 +- drivers/gpu/drm/i915/i915_request.c | 14 +- drivers/gpu/drm/i915/i915_scheduler.c | 429 +++++++++++++----- drivers/gpu/drm/i915/i915_scheduler.h | 18 +- drivers/gpu/drm/i915/i915_scheduler_types.h | 23 + drivers/gpu/drm/i915/selftests/i915_request.c | 1 + .../gpu/drm/i915/selftests/i915_scheduler.c | 136 ++++++ 16 files changed, 632 insertions(+), 262 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index eb429a90518e..4854a8d15ef4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -591,8 +591,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine) memset(execlists->pending, 0, sizeof(execlists->pending)); execlists->active = memset(execlists->inflight, 0, sizeof(execlists->inflight)); - - execlists->queue_priority_hint = INT_MIN; } static void cleanup_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index bccbb932a315..495e8d5e2bf4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -205,6 +205,7 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine) if (IS_ERR(rq)) return PTR_ERR(rq); + rq->sched.deadline = 0; __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); heartbeat_commit(rq, &attr); diff --git 
a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 14378e3a7a50..511a379c794d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -212,6 +212,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) i915_request_add_active_barriers(rq); /* Install ourselves as a preemption barrier */ + rq->sched.deadline = 0; rq->sched.attr.priority = I915_PRIORITY_BARRIER; if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */ /* @@ -272,9 +273,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_breadcrumbs_park(engine->breadcrumbs); - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); - if (engine->park) engine->park(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 694d2e04ba6a..824a187b2803 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -224,20 +224,6 @@ struct intel_engine_execlists { */ unsigned int port_mask; - /** - * @queue_priority_hint: Highest pending priority. - * - * When we add requests into the queue, or adjust the priority of - * executing requests, we compute the maximum priority of those - * pending requests. We can then use this value to determine if - * we need to preempt the executing requests to service the queue. - * However, since the we may have recorded the priority of an inflight - * request we wanted to preempt but since completed, at the time of - * dequeuing the priority hint may no longer may match the highest - * available request priority. 
- */ - int queue_priority_hint; - struct rb_root_cached virtual; /** diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 27c26fde3021..cae29268ccea 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -178,7 +178,7 @@ struct virtual_engine { */ struct ve_node { struct rb_node rb; - int prio; + u64 deadline; } nodes[I915_NUM_ENGINES]; /* @@ -258,25 +258,12 @@ ring_set_paused(const struct intel_engine_cs *engine, int state) static inline int rq_prio(const struct i915_request *rq) { - return READ_ONCE(rq->sched.attr.priority); + return rq->sched.attr.priority; } -static int effective_prio(const struct i915_request *rq) +static inline u64 rq_deadline(const struct i915_request *rq) { - int prio = rq_prio(rq); - - /* - * If this request is special and must not be interrupted at any - * cost, so be it. Note we are only checking the most recent request - * in the context and so may be masking an earlier vip request. It - * is hoped that under the conditions where nopreempt is used, this - * will not matter (i.e. all requests to that context will be - * nopreempt for as long as desired). 
- */ - if (i915_request_has_nopreempt(rq)) - prio = I915_PRIORITY_UNPREEMPTABLE; - - return prio; + return rq->sched.deadline; } static struct i915_request *first_request(struct i915_sched_engine *se) @@ -295,61 +282,61 @@ static struct i915_request *first_request(struct i915_sched_engine *se) return NULL; } -static int queue_prio(struct i915_sched_engine *se) +static struct i915_request *first_virtual(const struct intel_engine_cs *engine) { - struct i915_request *rq; + struct rb_node *rb; - rq = first_request(se); - if (!rq) - return INT_MIN; + rb = rb_first_cached(&engine->execlists.virtual); + if (!rb) + return NULL; - return rq_prio(rq); + return READ_ONCE(rb_entry(rb, + struct virtual_engine, + nodes[engine->id].rb)->request); } -static int virtual_prio(const struct intel_engine_execlists *el) +static const struct i915_request * +next_elsp_request(struct intel_engine_cs *engine, const struct i915_request *rq) { - struct rb_node *rb = rb_first_cached(&el->virtual); + if (list_is_last(&rq->sched.link, &engine->active.requests)) + return NULL; - return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; + return list_next_entry(rq, sched.link); +} + +static inline bool +dl_before(const struct i915_request *next, const struct i915_request *prev) +{ + return !prev || (next && rq_deadline(next) < rq_deadline(prev)); } static inline bool need_preempt(struct intel_engine_cs *engine, const struct i915_request *rq) { - int last_prio; + const struct i915_request *first = NULL; + const struct i915_request *next; if (!intel_engine_has_semaphores(engine)) return false; /* - * Check if the current priority hint merits a preemption attempt. - * - * We record the highest value priority we saw during rescheduling - * prior to this dequeue, therefore we know that if it is strictly - * less than the current tail of ESLP[0], we do not need to force - * a preempt-to-idle cycle. - * - * However, the priority hint is a mere hint that we may need to - * preempt. 
If that hint is stale or we may be trying to preempt - * ourselves, ignore the request. - * - * More naturally we would write - * prio >= max(0, last); - * except that we wish to prevent triggering preemption at the same - * priority level: the task that is running should remain running - * to preserve FIFO ordering of dependencies. + * If this request is special and must not be interrupted at any + * cost, so be it. Note we are only checking the most recent request + * in the context and so may be masking an earlier vip request. It + * is hoped that under the conditions where nopreempt is used, this + * will not matter (i.e. all requests to that context will be + * nopreempt for as long as desired). */ - last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); - if (engine->execlists.queue_priority_hint <= last_prio) + if (i915_request_has_nopreempt(rq)) return false; /* * Check against the first request in ELSP[1], it will, thanks to the * power of PI, be the highest priority of that context. */ - if (!list_is_last(&rq->sched.link, &engine->active.requests) && - rq_prio(list_next_entry(rq, sched.link)) > last_prio) - return true; + next = next_elsp_request(engine, rq); + if (dl_before(next, first)) + first = next; /* * If the inflight context did not trigger the preemption, then maybe @@ -361,8 +348,31 @@ static inline bool need_preempt(struct intel_engine_cs *engine, * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same * context, it's priority would not exceed ELSP[0] aka last_prio. 
*/ - return max(virtual_prio(&engine->execlists), - queue_prio(&engine->active)) > last_prio; + next = first_request(&engine->active); + if (dl_before(next, first)) + first = next; + + next = first_virtual(engine); + if (dl_before(next, first)) + first = next; + + if (!dl_before(first, rq)) + return false; + + /* + * While a request may have been queued that has an earlier deadline + * than is currently running, we only allow it to perform an urgent + * preemption if it also has higher priority. The cost of frequently + * switching between contexts is noticeable, so we try to keep + * the deadline shuffling only to timeslice boundaries. + */ + ENGINE_TRACE(engine, + "preempt for first=%llx:%llu, dl=%llu, prio=%d?\n", + first->fence.context, + first->fence.seqno, + rq_deadline(first), + rq_prio(first)); + return rq_prio(first) > max(rq_prio(rq), I915_PRIORITY_NORMAL - 1); } __maybe_unused static inline bool @@ -379,7 +389,7 @@ assert_priority_queue(const struct i915_request *prev, if (i915_request_is_active(prev)) return true; - return rq_prio(prev) >= rq_prio(next); + return rq_deadline(prev) <= rq_deadline(next); } static inline void @@ -565,9 +575,12 @@ static inline void __execlists_schedule_out(struct i915_request *rq) * If we have just completed this context, the engine may now be * idle and we want to re-enter powersaving. 
*/ - if (list_is_last_rcu(&rq->link, &ce->timeline->requests) && - __i915_request_is_complete(rq)) - intel_engine_add_retire(engine, ce->timeline); + if (__i915_request_is_complete(rq)) { + if (!list_is_last_rcu(&rq->link, &ce->timeline->requests)) + i915_request_update_deadline(list_next_entry(rq, link)); + else + intel_engine_add_retire(engine, ce->timeline); + } ccid = ce->lrc.ccid; ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; @@ -681,14 +694,14 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) if (!rq) return ""; - snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", + snprintf(buf, buflen, "%sccid:%x %llx:%lld%s dl:%llu", prefix, rq->context->lrc.ccid, rq->fence.context, rq->fence.seqno, __i915_request_is_complete(rq) ? "!" : __i915_request_has_started(rq) ? "*" : "", - rq_prio(rq)); + rq_deadline(rq)); return buf; } @@ -1196,11 +1209,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (need_preempt(engine, last)) { ENGINE_TRACE(engine, - "preempting last=%llx:%lld, prio=%d, hint=%d\n", + "preempting last=%llx:%llu, dl=%llu, prio=%d\n", last->fence.context, last->fence.seqno, - last->sched.attr.priority, - execlists->queue_priority_hint); + rq_deadline(last), + rq_prio(last)); record_preemption(execlists); /* @@ -1222,11 +1235,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = NULL; } else if (timeslice_expired(engine, last)) { ENGINE_TRACE(engine, - "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", + "expired:%s last=%llx:%llu, deadline=%llu, now=%llu, yield?=%s\n", yesno(timer_expired(&execlists->timer)), last->fence.context, last->fence.seqno, - rq_prio(last), - execlists->queue_priority_hint, + rq_deadline(last), + i915_sched_to_ticks(ktime_get()), yesno(timeslice_yield(execlists, last))); cancel_timer(&execlists->timer); @@ -1282,7 +1295,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(rq->engine != &ve->base); GEM_BUG_ON(rq->context != 
&ve->context); - if (unlikely(rq_prio(rq) < queue_prio(&engine->active))) { + if (!dl_before(rq, first_request(&engine->active))) { spin_unlock(&ve->base.active.lock); break; } @@ -1296,16 +1309,15 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } ENGINE_TRACE(engine, - "virtual rq=%llx:%lld%s, new engine? %s\n", + "virtual rq=%llx:%lld%s, dl %llx, new engine? %s\n", rq->fence.context, rq->fence.seqno, __i915_request_is_complete(rq) ? "!" : __i915_request_has_started(rq) ? "*" : "", + rq_deadline(rq), yesno(engine != ve->siblings[0])); - WRITE_ONCE(ve->request, NULL); - WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN); rb = &ve->nodes[engine->id].rb; rb_erase_cached(rb, &execlists->virtual); @@ -1396,6 +1408,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (rq->execution_mask != engine->mask) goto done; + if (unlikely(dl_before(first_virtual(engine), + rq))) + goto done; + /* * If GVT overrides us we only ever submit * port[0], leaving port[1] empty. Note that we @@ -1432,24 +1448,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } done: *port++ = i915_request_get(last); - - /* - * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. - * - * We choose the priority hint such that if we add a request of greater - * priority than this, we kick the submission tasklet to decide on - * the right order of submitting the requests to hardware. We must - * also be prepared to reorder requests as they are in-flight on the - * HW. We derive the priority hint then as the first "hole" in - * the HW submission ports and if there are no available slots, - * the priority of the lowest executing request, i.e. last. - * - * When we do receive a higher priority request ready to run from the - * user, see queue_request(), the priority hint is bumped to that - * request triggering preemption on the next dequeue (or subsequent - * interrupt for secondary ports). 
- */ - execlists->queue_priority_hint = queue_prio(&engine->active); spin_unlock(&engine->active.lock); /* @@ -2643,10 +2641,6 @@ static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) static void nop_submission_tasklet(unsigned long data) { - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - - /* The driver is wedged; don't process any more events. */ - WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); } static void execlists_reset_cancel(struct intel_engine_cs *engine) @@ -2713,16 +2707,12 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) rq->engine = engine; __i915_request_submit(rq); i915_request_put(rq); - - ve->base.execlists.queue_priority_hint = INT_MIN; } spin_unlock(&ve->base.active.lock); } /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - GEM_BUG_ON(__tasklet_is_enabled(&engine->active.tasklet)); engine->active.tasklet.func = nop_submission_tasklet; @@ -3122,7 +3112,8 @@ static const struct intel_context_ops virtual_context_ops = { .destroy = virtual_context_destroy, }; -static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) +static intel_engine_mask_t +virtual_submission_mask(struct virtual_engine *ve, u64 *deadline) { struct i915_request *rq; intel_engine_mask_t mask; @@ -3139,9 +3130,11 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) mask = ve->siblings[0]->mask; } - ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", + *deadline = rq_deadline(rq); + + ENGINE_TRACE(&ve->base, "rq=%llx:%llu, mask=%x, dl=%llu\n", rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); + mask, *deadline); return mask; } @@ -3149,12 +3142,12 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) static void virtual_submission_tasklet(unsigned long data) { struct virtual_engine * const ve = (struct virtual_engine 
*)data; - const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); intel_engine_mask_t mask; + u64 deadline; unsigned int n; rcu_read_lock(); - mask = virtual_submission_mask(ve); + mask = virtual_submission_mask(ve, &deadline); rcu_read_unlock(); if (unlikely(!mask)) return; @@ -3187,7 +3180,8 @@ static void virtual_submission_tasklet(unsigned long data) */ first = rb_first_cached(&sibling->execlists.virtual) == &node->rb; - if (prio == node->prio || (prio > node->prio && first)) + if (deadline == node->deadline || + (deadline < node->deadline && first)) goto submit_engine; rb_erase_cached(&node->rb, &sibling->execlists.virtual); @@ -3201,7 +3195,7 @@ static void virtual_submission_tasklet(unsigned long data) rb = *parent; other = rb_entry(rb, typeof(*other), rb); - if (prio > other->prio) { + if (deadline < other->deadline) { parent = &rb->rb_left; } else { parent = &rb->rb_right; @@ -3216,8 +3210,8 @@ static void virtual_submission_tasklet(unsigned long data) submit_engine: GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); - node->prio = prio; - if (first && prio > sibling->execlists.queue_priority_hint) + node->deadline = deadline; + if (first) i915_sched_kick(&sibling->active); unlock_engine: @@ -3253,7 +3247,9 @@ static void virtual_submit_request(struct i915_request *rq) i915_request_put(ve->request); } - ve->base.execlists.queue_priority_hint = rq_prio(rq); + rq->sched.deadline = + min(rq->sched.deadline, + i915_scheduler_next_virtual_deadline(rq_prio(rq))); ve->request = i915_request_get(rq); GEM_BUG_ON(!list_empty(virtual_queue(ve))); @@ -3356,7 +3352,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.bond_execute = virtual_bond_execute; INIT_LIST_HEAD(virtual_queue(ve)); - ve->base.execlists.queue_priority_hint = INT_MIN; tasklet_init(&ve->base.active.tasklet, virtual_submission_tasklet, (unsigned long)ve); @@ -3540,10 +3535,6 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\t", 
0); } - if (execlists->queue_priority_hint != INT_MIN) - drm_printf(m, "\t\tQueue priority hint: %d\n", - READ_ONCE(execlists->queue_priority_hint)); - last = NULL; count = 0; for_each_priolist(pl, &engine->active.queue) { diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 291fb04807b9..bd170ebc7e49 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -868,7 +868,7 @@ semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) static int release_queue(struct intel_engine_cs *engine, struct i915_vma *vma, - int idx, int prio) + int idx, u64 deadline) { struct i915_request *rq; u32 *cs; @@ -893,10 +893,7 @@ release_queue(struct intel_engine_cs *engine, i915_request_get(rq); i915_request_add(rq); - local_bh_disable(); - i915_request_set_priority(rq, prio); - local_bh_enable(); /* kick tasklet */ - + i915_request_set_deadline(rq, deadline); i915_request_put(rq); return 0; @@ -910,6 +907,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer, struct intel_engine_cs *engine; struct i915_request *head; enum intel_engine_id id; + long timeout; int err, i, n = 0; head = semaphore_queue(outer, vma, n++); @@ -930,12 +928,16 @@ slice_semaphore_queue(struct intel_engine_cs *outer, } } - err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); + err = release_queue(outer, vma, n, 0); if (err) goto out; - if (i915_request_wait(head, 0, - 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) { + /* Expected number of pessimal slices required */ + timeout = outer->gt->info.num_engines * (count + 2) * (count + 3); + timeout *= 4; /* safety factor, including bucketing */ + timeout += HZ / 2; /* and include the request completion */ + + if (i915_request_wait(head, 0, timeout) < 0) { pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", count, n); GEM_TRACE_DUMP(); @@ -1040,6 +1042,8 @@ create_rewinder(struct 
intel_context *ce, err = i915_request_await_dma_fence(rq, &wait->fence); if (err) goto err; + + i915_request_set_deadline(rq, rq_deadline(wait)); } cs = intel_ring_begin(rq, 14); @@ -1316,6 +1320,7 @@ static int live_timeslice_queue(void *arg) goto err_heartbeat; } i915_request_set_priority(rq, I915_PRIORITY_MAX); + i915_request_set_deadline(rq, 0); err = wait_for_submit(engine, rq, HZ / 2); if (err) { pr_err("%s: Timed out trying to submit semaphores\n", @@ -1338,10 +1343,9 @@ static int live_timeslice_queue(void *arg) } GEM_BUG_ON(i915_request_completed(rq)); - GEM_BUG_ON(execlists_active(&engine->execlists) != rq); /* Queue: semaphore signal, matching priority as semaphore */ - err = release_queue(engine, vma, 1, effective_prio(rq)); + err = release_queue(engine, vma, 1, rq_deadline(rq)); if (err) goto err_rq; @@ -1452,6 +1456,7 @@ static int live_timeslice_nopreempt(void *arg) goto out_spin; } + rq->sched.deadline = 0; rq->sched.attr.priority = I915_PRIORITY_BARRIER; i915_request_get(rq); i915_request_add(rq); @@ -1824,6 +1829,7 @@ static int live_late_preempt(void *arg) /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. 
*/ ctx_lo->sched.priority = 1; + ctx_hi->sched.priority = I915_PRIORITY_MIN; for_each_engine(engine, gt, id) { struct igt_live_test t; @@ -2924,6 +2930,9 @@ static int live_preempt_gang(void *arg) while (rq) { /* wait for each rq from highest to lowest prio */ struct i915_request *n = list_next_entry(rq, mock.link); + /* With deadlines, no strict priority ordering */ + i915_request_set_deadline(rq, 0); + if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { struct drm_printer p = drm_info_printer(engine->i915->drm.dev); @@ -3146,6 +3155,7 @@ static int preempt_user(struct intel_engine_cs *engine, i915_request_add(rq); i915_request_set_priority(rq, I915_PRIORITY_MAX); + i915_request_set_deadline(rq, 0); if (i915_request_wait(rq, 0, HZ / 2) < 0) err = -ETIME; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index cc0120411d62..3201dc334268 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -878,7 +878,10 @@ static int __igt_reset_engines(struct intel_gt *gt, break; } - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + /* With deadlines, no strict priority */ + i915_request_set_deadline(rq, 0); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { struct drm_printer p = drm_info_printer(gt->i915->drm.dev); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index e26f7f957468..88a61423385d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1171,6 +1171,7 @@ static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema) intel_ring_advance(rq, cs); + rq->sched.deadline = 0; rq->sched.attr.priority = I915_PRIORITY_BARRIER; err_rq: i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 34be8dc8949e..1d4045ade19c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -327,8 +327,6 @@ static void __guc_dequeue(struct intel_engine_cs *engine) i915_priolist_advance(&engine->active.queue, pl); } done: - execlists->queue_priority_hint = - pl != &engine->active.queue.sentinel ? pl->priority : INT_MIN; if (submit) { *port = schedule_in(last, port - execlists->inflight); *++port = NULL; @@ -487,8 +485,6 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - execlists->queue_priority_hint = INT_MIN; - spin_unlock_irqrestore(&engine->active.lock, flags); } diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 807cd8f65481..839c48f4b0e9 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -22,6 +22,8 @@ enum { /* Interactive workload, scheduled for immediate pageflipping */ I915_PRIORITY_DISPLAY, + + __I915_PRIORITY_KERNEL__ }; /* Smallest priority value that cannot be bumped. */ @@ -35,8 +37,7 @@ enum { * i.e. nothing can have higher priority and force us to usurp the * active request. 
*/ -#define I915_PRIORITY_UNPREEMPTABLE INT_MAX -#define I915_PRIORITY_BARRIER (I915_PRIORITY_UNPREEMPTABLE - 1) +#define I915_PRIORITY_BARRIER INT_MAX #ifdef CONFIG_64BIT #define I915_PRIOLIST_HEIGHT 12 @@ -46,7 +47,7 @@ enum { struct i915_priolist { struct list_head requests; - int priority; + u64 deadline; int level; struct i915_priolist *next[I915_PRIOLIST_HEIGHT]; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 600e681e4894..dd315455874f 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -519,7 +519,7 @@ bool __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; bool result = false; - RQ_TRACE(request, "\n"); + RQ_TRACE(request, "dl %llu\n", request->sched.deadline); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); @@ -708,6 +708,7 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: + i915_request_update_deadline(rq); break; case FENCE_FREE: @@ -1868,14 +1869,15 @@ long i915_request_wait(struct i915_request *rq, return timeout; } -static int print_sched_attr(const struct i915_sched_attr *attr, - char *buf, int x, int len) +static int print_sched(const struct i915_sched_node *node, + char *buf, int x, int len) { - if (attr->priority == I915_PRIORITY_INVALID) + if (node->attr.priority == I915_PRIORITY_INVALID) return x; x += snprintf(buf + x, len - x, - " prio=%d", attr->priority); + " prio=%d, dl=%llu", + node->attr.priority, node->deadline); return x; } @@ -1955,7 +1957,7 @@ void i915_request_show(struct drm_printer *m, * from the lists */ - x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); + x = print_sched(&rq->sched, buf, x, sizeof(buf)); drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n", prefix, indent, " ", diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 
fa6262f92afd..92c7cbf98302 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -33,6 +33,11 @@ static void node_put(struct i915_sched_node *node) i915_request_put(container_of(node, struct i915_request, sched)); } +static inline u64 rq_deadline(const struct i915_request *rq) +{ + return READ_ONCE(rq->sched.deadline); +} + static inline int rq_prio(const struct i915_request *rq) { return READ_ONCE(rq->sched.attr.priority); @@ -46,6 +51,14 @@ static int ipi_get_prio(struct i915_request *rq) return xchg(&rq->sched.ipi_priority, I915_PRIORITY_INVALID); } +static u64 ipi_get_deadline(struct i915_request *rq) +{ + if (READ_ONCE(rq->sched.ipi_deadline) == I915_DEADLINE_NEVER) + return I915_DEADLINE_NEVER; + + return xchg64(&rq->sched.ipi_deadline, I915_DEADLINE_NEVER); +} + static void ipi_schedule(struct work_struct *wrk) { struct i915_sched_ipi *ipi = container_of(wrk, typeof(*ipi), work); @@ -53,9 +66,11 @@ static void ipi_schedule(struct work_struct *wrk) do { struct i915_request *rn = xchg(&rq->sched.ipi_link, NULL); + u64 deadline; int prio; prio = ipi_get_prio(rq); + deadline = ipi_get_deadline(rq); /* * For cross-engine scheduling to work we rely on one of two @@ -80,6 +95,7 @@ static void ipi_schedule(struct work_struct *wrk) */ local_bh_disable(); i915_request_set_priority(rq, prio); + i915_request_set_deadline(rq, deadline); local_bh_enable(); i915_request_put(rq); @@ -98,7 +114,7 @@ static void init_priolist(struct i915_priolist_root *const root) struct i915_priolist *pl = &root->sentinel; memset_p((void **)pl->next, pl, ARRAY_SIZE(pl->next)); - pl->priority = INT_MIN; + pl->deadline = I915_DEADLINE_NEVER; } void i915_sched_init_engine(struct i915_sched_engine *se, @@ -249,7 +265,7 @@ static inline unsigned int random_level(struct i915_priolist_root *root) } static struct list_head * -lookup_priolist(struct intel_engine_cs *engine, int prio) +lookup_priolist(struct intel_engine_cs *engine, u64 deadline) { struct 
i915_priolist *update[I915_PRIOLIST_HEIGHT]; struct i915_sched_engine * const se = &engine->active; @@ -257,12 +273,13 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) struct i915_priolist *pl, *tmp; int lvl; + GEM_BUG_ON(deadline == I915_DEADLINE_NEVER); lockdep_assert_held(&se->lock); if (unlikely(se->no_priolist)) - prio = I915_PRIORITY_NORMAL; + deadline = 0; for_each_priolist(pl, root) { /* recycle any empty elements before us */ - if (pl->priority >= prio || !list_empty(&pl->requests)) + if (pl->deadline >= deadline || !list_empty(&pl->requests)) break; i915_priolist_advance(root, pl); @@ -272,14 +289,14 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) pl = &root->sentinel; lvl = pl->level; do { - while (tmp = pl->next[lvl], tmp->priority >= prio) + while (tmp = pl->next[lvl], tmp->deadline <= deadline) pl = tmp; - if (pl->priority == prio) + if (pl->deadline == deadline) goto out; update[lvl] = pl; } while (--lvl >= 0); - if (prio == I915_PRIORITY_NORMAL) { + if (!deadline) { pl = &se->default_priolist; } else if (!pl_empty(&root->sentinel.requests)) { pl = pl_pop(&root->sentinel.requests); @@ -287,7 +304,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) pl = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC); /* Convert an allocation failure to a priority bump */ if (unlikely(!pl)) { - prio = I915_PRIORITY_NORMAL; /* recurses just once */ + deadline = 0; /* recurses just once */ /* * To maintain ordering with all rendering, after an @@ -303,7 +320,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) } } - pl->priority = prio; + pl->deadline = deadline; INIT_LIST_HEAD(&pl->requests); lvl = random_level(root); @@ -325,7 +342,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) chk = &root->sentinel; lvl = chk->level; do { - while (tmp = chk->next[lvl], tmp->priority >= prio) + while (tmp = chk->next[lvl], tmp->deadline <= deadline) chk = tmp; } while (--lvl >= 0); @@ -345,7 +362,7 @@ static void 
remove_priolist(struct intel_engine_cs *engine, struct i915_priolist *pl, *tmp; struct i915_priolist *old = container_of(plist, struct i915_priolist, requests); - int prio = old->priority; + u64 deadline = old->deadline; int lvl; lockdep_assert_held(&se->lock); @@ -356,7 +373,7 @@ static void remove_priolist(struct intel_engine_cs *engine, pl = &root->sentinel; lvl = pl->level; do { - while (tmp = pl->next[lvl], tmp->priority > prio) + while (tmp = pl->next[lvl], tmp->deadline < deadline) pl = tmp; if (lvl <= old->level) { pl->next[lvl] = old->next[lvl]; @@ -375,7 +392,7 @@ void i915_priolist_advance(struct i915_priolist_root *root, GEM_BUG_ON(!list_empty(&pl->requests)); GEM_BUG_ON(pl == &root->sentinel); - if (pl->priority != I915_PRIORITY_NORMAL) + if (pl->deadline) pl_push(pl, &root->sentinel.requests); for (lvl = 0; lvl <= pl->level; lvl++) @@ -410,53 +427,245 @@ stack_pop(struct i915_request *rq, return rq; } -static inline bool need_preempt(int prio, int active) +static void ipi_deadline(struct i915_request *rq, u64 deadline) { + u64 old = READ_ONCE(rq->sched.ipi_deadline); + + do { + if (deadline >= old) + return; + } while (!try_cmpxchg64(&rq->sched.ipi_deadline, &old, deadline)); + + __ipi_add(rq); +} + +static bool is_first_priolist(const struct intel_engine_cs *engine, + const struct list_head *requests) +{ + return requests == &priolist_first(&engine->active.queue)->requests; +} + +static bool __i915_request_set_deadline(struct i915_request *rq, u64 deadline) +{ + struct intel_engine_cs *engine = rq->engine; + struct list_head *pos = &rq->sched.signalers_list; + struct list_head *plist; + + if (unlikely(!i915_request_in_priority_queue(rq))) { + rq->sched.deadline = deadline; + return false; + } + + /* Fifo and depth-first replacement ensure our deps execute first */ + plist = lookup_priolist(engine, deadline); + + rq->sched.dfs.next = NULL; + do { + list_for_each_continue(pos, &rq->sched.signalers_list) { + struct i915_dependency *p = + list_entry(pos, 
typeof(*p), signal_link); + struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (rq_deadline(s) <= deadline) + continue; + + if (__i915_request_is_complete(s)) + continue; + + if (s->engine != engine) { + ipi_deadline(s, deadline); + continue; + } + + /* Remember our position along this branch */ + rq = stack_push(s, rq, pos); + pos = &rq->sched.signalers_list; + } + + RQ_TRACE(rq, "set-deadline:%llu\n", deadline); + WRITE_ONCE(rq->sched.deadline, deadline); + + /* + * Once the request is ready, it will be placed into the + * priority lists and then onto the HW runlist. Before the + * request is ready, it does not contribute to our preemption + * decisions and we can safely ignore it, as it will, and + * any preemption required, be dealt with upon submission. + * See engine->submit_request() + */ + GEM_BUG_ON(rq->engine != engine); + if (i915_request_in_priority_queue(rq)) { + struct list_head *prev = rq->sched.link.prev; + + list_move_tail(&rq->sched.link, plist); + if (list_empty(prev)) + remove_priolist(engine, prev); + } + } while ((rq = stack_pop(rq, &pos))); + + return is_first_priolist(engine, plist); +} + +void i915_request_set_deadline(struct i915_request *rq, u64 deadline) +{ + struct intel_engine_cs *engine; + unsigned long flags; + + if (deadline >= rq_deadline(rq)) + return; + + engine = lock_engine_irqsave(rq, flags); + if (!intel_engine_has_scheduler(engine)) + goto unlock; + + if (deadline >= rq_deadline(rq)) + goto unlock; + + if (__i915_request_is_complete(rq)) + goto unlock; + + rcu_read_lock(); + if (__i915_request_set_deadline(rq, deadline)) + i915_sched_kick(&engine->active); + rcu_read_unlock(); + GEM_BUG_ON(rq_deadline(rq) != deadline); + +unlock: + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static u64 prio_slice(int prio) +{ + u64 slice; + int sf; + /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the 
impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. + * This is the central heuristic to the virtual deadlines. By + * imposing that each task takes an equal amount of time, we + * let each client have an equal slice of the GPU time. By + * bringing the virtual deadline forward, that client will then + * have more GPU time, and vice versa a lower priority client will + * have a later deadline and receive less GPU time. + * + * In BFS/MuQSS, the prio_ratios[] are based on the task nice range of + * [-20, 19], with each lower priority having a ~10% longer deadline, + * with the note that the proportion of CPU time between two clients + * of different priority will be the square of the relative prio_slice. + * + * In contrast, this prio_slice() curve was chosen because it gave good + * results with igt/gem_exec_schedule. It may not be the best choice! + * + * With a 1ms scheduling quantum: + * + * MAX_USER: ~32us deadline + * 0: ~16ms deadline + * MIN_USER: 1000ms deadline */ - return prio >= max(I915_PRIORITY_NORMAL, active); + + if (prio >= __I915_PRIORITY_KERNEL__) + return INT_MAX - prio; + + slice = __I915_PRIORITY_KERNEL__ - prio; + if (prio >= 0) + sf = 20 - 6; + else + sf = 20 - 1; + + return slice << sf; } -static void kick_submission(struct intel_engine_cs *engine, - const struct i915_request *rq, - int prio) +static u64 virtual_deadline(u64 kt, int priority) { - const struct i915_request *inflight; + return i915_sched_to_ticks(kt + prio_slice(priority)); +} + +u64 i915_scheduler_next_virtual_deadline(int priority) +{ + return virtual_deadline(ktime_get_mono_fast_ns(), priority); +} + +static u64 signal_deadline(const struct i915_request *rq) +{ + u64 last = ktime_get_mono_fast_ns(); + const struct i915_dependency *p; /* - * We only need to kick the tasklet once for the high priority - * new context we add into the queue.
+ * Find the earliest point at which we will become 'ready', + * which we infer from the deadline of all active signalers. + * We will position ourselves at the end of that chain of work. */ - if (prio <= engine->execlists.queue_priority_hint) - return; - /* Nothing currently active? We're overdue for a submission! */ - inflight = execlists_active(&engine->execlists); - if (!inflight) - return; + rcu_read_lock(); + for_each_signaler(p, rq) { + const struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + u64 deadline; + int prio; + + if (__i915_request_is_complete(s)) + continue; + + if (s->timeline == rq->timeline && + __i915_request_has_started(s)) + continue; + + prio = rq_prio(s); + if (prio < rq_prio(rq)) + continue; + + deadline = rq_deadline(s); + if (deadline == I915_DEADLINE_NEVER) /* retired & reused */ + continue; + + deadline = i915_sched_to_ns(deadline); + if (p->flags & I915_DEPENDENCY_WEAK) + deadline -= prio_slice(prio); + + last = max(last, deadline); + } + rcu_read_unlock(); + + return last; +} + +static int adj_prio(const struct i915_request *rq) +{ + int prio = rq_prio(rq); /* - * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves. + * Deprioritize semaphore waiters. We only want to run these if there + * is nothing ready to run first. + * + * Note by giving a more distant deadline (due to a lower priority) + * we do not prevent them from having a slice of the GPU, and if there + * is still contention at that point, we expect to immediately yield + * on the semaphore. + * + * When all semaphores are signaled, we will update the request + * to remove the semaphore penalty. 
*/ - if (inflight->context == rq->context) - return; + if (rq->sched.semaphores && !i915_sw_fence_signaled(&rq->semaphore)) + prio -= 1024; + + return prio; +} + +static u64 earliest_deadline(const struct i915_request *rq) +{ + return virtual_deadline(signal_deadline(rq), adj_prio(rq)); +} - ENGINE_TRACE(engine, - "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", - prio, - rq->fence.context, rq->fence.seqno, - inflight->fence.context, inflight->fence.seqno, - inflight->sched.attr.priority); +static bool set_earliest_deadline(struct i915_request *rq, u64 old) +{ + u64 dl; - engine->execlists.queue_priority_hint = prio; - if (need_preempt(prio, rq_prio(inflight))) - i915_sched_kick(&engine->active); + /* Recompute our deadlines and promote after a priority change */ + dl = min(earliest_deadline(rq), rq_deadline(rq)); + if (dl >= old) + return false; + + return __i915_request_set_deadline(rq, dl); } static void ipi_priority(struct i915_request *rq, int prio) @@ -471,13 +680,11 @@ static void ipi_priority(struct i915_request *rq, int prio) __ipi_add(rq); } -static void __i915_request_set_priority(struct i915_request *rq, int prio) +static bool __i915_request_set_priority(struct i915_request *rq, int prio) { struct intel_engine_cs *engine = rq->engine; struct list_head *pos = &rq->sched.signalers_list; - struct list_head *plist; - - plist = lookup_priolist(engine, prio); + bool kick = false; /* * Recursively bump all dependent priorities to match the new request. 
@@ -499,6 +706,8 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio) */ rq->sched.dfs.next = NULL; do { + struct i915_request *next; + list_for_each_continue(pos, &rq->sched.signalers_list) { struct i915_dependency *p = list_entry(pos, typeof(*p), signal_link); @@ -524,6 +733,8 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio) RQ_TRACE(rq, "set-priority:%d\n", prio); WRITE_ONCE(rq->sched.attr.priority, prio); + next = stack_pop(rq, &pos); + /* * Once the request is ready, it will be placed into the * priority lists and then onto the HW runlist. Before the @@ -532,21 +743,15 @@ static void __i915_request_set_priority(struct i915_request *rq, int prio) * any preemption required, be dealt with upon submission. * See engine->submit_request() */ - if (!i915_request_is_ready(rq)) - continue; - GEM_BUG_ON(rq->engine != engine); - if (i915_request_in_priority_queue(rq)) { - struct list_head *prev = rq->sched.link.prev; + if (i915_request_is_ready(rq) && + set_earliest_deadline(rq, rq_deadline(rq))) + kick = true; - list_move_tail(&rq->sched.link, plist); - if (list_empty(prev)) - remove_priolist(engine, prev); - } + rq = next; + } while (rq); - /* Defer (tasklet) submission until after all updates. 
*/ - kick_submission(engine, rq, prio); - } while ((rq = stack_pop(rq, &pos))); + return kick; } void i915_request_set_priority(struct i915_request *rq, int prio) @@ -599,7 +804,8 @@ void i915_request_set_priority(struct i915_request *rq, int prio) goto unlock; rcu_read_lock(); - __i915_request_set_priority(rq, prio); + if (__i915_request_set_priority(rq, prio)) + i915_sched_kick(&engine->active); rcu_read_unlock(); GEM_BUG_ON(rq_prio(rq) != prio); @@ -607,18 +813,27 @@ void i915_request_set_priority(struct i915_request *rq, int prio) spin_unlock_irqrestore(&engine->active.lock, flags); } +bool __intel_engine_queue_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + lockdep_assert_held(&engine->active.lock); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + return set_earliest_deadline(rq, I915_DEADLINE_NEVER); +} + void __intel_engine_defer_request(struct intel_engine_cs *engine, struct i915_request *rq) { struct list_head *pos = &rq->sched.waiters_list; struct i915_request *rn; LIST_HEAD(dfs); - int prio; + u64 deadline; lockdep_assert_held(&engine->active.lock); GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags)); - prio = rq_prio(rq); + deadline = max(rq_deadline(rq), + i915_scheduler_next_virtual_deadline(adj_prio(rq))); /* * When we defer a request, we must maintain its order with respect @@ -645,30 +860,32 @@ void __intel_engine_defer_request(struct intel_engine_cs *engine, __i915_request_has_started(w) && !__i915_request_is_complete(rq)); + /* An unready waiter imposes no deadline */ if (!i915_request_in_priority_queue(w)) continue; /* - * We also need to reorder within the same priority. + * We also need to reorder within the same deadline. * * This is unlike priority-inheritance, where if the * signaler already has a higher priority [earlier * deadline] than us, we can ignore as it will be * scheduled first. 
If a waiter already has the - * same priority, we still have to push it to the end + * same deadline, we still have to push it to the end * of the list. This unfortunately means we cannot * use the rq_deadline() itself as a 'visited' bit. */ - if (rq_prio(w) < prio) + if (rq_deadline(w) > deadline) continue; - GEM_BUG_ON(rq_prio(w) != prio); - /* Remember our position along this branch */ rq = stack_push(w, rq, pos); pos = &rq->sched.waiters_list; } + RQ_TRACE(rq, "set-deadline:%llu\n", deadline); + WRITE_ONCE(rq->sched.deadline, deadline); + /* Note list is reversed for waiters wrt signal hierarchy */ GEM_BUG_ON(rq->engine != engine); GEM_BUG_ON(!i915_request_in_priority_queue(rq)); @@ -678,33 +895,13 @@ void __intel_engine_defer_request(struct intel_engine_cs *engine, clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } while ((rq = stack_pop(rq, &pos))); - pos = lookup_priolist(engine, prio); + pos = lookup_priolist(engine, deadline); list_for_each_entry_safe(rq, rn, &dfs, sched.link) { set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); list_add_tail(&rq->sched.link, pos); } } -static void queue_request(struct intel_engine_cs *engine, - struct i915_request *rq) -{ - GEM_BUG_ON(!list_empty(&rq->sched.link)); - list_add_tail(&rq->sched.link, lookup_priolist(engine, rq_prio(rq))); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); -} - -static bool submit_queue(struct intel_engine_cs *engine, - const struct i915_request *rq) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - - if (rq_prio(rq) <= execlists->queue_priority_hint) - return false; - - execlists->queue_priority_hint = rq_prio(rq); - return true; -} - static bool hold_request(const struct i915_request *rq) { struct i915_dependency *p; @@ -742,6 +939,7 @@ void i915_request_enqueue(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_sched_engine *se = &engine->active; + u64 dl = earliest_deadline(rq); unsigned long flags; bool kick = false; @@ -754,11 
+952,11 @@ void i915_request_enqueue(struct i915_request *rq) list_add_tail(&rq->sched.link, &se->hold); i915_request_set_hold(rq); } else { - queue_request(engine, rq); - - GEM_BUG_ON(i915_sched_is_idle(&engine->active)); - - kick = submit_queue(engine, rq); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + kick = __i915_request_set_deadline(rq, + min(dl, rq_deadline(rq))); + GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER); + GEM_BUG_ON(i915_sched_is_idle(se)); } GEM_BUG_ON(list_empty(&rq->sched.link)); @@ -771,8 +969,8 @@ struct i915_request * __intel_engine_rewind_requests(struct intel_engine_cs *engine) { struct i915_request *rq, *rn, *active = NULL; + u64 deadline = I915_DEADLINE_NEVER; struct list_head *pl; - int prio = I915_PRIORITY_INVALID; lockdep_assert_held(&engine->active.lock); @@ -786,13 +984,20 @@ __intel_engine_rewind_requests(struct intel_engine_cs *engine) __i915_request_unsubmit(rq); - GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); - if (rq_prio(rq) != prio) { - prio = rq_prio(rq); - pl = lookup_priolist(engine, prio); + if (__i915_request_has_started(rq)) { + u64 deadline = + i915_scheduler_next_virtual_deadline(rq_prio(rq)); + rq->sched.deadline = min(rq_deadline(rq), deadline); + } + GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER); + + if (rq_deadline(rq) != deadline) { + deadline = rq_deadline(rq); + pl = lookup_priolist(engine, deadline); } GEM_BUG_ON(i915_sched_is_idle(&engine->active)); + GEM_BUG_ON(i915_request_in_priority_queue(rq)); list_move(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -872,14 +1077,10 @@ void __intel_engine_unhold_request(struct intel_engine_cs *engine, struct i915_request *rq) { LIST_HEAD(list); + bool submit = false; lockdep_assert_held(&engine->active.lock); - if (rq_prio(rq) > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = rq_prio(rq); - i915_sched_kick(&engine->active); - } - /* * Move this request back to the priority queue, and all 
of its * children and grandchildren that were suspended along with it. @@ -893,9 +1094,7 @@ void __intel_engine_unhold_request(struct intel_engine_cs *engine, GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); i915_request_clear_hold(rq); - list_move_tail(&rq->sched.link, - lookup_priolist(rq->engine, rq_prio(rq))); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + submit |= __intel_engine_queue_request(rq->engine, rq); /* Also release any children on this engine that are ready */ for_each_waiter(p, rq) { @@ -925,6 +1124,18 @@ void __intel_engine_unhold_request(struct intel_engine_cs *engine, rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); } while (rq); + + if (submit) + i915_sched_kick(&engine->active); +} + +void i915_request_update_deadline(struct i915_request *rq) +{ + if (!i915_request_in_priority_queue(rq)) + return; + + /* Recompute our deadlines and promote after a priority change */ + i915_request_set_deadline(rq, earliest_deadline(rq)); } void i915_sched_node_init(struct i915_sched_node *node) @@ -943,10 +1154,12 @@ void i915_sched_node_init(struct i915_sched_node *node) void i915_sched_node_reinit(struct i915_sched_node *node) { node->attr.priority = I915_PRIORITY_INVALID; + node->deadline = I915_DEADLINE_NEVER; node->semaphores = 0; node->flags = 0; GEM_BUG_ON(node->ipi_link); + node->ipi_deadline = I915_DEADLINE_NEVER; node->ipi_priority = I915_PRIORITY_INVALID; GEM_BUG_ON(!list_empty(&node->signalers_list)); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 4432aab230b6..df0f1993b3b8 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -36,9 +36,16 @@ void i915_sched_park_engine(struct i915_sched_engine *se); void i915_sched_fini_engine(struct i915_sched_engine *se); void i915_request_set_priority(struct i915_request *request, int prio); +void i915_request_set_deadline(struct i915_request *request, u64 deadline); + +void 
i915_request_update_deadline(struct i915_request *request); + +u64 i915_scheduler_next_virtual_deadline(int priority); void i915_request_enqueue(struct i915_request *request); +bool __intel_engine_queue_request(struct intel_engine_cs *engine, + struct i915_request *request); struct i915_request * __intel_engine_rewind_requests(struct intel_engine_cs *engine); void __intel_engine_defer_request(struct intel_engine_cs *engine, @@ -49,11 +56,14 @@ bool __intel_engine_hold_request(struct intel_engine_cs *engine, void __intel_engine_unhold_request(struct intel_engine_cs *engine, struct i915_request *request); -void __i915_priolist_free(struct i915_priolist *p); -static inline void i915_priolist_free(struct i915_priolist *p) +static inline u64 i915_sched_to_ticks(ktime_t kt) +{ + return ktime_to_ns(kt) >> I915_SCHED_DEADLINE_SHIFT; +} + +static inline u64 i915_sched_to_ns(u64 deadline) { - if (p->priority != I915_PRIORITY_NORMAL) - __i915_priolist_free(p); + return deadline << I915_SCHED_DEADLINE_SHIFT; } static inline bool i915_sched_is_idle(const struct i915_sched_engine *se) diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index c155e4faa30b..b8c484511185 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -72,7 +72,30 @@ struct i915_sched_node { #define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0) unsigned long semaphores; + /** + * @deadline: [virtual] deadline + * + * When the request is ready for execution, it is given a quota + * (the engine's timeslice) and a virtual deadline. The virtual + * deadline is derived from the current time: + * ktime_get() + (prio_ratio * timeslice) + * + * Requests are then executed in order of deadline completion. + * Requests with earlier deadlines than currently executing on + * the engine will preempt the active requests. 
+ * + * By treating it as a virtual deadline, we use it as a hint for + * when it is appropriate for a request to start with respect to + * all other requests in the system. It is not a hard deadline, as + * we allow requests to miss them, and we do not account for the + * request runtime. + */ + u64 deadline; +#define I915_SCHED_DEADLINE_SHIFT 19 /* i.e. roughly 500us buckets */ +#define I915_DEADLINE_NEVER U64_MAX + struct i915_request *ipi_link; + u64 ipi_deadline; int ipi_priority; }; diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index d2a678a2497e..382f2d490959 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -2130,6 +2130,7 @@ static int measure_preemption(struct intel_context *ce) intel_ring_advance(rq, cs); rq->sched.attr.priority = I915_PRIORITY_BARRIER; + rq->sched.deadline = 0; elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/selftests/i915_scheduler.c b/drivers/gpu/drm/i915/selftests/i915_scheduler.c index a851a156d460..d28ce320760f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_scheduler.c +++ b/drivers/gpu/drm/i915/selftests/i915_scheduler.c @@ -12,6 +12,40 @@ #include "selftests/igt_spinner.h" #include "selftests/i915_random.h" +static int mock_scheduler_slices(void *dummy) +{ + u64 min, max, normal, kernel; + + min = prio_slice(I915_PRIORITY_MIN); + pr_info("%8s slice: %lluus\n", "min", min >> 10); + + normal = prio_slice(0); + pr_info("%8s slice: %lluus\n", "normal", normal >> 10); + + max = prio_slice(I915_PRIORITY_MAX); + pr_info("%8s slice: %lluus\n", "max", max >> 10); + + kernel = prio_slice(I915_PRIORITY_BARRIER); + pr_info("%8s slice: %lluus\n", "kernel", kernel >> 10); + + if (kernel != 0) { + pr_err("kernel prio slice should be 0\n"); + return -EINVAL; + } + + if (max >= normal) { + pr_err("maximum prio slice should be shorter than normal\n"); + 
return -EINVAL; + } + + if (min <= normal) { + pr_err("minimum prio slice should be longer than normal\n"); + return -EINVAL; + } + + return 0; +} + static int mock_skiplist_levels(void *dummy) { struct i915_priolist_root root = {}; @@ -43,6 +77,7 @@ static int mock_skiplist_levels(void *dummy) int i915_scheduler_mock_selftests(void) { static const struct i915_subtest tests[] = { + SUBTEST(mock_scheduler_slices), SUBTEST(mock_skiplist_levels), }; @@ -549,6 +584,53 @@ static int igt_priority_chains(void *arg) return igt_schedule_chains(arg, igt_priority); } +static bool igt_deadline(struct i915_request *rq, + unsigned long v, unsigned long e) +{ + i915_request_set_deadline(rq, 0); + GEM_BUG_ON(rq_deadline(rq) != 0); + return true; +} + +static int igt_deadline_chains(void *arg) +{ + return igt_schedule_chains(arg, igt_deadline); +} + +static bool igt_defer(struct i915_request *rq, unsigned long v, unsigned long e) +{ + struct intel_engine_cs *engine = rq->engine; + + /* XXX No generic means to unwind incomplete requests yet */ + if (!i915_request_in_priority_queue(rq)) + return false; + + if (!intel_engine_has_preemption(engine)) + return false; + + spin_lock_irq(&engine->active.lock); + + /* Push all the requests to the same deadline */ + __i915_request_set_deadline(rq, 0); + GEM_BUG_ON(rq_deadline(rq) != 0); + + /* Then the very first request must be the one everyone depends on */ + rq = list_first_entry(lookup_priolist(engine, 0), + typeof(*rq), sched.link); + GEM_BUG_ON(rq->engine != engine); + + /* Deferring the first request will then have to defer all requests */ + __intel_engine_defer_request(engine, rq); + + spin_unlock_irq(&engine->active.lock); + return true; +} + +static int igt_deadline_defer(void *arg) +{ + return igt_schedule_chains(arg, igt_defer); +} + static struct i915_request * __write_timestamp(struct intel_engine_cs *engine, struct drm_i915_gem_object *obj, @@ -764,13 +846,22 @@ static int igt_priority_cycle(void *arg) return 
__igt_schedule_cycle(arg, igt_priority); } +static int igt_deadline_cycle(void *arg) +{ + return __igt_schedule_cycle(arg, igt_deadline); +} + int i915_scheduler_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(igt_deadline_chains), SUBTEST(igt_priority_chains), SUBTEST(igt_schedule_cycle), + SUBTEST(igt_deadline_cycle), SUBTEST(igt_priority_cycle), + + SUBTEST(igt_deadline_defer), }; return i915_subtests(tests, i915); @@ -906,9 +997,54 @@ static int sparse_priority(void *arg) return sparse(arg, set_priority); } +static u64 __set_deadline(struct i915_request *rq, u64 deadline) +{ + u64 dt; + + preempt_disable(); + dt = ktime_get_raw_fast_ns(); + i915_request_set_deadline(rq, deadline); + dt = ktime_get_raw_fast_ns() - dt; + preempt_enable(); + + return dt; +} + +static bool set_deadline(struct i915_request *rq, + unsigned long v, unsigned long e) +{ + report("set-deadline", v, e, __set_deadline(rq, 0)); + return true; +} + +static int single_deadline(void *arg) +{ + return single(arg, set_deadline); +} + +static int wide_deadline(void *arg) +{ + return wide(arg, set_deadline); +} + +static int inv_deadline(void *arg) +{ + return inv(arg, set_deadline); +} + +static int sparse_deadline(void *arg) +{ + return sparse(arg, set_deadline); +} + int i915_scheduler_perf_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(single_deadline), + SUBTEST(wide_deadline), + SUBTEST(inv_deadline), + SUBTEST(sparse_deadline), + SUBTEST(single_priority), SUBTEST(wide_priority), SUBTEST(inv_priority), From patchwork Mon Dec 28 15:52:10 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991403 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, 
HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6E6CEC433E6 for ; Mon, 28 Dec 2020 15:53:29 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 2AF2B206E5 for ; Mon, 28 Dec 2020 15:53:29 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 2AF2B206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 04D3689AC3; Mon, 28 Dec 2020 15:53:00 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 0AC20899DB for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448214-1500050 for multiple; Mon, 28 Dec 2020 15:52:38 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:10 +0000 Message-Id: <20201228155229.9516-35-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 35/54] drm/i915/gt: Specify a deadline for the heartbeat X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel 
graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" As we know when we expect the heartbeat to be checked for completion, pass this information along as its deadline. We still do not complain if the deadline is missed, at least until we have tried a few times, but it will allow for quicker hang detection on systems where deadlines are adhered to. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 495e8d5e2bf4..0eb4a07b29b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -66,6 +66,16 @@ static void heartbeat_commit(struct i915_request *rq, __i915_request_queue(rq, attr); } +static void set_heartbeat_deadline(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + unsigned long interval; + + interval = READ_ONCE(engine->props.heartbeat_interval_ms); + if (interval) + i915_request_set_deadline(rq, ktime_get() + (interval << 20)); +} + static void show_heartbeat(const struct i915_request *rq, struct intel_engine_cs *engine) { @@ -131,6 +141,8 @@ static void heartbeat(struct work_struct *wrk) local_bh_disable(); i915_request_set_priority(rq, attr.priority); + if (attr.priority == I915_PRIORITY_BARRIER) + i915_request_set_deadline(rq, 0); local_bh_enable(); } else { if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) @@ -162,6 +174,7 @@ static void heartbeat(struct work_struct *wrk) if (IS_ERR(rq)) goto unlock; + set_heartbeat_deadline(engine, rq); heartbeat_commit(rq, &attr); unlock: From patchwork Mon Dec 28 15:52:11 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson 
X-Patchwork-Id: 11991411 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id BBA98C43381 for ; Mon, 28 Dec 2020 15:53:18 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 7B9C6206E5 for ; Mon, 28 Dec 2020 15:53:18 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 7B9C6206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id CB24589A94; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 97AC389A0F for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448215-1500050 for multiple; Mon, 28 Dec 2020 15:52:38 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:11 +0000 Message-Id: <20201228155229.9516-36-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: 
<20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 36/54] drm/i915: Extend the priority boosting for the display with a deadline X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" For a modeset/pageflip, there is a very precise deadline by which the frame must be completed in order to hit the vblank and be shown. While we don't pass along that exact information, we can at least inform the scheduler that this request-chain needs to be completed asap. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/display/intel_display.c | 4 +++- drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 21 +++++++++++--------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index f25ab2b15ca1..22c902188c3d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15687,7 +15687,9 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret) return ret; - i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); + i915_gem_object_wait_priority(obj, 0, + I915_PRIORITY_DISPLAY, + ktime_get() /* next vblank? 
*/); i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB); if (!new_plane_state->uapi.fence) { /* implicit fencing */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index b106bc81c303..88b849c6f49d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -517,7 +517,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, long timeout); int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int prio); + int prio, ktime_t deadline); void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, enum fb_op_origin origin); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 0d9dea4b0b65..d905d1111412 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -45,8 +45,7 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int count, i; int ret; - ret = dma_resv_get_fences_rcu(resv, - &excl, &count, &shared); + ret = dma_resv_get_fences_rcu(resv, &excl, &count, &shared); if (ret) return ret; @@ -92,17 +91,20 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, return timeout; } -static void __fence_set_priority(struct dma_fence *fence, int prio) +static void +__fence_set_prio(struct dma_fence *fence, int prio, ktime_t deadline) { if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) return; local_bh_disable(); + i915_request_set_deadline(to_request(fence), + i915_sched_to_ticks(deadline)); i915_request_set_priority(to_request(fence), prio); local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } -static void fence_set_priority(struct dma_fence *fence, int prio) +static void fence_set_prio(struct dma_fence *fence, int prio, ktime_t deadline) { /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { @@ -110,16 +112,17 @@ static void fence_set_priority(struct 
dma_fence *fence, int prio) int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], prio); + __fence_set_prio(array->fences[i], prio, deadline); } else { - __fence_set_priority(fence, prio); + __fence_set_prio(fence, prio, deadline); } } int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int prio) + int prio, + ktime_t deadline) { struct dma_fence *excl; @@ -134,7 +137,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], prio); + fence_set_prio(shared[i], prio, deadline); dma_fence_put(shared[i]); } @@ -144,7 +147,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, prio); + fence_set_prio(excl, prio, deadline); dma_fence_put(excl); } return 0; From patchwork Mon Dec 28 15:52:12 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991353 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4A6F4C433E9 for ; Mon, 28 Dec 2020 15:53:06 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id CD510206E5 for ; Mon, 28 Dec 2020 15:53:05 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org CD510206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none 
dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id EC6BD89A57; Mon, 28 Dec 2020 15:52:57 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 2ED8B89A1F for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448216-1500050 for multiple; Mon, 28 Dec 2020 15:52:39 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:12 +0000 Message-Id: <20201228155229.9516-37-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 37/54] drm/i915/gt: Support virtual engine queues X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Allow multiple requests to be queued onto a virtual engine, whereas before we only allowed a single request to be queued at a time. The advantage of keeping just one request in the queue was to ensure that we always decided late which engine to use. However, with the introduction of the virtual deadline we throttle submission and still only drip one request into the sibling at a time (unless it is truly empty, but then a second request will have an earlier deadline than the queued virtual engine and force itself in front).
This also takes advantage of the fact that a virtual engine will remain bound while it is active, i.e. we cannot switch to a second engine until the context is completed -- such that we cannot be as lazy as lazy can be. By allowing a full queue, we avoid having to synchronize via the breadcrumb interrupt every time, letting the virtual engine reach the full throughput of the siblings. Signed-off-by: Chris Wilson --- .../drm/i915/gt/intel_execlists_submission.c | 397 +++++++++--------- drivers/gpu/drm/i915/i915_request.c | 3 +- drivers/gpu/drm/i915/i915_scheduler.c | 65 ++- drivers/gpu/drm/i915/i915_scheduler.h | 4 +- 4 files changed, 261 insertions(+), 208 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index cae29268ccea..7d6a39cf4a99 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -160,17 +160,6 @@ struct virtual_engine { struct intel_context context; struct rcu_work rcu; - /* - * We allow only a single request through the virtual engine at a time - * (each request in the timeline waits for the completion fence of - * the previous before being submitted). By restricting ourselves to - * only submitting a single request, each request is placed on to a - * physical to maximise load spreading (by virtue of the late greedy - * scheduling -- each real engine takes the next available request - * upon idling). - */ - struct i915_request *request; - /* * We keep a rbtree of available virtual engines inside each physical * engine, sorted by priority.
Here we preallocate the nodes we need @@ -282,17 +271,27 @@ static struct i915_request *first_request(struct i915_sched_engine *se) return NULL; } -static struct i915_request *first_virtual(const struct intel_engine_cs *engine) +static struct virtual_engine * +first_virtual_engine(struct intel_engine_cs *engine) { - struct rb_node *rb; + return rb_entry_safe(rb_first_cached(&engine->execlists.virtual), + struct virtual_engine, + nodes[engine->id].rb); +} - rb = rb_first_cached(&engine->execlists.virtual); - if (!rb) - return NULL; +static const struct i915_request *first_virtual(struct intel_engine_cs *engine) +{ + struct i915_request *rq = NULL; + struct virtual_engine *ve; + + ve = first_virtual_engine(engine); + if (ve) { + spin_lock(&ve->base.active.lock); + rq = first_request(&ve->base.active); + spin_unlock(&ve->base.active.lock); + } - return READ_ONCE(rb_entry(rb, - struct virtual_engine, - nodes[engine->id].rb)->request); + return rq; } static const struct i915_request * @@ -389,7 +388,15 @@ assert_priority_queue(const struct i915_request *prev, if (i915_request_is_active(prev)) return true; - return rq_deadline(prev) <= rq_deadline(next); + if (rq_deadline(prev) <= rq_deadline(next)) + return true; + + ENGINE_TRACE(prev->engine, + "next %llx:%lld dl %lld is before prev %llx:%lld dl %lld\n", + next->fence.context, next->fence.seqno, rq_deadline(next), + prev->fence.context, prev->fence.seqno, rq_deadline(prev)); + + return false; } static inline void @@ -499,7 +506,7 @@ static inline void execlists_schedule_in(struct i915_request *rq, int idx) trace_i915_request_in(rq, idx); old = ce->inflight; - if (!old) + if (!__intel_context_inflight_count(old)) old = __execlists_schedule_in(rq); WRITE_ONCE(ce->inflight, ptr_inc(old)); @@ -507,9 +514,11 @@ static inline void execlists_schedule_in(struct i915_request *rq, int idx) } static void -resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) +__resubmit_virtual_request(struct i915_request 
*rq, + struct intel_engine_cs *engine, + struct virtual_engine *ve) { - struct intel_engine_cs *engine = rq->engine; + GEM_BUG_ON(rq->engine != engine); /* Flush concurrent rcu iterators in signal_irq_work */ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) { @@ -526,12 +535,42 @@ resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) cpu_relax(); } + __i915_request_requeue(rq, &ve->base); +} + +static void +resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) +{ + struct intel_engine_cs *engine = rq->engine; + struct i915_request *pos = rq; + struct intel_timeline *tl; + spin_lock_irq(&engine->active.lock); - clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - WRITE_ONCE(rq->engine, &ve->base); - ve->base.submit_request(rq); + if (__i915_request_is_complete(rq)) + goto unlock; + + tl = i915_request_active_timeline(rq); + + /* Rewind back to the start of this virtual engine queue */ + list_for_each_entry_continue_reverse(rq, &tl->requests, link) { + if (__i915_request_is_complete(rq)) + break; + + pos = rq; + } + + /* Resubmit the queue in execution order */ + spin_lock(&ve->base.active.lock); + list_for_each_entry_from(pos, &tl->requests, link) { + if (pos->engine != engine) + break; + __resubmit_virtual_request(pos, engine, ve); + } + spin_unlock(&ve->base.active.lock); + +unlock: spin_unlock_irq(&engine->active.lock); } @@ -550,7 +589,7 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) rq->execution_mask != engine->mask) resubmit_virtual_request(rq, ve); - if (READ_ONCE(ve->request)) + if (!i915_sched_is_idle(&ve->base.active)) i915_sched_kick(&ve->base.active); } @@ -897,10 +936,16 @@ static bool ctx_single_port_submission(const struct intel_context *ce) intel_context_force_single_submission(ce)); } +static bool __can_merge_ctx(const struct intel_context *prev, + const struct intel_context *next) +{ + return prev == next; +} + static bool can_merge_ctx(const struct 
intel_context *prev, const struct intel_context *next) { - if (prev != next) + if (!__can_merge_ctx(prev, next)) return false; if (ctx_single_port_submission(prev)) @@ -949,6 +994,9 @@ static bool virtual_matches(const struct virtual_engine *ve, { const struct intel_engine_cs *inflight; + if (!rq) + return false; + if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ return false; @@ -968,31 +1016,6 @@ static bool virtual_matches(const struct virtual_engine *ve, return true; } -static struct virtual_engine * -first_virtual_engine(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *el = &engine->execlists; - struct rb_node *rb = rb_first_cached(&el->virtual); - - while (rb) { - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - struct i915_request *rq = READ_ONCE(ve->request); - - /* lazily cleanup after another engine handled rq */ - if (!rq || !virtual_matches(ve, rq, engine)) { - rb_erase_cached(rb, &el->virtual); - RB_CLEAR_NODE(rb); - rb = rb_first_cached(&el->virtual); - continue; - } - - return ve; - } - - return NULL; -} - static void virtual_xfer_context(struct virtual_engine *ve, struct intel_engine_cs *engine) { @@ -1156,15 +1179,124 @@ static bool completed(const struct i915_request *rq) return __i915_request_is_complete(rq); } +static void __virtual_dequeue(struct virtual_engine *ve, + struct intel_engine_cs *sibling) +{ + struct ve_node * const node = &ve->nodes[sibling->id]; + struct rb_node **parent, *rb; + struct i915_request *rq; + u64 deadline; + bool first; + + rb_erase_cached(&node->rb, &sibling->execlists.virtual); + RB_CLEAR_NODE(&node->rb); + + rq = first_request(&ve->base.active); + if (!virtual_matches(ve, rq, sibling)) + return; + + rb = NULL; + first = true; + parent = &sibling->execlists.virtual.rb_root.rb_node; + deadline = rq_deadline(rq); + while (*parent) { + struct ve_node *other; + + rb = *parent; + other = rb_entry(rb, typeof(*other), rb); + if (deadline <= 
other->deadline) { + parent = &rb->rb_left; + } else { + parent = &rb->rb_right; + first = false; + } + } + + rb_link_node(&node->rb, rb, parent); + rb_insert_color_cached(&node->rb, &sibling->execlists.virtual, first); +} + +static void virtual_requeue(struct intel_engine_cs *engine, + struct i915_request *last) +{ + const struct i915_request * const first = + first_request(&engine->active); + struct virtual_engine *ve; + + while ((ve = first_virtual_engine(engine))) { + struct i915_request *rq; + + spin_lock(&ve->base.active.lock); + + rq = first_request(&ve->base.active); + if (unlikely(!virtual_matches(ve, rq, engine))) + /* lost the race to a sibling */ + goto unlock; + + GEM_BUG_ON(rq->engine != &ve->base); + GEM_BUG_ON(rq->context != &ve->context); + + if (!dl_before(rq, first)) { + spin_unlock(&ve->base.active.lock); + return; + } + + if (last && !__can_merge_ctx(last->context, rq->context)) { + spin_unlock(&ve->base.active.lock); + return; /* leave this for another sibling? */ + } + + ENGINE_TRACE(engine, + "virtual rq=%llx:%lld%s, dl %llx, new engine? %s\n", + rq->fence.context, + rq->fence.seqno, + __i915_request_is_complete(rq) ? "!" : + __i915_request_has_started(rq) ? "*" : + "", + rq_deadline(rq), + yesno(engine != ve->siblings[0])); + + GEM_BUG_ON(!(rq->execution_mask & engine->mask)); + if (__i915_request_requeue(rq, engine)) { + /* + * Only after we confirm that we will submit + * this request (i.e. it has not already + * completed), do we want to update the context. + * + * This serves two purposes. It avoids + * unnecessary work if we are resubmitting an + * already completed request after timeslicing. + * But more importantly, it prevents us altering + * ve->siblings[] on an idle context, where + * we may be using ve->siblings[] in + * virtual_context_enter / virtual_context_exit. 
+ */ + virtual_xfer_context(ve, engine); + + /* Bind this ve before we release the lock */ + if (!ve->context.inflight) + WRITE_ONCE(ve->context.inflight, engine); + + GEM_BUG_ON(rq->engine != engine); + GEM_BUG_ON(ve->siblings[0] != engine); + GEM_BUG_ON(intel_context_inflight(rq->context) != engine); + + last = rq; + } + +unlock: + __virtual_dequeue(ve, engine); + spin_unlock(&ve->base.active.lock); + } +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; struct i915_request *last, * const *active; - struct virtual_engine *ve; struct i915_priolist *pl; - struct rb_node *rb; bool submit = false; /* @@ -1282,85 +1414,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } } - /* XXX virtual is always taking precedence */ - while ((ve = first_virtual_engine(engine))) { - struct i915_request *rq; - - spin_lock(&ve->base.active.lock); - - rq = ve->request; - if (unlikely(!rq)) /* lost the race to a sibling */ - goto unlock; - - GEM_BUG_ON(rq->engine != &ve->base); - GEM_BUG_ON(rq->context != &ve->context); - - if (!dl_before(rq, first_request(&engine->active))) { - spin_unlock(&ve->base.active.lock); - break; - } - - GEM_BUG_ON(!virtual_matches(ve, rq, engine)); - - if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.active.lock); - spin_unlock(&engine->active.lock); - return; /* leave this for another sibling */ - } - - ENGINE_TRACE(engine, - "virtual rq=%llx:%lld%s, dl %llx, new engine? %s\n", - rq->fence.context, - rq->fence.seqno, - __i915_request_is_complete(rq) ? "!" : - __i915_request_has_started(rq) ? 
"*" : - "", - rq_deadline(rq), - yesno(engine != ve->siblings[0])); - WRITE_ONCE(ve->request, NULL); - - rb = &ve->nodes[engine->id].rb; - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - - GEM_BUG_ON(!(rq->execution_mask & engine->mask)); - WRITE_ONCE(rq->engine, engine); - - if (__i915_request_submit(rq)) { - /* - * Only after we confirm that we will submit - * this request (i.e. it has not already - * completed), do we want to update the context. - * - * This serves two purposes. It avoids - * unnecessary work if we are resubmitting an - * already completed request after timeslicing. - * But more importantly, it prevents us altering - * ve->siblings[] on an idle context, where - * we may be using ve->siblings[] in - * virtual_context_enter / virtual_context_exit. - */ - virtual_xfer_context(ve, engine); - GEM_BUG_ON(ve->siblings[0] != engine); - - submit = true; - last = rq; - } - - i915_request_put(rq); -unlock: - spin_unlock(&ve->base.active.lock); - - /* - * Hmm, we have a bunch of virtual engine requests, - * but the first one was already completed (thanks - * preempt-to-busy!). Keep looking at the veng queue - * until we have no more relevant requests (i.e. - * the normal submit queue has higher priority). - */ - if (submit) - break; - } + if (!RB_EMPTY_ROOT(&execlists->virtual.rb_root)) + virtual_requeue(engine, last); for_each_priolist(pl, &engine->active.queue) { struct i915_request *rq, *rn; @@ -1368,6 +1423,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) priolist_for_each_request_safe(rq, rn, pl) { bool merge = true; + GEM_BUG_ON(rq->engine != engine); + /* * Can we combine this request with the current port? 
* It has to be the same context/ringbuffer and not @@ -2700,13 +2757,11 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) RB_CLEAR_NODE(rb); spin_lock(&ve->base.active.lock); - rq = fetch_and_zero(&ve->request); - if (rq) { + while ((rq = first_request(&ve->base.active))) { mark_eio(rq); rq->engine = engine; __i915_request_submit(rq); - i915_request_put(rq); } spin_unlock(&ve->base.active.lock); } @@ -2940,11 +2995,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) return 0; } -static struct list_head *virtual_queue(struct virtual_engine *ve) -{ - return &ve->base.active.default_priolist.requests; -} - static void rcu_virtual_context_destroy(struct work_struct *wrk) { struct virtual_engine *ve = @@ -2954,17 +3004,13 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) GEM_BUG_ON(ve->context.inflight); /* Preempt-to-busy may leave a stale request behind. */ - if (unlikely(ve->request)) { + if (unlikely(!i915_sched_is_idle(&ve->base.active))) { struct i915_request *old; spin_lock_irq(&ve->base.active.lock); - old = fetch_and_zero(&ve->request); - if (old) { - GEM_BUG_ON(!i915_request_completed(old)); + while ((old = first_request(&ve->base.active))) __i915_request_submit(old); - i915_request_put(old); - } spin_unlock_irq(&ve->base.active.lock); } @@ -2995,7 +3041,6 @@ static void rcu_virtual_context_destroy(struct work_struct *wrk) spin_unlock_irq(&sibling->active.lock); } GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.active.tasklet)); - GEM_BUG_ON(!list_empty(virtual_queue(ve))); lrc_fini(&ve->context); intel_context_fini(&ve->context); @@ -3118,7 +3163,9 @@ virtual_submission_mask(struct virtual_engine *ve, u64 *deadline) struct i915_request *rq; intel_engine_mask_t mask; - rq = READ_ONCE(ve->request); + spin_lock_irq(&ve->base.active.lock); + rq = first_request(&ve->base.active); + spin_unlock_irq(&ve->base.active.lock); if (!rq) return 0; @@ -3158,9 +3205,6 @@ static void virtual_submission_tasklet(unsigned 
long data) struct rb_node **parent, *rb; bool first; - if (!READ_ONCE(ve->request)) - break; /* already handled by a sibling's tasklet */ - spin_lock_irq(&sibling->active.lock); if (unlikely(!(mask & sibling->mask))) { @@ -3222,45 +3266,6 @@ static void virtual_submission_tasklet(unsigned long data) } } -static void virtual_submit_request(struct i915_request *rq) -{ - struct virtual_engine *ve = to_virtual_engine(rq->engine); - unsigned long flags; - - ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n", - rq->fence.context, - rq->fence.seqno); - - GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - - spin_lock_irqsave(&ve->base.active.lock, flags); - - /* By the time we resubmit a request, it may be completed */ - if (__i915_request_is_complete(rq)) { - __i915_request_submit(rq); - goto unlock; - } - - if (ve->request) { /* background completion from preempt-to-busy */ - GEM_BUG_ON(!i915_request_completed(ve->request)); - __i915_request_submit(ve->request); - i915_request_put(ve->request); - } - - rq->sched.deadline = - min(rq->sched.deadline, - i915_scheduler_next_virtual_deadline(rq_prio(rq))); - ve->request = i915_request_get(rq); - - GEM_BUG_ON(!list_empty(virtual_queue(ve))); - list_move_tail(&rq->sched.link, virtual_queue(ve)); - - i915_sched_kick(&ve->base.active); - -unlock: - spin_unlock_irqrestore(&ve->base.active.lock, flags); -} - static struct ve_bond * virtual_find_bond(struct virtual_engine *ve, const struct intel_engine_cs *master) @@ -3348,10 +3353,9 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.cops = &virtual_context_ops; ve->base.request_alloc = execlists_request_alloc; - ve->base.submit_request = virtual_submit_request; + ve->base.submit_request = i915_request_enqueue; ve->base.bond_execute = virtual_bond_execute; - INIT_LIST_HEAD(virtual_queue(ve)); tasklet_init(&ve->base.active.tasklet, virtual_submission_tasklet, (unsigned long)ve); @@ -3559,14 +3563,17 @@ void intel_execlists_show_requests(struct 
intel_engine_cs *engine, for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) { struct virtual_engine *ve = rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - struct i915_request *rq = READ_ONCE(ve->request); + struct i915_request *rq; + spin_lock(&ve->base.active.lock); + rq = first_request(&ve->base.active); if (rq) { if (count++ < max - 1) show_request(m, rq, "\t\t", 0); else last = rq; } + spin_unlock(&ve->base.active.lock); } if (last) { if (count > max) { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index dd315455874f..3f9bcc427d07 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1343,6 +1343,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) GEM_BUG_ON(to == from); GEM_BUG_ON(to->timeline == from->timeline); + GEM_BUG_ON(to->context == from->context); if (i915_request_completed(from)) { i915_sw_fence_set_error_once(&to->submit, from->fence.error); @@ -1528,7 +1529,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) i915_seqno_passed(prev->fence.seqno, rq->fence.seqno)); - if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) + if (prev->context == rq->context) i915_sw_fence_await_sw_fence(&rq->submit, &prev->submit, &rq->submitq); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 92c7cbf98302..b8891b45d9b5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -580,7 +580,7 @@ static u64 virtual_deadline(u64 kt, int priority) return i915_sched_to_ticks(kt + prio_slice(priority)); } -u64 i915_scheduler_next_virtual_deadline(int priority) +static u64 next_virtual_deadline(int priority) { return virtual_deadline(ktime_get_mono_fast_ns(), priority); } @@ -821,20 +821,17 @@ bool __intel_engine_queue_request(struct intel_engine_cs *engine, return set_earliest_deadline(rq, I915_DEADLINE_NEVER); } -void 
__intel_engine_defer_request(struct intel_engine_cs *engine, - struct i915_request *rq) +static void __defer_request(struct intel_engine_cs *engine, + struct i915_request *rq, + u64 deadline) { struct list_head *pos = &rq->sched.waiters_list; struct i915_request *rn; LIST_HEAD(dfs); - u64 deadline; lockdep_assert_held(&engine->active.lock); GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags)); - deadline = max(rq_deadline(rq), - i915_scheduler_next_virtual_deadline(adj_prio(rq))); - /* * When we defer a request, we must maintain its order with respect * to those that are waiting upon it. So we traverse its chain of @@ -902,6 +899,14 @@ void __intel_engine_defer_request(struct intel_engine_cs *engine, } } +void __intel_engine_defer_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + __defer_request(engine, rq, + max(rq_deadline(rq), + next_virtual_deadline(adj_prio(rq)))); +} + static bool hold_request(const struct i915_request *rq) { struct i915_dependency *p; @@ -935,6 +940,46 @@ static bool ancestor_on_hold(const struct intel_engine_cs *engine, return unlikely(!list_empty(&engine->active.hold)) && hold_request(rq); } +bool __i915_request_requeue(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + RQ_TRACE(rq, "transfer from %s to %s\n", + rq->engine->name, engine->name); + + lockdep_assert_held(&engine->active.lock); + lockdep_assert_held(&rq->engine->active.lock); + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags)); + GEM_BUG_ON(rq->engine == engine); + + list_del_init(&rq->sched.link); + WRITE_ONCE(rq->engine, engine); + + if (__i915_request_is_complete(rq)) { + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + return false; + } + + if (unlikely(ancestor_on_hold(engine, rq))) { + RQ_TRACE(rq, "ancestor on hold\n"); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_add_tail(&rq->sched.link, &engine->active.hold); + 
i915_request_set_hold(rq); + } else { + u64 deadline = min(earliest_deadline(rq), rq_deadline(rq)); + + /* Maintain request ordering wrt to existing on target */ + __i915_request_set_deadline(rq, deadline); + if (!list_empty(&rq->sched.waiters_list)) + __defer_request(engine, rq, deadline); + + GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER); + } + + GEM_BUG_ON(list_empty(&rq->sched.link)); + return true; +} + void i915_request_enqueue(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; @@ -985,9 +1030,9 @@ __intel_engine_rewind_requests(struct intel_engine_cs *engine) __i915_request_unsubmit(rq); if (__i915_request_has_started(rq)) { - u64 deadline = - i915_scheduler_next_virtual_deadline(rq_prio(rq)); - rq->sched.deadline = min(rq_deadline(rq), deadline); + rq->sched.deadline = + min(rq_deadline(rq), + next_virtual_deadline(rq_prio(rq))); } GEM_BUG_ON(rq_deadline(rq) == I915_DEADLINE_NEVER); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index df0f1993b3b8..da17faa883a9 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -40,9 +40,9 @@ void i915_request_set_deadline(struct i915_request *request, u64 deadline); void i915_request_update_deadline(struct i915_request *request); -u64 i915_scheduler_next_virtual_deadline(int priority); - void i915_request_enqueue(struct i915_request *request); +bool __i915_request_requeue(struct i915_request *rq, + struct intel_engine_cs *engine); bool __intel_engine_queue_request(struct intel_engine_cs *engine, struct i915_request *request); From patchwork Mon Dec 28 15:52:13 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991355 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, 
HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 25437C43381 for ; Mon, 28 Dec 2020 15:53:07 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id C92C2206E5 for ; Mon, 28 Dec 2020 15:53:06 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org C92C2206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 9A09889A4F; Mon, 28 Dec 2020 15:52:57 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 9B1D2899DC for ; Mon, 28 Dec 2020 15:52:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448217-1500050 for multiple; Mon, 28 Dec 2020 15:52:39 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:13 +0000 Message-Id: <20201228155229.9516-38-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 38/54] drm/i915: Move saturated workload detection back to the context X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list 
List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" When we introduced the saturated workload detection to tell us to back off from semaphore usage [semaphores have a noticeable impact on contended bus cycles with the CPU for some heavy workloads], we first introduced it as a per-context tracker. This allows individual contexts to try and optimise their own usage, but we found that with the local tracking and the no-semaphore boosting, the first context to disable semaphores got a massive priority boost and so would starve the rest and all new contexts (as they started with semaphores enabled and lower priority). Hence we moved the saturated workload detection to the engine, and as a consequence had to disable semaphores on virtual engines. Now that we do not have semaphore priority boosting, and try to fairly schedule irrespective of semaphore usage, we can move the tracking back to the context and virtual engines can now utilise the faster inter-engine synchronisation. If we see that any context fails to use the semaphore, because the system is oversubscribed and was busy doing something else instead of spinning on the semaphore, we disable further usage of semaphores with that context until it idles again. This should restrict the semaphores to lightly utilised systems where the latency between requests is more noticeable, and curtail the bus-contention from checking for signaled semaphores.
References: 44d89409a12e ("drm/i915: Make the semaphore saturation mask global") Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_context.c | 3 +++ drivers/gpu/drm/i915/gt/intel_context_types.h | 2 ++ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 -- drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 -- .../gpu/drm/i915/gt/intel_execlists_submission.c | 15 --------------- drivers/gpu/drm/i915/i915_request.c | 4 ++-- 6 files changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 349e7fa1488d..b3eb7513659b 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -345,6 +345,9 @@ static int __intel_context_active(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); + CE_TRACE(ce, "active\n"); + ce->saturated = 0; + intel_context_get(ce); /* everything should already be activated by intel_context_pre_pin() */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 679b268f0911..f3e04ae53c51 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -103,6 +103,8 @@ struct intel_context { } lrc; u32 tag; /* cookie passed to HW to track this context on submission */ + intel_engine_mask_t saturated; /* submitting semaphores too late? */ + /* Time on GPU as tracked by the hw. 
*/ struct { struct ewma_runtime avg; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 511a379c794d..64b9b857c71c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -254,8 +254,6 @@ static int __engine_park(struct intel_wakeref *wf) struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); - engine->saturated = 0; - /* * If one and only one request is completed between pm events, * we know that we are inside the kernel context and it is diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 824a187b2803..74e55e0fbc65 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -304,8 +304,6 @@ struct intel_engine_cs { struct intel_context *kernel_context; /* pinned */ - intel_engine_mask_t saturated; /* submitting semaphores too late? */ - struct { struct delayed_work work; struct i915_request *systole; diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 7d6a39cf4a99..52c77912e66d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3330,21 +3330,6 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings, ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; - /* - * The decision on whether to submit a request using semaphores - * depends on the saturated state of the engine. We only compute - * this during HW submission of the request, and we need for this - * state to be globally applied to all requests being submitted - * to this engine. 
Virtual engines encompass more than one physical - * engine and so we cannot accurately tell in advance if one of those - * engines is already saturated and so cannot afford to use a semaphore - * and be pessimized in priority for doing so -- if we are the only - * context using semaphores after all other clients have stopped, we - * will be starved on the saturated system. Such a global switch for - * semaphores is less than ideal, but alas is the current compromise. - */ - ve->base.saturated = ALL_ENGINES; - snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); i915_sched_init_engine(&ve->base.active, ENGINE_VIRTUAL); diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3f9bcc427d07..d73f4085f227 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -569,7 +569,7 @@ bool __i915_request_submit(struct i915_request *request) */ if (request->sched.semaphores && i915_sw_fence_signaled(&request->semaphore)) - engine->saturated |= request->sched.semaphores; + request->context->saturated |= request->sched.semaphores; engine->emit_fini_breadcrumb(request, request->ring->vaddr + request->postfix); @@ -1030,7 +1030,7 @@ already_busywaiting(struct i915_request *rq) * * See the are-we-too-late? check in __i915_request_submit(). 
*/ - return rq->sched.semaphores | READ_ONCE(rq->engine->saturated); + return rq->sched.semaphores | READ_ONCE(rq->context->saturated); } static int From patchwork Mon Dec 28 15:52:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991405 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 43645C4332E for ; Mon, 28 Dec 2020 15:53:31 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 0B9DC206E5 for ; Mon, 28 Dec 2020 15:53:31 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 0B9DC206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 44DA289AB7; Mon, 28 Dec 2020 15:52:59 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id BB145899D4 for ; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448218-1500050 for multiple; Mon, 28 Dec 
2020 15:52:39 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:14 +0000 Message-Id: <20201228155229.9516-39-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 39/54] drm/i915: Bump default timeslicing quantum to 5ms X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Kconfig.profile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 35bbe2b80596..3eacea42b19f 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -90,7 +90,7 @@ config DRM_I915_STOP_TIMEOUT config DRM_I915_TIMESLICE_DURATION int "Scheduling quantum for userspace batches (ms, jiffy granularity)" - default 1 # milliseconds + default 5 # milliseconds help When two user batches of equal priority are executing, we will alternate execution of each batch to ensure forward progress of From patchwork Mon Dec 28 15:52:15 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991425 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from 
mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id B798CC433DB for ; Mon, 28 Dec 2020 15:53:35 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 8677D206E5 for ; Mon, 28 Dec 2020 15:53:35 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 8677D206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 862F689BA9; Mon, 28 Dec 2020 15:53:34 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id E6B7889AC0 for ; Mon, 28 Dec 2020 15:53:14 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448222-1500050 for multiple; Mon, 28 Dec 2020 15:52:39 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:15 +0000 Message-Id: <20201228155229.9516-40-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 40/54] drm/i915/gt: Wrap intel_timeline.has_initial_breadcrumb X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: 
intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" In preparation for removing the has_initial_breadcrumb field, add a helper function for the existing callers. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_timeline.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_timeline.h | 6 ++++++ drivers/gpu/drm/i915/gt/selftest_timeline.c | 5 +++-- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 1972dd5dca00..74f02d857acf 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -354,7 +354,7 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq) u32 *cs; GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); - if (!i915_request_timeline(rq)->has_initial_breadcrumb) + if (!intel_timeline_has_initial_breadcrumb(i915_request_timeline(rq))) return 0; cs = intel_ring_begin(rq, 6); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 0737a2608d0b..cab775a78912 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -981,7 +981,7 @@ static int ring_request_alloc(struct i915_request *request) int ret; GEM_BUG_ON(!intel_context_is_pinned(request->context)); - GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb); + GEM_BUG_ON(intel_timeline_has_initial_breadcrumb(i915_request_timeline(request))); /* * Flush enough space to reduce the likelihood of waiting after @@ -1306,7 +1306,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) err = PTR_ERR(timeline); goto err; } - GEM_BUG_ON(timeline->has_initial_breadcrumb); + GEM_BUG_ON(intel_timeline_has_initial_breadcrumb(timeline)); err = intel_timeline_pin(timeline, NULL); if (err) diff 
--git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 037b0e3ccbed..18b1bda242bd 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -445,14 +445,14 @@ void intel_timeline_exit(struct intel_timeline *tl) static u32 timeline_advance(struct intel_timeline *tl) { GEM_BUG_ON(!atomic_read(&tl->pin_count)); - GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); + GEM_BUG_ON(tl->seqno & intel_timeline_has_initial_breadcrumb(tl)); - return tl->seqno += 1 + tl->has_initial_breadcrumb; + return tl->seqno += 1 + intel_timeline_has_initial_breadcrumb(tl); } static void timeline_rollback(struct intel_timeline *tl) { - tl->seqno -= 1 + tl->has_initial_breadcrumb; + tl->seqno -= 1 + intel_timeline_has_initial_breadcrumb(tl); } static noinline int diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index f502a619843f..0e5e9fdade5b 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -61,6 +61,12 @@ static inline void intel_timeline_put(struct intel_timeline *timeline) kref_put(&timeline->kref, __intel_timeline_free); } +static inline bool +intel_timeline_has_initial_breadcrumb(const struct intel_timeline *tl) +{ + return tl->has_initial_breadcrumb; +} + static inline int __intel_timeline_sync_set(struct intel_timeline *tl, u64 context, u32 seqno) { diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 6f3a3687ef0f..4f47a0535c91 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -666,7 +666,7 @@ static int live_hwsp_wrap(void *arg) if (IS_ERR(tl)) return PTR_ERR(tl); - if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + if (!intel_timeline_has_initial_breadcrumb(tl) || !tl->hwsp_cacheline) goto out_free; err = intel_timeline_pin(tl, NULL); @@ -1235,7 +1235,8 @@ static int 
live_hwsp_rollover_user(void *arg) goto out; tl = ce->timeline; - if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + if (!intel_timeline_has_initial_breadcrumb(tl) || + !tl->hwsp_cacheline) goto out; timeline_rollback(tl); From patchwork Mon Dec 28 15:52:16 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991337 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 06F13C433DB for ; Mon, 28 Dec 2020 15:52:48 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 9716E20791 for ; Mon, 28 Dec 2020 15:52:47 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 9716E20791 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id C0A9B89254; Mon, 28 Dec 2020 15:52:46 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 40D3B89254 for ; Mon, 28 Dec 2020 15:52:44 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com 
(Firefly Internet (M1)) with ESMTP id 23448224-1500050 for multiple; Mon, 28 Dec 2020 15:52:39 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:16 +0000 Message-Id: <20201228155229.9516-41-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 41/54] drm/i915/gt: Track timeline GGTT offset separately from subpage offset X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Currently we know that the timeline status page is at most a page in size, and so we can preserve the lower 12 bits of the offset when relocating the status page in the GGTT. If we want to use a larger object, such as the context state, we may not necessarily use a position within the first page and so need more than 12 bits.
Signed-off-by: Chris Wilson Reviewed-by: Matthew Brost --- drivers/gpu/drm/i915/gt/gen6_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/gt/intel_timeline.c | 17 +++++++---------- drivers/gpu/drm/i915/gt/intel_timeline_types.h | 1 + drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 2 +- drivers/gpu/drm/i915/gt/selftest_rc6.c | 2 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 16 ++++++++-------- 8 files changed, 23 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c index ce38d1bcaba3..2f59dd3bdc18 100644 --- a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c @@ -161,7 +161,7 @@ u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->hwsp_offset | + *cs++ = i915_request_active_timeline(rq)->ggtt_offset | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; @@ -359,7 +359,7 @@ u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->hwsp_offset; + *cs++ = i915_request_active_timeline(rq)->ggtt_offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 74f02d857acf..dcd8786d796d 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -346,7 +346,7 @@ static u32 hwsp_offset(const struct i915_request *rq) if (cl) return cl->ggtt_offset; - return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset; + return rcu_dereference_protected(rq->timeline, 1)->ggtt_offset; } int gen8_emit_init_breadcrumb(struct i915_request *rq) diff --git 
a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4854a8d15ef4..0b96007c30f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1324,7 +1324,7 @@ static int print_ring(char *buf, int sz, struct i915_request *rq) len = scnprintf(buf, sz, "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ", i915_ggtt_offset(rq->ring->vma), - tl ? tl->hwsp_offset : 0, + tl ? tl->ggtt_offset : 0, hwsp_seqno(rq), DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context), 1000 * 1000)); @@ -1663,7 +1663,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (tl) { drm_printf(m, "\t\tring->hwsp: 0x%08x\n", - tl->hwsp_offset); + tl->ggtt_offset); intel_timeline_put(tl); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 18b1bda242bd..24cc45777f9d 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -355,13 +355,11 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww) if (err) return err; - tl->hwsp_offset = - i915_ggtt_offset(tl->hwsp_ggtt) + - offset_in_page(tl->hwsp_offset); + tl->ggtt_offset = i915_ggtt_offset(tl->hwsp_ggtt) + tl->hwsp_offset; GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", - tl->fence_context, tl->hwsp_offset); + tl->fence_context, tl->ggtt_offset); - cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset); + cacheline_acquire(tl->hwsp_cacheline, tl->ggtt_offset); if (atomic_fetch_inc(&tl->pin_count)) { cacheline_release(tl->hwsp_cacheline); __i915_vma_unpin(tl->hwsp_ggtt); @@ -529,14 +527,13 @@ __intel_timeline_get_seqno(struct intel_timeline *tl, vaddr = page_mask_bits(cl->vaddr); tl->hwsp_offset = cacheline * CACHELINE_BYTES; - tl->hwsp_seqno = - memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); + tl->hwsp_seqno = memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); - tl->hwsp_offset += 
i915_ggtt_offset(vma); + tl->ggtt_offset = i915_ggtt_offset(vma) + tl->hwsp_offset; GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", - tl->fence_context, tl->hwsp_offset); + tl->fence_context, tl->ggtt_offset); - cacheline_acquire(cl, tl->hwsp_offset); + cacheline_acquire(cl, tl->ggtt_offset); tl->hwsp_cacheline = cl; *seqno = timeline_advance(tl); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index e360f50706bf..f187c5aac11c 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -48,6 +48,7 @@ struct intel_timeline { const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; u32 hwsp_offset; + u32 ggtt_offset; struct intel_timeline_cacheline *hwsp_cacheline; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 439c8984f5fa..bf4b8973b188 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -54,7 +54,7 @@ static int write_timestamp(struct i915_request *rq, int slot) cmd++; *cs++ = cmd; *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); - *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32); + *cs++ = i915_request_timeline(rq)->ggtt_offset + slot * sizeof(u32); *cs++ = 0; intel_ring_advance(rq, cs); diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 61abc0556601..4eb4c627ec44 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -138,7 +138,7 @@ static const u32 *__live_rc6_ctx(struct intel_context *ce) *cs++ = cmd; *cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO); - *cs++ = ce->timeline->hwsp_offset + 8; + *cs++ = ce->timeline->ggtt_offset + 8; *cs++ = 0; intel_ring_advance(rq, cs); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 
4f47a0535c91..e44bfceef413 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -469,7 +469,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) i915_request_get(rq); - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); + err = emit_ggtt_store_dw(rq, tl->ggtt_offset, value); i915_request_add(rq); if (err) { i915_request_put(rq); @@ -565,7 +565,7 @@ static int live_hwsp_engine(void *arg) if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", - n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); + n, tl->fence_context, tl->ggtt_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } @@ -637,7 +637,7 @@ static int live_hwsp_alternate(void *arg) if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", - n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); + n, tl->fence_context, tl->ggtt_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } @@ -697,9 +697,9 @@ static int live_hwsp_wrap(void *arg) goto out; } pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", - seqno[0], tl->hwsp_offset); + seqno[0], tl->ggtt_offset); - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); + err = emit_ggtt_store_dw(rq, tl->ggtt_offset, seqno[0]); if (err) { i915_request_add(rq); goto out; @@ -714,9 +714,9 @@ static int live_hwsp_wrap(void *arg) goto out; } pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", - seqno[1], tl->hwsp_offset); + seqno[1], tl->ggtt_offset); - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); + err = emit_ggtt_store_dw(rq, tl->ggtt_offset, seqno[1]); if (err) { i915_request_add(rq); goto out; @@ -1344,7 +1344,7 @@ static int live_hwsp_recycle(void *arg) if (READ_ONCE(*tl->hwsp_seqno) != count) { GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n", count, tl->fence_context, - tl->hwsp_offset, 
*tl->hwsp_seqno); + tl->ggtt_offset, *tl->hwsp_seqno); GEM_TRACE_DUMP(); err = -EINVAL; } From patchwork Mon Dec 28 15:52:17 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991369 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 73CF4C433E0 for ; Mon, 28 Dec 2020 15:53:13 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 22D6722B3A for ; Mon, 28 Dec 2020 15:53:13 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 22D6722B3A Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 9AE2989A0E; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 8EFB88999C for ; Mon, 28 Dec 2020 15:52:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448226-1500050 for multiple; Mon, 28 Dec 2020 15:52:40 +0000 From: Chris Wilson To: 
intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:17 +0000 Message-Id: <20201228155229.9516-42-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 42/54] drm/i915/gt: Add timeline "mode" X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Explicitly differentiate between the absolute and relative timelines, and the global HWSP and ppHWSP relative offsets. When using a timeline that is relative to a known status page, we can replace the absolute addressing in the commands with indexed variants. Signed-off-by: Chris Wilson Reviewed-by: Matthew Brost --- drivers/gpu/drm/i915/gt/intel_timeline.c | 21 ++++++++++++++++--- drivers/gpu/drm/i915/gt/intel_timeline.h | 2 +- .../gpu/drm/i915/gt/intel_timeline_types.h | 10 +++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 24cc45777f9d..882b7916bb4a 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -227,7 +227,6 @@ static int intel_timeline_init(struct intel_timeline *timeline, timeline->gt = gt; - timeline->has_initial_breadcrumb = !hwsp; timeline->hwsp_cacheline = NULL; if (!hwsp) { @@ -244,13 +243,29 @@ static int intel_timeline_init(struct intel_timeline *timeline, return PTR_ERR(cl); } + timeline->mode = INTEL_TIMELINE_ABSOLUTE; timeline->hwsp_cacheline = cl; timeline->hwsp_offset = cacheline * CACHELINE_BYTES; vaddr = page_mask_bits(cl->vaddr); } else { - timeline->hwsp_offset = offset; - vaddr = 
i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); + int preferred; + + if (offset & INTEL_TIMELINE_RELATIVE_CONTEXT) { + timeline->mode = INTEL_TIMELINE_RELATIVE_CONTEXT; + timeline->hwsp_offset = + offset & ~INTEL_TIMELINE_RELATIVE_CONTEXT; + preferred = i915_coherent_map_type(gt->i915); + } else { + timeline->mode = INTEL_TIMELINE_RELATIVE_ENGINE; + timeline->hwsp_offset = offset; + preferred = I915_MAP_WB; + } + + vaddr = i915_gem_object_pin_map(hwsp->obj, + preferred | I915_MAP_OVERRIDE); + if (IS_ERR(vaddr)) + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WC); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 0e5e9fdade5b..6e738a85beda 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -64,7 +64,7 @@ static inline void intel_timeline_put(struct intel_timeline *timeline) static inline bool intel_timeline_has_initial_breadcrumb(const struct intel_timeline *tl) { - return tl->has_initial_breadcrumb; + return tl->mode == INTEL_TIMELINE_ABSOLUTE; } static inline int __intel_timeline_sync_set(struct intel_timeline *tl, diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index f187c5aac11c..3c1ab901b702 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -20,6 +20,12 @@ struct i915_syncmap; struct intel_gt; struct intel_timeline_hwsp; +enum intel_timeline_mode { + INTEL_TIMELINE_ABSOLUTE = 0, + INTEL_TIMELINE_RELATIVE_CONTEXT = BIT(0), + INTEL_TIMELINE_RELATIVE_ENGINE = BIT(1), +}; + struct intel_timeline { u64 fence_context; u32 seqno; @@ -45,6 +51,8 @@ struct intel_timeline { atomic_t pin_count; atomic_t active_count; + enum intel_timeline_mode mode; + const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; u32 hwsp_offset; @@ -52,8 +60,6 @@ struct intel_timeline { struct intel_timeline_cacheline 
*hwsp_cacheline; - bool has_initial_breadcrumb; - /** * List of breadcrumbs associated with GPU requests currently * outstanding. From patchwork Mon Dec 28 15:52:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991387 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 81087C433E9 for ; Mon, 28 Dec 2020 15:53:23 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 28A2D206E5 for ; Mon, 28 Dec 2020 15:53:23 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 28A2D206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8DA6389A9B; Mon, 28 Dec 2020 15:53:00 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 6A395899C7 for ; Mon, 28 Dec 2020 15:52:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448227-1500050 for multiple; Mon, 28 Dec 2020 15:52:40 +0000 
From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:18 +0000 Message-Id: <20201228155229.9516-43-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 43/54] drm/i915/gt: Use indices for writing into relative timelines X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Relative timelines are relative to either the global or per-process HWSP, and so we can replace the absolute addressing with store-index variants for position invariance. Signed-off-by: Chris Wilson Reviewed-by: Matthew Brost --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 98 +++++++++++++++++------- drivers/gpu/drm/i915/gt/intel_timeline.h | 12 +++ 2 files changed, 82 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index dcd8786d796d..5f0485cb9e3b 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -502,7 +502,19 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs) { - return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0); + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); + unsigned int flags = MI_FLUSH_DW_OP_STOREDW; + u32 offset = hwsp_offset(rq); + + if (intel_timeline_is_relative(tl)) { + offset = offset_in_page(offset); + flags |= MI_FLUSH_DW_STORE_INDEX; + } + GEM_BUG_ON(offset & 7); + if (!intel_timeline_in_context(tl)) + offset |= MI_FLUSH_DW_USE_GTT; + + return __gen8_emit_flush_dw(cs, 
rq->fence.seqno, offset, flags); } u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) @@ -512,6 +524,18 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); + unsigned int flags = PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL; + u32 offset = hwsp_offset(rq); + + if (intel_timeline_is_relative(tl)) { + offset = offset_in_page(offset); + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + } + GEM_BUG_ON(offset & 7); + if (!intel_timeline_in_context(tl)) + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | @@ -519,26 +543,33 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) 0); /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ - cs = gen8_emit_ggtt_write_rcs(cs, - rq->fence.seqno, - hwsp_offset(rq), - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_CS_STALL); + cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, 0, flags); return gen8_emit_fini_breadcrumb_tail(rq, cs); } u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { - cs = gen8_emit_ggtt_write_rcs(cs, - rq->fence.seqno, - hwsp_offset(rq), - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); + u32 offset = hwsp_offset(rq); + unsigned int flags; + + flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + if (intel_timeline_is_relative(tl)) { + offset = offset_in_page(offset); + flags |= 
PIPE_CONTROL_STORE_DATA_INDEX; + } + GEM_BUG_ON(offset & 7); + if (!intel_timeline_in_context(tl)) + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, 0, flags); return gen8_emit_fini_breadcrumb_tail(rq, cs); } @@ -601,19 +632,30 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { - cs = gen12_emit_ggtt_write_rcs(cs, - rq->fence.seqno, - hwsp_offset(rq), - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); + u32 offset = hwsp_offset(rq); + unsigned int flags; + + flags = (PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + if (intel_timeline_is_relative(tl)) { + offset = offset_in_page(offset); + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + } + GEM_BUG_ON(offset & 7); + if (!intel_timeline_in_context(tl)) + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags); return gen12_emit_fini_breadcrumb_tail(rq, cs); } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 6e738a85beda..a58823fe7a34 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -67,6 +67,18 @@ intel_timeline_has_initial_breadcrumb(const struct intel_timeline *tl) return tl->mode == INTEL_TIMELINE_ABSOLUTE; } +static inline 
bool +intel_timeline_is_relative(const struct intel_timeline *tl) +{ + return tl->mode != INTEL_TIMELINE_ABSOLUTE; +} + +static inline bool +intel_timeline_in_context(const struct intel_timeline *tl) +{ + return tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT; +} + static inline int __intel_timeline_sync_set(struct intel_timeline *tl, u64 context, u32 seqno) { From patchwork Mon Dec 28 15:52:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991367 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 98465C433DB for ; Mon, 28 Dec 2020 15:53:12 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 4D6E0229C6 for ; Mon, 28 Dec 2020 15:53:12 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 4D6E0229C6 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 577D889A67; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 622E8899E7 for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok 
(res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448228-1500050 for multiple; Mon, 28 Dec 2020 15:52:40 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:19 +0000 Message-Id: <20201228155229.9516-44-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 44/54] drm/i915/selftests: Exercise relative timeline modes X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" A quick test to verify that the backend accepts each type of timeline and can use them to track and control request emission. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/selftest_timeline.c | 105 ++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index e44bfceef413..a0a6a5ba09d2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -1365,9 +1365,114 @@ static int live_hwsp_recycle(void *arg) return err; } +static int live_hwsp_relative(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Check backend support for different timeline modes. 
+ */ + + for_each_engine(engine, gt, id) { + enum intel_timeline_mode mode; + + if (!intel_engine_has_scheduler(engine)) + continue; + + for (mode = INTEL_TIMELINE_ABSOLUTE; + mode <= INTEL_TIMELINE_RELATIVE_ENGINE; + mode++) { + struct intel_timeline *tl; + struct i915_request *rq; + struct intel_context *ce; + const char *msg; + int err; + + if (mode == INTEL_TIMELINE_RELATIVE_CONTEXT && + !HAS_EXECLISTS(gt->i915)) + continue; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_alloc_state(ce); + if (err) { + intel_context_put(ce); + return err; + } + + switch (mode) { + case INTEL_TIMELINE_ABSOLUTE: + tl = intel_timeline_create(gt); + msg = "local"; + break; + + case INTEL_TIMELINE_RELATIVE_CONTEXT: + tl = __intel_timeline_create(gt, + ce->state, + INTEL_TIMELINE_RELATIVE_CONTEXT | + 0x400); + msg = "ppHWSP"; + break; + + case INTEL_TIMELINE_RELATIVE_ENGINE: + tl = __intel_timeline_create(gt, + engine->status_page.vma, + 0x400); + msg = "HWSP"; + break; + default: + continue; + } + if (IS_ERR(tl)) { + intel_context_put(ce); + return PTR_ERR(tl); + } + + pr_info("Testing %s timeline on %s\n", + msg, engine->name); + + intel_timeline_put(ce->timeline); + ce->timeline = tl; + + err = intel_timeline_pin(tl, NULL); + if (err) { + intel_context_put(ce); + return err; + } + tl->seqno = 0xc0000000; + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); + intel_timeline_unpin(tl); + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + GEM_BUG_ON(rcu_access_pointer(rq->timeline) != tl); + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + i915_request_put(rq); + return -EIO; + } + + i915_request_put(rq); + } + } + + return 0; +} + int intel_timeline_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(live_hwsp_relative), SUBTEST(live_hwsp_recycle), SUBTEST(live_hwsp_engine), 
SUBTEST(live_hwsp_alternate), From patchwork Mon Dec 28 15:52:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991433 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 2ED66C4360C for ; Mon, 28 Dec 2020 15:53:28 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id D7616206E5 for ; Mon, 28 Dec 2020 15:53:27 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org D7616206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id A27B489B00; Mon, 28 Dec 2020 15:53:00 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 564868925D for ; Mon, 28 Dec 2020 15:52:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448229-1500050 for multiple; Mon, 28 Dec 2020 15:52:40 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:20 +0000 
Message-Id: <20201228155229.9516-45-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 45/54] drm/i915/gt: Use ppHWSP for unshared non-semaphore related timelines X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" When we are not using semaphores with a context/engine, we can simply reuse the same seqno location across wraps, but we still require each timeline to have its own address. For LRC submission, each context is prefixed by a per-process HWSP, which provides us with a unique location for each context-local timeline. A shared timeline that is common to multiple contexts will continue to use a separate page. This enables us to create position invariant contexts should we feel the need to relocate them. Initially they are automatically used by Broadwell/Braswell as they do not require independent timelines. 
Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Matthew Brost --- drivers/gpu/drm/i915/gt/intel_lrc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 008f50a86355..d8829f7e2d8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -834,6 +834,14 @@ pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine) return intel_timeline_create_from_engine(engine, page_unmask_bits(tl)); } +static struct intel_timeline * +pphwsp_timeline(struct intel_context *ce, struct i915_vma *state) +{ + return __intel_timeline_create(ce->engine->gt, state, + I915_GEM_HWS_SEQNO_ADDR | + INTEL_TIMELINE_RELATIVE_CONTEXT); +} + int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) { struct intel_ring *ring; @@ -861,8 +869,10 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) */ if (unlikely(ce->timeline)) tl = pinned_timeline(ce, engine); - else + else if (intel_engine_has_semaphores(engine)) tl = intel_timeline_create(engine->gt); + else + tl = pphwsp_timeline(ce, vma); if (IS_ERR(tl)) { err = PTR_ERR(tl); goto err_ring; From patchwork Mon Dec 28 15:52:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991383 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 14C59C4361B for ; Mon, 28 Dec 2020 15:53:18 +0000 (UTC) Received: from gabe.freedesktop.org 
(gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id B219022583 for ; Mon, 28 Dec 2020 15:53:17 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org B219022583 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 2A16089A34; Mon, 28 Dec 2020 15:52:57 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id C8F9A899DB for ; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448230-1500050 for multiple; Mon, 28 Dec 2020 15:52:40 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:21 +0000 Message-Id: <20201228155229.9516-46-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 46/54] Restore "drm/i915: drop engine_pin/unpin_breadcrumbs_irq" X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" This was removed in commit 478ffad6d690 ("drm/i915: drop engine_pin/unpin_breadcrumbs_irq") as the last user had been removed, but now there is a promise of 
a new user in the next patch. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 24 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_breadcrumbs.h | 3 +++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index b7af8d9cc4dc..dc76b05991e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -329,6 +329,30 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) spin_unlock_irqrestore(&b->irq_lock, flags); } +void intel_breadcrumbs_pin_irq(struct intel_breadcrumbs *b) +{ + if (GEM_DEBUG_WARN_ON(!b->irq_engine)) + return; + + spin_lock_irq(&b->irq_lock); + if (!b->irq_enabled++) + irq_enable(b->irq_engine); + GEM_BUG_ON(!b->irq_enabled); /* no overflow! */ + spin_unlock_irq(&b->irq_lock); +} + +void intel_breadcrumbs_unpin_irq(struct intel_breadcrumbs *b) +{ + if (GEM_DEBUG_WARN_ON(!b->irq_engine)) + return; + + spin_lock_irq(&b->irq_lock); + GEM_BUG_ON(!b->irq_enabled); /* no underflow! 
*/ + if (!--b->irq_enabled) + irq_disable(b->irq_engine); + spin_unlock_irq(&b->irq_lock); +} + void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) { if (!READ_ONCE(b->irq_armed)) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h index 75cc9cff3ae3..d400ac17c733 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h @@ -18,6 +18,9 @@ struct intel_breadcrumbs * intel_breadcrumbs_create(struct intel_engine_cs *irq_engine); void intel_breadcrumbs_free(struct intel_breadcrumbs *b); +void intel_breadcrumbs_pin_irq(struct intel_breadcrumbs *b); +void intel_breadcrumbs_unpin_irq(struct intel_breadcrumbs *b); + void intel_breadcrumbs_reset(struct intel_breadcrumbs *b); void __intel_breadcrumbs_park(struct intel_breadcrumbs *b); From patchwork Mon Dec 28 15:52:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991443 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 78B9AC433DB for ; Mon, 28 Dec 2020 15:54:32 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 428C1206E5 for ; Mon, 28 Dec 2020 15:54:32 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 428C1206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) 
header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id D4A4A89BF6; Mon, 28 Dec 2020 15:54:31 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id F18B389AC0 for ; Mon, 28 Dec 2020 15:53:16 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448238-1500050 for multiple; Mon, 28 Dec 2020 15:52:41 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:22 +0000 Message-Id: <20201228155229.9516-47-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 47/54] drm/i915/gt: Couple tasklet scheduling for all CS interrupts X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" If any engine asks for the tasklet to be kicked from the CS interrupt, do so. Currently, this is used by the execlists scheduler backends to feed in the next request to the HW, and similarly could be used by a ring scheduler, as will be seen in the next patch. 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/gt/intel_gt_irq.c | 17 ++++++++++++----- drivers/gpu/drm/i915/gt/intel_gt_irq.h | 3 +++ drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 8 ++++---- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 2106fb403c3e..dfb2d66e1556 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -63,6 +63,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) i915_sched_kick(&engine->active); } +void gen2_engine_cs_irq(struct intel_engine_cs *engine) +{ + intel_engine_signal_breadcrumbs(engine); + if (intel_engine_needs_breadcrumb_tasklet(engine)) + i915_sched_kick(&engine->active); +} + static u32 gen11_gt_engine_identity(struct intel_gt *gt, const unsigned int bank, const unsigned int bit) @@ -276,9 +283,9 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); + gen2_engine_cs_irq(gt->engine_class[RENDER_CLASS][0]); if (gt_iir & ILK_BSD_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); + gen2_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]); } static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) @@ -302,11 +309,11 @@ static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir) void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir) { if (gt_iir & GT_RENDER_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]); + gen2_engine_cs_irq(gt->engine_class[RENDER_CLASS][0]); if (gt_iir & GT_BSD_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]); + gen2_engine_cs_irq(gt->engine_class[VIDEO_DECODE_CLASS][0]); if (gt_iir & 
GT_BLT_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]); + gen2_engine_cs_irq(gt->engine_class[COPY_ENGINE_CLASS][0]); if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT | GT_BSD_CS_ERROR_INTERRUPT | diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h index 886c5cf408a2..6c69cd563fe1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h @@ -9,6 +9,7 @@ #include +struct intel_engine_cs; struct intel_gt; #define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \ @@ -19,6 +20,8 @@ struct intel_gt; GEN8_GT_PM_IRQ | \ GEN8_GT_GUC_IRQ) +void gen2_engine_cs_irq(struct intel_engine_cs *engine); + void gen11_gt_irq_reset(struct intel_gt *gt); void gen11_gt_irq_postinstall(struct intel_gt *gt); void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index e1397b8d3586..2b443b735a98 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1771,7 +1771,7 @@ void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir) return; if (pm_iir & PM_VEBOX_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(gt->engine[VECS0]); + gen2_engine_cs_irq(gt->engine[VECS0]); if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index dd1971040bbc..8d15bec36996 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3941,7 +3941,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]); + gen2_engine_cs_irq(dev_priv->gt.engine[RCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i8xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4049,7 +4049,7 @@ static irqreturn_t 
i915_irq_handler(int irq, void *arg) intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]); + gen2_engine_cs_irq(dev_priv->gt.engine[RCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); @@ -4194,10 +4194,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) intel_uncore_write(&dev_priv->uncore, GEN2_IIR, iir); if (iir & I915_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(dev_priv->gt.engine[RCS0]); + gen2_engine_cs_irq(dev_priv->gt.engine[RCS0]); if (iir & I915_BSD_USER_INTERRUPT) - intel_engine_signal_breadcrumbs(dev_priv->gt.engine[VCS0]); + gen2_engine_cs_irq(dev_priv->gt.engine[VCS0]); if (iir & I915_MASTER_ERROR_INTERRUPT) i9xx_error_irq_handler(dev_priv, eir, eir_stuck); From patchwork Mon Dec 28 15:52:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991349 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4029AC433E0 for ; Mon, 28 Dec 2020 15:53:04 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 034AC206E5 for ; Mon, 28 Dec 2020 15:53:03 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 034AC206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) 
header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8B480899DC; Mon, 28 Dec 2020 15:52:56 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 742488925D for ; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448240-1500050 for multiple; Mon, 28 Dec 2020 15:52:41 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:23 +0000 Message-Id: <20201228155229.9516-48-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 48/54] drm/i915/gt: Support creation of 'internal' rings X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To support legacy ring buffer scheduling, we want a virtual ringbuffer for each client. These rings are purely for holding the requests as they are being constructed on the CPU and never accessed by the GPU, so they should not be bound into the GGTT, and we can use plain old WB mapped pages. As they are not bound, we need to nerf a few assumptions that a rq->ring is in the GGTT. 
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 17 +----- drivers/gpu/drm/i915/gt/intel_ring.c | 66 ++++++++++++++-------- drivers/gpu/drm/i915/gt/intel_ring.h | 12 +++- drivers/gpu/drm/i915/gt/intel_ring_types.h | 2 + 5 files changed, 59 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index b3eb7513659b..d01678c26a91 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -259,7 +259,7 @@ int __intel_context_do_pin_ww(struct intel_context *ce, } CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n", - i915_ggtt_offset(ce->ring->vma), + intel_ring_address(ce->ring), ce->ring->head, ce->ring->tail); handoff = true; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0b96007c30f5..befd299cf675 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1323,7 +1323,7 @@ static int print_ring(char *buf, int sz, struct i915_request *rq) len = scnprintf(buf, sz, "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ", - i915_ggtt_offset(rq->ring->vma), + intel_ring_address(rq->ring), tl ? 
tl->ggtt_offset : 0, hwsp_seqno(rq), DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context), @@ -1651,7 +1651,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, i915_request_show(m, rq, "\t\tactive ", 0); drm_printf(m, "\t\tring->start: 0x%08x\n", - i915_ggtt_offset(rq->ring->vma)); + intel_ring_address(rq->ring)); drm_printf(m, "\t\tring->head: 0x%08x\n", rq->ring->head); drm_printf(m, "\t\tring->tail: 0x%08x\n", @@ -1732,13 +1732,6 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) return total; } -static bool match_ring(struct i915_request *rq) -{ - u32 ring = ENGINE_READ(rq->engine, RING_START); - - return ring == i915_ggtt_offset(rq->ring->vma); -} - struct i915_request * intel_engine_find_active_request(struct intel_engine_cs *engine) { @@ -1778,11 +1771,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) continue; if (!i915_request_started(request)) - continue; - - /* More than one preemptible request may match! */ - if (!match_ring(request)) - continue; + break; active = request; break; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 06385550450c..ea873c2a7d6b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -32,33 +32,42 @@ void __intel_ring_pin(struct intel_ring *ring) int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww) { struct i915_vma *vma = ring->vma; - unsigned int flags; void *addr; int ret; if (atomic_fetch_inc(&ring->pin_count)) return 0; - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + if (!(ring->flags & INTEL_RING_CREATE_INTERNAL)) { + int type = i915_coherent_map_type(vma->vm->i915); + unsigned int pin; - if (vma->obj->stolen) - flags |= PIN_MAPPABLE; - else - flags |= PIN_HIGH; + /* Ring wraparound at offset 0 sometimes hangs. No idea why. 
*/ + pin = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); - ret = i915_ggtt_pin(vma, ww, 0, flags); - if (unlikely(ret)) - goto err_unpin; + if (vma->obj->stolen) + pin |= PIN_MAPPABLE; + else + pin |= PIN_HIGH; - if (i915_vma_is_map_and_fenceable(vma)) - addr = (void __force *)i915_vma_pin_iomap(vma); - else - addr = i915_gem_object_pin_map(vma->obj, - i915_coherent_map_type(vma->vm->i915)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_ring; + ret = i915_ggtt_pin(vma, ww, 0, pin); + if (unlikely(ret)) + goto err_unpin; + + if (i915_vma_is_map_and_fenceable(vma)) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, type); + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto err_ring; + } + } else { + addr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(addr)) { + ret = PTR_ERR(addr); + goto err_ring; + } } i915_vma_make_unshrinkable(vma); @@ -99,10 +108,12 @@ void intel_ring_unpin(struct intel_ring *ring) i915_gem_object_unpin_map(vma->obj); i915_vma_make_purgeable(vma); - i915_vma_unpin(vma); + if (!(ring->flags & INTEL_RING_CREATE_INTERNAL)) + i915_vma_unpin(vma); } -static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) +static struct i915_vma * +create_ring_vma(struct i915_ggtt *ggtt, int size, unsigned int flags) { struct i915_address_space *vm = &ggtt->vm; struct drm_i915_private *i915 = vm->i915; @@ -110,8 +121,10 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) struct i915_vma *vma; obj = ERR_PTR(-ENODEV); - if (i915_ggtt_has_aperture(ggtt)) - obj = i915_gem_object_create_stolen(i915, size); + if (!(flags & INTEL_RING_CREATE_INTERNAL)) { + if (i915_ggtt_has_aperture(ggtt)) + obj = i915_gem_object_create_stolen(i915, size); + } if (IS_ERR(obj)) obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) @@ -136,12 +149,14 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) } struct intel_ring * 
-intel_engine_create_ring(struct intel_engine_cs *engine, int size) +intel_engine_create_ring(struct intel_engine_cs *engine, unsigned int size) { struct drm_i915_private *i915 = engine->i915; + unsigned int flags = size & GENMASK(11, 0); struct intel_ring *ring; struct i915_vma *vma; + size ^= flags; GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); @@ -150,8 +165,10 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) return ERR_PTR(-ENOMEM); kref_init(&ring->ref); + ring->size = size; ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size); + ring->flags = flags; /* * Workaround an erratum on the i830 which causes a hang if @@ -164,11 +181,12 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) intel_ring_update_space(ring); - vma = create_ring_vma(engine->gt->ggtt, size); + vma = create_ring_vma(engine->gt->ggtt, size, flags); if (IS_ERR(vma)) { kfree(ring); return ERR_CAST(vma); } + ring->vma = vma; return ring; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index 1700579bdc93..5419ac0327f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -9,12 +9,14 @@ #include "i915_gem.h" /* GEM_BUG_ON */ #include "i915_request.h" +#include "i915_vma.h" #include "intel_ring_types.h" struct intel_engine_cs; struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); +intel_engine_create_ring(struct intel_engine_cs *engine, unsigned int size); +#define INTEL_RING_CREATE_INTERNAL BIT(0) u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); int intel_ring_cacheline_align(struct i915_request *rq); @@ -138,4 +140,12 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) return (head - tail - CACHELINE_BYTES) & (size - 1); } +static inline u32 intel_ring_address(const struct intel_ring *ring) +{ + if (ring->flags & INTEL_RING_CREATE_INTERNAL) + return -1; + + 
return i915_ggtt_offset(ring->vma); +} + #endif /* INTEL_RING_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h index 1a189ea00fd8..d927deafcb33 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_types.h +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -47,6 +47,8 @@ struct intel_ring { u32 size; u32 wrap; u32 effective_size; + + unsigned long flags; }; #endif /* INTEL_RING_TYPES_H */ From patchwork Mon Dec 28 15:52:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991423 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 27381C04E84 for ; Mon, 28 Dec 2020 15:53:20 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id D1D05206E5 for ; Mon, 28 Dec 2020 15:53:19 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org D1D05206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 13B7589AB3; Mon, 28 Dec 2020 15:52:59 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 9F91B89A0F for 
; Mon, 28 Dec 2020 15:52:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448241-1500050 for multiple; Mon, 28 Dec 2020 15:52:41 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:24 +0000 Message-Id: <20201228155229.9516-49-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 49/54] drm/i915/gt: Use client timeline address for seqno writes X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" If we allow for per-client timelines, even with legacy ring submission, we open the door to a world full of possibilities [scheduling and semaphores]. 
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/gen6_engine_cs.c | 89 +++++++++++++++++------- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 23 ++---- drivers/gpu/drm/i915/i915_request.h | 13 ++++ 3 files changed, 82 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c index 2f59dd3bdc18..14cab4c726ce 100644 --- a/drivers/gpu/drm/i915/gt/gen6_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen6_engine_cs.c @@ -141,6 +141,12 @@ int gen6_emit_flush_rcs(struct i915_request *rq, u32 mode) u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); + u32 offset = __i915_request_hwsp_offset(rq); + unsigned int flags; + + GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT); + /* First we do the gen6_emit_post_sync_nonzero_flush w/a */ *cs++ = GFX_OP_PIPE_CONTROL(4); *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; @@ -154,15 +160,22 @@ u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_GLOBAL_GTT; *cs++ = 0; - /* Finally we can flush and with it emit the breadcrumb */ - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + flags = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->ggtt_offset | - PIPE_CONTROL_GLOBAL_GTT; + if (intel_timeline_is_relative(tl)) { + offset = offset_in_page(offset); + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + } + if (!intel_timeline_in_context(tl)) + offset |= PIPE_CONTROL_GLOBAL_GTT; + + /* Finally we can flush and with it emit the breadcrumb */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -351,15 +364,28 @@ int gen7_emit_flush_rcs(struct i915_request *rq, u32 mode) u32 
*gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); + u32 offset = __i915_request_hwsp_offset(rq); + unsigned int flags; + + GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT); + + flags = (PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL); - *cs++ = i915_request_active_timeline(rq)->ggtt_offset; + if (intel_timeline_is_relative(tl)) { + offset = offset_in_page(offset); + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + } + if (!intel_timeline_in_context(tl)) + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -373,11 +399,21 @@ u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs) u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) { - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); + u32 offset = __i915_request_hwsp_offset(rq); + unsigned int flags = 0; + + GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT); - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + if (intel_timeline_is_relative(tl)) { + offset = offset_in_page(offset); + flags |= MI_FLUSH_DW_STORE_INDEX; + } + if (!intel_timeline_in_context(tl)) + offset |= MI_FLUSH_DW_USE_GTT; + + *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | flags; + *cs++ = offset; *cs++ = rq->fence.seqno; *cs++ = MI_USER_INTERRUPT; @@ -391,28 +427,31 @@ u32 
*gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) #define GEN7_XCS_WA 32 u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs) { + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); + u32 offset = __i915_request_hwsp_offset(rq); + u32 cmd = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW; int i; - GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma); - GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR); + GEM_BUG_ON(tl->mode == INTEL_TIMELINE_RELATIVE_CONTEXT); - *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB | - MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; + if (intel_timeline_is_relative(tl)) { + offset = offset_in_page(offset); + cmd |= MI_FLUSH_DW_STORE_INDEX; + } + if (!intel_timeline_in_context(tl)) + offset |= MI_FLUSH_DW_USE_GTT; + + *cs++ = cmd; + *cs++ = offset; *cs++ = rq->fence.seqno; for (i = 0; i < GEN7_XCS_WA; i++) { - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_SEQNO_ADDR; + *cs++ = cmd; + *cs++ = offset; *cs++ = rq->fence.seqno; } - *cs++ = MI_FLUSH_DW; - *cs++ = 0; - *cs++ = 0; - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 5f0485cb9e3b..171874589174 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -336,19 +336,6 @@ static inline u32 preempt_address(struct intel_engine_cs *engine) I915_GEM_HWS_PREEMPT_ADDR); } -static u32 hwsp_offset(const struct i915_request *rq) -{ - const struct intel_timeline_cacheline *cl; - - /* Before the request is executed, the timeline/cachline is fixed */ - - cl = rcu_dereference_protected(rq->hwsp_cacheline, 1); - if (cl) - return cl->ggtt_offset; - - return rcu_dereference_protected(rq->timeline, 
1)->ggtt_offset; -} - int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; @@ -371,7 +358,7 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq) *cs++ = MI_NOOP; *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = hwsp_offset(rq); + *cs++ = __i915_request_hwsp_offset(rq); *cs++ = 0; *cs++ = rq->fence.seqno - 1; @@ -504,7 +491,7 @@ static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs) { struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); unsigned int flags = MI_FLUSH_DW_OP_STOREDW; - u32 offset = hwsp_offset(rq); + u32 offset = __i915_request_hwsp_offset(rq); if (intel_timeline_is_relative(tl)) { offset = offset_in_page(offset); @@ -526,7 +513,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); unsigned int flags = PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL; - u32 offset = hwsp_offset(rq); + u32 offset = __i915_request_hwsp_offset(rq); if (intel_timeline_is_relative(tl)) { offset = offset_in_page(offset); @@ -551,7 +538,7 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); - u32 offset = hwsp_offset(rq); + u32 offset = __i915_request_hwsp_offset(rq); unsigned int flags; flags = (PIPE_CONTROL_CS_STALL | @@ -633,7 +620,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1); - u32 offset = hwsp_offset(rq); + u32 offset = __i915_request_hwsp_offset(rq); unsigned int flags; flags = (PIPE_CONTROL_CS_STALL | diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index adfe863f778e..560a0ef0c80d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ 
b/drivers/gpu/drm/i915/i915_request.h @@ -615,4 +615,17 @@ i915_request_active_timeline(const struct i915_request *rq) lockdep_is_held(&rq->engine->active.lock)); } +static inline u32 __i915_request_hwsp_offset(const struct i915_request *rq) +{ + const struct intel_timeline_cacheline *cl; + + /* Before the request is executed, the timeline/cachline is fixed */ + + cl = rcu_dereference_protected(rq->hwsp_cacheline, 1); + if (cl) + return cl->ggtt_offset; + + return rcu_dereference_protected(rq->timeline, 1)->ggtt_offset; +} + #endif /* I915_REQUEST_H */ From patchwork Mon Dec 28 15:52:25 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991399 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 78C47C04ABA for ; Mon, 28 Dec 2020 15:53:19 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 2939C206E5 for ; Mon, 28 Dec 2020 15:53:19 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 2939C206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id BF9BD89A8C; Mon, 28 Dec 2020 15:52:58 +0000 (UTC) 
Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id BC9238925D for ; Mon, 28 Dec 2020 15:52:48 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448243-1500050 for multiple; Mon, 28 Dec 2020 15:52:41 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:25 +0000 Message-Id: <20201228155229.9516-50-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 50/54] drm/i915/gt: Infrastructure for ring scheduling X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Build a bare bones scheduler to sit on top the global legacy ringbuffer submission. This virtual execlists scheme should be applicable to all older platforms. A key problem we have with the legacy ring buffer submission is that it only allows for FIFO queuing. All clients share the global request queue and must contend for its lock when submitting. As any client may need to wait for external events, all clients must then wait. However, if we stage each client into their own virtual ringbuffer with their own timelines, we can copy the client requests into the global ringbuffer only when they are ready, reordering the submission around stalls. 
Furthermore, the ability to reorder gives us rudimentary priority sorting -- although without preemption support, once something is on the GPU it stays on the GPU, and so it is still possible for a hog to delay a high priority request (such as updating the display). However, it does mean that in keeping a short submission queue, the high priority request will be next. This design resembles the old guc submission scheduler, for reordering requests onto a global workqueue. The implementation uses the MI_USER_INTERRUPT at the end of every request to track completion, so is more interrupt happy than execlists [which has an interrupt for each context event, albeit two]. Our interrupts on these systems are relatively heavy, and in the past we have been able to completely starve Sandybridge by the interrupt traffic. Our interrupt handlers are now much better (in part offloading the work to bottom halves leaving the interrupt itself only dealing with acking the registers) but we can still see the impact of starvation in the uneven submission latency on a saturated system. Overall though, the short submission queues and extra interrupts do not appear to be affecting throughput (+-10%, some tasks even improve due to the reduced request overheads) and improve latency. [Which is a massive improvement since the introduction of Sandybridge!]
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_engine.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + .../gpu/drm/i915/gt/intel_ring_scheduler.c | 768 ++++++++++++++++++ .../gpu/drm/i915/gt/intel_ring_submission.c | 17 +- .../gpu/drm/i915/gt/intel_ring_submission.h | 17 + 6 files changed, 797 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_ring_scheduler.c create mode 100644 drivers/gpu/drm/i915/gt/intel_ring_submission.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f1c7c3246226..f14a3d9145dc 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -112,6 +112,7 @@ gt-y += \ gt/intel_renderstate.o \ gt/intel_reset.o \ gt/intel_ring.o \ + gt/intel_ring_scheduler.o \ gt/intel_ring_submission.o \ gt/intel_rps.o \ gt/intel_sseu.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 801ae54cf60d..fa257a305143 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -191,6 +191,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine); int intel_engine_resume(struct intel_engine_cs *engine); int intel_ring_submission_setup(struct intel_engine_cs *engine); +int intel_ring_scheduler_setup(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 74e55e0fbc65..a93bef46e455 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -319,6 +319,7 @@ struct intel_engine_cs { struct { struct intel_ring *ring; struct intel_timeline *timeline; + struct intel_context *context; } legacy; /* diff --git a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c 
b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c new file mode 100644 index 000000000000..338d847445ca --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c @@ -0,0 +1,768 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include + +#include + +#include "i915_drv.h" +#include "intel_breadcrumbs.h" +#include "intel_context.h" +#include "intel_engine_pm.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_reset.h" +#include "intel_ring.h" +#include "intel_ring_submission.h" +#include "shmem_utils.h" + +/* + * Rough estimate of the typical request size, performing a flush, + * set-context and then emitting the batch. + */ +#define LEGACY_REQUEST_SIZE 200 + +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->sched.attr.priority; +} + +static inline u64 rq_deadline(const struct i915_request *rq) +{ + return rq->sched.deadline; +} + +static inline bool reset_in_progress(const struct intel_engine_cs *engine) +{ + return unlikely(!__tasklet_is_enabled(&engine->active.tasklet)); +} + +static void +set_current_context(struct intel_context **ptr, struct intel_context *ce) +{ + if (ce) + intel_context_get(ce); + + ce = xchg(ptr, ce); + + if (ce) + intel_context_put(ce); +} + +static struct intel_engine_cs *__schedule_in(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + struct intel_engine_cs *engine = rq->engine; + + intel_context_get(ce); + + __intel_gt_pm_get(engine->gt); + if (engine->fw_domain && !engine->fw_active++) + intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); + + CE_TRACE(ce, "schedule-in\n"); + + return engine; +} + +static void schedule_in(struct i915_request *rq) +{ + struct intel_context * const ce = rq->context; + struct intel_engine_cs *old; + + GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); + + old = ce->inflight; + if (!old) + old = __schedule_in(rq); + WRITE_ONCE(ce->inflight, ptr_inc(old)); + + 
GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); + GEM_BUG_ON(!intel_context_inflight_count(ce)); +} + +static void __schedule_out(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + struct intel_engine_cs *engine = rq->engine; + + CE_TRACE(ce, "schedule-out\n"); + + if (list_is_last_rcu(&rq->link, &ce->timeline->requests)) + intel_engine_add_retire(engine, ce->timeline); + else + i915_request_update_deadline(list_next_entry(rq, link)); + + if (engine->fw_domain && !--engine->fw_active) + intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); + intel_gt_pm_put_async(engine->gt); +} + +static void schedule_out(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + + GEM_BUG_ON(!ce->inflight); + ce->inflight = ptr_dec(ce->inflight); + if (!intel_context_inflight_count(ce)) { + GEM_BUG_ON(ce->inflight != rq->engine); + __schedule_out(rq); + WRITE_ONCE(ce->inflight, NULL); + intel_context_put(ce); + } + + i915_request_put(rq); +} + +static u32 *ring_map(struct intel_ring *ring, u32 len) +{ + u32 *va; + + if (unlikely(ring->tail + len > ring->effective_size)) { + memset(ring->vaddr + ring->tail, 0, ring->size - ring->tail); + ring->tail = 0; + } + + va = ring->vaddr + ring->tail; + ring->tail = intel_ring_wrap(ring, ring->tail + len); + + return va; +} + +static inline u32 *ring_map_dw(struct intel_ring *ring, u32 len) +{ + return ring_map(ring, len * sizeof(u32)); +} + +static void ring_copy(struct intel_ring *dst, + const struct intel_ring *src, + u32 start, u32 end) +{ + unsigned int len; + void *out; + + len = end - start; + if (end < start) + len += src->size; + out = ring_map(dst, len); + + if (end < start) { + len = src->size - start; + memcpy(out, src->vaddr + start, len); + out += len; + start = 0; + } + + memcpy(out, src->vaddr + start, end - start); +} + +static void switch_context(struct intel_ring *ring, struct i915_request *rq) +{ +} + +static struct i915_request *ring_submit(struct i915_request *rq) +{ + 
struct intel_ring *ring = rq->engine->legacy.ring; + + __i915_request_submit(rq); + + if (rq->engine->legacy.context != rq->context) { + switch_context(ring, rq); + set_current_context(&rq->engine->legacy.context, rq->context); + } + + ring_copy(ring, rq->ring, rq->head, rq->tail); + return rq; +} + +static struct i915_request ** +copy_active(struct i915_request **port, struct i915_request * const *active) +{ + while (*active) + *port++ = *active++; + + return port; +} + +static inline void +copy_ports(struct i915_request **dst, struct i915_request **src, int count) +{ + /* A memcpy_p() would be very useful here! */ + while (count--) + WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ +} + +static inline void write_tail(const struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_TAIL, engine->legacy.ring->tail); +} + +static void dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const el = &engine->execlists; + struct i915_request ** const last_port = el->pending + el->port_mask; + struct i915_request **port, **first, *last; + struct i915_priolist *p; + + first = copy_active(el->pending, el->active); + if (first > last_port) + return; + + local_irq_disable(); + + last = NULL; + port = first; + spin_lock(&engine->active.lock); + for_each_priolist(p, &engine->active.queue) { + struct i915_request *rq, *rn; + + priolist_for_each_request_safe(rq, rn, p) { + GEM_BUG_ON(rq == last); + if (last && rq->context != last->context) { + if (port == last_port) + goto done; + + *port++ = i915_request_get(last); + } + + last = ring_submit(rq); + } + + i915_priolist_advance(&engine->active.queue, p); + } +done: + spin_unlock(&engine->active.lock); + + if (last) { + *port++ = i915_request_get(last); + *port = NULL; + + WRITE_ONCE(el->active, el->pending); + + copy_ports(el->inflight, el->pending, port - el->pending + 1); + while (port-- != first) + schedule_in(*port); + + wmb(); /* paranoid flush of WCB before RING_TAIL write */ + 
write_tail(engine); + + WRITE_ONCE(el->active, el->inflight); + GEM_BUG_ON(!*el->active); + } + + local_irq_enable(); /* flush irq_work *after* RING_TAIL write */ +} + +static void post_process_csb(struct i915_request **port, + struct i915_request **last) +{ + while (port != last) + schedule_out(*port++); +} + +static struct i915_request ** +process_csb(struct intel_engine_execlists *el, struct i915_request **inactive) +{ + struct i915_request *rq; + + while ((rq = *el->active)) { + if (!__i915_request_is_complete(rq)) + break; + + *inactive++ = rq; + el->active++; + } + + return inactive; +} + +static void submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct i915_request *post[2 * EXECLIST_MAX_PORTS]; + struct i915_request **inactive; + + rcu_read_lock(); + inactive = process_csb(&engine->execlists, post); + GEM_BUG_ON(inactive - post > ARRAY_SIZE(post)); + + if (!i915_sched_is_idle(&engine->active)) + dequeue(engine); + + post_process_csb(post, inactive); + rcu_read_unlock(); +} + +static void reset_prepare(struct intel_engine_cs *engine) +{ + GEM_TRACE("%s\n", engine->name); + + __tasklet_disable_sync_once(&engine->active.tasklet); + GEM_BUG_ON(!reset_in_progress(engine)); + + intel_ring_submission_reset_prepare(engine); +} + +static inline void clear_ports(struct i915_request **ports, int count) +{ + memset_p((void **)ports, NULL, count); +} + +static struct i915_request ** +cancel_port_requests(struct intel_engine_execlists * const el, + struct i915_request **inactive) +{ + struct i915_request * const *port; + + clear_ports(el->pending, ARRAY_SIZE(el->pending)); + + /* Mark the end of active before we overwrite *active */ + for (port = xchg(&el->active, el->pending); *port; port++) + *inactive++ = *port; + clear_ports(el->inflight, ARRAY_SIZE(el->inflight)); + + smp_wmb(); /* complete the seqlock for execlists_active() */ + WRITE_ONCE(el->active, el->inflight); + + return inactive; +} + 
+static void __ring_rewind(struct intel_engine_cs *engine, bool stalled) +{ + struct i915_request *rq; + unsigned long flags; + + rcu_read_lock(); + spin_lock_irqsave(&engine->active.lock, flags); + rq = __intel_engine_rewind_requests(engine); + spin_unlock_irqrestore(&engine->active.lock, flags); + if (rq && __i915_request_has_started(rq)) + __i915_request_reset(rq, stalled); + rcu_read_unlock(); +} + +static void ring_reset_csb(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const el = &engine->execlists; + struct i915_request *post[2 * EXECLIST_MAX_PORTS]; + struct i915_request **inactive; + + rcu_read_lock(); + inactive = cancel_port_requests(el, post); + + /* Clear the global submission state, we will submit from scratch */ + intel_ring_reset(engine->legacy.ring, 0); + set_current_context(&engine->legacy.context, NULL); + + post_process_csb(post, inactive); + rcu_read_unlock(); +} + +static void ring_reset_rewind(struct intel_engine_cs *engine, bool stalled) +{ + ring_reset_csb(engine); + __ring_rewind(engine, stalled); +} + +static void nop_submission_tasklet(unsigned long data) +{ +} + +static void mark_eio(struct i915_request *rq) +{ + if (__i915_request_is_complete(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + i915_request_set_error_once(rq, -EIO); + i915_request_mark_complete(rq); +} + +static void ring_reset_cancel(struct intel_engine_cs *engine) +{ + struct i915_request *rq, *rn; + struct i915_priolist *p; + unsigned long flags; + + ring_reset_csb(engine); + + rcu_read_lock(); + spin_lock_irqsave(&engine->active.lock, flags); + + /* Mark all submitted requests as skipped. */ + list_for_each_entry(rq, &engine->active.requests, sched.link) + mark_eio(rq); + intel_engine_signal_breadcrumbs(engine); + + /* Flush the queued requests to the timeline list (for retiring). 
*/ + for_each_priolist(p, &engine->active.queue) { + priolist_for_each_request_safe(rq, rn, p) { + mark_eio(rq); + __i915_request_submit(rq); + } + i915_priolist_advance(&engine->active.queue, p); + } + GEM_BUG_ON(!i915_sched_is_idle(&engine->active)); + + /* Remaining _unready_ requests will be nop'ed when submitted */ + + GEM_BUG_ON(__tasklet_is_enabled(&engine->active.tasklet)); + engine->active.tasklet.func = nop_submission_tasklet; + + spin_unlock_irqrestore(&engine->active.lock, flags); + rcu_read_unlock(); +} + +static void reset_finish(struct intel_engine_cs *engine) +{ + intel_ring_submission_reset_finish(engine); + + if (__tasklet_enable(&engine->active.tasklet)) + i915_sched_kick(&engine->active); +} + +static void submission_park(struct intel_engine_cs *engine) +{ + /* drain the submit queue */ + intel_breadcrumbs_unpin_irq(engine->breadcrumbs); + i915_sched_kick(&engine->active); +} + +static void submission_unpark(struct intel_engine_cs *engine) +{ + intel_breadcrumbs_pin_irq(engine->breadcrumbs); +} + +static void ring_context_destroy(struct kref *ref) +{ + struct intel_context *ce = container_of(ref, typeof(*ce), ref); + + GEM_BUG_ON(intel_context_is_pinned(ce)); + + if (ce->state) + i915_vma_put(ce->state); + if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) + intel_ring_put(ce->ring); + + intel_context_fini(ce); + intel_context_free(ce); +} + +static int alloc_context_vma(struct intel_context *ce) + +{ + struct intel_engine_cs *engine = ce->engine; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_shmem(engine->i915, engine->context_size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + /* + * Try to make the context utilize L3 as well as LLC. + * + * On VLV we don't have L3 controls in the PTEs so we + * shouldn't touch the cache level, especially as that + * would make the object snooped which might have a + * negative performance impact. 
+ * + * Snooping is required on non-llc platforms in execlist + * mode, but since all GGTT accesses use PAT entry 0 we + * get snooping anyway regardless of cache_level. + * + * This is only applicable for Ivy Bridge devices since + * later platforms don't have L3 control bits in the PTE. + */ + if (IS_IVYBRIDGE(engine->i915)) + i915_gem_object_set_cache_coherency(obj, I915_CACHE_L3_LLC); + + if (engine->default_state) { + void *vaddr; + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + shmem_read(engine->default_state, 0, + vaddr, engine->context_size); + __set_bit(CONTEXT_VALID_BIT, &ce->flags); + + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + } + + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + ce->state = vma; + return 0; + +err_obj: + i915_gem_object_put(obj); + return err; +} + +static struct intel_timeline *pinned_timeline(struct intel_context *ce) +{ + struct intel_timeline *tl = fetch_and_zero(&ce->timeline); + + return intel_timeline_create_from_engine(ce->engine, + page_unmask_bits(tl)); +} + +static int alloc_timeline(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_timeline *tl; + + if (unlikely(ce->timeline)) + tl = pinned_timeline(ce); + else + tl = intel_timeline_create(engine->gt); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + ce->timeline = tl; + return 0; +} + +static int ring_context_alloc(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_ring *ring; + int err; + + GEM_BUG_ON(ce->state); + if (engine->context_size) { + err = alloc_context_vma(ce); + if (err) + return err; + } + + if (!page_mask_bits(ce->timeline)) { + err = alloc_timeline(ce); + if (err) + goto err_vma; + } + + ring = intel_engine_create_ring(engine, + (unsigned long)ce->ring | + INTEL_RING_CREATE_INTERNAL); + if 
(IS_ERR(ring)) { + err = PTR_ERR(ring); + goto err_timeline; + } + ce->ring = ring; + + return 0; + +err_timeline: + intel_timeline_put(ce->timeline); +err_vma: + if (ce->state) { + i915_vma_put(ce->state); + ce->state = NULL; + } + return err; +} + +static int ring_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **unused) +{ + return 0; +} + +static int ring_context_pin(struct intel_context *ce, void *unused) +{ + return 0; +} + +static void ring_context_unpin(struct intel_context *ce) +{ +} + +static void ring_context_post_unpin(struct intel_context *ce) +{ +} + +static void ring_context_reset(struct intel_context *ce) +{ + intel_ring_reset(ce->ring, 0); + clear_bit(CONTEXT_VALID_BIT, &ce->flags); +} + +static const struct intel_context_ops ring_context_ops = { + .alloc = ring_context_alloc, + + .pre_pin = ring_context_pre_pin, + .pin = ring_context_pin, + .unpin = ring_context_unpin, + .post_unpin = ring_context_post_unpin, + + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + + .reset = ring_context_reset, + .destroy = ring_context_destroy, +}; + +static int ring_request_alloc(struct i915_request *rq) +{ + int ret; + + GEM_BUG_ON(!intel_context_is_pinned(rq->context)); + + /* + * Flush enough space to reduce the likelihood of waiting after + * we start building the request - in which case we will just + * have to repeat work. + */ + rq->reserved_space += LEGACY_REQUEST_SIZE; + + /* Unconditionally invalidate GPU caches and TLBs. 
*/ + ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (ret) + return ret; + + rq->reserved_space -= LEGACY_REQUEST_SIZE; + return 0; +} + +static void set_default_submission(struct intel_engine_cs *engine) +{ + engine->submit_request = i915_request_enqueue; + engine->active.tasklet.func = submission_tasklet; +} + +static void ring_release(struct intel_engine_cs *engine) +{ + intel_engine_cleanup_common(engine); + + set_current_context(&engine->legacy.context, NULL); + + intel_ring_unpin(engine->legacy.ring); + intel_ring_put(engine->legacy.ring); +} + +static void setup_irq(struct intel_engine_cs *engine) +{ +} + +static void setup_common(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + /* gen8+ are only supported with execlists */ + GEM_BUG_ON(INTEL_GEN(i915) >= 8); + GEM_BUG_ON(INTEL_GEN(i915) < 8); + + setup_irq(engine); + + engine->park = submission_park; + engine->unpark = submission_unpark; + + engine->resume = intel_ring_submission_resume; + engine->sanitize = intel_ring_submission_sanitize; + + engine->reset.prepare = reset_prepare; + engine->reset.rewind = ring_reset_rewind; + engine->reset.cancel = ring_reset_cancel; + engine->reset.finish = reset_finish; + + engine->cops = &ring_context_ops; + engine->request_alloc = ring_request_alloc; + + engine->set_default_submission = set_default_submission; +} + +static void setup_rcs(struct intel_engine_cs *engine) +{ +} + +static void setup_vcs(struct intel_engine_cs *engine) +{ +} + +static void setup_bcs(struct intel_engine_cs *engine) +{ +} + +static void setup_vecs(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!IS_HASWELL(engine->i915)); +} + +static unsigned int global_ring_size(void) +{ + /* Enough space to hold 2 clients and the context switch */ + return roundup_pow_of_two(EXECLIST_MAX_PORTS * SZ_16K + SZ_4K); +} + +int intel_ring_scheduler_setup(struct intel_engine_cs *engine) +{ + struct intel_ring *ring; + int err; + + 
GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); + + tasklet_init(&engine->active.tasklet, + submission_tasklet, (unsigned long)engine); + + setup_common(engine); + + switch (engine->class) { + case RENDER_CLASS: + setup_rcs(engine); + break; + case VIDEO_DECODE_CLASS: + setup_vcs(engine); + break; + case COPY_ENGINE_CLASS: + setup_bcs(engine); + break; + case VIDEO_ENHANCEMENT_CLASS: + setup_vecs(engine); + break; + default: + MISSING_CASE(engine->class); + return -ENODEV; + } + + ring = intel_engine_create_ring(engine, global_ring_size()); + if (IS_ERR(ring)) { + err = PTR_ERR(ring); + goto err; + } + + err = intel_ring_pin(ring, NULL); + if (err) + goto err_ring; + + GEM_BUG_ON(engine->legacy.ring); + engine->legacy.ring = ring; + + engine->flags |= I915_ENGINE_HAS_SCHEDULER; + engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; + + /* Finally, take ownership and responsibility for cleanup! */ + engine->release = ring_release; + return 0; + +err_ring: + intel_ring_put(ring); +err: + intel_engine_cleanup_common(engine); + return err; +} diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index cab775a78912..19d7edda2a48 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -37,6 +37,7 @@ #include "intel_gt.h" #include "intel_reset.h" #include "intel_ring.h" +#include "intel_ring_submission.h" #include "shmem_utils.h" /* Rough estimate of the typical request size, performing a flush, @@ -217,7 +218,7 @@ static void set_pp_dir(struct intel_engine_cs *engine) } } -static int xcs_resume(struct intel_engine_cs *engine) +int intel_ring_submission_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->legacy.ring; @@ -329,7 +330,7 @@ static void sanitize_hwsp(struct intel_engine_cs *engine) intel_timeline_reset_seqno(tl); } -static void xcs_sanitize(struct intel_engine_cs *engine) 
+void intel_ring_submission_sanitize(struct intel_engine_cs *engine) { /* * Poison residual state on resume, in case the suspend didn't! @@ -354,7 +355,7 @@ static void xcs_sanitize(struct intel_engine_cs *engine) clflush_cache_range(engine->status_page.addr, PAGE_SIZE); } -static void reset_prepare(struct intel_engine_cs *engine) +void intel_ring_submission_reset_prepare(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; const u32 base = engine->mmio_base; @@ -463,7 +464,7 @@ static void reset_rewind(struct intel_engine_cs *engine, bool stalled) spin_unlock_irqrestore(&engine->active.lock, flags); } -static void reset_finish(struct intel_engine_cs *engine) +void intel_ring_submission_reset_finish(struct intel_engine_cs *engine) { } @@ -1104,13 +1105,13 @@ static void setup_common(struct intel_engine_cs *engine) setup_irq(engine); - engine->resume = xcs_resume; - engine->sanitize = xcs_sanitize; + engine->resume = intel_ring_submission_resume; + engine->sanitize = intel_ring_submission_sanitize; - engine->reset.prepare = reset_prepare; + engine->reset.prepare = intel_ring_submission_reset_prepare; engine->reset.rewind = reset_rewind; engine->reset.cancel = reset_cancel; - engine->reset.finish = reset_finish; + engine->reset.finish = intel_ring_submission_reset_finish; engine->cops = &ring_context_ops; engine->request_alloc = ring_request_alloc; diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.h b/drivers/gpu/drm/i915/gt/intel_ring_submission.h new file mode 100644 index 000000000000..59a43c221748 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef __INTEL_RING_SUBMISSION_H__ +#define __INTEL_RING_SUBMISSION_H__ + +struct intel_engine_cs; + +void intel_ring_submission_reset_prepare(struct intel_engine_cs *engine); +void intel_ring_submission_reset_finish(struct intel_engine_cs *engine); + +int 
intel_ring_submission_resume(struct intel_engine_cs *engine); +void intel_ring_submission_sanitize(struct intel_engine_cs *engine); + +#endif /* __INTEL_RING_SUBMISSION_H__ */ From patchwork Mon Dec 28 15:52:26 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991357 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 04746C433E0 for ; Mon, 28 Dec 2020 15:53:08 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id B94EB206E5 for ; Mon, 28 Dec 2020 15:53:07 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org B94EB206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 1DB9489A32; Mon, 28 Dec 2020 15:52:57 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id E4738899F2 for ; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448244-1500050 
for multiple; Mon, 28 Dec 2020 15:52:41 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:26 +0000 Message-Id: <20201228155229.9516-51-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 51/54] drm/i915/gt: Enable busy-stats for ring-scheduler X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Couple up the context in/out accounting to record how long each engine is busy handling requests. This is exposed to userspace for more accurate measurements, and also enables our soft-rps timer. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_ring_scheduler.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c index 338d847445ca..b95f4ad4bd00 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c @@ -11,6 +11,7 @@ #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_engine_stats.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" @@ -63,6 +64,8 @@ static struct intel_engine_cs *__schedule_in(struct i915_request *rq) if (engine->fw_domain && !engine->fw_active++) intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); + intel_engine_context_in(engine); + CE_TRACE(ce, "schedule-in\n"); return engine; @@ -96,6 +99,8 @@ static void __schedule_out(struct i915_request *rq) else i915_request_update_deadline(list_next_entry(rq, link)); + 
intel_engine_context_out(engine); + if (engine->fw_domain && !--engine->fw_active) intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); intel_gt_pm_put_async(engine->gt); @@ -755,6 +760,7 @@ int intel_ring_scheduler_setup(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_SCHEDULER; engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; + engine->flags |= I915_ENGINE_SUPPORTS_STATS; /* Finally, take ownership and responsibility for cleanup! */ engine->release = ring_release; From patchwork Mon Dec 28 15:52:27 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991341 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6C4B0C433DB for ; Mon, 28 Dec 2020 15:52:53 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 0123B20791 for ; Mon, 28 Dec 2020 15:52:52 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 0123B20791 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id F156C899F3; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by 
gabe.freedesktop.org (Postfix) with ESMTPS id DA63189255 for ; Mon, 28 Dec 2020 15:52:47 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448246-1500050 for multiple; Mon, 28 Dec 2020 15:52:41 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:27 +0000 Message-Id: <20201228155229.9516-52-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 52/54] drm/i915/gt: Implement ring scheduler for gen6/7 X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" A key problem with legacy ring buffer submission is that it is an inherent FIFO queue across all clients; if one blocks, they all block. A scheduler allows us to avoid that limitation, and ensures that all clients can submit in parallel, removing the resource contention of the global ringbuffer. Having built the ring scheduler infrastructure over top of the global ringbuffer submission, we now need to provide the HW knowledge required to build command packets and implement context switching. 
Signed-off-by: Chris Wilson --- .../gpu/drm/i915/gt/intel_ring_scheduler.c | 447 +++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 10 + 2 files changed, 454 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c index b95f4ad4bd00..91ac415a3f82 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_scheduler.c @@ -7,6 +7,10 @@ #include +#include "gen2_engine_cs.h" +#include "gen6_engine_cs.h" +#include "gen6_ppgtt.h" +#include "gen7_renderclear.h" #include "i915_drv.h" #include "intel_breadcrumbs.h" #include "intel_context.h" @@ -164,8 +168,263 @@ static void ring_copy(struct intel_ring *dst, memcpy(out, src->vaddr + start, end - start); } +static void mi_set_context(struct intel_ring *ring, + struct intel_engine_cs *engine, + struct intel_context *ce, + u32 flags) +{ + struct drm_i915_private *i915 = engine->i915; + enum intel_engine_id id; + const int num_engines = + IS_HASWELL(i915) ? engine->gt->info.num_engines - 1 : 0; + int len; + u32 *cs; + + len = 4; + if (IS_GEN(i915, 7)) + len += 2 + (num_engines ? 4 * num_engines + 6 : 0); + else if (IS_GEN(i915, 5)) + len += 2; + + cs = ring_map_dw(ring, len); + + /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ + if (IS_GEN(i915, 7)) { + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + if (num_engines) { + struct intel_engine_cs *signaller; + + *cs++ = MI_LOAD_REGISTER_IMM(num_engines); + for_each_engine(signaller, engine->gt, id) { + if (signaller == engine) + continue; + + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); + } + } + } else if (IS_GEN(i915, 5)) { + /* + * This w/a is only listed for pre-production ilk a/b steppings, + * but is also mentioned for programming the powerctx. 
To be + * safe, just apply the workaround; we do not use SyncFlush so + * this should never take effect and so be a no-op! + */ + *cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN; + } + + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(ce->state) | flags; + /* + * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP + * WaMiSetContext_Hang:snb,ivb,vlv + */ + *cs++ = MI_NOOP; + + if (IS_GEN(i915, 7)) { + if (num_engines) { + struct intel_engine_cs *signaller; + i915_reg_t last_reg = {}; /* keep gcc quiet */ + + *cs++ = MI_LOAD_REGISTER_IMM(num_engines); + for_each_engine(signaller, engine->gt, id) { + if (signaller == engine) + continue; + + last_reg = RING_PSMI_CTL(signaller->mmio_base); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); + } + + /* Insert a delay before the next switch! */ + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); + *cs++ = MI_NOOP; + } + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + } else if (IS_GEN(i915, 5)) { + *cs++ = MI_SUSPEND_FLUSH; + } +} + +static struct i915_address_space *vm_alias(struct i915_address_space *vm) +{ + if (i915_is_ggtt(vm)) + vm = &i915_vm_to_ggtt(vm)->alias->vm; + + return vm; +} + +static u32 pp_dir(const struct i915_ppgtt *ppgtt) +{ + return container_of(ppgtt, const struct gen6_ppgtt, base)->pp_dir; +} + +static void load_pd_dir(struct intel_ring *ring, + struct intel_engine_cs *engine, + const struct i915_ppgtt *ppgtt) +{ + u32 *cs = ring_map_dw(ring, 10); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base)); + *cs++ = PP_DIR_DCLV_2G; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); + *cs++ = pp_dir(ppgtt); + + /* Stall until the page table load is complete? 
*/ + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); + *cs++ = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_DEFAULT); + *cs++ = MI_NOOP; +} + +static struct i915_address_space *current_vm(struct intel_engine_cs *engine) +{ + struct intel_context *old = engine->legacy.context; + + return old ? vm_alias(old->vm) : NULL; +} + +static void gen6_emit_invalidate_rcs(struct intel_ring *ring, + struct intel_engine_cs *engine) +{ + u32 addr, flags; + u32 *cs; + + addr = intel_gt_scratch_offset(engine->gt, + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); + + flags = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + + if (INTEL_GEN(engine->i915) >= 7) + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + else + addr |= PIPE_CONTROL_GLOBAL_GTT; + + cs = ring_map_dw(ring, 4); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = addr; + *cs++ = 0; +} + +static struct i915_address_space * +clear_residuals(struct intel_ring *ring, struct intel_engine_cs *engine) +{ + struct intel_context *ce = engine->kernel_context; + struct i915_address_space *vm = vm_alias(engine->gt->vm); + u32 flags; + + if (vm != current_vm(engine)) + load_pd_dir(ring, engine, i915_vm_to_ppgtt(vm)); + + if (ce->state) + mi_set_context(ring, engine, ce, + MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT); + + if (IS_HASWELL(engine->i915)) + flags = MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW; + else + flags = MI_BATCH_NON_SECURE_I965; + + __gen6_emit_bb_start(ring_map_dw(ring, 2), + engine->wa_ctx.vma->node.start, flags); + + return vm; +} + +static void remap_l3_slice(struct intel_ring *ring, + struct intel_engine_cs *engine, + int slice) +{ + u32 *cs, *remap_info = engine->i915->l3_parity.remap_info[slice]; + int i; + + if (!remap_info) + return; + + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at 
initialization time. + */ + cs = ring_map_dw(ring, GEN7_L3LOG_SIZE / 4 * 2 + 2); + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE / 4); + for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) { + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; + } + *cs++ = MI_NOOP; +} + +static void remap_l3(struct intel_ring *ring, + struct intel_engine_cs *engine, + struct intel_context *ce) +{ + struct i915_gem_context *ctx = + rcu_dereference_protected(ce->gem_context, true); + int bit, idx = -1; + + if (!ctx || !ctx->remap_slice) + return; + + do { + bit = ffs(ctx->remap_slice); + remap_l3_slice(ring, engine, idx += bit); + } while (ctx->remap_slice >>= bit); +} + static void switch_context(struct intel_ring *ring, struct i915_request *rq) { + struct intel_engine_cs *engine = rq->engine; + struct i915_address_space *cvm = current_vm(engine); + struct intel_context *ce = rq->context; + struct i915_address_space *vm; + + if (engine->wa_ctx.vma && ce != engine->kernel_context) { + if (engine->wa_ctx.vma->private != ce) { + cvm = clear_residuals(ring, engine); + intel_context_put(engine->wa_ctx.vma->private); + engine->wa_ctx.vma->private = intel_context_get(ce); + } + } + + vm = vm_alias(ce->vm); + if (vm != cvm) + load_pd_dir(ring, engine, i915_vm_to_ppgtt(vm)); + + if (ce->state) { + u32 flags; + + GEM_BUG_ON(engine->id != RCS0); + + /* For resource streamer on HSW+ and power context elsewhere */ + BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN); + BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN); + + flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT; + if (test_bit(CONTEXT_VALID_BIT, &ce->flags)) { + gen6_emit_invalidate_rcs(ring, engine); + flags |= MI_RESTORE_EXT_STATE_EN; + } else { + flags |= MI_RESTORE_INHIBIT; + } + + mi_set_context(ring, engine, ce, flags); + } + + remap_l3(ring, engine, ce); } static struct i915_request *ring_submit(struct i915_request *rq) @@ -205,6 +464,36 @@ static inline void write_tail(const struct 
intel_engine_cs *engine) ENGINE_WRITE(engine, RING_TAIL, engine->legacy.ring->tail); } +static void wa_write_tail(const struct intel_engine_cs *engine) +{ + const i915_reg_t psmi = RING_PSMI_CTL(engine->mmio_base); + struct intel_uncore *uncore = engine->uncore; + + intel_uncore_write_fw(uncore, psmi, + _MASKED_BIT_ENABLE(PSMI_SLEEP_MSG_DISABLE)); + + /* Clear the context id. Here be magic! */ + intel_uncore_write64_fw(uncore, RING_RNCID(engine->mmio_base), 0x0); + + /* Wait for the ring not to be idle, i.e. for it to wake up. */ + if (__intel_wait_for_register_fw(uncore, psmi, + PSMI_SLEEP_INDICATOR, 0, + 1000, 0, NULL)) + drm_err(&uncore->i915->drm, + "timed out waiting for %s to wake up\n", + engine->name); + + /* Now that the ring is fully powered up, update the tail */ + write_tail(engine); + + /* + * Let the ring send IDLE messages to the GT again, + * and so let it sleep to conserve power when idle. + */ + intel_uncore_write_fw(uncore, psmi, + _MASKED_BIT_DISABLE(PSMI_SLEEP_MSG_DISABLE)); +} + static void dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const el = &engine->execlists; @@ -252,7 +541,10 @@ static void dequeue(struct intel_engine_cs *engine) schedule_in(*port); wmb(); /* paranoid flush of WCB before RING_TAIL write */ - write_tail(engine); + if (!engine->fw_active) + write_tail(engine); + else + wa_write_tail(engine); WRITE_ONCE(el->active, el->inflight); GEM_BUG_ON(!*el->active); @@ -442,6 +734,33 @@ static void submission_unpark(struct intel_engine_cs *engine) intel_breadcrumbs_pin_irq(engine->breadcrumbs); } +static int gen6_emit_init_breadcrumb(struct i915_request *rq) +{ + struct intel_timeline *tl = i915_request_timeline(rq); + u32 *cs; + + GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); + if (!intel_timeline_has_initial_breadcrumb(tl)) + return 0; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = 
__i915_request_hwsp_offset(rq); + *cs++ = rq->fence.seqno - 1; + + intel_ring_advance(rq, cs); + + /* Record the updated position of the request's payload */ + rq->infix = intel_ring_offset(rq, cs); + + __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); + return 0; +} + static void ring_context_destroy(struct kref *ref) { struct intel_context *ce = container_of(ref, typeof(*ce), ref); @@ -586,7 +905,14 @@ static int ring_context_pre_pin(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **unused) { - return 0; + struct i915_address_space *vm; + int err = 0; + + vm = vm_alias(ce->vm); + if (vm) + err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww); + + return err; } static int ring_context_pin(struct intel_context *ce, void *unused) @@ -594,12 +920,22 @@ static int ring_context_pin(struct intel_context *ce, void *unused) return 0; } +static void __context_unpin_ppgtt(struct intel_context *ce) +{ + struct i915_address_space *vm; + + vm = vm_alias(ce->vm); + if (vm) + gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm)); +} + static void ring_context_unpin(struct intel_context *ce) { } static void ring_context_post_unpin(struct intel_context *ce) { + __context_unpin_ppgtt(ce); } static void ring_context_reset(struct intel_context *ce) @@ -657,12 +993,19 @@ static void ring_release(struct intel_engine_cs *engine) set_current_context(&engine->legacy.context, NULL); + if (engine->wa_ctx.vma) { + intel_context_put(engine->wa_ctx.vma->private); + i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); + } + intel_ring_unpin(engine->legacy.ring); intel_ring_put(engine->legacy.ring); } static void setup_irq(struct intel_engine_cs *engine) { + engine->irq_enable = gen6_irq_enable; + engine->irq_disable = gen6_irq_disable; } static void setup_common(struct intel_engine_cs *engine) @@ -671,7 +1014,7 @@ static void setup_common(struct intel_engine_cs *engine) /* gen8+ are only supported with execlists */ GEM_BUG_ON(INTEL_GEN(i915) >= 8); - GEM_BUG_ON(INTEL_GEN(i915) < 8); + 
GEM_BUG_ON(INTEL_GEN(i915) < 6); setup_irq(engine); @@ -689,24 +1032,62 @@ static void setup_common(struct intel_engine_cs *engine) engine->cops = &ring_context_ops; engine->request_alloc = ring_request_alloc; + engine->emit_init_breadcrumb = gen6_emit_init_breadcrumb; + if (INTEL_GEN(i915) >= 7) + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_xcs; + else if (INTEL_GEN(i915) >= 6) + engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_xcs; + else + engine->emit_fini_breadcrumb = gen3_emit_breadcrumb; + engine->set_default_submission = set_default_submission; + + engine->emit_bb_start = gen6_emit_bb_start; } static void setup_rcs(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; + + if (HAS_L3_DPF(i915)) + engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; + + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; + + if (INTEL_GEN(i915) >= 7) { + engine->emit_flush = gen7_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen7_emit_breadcrumb_rcs; + if (IS_HASWELL(i915)) + engine->emit_bb_start = hsw_emit_bb_start; + } else { + engine->emit_flush = gen6_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen6_emit_breadcrumb_rcs; + } } static void setup_vcs(struct intel_engine_cs *engine) { + engine->emit_flush = gen6_emit_flush_vcs; + engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; + + if (IS_GEN(engine->i915, 6)) + engine->fw_domain = FORCEWAKE_ALL; } static void setup_bcs(struct intel_engine_cs *engine) { + engine->emit_flush = gen6_emit_flush_xcs; + engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; } static void setup_vecs(struct intel_engine_cs *engine) { GEM_BUG_ON(!IS_HASWELL(engine->i915)); + + engine->emit_flush = gen6_emit_flush_xcs; + engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; + engine->irq_enable = hsw_irq_enable_vecs; + engine->irq_disable = hsw_irq_disable_vecs; } static unsigned int global_ring_size(void) @@ -715,6 +1096,58 @@ static unsigned int global_ring_size(void) return 
roundup_pow_of_two(EXECLIST_MAX_PORTS * SZ_16K + SZ_4K); } +static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int size; + int err; + + size = gen7_setup_clear_gpr_bb(engine, NULL /* probe size */); + if (size <= 0) + return size; + + size = ALIGN(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(engine->i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, engine->gt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vma->private = intel_context_create(engine); /* dummy residuals */ + if (IS_ERR(vma->private)) { + err = PTR_ERR(vma->private); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) + goto err_private; + + err = i915_vma_sync(vma); + if (err) + goto err_unpin; + + size = gen7_setup_clear_gpr_bb(engine, vma); + if (err) + goto err_unpin; + + engine->wa_ctx.vma = vma; + return 0; + +err_unpin: + i915_vma_unpin(vma); +err_private: + intel_context_put(vma->private); +err_obj: + i915_gem_object_put(obj); + return err; +} + int intel_ring_scheduler_setup(struct intel_engine_cs *engine) { struct intel_ring *ring; @@ -758,6 +1191,12 @@ int intel_ring_scheduler_setup(struct intel_engine_cs *engine) GEM_BUG_ON(engine->legacy.ring); engine->legacy.ring = ring; + if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { + err = gen7_ctx_switch_bb_init(engine); + if (err) + goto err_ring_unpin; + } + engine->flags |= I915_ENGINE_HAS_SCHEDULER; engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; engine->flags |= I915_ENGINE_SUPPORTS_STATS; @@ -766,6 +1205,8 @@ int intel_ring_scheduler_setup(struct intel_engine_cs *engine) engine->release = ring_release; return 0; +err_ring_unpin: + intel_ring_unpin(ring); err_ring: intel_ring_put(ring); err: diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0023c023f472..8d38e92d05db 100644 --- 
a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2529,7 +2529,16 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN6_VERSYNC (RING_SYNC_1(VEBOX_RING_BASE)) #define GEN6_VEVSYNC (RING_SYNC_2(VEBOX_RING_BASE)) #define GEN6_NOSYNC INVALID_MMIO_REG + #define RING_PSMI_CTL(base) _MMIO((base) + 0x50) +#define PSMI_SLEEP_MSG_DISABLE REG_BIT(0) +#define PSMI_SLEEP_FLUSH_DISABLE REG_BIT(2) +#define PSMI_SLEEP_INDICATOR REG_BIT(3) +#define PSMI_GO_INDICATOR REG_BIT(4) +#define GEN12_PSMI_WAIT_FOR_EVENT_POWER_DOWN_DISABLE REG_BIT(7) +#define GEN8_PSMI_FF_DOP_CLOCK_GATE_DISABLE REG_BIT(10) +#define GEN8_PSMI_RC_SEMA_IDLE_MSG_DISABLE REG_BIT(12) + #define RING_MAX_IDLE(base) _MMIO((base) + 0x54) #define RING_HWS_PGA(base) _MMIO((base) + 0x80) #define RING_ID(base) _MMIO((base) + 0x8c) @@ -2539,6 +2548,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RESET_CTL_READY_TO_RESET REG_BIT(1) #define RESET_CTL_REQUEST_RESET REG_BIT(0) +#define RING_RNCID(base) _MMIO((base) + 0x198) #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c) #define HSW_GTT_CACHE_EN _MMIO(0x4024) From patchwork Mon Dec 28 15:52:28 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991401 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 03EECC04EB5 for ; Mon, 28 Dec 2020 15:53:26 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 
bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id BDCAE206E5 for ; Mon, 28 Dec 2020 15:53:25 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org BDCAE206E5 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 053C589AAE; Mon, 28 Dec 2020 15:52:59 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 04CF889255 for ; Mon, 28 Dec 2020 15:52:46 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448248-1500050 for multiple; Mon, 28 Dec 2020 15:52:42 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:28 +0000 Message-Id: <20201228155229.9516-53-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 53/54] drm/i915/gt: Enable ring scheduling for gen6/7 X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Switch over from FIFO global submission to the priority-sorted topological scheduler. At the cost of more busy work on the CPU to keep the GPU supplied with the next packet of requests, this allows us to reorder requests around submission stalls. 
This also enables the timer based RPS, with the exception of Valleyview whose PCU doesn't take kindly to our interference. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 ++ drivers/gpu/drm/i915/gt/intel_rps.c | 6 ++---- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index d3f87dc4eda3..2246b5c308dc 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -94,7 +94,7 @@ static int live_nop_switch(void *arg) rq = i915_request_get(this); i915_request_add(this); } - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + if (i915_request_wait(rq, 0, HZ) < 0) { pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); i915_request_put(rq); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index befd299cf675..baaf3e8ea70c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -881,6 +881,8 @@ int intel_engines_init(struct intel_gt *gt) if (HAS_EXECLISTS(gt->i915)) setup = intel_execlists_submission_setup; + else if (INTEL_GEN(gt->i915) >= 6) + setup = intel_ring_scheduler_setup; else setup = intel_ring_submission_setup; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 2b443b735a98..2963ab5a86ff 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1078,9 +1078,7 @@ static bool gen6_rps_enable(struct intel_rps *rps) intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000); intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10); - rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_DOWN_TIMEOUT); + rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | 
GEN6_PM_RP_DOWN_THRESHOLD; return rps_reset(rps); } @@ -1388,7 +1386,7 @@ void intel_rps_enable(struct intel_rps *rps) GEM_BUG_ON(rps->efficient_freq < rps->min_freq); GEM_BUG_ON(rps->efficient_freq > rps->max_freq); - if (has_busy_stats(rps)) + if (has_busy_stats(rps) && !IS_VALLEYVIEW(i915)) intel_rps_set_timer(rps); else if (INTEL_GEN(i915) >= 6) intel_rps_set_interrupts(rps); From patchwork Mon Dec 28 15:52:29 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11991339 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 68F6CC433E0 for ; Mon, 28 Dec 2020 15:52:51 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id E752620791 for ; Mon, 28 Dec 2020 15:52:50 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org E752620791 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 9043F899B5; Mon, 28 Dec 2020 15:52:49 +0000 (UTC) Received: from fireflyinternet.com (unknown [77.68.26.236]) by gabe.freedesktop.org (Postfix) with ESMTPS id 2265789254 for ; Mon, 28 Dec 2020 15:52:45 +0000 (UTC) 
X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 23448250-1500050 for multiple; Mon, 28 Dec 2020 15:52:42 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 28 Dec 2020 15:52:29 +0000 Message-Id: <20201228155229.9516-54-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20201228155229.9516-1-chris@chris-wilson.co.uk> References: <20201228155229.9516-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 54/54] drm/i915/gt: Limit C-states while waiting for requests X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Eero Tamminen , Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Allow the sysadmin to specify whether we should prevent the CPU from entering higher C-states while waiting for the GPU, in order to reduce the latency of request completions and so speed up client continuations. The target dma latency can be adjusted per-engine using, /sys/class/drm/card?/engine/*/dma_latency_ns (For waiting on a virtual engine, the underlying physical engine is used for the wait once the request is active, so set all the physical engines in the virtual set to the same target dma latency.) Note that in most cases, the rate-limiting step does not appear to be the interrupt latency per se, but secondary effects of avoiding additional memory latencies while active. 
Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Eero Tamminen Cc: Francisco Jerez Cc: Mika Kuoppala Cc: Dmitry Rogozhkin --- drivers/gpu/drm/i915/Kconfig.profile | 14 ++++++ drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 45 +++++++++++++++++++ .../gpu/drm/i915/gt/intel_breadcrumbs_types.h | 7 +++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 + drivers/gpu/drm/i915/gt/sysfs_engines.c | 43 ++++++++++++++++++ 6 files changed, 113 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 3eacea42b19f..7c996564c92b 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -24,6 +24,20 @@ config DRM_I915_USERFAULT_AUTOSUSPEND May be 0 to disable the extra delay and solely use the device level runtime pm autosuspend delay tunable. +config DRM_I915_DMA_LATENCY + int "Target CPU-DMA latency while waiting on active requests (ns)" + default -1 # nanoseconds + help + Specify a target latency for DMA wakeup, see /dev/cpu_dma_latency, + used while the CPU is waiting for GPU results. + + This is adjustable via + /sys/class/drm/card?/engine/*/dma_latency_ns + + May be -1 to prevent specifying a target wakeup and let the CPU + enter powersaving while waiting. Conversely, 0 may be used to + prevent the CPU from entering any C-states while waiting. 
+ config DRM_I915_HEARTBEAT_INTERVAL int "Interval between heartbeat pulses (ms)" default 2500 # milliseconds diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index dc76b05991e7..235f62ca7ae7 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -34,6 +34,40 @@ #include "intel_gt_pm.h" #include "intel_gt_requests.h" +static void __dma_qos_update(struct work_struct *work) +{ + struct intel_breadcrumbs_dma_qos *qos = + container_of(work, typeof(*qos), update); + + if (cpu_latency_qos_request_active(&qos->req)) { + if (qos->latency < 0) + cpu_latency_qos_remove_request(&qos->req); + else + cpu_latency_qos_update_request(&qos->req, qos->latency); + } else { + if (qos->latency != -1) + cpu_latency_qos_add_request(&qos->req, qos->latency); + } +} + +static void dma_qos_add(struct intel_breadcrumbs *b, s32 latency) +{ + if (latency < 0) + return; + + b->qos.latency = latency; + queue_work(system_highpri_wq, &b->qos.update); +} + +static void dma_qos_del(struct intel_breadcrumbs *b) +{ + if (b->qos.latency < 0) + return; + + b->qos.latency = -1; + queue_work(system_highpri_wq, &b->qos.update); +} + static bool irq_enable(struct intel_engine_cs *engine) { if (!engine->irq_enable) @@ -74,6 +108,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) * the irq. */ WRITE_ONCE(b->irq_armed, true); + dma_qos_add(b, b->irq_engine->props.dma_latency_ns); /* Requests may have completed before we could enable the interrupt. 
*/ if (!b->irq_enabled++ && irq_enable(b->irq_engine)) @@ -97,7 +132,9 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) if (!--b->irq_enabled) irq_disable(b->irq_engine); + dma_qos_del(b); WRITE_ONCE(b->irq_armed, false); + intel_gt_pm_put_async(b->irq_engine->gt); } @@ -306,6 +343,9 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine) INIT_LIST_HEAD(&b->signalers); init_llist_head(&b->signaled_requests); + b->qos.latency = -1; + INIT_WORK(&b->qos.update, __dma_qos_update); + spin_lock_init(&b->irq_lock); init_irq_work(&b->irq_work, signal_irq_work); @@ -373,6 +413,11 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b) irq_work_sync(&b->irq_work); GEM_BUG_ON(!list_empty(&b->signalers)); GEM_BUG_ON(b->irq_armed); + + GEM_BUG_ON(b->qos.latency != -1); + flush_work(&b->qos.update); + GEM_BUG_ON(cpu_latency_qos_request_active(&b->qos.req)); + kfree(b); } diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 3a084ce8ff5e..d5ad47f36ba0 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -43,6 +44,12 @@ struct intel_breadcrumbs { /* Not all breadcrumbs are attached to physical HW */ struct intel_engine_cs *irq_engine; + + struct intel_breadcrumbs_dma_qos { + struct pm_qos_request req; + struct work_struct update; + s32 latency; + } qos; }; #endif /* __INTEL_BREADCRUMBS_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index baaf3e8ea70c..6cb0eaf9655c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -314,6 +314,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); + engine->props.dma_latency_ns = + CONFIG_DRM_I915_DMA_LATENCY; 
engine->props.heartbeat_interval_ms = CONFIG_DRM_I915_HEARTBEAT_INTERVAL; engine->props.max_busywait_duration_ns = diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a93bef46e455..46f92e3528d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -506,6 +506,8 @@ struct intel_engine_cs { unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; unsigned long timeslice_duration_ms; + + s32 dma_latency_ns; } props, defaults; }; diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 967031056202..ec49ffa8d9b9 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -301,6 +301,47 @@ stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) static struct kobj_attribute stop_timeout_def = __ATTR(stop_timeout_ms, 0444, stop_default, NULL); +static ssize_t +dma_latency_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + long long latency; + int err; + + err = kstrtoll(buf, 0, &latency); + if (err) + return err; + + if (latency < S32_MIN || latency > S32_MAX) /* must fit the s32 property; out-of-range input would be truncated by the store below */ + return -EINVAL; + + WRITE_ONCE(engine->props.dma_latency_ns, latency); + return count; +} + +static ssize_t +dma_latency_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%d\n", engine->props.dma_latency_ns); +} + +static struct kobj_attribute dma_latency_attr = +__ATTR(dma_latency_ns, 0644, dma_latency_show, dma_latency_store); + +static ssize_t +dma_latency_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%d\n", engine->defaults.dma_latency_ns); +} + +static struct kobj_attribute dma_latency_def = 
+__ATTR(dma_latency_ns, 0444, dma_latency_default, NULL); + static ssize_t preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -447,6 +488,7 @@ static void add_defaults(struct kobj_engine *parent) static const struct attribute *files[] = { &max_spin_def.attr, &stop_timeout_def.attr, + &dma_latency_def.attr, #if CONFIG_DRM_I915_HEARTBEAT_INTERVAL &heartbeat_interval_def.attr, #endif @@ -489,6 +531,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915) &all_caps_attr.attr, &max_spin_attr.attr, &stop_timeout_attr.attr, + &dma_latency_attr.attr, #if CONFIG_DRM_I915_HEARTBEAT_INTERVAL &heartbeat_interval_attr.attr, #endif