From patchwork Tue Mar 17 12:27:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442775 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D9D911667 for ; Tue, 17 Mar 2020 12:27:32 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id C2C80206EC for ; Tue, 17 Mar 2020 12:27:32 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org C2C80206EC Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 5D21889F85; Tue, 17 Mar 2020 12:27:30 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 4318689F61 for ; Tue, 17 Mar 2020 12:27:28 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588517-1500050 for multiple; Tue, 17 Mar 2020 12:27:16 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:08 +0000 Message-Id: <20200317122719.1889-1-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 01/12] drm/i915/selftests: Add request throughput measurement to perf X-BeenThere: 
intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Under ideal circumstances, the driver should be able to keep the GPU fully saturated with work. Measure how close to ideal we get under the harshest of conditions with no user payload. v2: Also measure throughput using only one thread. Signed-off-by: Chris Wilson --- .../drm/i915/selftests/i915_perf_selftests.h | 1 + drivers/gpu/drm/i915/selftests/i915_request.c | 590 +++++++++++++++++- 2 files changed, 590 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index 3bf7f53e9924..d8da142985eb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -16,5 +16,6 @@ * Tests are executed in order by igt/i915_selftest */ selftest(engine_cs, intel_engine_cs_perf_selftests) +selftest(request, i915_request_perf_selftests) selftest(blt, i915_gem_object_blt_perf_selftests) selftest(region, intel_memory_region_perf_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index f89d9c42f1fa..a32dfcc6c2ec 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -23,6 +23,7 @@ */ #include +#include #include "gem/i915_gem_pm.h" #include "gem/selftests/mock_context.h" @@ -1233,7 +1234,7 @@ static int live_parallel_engines(void *arg) struct igt_live_test t; unsigned int idx; - snprintf(name, sizeof(name), "%pS", fn); + snprintf(name, sizeof(name), "%ps", *fn); err = igt_live_test_begin(&t, i915, __func__, name); if (err) break; @@ -1470,3 +1471,590 @@ int i915_request_live_selftests(struct drm_i915_private *i915) return 
i915_subtests(tests, i915); } + +static int switch_to_kernel_sync(struct intel_context *ce, int err) +{ + struct i915_request *rq; + struct dma_fence *fence; + + rq = intel_engine_create_kernel_request(ce->engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + fence = i915_active_fence_get(&ce->timeline->last_request); + if (fence) { + i915_request_await_dma_fence(rq, fence); + dma_fence_put(fence); + } + + rq = i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err) + err = -ETIME; + i915_request_put(rq); + + while (!err && !intel_engine_is_idle(ce->engine)) + intel_engine_flush_submission(ce->engine); + + return err; +} + +struct perf_stats { + struct intel_engine_cs *engine; + unsigned long count; + ktime_t time; + ktime_t busy; + u64 runtime; +}; + +struct perf_series { + struct drm_i915_private *i915; + unsigned int nengines; + struct intel_context *ce[]; +}; + +static int s_sync0(void *arg) +{ + struct perf_series *ps = arg; + IGT_TIMEOUT(end_time); + unsigned int idx = 0; + int err = 0; + + GEM_BUG_ON(!ps->nengines); + do { + struct i915_request *rq; + + rq = i915_request_create(ps->ce[idx]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + break; + + if (++idx == ps->nengines) + idx = 0; + } while (!__igt_timeout(end_time, NULL)); + + return err; +} + +static int s_sync1(void *arg) +{ + struct perf_series *ps = arg; + struct i915_request *prev = NULL; + IGT_TIMEOUT(end_time); + unsigned int idx = 0; + int err = 0; + + GEM_BUG_ON(!ps->nengines); + do { + struct i915_request *rq; + + rq = i915_request_create(ps->ce[idx]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(prev); + prev = rq; + if (err) + break; + + if (++idx == 
ps->nengines) + idx = 0; + } while (!__igt_timeout(end_time, NULL)); + i915_request_put(prev); + + return err; +} + +static int s_many(void *arg) +{ + struct perf_series *ps = arg; + IGT_TIMEOUT(end_time); + unsigned int idx = 0; + + GEM_BUG_ON(!ps->nengines); + do { + struct i915_request *rq; + + rq = i915_request_create(ps->ce[idx]); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + + if (++idx == ps->nengines) + idx = 0; + } while (!__igt_timeout(end_time, NULL)); + + return 0; +} + +static int perf_series_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + s_sync0, + s_sync1, + s_many, + NULL, + }; + const unsigned int nengines = num_uabi_engines(i915); + struct intel_engine_cs *engine; + int (* const *fn)(void *arg); + struct pm_qos_request *qos; + struct perf_stats *stats; + struct perf_series *ps; + unsigned int idx; + int err = 0; + + stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL); + if (!stats) + return -ENOMEM; + + qos = kzalloc(sizeof(*qos), GFP_KERNEL); + if (qos) + pm_qos_add_request(qos, PM_QOS_CPU_DMA_LATENCY, 0); + + ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL); + if (!ps) { + kfree(stats); + return -ENOMEM; + } + + ps->i915 = i915; + ps->nengines = nengines; + + idx = 0; + for_each_uabi_engine(engine, i915) { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + goto out; + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + goto out; + } + + ps->ce[idx++] = ce; + } + GEM_BUG_ON(idx != ps->nengines); + + for (fn = func; *fn && !err; fn++) { + char name[KSYM_NAME_LEN]; + struct igt_live_test t; + + snprintf(name, sizeof(name), "%ps", *fn); + err = igt_live_test_begin(&t, i915, __func__, name); + if (err) + break; + + for (idx = 0; idx < nengines; idx++) { + struct perf_stats *p = + memset(&stats[idx], 0, sizeof(stats[idx])); + struct intel_context *ce = ps->ce[idx]; + + p->engine = ps->ce[idx]->engine; + 
intel_engine_pm_get(p->engine); + + if (intel_engine_supports_stats(p->engine) && + !intel_enable_engine_stats(p->engine)) + p->busy = intel_engine_get_busy_time(p->engine) + 1; + p->runtime = -intel_context_get_total_runtime_ns(ce); + p->time = ktime_get(); + } + + err = (*fn)(ps); + if (igt_live_test_end(&t)) + err = -EIO; + + for (idx = 0; idx < nengines; idx++) { + struct perf_stats *p = &stats[idx]; + struct intel_context *ce = ps->ce[idx]; + int integer, decimal; + u64 busy, dt; + + p->time = ktime_sub(ktime_get(), p->time); + if (p->busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(p->engine), + p->busy - 1); + intel_disable_engine_stats(p->engine); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime += intel_context_get_total_runtime_ns(ce); + intel_engine_pm_put(p->engine); + + busy = 100 * ktime_to_ns(p->busy); + dt = ktime_to_ns(p->time); + if (dt) { + integer = div64_u64(busy, dt); + busy -= integer * dt; + decimal = div64_u64(100 * busy, dt); + } else { + integer = 0; + decimal = 0; + } + + pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n", + name, p->engine->name, ce->timeline->seqno, + integer, decimal, + div_u64(p->runtime, 1000 * 1000), + div_u64(ktime_to_ns(p->time), 1000 * 1000)); + } + } + +out: + for (idx = 0; idx < nengines; idx++) { + if (IS_ERR_OR_NULL(ps->ce[idx])) + break; + + intel_context_unpin(ps->ce[idx]); + intel_context_put(ps->ce[idx]); + } + kfree(ps); + + if (qos) { + pm_qos_remove_request(qos); + kfree(qos); + } + kfree(stats); + return err; +} + +static int p_sync0(void *arg) +{ + struct perf_stats *p = arg; + struct intel_engine_cs *engine = p->engine; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long count; + bool busy; + int err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return err; + } + + busy = false; + if (intel_engine_supports_stats(engine) && + 
!intel_enable_engine_stats(engine)) { + p->busy = intel_engine_get_busy_time(engine); + busy = true; + } + + p->time = ktime_get(); + count = 0; + do { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + err = 0; + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + if (err) + break; + + count++; + } while (!__igt_timeout(end_time, NULL)); + p->time = ktime_sub(ktime_get(), p->time); + + if (busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(engine), + p->busy); + intel_disable_engine_stats(engine); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime = intel_context_get_total_runtime_ns(ce); + p->count = count; + + intel_context_unpin(ce); + intel_context_put(ce); + return err; +} + +static int p_sync1(void *arg) +{ + struct perf_stats *p = arg; + struct intel_engine_cs *engine = p->engine; + struct i915_request *prev = NULL; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long count; + bool busy; + int err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return err; + } + + busy = false; + if (intel_engine_supports_stats(engine) && + !intel_enable_engine_stats(engine)) { + p->busy = intel_engine_get_busy_time(engine); + busy = true; + } + + p->time = ktime_get(); + count = 0; + do { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_get(rq); + i915_request_add(rq); + + err = 0; + if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(prev); + prev = rq; + if (err) + break; + + count++; + } while (!__igt_timeout(end_time, NULL)); + i915_request_put(prev); + p->time = ktime_sub(ktime_get(), p->time); + + if (busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(engine), + 
p->busy); + intel_disable_engine_stats(engine); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime = intel_context_get_total_runtime_ns(ce); + p->count = count; + + intel_context_unpin(ce); + intel_context_put(ce); + return err; +} + +static int p_many(void *arg) +{ + struct perf_stats *p = arg; + struct intel_engine_cs *engine = p->engine; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + unsigned long count; + int err = 0; + bool busy; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + return err; + } + + busy = false; + if (intel_engine_supports_stats(engine) && + !intel_enable_engine_stats(engine)) { + p->busy = intel_engine_get_busy_time(engine); + busy = true; + } + + count = 0; + p->time = ktime_get(); + do { + struct i915_request *rq; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + i915_request_add(rq); + count++; + } while (!__igt_timeout(end_time, NULL)); + p->time = ktime_sub(ktime_get(), p->time); + + if (busy) { + p->busy = ktime_sub(intel_engine_get_busy_time(engine), + p->busy); + intel_disable_engine_stats(engine); + } + + err = switch_to_kernel_sync(ce, err); + p->runtime = intel_context_get_total_runtime_ns(ce); + p->count = count; + + intel_context_unpin(ce); + intel_context_put(ce); + return err; +} + +static int perf_parallel_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + static int (* const func[])(void *arg) = { + p_sync0, + p_sync1, + p_many, + NULL, + }; + const unsigned int nengines = num_uabi_engines(i915); + struct intel_engine_cs *engine; + int (* const *fn)(void *arg); + struct pm_qos_request *qos; + struct { + struct perf_stats p; + struct task_struct *tsk; + } *engines; + int err = 0; + + engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL); + if (!engines) + return -ENOMEM; + + qos = kzalloc(sizeof(*qos), GFP_KERNEL); + if (qos) + pm_qos_add_request(qos, 
PM_QOS_CPU_DMA_LATENCY, 0); + + for (fn = func; *fn; fn++) { + char name[KSYM_NAME_LEN]; + struct igt_live_test t; + unsigned int idx; + + snprintf(name, sizeof(name), "%ps", *fn); + err = igt_live_test_begin(&t, i915, __func__, name); + if (err) + break; + + atomic_set(&i915->selftest.counter, nengines); + + idx = 0; + for_each_uabi_engine(engine, i915) { + intel_engine_pm_get(engine); + + memset(&engines[idx].p, 0, sizeof(engines[idx].p)); + engines[idx].p.engine = engine; + + engines[idx].tsk = kthread_run(*fn, &engines[idx].p, + "igt:%s", engine->name); + if (IS_ERR(engines[idx].tsk)) { + err = PTR_ERR(engines[idx].tsk); + intel_engine_pm_put(engine); + break; + } + get_task_struct(engines[idx++].tsk); + } + + yield(); /* start all threads before we kthread_stop() */ + + idx = 0; + for_each_uabi_engine(engine, i915) { + int status; + + if (IS_ERR(engines[idx].tsk)) + break; + + status = kthread_stop(engines[idx].tsk); + if (status && !err) + err = status; + + intel_engine_pm_put(engine); + put_task_struct(engines[idx++].tsk); + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + + idx = 0; + for_each_uabi_engine(engine, i915) { + struct perf_stats *p = &engines[idx].p; + u64 busy = 100 * ktime_to_ns(p->busy); + u64 dt = ktime_to_ns(p->time); + int integer, decimal; + + if (dt) { + integer = div64_u64(busy, dt); + busy -= integer * dt; + decimal = div64_u64(100 * busy, dt); + } else { + integer = 0; + decimal = 0; + } + + GEM_BUG_ON(engine != p->engine); + pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n", + name, engine->name, p->count, integer, decimal, + div_u64(p->runtime, 1000 * 1000), + div_u64(ktime_to_ns(p->time), 1000 * 1000)); + idx++; + } + } + + if (qos) { + pm_qos_remove_request(qos); + kfree(qos); + } + kfree(engines); + return err; +} + +int i915_request_perf_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(perf_series_engines), + 
SUBTEST(perf_parallel_engines), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_subtests(tests, i915); +} From patchwork Tue Mar 17 12:27:09 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442793 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D7B441820 for ; Tue, 17 Mar 2020 12:27:54 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id C072C20767 for ; Tue, 17 Mar 2020 12:27:54 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org C072C20767 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0CD946E0E3; Tue, 17 Mar 2020 12:27:54 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 51D606E0DC for ; Tue, 17 Mar 2020 12:27:44 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588518-1500050 for multiple; Tue, 17 Mar 2020 12:27:16 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:09 +0000 Message-Id: <20200317122719.1889-2-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: 
<20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 02/12] drm/i915: Wrap i915_active in a simple kreffed struct X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" For the convenience of callers that just want to use an i915_active to track a wide array of concurrent timelines, wrap the base i915_active struct inside a kref. This i915_active will self-destruct after use. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_active.c | 53 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_active.h | 4 +++ 2 files changed, 57 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index c4048628188a..535b8161a597 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -937,6 +937,59 @@ void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb) active_fence_cb(fence, cb); } +struct auto_active { + struct i915_active base; + struct kref ref; +}; + +struct i915_active *i915_active_get(struct i915_active *ref) +{ + struct auto_active *aa = container_of(ref, typeof(*aa), base); + + kref_get(&aa->ref); + return &aa->base; +} + +static void auto_release(struct kref *ref) +{ + struct auto_active *aa = container_of(ref, typeof(*aa), ref); + + i915_active_fini(&aa->base); + kfree(aa); +} + +void i915_active_put(struct i915_active *ref) +{ + struct auto_active *aa = container_of(ref, typeof(*aa), base); + + kref_put(&aa->ref, auto_release); +} + +static int auto_active(struct i915_active *ref) +{ + i915_active_get(ref); + return 0; +} + +static void auto_retire(struct i915_active *ref) +{ + 
i915_active_put(ref); +} + +struct i915_active *i915_active_create(void) +{ + struct auto_active *aa; + + aa = kmalloc(sizeof(*aa), GFP_KERNEL); + if (!aa) + return NULL; + + kref_init(&aa->ref); + i915_active_init(&aa->base, auto_active, auto_retire); + + return &aa->base; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_active.c" #endif diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index b3282ae7913c..bffbcf7751a7 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -221,4 +221,8 @@ void i915_request_add_active_barriers(struct i915_request *rq); void i915_active_print(struct i915_active *ref, struct drm_printer *m); void i915_active_unlock_wait(struct i915_active *ref); +struct i915_active *i915_active_create(void); +struct i915_active *i915_active_get(struct i915_active *ref); +void i915_active_put(struct i915_active *ref); + #endif /* _I915_ACTIVE_H_ */ From patchwork Tue Mar 17 12:27:10 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442791 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9440392A for ; Tue, 17 Mar 2020 12:27:54 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 7CD7F206EC for ; Tue, 17 Mar 2020 12:27:54 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 7CD7F206EC Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by 
gabe.freedesktop.org (Postfix) with ESMTP id DC30B6E0DC; Tue, 17 Mar 2020 12:27:53 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 513E16E0D2 for ; Tue, 17 Mar 2020 12:27:45 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588519-1500050 for multiple; Tue, 17 Mar 2020 12:27:16 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:10 +0000 Message-Id: <20200317122719.1889-3-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 03/12] drm/i915/perf: Schedule oa_config after modifying the contexts X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" We wish that the scheduler emit the context modification commands prior to enabling the oa_config, for which we must explicitly inform it of the ordering constraints. This is especially important as we now wait for the final oa_config setup to be completed and as this wait may be on a distinct context to the state modifications, we need that command packet to be always last in the queue. We borrow the i915_active for its ability to track multiple timelines and the last dma_fence on each; a flexible dma_resv. 
Keeping track of each dma_fence is important for us so that we can efficiently schedule the requests and reprioritise as required. Reported-by: Lionel Landwerlin Signed-off-by: Chris Wilson Cc: Lionel Landwerlin --- drivers/gpu/drm/i915/display/intel_overlay.c | 8 +- drivers/gpu/drm/i915/gt/intel_context_param.c | 2 +- drivers/gpu/drm/i915/i915_active.c | 6 +- drivers/gpu/drm/i915/i915_active.h | 2 +- drivers/gpu/drm/i915/i915_perf.c | 154 +++++++++++------- drivers/gpu/drm/i915/i915_perf_types.h | 5 +- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/selftests/i915_active.c | 4 +- 8 files changed, 115 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 481187223101..88711c2a74f6 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -272,7 +272,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) i915_request_add(rq); - return i915_active_wait(&overlay->last_flip); + return i915_active_wait(&overlay->last_flip, TASK_INTERRUPTIBLE); } static void intel_overlay_flip_prepare(struct intel_overlay *overlay, @@ -429,14 +429,14 @@ static int intel_overlay_off(struct intel_overlay *overlay) intel_overlay_flip_prepare(overlay, NULL); i915_request_add(rq); - return i915_active_wait(&overlay->last_flip); + return i915_active_wait(&overlay->last_flip, TASK_INTERRUPTIBLE); } /* recover from an interruption due to a signal * We have to be careful not to repeat work forever an make forward progess. */ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) { - return i915_active_wait(&overlay->last_flip); + return i915_active_wait(&overlay->last_flip, TASK_INTERRUPTIBLE); } /* Wait for pending overlay flip and release old frame. 
@@ -477,7 +477,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) i915_request_add(rq); - return i915_active_wait(&overlay->last_flip); + return i915_active_wait(&overlay->last_flip, TASK_INTERRUPTIBLE); } void intel_overlay_reset(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c index 65dcd090245d..903cce8c23c4 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_param.c +++ b/drivers/gpu/drm/i915/gt/intel_context_param.c @@ -15,7 +15,7 @@ int intel_context_set_ring_size(struct intel_context *ce, long sz) if (intel_context_lock_pinned(ce)) return -EINTR; - err = i915_active_wait(&ce->active); + err = i915_active_wait(&ce->active, TASK_INTERRUPTIBLE); if (err < 0) goto unlock; diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 535b8161a597..d26295a6812e 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -496,7 +496,7 @@ static int flush_lazy_signals(struct i915_active *ref) return err; } -int i915_active_wait(struct i915_active *ref) +int i915_active_wait(struct i915_active *ref, int state) { int err; @@ -511,7 +511,9 @@ int i915_active_wait(struct i915_active *ref) if (err) return err; - if (wait_var_event_interruptible(ref, i915_active_is_idle(ref))) + if (!i915_active_is_idle(ref) && + ___wait_var_event(ref, i915_active_is_idle(ref), + state, 0, 0, schedule())) return -EINTR; flush_work(&ref->work); diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h index bffbcf7751a7..224b95a95fcd 100644 --- a/drivers/gpu/drm/i915/i915_active.h +++ b/drivers/gpu/drm/i915/i915_active.h @@ -181,7 +181,7 @@ static inline bool i915_active_has_exclusive(struct i915_active *ref) return rcu_access_pointer(ref->excl.fence); } -int i915_active_wait(struct i915_active *ref); +int i915_active_wait(struct i915_active *ref, int state); int 
i915_sw_fence_await_active(struct i915_sw_fence *fence, struct i915_active *ref, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1b074bb4a7fe..214e72670738 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1972,10 +1972,11 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) return i915_vma_get(oa_bo->vma); } -static struct i915_request * +static int emit_oa_config(struct i915_perf_stream *stream, struct i915_oa_config *oa_config, - struct intel_context *ce) + struct intel_context *ce, + struct i915_active *active) { struct i915_request *rq; struct i915_vma *vma; @@ -1983,7 +1984,7 @@ emit_oa_config(struct i915_perf_stream *stream, vma = get_oa_vma(stream, oa_config); if (IS_ERR(vma)) - return ERR_CAST(vma); + return PTR_ERR(vma); err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) @@ -1997,6 +1998,18 @@ emit_oa_config(struct i915_perf_stream *stream, goto err_vma_unpin; } + if (!IS_ERR_OR_NULL(active)) { + /* After all individual context modifications */ + err = i915_request_await_active(rq, active, + I915_ACTIVE_AWAIT_ALL); + if (err) + goto err_add_request; + + err = i915_active_add_request(active, rq); + if (err) + goto err_add_request; + } + i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, 0); if (!err) @@ -2011,14 +2024,13 @@ emit_oa_config(struct i915_perf_stream *stream, if (err) goto err_add_request; - i915_request_get(rq); err_add_request: i915_request_add(rq); err_vma_unpin: i915_vma_unpin(vma); err_vma_put: i915_vma_put(vma); - return err ? 
ERR_PTR(err) : rq; + return err; } static struct intel_context *oa_context(struct i915_perf_stream *stream) @@ -2026,8 +2038,9 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream) return stream->pinned_ctx ?: stream->engine->kernel_context; } -static struct i915_request * -hsw_enable_metric_set(struct i915_perf_stream *stream) +static int +hsw_enable_metric_set(struct i915_perf_stream *stream, + struct i915_active *active) { struct intel_uncore *uncore = stream->uncore; @@ -2046,7 +2059,9 @@ hsw_enable_metric_set(struct i915_perf_stream *stream) intel_uncore_rmw(uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - return emit_oa_config(stream, stream->oa_config, oa_context(stream)); + return emit_oa_config(stream, + stream->oa_config, oa_context(stream), + active); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) @@ -2196,8 +2211,10 @@ static int gen8_modify_context(struct intel_context *ce, return err; } -static int gen8_modify_self(struct intel_context *ce, - const struct flex *flex, unsigned int count) +static int +gen8_modify_self(struct intel_context *ce, + const struct flex *flex, unsigned int count, + struct i915_active *active) { struct i915_request *rq; int err; @@ -2208,8 +2225,17 @@ static int gen8_modify_self(struct intel_context *ce, if (IS_ERR(rq)) return PTR_ERR(rq); + if (!IS_ERR_OR_NULL(active)) { + err = i915_active_add_request(active, rq); + if (err) + goto err_add_request; + } + err = gen8_load_flex(rq, ce, flex, count); + if (err) + goto err_add_request; +err_add_request: i915_request_add(rq); return err; } @@ -2243,7 +2269,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } -static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable) +static int gen12_configure_oar_context(struct i915_perf_stream *stream, + struct i915_active *active) { int err; struct intel_context *ce = stream->pinned_ctx; @@ -2252,7 +2279,7 @@ static int 
gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena { GEN8_OACTXCONTROL, stream->perf->ctx_oactxctrl_offset + 1, - enable ? GEN8_OA_COUNTER_RESUME : 0, + active ? GEN8_OA_COUNTER_RESUME : 0, }, }; /* Offsets in regs_lri are not used since this configuration is only @@ -2264,13 +2291,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena GEN12_OAR_OACONTROL, GEN12_OAR_OACONTROL_OFFSET + 1, (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | - (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) + (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0) }, { RING_CONTEXT_CONTROL(ce->engine->mmio_base), CTX_CONTEXT_CONTROL, _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE, - enable ? + active ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0) }, @@ -2287,7 +2314,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena return err; /* Apply regs_lri using LRI with pinned context */ - return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)); + return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active); } /* @@ -2315,9 +2342,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena * Note: it's only the RCS/Render context that has any OA state. 
* Note: the first flex register passed must always be R_PWR_CLK_STATE */ -static int oa_configure_all_contexts(struct i915_perf_stream *stream, - struct flex *regs, - size_t num_regs) +static int +oa_configure_all_contexts(struct i915_perf_stream *stream, + struct flex *regs, + size_t num_regs, + struct i915_active *active) { struct drm_i915_private *i915 = stream->perf->i915; struct intel_engine_cs *engine; @@ -2374,7 +2403,7 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream, regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, num_regs); + err = gen8_modify_self(ce, regs, num_regs, active); if (err) return err; } @@ -2382,8 +2411,10 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream, return 0; } -static int gen12_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int +gen12_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config, + struct i915_active *active) { struct flex regs[] = { { @@ -2392,11 +2423,15 @@ static int gen12_configure_all_contexts(struct i915_perf_stream *stream, }, }; - return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); + return oa_configure_all_contexts(stream, + regs, ARRAY_SIZE(regs), + active); } -static int lrc_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int +lrc_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config, + struct i915_active *active) { /* The MMIO offsets for Flex EU registers aren't contiguous */ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; @@ -2429,11 +2464,14 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, for (i = 2; i < ARRAY_SIZE(regs); i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); - return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); + return 
oa_configure_all_contexts(stream, + regs, ARRAY_SIZE(regs), + active); } -static struct i915_request * -gen8_enable_metric_set(struct i915_perf_stream *stream) +static int +gen8_enable_metric_set(struct i915_perf_stream *stream, + struct i915_active *active) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; @@ -2473,11 +2511,13 @@ gen8_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = lrc_configure_all_contexts(stream, oa_config); + ret = lrc_configure_all_contexts(stream, oa_config, active); if (ret) - return ERR_PTR(ret); + return ret; - return emit_oa_config(stream, oa_config, oa_context(stream)); + return emit_oa_config(stream, + stream->oa_config, oa_context(stream), + active); } static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) @@ -2487,8 +2527,9 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); } -static struct i915_request * -gen12_enable_metric_set(struct i915_perf_stream *stream) +static int +gen12_enable_metric_set(struct i915_perf_stream *stream, + struct i915_active *active) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; @@ -2517,9 +2558,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen12_configure_all_contexts(stream, oa_config); + ret = gen12_configure_all_contexts(stream, oa_config, active); if (ret) - return ERR_PTR(ret); + return ret; /* * For Gen12, performance counters are context @@ -2527,12 +2568,14 @@ gen12_enable_metric_set(struct i915_perf_stream *stream) * requested this. 
*/ if (stream->ctx) { - ret = gen12_configure_oar_context(stream, true); + ret = gen12_configure_oar_context(stream, active); if (ret) - return ERR_PTR(ret); + return ret; } - return emit_oa_config(stream, oa_config, oa_context(stream)); + return emit_oa_config(stream, + stream->oa_config, oa_context(stream), + active); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) @@ -2540,7 +2583,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL, NULL); intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } @@ -2550,7 +2593,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - lrc_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL, NULL); /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2561,11 +2604,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - gen12_configure_all_contexts(stream, NULL); + gen12_configure_all_contexts(stream, NULL, NULL); /* disable the context save/restore or OAR counters */ if (stream->ctx) - gen12_configure_oar_context(stream, false); + gen12_configure_oar_context(stream, NULL); /* Make sure we disable noa to save power. 
*/ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2729,16 +2772,19 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = { static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) { - struct i915_request *rq; + struct i915_active *active; + int err; - rq = stream->perf->ops.enable_metric_set(stream); - if (IS_ERR(rq)) - return PTR_ERR(rq); + active = i915_active_create(); + if (!active) + return -ENOMEM; - i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); - i915_request_put(rq); + err = stream->perf->ops.enable_metric_set(stream, active); + if (err == 0) + i915_active_wait(active, TASK_UNINTERRUPTIBLE); - return 0; + i915_active_put(active); + return err; } /** @@ -3192,7 +3238,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, return -EINVAL; if (config != stream->oa_config) { - struct i915_request *rq; + int err; /* * If OA is bound to a specific context, emit the @@ -3203,13 +3249,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, * When set globally, we use a low priority kernel context, * so it will effectively take effect when idle. */ - rq = emit_oa_config(stream, config, oa_context(stream)); - if (!IS_ERR(rq)) { + err = emit_oa_config(stream, config, oa_context(stream), NULL); + if (!err) config = xchg(&stream->oa_config, config); - i915_request_put(rq); - } else { - ret = PTR_ERR(rq); - } + else + ret = err; } i915_oa_config_put(config); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index a0e22f00f6cf..5eaf874a0d25 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -21,6 +21,7 @@ struct drm_i915_private; struct file; +struct i915_active; struct i915_gem_context; struct i915_perf; struct i915_vma; @@ -339,8 +340,8 @@ struct i915_oa_ops { * counter reports being sampled. May apply system constraints such as * disabling EU clock gating as required. 
*/ - struct i915_request * - (*enable_metric_set)(struct i915_perf_stream *stream); + int (*enable_metric_set)(struct i915_perf_stream *stream, + struct i915_active *active); /** * @disable_metric_set: Remove system constraints associated with using diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index b958ad07f212..25a74163d477 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -380,7 +380,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma); static inline int i915_vma_sync(struct i915_vma *vma) { /* Wait for the asynchronous bindings and pending GPU reads */ - return i915_active_wait(&vma->active); + return i915_active_wait(&vma->active, TASK_INTERRUPTIBLE); } #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c index 68bbb1580162..7357d2130024 100644 --- a/drivers/gpu/drm/i915/selftests/i915_active.c +++ b/drivers/gpu/drm/i915/selftests/i915_active.c @@ -153,7 +153,7 @@ static int live_active_wait(void *arg) if (IS_ERR(active)) return PTR_ERR(active); - i915_active_wait(&active->base); + i915_active_wait(&active->base, TASK_UNINTERRUPTIBLE); if (!READ_ONCE(active->retired)) { struct drm_printer p = drm_err_printer(__func__); @@ -230,7 +230,7 @@ static int live_active_barrier(void *arg) i915_active_release(&active->base); if (err == 0) - err = i915_active_wait(&active->base); + err = i915_active_wait(&active->base, TASK_UNINTERRUPTIBLE); if (err == 0 && !READ_ONCE(active->retired)) { pr_err("i915_active not retired after flushing barriers!\n"); From patchwork Tue Mar 17 12:27:11 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442787 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 500321667 for ; Tue, 17 Mar 2020 12:27:51 
+0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 387E7206EC for ; Tue, 17 Mar 2020 12:27:51 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 387E7206EC Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id C77B86E0D2; Tue, 17 Mar 2020 12:27:50 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 4F1956E0D2 for ; Tue, 17 Mar 2020 12:27:44 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588520-1500050 for multiple; Tue, 17 Mar 2020 12:27:16 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:11 +0000 Message-Id: <20200317122719.1889-4-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 04/12] dma-buf: Prettify typecasts for dma-fence-chain X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Inside dma-fence-chain, 
we use a cmpxchg on an RCU-protected pointer. To avoid the sparse warning for using the RCU pointer directly, we have to cast away the __rcu annotation. However, we don't need to use void* everywhere and can stick to the dma_fence*. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/dma-buf/dma-fence-chain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index 44a741677d25..3d123502ff12 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -62,7 +62,8 @@ struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence) replacement = NULL; } - tmp = cmpxchg((void **)&chain->prev, (void *)prev, (void *)replacement); + tmp = cmpxchg((struct dma_fence __force **)&chain->prev, + prev, replacement); if (tmp == prev) dma_fence_put(tmp); else From patchwork Tue Mar 17 12:27:12 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442773 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 00D8E92A for ; Tue, 17 Mar 2020 12:27:32 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id DD5FF206EC for ; Tue, 17 Mar 2020 12:27:31 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org DD5FF206EC Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0AEE589F69; Tue, 17 Mar 2020 12:27:30 +0000 
(UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 46FC289F69 for ; Tue, 17 Mar 2020 12:27:28 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588521-1500050 for multiple; Tue, 17 Mar 2020 12:27:17 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:12 +0000 Message-Id: <20200317122719.1889-5-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 05/12] dma-buf: Report signaled links inside dma-fence-chain X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Whenever we walk along the dma-fence-chain, we prune signaled links to keep the chain nice and tidy. This leads to situations where we can prune a link and report the earlier fence as the target seqno -- violating our own consistency checks that the seqno is not more advanced than the last element in a dma-fence-chain. Report a NULL fence and success if the seqno has already been signaled. 
Signed-off-by: Chris Wilson --- drivers/dma-buf/dma-fence-chain.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index 3d123502ff12..c435bbba851c 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno) return -EINVAL; dma_fence_chain_for_each(*pfence, &chain->base) { + if ((*pfence)->seqno < seqno) { /* already signaled */ + dma_fence_put(*pfence); + *pfence = NULL; + break; + } + if ((*pfence)->context != chain->base.context || to_dma_fence_chain(*pfence)->prev_seqno < seqno) break; @@ -222,6 +228,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops); * @chain: the chain node to initialize * @prev: the previous fence * @fence: the current fence + * @seqno: the sequence number (syncpt) of the fence within the chain * * Initialize a new chain node and either start a new chain or add the node to * the existing chain of the previous fence. 
From patchwork Tue Mar 17 12:27:13 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442777 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9AB1C1667 for ; Tue, 17 Mar 2020 12:27:33 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 83501206EC for ; Tue, 17 Mar 2020 12:27:33 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 83501206EC Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 4D69689F82; Tue, 17 Mar 2020 12:27:30 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 42C5E89725 for ; Tue, 17 Mar 2020 12:27:28 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588522-1500050 for multiple; Tue, 17 Mar 2020 12:27:18 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:13 +0000 Message-Id: <20200317122719.1889-6-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 
Subject: [Intel-gfx] [PATCH 06/12] dma-buf: Exercise dma-fence-chain under selftests X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" A few very simple testcases to exercise the dma-fence-chain API. Signed-off-by: Chris Wilson --- drivers/dma-buf/Makefile | 3 +- drivers/dma-buf/selftests.h | 1 + drivers/dma-buf/st-dma-fence-chain.c | 713 +++++++++++++++++++++++++++ 3 files changed, 716 insertions(+), 1 deletion(-) create mode 100644 drivers/dma-buf/st-dma-fence-chain.c diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 9c190026bfab..995e05f609ff 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_UDMABUF) += udmabuf.o dmabuf_selftests-y := \ selftest.o \ - st-dma-fence.o + st-dma-fence.o \ + st-dma-fence-chain.o obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h index 5320386f02e5..55918ef9adab 100644 --- a/drivers/dma-buf/selftests.h +++ b/drivers/dma-buf/selftests.h @@ -11,3 +11,4 @@ */ selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */ selftest(dma_fence, dma_fence) +selftest(dma_fence_chain, dma_fence_chain) diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c new file mode 100644 index 000000000000..bd08ba67b03b --- /dev/null +++ b/drivers/dma-buf/st-dma-fence-chain.c @@ -0,0 +1,713 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright © 2019 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "selftest.h" + +static struct kmem_cache *slab_fences; + +static inline struct mock_fence { + struct dma_fence base; + spinlock_t lock; +} 
*to_mock_fence(struct dma_fence *f) { + return container_of(f, struct mock_fence, base); +} + +static const char *mock_name(struct dma_fence *f) +{ + return "mock"; +} + +static void mock_fence_release(struct dma_fence *f) +{ + kmem_cache_free(slab_fences, to_mock_fence(f)); +} + +static const struct dma_fence_ops mock_ops = { + .get_driver_name = mock_name, + .get_timeline_name = mock_name, + .release = mock_fence_release, +}; + +static struct dma_fence *mock_fence(void) +{ + struct mock_fence *f; + + f = kmem_cache_alloc(slab_fences, GFP_KERNEL); + if (!f) + return NULL; + + spin_lock_init(&f->lock); + dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0); + + return &f->base; +} + +static inline struct mock_chain { + struct dma_fence_chain base; +} *to_mock_chain(struct dma_fence *f) { + return container_of(f, struct mock_chain, base.base); +} + +static struct dma_fence *mock_chain(struct dma_fence *prev, + struct dma_fence *fence, + u64 seqno) +{ + struct mock_chain *f; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + dma_fence_chain_init(&f->base, + dma_fence_get(prev), + dma_fence_get(fence), + seqno); + + return &f->base.base; +} + +static int sanitycheck(void *arg) +{ + struct dma_fence *f, *chain; + int err = 0; + + f = mock_fence(); + if (!f) + return -ENOMEM; + + chain = mock_chain(NULL, f, 1); + if (!chain) + err = -ENOMEM; + + dma_fence_signal(f); + dma_fence_put(f); + + dma_fence_put(chain); + + return err; +} + +struct fence_chains { + unsigned int chain_length; + struct dma_fence **fences; + struct dma_fence **chains; + + struct dma_fence *tail; +}; + +static uint64_t seqno_inc(unsigned int i) +{ + return i + 1; +} + +static int fence_chains_init(struct fence_chains *fc, unsigned int count, + uint64_t (*seqno_fn)(unsigned int)) +{ + unsigned int i; + int err = 0; + + fc->chains = kvmalloc_array(count, sizeof(*fc->chains), + GFP_KERNEL | __GFP_ZERO); + if (!fc->chains) + return -ENOMEM; + + fc->fences = kvmalloc_array(count, 
sizeof(*fc->fences), + GFP_KERNEL | __GFP_ZERO); + if (!fc->fences) { + err = -ENOMEM; + goto err_chains; + } + + fc->tail = NULL; + for (i = 0; i < count; i++) { + fc->fences[i] = mock_fence(); + if (!fc->fences[i]) { + err = -ENOMEM; + goto unwind; + } + + fc->chains[i] = mock_chain(fc->tail, + fc->fences[i], + seqno_fn(i)); + if (!fc->chains[i]) { + err = -ENOMEM; + goto unwind; + } + + fc->tail = fc->chains[i]; + } + + fc->chain_length = i; + return 0; + +unwind: + for (i = 0; i < count; i++) { + dma_fence_put(fc->fences[i]); + dma_fence_put(fc->chains[i]); + } + kvfree(fc->fences); +err_chains: + kvfree(fc->chains); + return err; +} + +static void fence_chains_fini(struct fence_chains *fc) +{ + unsigned int i; + + for (i = 0; i < fc->chain_length; i++) { + dma_fence_signal(fc->fences[i]); + dma_fence_put(fc->fences[i]); + } + kvfree(fc->fences); + + for (i = 0; i < fc->chain_length; i++) + dma_fence_put(fc->chains[i]); + kvfree(fc->chains); +} + +static int find_seqno(void *arg) +{ + struct fence_chains fc; + struct dma_fence *fence; + int err; + int i; + + err = fence_chains_init(&fc, 64, seqno_inc); + if (err) + return err; + + fence = dma_fence_get(fc.tail); + err = dma_fence_chain_find_seqno(&fence, 0); + dma_fence_put(fence); + if (err) { + pr_err("Reported %d for find_seqno(0)!\n", err); + goto err; + } + + for (i = 0; i < fc.chain_length; i++) { + fence = dma_fence_get(fc.tail); + err = dma_fence_chain_find_seqno(&fence, i + 1); + dma_fence_put(fence); + if (err) { + pr_err("Reported %d for find_seqno(%d:%d)!\n", + err, fc.chain_length + 1, i + 1); + goto err; + } + if (fence != fc.chains[i]) { + pr_err("Incorrect fence reported by find_seqno(%d:%d)\n", + fc.chain_length + 1, i + 1); + err = -EINVAL; + goto err; + } + + dma_fence_get(fence); + err = dma_fence_chain_find_seqno(&fence, i + 1); + dma_fence_put(fence); + if (err) { + pr_err("Error reported for finding self\n"); + goto err; + } + if (fence != fc.chains[i]) { + pr_err("Incorrect fence 
reported by find self\n"); + err = -EINVAL; + goto err; + } + + dma_fence_get(fence); + err = dma_fence_chain_find_seqno(&fence, i + 2); + dma_fence_put(fence); + if (!err) { + pr_err("Error not reported for future fence: find_seqno(%d:%d)!\n", + i + 1, i + 2); + err = -EINVAL; + goto err; + } + + dma_fence_get(fence); + err = dma_fence_chain_find_seqno(&fence, i); + dma_fence_put(fence); + if (err) { + pr_err("Error reported for previous fence!\n"); + goto err; + } + if (i > 0 && fence != fc.chains[i - 1]) { + pr_err("Incorrect fence reported by find_seqno(%d:%d)\n", + i + 1, i); + err = -EINVAL; + goto err; + } + } + +err: + fence_chains_fini(&fc); + return err; +} + +static int find_signaled(void *arg) +{ + struct fence_chains fc; + struct dma_fence *fence; + int err; + + err = fence_chains_init(&fc, 2, seqno_inc); + if (err) + return err; + + dma_fence_signal(fc.fences[0]); + + fence = dma_fence_get(fc.tail); + err = dma_fence_chain_find_seqno(&fence, 1); + dma_fence_put(fence); + if (err) { + pr_err("Reported %d for find_seqno()!\n", err); + goto err; + } + + if (fence && fence != fc.chains[0]) { + pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:1\n", + fence->seqno); + + dma_fence_get(fence); + err = dma_fence_chain_find_seqno(&fence, 1); + dma_fence_put(fence); + if (err) + pr_err("Reported %d for finding self!\n", err); + + err = -EINVAL; + } + +err: + fence_chains_fini(&fc); + return err; +} + +static int find_out_of_order(void *arg) +{ + struct fence_chains fc; + struct dma_fence *fence; + int err; + + err = fence_chains_init(&fc, 3, seqno_inc); + if (err) + return err; + + dma_fence_signal(fc.fences[1]); + + fence = dma_fence_get(fc.tail); + err = dma_fence_chain_find_seqno(&fence, 2); + dma_fence_put(fence); + if (err) { + pr_err("Reported %d for find_seqno()!\n", err); + goto err; + } + + if (fence && fence != fc.chains[1]) { + pr_err("Incorrect chain-fence.seqno:%lld reported for completed seqno:2\n", + fence->seqno); + + 
dma_fence_get(fence); + err = dma_fence_chain_find_seqno(&fence, 2); + dma_fence_put(fence); + if (err) + pr_err("Reported %d for finding self!\n", err); + + err = -EINVAL; + } + +err: + fence_chains_fini(&fc); + return err; +} + +static uint64_t seqno_inc2(unsigned int i) +{ + return 2 * i + 2; +} + +static int find_gap(void *arg) +{ + struct fence_chains fc; + struct dma_fence *fence; + int err; + int i; + + err = fence_chains_init(&fc, 64, seqno_inc2); + if (err) + return err; + + for (i = 0; i < fc.chain_length; i++) { + fence = dma_fence_get(fc.tail); + err = dma_fence_chain_find_seqno(&fence, 2 * i + 1); + dma_fence_put(fence); + if (err) { + pr_err("Reported %d for find_seqno(%d:%d)!\n", + err, fc.chain_length + 1, 2 * i + 1); + goto err; + } + if (fence != fc.chains[i]) { + pr_err("Incorrect fence.seqno:%lld reported by find_seqno(%d:%d)\n", + fence->seqno, + fc.chain_length + 1, + 2 * i + 1); + err = -EINVAL; + goto err; + } + + dma_fence_get(fence); + err = dma_fence_chain_find_seqno(&fence, 2 * i + 2); + dma_fence_put(fence); + if (err) { + pr_err("Error reported for finding self\n"); + goto err; + } + if (fence != fc.chains[i]) { + pr_err("Incorrect fence reported by find self\n"); + err = -EINVAL; + goto err; + } + } + +err: + fence_chains_fini(&fc); + return err; +} + +struct find_race { + struct fence_chains fc; + atomic_t children; +}; + +static int __find_race(void *arg) +{ + struct find_race *data = arg; + int err = 0; + + while (!kthread_should_stop()) { + struct dma_fence *fence = dma_fence_get(data->fc.tail); + int seqno; + + seqno = prandom_u32_max(data->fc.chain_length) + 1; + + err = dma_fence_chain_find_seqno(&fence, seqno); + if (err) { + pr_err("Failed to find fence seqno:%d\n", + seqno); + dma_fence_put(fence); + break; + } + if (!fence) + goto signal; + + err = dma_fence_chain_find_seqno(&fence, seqno); + if (err) { + pr_err("Reported an invalid fence for find-self:%d\n", + seqno); + dma_fence_put(fence); + break; + } + + if 
(fence->seqno < seqno) { + pr_err("Reported an earlier fence.seqno:%lld for seqno:%d\n", + fence->seqno, seqno); + err = -EINVAL; + dma_fence_put(fence); + break; + } + + dma_fence_put(fence); + +signal: + seqno = prandom_u32_max(data->fc.chain_length - 1); + dma_fence_signal(data->fc.fences[seqno]); + cond_resched(); + } + + if (atomic_dec_and_test(&data->children)) + wake_up_var(&data->children); + return err; +} + +static int find_race(void *arg) +{ + struct find_race data; + int ncpus = num_online_cpus(); + struct task_struct **threads; + unsigned long count; + int err; + int i; + + err = fence_chains_init(&data.fc, 64 << 10, seqno_inc); + if (err) + return err; + + threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); + if (!threads) { + err = -ENOMEM; + goto err; + } + + atomic_set(&data.children, 0); + for (i = 0; i < ncpus; i++) { + threads[i] = kthread_run(__find_race, &data, "dmabuf/%d", i); + if (IS_ERR(threads[i])) { + ncpus = i; + break; + } + atomic_inc(&data.children); + get_task_struct(threads[i]); + } + + wait_var_event_timeout(&data.children, + !atomic_read(&data.children), + 5 * HZ); + + for (i = 0; i < ncpus; i++) { + int ret; + + ret = kthread_stop(threads[i]); + if (ret && !err) + err = ret; + put_task_struct(threads[i]); + } + kfree(threads); + + count = 0; + for (i = 0; i < data.fc.chain_length; i++) + if (dma_fence_is_signaled(data.fc.fences[i])) + count++; + pr_info("Completed %lu cycles\n", count); + +err: + fence_chains_fini(&data.fc); + return err; +} + +static int signal_forward(void *arg) +{ + struct fence_chains fc; + int err; + int i; + + err = fence_chains_init(&fc, 64, seqno_inc); + if (err) + return err; + + for (i = 0; i < fc.chain_length; i++) { + dma_fence_signal(fc.fences[i]); + + if (!dma_fence_is_signaled(fc.chains[i])) { + pr_err("chain[%d] not signaled!\n", i); + err = -EINVAL; + goto err; + } + + if (i + 1 < fc.chain_length && + dma_fence_is_signaled(fc.chains[i + 1])) { + pr_err("chain[%d] is signaled!\n", i); + 
err = -EINVAL; + goto err; + } + } + +err: + fence_chains_fini(&fc); + return err; +} + +static int signal_backward(void *arg) +{ + struct fence_chains fc; + int err; + int i; + + err = fence_chains_init(&fc, 64, seqno_inc); + if (err) + return err; + + for (i = fc.chain_length; i--; ) { + dma_fence_signal(fc.fences[i]); + + if (i > 0 && dma_fence_is_signaled(fc.chains[i])) { + pr_err("chain[%d] is signaled!\n", i); + err = -EINVAL; + goto err; + } + } + + for (i = 0; i < fc.chain_length; i++) { + if (!dma_fence_is_signaled(fc.chains[i])) { + pr_err("chain[%d] was not signaled!\n", i); + err = -EINVAL; + goto err; + } + } + +err: + fence_chains_fini(&fc); + return err; +} + +static int __wait_fence_chains(void *arg) +{ + struct fence_chains *fc = arg; + + if (dma_fence_wait(fc->tail, false)) + return -EIO; + + return 0; +} + +static int wait_forward(void *arg) +{ + struct fence_chains fc; + struct task_struct *tsk; + int err; + int i; + + err = fence_chains_init(&fc, 64 << 10, seqno_inc); + if (err) + return err; + + tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait"); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + goto err; + } + get_task_struct(tsk); + yield_to(tsk, true); + + for (i = 0; i < fc.chain_length; i++) + dma_fence_signal(fc.fences[i]); + + err = kthread_stop(tsk); + put_task_struct(tsk); + +err: + fence_chains_fini(&fc); + return err; +} + +static int wait_backward(void *arg) +{ + struct fence_chains fc; + struct task_struct *tsk; + int err; + int i; + + err = fence_chains_init(&fc, 64 << 10, seqno_inc); + if (err) + return err; + + tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait"); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + goto err; + } + get_task_struct(tsk); + yield_to(tsk, true); + + for (i = fc.chain_length; i--; ) + dma_fence_signal(fc.fences[i]); + + err = kthread_stop(tsk); + put_task_struct(tsk); + +err: + fence_chains_fini(&fc); + return err; +} + +static void randomise_fences(struct fence_chains *fc) +{ + unsigned int count 
= fc->chain_length; + + /* Fisher-Yates shuffle courtesy of Knuth */ + while (--count) { + unsigned int swp; + + swp = prandom_u32_max(count + 1); + if (swp == count) + continue; + + swap(fc->fences[count], fc->fences[swp]); + } +} + +static int wait_random(void *arg) +{ + struct fence_chains fc; + struct task_struct *tsk; + int err; + int i; + + err = fence_chains_init(&fc, 64 << 10, seqno_inc); + if (err) + return err; + + randomise_fences(&fc); + + tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait"); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + goto err; + } + get_task_struct(tsk); + yield_to(tsk, true); + + for (i = 0; i < fc.chain_length; i++) + dma_fence_signal(fc.fences[i]); + + err = kthread_stop(tsk); + put_task_struct(tsk); + +err: + fence_chains_fini(&fc); + return err; +} + +int dma_fence_chain(void) +{ + static const struct subtest tests[] = { + SUBTEST(sanitycheck), + SUBTEST(find_seqno), + SUBTEST(find_signaled), + SUBTEST(find_out_of_order), + SUBTEST(find_gap), + SUBTEST(find_race), + SUBTEST(signal_forward), + SUBTEST(signal_backward), + SUBTEST(wait_forward), + SUBTEST(wait_backward), + SUBTEST(wait_random), + }; + int ret; + + pr_info("sizeof(dma_fence_chain)=%zu\n", + sizeof(struct dma_fence_chain)); + + slab_fences = KMEM_CACHE(mock_fence, + SLAB_TYPESAFE_BY_RCU | + SLAB_HWCACHE_ALIGN); + if (!slab_fences) + return -ENOMEM; + + ret = subtests(tests, NULL); + + kmem_cache_destroy(slab_fences); + return ret; +} From patchwork Tue Mar 17 12:27:14 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442795 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8D2AE92A for ; Tue, 17 Mar 2020 12:27:55 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher 
ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 761DB206EC for ; Tue, 17 Mar 2020 12:27:55 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 761DB206EC Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 332B86E0E4; Tue, 17 Mar 2020 12:27:54 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 600B96E0DC for ; Tue, 17 Mar 2020 12:27:45 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588523-1500050 for multiple; Tue, 17 Mar 2020 12:27:18 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:14 +0000 Message-Id: <20200317122719.1889-7-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 07/12] dma-buf: Proxy fence, an unsignaled fence placeholder X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Often we need to create a fence for a future event that has not yet been associated with a fence. 
We can store a proxy fence, a placeholder, in the timeline and replace it later when the real fence is known. Any listeners that attach to the proxy fence will automatically be signaled when the real fence completes, and any future listeners will instead be attached directly to the real fence, avoiding any indirection overhead. Signed-off-by: Chris Wilson Cc: Lionel Landwerlin --- drivers/dma-buf/Makefile | 13 +- drivers/dma-buf/dma-fence-private.h | 20 + drivers/dma-buf/dma-fence-proxy.c | 189 +++++++++ drivers/dma-buf/dma-fence.c | 4 +- drivers/dma-buf/selftests.h | 1 + drivers/dma-buf/st-dma-fence-proxy.c | 581 +++++++++++++++++++++++++++ include/linux/dma-fence-proxy.h | 20 + 7 files changed, 824 insertions(+), 4 deletions(-) create mode 100644 drivers/dma-buf/dma-fence-private.h create mode 100644 drivers/dma-buf/dma-fence-proxy.c create mode 100644 drivers/dma-buf/st-dma-fence-proxy.c create mode 100644 include/linux/dma-fence-proxy.h diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 995e05f609ff..afaf6dadd9a3 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -1,6 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \ - dma-resv.o seqno-fence.o +obj-y := \ + dma-buf.o \ + dma-fence.o \ + dma-fence-array.o \ + dma-fence-chain.o \ + dma-fence-proxy.o \ + dma-resv.o \ + seqno-fence.o obj-$(CONFIG_DMABUF_HEAPS) += dma-heap.o obj-$(CONFIG_DMABUF_HEAPS) += heaps/ obj-$(CONFIG_SYNC_FILE) += sync_file.o @@ -10,6 +16,7 @@ obj-$(CONFIG_UDMABUF) += udmabuf.o dmabuf_selftests-y := \ selftest.o \ st-dma-fence.o \ - st-dma-fence-chain.o + st-dma-fence-chain.o \ + st-dma-fence-proxy.o obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o diff --git a/drivers/dma-buf/dma-fence-private.h b/drivers/dma-buf/dma-fence-private.h new file mode 100644 index 000000000000..6924d28af0fa --- /dev/null +++ b/drivers/dma-buf/dma-fence-private.h @@ -0,0 +1,20 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Fence mechanism for dma-buf and to allow for asynchronous dma access + * + * Copyright (C) 2012 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + */ + +#ifndef DMA_FENCE_PRIVATE_H +#define DMA_FENCE_PRIAVTE_H + +struct dma_fence; + +bool __dma_fence_enable_signaling(struct dma_fence *fence); + +#endif /* DMA_FENCE_PRIAVTE_H */ diff --git a/drivers/dma-buf/dma-fence-proxy.c b/drivers/dma-buf/dma-fence-proxy.c new file mode 100644 index 000000000000..6dce543d0757 --- /dev/null +++ b/drivers/dma-buf/dma-fence-proxy.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * dma-fence-proxy: placeholder unsignaled fence + * + * Copyright (C) 2017-2019 Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include "dma-fence-private.h" + +struct dma_fence_proxy { + struct dma_fence base; + spinlock_t lock; + + struct dma_fence *real; + struct dma_fence_cb cb; + struct irq_work work; +}; + +static const char *proxy_get_driver_name(struct dma_fence *fence) +{ + struct dma_fence_proxy *p = container_of(fence, typeof(*p), base); + struct dma_fence *real = READ_ONCE(p->real); + + return real ? real->ops->get_driver_name(real) : "proxy"; +} + +static const char *proxy_get_timeline_name(struct dma_fence *fence) +{ + struct dma_fence_proxy *p = container_of(fence, typeof(*p), base); + struct dma_fence *real = READ_ONCE(p->real); + + return real ? 
real->ops->get_timeline_name(real) : "unset"; +} + +static void proxy_irq_work(struct irq_work *work) +{ + struct dma_fence_proxy *p = container_of(work, typeof(*p), work); + + dma_fence_signal(&p->base); + dma_fence_put(&p->base); +} + +static void proxy_callback(struct dma_fence *real, struct dma_fence_cb *cb) +{ + struct dma_fence_proxy *p = container_of(cb, typeof(*p), cb); + + if (real->error) + dma_fence_set_error(&p->base, real->error); + + /* Lower the height of the proxy chain -> single stack frame */ + irq_work_queue(&p->work); +} + +static bool proxy_enable_signaling(struct dma_fence *fence) +{ + struct dma_fence_proxy *p = container_of(fence, typeof(*p), base); + struct dma_fence *real = READ_ONCE(p->real); + bool ret = true; + + if (real) { + spin_lock_nested(real->lock, SINGLE_DEPTH_NESTING); + ret = __dma_fence_enable_signaling(real); + spin_unlock(real->lock); + } + + return ret; +} + +static void proxy_release(struct dma_fence *fence) +{ + struct dma_fence_proxy *p = container_of(fence, typeof(*p), base); + + dma_fence_put(p->real); + dma_fence_free(&p->base); +} + +static const struct dma_fence_ops dma_fence_proxy_ops = { + .get_driver_name = proxy_get_driver_name, + .get_timeline_name = proxy_get_timeline_name, + .enable_signaling = proxy_enable_signaling, + .wait = dma_fence_default_wait, + .release = proxy_release, +}; + +/** + * dma_fence_create_proxy - Create an unset dma-fence + * + * dma_fence_create_proxy() creates a new dma_fence stub that is initially + * unsignaled and may later be replaced with a real fence. Any listeners + * to the proxy fence will be signaled when the target fence signals its + * completion. 
+ */ +struct dma_fence *dma_fence_create_proxy(void) +{ + struct dma_fence_proxy *p; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return NULL; + + spin_lock_init(&p->lock); + dma_fence_init(&p->base, &dma_fence_proxy_ops, &p->lock, + dma_fence_context_alloc(1), 0); + init_irq_work(&p->work, proxy_irq_work); + + return &p->base; +} +EXPORT_SYMBOL(dma_fence_create_proxy); + +static void wrap_signal_locked(struct dma_fence *fence, struct dma_fence *real) +{ + if (real->error) + dma_fence_set_error(fence, real->error); + dma_fence_signal_locked(fence); +} + +static void proxy_assign(struct dma_fence *fence, struct dma_fence *real) +{ + struct dma_fence_proxy *p = container_of(fence, typeof(*p), base); + unsigned long flags; + + if (WARN_ON(fence == real)) + return; + + if (WARN_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return; + + if (WARN_ON(p->real)) + return; + + spin_lock_irqsave(p->base.lock, flags); + + if (unlikely(!real)) { + dma_fence_signal_locked(&p->base); + goto unlock; + } + + p->real = dma_fence_get(real); + + spin_lock_nested(real->lock, SINGLE_DEPTH_NESTING); + if (dma_fence_is_signaled(real)) { + wrap_signal_locked(&p->base, real); + } else if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &p->base.flags) && + !__dma_fence_enable_signaling(real)) { + wrap_signal_locked(&p->base, real); + } else { + dma_fence_get(&p->base); + p->cb.func = proxy_callback; + list_add_tail(&p->cb.node, &real->cb_list); + } + spin_unlock(real->lock); + +unlock: + spin_unlock_irqrestore(p->base.lock, flags); +} + +/** + * dma_fence_replace_proxy - Replace the proxy fence with the real target + * @slot: pointer to location of fence to update + * @fence: the new fence to store in @slot + * + * Once the real dma_fence is known, we can replace the proxy fence holder + * with a pointer to the real dma fence. Future listeners will attach to + * the real fence, avoiding any indirection overhead. 
Previous listeners + * will remain attached to the proxy fence, and be signaled in turn when + * the target fence completes. + */ +struct dma_fence * +dma_fence_replace_proxy(struct dma_fence __rcu **slot, struct dma_fence *fence) +{ + struct dma_fence *old; + + if (fence) + dma_fence_get(fence); + + old = rcu_replace_pointer(*slot, fence, true); + if (old && old->ops == &dma_fence_proxy_ops) + proxy_assign(old, fence); + + return old; +} +EXPORT_SYMBOL(dma_fence_replace_proxy); diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 052a41e2451c..fa7bedc6703d 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -19,6 +19,8 @@ #define CREATE_TRACE_POINTS #include +#include "dma-fence-private.h" + EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); @@ -273,7 +275,7 @@ void dma_fence_free(struct dma_fence *fence) } EXPORT_SYMBOL(dma_fence_free); -static bool __dma_fence_enable_signaling(struct dma_fence *fence) +bool __dma_fence_enable_signaling(struct dma_fence *fence) { bool was_set; diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h index 55918ef9adab..616eca70e2d8 100644 --- a/drivers/dma-buf/selftests.h +++ b/drivers/dma-buf/selftests.h @@ -12,3 +12,4 @@ selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */ selftest(dma_fence, dma_fence) selftest(dma_fence_chain, dma_fence_chain) +selftest(dma_fence_proxy, dma_fence_proxy) diff --git a/drivers/dma-buf/st-dma-fence-proxy.c b/drivers/dma-buf/st-dma-fence-proxy.c new file mode 100644 index 000000000000..658f6b90abc4 --- /dev/null +++ b/drivers/dma-buf/st-dma-fence-proxy.c @@ -0,0 +1,581 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "selftest.h" + +static struct kmem_cache *slab_fences; + +static struct mock_fence { + struct 
dma_fence base; + spinlock_t lock; +} *to_mock_fence(struct dma_fence *f) { + return container_of(f, struct mock_fence, base); +} + +static const char *mock_name(struct dma_fence *f) +{ + return "mock"; +} + +static void mock_fence_release(struct dma_fence *f) +{ + kmem_cache_free(slab_fences, to_mock_fence(f)); +} + +static const struct dma_fence_ops mock_ops = { + .get_driver_name = mock_name, + .get_timeline_name = mock_name, + .release = mock_fence_release, +}; + +static struct dma_fence *mock_fence(void) +{ + struct mock_fence *f; + + f = kmem_cache_alloc(slab_fences, GFP_KERNEL); + if (!f) + return NULL; + + spin_lock_init(&f->lock); + dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0); + + return &f->base; +} + +static int sanitycheck(void *arg) +{ + struct dma_fence *f; + + f = dma_fence_create_proxy(); + if (!f) + return -ENOMEM; + + dma_fence_signal(f); + dma_fence_put(f); + + return 0; +} + +struct fences { + struct dma_fence *real; + struct dma_fence *proxy; + struct dma_fence __rcu *slot; +}; + +static int create_fences(struct fences *f, bool attach) +{ + f->proxy = dma_fence_create_proxy(); + if (!f->proxy) + return -ENOMEM; + + RCU_INIT_POINTER(f->slot, f->proxy); + + f->real = mock_fence(); + if (!f->real) { + dma_fence_put(f->proxy); + return -ENOMEM; + } + + if (attach) + dma_fence_replace_proxy(&f->slot, f->real); + + return 0; +} + +static void free_fences(struct fences *f) +{ + dma_fence_put(dma_fence_replace_proxy(&f->slot, NULL)); + dma_fence_put(f->real); + dma_fence_put(f->proxy); +} + +static int wrap_signaling(void *arg) +{ + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, true)) + return -ENOMEM; + + if (dma_fence_is_signaled(f.proxy)) { + pr_err("Fence unexpectedly signaled on creation\n"); + goto err_free; + } + + if (dma_fence_signal(f.real)) { + pr_err("Fence reported being already signaled\n"); + goto err_free; + } + + if (!dma_fence_is_signaled(f.proxy)) { + pr_err("Fence not reporting signaled\n"); + goto 
err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_signaling_recurse(void *arg) +{ + struct fences f; + struct dma_fence *chain; + int err = -EINVAL; + + if (create_fences(&f, false)) + return -ENOMEM; + + chain = dma_fence_create_proxy(); + if (!chain) { + err = -ENOMEM; + goto err_free; + } + + dma_fence_replace_proxy(&f.slot, chain); + dma_fence_put(dma_fence_replace_proxy(&f.slot, f.real)); + dma_fence_put(chain); + + /* f.real <- chain <- f.proxy */ + + if (dma_fence_is_signaled(f.proxy)) { + pr_err("Fence unexpectedly signaled on creation\n"); + goto err_free; + } + + if (dma_fence_signal(f.real)) { + pr_err("Fence reported being already signaled\n"); + goto err_free; + } + + if (!dma_fence_is_signaled(f.proxy)) { + pr_err("Fence not reporting signaled\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +struct simple_cb { + struct dma_fence_cb cb; + bool seen; +}; + +static void simple_callback(struct dma_fence *f, struct dma_fence_cb *cb) +{ + smp_store_mb(container_of(cb, struct simple_cb, cb)->seen, true); +} + +static int wrap_add_callback(void *arg) +{ + struct simple_cb cb = {}; + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, true)) + return -ENOMEM; + + if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) { + pr_err("Failed to add callback, fence already signaled!\n"); + goto err_free; + } + + dma_fence_signal(f.real); + if (!cb.seen) { + pr_err("Callback failed!\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_add_callback_recurse(void *arg) +{ + struct simple_cb cb = {}; + struct dma_fence *chain; + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, false)) + return -ENOMEM; + + chain = dma_fence_create_proxy(); + if (!chain) { + err = -ENOMEM; + goto err_free; + } + + dma_fence_replace_proxy(&f.slot, chain); + dma_fence_put(dma_fence_replace_proxy(&f.slot, f.real)); + 
dma_fence_put(chain); + + /* f.real <- chain <- f.proxy */ + + if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) { + pr_err("Failed to add callback, fence already signaled!\n"); + goto err_free; + } + + dma_fence_signal(f.real); + if (!cb.seen) { + pr_err("Callback failed!\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_late_add_callback(void *arg) +{ + struct simple_cb cb = {}; + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, true)) + return -ENOMEM; + + dma_fence_signal(f.real); + + if (!dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) { + pr_err("Added callback, but fence was already signaled!\n"); + goto err_free; + } + + dma_fence_signal(f.real); + if (cb.seen) { + pr_err("Callback called after failed attachment!\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_early_add_callback(void *arg) +{ + struct simple_cb cb = {}; + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, false)) + return -ENOMEM; + + if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) { + pr_err("Failed to add callback, fence already signaled!\n"); + goto err_free; + } + + dma_fence_replace_proxy(&f.slot, f.real); + dma_fence_signal(f.real); + if (!cb.seen) { + pr_err("Callback failed!\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_early_add_callback_late(void *arg) +{ + struct simple_cb cb = {}; + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, false)) + return -ENOMEM; + + dma_fence_signal(f.real); + + if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) { + pr_err("Failed to add callback, fence already signaled!\n"); + goto err_free; + } + + dma_fence_replace_proxy(&f.slot, f.real); + dma_fence_signal(f.real); + if (!cb.seen) { + pr_err("Callback failed!\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; 
+} + +static int wrap_early_add_callback_early(void *arg) +{ + struct simple_cb cb = {}; + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, false)) + return -ENOMEM; + + if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) { + pr_err("Failed to add callback, fence already signaled!\n"); + goto err_free; + } + + dma_fence_replace_proxy(&f.slot, f.real); + dma_fence_signal(f.real); + if (!cb.seen) { + pr_err("Callback failed!\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_rm_callback(void *arg) +{ + struct simple_cb cb = {}; + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, true)) + return -ENOMEM; + + if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) { + pr_err("Failed to add callback, fence already signaled!\n"); + goto err_free; + } + + if (!dma_fence_remove_callback(f.proxy, &cb.cb)) { + pr_err("Failed to remove callback!\n"); + goto err_free; + } + + dma_fence_signal(f.real); + if (cb.seen) { + pr_err("Callback still signaled after removal!\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_late_rm_callback(void *arg) +{ + struct simple_cb cb = {}; + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, true)) + return -ENOMEM; + + if (dma_fence_add_callback(f.proxy, &cb.cb, simple_callback)) { + pr_err("Failed to add callback, fence already signaled!\n"); + goto err_free; + } + + dma_fence_signal(f.real); + if (!cb.seen) { + pr_err("Callback failed!\n"); + goto err_free; + } + + if (dma_fence_remove_callback(f.proxy, &cb.cb)) { + pr_err("Callback removal succeed after being executed!\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_status(void *arg) +{ + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, true)) + return -ENOMEM; + + if (dma_fence_get_status(f.proxy)) { + pr_err("Fence unexpectedly has signaled status on 
creation\n"); + goto err_free; + } + + dma_fence_signal(f.real); + if (!dma_fence_get_status(f.proxy)) { + pr_err("Fence not reporting signaled status\n"); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_error(void *arg) +{ + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, true)) + return -ENOMEM; + + dma_fence_set_error(f.real, -EIO); + + if (dma_fence_get_status(f.proxy)) { + pr_err("Fence unexpectedly has error status before signal\n"); + goto err_free; + } + + dma_fence_signal(f.real); + if (dma_fence_get_status(f.proxy) != -EIO) { + pr_err("Fence not reporting error status, got %d\n", + dma_fence_get_status(f.proxy)); + goto err_free; + } + + err = 0; +err_free: + free_fences(&f); + return err; +} + +static int wrap_wait(void *arg) +{ + struct fences f; + int err = -EINVAL; + + if (create_fences(&f, true)) + return -ENOMEM; + + if (dma_fence_wait_timeout(f.proxy, false, 0) != 0) { + pr_err("Wait reported complete before being signaled\n"); + goto err_free; + } + + dma_fence_signal(f.real); + + if (dma_fence_wait_timeout(f.proxy, false, 0) == 0) { + pr_err("Wait reported incomplete after being signaled\n"); + goto err_free; + } + + err = 0; +err_free: + dma_fence_signal(f.real); + free_fences(&f); + return err; +} + +struct wait_timer { + struct timer_list timer; + struct fences f; +}; + +static void wait_timer(struct timer_list *timer) +{ + struct wait_timer *wt = from_timer(wt, timer, timer); + + dma_fence_signal(wt->f.real); +} + +static int wrap_wait_timeout(void *arg) +{ + struct wait_timer wt; + int err = -EINVAL; + + if (create_fences(&wt.f, true)) + return -ENOMEM; + + timer_setup_on_stack(&wt.timer, wait_timer, 0); + + if (dma_fence_wait_timeout(wt.f.proxy, false, 1) != 0) { + pr_err("Wait reported complete before being signaled\n"); + goto err_free; + } + + mod_timer(&wt.timer, jiffies + 1); + + if (dma_fence_wait_timeout(wt.f.proxy, false, 2) != 0) { + if (timer_pending(&wt.timer)) { 
+ pr_notice("Timer did not fire within the jiffie!\n"); + err = 0; /* not our fault! */ + } else { + pr_err("Wait reported incomplete after timeout\n"); + } + goto err_free; + } + + err = 0; +err_free: + del_timer_sync(&wt.timer); + destroy_timer_on_stack(&wt.timer); + dma_fence_signal(wt.f.real); + free_fences(&wt.f); + return err; +} + +int dma_fence_proxy(void) +{ + static const struct subtest tests[] = { + SUBTEST(sanitycheck), + SUBTEST(wrap_signaling), + SUBTEST(wrap_signaling_recurse), + SUBTEST(wrap_add_callback), + SUBTEST(wrap_add_callback_recurse), + SUBTEST(wrap_late_add_callback), + SUBTEST(wrap_early_add_callback), + SUBTEST(wrap_early_add_callback_late), + SUBTEST(wrap_early_add_callback_early), + SUBTEST(wrap_rm_callback), + SUBTEST(wrap_late_rm_callback), + SUBTEST(wrap_status), + SUBTEST(wrap_error), + SUBTEST(wrap_wait), + SUBTEST(wrap_wait_timeout), + }; + int ret; + + slab_fences = KMEM_CACHE(mock_fence, + SLAB_TYPESAFE_BY_RCU | + SLAB_HWCACHE_ALIGN); + if (!slab_fences) + return -ENOMEM; + + ret = subtests(tests, NULL); + + kmem_cache_destroy(slab_fences); + + return ret; +} diff --git a/include/linux/dma-fence-proxy.h b/include/linux/dma-fence-proxy.h new file mode 100644 index 000000000000..587d5044f0bf --- /dev/null +++ b/include/linux/dma-fence-proxy.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * dma-fence-proxy: allows waiting upon unset and future fences + * + * Copyright (C) 2017 Intel Corporation + */ + +#ifndef __LINUX_DMA_FENCE_PROXY_H +#define __LINUX_DMA_FENCE_PROXY_H + +#include + +struct dma_fence; + +struct dma_fence *dma_fence_create_proxy(void); + +struct dma_fence * +dma_fence_replace_proxy(struct dma_fence __rcu **slot, struct dma_fence *fence); + +#endif /* __LINUX_DMA_FENCE_PROXY_H */ From patchwork Tue Mar 17 12:27:16 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442769 Return-Path: Received: 
from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D720F1667 for ; Tue, 17 Mar 2020 12:27:30 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 7D22720736 for ; Tue, 17 Mar 2020 12:27:30 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 7D22720736 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id CCC3D89725; Tue, 17 Mar 2020 12:27:29 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 47EF489F6F for ; Tue, 17 Mar 2020 12:27:28 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588525-1500050 for multiple; Tue, 17 Mar 2020 12:27:18 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:16 +0000 Message-Id: <20200317122719.1889-9-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 09/12] drm/i915/gem: Teach execbuf how to wait on future syncobj X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver 
community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" If a syncobj has not yet been assigned, treat it as a future fence and install and wait upon a dma-fence-proxy. The proxy will be replaced by the real fence later, and that fence will be responsible for signaling our waiter. Signed-off-by: Chris Wilson --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d3f4f28e9468..bbe501a3e619 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -2260,8 +2261,24 @@ await_fence_array(struct i915_execbuffer *eb, continue; fence = drm_syncobj_fence_get(syncobj); - if (!fence) - return -EINVAL; + if (!fence) { + struct dma_fence *old; + + fence = dma_fence_create_proxy(); + if (!fence) + return -ENOMEM; + + spin_lock(&syncobj->lock); + old = rcu_dereference_protected(syncobj->fence, true); + if (unlikely(old)) { + dma_fence_put(fence); + fence = dma_fence_get(old); + } else { + rcu_assign_pointer(syncobj->fence, + dma_fence_get(fence)); + } + spin_unlock(&syncobj->lock); + } err = i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); From patchwork Tue Mar 17 12:27:17 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442781 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 974A11667 for ; Tue, 17 Mar 2020 12:27:43 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with 
cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 7F4B520736 for ; Tue, 17 Mar 2020 12:27:43 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 7F4B520736 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id D0CC989F8B; Tue, 17 Mar 2020 12:27:42 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 25A8489725 for ; Tue, 17 Mar 2020 12:27:29 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588526-1500050 for multiple; Tue, 17 Mar 2020 12:27:18 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:17 +0000 Message-Id: <20200317122719.1889-10-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 10/12] drm/i915/gem: Allow combining submit-fences with syncobj X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Fixes: a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin 
Cc: Lionel Landwerlin --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 10 +++++++--- include/uapi/drm/i915_drm.h | 7 ++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index bbe501a3e619..675506ee392d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2225,7 +2225,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + fences[n] = ptr_pack_bits(syncobj, fence.flags, 3); } return fences; @@ -2256,7 +2256,7 @@ await_fence_array(struct i915_execbuffer *eb, struct dma_fence *fence; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); + syncobj = ptr_unpack_bits(fences[n], &flags, 3); if (!(flags & I915_EXEC_FENCE_WAIT)) continue; @@ -2280,7 +2280,11 @@ await_fence_array(struct i915_execbuffer *eb, spin_unlock(&syncobj->lock); } - err = i915_request_await_dma_fence(eb->request, fence); + if (flags & I915_EXEC_FENCE_WAIT_SUBMIT) + err = i915_request_await_execution(eb->request, fence, + eb->engine->bond_execute); + else + err = i915_request_await_dma_fence(eb->request, fence); dma_fence_put(fence); if (err < 0) return err; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2813e579b480..3a24817ca25b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1040,9 +1040,10 @@ struct drm_i915_gem_exec_fence { */ __u32 handle; -#define I915_EXEC_FENCE_WAIT (1<<0) -#define I915_EXEC_FENCE_SIGNAL (1<<1) -#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) +#define I915_EXEC_FENCE_WAIT (1u << 0) +#define I915_EXEC_FENCE_SIGNAL (1u << 1) +#define I915_EXEC_FENCE_WAIT_SUBMIT (1u << 2) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_WAIT_SUBMIT << 1)) __u32 flags; 
}; From patchwork Tue Mar 17 12:27:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442783 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C6B1E1820 for ; Tue, 17 Mar 2020 12:27:43 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id AF72B20736 for ; Tue, 17 Mar 2020 12:27:43 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org AF72B20736 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id D4C0B6E0B9; Tue, 17 Mar 2020 12:27:42 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 275A989F8B for ; Tue, 17 Mar 2020 12:27:32 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588527-1500050 for multiple; Tue, 17 Mar 2020 12:27:19 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:18 +0000 Message-Id: <20200317122719.1889-11-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 
Subject: [Intel-gfx] [PATCH 11/12] drm/i915/gt: Declare when we enabled timeslicing X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kenneth Graunke Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Let userspace know if they can trust timeslicing by advertising it as the I915_SCHEDULER_CAP_TIMESLICING bit of the I915_PARAM_HAS_SCHEDULER capabilities. v2: Only declare timeslicing if we can safely preempt userspace. Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson Cc: Kenneth Graunke --- drivers/gpu/drm/i915/gt/intel_engine.h | 3 ++- drivers/gpu/drm/i915/gt/intel_engine_user.c | 5 +++++ include/uapi/drm/i915_drm.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index b469de0dd9b6..424672ee7874 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -339,7 +339,8 @@ intel_engine_has_timeslices(const struct intel_engine_cs *engine) if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) return false; - return intel_engine_has_semaphores(engine); + return (intel_engine_has_semaphores(engine) && + intel_engine_has_preemption(engine)); } #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 848decee9066..b84fdd722781 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -121,6 +121,11 @@ static void set_scheduler_caps(struct drm_i915_private *i915) else disabled |= BIT(map[i].sched); } + + if (intel_engine_has_timeslices(engine)) + enabled |= I915_SCHEDULER_CAP_TIMESLICING; + else + disabled |= I915_SCHEDULER_CAP_TIMESLICING; } i915->caps.scheduler = enabled &
~disabled; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3a24817ca25b..ff7be293ec31 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -523,6 +523,7 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +#define I915_SCHEDULER_CAP_TIMESLICING (1ul << 5) #define I915_PARAM_HUC_STATUS 42 From patchwork Tue Mar 17 12:27:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11442785 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6726D92A for ; Tue, 17 Mar 2020 12:27:44 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 4FC9B206EC for ; Tue, 17 Mar 2020 12:27:44 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 4FC9B206EC Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 19CA46E0BA; Tue, 17 Mar 2020 12:27:43 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 4D13E89F8B for ; Tue, 17 Mar 2020 12:27:41 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from build.alporthouse.com 
(unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 20588528-1500050 for multiple; Tue, 17 Mar 2020 12:27:19 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Tue, 17 Mar 2020 12:27:19 +0000 Message-Id: <20200317122719.1889-12-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20200317122719.1889-1-chris@chris-wilson.co.uk> References: <20200317122719.1889-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 12/12] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Kenneth Graunke Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the user batch or in our own preamble, the engine raises a GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so respond to a semaphore wait by yielding the timeslice, if we have another context to yield to! The only real complication is that the interrupt is only generated for the start of the semaphore wait, and is asynchronous to our process_csb() -- that is, we may not have registered the timeslice before we see the interrupt. To ensure we don't miss a potential semaphore blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark the interrupt and apply it to the next timeslice regardless of whether it was active at the time. v2: We use semaphores in preempt-to-busy, within the timeslicing implementation itself! Ergo, when we do insert a preemption due to an expired timeslice, the new context may start with the missed semaphore flagged by the retired context and be yielded, ad infinitum. 
To avoid this, read the context id at the time of the semaphore interrupt and only yield if that context is still active. Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Kenneth Graunke --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 +++++ drivers/gpu/drm/i915/gt/intel_gt_irq.c | 13 ++++++- drivers/gpu/drm/i915/gt/intel_lrc.c | 40 +++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 1 + 5 files changed, 61 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3aa8a652c16d..883a9b7fe88d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1295,6 +1295,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); + if (HAS_EXECLISTS(dev_priv)) { + drm_printf(m, "\tEL_STAT_HI: 0x%08x\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI)); + drm_printf(m, "\tEL_STAT_LO: 0x%08x\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO)); + } drm_printf(m, "\tRING_START: 0x%08x\n", ENGINE_READ(engine, RING_START)); drm_printf(m, "\tRING_HEAD: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 80cdde712842..ac283ab5d89c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -156,6 +156,15 @@ struct intel_engine_execlists { */ struct i915_priolist default_priolist; + /** + * @yield: CCID at the time of the last semaphore-wait interrupt. + * + * Instead of leaving a semaphore busy-spinning on an engine, we would + * like to switch to another ready context, i.e. yielding the semaphore + * timeslice. 
+ */ + u32 yield; + /** * @error_interrupt: CS Master EIR * diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index f0e7fd95165a..875bd0392ffc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) } } + if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { + WRITE_ONCE(engine->execlists.yield, + ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); + if (del_timer(&engine->execlists.timer)) + tasklet = true; + } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) tasklet = true; @@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) const u32 irqs = GT_CS_MASTER_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT; + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; struct intel_uncore *uncore = gt->uncore; const u32 dmask = irqs << 16 | irqs; const u32 smask = irqs << 16; @@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt) const u32 irqs = GT_CS_MASTER_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT; + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; const u32 gt_interrupts[] = { irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT, irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 112531b29f59..ae6409605dcd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1744,7 +1744,8 @@ static void defer_active(struct intel_engine_cs *engine) } static bool -need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +need_timeslice(const struct intel_engine_cs *engine, + const struct i915_request *rq) { int hint; @@ -1758,6 +1759,31 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) return hint >= effective_prio(rq); } +static bool 
+timeslice_yield(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + /* + * Once bitten, forever smitten! + * + * If the active context ever busy-waited on a semaphore, + * it will be treated as a hog until the end of its timeslice. + * The HW only sends an interrupt on the first miss, and we + * do not know if that semaphore has been signaled, or even if it + * is now stuck on another semaphore. Play safe, yield if it + * might be stuck -- it will be given a fresh timeslice in + * the near future. + */ + return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield); +} + +static bool +timeslice_expired(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + return timer_expired(&el->timer) || timeslice_yield(el, rq); +} + static int switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) { @@ -1773,8 +1799,7 @@ timeslice(const struct intel_engine_cs *engine) return READ_ONCE(engine->props.timeslice_duration_ms); } -static unsigned long -active_timeslice(const struct intel_engine_cs *engine) +static unsigned long active_timeslice(const struct intel_engine_cs *engine) { const struct intel_engine_execlists *execlists = &engine->execlists; const struct i915_request *rq = *execlists->active; @@ -1936,13 +1961,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = NULL; } else if (need_timeslice(engine, last) && - timer_expired(&engine->execlists.timer)) { + timeslice_expired(execlists, last)) { ENGINE_TRACE(engine, - "expired last=%llx:%lld, prio=%d, hint=%d\n", + "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + execlists->queue_priority_hint, + yesno(timeslice_yield(execlists, last))); ring_set_paused(engine, 1); defer_active(engine); @@ -2203,6 +2229,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } clear_ports(port + 1, last_port - port); +
WRITE_ONCE(execlists->yield, -1); execlists_submit_ports(engine); set_preempt_timeout(engine, *active); } else { @@ -4439,6 +4466,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; + engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; } static void rcs_submission_override(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 309cb7d96b35..b67ae9192f7c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3092,6 +3092,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ +#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) /* bdw+ */ #define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4)