From patchwork Mon Jul 17 09:11:41 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 9844371 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id C01DE60392 for ; Mon, 17 Jul 2017 09:39:09 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id B0ED327DCD for ; Mon, 17 Jul 2017 09:39:09 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id A4B032821F; Mon, 17 Jul 2017 09:39:09 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-4.2 required=2.0 tests=BAYES_00, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 03BBE27DCD for ; Mon, 17 Jul 2017 09:39:08 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 3D3836E0AD; Mon, 17 Jul 2017 09:39:08 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 7A9956E0AD for ; Mon, 17 Jul 2017 09:39:06 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 7804347-1500050 for multiple; Mon, 17 Jul 2017 10:12:16 +0100 Received: by haswell.alporthouse.com (sSMTP sendmail emulation); Mon, 17 Jul 2017 10:12:16 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 17 Jul 2017 10:11:41 +0100 Message-Id: <20170717091141.23102-15-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.13.2 In-Reply-To: <20170717091141.23102-1-chris@chris-wilson.co.uk> References: <20170717091141.23102-1-chris@chris-wilson.co.uk> X-Originating-IP: 78.156.65.138 X-Country: code=GB country="United Kingdom" ip=78.156.65.138 Subject: [Intel-gfx] [PATCH 15/15] drm/i915/selftests: Exercise independence of per-engine resets X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP If all goes well, resetting one engine should not affect the operation of any others. So to test this, we setup a continuous stream of requests onto to each of the "innocent" engines whilst constantly resetting our target engine. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Michel Thierry --- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 165 +++++++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_context.c | 8 ++ drivers/gpu/drm/i915/selftests/mock_context.h | 3 + 3 files changed, 176 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 7096c3911cd3..dbfcb31ba9f4 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -22,8 +22,13 @@ * */ +#include + #include "../i915_selftest.h" +#include "mock_context.h" +#include "mock_drm.h" + struct hang { struct drm_i915_private *i915; struct drm_i915_gem_object *hws; @@ -372,6 +377,165 @@ static int igt_reset_engine(void *arg) return err; } +static int active_engine(void *data) +{ + struct intel_engine_cs *engine = data; + struct drm_i915_gem_request *rq[2] = {}; + struct i915_gem_context *ctx[2]; + struct drm_file *file; + unsigned long count = 0; + int err = 0; + + file = mock_file(engine->i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&engine->i915->drm.struct_mutex); + ctx[0] = live_context(engine->i915, file); + mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(ctx[0])) { + err = PTR_ERR(ctx[0]); + goto err_file; + } + + mutex_lock(&engine->i915->drm.struct_mutex); + ctx[1] = live_context(engine->i915, file); + mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(ctx[1])) { + err = PTR_ERR(ctx[1]); + i915_gem_context_put(ctx[0]); + goto err_file; + } + + while (!kthread_should_stop()) { + unsigned int idx = count++ & 1; + struct drm_i915_gem_request *old = rq[idx]; + struct drm_i915_gem_request *new; + + mutex_lock(&engine->i915->drm.struct_mutex); + new = i915_gem_request_alloc(engine, ctx[idx]); + if (IS_ERR(new)) { + mutex_unlock(&engine->i915->drm.struct_mutex); + err = PTR_ERR(new); + break; + } + + rq[idx] = i915_gem_request_get(new); + i915_add_request(new); + mutex_unlock(&engine->i915->drm.struct_mutex); + + if (old) { + i915_wait_request(old, 0, MAX_SCHEDULE_TIMEOUT); + i915_gem_request_put(old); + } + } + + for (count = 0; count < ARRAY_SIZE(rq); count++) + i915_gem_request_put(rq[count]); + +err_file: + mock_file_free(engine->i915, file); + return err; +} + +static int igt_reset_active_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine, *active; + enum intel_engine_id id, tmp; + int err = 0; + + /* Check that issuing a reset on one engine does not interfere + * with any other engine. + */ + + if (!intel_has_reset_engine(i915)) + return 0; + + for_each_engine(engine, i915, id) { + struct task_struct *threads[I915_NUM_ENGINES]; + unsigned long resets[I915_NUM_ENGINES]; + unsigned long global = i915_reset_count(&i915->gpu_error); + IGT_TIMEOUT(end_time); + + memset(threads, 0, sizeof(threads)); + for_each_engine(active, i915, tmp) { + struct task_struct *tsk; + + if (active == engine) + continue; + + resets[tmp] = i915_reset_engine_count(&i915->gpu_error, + active); + + tsk = kthread_run(active_engine, active, + "igt/%s", active->name); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + goto unwind; + } + + threads[tmp] = tsk; + get_task_struct(tsk); + + } + + set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags); + do { + err = i915_reset_engine(engine); + if (err) { + pr_err("i915_reset_engine(%s) failed, err=%d\n", + engine->name, err); + break; + } + } while (time_before(jiffies, end_time)); + clear_bit(I915_RESET_ENGINE + engine->id, + &i915->gpu_error.flags); + +unwind: + for_each_engine(active, i915, tmp) { + int ret; + + if (!threads[tmp]) + continue; + + ret = kthread_stop(threads[tmp]); + if (ret) { + pr_err("kthread for active engine %s failed, err=%d\n", + active->name, ret); + if (!err) + err = ret; + } + put_task_struct(threads[tmp]); + + if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error, + active)) { + pr_err("Innocent engine %s was reset (count=%ld)\n", + active->name, + i915_reset_engine_count(&i915->gpu_error, + active) - resets[tmp]); + err = -EIO; + } + } + + if (global != i915_reset_count(&i915->gpu_error)) { + pr_err("Global reset (count=%ld)!\n", + i915_reset_count(&i915->gpu_error) - global); + err = -EIO; + } + + if (err) + break; + + cond_resched(); + } + + if (i915_terminally_wedged(&i915->gpu_error)) + err = -EIO; + + return err; +} + static u32 fake_hangcheck(struct drm_i915_gem_request *rq) { u32 reset_count; @@ -689,6 +853,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_hang_sanitycheck), SUBTEST(igt_global_reset), SUBTEST(igt_reset_engine), + SUBTEST(igt_reset_active_engines), SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), SUBTEST(igt_render_engine_reset_fallback), diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 9c7c68181f82..d436f2d5089b 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -95,3 +95,11 @@ void mock_init_contexts(struct drm_i915_private *i915) INIT_WORK(&i915->contexts.free_work, contexts_free_worker); init_llist_head(&i915->contexts.free_list); } + +struct i915_gem_context * +live_context(struct drm_i915_private *i915, struct drm_file *file) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + + return i915_gem_create_context(i915, file->driver_priv); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h index 383941a61124..2f432c03d413 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.h +++ b/drivers/gpu/drm/i915/selftests/mock_context.h @@ -33,4 +33,7 @@ mock_context(struct drm_i915_private *i915, void mock_context_close(struct i915_gem_context *ctx); +struct i915_gem_context * +live_context(struct drm_i915_private *i915, struct drm_file *file); + #endif /* !__MOCK_CONTEXT_H */