From patchwork Wed Jan 8 10:45:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11323201 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8F83C1580 for ; Wed, 8 Jan 2020 10:45:55 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 77C99205F4 for ; Wed, 8 Jan 2020 10:45:55 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 77C99205F4 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 6D1666E1B3; Wed, 8 Jan 2020 10:45:54 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 86D216E1B1 for ; Wed, 8 Jan 2020 10:45:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19806937-1500050 for multiple; Wed, 08 Jan 2020 10:45:40 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Wed, 8 Jan 2020 10:45:36 +0000 Message-Id: <20200108104539.3422768-1-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.25.0.rc1 MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 1/4] drm/i915: Pin the context as we work on it X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Since we now allow the intel_context_unpin() to run unserialised, we risk our operations under the intel_context_lock_pinned() being run as the context is unpinned (and thus invalidating our state). We can atomically acquire the pin, testing to see if it is pinned in the process, thus ensuring that the state remains consistent during the course of the whole operation. Fixes: 841350223816 ("drm/i915/gt: Drop mutex serialisation between context pin/unpin") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +++++++--- drivers/gpu/drm/i915/gt/intel_context.h | 7 ++++++- drivers/gpu/drm/i915/i915_debugfs.c | 10 ++++------ drivers/gpu/drm/i915/i915_perf.c | 13 +++++-------- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 88f6253f5405..a2e57e62af30 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1236,12 +1236,14 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) * image, or into the registers directory, does not stick). Pristine * and idle contexts will be configured on pinning. */ - if (!intel_context_is_pinned(ce)) + if (!intel_context_pin_if_active(ce)) return 0; rq = intel_engine_create_kernel_request(ce->engine); - if (IS_ERR(rq)) - return PTR_ERR(rq); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto out_unpin; + } /* Serialise with the remote context */ ret = intel_context_prepare_remote_request(ce, rq); @@ -1249,6 +1251,8 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) ret = gen8_emit_rpcs_config(rq, ce, sseu); i915_request_add(rq); +out_unpin: + intel_context_unpin(ce); return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 0f5ae4ff3b10..63073ebc6cf1 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -76,9 +76,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce) int __intel_context_do_pin(struct intel_context *ce); +static inline bool intel_context_pin_if_active(struct intel_context *ce) +{ + return atomic_inc_not_zero(&ce->pin_count); +} + static inline int intel_context_pin(struct intel_context *ce) { - if (likely(atomic_inc_not_zero(&ce->pin_count))) + if (likely(intel_context_pin_if_active(ce))) return 0; return __intel_context_do_pin(ce); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0ac98e39eb75..db184536acef 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -321,16 +321,15 @@ static void print_context_stats(struct seq_file *m, for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - intel_context_lock_pinned(ce); - if (intel_context_is_pinned(ce)) { + if (intel_context_pin_if_active(ce)) { rcu_read_lock(); if (ce->state) per_file_stats(0, ce->state->obj, &kstats); per_file_stats(0, ce->ring->vma->obj, &kstats); rcu_read_unlock(); + intel_context_unpin(ce); } - intel_context_unlock_pinned(ce); } i915_gem_context_unlock_engines(ctx); @@ -1513,15 +1512,14 @@ static int i915_context_status(struct seq_file *m, void *unused) for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - intel_context_lock_pinned(ce); - if (intel_context_is_pinned(ce)) { + if (intel_context_pin_if_active(ce)) { seq_printf(m, "%s: ", ce->engine->name); if (ce->state) describe_obj(m, ce->state->obj); describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); + intel_context_unpin(ce); } - intel_context_unlock_pinned(ce); } i915_gem_context_unlock_engines(ctx); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 84350c7bc711..c7a7b676f079 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2203,17 +2203,14 @@ static int gen8_configure_context(struct i915_gem_context *ctx, if (ce->engine->class != RENDER_CLASS) continue; - err = intel_context_lock_pinned(ce); - if (err) - break; + /* Otherwise OA settings will be set upon first use */ + if (!intel_context_pin_if_active(ce)) + continue; flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu); + err = gen8_modify_context(ce, flex, count); - /* Otherwise OA settings will be set upon first use */ - if (intel_context_is_pinned(ce)) - err = gen8_modify_context(ce, flex, count); - - intel_context_unlock_pinned(ce); + intel_context_unpin(ce); if (err) break; } From patchwork Wed Jan 8 10:45:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11323197 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E6265139A for ; Wed, 8 Jan 2020 10:45:53 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id CDA23206DB for ; Wed, 8 Jan 2020 10:45:53 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org CDA23206DB Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 526336E1B1; Wed, 8 Jan 2020 10:45:53 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 8743C6E1B3 for ; Wed, 8 Jan 2020 10:45:52 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19806938-1500050 for multiple; Wed, 08 Jan 2020 10:45:40 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Wed, 8 Jan 2020 10:45:37 +0000 Message-Id: <20200108104539.3422768-2-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.25.0.rc1 In-Reply-To: <20200108104539.3422768-1-chris@chris-wilson.co.uk> References: <20200108104539.3422768-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 2/4] drm/i915: Replace vma parking with a clock aging algorithm X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" We cache the user's vma for a brief period of time after they close them so that if they are immediately reopened we avoid having to unbind and rebind them. This happens quite frequently for display servers which only keep a client's frame open for as long as they are copying from it, and so they open/close every vma about 30 Hz (every other frame for double buffering). Our current strategy is to keep the vma alive until the next global idle point. However this cache should be purely temporal, so switch over from using the parked notifier to using its own clock based aging algorithm: if the closed vma is not reused within 2 clock ticks, it is destroyed. Closes: https://gitlab.freedesktop.org/drm/intel/issues/644 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_gt.c | 3 - drivers/gpu/drm/i915/gt/intel_gt_pm.c | 1 - drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 - drivers/gpu/drm/i915/i915_debugfs.c | 3 + drivers/gpu/drm/i915/i915_drv.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_vma.c | 83 +++++++++++++++---- drivers/gpu/drm/i915/i915_vma.h | 11 ++- .../gpu/drm/i915/selftests/mock_gem_device.c | 2 + 9 files changed, 86 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index da2b6e2ae692..63d70cf62ddb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -23,9 +23,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) spin_lock_init(>->irq_lock); - INIT_LIST_HEAD(>->closed_vma); - spin_lock_init(>->closed_lock); - intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_init_timelines(gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index d1c2f034296a..3302f676d12b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -80,7 +80,6 @@ static int __gt_park(struct intel_wakeref *wf) intel_gt_park_requests(gt); - i915_vma_parked(gt); i915_pmu_gt_parked(i915); intel_rps_park(>->rps); intel_rc6_park(>->rc6); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 96890dd12b5f..4589dea67b8f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -58,9 +58,6 @@ struct intel_gt { struct intel_wakeref wakeref; atomic_t user_wakeref; - struct list_head closed_vma; - spinlock_t closed_lock; /* guards the list of closed_vma */ - struct intel_reset reset; /** diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index db184536acef..9e18e10c125f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3587,6 +3587,9 @@ i915_drop_caches_set(void *data, u64 val) if (ret) return ret; + if (val & DROP_IDLE) + i915_vma_clock_flush(&i915->vma_clock); + fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) i915_gem_shrink(i915, LONG_MAX, NULL, I915_SHRINK_BOUND); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f7385abdd74b..9fde3918094f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -523,8 +523,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) intel_wopcm_init_early(&dev_priv->wopcm); + i915_vma_clock_init_early(&dev_priv->vma_clock); intel_gt_init_early(&dev_priv->gt, dev_priv); - i915_gem_init_early(dev_priv); /* This must be called before any calls to HAS_PCH_* */ @@ -561,6 +561,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) */ static void i915_driver_late_release(struct drm_i915_private *dev_priv) { + i915_vma_clock_flush(&dev_priv->vma_clock); + intel_irq_fini(dev_priv); intel_power_domains_cleanup(dev_priv); i915_gem_cleanup_early(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 50181113dd2b..d61d73c680b1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1240,6 +1240,7 @@ struct drm_i915_private { struct intel_runtime_pm runtime_pm; struct i915_perf perf; + struct i915_vma_clock vma_clock; /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct intel_gt gt; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index cbd783c31adb..89ed287ab892 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -985,8 +985,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) void i915_vma_close(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; - unsigned long flags; + struct i915_vma_clock *clock = &vma->vm->i915->vma_clock; GEM_BUG_ON(i915_vma_is_closed(vma)); @@ -1002,18 +1001,20 @@ void i915_vma_close(struct i915_vma *vma) * causing us to rebind the VMA once more. This ends up being a lot * of wasted work for the steady state. */ - spin_lock_irqsave(>->closed_lock, flags); - list_add(&vma->closed_link, >->closed_vma); - spin_unlock_irqrestore(>->closed_lock, flags); + spin_lock(&clock->lock); + list_add(&vma->closed_link, &clock->age[0]); + spin_unlock(&clock->lock); + + schedule_delayed_work(&clock->work, round_jiffies_up_relative(HZ)); } static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; + struct i915_vma_clock *clock = &vma->vm->i915->vma_clock; - spin_lock_irq(>->closed_lock); + spin_lock(&clock->lock); list_del_init(&vma->closed_link); - spin_unlock_irq(>->closed_lock); + spin_unlock(&clock->lock); } void i915_vma_reopen(struct i915_vma *vma) @@ -1051,15 +1052,35 @@ void i915_vma_release(struct kref *ref) i915_vma_free(vma); } -void i915_vma_parked(struct intel_gt *gt) +static void i915_vma_clock(struct work_struct *w) { + struct i915_vma_clock *clock = + container_of(w, typeof(*clock), work.work); struct i915_vma *vma, *next; - spin_lock_irq(>->closed_lock); - list_for_each_entry_safe(vma, next, >->closed_vma, closed_link) { + /* + * A very simple clock aging algorithm: we keep the user's closed + * vma alive for a couple of timer ticks before destroying them. + * This serves a shortlived cache so that frequently reused VMA + * are kept alive between frames and we skip having to rebing them. + * + * When closed, we insert the vma into age[0]. Upon completion of + * a timer tick, it is moved to age[1]. At the start of each timer + * tick, we destroy all the old vma that were accumulated into age[1] + * and have not been reused. All destroyed vma have therefore been + * unused for more than 1 tick (at least a second), and at most 2 + * ticks (we expect the average to be 1.5 ticks). + */ + + spin_lock(&clock->lock); + + list_for_each_entry_safe(vma, next, &clock->age[1], closed_link) { struct drm_i915_gem_object *obj = vma->obj; struct i915_address_space *vm = vma->vm; + if (i915_vma_is_active(vma)) + continue; + /* XXX All to avoid keeping a reference on i915_vma itself */ if (!kref_get_unless_zero(&obj->base.refcount)) @@ -1072,7 +1093,7 @@ void i915_vma_parked(struct intel_gt *gt) obj = NULL; } - spin_unlock_irq(>->closed_lock); + spin_unlock(&clock->lock); if (obj) { __i915_vma_put(vma); @@ -1082,11 +1103,26 @@ void i915_vma_parked(struct intel_gt *gt) i915_vm_close(vm); /* Restart after dropping lock */ - spin_lock_irq(>->closed_lock); - next = list_first_entry(>->closed_vma, + spin_lock(&clock->lock); + next = list_first_entry(&clock->age[1], typeof(*next), closed_link); } - spin_unlock_irq(>->closed_lock); + list_splice_tail_init(&clock->age[0], &clock->age[1]); + + if (!list_empty(&clock->age[1])) { + /* Keep active VMA around until second tick after idling */ + list_for_each_entry_safe(vma, next, + &clock->age[1], closed_link) { + if (i915_vma_is_active(vma)) + list_move_tail(&vma->closed_link, + &clock->age[0]); + } + + schedule_delayed_work(&clock->work, + round_jiffies_up_relative(HZ)); + } + + spin_unlock(&clock->lock); } static void __i915_vma_iounmap(struct i915_vma *vma) @@ -1277,6 +1313,23 @@ void i915_vma_make_purgeable(struct i915_vma *vma) i915_gem_object_make_purgeable(vma->obj); } +void i915_vma_clock_init_early(struct i915_vma_clock *clock) +{ + spin_lock_init(&clock->lock); + INIT_LIST_HEAD(&clock->age[0]); + INIT_LIST_HEAD(&clock->age[1]); + + INIT_DELAYED_WORK(&clock->work, i915_vma_clock); +} + +void i915_vma_clock_flush(struct i915_vma_clock *clock) +{ + do { + if (cancel_delayed_work_sync(&clock->work)) + i915_vma_clock(&clock->work.work); + } while (delayed_work_pending(&clock->work)); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_vma.c" #endif diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 02b31a62951e..7b8a18b72d81 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -351,8 +351,6 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } -void i915_vma_parked(struct intel_gt *gt); - #define for_each_until(cond) if (cond) break; else /** @@ -381,4 +379,13 @@ static inline int i915_vma_sync(struct i915_vma *vma) return i915_active_wait(&vma->active); } +struct i915_vma_clock { + spinlock_t lock; + struct list_head age[2]; + struct delayed_work work; +}; + +void i915_vma_clock_init_early(struct i915_vma_clock *clock); +void i915_vma_clock_flush(struct i915_vma_clock *clock); + #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 3b8986983afc..6c27f43155ea 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -57,6 +57,7 @@ static void mock_device_release(struct drm_device *dev) mock_device_flush(i915); intel_gt_driver_remove(&i915->gt); + i915_vma_clock_flush(&i915->vma_clock); i915_gem_driver_release__contexts(i915); @@ -164,6 +165,7 @@ struct drm_i915_private *mock_gem_device(void) mock_uncore_init(&i915->uncore, i915); i915_gem_init__mm(i915); + i915_vma_clock_init_early(&i915->vma_clock); intel_gt_init_early(&i915->gt, i915); atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */ i915->gt.awake = -ENODEV; From patchwork Wed Jan 8 10:45:38 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11323203 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C5B92139A for ; Wed, 8 Jan 2020 10:45:56 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id AE5F2206DB for ; Wed, 8 Jan 2020 10:45:56 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org AE5F2206DB Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 26B516E492; Wed, 8 Jan 2020 10:45:56 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 9CD096E1B4 for ; Wed, 8 Jan 2020 10:45:53 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19806939-1500050 for multiple; Wed, 08 Jan 2020 10:45:40 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Wed, 8 Jan 2020 10:45:38 +0000 Message-Id: <20200108104539.3422768-3-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.25.0.rc1 In-Reply-To: <20200108104539.3422768-1-chris@chris-wilson.co.uk> References: <20200108104539.3422768-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 3/4] drm/i915/gt: Always reset the timeslice after a context switch X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Currently, we reset the timer after a pre-eemption event. This has the side-effect that the timeslice runs into the second context after the first is completed. To be more fair, we want to reset the clock after promotion as well. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bd74b76c6403..f825df1ba638 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2289,7 +2289,6 @@ static void process_csb(struct intel_engine_cs *engine) /* Point active to the new ELSP; prevent overwriting */ WRITE_ONCE(execlists->active, execlists->pending); - set_timeslice(engine); if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); @@ -2329,6 +2328,9 @@ static void process_csb(struct intel_engine_cs *engine) } } while (head != tail); + if (execlists_active(execlists)) + set_timeslice(engine); + execlists->csb_head = head; /* From patchwork Wed Jan 8 10:45:39 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11323199 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 5F8341398 for ; Wed, 8 Jan 2020 10:45:55 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 48162205F4 for ; Wed, 8 Jan 2020 10:45:55 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 48162205F4 Authentication-Results: mail.kernel.org; dmarc=none (p=none dis=none) header.from=chris-wilson.co.uk Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 7CB026E1B4; Wed, 8 Jan 2020 10:45:54 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 94BB16E1B3 for ; Wed, 8 Jan 2020 10:45:53 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 19806940-1500050 for multiple; Wed, 08 Jan 2020 10:45:40 +0000 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Wed, 8 Jan 2020 10:45:39 +0000 Message-Id: <20200108104539.3422768-4-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.25.0.rc1 In-Reply-To: <20200108104539.3422768-1-chris@chris-wilson.co.uk> References: <20200108104539.3422768-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 4/4] drm/i915/gt: Yield the timeslice if waiting on a semaphore X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the user batch or in our own preamble, the engine raises a GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so respond to a semaphore wait by yielding the timeslice, if we have another context to yield to! The only real complication is that the interrupt is only generated for the start of the semaphore wait, and is asynchronous to our process_csb() -- that is, we may not have registered the timeslice before we see the interrupt. To ensure we don't miss a potential semaphore blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark the interrupt and apply it to the next timeslice regardless of whether it was active at the time. v2: We use semaphores in preempt-to-busy, within the timeslicing implementation itself! Ergo, when we do insert a preemption due to an expired timeslice, the new context may start with the missed semaphore flagged by the retired context and be yielded, ad infinitum. To avoid this, read the context id at the time of the semaphore interrupt and only yield if that context is still active. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 ++++++ drivers/gpu/drm/i915/gt/intel_gt_irq.c | 32 +++++++++++--------- drivers/gpu/drm/i915/gt/intel_lrc.c | 31 ++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 5 +++ 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 00287515e7af..d146d2fbd42a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -156,6 +156,15 @@ struct intel_engine_execlists { */ struct i915_priolist default_priolist; + /** + * @yield: CCID at the time of the last semaphore-wait interrupt. + * + * Instead of leaving a semaphore busy-spinning on an engine, we would + * like to switch to another ready context, i.e. yielding the semaphore + * timeslice. + */ + u32 yield; + /** * @no_priolist: priority lists disabled */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index f796bdf1ed30..6ae64a224b02 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -24,6 +24,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) { bool tasklet = false; + if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { + WRITE_ONCE(engine->execlists.yield, + ENGINE_READ_FW(engine, EXECLIST_CCID)); + if (del_timer(&engine->execlists.timer)) + tasklet = true; + } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) tasklet = true; @@ -210,7 +217,10 @@ void gen11_gt_irq_reset(struct intel_gt *gt) void gen11_gt_irq_postinstall(struct intel_gt *gt) { - const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT; + const u32 irqs = + GT_RENDER_USER_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; struct intel_uncore *uncore = gt->uncore; const u32 dmask = irqs << 16 | irqs; const u32 smask = irqs << 16; @@ -357,21 +367,15 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt) struct intel_uncore *uncore = gt->uncore; /* These are interrupts we'll toggle with the ring mask register */ + const u32 irqs = + GT_RENDER_USER_INTERRUPT | + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; u32 gt_interrupts[] = { - (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT), - - (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT), - + irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT, + irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT, 0, - - (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | - GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT) + irqs << GEN8_VECS_IRQ_SHIFT, }; gt->pm_ier = 0x0; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index f825df1ba638..7cf2aade6b3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1662,7 +1662,8 @@ static void defer_active(struct intel_engine_cs *engine) } static bool -need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +need_timeslice(const struct intel_engine_cs *engine, + const struct i915_request *rq) { int hint; @@ -1678,6 +1679,27 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) return hint >= effective_prio(rq); } +static bool +timeslice_expired(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + const struct intel_engine_execlists *el = &engine->execlists; + + return (timer_expired(&el->timer) || + /* + * Once bitten, forever smitten! + * + * If the active context ever busy-waited on a semaphore, + * it will be treated as a hog until the end of its timeslice. + * The HW only sends an interrupt on the first miss, and we + * do know if that semaphore has been signaled, or even if it + * is now stuck on another semaphore. Play safe, yield if it + * might be stuck -- it will be given a fresh timeslice in + * the near future. + */ + upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield)); +} + static int switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) { @@ -1693,8 +1715,7 @@ timeslice(const struct intel_engine_cs *engine) return READ_ONCE(engine->props.timeslice_duration_ms); } -static unsigned long -active_timeslice(const struct intel_engine_cs *engine) +static unsigned long active_timeslice(const struct intel_engine_cs *engine) { const struct i915_request *rq = *engine->execlists.active; @@ -1845,7 +1866,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && - timer_expired(&engine->execlists.timer)) { + timeslice_expired(engine, last)) { ENGINE_TRACE(engine, "expired last=%llx:%lld, prio=%d, hint=%d\n", last->fence.context, @@ -2111,6 +2132,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } clear_ports(port + 1, last_port - port); + WRITE_ONCE(execlists->yield, -1); execlists_submit_ports(engine); set_preempt_timeout(engine); } else { @@ -3972,6 +3994,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; + engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; } static void rcs_submission_override(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d90d7bf8d328..c0308682756b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3105,6 +3105,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ +#define GT_WAIT_SEMAPHORE_INTERRUPT (1 << 11) /* bdw+ */ #define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) @@ -4056,6 +4057,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define CCID_EN BIT(0) #define CCID_EXTENDED_STATE_RESTORE BIT(2) #define CCID_EXTENDED_STATE_SAVE BIT(3) + +#define EXECLIST_STATUS(base) _MMIO((base) + 0x234) +#define EXECLIST_CCID(base) _MMIO((base) + 0x238) + /* * Notes on SNB/IVB/VLV context size: * - Power context is saved elsewhere (LLC or stolen)