[v2,08/21] drm/i915/gt: Move TLB invalidation to its own file

Message ID	d9cbb37b472084a6a05f6fc8c2e802b122de08e8.1657800199.git.mchehab@kernel.org (mailing list archive)
State	New, archived
Headers	show Return-Path: <dri-devel-bounces@lists.freedesktop.org> From: Mauro Carvalho Chehab <mchehab@kernel.org> To: Subject: [PATCH v2 08/21] drm/i915/gt: Move TLB invalidation to its own file Date: Thu, 14 Jul 2022 13:06:13 +0100 Message-Id: <d9cbb37b472084a6a05f6fc8c2e802b122de08e8.1657800199.git.mchehab@kernel.org> In-Reply-To: <cover.1657800199.git.mchehab@kernel.org> References: <cover.1657800199.git.mchehab@kernel.org> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Precedence: list Cc: Fei Yang <fei.yang@intel.com>, David Airlie <airlied@linux.ie>, dri-devel@lists.freedesktop.org, Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>, Jason Ekstrand <jason@jlekstrand.net>, Chris Wilson <chris.p.wilson@intel.com>, Matthew Auld <matthew.auld@intel.com>, Andi Shyti <andi.shyti@linux.intel.com>, Tomas Winkler <tomas.winkler@intel.com>, =?utf-8?q?Thomas_Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>, Casey Bowman <casey.g.bowman@intel.com>, Lucas De Marchi <lucas.demarchi@intel.com>, intel-gfx@lists.freedesktop.org, Rodrigo Vivi <rodrigo.vivi@intel.com>, Mauro Carvalho Chehab <mchehab@kernel.org>, Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>, linux-kernel@vger.kernel.org, Prathap Kumar Valsan <prathap.kumar.valsan@intel.com> Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" <dri-devel-bounces@lists.freedesktop.org>
Series	Fix performance regressions with TLB and add GuC support \| expand [v2,00/21] Fix performance regressions with TLB and add GuC support [v2,01/21] drm/i915/gt: Ignore TLB invalidations on idle engines [v2,02/21] drm/i915/gt: document with_intel_gt_pm_if_awake() [v2,03/21] drm/i915/gt: Invalidate TLB of the OA unit at TLB invalidations [v2,04/21] drm/i915/gt: Only invalidate TLBs exposed to user manipulation [v2,05/21] drm/i915/gt: Skip TLB invalidations once wedged [v2,06/21] drm/i915/gt: Batch TLB invalidations [v2,07/21] drm/i915/gt: describe the new tlb parameter at i915_vma_resource [v2,08/21] drm/i915/gt: Move TLB invalidation to its own file [v2,09/21] drm/i915/guc: Define CTB based TLB invalidation routines [v2,10/21] drm/i915/guc: use kernel-doc for enum intel_guc_tlb_inval_mode [v2,11/21] drm/i915/guc: document the TLB invalidation struct members [v2,12/21] drm/i915/guc: Introduce TLB_INVALIDATION_ALL action [v2,13/21] drm/i915: Invalidate the TLBs on each GT [v2,14/21] drm/i915: document tlb field at struct drm_i915_gem_object [v2,15/21] drm/i915: Add platform macro for selective tlb flush [v2,16/21] drm/i915: Define GuC Based TLB invalidation routines [v2,17/21] drm/i915: Add generic interface for tlb invalidation for XeHP [v2,18/21] drm/i915: Use selective tlb invalidations where supported [v2,19/21] drm/i915/gt: document TLB cache invalidation functions [v2,20/21] drm/i915/guc: describe enum intel_guc_tlb_invalidation_type [v2,21/21] drm/i915/guc: document TLB cache invalidation functions

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 522ef9b4aff3..d3df9832d1f7 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -126,6 +126,7 @@ gt-y += \ gt/intel_sseu.o \ gt/intel_sseu_debugfs.o \ gt/intel_timeline.o \ + gt/intel_tlb.o \ gt/intel_workarounds.o \ gt/shmem_utils.o \ gt/sysfs_engines.o diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 8357dbdcab5c..1cd76cc5d9f3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -7,7 +7,7 @@ #include <drm/drm_cache.h> #include "gt/intel_gt.h" -#include "gt/intel_gt_pm.h" +#include "gt/intel_tlb.h" #include "i915_drv.h" #include "i915_gem_object.h" @@ -199,7 +199,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj) if (!obj->mm.tlb) return; - intel_gt_invalidate_tlb(gt, obj->mm.tlb); + intel_gt_invalidate_tlb_full(gt, obj->mm.tlb); obj->mm.tlb = 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f435e06125aa..18d82cd620bd 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -11,9 +11,7 @@ #include "pxp/intel_pxp.h" #include "i915_drv.h" -#include "i915_perf_oa_regs.h" #include "intel_context.h" -#include "intel_engine_pm.h" #include "intel_engine_regs.h" #include "intel_ggtt_gmch.h" #include "intel_gt.h" @@ -31,6 +29,7 @@ #include "intel_renderstate.h" #include "intel_rps.h" #include "intel_gt_sysfs.h" +#include "intel_tlb.h" #include "intel_uncore.h" #include "shmem_utils.h" @@ -48,8 +47,7 @@ static void __intel_gt_init_early(struct intel_gt *gt) intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_init_timelines(gt); - mutex_init(&gt->tlb.invalidate_lock); - seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock); + intel_gt_init_tlb(gt); intel_gt_pm_init_early(gt); intel_uc_init_early(&gt->uc); @@ -770,7 +768,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915) intel_gt_fini_requests(gt); intel_gt_fini_reset(gt); intel_gt_fini_timelines(gt); - mutex_destroy(&gt->tlb.invalidate_lock); + intel_gt_fini_tlb(gt); intel_engines_free(gt); } } @@ -881,163 +879,3 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } - -struct reg_and_bit { - i915_reg_t reg; - u32 bit; -}; - -static struct reg_and_bit -get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, - const i915_reg_t *regs, const unsigned int num) -{ - const unsigned int class = engine->class; - struct reg_and_bit rb = { }; - - if (drm_WARN_ON_ONCE(&engine->i915->drm, - class >= num || !regs[class].reg)) - return rb; - - rb.reg = regs[class]; - if (gen8 && class == VIDEO_DECODE_CLASS) - rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ - else - rb.bit = engine->instance; - - rb.bit = BIT(rb.bit); - - return rb; -} - -static void mmio_invalidate_full(struct intel_gt *gt) -{ - static const i915_reg_t gen8_regs[] = { - [RENDER_CLASS] = GEN8_RTCR, - [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ - [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, - [COPY_ENGINE_CLASS] = GEN8_BTCR, - }; - static const i915_reg_t gen12_regs[] = { - [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, - [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, - [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, - [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, - [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR, - }; - struct drm_i915_private *i915 = gt->i915; - struct intel_uncore *uncore = gt->uncore; - struct intel_engine_cs *engine; - intel_engine_mask_t awake, tmp; - enum intel_engine_id id; - const i915_reg_t *regs; - unsigned int num = 0; - - if (GRAPHICS_VER(i915) == 12) { - regs = gen12_regs; - num = ARRAY_SIZE(gen12_regs); - } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { - regs = gen8_regs; - num = ARRAY_SIZE(gen8_regs); - } else if (GRAPHICS_VER(i915) < 8) { - return; - } - - if (drm_WARN_ONCE(&i915->drm, !num, - "Platform does not implement TLB invalidation!")) - return; - - intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); - - spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ - - awake = 0; - for_each_engine(engine, gt, id) { - struct reg_and_bit rb; - - if (!intel_engine_pm_is_awake(engine)) - continue; - - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (!i915_mmio_reg_offset(rb.reg)) - continue; - - intel_uncore_write_fw(uncore, rb.reg, rb.bit); - awake |= engine->mask; - } - - GT_TRACE(gt, "invalidated engines %08x\n", awake); - - /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ - if (awake && - (IS_TIGERLAKE(i915) || - IS_DG1(i915) || - IS_ROCKETLAKE(i915) || - IS_ALDERLAKE_S(i915) || - IS_ALDERLAKE_P(i915))) - intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); - - spin_unlock_irq(&uncore->lock); - - for_each_engine_masked(engine, gt, awake, tmp) { - struct reg_and_bit rb; - - /* - * HW architecture suggest typical invalidation time at 40us, - * with pessimistic cases up to 100us and a recommendation to - * cap at 1ms. We go a bit higher just in case. - */ - const unsigned int timeout_us = 100; - const unsigned int timeout_ms = 4; - - rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (__intel_wait_for_register_fw(uncore, - rb.reg, rb.bit, 0, - timeout_us, timeout_ms, - NULL)) - drm_err_ratelimited(&gt->i915->drm, - "%s TLB invalidation did not complete in %ums!\n", - engine->name, timeout_ms); - } - - /* - * Use delayed put since a) we mostly expect a flurry of TLB - * invalidations so it is good to avoid paying the forcewake cost and - * b) it works around a bug in Icelake which cannot cope with too rapid - * transitions. - */ - intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); -} - -static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) -{ - u32 cur = intel_gt_tlb_seqno(gt); - - /* Only skip if a *full* TLB invalidate barrier has passed */ - return (s32)(cur - ALIGN(seqno, 2)) > 0; -} - -void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno) -{ - intel_wakeref_t wakeref; - - if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) - return; - - if (intel_gt_is_wedged(gt)) - return; - - if (tlb_seqno_passed(gt, seqno)) - return; - - with_intel_gt_pm_if_awake(gt, wakeref) { - mutex_lock(&gt->tlb.invalidate_lock); - if (tlb_seqno_passed(gt, seqno)) - goto unlock; - - mmio_invalidate_full(gt); - - write_seqcount_invalidate(&gt->tlb.seqno); -unlock: - mutex_unlock(&gt->tlb.invalidate_lock); - } -} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 40b06adf509a..b4bba16cdb53 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -101,16 +101,4 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); -static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt) -{ - return seqprop_sequence(&gt->tlb.seqno); -} - -static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt) -{ - return intel_gt_tlb_seqno(gt) | 1; -} - -void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno); - #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c new file mode 100644 index 000000000000..af8cae979489 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_perf_oa_regs.h" +#include "intel_engine_pm.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_regs.h" +#include "intel_tlb.h" + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) +{ + u32 cur = intel_gt_tlb_seqno(gt); + + /* Only skip if a *full* TLB invalidate barrier has passed */ + return (s32)(cur - ALIGN(seqno, 2)) > 0; +} + +static void mmio_invalidate_full(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + [COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + intel_engine_mask_t awake, tmp; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (GRAPHICS_VER(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + awake = 0; + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + if (!intel_engine_pm_is_awake(engine)) + continue; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + awake |= engine->mask; + } + + GT_TRACE(gt, "invalidated engines %08x\n", awake); + + /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ + if (awake && + (IS_TIGERLAKE(i915) || + IS_DG1(i915) || + IS_ROCKETLAKE(i915) || + IS_ALDERLAKE_S(i915) || + IS_ALDERLAKE_P(i915))) + intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); + + spin_unlock_irq(&uncore->lock); + + for_each_engine_masked(engine, gt, awake, tmp) { + struct reg_and_bit rb; + + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + drm_err_ratelimited(&gt->i915->drm, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + /* + * Use delayed put since a) we mostly expect a flurry of TLB + * invalidations so it is good to avoid paying the forcewake cost and + * b) it works around a bug in Icelake which cannot cope with too rapid + * transitions. + */ + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); +} + +void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno) +{ + intel_wakeref_t wakeref; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (intel_gt_is_wedged(gt)) + return; + + if (tlb_seqno_passed(gt, seqno)) + return; + + with_intel_gt_pm_if_awake(gt, wakeref) { + mutex_lock(&gt->tlb.invalidate_lock); + if (tlb_seqno_passed(gt, seqno)) + goto unlock; + + mmio_invalidate_full(gt); + + write_seqcount_invalidate(&gt->tlb.seqno); +unlock: + mutex_unlock(&gt->tlb.invalidate_lock); + } +} + +void intel_gt_init_tlb(struct intel_gt *gt) +{ + mutex_init(&gt->tlb.invalidate_lock); + seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock); +} + +void intel_gt_fini_tlb(struct intel_gt *gt) +{ + mutex_destroy(&gt->tlb.invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h new file mode 100644 index 000000000000..46ce25bf5afe --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_tlb.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef INTEL_TLB_H +#define INTEL_TLB_H + +#include <linux/seqlock.h> +#include <linux/types.h> + +#include "intel_gt_types.h" + +void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno); + +void intel_gt_init_tlb(struct intel_gt *gt); +void intel_gt_fini_tlb(struct intel_gt *gt); + +static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt) +{ + return seqprop_sequence(&gt->tlb.seqno); +} + +static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt) +{ + return intel_gt_tlb_seqno(gt) | 1; +} + +#endif /* INTEL_TLB_H */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 84a9ccbc5fc5..fe947d1456d5 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -33,6 +33,7 @@ #include "gt/intel_engine_heartbeat.h" #include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" +#include "gt/intel_tlb.h" #include "i915_drv.h" #include "i915_gem_evict.h"

[v2,08/21] drm/i915/gt: Move TLB invalidation to its own file

Commit Message

Comments

Patch