diff mbox series

[dii-client,1/2] drm/i915: Add generic interface for tlb invalidation

Message ID 20231010184423.2118908-3-jonathan.cavitt@intel.com (mailing list archive)
State New, archived
Headers show
Series [dii-client,1/2] drm/i915: Add generic interface for tlb invalidation | expand

Commit Message

Cavitt, Jonathan Oct. 10, 2023, 6:44 p.m. UTC
From: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com>

Add a generic interface that supports both selective and full TLB
invalidations. When GuC is enabled, the TLB invalidations use the GuC CT
interface; otherwise, they use the MMIO interface.

Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@intel.com>
CC: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
CC: Fei Yang <fei.yang@intel.com>
Signed-off-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h       |  8 ++
 drivers/gpu/drm/i915/gt/intel_tlb.c           | 58 +++++++++++-
 drivers/gpu/drm/i915/gt/intel_tlb.h           |  1 +
 drivers/gpu/drm/i915/gt/selftest_tlb.c        | 92 +++++++++++++++++++
 .../drm/i915/selftests/i915_mock_selftests.h  |  1 +
 5 files changed, 159 insertions(+), 1 deletion(-)

Comments

kernel test robot Oct. 11, 2023, 12:10 a.m. UTC | #1
Hi Jonathan,

kernel test robot noticed the following build errors:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Jonathan-Cavitt/drm-i915-Use-selective-tlb-invalidations-where-supported/20231011-034501
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20231010184423.2118908-3-jonathan.cavitt%40intel.com
patch subject: [Intel-gfx] [PATCH dii-client 1/2] drm/i915: Add generic interface for tlb invalidation
config: i386-buildonly-randconfig-002-20231011 (https://download.01.org/0day-ci/archive/20231011/202310110727.6wnxZYAI-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231011/202310110727.6wnxZYAI-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310110727.6wnxZYAI-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/gt/intel_tlb.c: In function 'intel_gt_invalidate_tlb_full':
>> drivers/gpu/drm/i915/gt/intel_tlb.c:141:21: error: implicit declaration of function 'intel_guc_invalidate_tlb_full'; did you mean 'intel_gt_invalidate_tlb_full'? [-Werror=implicit-function-declaration]
     141 |                 if (intel_guc_invalidate_tlb_full(guc, INTEL_GUC_TLB_INVAL_MODE_HEAVY) < 0)
         |                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                     intel_gt_invalidate_tlb_full
>> drivers/gpu/drm/i915/gt/intel_tlb.c:141:56: error: 'INTEL_GUC_TLB_INVAL_MODE_HEAVY' undeclared (first use in this function)
     141 |                 if (intel_guc_invalidate_tlb_full(guc, INTEL_GUC_TLB_INVAL_MODE_HEAVY) < 0)
         |                                                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gt/intel_tlb.c:141:56: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/bits.h:6,
                    from include/linux/ratelimit_types.h:5,
                    from include/linux/printk.h:9,
                    from include/asm-generic/bug.h:22,
                    from arch/x86/include/asm/bug.h:87,
                    from include/linux/plist.h:80,
                    from include/linux/pm_qos.h:15,
                    from drivers/gpu/drm/i915/i915_drv.h:35,
                    from drivers/gpu/drm/i915/gt/intel_tlb.c:6:
   drivers/gpu/drm/i915/gt/intel_tlb.c: In function 'intel_gt_invalidate_tlb_range':
>> drivers/gpu/drm/i915/gt/intel_tlb.c:190:48: error: 'const struct intel_device_info' has no member named 'ppgtt_size'
     190 |         vm_total = BIT_ULL(INTEL_INFO(gt->i915)->ppgtt_size);
         |                                                ^~
   include/vdso/bits.h:8:45: note: in definition of macro 'BIT_ULL'
       8 | #define BIT_ULL(nr)             (ULL(1) << (nr))
         |                                             ^~
>> drivers/gpu/drm/i915/gt/intel_tlb.c:195:23: error: implicit declaration of function 'intel_guc_invalidate_tlb_page_selective' [-Werror=implicit-function-declaration]
     195 |                 ret = intel_guc_invalidate_tlb_page_selective(guc,
         |                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gt/intel_tlb.c:196:63: error: 'INTEL_GUC_TLB_INVAL_MODE_HEAVY' undeclared (first use in this function)
     196 |                                                               INTEL_GUC_TLB_INVAL_MODE_HEAVY,
         |                                                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +141 drivers/gpu/drm/i915/gt/intel_tlb.c

   120	
   121	void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
   122	{
   123		intel_wakeref_t wakeref;
   124	
   125		if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
   126			return;
   127	
   128		if (intel_gt_is_wedged(gt))
   129			return;
   130	
   131		if (tlb_seqno_passed(gt, seqno))
   132			return;
   133	
   134		with_intel_gt_pm_if_awake(gt, wakeref) {
   135			struct intel_guc *guc = &gt->uc.guc;
   136	
   137			mutex_lock(&gt->tlb.invalidate_lock);
   138			if (tlb_seqno_passed(gt, seqno))
   139				goto unlock;
   140	
 > 141			if (intel_guc_invalidate_tlb_full(guc, INTEL_GUC_TLB_INVAL_MODE_HEAVY) < 0)
   142				mmio_invalidate_full(gt);
   143	
   144			write_seqcount_invalidate(&gt->tlb.seqno);
   145	unlock:
   146			mutex_unlock(&gt->tlb.invalidate_lock);
   147		}
   148	}
   149	
   150	static u64 tlb_page_selective_size(u64 *addr, u64 length)
   151	{
   152		const u64 end = *addr + length;
   153		u64 start;
   154	
   155		/*
   156		 * Minimum invalidation size for a 2MB page that the hardware expects is
   157		 * 16MB
   158		 */
   159		length = max_t(u64, roundup_pow_of_two(length), SZ_4K);
   160		if (length >= SZ_2M)
   161			length = max_t(u64, SZ_16M, length);
   162	
   163		/*
   164		 * We need to invalidate a higher granularity if start address is not
   165		 * aligned to length. When start is not aligned with length we need to
   166		 * find the length large enough to create an address mask covering the
   167		 * required range.
   168		 */
   169		start = round_down(*addr, length);
   170		while (start + length < end) {
   171			length <<= 1;
   172			start = round_down(*addr, length);
   173		}
   174	
   175		*addr = start;
   176		return length;
   177	}
   178	
   179	bool intel_gt_invalidate_tlb_range(struct intel_gt *gt,
   180					   u64 start, u64 length)
   181	{
   182		struct intel_guc *guc = &gt->uc.guc;
   183		intel_wakeref_t wakeref;
   184		u64 size, vm_total;
   185		bool ret = true;
   186	
   187		if (intel_gt_is_wedged(gt))
   188			return true;
   189	
 > 190		vm_total = BIT_ULL(INTEL_INFO(gt->i915)->ppgtt_size);
   191		/* Align start and length */
   192		size =  min_t(u64, vm_total, tlb_page_selective_size(&start, length));
   193	
   194		with_intel_gt_pm_if_awake(gt, wakeref)
 > 195			ret = intel_guc_invalidate_tlb_page_selective(guc,
   196								      INTEL_GUC_TLB_INVAL_MODE_HEAVY,
   197								      start, size) == 0;
   198	
   199		return ret;
   200	}
   201
kernel test robot Oct. 11, 2023, 1:35 a.m. UTC | #2
Hi Jonathan,

kernel test robot noticed the following build errors:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Jonathan-Cavitt/drm-i915-Use-selective-tlb-invalidations-where-supported/20231011-034501
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20231010184423.2118908-3-jonathan.cavitt%40intel.com
patch subject: [Intel-gfx] [PATCH dii-client 1/2] drm/i915: Add generic interface for tlb invalidation
config: x86_64-randconfig-001-20231011 (https://download.01.org/0day-ci/archive/20231011/202310110932.RZ34WR7w-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231011/202310110932.RZ34WR7w-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310110932.RZ34WR7w-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   drivers/gpu/drm/i915/gt/intel_tlb.c: In function 'intel_gt_invalidate_tlb_full':
   drivers/gpu/drm/i915/gt/intel_tlb.c:141:7: error: implicit declaration of function 'intel_guc_invalidate_tlb_full'; did you mean 'intel_gt_invalidate_tlb_full'? [-Werror=implicit-function-declaration]
     141 |   if (intel_guc_invalidate_tlb_full(guc, INTEL_GUC_TLB_INVAL_MODE_HEAVY) < 0)
         |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |       intel_gt_invalidate_tlb_full
   drivers/gpu/drm/i915/gt/intel_tlb.c:141:42: error: 'INTEL_GUC_TLB_INVAL_MODE_HEAVY' undeclared (first use in this function)
     141 |   if (intel_guc_invalidate_tlb_full(guc, INTEL_GUC_TLB_INVAL_MODE_HEAVY) < 0)
         |                                          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gt/intel_tlb.c:141:42: note: each undeclared identifier is reported only once for each function it appears in
   In file included from include/linux/bits.h:6,
                    from include/linux/ratelimit_types.h:5,
                    from include/linux/printk.h:9,
                    from include/asm-generic/bug.h:22,
                    from arch/x86/include/asm/bug.h:87,
                    from include/linux/plist.h:80,
                    from include/linux/pm_qos.h:15,
                    from drivers/gpu/drm/i915/i915_drv.h:35,
                    from drivers/gpu/drm/i915/gt/intel_tlb.c:6:
   drivers/gpu/drm/i915/gt/intel_tlb.c: In function 'intel_gt_invalidate_tlb_range':
   drivers/gpu/drm/i915/gt/intel_tlb.c:190:41: error: 'const struct intel_device_info' has no member named 'ppgtt_size'
     190 |  vm_total = BIT_ULL(INTEL_INFO(gt->i915)->ppgtt_size);
         |                                         ^~
   include/vdso/bits.h:8:34: note: in definition of macro 'BIT_ULL'
       8 | #define BIT_ULL(nr)  (ULL(1) << (nr))
         |                                  ^~
   drivers/gpu/drm/i915/gt/intel_tlb.c:195:9: error: implicit declaration of function 'intel_guc_invalidate_tlb_page_selective' [-Werror=implicit-function-declaration]
     195 |   ret = intel_guc_invalidate_tlb_page_selective(guc,
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   drivers/gpu/drm/i915/gt/intel_tlb.c:196:14: error: 'INTEL_GUC_TLB_INVAL_MODE_HEAVY' undeclared (first use in this function)
     196 |              INTEL_GUC_TLB_INVAL_MODE_HEAVY,
         |              ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   In file included from drivers/gpu/drm/i915/gt/intel_tlb.c:214:
   drivers/gpu/drm/i915/gt/selftest_tlb.c: In function 'pte_tlbinv':
>> drivers/gpu/drm/i915/gt/selftest_tlb.c:161:1: error: version control conflict marker in file
     161 | <<<<<<< HEAD
         | ^~~~~~~
   drivers/gpu/drm/i915/gt/selftest_tlb.c:163:1: error: version control conflict marker in file
     163 | =======
         | ^~~~~~~
   drivers/gpu/drm/i915/gt/selftest_tlb.c:165:1: error: version control conflict marker in file
     165 | >>>>>>> 774058193c61b... INTEL_DII: drm/i915/xehpsdv: Add generic interface for tlb invalidation
         | ^~~~~~~
>> drivers/gpu/drm/i915/gt/selftest_tlb.c:165:9: error: invalid suffix "c61b..." on integer constant
     165 | >>>>>>> 774058193c61b... INTEL_DII: drm/i915/xehpsdv: Add generic interface for tlb invalidation
         |         ^~~~~~~~~~~~~~~~
>> drivers/gpu/drm/i915/gt/selftest_tlb.c:150:28: warning: unused variable 'vb_res' [-Wunused-variable]
     150 |   struct i915_vma_resource vb_res = {
         |                            ^~~~~~
>> drivers/gpu/drm/i915/gt/selftest_tlb.c:40:21: warning: unused variable 'pat_index' [-Wunused-variable]
      40 |  const unsigned int pat_index =
         |                     ^~~~~~~~~
   cc1: some warnings being treated as errors


vim +161 drivers/gpu/drm/i915/gt/selftest_tlb.c

    30	
    31	static int
    32	pte_tlbinv(struct intel_context *ce,
    33		   struct i915_vma *va,
    34		   struct i915_vma *vb,
    35		   u64 align,
    36		   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
    37		   u64 length,
    38		   struct rnd_state *prng)
    39	{
  > 40		const unsigned int pat_index =
    41			i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
    42		struct drm_i915_gem_object *batch;
    43		struct drm_mm_node vb_node;
    44		struct i915_request *rq;
    45		struct i915_vma *vma;
    46		u64 addr;
    47		int err;
    48		u32 *cs;
    49	
    50		batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
    51		if (IS_ERR(batch))
    52			return PTR_ERR(batch);
    53	
    54		vma = i915_vma_instance(batch, ce->vm, NULL);
    55		if (IS_ERR(vma)) {
    56			err = PTR_ERR(vma);
    57			goto out;
    58		}
    59	
    60		err = i915_vma_pin(vma, 0, 0, PIN_USER);
    61		if (err)
    62			goto out;
    63	
    64		/* Pin va at random but aligned offset after vma */
    65		addr = round_up(vma->node.start + vma->node.size, align);
    66		/* MI_CONDITIONAL_BATCH_BUFFER_END limits address to 48b */
    67		addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
    68					 va->size, align);
    69		err = i915_vma_pin(va,  0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
    70		if (err) {
    71			pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
    72			goto out;
    73		}
    74		GEM_BUG_ON(i915_vma_offset(va) != addr);
    75		if (vb != va) {
    76			vb_node = vb->node;
    77			vb->node = va->node; /* overwrites the _same_ PTE  */
    78		}
    79	
    80		/*
    81		 * Now choose random dword at the 1st pinned page.
    82		 *
    83		 * SZ_64K pages on dg1 require that the whole PT be marked
    84		 * containing 64KiB entries. So we make sure that vma
    85		 * covers the whole PT, despite being randomly aligned to 64KiB
    86		 * and restrict our sampling to the 2MiB PT within where
    87		 * we know that we will be using 64KiB pages.
    88		 */
    89		if (align == SZ_64K)
    90			addr = round_up(addr, SZ_2M);
    91		addr = igt_random_offset(prng, addr, addr + align, 8, 8);
    92	
    93		if (va != vb)
    94			pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
    95				ce->engine->name, va->obj->mm.region->name ?: "smem",
    96				addr, align, va->resource->page_sizes_gtt,
    97				va->page_sizes.phys, va->page_sizes.sg,
    98				addr & -length, length);
    99	
   100		cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
   101		*cs++ = MI_NOOP; /* for later termination */
   102		/*
   103		 * Sample the target to see if we spot the updated backing store.
   104		 * Gen8 VCS compares immediate value with bitwise-and of two
   105		 * consecutive DWORDS pointed by addr, other gen/engines compare value
   106		 * with DWORD pointed by addr. Moreover we want to exercise DWORD size
   107		 * invalidations. To fulfill all these requirements below values
   108		 * have been chosen.
   109		 */
   110		*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
   111		*cs++ = 0; /* break if *addr == 0 */
   112		*cs++ = lower_32_bits(addr);
   113		*cs++ = upper_32_bits(addr);
   114		vma_set_qw(va, addr, -1);
   115		vma_set_qw(vb, addr, 0);
   116	
   117		/* Keep sampling until we get bored */
   118		*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
   119		*cs++ = lower_32_bits(i915_vma_offset(vma));
   120		*cs++ = upper_32_bits(i915_vma_offset(vma));
   121	
   122		i915_gem_object_flush_map(batch);
   123	
   124		rq = i915_request_create(ce);
   125		if (IS_ERR(rq)) {
   126			err = PTR_ERR(rq);
   127			goto out_va;
   128		}
   129	
   130		err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
   131		if (err) {
   132			i915_request_add(rq);
   133			goto out_va;
   134		}
   135	
   136		i915_request_get(rq);
   137		i915_request_add(rq);
   138	
   139		/* Short sleep to sanitycheck the batch is spinning before we begin */
   140		msleep(10);
   141		if (va == vb) {
   142			if (!i915_request_completed(rq)) {
   143				pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
   144				       ce->engine->name, va->obj->mm.region->name ?: "smem",
   145				       addr, align, va->resource->page_sizes_gtt,
   146				       va->page_sizes.phys, va->page_sizes.sg);
   147				err = -EIO;
   148			}
   149		} else if (!i915_request_completed(rq)) {
 > 150			struct i915_vma_resource vb_res = {
   151				.bi.pages = vb->obj->mm.pages,
   152				.bi.page_sizes = vb->obj->mm.page_sizes,
   153				.start = i915_vma_offset(vb),
   154				.vma_size = i915_vma_size(vb)
   155			};
   156			unsigned int pte_flags = 0;
   157	
   158			/* Flip the PTE between A and B */
   159			if (i915_gem_object_is_lmem(vb->obj))
   160				pte_flags |= PTE_LM;
 > 161	<<<<<<< HEAD
   162			ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);
   163	=======
   164			ce->vm->insert_entries(ce->vm, &stash, vb, I915_CACHE_NONE, pte_flags);
 > 165	>>>>>>> 774058193c61b... INTEL_DII: drm/i915/xehpsdv: Add generic interface for tlb invalidation
   166	
   167			/* Flush the PTE update to concurrent HW */
   168			tlbinv(ce->vm, addr & -length, length);
   169	
   170			if (wait_for(i915_request_completed(rq), HZ / 2)) {
   171				pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
   172				       ce->engine->name);
   173				err = -EINVAL;
   174			}
   175		} else {
   176			pr_err("Spinner ended unexpectedly\n");
   177			err = -EIO;
   178		}
   179		i915_request_put(rq);
   180	
   181		cs = page_mask_bits(batch->mm.mapping);
   182		*cs = MI_BATCH_BUFFER_END;
   183		wmb();
   184	
   185	out_va:
   186		if (vb != va)
   187			vb->node = vb_node;
   188		i915_vma_unpin(va);
   189		if (i915_vma_unbind_unlocked(va))
   190			err = -EIO;
   191	out:
   192		i915_gem_object_put(batch);
   193		return err;
   194	}
   195
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index eecd0a87a6478..f2ca1c26ecde5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1124,6 +1124,14 @@ 
 
 #define GEN12_GAM_DONE				_MMIO(0xcf68)
 
+#define XEHPSDV_TLB_INV_DESC0			_MMIO(0xcf7c)
+#define   XEHPSDV_TLB_INV_DESC0_ADDR_LO		REG_GENMASK(31, 12)
+#define   XEHPSDV_TLB_INV_DESC0_ADDR_MASK	REG_GENMASK(8, 3)
+#define   XEHPSDV_TLB_INV_DESC0_G		REG_GENMASK(2, 1)
+#define   XEHPSDV_TLB_INV_DESC0_VALID		REG_BIT(0)
+#define XEHPSDV_TLB_INV_DESC1			_MMIO(0xcf80)
+#define   XEHPSDV_TLB_INV_DESC0_ADDR_HI		REG_GENMASK(31, 0)
+
 #define GEN7_HALF_SLICE_CHICKEN1		_MMIO(0xe100) /* IVB GT1 + VLV */
 #define GEN8_HALF_SLICE_CHICKEN1		MCR_REG(0xe100)
 #define   GEN7_MAX_PS_THREAD_DEP		(8 << 12)
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 139608c30d978..92fb455299717 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -12,6 +12,7 @@ 
 #include "intel_gt_print.h"
 #include "intel_gt_regs.h"
 #include "intel_tlb.h"
+#include "uc/intel_guc.h"
 
 /*
  * HW architecture suggest typical invalidation time at 40us,
@@ -131,11 +132,14 @@  void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
 		return;
 
 	with_intel_gt_pm_if_awake(gt, wakeref) {
+		struct intel_guc *guc = &gt->uc.guc;
+
 		mutex_lock(&gt->tlb.invalidate_lock);
 		if (tlb_seqno_passed(gt, seqno))
 			goto unlock;
 
-		mmio_invalidate_full(gt);
+		if (intel_guc_invalidate_tlb_full(guc, INTEL_GUC_TLB_INVAL_MODE_HEAVY) < 0)
+			mmio_invalidate_full(gt);
 
 		write_seqcount_invalidate(&gt->tlb.seqno);
 unlock:
@@ -143,6 +147,58 @@  void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
 	}
 }
 
+static u64 tlb_page_selective_size(u64 *addr, u64 length)
+{
+	const u64 end = *addr + length;
+	u64 start;
+
+	/*
+	 * Minimum invalidation size for a 2MB page that the hardware expects is
+	 * 16MB
+	 */
+	length = max_t(u64, roundup_pow_of_two(length), SZ_4K);
+	if (length >= SZ_2M)
+		length = max_t(u64, SZ_16M, length);
+
+	/*
+	 * We need to invalidate a higher granularity if start address is not
+	 * aligned to length. When start is not aligned with length we need to
+	 * find the length large enough to create an address mask covering the
+	 * required range.
+	 */
+	start = round_down(*addr, length);
+	while (start + length < end) {
+		length <<= 1;
+		start = round_down(*addr, length);
+	}
+
+	*addr = start;
+	return length;
+}
+
+bool intel_gt_invalidate_tlb_range(struct intel_gt *gt,
+				   u64 start, u64 length)
+{
+	struct intel_guc *guc = &gt->uc.guc;
+	intel_wakeref_t wakeref;
+	u64 size, vm_total;
+	bool ret = true;
+
+	if (intel_gt_is_wedged(gt))
+		return true;
+
+	vm_total = BIT_ULL(INTEL_INFO(gt->i915)->ppgtt_size);
+	/* Align start and length */
+	size =  min_t(u64, vm_total, tlb_page_selective_size(&start, length));
+
+	with_intel_gt_pm_if_awake(gt, wakeref)
+		ret = intel_guc_invalidate_tlb_page_selective(guc,
+							      INTEL_GUC_TLB_INVAL_MODE_HEAVY,
+							      start, size) == 0;
+
+	return ret;
+}
+
 void intel_gt_init_tlb(struct intel_gt *gt)
 {
 	mutex_init(&gt->tlb.invalidate_lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.h b/drivers/gpu/drm/i915/gt/intel_tlb.h
index 337327af92ac4..9e5fc40c2b08e 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.h
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.h
@@ -12,6 +12,7 @@ 
 #include "intel_gt_types.h"
 
 void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno);
+bool intel_gt_invalidate_tlb_range(struct intel_gt *gt, u64 start, u64 length);
 
 void intel_gt_init_tlb(struct intel_gt *gt);
 void intel_gt_fini_tlb(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 7e41f69fc818f..1dc4ff56916fe 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -158,7 +158,11 @@  pte_tlbinv(struct intel_context *ce,
 		/* Flip the PTE between A and B */
 		if (i915_gem_object_is_lmem(vb->obj))
 			pte_flags |= PTE_LM;
+<<<<<<< HEAD
 		ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);
+=======
+		ce->vm->insert_entries(ce->vm, &stash, vb, I915_CACHE_NONE, pte_flags);
+>>>>>>> 774058193c61b... INTEL_DII: drm/i915/xehpsdv: Add generic interface for tlb invalidation
 
 		/* Flush the PTE update to concurrent HW */
 		tlbinv(ce->vm, addr & -length, length);
@@ -375,10 +379,45 @@  static int invalidate_full(void *arg)
 	return err;
 }
 
+static void tlbinv_range(struct i915_address_space *vm, u64 addr, u64 length)
+{
+	if (!intel_gt_invalidate_tlb_range(vm->gt, addr, length))
+		pr_err("range invalidate failed\n");
+}
+
+static bool has_invalidate_range(struct intel_gt *gt)
+{
+	intel_wakeref_t wf;
+	bool result = false;
+
+	with_intel_gt_pm(gt, wf)
+		result = intel_gt_invalidate_tlb_range(gt, 0, gt->vm->total);
+
+	return result;
+}
+
+static int invalidate_range(void *arg)
+{
+	struct intel_gt *gt = arg;
+	int err;
+
+	if (!has_invalidate_range(gt))
+		return 0;
+
+	err = mem_tlbinv(gt, create_smem, tlbinv_range);
+	if (err == 0)
+		err = mem_tlbinv(gt, create_lmem, tlbinv_range);
+	if (err == -ENODEV || err == -ENXIO)
+		err = 0;
+
+	return err;
+}
+
 int intel_tlb_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(invalidate_full),
+		SUBTEST(invalidate_range),
 	};
 	struct intel_gt *gt;
 	unsigned int i;
@@ -396,3 +435,56 @@  int intel_tlb_live_selftests(struct drm_i915_private *i915)
 
 	return 0;
 }
+
+static int tlb_page_size(void *arg)
+{
+	int start, size, offset;
+
+	for (start = 0; start < 57; start++) {
+		for (size = 0; size <= 57 - start; size++) {
+			for (offset = 0; offset <= size; offset++) {
+				u64 len = BIT(size);
+				u64 addr = BIT(start) + len - BIT(offset);
+				u64 expected_start = addr;
+				u64 expected_end = addr + len - 1;
+				int err = 0;
+
+				if (addr + len < addr)
+					continue;
+
+				len = tlb_page_selective_size(&addr, len);
+				if (addr > expected_start) {
+					pr_err("(start:%d, size:%d, offset:%d, range:[%llx, %llx]) invalidate range:[%llx + %llx] after start:%llx\n",
+					       start, size, offset,
+					       expected_start, expected_end,
+					       addr, len,
+					       expected_start);
+					err = -EINVAL;
+				}
+
+				if (addr + len < expected_end) {
+					pr_err("(start:%d, size:%d, offset:%d, range:[%llx, %llx]) invalidate range:[%llx + %llx] before end:%llx\n",
+					       start, size, offset,
+					       expected_start, expected_end,
+					       addr, len,
+					       expected_end);
+					err = -EINVAL;
+				}
+
+				if (err)
+					return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
+int intel_tlb_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(tlb_page_size),
+	};
+
+	return i915_subtests(tests, NULL);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 0c22e0fc9059c..3e00cd2b6e53c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -21,6 +21,7 @@  selftest(fence, i915_sw_fence_mock_selftests)
 selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
+selftest(tlb, intel_tlb_mock_selftests)
 selftest(ring, intel_ring_mock_selftests)
 selftest(engine, intel_engine_cs_mock_selftests)
 selftest(timelines, intel_timeline_mock_selftests)