diff mbox series

[1/8] drm/i915/mtl: Define MOCS and PAT tables for MTL

Message ID 20230407071236.1960642-2-fei.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/mtl: Define MOCS and PAT tables for MTL | expand

Commit Message

Yang, Fei April 7, 2023, 7:12 a.m. UTC
From: Fei Yang <fei.yang@intel.com>

On MTL, GT can no longer allocate on LLC - only the CPU can.
This, along with addition of support for ADM/L4 cache calls a
MOCS/PAT table update. Also defines PTE encode functions for
MTL as it has different PAT index definition than previous
platforms.

BSpec: 44509, 45101, 44235

Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c    | 28 +++++++++
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h    |  3 +
 drivers/gpu/drm/i915/gt/intel_ggtt.c    | 27 +++++++++
 drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
 drivers/gpu/drm/i915/gt/intel_gtt.h     | 20 ++++++-
 drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
 drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
 drivers/gpu/drm/i915/i915_pci.c         |  1 +
 8 files changed, 173 insertions(+), 7 deletions(-)

Comments

Matt Roper April 10, 2023, 4:34 p.m. UTC | #1
On Fri, Apr 07, 2023 at 12:12:29AM -0700, fei.yang@intel.com wrote:
> From: Fei Yang <fei.yang@intel.com>
> 
> On MTL, GT can no longer allocate on LLC - only the CPU can.
> This, along with addition of support for ADM/L4 cache calls a
> MOCS/PAT table update. Also defines PTE encode functions for
> MTL as it has different PAT index definition than previous
> platforms.

It might be best to keep the PTE encoding as a separate patch from the
MOCS/PAT tables.  It's a different enough topic that it probably
deserves a patch of its own.

> 
> BSpec: 44509, 45101, 44235
> 
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Signed-off-by: Madhumitha Tolakanahalli Pradeep <madhumitha.tolakanahalli.pradeep@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c    | 28 +++++++++
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h    |  3 +
>  drivers/gpu/drm/i915/gt/intel_ggtt.c    | 27 +++++++++
>  drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
>  drivers/gpu/drm/i915/gt/intel_gtt.h     | 20 ++++++-
>  drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
>  drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
>  drivers/gpu/drm/i915/i915_pci.c         |  1 +
>  8 files changed, 173 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 4daaa6f55668..df4073d32114 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>  	return pte;
>  }
>  
> +static u64 mtl_pte_encode(dma_addr_t addr,
> +			  enum i915_cache_level level,
> +			  u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
> +
> +	if (unlikely(flags & PTE_READ_ONLY))
> +		pte &= ~GEN8_PAGE_RW;
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= GEN12_PPGTT_PTE_PAT0;
> +		break;
> +	}
> +
> +	return pte;
> +}
> +
>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>  {
>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> index f541d19264b4..6b8ce7f4d25a 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> @@ -18,5 +18,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>  			 enum i915_cache_level level,
>  			 u32 flags);
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			unsigned int pat_index,
> +			u32 flags);
>  
>  #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 3c7f1ed92f5b..4a16bfcde1de 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -220,6 +220,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>  	}
>  }
>  
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
> +
> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_GGTT_PTE_LM;
> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= MTL_GGTT_PTE_PAT0;
> +		break;
> +	}
> +
> +	return pte;
> +}
> +
>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>  			 enum i915_cache_level level,
>  			 u32 flags)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index 4f436ba7a3c8..1e1b34e22cf5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -468,6 +468,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
>  	}
>  }
>  
> +static void mtl_setup_private_ppat(struct intel_uncore *uncore)
> +{
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
> +			   MTL_PPAT_L4_0_WB);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
> +			   MTL_PPAT_L4_1_WT);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
> +			   MTL_PPAT_L4_3_UC);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
> +			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
> +	intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
> +			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

These registers are multicast registers on the primary GT.  We need to
use XEHP_PAT_INDEX and the MCR-aware functions on the primary GT.

Although we're not programming the higher PAT entries on MTL (yet),
there's also a jump in the MMIO offsets that we should incorporate into
the PAT_INDEX macro to future-proof the code.

> +
> +	/*
> +	 * Remaining PAT entries are left at the hardware-default
> +	 * fully-cached setting
> +	 */
> +}
> +
>  static void tgl_setup_private_ppat(struct intel_uncore *uncore)
>  {
>  	/* TGL doesn't support LLC or AGE settings */
> @@ -603,7 +622,9 @@ void setup_private_pat(struct intel_gt *gt)
>  
>  	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
>  
> -	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
> +	if (IS_METEORLAKE(i915))
> +		mtl_setup_private_ppat(uncore);

We need to handle the primary GT and media GT separately since the media
GT registers are unicast whereas primary are multicast.

See
https://gitlab.freedesktop.org/drm/xe/kernel/-/blob/drm-xe-next/drivers/gpu/drm/xe/xe_pat.c
for how this was handled in the Xe driver.

> +	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>  		xehp_setup_private_ppat(gt);
>  	else if (GRAPHICS_VER(i915) >= 12)
>  		tgl_setup_private_ppat(uncore);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 69ce55f517f5..b632167eaf2e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
>  
> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)

Which bspec page is this from?  The PPGTT descriptions in the bspec are
kind of hard to track down.

But if these only apply to MTL, why are they labelled as "GEN12?"

>  
> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
> +#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
> +#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
> +#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
> +#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
> +#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
>  
>  #define GEN12_PDE_64K BIT(6)
>  #define GEN12_PTE_PS64 BIT(8)
> @@ -147,6 +156,15 @@ typedef u64 gen8_pte_t;
>  #define GEN8_PDE_IPS_64K BIT(11)
>  #define GEN8_PDE_PS_2M   BIT(7)
>  
> +#define MTL_PPAT_L4_CACHE_POLICY_MASK	REG_GENMASK(3, 2)
> +#define MTL_PAT_INDEX_COH_MODE_MASK	REG_GENMASK(1, 0)
> +#define MTL_PPAT_L4_3_UC	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
> +#define MTL_PPAT_L4_1_WT	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
> +#define MTL_PPAT_L4_0_WB	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
> +#define MTL_3_COH_2W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
> +#define MTL_2_COH_1W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
> +#define MTL_0_COH_NON	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)

The values for these definitions don't seem to be aligned.

> +
>  enum i915_cache_level;
>  
>  struct drm_i915_gem_object;
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index 69b489e8dfed..89570f137b2c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
>  #define LE_COS(value)		((value) << 15)
>  #define LE_SSE(value)		((value) << 17)
>  
> +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
> +#define _L4_CACHEABILITY(value)	((value) << 2)

We should use REG_GENMASK + REG_FIELD_PREP for new code, which will help
ensure that we don't try to pack a value into a field that's too big to
fit.

> +#define IG_PAT(value)		((value) << 8)

Do we need this one parameterized?  It's just a boolean flag, so

        #define IG_PAT          REG_BIT(8)

would be fine (and then don't include the flag on the entries where it
isn't set.

> +
>  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>  #define L3_ESC(value)		((value) << 0)
>  #define L3_SCC(value)		((value) << 1)
> @@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
>  /* Helper defines */
>  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>  #define PVC_NUM_MOCS_ENTRIES	3
> +#define MTL_NUM_MOCS_ENTRIES	16
>  
>  /* (e)LLC caching options */
>  /*
> @@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
>  #define L3_2_RESERVED		_L3_CACHEABILITY(2)
>  #define L3_3_WB			_L3_CACHEABILITY(3)
>  
> +/* L4 caching options */
> +#define L4_0_WB			_L4_CACHEABILITY(0)
> +#define L4_1_WT			_L4_CACHEABILITY(1)
> +#define L4_2_RESERVED		_L4_CACHEABILITY(2)

This definition is unnecessary and unused.

> +#define L4_3_UC			_L4_CACHEABILITY(3)
> +
>  #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
>  	[__idx] = { \
>  		.control_value = __control_value, \
> @@ -416,6 +427,57 @@ static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
>  	MOCS_ENTRY(2, 0, L3_3_WB),
>  };
>  
> +static const struct drm_i915_mocs_entry mtl_mocs_table[] = {

The entries below with WB L4 cache don't include L4_0_WB.  That works
out okay since the value is 0, but it seems like it would still be worth
including it, just to make the table meaning more clear.

Conversely, I'd drop the "IG_PAT(0)" on entry 0 since that's just a
simple boolean flag, so the "0" there is easier to misread than just not
including it at all.

> +	/* Error - Reserved for Non-Use */
> +	MOCS_ENTRY(0,
> +		   IG_PAT(0),
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* Cached - L3 + L4 */
> +	MOCS_ENTRY(1,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* L4 - GO:L3 */
> +	MOCS_ENTRY(2,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_1_UC),
> +	/* Uncached - GO:L3 */
> +	MOCS_ENTRY(3,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_LKUP(1) | L3_1_UC),
> +	/* L4 - GO:Mem */
> +	MOCS_ENTRY(4,
> +		   IG_PAT(1),
> +		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
> +	/* Uncached - GO:Mem */
> +	MOCS_ENTRY(5,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
> +	/* L4 - L3:NoLKUP; GO:L3 */
> +	MOCS_ENTRY(6,
> +		   IG_PAT(1),
> +		   L3_1_UC),
> +	/* Uncached - L3:NoLKUP; GO:L3 */
> +	MOCS_ENTRY(7,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_1_UC),
> +	/* L4 - L3:NoLKUP; GO:Mem */
> +	MOCS_ENTRY(8,
> +		   IG_PAT(1),
> +		   L3_GLBGO(1) | L3_1_UC),
> +	/* Uncached - L3:NoLKUP; GO:Mem */
> +	MOCS_ENTRY(9,
> +		   IG_PAT(1) | L4_3_UC,
> +		   L3_GLBGO(1) | L3_1_UC),
> +	/* Display - L3; L4:WT */
> +	MOCS_ENTRY(14,
> +		   IG_PAT(1) | L4_1_WT,
> +		   L3_LKUP(1) | L3_3_WB),
> +	/* CCS - Non-Displayable */
> +	MOCS_ENTRY(15,
> +		   IG_PAT(1),
> +		   L3_GLBGO(1) | L3_1_UC),
> +};
> +
>  enum {
>  	HAS_GLOBAL_MOCS = BIT(0),
>  	HAS_ENGINE_MOCS = BIT(1),
> @@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
>  	memset(table, 0, sizeof(struct drm_i915_mocs_table));
>  
>  	table->unused_entries_index = I915_MOCS_PTE;
> -	if (IS_PONTEVECCHIO(i915)) {
> +	if (IS_METEORLAKE(i915)) {
> +		table->size = ARRAY_SIZE(mtl_mocs_table);
> +		table->table = mtl_mocs_table;
> +		table->n_entries = MTL_NUM_MOCS_ENTRIES;
> +		table->uc_index = 9;
> +		table->unused_entries_index = 1;
> +	} else if (IS_PONTEVECCHIO(i915)) {
>  		table->size = ARRAY_SIZE(pvc_mocs_table);
>  		table->table = pvc_mocs_table;
>  		table->n_entries = PVC_NUM_MOCS_ENTRIES;
> @@ -646,9 +714,9 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>  		init_l3cc_table(engine->gt, &table);
>  }
>  
> -static u32 global_mocs_offset(void)
> +static u32 global_mocs_offset(struct intel_gt *gt)
>  {
> -	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
> +	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) + gt->uncore->gsi_offset;

Isn't this going to double-apply the GSI offset in __init_mocs_table?

We do need to handle the offset manually in read_mocs_table() since
those reads are done with an MI_STORE_REGISTER_MEM instruction.

Handling the GSI offset properly deserves its own patch with a dedicated
commit message explaining the details.

>  }
>  
>  void intel_set_mocs_index(struct intel_gt *gt)
> @@ -671,7 +739,7 @@ void intel_mocs_init(struct intel_gt *gt)
>  	 */
>  	flags = get_mocs_settings(gt->i915, &table);
>  	if (flags & HAS_GLOBAL_MOCS)
> -		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
> +		__init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
>  
>  	/*
>  	 * Initialize the L3CC table as part of mocs initalization to make
> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> index ca009a6a13bd..730796346514 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> @@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
>  		return 0;
>  
>  	if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
> -		addr = global_mocs_offset();
> +		addr = global_mocs_offset(rq->engine->gt);
>  	else
>  		addr = mocs_offset(rq->engine);
>  
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index cddb6e197972..025d32c0b161 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1146,6 +1146,7 @@ static const struct intel_device_info mtl_info = {
>  	.has_flat_ccs = 0,
>  	.has_gmd_id = 1,
>  	.has_guc_deprivilege = 1,
> +	.has_llc = 0,

This should also be in its own patch since it isn't directly related to
the MOCS / PAT tables.


Matt

>  	.has_mslice_steering = 0,
>  	.has_snoop = 1,
>  	.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
> -- 
> 2.25.1
>
Yang, Fei April 11, 2023, 3:55 a.m. UTC | #2
>Subject: Re: [Intel-gfx] [PATCH 1/8] drm/i915/mtl: Define MOCS and PAT tables for MTL
>
> On Fri, Apr 07, 2023 at 12:12:29AM -0700, fei.yang@intel.com<mailto:fei.yang@intel.com> wrote:
>> From: Fei Yang fei.yang@intel.com<mailto:fei.yang@intel.com>
>>
>> On MTL, GT can no longer allocate on LLC - only the CPU can.
>> This, along with addition of support for ADM/L4 cache calls a MOCS/PAT
>> table update. Also defines PTE encode functions for MTL as it has
>> different PAT index definition than previous platforms.
>
> It might be best to keep the PTE encoding as a separate patch from the
> MOCS/PAT tables.  It's a different enough topic that it probably deserves
> a patch of its own.

Will update in the next revision.

>>
>> BSpec: 44509, 45101, 44235
>>
>> Cc: Matt Roper matthew.d.roper@intel.com<mailto:matthew.d.roper@intel.com>
>> Cc: Lucas De Marchi lucas.demarchi@intel.com<mailto:lucas.demarchi@intel.com>
>> Signed-off-by: Madhumitha Tolakanahalli Pradeep
>> madhumitha.tolakanahalli.pradeep@intel.com<mailto:madhumitha.tolakanahalli.pradeep@intel.com>
>> Signed-off-by: Aravind Iddamsetty aravind.iddamsetty@intel.com<mailto:aravind.iddamsetty@intel.com>
>> Signed-off-by: Fei Yang fei.yang@intel.com<mailto:fei.yang@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c    | 28 +++++++++
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h    |  3 +
>>  drivers/gpu/drm/i915/gt/intel_ggtt.c    | 27 +++++++++
>>  drivers/gpu/drm/i915/gt/intel_gtt.c     | 23 +++++++-
>>  drivers/gpu/drm/i915/gt/intel_gtt.h     | 20 ++++++-
>>  drivers/gpu/drm/i915/gt/intel_mocs.c    | 76 +++++++++++++++++++++++--
>>  drivers/gpu/drm/i915/gt/selftest_mocs.c |  2 +-
>>  drivers/gpu/drm/i915/i915_pci.c         |  1 +
>>  8 files changed, 173 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> index 4daaa6f55668..df4073d32114 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
>> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>>          return pte;
>>  }
>>
>> +static u64 mtl_pte_encode(dma_addr_t addr,
>> +                                       enum i915_cache_level level,
>> +                                       u32 flags)
>> +{
>> +       gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
>> +
>> +       if (unlikely(flags & PTE_READ_ONLY))
>> +                      pte &= ~GEN8_PAGE_RW;
>> +
>> +       if (flags & PTE_LM)
>> +                      pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
>> +
>> +       switch (level) {
>> +       case I915_CACHE_NONE:
>> +                      pte |= GEN12_PPGTT_PTE_PAT1;
>> +                      break;
>> +       case I915_CACHE_LLC:
>> +       case I915_CACHE_L3_LLC:
>> +                      pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>> +                      break;
>> +       case I915_CACHE_WT:
>> +                      pte |= GEN12_PPGTT_PTE_PAT0;
>> +                      break;
>> +       }
>> +
>> +       return pte;
>> +}
>> +
>>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool
>> create)  {
>>          struct drm_i915_private *i915 = ppgtt->vm.i915; diff --git
>> a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> index f541d19264b4..6b8ce7f4d25a 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> @@ -18,5 +18,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt
>> *gt,
>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>                                         enum i915_cache_level level,
>>                                         u32 flags);
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +                                     unsigned int pat_index,
>> +                                     u32 flags);
>>
>>  #endif
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 3c7f1ed92f5b..4a16bfcde1de 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -220,6 +220,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>>          }
>>  }
>>
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +                                     enum i915_cache_level level,
>> +                                     u32 flags)
>> +{
>> +       gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>> +
>> +       GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>> +
>> +       if (flags & PTE_LM)
>> +                      pte |= GEN12_GGTT_PTE_LM;
>> +
>> +       switch (level) {
>> +       case I915_CACHE_NONE:
>> +                      pte |= MTL_GGTT_PTE_PAT1;
>> +                      break;
>> +       case I915_CACHE_LLC:
>> +       case I915_CACHE_L3_LLC:
>> +                      pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>> +                      break;
>> +       case I915_CACHE_WT:
>> +                      pte |= MTL_GGTT_PTE_PAT0;
>> +                      break;
>> +       }
>> +
>> +       return pte;
>> +}
>> +
>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>                                         enum i915_cache_level level,
>>                                         u32 flags)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c
>> b/drivers/gpu/drm/i915/gt/intel_gtt.c
>> index 4f436ba7a3c8..1e1b34e22cf5 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
>> @@ -468,6 +468,25 @@ void gtt_write_workarounds(struct intel_gt *gt)
>>          }
>>  }
>>
>> +static void mtl_setup_private_ppat(struct intel_uncore *uncore) {
>> +       intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
>> +                                        MTL_PPAT_L4_0_WB);
>> +       intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
>> +                                        MTL_PPAT_L4_1_WT);
>> +       intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
>> +                                        MTL_PPAT_L4_3_UC);
>> +       intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
>> +                                        MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
>> +       intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
>> +                                        MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
>
> These registers are multicast registers on the primary GT.  We need
> to use XEHP_PAT_INDEX and the MCR-aware functions on the primary GT.

The multicast change has been merged to a different branch and not
validated extensively on MTL so far. Will post to mailing list after
proper validation.

> Although we're not programming the higher PAT entries on MTL (yet),
> there's also a jump in the MMIO offsets that we should incorporate
> into the PAT_INDEX macro to future-proof the code.
>
>> +
>> +       /*
>> +       * Remaining PAT entries are left at the hardware-default
>> +       * fully-cached setting
>> +       */
>> +}
>> +
>>  static void tgl_setup_private_ppat(struct intel_uncore *uncore)  {
>>          /* TGL doesn't support LLC or AGE settings */ @@ -603,7 +622,9 @@
>> void setup_private_pat(struct intel_gt *gt)
>>
>>          GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
>>
>> -        if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>> +       if (IS_METEORLAKE(i915))
>> +                      mtl_setup_private_ppat(uncore);
>
> We need to handle the primary GT and media GT separately since the
> media GT registers are unicast whereas primary are multicast.

Same as above.

>See
> https://gitlab.freedesktop.org/drm/xe/kernel/-/blob/drm-xe-next/drivers/gpu/drm/xe/xe_pat.c
> for how this was handled in the Xe driver.
>
>> +       else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
>>                         xehp_setup_private_ppat(gt);
>>          else if (GRAPHICS_VER(i915) >= 12)
>>                         tgl_setup_private_ppat(uncore);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> index 69ce55f517f5..b632167eaf2e 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES             REG_BIT(2)
>>  #define BYT_PTE_WRITEABLE                            REG_BIT(1)
>>
>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>>  #define GEN12_PPGTT_PTE_LM          BIT_ULL(11)
>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
>
> Which bspec page is this from?  The PPGTT descriptions in
> the bspec are kind of hard to track down.

https://gfxspecs.intel.com/Predator/Home/Index/53521
PAT_Index[2:0] = {PAT, PCD, PWT} = BIT(7, 4, 3)
PAT_Index[3] = BIT(62)
PAT_Index[4] = BIT(61)

> But if these only apply to MTL, why are they labelled as "GEN12?"

These apply to GEN12plus.

>>
>> -#define GEN12_GGTT_PTE_LM           BIT_ULL(1)
>> +#define GEN12_GGTT_PTE_LM                         BIT_ULL(1)
>> +#define MTL_GGTT_PTE_PAT0                          BIT_ULL(52)
>> +#define MTL_GGTT_PTE_PAT1                          BIT_ULL(53)
>> +#define GEN12_GGTT_PTE_ADDR_MASK       GENMASK_ULL(45, 12)
>> +#define MTL_GGTT_PTE_PAT_MASK                               GENMASK_ULL(53, 52)
>>
>>  #define GEN12_PDE_64K BIT(6)
>>  #define GEN12_PTE_PS64 BIT(8)
>> @@ -147,6 +156,15 @@ typedef u64 gen8_pte_t;  #define GEN8_PDE_IPS_64K
>> BIT(11)
>>  #define GEN8_PDE_PS_2M   BIT(7)
>>
>> +#define MTL_PPAT_L4_CACHE_POLICY_MASK             REG_GENMASK(3, 2)
>> +#define MTL_PAT_INDEX_COH_MODE_MASK              REG_GENMASK(1, 0)
>> +#define MTL_PPAT_L4_3_UC               REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
>> +#define MTL_PPAT_L4_1_WT               REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
>> +#define MTL_PPAT_L4_0_WB               REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
>> +#define MTL_3_COH_2W     REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
>> +#define MTL_2_COH_1W     REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
>> +#define MTL_0_COH_NON   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
>
>The values for these definitions don't seem to be aligned.

These are aligned with https://gfxspecs.intel.com/Predator/Home/Index/45101

>> +
>>  enum i915_cache_level;
>>
>>  struct drm_i915_gem_object;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c
>> b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> index 69b489e8dfed..89570f137b2c 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> @@ -40,6 +40,10 @@ struct drm_i915_mocs_table {
>>  #define LE_COS(value)                           ((value) << 15)
>>  #define LE_SSE(value)                            ((value) << 17)
>>
>> +/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
>> +#define _L4_CACHEABILITY(value)    ((value) << 2)
>
> We should use REG_GENMASK + REG_FIELD_PREP for new code, which will help
> ensure that we don't try to pack a value into a field that's too big to fit.
>
>> +#define IG_PAT(value)                          ((value) << 8)

This is used in defining MOCS table. I think it's easier to read with IG_PAT(0)
and IG_PAT(1), than with and without IG_PAT.

> Do we need this one parameterized?  It's just a boolean flag, so
>
>        #define IG_PAT          REG_BIT(8)
>
> would be fine (and then don't include the flag on the entries where it isn't set.
>
>> +
>>  /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>>  #define L3_ESC(value)                            ((value) << 0)
>>  #define L3_SCC(value)                           ((value) << 1)
>> @@ -50,6 +54,7 @@ struct drm_i915_mocs_table {
>>  /* Helper defines */
>>  #define GEN9_NUM_MOCS_ENTRIES               64  /* 63-64 are reserved, but configured. */
>>  #define PVC_NUM_MOCS_ENTRIES   3
>> +#define MTL_NUM_MOCS_ENTRIES 16
>>
>>  /* (e)LLC caching options */
>>  /*
>> @@ -73,6 +78,12 @@ struct drm_i915_mocs_table {
>>  #define L3_2_RESERVED                       _L3_CACHEABILITY(2)
>>  #define L3_3_WB                                    _L3_CACHEABILITY(3)
>>
>> +/* L4 caching options */
>> +#define L4_0_WB                                  _L4_CACHEABILITY(0)
>> +#define L4_1_WT                                   _L4_CACHEABILITY(1)
>> +#define L4_2_RESERVED                      _L4_CACHEABILITY(2)
>
> This definition is unnecessary and unused.
>
>> +#define L4_3_UC                                   _L4_CACHEABILITY(3)
>> +
>>  #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
>>          [__idx] = { \
>>                         .control_value = __control_value, \ @@ -416,6 +427,57 @@ static
>> const struct drm_i915_mocs_entry pvc_mocs_table[] = {
>>          MOCS_ENTRY(2, 0, L3_3_WB),
>>  };
>>
>> +static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
>
> The entries below with WB L4 cache don't include L4_0_WB.  That works
> out okay since the value is 0, but it seems like it would still be worth
> including it, just to make the table meaning more clear.
>
> Conversely, I'd drop the "IG_PAT(0)" on entry 0 since that's just a simple
> boolean flag, so the "0" there is easier to misread than just not including
> it at all.
>
>> +       /* Error - Reserved for Non-Use */
>> +       MOCS_ENTRY(0,
>> +                         IG_PAT(0),
>> +                         L3_LKUP(1) | L3_3_WB),
>> +       /* Cached - L3 + L4 */
>> +       MOCS_ENTRY(1,
>> +                         IG_PAT(1),
>> +                         L3_LKUP(1) | L3_3_WB),
>> +       /* L4 - GO:L3 */
>> +       MOCS_ENTRY(2,
>> +                         IG_PAT(1),
>> +                         L3_LKUP(1) | L3_1_UC),
>> +       /* Uncached - GO:L3 */
>> +       MOCS_ENTRY(3,
>> +                         IG_PAT(1) | L4_3_UC,
>> +                         L3_LKUP(1) | L3_1_UC),
>> +       /* L4 - GO:Mem */
>> +       MOCS_ENTRY(4,
>> +                         IG_PAT(1),
>> +                         L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
>> +       /* Uncached - GO:Mem */
>> +       MOCS_ENTRY(5,
>> +                         IG_PAT(1) | L4_3_UC,
>> +                         L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
>> +       /* L4 - L3:NoLKUP; GO:L3 */
>> +       MOCS_ENTRY(6,
>> +                         IG_PAT(1),
>> +                         L3_1_UC),
>> +       /* Uncached - L3:NoLKUP; GO:L3 */
>> +       MOCS_ENTRY(7,
>> +                         IG_PAT(1) | L4_3_UC,
>> +                         L3_1_UC),
>> +       /* L4 - L3:NoLKUP; GO:Mem */
>> +       MOCS_ENTRY(8,
>> +                         IG_PAT(1),
>> +                         L3_GLBGO(1) | L3_1_UC),
>> +       /* Uncached - L3:NoLKUP; GO:Mem */
>> +       MOCS_ENTRY(9,
>> +                         IG_PAT(1) | L4_3_UC,
>> +                         L3_GLBGO(1) | L3_1_UC),
>> +       /* Display - L3; L4:WT */
>> +       MOCS_ENTRY(14,
>> +                         IG_PAT(1) | L4_1_WT,
>> +                         L3_LKUP(1) | L3_3_WB),
>> +       /* CCS - Non-Displayable */
>> +       MOCS_ENTRY(15,
>> +                         IG_PAT(1),
>> +                         L3_GLBGO(1) | L3_1_UC),
>> +};
>> +
>>  enum {
>>          HAS_GLOBAL_MOCS = BIT(0),
>>          HAS_ENGINE_MOCS = BIT(1),
>> @@ -445,7 +507,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
>>          memset(table, 0, sizeof(struct drm_i915_mocs_table));
>>
>>          table->unused_entries_index = I915_MOCS_PTE;
>> -        if (IS_PONTEVECCHIO(i915)) {
>> +       if (IS_METEORLAKE(i915)) {
>> +                      table->size = ARRAY_SIZE(mtl_mocs_table);
>> +                      table->table = mtl_mocs_table;
>> +                      table->n_entries = MTL_NUM_MOCS_ENTRIES;
>> +                      table->uc_index = 9;
>> +                      table->unused_entries_index = 1;
>> +       } else if (IS_PONTEVECCHIO(i915)) {
>>                         table->size = ARRAY_SIZE(pvc_mocs_table);
>>                         table->table = pvc_mocs_table;
>>                         table->n_entries = PVC_NUM_MOCS_ENTRIES; @@ -646,9 +714,9 @@ void
>> intel_mocs_init_engine(struct intel_engine_cs *engine)
>>                         init_l3cc_table(engine->gt, &table);  }
>>
>> -static u32 global_mocs_offset(void)
>> +static u32 global_mocs_offset(struct intel_gt *gt)
>>  {
>> -        return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
>> +       return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) +
>> +gt->uncore->gsi_offset;
>
> Isn't this going to double-apply the GSI offset in __init_mocs_table?

Good point. Not sure if there are test cases to validate this, but do need
to double check.

> We do need to handle the offset manually in read_mocs_table() since
> those reads are done with an MI_STORE_REGISTER_MEM instruction.
>
> Handling the GSI offset properly deserves its own patch with a dedicated
> commit message explaining the details.
>
>>  }
>>
>>  void intel_set_mocs_index(struct intel_gt *gt) @@ -671,7 +739,7 @@
>> void intel_mocs_init(struct intel_gt *gt)
>>           */
>>          flags = get_mocs_settings(gt->i915, &table);
>>          if (flags & HAS_GLOBAL_MOCS)
>> -                       __init_mocs_table(gt->uncore, &table, global_mocs_offset());
>> +                      __init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
>>
>>          /*
>>           * Initialize the L3CC table as part of mocs initalization to make
>> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> b/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> index ca009a6a13bd..730796346514 100644
>> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
>> @@ -137,7 +137,7 @@ static int read_mocs_table(struct i915_request *rq,
>>                         return 0;
>>
>>          if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
>> -                       addr = global_mocs_offset();
>> +                      addr = global_mocs_offset(rq->engine->gt);
>>          else
>>                         addr = mocs_offset(rq->engine);
>>
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>> b/drivers/gpu/drm/i915/i915_pci.c index cddb6e197972..025d32c0b161
>> 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -1146,6 +1146,7 @@ static const struct intel_device_info mtl_info = {
>>          .has_flat_ccs = 0,
>>          .has_gmd_id = 1,
>>          .has_guc_deprivilege = 1,
>> +       .has_llc = 0,
>
> This should also be in its own patch since it isn't directly related to the
> MOCS / PAT tables.

Will update in the next revision.

> Matt
>
>>          .has_mslice_steering = 0,
>>          .has_snoop = 1,
>>          .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
>> --
>> 2.25.1
>>
>
> --
> Matt Roper
> Graphics Software Engineer
> Linux GPU Platform Enablement
> Intel Corporation
>
Matt Roper April 11, 2023, 7:28 p.m. UTC | #3
On Mon, Apr 10, 2023 at 08:55:16PM -0700, Yang, Fei wrote:
...
>    >> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
> 
>    >> b/drivers/gpu/drm/i915/gt/intel_gtt.h
> 
>    >> index 69ce55f517f5..b632167eaf2e 100644
> 
>    >> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> 
>    >> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> 
>    >> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
> 
>    >>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES             REG_BIT(2)
> 
>    >>  #define BYT_PTE_WRITEABLE                            REG_BIT(1)
> 
>    >> 
> 
>    >> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
> 
>    >>  #define GEN12_PPGTT_PTE_LM          BIT_ULL(11)
> 
>    >> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> 
>    >> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> 
>    >> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> 
>    >> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
> 
>    > 
> 
>    > Which bspec page is this from?  The PPGTT descriptions in
> 
>    > the bspec are kind of hard to track down.
> 
>     
> 
>    [9]https://gfxspecs.intel.com/Predator/Home/Index/53521

The bspec tagging is a bit bizarre in this area, but I don't believe
this page is intended to apply to MTL.  Note that this page is inside a
section specifically listed as "57b VA Support" --- i.e., this general
section is for platforms like PVC rather than MTL.  MTL only has 48b
virtual address space (bspec 55416), so I think one of the pages in the
48b sections is what we should be referencing instead.

If they screwed up and put MTL-specific details only on a PVC-specific
page of the bspec, we should probably file a bspec issue about that to
get it fixed.

> 
>    PAT_Index[2:0] = {PAT, PCD, PWT} = BIT(7, 4, 3)
> 
>    PAT_Index[3] = BIT(62)
> 
>    PAT_Index[4] = BIT(61)
> 
>     
> 
>    > But if these only apply to MTL, why are they labelled as "GEN12?"
> 
>     
> 
>    These apply to GEN12plus.

That's not what your patch is doing; you're using them in a function
that only gets called on MTL.  So the question is whether these
definitions truly applied to older platforms like TGL/RKL/ADL/etc too
(and we need to go back and fix that code), or whether they're
Xe_LPG-specific, in which case the "GEN12_" prefix is incorrect.

Also, handling the MTL-specific PTE encoding later in the series, after
the transition from cache_level to PAT index, might be best since then
you can just implement it correctly at the time the code is introduced;
no need to add the cache_level implementation first (which can't even
use all the bits) just to come back a few patches later and replace it
all with PAT code.

> 
>     
> 
>    >> 
> 
>    >> -#define GEN12_GGTT_PTE_LM           BIT_ULL(1)
> 
>    >> +#define GEN12_GGTT_PTE_LM                         BIT_ULL(1)
> 
>    >> +#define MTL_GGTT_PTE_PAT0                          BIT_ULL(52)
> 
>    >> +#define MTL_GGTT_PTE_PAT1                          BIT_ULL(53)
> 
>    >> +#define GEN12_GGTT_PTE_ADDR_MASK       GENMASK_ULL(45, 12)
> 
>    >> +#define MTL_GGTT_PTE_PAT_MASK                              
>    GENMASK_ULL(53, 52)
> 
>    >> 
> 
>    >>  #define GEN12_PDE_64K BIT(6)
> 
>    >>  #define GEN12_PTE_PS64 BIT(8)
> 
>    >> @@ -147,6 +156,15 @@ typedef u64 gen8_pte_t;  #define GEN8_PDE_IPS_64K
> 
>    >> BIT(11)
> 
>    >>  #define GEN8_PDE_PS_2M   BIT(7)
> 
>    >> 
> 
>    >> +#define MTL_PPAT_L4_CACHE_POLICY_MASK             REG_GENMASK(3, 2)
> 
>    >> +#define MTL_PAT_INDEX_COH_MODE_MASK              REG_GENMASK(1, 0)
> 
>    >> +#define MTL_PPAT_L4_3_UC              
>    REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
> 
>    >> +#define MTL_PPAT_L4_1_WT              
>    REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
> 
>    >> +#define MTL_PPAT_L4_0_WB              
>    REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
> 
>    >> +#define MTL_3_COH_2W     REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK,
>    3)
> 
>    >> +#define MTL_2_COH_1W     REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK,
>    2)
> 
>    >> +#define MTL_0_COH_NON   REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
> 
>    > 
> 
>    >The values for these definitions don't seem to be aligned.
> 
>     
> 
>    These are aligned with
>    [10]https://gfxspecs.intel.com/Predator/Home/Index/45101

I mean spacing aligned.  If your tabstops are set to 8, then the values
don't line up visually.


Matt
Yang, Fei April 11, 2023, 9:54 p.m. UTC | #4
> Subject: Re: [Intel-gfx] [PATCH 1/8] drm/i915/mtl: Define MOCS and PAT tables for MTL
>
> On Mon, Apr 10, 2023 at 08:55:16PM -0700, Yang, Fei wrote:
> ...
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>
>>>> b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>
>>>> index 69ce55f517f5..b632167eaf2e 100644
>>
>>>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>
>>>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>>
>>>>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES             REG_BIT(2)
>>
>>>>  #define BYT_PTE_WRITEABLE                            REG_BIT(1)
>>
>>>>
>>
>>>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>>
>>>>  #define GEN12_PPGTT_PTE_LM          BIT_ULL(11)
>>
>>>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
>>
>>>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
>>
>>>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>>
>>>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
>>
>>> Which bspec page is this from?  The PPGTT descriptions in
>>
>>> the bspec are kind of hard to track down.
>>
>>
>>
>>    [9]https://gfxspecs.intel.com/Predator/Home/Index/53521
>
> The bspec tagging is a bit bizarre in this area, but I don't believe
> this page is intended to apply to MTL. Note that this page is inside
> a section specifically listed as "57b VA Support" --- i.e., this
> general section is for platforms like PVC rather than MTL.  MTL only
> has 48b virtual address space (bspec 55416), so I think one of the
> pages in the 48b sections is what we should be referencing instead.
>
> If they screwed up and put MTL-specific details only on a PVC-specific
> page of the bspec, we should probably file a bspec issue about that to
> get it fixed.

The Bspec is a bit confusing on these. Looked at the Bsec with filter set
to TGL/ADL/MTL/ALL respectively. Here are the differences,
>>    PAT_Index[2:0] = {PAT, PCD, PWT} = BIT(7, 4, 3)

These 3 PAT index bits are defined for all gen12+.
>>    PAT_Index[3] = BIT(62)

PAT_Index[3] is defined for MTL/ARL, will update this one to MTL_xxx

>>    PAT_Index[4] = BIT(61)

PAT_Index[4] shows up only when there is no filter set. And this bit is
marked as [NOT VALID FOR SPEC: GENERALASSETSXE], not sure how to interpret
this, but seems like it should not be used at all. Any suggestion?

>>
>>
>>> But if these only apply to MTL, why are they labelled as "GEN12?"
>>
>>    These apply to GEN12plus.
>
> That's not what your patch is doing; you're using them in a function
> that only gets called on MTL.

That PTE encode will be generalized to gen12 in a patch after after the
pat_index change.

> So the question is whether these
> definitions truly applied to older platforms like TGL/RKL/ADL/etc too
> (and we need to go back and fix that code), or whether they're
> Xe_LPG-specific, in which case the "GEN12_" prefix is incorrect.

The only difference is that MTL has PAT[3] defined, so we can still apply
the same PTE encode function for all gen12+.

> Also, handling the MTL-specific PTE encoding later in the series, after
> the transition from cache_level to PAT index, might be best since then
> you can just implement it correctly at the time the code is introduced;
> no need to add the cache_level implementation first (which can't even
> use all the bits) just to come back a few patches later and replace it
> all with PAT code.

I will squash the PTE encode patches.

>>>> -#define GEN12_GGTT_PTE_LM           BIT_ULL(1)
>>>> +#define GEN12_GGTT_PTE_LM                         BIT_ULL(1)
>>>> +#define MTL_GGTT_PTE_PAT0                          BIT_ULL(52)
>>>> +#define MTL_GGTT_PTE_PAT1                          BIT_ULL(53)
>>>> +#define GEN12_GGTT_PTE_ADDR_MASK       GENMASK_ULL(45, 12)
>>>> +#define MTL_GGTT_PTE_PAT_MASK          GENMASK_ULL(53, 52)
>>>>
>>>>  #define GEN12_PDE_64K BIT(6)
>>>>  #define GEN12_PTE_PS64 BIT(8)
>>>> @@ -147,6 +156,15 @@ typedef u64 gen8_pte_t;  #define
>> GEN8_PDE_IPS_64K
>>
>>>> BIT(11)
>>
>>>>  #define GEN8_PDE_PS_2M   BIT(7)
>>
>>>> +#define MTL_PPAT_L4_CACHE_POLICY_MASK
>> REG_GENMASK(3, 2)
>>>> +#define MTL_PAT_INDEX_COH_MODE_MASK              REG_GENMASK(1,
>> 0)
>>>> +#define MTL_PPAT_L4_3_UC
>>    REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
>>>> +#define MTL_PPAT_L4_1_WT
>>    REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
>>>> +#define MTL_PPAT_L4_0_WB
>>    REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
>>>> +#define MTL_3_COH_2W     REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK,
>>    3)
>>>> +#define MTL_2_COH_1W     REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK,
>>    2)
>>>> +#define MTL_0_COH_NON
>> REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
>>>
>>> The values for these definitions don't seem to be aligned.
>>    These are aligned with
>>    [10]https://gfxspecs.intel.com/Predator/Home/Index/45101
> I mean spacing aligned.  If your tabstops are set to 8, then the values don't line up visually.

Hmm... the three COH macro's are aligned, are you saying they should aligned with those PPAT macro's as well?

>
> Matt
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..df4073d32114 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@  static u64 gen8_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~GEN8_PAGE_RW;
+
+	if (flags & PTE_LM)
+		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= GEN12_PPGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index f541d19264b4..6b8ce7f4d25a 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -18,5 +18,8 @@  struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags);
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			unsigned int pat_index,
+			u32 flags);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 3c7f1ed92f5b..4a16bfcde1de 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -220,6 +220,33 @@  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 	}
 }
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
+
+	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
+
+	if (flags & PTE_LM)
+		pte |= GEN12_GGTT_PTE_LM;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= MTL_GGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 4f436ba7a3c8..1e1b34e22cf5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -468,6 +468,25 @@  void gtt_write_workarounds(struct intel_gt *gt)
 	}
 }
 
+static void mtl_setup_private_ppat(struct intel_uncore *uncore)
+{
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(0),
+			   MTL_PPAT_L4_0_WB);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(1),
+			   MTL_PPAT_L4_1_WT);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(2),
+			   MTL_PPAT_L4_3_UC);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(3),
+			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+	intel_uncore_write(uncore, GEN12_PAT_INDEX(4),
+			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+	/*
+	 * Remaining PAT entries are left at the hardware-default
+	 * fully-cached setting
+	 */
+}
+
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
 	/* TGL doesn't support LLC or AGE settings */
@@ -603,7 +622,9 @@  void setup_private_pat(struct intel_gt *gt)
 
 	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+	if (IS_METEORLAKE(i915))
+		mtl_setup_private_ppat(uncore);
+	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
 		xehp_setup_private_ppat(gt);
 	else if (GRAPHICS_VER(i915) >= 12)
 		tgl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 69ce55f517f5..b632167eaf2e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -88,9 +88,18 @@  typedef u64 gen8_pte_t;
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
 #define BYT_PTE_WRITEABLE		REG_BIT(1)
 
+#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
 #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
+#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
+#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
+#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
+#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
 
-#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
+#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
+#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
+#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
+#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
 
 #define GEN12_PDE_64K BIT(6)
 #define GEN12_PTE_PS64 BIT(8)
@@ -147,6 +156,15 @@  typedef u64 gen8_pte_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
+#define MTL_PPAT_L4_CACHE_POLICY_MASK	REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASK	REG_GENMASK(1, 0)
+#define MTL_PPAT_L4_3_UC	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 3)
+#define MTL_PPAT_L4_1_WT	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 1)
+#define MTL_PPAT_L4_0_WB	REG_FIELD_PREP(MTL_PPAT_L4_CACHE_POLICY_MASK, 0)
+#define MTL_3_COH_2W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 3)
+#define MTL_2_COH_1W	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 2)
+#define MTL_0_COH_NON	REG_FIELD_PREP(MTL_PAT_INDEX_COH_MODE_MASK, 0)
+
 enum i915_cache_level;
 
 struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 69b489e8dfed..89570f137b2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -40,6 +40,10 @@  struct drm_i915_mocs_table {
 #define LE_COS(value)		((value) << 15)
 #define LE_SSE(value)		((value) << 17)
 
+/* Defines for the tables (GLOB_MOCS_0 - GLOB_MOCS_16) */
+#define _L4_CACHEABILITY(value)	((value) << 2)
+#define IG_PAT(value)		((value) << 8)
+
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
 #define L3_SCC(value)		((value) << 1)
@@ -50,6 +54,7 @@  struct drm_i915_mocs_table {
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES	3
+#define MTL_NUM_MOCS_ENTRIES	16
 
 /* (e)LLC caching options */
 /*
@@ -73,6 +78,12 @@  struct drm_i915_mocs_table {
 #define L3_2_RESERVED		_L3_CACHEABILITY(2)
 #define L3_3_WB			_L3_CACHEABILITY(3)
 
+/* L4 caching options */
+#define L4_0_WB			_L4_CACHEABILITY(0)
+#define L4_1_WT			_L4_CACHEABILITY(1)
+#define L4_2_RESERVED		_L4_CACHEABILITY(2)
+#define L4_3_UC			_L4_CACHEABILITY(3)
+
 #define MOCS_ENTRY(__idx, __control_value, __l3cc_value) \
 	[__idx] = { \
 		.control_value = __control_value, \
@@ -416,6 +427,57 @@  static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
 	MOCS_ENTRY(2, 0, L3_3_WB),
 };
 
+static const struct drm_i915_mocs_entry mtl_mocs_table[] = {
+	/* Error - Reserved for Non-Use */
+	MOCS_ENTRY(0,
+		   IG_PAT(0),
+		   L3_LKUP(1) | L3_3_WB),
+	/* Cached - L3 + L4 */
+	MOCS_ENTRY(1,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_3_WB),
+	/* L4 - GO:L3 */
+	MOCS_ENTRY(2,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_1_UC),
+	/* Uncached - GO:L3 */
+	MOCS_ENTRY(3,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_LKUP(1) | L3_1_UC),
+	/* L4 - GO:Mem */
+	MOCS_ENTRY(4,
+		   IG_PAT(1),
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - GO:Mem */
+	MOCS_ENTRY(5,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_LKUP(1) | L3_GLBGO(1) | L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(6,
+		   IG_PAT(1),
+		   L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:L3 */
+	MOCS_ENTRY(7,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_1_UC),
+	/* L4 - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(8,
+		   IG_PAT(1),
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Uncached - L3:NoLKUP; GO:Mem */
+	MOCS_ENTRY(9,
+		   IG_PAT(1) | L4_3_UC,
+		   L3_GLBGO(1) | L3_1_UC),
+	/* Display - L3; L4:WT */
+	MOCS_ENTRY(14,
+		   IG_PAT(1) | L4_1_WT,
+		   L3_LKUP(1) | L3_3_WB),
+	/* CCS - Non-Displayable */
+	MOCS_ENTRY(15,
+		   IG_PAT(1),
+		   L3_GLBGO(1) | L3_1_UC),
+};
+
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
 	HAS_ENGINE_MOCS = BIT(1),
@@ -445,7 +507,13 @@  static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 	memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
 	table->unused_entries_index = I915_MOCS_PTE;
-	if (IS_PONTEVECCHIO(i915)) {
+	if (IS_METEORLAKE(i915)) {
+		table->size = ARRAY_SIZE(mtl_mocs_table);
+		table->table = mtl_mocs_table;
+		table->n_entries = MTL_NUM_MOCS_ENTRIES;
+		table->uc_index = 9;
+		table->unused_entries_index = 1;
+	} else if (IS_PONTEVECCHIO(i915)) {
 		table->size = ARRAY_SIZE(pvc_mocs_table);
 		table->table = pvc_mocs_table;
 		table->n_entries = PVC_NUM_MOCS_ENTRIES;
@@ -646,9 +714,9 @@  void intel_mocs_init_engine(struct intel_engine_cs *engine)
 		init_l3cc_table(engine->gt, &table);
 }
 
-static u32 global_mocs_offset(void)
+static u32 global_mocs_offset(struct intel_gt *gt)
 {
-	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
+	return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)) + gt->uncore->gsi_offset;
 }
 
 void intel_set_mocs_index(struct intel_gt *gt)
@@ -671,7 +739,7 @@  void intel_mocs_init(struct intel_gt *gt)
 	 */
 	flags = get_mocs_settings(gt->i915, &table);
 	if (flags & HAS_GLOBAL_MOCS)
-		__init_mocs_table(gt->uncore, &table, global_mocs_offset());
+		__init_mocs_table(gt->uncore, &table, global_mocs_offset(gt));
 
 	/*
 	 * Initialize the L3CC table as part of mocs initalization to make
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index ca009a6a13bd..730796346514 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -137,7 +137,7 @@  static int read_mocs_table(struct i915_request *rq,
 		return 0;
 
 	if (HAS_GLOBAL_MOCS_REGISTERS(rq->engine->i915))
-		addr = global_mocs_offset();
+		addr = global_mocs_offset(rq->engine->gt);
 	else
 		addr = mocs_offset(rq->engine);
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index cddb6e197972..025d32c0b161 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1146,6 +1146,7 @@  static const struct intel_device_info mtl_info = {
 	.has_flat_ccs = 0,
 	.has_gmd_id = 1,
 	.has_guc_deprivilege = 1,
+	.has_llc = 0,
 	.has_mslice_steering = 0,
 	.has_snoop = 1,
 	.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,