diff mbox series

[3/4] drm/i915/mtl: Define new PTE encode for MTL

Message ID 6f0bec943061dfa4604e3c479f44e98169ebd082.1670311877.git.aravind.iddamsetty@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/4] drm/i915/mtl: Define MOCS and PAT tables for MTL | expand

Commit Message

Iddamsetty, Aravind Dec. 6, 2022, 7:37 a.m. UTC
Add a separate PTE encode function for MTL. The number of PAT registers
have increased to 16 on MTL. All 16 PAT registers are available for
PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
pages.

BSPEC: 63884

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Co-developed-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
 4 files changed, 78 insertions(+), 4 deletions(-)

Comments

Matt Roper Dec. 6, 2022, 11:39 p.m. UTC | #1
On Tue, Dec 06, 2022 at 01:07:28PM +0530, Aravind Iddamsetty wrote:
> Add a separate PTE encode function for MTL. The number of PAT registers
> have increased to 16 on MTL. All 16 PAT registers are available for
> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
> pages.
> 
> BSPEC: 63884

I think you'll also want to include pages like 45015 (ggtt) and its
various equivalents for ppgtt since that's where the important layout
information is given.  And likely 63019 as well.

> 
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Co-developed-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
>  4 files changed, 78 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 31e838eee2ef..4197b43150cc 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>  	return pte;
>  }
>  
> +static u64 mtl_pte_encode(dma_addr_t addr,
> +			  enum i915_cache_level level,
> +			  u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
> +
> +	if (unlikely(flags & PTE_READ_ONLY))
> +		pte &= ~GEN8_PAGE_RW;
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;

What is the GEN12_PPGTT_PTE_NC?  The bspec is a bit confusing since
there are several different PTE layouts for different ppgtt modes, but
the ones I checked had bit 5 listed as 'ignored' so I probably wasn't
looking in the right place (it's also listed as reserved on bspec
63019).

> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= GEN12_PPGTT_PTE_PAT0;
> +		break;
> +	}

I forget what the plan was...are we going to move away from 'enum
i915_cache_level' and start working with PAT indices directly soon
(especially since the set_caching/get_caching ioctls are getting axed
and vm_bind is supposed to start taking platform-specific indicies
directly)?  If we're still using cache_level, then it's not clear to me
how the current platform-agnostic enum values (which talk about L3 and
LLC) are supposed to encode the L4 behavior we want on MTL.  It seems
like we'd need to extend the enum to also somehow reflect L4 behavior if
we were going to keep using it?  But given the continuing expansion of
caching functionality and complexity, I thought that was one of the
reasons why we wanted to get away from these platform-agnostic enums;
the userspace that actually cares about this stuff has the same PAT/MOCS
tables we do and knows the exact index it wants to use for an object
mapping, so eliminating the PAT idx -> cache_level -> PAT idx dance
would cut out a bunch of confusion.

It's also hard to follow these functions right now because it looks like
you're doing an implicit cache_level -> PAT index conversion, but also
mapping the PAT index bits into their placement in the PTE as part of
the same operation.  The behavior might turn out to be correct, but it's
really hard to follow the process, even with all the bspec docs at hand.
So if we do keep using cache_level for now, I think it would be better
to split out a MTL function to translate cache level into PAT index
(which we can review independently) and then let these pte_encode
functions handle the next step of figuring out where those index bits
should land in the PTE.  If the bits are contiguous, you can also just
define a mask and use REG_FIELD_PREP too.

> +
> +	return pte;
> +}
> +
>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>  {
>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>  	 */
>  	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>  
> -	ppgtt->vm.pte_encode = gen8_pte_encode;
> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> +		ppgtt->vm.pte_encode = mtl_pte_encode;
> +	else
> +		ppgtt->vm.pte_encode = gen8_pte_encode;
>  
>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> index f541d19264b4..c48f1fc32909 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>  			 enum i915_cache_level level,
>  			 u32 flags);
>  
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags);
> +
>  #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 82203ad85b0e..3b6f1f6f780a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>  	}
>  }
>  
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
> +
> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_GGTT_PTE_LM;
> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= MTL_GGTT_PTE_PAT0;
> +		break;
> +	}
> +
> +	return pte;
> +}
> +
>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>  			 enum i915_cache_level level,
>  			 u32 flags)
> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>  	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>  
> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
> +	else
> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>  
>  	return ggtt_probe_common(ggtt, size);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 8a3e0a6793dd..4bb7a4005452 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
>  
> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
> -
> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)

This bit is never used anywhere in the patch.

> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)

As noted above, 

> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)

It sounds like these bits have been around since gen12; why didn't we
ever have to program them in the past?  Is there something that causes
the PAT index to never get used on the pre-MTL platforms?

> +
> +#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
> +#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
> +#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)

If we do an explicit cache_level -> PAT index conversion as mentioned
above, we can drop these two bits and just do a REG_FIELD_PREP() with
the MTL_GGTT_PTE_PAT_MASK defined below instead.


Matt

> +#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
> +#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
>  
>  #define GEN12_PDE_64K BIT(6)
>  #define GEN12_PTE_PS64 BIT(8)
> -- 
> 2.25.1
>
Iddamsetty, Aravind Dec. 7, 2022, 7:26 a.m. UTC | #2
On 07-12-2022 05:09, Matt Roper wrote:
> On Tue, Dec 06, 2022 at 01:07:28PM +0530, Aravind Iddamsetty wrote:
>> Add a separate PTE encode function for MTL. The number of PAT registers
>> have increased to 16 on MTL. All 16 PAT registers are available for
>> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
>> pages.
>>
>> BSPEC: 63884
> 
> I think you'll also want to include pages like 45015 (ggtt) and its
> various equivalents for ppgtt since that's where the important layout
> information is given.  And likely 63019 as well.
> 
>>
>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Co-developed-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
>>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
>>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
>>  4 files changed, 78 insertions(+), 4 deletions(-)
>>

<snip>
>> +
>> +	switch (level) {
>> +	case I915_CACHE_NONE:
>> +		pte |= GEN12_PPGTT_PTE_PAT1;
>> +		break;
>> +	case I915_CACHE_LLC:
>> +	case I915_CACHE_L3_LLC:
>> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>> +		break;
>> +	case I915_CACHE_WT:
>> +		pte |= GEN12_PPGTT_PTE_PAT0;
>> +		break;
>> +	}
> 
> I forget what the plan was...are we going to move away from 'enum
> i915_cache_level' and start working with PAT indices directly soon
> (especially since the set_caching/get_caching ioctls are getting axed
> and vm_bind is supposed to start taking platform-specific indicies
> directly)?  If we're still using cache_level, then it's not clear to me
> how the current platform-agnostic enum values (which talk about L3 and
> LLC) are supposed to encode the L4 behavior we want on MTL.  It seems
> like we'd need to extend the enum to also somehow reflect L4 behavior if
> we were going to keep using it?  But given the continuing expansion of
> caching functionality and complexity, I thought that was one of the
> reasons why we wanted to get away from these platform-agnostic enums;
> the userspace that actually cares about this stuff has the same PAT/MOCS
> tables we do and knows the exact index it wants to use for an object
> mapping, so eliminating the PAT idx -> cache_level -> PAT idx dance
> would cut out a bunch of confusion.

The current plan is not to expose PAT index setting via VM_BIND but go
with the defaults. Hence using the i915_cache_level till we decide on
enabling PAT index setting via VM_BIND.

Also, IIUC the cache level we have in i915 apply to L4 as well (BSPEC 45101)

I915_CACHE_NONE -> UC
I915_CACHE_LLC/I915_CACHE_L3_LLC -> WB
I915_CACHE_WT-> WT

But I do not see a means why which we'll know that L4 cache is present
on the platform to select the appropriate cache level.

> 
> It's also hard to follow these functions right now because it looks like
> you're doing an implicit cache_level -> PAT index conversion, but also
> mapping the PAT index bits into their placement in the PTE as part of
> the same operation.  The behavior might turn out to be correct, but it's
> really hard to follow the process, even with all the bspec docs at hand.
> So if we do keep using cache_level for now, I think it would be better
> to split out a MTL function to translate cache level into PAT index
> (which we can review independently) and then let these pte_encode
> functions handle the next step of figuring out where those index bits
> should land in the PTE.  If the bits are contiguous, you can also just
> define a mask and use REG_FIELD_PREP too.

sure i'll translate cache_level to  PAT index and then program the PTE
using those.

> 
>> +
>> +	return pte;
>> +}
>> +
>>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>>  {
>>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
>> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>>  	 */
>>  	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>>  
>> -	ppgtt->vm.pte_encode = gen8_pte_encode;
>> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
>> +		ppgtt->vm.pte_encode = mtl_pte_encode;
>> +	else
>> +		ppgtt->vm.pte_encode = gen8_pte_encode;
>>  
>>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> index f541d19264b4..c48f1fc32909 100644
>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>  			 enum i915_cache_level level,
>>  			 u32 flags);
>>  
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +			enum i915_cache_level level,
>> +			u32 flags);
>> +
>>  #endif
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 82203ad85b0e..3b6f1f6f780a 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>>  	}
>>  }
>>  
>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>> +			enum i915_cache_level level,
>> +			u32 flags)
>> +{
>> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>> +
>> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>> +
>> +	if (flags & PTE_LM)
>> +		pte |= GEN12_GGTT_PTE_LM;
>> +
>> +	switch (level) {
>> +	case I915_CACHE_NONE:
>> +		pte |= MTL_GGTT_PTE_PAT1;
>> +		break;
>> +	case I915_CACHE_LLC:
>> +	case I915_CACHE_L3_LLC:
>> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>> +		break;
>> +	case I915_CACHE_WT:
>> +		pte |= MTL_GGTT_PTE_PAT0;
>> +		break;
>> +	}
>> +
>> +	return pte;
>> +}
>> +
>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>  			 enum i915_cache_level level,
>>  			 u32 flags)
>> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>  	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>>  	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>>  
>> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
>> +	else
>> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>  
>>  	return ggtt_probe_common(ggtt, size);
>>  }
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> index 8a3e0a6793dd..4bb7a4005452 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
>>  
>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
>> -
>> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> 
> This bit is never used anywhere in the patch.
correct the default cache level we have will map uptil PAT index 3 hence
didn't use it and since platform supports it and in future when we have
PAT index setting this will be used.
> 
>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> 
> As noted above, 
> 
>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
> 
> It sounds like these bits have been around since gen12; why didn't we
> ever have to program them in the past?  Is there something that causes
> the PAT index to never get used on the pre-MTL platforms?
these are mapped to _PAGE_PWT, _PAGE_PCD and being programmed in
gen8_pte_encode. On the MTL we have new PAT bits in PTE and since the
way these bits are programmed is different redefined for better
understanding.

Thanks.
Aravind.

<snip>
Matt Roper Dec. 7, 2022, 6:11 p.m. UTC | #3
On Wed, Dec 07, 2022 at 12:56:44PM +0530, Iddamsetty, Aravind wrote:
> 
> 
> On 07-12-2022 05:09, Matt Roper wrote:
> > On Tue, Dec 06, 2022 at 01:07:28PM +0530, Aravind Iddamsetty wrote:
> >> Add a separate PTE encode function for MTL. The number of PAT registers
> >> have increased to 16 on MTL. All 16 PAT registers are available for
> >> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
> >> pages.
> >>
> >> BSPEC: 63884
> > 
> > I think you'll also want to include pages like 45015 (ggtt) and its
> > various equivalents for ppgtt since that's where the important layout
> > information is given.  And likely 63019 as well.
> > 
> >>
> >> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> >> Cc: Matt Roper <matthew.d.roper@intel.com>
> >> Co-developed-by: Fei Yang <fei.yang@intel.com>
> >> Signed-off-by: Fei Yang <fei.yang@intel.com>
> >> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
> >>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
> >>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
> >>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
> >>  4 files changed, 78 insertions(+), 4 deletions(-)
> >>
> 
> <snip>
> >> +
> >> +	switch (level) {
> >> +	case I915_CACHE_NONE:
> >> +		pte |= GEN12_PPGTT_PTE_PAT1;
> >> +		break;
> >> +	case I915_CACHE_LLC:
> >> +	case I915_CACHE_L3_LLC:
> >> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
> >> +		break;
> >> +	case I915_CACHE_WT:
> >> +		pte |= GEN12_PPGTT_PTE_PAT0;
> >> +		break;
> >> +	}
> > 
> > I forget what the plan was...are we going to move away from 'enum
> > i915_cache_level' and start working with PAT indices directly soon
> > (especially since the set_caching/get_caching ioctls are getting axed
> > and vm_bind is supposed to start taking platform-specific indicies
> > directly)?  If we're still using cache_level, then it's not clear to me
> > how the current platform-agnostic enum values (which talk about L3 and
> > LLC) are supposed to encode the L4 behavior we want on MTL.  It seems
> > like we'd need to extend the enum to also somehow reflect L4 behavior if
> > we were going to keep using it?  But given the continuing expansion of
> > caching functionality and complexity, I thought that was one of the
> > reasons why we wanted to get away from these platform-agnostic enums;
> > the userspace that actually cares about this stuff has the same PAT/MOCS
> > tables we do and knows the exact index it wants to use for an object
> > mapping, so eliminating the PAT idx -> cache_level -> PAT idx dance
> > would cut out a bunch of confusion.
> 
> The current plan is not to expose PAT index setting via VM_BIND but go
> with the defaults. Hence using the i915_cache_level till we decide on
> enabling PAT index setting via VM_BIND.
> 
> Also, IIUC the cache level we have in i915 apply to L4 as well (BSPEC 45101)
> 
> I915_CACHE_NONE -> UC
> I915_CACHE_LLC/I915_CACHE_L3_LLC -> WB
> I915_CACHE_WT-> WT
> 
> But I do not see a means why which we'll know that L4 cache is present
> on the platform to select the appropriate cache level.

I may be misunderstanding since the caching isn't an area I've
worked with much in the past, from reading the kerneldoc descriptions on
this enum, it sounds like I915_CACHE_LLC would be be COH_2W?  And
I915_CACHE_L3_LLC COH_1W?  It looks like you're programming both as PAT
index 3 (i.e., 1W coherency) right now, which confuses me.

> 
> > 
> > It's also hard to follow these functions right now because it looks like
> > you're doing an implicit cache_level -> PAT index conversion, but also
> > mapping the PAT index bits into their placement in the PTE as part of
> > the same operation.  The behavior might turn out to be correct, but it's
> > really hard to follow the process, even with all the bspec docs at hand.
> > So if we do keep using cache_level for now, I think it would be better
> > to split out a MTL function to translate cache level into PAT index
> > (which we can review independently) and then let these pte_encode
> > functions handle the next step of figuring out where those index bits
> > should land in the PTE.  If the bits are contiguous, you can also just
> > define a mask and use REG_FIELD_PREP too.
> 
> sure i'll translate cache_level to  PAT index and then program the PTE
> using those.
> 
> > 
> >> +
> >> +	return pte;
> >> +}
> >> +
> >>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
> >>  {
> >>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
> >> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
> >>  	 */
> >>  	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
> >>  
> >> -	ppgtt->vm.pte_encode = gen8_pte_encode;
> >> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> >> +		ppgtt->vm.pte_encode = mtl_pte_encode;
> >> +	else
> >> +		ppgtt->vm.pte_encode = gen8_pte_encode;
> >>  
> >>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
> >>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
> >> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> >> index f541d19264b4..c48f1fc32909 100644
> >> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> >> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> >> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> >>  			 enum i915_cache_level level,
> >>  			 u32 flags);
> >>  
> >> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> >> +			enum i915_cache_level level,
> >> +			u32 flags);
> >> +
> >>  #endif
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> >> index 82203ad85b0e..3b6f1f6f780a 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> >> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> >> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
> >>  	}
> >>  }
> >>  
> >> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> >> +			enum i915_cache_level level,
> >> +			u32 flags)
> >> +{
> >> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
> >> +
> >> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
> >> +
> >> +	if (flags & PTE_LM)
> >> +		pte |= GEN12_GGTT_PTE_LM;
> >> +
> >> +	switch (level) {
> >> +	case I915_CACHE_NONE:
> >> +		pte |= MTL_GGTT_PTE_PAT1;
> >> +		break;
> >> +	case I915_CACHE_LLC:
> >> +	case I915_CACHE_L3_LLC:
> >> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
> >> +		break;
> >> +	case I915_CACHE_WT:
> >> +		pte |= MTL_GGTT_PTE_PAT0;
> >> +		break;
> >> +	}
> >> +
> >> +	return pte;
> >> +}
> >> +
> >>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> >>  			 enum i915_cache_level level,
> >>  			 u32 flags)
> >> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> >>  	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
> >>  	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
> >>  
> >> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> >> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> >> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
> >> +	else
> >> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> >>  
> >>  	return ggtt_probe_common(ggtt, size);
> >>  }
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> >> index 8a3e0a6793dd..4bb7a4005452 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> >> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> >> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
> >>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
> >>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
> >>  
> >> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
> >>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
> >> -
> >> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
> >> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> > 
> > This bit is never used anywhere in the patch.
> correct the default cache level we have will map uptil PAT index 3 hence
> didn't use it and since platform supports it and in future when we have
> PAT index setting this will be used.
> > 
> >> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> > 
> > As noted above, 
> > 
> >> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> >> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
> > 
> > It sounds like these bits have been around since gen12; why didn't we
> > ever have to program them in the past?  Is there something that causes
> > the PAT index to never get used on the pre-MTL platforms?
> these are mapped to _PAGE_PWT, _PAGE_PCD and being programmed in
> gen8_pte_encode. On the MTL we have new PAT bits in PTE and since the
> way these bits are programmed is different redefined for better
> understanding.

In that case why does it still have a GEN12_ prefix?  We should use
"MTL_" instead since this doesn't apply to any of the platforms that
used to be known as "gen12."


Matt

> 
> Thanks.
> Aravind.
> 
> <snip>
Iddamsetty, Aravind Dec. 14, 2022, 12:07 p.m. UTC | #4
On 07-12-2022 23:41, Matt Roper wrote:
> On Wed, Dec 07, 2022 at 12:56:44PM +0530, Iddamsetty, Aravind wrote:
>>
>>
>> On 07-12-2022 05:09, Matt Roper wrote:
>>> On Tue, Dec 06, 2022 at 01:07:28PM +0530, Aravind Iddamsetty wrote:
>>>> Add a separate PTE encode function for MTL. The number of PAT registers
>>>> have increased to 16 on MTL. All 16 PAT registers are available for
>>>> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
>>>> pages.
>>>>
>>>> BSPEC: 63884
>>>
>>> I think you'll also want to include pages like 45015 (ggtt) and its
>>> various equivalents for ppgtt since that's where the important layout
>>> information is given.  And likely 63019 as well.
>>>
>>>>
>>>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>>> Co-developed-by: Fei Yang <fei.yang@intel.com>
>>>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
>>>> ---
>>>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
>>>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
>>>>  drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
>>>>  drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
>>>>  4 files changed, 78 insertions(+), 4 deletions(-)
>>>>
>>
>> <snip>
>>>> +
>>>> +	switch (level) {
>>>> +	case I915_CACHE_NONE:
>>>> +		pte |= GEN12_PPGTT_PTE_PAT1;
>>>> +		break;
>>>> +	case I915_CACHE_LLC:
>>>> +	case I915_CACHE_L3_LLC:
>>>> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
>>>> +		break;
>>>> +	case I915_CACHE_WT:
>>>> +		pte |= GEN12_PPGTT_PTE_PAT0;
>>>> +		break;
>>>> +	}
>>>
>>> I forget what the plan was...are we going to move away from 'enum
>>> i915_cache_level' and start working with PAT indices directly soon
>>> (especially since the set_caching/get_caching ioctls are getting axed
>>> and vm_bind is supposed to start taking platform-specific indicies
>>> directly)?  If we're still using cache_level, then it's not clear to me
>>> how the current platform-agnostic enum values (which talk about L3 and
>>> LLC) are supposed to encode the L4 behavior we want on MTL.  It seems
>>> like we'd need to extend the enum to also somehow reflect L4 behavior if
>>> we were going to keep using it?  But given the continuing expansion of
>>> caching functionality and complexity, I thought that was one of the
>>> reasons why we wanted to get away from these platform-agnostic enums;
>>> the userspace that actually cares about this stuff has the same PAT/MOCS
>>> tables we do and knows the exact index it wants to use for an object
>>> mapping, so eliminating the PAT idx -> cache_level -> PAT idx dance
>>> would cut out a bunch of confusion.
>>
>> The current plan is not to expose PAT index setting via VM_BIND but go
>> with the defaults. Hence using the i915_cache_level till we decide on
>> enabling PAT index setting via VM_BIND.
>>
>> Also, IIUC the cache level we have in i915 apply to L4 as well (BSPEC 45101)
>>
>> I915_CACHE_NONE -> UC
>> I915_CACHE_LLC/I915_CACHE_L3_LLC -> WB
>> I915_CACHE_WT-> WT
>>
>> But I do not see a means why which we'll know that L4 cache is present
>> on the platform to select the appropriate cache level.
> 
> I may be misunderstanding since the caching isn't an area I've
> worked with much in the past, from reading the kerneldoc descriptions on
> this enum, it sounds like I915_CACHE_LLC would be be COH_2W?  And
> I915_CACHE_L3_LLC COH_1W?  It looks like you're programming both as PAT
> index 3 (i.e., 1W coherency) right now, which confuses me.

Rereading the descriptions makes me feel what you mentioned is right.
Also for I915_CACHE_L3_LLC i see a note it is considered only uptil
gen7 so i believe this needn't be considered for MTL.

@Matt Auld, could you please confirm on this.

Thanks,
Aravind.
> 
>>
>>>
>>> It's also hard to follow these functions right now because it looks like
>>> you're doing an implicit cache_level -> PAT index conversion, but also
>>> mapping the PAT index bits into their placement in the PTE as part of
>>> the same operation.  The behavior might turn out to be correct, but it's
>>> really hard to follow the process, even with all the bspec docs at hand.
>>> So if we do keep using cache_level for now, I think it would be better
>>> to split out a MTL function to translate cache level into PAT index
>>> (which we can review independently) and then let these pte_encode
>>> functions handle the next step of figuring out where those index bits
>>> should land in the PTE.  If the bits are contiguous, you can also just
>>> define a mask and use REG_FIELD_PREP too.
>>
>> sure i'll translate cache_level to  PAT index and then program the PTE
>> using those.
>>
>>>
>>>> +
>>>> +	return pte;
>>>> +}
>>>> +
>>>>  static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>>>>  {
>>>>  	struct drm_i915_private *i915 = ppgtt->vm.i915;
>>>> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>>>>  	 */
>>>>  	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>>>>  
>>>> -	ppgtt->vm.pte_encode = gen8_pte_encode;
>>>> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
>>>> +		ppgtt->vm.pte_encode = mtl_pte_encode;
>>>> +	else
>>>> +		ppgtt->vm.pte_encode = gen8_pte_encode;
>>>>  
>>>>  	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>>>>  	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
>>>> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>>> index f541d19264b4..c48f1fc32909 100644
>>>> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>>> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
>>>> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>>>  			 enum i915_cache_level level,
>>>>  			 u32 flags);
>>>>  
>>>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>>>> +			enum i915_cache_level level,
>>>> +			u32 flags);
>>>> +
>>>>  #endif
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> index 82203ad85b0e..3b6f1f6f780a 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>>>> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>>>>  	}
>>>>  }
>>>>  
>>>> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
>>>> +			enum i915_cache_level level,
>>>> +			u32 flags)
>>>> +{
>>>> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
>>>> +
>>>> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
>>>> +
>>>> +	if (flags & PTE_LM)
>>>> +		pte |= GEN12_GGTT_PTE_LM;
>>>> +
>>>> +	switch (level) {
>>>> +	case I915_CACHE_NONE:
>>>> +		pte |= MTL_GGTT_PTE_PAT1;
>>>> +		break;
>>>> +	case I915_CACHE_LLC:
>>>> +	case I915_CACHE_L3_LLC:
>>>> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
>>>> +		break;
>>>> +	case I915_CACHE_WT:
>>>> +		pte |= MTL_GGTT_PTE_PAT0;
>>>> +		break;
>>>> +	}
>>>> +
>>>> +	return pte;
>>>> +}
>>>> +
>>>>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>>>>  			 enum i915_cache_level level,
>>>>  			 u32 flags)
>>>> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>>>>  	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>>>>  	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>>>>  
>>>> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>>> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
>>>> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
>>>> +	else
>>>> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>>>>  
>>>>  	return ggtt_probe_common(ggtt, size);
>>>>  }
>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> index 8a3e0a6793dd..4bb7a4005452 100644
>>>> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>>>> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>>>>  #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>>>>  #define BYT_PTE_WRITEABLE		REG_BIT(1)
>>>>  
>>>> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>>>>  #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
>>>> -
>>>> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
>>>> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
>>>
>>> This bit is never used anywhere in the patch.
>> correct the default cache level we have will map uptil PAT index 3 hence
>> didn't use it and since platform supports it and in future when we have
>> PAT index setting this will be used.
>>>
>>>> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
>>>
>>> As noted above, 
>>>
>>>> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
>>>> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
>>>
>>> It sounds like these bits have been around since gen12; why didn't we
>>> ever have to program them in the past?  Is there something that causes
>>> the PAT index to never get used on the pre-MTL platforms?
>> these are mapped to _PAGE_PWT, _PAGE_PCD and being programmed in
>> gen8_pte_encode. On the MTL we have new PAT bits in PTE and since the
>> way these bits are programmed is different redefined for better
>> understanding.
> 
> In that case why does it still have a GEN12_ prefix?  We should use
> "MTL_" instead since this doesn't apply to any of the platforms that
> used to be known as "gen12."
> 
> 
> Matt
> 
>>
>> Thanks.
>> Aravind.
>>
>> <snip>
>
Nirmoy Das Dec. 15, 2022, 1:02 p.m. UTC | #5
Hi Aravind,

On 12/6/2022 8:37 AM, Aravind Iddamsetty wrote:
> Add a separate PTE encode function for MTL. The number of PAT registers
> have increased to 16 on MTL. All 16 PAT registers are available for
> PPGTT mapped pages, but only the lower 4 are available for GGTT mapped
> pages.
>
> BSPEC: 63884
>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Co-developed-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 33 +++++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/gen8_ppgtt.h |  4 ++++
>   drivers/gpu/drm/i915/gt/intel_ggtt.c | 32 ++++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +++++++++--
>   4 files changed, 78 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 31e838eee2ef..4197b43150cc 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
>   	return pte;
>   }
>   
> +static u64 mtl_pte_encode(dma_addr_t addr,
> +			  enum i915_cache_level level,
> +			  u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
> +
> +	if (unlikely(flags & PTE_READ_ONLY))
> +		pte &= ~GEN8_PAGE_RW;
> +
> +	if (flags & PTE_LM)

PTE_LM shouldn't be applicable for MTL, see below.

> +		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= GEN12_PPGTT_PTE_PAT0;
> +		break;
> +	}
> +
> +	return pte;
> +}
> +
>   static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
>   {
>   	struct drm_i915_private *i915 = ppgtt->vm.i915;
> @@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>   	 */
>   	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>   
> -	ppgtt->vm.pte_encode = gen8_pte_encode;
> +	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> +		ppgtt->vm.pte_encode = mtl_pte_encode;

mtl_pte_encode() seems very specific for MTL but I assume it will be use for other platforms.
Please rename this function to a suitable one.

Nirmoy

> +	else
> +		ppgtt->vm.pte_encode = gen8_pte_encode;
>   
>   	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
>   	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> index f541d19264b4..c48f1fc32909 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
> @@ -19,4 +19,8 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>   			 enum i915_cache_level level,
>   			 u32 flags);
>   
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags);
> +
>   #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 82203ad85b0e..3b6f1f6f780a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -246,6 +246,33 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>   	}
>   }
>   
> +u64 mtl_ggtt_pte_encode(dma_addr_t addr,
> +			enum i915_cache_level level,
> +			u32 flags)
> +{
> +	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
> +
> +	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
> +
> +	if (flags & PTE_LM)
> +		pte |= GEN12_GGTT_PTE_LM;
> +
> +	switch (level) {
> +	case I915_CACHE_NONE:
> +		pte |= MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_LLC:
> +	case I915_CACHE_L3_LLC:
> +		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
> +		break;
> +	case I915_CACHE_WT:
> +		pte |= MTL_GGTT_PTE_PAT0;
> +		break;
> +	}
> +
> +	return pte;
> +}
> +
>   u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>   			 enum i915_cache_level level,
>   			 u32 flags)
> @@ -993,7 +1020,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>   	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
>   	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
>   
> -	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
> +	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> +		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
same here.
> +	else
> +		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>   
>   	return ggtt_probe_common(ggtt, size);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 8a3e0a6793dd..4bb7a4005452 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -88,9 +88,18 @@ typedef u64 gen8_pte_t;
>   #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
>   #define BYT_PTE_WRITEABLE		REG_BIT(1)
>   
> +#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
>   #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
> -
> -#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
> +#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
> +#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
> +#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
> +#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
> +
> +#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
> +#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
> +#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
> +#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
> +#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
>   
>   #define GEN12_PDE_64K BIT(6)
>   #define GEN12_PTE_PS64 BIT(8)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 31e838eee2ef..4197b43150cc 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@  static u64 gen8_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+			  enum i915_cache_level level,
+			  u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~GEN8_PAGE_RW;
+
+	if (flags & PTE_LM)
+		pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= GEN12_PPGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -963,7 +991,10 @@  struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 	 */
 	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-	ppgtt->vm.pte_encode = gen8_pte_encode;
+	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+		ppgtt->vm.pte_encode = mtl_pte_encode;
+	else
+		ppgtt->vm.pte_encode = gen8_pte_encode;
 
 	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
 	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index f541d19264b4..c48f1fc32909 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -19,4 +19,8 @@  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags);
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 82203ad85b0e..3b6f1f6f780a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -246,6 +246,33 @@  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
 	}
 }
 
+u64 mtl_ggtt_pte_encode(dma_addr_t addr,
+			enum i915_cache_level level,
+			u32 flags)
+{
+	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
+
+	GEM_BUG_ON(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
+
+	if (flags & PTE_LM)
+		pte |= GEN12_GGTT_PTE_LM;
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		pte |= MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		pte |= MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1;
+		break;
+	case I915_CACHE_WT:
+		pte |= MTL_GGTT_PTE_PAT0;
+		break;
+	}
+
+	return pte;
+}
+
 u64 gen8_ggtt_pte_encode(dma_addr_t addr,
 			 enum i915_cache_level level,
 			 u32 flags)
@@ -993,7 +1020,10 @@  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ggtt->vm.vma_ops.bind_vma    = intel_ggtt_bind_vma;
 	ggtt->vm.vma_ops.unbind_vma  = intel_ggtt_unbind_vma;
 
-	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
+	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
+	else
+		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
 
 	return ggtt_probe_common(ggtt, size);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 8a3e0a6793dd..4bb7a4005452 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -88,9 +88,18 @@  typedef u64 gen8_pte_t;
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	REG_BIT(2)
 #define BYT_PTE_WRITEABLE		REG_BIT(1)
 
+#define GEN12_PPGTT_PTE_PAT3    BIT_ULL(62)
 #define GEN12_PPGTT_PTE_LM	BIT_ULL(11)
-
-#define GEN12_GGTT_PTE_LM	BIT_ULL(1)
+#define GEN12_PPGTT_PTE_PAT2    BIT_ULL(7)
+#define GEN12_PPGTT_PTE_NC      BIT_ULL(5)
+#define GEN12_PPGTT_PTE_PAT1    BIT_ULL(4)
+#define GEN12_PPGTT_PTE_PAT0    BIT_ULL(3)
+
+#define GEN12_GGTT_PTE_LM		BIT_ULL(1)
+#define MTL_GGTT_PTE_PAT0		BIT_ULL(52)
+#define MTL_GGTT_PTE_PAT1		BIT_ULL(53)
+#define GEN12_GGTT_PTE_ADDR_MASK	GENMASK_ULL(45, 12)
+#define MTL_GGTT_PTE_PAT_MASK		GENMASK_ULL(53, 52)
 
 #define GEN12_PDE_64K BIT(6)
 #define GEN12_PTE_PS64 BIT(8)