diff mbox series

[03/11] drm/i915/pvc: Define MOCS table for PVC

Message ID 20220502163417.2635462-4-matthew.d.roper@intel.com (mailing list archive)
State New, archived
Headers show
Series i915: Introduce Ponte Vecchio | expand

Commit Message

Matt Roper May 2, 2022, 4:34 p.m. UTC
From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>

Bspec: 45101, 72161
Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_types.h    |  1 +
 drivers/gpu/drm/i915/gt/intel_mocs.c        | 24 ++++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++---
 drivers/gpu/drm/i915/i915_drv.h             |  2 ++
 drivers/gpu/drm/i915/i915_pci.c             |  3 ++-
 drivers/gpu/drm/i915/intel_device_info.h    |  1 +
 6 files changed, 39 insertions(+), 5 deletions(-)

Comments

Matt Roper May 2, 2022, 4:50 p.m. UTC | #1
On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote:
> From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> 
> Bspec: 45101, 72161
> Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_types.h    |  1 +
>  drivers/gpu/drm/i915/gt/intel_mocs.c        | 24 ++++++++++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++---
>  drivers/gpu/drm/i915/i915_drv.h             |  2 ++
>  drivers/gpu/drm/i915/i915_pci.c             |  3 ++-
>  drivers/gpu/drm/i915/intel_device_info.h    |  1 +
>  6 files changed, 39 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index b06611c1d4ad..7853ea194ea6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -221,6 +221,7 @@ struct intel_gt {
>  
>  	struct {
>  		u8 uc_index;
> +		u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
>  	} mocs;
>  
>  	struct intel_pxp pxp;
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index c4c37585ae8c..265812589f87 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
>  	unsigned int n_entries;
>  	const struct drm_i915_mocs_entry *table;
>  	u8 uc_index;
> +	u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
>  	u8 unused_entries_index;
>  };
>  
> @@ -47,6 +48,7 @@ struct drm_i915_mocs_table {
>  
>  /* Helper defines */
>  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> +#define PVC_NUM_MOCS_ENTRIES	3

Should this be 4?  The value here should reflect the number of entries
that can be defined in hardware rather than the size of the table we're
asked to program.  Since there are two registers (each with a high and a
low entry), that would imply we should set 4 here to ensure that the
fourth entry is initialized according to unused_entries_index rather
than left at whatever the hardware defaults might be.


Matt

>  
>  /* (e)LLC caching options */
>  /*
> @@ -394,6 +396,17 @@ static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
>  	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
>  };
>  
> +static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
> +	/* Error */
> +	MOCS_ENTRY(0, 0, L3_3_WB),
> +
> +	/* UC */
> +	MOCS_ENTRY(1, 0, L3_1_UC),
> +
> +	/* WB */
> +	MOCS_ENTRY(2, 0, L3_3_WB),
> +};
> +
>  enum {
>  	HAS_GLOBAL_MOCS = BIT(0),
>  	HAS_ENGINE_MOCS = BIT(1),
> @@ -423,7 +436,14 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
>  	memset(table, 0, sizeof(struct drm_i915_mocs_table));
>  
>  	table->unused_entries_index = I915_MOCS_PTE;
> -	if (IS_DG2(i915)) {
> +	if (IS_PONTEVECCHIO(i915)) {
> +		table->size = ARRAY_SIZE(pvc_mocs_table);
> +		table->table = pvc_mocs_table;
> +		table->n_entries = PVC_NUM_MOCS_ENTRIES;
> +		table->uc_index = 1;
> +		table->wb_index = 2;
> +		table->unused_entries_index = 2;
> +	} else if (IS_DG2(i915)) {
>  		if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
>  			table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
>  			table->table = dg2_mocs_table_g10_ax;
> @@ -622,6 +642,8 @@ void intel_set_mocs_index(struct intel_gt *gt)
>  
>  	get_mocs_settings(gt->i915, &table);
>  	gt->mocs.uc_index = table.uc_index;
> +	if (HAS_L3_CCS_READ(gt->i915))
> +		gt->mocs.wb_index = table.wb_index;
>  }
>  
>  void intel_mocs_init(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index a05c4b99b3fb..a656d9c2ca2b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1994,7 +1994,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
>  static void
>  engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  {
> -	u8 mocs;
> +	u8 mocs_w, mocs_r;
>  
>  	/*
>  	 * RING_CMD_CCTL are need to be programed to un-cached
> @@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  	 * Streamers on Gen12 onward platforms.
>  	 */
>  	if (GRAPHICS_VER(engine->i915) >= 12) {
> -		mocs = engine->gt->mocs.uc_index;
> +		if (HAS_L3_CCS_READ(engine->i915) &&
> +		    engine->class == COMPUTE_CLASS)
> +			mocs_r = engine->gt->mocs.wb_index;
> +		else
> +			mocs_r = engine->gt->mocs.uc_index;
> +
> +		mocs_w = engine->gt->mocs.uc_index;
> +
>  		wa_masked_field_set(wal,
>  				    RING_CMD_CCTL(engine->mmio_base),
>  				    CMD_CCTL_MOCS_MASK,
> -				    CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
> +				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
>  	}
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2dddc27a1b0e..8c8e7308502b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1369,6 +1369,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>  
>  #define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10))
>  
> +#define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read)
> +
>  /* DPF == dynamic parity feature */
>  #define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf)
>  #define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? \
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 498708b33924..07722cdf63ac 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1076,7 +1076,8 @@ static const struct intel_device_info ats_m_info = {
>  
>  #define XE_HPC_FEATURES \
>  	XE_HP_FEATURES, \
> -	.dma_mask_size = 52
> +	.dma_mask_size = 52, \
> +	.has_l3_ccs_read = 1
>  
>  __maybe_unused
>  static const struct intel_device_info pvc_info = {
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index e7d2cf7d65c8..09e33296157a 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -150,6 +150,7 @@ enum intel_ppgtt_type {
>  	func(has_heci_pxp); \
>  	func(has_heci_gscfi); \
>  	func(has_guc_deprivilege); \
> +	func(has_l3_ccs_read); \
>  	func(has_l3_dpf); \
>  	func(has_llc); \
>  	func(has_logical_ring_contexts); \
> -- 
> 2.35.1
>
Lucas De Marchi May 2, 2022, 6:39 p.m. UTC | #2
On Mon, May 02, 2022 at 09:50:23AM -0700, Matt Roper wrote:
>On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote:
>> From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
>>
>> Bspec: 45101, 72161
>> Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>> Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/intel_gt_types.h    |  1 +
>>  drivers/gpu/drm/i915/gt/intel_mocs.c        | 24 ++++++++++++++++++++-
>>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++---
>>  drivers/gpu/drm/i915/i915_drv.h             |  2 ++
>>  drivers/gpu/drm/i915/i915_pci.c             |  3 ++-
>>  drivers/gpu/drm/i915/intel_device_info.h    |  1 +
>>  6 files changed, 39 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> index b06611c1d4ad..7853ea194ea6 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> @@ -221,6 +221,7 @@ struct intel_gt {
>>
>>  	struct {
>>  		u8 uc_index;
>> +		u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
>>  	} mocs;
>>
>>  	struct intel_pxp pxp;
>> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> index c4c37585ae8c..265812589f87 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> @@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
>>  	unsigned int n_entries;
>>  	const struct drm_i915_mocs_entry *table;
>>  	u8 uc_index;
>> +	u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
>>  	u8 unused_entries_index;
>>  };
>>
>> @@ -47,6 +48,7 @@ struct drm_i915_mocs_table {
>>
>>  /* Helper defines */
>>  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>> +#define PVC_NUM_MOCS_ENTRIES	3
>
>Should this be 4?  The value here should reflect the number of entries
>that can defined in hardware rather than the size of the table we're
>asked to program.  Since there are two registers (each with a high and a
>low entry), that would imply we should set 4 here to ensure that the
>fourth entry is initialized according to unused_entries_index rather
>than left at whatever the hardware defaults might be.

not sure I understand what you mean here. The n_entries specifies, as
you said, the number of entries we can have. Bspec 45101 shows entries
for indexes 0, 1 and 2. As does the pvc_mocs_table below.

Also, from bspec 44509:
"For PVC, only 3 MOCS states are supported. The allowed index values are
in range [0, 2]..."

So, I don't think we want to program any fourth entry.

Lucas De Marchi
Matt Roper May 2, 2022, 6:50 p.m. UTC | #3
On Mon, May 02, 2022 at 11:39:48AM -0700, Lucas De Marchi wrote:
> On Mon, May 02, 2022 at 09:50:23AM -0700, Matt Roper wrote:
> > On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote:
> > > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > 
> > > Bspec: 45101, 72161
> > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > Signed-off-by: Fei Yang <fei.yang@intel.com>
> > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_gt_types.h    |  1 +
> > >  drivers/gpu/drm/i915/gt/intel_mocs.c        | 24 ++++++++++++++++++++-
> > >  drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++---
> > >  drivers/gpu/drm/i915/i915_drv.h             |  2 ++
> > >  drivers/gpu/drm/i915/i915_pci.c             |  3 ++-
> > >  drivers/gpu/drm/i915/intel_device_info.h    |  1 +
> > >  6 files changed, 39 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > index b06611c1d4ad..7853ea194ea6 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > @@ -221,6 +221,7 @@ struct intel_gt {
> > > 
> > >  	struct {
> > >  		u8 uc_index;
> > > +		u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
> > >  	} mocs;
> > > 
> > >  	struct intel_pxp pxp;
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > index c4c37585ae8c..265812589f87 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > @@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
> > >  	unsigned int n_entries;
> > >  	const struct drm_i915_mocs_entry *table;
> > >  	u8 uc_index;
> > > +	u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
> > >  	u8 unused_entries_index;
> > >  };
> > > 
> > > @@ -47,6 +48,7 @@ struct drm_i915_mocs_table {
> > > 
> > >  /* Helper defines */
> > >  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> > > +#define PVC_NUM_MOCS_ENTRIES	3
> > 
> > Should this be 4?  The value here should reflect the number of entries
> > that can defined in hardware rather than the size of the table we're
> > asked to program.  Since there are two registers (each with a high and a
> > low entry), that would imply we should set 4 here to ensure that the
> > fourth entry is initialized according to unused_entries_index rather
> > than left at whatever the hardware defaults might be.
> 
> not sure I understand what you mean here. The n_entries specifies, as
> you said, the number of entries we can have. Bspec 45101 shows entries
> for indexes 0, 1 and 2. As does the pvc_mocs_table below.
> 
> Also, from bspec 44509:
> "For PVC, only 3 MOCS states are supported. The allowed index values are
> in range [0, 2]..."
> 
> So, I don't think we want to program any fourth entry.

We don't have a choice; the fourth entry lives in the same register as
the third entry, so no matter what we're writing _something_ to those
bits.  The question is whether we should write all 0's or whether we
should treat it like other platforms and ensure it's initialized to the
unused entry values.  Entry #4 isn't supposed to be used, but if buggy
userspace tries to use it, we probably still want well-defined behavior,
just like when an invalid entry gets used on any other platform.


Matt

> 
> Lucas De Marchi
Lucas De Marchi May 2, 2022, 7:27 p.m. UTC | #4
On Mon, May 02, 2022 at 11:50:22AM -0700, Matt Roper wrote:
>On Mon, May 02, 2022 at 11:39:48AM -0700, Lucas De Marchi wrote:
>> On Mon, May 02, 2022 at 09:50:23AM -0700, Matt Roper wrote:
>> > On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote:
>> > > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
>> > >
>> > > Bspec: 45101, 72161
>> > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
>> > > Signed-off-by: Fei Yang <fei.yang@intel.com>
>> > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
>> > > ---
>> > >  drivers/gpu/drm/i915/gt/intel_gt_types.h    |  1 +
>> > >  drivers/gpu/drm/i915/gt/intel_mocs.c        | 24 ++++++++++++++++++++-
>> > >  drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++---
>> > >  drivers/gpu/drm/i915/i915_drv.h             |  2 ++
>> > >  drivers/gpu/drm/i915/i915_pci.c             |  3 ++-
>> > >  drivers/gpu/drm/i915/intel_device_info.h    |  1 +
>> > >  6 files changed, 39 insertions(+), 5 deletions(-)
>> > >
>> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> > > index b06611c1d4ad..7853ea194ea6 100644
>> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>> > > @@ -221,6 +221,7 @@ struct intel_gt {
>> > >
>> > >  	struct {
>> > >  		u8 uc_index;
>> > > +		u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
>> > >  	} mocs;
>> > >
>> > >  	struct intel_pxp pxp;
>> > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> > > index c4c37585ae8c..265812589f87 100644
>> > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
>> > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
>> > > @@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
>> > >  	unsigned int n_entries;
>> > >  	const struct drm_i915_mocs_entry *table;
>> > >  	u8 uc_index;
>> > > +	u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
>> > >  	u8 unused_entries_index;
>> > >  };
>> > >
>> > > @@ -47,6 +48,7 @@ struct drm_i915_mocs_table {
>> > >
>> > >  /* Helper defines */
>> > >  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>> > > +#define PVC_NUM_MOCS_ENTRIES	3
>> >
>> > Should this be 4?  The value here should reflect the number of entries
>> > that can defined in hardware rather than the size of the table we're
>> > asked to program.  Since there are two registers (each with a high and a
>> > low entry), that would imply we should set 4 here to ensure that the
>> > fourth entry is initialized according to unused_entries_index rather
>> > than left at whatever the hardware defaults might be.
>>
>> not sure I understand what you mean here. The n_entries specifies, as
>> you said, the number of entries we can have. Bspec 45101 shows entries
>> for indexes 0, 1 and 2. As does the pvc_mocs_table below.
>>
>> Also, from bspec 44509:
>> "For PVC, only 3 MOCS states are supported. The allowed index values are
>> in range [0, 2]..."
>>
>> So, I don't think we want to program any fourth entry.
>
>We don't have a choice; the fourth entry lives in the same register as
>the third entry, so no matter what we're writing _something_ to those
>bits.  The question is whether we should write all 0's or whether we
>should treat it like other platforms and ensure it's initialized to the
>unused entry values.  Entry #4 isn't supposed to be used, but if buggy
>userspace tries to use it, we probably still want well-defined behavior,
>just like it an invalid entry gets used on any other platform.

Now I understand what you were talking about:  each register houses 2
entries. For PVC we have LNCFCMOCS0 and LNCFCMOCS1. Hmm... looking at
for_each_l3cc(), that is actually handled and the rest of the register
is initialized with the value pointed to by unused_entries_index.

Such situation would only happen for the last entry, which implies the
handling for odd size works for this as well.

Lucas De Marchi

>
>
>Matt
>
>>
>> Lucas De Marchi
>
>-- 
>Matt Roper
>Graphics Software Engineer
>VTT-OSGC Platform Enablement
>Intel Corporation
>(916) 356-2795
Matt Roper May 2, 2022, 7:42 p.m. UTC | #5
On Mon, May 02, 2022 at 12:27:29PM -0700, Lucas De Marchi wrote:
> On Mon, May 02, 2022 at 11:50:22AM -0700, Matt Roper wrote:
> > On Mon, May 02, 2022 at 11:39:48AM -0700, Lucas De Marchi wrote:
> > > On Mon, May 02, 2022 at 09:50:23AM -0700, Matt Roper wrote:
> > > > On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote:
> > > > > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > >
> > > > > Bspec: 45101, 72161
> > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > Signed-off-by: Fei Yang <fei.yang@intel.com>
> > > > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/gt/intel_gt_types.h    |  1 +
> > > > >  drivers/gpu/drm/i915/gt/intel_mocs.c        | 24 ++++++++++++++++++++-
> > > > >  drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++---
> > > > >  drivers/gpu/drm/i915/i915_drv.h             |  2 ++
> > > > >  drivers/gpu/drm/i915/i915_pci.c             |  3 ++-
> > > > >  drivers/gpu/drm/i915/intel_device_info.h    |  1 +
> > > > >  6 files changed, 39 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > > > index b06611c1d4ad..7853ea194ea6 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> > > > > @@ -221,6 +221,7 @@ struct intel_gt {
> > > > >
> > > > >  	struct {
> > > > >  		u8 uc_index;
> > > > > +		u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
> > > > >  	} mocs;
> > > > >
> > > > >  	struct intel_pxp pxp;
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > > index c4c37585ae8c..265812589f87 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > > @@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
> > > > >  	unsigned int n_entries;
> > > > >  	const struct drm_i915_mocs_entry *table;
> > > > >  	u8 uc_index;
> > > > > +	u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
> > > > >  	u8 unused_entries_index;
> > > > >  };
> > > > >
> > > > > @@ -47,6 +48,7 @@ struct drm_i915_mocs_table {
> > > > >
> > > > >  /* Helper defines */
> > > > >  #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> > > > > +#define PVC_NUM_MOCS_ENTRIES	3
> > > >
> > > > Should this be 4?  The value here should reflect the number of entries
> > > > that can defined in hardware rather than the size of the table we're
> > > > asked to program.  Since there are two registers (each with a high and a
> > > > low entry), that would imply we should set 4 here to ensure that the
> > > > fourth entry is initialized according to unused_entries_index rather
> > > > than left at whatever the hardware defaults might be.
> > > 
> > > not sure I understand what you mean here. The n_entries specifies, as
> > > you said, the number of entries we can have. Bspec 45101 shows entries
> > > for indexes 0, 1 and 2. As does the pvc_mocs_table below.
> > > 
> > > Also, from bspec 44509:
> > > "For PVC, only 3 MOCS states are supported. The allowed index values are
> > > in range [0, 2]..."
> > > 
> > > So, I don't think we want to program any fourth entry.
> > 
> > We don't have a choice; the fourth entry lives in the same register as
> > the third entry, so no matter what we're writing _something_ to those
> > bits.  The question is whether we should write all 0's or whether we
> > should treat it like other platforms and ensure it's initialized to the
> > unused entry values.  Entry #4 isn't supposed to be used, but if buggy
> > userspace tries to use it, we probably still want well-defined behavior,
> > just like it an invalid entry gets used on any other platform.
> 
> Now I understand what you were talking about:  each register houses 2
> entries. For PVC we have LNCFCMOCS0 and LNCFCMOCS1. Humn... looking at
> for_each_l3cc(), that is actually handled and the rest of the register
> is initialized with the value pointed by unused_entries_index.

Yep, you're right.  It looks like we still do a get_entry_l3cc() for the
upper entry of the final register, and that will return the unused_entry
value if it's out of bounds.  In that case I don't have any concerns
here.


Matt

> 
> Such situation would only happen for the last entry, which implies the
> handling for odd size works for this as well.
> 
> Lucas De Marchi
> 
> > 
> > 
> > Matt
> > 
> > > 
> > > Lucas De Marchi
> > 
> > -- 
> > Matt Roper
> > Graphics Software Engineer
> > VTT-OSGC Platform Enablement
> > Intel Corporation
> > (916) 356-2795
Lucas De Marchi May 2, 2022, 9:03 p.m. UTC | #6
On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote:
>From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
>
>Bspec: 45101, 72161
>Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
>Signed-off-by: Fei Yang <fei.yang@intel.com>
>Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
>---
> drivers/gpu/drm/i915/gt/intel_gt_types.h    |  1 +
> drivers/gpu/drm/i915/gt/intel_mocs.c        | 24 ++++++++++++++++++++-
> drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++---
> drivers/gpu/drm/i915/i915_drv.h             |  2 ++
> drivers/gpu/drm/i915/i915_pci.c             |  3 ++-
> drivers/gpu/drm/i915/intel_device_info.h    |  1 +
> 6 files changed, 39 insertions(+), 5 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>index b06611c1d4ad..7853ea194ea6 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>@@ -221,6 +221,7 @@ struct intel_gt {
>
> 	struct {
> 		u8 uc_index;
>+		u8 wb_index; /* Only for platforms listed in Bspec: 72161 */

I don't much like writing the bspec number in code like this. For a commit
message it's acceptable/desired, but for code I think it's not great as
1) it's not something generally available and 2) it will likely get
outdated so one would have to rely on git log/blame to see when this was
actually valid.


> 	} mocs;
>
> 	struct intel_pxp pxp;
>diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
>index c4c37585ae8c..265812589f87 100644
>--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
>+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
>@@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
> 	unsigned int n_entries;
> 	const struct drm_i915_mocs_entry *table;
> 	u8 uc_index;
>+	u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
> 	u8 unused_entries_index;
> };
>
>@@ -47,6 +48,7 @@ struct drm_i915_mocs_table {
>
> /* Helper defines */
> #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>+#define PVC_NUM_MOCS_ENTRIES	3
>
> /* (e)LLC caching options */
> /*
>@@ -394,6 +396,17 @@ static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
> 	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
> };
>
>+static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
>+	/* Error */
>+	MOCS_ENTRY(0, 0, L3_3_WB),
>+
>+	/* UC */
>+	MOCS_ENTRY(1, 0, L3_1_UC),
>+
>+	/* WB */
>+	MOCS_ENTRY(2, 0, L3_3_WB),
>+};
>+
> enum {
> 	HAS_GLOBAL_MOCS = BIT(0),
> 	HAS_ENGINE_MOCS = BIT(1),
>@@ -423,7 +436,14 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
> 	memset(table, 0, sizeof(struct drm_i915_mocs_table));
>
> 	table->unused_entries_index = I915_MOCS_PTE;
>-	if (IS_DG2(i915)) {
>+	if (IS_PONTEVECCHIO(i915)) {
>+		table->size = ARRAY_SIZE(pvc_mocs_table);
>+		table->table = pvc_mocs_table;
>+		table->n_entries = PVC_NUM_MOCS_ENTRIES;
>+		table->uc_index = 1;
>+		table->wb_index = 2;
>+		table->unused_entries_index = 2;
>+	} else if (IS_DG2(i915)) {
> 		if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
> 			table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
> 			table->table = dg2_mocs_table_g10_ax;
>@@ -622,6 +642,8 @@ void intel_set_mocs_index(struct intel_gt *gt)
>
> 	get_mocs_settings(gt->i915, &table);
> 	gt->mocs.uc_index = table.uc_index;
>+	if (HAS_L3_CCS_READ(gt->i915))
>+		gt->mocs.wb_index = table.wb_index;
> }
>
> void intel_mocs_init(struct intel_gt *gt)
>diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>index a05c4b99b3fb..a656d9c2ca2b 100644
>--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>@@ -1994,7 +1994,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
> static void
> engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
> {
>-	u8 mocs;
>+	u8 mocs_w, mocs_r;
>
> 	/*
> 	 * RING_CMD_CCTL are need to be programed to un-cached
>@@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
> 	 * Streamers on Gen12 onward platforms.
> 	 */
> 	if (GRAPHICS_VER(engine->i915) >= 12) {
>-		mocs = engine->gt->mocs.uc_index;
>+		if (HAS_L3_CCS_READ(engine->i915) &&
>+		    engine->class == COMPUTE_CLASS)
>+			mocs_r = engine->gt->mocs.wb_index;
>+		else
>+			mocs_r = engine->gt->mocs.uc_index;

shouldn't we add a warning in get_mocs_settings() if HAS_L3_CCS_READ(engine->i915)
and mocs.wb_index is 0 (since index 0 shouldn't really be used in latest
platforms)?

Lucas De Marchi

>+
>+		mocs_w = engine->gt->mocs.uc_index;
>+
> 		wa_masked_field_set(wal,
> 				    RING_CMD_CCTL(engine->mmio_base),
> 				    CMD_CCTL_MOCS_MASK,
>-				    CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
>+				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
> 	}
> }
>
>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>index 2dddc27a1b0e..8c8e7308502b 100644
>--- a/drivers/gpu/drm/i915/i915_drv.h
>+++ b/drivers/gpu/drm/i915/i915_drv.h
>@@ -1369,6 +1369,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>
> #define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10))
>
>+#define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read)
>+
> /* DPF == dynamic parity feature */
> #define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf)
> #define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? \
>diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>index 498708b33924..07722cdf63ac 100644
>--- a/drivers/gpu/drm/i915/i915_pci.c
>+++ b/drivers/gpu/drm/i915/i915_pci.c
>@@ -1076,7 +1076,8 @@ static const struct intel_device_info ats_m_info = {
>
> #define XE_HPC_FEATURES \
> 	XE_HP_FEATURES, \
>-	.dma_mask_size = 52
>+	.dma_mask_size = 52, \
>+	.has_l3_ccs_read = 1
>
> __maybe_unused
> static const struct intel_device_info pvc_info = {
>diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
>index e7d2cf7d65c8..09e33296157a 100644
>--- a/drivers/gpu/drm/i915/intel_device_info.h
>+++ b/drivers/gpu/drm/i915/intel_device_info.h
>@@ -150,6 +150,7 @@ enum intel_ppgtt_type {
> 	func(has_heci_pxp); \
> 	func(has_heci_gscfi); \
> 	func(has_guc_deprivilege); \
>+	func(has_l3_ccs_read); \
> 	func(has_l3_dpf); \
> 	func(has_llc); \
> 	func(has_logical_ring_contexts); \
>-- 
>2.35.1
>
Matt Roper May 2, 2022, 9:14 p.m. UTC | #7
On Mon, May 02, 2022 at 02:03:28PM -0700, Lucas De Marchi wrote:
> On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote:
> > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
...
> > @@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
> > 	 * Streamers on Gen12 onward platforms.
> > 	 */
> > 	if (GRAPHICS_VER(engine->i915) >= 12) {
> > -		mocs = engine->gt->mocs.uc_index;
> > +		if (HAS_L3_CCS_READ(engine->i915) &&
> > +		    engine->class == COMPUTE_CLASS)
> > +			mocs_r = engine->gt->mocs.wb_index;
> > +		else
> > +			mocs_r = engine->gt->mocs.uc_index;
> 
> shouldn't we add a warning in get_mocs_settings() if HAS_L3_CCS_READ(engine->i915)
> and mocs.wb_index is 0 (since index 0 shouldn't really be used in latest
> platforms)?

We should be careful about that assumption...index 0 is valid on DG2
today, although HAS_L3_CCS_READ() doesn't apply there.  And a couple
platforms in the future we're also going to have index 0 being valid on
a platform where HAS_L3_CCS_READ() is true (bspec 71582).  Index 0 would
still be the wrong entry to pick for WB behavior there, but it is a
legitimate entry in general.


Matt
Lucas De Marchi May 3, 2022, 6:22 a.m. UTC | #8
On Mon, May 02, 2022 at 02:14:02PM -0700, Matt Roper wrote:
>On Mon, May 02, 2022 at 02:03:28PM -0700, Lucas De Marchi wrote:
>> On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote:
>> > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
>...
>> > @@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>> > 	 * Streamers on Gen12 onward platforms.
>> > 	 */
>> > 	if (GRAPHICS_VER(engine->i915) >= 12) {
>> > -		mocs = engine->gt->mocs.uc_index;
>> > +		if (HAS_L3_CCS_READ(engine->i915) &&
>> > +		    engine->class == COMPUTE_CLASS)
>> > +			mocs_r = engine->gt->mocs.wb_index;
>> > +		else
>> > +			mocs_r = engine->gt->mocs.uc_index;
>>
>> shouldn't we add a warning in get_mocs_settings() if HAS_L3_CCS_READ(engine->i915)
>> and mocs.wb_index is 0 (since index 0 shouldn't really be used in latest
>> platforms)?
>
>We should be careful about that assumption...index 0 is valid on DG2
>today, although HAS_L3_CCS_READ() doesn't apply there.  And a couple
>platforms in the future we're also going to have index 0 being valid on
>a platform where HAS_L3_CCS_READ() is true (bspec 71582).  Index 0 would
>still be the wrong entry to pick for WB behavior there, but it is a
>legitimate entry in general.

ok, but my comment is more about "forgetting to initialize it in
get_mocs_settings() and then using it here". Treating 0 as "it was not
initialized" may be an easy way to catch that.

Lucas De Marchi
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index b06611c1d4ad..7853ea194ea6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -221,6 +221,7 @@  struct intel_gt {
 
 	struct {
 		u8 uc_index;
+		u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
 	} mocs;
 
 	struct intel_pxp pxp;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index c4c37585ae8c..265812589f87 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -23,6 +23,7 @@  struct drm_i915_mocs_table {
 	unsigned int n_entries;
 	const struct drm_i915_mocs_entry *table;
 	u8 uc_index;
+	u8 wb_index; /* Only for platforms listed in Bspec: 72161 */
 	u8 unused_entries_index;
 };
 
@@ -47,6 +48,7 @@  struct drm_i915_mocs_table {
 
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+#define PVC_NUM_MOCS_ENTRIES	3
 
 /* (e)LLC caching options */
 /*
@@ -394,6 +396,17 @@  static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
 	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
 };
 
+static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
+	/* Error */
+	MOCS_ENTRY(0, 0, L3_3_WB),
+
+	/* UC */
+	MOCS_ENTRY(1, 0, L3_1_UC),
+
+	/* WB */
+	MOCS_ENTRY(2, 0, L3_3_WB),
+};
+
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
 	HAS_ENGINE_MOCS = BIT(1),
@@ -423,7 +436,14 @@  static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 	memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
 	table->unused_entries_index = I915_MOCS_PTE;
-	if (IS_DG2(i915)) {
+	if (IS_PONTEVECCHIO(i915)) {
+		table->size = ARRAY_SIZE(pvc_mocs_table);
+		table->table = pvc_mocs_table;
+		table->n_entries = PVC_NUM_MOCS_ENTRIES;
+		table->uc_index = 1;
+		table->wb_index = 2;
+		table->unused_entries_index = 2;
+	} else if (IS_DG2(i915)) {
 		if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
 			table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
 			table->table = dg2_mocs_table_g10_ax;
@@ -622,6 +642,8 @@  void intel_set_mocs_index(struct intel_gt *gt)
 
 	get_mocs_settings(gt->i915, &table);
 	gt->mocs.uc_index = table.uc_index;
+	if (HAS_L3_CCS_READ(gt->i915))
+		gt->mocs.wb_index = table.wb_index;
 }
 
 void intel_mocs_init(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index a05c4b99b3fb..a656d9c2ca2b 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1994,7 +1994,7 @@  void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
 static void
 engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
-	u8 mocs;
+	u8 mocs_w, mocs_r;
 
 	/*
 	 * RING_CMD_CCTL are need to be programed to un-cached
@@ -2002,11 +2002,18 @@  engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	 * Streamers on Gen12 onward platforms.
 	 */
 	if (GRAPHICS_VER(engine->i915) >= 12) {
-		mocs = engine->gt->mocs.uc_index;
+		if (HAS_L3_CCS_READ(engine->i915) &&
+		    engine->class == COMPUTE_CLASS)
+			mocs_r = engine->gt->mocs.wb_index;
+		else
+			mocs_r = engine->gt->mocs.uc_index;
+
+		mocs_w = engine->gt->mocs.uc_index;
+
 		wa_masked_field_set(wal,
 				    RING_CMD_CCTL(engine->mmio_base),
 				    CMD_CCTL_MOCS_MASK,
-				    CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
+				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2dddc27a1b0e..8c8e7308502b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1369,6 +1369,8 @@  IS_SUBPLATFORM(const struct drm_i915_private *i915,
 
 #define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10))
 
+#define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read)
+
 /* DPF == dynamic parity feature */
 #define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf)
 #define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? \
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 498708b33924..07722cdf63ac 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1076,7 +1076,8 @@  static const struct intel_device_info ats_m_info = {
 
 #define XE_HPC_FEATURES \
 	XE_HP_FEATURES, \
-	.dma_mask_size = 52
+	.dma_mask_size = 52, \
+	.has_l3_ccs_read = 1
 
 __maybe_unused
 static const struct intel_device_info pvc_info = {
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index e7d2cf7d65c8..09e33296157a 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -150,6 +150,7 @@  enum intel_ppgtt_type {
 	func(has_heci_pxp); \
 	func(has_heci_gscfi); \
 	func(has_guc_deprivilege); \
+	func(has_l3_ccs_read); \
 	func(has_l3_dpf); \
 	func(has_llc); \
 	func(has_logical_ring_contexts); \