diff mbox series

[v7,3/4] drm/i915/icl: Define MOCS table for Icelake

Message ID 20181214182021.10483-4-lucas.demarchi@intel.com (mailing list archive)
State New, archived
Headers show
Series Define MOCS table for Icelake | expand

Commit Message

Lucas De Marchi Dec. 14, 2018, 6:20 p.m. UTC
From: Tomasz Lis <tomasz.lis@intel.com>

The table has been unified across OSes to minimize virtualization overhead.

The MOCS table is now published as part of bspec, and versioned. Entries
are supposed to never be modified, but new ones can be added. Adding
entries increases table version. The patch includes version 1 entries.

Meaning of each entry is now explained in bspec, and user mode clients
are expected to know what each entry means. The 3 entries used for previous
platforms are still compatible with their legacy definitions, but that is
not guaranteed to be true for future platforms.

v2: Fixed SCC values, improved commit comment (Daniele)
v3: Improved MOCS table comment (Daniele)
v4: Moved new entries below gen9 ones. Put common entries into
    definition to be used in multiple arrays. (Lucas)
v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
    to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
v6: Removed definitions of reserved entries. (Michal)
    Increased limit of entries sent to the hardware on gen11+.
v7: Simplify table as done for previou gens (Lucas)

BSpec: 34007
BSpec: 560

Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
 1 file changed, 162 insertions(+), 25 deletions(-)

Comments

Tvrtko Ursulin Dec. 21, 2018, 12:29 p.m. UTC | #1
On 14/12/2018 18:20, Lucas De Marchi wrote:
> From: Tomasz Lis <tomasz.lis@intel.com>
> 
> The table has been unified across OSes to minimize virtualization overhead.
> 
> The MOCS table is now published as part of bspec, and versioned. Entries
> are supposed to never be modified, but new ones can be added. Adding
> entries increases table version. The patch includes version 1 entries.
> 
> Meaning of each entry is now explained in bspec, and user mode clients
> are expected to know what each entry means. The 3 entries used for previous
> platforms are still compatible with their legacy definitions, but that is
> not guaranteed to be true for future platforms.
> 
> v2: Fixed SCC values, improved commit comment (Daniele)
> v3: Improved MOCS table comment (Daniele)
> v4: Moved new entries below gen9 ones. Put common entries into
>      definition to be used in multiple arrays. (Lucas)
> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
> v6: Removed definitions of reserved entries. (Michal)
>      Increased limit of entries sent to the hardware on gen11+.
> v7: Simplify table as done for previou gens (Lucas)
> 
> BSpec: 34007
> BSpec: 560
> 
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
>   1 file changed, 162 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index 577633cefb8a..dfc4edea020f 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>   #define LE_SCC(value)		((value) << 8)
>   #define LE_PFM(value)		((value) << 11)
>   #define LE_SCF(value)		((value) << 14)
> +#define LE_COS(value)		((value) << 15)
> +#define LE_SSE(value)		((value) << 17)
>   
>   /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>   #define L3_ESC(value)		((value) << 0)
> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>   
>   /* Helper defines */
>   #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
> +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> +
> +#define NUM_MOCS_ENTRIES(i915) \
> +	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
>   
>   /* (e)LLC caching options */
>   #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>    * LNCFCMOCS0 - LNCFCMOCS32 registers.
>    *
>    * These tables are intended to be kept reasonably consistent across
> - * platforms. However some of the fields are not applicable to all of
> - * them.
> + * HW platforms, and for ICL+, be identical across OSes. To achieve
> + * that, for Icelake and above, list of entries is published as part
> + * of bspec.
>    *
>    * Entries not part of the following tables are undefined as far as
> - * userspace is concerned and shouldn't be relied upon.  For the time
> - * being they will be implicitly initialized to the strictest caching
> - * configuration (uncached) to guarantee forwards compatibility with
> - * userspace programs written against more recent kernels providing
> - * additional MOCS entries.
> + * userspace is concerned and shouldn't be relied upon.
> + *
> + * The last two entries are reserved by the hardware. For ICL+ they
> + * should be initialized according to bspec and never used, for older
> + * platforms they should never be written to.
>    *
> - * NOTE: These tables MUST start with being uncached and the length
> - *       MUST be less than 63 as the last two registers are reserved
> - *       by the hardware.  These tables are part of the kernel ABI and
> - *       may only be updated incrementally by adding entries at the
> - *       end.
> + * NOTE: These tables are part of bspec and defined as part of hardware
> + *       interface for ICL+. For older platforms, they are part of kernel
> + *       ABI. It is expected that existing entries will remain constant
> + *       and the tables will only be updated by adding new entries.
>    */
>   
>   #define GEN9_MOCS_ENTRIES \
> @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>   	},
>   };
>   
> +#define GEN11_MOCS_ENTRIES \
> +	[0] = { \
> +		/* Base - Uncached (Deprecated) */ \
> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[1] = { \
> +		/* Base - L3 + LeCC:PAT (Deprecated) */ \
> +		.control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[2] = { \
> +		/* Base - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[3] = { \
> +		/* Base - Uncached */ \
> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[4] = { \
> +		/* Base - L3 */ \
> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[5] = { \
> +		/* Base - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[6] = { \
> +		/* Age 0 - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[7] = { \
> +		/* Age 0 - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[8] = { \
> +		/* Age: Don't Chg. - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[9] = { \
> +		/* Age: Don't Chg. - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[10] = { \
> +		/* No AOM - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[11] = { \
> +		/* No AOM - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[12] = { \
> +		/* No AOM; Age 0 - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[13] = { \
> +		/* No AOM; Age 0 - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[14] = { \
> +		/* No AOM; Age:DC - LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[15] = { \
> +		/* No AOM; Age:DC - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[18] = { \
> +		/* Self-Snoop - L3 + LLC */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[19] = { \
> +		/* Skip Caching - L3 + LLC(12.5%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[20] = { \
> +		/* Skip Caching - L3 + LLC(25%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[21] = { \
> +		/* Skip Caching - L3 + LLC(50%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[22] = { \
> +		/* Skip Caching - L3 + LLC(75%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
> +		.l3cc_value = L3_3_WB \
> +	}, \
> +	[23] = { \
> +		/* Skip Caching - L3 + LLC(87.5%) */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
> +		.l3cc_value = L3_3_WB \
> +	}, \

There is a hole of unused entries here which I think comes to play from 
the emit functions later.

> +	[62] = { \
> +		/* HW Reserved - SW program but never use */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> +		.l3cc_value = L3_1_UC \
> +	}, \
> +	[63] = { \
> +		/* HW Reserved - SW program but never use */ \
> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> +		.l3cc_value = L3_1_UC \
> +	},
> +
> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
> +	GEN11_MOCS_ENTRIES
> +};
> +
>   /**
>    * get_mocs_settings()
>    * @dev_priv:	i915 device.
> @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
>   {
>   	bool result = false;
>   
> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
> -	    IS_ICELAKE(dev_priv)) {
> +	if (IS_ICELAKE(dev_priv)) {
> +		table->size  = ARRAY_SIZE(icelake_mocs_table);

So effectively this is always 64 due last two reserved entries.

> +		table->table = icelake_mocs_table;
> +		result = true;
> +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>   		table->size  = ARRAY_SIZE(skylake_mocs_table);
>   		table->table = skylake_mocs_table;
>   		result = true;
> @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   	if (!get_mocs_settings(dev_priv, &table))
>   		return;
>   
> -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
> +	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>   
>   	for (index = 0; index < table.size; index++)
>   		I915_WRITE(mocs_register(engine->id, index),
> @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   	 * Entry 0 in the table is uncached - so we are just writing
>   	 * that value to all the used entries.
>   	 */
> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
> +	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>   		I915_WRITE(mocs_register(engine->id, index),
>   			   table.table[0].control_value);
>   }
> @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>   static int emit_mocs_control_table(struct i915_request *rq,
>   				   const struct drm_i915_mocs_table *table)
>   {
> +	struct drm_i915_private *i915 = rq->i915;
>   	enum intel_engine_id engine = rq->engine->id;
>   	unsigned int index;
>   	u32 *cs;
>   
> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>   		return -ENODEV;
>   
> -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
> +	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>   
>   	for (index = 0; index < table->size; index++) {
>   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
>   	 * Entry 0 in the table is uncached - so we are just writing
>   	 * that value to all the used entries.
>   	 */
> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> +	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>   		*cs++ = table->table[0].control_value;

So in init and emit functions the behaviour is now different due 
reserved entries.

Where before (and according the the comment which this patch removes - 
why?) we were setting unused entries to uncached, on Icelake they will 
be set to whatever all zeros is - LE_PAGETABLE?

If that is not desired, we may need to transition to a scheme where 
struct drm_i915_mocs_entry starts holding the table index, and the 
static array is not indexed by it.

It would also save the unused hole of zeros on Icelake which would 
probably offset the extra used space.

So I think someone needs to clarify what is the desired state for unused 
entries on Icelake.

Regards,

Tvrtko

>   	}
> @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
>   static int emit_mocs_l3cc_table(struct i915_request *rq,
>   				const struct drm_i915_mocs_table *table)
>   {
> +	struct drm_i915_private *i915 = rq->i915;
>   	unsigned int i;
>   	u32 *cs;
>   
> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>   		return -ENODEV;
>   
> -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
> +	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>   	if (IS_ERR(cs))
>   		return PTR_ERR(cs);
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>   
>   	for (i = 0; i < table->size/2; i++) {
>   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>   	 * this will be uncached. Leave the last pair uninitialised as
>   	 * they are reserved by the hardware.
>   	 */
> -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
> +	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>   		*cs++ = l3cc_combine(table, 0, 0);
>   	}
> @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
>   	 * this will be uncached. Leave the last pair as initialised as
>   	 * they are reserved by the hardware.
>   	 */
> -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
> +	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>   		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>   }
>   
>
Lis, Tomasz Dec. 21, 2018, 6:05 p.m. UTC | #2
On 2018-12-21 13:29, Tvrtko Ursulin wrote:
>
> On 14/12/2018 18:20, Lucas De Marchi wrote:
>> From: Tomasz Lis <tomasz.lis@intel.com>
>>
>> The table has been unified across OSes to minimize virtualization 
>> overhead.
>>
>> The MOCS table is now published as part of bspec, and versioned. Entries
>> are supposed to never be modified, but new ones can be added. Adding
>> entries increases table version. The patch includes version 1 entries.
>>
>> Meaning of each entry is now explained in bspec, and user mode clients
>> are expected to know what each entry means. The 3 entries used for 
>> previous
>> platforms are still compatible with their legacy definitions, but 
>> that is
>> not guaranteed to be true for future platforms.
>>
>> v2: Fixed SCC values, improved commit comment (Daniele)
>> v3: Improved MOCS table comment (Daniele)
>> v4: Moved new entries below gen9 ones. Put common entries into
>>      definition to be used in multiple arrays. (Lucas)
>> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>>      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
>> v6: Removed definitions of reserved entries. (Michal)
>>      Increased limit of entries sent to the hardware on gen11+.
>> v7: Simplify table as done for previou gens (Lucas)
>>
>> BSpec: 34007
>> BSpec: 560
>>
>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
>>   1 file changed, 162 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
>> b/drivers/gpu/drm/i915/intel_mocs.c
>> index 577633cefb8a..dfc4edea020f 100644
>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>>   #define LE_SCC(value)        ((value) << 8)
>>   #define LE_PFM(value)        ((value) << 11)
>>   #define LE_SCF(value)        ((value) << 14)
>> +#define LE_COS(value)        ((value) << 15)
>> +#define LE_SSE(value)        ((value) << 17)
>>     /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries 
>> per word */
>>   #define L3_ESC(value)        ((value) << 0)
>> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>>     /* Helper defines */
>>   #define GEN9_NUM_MOCS_ENTRIES    62  /* 62 out of 64 - 63 & 64 are 
>> reserved. */
>> +#define GEN11_NUM_MOCS_ENTRIES    64  /* 63-64 are reserved, but 
>> configured. */
>> +
>> +#define NUM_MOCS_ENTRIES(i915) \
>> +    (INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : 
>> GEN11_NUM_MOCS_ENTRIES)
>>     /* (e)LLC caching options */
>>   #define LE_0_PAGETABLE        _LE_CACHEABILITY(0)
>> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>>    * LNCFCMOCS0 - LNCFCMOCS32 registers.
>>    *
>>    * These tables are intended to be kept reasonably consistent across
>> - * platforms. However some of the fields are not applicable to all of
>> - * them.
>> + * HW platforms, and for ICL+, be identical across OSes. To achieve
>> + * that, for Icelake and above, list of entries is published as part
>> + * of bspec.
>>    *
>>    * Entries not part of the following tables are undefined as far as
>> - * userspace is concerned and shouldn't be relied upon.  For the time
>> - * being they will be implicitly initialized to the strictest caching
>> - * configuration (uncached) to guarantee forwards compatibility with
>> - * userspace programs written against more recent kernels providing
>> - * additional MOCS entries.
>> + * userspace is concerned and shouldn't be relied upon.
>> + *
>> + * The last two entries are reserved by the hardware. For ICL+ they
>> + * should be initialized according to bspec and never used, for older
>> + * platforms they should never be written to.
>>    *
>> - * NOTE: These tables MUST start with being uncached and the length
>> - *       MUST be less than 63 as the last two registers are reserved
>> - *       by the hardware.  These tables are part of the kernel ABI and
>> - *       may only be updated incrementally by adding entries at the
>> - *       end.
>> + * NOTE: These tables are part of bspec and defined as part of hardware
>> + *       interface for ICL+. For older platforms, they are part of 
>> kernel
>> + *       ABI. It is expected that existing entries will remain constant
>> + *       and the tables will only be updated by adding new entries.
>>    */
>>     #define GEN9_MOCS_ENTRIES \
>> @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry 
>> broxton_mocs_table[] = {
>>       },
>>   };
>>   +#define GEN11_MOCS_ENTRIES \
>> +    [0] = { \
>> +        /* Base - Uncached (Deprecated) */ \
>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [1] = { \
>> +        /* Base - L3 + LeCC:PAT (Deprecated) */ \
>> +        .control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [2] = { \
>> +        /* Base - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [3] = { \
>> +        /* Base - Uncached */ \
>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [4] = { \
>> +        /* Base - L3 */ \
>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [5] = { \
>> +        /* Base - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [6] = { \
>> +        /* Age 0 - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [7] = { \
>> +        /* Age 0 - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [8] = { \
>> +        /* Age: Don't Chg. - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [9] = { \
>> +        /* Age: Don't Chg. - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [10] = { \
>> +        /* No AOM - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [11] = { \
>> +        /* No AOM - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [12] = { \
>> +        /* No AOM; Age 0 - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [13] = { \
>> +        /* No AOM; Age 0 - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [14] = { \
>> +        /* No AOM; Age:DC - LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [15] = { \
>> +        /* No AOM; Age:DC - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | 
>> LE_AOM(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [18] = { \
>> +        /* Self-Snoop - L3 + LLC */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_SSE(3), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [19] = { \
>> +        /* Skip Caching - L3 + LLC(12.5%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_SCC(7), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [20] = { \
>> +        /* Skip Caching - L3 + LLC(25%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_SCC(3), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [21] = { \
>> +        /* Skip Caching - L3 + LLC(50%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_SCC(1), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [22] = { \
>> +        /* Skip Caching - L3 + LLC(75%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_RSC(1) | LE_SCC(3), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>> +    [23] = { \
>> +        /* Skip Caching - L3 + LLC(87.5%) */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>> LE_RSC(1) | LE_SCC(7), \
>> +        .l3cc_value = L3_3_WB \
>> +    }, \
>
> There is a hole of unused entries here which I think comes to play 
> from the emit functions later.
>
>> +    [62] = { \
>> +        /* HW Reserved - SW program but never use */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>> +        .l3cc_value = L3_1_UC \
>> +    }, \
>> +    [63] = { \
>> +        /* HW Reserved - SW program but never use */ \
>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>> +        .l3cc_value = L3_1_UC \
>> +    },
>> +
>> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
>> +    GEN11_MOCS_ENTRIES
>> +};
>> +
>>   /**
>>    * get_mocs_settings()
>>    * @dev_priv:    i915 device.
>> @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct 
>> drm_i915_private *dev_priv,
>>   {
>>       bool result = false;
>>   -    if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
>> -        IS_ICELAKE(dev_priv)) {
>> +    if (IS_ICELAKE(dev_priv)) {
>> +        table->size  = ARRAY_SIZE(icelake_mocs_table);
>
> So effectively this is always 64 due last two reserved entries.
>
>> +        table->table = icelake_mocs_table;
>> +        result = true;
>> +    } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>>           table->size  = ARRAY_SIZE(skylake_mocs_table);
>>           table->table = skylake_mocs_table;
>>           result = true;
>> @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct 
>> intel_engine_cs *engine)
>>       if (!get_mocs_settings(dev_priv, &table))
>>           return;
>>   -    GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
>> +    GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>>         for (index = 0; index < table.size; index++)
>>           I915_WRITE(mocs_register(engine->id, index),
>> @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct 
>> intel_engine_cs *engine)
>>        * Entry 0 in the table is uncached - so we are just writing
>>        * that value to all the used entries.
>>        */
>> -    for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
>> +    for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>>           I915_WRITE(mocs_register(engine->id, index),
>>                  table.table[0].control_value);
>>   }
>> @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct 
>> intel_engine_cs *engine)
>>   static int emit_mocs_control_table(struct i915_request *rq,
>>                      const struct drm_i915_mocs_table *table)
>>   {
>> +    struct drm_i915_private *i915 = rq->i915;
>>       enum intel_engine_id engine = rq->engine->id;
>>       unsigned int index;
>>       u32 *cs;
>>   -    if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>> +    if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>           return -ENODEV;
>>   -    cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
>> +    cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>>       if (IS_ERR(cs))
>>           return PTR_ERR(cs);
>>   -    *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
>> +    *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>>         for (index = 0; index < table->size; index++) {
>>           *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>> @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct 
>> i915_request *rq,
>>        * Entry 0 in the table is uncached - so we are just writing
>>        * that value to all the used entries.
>>        */
>> -    for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
>> +    for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>>           *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>           *cs++ = table->table[0].control_value;
>
> So in init and emit functions the behaviour is now different due 
> reserved entries.
>
> Where before (and according the the comment which this patch removes - 
> why?) we were setting unused entries to uncached, on Icelake they will 
> be set to whatever all zeros is - LE_PAGETABLE?
>
> If that is not desired, we may need to transition to a scheme where 
> struct drm_i915_mocs_entry starts holding the table index, and the 
> static array is not indexed by it.
>
> It would also save the unused hole of zeros on Icelake which would 
> probably offset the extra used space.
>
> So I think someone needs to clarify what is the desired state for 
> unused entries on Icelake.
>
I don't think we have reason to care for the undefined entries. We do 
not give any promises regarding these entries. Any way we use to fill 
them will do, and we have no obligation not to change it later, in case 
a need emerges.

Since each entry is just 2 ints, adding index does not seem justified 
for me. We could add it as u16 next to the existing u16, and therefore 
not increase drm_i915_mocs_entry size at all (just use the bytes in 
padding); but having indexes also increases code complexity.

My opinion here is not really strong though, both indexed and 
non-indexed way will work for me.
-Tomasz

> Regards,
>
> Tvrtko
>
>>       }
>> @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct 
>> drm_i915_mocs_table *table,
>>   static int emit_mocs_l3cc_table(struct i915_request *rq,
>>                   const struct drm_i915_mocs_table *table)
>>   {
>> +    struct drm_i915_private *i915 = rq->i915;
>>       unsigned int i;
>>       u32 *cs;
>>   -    if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>> +    if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>           return -ENODEV;
>>   -    cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
>> +    cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>>       if (IS_ERR(cs))
>>           return PTR_ERR(cs);
>>   -    *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
>> +    *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>>         for (i = 0; i < table->size/2; i++) {
>>           *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>> @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct 
>> i915_request *rq,
>>        * this will be uncached. Leave the last pair uninitialised as
>>        * they are reserved by the hardware.
>>        */
>> -    for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
>> +    for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>>           *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>           *cs++ = l3cc_combine(table, 0, 0);
>>       }
>> @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct 
>> drm_i915_private *dev_priv)
>>        * this will be uncached. Leave the last pair as initialised as
>>        * they are reserved by the hardware.
>>        */
>> -    for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
>> +    for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>>           I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>>   }
>>
Tvrtko Ursulin Dec. 31, 2018, 8:59 a.m. UTC | #3
On 21/12/2018 18:05, Lis, Tomasz wrote:
> 
> 
> On 2018-12-21 13:29, Tvrtko Ursulin wrote:
>>
>> On 14/12/2018 18:20, Lucas De Marchi wrote:
>>> From: Tomasz Lis <tomasz.lis@intel.com>
>>>
>>> The table has been unified across OSes to minimize virtualization 
>>> overhead.
>>>
>>> The MOCS table is now published as part of bspec, and versioned. Entries
>>> are supposed to never be modified, but new ones can be added. Adding
>>> entries increases table version. The patch includes version 1 entries.
>>>
>>> Meaning of each entry is now explained in bspec, and user mode clients
>>> are expected to know what each entry means. The 3 entries used for 
>>> previous
>>> platforms are still compatible with their legacy definitions, but 
>>> that is
>>> not guaranteed to be true for future platforms.
>>>
>>> v2: Fixed SCC values, improved commit comment (Daniele)
>>> v3: Improved MOCS table comment (Daniele)
>>> v4: Moved new entries below gen9 ones. Put common entries into
>>>      definition to be used in multiple arrays. (Lucas)
>>> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>>>      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
>>> v6: Removed definitions of reserved entries. (Michal)
>>>      Increased limit of entries sent to the hardware on gen11+.
>>> v7: Simplify table as done for previou gens (Lucas)
>>>
>>> BSpec: 34007
>>> BSpec: 560
>>>
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
>>>   1 file changed, 162 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c 
>>> b/drivers/gpu/drm/i915/intel_mocs.c
>>> index 577633cefb8a..dfc4edea020f 100644
>>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>>> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>>>   #define LE_SCC(value)        ((value) << 8)
>>>   #define LE_PFM(value)        ((value) << 11)
>>>   #define LE_SCF(value)        ((value) << 14)
>>> +#define LE_COS(value)        ((value) << 15)
>>> +#define LE_SSE(value)        ((value) << 17)
>>>     /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries 
>>> per word */
>>>   #define L3_ESC(value)        ((value) << 0)
>>> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>>>     /* Helper defines */
>>>   #define GEN9_NUM_MOCS_ENTRIES    62  /* 62 out of 64 - 63 & 64 are 
>>> reserved. */
>>> +#define GEN11_NUM_MOCS_ENTRIES    64  /* 63-64 are reserved, but 
>>> configured. */
>>> +
>>> +#define NUM_MOCS_ENTRIES(i915) \
>>> +    (INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : 
>>> GEN11_NUM_MOCS_ENTRIES)
>>>     /* (e)LLC caching options */
>>>   #define LE_0_PAGETABLE        _LE_CACHEABILITY(0)
>>> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>>>    * LNCFCMOCS0 - LNCFCMOCS32 registers.
>>>    *
>>>    * These tables are intended to be kept reasonably consistent across
>>> - * platforms. However some of the fields are not applicable to all of
>>> - * them.
>>> + * HW platforms, and for ICL+, be identical across OSes. To achieve
>>> + * that, for Icelake and above, list of entries is published as part
>>> + * of bspec.
>>>    *
>>>    * Entries not part of the following tables are undefined as far as
>>> - * userspace is concerned and shouldn't be relied upon.  For the time
>>> - * being they will be implicitly initialized to the strictest caching
>>> - * configuration (uncached) to guarantee forwards compatibility with
>>> - * userspace programs written against more recent kernels providing
>>> - * additional MOCS entries.
>>> + * userspace is concerned and shouldn't be relied upon.
>>> + *
>>> + * The last two entries are reserved by the hardware. For ICL+ they
>>> + * should be initialized according to bspec and never used, for older
>>> + * platforms they should never be written to.
>>>    *
>>> - * NOTE: These tables MUST start with being uncached and the length
>>> - *       MUST be less than 63 as the last two registers are reserved
>>> - *       by the hardware.  These tables are part of the kernel ABI and
>>> - *       may only be updated incrementally by adding entries at the
>>> - *       end.
>>> + * NOTE: These tables are part of bspec and defined as part of hardware
>>> + *       interface for ICL+. For older platforms, they are part of 
>>> kernel
>>> + *       ABI. It is expected that existing entries will remain constant
>>> + *       and the tables will only be updated by adding new entries.
>>>    */
>>>     #define GEN9_MOCS_ENTRIES \
>>> @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry 
>>> broxton_mocs_table[] = {
>>>       },
>>>   };
>>>   +#define GEN11_MOCS_ENTRIES \
>>> +    [0] = { \
>>> +        /* Base - Uncached (Deprecated) */ \
>>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [1] = { \
>>> +        /* Base - L3 + LeCC:PAT (Deprecated) */ \
>>> +        .control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [2] = { \
>>> +        /* Base - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [3] = { \
>>> +        /* Base - Uncached */ \
>>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [4] = { \
>>> +        /* Base - L3 */ \
>>> +        .control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [5] = { \
>>> +        /* Base - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [6] = { \
>>> +        /* Age 0 - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [7] = { \
>>> +        /* Age 0 - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [8] = { \
>>> +        /* Age: Don't Chg. - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [9] = { \
>>> +        /* Age: Don't Chg. - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [10] = { \
>>> +        /* No AOM - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [11] = { \
>>> +        /* No AOM - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [12] = { \
>>> +        /* No AOM; Age 0 - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [13] = { \
>>> +        /* No AOM; Age 0 - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [14] = { \
>>> +        /* No AOM; Age:DC - LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [15] = { \
>>> +        /* No AOM; Age:DC - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | 
>>> LE_AOM(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [18] = { \
>>> +        /* Self-Snoop - L3 + LLC */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_SSE(3), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [19] = { \
>>> +        /* Skip Caching - L3 + LLC(12.5%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_SCC(7), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [20] = { \
>>> +        /* Skip Caching - L3 + LLC(25%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_SCC(3), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [21] = { \
>>> +        /* Skip Caching - L3 + LLC(50%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_SCC(1), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [22] = { \
>>> +        /* Skip Caching - L3 + LLC(75%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_RSC(1) | LE_SCC(3), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>> +    [23] = { \
>>> +        /* Skip Caching - L3 + LLC(87.5%) */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | 
>>> LE_RSC(1) | LE_SCC(7), \
>>> +        .l3cc_value = L3_3_WB \
>>> +    }, \
>>
>> There is a hole of unused entries here which I think comes to play 
>> from the emit functions later.
>>
>>> +    [62] = { \
>>> +        /* HW Reserved - SW program but never use */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    }, \
>>> +    [63] = { \
>>> +        /* HW Reserved - SW program but never use */ \
>>> +        .control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +        .l3cc_value = L3_1_UC \
>>> +    },
>>> +
>>> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
>>> +    GEN11_MOCS_ENTRIES
>>> +};
>>> +
>>>   /**
>>>    * get_mocs_settings()
>>>    * @dev_priv:    i915 device.
>>> @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct 
>>> drm_i915_private *dev_priv,
>>>   {
>>>       bool result = false;
>>>   -    if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
>>> -        IS_ICELAKE(dev_priv)) {
>>> +    if (IS_ICELAKE(dev_priv)) {
>>> +        table->size  = ARRAY_SIZE(icelake_mocs_table);
>>
>> So effectively this is always 64 due last two reserved entries.
>>
>>> +        table->table = icelake_mocs_table;
>>> +        result = true;
>>> +    } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>>>           table->size  = ARRAY_SIZE(skylake_mocs_table);
>>>           table->table = skylake_mocs_table;
>>>           result = true;
>>> @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct 
>>> intel_engine_cs *engine)
>>>       if (!get_mocs_settings(dev_priv, &table))
>>>           return;
>>>   -    GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
>>> +    GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>>>         for (index = 0; index < table.size; index++)
>>>           I915_WRITE(mocs_register(engine->id, index),
>>> @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct 
>>> intel_engine_cs *engine)
>>>        * Entry 0 in the table is uncached - so we are just writing
>>>        * that value to all the used entries.
>>>        */
>>> -    for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
>>> +    for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>>>           I915_WRITE(mocs_register(engine->id, index),
>>>                  table.table[0].control_value);
>>>   }
>>> @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct 
>>> intel_engine_cs *engine)
>>>   static int emit_mocs_control_table(struct i915_request *rq,
>>>                      const struct drm_i915_mocs_table *table)
>>>   {
>>> +    struct drm_i915_private *i915 = rq->i915;
>>>       enum intel_engine_id engine = rq->engine->id;
>>>       unsigned int index;
>>>       u32 *cs;
>>>   -    if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>>> +    if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>>           return -ENODEV;
>>>   -    cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
>>> +    cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>>>       if (IS_ERR(cs))
>>>           return PTR_ERR(cs);
>>>   -    *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
>>> +    *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>>>         for (index = 0; index < table->size; index++) {
>>>           *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>> @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct 
>>> i915_request *rq,
>>>        * Entry 0 in the table is uncached - so we are just writing
>>>        * that value to all the used entries.
>>>        */
>>> -    for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
>>> +    for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>>>           *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>>           *cs++ = table->table[0].control_value;
>>
>> So in init and emit functions the behaviour is now different due 
>> reserved entries.
>>
>> Where before (and according the the comment which this patch removes - 
>> why?) we were setting unused entries to uncached, on Icelake they will 
>> be set to whatever all zeros is - LE_PAGETABLE?
>>
>> If that is not desired, we may need to transition to a scheme where 
>> struct drm_i915_mocs_entry starts holding the table index, and the 
>> static array is not indexed by it.
>>
>> It would also save the unused hole of zeros on Icelake which would 
>> probably offset the extra used space.
>>
>> So I think someone needs to clarify what is the desired state for 
>> unused entries on Icelake.
>>
> I don't think we have reason to care for the undefined entries. We do 
> not give any promises regarding these entries. Any way we use to fill 
> them will do, and we have no obligation not to change it later, in case 
> a need emerges.

Sounds reasonable to me - but can we put a note in the commit message? 
Otherwise the change is almost hidden with no justification/explanation 
either in the commit or comments.

> Since each entry is just 2 ints, adding index does not seem justified 
> for me. We could add it as u16 next to the existing u16, and therefore 
> not increase drm_i915_mocs_entry size at all (just use the bytes in 
> padding); but having indexes also increases code complexity.
> 
> My opinion here is not really strong though, both indexed and 
> non-indexed way will work for me.

Yes my bad, index solution does not work since code would need to track 
the used vs unused ones. Or we could have a bitmap.. anyways.. moot 
point if we are not going to do it.

Regards,

Tvrtko

> -Tomasz
> 
>> Regards,
>>
>> Tvrtko
>>
>>>       }
>>> @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct 
>>> drm_i915_mocs_table *table,
>>>   static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>                   const struct drm_i915_mocs_table *table)
>>>   {
>>> +    struct drm_i915_private *i915 = rq->i915;
>>>       unsigned int i;
>>>       u32 *cs;
>>>   -    if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>>> +    if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>>           return -ENODEV;
>>>   -    cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
>>> +    cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>>>       if (IS_ERR(cs))
>>>           return PTR_ERR(cs);
>>>   -    *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
>>> +    *cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>>>         for (i = 0; i < table->size/2; i++) {
>>>           *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>> @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct 
>>> i915_request *rq,
>>>        * this will be uncached. Leave the last pair uninitialised as
>>>        * they are reserved by the hardware.
>>>        */
>>> -    for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
>>> +    for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>>>           *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>>           *cs++ = l3cc_combine(table, 0, 0);
>>>       }
>>> @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct 
>>> drm_i915_private *dev_priv)
>>>        * this will be uncached. Leave the last pair as initialised as
>>>        * they are reserved by the hardware.
>>>        */
>>> -    for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
>>> +    for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>>>           I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>>>   }
>>>
> 
>
Lucas De Marchi Jan. 5, 2019, 1:33 a.m. UTC | #4
On Fri, Dec 21, 2018 at 12:29:41PM +0000, Tvrtko Ursulin wrote:
> 
> On 14/12/2018 18:20, Lucas De Marchi wrote:
> > From: Tomasz Lis <tomasz.lis@intel.com>
> > 
> > The table has been unified across OSes to minimize virtualization overhead.
> > 
> > The MOCS table is now published as part of bspec, and versioned. Entries
> > are supposed to never be modified, but new ones can be added. Adding
> > entries increases table version. The patch includes version 1 entries.
> > 
> > Meaning of each entry is now explained in bspec, and user mode clients
> > are expected to know what each entry means. The 3 entries used for previous
> > platforms are still compatible with their legacy definitions, but that is
> > not guaranteed to be true for future platforms.
> > 
> > v2: Fixed SCC values, improved commit comment (Daniele)
> > v3: Improved MOCS table comment (Daniele)
> > v4: Moved new entries below gen9 ones. Put common entries into
> >      definition to be used in multiple arrays. (Lucas)
> > v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
> >      to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
> > v6: Removed definitions of reserved entries. (Michal)
> >      Increased limit of entries sent to the hardware on gen11+.
> > v7: Simplify table as done for previou gens (Lucas)
> > 
> > BSpec: 34007
> > BSpec: 560
> > 
> > Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> > Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
> > Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> > ---
> >   drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
> >   1 file changed, 162 insertions(+), 25 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> > index 577633cefb8a..dfc4edea020f 100644
> > --- a/drivers/gpu/drm/i915/intel_mocs.c
> > +++ b/drivers/gpu/drm/i915/intel_mocs.c
> > @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
> >   #define LE_SCC(value)		((value) << 8)
> >   #define LE_PFM(value)		((value) << 11)
> >   #define LE_SCF(value)		((value) << 14)
> > +#define LE_COS(value)		((value) << 15)
> > +#define LE_SSE(value)		((value) << 17)
> >   /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
> >   #define L3_ESC(value)		((value) << 0)
> > @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
> >   /* Helper defines */
> >   #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
> > +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
> > +
> > +#define NUM_MOCS_ENTRIES(i915) \
> > +	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
> >   /* (e)LLC caching options */
> >   #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
> > @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
> >    * LNCFCMOCS0 - LNCFCMOCS32 registers.
> >    *
> >    * These tables are intended to be kept reasonably consistent across
> > - * platforms. However some of the fields are not applicable to all of
> > - * them.
> > + * HW platforms, and for ICL+, be identical across OSes. To achieve
> > + * that, for Icelake and above, list of entries is published as part
> > + * of bspec.
> >    *
> >    * Entries not part of the following tables are undefined as far as
> > - * userspace is concerned and shouldn't be relied upon.  For the time
> > - * being they will be implicitly initialized to the strictest caching
> > - * configuration (uncached) to guarantee forwards compatibility with
> > - * userspace programs written against more recent kernels providing
> > - * additional MOCS entries.
> > + * userspace is concerned and shouldn't be relied upon.
> > + *
> > + * The last two entries are reserved by the hardware. For ICL+ they
> > + * should be initialized according to bspec and never used, for older
> > + * platforms they should never be written to.
> >    *
> > - * NOTE: These tables MUST start with being uncached and the length
> > - *       MUST be less than 63 as the last two registers are reserved
> > - *       by the hardware.  These tables are part of the kernel ABI and
> > - *       may only be updated incrementally by adding entries at the
> > - *       end.
> > + * NOTE: These tables are part of bspec and defined as part of hardware
> > + *       interface for ICL+. For older platforms, they are part of kernel
> > + *       ABI. It is expected that existing entries will remain constant
> > + *       and the tables will only be updated by adding new entries.
> >    */
> >   #define GEN9_MOCS_ENTRIES \
> > @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> >   	},
> >   };
> > +#define GEN11_MOCS_ENTRIES \
> > +	[0] = { \
> > +		/* Base - Uncached (Deprecated) */ \
> > +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[1] = { \
> > +		/* Base - L3 + LeCC:PAT (Deprecated) */ \
> > +		.control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[2] = { \
> > +		/* Base - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[3] = { \
> > +		/* Base - Uncached */ \
> > +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[4] = { \
> > +		/* Base - L3 */ \
> > +		.control_value = LE_1_UC | LE_TC_1_LLC, \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[5] = { \
> > +		/* Base - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[6] = { \
> > +		/* Age 0 - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[7] = { \
> > +		/* Age 0 - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[8] = { \
> > +		/* Age: Don't Chg. - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[9] = { \
> > +		/* Age: Don't Chg. - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[10] = { \
> > +		/* No AOM - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[11] = { \
> > +		/* No AOM - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[12] = { \
> > +		/* No AOM; Age 0 - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[13] = { \
> > +		/* No AOM; Age 0 - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[14] = { \
> > +		/* No AOM; Age:DC - LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[15] = { \
> > +		/* No AOM; Age:DC - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[18] = { \
> > +		/* Self-Snoop - L3 + LLC */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[19] = { \
> > +		/* Skip Caching - L3 + LLC(12.5%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[20] = { \
> > +		/* Skip Caching - L3 + LLC(25%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[21] = { \
> > +		/* Skip Caching - L3 + LLC(50%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[22] = { \
> > +		/* Skip Caching - L3 + LLC(75%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> > +	[23] = { \
> > +		/* Skip Caching - L3 + LLC(87.5%) */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
> > +		.l3cc_value = L3_3_WB \
> > +	}, \
> 
> There is a hole of unused entries here which I think comes to play from the
> emit functions later.
> 
> > +	[62] = { \
> > +		/* HW Reserved - SW program but never use */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> > +		.l3cc_value = L3_1_UC \
> > +	}, \
> > +	[63] = { \
> > +		/* HW Reserved - SW program but never use */ \
> > +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
> > +		.l3cc_value = L3_1_UC \
> > +	},
> > +
> > +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
> > +	GEN11_MOCS_ENTRIES
> > +};
> > +
> >   /**
> >    * get_mocs_settings()
> >    * @dev_priv:	i915 device.
> > @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
> >   {
> >   	bool result = false;
> > -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
> > -	    IS_ICELAKE(dev_priv)) {
> > +	if (IS_ICELAKE(dev_priv)) {
> > +		table->size  = ARRAY_SIZE(icelake_mocs_table);
> 
> So effectively this is always 64 due last two reserved entries.

what do you mean by *always* ? It is 64 for icelake.

> 
> > +		table->table = icelake_mocs_table;
> > +		result = true;
> > +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
> >   		table->size  = ARRAY_SIZE(skylake_mocs_table);
> >   		table->table = skylake_mocs_table;
> >   		result = true;
> > @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   	if (!get_mocs_settings(dev_priv, &table))
> >   		return;
> > -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
> > +	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
> >   	for (index = 0; index < table.size; index++)
> >   		I915_WRITE(mocs_register(engine->id, index),
> > @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   	 * Entry 0 in the table is uncached - so we are just writing
> >   	 * that value to all the used entries.
> >   	 */
> > -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
> > +	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
> >   		I915_WRITE(mocs_register(engine->id, index),
> >   			   table.table[0].control_value);
> >   }
> > @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
> >   static int emit_mocs_control_table(struct i915_request *rq,
> >   				   const struct drm_i915_mocs_table *table)
> >   {
> > +	struct drm_i915_private *i915 = rq->i915;
> >   	enum intel_engine_id engine = rq->engine->id;
> >   	unsigned int index;
> >   	u32 *cs;
> > -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> > +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
> >   		return -ENODEV;
> > -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
> > +	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
> >   	if (IS_ERR(cs))
> >   		return PTR_ERR(cs);
> > -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
> > +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
> >   	for (index = 0; index < table->size; index++) {
> >   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> > @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
> >   	 * Entry 0 in the table is uncached - so we are just writing
> >   	 * that value to all the used entries.
> >   	 */
> > -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
> > +	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
> >   		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
> >   		*cs++ = table->table[0].control_value;
> 
> So in init and emit functions the behaviour is now different due reserved
> entries.

indeed

> 
> Where before (and according the the comment which this patch removes - why?)

humn... but the comment is still there. Are we talking about the same
comment?

	/*
	 * Ok, now set the unused entries to uncached. These entries
	 * are officially undefined and no contract for the contents
	 * and settings is given for these entries.
	 *
	 * Entry 0 in the table is uncached - so we are just writing
	 * that value to all the used entries.
	 */

> we were setting unused entries to uncached, on Icelake they will be set to
> whatever all zeros is - LE_PAGETABLE?
> 
> If that is not desired, we may need to transition to a scheme where struct
> drm_i915_mocs_entry starts holding the table index, and the static array is
> not indexed by it.

agreed

> 
> It would also save the unused hole of zeros on Icelake which would probably
> offset the extra used space.

well, but at the expense of the additional field on the used entries and
additional complexity in the code - we would lose the logic here to
override an entry unless we keep track what has already been set.


Lucas De Marchi

> 
> So I think someone needs to clarify what is the desired state for unused
> entries on Icelake.
> 
> Regards,
> 
> Tvrtko
> 
> >   	}
> > @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
> >   static int emit_mocs_l3cc_table(struct i915_request *rq,
> >   				const struct drm_i915_mocs_table *table)
> >   {
> > +	struct drm_i915_private *i915 = rq->i915;
> >   	unsigned int i;
> >   	u32 *cs;
> > -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
> > +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
> >   		return -ENODEV;
> > -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
> > +	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
> >   	if (IS_ERR(cs))
> >   		return PTR_ERR(cs);
> > -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
> > +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
> >   	for (i = 0; i < table->size/2; i++) {
> >   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> > @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
> >   	 * this will be uncached. Leave the last pair uninitialised as
> >   	 * they are reserved by the hardware.
> >   	 */
> > -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
> > +	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
> >   		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
> >   		*cs++ = l3cc_combine(table, 0, 0);
> >   	}
> > @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
> >   	 * this will be uncached. Leave the last pair as initialised as
> >   	 * they are reserved by the hardware.
> >   	 */
> > -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
> > +	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
> >   		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
> >   }
> >
Tvrtko Ursulin Jan. 7, 2019, 10:19 a.m. UTC | #5
On 05/01/2019 01:33, Lucas De Marchi wrote:
> On Fri, Dec 21, 2018 at 12:29:41PM +0000, Tvrtko Ursulin wrote:
>>
>> On 14/12/2018 18:20, Lucas De Marchi wrote:
>>> From: Tomasz Lis <tomasz.lis@intel.com>
>>>
>>> The table has been unified across OSes to minimize virtualization overhead.
>>>
>>> The MOCS table is now published as part of bspec, and versioned. Entries
>>> are supposed to never be modified, but new ones can be added. Adding
>>> entries increases table version. The patch includes version 1 entries.
>>>
>>> Meaning of each entry is now explained in bspec, and user mode clients
>>> are expected to know what each entry means. The 3 entries used for previous
>>> platforms are still compatible with their legacy definitions, but that is
>>> not guaranteed to be true for future platforms.
>>>
>>> v2: Fixed SCC values, improved commit comment (Daniele)
>>> v3: Improved MOCS table comment (Daniele)
>>> v4: Moved new entries below gen9 ones. Put common entries into
>>>       definition to be used in multiple arrays. (Lucas)
>>> v5: Made defines for or-ing flags. Renamed macros from MOCS_TABLE
>>>       to MOCS_ENTRIES. Switched LE_CoS to upper case. (Joonas)
>>> v6: Removed definitions of reserved entries. (Michal)
>>>       Increased limit of entries sent to the hardware on gen11+.
>>> v7: Simplify table as done for previou gens (Lucas)
>>>
>>> BSpec: 34007
>>> BSpec: 560
>>>
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/intel_mocs.c | 187 ++++++++++++++++++++++++++----
>>>    1 file changed, 162 insertions(+), 25 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
>>> index 577633cefb8a..dfc4edea020f 100644
>>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>>> @@ -44,6 +44,8 @@ struct drm_i915_mocs_table {
>>>    #define LE_SCC(value)		((value) << 8)
>>>    #define LE_PFM(value)		((value) << 11)
>>>    #define LE_SCF(value)		((value) << 14)
>>> +#define LE_COS(value)		((value) << 15)
>>> +#define LE_SSE(value)		((value) << 17)
>>>    /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
>>>    #define L3_ESC(value)		((value) << 0)
>>> @@ -52,6 +54,10 @@ struct drm_i915_mocs_table {
>>>    /* Helper defines */
>>>    #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
>>> +#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
>>> +
>>> +#define NUM_MOCS_ENTRIES(i915) \
>>> +	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
>>>    /* (e)LLC caching options */
>>>    #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
>>> @@ -80,21 +86,21 @@ struct drm_i915_mocs_table {
>>>     * LNCFCMOCS0 - LNCFCMOCS32 registers.
>>>     *
>>>     * These tables are intended to be kept reasonably consistent across
>>> - * platforms. However some of the fields are not applicable to all of
>>> - * them.
>>> + * HW platforms, and for ICL+, be identical across OSes. To achieve
>>> + * that, for Icelake and above, list of entries is published as part
>>> + * of bspec.
>>>     *
>>>     * Entries not part of the following tables are undefined as far as
>>> - * userspace is concerned and shouldn't be relied upon.  For the time
>>> - * being they will be implicitly initialized to the strictest caching
>>> - * configuration (uncached) to guarantee forwards compatibility with
>>> - * userspace programs written against more recent kernels providing
>>> - * additional MOCS entries.
>>> + * userspace is concerned and shouldn't be relied upon.
>>> + *
>>> + * The last two entries are reserved by the hardware. For ICL+ they
>>> + * should be initialized according to bspec and never used, for older
>>> + * platforms they should never be written to.
>>>     *
>>> - * NOTE: These tables MUST start with being uncached and the length
>>> - *       MUST be less than 63 as the last two registers are reserved
>>> - *       by the hardware.  These tables are part of the kernel ABI and
>>> - *       may only be updated incrementally by adding entries at the
>>> - *       end.
>>> + * NOTE: These tables are part of bspec and defined as part of hardware
>>> + *       interface for ICL+. For older platforms, they are part of kernel
>>> + *       ABI. It is expected that existing entries will remain constant
>>> + *       and the tables will only be updated by adding new entries.
>>>     */
>>>    #define GEN9_MOCS_ENTRIES \
>>> @@ -132,6 +138,132 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>>>    	},
>>>    };
>>> +#define GEN11_MOCS_ENTRIES \
>>> +	[0] = { \
>>> +		/* Base - Uncached (Deprecated) */ \
>>> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[1] = { \
>>> +		/* Base - L3 + LeCC:PAT (Deprecated) */ \
>>> +		.control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[2] = { \
>>> +		/* Base - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[3] = { \
>>> +		/* Base - Uncached */ \
>>> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[4] = { \
>>> +		/* Base - L3 */ \
>>> +		.control_value = LE_1_UC | LE_TC_1_LLC, \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[5] = { \
>>> +		/* Base - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[6] = { \
>>> +		/* Age 0 - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[7] = { \
>>> +		/* Age 0 - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[8] = { \
>>> +		/* Age: Don't Chg. - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[9] = { \
>>> +		/* Age: Don't Chg. - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[10] = { \
>>> +		/* No AOM - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[11] = { \
>>> +		/* No AOM - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[12] = { \
>>> +		/* No AOM; Age 0 - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[13] = { \
>>> +		/* No AOM; Age 0 - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[14] = { \
>>> +		/* No AOM; Age:DC - LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[15] = { \
>>> +		/* No AOM; Age:DC - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[18] = { \
>>> +		/* Self-Snoop - L3 + LLC */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[19] = { \
>>> +		/* Skip Caching - L3 + LLC(12.5%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[20] = { \
>>> +		/* Skip Caching - L3 + LLC(25%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[21] = { \
>>> +		/* Skip Caching - L3 + LLC(50%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[22] = { \
>>> +		/* Skip Caching - L3 + LLC(75%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>> +	[23] = { \
>>> +		/* Skip Caching - L3 + LLC(87.5%) */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
>>> +		.l3cc_value = L3_3_WB \
>>> +	}, \
>>
>> There is a hole of unused entries here which I think comes to play from the
>> emit functions later.
>>
>>> +	[62] = { \
>>> +		/* HW Reserved - SW program but never use */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	}, \
>>> +	[63] = { \
>>> +		/* HW Reserved - SW program but never use */ \
>>> +		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
>>> +		.l3cc_value = L3_1_UC \
>>> +	},
>>> +
>>> +static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
>>> +	GEN11_MOCS_ENTRIES
>>> +};
>>> +
>>>    /**
>>>     * get_mocs_settings()
>>>     * @dev_priv:	i915 device.
>>> @@ -149,8 +281,11 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv,
>>>    {
>>>    	bool result = false;
>>> -	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
>>> -	    IS_ICELAKE(dev_priv)) {
>>> +	if (IS_ICELAKE(dev_priv)) {
>>> +		table->size  = ARRAY_SIZE(icelake_mocs_table);
>>
>> So effectively this is always 64 due last two reserved entries.
> 
> what do you mean by *always* ? It is 64 for icelake.

I think that meant I realized there is a lot of unused/initialized 
entries in the table for ICL.

> 
>>
>>> +		table->table = icelake_mocs_table;
>>> +		result = true;
>>> +	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
>>>    		table->size  = ARRAY_SIZE(skylake_mocs_table);
>>>    		table->table = skylake_mocs_table;
>>>    		result = true;
>>> @@ -213,7 +348,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    	if (!get_mocs_settings(dev_priv, &table))
>>>    		return;
>>> -	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
>>> +	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
>>>    	for (index = 0; index < table.size; index++)
>>>    		I915_WRITE(mocs_register(engine->id, index),
>>> @@ -227,7 +362,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    	 * Entry 0 in the table is uncached - so we are just writing
>>>    	 * that value to all the used entries.
>>>    	 */
>>> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
>>> +	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
>>>    		I915_WRITE(mocs_register(engine->id, index),
>>>    			   table.table[0].control_value);
>>>    }
>>> @@ -245,18 +380,19 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
>>>    static int emit_mocs_control_table(struct i915_request *rq,
>>>    				   const struct drm_i915_mocs_table *table)
>>>    {
>>> +	struct drm_i915_private *i915 = rq->i915;
>>>    	enum intel_engine_id engine = rq->engine->id;
>>>    	unsigned int index;
>>>    	u32 *cs;
>>> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>>> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>>    		return -ENODEV;
>>> -	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
>>> +	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
>>>    	if (IS_ERR(cs))
>>>    		return PTR_ERR(cs);
>>> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
>>> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
>>>    	for (index = 0; index < table->size; index++) {
>>>    		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>> @@ -271,7 +407,7 @@ static int emit_mocs_control_table(struct i915_request *rq,
>>>    	 * Entry 0 in the table is uncached - so we are just writing
>>>    	 * that value to all the used entries.
>>>    	 */
>>> -	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
>>> +	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
>>>    		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
>>>    		*cs++ = table->table[0].control_value;
>>
>> So in init and emit functions the behaviour is now different due reserved
>> entries.
> 
> indeed
> 
>>
>> Where before (and according the the comment which this patch removes - why?)
> 
> humn... but the comment is still there. Are we talking about the same
> comment?
> 
> 	/*
> 	 * Ok, now set the unused entries to uncached. These entries
> 	 * are officially undefined and no contract for the contents
> 	 * and settings is given for these entries.
> 	 *
> 	 * Entry 0 in the table is uncached - so we are just writing
> 	 * that value to all the used entries.
> 	 */

I did not notice that comment. There is a comment at the top of this 
file which the patch removes/changes without any mention in the commit 
message of the change of behaviour:

"""
   * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.

"""

But this comment you mention is also now misleading - since on Icelake 
there will be no unused entries to be initialized to uncached, as far as 
intel_mocs_init_engine can see.

> 
>> we were setting unused entries to uncached, on Icelake they will be set to
>> whatever all zeros is - LE_PAGETABLE?
>>
>> If that is not desired, we may need to transition to a scheme where struct
>> drm_i915_mocs_entry starts holding the table index, and the static array is
>> not indexed by it.
> 
> agreed
> 
>>
>> It would also save the unused hole of zeros on Icelake which would probably
>> offset the extra used space.
> 
> well, but at the expense of the additional field on the used entries and
> additional complexity in the code - we would lose the logic here to
> override an entry unless we keep track what has already been set.

Yes agreed. If everyone is OK to have unused entries be different 
semantics per platforms (Icelake = LE_PAGETABLE, others = LE_UC) then as 
said before, lets just add the commentary to the commit message and fix 
up all the comments.

Regards,

Tvrtko

> 
> 
> Lucas De Marchi
> 
>>
>> So I think someone needs to clarify what is the desired state for unused
>> entries on Icelake.
>>
>> Regards,
>>
>> Tvrtko
>>
>>>    	}
>>> @@ -304,17 +440,18 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
>>>    static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>    				const struct drm_i915_mocs_table *table)
>>>    {
>>> +	struct drm_i915_private *i915 = rq->i915;
>>>    	unsigned int i;
>>>    	u32 *cs;
>>> -	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
>>> +	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
>>>    		return -ENODEV;
>>> -	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
>>> +	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
>>>    	if (IS_ERR(cs))
>>>    		return PTR_ERR(cs);
>>> -	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
>>> +	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
>>>    	for (i = 0; i < table->size/2; i++) {
>>>    		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>> @@ -333,7 +470,7 @@ static int emit_mocs_l3cc_table(struct i915_request *rq,
>>>    	 * this will be uncached. Leave the last pair uninitialised as
>>>    	 * they are reserved by the hardware.
>>>    	 */
>>> -	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
>>> +	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
>>>    		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
>>>    		*cs++ = l3cc_combine(table, 0, 0);
>>>    	}
>>> @@ -380,7 +517,7 @@ void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
>>>    	 * this will be uncached. Leave the last pair as initialised as
>>>    	 * they are reserved by the hardware.
>>>    	 */
>>> -	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
>>> +	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
>>>    		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
>>>    }
>>>
>
Chris Wilson Jan. 7, 2019, 10:28 a.m. UTC | #6
Quoting Tvrtko Ursulin (2019-01-07 10:19:44)
> 
> On 05/01/2019 01:33, Lucas De Marchi wrote:
> > On Fri, Dec 21, 2018 at 12:29:41PM +0000, Tvrtko Ursulin wrote:
> >>
> >> On 14/12/2018 18:20, Lucas De Marchi wrote:
> >> we were setting unused entries to uncached, on Icelake they will be set to
> >> whatever all zeros is - LE_PAGETABLE?
> >>
> >> If that is not desired, we may need to transition to a scheme where struct
> >> drm_i915_mocs_entry starts holding the table index, and the static array is
> >> not indexed by it.
> > 
> > agreed
> > 
> >>
> >> It would also save the unused hole of zeros on Icelake which would probably
> >> offset the extra used space.
> > 
> > well, but at the expense of the additional field on the used entries and
> > additional complexity in the code - we would lose the logic here to
> > override an entry unless we keep track what has already been set.
> 
> Yes agreed. If everyone is OK to have unused entries be different 
> semantics per platforms (Icelake = LE_PAGETABLE, others = LE_UC) then as 
> said before, lets just add the commentary to the commit message and fix 
> up all the comments.

Or precede with the patch to make everyone use PTE for unknown (I've
argued that is better wrt to kernel domain tracking ;)
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 577633cefb8a..dfc4edea020f 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -44,6 +44,8 @@  struct drm_i915_mocs_table {
 #define LE_SCC(value)		((value) << 8)
 #define LE_PFM(value)		((value) << 11)
 #define LE_SCF(value)		((value) << 14)
+#define LE_COS(value)		((value) << 15)
+#define LE_SSE(value)		((value) << 17)
 
 /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */
 #define L3_ESC(value)		((value) << 0)
@@ -52,6 +54,10 @@  struct drm_i915_mocs_table {
 
 /* Helper defines */
 #define GEN9_NUM_MOCS_ENTRIES	62  /* 62 out of 64 - 63 & 64 are reserved. */
+#define GEN11_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
+
+#define NUM_MOCS_ENTRIES(i915) \
+	(INTEL_GEN(i915) < 11 ? GEN9_NUM_MOCS_ENTRIES : GEN11_NUM_MOCS_ENTRIES)
 
 /* (e)LLC caching options */
 #define LE_0_PAGETABLE		_LE_CACHEABILITY(0)
@@ -80,21 +86,21 @@  struct drm_i915_mocs_table {
  * LNCFCMOCS0 - LNCFCMOCS32 registers.
  *
  * These tables are intended to be kept reasonably consistent across
- * platforms. However some of the fields are not applicable to all of
- * them.
+ * HW platforms, and for ICL+, be identical across OSes. To achieve
+ * that, for Icelake and above, list of entries is published as part
+ * of bspec.
  *
  * Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon.  For the time
- * being they will be implicitly initialized to the strictest caching
- * configuration (uncached) to guarantee forwards compatibility with
- * userspace programs written against more recent kernels providing
- * additional MOCS entries.
+ * userspace is concerned and shouldn't be relied upon.
+ *
+ * The last two entries are reserved by the hardware. For ICL+ they
+ * should be initialized according to bspec and never used, for older
+ * platforms they should never be written to.
  *
- * NOTE: These tables MUST start with being uncached and the length
- *       MUST be less than 63 as the last two registers are reserved
- *       by the hardware.  These tables are part of the kernel ABI and
- *       may only be updated incrementally by adding entries at the
- *       end.
+ * NOTE: These tables are part of bspec and defined as part of hardware
+ *       interface for ICL+. For older platforms, they are part of kernel
+ *       ABI. It is expected that existing entries will remain constant
+ *       and the tables will only be updated by adding new entries.
  */
 
 #define GEN9_MOCS_ENTRIES \
@@ -132,6 +138,132 @@  static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 	},
 };
 
+#define GEN11_MOCS_ENTRIES \
+	[0] = { \
+		/* Base - Uncached (Deprecated) */ \
+		.control_value = LE_1_UC | LE_TC_1_LLC, \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[1] = { \
+		/* Base - L3 + LeCC:PAT (Deprecated) */ \
+		.control_value = LE_0_PAGETABLE | LE_TC_1_LLC, \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[2] = { \
+		/* Base - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[3] = { \
+		/* Base - Uncached */ \
+		.control_value = LE_1_UC | LE_TC_1_LLC, \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[4] = { \
+		/* Base - L3 */ \
+		.control_value = LE_1_UC | LE_TC_1_LLC, \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[5] = { \
+		/* Base - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[6] = { \
+		/* Age 0 - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[7] = { \
+		/* Age 0 - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[8] = { \
+		/* Age: Don't Chg. - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[9] = { \
+		/* Age: Don't Chg. - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[10] = { \
+		/* No AOM - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[11] = { \
+		/* No AOM - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_AOM(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[12] = { \
+		/* No AOM; Age 0 - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[13] = { \
+		/* No AOM; Age 0 - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(1) | LE_AOM(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[14] = { \
+		/* No AOM; Age:DC - LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[15] = { \
+		/* No AOM; Age:DC - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[18] = { \
+		/* Self-Snoop - L3 + LLC */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[19] = { \
+		/* Skip Caching - L3 + LLC(12.5%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(7), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[20] = { \
+		/* Skip Caching - L3 + LLC(25%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(3), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[21] = { \
+		/* Skip Caching - L3 + LLC(50%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SCC(1), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[22] = { \
+		/* Skip Caching - L3 + LLC(75%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(3), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[23] = { \
+		/* Skip Caching - L3 + LLC(87.5%) */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_RSC(1) | LE_SCC(7), \
+		.l3cc_value = L3_3_WB \
+	}, \
+	[62] = { \
+		/* HW Reserved - SW program but never use */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		.l3cc_value = L3_1_UC \
+	}, \
+	[63] = { \
+		/* HW Reserved - SW program but never use */ \
+		.control_value = LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
+		.l3cc_value = L3_1_UC \
+	},
+
+static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+	GEN11_MOCS_ENTRIES
+};
+
 /**
  * get_mocs_settings()
  * @dev_priv:	i915 device.
@@ -149,8 +281,11 @@  static bool get_mocs_settings(struct drm_i915_private *dev_priv,
 {
 	bool result = false;
 
-	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
-	    IS_ICELAKE(dev_priv)) {
+	if (IS_ICELAKE(dev_priv)) {
+		table->size  = ARRAY_SIZE(icelake_mocs_table);
+		table->table = icelake_mocs_table;
+		result = true;
+	} else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
 		table->size  = ARRAY_SIZE(skylake_mocs_table);
 		table->table = skylake_mocs_table;
 		result = true;
@@ -213,7 +348,7 @@  void intel_mocs_init_engine(struct intel_engine_cs *engine)
 	if (!get_mocs_settings(dev_priv, &table))
 		return;
 
-	GEM_BUG_ON(table.size > GEN9_NUM_MOCS_ENTRIES);
+	GEM_BUG_ON(table.size > NUM_MOCS_ENTRIES(dev_priv));
 
 	for (index = 0; index < table.size; index++)
 		I915_WRITE(mocs_register(engine->id, index),
@@ -227,7 +362,7 @@  void intel_mocs_init_engine(struct intel_engine_cs *engine)
 	 * Entry 0 in the table is uncached - so we are just writing
 	 * that value to all the used entries.
 	 */
-	for (; index < GEN9_NUM_MOCS_ENTRIES; index++)
+	for (; index < NUM_MOCS_ENTRIES(dev_priv); index++)
 		I915_WRITE(mocs_register(engine->id, index),
 			   table.table[0].control_value);
 }
@@ -245,18 +380,19 @@  void intel_mocs_init_engine(struct intel_engine_cs *engine)
 static int emit_mocs_control_table(struct i915_request *rq,
 				   const struct drm_i915_mocs_table *table)
 {
+	struct drm_i915_private *i915 = rq->i915;
 	enum intel_engine_id engine = rq->engine->id;
 	unsigned int index;
 	u32 *cs;
 
-	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
+	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
 		return -ENODEV;
 
-	cs = intel_ring_begin(rq, 2 + 2 * GEN9_NUM_MOCS_ENTRIES);
+	cs = intel_ring_begin(rq, 2 + 2 * NUM_MOCS_ENTRIES(i915));
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES);
+	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915));
 
 	for (index = 0; index < table->size; index++) {
 		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
@@ -271,7 +407,7 @@  static int emit_mocs_control_table(struct i915_request *rq,
 	 * Entry 0 in the table is uncached - so we are just writing
 	 * that value to all the used entries.
 	 */
-	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
+	for (; index < NUM_MOCS_ENTRIES(i915); index++) {
 		*cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
 		*cs++ = table->table[0].control_value;
 	}
@@ -304,17 +440,18 @@  static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
 static int emit_mocs_l3cc_table(struct i915_request *rq,
 				const struct drm_i915_mocs_table *table)
 {
+	struct drm_i915_private *i915 = rq->i915;
 	unsigned int i;
 	u32 *cs;
 
-	if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES))
+	if (WARN_ON(table->size > NUM_MOCS_ENTRIES(i915)))
 		return -ENODEV;
 
-	cs = intel_ring_begin(rq, 2 + GEN9_NUM_MOCS_ENTRIES);
+	cs = intel_ring_begin(rq, 2 + NUM_MOCS_ENTRIES(i915));
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2);
+	*cs++ = MI_LOAD_REGISTER_IMM(NUM_MOCS_ENTRIES(i915) / 2);
 
 	for (i = 0; i < table->size/2; i++) {
 		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
@@ -333,7 +470,7 @@  static int emit_mocs_l3cc_table(struct i915_request *rq,
 	 * this will be uncached. Leave the last pair uninitialised as
 	 * they are reserved by the hardware.
 	 */
-	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
+	for (; i < NUM_MOCS_ENTRIES(i915) / 2; i++) {
 		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
 		*cs++ = l3cc_combine(table, 0, 0);
 	}
@@ -380,7 +517,7 @@  void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
 	 * this will be uncached. Leave the last pair as initialised as
 	 * they are reserved by the hardware.
 	 */
-	for (; i < (GEN9_NUM_MOCS_ENTRIES / 2); i++)
+	for (; i < (NUM_MOCS_ENTRIES(dev_priv) / 2); i++)
 		I915_WRITE(GEN9_LNCFCMOCS(i), l3cc_combine(&table, 0, 0));
 }