diff mbox series

drm/i915/gt: Add separate MOCS table for Gen12 devices other than TGL/RKL

Message ID 20210907171639.1221287-1-ayaz.siddiqui@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/gt: Add separate MOCS table for Gen12 devices other than TGL/RKL | expand

Commit Message

Siddiqui, Ayaz A Sept. 7, 2021, 5:16 p.m. UTC
MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
While for other gen12 devices we need to set MOCS[1] as L3_WB,
So adding a new MOCS table for other gen 12 devices eg. ADL.

Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 4 deletions(-)

Comments

Matt Roper Sept. 7, 2021, 5:27 p.m. UTC | #1
On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> While for other gen12 devices we need to set MOCS[1] as L3_WB,
> So adding a new MOCS table for other gen 12 devices eg. ADL.
> 
> Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>

Yep, we overlooked that the TGL table still had an explicit entry for
I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
for MOCS[1].  The new table is the same as the TGL table, just with
I915_MOCS_PTE (1) removed.

Looks good to me,

Reviewed-by: Matt Roper <matthew.d.roper@intel.com>


> ---
>  drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++++++++++++++++++++++++---
>  1 file changed, 37 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index e96afd7beb49..c8d289b00de4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -315,6 +315,35 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
>  	MOCS_ENTRY(63, 0, L3_1_UC),
>  };
>  
> +static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
> +
> +	GEN11_MOCS_ENTRIES,
> +	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
> +	MOCS_ENTRY(48,
> +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> +		   L3_3_WB),
> +	/* Implicitly enable L1 - HDC:L1 + L3 */
> +	MOCS_ENTRY(49,
> +		   LE_1_UC | LE_TC_1_LLC,
> +		   L3_3_WB),
> +	/* Implicitly enable L1 - HDC:L1 + LLC */
> +	MOCS_ENTRY(50,
> +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> +		   L3_1_UC),
> +	/* Implicitly enable L1 - HDC:L1 */
> +	MOCS_ENTRY(51,
> +		   LE_1_UC | LE_TC_1_LLC,
> +		   L3_1_UC),
> +	/* HW Special Case (CCS) */
> +	MOCS_ENTRY(60,
> +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> +		   L3_1_UC),
> +	/* HW Special Case (Displayable) */
> +	MOCS_ENTRY(61,
> +		   LE_1_UC | LE_TC_1_LLC,
> +		   L3_3_WB),
> +};
> +
>  enum {
>  	HAS_GLOBAL_MOCS = BIT(0),
>  	HAS_ENGINE_MOCS = BIT(1),
> @@ -351,14 +380,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
>  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
>  		table->uc_index = 1;
>  		table->unused_entries_index = 5;
> -	} else if (GRAPHICS_VER(i915) >= 12) {
> +	} else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
> +		/* For TGL/RKL, Can't be changed now for ABI reasons */
>  		table->size  = ARRAY_SIZE(tgl_mocs_table);
>  		table->table = tgl_mocs_table;
>  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
>  		table->uc_index = 3;
> -		/* For TGL/RKL, Can't be changed now for ABI reasons */
> -		if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915))
> -			table->unused_entries_index = 2;
> +	} else if (GRAPHICS_VER(i915) >= 12) {
> +		table->size  = ARRAY_SIZE(gen12_mocs_table);
> +		table->table = gen12_mocs_table;
> +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> +		table->uc_index = 3;
> +		table->unused_entries_index = 2;
>  	} else if (GRAPHICS_VER(i915) == 11) {
>  		table->size  = ARRAY_SIZE(icl_mocs_table);
>  		table->table = icl_mocs_table;
> -- 
> 2.26.2
>
Ville Syrjälä Sept. 7, 2021, 5:41 p.m. UTC | #2
On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > 
> > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > Cc: Matt Roper <matthew.d.roper@intel.com>
> > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> 
> Yep, we overlooked that the TGL table still had an explicit entry for
> I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> for MOCS[1].  The new table is the same as the TGL table, just with
> I915_MOCS_PTE (1) removed.

And just how are people planning on handling display cacheability
control without a PTE MOCS entry? Is Mesa/etc. already making all
external bos uncached on these platforms just in case we might
scan out said bo?

> 
> Looks good to me,
> 
> Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
> 
> 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++++++++++++++++++++++++---
> >  1 file changed, 37 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > index e96afd7beb49..c8d289b00de4 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > @@ -315,6 +315,35 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
> >  	MOCS_ENTRY(63, 0, L3_1_UC),
> >  };
> >  
> > +static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
> > +
> > +	GEN11_MOCS_ENTRIES,
> > +	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
> > +	MOCS_ENTRY(48,
> > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > +		   L3_3_WB),
> > +	/* Implicitly enable L1 - HDC:L1 + L3 */
> > +	MOCS_ENTRY(49,
> > +		   LE_1_UC | LE_TC_1_LLC,
> > +		   L3_3_WB),
> > +	/* Implicitly enable L1 - HDC:L1 + LLC */
> > +	MOCS_ENTRY(50,
> > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > +		   L3_1_UC),
> > +	/* Implicitly enable L1 - HDC:L1 */
> > +	MOCS_ENTRY(51,
> > +		   LE_1_UC | LE_TC_1_LLC,
> > +		   L3_1_UC),
> > +	/* HW Special Case (CCS) */
> > +	MOCS_ENTRY(60,
> > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > +		   L3_1_UC),
> > +	/* HW Special Case (Displayable) */
> > +	MOCS_ENTRY(61,
> > +		   LE_1_UC | LE_TC_1_LLC,
> > +		   L3_3_WB),
> > +};
> > +
> >  enum {
> >  	HAS_GLOBAL_MOCS = BIT(0),
> >  	HAS_ENGINE_MOCS = BIT(1),
> > @@ -351,14 +380,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
> >  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> >  		table->uc_index = 1;
> >  		table->unused_entries_index = 5;
> > -	} else if (GRAPHICS_VER(i915) >= 12) {
> > +	} else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
> > +		/* For TGL/RKL, Can't be changed now for ABI reasons */
> >  		table->size  = ARRAY_SIZE(tgl_mocs_table);
> >  		table->table = tgl_mocs_table;
> >  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> >  		table->uc_index = 3;
> > -		/* For TGL/RKL, Can't be changed now for ABI reasons */
> > -		if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915))
> > -			table->unused_entries_index = 2;
> > +	} else if (GRAPHICS_VER(i915) >= 12) {
> > +		table->size  = ARRAY_SIZE(gen12_mocs_table);
> > +		table->table = gen12_mocs_table;
> > +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > +		table->uc_index = 3;
> > +		table->unused_entries_index = 2;
> >  	} else if (GRAPHICS_VER(i915) == 11) {
> >  		table->size  = ARRAY_SIZE(icl_mocs_table);
> >  		table->table = icl_mocs_table;
> > -- 
> > 2.26.2
> > 
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
> (916) 356-2795
Matt Roper Sept. 7, 2021, 6:19 p.m. UTC | #3
On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > 
> > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > 
> > Yep, we overlooked that the TGL table still had an explicit entry for
> > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > for MOCS[1].  The new table is the same as the TGL table, just with
> > I915_MOCS_PTE (1) removed.
> 
> And just how are people planning on handling display cacheability
> control without a PTE MOCS entry? Is Mesa/etc. already making all
> external bos uncached on these platforms just in case we might
> scan out said bo?

MOCS entry 1 has never been considered a valid MOCS table entry on gen12
platforms (despite the old #define, it's not actually related to PTE,
display, etc. anymore).  Userspace shouldn't be using entry 1, since its
settings are officially undefined, but because we had already defined a
value for it in the driver on our production gen12 platforms (TGL/RKL),
we can't change that value now without an ABI break; we just have to
leave it be on TGL/RKL.

Any userspace trying to use entry 1 on gen12 (or any other undocumented
table entry) is still buggy and should still be updated.  For the gen12
platforms going forward (ADL-S, ADL-P, and anything else that shows up
in the future) we can just follow the guidance of setting all
invalid/unused entries as L3 cached from day 1.


Matt

> 
> > 
> > Looks good to me,
> > 
> > Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
> > 
> > 
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++++++++++++++++++++++++---
> > >  1 file changed, 37 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > index e96afd7beb49..c8d289b00de4 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > @@ -315,6 +315,35 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
> > >  	MOCS_ENTRY(63, 0, L3_1_UC),
> > >  };
> > >  
> > > +static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
> > > +
> > > +	GEN11_MOCS_ENTRIES,
> > > +	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
> > > +	MOCS_ENTRY(48,
> > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > +		   L3_3_WB),
> > > +	/* Implicitly enable L1 - HDC:L1 + L3 */
> > > +	MOCS_ENTRY(49,
> > > +		   LE_1_UC | LE_TC_1_LLC,
> > > +		   L3_3_WB),
> > > +	/* Implicitly enable L1 - HDC:L1 + LLC */
> > > +	MOCS_ENTRY(50,
> > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > +		   L3_1_UC),
> > > +	/* Implicitly enable L1 - HDC:L1 */
> > > +	MOCS_ENTRY(51,
> > > +		   LE_1_UC | LE_TC_1_LLC,
> > > +		   L3_1_UC),
> > > +	/* HW Special Case (CCS) */
> > > +	MOCS_ENTRY(60,
> > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > +		   L3_1_UC),
> > > +	/* HW Special Case (Displayable) */
> > > +	MOCS_ENTRY(61,
> > > +		   LE_1_UC | LE_TC_1_LLC,
> > > +		   L3_3_WB),
> > > +};
> > > +
> > >  enum {
> > >  	HAS_GLOBAL_MOCS = BIT(0),
> > >  	HAS_ENGINE_MOCS = BIT(1),
> > > @@ -351,14 +380,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
> > >  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > >  		table->uc_index = 1;
> > >  		table->unused_entries_index = 5;
> > > -	} else if (GRAPHICS_VER(i915) >= 12) {
> > > +	} else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
> > > +		/* For TGL/RKL, Can't be changed now for ABI reasons */
> > >  		table->size  = ARRAY_SIZE(tgl_mocs_table);
> > >  		table->table = tgl_mocs_table;
> > >  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > >  		table->uc_index = 3;
> > > -		/* For TGL/RKL, Can't be changed now for ABI reasons */
> > > -		if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915))
> > > -			table->unused_entries_index = 2;
> > > +	} else if (GRAPHICS_VER(i915) >= 12) {
> > > +		table->size  = ARRAY_SIZE(gen12_mocs_table);
> > > +		table->table = gen12_mocs_table;
> > > +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > > +		table->uc_index = 3;
> > > +		table->unused_entries_index = 2;
> > >  	} else if (GRAPHICS_VER(i915) == 11) {
> > >  		table->size  = ARRAY_SIZE(icl_mocs_table);
> > >  		table->table = icl_mocs_table;
> > > -- 
> > > 2.26.2
> > > 
> > 
> > -- 
> > Matt Roper
> > Graphics Software Engineer
> > VTT-OSGC Platform Enablement
> > Intel Corporation
> > (916) 356-2795
> 
> -- 
> Ville Syrjälä
> Intel
Matt Roper Sept. 8, 2021, 2:40 a.m. UTC | #4
On Wed, Sep 08, 2021 at 12:41:28AM +0000, Patchwork wrote:
> == Series Details ==
> 
> Series: drm/i915/gt: Add separate MOCS table for Gen12 devices other than TGL/RKL
> URL   : https://patchwork.freedesktop.org/series/94451/
> State : failure
> 
> == Summary ==
> 
> CI Bug Log - changes from CI_DRM_10558_full -> Patchwork_20982_full
> ====================================================
> 
> Summary
> -------
> 
>   **FAILURE**
> 
>   Serious unknown changes coming with Patchwork_20982_full absolutely need to be
>   verified manually.
>   
>   If you think the reported changes have nothing to do with the changes
>   introduced in Patchwork_20982_full, please notify your bug team to allow them
>   to document this new failure mode, which will reduce false positives in CI.
> 
>   
> 
> Possible new issues
> -------------------
> 
>   Here are the unknown changes that may have been introduced in Patchwork_20982_full:
> 
> ### IGT changes ###
> 
> #### Possible regressions ####
> 
>   * igt@kms_draw_crc@draw-method-rgb565-mmap-cpu-ytiled:
>     - shard-iclb:         [PASS][1] -> [DMESG-WARN][2]
>    [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb3/igt@kms_draw_crc@draw-method-rgb565-mmap-cpu-ytiled.html
>    [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb2/igt@kms_draw_crc@draw-method-rgb565-mmap-cpu-ytiled.html

https://gitlab.freedesktop.org/drm/intel/-/issues/3728

> 
>   * igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile:
>     - shard-iclb:         [PASS][3] -> [SKIP][4]
>    [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb7/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile.html
>    [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb2/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile.html

ICL display test behavior isn't related to the gen12 MOCS update from
this patch.

Applied to drm-intel-gt-next.  Thanks for the patch.


Matt

> 
>   
> Known issues
> ------------
> 
>   Here are the changes found in Patchwork_20982_full that come from known issues:
> 
> ### IGT changes ###
> 
> #### Issues hit ####
> 
>   * igt@gem_create@create-massive:
>     - shard-snb:          NOTRUN -> [DMESG-WARN][5] ([i915#3002]) +1 similar issue
>    [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-snb5/igt@gem_create@create-massive.html
> 
>   * igt@gem_ctx_persistence@legacy-engines-mixed:
>     - shard-snb:          NOTRUN -> [SKIP][6] ([fdo#109271] / [i915#1099]) +6 similar issues
>    [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-snb2/igt@gem_ctx_persistence@legacy-engines-mixed.html
> 
>   * igt@gem_eio@unwedge-stress:
>     - shard-tglb:         [PASS][7] -> [TIMEOUT][8] ([i915#2369] / [i915#3063] / [i915#3648])
>    [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-tglb7/igt@gem_eio@unwedge-stress.html
>    [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@gem_eio@unwedge-stress.html
> 
>   * igt@gem_exec_fair@basic-none@vcs1:
>     - shard-iclb:         NOTRUN -> [FAIL][9] ([i915#2842])
>    [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb2/igt@gem_exec_fair@basic-none@vcs1.html
> 
>   * igt@gem_exec_fair@basic-pace-solo@rcs0:
>     - shard-kbl:          [PASS][10] -> [FAIL][11] ([i915#2842])
>    [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-kbl4/igt@gem_exec_fair@basic-pace-solo@rcs0.html
>    [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-kbl2/igt@gem_exec_fair@basic-pace-solo@rcs0.html
> 
>   * igt@gem_exec_fair@basic-pace@bcs0:
>     - shard-tglb:         NOTRUN -> [FAIL][12] ([i915#2842]) +3 similar issues
>    [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@gem_exec_fair@basic-pace@bcs0.html
> 
>   * igt@gem_exec_fair@basic-pace@vecs0:
>     - shard-iclb:         [PASS][13] -> [FAIL][14] ([i915#2842])
>    [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb7/igt@gem_exec_fair@basic-pace@vecs0.html
>    [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@gem_exec_fair@basic-pace@vecs0.html
> 
>   * igt@gem_exec_fair@basic-throttle@rcs0:
>     - shard-glk:          [PASS][15] -> [FAIL][16] ([i915#2842])
>    [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-glk2/igt@gem_exec_fair@basic-throttle@rcs0.html
>    [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-glk2/igt@gem_exec_fair@basic-throttle@rcs0.html
>     - shard-iclb:         [PASS][17] -> [FAIL][18] ([i915#2849])
>    [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb8/igt@gem_exec_fair@basic-throttle@rcs0.html
>    [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb7/igt@gem_exec_fair@basic-throttle@rcs0.html
> 
>   * igt@gem_mmap_gtt@cpuset-big-copy-odd:
>     - shard-iclb:         [PASS][19] -> [FAIL][20] ([i915#307])
>    [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb6/igt@gem_mmap_gtt@cpuset-big-copy-odd.html
>    [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb1/igt@gem_mmap_gtt@cpuset-big-copy-odd.html
> 
>   * igt@gem_pwrite@basic-exhaustion:
>     - shard-apl:          NOTRUN -> [WARN][21] ([i915#2658])
>    [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl3/igt@gem_pwrite@basic-exhaustion.html
> 
>   * igt@gem_userptr_blits@access-control:
>     - shard-tglb:         NOTRUN -> [SKIP][22] ([i915#3297])
>    [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@gem_userptr_blits@access-control.html
> 
>   * igt@gem_userptr_blits@vma-merge:
>     - shard-apl:          NOTRUN -> [FAIL][23] ([i915#3318])
>    [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl8/igt@gem_userptr_blits@vma-merge.html
> 
>   * igt@gen9_exec_parse@basic-rejected:
>     - shard-tglb:         NOTRUN -> [SKIP][24] ([i915#2856]) +2 similar issues
>    [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb1/igt@gen9_exec_parse@basic-rejected.html
> 
>   * igt@gen9_exec_parse@bb-start-out:
>     - shard-iclb:         NOTRUN -> [SKIP][25] ([i915#2856])
>    [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@gen9_exec_parse@bb-start-out.html
> 
>   * igt@i915_pm_dc@dc6-psr:
>     - shard-iclb:         [PASS][26] -> [FAIL][27] ([i915#454])
>    [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb2/igt@i915_pm_dc@dc6-psr.html
>    [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb3/igt@i915_pm_dc@dc6-psr.html
> 
>   * igt@i915_pm_lpsp@screens-disabled:
>     - shard-tglb:         NOTRUN -> [SKIP][28] ([i915#1902])
>    [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@i915_pm_lpsp@screens-disabled.html
> 
>   * igt@i915_pm_rpm@pc8-residency:
>     - shard-tglb:         NOTRUN -> [SKIP][29] ([fdo#109506] / [i915#2411])
>    [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@i915_pm_rpm@pc8-residency.html
> 
>   * igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-0-async-flip:
>     - shard-skl:          NOTRUN -> [FAIL][30] ([i915#3722])
>    [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl8/igt@kms_big_fb@x-tiled-max-hw-stride-64bpp-rotate-0-async-flip.html
> 
>   * igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-180-hflip:
>     - shard-apl:          NOTRUN -> [SKIP][31] ([fdo#109271] / [i915#3777]) +2 similar issues
>    [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl3/igt@kms_big_fb@y-tiled-max-hw-stride-32bpp-rotate-180-hflip.html
> 
>   * igt@kms_big_fb@yf-tiled-8bpp-rotate-180:
>     - shard-iclb:         NOTRUN -> [SKIP][32] ([fdo#110723])
>    [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_big_fb@yf-tiled-8bpp-rotate-180.html
> 
>   * igt@kms_big_fb@yf-tiled-addfb:
>     - shard-tglb:         NOTRUN -> [SKIP][33] ([fdo#111615]) +1 similar issue
>    [33]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@kms_big_fb@yf-tiled-addfb.html
> 
>   * igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-0:
>     - shard-apl:          NOTRUN -> [SKIP][34] ([fdo#109271]) +307 similar issues
>    [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl7/igt@kms_big_fb@yf-tiled-max-hw-stride-64bpp-rotate-0.html
> 
>   * igt@kms_ccs@pipe-a-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc:
>     - shard-apl:          NOTRUN -> [SKIP][35] ([fdo#109271] / [i915#3886]) +18 similar issues
>    [35]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl1/igt@kms_ccs@pipe-a-missing-ccs-buffer-y_tiled_gen12_rc_ccs_cc.html
> 
>   * igt@kms_ccs@pipe-b-crc-primary-basic-yf_tiled_ccs:
>     - shard-tglb:         NOTRUN -> [SKIP][36] ([i915#3689]) +3 similar issues
>    [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@kms_ccs@pipe-b-crc-primary-basic-yf_tiled_ccs.html
> 
>   * igt@kms_ccs@pipe-b-crc-sprite-planes-basic-y_tiled_gen12_mc_ccs:
>     - shard-iclb:         NOTRUN -> [SKIP][37] ([fdo#109278] / [i915#3886]) +2 similar issues
>    [37]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_ccs@pipe-b-crc-sprite-planes-basic-y_tiled_gen12_mc_ccs.html
> 
>   * igt@kms_ccs@pipe-b-random-ccs-data-y_tiled_gen12_mc_ccs:
>     - shard-tglb:         NOTRUN -> [SKIP][38] ([i915#3689] / [i915#3886])
>    [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@kms_ccs@pipe-b-random-ccs-data-y_tiled_gen12_mc_ccs.html
> 
>   * igt@kms_chamelium@hdmi-crc-fast:
>     - shard-iclb:         NOTRUN -> [SKIP][39] ([fdo#109284] / [fdo#111827]) +1 similar issue
>    [39]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb1/igt@kms_chamelium@hdmi-crc-fast.html
> 
>   * igt@kms_chamelium@hdmi-edid-change-during-suspend:
>     - shard-apl:          NOTRUN -> [SKIP][40] ([fdo#109271] / [fdo#111827]) +33 similar issues
>    [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl7/igt@kms_chamelium@hdmi-edid-change-during-suspend.html
> 
>   * igt@kms_chamelium@hdmi-hpd-fast:
>     - shard-snb:          NOTRUN -> [SKIP][41] ([fdo#109271] / [fdo#111827]) +22 similar issues
>    [41]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-snb2/igt@kms_chamelium@hdmi-hpd-fast.html
> 
>   * igt@kms_color_chamelium@pipe-b-ctm-0-75:
>     - shard-tglb:         NOTRUN -> [SKIP][42] ([fdo#109284] / [fdo#111827]) +6 similar issues
>    [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@kms_color_chamelium@pipe-b-ctm-0-75.html
> 
>   * igt@kms_color_chamelium@pipe-d-ctm-0-75:
>     - shard-iclb:         NOTRUN -> [SKIP][43] ([fdo#109278] / [fdo#109284] / [fdo#111827])
>    [43]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_color_chamelium@pipe-d-ctm-0-75.html
> 
>   * igt@kms_color_chamelium@pipe-invalid-ctm-matrix-sizes:
>     - shard-skl:          NOTRUN -> [SKIP][44] ([fdo#109271] / [fdo#111827]) +3 similar issues
>    [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl6/igt@kms_color_chamelium@pipe-invalid-ctm-matrix-sizes.html
> 
>   * igt@kms_content_protection@dp-mst-type-1:
>     - shard-iclb:         NOTRUN -> [SKIP][45] ([i915#3116])
>    [45]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb1/igt@kms_content_protection@dp-mst-type-1.html
>     - shard-tglb:         NOTRUN -> [SKIP][46] ([i915#3116])
>    [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb1/igt@kms_content_protection@dp-mst-type-1.html
> 
>   * igt@kms_content_protection@lic:
>     - shard-apl:          NOTRUN -> [TIMEOUT][47] ([i915#1319])
>    [47]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl7/igt@kms_content_protection@lic.html
> 
>   * igt@kms_content_protection@srm:
>     - shard-iclb:         NOTRUN -> [SKIP][48] ([fdo#109300] / [fdo#111066])
>    [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_content_protection@srm.html
> 
>   * igt@kms_cursor_crc@pipe-a-cursor-suspend:
>     - shard-apl:          [PASS][49] -> [DMESG-WARN][50] ([i915#180])
>    [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-apl2/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
>    [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl6/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
> 
>   * igt@kms_cursor_crc@pipe-b-cursor-512x170-offscreen:
>     - shard-iclb:         NOTRUN -> [SKIP][51] ([fdo#109278] / [fdo#109279])
>    [51]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_cursor_crc@pipe-b-cursor-512x170-offscreen.html
> 
>   * igt@kms_cursor_crc@pipe-c-cursor-512x170-rapid-movement:
>     - shard-tglb:         NOTRUN -> [SKIP][52] ([i915#3359]) +1 similar issue
>    [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@kms_cursor_crc@pipe-c-cursor-512x170-rapid-movement.html
> 
>   * igt@kms_cursor_crc@pipe-d-cursor-512x170-onscreen:
>     - shard-tglb:         NOTRUN -> [SKIP][53] ([fdo#109279] / [i915#3359]) +1 similar issue
>    [53]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@kms_cursor_crc@pipe-d-cursor-512x170-onscreen.html
> 
>   * igt@kms_cursor_crc@pipe-d-cursor-64x64-random:
>     - shard-iclb:         NOTRUN -> [SKIP][54] ([fdo#109278]) +4 similar issues
>    [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_cursor_crc@pipe-d-cursor-64x64-random.html
> 
>   * igt@kms_draw_crc@draw-method-rgb565-mmap-gtt-ytiled:
>     - shard-skl:          [PASS][55] -> [DMESG-WARN][56] ([i915#1982]) +1 similar issue
>    [55]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl6/igt@kms_draw_crc@draw-method-rgb565-mmap-gtt-ytiled.html
>    [56]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl1/igt@kms_draw_crc@draw-method-rgb565-mmap-gtt-ytiled.html
> 
>   * igt@kms_fbcon_fbt@fbc-suspend:
>     - shard-tglb:         [PASS][57] -> [INCOMPLETE][58] ([i915#456])
>    [57]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-tglb3/igt@kms_fbcon_fbt@fbc-suspend.html
>    [58]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb7/igt@kms_fbcon_fbt@fbc-suspend.html
> 
>   * igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs:
>     - shard-apl:          NOTRUN -> [SKIP][59] ([fdo#109271] / [i915#2672])
>    [59]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl1/igt@kms_flip_scaled_crc@flip-32bpp-ytile-to-32bpp-ytilegen12rcccs.html
> 
>   * igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile:
>     - shard-snb:          NOTRUN -> [SKIP][60] ([fdo#109271]) +403 similar issues
>    [60]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-snb5/igt@kms_flip_scaled_crc@flip-32bpp-ytileccs-to-64bpp-ytile.html
> 
>   * igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs:
>     - shard-skl:          NOTRUN -> [SKIP][61] ([fdo#109271] / [i915#2672])
>    [61]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl8/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytilercccs.html
> 
>   * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-mmap-wc:
>     - shard-tglb:         NOTRUN -> [SKIP][62] ([fdo#111825]) +17 similar issues
>    [62]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-pri-indfb-draw-mmap-wc.html
> 
>   * igt@kms_frontbuffer_tracking@fbc-suspend:
>     - shard-tglb:         [PASS][63] -> [INCOMPLETE][64] ([i915#2411] / [i915#456])
>    [63]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-tglb2/igt@kms_frontbuffer_tracking@fbc-suspend.html
>    [64]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb7/igt@kms_frontbuffer_tracking@fbc-suspend.html
> 
>   * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-indfb-msflip-blt:
>     - shard-skl:          NOTRUN -> [SKIP][65] ([fdo#109271]) +21 similar issues
>    [65]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl8/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-indfb-msflip-blt.html
> 
>   * igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-shrfb-plflip-blt:
>     - shard-iclb:         NOTRUN -> [SKIP][66] ([fdo#109280]) +1 similar issue
>    [66]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_frontbuffer_tracking@fbcpsr-2p-scndscrn-shrfb-plflip-blt.html
> 
>   * igt@kms_pipe_crc_basic@nonblocking-crc-pipe-d-frame-sequence:
>     - shard-apl:          NOTRUN -> [SKIP][67] ([fdo#109271] / [i915#533]) +1 similar issue
>    [67]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl7/igt@kms_pipe_crc_basic@nonblocking-crc-pipe-d-frame-sequence.html
> 
>   * igt@kms_plane_alpha_blend@pipe-a-alpha-basic:
>     - shard-apl:          NOTRUN -> [FAIL][68] ([fdo#108145] / [i915#265]) +1 similar issue
>    [68]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl7/igt@kms_plane_alpha_blend@pipe-a-alpha-basic.html
> 
>   * igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb:
>     - shard-apl:          NOTRUN -> [FAIL][69] ([i915#265]) +1 similar issue
>    [69]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl8/igt@kms_plane_alpha_blend@pipe-b-alpha-transparent-fb.html
> 
>   * igt@kms_plane_alpha_blend@pipe-c-alpha-7efc:
>     - shard-skl:          NOTRUN -> [FAIL][70] ([fdo#108145] / [i915#265])
>    [70]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl6/igt@kms_plane_alpha_blend@pipe-c-alpha-7efc.html
> 
>   * igt@kms_plane_multiple@atomic-pipe-b-tiling-yf:
>     - shard-tglb:         NOTRUN -> [SKIP][71] ([fdo#112054])
>    [71]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb1/igt@kms_plane_multiple@atomic-pipe-b-tiling-yf.html
> 
>   * igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-5:
>     - shard-iclb:         NOTRUN -> [SKIP][72] ([i915#658])
>    [72]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_psr2_sf@overlay-plane-update-sf-dmg-area-5.html
> 
>   * igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-4:
>     - shard-apl:          NOTRUN -> [SKIP][73] ([fdo#109271] / [i915#658]) +6 similar issues
>    [73]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl1/igt@kms_psr2_sf@overlay-primary-update-sf-dmg-area-4.html
> 
>   * igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-2:
>     - shard-tglb:         NOTRUN -> [SKIP][74] ([i915#2920]) +1 similar issue
>    [74]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-2.html
>     - shard-skl:          NOTRUN -> [SKIP][75] ([fdo#109271] / [i915#658])
>    [75]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl8/igt@kms_psr2_sf@primary-plane-update-sf-dmg-area-2.html
> 
>   * igt@kms_psr@psr2_cursor_blt:
>     - shard-tglb:         NOTRUN -> [FAIL][76] ([i915#132] / [i915#3467])
>    [76]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@kms_psr@psr2_cursor_blt.html
> 
>   * igt@kms_psr@psr2_cursor_plane_move:
>     - shard-iclb:         [PASS][77] -> [SKIP][78] ([fdo#109441]) +2 similar issues
>    [77]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb2/igt@kms_psr@psr2_cursor_plane_move.html
>    [78]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb3/igt@kms_psr@psr2_cursor_plane_move.html
> 
>   * igt@kms_sysfs_edid_timing:
>     - shard-apl:          NOTRUN -> [FAIL][79] ([IGT#2])
>    [79]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl7/igt@kms_sysfs_edid_timing.html
> 
>   * igt@kms_vblank@pipe-a-ts-continuation-suspend:
>     - shard-kbl:          [PASS][80] -> [DMESG-WARN][81] ([i915#180] / [i915#295])
>    [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-kbl6/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
>    [81]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-kbl7/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
> 
>   * igt@kms_vrr@flipline:
>     - shard-tglb:         NOTRUN -> [SKIP][82] ([fdo#109502])
>    [82]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@kms_vrr@flipline.html
> 
>   * igt@kms_writeback@writeback-invalid-parameters:
>     - shard-apl:          NOTRUN -> [SKIP][83] ([fdo#109271] / [i915#2437])
>    [83]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl7/igt@kms_writeback@writeback-invalid-parameters.html
> 
>   * igt@nouveau_crc@pipe-b-source-outp-inactive:
>     - shard-iclb:         NOTRUN -> [SKIP][84] ([i915#2530])
>    [84]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@nouveau_crc@pipe-b-source-outp-inactive.html
> 
>   * igt@nouveau_crc@pipe-d-source-outp-inactive:
>     - shard-tglb:         NOTRUN -> [SKIP][85] ([i915#2530]) +2 similar issues
>    [85]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@nouveau_crc@pipe-d-source-outp-inactive.html
> 
>   * igt@perf@polling-parameterized:
>     - shard-skl:          [PASS][86] -> [FAIL][87] ([i915#1542])
>    [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl3/igt@perf@polling-parameterized.html
>    [87]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl6/igt@perf@polling-parameterized.html
> 
>   * igt@prime_nv_test@nv_i915_sharing:
>     - shard-tglb:         NOTRUN -> [SKIP][88] ([fdo#109291])
>    [88]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@prime_nv_test@nv_i915_sharing.html
> 
>   * igt@runner@aborted:
>     - shard-snb:          NOTRUN -> ([FAIL][89], [FAIL][90]) ([i915#3002])
>    [89]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-snb5/igt@runner@aborted.html
>    [90]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-snb7/igt@runner@aborted.html
> 
>   * igt@sysfs_clients@busy:
>     - shard-tglb:         NOTRUN -> [SKIP][91] ([i915#2994])
>    [91]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@sysfs_clients@busy.html
>     - shard-skl:          NOTRUN -> [SKIP][92] ([fdo#109271] / [i915#2994])
>    [92]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl6/igt@sysfs_clients@busy.html
> 
>   * igt@sysfs_clients@recycle-many:
>     - shard-apl:          NOTRUN -> [SKIP][93] ([fdo#109271] / [i915#2994]) +2 similar issues
>    [93]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl1/igt@sysfs_clients@recycle-many.html
> 
>   
> #### Possible fixes ####
> 
>   * igt@gem_ctx_isolation@preservation-s3@bcs0:
>     - shard-tglb:         [INCOMPLETE][94] -> [PASS][95]
>    [94]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-tglb7/igt@gem_ctx_isolation@preservation-s3@bcs0.html
>    [95]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@gem_ctx_isolation@preservation-s3@bcs0.html
> 
>   * igt@gem_eio@unwedge-stress:
>     - shard-skl:          [TIMEOUT][96] ([i915#2369] / [i915#3063]) -> [PASS][97]
>    [96]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl1/igt@gem_eio@unwedge-stress.html
>    [97]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl8/igt@gem_eio@unwedge-stress.html
> 
>   * igt@gem_exec_fair@basic-flow@rcs0:
>     - shard-tglb:         [FAIL][98] ([i915#2842]) -> [PASS][99] +1 similar issue
>    [98]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-tglb2/igt@gem_exec_fair@basic-flow@rcs0.html
>    [99]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb1/igt@gem_exec_fair@basic-flow@rcs0.html
> 
>   * igt@gem_exec_fair@basic-none@vecs0:
>     - shard-kbl:          [FAIL][100] ([i915#2842]) -> [PASS][101]
>    [100]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-kbl2/igt@gem_exec_fair@basic-none@vecs0.html
>    [101]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-kbl6/igt@gem_exec_fair@basic-none@vecs0.html
> 
>   * igt@gem_exec_fair@basic-pace@rcs0:
>     - shard-kbl:          [SKIP][102] ([fdo#109271]) -> [PASS][103]
>    [102]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-kbl3/igt@gem_exec_fair@basic-pace@rcs0.html
>    [103]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-kbl2/igt@gem_exec_fair@basic-pace@rcs0.html
> 
>   * igt@gem_exec_whisper@basic-sync:
>     - shard-skl:          [INCOMPLETE][104] ([i915#2944]) -> [PASS][105]
>    [104]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl1/igt@gem_exec_whisper@basic-sync.html
>    [105]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl8/igt@gem_exec_whisper@basic-sync.html
> 
>   * igt@gem_huc_copy@huc-copy:
>     - shard-tglb:         [SKIP][106] ([i915#2190]) -> [PASS][107]
>    [106]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-tglb7/igt@gem_huc_copy@huc-copy.html
>    [107]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb5/igt@gem_huc_copy@huc-copy.html
> 
>   * igt@gem_mmap_offset@clear:
>     - shard-skl:          [FAIL][108] ([i915#1888] / [i915#3160]) -> [PASS][109]
>    [108]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl7/igt@gem_mmap_offset@clear.html
>    [109]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl7/igt@gem_mmap_offset@clear.html
> 
>   * igt@i915_module_load@reload-with-fault-injection:
>     - shard-skl:          [DMESG-WARN][110] ([i915#1982]) -> [PASS][111] +3 similar issues
>    [110]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl7/igt@i915_module_load@reload-with-fault-injection.html
>    [111]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl2/igt@i915_module_load@reload-with-fault-injection.html
> 
>   * igt@i915_pm_dc@dc6-dpms:
>     - shard-iclb:         [FAIL][112] ([i915#454]) -> [PASS][113]
>    [112]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb3/igt@i915_pm_dc@dc6-dpms.html
>    [113]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb2/igt@i915_pm_dc@dc6-dpms.html
> 
>   * igt@i915_suspend@forcewake:
>     - shard-tglb:         [INCOMPLETE][114] ([i915#2411] / [i915#456]) -> [PASS][115]
>    [114]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-tglb7/igt@i915_suspend@forcewake.html
>    [115]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-tglb8/igt@i915_suspend@forcewake.html
> 
>   * igt@kms_big_fb@x-tiled-32bpp-rotate-180:
>     - shard-glk:          [DMESG-WARN][116] ([i915#118] / [i915#95]) -> [PASS][117]
>    [116]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-glk8/igt@kms_big_fb@x-tiled-32bpp-rotate-180.html
>    [117]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-glk7/igt@kms_big_fb@x-tiled-32bpp-rotate-180.html
> 
>   * igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions:
>     - shard-skl:          [FAIL][118] ([i915#2346]) -> [PASS][119]
>    [118]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl7/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html
>    [119]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl7/igt@kms_cursor_legacy@flip-vs-cursor-atomic-transitions.html
> 
>   * igt@kms_draw_crc@draw-method-rgb565-pwrite-xtiled:
>     - shard-iclb:         [DMESG-WARN][120] -> [PASS][121]
>    [120]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb2/igt@kms_draw_crc@draw-method-rgb565-pwrite-xtiled.html
>    [121]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb6/igt@kms_draw_crc@draw-method-rgb565-pwrite-xtiled.html
> 
>   * igt@kms_fbcon_fbt@fbc-suspend:
>     - shard-apl:          [INCOMPLETE][122] ([i915#180] / [i915#1982]) -> [PASS][123]
>    [122]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-apl3/igt@kms_fbcon_fbt@fbc-suspend.html
>    [123]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-apl8/igt@kms_fbcon_fbt@fbc-suspend.html
> 
>   * igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1:
>     - shard-skl:          [FAIL][124] ([i915#79]) -> [PASS][125]
>    [124]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl8/igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1.html
>    [125]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl4/igt@kms_flip@flip-vs-expired-vblank-interruptible@b-edp1.html
> 
>   * igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1:
>     - shard-skl:          [FAIL][126] ([i915#2122]) -> [PASS][127]
>    [126]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl1/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1.html
>    [127]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl5/igt@kms_flip@plain-flip-fb-recreate-interruptible@a-edp1.html
> 
>   * igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile:
>     - shard-iclb:         [SKIP][128] ([i915#3701]) -> [PASS][129]
>    [128]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb2/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile.html
>    [129]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb3/igt@kms_flip_scaled_crc@flip-64bpp-ytile-to-32bpp-ytile.html
> 
>   * igt@kms_hdr@bpc-switch:
>     - shard-skl:          [FAIL][130] ([i915#1188]) -> [PASS][131]
>    [130]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl5/igt@kms_hdr@bpc-switch.html
>    [131]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl1/igt@kms_hdr@bpc-switch.html
> 
>   * igt@kms_plane_alpha_blend@pipe-a-coverage-7efc:
>     - shard-skl:          [FAIL][132] ([fdo#108145] / [i915#265]) -> [PASS][133] +1 similar issue
>    [132]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-skl1/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
>    [133]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-skl5/igt@kms_plane_alpha_blend@pipe-a-coverage-7efc.html
> 
>   * igt@kms_psr@psr2_sprite_blt:
>     - shard-iclb:         [SKIP][134] ([fdo#109441]) -> [PASS][135] +1 similar issue
>    [134]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb1/igt@kms_psr@psr2_sprite_blt.html
>    [135]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb2/igt@kms_psr@psr2_sprite_blt.html
> 
>   
> #### Warnings ####
> 
>   * igt@i915_pm_dc@dc3co-vpb-simulation:
>     - shard-iclb:         [SKIP][136] ([i915#658]) -> [SKIP][137] ([i915#588])
>    [136]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb7/igt@i915_pm_dc@dc3co-vpb-simulation.html
>    [137]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb2/igt@i915_pm_dc@dc3co-vpb-simulation.html
> 
>   * igt@i915_pm_dc@dc9-dpms:
>     - shard-iclb:         [FAIL][138] ([i915#3343]) -> [SKIP][139] ([i915#3288])
>    [138]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10558/shard-iclb2/igt@i915_pm_dc@dc9-dpms.html
>    [139]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/shard-iclb3/igt@i915_pm_dc@dc9-dpms.html
> 
>   * igt@i915_pm_rc6_residency@rc6-idle:
>     - shard-iclb:         [WARN][140] ([i915#1804] / [i915#2684]) -> [WARN]
> 
> == Logs ==
> 
> For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_20982/index.html
Ville Syrjälä Sept. 9, 2021, 1:58 p.m. UTC | #5
On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > 
> > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > 
> > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > I915_MOCS_PTE (1) removed.
> > 
> > And just how are people planning on handling display cacheability
> > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > external bos uncached on these platforms just in case we might
> > scan out said bo?
> 
> MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> platforms (despite the old #define, it's not actually related to PTE,
> display, etc. anymore).

So can someone finally explain to me how we're supposed to cache
anything that might become a scanout buffer later (eg. window system
buffers)? Or are we just making everything like that UC now, and is
everyone happy with that? Is userspace actually following that?

We can't just randomly change this stuff in the kernel since it's an
abi. Userspace has to be in sync.

> Userspace shouldn't be using entry 1, since its
> settings are officially undefined, but because we had already defined a
> value for it in the driver on our production gen12 platforms (TGL/RKL),
> we can't change that value now without an ABI break; we just have to
> leave it be on TGL/RKL.
> 
> Any userspace trying to use entry 1 on gen12 (or any other undocumented
> table entry) is still buggy and should still be updated.  For the gen12
> platforms going forward (ADL-S, ADL-P, and anything else that shows up
> in the future) we can just follow the guidance of setting all
> invalid/unused entries as L3 cached from day 1.
> 
> 
> Matt
> 
> > 
> > > 
> > > Looks good to me,
> > > 
> > > Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
> > > 
> > > 
> > > > ---
> > > >  drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++++++++++++++++++++++++---
> > > >  1 file changed, 37 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > index e96afd7beb49..c8d289b00de4 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > @@ -315,6 +315,35 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
> > > >  	MOCS_ENTRY(63, 0, L3_1_UC),
> > > >  };
> > > >  
> > > > +static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
> > > > +
> > > > +	GEN11_MOCS_ENTRIES,
> > > > +	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
> > > > +	MOCS_ENTRY(48,
> > > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > > +		   L3_3_WB),
> > > > +	/* Implicitly enable L1 - HDC:L1 + L3 */
> > > > +	MOCS_ENTRY(49,
> > > > +		   LE_1_UC | LE_TC_1_LLC,
> > > > +		   L3_3_WB),
> > > > +	/* Implicitly enable L1 - HDC:L1 + LLC */
> > > > +	MOCS_ENTRY(50,
> > > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > > +		   L3_1_UC),
> > > > +	/* Implicitly enable L1 - HDC:L1 */
> > > > +	MOCS_ENTRY(51,
> > > > +		   LE_1_UC | LE_TC_1_LLC,
> > > > +		   L3_1_UC),
> > > > +	/* HW Special Case (CCS) */
> > > > +	MOCS_ENTRY(60,
> > > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > > +		   L3_1_UC),
> > > > +	/* HW Special Case (Displayable) */
> > > > +	MOCS_ENTRY(61,
> > > > +		   LE_1_UC | LE_TC_1_LLC,
> > > > +		   L3_3_WB),
> > > > +};
> > > > +
> > > >  enum {
> > > >  	HAS_GLOBAL_MOCS = BIT(0),
> > > >  	HAS_ENGINE_MOCS = BIT(1),
> > > > @@ -351,14 +380,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
> > > >  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > > >  		table->uc_index = 1;
> > > >  		table->unused_entries_index = 5;
> > > > -	} else if (GRAPHICS_VER(i915) >= 12) {
> > > > +	} else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
> > > > +		/* For TGL/RKL, Can't be changed now for ABI reasons */
> > > >  		table->size  = ARRAY_SIZE(tgl_mocs_table);
> > > >  		table->table = tgl_mocs_table;
> > > >  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > > >  		table->uc_index = 3;
> > > > -		/* For TGL/RKL, Can't be changed now for ABI reasons */
> > > > -		if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915))
> > > > -			table->unused_entries_index = 2;
> > > > +	} else if (GRAPHICS_VER(i915) >= 12) {
> > > > +		table->size  = ARRAY_SIZE(gen12_mocs_table);
> > > > +		table->table = gen12_mocs_table;
> > > > +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > > > +		table->uc_index = 3;
> > > > +		table->unused_entries_index = 2;
> > > >  	} else if (GRAPHICS_VER(i915) == 11) {
> > > >  		table->size  = ARRAY_SIZE(icl_mocs_table);
> > > >  		table->table = icl_mocs_table;
> > > > -- 
> > > > 2.26.2
> > > > 
> > > 
> > > -- 
> > > Matt Roper
> > > Graphics Software Engineer
> > > VTT-OSGC Platform Enablement
> > > Intel Corporation
> > > (916) 356-2795
> > 
> > -- 
> > Ville Syrjälä
> > Intel
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
> (916) 356-2795
Matt Roper Sept. 9, 2021, 2:29 p.m. UTC | #6
On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > 
> > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > 
> > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > I915_MOCS_PTE (1) removed.
> > > 
> > > And just how are people planning on handling display cacheability
> > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > external bos uncached on these platforms just in case we might
> > > scan out said bo?
> > 
> > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > platforms (despite the old #define, it's not actually related to PTE,
> > display, etc. anymore).
> 
> So can someone finally explain to me how we're supposed to cache
> anything that might become a scanout buffer later (eg. window system
> buffers)? Or are we just making everything like that UC now, and is
> everyone happy with that? Is userspace actually following that?

Table entry #1 has never had anything to do with scanout on gen12+.  I
would assume that UMDs are either using the display entry in the MOCS
table (which is 61 on gen12+) or some other UC entry.  If they weren't
properly updated and are still trying to use gen11 MOCS tables, then
they'll continue to work on TGL/RKL (we're not making any changes
there), but they'll just never run properly on the upcoming platforms
like ADL until the userspace enablement of the platform is completed.

> 
> We can't just randomly change this stuff in the kernel since it's an
> abi. Userspace has to be in sync.

That's exactly why we're making this change.  We need to get this right
_before_ it becomes established as ABI on upcoming platforms.  We missed
the boat for TGL/RKL, which is why we're stuck with the "wrong" value in
invalid table entry #1 forever.  But the platforms that haven't
established a behavior as pre-existing ABI yet need to be handled
correctly right from the beginning.  ABI means we can't change the
behavior of a specific platform between kernel releases; it doesn't mean
that new platform B has to reproduce all the mistakes of old platform A
too.  If buggy userspace happened to work by accident on TGL/RKL, that
doesn't mean that it has to accidentally work on ADL hardware too.


Matt

> 
> > Userspace shouldn't be using entry 1, since its
> > settings are officially undefined, but because we had already defined a
> > value for it in the driver on our production gen12 platforms (TGL/RKL),
> > we can't change that value now without an ABI break; we just have to
> > leave it be on TGL/RKL.
> > 
> > Any userspace trying to use entry 1 on gen12 (or any other undocumented
> > table entry) is still buggy and should still be updated.  For the gen12
> > platforms going forward (ADL-S, ADL-P, and anything else that shows up
> > in the future) we can just follow the guidance of setting all
> > invalid/unused entries as L3 cached from day 1.
> > 
> > 
> > Matt
> > 
> > > 
> > > > 
> > > > Looks good to me,
> > > > 
> > > > Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
> > > > 
> > > > 
> > > > > ---
> > > > >  drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++++++++++++++++++++++++---
> > > > >  1 file changed, 37 insertions(+), 4 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > > index e96afd7beb49..c8d289b00de4 100644
> > > > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> > > > > @@ -315,6 +315,35 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
> > > > >  	MOCS_ENTRY(63, 0, L3_1_UC),
> > > > >  };
> > > > >  
> > > > > +static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
> > > > > +
> > > > > +	GEN11_MOCS_ENTRIES,
> > > > > +	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
> > > > > +	MOCS_ENTRY(48,
> > > > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > > > +		   L3_3_WB),
> > > > > +	/* Implicitly enable L1 - HDC:L1 + L3 */
> > > > > +	MOCS_ENTRY(49,
> > > > > +		   LE_1_UC | LE_TC_1_LLC,
> > > > > +		   L3_3_WB),
> > > > > +	/* Implicitly enable L1 - HDC:L1 + LLC */
> > > > > +	MOCS_ENTRY(50,
> > > > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > > > +		   L3_1_UC),
> > > > > +	/* Implicitly enable L1 - HDC:L1 */
> > > > > +	MOCS_ENTRY(51,
> > > > > +		   LE_1_UC | LE_TC_1_LLC,
> > > > > +		   L3_1_UC),
> > > > > +	/* HW Special Case (CCS) */
> > > > > +	MOCS_ENTRY(60,
> > > > > +		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
> > > > > +		   L3_1_UC),
> > > > > +	/* HW Special Case (Displayable) */
> > > > > +	MOCS_ENTRY(61,
> > > > > +		   LE_1_UC | LE_TC_1_LLC,
> > > > > +		   L3_3_WB),
> > > > > +};
> > > > > +
> > > > >  enum {
> > > > >  	HAS_GLOBAL_MOCS = BIT(0),
> > > > >  	HAS_ENGINE_MOCS = BIT(1),
> > > > > @@ -351,14 +380,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
> > > > >  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > > > >  		table->uc_index = 1;
> > > > >  		table->unused_entries_index = 5;
> > > > > -	} else if (GRAPHICS_VER(i915) >= 12) {
> > > > > +	} else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
> > > > > +		/* For TGL/RKL, Can't be changed now for ABI reasons */
> > > > >  		table->size  = ARRAY_SIZE(tgl_mocs_table);
> > > > >  		table->table = tgl_mocs_table;
> > > > >  		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > > > >  		table->uc_index = 3;
> > > > > -		/* For TGL/RKL, Can't be changed now for ABI reasons */
> > > > > -		if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915))
> > > > > -			table->unused_entries_index = 2;
> > > > > +	} else if (GRAPHICS_VER(i915) >= 12) {
> > > > > +		table->size  = ARRAY_SIZE(gen12_mocs_table);
> > > > > +		table->table = gen12_mocs_table;
> > > > > +		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
> > > > > +		table->uc_index = 3;
> > > > > +		table->unused_entries_index = 2;
> > > > >  	} else if (GRAPHICS_VER(i915) == 11) {
> > > > >  		table->size  = ARRAY_SIZE(icl_mocs_table);
> > > > >  		table->table = icl_mocs_table;
> > > > > -- 
> > > > > 2.26.2
> > > > > 
> > > > 
> > > > -- 
> > > > Matt Roper
> > > > Graphics Software Engineer
> > > > VTT-OSGC Platform Enablement
> > > > Intel Corporation
> > > > (916) 356-2795
> > > 
> > > -- 
> > > Ville Syrjälä
> > > Intel
> > 
> > -- 
> > Matt Roper
> > Graphics Software Engineer
> > VTT-OSGC Platform Enablement
> > Intel Corporation
> > (916) 356-2795
> 
> -- 
> Ville Syrjälä
> Intel
Ville Syrjälä Sept. 9, 2021, 2:39 p.m. UTC | #7
On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > 
> > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > 
> > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > I915_MOCS_PTE (1) removed.
> > > > 
> > > > And just how are people planning on handling display cacheability
> > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > external bos uncached on these platforms just in case we might
> > > > scan out said bo?
> > > 
> > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > platforms (despite the old #define, it's not actually related to PTE,
> > > display, etc. anymore).
> > 
> > So can someone finally explain to me how we're supposed to cache
> > anything that might become a scanout buffer later (eg. window system
> > buffers)? Or are we just making everything like that UC now, and is
> > everyone happy with that? Is userspace actually following that?
> 
> Table entry #1 has never had anything to do with scanout on gen12+.  I
> would assume that UMDs are either using the display entry in the MOCS
> table (which is 61 on gen12+) or some other UC entry.

If 61 is meant to to be the new PTE entry wy hasn't it been defines as
such in the code? And I know for a fact that userspace (Mesa) is not
using entry 61. I think there is a massive communication gap here
where everyone just seems to assume the other side is doing something.

Could someone actually come up with a clear abi definition for this
and get all the stakeholders to sign off on it?
Matt Roper Sept. 9, 2021, 3 p.m. UTC | #8
On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > 
> > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > 
> > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > I915_MOCS_PTE (1) removed.
> > > > > 
> > > > > And just how are people planning on handling display cacheability
> > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > external bos uncached on these platforms just in case we might
> > > > > scan out said bo?
> > > > 
> > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > display, etc. anymore).
> > > 
> > > So can someone finally explain to me how we're supposed to cache
> > > anything that might become a scanout buffer later (eg. window system
> > > buffers)? Or are we just making everything like that UC now, and is
> > > everyone happy with that? Is userspace actually following that?
> > 
> > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > would assume that UMDs are either using the display entry in the MOCS
> > table (which is 61 on gen12+) or some other UC entry.
> 
> If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> such in the code? And I know for a fact that userspace (Mesa) is not

There is no "PTE" entry anymore.  But 61 is already documented as
"displayable" in both the spec and the code:

        /* HW Special Case (Displayable) */                                      
        MOCS_ENTRY(61,                                                           

> using entry 61. I think there is a massive communication gap here
> where everyone just seems to assume the other side is doing something.
> 
> Could someone actually come up with a clear abi definition for this
> and get all the stakeholders to sign off on it?

The agreement between the i915 team, various userspace teams, Windows
driver team, hardware architects, software architects, and bspec writers
was just completed; that's what triggered the kernel updates here (and
I'm guessing is triggering similar work on the UMD side).  It's also why
we held off on removing the force_probe flag on ADL until now since we
couldn't consider enablement of the platform complete until the
agreement and definitions here was finalized.


Matt

> 
> -- 
> Ville Syrjälä
> Intel
Ville Syrjälä Sept. 9, 2021, 3:09 p.m. UTC | #9
On Thu, Sep 09, 2021 at 08:00:02AM -0700, Matt Roper wrote:
> On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> > On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > > 
> > > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > > 
> > > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > > I915_MOCS_PTE (1) removed.
> > > > > > 
> > > > > > And just how are people planning on handling display cacheability
> > > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > > external bos uncached on these platforms just in case we might
> > > > > > scan out said bo?
> > > > > 
> > > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > > display, etc. anymore).
> > > > 
> > > > So can someone finally explain to me how we're supposed to cache
> > > > anything that might become a scanout buffer later (eg. window system
> > > > buffers)? Or are we just making everything like that UC now, and is
> > > > everyone happy with that? Is userspace actually following that?
> > > 
> > > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > > would assume that UMDs are either using the display entry in the MOCS
> > > table (which is 61 on gen12+) or some other UC entry.
> > 
> > If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> > such in the code? And I know for a fact that userspace (Mesa) is not
> 
> There is no "PTE" entry anymore.  But 61 is already documented as
> "displayable" in both the spec and the code:
> 
>         /* HW Special Case (Displayable) */                                      
>         MOCS_ENTRY(61,                             

Why is it called a "HW special case"? I don't think there's any hw
magic in there?

And why aren't we setting it to PTE to get some cacheability for
window back buffers and such?

> 
> > using entry 61. I think there is a massive communication gap here
> > where everyone just seems to assume the other side is doing something.
> > 
> > Could someone actually come up with a clear abi definition for this
> > and get all the stakeholders to sign off on it?
> 
> The agreement between the i915 team, various userspace teams, Windows
> driver team, hardware architects, software architects, and bspec writers
> was just completed; that's what triggered the kernel updates here (and
> I'm guessing is triggering similar work on the UMD side).  It's also why
> we held off on removing the force_probe flag on ADL until now since we
> couldn't consider enablement of the platform complete until the
> agreement and definitions here was finalized.

Can we get that agreement visible on the mailing list? Since MOCS is
abi I don't see why we shouldn't follow the normal abi rules for these,
ie. post to dri-devel, get acks from relevant people, links to agreed
userspace changes, etc.
Matt Roper Sept. 9, 2021, 5:15 p.m. UTC | #10
On Thu, Sep 09, 2021 at 06:09:55PM +0300, Ville Syrjälä wrote:
> On Thu, Sep 09, 2021 at 08:00:02AM -0700, Matt Roper wrote:
> > On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> > > On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > > > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > > > 
> > > > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > > > 
> > > > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > > > I915_MOCS_PTE (1) removed.
> > > > > > > 
> > > > > > > And just how are people planning on handling display cacheability
> > > > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > > > external bos uncached on these platforms just in case we might
> > > > > > > scan out said bo?
> > > > > > 
> > > > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > > > display, etc. anymore).
> > > > > 
> > > > > So can someone finally explain to me how we're supposed to cache
> > > > > anything that might become a scanout buffer later (eg. window system
> > > > > buffers)? Or are we just making everything like that UC now, and is
> > > > > everyone happy with that? Is userspace actually following that?
> > > > 
> > > > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > > > would assume that UMDs are either using the display entry in the MOCS
> > > > table (which is 61 on gen12+) or some other UC entry.
> > > 
> > > If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> > > such in the code? And I know for a fact that userspace (Mesa) is not
> > 
> > There is no "PTE" entry anymore.  But 61 is already documented as
> > "displayable" in both the spec and the code:
> > 
> >         /* HW Special Case (Displayable) */                                      
> >         MOCS_ENTRY(61,                             
> 
> Why is it called a "HW special case"? I don't think there's any hw
> magic in there?
> 
> And why aren't we setting it to PTE to get some cacheability for
> window back buffers and such?

Who is "we" here?  The MOCS table is a pre-defined set of per-platform
magic numbers.  The software teams don't get to decide what the values
are, we just program the hardware with the per-platform numbers that
have been agreed upon as part of a platform-wide stack (everything from
low-level firmware to high level userspace should be working from the
same table, defined in the bspec).

Once we know what the per-platform magic numbers are, we're supposed to
pick the table entry that matches the behavior we're trying to
accomplish.  If you want some specific level of cacheability, then you
select a table row that gives you that.  Maybe 61 isn't the best
setting, I don't know; userspace can pick whichever defined setting is
actually best, using the data from the table.  But table row #1 is
already well-documented as reserved/dontuse across the full stack; the
fact that row #1 had values similar to PTE on Icelake hardware doesn't
carry forward to any post-gen11 platform.

> 
> > 
> > > using entry 61. I think there is a massive communication gap here
> > > where everyone just seems to assume the other side is doing something.
> > > 
> > > Could someone actually come up with a clear abi definition for this
> > > and get all the stakeholders to sign off on it?
> > 
> > The agreement between the i915 team, various userspace teams, Windows
> > driver team, hardware architects, software architects, and bspec writers
> > was just completed; that's what triggered the kernel updates here (and
> > I'm guessing is triggering similar work on the UMD side).  It's also why
> > we held off on removing the force_probe flag on ADL until now since we
> > couldn't consider enablement of the platform complete until the
> > agreement and definitions here was finalized.
> 
> Can we get that agreement visible on the mailing list? Since MOCS is
> abi I don't see why we shouldn't follow the normal abi rules for these,
> ie. post to dri-devel, get acks from relevant people, links to agreed
> userspace changes, etc.

The ABI design here was designed and agreed upon years ago, during early
gen11 development.  The ABI design is that the kernel driver will
faithfully initialize the hardware with the pre-determined set of magic
numbers documented by the hardware team.  Since these are
well-documented and unchanging numbers per-platform, there's no
ambiguity for userspace, firmware, etc. about what a specific mocs index
means, and no need to provide additional ABI for userspace to query what
the kernel used in each row or anything like that.  The specific magic
numbers are also ABI in the sense that we can't change the set of
defined values once they're set for a platform (and it's been a long
road to get the hardware and other OS software teams to understand and
agree to this requirement), but we don't get to define or overrule what
the initial values and order of those magic numbers are.

What is a bit vague in the formal documentation is what should be done
about the reserved/dontuse table entries.  In theory it wouldn't matter
since they'd never be used anyway, but in reality userspace can still
use them by accident, such as by forgetting to update their MOCS
selection logic from past platforms (e.g., still trying to use row #1 on
platforms where it isn't defined).  Given that it's legal for entries to
be added to MOCS tables, but never removed/modified, it follows that we
should always initialize the undefined entries to fully cached; if a
MOCS table update happens in the future and new rows show up, they can
only become more coherent, and any userspace software that was
incorrectly trying to use them previously will remain functionally
correct.

What you're proposing would be a change to existing ABI --- instead of
following the agreed upon contract, i915 would start defining its own
set of magic numbers that potentially contradict the documentation that
every other team is depending on.  We already made this mistake on
TGL/RKL, so due to an i915 bug we're outside the spec; if entry #1 ever
becomes a formally defined setting in the future, the rest of the
software stack will need to explicitly work around i915's bug since we
can't fix it now without breaking ABI.

If you really want to redefine how the MOCS ABI works and have i915 no
longer follow the current contract, I think you need to do the
following:

 * Re-add force_probe requirement to ADL and add appropriate Fixes: so
   that the platform isn't enabled yet.
 * Document clearly how you want i915 to select the MOCS settings it
   uses for each table row if we're no longer going to follow the
   documented values in the bspec.
 * Provide a way for userspace to determine how i915 has defined the
   MOCS settings (since they can no longer rely on us following the
   previously agreed upon contract).
 * Get acks from all the userspace teams on the new direction you're
   proposing.
 * Get an ack from the GuC team to make sure that programming
   MOCS values differently than documented in the bspec won't have any
   kind of impact on their operation.

Alternatively, you could lobby for a new table row #1 to be added to the
formal MOCS table for gen12.  It's legal for new MOCS entries to show up
in the future, so if that happens, we can follow up with the
corresponding change in i915; since we'll be moving to a more coherent
value (from today's fully cached entry), we'll be becoming more
permissive from a correctness point of view.  But we absolutely should
not try to add entries unilaterally in i915 that haven't been formally
agreed upon because they may clash with a different definition of the
row that shows up in the future through formal channels.



Matt

> 
> -- 
> Ville Syrjälä
> Intel
Matt Roper Sept. 9, 2021, 5:20 p.m. UTC | #11
+Cc a couple more people.

On Thu, Sep 09, 2021 at 10:15:57AM -0700, Matt Roper wrote:
> On Thu, Sep 09, 2021 at 06:09:55PM +0300, Ville Syrjälä wrote:
> > On Thu, Sep 09, 2021 at 08:00:02AM -0700, Matt Roper wrote:
> > > On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> > > > On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > > > > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > > > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > > > > 
> > > > > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > > > > 
> > > > > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > > > > I915_MOCS_PTE (1) removed.
> > > > > > > > 
> > > > > > > > And just how are people planning on handling display cacheability
> > > > > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > > > > external bos uncached on these platforms just in case we might
> > > > > > > > scan out said bo?
> > > > > > > 
> > > > > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > > > > display, etc. anymore).
> > > > > > 
> > > > > > So can someone finally explain to me how we're supposed to cache
> > > > > > anything that might become a scanout buffer later (eg. window system
> > > > > > buffers)? Or are we just making everything like that UC now, and is
> > > > > > everyone happy with that? Is userspace actually following that?
> > > > > 
> > > > > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > > > > would assume that UMDs are either using the display entry in the MOCS
> > > > > table (which is 61 on gen12+) or some other UC entry.
> > > > 
> > > > If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> > > > such in the code? And I know for a fact that userspace (Mesa) is not
> > > 
> > > There is no "PTE" entry anymore.  But 61 is already documented as
> > > "displayable" in both the spec and the code:
> > > 
> > >         /* HW Special Case (Displayable) */                                      
> > >         MOCS_ENTRY(61,                             
> > 
> > Why is it called a "HW special case"? I don't think there's any hw
> > magic in there?
> > 
> > And why aren't we setting it to PTE to get some cacheability for
> > window back buffers and such?
> 
> Who is "we" here?  The MOCS table is a pre-defined set of per-platform
> magic numbers.  The software teams don't get to decide what the values
> are, we just program the hardware with the per-platform numbers that
> have been agreed upon as part of a platform-wide stack (everything from
> low-level firmware to high level userspace should be working from the
> same table, defined in the bspec).
> 
> Once we know what the per-platform magic numbers are, we're supposed to
> pick the table entry that matches the behavior we're trying to
> accomplish.  If you want some specific level of cacheability, then you
> select a table row that gives you that.  Maybe 61 isn't the best
> setting, I don't know; userspace can pick whichever defined setting is
> actually best, using the data from the table.  But table row #1 is
> already well-documented as reserved/dontuse across the full stack; the
> fact that row #1 had values similar to PTE on Icelake hardware doesn't
> carry forward to any post-gen11 platform.
> 
> > 
> > > 
> > > > using entry 61. I think there is a massive communication gap here
> > > > where everyone just seems to assume the other side is doing something.
> > > > 
> > > > Could someone actually come up with a clear abi definition for this
> > > > and get all the stakeholders to sign off on it?
> > > 
> > > The agreement between the i915 team, various userspace teams, Windows
> > > driver team, hardware architects, software architects, and bspec writers
> > > was just completed; that's what triggered the kernel updates here (and
> > > I'm guessing is triggering similar work on the UMD side).  It's also why
> > > we held off on removing the force_probe flag on ADL until now since we
> > > couldn't consider enablement of the platform complete until the
> > > agreement and definitions here was finalized.
> > 
> > Can we get that agreement visible on the mailing list? Since MOCS is
> > abi I don't see why we shouldn't follow the normal abi rules for these,
> > ie. post to dri-devel, get acks from relevant people, links to agreed
> > userspace changes, etc.
> 
> The ABI design here was designed and agreed upon years ago, during early
> gen11 development.  The ABI design is that the kernel driver will
> faithfully initialize the hardware with the pre-determined set of magic
> numbers documented by the hardware team.  Since these are
> well-documented and unchanging numbers per-platform, there's no
> ambiguity for userspace, firmware, etc. about what a specific mocs index
> means, and no need to provide additional ABI for userspace to query what
> the kernel used in each row or anything like that.  The specific magic
> numbers are also ABI in the sense that we can't change the set of
> defined values once they're set for a platform (and it's been a long
> road to get the hardware and other OS software teams to understand and
> agree to this requirement), but we don't get to define or overrule what
> the initial values and order of those magic numbers are.
> 
> What is a bit vague in the formal documentation is what should be done
> about the reserved/dontuse table entries.  In theory it wouldn't matter
> since they'd never be used anyway, but in reality userspace can still
> use them by accident, such as by forgetting to update their MOCS
> selection logic from past platforms (e.g., still trying to use row #1 on
> platforms where it isn't defined).  Given that it's legal for entries to
> be added to MOCS tables, but never removed/modified, it follows that we
> should always initialize the undefined entries to fully cached; if a
> MOCS table update happens in the future and new rows show up, they can
> only become more coherent, and any userspace software that was
> incorrectly trying to use them previously will remain functionally
> correct.
> 
> What you're proposing would be a change to existing ABI --- instead of
> following the agreed upon contract, i915 would start defining its own
> set of magic numbers that potentially contradict the documentation that
> every other team is depending on.  We already made this mistake on
> TGL/RKL, so due to an i915 bug we're outside the spec; if entry #1 ever
> becomes a formally defined setting in the future, the rest of the
> software stack will need to explicitly work around i915's bug since we
> can't fix it now without breaking ABI.
> 
> If you really want to redefine how the MOCS ABI works and have i915 no
> longer follow the current contract, I think you need to do the
> following:
> 
>  * Re-add force_probe requirement to ADL and add appropriate Fixes: so
>    that the platform isn't enabled yet.
>  * Document clearly how you want i915 to select the MOCS settings it
>    uses for each table row if we're no longer going to follow the
>    documented values in the bspec.
>  * Provide a way for userspace to determine how i915 has defined the
>    MOCS settings (since they can no longer rely on us following the
>    previously agreed upon contract).
>  * Get acks from all the userspace teams on the new direction you're
>    proposing.
>  * Get an ack from the GuC team to make sure that programming
>    MOCS values differently than documented in the bspec won't have any
>    kind of impact on their operation.
> 
> Alternatively, you could lobby for a new table row #1 to be added to the
> formal MOCS table for gen12.  It's legal for new MOCS entries to show up
> in the future, so if that happens, we can follow up with the
> corresponding change in i915; since we'll be moving to a more coherent
> value (from today's fully cached entry), we'll be becoming more
> permissive from a correctness point of view.  But we absolutely should
> not try to add entries unilaterally in i915 that haven't been formally
> agreed upon because they may clash with a different definition of the
> row that shows up in the future through formal channels.
> 
> 
> 
> Matt
> 
> > 
> > -- 
> > Ville Syrjälä
> > Intel
> 
> -- 
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
> (916) 356-2795
Ville Syrjälä Sept. 9, 2021, 5:42 p.m. UTC | #12
On Thu, Sep 09, 2021 at 10:15:56AM -0700, Matt Roper wrote:
> On Thu, Sep 09, 2021 at 06:09:55PM +0300, Ville Syrjälä wrote:
> > On Thu, Sep 09, 2021 at 08:00:02AM -0700, Matt Roper wrote:
> > > On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> > > > On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > > > > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > > > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > > > > 
> > > > > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > > > > 
> > > > > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > > > > I915_MOCS_PTE (1) removed.
> > > > > > > > 
> > > > > > > > And just how are people planning on handling display cacheability
> > > > > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > > > > external bos uncached on these platforms just in case we might
> > > > > > > > scan out said bo?
> > > > > > > 
> > > > > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > > > > display, etc. anymore).
> > > > > > 
> > > > > > So can someone finally explain to me how we're supposed to cache
> > > > > > anything that might become a scanout buffer later (eg. window system
> > > > > > buffers)? Or are we just making everything like that UC now, and is
> > > > > > everyone happy with that? Is userspace actually following that?
> > > > > 
> > > > > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > > > > would assume that UMDs are either using the display entry in the MOCS
> > > > > table (which is 61 on gen12+) or some other UC entry.
> > > > 
> > > > If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> > > > such in the code? And I know for a fact that userspace (Mesa) is not
> > > 
> > > There is no "PTE" entry anymore.  But 61 is already documented as
> > > "displayable" in both the spec and the code:
> > > 
> > >         /* HW Special Case (Displayable) */                                      
> > >         MOCS_ENTRY(61,                             
> > 
> > Why is it called a "HW special case"? I don't think there's any hw
> > magic in there?
> > 
> > And why aren't we setting it to PTE to get some cacheability for
> > window back buffers and such?
> 
> Who is "we" here?

We who care about the performance of the system.

> The MOCS table is a pre-defined set of per-platform
> magic numbers.  The software teams don't get to decide what the values
> are, we just program the hardware with the per-platform numbers that
> have been agreed upon as part of a platform-wide stack (everything from
> low-level firmware to high level userspace should be working from the
> same table, defined in the bspec).

The magic numbers must be based on something. If that something is
purely Windows behaviour/performance then we might be shooting
ourselves in the foot here.

> 
> Once we know what the per-platform magic numbers are, we're supposed to
> pick the table entry that matches the behavior we're trying to
> accomplish.  If you want some specific level of cacheability, then you
> select a table row that gives you that.  Maybe 61 isn't the best
> setting, I don't know; userspace can pick whichever defined setting is
> actually best, using the data from the table.  But table row #1 is
> already well-documented as reserved/dontuse across the full stack; the
> fact that row #1 had values similar to PTE on Icelake hardware doesn't
> carry forward to any post-gen11 platform.

The only way you can get LLC cacheability for an external BO (window
back buffers and such) is by using a MOCS entry that directs the hardware
to consult the PTEs. Otherwise the client doing the rendering would have
to know ahead of time whether the buffer is going to be directly scanned
out by the compositor or not, for which there is no protocol in
X or wayland.

Historically I believe LLC cacheability has been on average a win.
Some workloads can do better with UC though. So if we are giving up
on LLC cacheability we should have some numbers to back up that
decision so that we're not dropping tons of performance on the floor.

> > > > using entry 61. I think there is a massive communication gap here
> > > > where everyone just seems to assume the other side is doing something.
> > > > 
> > > > Could someone actually come up with a clear abi definition for this
> > > > and get all the stakeholders to sign off on it?
> > > 
> > > The agreement between the i915 team, various userspace teams, Windows
> > > driver team, hardware architects, software architects, and bspec writers
> > > was just completed; that's what triggered the kernel updates here (and
> > > I'm guessing is triggering similar work on the UMD side).  It's also why
> > > we held off on removing the force_probe flag on ADL until now since we
> > > couldn't consider enablement of the platform complete until the
> > > agreement and definitions here was finalized.
> > 
> > Can we get that agreement visible on the mailing list? Since MOCS is
> > abi I don't see why we shouldn't follow the normal abi rules for these,
> > ie. post to dri-devel, get acks from relevant people, links to agreed
> > userspace changes, etc.
> 
> The ABI design here was designed and agreed upon years ago, during early
> gen11 development.  The ABI design is that the kernel driver will
> faithfully initialize the hardware with the pre-determined set of magic
> numbers documented by the hardware team.  Since these are
> well-documented and unchanging numbers per-platform, there's no
> ambiguity for userspace, firmware, etc. about what a specific mocs index
> means, and no need to provide additional ABI for userspace to query what
> the kernel used in each row or anything like that.  The specific magic
> numbers are also ABI in the sense that we can't change the set of
> defined values once they're set for a platform (and it's been a long
> road to get the hardware and other OS software teams to understand and
> agree to this requirement), but we don't get to define or overrule what
> the initial values and order of those magic numbers are.

And this apporach has clearly not worked considering userspace and
kernel have not agreed on what the abi is. We need to do better.

> 
> What is a bit vague in the formal documentation is what should be done
> about the reserved/dontuse table entries.  In theory it wouldn't matter
> since they'd never be used anyway, but in reality userspace can still
> use them by accident, such as by forgetting to update their MOCS
> selection logic from past platforms (e.g., still trying to use row #1 on
> platforms where it isn't defined).  Given that it's legal for entries to
> be added to MOCS tables, but never removed/modified, it follows that we
> should always initialize the undefined entries to fully cached; if a
> MOCS table update happens in the future and new rows show up, they can
> only become more coherent, and any userspace software that was
> incorrectly trying to use them previously will remain functionally
> correct.
> 
> What you're proposing would be a change to existing ABI --- instead of
> following the agreed upon contract, i915 would start defining its own
> set of magic numbers that potentially contradict the documentation that
> every other team is depending on.  We already made this mistake on
> TGL/RKL, so due to an i915 bug we're outside the spec; if entry #1 ever
> becomes a formally defined setting in the future, the rest of the
> software stack will need to explicitly work around i915's bug since we
> can't fix it now without breaking ABI.
> 
> If you really want to redefine how the MOCS ABI works and have i915 no
> longer follow the current contract, I think you need to do the
> following:

I want the abi to be actually defined properly, and some assurance that
all the stakeholders implement it correctly. Following the proper abi
rules for kernel development would guarantee that.

I would also like if the abi can give us the best performance rather
than potentially crippling it. Ie. I would expect to have a PTE MOCS
setting for potential scanout buffers, or some proof that pure UC
will in fact be a better choice.

> 
>  * Re-add force_probe requirement to ADL and add appropriate Fixes: so
>    that the platform isn't enabled yet.
>  * Document clearly how you want i915 to select the MOCS settings it
>    uses for each table row if we're no longer going to follow the
>    documented values in the bspec.
>  * Provide a way for userspace to determine how i915 has defined the
>    MOCS settings (since they can no longer rely on us following the
>    previously agreed upon contract).
>  * Get acks from all the userspace teams on the new direction you're
>    proposing.
>  * Get an ack from the GuC team to make sure that programming
>    MOCS values differently than documented in the bspec won't have any
>    kind of impact on their operation.
> 
> Alternatively, you could lobby for a new table row #1 to be added to the
> formal MOCS table for gen12. It's legal for new MOCS entries to show up
> in the future, so if that happens, we can follow up with the
> corresponding change in i915; since we'll be moving to a more coherent
> value (from today's fully cached entry), we'll be becoming more
> permissive from a correctness point of view.  But we absolutely should
> not try to add entries unilaterally in i915 that haven't been formally
> agreed upon because they may clash with a different definition of the
> row that shows up in the future through formal channels.
Matt Roper Sept. 9, 2021, 6:14 p.m. UTC | #13
On Thu, Sep 09, 2021 at 08:42:15PM +0300, Ville Syrjälä wrote:
> On Thu, Sep 09, 2021 at 10:15:56AM -0700, Matt Roper wrote:
> > On Thu, Sep 09, 2021 at 06:09:55PM +0300, Ville Syrjälä wrote:
> > > On Thu, Sep 09, 2021 at 08:00:02AM -0700, Matt Roper wrote:
> > > > On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> > > > > On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > > > > > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > > > > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > > > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > > > > > 
> > > > > > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > > > > > 
> > > > > > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > > > > > I915_MOCS_PTE (1) removed.
> > > > > > > > > 
> > > > > > > > > And just how are people planning on handling display cacheability
> > > > > > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > > > > > external bos uncached on these platforms just in case we might
> > > > > > > > > scan out said bo?
> > > > > > > > 
> > > > > > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > > > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > > > > > display, etc. anymore).
> > > > > > > 
> > > > > > > So can someone finally explain to me how we're supposed to cache
> > > > > > > anything that might become a scanout buffer later (eg. window system
> > > > > > > buffers)? Or are we just making everything like that UC now, and is
> > > > > > > everyone happy with that? Is userspace actually following that?
> > > > > > 
> > > > > > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > > > > > would assume that UMDs are either using the display entry in the MOCS
> > > > > > table (which is 61 on gen12+) or some other UC entry.
> > > > > 
> > > > > If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> > > > > such in the code? And I know for a fact that userspace (Mesa) is not
> > > > 
> > > > There is no "PTE" entry anymore.  But 61 is already documented as
> > > > "displayable" in both the spec and the code:
> > > > 
> > > >         /* HW Special Case (Displayable) */                                      
> > > >         MOCS_ENTRY(61,                             
> > > 
> > > Why is it called a "HW special case"? I don't think there's any hw
> > > magic in there?
> > > 
> > > And why aren't we setting it to PTE to get some cacheability for
> > > window back buffers and such?
> > 
> > Who is "we" here?
> 
> We who care about the performance of the system.
> 
> > The MOCS table is a pre-defined set of per-platform
> > magic numbers.  The software teams don't get to decide what the values
> > are, we just program the hardware with the per-platform numbers that
> > have been agreed upon as part of a platform-wide stack (everything from
> > low-level firmware to high level userspace should be working from the
> > same table, defined in the bspec).
> 
> The magic numbers must be based on something. If that something is
> purely Windows behaviour/performance then we might be shooting
> ourselves in the foot here.

That's not how MOCS works.  The MOCS tables define every meaningful
combination of settings somewhere in the table.  The *types* of settings
that can be expressed change from platform to platform (e.g.,
"PAGETABLE" setting simply doesn't exist anymore hardware-wise) so the
tables themselves differ between platforms and you may need to use
different indices to get the same behavior between platforms.  But if
you're actually paying attention to the tables and choosing the right
entries, you're not going to leave any performance on the table.

> 
> > 
> > Once we know what the per-platform magic numbers are, we're supposed to
> > pick the table entry that matches the behavior we're trying to
> > accomplish.  If you want some specific level of cacheability, then you
> > select a table row that gives you that.  Maybe 61 isn't the best
> > setting, I don't know; userspace can pick whichever defined setting is
> > actually best, using the data from the table.  But table row #1 is
> > already well-documented as reserved/dontuse across the full stack; the
> > fact that row #1 had values similar to PTE on Icelake hardware doesn't
> > carry forward to any post-gen11 platform.
> 
> The only way you can get LLC cacheability for an external BO (window
> back buffers and such) is by using a MOCS entry that directs the hardware
> to consult the PTEs. Otherwise the client doing the rendering would have
> to know ahead of time whether the buffer is going to be directly scanned
> out by the compositor or not, for which there is no protocol in
> X or wayland.

It sounds like you're complaining about gen12 hardware design rather
than anything actually software or ABI related.  'Consult the PTEs'
hasn't been something the hardware has supported since gen11.  We still
have macros with PTE or PAGETABLE in their names, but those aren't
accurate descriptions of what they do on current platforms.  We note
this in the code:

   /*
    * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
    * the same as LE_UC
    */

Even on gen11 the PAT entry was documented as being deprecated, so we
knew it was going away.  I don't know why the hardware design changed,
but it did.

> 
> Historically I believe LLC cacheability has been on average a win.
> Some workloads can do better with UC though. So if we are giving up
> on LLC cacheability we should have some numbers to back up that
> decision so that we're not dropping tons of performance on the floor.

It's not our decision to make.  The hardware design has already made
this decision for us.  No matter what we program into the MOCS tables;
we're simply not getting back the behavior you're asking for and blindly
using the "LE_0_PAGETABLE" flags or whatever is just giving you
uncached.

> 
> > > > > using entry 61. I think there is a massive communication gap here
> > > > > where everyone just seems to assume the other side is doing something.
> > > > > 
> > > > > Could someone actually come up with a clear abi definition for this
> > > > > and get all the stakeholders to sign off on it?
> > > > 
> > > > The agreement between the i915 team, various userspace teams, Windows
> > > > driver team, hardware architects, software architects, and bspec writers
> > > > was just completed; that's what triggered the kernel updates here (and
> > > > I'm guessing is triggering similar work on the UMD side).  It's also why
> > > > we held off on removing the force_probe flag on ADL until now since we
> > > > couldn't consider enablement of the platform complete until the
> > > > agreement and definitions here was finalized.
> > > 
> > > Can we get that agreement visible on the mailing list? Since MOCS is
> > > abi I don't see why we shouldn't follow the normal abi rules for these,
> > > ie. post to dri-devel, get acks from relevant people, links to agreed
> > > userspace changes, etc.
> > 
> > The ABI design here was designed and agreed upon years ago, during early
> > gen11 development.  The ABI design is that the kernel driver will
> > faithfully initialize the hardware with the pre-determined set of magic
> > numbers documented by the hardware team.  Since these are
> > well-documented and unchanging numbers per-platform, there's no
> > ambiguity for userspace, firmware, etc. about what a specific mocs index
> > means, and no need to provide additional ABI for userspace to query what
> > the kernel used in each row or anything like that.  The specific magic
> > numbers are also ABI in the sense that we can't change the set of
> > defined values once they're set for a platform (and it's been a long
> > road to get the hardware and other OS software teams to understand and
> > agree to this requirement), but we don't get to define or overrule what
> > the initial values and order of those magic numbers are.
> 
> And this apporach has clearly not worked considering userspace and
> kernel have not agreed on what the abi is. We need to do better.

Userspace and kernel *do* agree on what the ABI is.  There may be a
delay in userspace enabling the MOCS properly on a new platform.  But
the changes here (which make everything fully cached by default) will
help catch those cases where a bit of enablement was forgotten on the
UMD side.  They won't "accidentally work" on initial bringup on the new
platform, and the lack of proper MOCS programming will be quickly
identified.

> 
> > 
> > What is a bit vague in the formal documentation is what should be done
> > about the reserved/dontuse table entries.  In theory it wouldn't matter
> > since they'd never be used anyway, but in reality userspace can still
> > use them by accident, such as by forgetting to update their MOCS
> > selection logic from past platforms (e.g., still trying to use row #1 on
> > platforms where it isn't defined).  Given that it's legal for entries to
> > be added to MOCS tables, but never removed/modified, it follows that we
> > should always initialize the undefined entries to fully cached; if a
> > MOCS table update happens in the future and new rows show up, they can
> > only become more coherent, and any userspace software that was
> > incorrectly trying to use them previously will remain functionally
> > correct.
> > 
> > What you're proposing would be a change to existing ABI --- instead of
> > following the agreed upon contract, i915 would start defining its own
> > set of magic numbers that potentially contradict the documentation that
> > every other team is depending on.  We already made this mistake on
> > TGL/RKL, so due to an i915 bug we're outside the spec; if entry #1 ever
> > becomes a formally defined setting in the future, the rest of the
> > software stack will need to explicitly work around i915's bug since we
> > can't fix it now without breaking ABI.
> > 
> > If you really want to redefine how the MOCS ABI works and have i915 no
> > longer follow the current contract, I think you need to do the
> > following:
> 
> I want the abi to be actually defined properly, and some assurance that
> all the stakeholders implement it correctly. Following the proper abi
> rules for kernel development would guarantee that.

This is what we have today.  As I said, if you want to propose a change
to the ABI, then you need to take the proper steps to do so.  But as far
as I can tell, most of your concern here boils down to not realizing
that the hardware capabilities you want to use were taken away from us
in gen12.

> 
> I would also like if the abi can give us the best performance rather
> than potentially crippling it. Ie. I would expect to have a PTE MOCS
> setting for potential scanout buffers, or some proof that pure UC
> will in fact be a better choice.

Again, this isn't a choice.  This is a fact of how the hardware works on
gen12.  Maybe you'll regain PTE flags on a future platform, but that's
simply not something software can ask for on today's platforms.


Matt

> 
> > 
> >  * Re-add force_probe requirement to ADL and add appropriate Fixes: so
> >    that the platform isn't enabled yet.
> >  * Document clearly how you want i915 to select the MOCS settings it
> >    uses for each table row if we're no longer going to follow the
> >    documented values in the bspec.
> >  * Provide a way for userspace to determine how i915 has defined the
> >    MOCS settings (since they can no longer rely on us following the
> >    previously agreed upon contract).
> >  * Get acks from all the userspace teams on the new direction you're
> >    proposing.
> >  * Get an ack from the GuC team to make sure that programming
> >    MOCS values differently than documented in the bspec won't have any
> >    kind of impact on their operation.
> > 
> > Alternatively, you could lobby for a new table row #1 to be added to the
> > formal MOCS table for gen12. It's legal for new MOCS entries to show up
> > in the future, so if that happens, we can follow up with the
> > corresponding change in i915; since we'll be moving to a more coherent
> > value (from today's fully cached entry), we'll be becoming more
> > permissive from a correctness point of view.  But we absolutely should
> > not try to add entries unilaterally in i915 that haven't been formally
> > agreed upon because they may clash with a different definition of the
> > row that shows up in the future through formal channels.
> 
> -- 
> Ville Syrjälä
> Intel
Ville Syrjälä Sept. 9, 2021, 7:59 p.m. UTC | #14
On Thu, Sep 09, 2021 at 11:14:15AM -0700, Matt Roper wrote:
> On Thu, Sep 09, 2021 at 08:42:15PM +0300, Ville Syrjälä wrote:
> > On Thu, Sep 09, 2021 at 10:15:56AM -0700, Matt Roper wrote:
> > > On Thu, Sep 09, 2021 at 06:09:55PM +0300, Ville Syrjälä wrote:
> > > > On Thu, Sep 09, 2021 at 08:00:02AM -0700, Matt Roper wrote:
> > > > > On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> > > > > > On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > > > > > > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > > > > > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > > > > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > > > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > > > > > > 
> > > > > > > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > > > > > > 
> > > > > > > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > > > > > > I915_MOCS_PTE (1) removed.
> > > > > > > > > > 
> > > > > > > > > > And just how are people planning on handling display cacheability
> > > > > > > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > > > > > > external bos uncached on these platforms just in case we might
> > > > > > > > > > scan out said bo?
> > > > > > > > > 
> > > > > > > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > > > > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > > > > > > display, etc. anymore).
> > > > > > > > 
> > > > > > > > So can someone finally explain to me how we're supposed to cache
> > > > > > > > anything that might become a scanout buffer later (eg. window system
> > > > > > > > buffers)? Or are we just making everything like that UC now, and is
> > > > > > > > everyone happy with that? Is userspace actually following that?
> > > > > > > 
> > > > > > > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > > > > > > would assume that UMDs are either using the display entry in the MOCS
> > > > > > > table (which is 61 on gen12+) or some other UC entry.
> > > > > > 
> > > > > > If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> > > > > > such in the code? And I know for a fact that userspace (Mesa) is not
> > > > > 
> > > > > There is no "PTE" entry anymore.  But 61 is already documented as
> > > > > "displayable" in both the spec and the code:
> > > > > 
> > > > >         /* HW Special Case (Displayable) */                                      
> > > > >         MOCS_ENTRY(61,                             
> > > > 
> > > > Why is it called a "HW special case"? I don't think there's any hw
> > > > magic in there?
> > > > 
> > > > And why aren't we setting it to PTE to get some cacheability for
> > > > window back buffers and such?
> > > 
> > > Who is "we" here?
> > 
> > We who care about the performance of the system.
> > 
> > > The MOCS table is a pre-defined set of per-platform
> > > magic numbers.  The software teams don't get to decide what the values
> > > are, we just program the hardware with the per-platform numbers that
> > > have been agreed upon as part of a platform-wide stack (everything from
> > > low-level firmware to high level userspace should be working from the
> > > same table, defined in the bspec).
> > 
> > The magic numbers must be based on something. If that something is
> > purely Windows behaviour/performance then we might be shooting
> > ourselves in the foot here.
> 
> That's not how MOCS works.  The MOCS tables define every meaningful
> combination of settings somewhere in the table.  The *types* of settings
> that can be expressed change from platform to platform (e.g.,
> "PAGETABLE" setting simply doesn't exist anymore hardware-wise) so the
> tables themselves differ between platforms and you may need to use
> different indices to get the same behavior between platforms.  But if
> you're actually paying attention to the tables and choosing the right
> entries, you're not going to leave any performance on the table.
> 
> > 
> > > 
> > > Once we know what the per-platform magic numbers are, we're supposed to
> > > pick the table entry that matches the behavior we're trying to
> > > accomplish.  If you want some specific level of cacheability, then you
> > > select a table row that gives you that.  Maybe 61 isn't the best
> > > setting, I don't know; userspace can pick whichever defined setting is
> > > actually best, using the data from the table.  But table row #1 is
> > > already well-documented as reserved/dontuse across the full stack; the
> > > fact that row #1 had values similar to PTE on Icelake hardware doesn't
> > > carry forward to any post-gen11 platform.
> > 
> > The only way you can get LLC cacheability for an external BO (window
> > back buffers and such) is by using a MOCS entry that directs the hardware
> > to consult the PTEs. Otherwise the client doing the rendering would have
> > to know ahead of time whether the buffer is going to be directly scanned
> > out by the compositor or not, for which there is no protocol in
> > X or wayland.
> 
> It sounds like you're complaining about gen12 hardware design rather
> than anything actually software or ABI related.  'Consult the PTEs'
> hasn't been something the hardware has supported since gen11. We still
> have macros with PTE or PAGETABLE in their names, but those aren't
> accurate descriptions of what they do on current platforms.

I've heard this a few times, but so far I've not been able to find
any evidence for it in bspec. I suppose to find out for sure I'm
going to have to test it on actual hardware.

I did stumble on some future platform stuff that shuffles around the
MOCS stuff once again, and looks like it adds in a specific PTE vs.
MOCS bit. So if it was removed, it's almost immediately coming back.

> We note
> this in the code:
> 
>    /*
>     * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
>     * the same as LE_UC
>     */
> 
> Even on gen11 the PAT entry was documented as being deprecated, so we
> knew it was going away.  I don't know why the hardware design changed,
> but it did.

Yeah, I've seen it, but could never figure out on what it was based.

> 
> > 
> > Historically I believe LLC cacheability has been on average a win.
> > Some workloads can do better with UC though. So if we are giving up
> > on LLC cacheability we should have some numbers to back up that
> > decision so that we're not dropping tons of performance on the floor.
> 
> It's not our decision to make.  The hardware design has already made
> this decision for us.  No matter what we program into the MOCS tables;
> we're simply not getting back the behavior you're asking for and blindly
> using the "LE_0_PAGETABLE" flags or whatever is just giving you
> uncached.

As said it looks to be coming back soon, if it ever went away.

> 
> > 
> > > > > > using entry 61. I think there is a massive communication gap here
> > > > > > where everyone just seems to assume the other side is doing something.
> > > > > > 
> > > > > > Could someone actually come up with a clear abi definition for this
> > > > > > and get all the stakeholders to sign off on it?
> > > > > 
> > > > > The agreement between the i915 team, various userspace teams, Windows
> > > > > driver team, hardware architects, software architects, and bspec writers
> > > > > was just completed; that's what triggered the kernel updates here (and
> > > > > I'm guessing is triggering similar work on the UMD side).  It's also why
> > > > > we held off on removing the force_probe flag on ADL until now since we
> > > > > couldn't consider enablement of the platform complete until the
> > > > > agreement and definitions here was finalized.
> > > > 
> > > > Can we get that agreement visible on the mailing list? Since MOCS is
> > > > abi I don't see why we shouldn't follow the normal abi rules for these,
> > > > ie. post to dri-devel, get acks from relevant people, links to agreed
> > > > userspace changes, etc.
> > > 
> > > The ABI design here was designed and agreed upon years ago, during early
> > > gen11 development.  The ABI design is that the kernel driver will
> > > faithfully initialize the hardware with the pre-determined set of magic
> > > numbers documented by the hardware team.  Since these are
> > > well-documented and unchanging numbers per-platform, there's no
> > > ambiguity for userspace, firmware, etc. about what a specific mocs index
> > > means, and no need to provide additional ABI for userspace to query what
> > > the kernel used in each row or anything like that.  The specific magic
> > > numbers are also ABI in the sense that we can't change the set of
> > > defined values once they're set for a platform (and it's been a long
> > > road to get the hardware and other OS software teams to understand and
> > > agree to this requirement), but we don't get to define or overrule what
> > > the initial values and order of those magic numbers are.
> > 
> > And this apporach has clearly not worked considering userspace and
> > kernel have not agreed on what the abi is. We need to do better.
> 
> Userspace and kernel *do* agree on what the ABI is.  There may be a
> delay in userspace enabling the MOCS properly on a new platform.

The fact that Mesa never started to use the MOCS entry supposedly
meant for display stuff is evidence to the contrary.

> But
> the changes here (which make everything fully cached by default) will
> help catch those cases where a bit of enablement was forgotten on the
> UMD side.  They won't "accidentally work" on initial bringup on the new
> platform, and the lack of proper MOCS programming will be quickly
> identified.

I'm looking at the history and not really convinced.

> 
> > 
> > > 
> > > What is a bit vague in the formal documentation is what should be done
> > > about the reserved/dontuse table entries.  In theory it wouldn't matter
> > > since they'd never be used anyway, but in reality userspace can still
> > > use them by accident, such as by forgetting to update their MOCS
> > > selection logic from past platforms (e.g., still trying to use row #1 on
> > > platforms where it isn't defined).  Given that it's legal for entries to
> > > be added to MOCS tables, but never removed/modified, it follows that we
> > > should always initialize the undefined entries to fully cached; if a
> > > MOCS table update happens in the future and new rows show up, they can
> > > only become more coherent, and any userspace software that was
> > > incorrectly trying to use them previously will remain functionally
> > > correct.
> > > 
> > > What you're proposing would be a change to existing ABI --- instead of
> > > following the agreed upon contract, i915 would start defining its own
> > > set of magic numbers that potentially contradict the documentation that
> > > every other team is depending on.  We already made this mistake on
> > > TGL/RKL, so due to an i915 bug we're outside the spec; if entry #1 ever
> > > becomes a formally defined setting in the future, the rest of the
> > > software stack will need to explicitly work around i915's bug since we
> > > can't fix it now without breaking ABI.
> > > 
> > > If you really want to redefine how the MOCS ABI works and have i915 no
> > > longer follow the current contract, I think you need to do the
> > > following:
> > 
> > I want the abi to be actually defined properly, and some assurance that
> > all the stakeholders implement it correctly. Following the proper abi
> > rules for kernel development would guarantee that.
> 
> This is what we have today.  As I said, if you want to propose a change
> to the ABI, then you need to take the proper steps to do so.  But as far
> as I can tell, most of your concern here boils down to not realizing
> that the hardware capabilities you want to use were taken away from us
> in gen12.

The fact that it's not actually properly documented anywhere certainly
doesn't help in figuring out what is going on.

> 
> > 
> > I would also like if the abi can give us the best performance rather
> > than potentially crippling it. Ie. I would expect to have a PTE MOCS
> > setting for potential scanout buffers, or some proof that pure UC
> > will in fact be a better choice.
> 
> Again, this isn't a choice.  This is a fact of how the hardware works on
> gen12.  Maybe you'll regain PTE flags on a future platform, but that's
> simply not something software can ask for on today's platforms.

There should be clear guidelines what MOCS index userspace is
supposed to use in each case. That more or less used to be the
case with the handful of MOCS entries we had defined (always
uncached vs. always cached vs. potential scanout buffers which
could be either depending on the situation). But that nice simple
approach wasn't good enough for whatever reason.
Matt Roper Sept. 9, 2021, 8:33 p.m. UTC | #15
On Thu, Sep 09, 2021 at 10:59:36PM +0300, Ville Syrjälä wrote:
> On Thu, Sep 09, 2021 at 11:14:15AM -0700, Matt Roper wrote:
> > On Thu, Sep 09, 2021 at 08:42:15PM +0300, Ville Syrjälä wrote:
> > > On Thu, Sep 09, 2021 at 10:15:56AM -0700, Matt Roper wrote:
> > > > On Thu, Sep 09, 2021 at 06:09:55PM +0300, Ville Syrjälä wrote:
> > > > > On Thu, Sep 09, 2021 at 08:00:02AM -0700, Matt Roper wrote:
> > > > > > On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> > > > > > > On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > > > > > > > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > > > > > > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > > > > > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > > > > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > > > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > > > > > > > 
> > > > > > > > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > > > > > > > 
> > > > > > > > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > > > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > > > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > > > > > > > I915_MOCS_PTE (1) removed.
> > > > > > > > > > > 
> > > > > > > > > > > And just how are people planning on handling display cacheability
> > > > > > > > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > > > > > > > external bos uncached on these platforms just in case we might
> > > > > > > > > > > scan out said bo?
> > > > > > > > > > 
> > > > > > > > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > > > > > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > > > > > > > display, etc. anymore).
> > > > > > > > > 
> > > > > > > > > So can someone finally explain to me how we're supposed to cache
> > > > > > > > > anything that might become a scanout buffer later (eg. window system
> > > > > > > > > buffers)? Or are we just making everything like that UC now, and is
> > > > > > > > > everyone happy with that? Is userspace actually following that?
> > > > > > > > 
> > > > > > > > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > > > > > > > would assume that UMDs are either using the display entry in the MOCS
> > > > > > > > table (which is 61 on gen12+) or some other UC entry.
> > > > > > > 
> > > > > > > If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> > > > > > > such in the code? And I know for a fact that userspace (Mesa) is not
> > > > > > 
> > > > > > There is no "PTE" entry anymore.  But 61 is already documented as
> > > > > > "displayable" in both the spec and the code:
> > > > > > 
> > > > > >         /* HW Special Case (Displayable) */                                      
> > > > > >         MOCS_ENTRY(61,                             
> > > > > 
> > > > > Why is it called a "HW special case"? I don't think there's any hw
> > > > > magic in there?
> > > > > 
> > > > > And why aren't we setting it to PTE to get some cacheability for
> > > > > window back buffers and such?
> > > > 
> > > > Who is "we" here?
> > > 
> > > We who care about the performance of the system.
> > > 
> > > > The MOCS table is a pre-defined set of per-platform
> > > > magic numbers.  The software teams don't get to decide what the values
> > > > are, we just program the hardware with the per-platform numbers that
> > > > have been agreed upon as part of a platform-wide stack (everything from
> > > > low-level firmware to high level userspace should be working from the
> > > > same table, defined in the bspec).
> > > 
> > > The magic numbers must be based on something. If that something is
> > > purely Windows behaviour/performance then we might be shooting
> > > ourselves in the foot here.
> > 
> > That's not how MOCS works.  The MOCS tables define every meaningful
> > combination of settings somewhere in the table.  The *types* of settings
> > that can be expressed change from platform to platform (e.g.,
> > "PAGETABLE" setting simply doesn't exist anymore hardware-wise) so the
> > tables themselves differ between platforms and you may need to use
> > different indices to get the same behavior between platforms.  But if
> > you're actually paying attention to the tables and choosing the right
> > entries, you're not going to leave any performance on the table.
> > 
> > > 
> > > > 
> > > > Once we know what the per-platform magic numbers are, we're supposed to
> > > > pick the table entry that matches the behavior we're trying to
> > > > accomplish.  If you want some specific level of cacheability, then you
> > > > select a table row that gives you that.  Maybe 61 isn't the best
> > > > setting, I don't know; userspace can pick whichever defined setting is
> > > > actually best, using the data from the table.  But table row #1 is
> > > > already well-documented as reserved/dontuse across the full stack; the
> > > > fact that row #1 had values similar to PTE on Icelake hardware doesn't
> > > > carry forward to any post-gen11 platform.
> > > 
> > > The only way you can get LLC cacheability for an external BO (window
> > > back buffers and such) is by using a MOCS entry that directs the hardware
> > > to consult the PTEs. Otherwise the client doing the rendering would have
> > > to know ahead of time whether the buffer is going to be directly scanned
> > > out by the compositor or not, for which there is no protocol in
> > > X or wayland.
> > 
> > It sounds like you're complaining about gen12 hardware design rather
> > than anything actually software or ABI related.  'Consult the PTEs'
> > hasn't been something the hardware has supported since gen11. We still
> > have macros with PTE or PAGETABLE in their names, but those aren't
> > accurate descriptions of what they do on current platforms.
> 
> I've heard this a few times, but so far I've not been able to find
> any evidence for it in bspec. I suppose to find out for sure I'm
> going to have to test it on actual hardware.

For gen11, bspec page 560 describes the layout of the GFX_MOCS_x
registers (which at the time were per-engine).  Specifically:

        Memory type information used in LLC/eDRAM.

        00b: Use Cacheability Controls from page table / UC with Fence
             (if coherent cycle).
        01b: Uncacheable (UC) – non-cacheable.
        10b: Writethrough (WT).
        11b: Writeback (WB).
        
So on that platform you can get the PTE behavior you're after by
programming a value of 00 into the relevant bits (1:0).

For gen12+, bspec page 44053 describes the layout of GLOB_MOCS_LECC_x
(which is now a global value, not per-engine):

	Memory type information used in LLC/eDRAM.

	00: Uncacheable (UC)
	01: Uncacheable (WC)
	10: Writethrough (WT)
	11: Writeback (WB)

So the value of '00' as expressed in a MOCS table no longer means what
it did on older platforms.  There's also a "Description" field on this
page that shows that the old meaning of 00 is explicitly removed by
gen12 design.

> 
> I did stumble on some future platform stuff that shuffles around the
> MOCS stuff once again, and looks like it adds in a specific PTE vs.
> MOCS bit. So if it was removed, it's almost immediately coming back.

Yes, bspec page 65553 makes it look like there is a new bit to handle
this eventually on a future platform.  But we have multiple new
platforms between now and then that we still need to support properly,
starting with ADL, and that bit (and the rest of the various MOCS
changes) aren't something we can use yet.

> 
> > We note
> > this in the code:
> > 
> >    /*
> >     * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
> >     * the same as LE_UC
> >     */
> > 
> > Even on gen11 the PAT entry was documented as being deprecated, so we
> > knew it was going away.  I don't know why the hardware design changed,
> > but it did.
> 
> Yeah, I've seen it, but could never figure out on what it was based.
> 
> > 
> > > 
> > > Historically I believe LLC cacheability has been on average a win.
> > > Some workloads can do better with UC though. So if we are giving up
> > > on LLC cacheability we should have some numbers to back up that
> > > decision so that we're not dropping tons of performance on the floor.
> > 
> > It's not our decision to make.  The hardware design has already made
> > this decision for us.  No matter what we program into the MOCS tables;
> > we're simply not getting back the behavior you're asking for and blindly
> > using the "LE_0_PAGETABLE" flags or whatever is just giving you
> > uncached.
> 
> As said it looks to be coming back soon, if it ever went away.
> 
> > 
> > > 
> > > > > > > using entry 61. I think there is a massive communication gap here
> > > > > > > where everyone just seems to assume the other side is doing something.
> > > > > > > 
> > > > > > > Could someone actually come up with a clear abi definition for this
> > > > > > > and get all the stakeholders to sign off on it?
> > > > > > 
> > > > > > The agreement between the i915 team, various userspace teams, Windows
> > > > > > driver team, hardware architects, software architects, and bspec writers
> > > > > > was just completed; that's what triggered the kernel updates here (and
> > > > > > I'm guessing is triggering similar work on the UMD side).  It's also why
> > > > > > we held off on removing the force_probe flag on ADL until now since we
> > > > > > couldn't consider enablement of the platform complete until the
> > > > > > agreement and definitions here was finalized.
> > > > > 
> > > > > Can we get that agreement visible on the mailing list? Since MOCS is
> > > > > abi I don't see why we shouldn't follow the normal abi rules for these,
> > > > > ie. post to dri-devel, get acks from relevant people, links to agreed
> > > > > userspace changes, etc.
> > > > 
> > > > The ABI design here was designed and agreed upon years ago, during early
> > > > gen11 development.  The ABI design is that the kernel driver will
> > > > faithfully initialize the hardware with the pre-determined set of magic
> > > > numbers documented by the hardware team.  Since these are
> > > > well-documented and unchanging numbers per-platform, there's no
> > > > ambiguity for userspace, firmware, etc. about what a specific mocs index
> > > > means, and no need to provide additional ABI for userspace to query what
> > > > the kernel used in each row or anything like that.  The specific magic
> > > > numbers are also ABI in the sense that we can't change the set of
> > > > defined values once they're set for a platform (and it's been a long
> > > > road to get the hardware and other OS software teams to understand and
> > > > agree to this requirement), but we don't get to define or overrule what
> > > > the initial values and order of those magic numbers are.
> > > 
> > > And this apporach has clearly not worked considering userspace and
> > > kernel have not agreed on what the abi is. We need to do better.
> > 
> > Userspace and kernel *do* agree on what the ABI is.  There may be a
> > delay in userspace enabling the MOCS properly on a new platform.
> 
> The fact that Mesa never started to use the MOCS entry supposedly
> meant for display stuff is evidence to the contrary.

I think this is just an oversight by the Mesa team where they didn't
notice the gen12 table was different from gen11.  I don't think it's
because they don't agree that the table described in the bspec provides
the right settings.


> 
> > But
> > the changes here (which make everything fully cached by default) will
> > help catch those cases where a bit of enablement was forgotten on the
> > UMD side.  They won't "accidentally work" on initial bringup on the new
> > platform, and the lack of proper MOCS programming will be quickly
> > identified.
> 
> I'm looking at the history and not really convinced.

We don't have history to compare to yet because we screwed up on TGL/RKL
and the use of an outdated MOCS entry by Mesa went unnoticed.  If we
actually do things right on ADL from day 1, then this will be our chance
to catch the mistakes like this.

> 
> > 
> > > 
> > > > 
> > > > What is a bit vague in the formal documentation is what should be done
> > > > about the reserved/dontuse table entries.  In theory it wouldn't matter
> > > > since they'd never be used anyway, but in reality userspace can still
> > > > use them by accident, such as by forgetting to update their MOCS
> > > > selection logic from past platforms (e.g., still trying to use row #1 on
> > > > platforms where it isn't defined).  Given that it's legal for entries to
> > > > be added to MOCS tables, but never removed/modified, it follows that we
> > > > should always initialize the undefined entries to fully cached; if a
> > > > MOCS table update happens in the future and new rows show up, they can
> > > > only become more coherent, and any userspace software that was
> > > > incorrectly trying to use them previously will remain functionally
> > > > correct.
> > > > 
> > > > What you're proposing would be a change to existing ABI --- instead of
> > > > following the agreed upon contract, i915 would start defining its own
> > > > set of magic numbers that potentially contradict the documentation that
> > > > every other team is depending on.  We already made this mistake on
> > > > TGL/RKL, so due to an i915 bug we're outside the spec; if entry #1 ever
> > > > becomes a formally defined setting in the future, the rest of the
> > > > software stack will need to explicitly work around i915's bug since we
> > > > can't fix it now without breaking ABI.
> > > > 
> > > > If you really want to redefine how the MOCS ABI works and have i915 no
> > > > longer follow the current contract, I think you need to do the
> > > > following:
> > > 
> > > I want the abi to be actually defined properly, and some assurance that
> > > all the stakeholders implement it correctly. Following the proper abi
> > > rules for kernel development would guarantee that.
> > 
> > This is what we have today.  As I said, if you want to propose a change
> > to the ABI, then you need to take the proper steps to do so.  But as far
> > as I can tell, most of your concern here boils down to not realizing
> > that the hardware capabilities you want to use were taken away from us
> > in gen12.
> 
> The fact that it's not actually properly documented anywhere certainly
> doesn't help in figuring out what is going on.

There's a pretty lengthy comment explaining everything right before the
tables start in intel_mocs.c already.  And the table values themselves
are well-documented in the bspec (although justification for why things
like PTE capability have disappeared in gen12 hardware design is
something only the hardware teams know).  We can extend the code comment
if you think something is still lacking from the description.

> 
> > 
> > > 
> > > I would also like if the abi can give us the best performance rather
> > > than potentially crippling it. Ie. I would expect to have a PTE MOCS
> > > setting for potential scanout buffers, or some proof that pure UC
> > > will in fact be a better choice.
> > 
> > Again, this isn't a choice.  This is a fact of how the hardware works on
> > gen12.  Maybe you'll regain PTE flags on a future platform, but that's
> > simply not something software can ask for on today's platforms.
> 
> There should be clear guidelines what MOCS index userspace is
> supposed to use in each case. That more or less used to be the
> case with the handful of MOCS entries we had defined (always
> uncached vs. always cached vs. potential scanout buffers which
> could be either depending on the situation). But that nice simple
> approach wasn't good enough for whatever reason.

I think the reality is that the real world doesn't fit into those small
handful of nice categories.  Userspace works with lots of different
buffers, many of which are only ever used by a single process and in
relation to a single IP block, and the userspace teams are really the
experts on the precise cache behavior they need.  Having the full table
of combinations available lets them choose exactly what's necessary
instead of being artificially limited to the small number i915 bothered
to implement; that would be a case where we'd potentially be leaving
performance on the table.  I suspect that you're right that there's a
relatively small subset of the MOCS table that's useful for the subset
of buffers that may be shared between processes and/or between
high-level graphics/media/display IP blocks, but again the userspace
teams are really the experts in this area and shouldn't have trouble
identifying which combinations of settings are suitable and optimal.


Matt

> 
> -- 
> Ville Syrjälä
> Intel
Ville Syrjälä Sept. 9, 2021, 9:28 p.m. UTC | #16
On Thu, Sep 09, 2021 at 01:33:23PM -0700, Matt Roper wrote:
> On Thu, Sep 09, 2021 at 10:59:36PM +0300, Ville Syrjälä wrote:
> > On Thu, Sep 09, 2021 at 11:14:15AM -0700, Matt Roper wrote:
> > > On Thu, Sep 09, 2021 at 08:42:15PM +0300, Ville Syrjälä wrote:
> > > > On Thu, Sep 09, 2021 at 10:15:56AM -0700, Matt Roper wrote:
> > > > > On Thu, Sep 09, 2021 at 06:09:55PM +0300, Ville Syrjälä wrote:
> > > > > > On Thu, Sep 09, 2021 at 08:00:02AM -0700, Matt Roper wrote:
> > > > > > > On Thu, Sep 09, 2021 at 05:39:26PM +0300, Ville Syrjälä wrote:
> > > > > > > > On Thu, Sep 09, 2021 at 07:29:33AM -0700, Matt Roper wrote:
> > > > > > > > > On Thu, Sep 09, 2021 at 04:58:50PM +0300, Ville Syrjälä wrote:
> > > > > > > > > > On Tue, Sep 07, 2021 at 11:19:29AM -0700, Matt Roper wrote:
> > > > > > > > > > > On Tue, Sep 07, 2021 at 08:41:06PM +0300, Ville Syrjälä wrote:
> > > > > > > > > > > > On Tue, Sep 07, 2021 at 10:27:28AM -0700, Matt Roper wrote:
> > > > > > > > > > > > > On Tue, Sep 07, 2021 at 10:46:39PM +0530, Ayaz A Siddiqui wrote:
> > > > > > > > > > > > > > MOCS table of TGL/RKL has MOCS[1] set to L3_UC.
> > > > > > > > > > > > > > While for other gen12 devices we need to set MOCS[1] as L3_WB,
> > > > > > > > > > > > > > So adding a new MOCS table for other gen 12 devices eg. ADL.
> > > > > > > > > > > > > > 
> > > > > > > > > > > > > > Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values")
> > > > > > > > > > > > > > Cc: Matt Roper <matthew.d.roper@intel.com>
> > > > > > > > > > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
> > > > > > > > > > > > > 
> > > > > > > > > > > > > Yep, we overlooked that the TGL table still had an explicit entry for
> > > > > > > > > > > > > I915_MOCS_PTE and wasn't just using an implicit 'unused_entries' lookup
> > > > > > > > > > > > > for MOCS[1].  The new table is the same as the TGL table, just with
> > > > > > > > > > > > > I915_MOCS_PTE (1) removed.
> > > > > > > > > > > > 
> > > > > > > > > > > > And just how are people planning on handling display cacheability
> > > > > > > > > > > > control without a PTE MOCS entry? Is Mesa/etc. already making all
> > > > > > > > > > > > external bos uncached on these platforms just in case we might
> > > > > > > > > > > > scan out said bo?
> > > > > > > > > > > 
> > > > > > > > > > > MOCS entry 1 has never been considered a valid MOCS table entry on gen12
> > > > > > > > > > > platforms (despite the old #define, it's not actually related to PTE,
> > > > > > > > > > > display, etc. anymore).
> > > > > > > > > > 
> > > > > > > > > > So can someone finally explain to me how we're supposed to cache
> > > > > > > > > > anything that might become a scanout buffer later (eg. window system
> > > > > > > > > > buffers)? Or are we just making everything like that UC now, and is
> > > > > > > > > > everyone happy with that? Is userspace actually following that?
> > > > > > > > > 
> > > > > > > > > Table entry #1 has never had anything to do with scanout on gen12+.  I
> > > > > > > > > would assume that UMDs are either using the display entry in the MOCS
> > > > > > > > > table (which is 61 on gen12+) or some other UC entry.
> > > > > > > > 
> > > > > > > > If 61 is meant to to be the new PTE entry wy hasn't it been defines as
> > > > > > > > such in the code? And I know for a fact that userspace (Mesa) is not
> > > > > > > 
> > > > > > > There is no "PTE" entry anymore.  But 61 is already documented as
> > > > > > > "displayable" in both the spec and the code:
> > > > > > > 
> > > > > > >         /* HW Special Case (Displayable) */                                      
> > > > > > >         MOCS_ENTRY(61,                             
> > > > > > 
> > > > > > Why is it called a "HW special case"? I don't think there's any hw
> > > > > > magic in there?
> > > > > > 
> > > > > > And why aren't we setting it to PTE to get some cacheability for
> > > > > > window back buffers and such?
> > > > > 
> > > > > Who is "we" here?
> > > > 
> > > > We who care about the performance of the system.
> > > > 
> > > > > The MOCS table is a pre-defined set of per-platform
> > > > > magic numbers.  The software teams don't get to decide what the values
> > > > > are, we just program the hardware with the per-platform numbers that
> > > > > have been agreed upon as part of a platform-wide stack (everything from
> > > > > low-level firmware to high level userspace should be working from the
> > > > > same table, defined in the bspec).
> > > > 
> > > > The magic numbers must be based on something. If that something is
> > > > purely Windows behaviour/performance then we might be shooting
> > > > ourselves in the foot here.
> > > 
> > > That's not how MOCS works.  The MOCS tables define every meaningful
> > > combination of settings somewhere in the table.  The *types* of settings
> > > that can be expressed change from platform to platform (e.g.,
> > > "PAGETABLE" setting simply doesn't exist anymore hardware-wise) so the
> > > tables themselves differ between platforms and you may need to use
> > > different indices to get the same behavior between platforms.  But if
> > > you're actually paying attention to the tables and choosing the right
> > > entries, you're not going to leave any performance on the table.
> > > 
> > > > 
> > > > > 
> > > > > Once we know what the per-platform magic numbers are, we're supposed to
> > > > > pick the table entry that matches the behavior we're trying to
> > > > > accomplish.  If you want some specific level of cacheability, then you
> > > > > select a table row that gives you that.  Maybe 61 isn't the best
> > > > > setting, I don't know; userspace can pick whichever defined setting is
> > > > > actually best, using the data from the table.  But table row #1 is
> > > > > already well-documented as reserved/dontuse across the full stack; the
> > > > > fact that row #1 had values similar to PTE on Icelake hardware doesn't
> > > > > carry forward to any post-gen11 platform.
> > > > 
> > > > The only way you can get LLC cacheability for an external BO (window
> > > > back buffers and such) is by using a MOCS entry that directs the hardware
> > > > to consult the PTEs. Otherwise the client doing the rendering would have
> > > > to know ahead of time whether the buffer is going to be directly scanned
> > > > out by the compositor or not, for which there is no protocol in
> > > > X or wayland.
> > > 
> > > It sounds like you're complaining about gen12 hardware design rather
> > > than anything actually software or ABI related.  'Consult the PTEs'
> > > hasn't been something the hardware has supported since gen11. We still
> > > have macros with PTE or PAGETABLE in their names, but those aren't
> > > accurate descriptions of what they do on current platforms.
> > 
> > I've heard this a few times, but so far I've not been able to find
> > any evidence for it in bspec. I suppose to find out for sure I'm
> > going to have to test it on actual hardware.
> 
> For gen11, bspec page 560 describes the layout of the GFX_MOCS_x
> registers (which at the time were per-engine).  Specifically:
> 
>         Memory type information used in LLC/eDRAM.
> 
>         00b: Use Cacheability Controls from page table / UC with Fence
>              (if coherent cycle).
>         01b: Uncacheable (UC) – non-cacheable.
>         10b: Writethrough (WT).
>         11b: Writeback (WB).
>         
> So on that platform you can get the PTE behavior you're after by
> programming a value of 00 into the relevant bits (1:0).
> 
> For gen12+, bspec page 44053 describes the layout of GLOB_MOCS_LECC_x
> (which is now a global value, not per-engine):
> 
> 	Memory type information used in LLC/eDRAM.
> 
> 	00: Uncacheable (UC)
> 	01: Uncacheable (WC)
> 	10: Writethrough (WT)
> 	11: Writeback (WB)
> 
> So the value of '00' as expressed in a MOCS table no longer means what
> it did on older platforms.  There's also a "Description" field on this
> page that shows that the old meaning of 00 is explicitly removed by
> gen12 design.

Page 45100 still has the old description for tgl. Sigh.

> 
> > 
> > I did stumble on some future platform stuff that shuffles around the
> > MOCS stuff once again, and looks like it adds in a specific PTE vs.
> > MOCS bit. So if it was removed, it's almost immediately coming back.
> 
> Yes, bspec page 65553 makes it look like there is a new bit to handle
> this eventually on a future platform.  But we have multiple new
> platforms between now and then that we still need to support properly,
> starting with ADL, and that bit (and the rest of the various MOCS
> changes) aren't something we can use yet.
> 
> > 
> > > We note
> > > this in the code:
> > > 
> > >    /*
> > >     * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
> > >     * the same as LE_UC
> > >     */
> > > 
> > > Even on gen11 the PAT entry was documented as being deprecated, so we
> > > knew it was going away.  I don't know why the hardware design changed,
> > > but it did.
> > 
> > Yeah, I've seen it, but could never figure out on what it was based.
> > 
> > > 
> > > > 
> > > > Historically I believe LLC cacheability has been on average a win.
> > > > Some workloads can do better with UC though. So if we are giving up
> > > > on LLC cacheability we should have some numbers to back up that
> > > > decision so that we're not dropping tons of performance on the floor.
> > > 
> > > It's not our decision to make.  The hardware design has already made
> > > this decision for us.  No matter what we program into the MOCS tables;
> > > we're simply not getting back the behavior you're asking for and blindly
> > > using the "LE_0_PAGETABLE" flags or whatever is just giving you
> > > uncached.
> > 
> > As said it looks to be coming back soon, if it ever went away.
> > 
> > > 
> > > > 
> > > > > > > > using entry 61. I think there is a massive communication gap here
> > > > > > > > where everyone just seems to assume the other side is doing something.
> > > > > > > > 
> > > > > > > > Could someone actually come up with a clear abi definition for this
> > > > > > > > and get all the stakeholders to sign off on it?
> > > > > > > 
> > > > > > > The agreement between the i915 team, various userspace teams, Windows
> > > > > > > driver team, hardware architects, software architects, and bspec writers
> > > > > > > was just completed; that's what triggered the kernel updates here (and
> > > > > > > I'm guessing is triggering similar work on the UMD side).  It's also why
> > > > > > > we held off on removing the force_probe flag on ADL until now since we
> > > > > > > couldn't consider enablement of the platform complete until the
> > > > > > > agreement and definitions here was finalized.
> > > > > > 
> > > > > > Can we get that agreement visible on the mailing list? Since MOCS is
> > > > > > abi I don't see why we shouldn't follow the normal abi rules for these,
> > > > > > ie. post to dri-devel, get acks from relevant people, links to agreed
> > > > > > userspace changes, etc.
> > > > > 
> > > > > The ABI design here was designed and agreed upon years ago, during early
> > > > > gen11 development.  The ABI design is that the kernel driver will
> > > > > faithfully initialize the hardware with the pre-determined set of magic
> > > > > numbers documented by the hardware team.  Since these are
> > > > > well-documented and unchanging numbers per-platform, there's no
> > > > > ambiguity for userspace, firmware, etc. about what a specific mocs index
> > > > > means, and no need to provide additional ABI for userspace to query what
> > > > > the kernel used in each row or anything like that.  The specific magic
> > > > > numbers are also ABI in the sense that we can't change the set of
> > > > > defined values once they're set for a platform (and it's been a long
> > > > > road to get the hardware and other OS software teams to understand and
> > > > > agree to this requirement), but we don't get to define or overrule what
> > > > > the initial values and order of those magic numbers are.
> > > > 
> > > > And this apporach has clearly not worked considering userspace and
> > > > kernel have not agreed on what the abi is. We need to do better.
> > > 
> > > Userspace and kernel *do* agree on what the ABI is.  There may be a
> > > delay in userspace enabling the MOCS properly on a new platform.
> > 
> > The fact that Mesa never started to use the MOCS entry supposedly
> > meant for display stuff is evidence to the contrary.
> 
> I think this is just an oversight by the Mesa team where they didn't
> notice the gen12 table was different from gen11.  I don't think it's
> because they don't agree that the table described in the bspec provides
> the right settings.

I never suggested they didn't agree. I'm saying that because the
documentation/communication is poor these kinds of oversights just
tend to happen. That's why some real abi docs explaining which 
entry one should use would be good. And following the usual abi
rules of having links to corresponding userspace changes would
guarantee that userspace is in sync before we lock things down.

> 
> 
> > 
> > > But
> > > the changes here (which make everything fully cached by default) will
> > > help catch those cases where a bit of enablement was forgotten on the
> > > UMD side.  They won't "accidentally work" on initial bringup on the new
> > > platform, and the lack of proper MOCS programming will be quickly
> > > identified.
> > 
> > I'm looking at the history and not really convinced.
> 
> We don't have history to compare to yet because we screwed up on TGL/RKL
> and the use of an outdated MOCS entry by Mesa went unnoticed.  If we
> actually do things right on ADL from day 1, then this will be our chance
> to catch the mistakes like this.

tgl wasn't the only time when our MOCS stuff was
unclear/used wrong. In fact the L3 cacheability vs.
display question is still open and I never managed
to get a straight answer what userspace wants. So
for the time being we make scanout buffers L3 cacheable
on most platforms, which means at least theoretically 
evictions from L3 into LLC can cause display corruption.

And on a few platforms we don't use L3 for display buffers
mostly by accident. CHV being one case where I think L3
caching would actually be safe since there's no LLC to
mess up the display coherency.

We also didn't even have the proper MOCS entries defined for
L3 evictions/etc. until recently when I+Chris noticed that
this stuff is just totally missing.

> 
> > 
> > > 
> > > > 
> > > > > 
> > > > > What is a bit vague in the formal documentation is what should be done
> > > > > about the reserved/dontuse table entries.  In theory it wouldn't matter
> > > > > since they'd never be used anyway, but in reality userspace can still
> > > > > use them by accident, such as by forgetting to update their MOCS
> > > > > selection logic from past platforms (e.g., still trying to use row #1 on
> > > > > platforms where it isn't defined).  Given that it's legal for entries to
> > > > > be added to MOCS tables, but never removed/modified, it follows that we
> > > > > should always initialize the undefined entries to fully cached; if a
> > > > > MOCS table update happens in the future and new rows show up, they can
> > > > > only become more coherent, and any userspace software that was
> > > > > incorrectly trying to use them previously will remain functionally
> > > > > correct.
> > > > > 
> > > > > What you're proposing would be a change to existing ABI --- instead of
> > > > > following the agreed upon contract, i915 would start defining its own
> > > > > set of magic numbers that potentially contradict the documentation that
> > > > > every other team is depending on.  We already made this mistake on
> > > > > TGL/RKL, so due to an i915 bug we're outside the spec; if entry #1 ever
> > > > > becomes a formally defined setting in the future, the rest of the
> > > > > software stack will need to explicitly work around i915's bug since we
> > > > > can't fix it now without breaking ABI.
> > > > > 
> > > > > If you really want to redefine how the MOCS ABI works and have i915 no
> > > > > longer follow the current contract, I think you need to do the
> > > > > following:
> > > > 
> > > > I want the abi to be actually defined properly, and some assurance that
> > > > all the stakeholders implement it correctly. Following the proper abi
> > > > rules for kernel development would guarantee that.
> > > 
> > > This is what we have today.  As I said, if you want to propose a change
> > > to the ABI, then you need to take the proper steps to do so.  But as far
> > > as I can tell, most of your concern here boils down to not realizing
> > > that the hardware capabilities you want to use were taken away from us
> > > in gen12.
> > 
> > The fact that it's not actually properly documented anywhere certainly
> > doesn't help in figuring out what is going on.
> 
> There's a pretty lengthy comment explaining everything right before the
> tables start in intel_mocs.c already.  And the table values themselves
> are well-documented in the bspec (although justification for why things
> like PTE capability have disappeared in gen12 hardware design is
> something only the hardware teams know).  We can extend the code comment
> if you think something is still lacking from the description.

Proper abi docs should probably be somewhere a bit more prominent.
Maybe we should even have real defines for these in some uapi header.

> 
> > 
> > > 
> > > > 
> > > > I would also like if the abi can give us the best performance rather
> > > > than potentially crippling it. Ie. I would expect to have a PTE MOCS
> > > > setting for potential scanout buffers, or some proof that pure UC
> > > > will in fact be a better choice.
> > > 
> > > Again, this isn't a choice.  This is a fact of how the hardware works on
> > > gen12.  Maybe you'll regain PTE flags on a future platform, but that's
> > > simply not something software can ask for on today's platforms.
> > 
> > There should be clear guidelines what MOCS index userspace is
> > supposed to use in each case. That more or less used to be the
> > case with the handful of MOCS entries we had defined (always
> > uncached vs. always cached vs. potential scanout buffers which
> > could be either depending on the situation). But that nice simple
> > approach wasn't good enough for whatever reason.
> 
> I think the reality is that the real world doesn't fit into those small
> handful of nice categories.  Userspace works with lots of different
> buffers, many of which are only ever used by a single process and in
> relation to a single IP block, and the userspace teams are really the
> experts on the precise cache behavior they need.

Well, Mesa just uses just two entries. Maybe three these days (iirc
there was some magic index thing for some extra data cache thing).
I'm sure you could no end of very workload/resolution specific tuning.
Maybe the Windows people do a lot of that but Mesa certainly doesn't.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index e96afd7beb49..c8d289b00de4 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -315,6 +315,35 @@  static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
 	MOCS_ENTRY(63, 0, L3_1_UC),
 };
 
+static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
+
+	GEN11_MOCS_ENTRIES,
+	/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+	MOCS_ENTRY(48,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + L3 */
+	MOCS_ENTRY(49,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+	/* Implicitly enable L1 - HDC:L1 + LLC */
+	MOCS_ENTRY(50,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* Implicitly enable L1 - HDC:L1 */
+	MOCS_ENTRY(51,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_1_UC),
+	/* HW Special Case (CCS) */
+	MOCS_ENTRY(60,
+		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+		   L3_1_UC),
+	/* HW Special Case (Displayable) */
+	MOCS_ENTRY(61,
+		   LE_1_UC | LE_TC_1_LLC,
+		   L3_3_WB),
+};
+
 enum {
 	HAS_GLOBAL_MOCS = BIT(0),
 	HAS_ENGINE_MOCS = BIT(1),
@@ -351,14 +380,18 @@  static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
 		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 		table->uc_index = 1;
 		table->unused_entries_index = 5;
-	} else if (GRAPHICS_VER(i915) >= 12) {
+	} else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
+		/* For TGL/RKL, Can't be changed now for ABI reasons */
 		table->size  = ARRAY_SIZE(tgl_mocs_table);
 		table->table = tgl_mocs_table;
 		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
 		table->uc_index = 3;
-		/* For TGL/RKL, Can't be changed now for ABI reasons */
-		if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915))
-			table->unused_entries_index = 2;
+	} else if (GRAPHICS_VER(i915) >= 12) {
+		table->size  = ARRAY_SIZE(gen12_mocs_table);
+		table->table = gen12_mocs_table;
+		table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+		table->uc_index = 3;
+		table->unused_entries_index = 2;
 	} else if (GRAPHICS_VER(i915) == 11) {
 		table->size  = ARRAY_SIZE(icl_mocs_table);
 		table->table = icl_mocs_table;