diff mbox series

[v3] drm/i915/dg2: Add performance workaround 18019455067

Message ID 20220630083508.223348-1-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show
Series [v3] drm/i915/dg2: Add performance workaround 18019455067 | expand

Commit Message

Lionel Landwerlin June 30, 2022, 8:35 a.m. UTC
The recommended number of stackIDs for Ray Tracing subsystem is 512
rather than 2048 (default HW programming).

v2: Move the programming to dg2_ctx_gt_tuning_init() (Lucas)

v3: Move programming to general_render_compute_wa_init() (Matt)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h     | 4 ++++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++
 2 files changed, 13 insertions(+)

Comments

Lucas De Marchi June 30, 2022, 3:05 p.m. UTC | #1
"Add performance tuning for RT_CTRL" would be more accurate as this is
handled as a workaround in the spec.

On Thu, Jun 30, 2022 at 11:35:08AM +0300, Lionel Landwerlin wrote:
>The recommended number of stackIDs for Ray Tracing subsystem is 512
>rather than 2048 (default HW programming).
>
>v2: Move the programming to dg2_ctx_gt_tuning_init() (Lucas)

Sorry to nitpick, but as I said in my reply to v2, this is not accurate. I
make a lot of mistakes with this kind of thing, but this time I even
checked whether it was in the context state.

>
>v3: Move programming to general_render_compute_wa_init() (Matt)

and I also disagreed with this. We need an equivalent tuning function for
registers that are not context saved/restored. The tuning values are all in a
single place in the spec, and having them in their own functions in the
driver makes it much easier to check than having to search for them in the
middle of workarounds.

This just happens to be the first one, and we don't need another commit later
to move things around if we can get it right from the beginning.

Lucas De Marchi

>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>---
> drivers/gpu/drm/i915/gt/intel_gt_regs.h     | 4 ++++
> drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++
> 2 files changed, 13 insertions(+)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>index 07ef111947b8c..12fc87b957425 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>@@ -1112,6 +1112,10 @@
> #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS	REG_BIT(8)
>
> #define RT_CTRL					_MMIO(0xe530)
>+#define   RT_CTRL_NUMBER_OF_STACKIDS_MASK	REG_GENMASK(6, 5)
>+#define   NUMBER_OF_STACKIDS_512		2
>+#define   NUMBER_OF_STACKIDS_1024		1
>+#define   NUMBER_OF_STACKIDS_2048		0
> #define   DIS_NULL_QUERY			REG_BIT(10)
>
> #define EU_PERF_CNTL1				_MMIO(0xe558)
>diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>index 3213c593a55f4..ea674e456cd76 100644
>--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>@@ -2737,6 +2737,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
> 		wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
> 		wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
> 	}
>+
>+	if (IS_DG2(i915)) {
>+		/* Performance tuning for Ray-tracing */
>+		wa_write_clr_set(wal,
>+				 RT_CTRL,
>+				 RT_CTRL_NUMBER_OF_STACKIDS_MASK,
>+				 REG_FIELD_PREP(RT_CTRL_NUMBER_OF_STACKIDS_MASK,
>+						NUMBER_OF_STACKIDS_512));
>+	}
> }
>
> static void
>-- 
>2.34.1
>
Lionel Landwerlin July 11, 2022, 11:30 a.m. UTC | #2
Ping?

On 30/06/2022 11:35, Lionel Landwerlin wrote:
> The recommended number of stackIDs for Ray Tracing subsystem is 512
> rather than 2048 (default HW programming).
>
> v2: Move the programming to dg2_ctx_gt_tuning_init() (Lucas)
>
> v3: Move programming to general_render_compute_wa_init() (Matt)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_gt_regs.h     | 4 ++++
>   drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++
>   2 files changed, 13 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 07ef111947b8c..12fc87b957425 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1112,6 +1112,10 @@
>   #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS	REG_BIT(8)
>   
>   #define RT_CTRL					_MMIO(0xe530)
> +#define   RT_CTRL_NUMBER_OF_STACKIDS_MASK	REG_GENMASK(6, 5)
> +#define   NUMBER_OF_STACKIDS_512		2
> +#define   NUMBER_OF_STACKIDS_1024		1
> +#define   NUMBER_OF_STACKIDS_2048		0
>   #define   DIS_NULL_QUERY			REG_BIT(10)
>   
>   #define EU_PERF_CNTL1				_MMIO(0xe558)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 3213c593a55f4..ea674e456cd76 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2737,6 +2737,15 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
>   		wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
>   		wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
>   	}
> +
> +	if (IS_DG2(i915)) {
> +		/* Performance tuning for Ray-tracing */
> +		wa_write_clr_set(wal,
> +				 RT_CTRL,
> +				 RT_CTRL_NUMBER_OF_STACKIDS_MASK,
> +				 REG_FIELD_PREP(RT_CTRL_NUMBER_OF_STACKIDS_MASK,
> +						NUMBER_OF_STACKIDS_512));
> +	}
>   }
>   
>   static void
Lionel Landwerlin July 20, 2022, 8:19 a.m. UTC | #3
Ping?

On 11/07/2022 14:30, Lionel Landwerlin wrote:
> Ping?
>
> On 30/06/2022 11:35, Lionel Landwerlin wrote:
>> The recommended number of stackIDs for Ray Tracing subsystem is 512
>> rather than 2048 (default HW programming).
>>
>> v2: Move the programming to dg2_ctx_gt_tuning_init() (Lucas)
>>
>> v3: Move programming to general_render_compute_wa_init() (Matt)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_gt_regs.h     | 4 ++++
>>   drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++
>>   2 files changed, 13 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
>> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>> index 07ef111947b8c..12fc87b957425 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
>> @@ -1112,6 +1112,10 @@
>>   #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS    REG_BIT(8)
>>     #define RT_CTRL                    _MMIO(0xe530)
>> +#define   RT_CTRL_NUMBER_OF_STACKIDS_MASK    REG_GENMASK(6, 5)
>> +#define   NUMBER_OF_STACKIDS_512        2
>> +#define   NUMBER_OF_STACKIDS_1024        1
>> +#define   NUMBER_OF_STACKIDS_2048        0
>>   #define   DIS_NULL_QUERY            REG_BIT(10)
>>     #define EU_PERF_CNTL1                _MMIO(0xe558)
>> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
>> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> index 3213c593a55f4..ea674e456cd76 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> @@ -2737,6 +2737,15 @@ general_render_compute_wa_init(struct 
>> intel_engine_cs *engine, struct i915_wa_li
>>           wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
>>           wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
>>       }
>> +
>> +    if (IS_DG2(i915)) {
>> +        /* Performance tuning for Ray-tracing */
>> +        wa_write_clr_set(wal,
>> +                 RT_CTRL,
>> +                 RT_CTRL_NUMBER_OF_STACKIDS_MASK,
>> + REG_FIELD_PREP(RT_CTRL_NUMBER_OF_STACKIDS_MASK,
>> +                        NUMBER_OF_STACKIDS_512));
>> +    }
>>   }
>>     static void
>
>
Matt Roper July 25, 2022, 11:23 p.m. UTC | #4
I think you may have missed Lucas' reply to your v3:

https://lists.freedesktop.org/archives/intel-gfx/2022-June/300712.html

Also, here's the reply to v2 that he's referring to:

https://lists.freedesktop.org/archives/intel-gfx/2022-June/300646.html

I.e., he wants this to be called from a new 'tuning_init' function that
is itself called from general_render_compute_wa_init, since we expect
more of these things to show up in the future so it makes sense to have
a dedicated place for them.


Matt

On Wed, Jul 20, 2022 at 11:19:18AM +0300, Lionel Landwerlin wrote:
> Ping?
> 
> On 11/07/2022 14:30, Lionel Landwerlin wrote:
> > Ping?
> > 
> > On 30/06/2022 11:35, Lionel Landwerlin wrote:
> > > The recommended number of stackIDs for Ray Tracing subsystem is 512
> > > rather than 2048 (default HW programming).
> > > 
> > > v2: Move the programming to dg2_ctx_gt_tuning_init() (Lucas)
> > > 
> > > v3: Move programming to general_render_compute_wa_init() (Matt)
> > > 
> > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> > > ---
> > >   drivers/gpu/drm/i915/gt/intel_gt_regs.h     | 4 ++++
> > >   drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++
> > >   2 files changed, 13 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > index 07ef111947b8c..12fc87b957425 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > @@ -1112,6 +1112,10 @@
> > >   #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS    REG_BIT(8)
> > >     #define RT_CTRL                    _MMIO(0xe530)
> > > +#define   RT_CTRL_NUMBER_OF_STACKIDS_MASK    REG_GENMASK(6, 5)
> > > +#define   NUMBER_OF_STACKIDS_512        2
> > > +#define   NUMBER_OF_STACKIDS_1024        1
> > > +#define   NUMBER_OF_STACKIDS_2048        0
> > >   #define   DIS_NULL_QUERY            REG_BIT(10)
> > >     #define EU_PERF_CNTL1                _MMIO(0xe558)
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > index 3213c593a55f4..ea674e456cd76 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > > @@ -2737,6 +2737,15 @@ general_render_compute_wa_init(struct
> > > intel_engine_cs *engine, struct i915_wa_li
> > >           wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
> > >           wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
> > >       }
> > > +
> > > +    if (IS_DG2(i915)) {
> > > +        /* Performance tuning for Ray-tracing */
> > > +        wa_write_clr_set(wal,
> > > +                 RT_CTRL,
> > > +                 RT_CTRL_NUMBER_OF_STACKIDS_MASK,
> > > + REG_FIELD_PREP(RT_CTRL_NUMBER_OF_STACKIDS_MASK,
> > > +                        NUMBER_OF_STACKIDS_512));
> > > +    }
> > >   }
> > >     static void
> > 
> > 
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 07ef111947b8c..12fc87b957425 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1112,6 +1112,10 @@ 
 #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS	REG_BIT(8)
 
 #define RT_CTRL					_MMIO(0xe530)
+#define   RT_CTRL_NUMBER_OF_STACKIDS_MASK	REG_GENMASK(6, 5)
+#define   NUMBER_OF_STACKIDS_512		2
+#define   NUMBER_OF_STACKIDS_1024		1
+#define   NUMBER_OF_STACKIDS_2048		0
 #define   DIS_NULL_QUERY			REG_BIT(10)
 
 #define EU_PERF_CNTL1				_MMIO(0xe558)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3213c593a55f4..ea674e456cd76 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2737,6 +2737,15 @@  general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
 		wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
 		wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 	}
+
+	if (IS_DG2(i915)) {
+		/* Performance tuning for Ray-tracing */
+		wa_write_clr_set(wal,
+				 RT_CTRL,
+				 RT_CTRL_NUMBER_OF_STACKIDS_MASK,
+				 REG_FIELD_PREP(RT_CTRL_NUMBER_OF_STACKIDS_MASK,
+						NUMBER_OF_STACKIDS_512));
+	}
 }
 
 static void