diff mbox series

[2/2] drm/amd/display: use drm_crtc_set_vblank_offdelay()

Message ID 20240708202907.383917-2-hamza.mahfooz@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/vblank: allow dynamic per-crtc vblank off delay | expand

Commit Message

Hamza Mahfooz July 8, 2024, 8:29 p.m. UTC
Hook up drm_crtc_set_vblank_offdelay() in amdgpu_dm, so that we can
enable PSR more quickly for displays that support it.

Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 30 ++++++++++++++-----
 1 file changed, 22 insertions(+), 8 deletions(-)

Comments

Daniel Vetter July 9, 2024, 9:32 a.m. UTC | #1
On Mon, Jul 08, 2024 at 04:29:07PM -0400, Hamza Mahfooz wrote:
> Hook up drm_crtc_set_vblank_offdelay() in amdgpu_dm, so that we can
> enable PSR more quickly for displays that support it.
> 
> Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
> ---
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 30 ++++++++++++++-----
>  1 file changed, 22 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index fdbc9b57a23d..ee6c31e9d3c4 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -8231,7 +8231,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
>  
>  static void manage_dm_interrupts(struct amdgpu_device *adev,
>  				 struct amdgpu_crtc *acrtc,
> -				 bool enable)
> +				 struct dm_crtc_state *acrtc_state)
>  {
>  	/*
>  	 * We have no guarantee that the frontend index maps to the same
> @@ -8239,12 +8239,25 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
>  	 *
>  	 * TODO: Use a different interrupt or check DC itself for the mapping.
>  	 */
> -	int irq_type =
> -		amdgpu_display_crtc_idx_to_irq_type(
> -			adev,
> -			acrtc->crtc_id);
> +	int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
> +							   acrtc->crtc_id);
> +	struct dc_crtc_timing *timing;
> +	int offdelay;
> +
> +	if (acrtc_state) {
> +		timing = &acrtc_state->stream->timing;
> +
> +		/* at least 2 frames */
> +		offdelay = 2000 / div64_u64(div64_u64((timing->pix_clk_100hz *
> +						       (uint64_t)100),
> +						      timing->v_total),
> +					    timing->h_total) + 1;

Yeah, _especially_ when you have a timeout this short you really have to
instead fix the vblank driver code properly so you can enable
vblank_disable_immediate. This is just cheating :-)
-Sima

> +
> +		if (acrtc_state->stream->link->psr_settings.psr_version <
> +		    DC_PSR_VERSION_UNSUPPORTED &&
> +		    amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0))
> +			drm_crtc_set_vblank_offdelay(&acrtc->base, offdelay);
>  
> -	if (enable) {
>  		drm_crtc_vblank_on(&acrtc->base);
>  		amdgpu_irq_get(
>  			adev,
> @@ -9319,7 +9332,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
>  		if (old_crtc_state->active &&
>  		    (!new_crtc_state->active ||
>  		     drm_atomic_crtc_needs_modeset(new_crtc_state))) {
> -			manage_dm_interrupts(adev, acrtc, false);
> +			manage_dm_interrupts(adev, acrtc, NULL);
>  			dc_stream_release(dm_old_crtc_state->stream);
>  		}
>  	}
> @@ -9834,7 +9847,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
>  		     drm_atomic_crtc_needs_modeset(new_crtc_state))) {
>  			dc_stream_retain(dm_new_crtc_state->stream);
>  			acrtc->dm_irq_params.stream = dm_new_crtc_state->stream;
> -			manage_dm_interrupts(adev, acrtc, true);
> +			manage_dm_interrupts(adev, acrtc,
> +					     to_dm_crtc_state(new_crtc_state));
>  		}
>  		/* Handle vrr on->off / off->on transitions */
>  		amdgpu_dm_handle_vrr_transition(dm_old_crtc_state, dm_new_crtc_state);
> -- 
> 2.45.1
>
Daniel Vetter July 9, 2024, 10:09 a.m. UTC | #2
On Tue, Jul 09, 2024 at 11:32:11AM +0200, Daniel Vetter wrote:
> On Mon, Jul 08, 2024 at 04:29:07PM -0400, Hamza Mahfooz wrote:
> > Hook up drm_crtc_set_vblank_offdelay() in amdgpu_dm, so that we can
> > enable PSR more quickly for displays that support it.
> > 
> > Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
> > ---
> >  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 30 ++++++++++++++-----
> >  1 file changed, 22 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index fdbc9b57a23d..ee6c31e9d3c4 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -8231,7 +8231,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
> >  
> >  static void manage_dm_interrupts(struct amdgpu_device *adev,
> >  				 struct amdgpu_crtc *acrtc,
> > -				 bool enable)
> > +				 struct dm_crtc_state *acrtc_state)
> >  {
> >  	/*
> >  	 * We have no guarantee that the frontend index maps to the same
> > @@ -8239,12 +8239,25 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
> >  	 *
> >  	 * TODO: Use a different interrupt or check DC itself for the mapping.
> >  	 */
> > -	int irq_type =
> > -		amdgpu_display_crtc_idx_to_irq_type(
> > -			adev,
> > -			acrtc->crtc_id);
> > +	int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
> > +							   acrtc->crtc_id);
> > +	struct dc_crtc_timing *timing;
> > +	int offdelay;
> > +
> > +	if (acrtc_state) {
> > +		timing = &acrtc_state->stream->timing;
> > +
> > +		/* at least 2 frames */
> > +		offdelay = 2000 / div64_u64(div64_u64((timing->pix_clk_100hz *
> > +						       (uint64_t)100),
> > +						      timing->v_total),
> > +					    timing->h_total) + 1;
> 
> Yeah, _especially_ when you have a this short timeout your really have to
> instead fix the vblank driver code properly so you can enable
> vblank_disable_immediate. This is just cheating :-)

Michel mentioned on irc that DC had immediate vblank disabling, but this
was reverted with f64e6e0b6afe ("Revert "drm/amdgpu/display: set
vblank_disable_immediate for DC"").

I haven't looked at the details of the bug report, but stuttering is
exactly what happens when the driver's vblank code has these races. Going
for a very low timeout instead of zero just means it's a bit harder to hit
the issue, and much, much harder to debug properly.

So yeah even more reasons to look at the underlying root-cause here I
think.
-Sima
Hamza Mahfooz July 9, 2024, 2:02 p.m. UTC | #3
On 7/9/24 06:09, Daniel Vetter wrote:
> On Tue, Jul 09, 2024 at 11:32:11AM +0200, Daniel Vetter wrote:
>> On Mon, Jul 08, 2024 at 04:29:07PM -0400, Hamza Mahfooz wrote:
>>> Hook up drm_crtc_set_vblank_offdelay() in amdgpu_dm, so that we can
>>> enable PSR more quickly for displays that support it.
>>>
>>> Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
>>> ---
>>>   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 30 ++++++++++++++-----
>>>   1 file changed, 22 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> index fdbc9b57a23d..ee6c31e9d3c4 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> @@ -8231,7 +8231,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
>>>   
>>>   static void manage_dm_interrupts(struct amdgpu_device *adev,
>>>   				 struct amdgpu_crtc *acrtc,
>>> -				 bool enable)
>>> +				 struct dm_crtc_state *acrtc_state)
>>>   {
>>>   	/*
>>>   	 * We have no guarantee that the frontend index maps to the same
>>> @@ -8239,12 +8239,25 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
>>>   	 *
>>>   	 * TODO: Use a different interrupt or check DC itself for the mapping.
>>>   	 */
>>> -	int irq_type =
>>> -		amdgpu_display_crtc_idx_to_irq_type(
>>> -			adev,
>>> -			acrtc->crtc_id);
>>> +	int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
>>> +							   acrtc->crtc_id);
>>> +	struct dc_crtc_timing *timing;
>>> +	int offdelay;
>>> +
>>> +	if (acrtc_state) {
>>> +		timing = &acrtc_state->stream->timing;
>>> +
>>> +		/* at least 2 frames */
>>> +		offdelay = 2000 / div64_u64(div64_u64((timing->pix_clk_100hz *
>>> +						       (uint64_t)100),
>>> +						      timing->v_total),
>>> +					    timing->h_total) + 1;
>>
>> Yeah, _especially_ when you have a this short timeout your really have to
>> instead fix the vblank driver code properly so you can enable
>> vblank_disable_immediate. This is just cheating :-)
> 
> Michel mentioned on irc that DC had immediate vblank disabling, but this
> was reverted with f64e6e0b6afe ("Revert "drm/amdgpu/display: set
> vblank_disable_immediate for DC"").
> 
> I haven't looked at the details of the bug report, but stuttering is
> exactly what happens when the driver's vblank code has these races. Going
> for a very low timeout instead of zero just means it's a bit harder to hit
> the issue, and much, much harder to debug properly.
> 
> So yeah even more reasons to look at the underlying root-cause here I
> think.
> -Sima

The issue is that DMUB (display firmware) isn't able to keep up with all of
the requests that the driver is making. The issue is fairly difficult to
reproduce (I've only seen it once after letting the system run with a
program that would engage PSR every so often, after several hours).
It is also worth noting that we have the same 2 idle frame wait on the
Windows driver, for the same reasons. So, in all likelihood, if it is your
opinion that the series should be NAKed, we will probably have to move the
wait into the driver as a workaround.
Daniel Vetter July 10, 2024, 8:43 a.m. UTC | #4
On Tue, Jul 09, 2024 at 10:02:08AM -0400, Hamza Mahfooz wrote:
> On 7/9/24 06:09, Daniel Vetter wrote:
> > On Tue, Jul 09, 2024 at 11:32:11AM +0200, Daniel Vetter wrote:
> > > On Mon, Jul 08, 2024 at 04:29:07PM -0400, Hamza Mahfooz wrote:
> > > > Hook up drm_crtc_set_vblank_offdelay() in amdgpu_dm, so that we can
> > > > enable PSR more quickly for displays that support it.
> > > > 
> > > > Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
> > > > ---
> > > >   .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 30 ++++++++++++++-----
> > > >   1 file changed, 22 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > index fdbc9b57a23d..ee6c31e9d3c4 100644
> > > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > @@ -8231,7 +8231,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
> > > >   static void manage_dm_interrupts(struct amdgpu_device *adev,
> > > >   				 struct amdgpu_crtc *acrtc,
> > > > -				 bool enable)
> > > > +				 struct dm_crtc_state *acrtc_state)
> > > >   {
> > > >   	/*
> > > >   	 * We have no guarantee that the frontend index maps to the same
> > > > @@ -8239,12 +8239,25 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
> > > >   	 *
> > > >   	 * TODO: Use a different interrupt or check DC itself for the mapping.
> > > >   	 */
> > > > -	int irq_type =
> > > > -		amdgpu_display_crtc_idx_to_irq_type(
> > > > -			adev,
> > > > -			acrtc->crtc_id);
> > > > +	int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
> > > > +							   acrtc->crtc_id);
> > > > +	struct dc_crtc_timing *timing;
> > > > +	int offdelay;
> > > > +
> > > > +	if (acrtc_state) {
> > > > +		timing = &acrtc_state->stream->timing;
> > > > +
> > > > +		/* at least 2 frames */
> > > > +		offdelay = 2000 / div64_u64(div64_u64((timing->pix_clk_100hz *
> > > > +						       (uint64_t)100),
> > > > +						      timing->v_total),
> > > > +					    timing->h_total) + 1;
> > > 
> > > Yeah, _especially_ when you have a this short timeout your really have to
> > > instead fix the vblank driver code properly so you can enable
> > > vblank_disable_immediate. This is just cheating :-)
> > 
> > Michel mentioned on irc that DC had immediate vblank disabling, but this
> > was reverted with f64e6e0b6afe ("Revert "drm/amdgpu/display: set
> > vblank_disable_immediate for DC"").
> > 
> > I haven't looked at the details of the bug report, but stuttering is
> > exactly what happens when the driver's vblank code has these races. Going
> > for a very low timeout instead of zero just means it's a bit harder to hit
> > the issue, and much, much harder to debug properly.
> > 
> > So yeah even more reasons to look at the underlying root-cause here I
> > think.
> > -Sima
> 
> The issue is that DMUB (display firmware) isn't able to keep up with all of
> the requests that the driver is making. The issue is fairly difficult to
> reproduce (I've only seen it once after letting the system run with a
> program that would engage PSR every so often, after several hours).
> It is also worth noting that we have the same 2 idle frame wait on the
> windows
> driver, for the same reasons. So, in all likelihood if it is your opinion
> that
> the series should be NAKed, we will probably have to move the wait into the
> driver as a workaround.

Well that's an entirely different reason, and needs to be recorded in the
commit log that disabling/enabling vblank is too expensive and why. Also
would be good to record that windows does the same.

I'm also not entirely sure this is a good interface, so some
thoughts/question:

- is the issue only with psr? Meaning, if we switch the panel to a
  different crtc, do we need to update the off delay?

- there's still the question of why vblank_disable_immediate resulted in
  stuttering, is that the same bug? I think for consistency it'd be best
  if we enable immediate vblank disabling everywhere (for maximum
  testing), and then apply the 2 frame delay workaround only where needed
  explicitly. Otherwise if there's other issues than DMUB being slow, they
  might be mostly hidden and become really hard to track down when they
  show up.

- I think an interface to set the right values in lockstep with the vblank
  on/off state would be best, so maybe a special drm_crtc_vblank_on_config
  that takes additional parameters?

Cheers, Sima
Hamza Mahfooz July 10, 2024, 9:13 p.m. UTC | #5
On 7/10/24 04:43, Daniel Vetter wrote:
> On Tue, Jul 09, 2024 at 10:02:08AM -0400, Hamza Mahfooz wrote:
>> On 7/9/24 06:09, Daniel Vetter wrote:
>>> On Tue, Jul 09, 2024 at 11:32:11AM +0200, Daniel Vetter wrote:
>>>> On Mon, Jul 08, 2024 at 04:29:07PM -0400, Hamza Mahfooz wrote:
>>>>> Hook up drm_crtc_set_vblank_offdelay() in amdgpu_dm, so that we can
>>>>> enable PSR more quickly for displays that support it.
>>>>>
>>>>> Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
>>>>> ---
>>>>>    .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 30 ++++++++++++++-----
>>>>>    1 file changed, 22 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> index fdbc9b57a23d..ee6c31e9d3c4 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> @@ -8231,7 +8231,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
>>>>>    static void manage_dm_interrupts(struct amdgpu_device *adev,
>>>>>    				 struct amdgpu_crtc *acrtc,
>>>>> -				 bool enable)
>>>>> +				 struct dm_crtc_state *acrtc_state)
>>>>>    {
>>>>>    	/*
>>>>>    	 * We have no guarantee that the frontend index maps to the same
>>>>> @@ -8239,12 +8239,25 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
>>>>>    	 *
>>>>>    	 * TODO: Use a different interrupt or check DC itself for the mapping.
>>>>>    	 */
>>>>> -	int irq_type =
>>>>> -		amdgpu_display_crtc_idx_to_irq_type(
>>>>> -			adev,
>>>>> -			acrtc->crtc_id);
>>>>> +	int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
>>>>> +							   acrtc->crtc_id);
>>>>> +	struct dc_crtc_timing *timing;
>>>>> +	int offdelay;
>>>>> +
>>>>> +	if (acrtc_state) {
>>>>> +		timing = &acrtc_state->stream->timing;
>>>>> +
>>>>> +		/* at least 2 frames */
>>>>> +		offdelay = 2000 / div64_u64(div64_u64((timing->pix_clk_100hz *
>>>>> +						       (uint64_t)100),
>>>>> +						      timing->v_total),
>>>>> +					    timing->h_total) + 1;
>>>>
>>>> Yeah, _especially_ when you have a this short timeout your really have to
>>>> instead fix the vblank driver code properly so you can enable
>>>> vblank_disable_immediate. This is just cheating :-)
>>>
>>> Michel mentioned on irc that DC had immediate vblank disabling, but this
>>> was reverted with f64e6e0b6afe ("Revert "drm/amdgpu/display: set
>>> vblank_disable_immediate for DC"").
>>>
>>> I haven't looked at the details of the bug report, but stuttering is
>>> exactly what happens when the driver's vblank code has these races. Going
>>> for a very low timeout instead of zero just means it's a bit harder to hit
>>> the issue, and much, much harder to debug properly.
>>>
>>> So yeah even more reasons to look at the underlying root-cause here I
>>> think.
>>> -Sima
>>
>> The issue is that DMUB (display firmware) isn't able to keep up with all of
>> the requests that the driver is making. The issue is fairly difficult to
>> reproduce (I've only seen it once after letting the system run with a
>> program that would engage PSR every so often, after several hours).
>> It is also worth noting that we have the same 2 idle frame wait on the
>> windows
>> driver, for the same reasons. So, in all likelihood if it is your opinion
>> that
>> the series should be NAKed, we will probably have to move the wait into the
>> driver as a workaround.
> 
> Well that's an entirely different reason, and needs to be recorded in the
> commit log that disabling/enabling vblank is too expensive and why. Also
> would be good to record that windows does the same.

Point taken.

> 
> I'm also not entirely sure this is a good interface, so some
> thoughts/question:
> 
> - is the issue only with psr, meaning that if we switch the panel to a
>    different crtc, do we need to update the off delay.

I can't say definitively, but all of the public reports (that I've seen)
and my local repro are PSR related.

> 
> - there's still the question of why vblank_immediate_disable resulted in
>    stuttering, is that the same bug? I think for consistency it'd be best
>    if we enable immediate vblank disabling everywhere (for maximum
>    testing), and then apply the 2 frame delay workaround only where needed
>    explicitly. Otherwise if there's other issues than DMUB being slow, they
>    might be mostly hidden and become really hard to track down when they
>    show up.

Ya, I believe they are all DMUB related since the stuttering issues are
accompanied by the following dmesg log entry:

[drm:dc_dmub_srv_wait_idle [amdgpu]] *ERROR* Error waiting for DMUB idle: status=3

(which is pretty much an unspecified firmware timeout)

Also, setting vblank_disable_immediate unconditionally for amdgpu, while
only enabling the delay for cases where we know that we need it, seems
reasonable to me.

> 
> - I think an interface to set the right values in lockstep with the vblank
>    on/off state would be best, so maybe a special drm_crtc_vblank_on_config
>    that takes additional parameters?

Sure, that seems fine, what parameters besides the off delay did you have
in mind though?

> 
> Cheers, Sima
Daniel Vetter July 15, 2024, 8:40 a.m. UTC | #6
On Wed, Jul 10, 2024 at 05:13:18PM -0400, Hamza Mahfooz wrote:
> On 7/10/24 04:43, Daniel Vetter wrote:
> > On Tue, Jul 09, 2024 at 10:02:08AM -0400, Hamza Mahfooz wrote:
> > > On 7/9/24 06:09, Daniel Vetter wrote:
> > > > On Tue, Jul 09, 2024 at 11:32:11AM +0200, Daniel Vetter wrote:
> > > > > On Mon, Jul 08, 2024 at 04:29:07PM -0400, Hamza Mahfooz wrote:
> > > > > > Hook up drm_crtc_set_vblank_offdelay() in amdgpu_dm, so that we can
> > > > > > enable PSR more quickly for displays that support it.
> > > > > > 
> > > > > > Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
> > > > > > ---
> > > > > >    .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 30 ++++++++++++++-----
> > > > > >    1 file changed, 22 insertions(+), 8 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > > index fdbc9b57a23d..ee6c31e9d3c4 100644
> > > > > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > > > @@ -8231,7 +8231,7 @@ static int amdgpu_dm_encoder_init(struct drm_device *dev,
> > > > > >    static void manage_dm_interrupts(struct amdgpu_device *adev,
> > > > > >    				 struct amdgpu_crtc *acrtc,
> > > > > > -				 bool enable)
> > > > > > +				 struct dm_crtc_state *acrtc_state)
> > > > > >    {
> > > > > >    	/*
> > > > > >    	 * We have no guarantee that the frontend index maps to the same
> > > > > > @@ -8239,12 +8239,25 @@ static void manage_dm_interrupts(struct amdgpu_device *adev,
> > > > > >    	 *
> > > > > >    	 * TODO: Use a different interrupt or check DC itself for the mapping.
> > > > > >    	 */
> > > > > > -	int irq_type =
> > > > > > -		amdgpu_display_crtc_idx_to_irq_type(
> > > > > > -			adev,
> > > > > > -			acrtc->crtc_id);
> > > > > > +	int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
> > > > > > +							   acrtc->crtc_id);
> > > > > > +	struct dc_crtc_timing *timing;
> > > > > > +	int offdelay;
> > > > > > +
> > > > > > +	if (acrtc_state) {
> > > > > > +		timing = &acrtc_state->stream->timing;
> > > > > > +
> > > > > > +		/* at least 2 frames */
> > > > > > +		offdelay = 2000 / div64_u64(div64_u64((timing->pix_clk_100hz *
> > > > > > +						       (uint64_t)100),
> > > > > > +						      timing->v_total),
> > > > > > +					    timing->h_total) + 1;
> > > > > 
> > > > > Yeah, _especially_ when you have a this short timeout your really have to
> > > > > instead fix the vblank driver code properly so you can enable
> > > > > vblank_disable_immediate. This is just cheating :-)
> > > > 
> > > > Michel mentioned on irc that DC had immediate vblank disabling, but this
> > > > was reverted with f64e6e0b6afe ("Revert "drm/amdgpu/display: set
> > > > vblank_disable_immediate for DC"").
> > > > 
> > > > I haven't looked at the details of the bug report, but stuttering is
> > > > exactly what happens when the driver's vblank code has these races. Going
> > > > for a very low timeout instead of zero just means it's a bit harder to hit
> > > > the issue, and much, much harder to debug properly.
> > > > 
> > > > So yeah even more reasons to look at the underlying root-cause here I
> > > > think.
> > > > -Sima
> > > 
> > > The issue is that DMUB (display firmware) isn't able to keep up with all of
> > > the requests that the driver is making. The issue is fairly difficult to
> > > reproduce (I've only seen it once after letting the system run with a
> > > program that would engage PSR every so often, after several hours).
> > > It is also worth noting that we have the same 2 idle frame wait on the
> > > windows
> > > driver, for the same reasons. So, in all likelihood if it is your opinion
> > > that
> > > the series should be NAKed, we will probably have to move the wait into the
> > > driver as a workaround.
> > 
> > Well that's an entirely different reason, and needs to be recorded in the
> > commit log that disabling/enabling vblank is too expensive and why. Also
> > would be good to record that windows does the same.
> 
> Point taken.
> 
> > 
> > I'm also not entirely sure this is a good interface, so some
> > thoughts/question:
> > 
> > - is the issue only with psr, meaning that if we switch the panel to a
> >    different crtc, do we need to update the off delay.
> 
> I can't say definitively, but all of the public reports (that I've seen)
> and my local repro are PSR related.
> 
> > 
> > - there's still the question of why vblank_immediate_disable resulted in
> >    stuttering, is that the same bug? I think for consistency it'd be best
> >    if we enable immediate vblank disabling everywhere (for maximum
> >    testing), and then apply the 2 frame delay workaround only where needed
> >    explicitly. Otherwise if there's other issues than DMUB being slow, they
> >    might be mostly hidden and become really hard to track down when they
> >    show up.
> 
> Ya, I believe they are all DMUB related since the stuttering issues are
> accompanied by the following dmesg log entry:
> 
> [drm:dc_dmub_srv_wait_idle [amdgpu]] *ERROR* Error waiting for DMUB idle:
> status=3
> 
> (which is pretty much an unspecified firmware timeout)

Ah that would be really good to add to the commit message, should help
connect with any bug reports later on. And the firmware handling PSR
specially also makes sense, the flow is a lot more involved for that I
guess.

> Also, setting vblank_immediate_disable unconditionally for amdgpu, while
> only
> enabling the delay for cases that we know that we need it seems reasonable
> to me.
> 
> > 
> > - I think an interface to set the right values in lockstep with the vblank
> >    on/off state would be best, so maybe a special drm_crtc_vblank_on_config
> >    that takes additional parameters?
> 
> Sure, that seems fine, what parameters besides the off delay did you have
> in mind though?

Whatever you need, sounds like you need to update both the off delay and
the immediate disable (or maybe we just magically put them into the same
value, dunno). I just thought that supplying the right parameters to
drm_crtc_vblank_on is probably the right way to go about this.

Note that there's the question of whether drm_crtc_vblank_off should
restore the old values, or whether drivers simply have to use that special
version unconditionally, instead of only for the outputs like PSR that
need special handling.

Cheers, Sima
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index fdbc9b57a23d..ee6c31e9d3c4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8231,7 +8231,7 @@  static int amdgpu_dm_encoder_init(struct drm_device *dev,
 
 static void manage_dm_interrupts(struct amdgpu_device *adev,
 				 struct amdgpu_crtc *acrtc,
-				 bool enable)
+				 struct dm_crtc_state *acrtc_state)
 {
 	/*
 	 * We have no guarantee that the frontend index maps to the same
@@ -8239,12 +8239,25 @@  static void manage_dm_interrupts(struct amdgpu_device *adev,
 	 *
 	 * TODO: Use a different interrupt or check DC itself for the mapping.
 	 */
-	int irq_type =
-		amdgpu_display_crtc_idx_to_irq_type(
-			adev,
-			acrtc->crtc_id);
+	int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+							   acrtc->crtc_id);
+	struct dc_crtc_timing *timing;
+	int offdelay;
+
+	if (acrtc_state) {
+		timing = &acrtc_state->stream->timing;
+
+		/* at least 2 frames */
+		offdelay = 2000 / div64_u64(div64_u64((timing->pix_clk_100hz *
+						       (uint64_t)100),
+						      timing->v_total),
+					    timing->h_total) + 1;
+
+		if (acrtc_state->stream->link->psr_settings.psr_version <
+		    DC_PSR_VERSION_UNSUPPORTED &&
+		    amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 5, 0))
+			drm_crtc_set_vblank_offdelay(&acrtc->base, offdelay);
 
-	if (enable) {
 		drm_crtc_vblank_on(&acrtc->base);
 		amdgpu_irq_get(
 			adev,
@@ -9319,7 +9332,7 @@  static void amdgpu_dm_commit_streams(struct drm_atomic_state *state,
 		if (old_crtc_state->active &&
 		    (!new_crtc_state->active ||
 		     drm_atomic_crtc_needs_modeset(new_crtc_state))) {
-			manage_dm_interrupts(adev, acrtc, false);
+			manage_dm_interrupts(adev, acrtc, NULL);
 			dc_stream_release(dm_old_crtc_state->stream);
 		}
 	}
@@ -9834,7 +9847,8 @@  static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		     drm_atomic_crtc_needs_modeset(new_crtc_state))) {
 			dc_stream_retain(dm_new_crtc_state->stream);
 			acrtc->dm_irq_params.stream = dm_new_crtc_state->stream;
-			manage_dm_interrupts(adev, acrtc, true);
+			manage_dm_interrupts(adev, acrtc,
+					     to_dm_crtc_state(new_crtc_state));
 		}
 		/* Handle vrr on->off / off->on transitions */
 		amdgpu_dm_handle_vrr_transition(dm_old_crtc_state, dm_new_crtc_state);