diff mbox series

drm/amdgpu: Ensure HDA function is suspended before ASIC reset

Message ID 20220407121230.132627-1-kai.heng.feng@canonical.com (mailing list archive)
State New, archived
Headers show
Series drm/amdgpu: Ensure HDA function is suspended before ASIC reset | expand

Commit Message

Kai-Heng Feng April 7, 2022, 12:12 p.m. UTC
DP/HDMI audio on AMD PRO VII stops working after S3:
[  149.450391] amdgpu 0000:63:00.0: amdgpu: MODE1 reset
[  149.450395] amdgpu 0000:63:00.0: amdgpu: GPU mode1 reset
[  149.450494] amdgpu 0000:63:00.0: amdgpu: GPU psp mode1 reset
[  149.983693] snd_hda_intel 0000:63:00.1: refused to change power state from D0 to D3hot
[  150.003439] amdgpu 0000:63:00.0: refused to change power state from D0 to D3hot
...
[  155.432975] snd_hda_intel 0000:63:00.1: CORB reset timeout#2, CORBRP = 65535

The offending commit is daf8de0874ab5b ("drm/amdgpu: always reset the asic in
suspend (v2)"). Commit 34452ac3038a7 ("drm/amdgpu: don't use BACO for
reset in S3 ") doesn't help, so the issue is something different.

Assuming that to make HDA resume to D0 fully realized, it needs to be
successfully put to D3 first. And this guesswork proves working, by
moving amdgpu_asic_reset() to noirq callback, so it's called after HDA
function is in D3.

Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)")
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

Comments

Alex Deucher April 7, 2022, 4:04 p.m. UTC | #1
Applied.  Thanks!

Alex

On Thu, Apr 7, 2022 at 8:21 AM Kai-Heng Feng
<kai.heng.feng@canonical.com> wrote:
>
> DP/HDMI audio on AMD PRO VII stops working after S3:
> [  149.450391] amdgpu 0000:63:00.0: amdgpu: MODE1 reset
> [  149.450395] amdgpu 0000:63:00.0: amdgpu: GPU mode1 reset
> [  149.450494] amdgpu 0000:63:00.0: amdgpu: GPU psp mode1 reset
> [  149.983693] snd_hda_intel 0000:63:00.1: refused to change power state from D0 to D3hot
> [  150.003439] amdgpu 0000:63:00.0: refused to change power state from D0 to D3hot
> ...
> [  155.432975] snd_hda_intel 0000:63:00.1: CORB reset timeout#2, CORBRP = 65535
>
> The offending commit is daf8de0874ab5b ("drm/amdgpu: always reset the asic in
> suspend (v2)"). Commit 34452ac3038a7 ("drm/amdgpu: don't use BACO for
> reset in S3 ") doesn't help, so the issue is something different.
>
> Assuming that to make HDA resume to D0 fully realized, it needs to be
> successfully put to D3 first. And this guesswork proves working, by
> moving amdgpu_asic_reset() to noirq callback, so it's called after HDA
> function is in D3.
>
> Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)")
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index bb1c025d90019..31f7229e7ea89 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev)
>  {
>         struct drm_device *drm_dev = dev_get_drvdata(dev);
>         struct amdgpu_device *adev = drm_to_adev(drm_dev);
> -       int r;
>
>         if (amdgpu_acpi_is_s0ix_active(adev))
>                 adev->in_s0ix = true;
>         else
>                 adev->in_s3 = true;
> -       r = amdgpu_device_suspend(drm_dev, true);
> -       if (r)
> -               return r;
> +       return amdgpu_device_suspend(drm_dev, true);
> +}
> +
> +static int amdgpu_pmops_suspend_noirq(struct device *dev)
> +{
> +       struct drm_device *drm_dev = dev_get_drvdata(dev);
> +       struct amdgpu_device *adev = drm_to_adev(drm_dev);
> +
>         if (!adev->in_s0ix)
> -               r = amdgpu_asic_reset(adev);
> -       return r;
> +               return amdgpu_asic_reset(adev);
> +
> +       return 0;
>  }
>
>  static int amdgpu_pmops_resume(struct device *dev)
> @@ -2575,6 +2580,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
>         .prepare = amdgpu_pmops_prepare,
>         .complete = amdgpu_pmops_complete,
>         .suspend = amdgpu_pmops_suspend,
> +       .suspend_noirq = amdgpu_pmops_suspend_noirq,
>         .resume = amdgpu_pmops_resume,
>         .freeze = amdgpu_pmops_freeze,
>         .thaw = amdgpu_pmops_thaw,
> --
> 2.34.1
>
Alex Deucher April 7, 2022, 5:08 p.m. UTC | #2
On Thu, Apr 7, 2022 at 8:21 AM Kai-Heng Feng
<kai.heng.feng@canonical.com> wrote:
>
> DP/HDMI audio on AMD PRO VII stops working after S3:
> [  149.450391] amdgpu 0000:63:00.0: amdgpu: MODE1 reset
> [  149.450395] amdgpu 0000:63:00.0: amdgpu: GPU mode1 reset
> [  149.450494] amdgpu 0000:63:00.0: amdgpu: GPU psp mode1 reset
> [  149.983693] snd_hda_intel 0000:63:00.1: refused to change power state from D0 to D3hot
> [  150.003439] amdgpu 0000:63:00.0: refused to change power state from D0 to D3hot
> ...
> [  155.432975] snd_hda_intel 0000:63:00.1: CORB reset timeout#2, CORBRP = 65535

As an aside, shouldn't device links order this properly already?  I
thought that was the whole point of them.  We have quirks in PCI
quirks.c to create device links for all GPU integrated peripherals
(audio, usb, ucsi).

Alex

>
> The offending commit is daf8de0874ab5b ("drm/amdgpu: always reset the asic in
> suspend (v2)"). Commit 34452ac3038a7 ("drm/amdgpu: don't use BACO for
> reset in S3 ") doesn't help, so the issue is something different.
>
> Assuming that to make HDA resume to D0 fully realized, it needs to be
> successfully put to D3 first. And this guesswork proves working, by
> moving amdgpu_asic_reset() to noirq callback, so it's called after HDA
> function is in D3.
>
> Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)")
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 18 ++++++++++++------
>  1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index bb1c025d90019..31f7229e7ea89 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev)
>  {
>         struct drm_device *drm_dev = dev_get_drvdata(dev);
>         struct amdgpu_device *adev = drm_to_adev(drm_dev);
> -       int r;
>
>         if (amdgpu_acpi_is_s0ix_active(adev))
>                 adev->in_s0ix = true;
>         else
>                 adev->in_s3 = true;
> -       r = amdgpu_device_suspend(drm_dev, true);
> -       if (r)
> -               return r;
> +       return amdgpu_device_suspend(drm_dev, true);
> +}
> +
> +static int amdgpu_pmops_suspend_noirq(struct device *dev)
> +{
> +       struct drm_device *drm_dev = dev_get_drvdata(dev);
> +       struct amdgpu_device *adev = drm_to_adev(drm_dev);
> +
>         if (!adev->in_s0ix)
> -               r = amdgpu_asic_reset(adev);
> -       return r;
> +               return amdgpu_asic_reset(adev);
> +
> +       return 0;
>  }
>
>  static int amdgpu_pmops_resume(struct device *dev)
> @@ -2575,6 +2580,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
>         .prepare = amdgpu_pmops_prepare,
>         .complete = amdgpu_pmops_complete,
>         .suspend = amdgpu_pmops_suspend,
> +       .suspend_noirq = amdgpu_pmops_suspend_noirq,
>         .resume = amdgpu_pmops_resume,
>         .freeze = amdgpu_pmops_freeze,
>         .thaw = amdgpu_pmops_thaw,
> --
> 2.34.1
>
Mario Limonciello April 7, 2022, 6:11 p.m. UTC | #3
[Public]

> -----Original Message-----
> From: Alex Deucher <alexdeucher@gmail.com>
> Sent: Thursday, April 7, 2022 12:08
> To: Kai-Heng Feng <kai.heng.feng@canonical.com>
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Pan, Xinhui <Xinhui.Pan@amd.com>; David
> Airlie <airlied@linux.ie>; Maling list - DRI developers <dri-
> devel@lists.freedesktop.org>; LKML <linux-kernel@vger.kernel.org>; amd-
> gfx list <amd-gfx@lists.freedesktop.org>; Chiu, Solomon
> <Solomon.Chiu@amd.com>; Limonciello, Mario
> <Mario.Limonciello@amd.com>; Quan, Evan <Evan.Quan@amd.com>;
> Tuikov, Luben <Luben.Tuikov@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: Ensure HDA function is suspended
> before ASIC reset
> 
> On Thu, Apr 7, 2022 at 8:21 AM Kai-Heng Feng
> <kai.heng.feng@canonical.com> wrote:
> >
> > DP/HDMI audio on AMD PRO VII stops working after S3:
> > [  149.450391] amdgpu 0000:63:00.0: amdgpu: MODE1 reset
> > [  149.450395] amdgpu 0000:63:00.0: amdgpu: GPU mode1 reset
> > [  149.450494] amdgpu 0000:63:00.0: amdgpu: GPU psp mode1 reset
> > [  149.983693] snd_hda_intel 0000:63:00.1: refused to change power state
> from D0 to D3hot
> > [  150.003439] amdgpu 0000:63:00.0: refused to change power state from
> D0 to D3hot
> > ...
> > [  155.432975] snd_hda_intel 0000:63:00.1: CORB reset timeout#2, CORBRP
> = 65535
> 
> As an aside, shouldn't device links order this properly already?  I
> thought that was the whole point of them.  We have quirks in PCI
> quirks.c to create device links for all GPU integrated peripherals
> (audio, usb, ucsi).

The movement from D0->D3 only happens in noirq though in pci_pm_suspend_noirq.
So if device links only order within a suspend phase then resetting during that
phase means the future phase won't have ground to stand on.

IOW I think device links + this commit are needed to get the order right with a reset.

> 
> Alex
> 
> >
> > The offending commit is daf8de0874ab5b ("drm/amdgpu: always reset the
> asic in
> > suspend (v2)"). Commit 34452ac3038a7 ("drm/amdgpu: don't use BACO for
> > reset in S3 ") doesn't help, so the issue is something different.
> >
> > Assuming that to make HDA resume to D0 fully realized, it needs to be
> > successfully put to D3 first. And this guesswork proves working, by
> > moving amdgpu_asic_reset() to noirq callback, so it's called after HDA
> > function is in D3.
> >
> > Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend
> (v2)")
> > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 18 ++++++++++++------
> >  1 file changed, 12 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index bb1c025d90019..31f7229e7ea89 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct
> device *dev)
> >  {
> >         struct drm_device *drm_dev = dev_get_drvdata(dev);
> >         struct amdgpu_device *adev = drm_to_adev(drm_dev);
> > -       int r;
> >
> >         if (amdgpu_acpi_is_s0ix_active(adev))
> >                 adev->in_s0ix = true;
> >         else
> >                 adev->in_s3 = true;
> > -       r = amdgpu_device_suspend(drm_dev, true);
> > -       if (r)
> > -               return r;
> > +       return amdgpu_device_suspend(drm_dev, true);
> > +}
> > +
> > +static int amdgpu_pmops_suspend_noirq(struct device *dev)
> > +{
> > +       struct drm_device *drm_dev = dev_get_drvdata(dev);
> > +       struct amdgpu_device *adev = drm_to_adev(drm_dev);
> > +
> >         if (!adev->in_s0ix)
> > -               r = amdgpu_asic_reset(adev);
> > -       return r;
> > +               return amdgpu_asic_reset(adev);
> > +
> > +       return 0;
> >  }
> >
> >  static int amdgpu_pmops_resume(struct device *dev)
> > @@ -2575,6 +2580,7 @@ static const struct dev_pm_ops amdgpu_pm_ops
> = {
> >         .prepare = amdgpu_pmops_prepare,
> >         .complete = amdgpu_pmops_complete,
> >         .suspend = amdgpu_pmops_suspend,
> > +       .suspend_noirq = amdgpu_pmops_suspend_noirq,
> >         .resume = amdgpu_pmops_resume,
> >         .freeze = amdgpu_pmops_freeze,
> >         .thaw = amdgpu_pmops_thaw,
> > --
> > 2.34.1
> >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bb1c025d90019..31f7229e7ea89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -2323,18 +2323,23 @@  static int amdgpu_pmops_suspend(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
-	int r;
 
 	if (amdgpu_acpi_is_s0ix_active(adev))
 		adev->in_s0ix = true;
 	else
 		adev->in_s3 = true;
-	r = amdgpu_device_suspend(drm_dev, true);
-	if (r)
-		return r;
+	return amdgpu_device_suspend(drm_dev, true);
+}
+
+static int amdgpu_pmops_suspend_noirq(struct device *dev)
+{
+	struct drm_device *drm_dev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
 	if (!adev->in_s0ix)
-		r = amdgpu_asic_reset(adev);
-	return r;
+		return amdgpu_asic_reset(adev);
+
+	return 0;
 }
 
 static int amdgpu_pmops_resume(struct device *dev)
@@ -2575,6 +2580,7 @@  static const struct dev_pm_ops amdgpu_pm_ops = {
 	.prepare = amdgpu_pmops_prepare,
 	.complete = amdgpu_pmops_complete,
 	.suspend = amdgpu_pmops_suspend,
+	.suspend_noirq = amdgpu_pmops_suspend_noirq,
 	.resume = amdgpu_pmops_resume,
 	.freeze = amdgpu_pmops_freeze,
 	.thaw = amdgpu_pmops_thaw,