diff mbox series

[PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems

Message ID 20220429160604.2608782-1-richard.gong@amd.com (mailing list archive)
State New, archived
Headers show
Series [PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems | expand

Commit Message

Gong, Richard April 29, 2022, 4:06 p.m. UTC
The Active State Power Management (ASPM) feature has been enabled since
kernel 5.14. Some AMD Volcanic Islands (VI) GFX cards, such as the WX3200
and RX640, do not work with ASPM-enabled Intel Alder Lake based systems.
When these GFX cards are used for video/display output, Intel Alder Lake
based systems freeze after suspend/resume.

The issue was originally reported on one system (Dell Precision 3660 with
BIOS version 0.14.81), but was later confirmed to affect at least 4
pre-production Alder Lake based systems.

Add an extra check to disable ASPM on Intel Alder Lake based systems with
the problematic AMD Volcanic Islands GFX cards.

Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Richard Gong <richard.gong@amd.com>
---
v5: added vi to commit header and updated commit message
    restored the preprocessor guard, as was done in v2, to fix the build
    error on non-x86 systems
v4: s/CONFIG_X86_64/CONFIG_X86
    enhanced check logic
v3: s/intel_core_aspm_chk/aspm_support_quirk_check
    correct build error with W=1 option
v2: correct commit description
    move the check from chip family to problematic platform
---
 drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

Comments

Alex Deucher April 29, 2022, 4:13 p.m. UTC | #1
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

On Fri, Apr 29, 2022 at 12:08 PM Richard Gong <richard.gong@amd.com> wrote:
>
> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200 and
> RX640, that do not work with ASPM-enabled Intel Alder Lake based systems.
> Using these GFX cards as video/display output, Intel Alder Lake based
> systems will freeze after suspend/resume.
>
> The issue was originally reported on one system (Dell Precision 3660 with
> BIOS version 0.14.81), but was later confirmed to affect at least 4
> pre-production Alder Lake based systems.
>
> Add an extra check to disable ASPM on Intel Alder Lake based systems with
> the problematic AMD Volcanic Islands GFX cards.
>
> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Richard Gong <richard.gong@amd.com>
> ---
> v5: added vi to commit header and updated commit message
>     rolled back guard with the preprocessor as did in v2 to correct build
>     error on non-x86 systems
> v4: s/CONFIG_X86_64/CONFIG_X86
>     enhanced check logic
> v3: s/intel_core_aspm_chk/aspm_support_quirk_check
>     correct build error with W=1 option
> v2: correct commit description
>     move the check from chip family to problematic platform
> ---
>  drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>  1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 039b90cdc3bc..45f0188c4273 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -81,6 +81,10 @@
>  #include "mxgpu_vi.h"
>  #include "amdgpu_dm.h"
>
> +#if IS_ENABLED(CONFIG_X86)
> +#include <asm/intel-family.h>
> +#endif
> +
>  #define ixPCIE_LC_L1_PM_SUBSTATE       0x100100C6
>  #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK       0x00000001L
>  #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK   0x00000002L
> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
>                 WREG32_PCIE(ixPCIE_LC_CNTL, data);
>  }
>
> +static bool aspm_support_quirk_check(void)
> +{
> +#if IS_ENABLED(CONFIG_X86)
> +       struct cpuinfo_x86 *c = &cpu_data(0);
> +
> +       return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> +#else
> +       return true;
> +#endif
> +}
> +
>  static void vi_program_aspm(struct amdgpu_device *adev)
>  {
>         u32 data, data1, orig;
>         bool bL1SS = false;
>         bool bClkReqSupport = true;
>
> -       if (!amdgpu_device_should_use_aspm(adev))
> +       if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
>                 return;
>
>         if (adev->flags & AMD_IS_APU ||
> --
> 2.25.1
>
Paul Menzel May 1, 2022, 7:14 a.m. UTC | #2
Dear Richard,


Am 29.04.22 um 18:06 schrieb Richard Gong:
> Active State Power Management (ASPM) feature is enabled since kernel 5.14.
> There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200 and
> RX640, that do not work with ASPM-enabled Intel Alder Lake based systems.
> Using these GFX cards as video/display output, Intel Alder Lake based
> systems will freeze after suspend/resume.

As replied in v4 just now, “freeze” is misleading if you can still run 
`dmesg` after resume.


Kind regards,

Paul


> The issue was originally reported on one system (Dell Precision 3660 with
> BIOS version 0.14.81), but was later confirmed to affect at least 4
> pre-production Alder Lake based systems.
> 
> Add an extra check to disable ASPM on Intel Alder Lake based systems with
> the problematic AMD Volcanic Islands GFX cards.
> 
> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1885
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: Richard Gong <richard.gong@amd.com>
> ---
> v5: added vi to commit header and updated commit message
>      rolled back guard with the preprocessor as did in v2 to correct build
>      error on non-x86 systems
> v4: s/CONFIG_X86_64/CONFIG_X86
>      enhanced check logic
> v3: s/intel_core_aspm_chk/aspm_support_quirk_check
>      correct build error with W=1 option
> v2: correct commit description
>      move the check from chip family to problematic platform
> ---
>   drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>   1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 039b90cdc3bc..45f0188c4273 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -81,6 +81,10 @@
>   #include "mxgpu_vi.h"
>   #include "amdgpu_dm.h"
>   
> +#if IS_ENABLED(CONFIG_X86)
> +#include <asm/intel-family.h>
> +#endif
> +
>   #define ixPCIE_LC_L1_PM_SUBSTATE	0x100100C6
>   #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK	0x00000001L
>   #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK	0x00000002L
> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct amdgpu_device *adev)
>   		WREG32_PCIE(ixPCIE_LC_CNTL, data);
>   }
>   
> +static bool aspm_support_quirk_check(void)
> +{
> +#if IS_ENABLED(CONFIG_X86)
> +	struct cpuinfo_x86 *c = &cpu_data(0);
> +
> +	return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> +#else
> +	return true;
> +#endif
> +}
> +
>   static void vi_program_aspm(struct amdgpu_device *adev)
>   {
>   	u32 data, data1, orig;
>   	bool bL1SS = false;
>   	bool bClkReqSupport = true;
>   
> -	if (!amdgpu_device_should_use_aspm(adev))
> +	if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
>   		return;
>   
>   	if (adev->flags & AMD_IS_APU ||
Gong, Richard May 2, 2022, 3:22 p.m. UTC | #3
Hi Paul,

On 5/1/2022 2:14 AM, Paul Menzel wrote:
> Dear Richard,
>
>
> Am 29.04.22 um 18:06 schrieb Richard Gong:
>> Active State Power Management (ASPM) feature is enabled since kernel 
>> 5.14.
>> There are some AMD Volcanic Islands (VI) GFX cards, such as the 
>> WX3200 and
>> RX640, that do not work with ASPM-enabled Intel Alder Lake based 
>> systems.
>> Using these GFX cards as video/display output, Intel Alder Lake based
>> systems will freeze after suspend/resume.
>
> As replied in v4 just now, “freeze” is misleading if you can still run 
> `dmesg` after resume.
As I commented on v4, we can't run 'dmesg' when the issue occurs. The user
has to power-cycle the system to reset it.
>
>
> Kind regards,
>
> Paul

Regards,

Richard

>
>
>> The issue was originally reported on one system (Dell Precision 3660 
>> with
>> BIOS version 0.14.81), but was later confirmed to affect at least 4
>> pre-production Alder Lake based systems.
>>
>> Add an extra check to disable ASPM on Intel Alder Lake based systems 
>> with
>> the problematic AMD Volcanic Islands GFX cards.
>>
>> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
>> Link: 
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fdrm%2Famd%2F-%2Fissues%2F1885&amp;data=05%7C01%7Crichard.gong%40amd.com%7C78173acb0fe3463fead808da2b423e81%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637869860787352219%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=TK3Ur99Ro4OczgUlCpdod6CrvgGJvNZAyUfpzKEqExw%3D&amp;reserved=0
>> Reported-by: kernel test robot <lkp@intel.com>
>> Signed-off-by: Richard Gong <richard.gong@amd.com>
>> ---
>> v5: added vi to commit header and updated commit message
>>      rolled back guard with the preprocessor as did in v2 to correct 
>> build
>>      error on non-x86 systems
>> v4: s/CONFIG_X86_64/CONFIG_X86
>>      enhanced check logic
>> v3: s/intel_core_aspm_chk/aspm_support_quirk_check
>>      correct build error with W=1 option
>> v2: correct commit description
>>      move the check from chip family to problematic platform
>> ---
>>   drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
>>   1 file changed, 16 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c 
>> b/drivers/gpu/drm/amd/amdgpu/vi.c
>> index 039b90cdc3bc..45f0188c4273 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
>> @@ -81,6 +81,10 @@
>>   #include "mxgpu_vi.h"
>>   #include "amdgpu_dm.h"
>>   +#if IS_ENABLED(CONFIG_X86)
>> +#include <asm/intel-family.h>
>> +#endif
>> +
>>   #define ixPCIE_LC_L1_PM_SUBSTATE    0x100100C6
>>   #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK 
>> 0x00000001L
>>   #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK 
>> 0x00000002L
>> @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct 
>> amdgpu_device *adev)
>>           WREG32_PCIE(ixPCIE_LC_CNTL, data);
>>   }
>>   +static bool aspm_support_quirk_check(void)
>> +{
>> +#if IS_ENABLED(CONFIG_X86)
>> +    struct cpuinfo_x86 *c = &cpu_data(0);
>> +
>> +    return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
>> +#else
>> +    return true;
>> +#endif
>> +}
>> +
>>   static void vi_program_aspm(struct amdgpu_device *adev)
>>   {
>>       u32 data, data1, orig;
>>       bool bL1SS = false;
>>       bool bClkReqSupport = true;
>>   -    if (!amdgpu_device_should_use_aspm(adev))
>> +    if (!amdgpu_device_should_use_aspm(adev) || 
>> !aspm_support_quirk_check())
>>           return;
>>         if (adev->flags & AMD_IS_APU ||
Mario Limonciello May 3, 2022, 2:16 a.m. UTC | #4
[Public]



> -----Original Message-----
> From: Alex Deucher <alexdeucher@gmail.com>
> Sent: Friday, April 29, 2022 11:14
> To: Gong, Richard <Richard.Gong@amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Pan, Xinhui <Xinhui.Pan@amd.com>; Dave
> Airlie <airlied@linux.ie>; Daniel Vetter <daniel@ffwll.ch>; amd-gfx list <amd-
> gfx@lists.freedesktop.org>; kernel test robot <lkp@intel.com>; LKML <linux-
> kernel@vger.kernel.org>; Maling list - DRI developers <dri-
> devel@lists.freedesktop.org>; Limonciello, Mario
> <Mario.Limonciello@amd.com>
> Subject: Re: [PATCHv5] drm/amdgpu: vi: disable ASPM on Intel Alder Lake
> based systems
> 
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> 
> On Fri, Apr 29, 2022 at 12:08 PM Richard Gong <richard.gong@amd.com>
> wrote:
> >
> > Active State Power Management (ASPM) feature is enabled since kernel
> 5.14.
> > There are some AMD Volcanic Islands (VI) GFX cards, such as the WX3200
> and
> > RX640, that do not work with ASPM-enabled Intel Alder Lake based
> systems.
> > Using these GFX cards as video/display output, Intel Alder Lake based
> > systems will freeze after suspend/resume.
> >
> > The issue was originally reported on one system (Dell Precision 3660 with
> > BIOS version 0.14.81), but was later confirmed to affect at least 4
> > pre-production Alder Lake based systems.
> >
> > Add an extra check to disable ASPM on Intel Alder Lake based systems with
> > the problematic AMD Volcanic Islands GFX cards.
> >
> > Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> > Link:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitla
> b.freedesktop.org%2Fdrm%2Famd%2F-
> %2Fissues%2F1885&amp;data=05%7C01%7Cmario.limonciello%40amd.com%
> 7C1fdb6c767a4a4b3f572c08da29fb3f1a%7C3dd8961fe4884e608e11a82d994e1
> 83d%7C0%7C0%7C637868456326825256%7CUnknown%7CTWFpbGZsb3d8eyJ
> WIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%
> 7C3000%7C%7C%7C&amp;sdata=LtV9WqUzB032KFmH2g%2F2BMtX2R6DyfM
> KqxCF1e2rHHg%3D&amp;reserved=0
> > Reported-by: kernel test robot <lkp@intel.com>

You should drop this "Reported-by:".  That makes more sense when it's a patch
that is already queued up and committed.  The bot just caught an intermediate
revision that wasn't committed anywhere, and it doesn't make sense here.

I don't think you need to send out a new version for review just to change the
commit message for that; this can simply be fixed up when committing the change.

Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>

> > Signed-off-by: Richard Gong <richard.gong@amd.com>
> > ---
> > v5: added vi to commit header and updated commit message
> >     rolled back guard with the preprocessor as did in v2 to correct build
> >     error on non-x86 systems
> > v4: s/CONFIG_X86_64/CONFIG_X86
> >     enhanced check logic
> > v3: s/intel_core_aspm_chk/aspm_support_quirk_check
> >     correct build error with W=1 option
> > v2: correct commit description
> >     move the check from chip family to problematic platform
> > ---
> >  drivers/gpu/drm/amd/amdgpu/vi.c | 17 ++++++++++++++++-
> >  1 file changed, 16 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
> b/drivers/gpu/drm/amd/amdgpu/vi.c
> > index 039b90cdc3bc..45f0188c4273 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> > @@ -81,6 +81,10 @@
> >  #include "mxgpu_vi.h"
> >  #include "amdgpu_dm.h"
> >
> > +#if IS_ENABLED(CONFIG_X86)
> > +#include <asm/intel-family.h>
> > +#endif
> > +
> >  #define ixPCIE_LC_L1_PM_SUBSTATE       0x100100C6
> >  #define
> PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK
> 0x00000001L
> >  #define
> PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK
> 0x00000002L
> > @@ -1134,13 +1138,24 @@ static void vi_enable_aspm(struct
> amdgpu_device *adev)
> >                 WREG32_PCIE(ixPCIE_LC_CNTL, data);
> >  }
> >
> > +static bool aspm_support_quirk_check(void)
> > +{
> > +#if IS_ENABLED(CONFIG_X86)
> > +       struct cpuinfo_x86 *c = &cpu_data(0);
> > +
> > +       return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> > +#else
> > +       return true;
> > +#endif
> > +}
> > +
> >  static void vi_program_aspm(struct amdgpu_device *adev)
> >  {
> >         u32 data, data1, orig;
> >         bool bL1SS = false;
> >         bool bClkReqSupport = true;
> >
> > -       if (!amdgpu_device_should_use_aspm(adev))
> > +       if (!amdgpu_device_should_use_aspm(adev) ||
> !aspm_support_quirk_check())
> >                 return;
> >
> >         if (adev->flags & AMD_IS_APU ||
> > --
> > 2.25.1
> >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 039b90cdc3bc..45f0188c4273 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -81,6 +81,10 @@ 
 #include "mxgpu_vi.h"
 #include "amdgpu_dm.h"
 
+#if IS_ENABLED(CONFIG_X86)
+#include <asm/intel-family.h>
+#endif
+
 #define ixPCIE_LC_L1_PM_SUBSTATE	0x100100C6
 #define PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK	0x00000001L
 #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK	0x00000002L
@@ -1134,13 +1138,24 @@  static void vi_enable_aspm(struct amdgpu_device *adev)
 		WREG32_PCIE(ixPCIE_LC_CNTL, data);
 }
 
+static bool aspm_support_quirk_check(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
+#else
+	return true;
+#endif
+}
+
 static void vi_program_aspm(struct amdgpu_device *adev)
 {
 	u32 data, data1, orig;
 	bool bL1SS = false;
 	bool bClkReqSupport = true;
 
-	if (!amdgpu_device_should_use_aspm(adev))
+	if (!amdgpu_device_should_use_aspm(adev) || !aspm_support_quirk_check())
 		return;
 
 	if (adev->flags & AMD_IS_APU ||