diff mbox series

ACPI / idle: override c-state latency when not in conformance with s0ix

Message ID 20210511025024.10083-1-mario.limonciello@amd.com (mailing list archive)
State Superseded, archived
Headers show
Series ACPI / idle: override c-state latency when not in conformance with s0ix | expand

Commit Message

Mario Limonciello May 11, 2021, 2:50 a.m. UTC
Generally the C-state latency is provided by the _CST method or the FADT, but
some OEM platforms using AMD Picasso, Renoir, and Cezanne set the C2
latency greater than C3's, which causes the C2 state to be skipped.
That blocks the core from entering PC6, which prevents s0ix from working
properly on Linux systems.

Other operating systems do not validate the latency values, so they do not
skip any states and this problem does not occur there.

Link: https://gitlab.freedesktop.org/agd5f/linux/-/commit/026d186e4592c1ee9c1cb44295912d0294508725
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1230#note_712174
Suggested-by: Prike Liang <Prike.Liang@amd.com>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 drivers/acpi/processor_idle.c | 68 +++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

Comments

Rafael J. Wysocki May 11, 2021, 10:24 a.m. UTC | #1
On Tue, May 11, 2021 at 4:50 AM Mario Limonciello
<mario.limonciello@amd.com> wrote:
>
> Generally the C-state latency is provided by the _CST method or FADT but
> some OEM platforms using AMD Picasso, Renoir, and Cezanne set the C2
> latency greater than C3's which causes the C2 state to be skipped.
> That will block the core entering PC6, which prevents s0ix working
> properly on Linux systems.

Well, if find_deepest_state() looked at the target residency instead
of the exit latency, this would work I suppose?

> In other operating systems the latency values are not validated and this
> does not cause problems by skipping states.
>
> Link: https://gitlab.freedesktop.org/agd5f/linux/-/commit/026d186e4592c1ee9c1cb44295912d0294508725
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1230#note_712174
> Suggested-by: Prike Liang <Prike.Liang@amd.com>
> Suggested-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
> ---
>  drivers/acpi/processor_idle.c | 68 +++++++++++++++++++++++++++++++++++
>  1 file changed, 68 insertions(+)
>
> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
> index 4e2d76b8b697..9d468e0e0cd7 100644
> --- a/drivers/acpi/processor_idle.c
> +++ b/drivers/acpi/processor_idle.c
> @@ -30,6 +30,7 @@
>  #ifdef CONFIG_X86
>  #include <asm/apic.h>
>  #include <asm/cpu.h>
> +#include <asm/cpu_device_id.h>
>  #endif
>
>  #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
> @@ -203,8 +204,73 @@ static void tsc_check_state(int state)
>                         mark_tsc_unstable("TSC halts in idle");
>         }
>  }
> +
> +struct cpu_cstate_quirks {
> +       int latency2;
> +       int latency3;
> +};
> +
> +static struct cpu_cstate_quirks amd_cst_bug = {
> +       .latency2 = 18,
> +       .latency3 = 350,
> +};
> +
> +static const struct x86_cpu_id cpu_match[] = {
> +       /* AMD Picasso */
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x18, &amd_cst_bug),
> +       /* AMD Renoir */
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x60, &amd_cst_bug),
> +       /* AMD Van Gogh */
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x90, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x91, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x92, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x93, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x94, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x95, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x96, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x97, &amd_cst_bug),
> +       /* AMD Cezanne */
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x50, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x51, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x52, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x53, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x54, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x55, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x56, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x57, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x58, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x59, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5A, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5B, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5C, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5D, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5E, &amd_cst_bug),
> +       X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5F, &amd_cst_bug),
> +       {},
> +};
> +
> +static void acpi_processor_check_quirks(struct acpi_processor *pr)
> +{
> +       const struct x86_cpu_id *id = x86_match_cpu(cpu_match);
> +       struct cpu_cstate_quirks *quirks;
> +
> +       if (!id)
> +               return;
> +       quirks = (struct cpu_cstate_quirks *)id->driver_data;
> +       /* correct some OEM BIOS that mistakenly set C2 latency higher
> +        * than C3 making C2 seem like an invalid state
> +        */
> +       if (max_cstate >= 3 &&
> +           pr->power.states[2].latency >= pr->power.states[3].latency &&
> +           quirks->latency2 && quirks->latency3) {
> +               pr->power.states[2].latency = quirks->latency2;
> +               pr->power.states[3].latency = quirks->latency3;
> +               pr_notice("overriding known buggy C2 latency\n");
> +       }
> +}
>  #else
>  static void tsc_check_state(int state) { return; }
> +static void acpi_processor_check_quirks(struct acpi_processor *pr) { return; }
>  #endif
>
>  static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
> @@ -447,6 +513,8 @@ static int acpi_processor_get_cstate_info(struct acpi_processor *pr)
>
>         acpi_processor_get_power_info_default(pr);
>
> +       acpi_processor_check_quirks(pr);
> +
>         pr->power.count = acpi_processor_power_verify(pr);
>
>         /*
> --
> 2.25.1
>
Mario Limonciello May 11, 2021, 3:20 p.m. UTC | #2
> Well, if find_deepest_state() looked at the target residency instead
> of the exit latency, this would work I suppose?

Unfortunately I don't think this would help - an OEM system reports the following
target residency values:

# cat /sys/devices/system/cpu/cpu0/cpuidle/*/residency
0
2
800
700

> 
> > In other operating systems the latency values are not validated and this
> > does not cause problems by skipping states.
> >
Rafael J. Wysocki May 11, 2021, 4:59 p.m. UTC | #3
On Tue, May 11, 2021 at 5:21 PM Limonciello, Mario
<Mario.Limonciello@amd.com> wrote:
>
> > Well, if find_deepest_state() looked at the target residency instead
> > of the exit latency, this would work I suppose?
>
> Unfortunately I don't think this would help - from an OEM system the following
> target residency values:
>
> # cat /sys/devices/system/cpu/cpu0/cpuidle/*/residency
> 0
> 2
> 800
> 700

But this means that not just S0ix, but cpuidle in general doesn't work
correctly on those systems and the latency quirk doesn't help here.

Well, it looks like the driver needs to sort the C-states table, then.
Rafael J. Wysocki May 11, 2021, 5:03 p.m. UTC | #4
On Tue, May 11, 2021 at 6:59 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Tue, May 11, 2021 at 5:21 PM Limonciello, Mario
> <Mario.Limonciello@amd.com> wrote:
> >
> > > Well, if find_deepest_state() looked at the target residency instead
> > > of the exit latency, this would work I suppose?
> >
> > Unfortunately I don't think this would help - from an OEM system the following
> > target residency values:
> >
> > # cat /sys/devices/system/cpu/cpu0/cpuidle/*/residency
> > 0
> > 2
> > 800
> > 700
>
> But this means that not just S0ix, but cpuidle in general doesn't work
> correctly on those systems and the latency quirk doesn't help here.
>
> Well, it looks like the driver needs to sort the C-states table, then.

But that wouldn't help, because the 700 us idle state is in fact deeper, right?

Are the values just swapped or are they completely bogus?
Mario Limonciello May 11, 2021, 5:24 p.m. UTC | #5
> On Tue, May 11, 2021 at 6:59 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
> >
> > On Tue, May 11, 2021 at 5:21 PM Limonciello, Mario
> > <Mario.Limonciello@amd.com> wrote:
> > >
> > > > Well, if find_deepest_state() looked at the target residency instead
> > > > of the exit latency, this would work I suppose?
> > >
> > > Unfortunately I don't think this would help - from an OEM system the
> following
> > > target residency values:
> > >
> > > # cat /sys/devices/system/cpu/cpu0/cpuidle/*/residency
> > > 0
> > > 2
> > > 800
> > > 700
> >
> > But this means that not just S0ix, but cpuidle in general doesn't work
> > correctly on those systems and the latency quirk doesn't help here.
> >
> > Well, it looks like the driver needs to sort the C-states table, then.
> 
> But that wouldn't help, because the 700 us idle state is in fact deeper, right?
> 
> Are the values just swapped or are they completely bogus?

Actually I think the value set in the OEM BIOS for state2 from LPI looks bogus too.
It should have been 36us.

@Liang, Prike and @Deucher, Alexander you have some more history on this
than I do.
Alex Deucher May 11, 2021, 6:04 p.m. UTC | #6
> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello@amd.com>
> Sent: Tuesday, May 11, 2021 1:25 PM
> To: Rafael J. Wysocki <rafael@kernel.org>; Liang, Prike
> <Prike.Liang@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>
> Cc: Rafael J . Wysocki <rjw@rjwysocki.net>; Len Brown <lenb@kernel.org>;
> ACPI Devel Maling List <linux-acpi@vger.kernel.org>
> Subject: RE: [PATCH] ACPI / idle: override c-state latency when not in
> conformance with s0ix
> 
> > On Tue, May 11, 2021 at 6:59 PM Rafael J. Wysocki <rafael@kernel.org>
> wrote:
> > >
> > > On Tue, May 11, 2021 at 5:21 PM Limonciello, Mario
> > > <Mario.Limonciello@amd.com> wrote:
> > > >
> > > > > Well, if find_deepest_state() looked at the target residency
> > > > > instead of the exit latency, this would work I suppose?
> > > >
> > > > Unfortunately I don't think this would help - from an OEM system
> > > > the
> > following
> > > > target residency values:
> > > >
> > > > # cat /sys/devices/system/cpu/cpu0/cpuidle/*/residency
> > > > 0
> > > > 2
> > > > 800
> > > > 700
> > >
> > > But this means that not just S0ix, but cpuidle in general doesn't
> > > work correctly on those systems and the latency quirk doesn't help here.
> > >
> > > Well, it looks like the driver needs to sort the C-states table, then.
> >
> > But that wouldn't help, because the 700 us idle state is in fact deeper,
> right?
> >
> > Are the values just swapped or are they completely bogus?
> 
> Actually I think the value set in the OEM BIOS for state2 from LPI looks bogus
> too.
> It should have been 36us.
> 
> @Liang, Prike and @Deucher, Alexander you have some more history on this
> than I do.

I think they were just bogus, at least in the initial cases where we saw this.

Alex
Mario Limonciello May 11, 2021, 6:56 p.m. UTC | #7
> >
> > Actually I think the value set in the OEM BIOS for state2 from LPI looks bogus
> > too.
> > It should have been 36us.
> >
> > @Liang, Prike and @Deucher, Alexander you have some more history on this
> > than I do.
> 
> I think they were just bogus, at least in the initial cases where we saw this.
> 

In processor_idle.c the target residency is calculated as the exit latency multiplied by
a latency factor parameter (2 by default).  So when these systems have the buggy
_CST C2 value of 400, that's where the 800 comes from for residency, not _LPI.
Rafael J. Wysocki May 11, 2021, 7:09 p.m. UTC | #8
On Tue, May 11, 2021 at 8:57 PM Limonciello, Mario
<Mario.Limonciello@amd.com> wrote:
>
> > >
> > > Actually I think the value set in the OEM BIOS for state2 from LPI looks bogus
> > > too.
> > > It should have been 36us.
> > >
> > > @Liang, Prike and @Deucher, Alexander you have some more history on this
> > > than I do.
> >
> > I think they were just bogus, at least in the initial cases where we saw this.
> >
>
> In processor_idle.c target residency is calculated by exit latency multiplied by
> a latency factor parameter (2 by default).  So when these systems have the buggy
> _CST C2 value of 400, that's where the 800 comes from for residency, not _LPI.

Yes, that's right, sorry for the confusion.

My point was mostly whether or not it would be sufficient to swap the
values for the last two states or if some new values needed to be used
and the answer appears to be the latter.

So I don't see any way to address this that would be cleaner than the
quirk mechanism as proposed.
Mario Limonciello May 12, 2021, 2:47 a.m. UTC | #9
> > In processor_idle.c target residency is calculated by exit latency multiplied by
> > a latency factor parameter (2 by default).  So when these systems have the
> buggy
> > _CST C2 value of 400, that's where the 800 comes from for residency, not
> _LPI.
> 
> Yes, that's right, sorry for the confusion.
> 
> My point was mostly whether or not it would be sufficient to swap the
> values for the last two states or if some new values needed to be used
> and the answer appears to be the latter.
> 
> So I don't see any way to address this that would be cleaner than the
> quirk mechanism as proposed.

The root of the problem is that the states get skipped.  So yes technically
your suggestion of swapping the values of the last two states would be
enough to avoid this issue as well.

My thought is to do it by checking whether the latencies increase from each
C-state to the next, and if they do not, emit a notice that this is a FW bug and
sort the states at the end of acpi_processor_power_verify(pr).  This would be
done on all systems.

Let me know if that sounds good or if you would prefer to stick to the quirk
approach as proposed or something hybrid.
Rafael J. Wysocki May 12, 2021, 11:36 a.m. UTC | #10
On Wed, May 12, 2021 at 4:47 AM Limonciello, Mario
<Mario.Limonciello@amd.com> wrote:
>
> > > In processor_idle.c target residency is calculated by exit latency multiplied by
> > > a latency factor parameter (2 by default).  So when these systems have the
> > buggy
> > > _CST C2 value of 400, that's where the 800 comes from for residency, not
> > _LPI.
> >
> > Yes, that's right, sorry for the confusion.
> >
> > My point was mostly whether or not it would be sufficient to swap the
> > values for the last two states or if some new values needed to be used
> > and the answer appears to be the latter.
> >
> > So I don't see any way to address this that would be cleaner than the
> > quirk mechanism as proposed.
>
> The root of the problem is that the states get skipped.  So yes technically
> your suggestion of swapping the values of the last two states would be
> enough to avoid this issue as well.
>
> My thought is to do it by checking whether all the latencies are arithmetic
> progressions and if not then emit a notice this is a FW bug and sort at the
> end of acpi_processor_power_verify(pr).  This would be done on all systems.

Sounds reasonable to me.

> Let me know if that sounds good or if you would prefer to stick to the quirk
> approach as proposed or something hybrid.

Well, I prefer to avoid adding quirks if possible, because they become
pure technical debt when the quirked systems get out of use (and
there's no good way to determine whether or not this has already
happened).
diff mbox series

Patch

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 4e2d76b8b697..9d468e0e0cd7 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -30,6 +30,7 @@ 
 #ifdef CONFIG_X86
 #include <asm/apic.h>
 #include <asm/cpu.h>
+#include <asm/cpu_device_id.h>
 #endif
 
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
@@ -203,8 +204,73 @@  static void tsc_check_state(int state)
 			mark_tsc_unstable("TSC halts in idle");
 	}
 }
+
+struct cpu_cstate_quirks {
+	int latency2;
+	int latency3;
+};
+
+static struct cpu_cstate_quirks amd_cst_bug = {
+	.latency2 = 18,
+	.latency3 = 350,
+};
+
+static const struct x86_cpu_id cpu_match[] = {
+	/* AMD Picasso */
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x18, &amd_cst_bug),
+	/* AMD Renoir */
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x60, &amd_cst_bug),
+	/* AMD Van Gogh */
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x90, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x91, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x92, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x93, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x94, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x95, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x96, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x17, 0x97, &amd_cst_bug),
+	/* AMD Cezanne */
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x50, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x51, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x52, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x53, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x54, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x55, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x56, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x57, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x58, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x59, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5A, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5B, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5C, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5D, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5E, &amd_cst_bug),
+	X86_MATCH_VENDOR_FAM_MODEL(AMD, 0x19, 0x5F, &amd_cst_bug),
+	{},
+};
+
+static void acpi_processor_check_quirks(struct acpi_processor *pr)
+{
+	const struct x86_cpu_id *id = x86_match_cpu(cpu_match);
+	struct cpu_cstate_quirks *quirks;
+
+	if (!id)
+		return;
+	quirks = (struct cpu_cstate_quirks *)id->driver_data;
+	/* correct some OEM BIOS that mistakenly set C2 latency higher
+	 * than C3 making C2 seem like an invalid state
+	 */
+	if (max_cstate >= 3 &&
+	    pr->power.states[2].latency >= pr->power.states[3].latency &&
+	    quirks->latency2 && quirks->latency3) {
+		pr->power.states[2].latency = quirks->latency2;
+		pr->power.states[3].latency = quirks->latency3;
+		pr_notice("overriding known buggy C2 latency\n");
+	}
+}
 #else
 static void tsc_check_state(int state) { return; }
+static void acpi_processor_check_quirks(struct acpi_processor *pr) { return; }
 #endif
 
 static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
@@ -447,6 +513,8 @@  static int acpi_processor_get_cstate_info(struct acpi_processor *pr)
 
 	acpi_processor_get_power_info_default(pr);
 
+	acpi_processor_check_quirks(pr);
+
 	pr->power.count = acpi_processor_power_verify(pr);
 
 	/*