diff mbox series

[1/3] x86, sched: Calculate frequency invariance for AMD systems

Message ID 20201110083936.31994-2-ggherdovich@suse.cz (mailing list archive)
State Not Applicable, archived
Headers show
Series Add support for frequency invariance to AMD EPYC Zen2 | expand

Commit Message

Giovanni Gherdovich Nov. 10, 2020, 8:39 a.m. UTC
From: Nathan Fontenot <nathan.fontenot@amd.com>

This is the first pass in creating the ability to calculate the
frequency invariance on AMD systems. This approach uses the CPPC
highest performance and nominal performance values that range from
0 - 255 instead of a high and base frquency. This is because we do
not have the ability on AMD to get a highest frequency value.

On AMD systems the highest performance and nominal performance
vaues do correspond to the highest and base frequencies for the system
so using them should produce an appropriate ratio but some tweaking
is likely necessary.

Due to CPPC being initialized later in boot than when the frequency
invariant calculation is currently made, I had to create a callback
from the CPPC init code to do the calculation after we have CPPC
data.

Signed-off-by: Nathan Fontenot <nathan.fontenot@amd.com>
[ ggherdovich@suse.cz: cosmetic changes ]
Signed-off-by: Giovanni Gherdovich <ggherdovich@suse.cz>
---
 arch/x86/include/asm/topology.h |  8 ++++
 arch/x86/kernel/smpboot.c       | 76 +++++++++++++++++++++++++++++----
 drivers/acpi/cppc_acpi.c        |  5 +++
 3 files changed, 81 insertions(+), 8 deletions(-)

Comments

Peter Zijlstra Nov. 10, 2020, 9:49 a.m. UTC | #1
On Tue, Nov 10, 2020 at 09:39:34AM +0100, Giovanni Gherdovich wrote:

> +#ifdef CONFIG_ACPI
> +void init_freq_invariance_cppc(void)
> +{
> +	init_freq_invariance(false, true);
> +
> +	if (static_branch_likely(&arch_scale_freq_key))
> +		on_each_cpu(init_counter_refs, NULL, 0);
> +}
> +#endif
> +
>  static void disable_freq_invariance_workfn(struct work_struct *work)
>  {
>  	static_branch_disable(&arch_scale_freq_key);

> diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
> index 7a99b19bb893..e1969ff876ff 100644
> --- a/drivers/acpi/cppc_acpi.c
> +++ b/drivers/acpi/cppc_acpi.c
> @@ -39,6 +39,7 @@
>  #include <linux/ktime.h>
>  #include <linux/rwsem.h>
>  #include <linux/wait.h>
> +#include <linux/topology.h>
>  
>  #include <acpi/cppc_acpi.h>
>  
> @@ -850,6 +851,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
>  		goto out_free;
>  	}
>  
> +	/* Only needed once, so call on CPU0 */
> +	if (pr->id == 0)
> +		init_freq_invariance_cppc();
> +

This seems broken vs lovely things like booting with maxcpus= or
physical hotplug where you add logical CPUs.

Given the latter hunk limits it to one invocation (is phys_id 0
guaranteed to exist? Can a BIOS monkey screw us over?) only to then call
it on all CPUs, shouldn't this be changed to let
acpi_cppc_processor_probe() call it for every CPU that comes online?
Giovanni Gherdovich Nov. 10, 2020, 6:45 p.m. UTC | #2
On Tue, 2020-11-10 at 10:49 +0100, Peter Zijlstra wrote:
> On Tue, Nov 10, 2020 at 09:39:34AM +0100, Giovanni Gherdovich wrote:
> 
> > +#ifdef CONFIG_ACPI
> > +void init_freq_invariance_cppc(void)
> > +{
> > +	init_freq_invariance(false, true);
> > +
> > +	if (static_branch_likely(&arch_scale_freq_key))
> > +		on_each_cpu(init_counter_refs, NULL, 0);
> > +}
> > +#endif
> > +
> >  static void disable_freq_invariance_workfn(struct work_struct *work)
> >  {
> >  	static_branch_disable(&arch_scale_freq_key);
> > diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
> > index 7a99b19bb893..e1969ff876ff 100644
> > --- a/drivers/acpi/cppc_acpi.c
> > +++ b/drivers/acpi/cppc_acpi.c
> > @@ -39,6 +39,7 @@
> >  #include <linux/ktime.h>
> >  #include <linux/rwsem.h>
> >  #include <linux/wait.h>
> > +#include <linux/topology.h>
> >  
> >  #include <acpi/cppc_acpi.h>
> >  
> > @@ -850,6 +851,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
> >  		goto out_free;
> >  	}
> >  
> > +	/* Only needed once, so call on CPU0 */
> > +	if (pr->id == 0)
> > +		init_freq_invariance_cppc();
> > +
> 
> This seems broken vs lovely things like booting with maxcpus= or
> physical hotplug where you add logical CPUs.

Right.

> 
> Given the latter hunk limits it to one invocation (is phys_id 0
> guaranteed to exist? Can a BIOS monkey screw us over?) only to then call
> it on all CPUs, shouldn't this be changed to let
> acpi_cppc_processor_probe() call it for every CPU that comes online?

I sent a V2 that basically does that, it just makes sure that
"init_freq_invariance(secondary=false)" is called only once (the first CPU
that gets there), and init_counter_refs() instead is called by all.

Which makes me think, I could make better use of the "secondary" argument
to init_freq_invariance() and trim a couple of lines from
init_freq_invariance_cppc().


Giovanni
diff mbox series

Patch

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index f4234575f3fd..9799d4da282d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -218,4 +218,12 @@  static inline void arch_set_max_freq_ratio(bool turbo_disabled)
 }
 #endif
 
+#ifdef CONFIG_ACPI
+void init_freq_invariance_cppc(void);
+#else
+static inline void init_freq_invariance_cppc(void)
+{
+}
+#endif
+
 #endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index de776b2e6046..89016cc96948 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -82,6 +82,10 @@ 
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
 
+#ifdef CONFIG_ACPI
+#include <acpi/cppc_acpi.h>
+#endif
+
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
@@ -148,7 +152,7 @@  static inline void smpboot_restore_warm_reset_vector(void)
 	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
 }
 
-static void init_freq_invariance(bool secondary);
+static void init_freq_invariance(bool secondary, bool cppc_ready);
 
 /*
  * Report back to the Boot Processor during boot time or to the caller processor
@@ -186,7 +190,7 @@  static void smp_callin(void)
 	 */
 	set_cpu_sibling_map(raw_smp_processor_id());
 
-	init_freq_invariance(true);
+	init_freq_invariance(true, false);
 
 	/*
 	 * Get our bogomips.
@@ -1340,7 +1344,7 @@  void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	set_sched_topology(x86_topology);
 
 	set_cpu_sibling_map(0);
-	init_freq_invariance(false);
+	init_freq_invariance(false, false);
 	smp_sanity_check();
 
 	switch (apic_intr_mode) {
@@ -2027,7 +2031,47 @@  static bool intel_set_max_freq_ratio(void)
 	return true;
 }
 
-static void init_counter_refs(void)
+#ifdef CONFIG_ACPI
+static bool amd_set_max_freq_ratio(void)
+{
+	struct cppc_perf_caps perf_caps;
+	u64 highest_perf, nominal_perf;
+	u64 perf_ratio;
+	int rc;
+
+	rc = cppc_get_perf_caps(0, &perf_caps);
+	if (rc) {
+		pr_debug("Could not retrieve perf counters (%d)\n", rc);
+		return false;
+	}
+
+	highest_perf = perf_caps.highest_perf;
+	nominal_perf = perf_caps.nominal_perf;
+
+	if (!highest_perf || !nominal_perf) {
+		pr_debug("Could not retrieve highest or nominal performance\n");
+		return false;
+	}
+
+	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
+	if (!perf_ratio) {
+		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
+		return false;
+	}
+
+	arch_turbo_freq_ratio = perf_ratio;
+	arch_set_max_freq_ratio(false);
+
+	return true;
+}
+#else
+static bool amd_set_max_freq_ratio(void)
+{
+	return false;
+}
+#endif
+
+static void init_counter_refs(void *arg)
 {
 	u64 aperf, mperf;
 
@@ -2038,7 +2082,7 @@  static void init_counter_refs(void)
 	this_cpu_write(arch_prev_mperf, mperf);
 }
 
-static void init_freq_invariance(bool secondary)
+static void init_freq_invariance(bool secondary, bool cppc_ready)
 {
 	bool ret = false;
 
@@ -2047,22 +2091,38 @@  static void init_freq_invariance(bool secondary)
 
 	if (secondary) {
 		if (static_branch_likely(&arch_scale_freq_key)) {
-			init_counter_refs();
+			init_counter_refs(NULL);
 		}
 		return;
 	}
 
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
 		ret = intel_set_max_freq_ratio();
+	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+		if (!cppc_ready) {
+			return;
+		}
+		ret = amd_set_max_freq_ratio();
+	}
 
 	if (ret) {
-		init_counter_refs();
+		init_counter_refs(NULL);
 		static_branch_enable(&arch_scale_freq_key);
 	} else {
 		pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
 	}
 }
 
+#ifdef CONFIG_ACPI
+void init_freq_invariance_cppc(void)
+{
+	init_freq_invariance(false, true);
+
+	if (static_branch_likely(&arch_scale_freq_key))
+		on_each_cpu(init_counter_refs, NULL, 0);
+}
+#endif
+
 static void disable_freq_invariance_workfn(struct work_struct *work)
 {
 	static_branch_disable(&arch_scale_freq_key);
@@ -2112,7 +2172,7 @@  void arch_scale_freq_tick(void)
 	schedule_work(&disable_freq_invariance_work);
 }
 #else
-static inline void init_freq_invariance(bool secondary)
+static inline void init_freq_invariance(bool secondary, bool cppc_ready)
 {
 }
 #endif /* CONFIG_X86_64 */
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 7a99b19bb893..e1969ff876ff 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -39,6 +39,7 @@ 
 #include <linux/ktime.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
+#include <linux/topology.h>
 
 #include <acpi/cppc_acpi.h>
 
@@ -850,6 +851,10 @@  int acpi_cppc_processor_probe(struct acpi_processor *pr)
 		goto out_free;
 	}
 
+	/* Only needed once, so call on CPU0 */
+	if (pr->id == 0)
+		init_freq_invariance_cppc();
+
 	kfree(output.pointer);
 	return 0;