diff mbox series

[v1,03/11] xen/x86: introduce "cpufreq=amd-pstate" xen cmdline

Message ID 20241203081111.463400-4-Penny.Zheng@amd.com (mailing list archive)
State New
Headers show
Series amd-pstate CPU Performance Scaling Driver | expand

Commit Message

Penny Zheng Dec. 3, 2024, 8:11 a.m. UTC
Users need to set "cpufreq=amd-pstate" on the Xen command line to enable
the amd-pstate driver, which selects ACPI Collaborative Performance
and Power Control (CPPC) on supported AMD hardware to provide a
finer-grained frequency control mechanism.
The `verbose` option can also be included to enable verbose output.

When users set "cpufreq=amd-pstate", a new amd-pstate driver
shall be registered and used. The actual implementation will be introduced
in the following commits.

Signed-off-by: Penny Zheng <Penny.Zheng@amd.com>
---
 docs/misc/xen-command-line.pandoc      |  8 +++-
 xen/arch/x86/acpi/cpufreq/Makefile     |  1 +
 xen/arch/x86/acpi/cpufreq/amd-pstate.c | 66 ++++++++++++++++++++++++++
 xen/arch/x86/acpi/cpufreq/cpufreq.c    | 28 +++++++++++
 xen/arch/x86/platform_hypercall.c      |  6 +++
 xen/drivers/cpufreq/cpufreq.c          | 13 ++++-
 xen/include/acpi/cpufreq/cpufreq.h     |  4 ++
 xen/include/public/platform.h          |  1 +
 xen/include/public/sysctl.h            |  1 +
 9 files changed, 124 insertions(+), 4 deletions(-)
 create mode 100644 xen/arch/x86/acpi/cpufreq/amd-pstate.c
diff mbox series

Patch

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index 293dbc1a95..30f855fa18 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -499,7 +499,7 @@  If set, force use of the performance counters for oprofile, rather than detectin
 available support.
 
 ### cpufreq
-> `= none | {{ <boolean> | xen } { [:[powersave|performance|ondemand|userspace][,[<maxfreq>]][,[<minfreq>]]] } [,verbose]} | dom0-kernel | hwp[:[<hdc>][,verbose]]`
+> `= none | {{ <boolean> | xen } { [:[powersave|performance|ondemand|userspace][,[<maxfreq>]][,[<minfreq>]]] } [,verbose]} | dom0-kernel | hwp[:[<hdc>][,verbose]] | amd-pstate[:[verbose]]`
 
 > Default: `xen`
 
@@ -510,7 +510,7 @@  choice of `dom0-kernel` is deprecated and not supported by all Dom0 kernels.
 * `<maxfreq>` and `<minfreq>` are integers which represent max and min processor frequencies
   respectively.
 * `verbose` option can be included as a string or also as `verbose=<integer>`
-  for `xen`.  It is a boolean for `hwp`.
+  for `xen`.  It is a boolean for `hwp` and `amd-pstate`.
 * `hwp` selects Hardware-Controlled Performance States (HWP) on supported Intel
   hardware.  HWP is a Skylake+ feature which provides better CPU power
   management.  The default is disabled.  If `hwp` is selected, but hardware
@@ -518,6 +518,10 @@  choice of `dom0-kernel` is deprecated and not supported by all Dom0 kernels.
 * `<hdc>` is a boolean to enable Hardware Duty Cycling (HDC).  HDC enables the
   processor to autonomously force physical package components into idle state.
   The default is enabled, but the option only applies when `hwp` is enabled.
+* `amd-pstate` selects ACPI Collaborative Performance and Power Control (CPPC)
+  on supported AMD hardware to provide a finer-grained frequency control mechanism.
+  The default is disabled. If `amd-pstate` is selected, but hardware support
+  is not available, Xen will fall back to cpufreq=xen.
 
 There is also support for `;`-separated fallback options:
 `cpufreq=hwp;xen,verbose`.  This first tries `hwp` and falls back to `xen` if
diff --git a/xen/arch/x86/acpi/cpufreq/Makefile b/xen/arch/x86/acpi/cpufreq/Makefile
index e7dbe434a8..1710fc776c 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,4 +1,5 @@ 
 obj-$(CONFIG_INTEL) += acpi.o
 obj-y += cpufreq.o
+obj-y += amd-pstate.o
 obj-$(CONFIG_INTEL) += hwp.o
 obj-$(CONFIG_AMD) += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/amd-pstate.c b/xen/arch/x86/acpi/cpufreq/amd-pstate.c
new file mode 100644
index 0000000000..bfad96ae3d
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/amd-pstate.c
@@ -0,0 +1,66 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * amd-pstate.c - AMD Processor P-state Frequency Driver
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Author: Penny Zheng <penny.zheng@amd.com>
+ *
+ * AMD P-State introduces a new CPU performance scaling design for AMD
+ * processors using the ACPI Collaborative Performance and Power Control (CPPC)
+ * feature which provides a finer grained frequency control range.
+ *
+ */
+
+#include <xen/init.h>
+#include <xen/param.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+uint16_t __read_mostly dmi_max_speed_mhz;
+
+static bool __init amd_pstate_handle_option(const char *s, const char *end)
+{
+    int ret;
+
+    ret = parse_boolean("verbose", s, end);
+    if ( ret >= 0 )
+    {
+        cpufreq_verbose = ret;
+        return true;
+    }
+
+    return false;
+}
+
+int __init amd_pstate_cmdline_parse(const char *s, const char *e)
+{
+    do
+    {
+        const char *end = strpbrk(s, ",;");
+
+        if ( !amd_pstate_handle_option(s, end) )
+        {
+            printk(XENLOG_WARNING "cpufreq/amd-pstate: option '%.*s' not recognized\n",
+                   (int)((end ?: e) - s), s);
+
+            return -EINVAL;
+        }
+
+        s = end ? ++end : end;
+    } while ( s && s < e );
+
+    return 0;
+}
+
+static const struct cpufreq_driver __initconstrel amd_pstate_cpufreq_driver =
+{
+    .name   = XEN_AMD_PSTATE_DRIVER_NAME,
+};
+
+int __init amd_pstate_register_driver(void)
+{
+    if ( !cpu_has_cppc )
+        return -ENODEV;
+
+    return cpufreq_register_driver(&amd_pstate_cpufreq_driver);
+}
diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index 61e98b67bd..a461cfc7b3 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -148,6 +148,9 @@  static int __init cf_check cpufreq_driver_init(void)
                 case CPUFREQ_none:
                     ret = 0;
                     break;
+                default:
+                    printk(XENLOG_WARNING "Unsupported cpufreq driver for vendor Intel\n");
+                    break;
                 }
 
                 if ( ret != -ENODEV )
@@ -156,6 +159,31 @@  static int __init cf_check cpufreq_driver_init(void)
             break;
 
         case X86_VENDOR_AMD:
+            ret = -ENOENT;
+
+            for ( unsigned int i = 0; i < cpufreq_xen_cnt; i++ )
+            {
+                switch ( cpufreq_xen_opts[i] )
+                {
+                case CPUFREQ_xen:
+                    ret = powernow_register_driver();
+                    break;
+                case CPUFREQ_amd_pstate:
+                    ret = amd_pstate_register_driver();
+                    break;
+                case CPUFREQ_none:
+                    ret = 0;
+                    break;
+                default:
+                    printk(XENLOG_WARNING "Unsupported cpufreq driver for vendor AMD\n");
+                    break;
+                }
+
+                if ( ret != -ENODEV )
+                    break;
+            }
+            break;
+
         case X86_VENDOR_HYGON:
             ret = IS_ENABLED(CONFIG_AMD) ? powernow_register_driver() : -ENODEV;
             break;
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 917c395f58..4720c30e7e 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -574,6 +574,12 @@  ret_t do_platform_op(
 
         case XEN_PM_CPPC:
         {
+            if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CPPC) )
+            {
+                ret = -ENOSYS;
+                break;
+            }
+
             ret = set_cppc_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.u.cppc_data);
         }
         break;
diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 3e3392da1b..54d554aa4f 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -84,7 +84,7 @@  static int __init cf_check setup_cpufreq_option(const char *str)
 
     if ( choice < 0 && !cmdline_strcmp(str, "dom0-kernel") )
     {
-        xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
+        xen_processor_pmbits &= ~(XEN_PROCESSOR_PM_PX|XEN_PROCESSOR_PM_CPPC);
         cpufreq_controller = FREQCTL_dom0_kernel;
         opt_dom0_vcpus_pin = 1;
         return 0;
@@ -92,7 +92,7 @@  static int __init cf_check setup_cpufreq_option(const char *str)
 
     if ( choice == 0 || !cmdline_strcmp(str, "none") )
     {
-        xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
+        xen_processor_pmbits &= ~(XEN_PROCESSOR_PM_PX|XEN_PROCESSOR_PM_CPPC);
         cpufreq_controller = FREQCTL_none;
         return 0;
     }
@@ -130,6 +130,15 @@  static int __init cf_check setup_cpufreq_option(const char *str)
             if ( arg[0] && arg[1] )
                 ret = hwp_cmdline_parse(arg + 1, end);
         }
+        else if ( choice < 0 && !cmdline_strcmp(str, "amd-pstate") )
+        {
+            xen_processor_pmbits |= XEN_PROCESSOR_PM_CPPC;
+            cpufreq_controller = FREQCTL_xen;
+            cpufreq_xen_opts[cpufreq_xen_cnt++] = CPUFREQ_amd_pstate;
+            ret = 0;
+            if ( arg[0] && arg[1] )
+                ret = amd_pstate_cmdline_parse(arg + 1, end);
+        }
         else
             ret = -EINVAL;
 
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 3f1b05a02e..71e8ca91f0 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -28,6 +28,7 @@  enum cpufreq_xen_opt {
     CPUFREQ_none,
     CPUFREQ_xen,
     CPUFREQ_hwp,
+    CPUFREQ_amd_pstate,
 };
 extern enum cpufreq_xen_opt cpufreq_xen_opts[2];
 extern unsigned int cpufreq_xen_cnt;
@@ -267,4 +268,7 @@  int set_hwp_para(struct cpufreq_policy *policy,
 
 int acpi_cpufreq_register(void);
 
+int amd_pstate_cmdline_parse(const char *s, const char *e);
+int amd_pstate_register_driver(void);
+
 #endif /* __XEN_CPUFREQ_PM_H__ */
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index be1cf9a12f..ad942f1775 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -357,6 +357,7 @@  DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
 #define XEN_PROCESSOR_PM_CX	1
 #define XEN_PROCESSOR_PM_PX	2
 #define XEN_PROCESSOR_PM_TX	4
+#define XEN_PROCESSOR_PM_CPPC	8
 
 /* cmd type */
 #define XEN_PM_CX   0
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index b0fec271d3..df4f362681 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -424,6 +424,7 @@  struct xen_set_cppc_para {
 };
 
 #define XEN_HWP_DRIVER_NAME "hwp"
+#define XEN_AMD_PSTATE_DRIVER_NAME "amd-pstate"
 
 /*
  * cpufreq para name of this structure named