diff mbox series

[09/13] xen: Add SET_CPUFREQ_HWP xen_sysctl_pm_op

Message ID 20210503192810.36084-10-jandryuk@gmail.com (mailing list archive)
State New, archived
Headers show
Series Intel Hardware P-States (HWP) support | expand

Commit Message

Jason Andryuk May 3, 2021, 7:28 p.m. UTC
Add SET_CPUFREQ_HWP xen_sysctl_pm_op to set HWP parameters.  The sysctl
supports setting multiple values simultaneously as indicated by the
set_params bits.  This allows atomically applying new HWP configuration
via a single wrmsr.

XEN_SYSCTL_HWP_SET_PRESET_BALANCE/PERFORMANCE/POWERSAVE provide three
common presets.  Setting them depends on hardware limits which the
hypervisor is already caching.  So using them allows skipping a
hypercall to query the limits (hw_lowest/highest) to then set those same
values.  The code is organized to allow a preset to be refined with
additional individually specified parameters if desired.

"most_efficient" and "guaranteed" could be additional presets in the
future, but they are not added now.  Those levels can change at runtime,
but we don't have code in place to monitor and update for those events.

Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
---
 xen/arch/x86/acpi/cpufreq/hwp.c    | 114 +++++++++++++++++++++++++++++
 xen/drivers/acpi/pmstat.c          |  24 ++++++
 xen/include/acpi/cpufreq/cpufreq.h |   2 +
 xen/include/public/sysctl.h        |  32 ++++++++
 4 files changed, 172 insertions(+)

Comments

Jan Beulich May 27, 2021, 8:33 a.m. UTC | #1
On 03.05.2021 21:28, Jason Andryuk wrote:
> --- a/xen/arch/x86/acpi/cpufreq/hwp.c
> +++ b/xen/arch/x86/acpi/cpufreq/hwp.c
> @@ -547,6 +547,120 @@ int get_hwp_para(struct cpufreq_policy *policy, struct xen_hwp_para *hwp_para)
>      return 0;
>  }
>  
> +int set_hwp_para(struct cpufreq_policy *policy,
> +                 struct xen_set_hwp_para *set_hwp)
> +{
> +    unsigned int cpu = policy->cpu;
> +    struct hwp_drv_data *data = hwp_drv_data[cpu];
> +
> +    if ( data == NULL )
> +        return -EINVAL;
> +
> +    /* Validate all parameters first */
> +    if ( set_hwp->set_params & ~XEN_SYSCTL_HWP_SET_PARAM_MASK )
> +    {
> +        hwp_err("Invalid bits in hwp set_params %u\n",
> +                set_hwp->set_params);
> +
> +        return -EINVAL;
> +    }
> +
> +    if ( set_hwp->activity_window & ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK )
> +    {
> +        hwp_err("Invalid bits in activity window %u\n",
> +                set_hwp->activity_window);
> +
> +        return -EINVAL;
> +    }
> +
> +    if ( !feature_hwp_energy_perf &&
> +         set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF &&

Please add parentheses around the operands of & here and ...

> +         set_hwp->energy_perf > 0xf )
> +    {
> +        hwp_err("energy_perf %u out of range for IA32_ENERGY_PERF_BIAS\n",
> +                set_hwp->energy_perf);
> +
> +        return -EINVAL;
> +    }
> +
> +    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED &&

... here.

> +         set_hwp->desired != 0 &&
> +         ( set_hwp->desired < data->hw_lowest ||
> +           set_hwp->desired > data->hw_highest ) )

Excess blanks inside the inner pair of parentheses.

> +    {
> +        hwp_err("hwp desired %u is out of range (%u ... %u)\n",
> +                set_hwp->desired, data->hw_lowest, data->hw_highest);
> +
> +        return -EINVAL;
> +    }

None of these -EINVAL should be accompanied by a hwp_err, imo.

> +    /*
> +     * minimum & maximum are not validated as hardware doesn't seem to care
> +     * and the SDM says CPUs will clip internally.
> +     */
> +
> +    /* Apply presets */
> +    switch ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_PRESET_MASK )
> +    {
> +    case XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE:
> +        data->minimum = data->hw_lowest;
> +        data->maximum = data->hw_lowest;
> +        data->activity_window = 0;
> +        if ( feature_hwp_energy_perf )
> +            data->energy_perf = 0xff;
> +        else
> +            data->energy_perf = 0xf;

There may want to be constants #define-d for these, and ...

> +        data->desired = 0;
> +        break;
> +    case XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE:
> +        data->minimum = data->hw_highest;
> +        data->maximum = data->hw_highest;
> +        data->activity_window = 0;
> +        data->energy_perf = 0;
> +        data->desired = 0;
> +        break;
> +    case XEN_SYSCTL_HWP_SET_PRESET_BALANCE:
> +        data->minimum = data->hw_lowest;
> +        data->maximum = data->hw_highest;
> +        data->activity_window = 0;
> +        data->energy_perf = 0x80;
> +        if ( feature_hwp_energy_perf )
> +            data->energy_perf = 0x80;
> +        else
> +            data->energy_perf = 0x7;

... since these aren't the sole instances of these kind of magic
numbers there surely want to be #define-s for these (such that
the connection between the two [or more?] instances becomes
visible). Actually, the same applies to the 0xf further up, which
has a second use yet a few more lines up.

> +        data->desired = 0;
> +        break;
> +    case XEN_SYSCTL_HWP_SET_PRESET_NONE:
> +        break;
> +    default:
> +        printk("HWP: Invalid preset value: %u\n",
> +               set_hwp->set_params & XEN_SYSCTL_HWP_SET_PRESET_MASK);
> +
> +        return -EINVAL;
> +    }

For the entire switch() - please have blank lines between (non-fall-
through, which here is all of them) case blocks.

> --- a/xen/drivers/acpi/pmstat.c
> +++ b/xen/drivers/acpi/pmstat.c
> @@ -318,6 +318,24 @@ static int set_cpufreq_gov(struct xen_sysctl_pm_op *op)
>      return __cpufreq_set_policy(old_policy, &new_policy);
>  }
>  
> +static int set_cpufreq_hwp(struct xen_sysctl_pm_op *op)
> +{
> +    struct cpufreq_policy *policy;
> +
> +    if ( !cpufreq_governor_internal )
> +        return -EINVAL;
> +
> +    policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
> +
> +    if ( !policy || !policy->governor )
> +        return -EINVAL;
> +
> +    if ( strncasecmp(policy->governor->name, "hwp-internal", CPUFREQ_NAME_LEN) )

I think this recurring string literal also wants to at least gain
a #define.

> @@ -465,6 +483,12 @@ int do_pm_op(struct xen_sysctl_pm_op *op)
>          break;
>      }
>  
> +    case SET_CPUFREQ_HWP:
> +    {
> +        ret = set_cpufreq_hwp(op);
> +        break;
> +    }
> +
>      case SET_CPUFREQ_PARA:
>      {
>          ret = set_cpufreq_para(op);

I think you want to insert somewhere below this one and, despite all
the odd precedents, omit the stray braces.

> --- a/xen/include/acpi/cpufreq/cpufreq.h
> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> @@ -248,5 +248,7 @@ void cpufreq_dbs_timer_resume(void);
>  
>  /********************** hwp hypercall helper *************************/
>  int get_hwp_para(struct cpufreq_policy *policy, struct xen_hwp_para *hwp_para);
> +int set_hwp_para(struct cpufreq_policy *policy,
> +                 struct xen_set_hwp_para *set_hwp);

This renders the comment stale - the patch introducing it probably
can use plural right away.

> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -318,6 +318,36 @@ struct xen_hwp_para {
>      uint8_t energy_perf;
>  };
>  
> +/* set multiple values simultaneously when set_params bit is set */
> +struct xen_set_hwp_para {
> +    uint16_t set_params; /* bitflags for valid values */
> +#define XEN_SYSCTL_HWP_SET_DESIRED              (1U << 0)
> +#define XEN_SYSCTL_HWP_SET_ENERGY_PERF          (1U << 1)
> +#define XEN_SYSCTL_HWP_SET_ACT_WINDOW           (1U << 2)
> +#define XEN_SYSCTL_HWP_SET_MINIMUM              (1U << 3)
> +#define XEN_SYSCTL_HWP_SET_MAXIMUM              (1U << 4)
> +#define XEN_SYSCTL_HWP_SET_PRESET_MASK          (0xf000)
> +#define XEN_SYSCTL_HWP_SET_PRESET_NONE          (0x0000)
> +#define XEN_SYSCTL_HWP_SET_PRESET_BALANCE       (0x1000)
> +#define XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE     (0x2000)
> +#define XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE   (0x3000)

Personally I'd prefer unnecessary parentheses (like around single
tokens) to be omitted.

> +#define XEN_SYSCTL_HWP_SET_PARAM_MASK ((uint16_t)( \

What's the reason for this cast? Wherever possible #define-d
constants should be suitable for use in preprocessor conditionals.

> +                                  XEN_SYSCTL_HWP_SET_PRESET_MASK | \
> +                                  XEN_SYSCTL_HWP_SET_DESIRED     | \
> +                                  XEN_SYSCTL_HWP_SET_ENERGY_PERF | \
> +                                  XEN_SYSCTL_HWP_SET_ACT_WINDOW  | \
> +                                  XEN_SYSCTL_HWP_SET_MINIMUM     | \
> +                                  XEN_SYSCTL_HWP_SET_MAXIMUM     ))
> +
> +    uint16_t activity_window; /* 7bit mantissa and 3bit exponent */

Since the other respective comment is to be extended, perhaps here
you can simply refer to that one?

> +#define XEN_SYSCTL_HWP_ACT_WINDOW_MASK          (0x03ff)
> +    uint8_t minimum;
> +    uint8_t maximum;
> +    uint8_t desired;
> +    uint8_t energy_perf; /* 0-255 or 0-15 depending on HW support */
> +};
> +
> +

No double blank lines please.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/acpi/cpufreq/hwp.c b/xen/arch/x86/acpi/cpufreq/hwp.c
index 92222d6d85..0fd70d76a8 100644
--- a/xen/arch/x86/acpi/cpufreq/hwp.c
+++ b/xen/arch/x86/acpi/cpufreq/hwp.c
@@ -547,6 +547,120 @@  int get_hwp_para(struct cpufreq_policy *policy, struct xen_hwp_para *hwp_para)
     return 0;
 }
 
+int set_hwp_para(struct cpufreq_policy *policy,
+                 struct xen_set_hwp_para *set_hwp)
+{
+    unsigned int cpu = policy->cpu;
+    struct hwp_drv_data *data = hwp_drv_data[cpu];
+
+    if ( data == NULL )
+        return -EINVAL;
+
+    /* Validate all parameters first */
+    if ( set_hwp->set_params & ~XEN_SYSCTL_HWP_SET_PARAM_MASK )
+    {
+        hwp_err("Invalid bits in hwp set_params %u\n",
+                set_hwp->set_params);
+
+        return -EINVAL;
+    }
+
+    if ( set_hwp->activity_window & ~XEN_SYSCTL_HWP_ACT_WINDOW_MASK )
+    {
+        hwp_err("Invalid bits in activity window %u\n",
+                set_hwp->activity_window);
+
+        return -EINVAL;
+    }
+
+    if ( !feature_hwp_energy_perf &&
+         set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF &&
+         set_hwp->energy_perf > 0xf )
+    {
+        hwp_err("energy_perf %u out of range for IA32_ENERGY_PERF_BIAS\n",
+                set_hwp->energy_perf);
+
+        return -EINVAL;
+    }
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED &&
+         set_hwp->desired != 0 &&
+         ( set_hwp->desired < data->hw_lowest ||
+           set_hwp->desired > data->hw_highest ) )
+    {
+        hwp_err("hwp desired %u is out of range (%u ... %u)\n",
+                set_hwp->desired, data->hw_lowest, data->hw_highest);
+
+        return -EINVAL;
+    }
+
+    /*
+     * minimum & maximum are not validated as hardware doesn't seem to care
+     * and the SDM says CPUs will clip internally.
+     */
+
+    /* Apply presets */
+    switch ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_PRESET_MASK )
+    {
+    case XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE:
+        data->minimum = data->hw_lowest;
+        data->maximum = data->hw_lowest;
+        data->activity_window = 0;
+        if ( feature_hwp_energy_perf )
+            data->energy_perf = 0xff;
+        else
+            data->energy_perf = 0xf;
+        data->desired = 0;
+        break;
+    case XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE:
+        data->minimum = data->hw_highest;
+        data->maximum = data->hw_highest;
+        data->activity_window = 0;
+        data->energy_perf = 0;
+        data->desired = 0;
+        break;
+    case XEN_SYSCTL_HWP_SET_PRESET_BALANCE:
+        data->minimum = data->hw_lowest;
+        data->maximum = data->hw_highest;
+        data->activity_window = 0;
+        data->energy_perf = 0x80;
+        if ( feature_hwp_energy_perf )
+            data->energy_perf = 0x80;
+        else
+            data->energy_perf = 0x7;
+        data->desired = 0;
+        break;
+    case XEN_SYSCTL_HWP_SET_PRESET_NONE:
+        break;
+    default:
+        printk("HWP: Invalid preset value: %u\n",
+               set_hwp->set_params & XEN_SYSCTL_HWP_SET_PRESET_MASK);
+
+        return -EINVAL;
+    }
+
+    /* Further customize presets if needed */
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_MINIMUM )
+        data->minimum = set_hwp->minimum;
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_MAXIMUM )
+        data->maximum = set_hwp->maximum;
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_ENERGY_PERF )
+        data->energy_perf = set_hwp->energy_perf;
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_DESIRED )
+        data->desired = set_hwp->desired;
+
+    if ( set_hwp->set_params & XEN_SYSCTL_HWP_SET_ACT_WINDOW )
+        data->activity_window = set_hwp->activity_window &
+                                XEN_SYSCTL_HWP_ACT_WINDOW_MASK;
+
+    hwp_cpufreq_target(policy, 0, 0);
+
+    return 0;
+}
+
 int hwp_register_driver(void)
 {
     int ret;
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index 3e35c42949..016b0445ec 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -318,6 +318,24 @@  static int set_cpufreq_gov(struct xen_sysctl_pm_op *op)
     return __cpufreq_set_policy(old_policy, &new_policy);
 }
 
+static int set_cpufreq_hwp(struct xen_sysctl_pm_op *op)
+{
+    struct cpufreq_policy *policy;
+
+    if ( !cpufreq_governor_internal )
+        return -EINVAL;
+
+    policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
+
+    if ( !policy || !policy->governor )
+        return -EINVAL;
+
+    if ( strncasecmp(policy->governor->name, "hwp-internal", CPUFREQ_NAME_LEN) )
+        return -EINVAL;
+
+    return set_hwp_para(policy, &op->u.set_hwp);
+}
+
 static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
 {
     int ret = 0;
@@ -465,6 +483,12 @@  int do_pm_op(struct xen_sysctl_pm_op *op)
         break;
     }
 
+    case SET_CPUFREQ_HWP:
+    {
+        ret = set_cpufreq_hwp(op);
+        break;
+    }
+
     case SET_CPUFREQ_PARA:
     {
         ret = set_cpufreq_para(op);
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 42146ca2cf..7ff7d0d4bb 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -248,5 +248,7 @@  void cpufreq_dbs_timer_resume(void);
 
 /********************** hwp hypercall helper *************************/
 int get_hwp_para(struct cpufreq_policy *policy, struct xen_hwp_para *hwp_para);
+int set_hwp_para(struct cpufreq_policy *policy,
+                 struct xen_set_hwp_para *set_hwp);
 
 #endif /* __XEN_CPUFREQ_PM_H__ */
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 1a6c6397ea..3f18a3d522 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -318,6 +318,36 @@  struct xen_hwp_para {
     uint8_t energy_perf;
 };
 
+/* set multiple values simultaneously when set_params bit is set */
+struct xen_set_hwp_para {
+    uint16_t set_params; /* bitflags for valid values */
+#define XEN_SYSCTL_HWP_SET_DESIRED              (1U << 0)
+#define XEN_SYSCTL_HWP_SET_ENERGY_PERF          (1U << 1)
+#define XEN_SYSCTL_HWP_SET_ACT_WINDOW           (1U << 2)
+#define XEN_SYSCTL_HWP_SET_MINIMUM              (1U << 3)
+#define XEN_SYSCTL_HWP_SET_MAXIMUM              (1U << 4)
+#define XEN_SYSCTL_HWP_SET_PRESET_MASK          (0xf000)
+#define XEN_SYSCTL_HWP_SET_PRESET_NONE          (0x0000)
+#define XEN_SYSCTL_HWP_SET_PRESET_BALANCE       (0x1000)
+#define XEN_SYSCTL_HWP_SET_PRESET_POWERSAVE     (0x2000)
+#define XEN_SYSCTL_HWP_SET_PRESET_PERFORMANCE   (0x3000)
+#define XEN_SYSCTL_HWP_SET_PARAM_MASK ((uint16_t)( \
+                                  XEN_SYSCTL_HWP_SET_PRESET_MASK | \
+                                  XEN_SYSCTL_HWP_SET_DESIRED     | \
+                                  XEN_SYSCTL_HWP_SET_ENERGY_PERF | \
+                                  XEN_SYSCTL_HWP_SET_ACT_WINDOW  | \
+                                  XEN_SYSCTL_HWP_SET_MINIMUM     | \
+                                  XEN_SYSCTL_HWP_SET_MAXIMUM     ))
+
+    uint16_t activity_window; /* 7bit mantissa and 3bit exponent */
+#define XEN_SYSCTL_HWP_ACT_WINDOW_MASK          (0x03ff)
+    uint8_t minimum;
+    uint8_t maximum;
+    uint8_t desired;
+    uint8_t energy_perf; /* 0-255 or 0-15 depending on HW support */
+};
+
+
 /*
  * cpufreq para name of this structure named
  * same as sysfs file name of native linux
@@ -379,6 +409,7 @@  struct xen_sysctl_pm_op {
     #define SET_CPUFREQ_GOV            (CPUFREQ_PARA | 0x02)
     #define SET_CPUFREQ_PARA           (CPUFREQ_PARA | 0x03)
     #define GET_CPUFREQ_AVGFREQ        (CPUFREQ_PARA | 0x04)
+    #define SET_CPUFREQ_HWP            (CPUFREQ_PARA | 0x05)
 
     /* set/reset scheduler power saving option */
     #define XEN_SYSCTL_pm_op_set_sched_opt_smt    0x21
@@ -405,6 +436,7 @@  struct xen_sysctl_pm_op {
         struct xen_get_cpufreq_para get_para;
         struct xen_set_cpufreq_gov  set_gov;
         struct xen_set_cpufreq_para set_para;
+        struct xen_set_hwp_para     set_hwp;
         uint64_aligned_t get_avgfreq;
         uint32_t                    set_sched_opt_smt;
 #define XEN_SYSCTL_CX_UNLIMITED 0xffffffff