diff mbox series

[v4,09/15] cpufreq: Export HWP parameters to userspace as CPPC

Message ID 20230614180253.89958-10-jandryuk@gmail.com (mailing list archive)
State Superseded
Headers show
Series Intel Hardware P-States (HWP) support | expand

Commit Message

Jason Andryuk June 14, 2023, 6:02 p.m. UTC
Extend xen_get_cpufreq_para to return hwp parameters.  HWP is an
implementation of ACPI CPPC (Collaborative Processor Performance
Control).  Use the CPPC name since that might be useful in the future
for AMD P-state.

We need the features bitmask to indicate fields supported by the actual
hardware - this only applies to activity window for the time being.

The HWP most_efficient is mapped to CPPC lowest_nonlinear, and guaranteed is
mapped to nominal.  CPPC has a guaranteed that is optional while nominal
is required.  ACPI spec says "If this register is not implemented, OSPM
assumes guaranteed performance is always equal to nominal performance."

The use of uint8_t parameters matches the hardware size.  uint32_t
entries grows the sysctl_t past the build assertion in setup.c.  The
uint8_t ranges are supported across multiple generations, so hopefully
they won't change.

Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
---
v2:
Style fixes
Don't bump XEN_SYSCTL_INTERFACE_VERSION
Drop cpufreq.h comment divider
Expand xen_hwp_para comment
Add HWP activity window mantissa/exponent defines
Handle union rename
Add const to get_hwp_para
Remove hw_ prefix from xen_hwp_para members
Use XEN_HWP_GOVERNOR
Use per_cpu for hwp_drv_data

v4:
Fixup for opt_cpufreq_hwp/hdc removal
get_hwp_para() takes cpu as arg
XEN_ prefix HWP_ACT_WINDOW_*
Drop HWP_ACT_WINDOW_EXPONENT_SHIFT - shift MASK
Remove Energy Bias (0-15) EPP fallback
Rename xen_hwp_para to xen_cppc_para
s/hwp/cppc/
Use scaling driver to switch output
---
 xen/arch/x86/acpi/cpufreq/hwp.c    | 23 +++++++++
 xen/drivers/acpi/pmstat.c          | 78 ++++++++++++++++--------------
 xen/include/acpi/cpufreq/cpufreq.h |  2 +
 xen/include/public/sysctl.h        | 56 +++++++++++++++++++++
 4 files changed, 123 insertions(+), 36 deletions(-)

Comments

Jan Beulich June 19, 2023, 2:24 p.m. UTC | #1
On 14.06.2023 20:02, Jason Andryuk wrote:
> --- a/xen/arch/x86/acpi/cpufreq/hwp.c
> +++ b/xen/arch/x86/acpi/cpufreq/hwp.c
> @@ -537,6 +537,29 @@ static const struct cpufreq_driver __initconstrel hwp_cpufreq_driver =
>      .update = hwp_cpufreq_update,
>  };
>  
> +int get_hwp_para(const unsigned int cpu,
> +                 struct xen_cppc_para *cppc_para)
> +{
> +    const struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
> +
> +    if ( data == NULL )
> +        return -EINVAL;

Maybe better -ENODATA in this case?

> --- a/xen/drivers/acpi/pmstat.c
> +++ b/xen/drivers/acpi/pmstat.c
> @@ -251,48 +251,54 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
>      else
>          strlcpy(op->u.get_para.scaling_driver, "Unknown", CPUFREQ_NAME_LEN);
>  
> -    if ( !(scaling_available_governors =
> -           xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
> -        return -ENOMEM;
> -    if ( (ret = read_scaling_available_governors(
> -                    scaling_available_governors,
> -                    gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
> +    if ( !strncasecmp(op->u.get_para.scaling_driver, XEN_HWP_DRIVER,
> +                      CPUFREQ_NAME_LEN) )

Mind me asking why you think case-insensitive compare is appropriate here?

> +        ret = get_hwp_para(policy->cpu, &op->u.get_para.u.cppc_para);
> +    else
>      {
> +        if ( !(scaling_available_governors =
> +               xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
> +            return -ENOMEM;
> +        if ( (ret = read_scaling_available_governors(
> +                        scaling_available_governors,
> +                        gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )

I realize you only re-indent this, but since you need to touch it anyway,
may I suggest to also switch to siezof(*scaling_available_governors)?

> +        {
> +            xfree(scaling_available_governors);
> +            return ret;
> +        }
> +        ret = copy_to_guest(op->u.get_para.scaling_available_governors,
> +                    scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);

Similarly here: Please adjust indentation while you touch this code.

>          xfree(scaling_available_governors);
> -        return ret;
> -    }
> -    ret = copy_to_guest(op->u.get_para.scaling_available_governors,
> -                scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
> -    xfree(scaling_available_governors);
> -    if ( ret )
> -        return ret;
> +        if ( ret )
> +            return ret;
>  
> -    op->u.get_para.u.s.scaling_cur_freq = policy->cur;
> -    op->u.get_para.u.s.scaling_max_freq = policy->max;
> -    op->u.get_para.u.s.scaling_min_freq = policy->min;
> +        op->u.get_para.u.s.scaling_cur_freq = policy->cur;
> +        op->u.get_para.u.s.scaling_max_freq = policy->max;
> +        op->u.get_para.u.s.scaling_min_freq = policy->min;
>  
> -    if ( policy->governor->name[0] )
> -        strlcpy(op->u.get_para.u.s.scaling_governor,
> -            policy->governor->name, CPUFREQ_NAME_LEN);
> -    else
> -        strlcpy(op->u.get_para.u.s.scaling_governor, "Unknown",
> -                CPUFREQ_NAME_LEN);
> +        if ( policy->governor->name[0] )
> +            strlcpy(op->u.get_para.u.s.scaling_governor,
> +                policy->governor->name, CPUFREQ_NAME_LEN);
> +        else
> +            strlcpy(op->u.get_para.u.s.scaling_governor, "Unknown",
> +                    CPUFREQ_NAME_LEN);
>  
> -    /* governor specific para */
> -    if ( !strncasecmp(op->u.get_para.u.s.scaling_governor,
> -                      "userspace", CPUFREQ_NAME_LEN) )
> -    {
> -        op->u.get_para.u.s.u.userspace.scaling_setspeed = policy->cur;
> -    }
> +        /* governor specific para */
> +        if ( !strncasecmp(op->u.get_para.u.s.scaling_governor,
> +                          "userspace", CPUFREQ_NAME_LEN) )
> +        {
> +            op->u.get_para.u.s.u.userspace.scaling_setspeed = policy->cur;
> +        }

Would also be nice if you could get rid of the unnecessary braces here
at this occasion.

> --- a/xen/include/acpi/cpufreq/cpufreq.h
> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> @@ -248,5 +248,7 @@ void intel_feature_detect(struct cpufreq_policy *policy);
>  
>  extern bool __initdata opt_cpufreq_hwp;
>  int hwp_cmdline_parse(const char *s);
> +int get_hwp_para(const unsigned int cpu,

I think we generally avoid const when it's not a pointed-to type. It's
not useful at all in a declaration.

> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -296,6 +296,61 @@ struct xen_ondemand {
>      uint32_t up_threshold;
>  };
>  
> +struct xen_cppc_para {
> +    /* OUT */
> +    /* activity_window supported if 1 */
> +#define XEN_SYSCTL_CPPC_FEAT_ACT_WINDOW  (1 << 0)

I think 1 isn't very helpful, looking forward. Perhaps better "set" or
"flag set"?

> +    uint32_t features; /* bit flags for features */
> +    /*
> +     * See Intel SDM: HWP Performance Range and Dynamic Capabilities
> +     *
> +     * These four are 0-255 hardware-provided values.  They "continuous,
> +     * abstract unit-less, performance" values.  smaller numbers are slower
> +     * and larger ones are faster.
> +     */
> +    uint32_t lowest;
> +    uint32_t lowest_nonlinear; /* most_efficient */

Why non_linear in the external interface when internally you use
most_efficient (merely put in the comment here)?

> +    uint32_t nominal; /* guaranteed */

Similar question for the name choice here.

> +    uint32_t highest;
> +    /*
> +     * See Intel SDM: IA32_HWP_REQUEST MSR (Address: 774H Logical Processor
> +     * Scope)
> +     *
> +     * These are all hints, and the processor may deviate outside of them.
> +     * Values below are 0-255.
> +     *
> +     * minimum and maximum can be set to the above hardware values to constrain
> +     * operation.  The full range 0-255 is accepted and will be clipped by
> +     * hardware.
> +     */
> +    uint32_t minimum;
> +    uint32_t maximum;
> +    /*
> +     * Set an explicit performance hint, disabling hardware selection.
> +     * 0 lets the hardware decide.
> +     */
> +    uint32_t desired;

"Set" kind of conflicts with all fields being marked as OUT above. I think
the word can simply be dropped?

Jan
Jason Andryuk June 20, 2023, 6:41 p.m. UTC | #2
On Mon, Jun 19, 2023 at 10:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 14.06.2023 20:02, Jason Andryuk wrote:
> > --- a/xen/arch/x86/acpi/cpufreq/hwp.c
> > +++ b/xen/arch/x86/acpi/cpufreq/hwp.c
> > @@ -537,6 +537,29 @@ static const struct cpufreq_driver __initconstrel hwp_cpufreq_driver =
> >      .update = hwp_cpufreq_update,
> >  };
> >
> > +int get_hwp_para(const unsigned int cpu,
> > +                 struct xen_cppc_para *cppc_para)
> > +{
> > +    const struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
> > +
> > +    if ( data == NULL )
> > +        return -EINVAL;
>
> Maybe better -ENODATA in this case?

Sounds good.

> > --- a/xen/drivers/acpi/pmstat.c
> > +++ b/xen/drivers/acpi/pmstat.c
> > @@ -251,48 +251,54 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
> >      else
> >          strlcpy(op->u.get_para.scaling_driver, "Unknown", CPUFREQ_NAME_LEN);
> >
> > -    if ( !(scaling_available_governors =
> > -           xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
> > -        return -ENOMEM;
> > -    if ( (ret = read_scaling_available_governors(
> > -                    scaling_available_governors,
> > -                    gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
> > +    if ( !strncasecmp(op->u.get_para.scaling_driver, XEN_HWP_DRIVER,
> > +                      CPUFREQ_NAME_LEN) )
>
> Mind me asking why you think case-insensitive compare is appropriate here?

I'll change to strncmp().  All the other string comparisons on
pmstat.c are strncasecmp, so I followed that pattern.

> > +        ret = get_hwp_para(policy->cpu, &op->u.get_para.u.cppc_para);
> > +    else
> >      {
> > +        if ( !(scaling_available_governors =
> > +               xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
> > +            return -ENOMEM;
> > +        if ( (ret = read_scaling_available_governors(
> > +                        scaling_available_governors,
> > +                        gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
>
> I realize you only re-indent this, but since you need to touch it anyway,
> may I suggest to also switch to siezof(*scaling_available_governors)?

How about dropping sizeof(*scaling_available_governors)?  This length ...

> > +        {
> > +            xfree(scaling_available_governors);
> > +            return ret;
> > +        }
> > +        ret = copy_to_guest(op->u.get_para.scaling_available_governors,
> > +                    scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
>
> Similarly here: Please adjust indentation while you touch this code.

... should match here, but this second one lacks the "* sizeof($foo)".
They are strings, so multiplying by sizeof() is unusual.

FTAOD, you want the indenting as:
        ret = copy_to_guest(op->u.get_para.scaling_available_governors,
                            scaling_available_governors,
                            gov_num * CPUFREQ_NAME_LEN);
?

> >          xfree(scaling_available_governors);
> > -        return ret;
> > -    }
> > -    ret = copy_to_guest(op->u.get_para.scaling_available_governors,
> > -                scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
> > -    xfree(scaling_available_governors);
> > -    if ( ret )
> > -        return ret;
> > +        if ( ret )
> > +            return ret;
> >
> > -    op->u.get_para.u.s.scaling_cur_freq = policy->cur;
> > -    op->u.get_para.u.s.scaling_max_freq = policy->max;
> > -    op->u.get_para.u.s.scaling_min_freq = policy->min;
> > +        op->u.get_para.u.s.scaling_cur_freq = policy->cur;
> > +        op->u.get_para.u.s.scaling_max_freq = policy->max;
> > +        op->u.get_para.u.s.scaling_min_freq = policy->min;
> >
> > -    if ( policy->governor->name[0] )
> > -        strlcpy(op->u.get_para.u.s.scaling_governor,
> > -            policy->governor->name, CPUFREQ_NAME_LEN);
> > -    else
> > -        strlcpy(op->u.get_para.u.s.scaling_governor, "Unknown",
> > -                CPUFREQ_NAME_LEN);
> > +        if ( policy->governor->name[0] )
> > +            strlcpy(op->u.get_para.u.s.scaling_governor,
> > +                policy->governor->name, CPUFREQ_NAME_LEN);
> > +        else
> > +            strlcpy(op->u.get_para.u.s.scaling_governor, "Unknown",
> > +                    CPUFREQ_NAME_LEN);
> >
> > -    /* governor specific para */
> > -    if ( !strncasecmp(op->u.get_para.u.s.scaling_governor,
> > -                      "userspace", CPUFREQ_NAME_LEN) )
> > -    {
> > -        op->u.get_para.u.s.u.userspace.scaling_setspeed = policy->cur;
> > -    }
> > +        /* governor specific para */
> > +        if ( !strncasecmp(op->u.get_para.u.s.scaling_governor,
> > +                          "userspace", CPUFREQ_NAME_LEN) )
> > +        {
> > +            op->u.get_para.u.s.u.userspace.scaling_setspeed = policy->cur;
> > +        }
>
> Would also be nice if you could get rid of the unnecessary braces here
> at this occasion.

Sure

> > --- a/xen/include/acpi/cpufreq/cpufreq.h
> > +++ b/xen/include/acpi/cpufreq/cpufreq.h
> > @@ -248,5 +248,7 @@ void intel_feature_detect(struct cpufreq_policy *policy);
> >
> >  extern bool __initdata opt_cpufreq_hwp;
> >  int hwp_cmdline_parse(const char *s);
> > +int get_hwp_para(const unsigned int cpu,
>
> I think we generally avoid const when it's not a pointed-to type. It's
> not useful at all in a declaration.

Ok

> > --- a/xen/include/public/sysctl.h
> > +++ b/xen/include/public/sysctl.h
> > @@ -296,6 +296,61 @@ struct xen_ondemand {
> >      uint32_t up_threshold;
> >  };
> >
> > +struct xen_cppc_para {
> > +    /* OUT */
> > +    /* activity_window supported if 1 */
> > +#define XEN_SYSCTL_CPPC_FEAT_ACT_WINDOW  (1 << 0)
>
> I think 1 isn't very helpful, looking forward. Perhaps better "set" or
> "flag set"?

"set" works for me.

> > +    uint32_t features; /* bit flags for features */
> > +    /*
> > +     * See Intel SDM: HWP Performance Range and Dynamic Capabilities
> > +     *
> > +     * These four are 0-255 hardware-provided values.  They "continuous,
> > +     * abstract unit-less, performance" values.  smaller numbers are slower
> > +     * and larger ones are faster.
> > +     */
> > +    uint32_t lowest;
> > +    uint32_t lowest_nonlinear; /* most_efficient */
>
> Why non_linear in the external interface when internally you use
> most_efficient (merely put in the comment here)?
>
> > +    uint32_t nominal; /* guaranteed */
>
> Similar question for the name choice here.

There is a naming mismatch between the HWP fields and the CPPC fields.
The commit message includes:
The HWP most_efficient is mapped to CPPC lowest_nonlinear, and guaranteed is
mapped to nominal.  CPPC has a guaranteed that is optional while nominal
is required.  ACPI spec says "If this register is not implemented, OSPM
assumes guaranteed performance is always equal to nominal performance."

So the comments were to help with the mapping.  Should I prefix the
comments like "HWP: most_efficient"?

> > +    uint32_t highest;
> > +    /*
> > +     * See Intel SDM: IA32_HWP_REQUEST MSR (Address: 774H Logical Processor
> > +     * Scope)
> > +     *
> > +     * These are all hints, and the processor may deviate outside of them.
> > +     * Values below are 0-255.
> > +     *
> > +     * minimum and maximum can be set to the above hardware values to constrain
> > +     * operation.  The full range 0-255 is accepted and will be clipped by
> > +     * hardware.
> > +     */
> > +    uint32_t minimum;
> > +    uint32_t maximum;
> > +    /*
> > +     * Set an explicit performance hint, disabling hardware selection.
> > +     * 0 lets the hardware decide.
> > +     */
> > +    uint32_t desired;
>
> "Set" kind of conflicts with all fields being marked as OUT above. I think
> the word can simply be dropped?

Sounds good.

Thanks,
Jason
Jan Beulich June 21, 2023, 8:12 a.m. UTC | #3
On 20.06.2023 20:41, Jason Andryuk wrote:
> On Mon, Jun 19, 2023 at 10:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>> On 14.06.2023 20:02, Jason Andryuk wrote:
>>> +    else
>>>      {
>>> +        if ( !(scaling_available_governors =
>>> +               xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
>>> +            return -ENOMEM;
>>> +        if ( (ret = read_scaling_available_governors(
>>> +                        scaling_available_governors,
>>> +                        gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
>>
>> I realize you only re-indent this, but since you need to touch it anyway,
>> may I suggest to also switch to siezof(*scaling_available_governors)?
> 
> How about dropping sizeof(*scaling_available_governors)?  This length ...
> 
>>> +        {
>>> +            xfree(scaling_available_governors);
>>> +            return ret;
>>> +        }
>>> +        ret = copy_to_guest(op->u.get_para.scaling_available_governors,
>>> +                    scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
>>
>> Similarly here: Please adjust indentation while you touch this code.
> 
> ... should match here, but this second one lacks the "* sizeof($foo)".

Indeed it does, because that multiplication happens inside copy_to_guest()
(really copy_to_guest_offset()).

> They are strings, so multiplying by sizeof() is unusual.

Kind of, but in code which may want switching to Unicode (and not just
UTF-8,but e.g. UCS2 or UCS4) at some point it's a good idea to have such
right away. We don't mean to do any such switch, but I think it's good
practice to not assume that strings / string literals only ever consist
of plain char elements.

> FTAOD, you want the indenting as:
>         ret = copy_to_guest(op->u.get_para.scaling_available_governors,
>                             scaling_available_governors,
>                             gov_num * CPUFREQ_NAME_LEN);
> ?

That's one conforming way, yes. Another would be

        ret = copy_to_guest(
                  op->u.get_para.scaling_available_governors,
                  scaling_available_governors,
                  gov_num * CPUFREQ_NAME_LEN);

>>> --- a/xen/include/public/sysctl.h
>>> +++ b/xen/include/public/sysctl.h
>>> @@ -296,6 +296,61 @@ struct xen_ondemand {
>>>      uint32_t up_threshold;
>>>  };
>>>
>>> +struct xen_cppc_para {
>>> +    /* OUT */
>>> +    /* activity_window supported if 1 */
>>> +#define XEN_SYSCTL_CPPC_FEAT_ACT_WINDOW  (1 << 0)
>>
>> I think 1 isn't very helpful, looking forward. Perhaps better "set" or
>> "flag set"?
> 
> "set" works for me.
> 
>>> +    uint32_t features; /* bit flags for features */
>>> +    /*
>>> +     * See Intel SDM: HWP Performance Range and Dynamic Capabilities
>>> +     *
>>> +     * These four are 0-255 hardware-provided values.  They "continuous,
>>> +     * abstract unit-less, performance" values.  smaller numbers are slower
>>> +     * and larger ones are faster.
>>> +     */
>>> +    uint32_t lowest;
>>> +    uint32_t lowest_nonlinear; /* most_efficient */
>>
>> Why non_linear in the external interface when internally you use
>> most_efficient (merely put in the comment here)?
>>
>>> +    uint32_t nominal; /* guaranteed */
>>
>> Similar question for the name choice here.
> 
> There is a naming mismatch between the HWP fields and the CPPC fields.
> The commit message includes:
> The HWP most_efficient is mapped to CPPC lowest_nonlinear, and guaranteed is
> mapped to nominal.  CPPC has a guaranteed that is optional while nominal
> is required.  ACPI spec says "If this register is not implemented, OSPM
> assumes guaranteed performance is always equal to nominal performance."
> 
> So the comments were to help with the mapping.  Should I prefix the
> comments like "HWP: most_efficient"?

Yes please. (I was going to suggest exactly that when I hadn't read this
question, yet.)

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/acpi/cpufreq/hwp.c b/xen/arch/x86/acpi/cpufreq/hwp.c
index 5f210b54ff..86c5793266 100644
--- a/xen/arch/x86/acpi/cpufreq/hwp.c
+++ b/xen/arch/x86/acpi/cpufreq/hwp.c
@@ -537,6 +537,29 @@  static const struct cpufreq_driver __initconstrel hwp_cpufreq_driver =
     .update = hwp_cpufreq_update,
 };
 
+int get_hwp_para(const unsigned int cpu,
+                 struct xen_cppc_para *cppc_para)
+{
+    const struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+
+    if ( data == NULL )
+        return -EINVAL;
+
+    cppc_para->features         =
+        (feature_hwp_activity_window ? XEN_SYSCTL_CPPC_FEAT_ACT_WINDOW : 0);
+    cppc_para->lowest           = data->hw.lowest;
+    cppc_para->lowest_nonlinear = data->hw.most_efficient;
+    cppc_para->nominal          = data->hw.guaranteed;
+    cppc_para->highest          = data->hw.highest;
+    cppc_para->minimum          = data->minimum;
+    cppc_para->maximum          = data->maximum;
+    cppc_para->desired          = data->desired;
+    cppc_para->energy_perf      = data->energy_perf;
+    cppc_para->activity_window  = data->activity_window;
+
+    return 0;
+}
+
 int __init hwp_register_driver(void)
 {
     return cpufreq_register_driver(&hwp_cpufreq_driver);
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index 57359c21d8..10143c084c 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -251,48 +251,54 @@  static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
     else
         strlcpy(op->u.get_para.scaling_driver, "Unknown", CPUFREQ_NAME_LEN);
 
-    if ( !(scaling_available_governors =
-           xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
-        return -ENOMEM;
-    if ( (ret = read_scaling_available_governors(
-                    scaling_available_governors,
-                    gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+    if ( !strncasecmp(op->u.get_para.scaling_driver, XEN_HWP_DRIVER,
+                      CPUFREQ_NAME_LEN) )
+        ret = get_hwp_para(policy->cpu, &op->u.get_para.u.cppc_para);
+    else
     {
+        if ( !(scaling_available_governors =
+               xzalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
+            return -ENOMEM;
+        if ( (ret = read_scaling_available_governors(
+                        scaling_available_governors,
+                        gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+        {
+            xfree(scaling_available_governors);
+            return ret;
+        }
+        ret = copy_to_guest(op->u.get_para.scaling_available_governors,
+                    scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
         xfree(scaling_available_governors);
-        return ret;
-    }
-    ret = copy_to_guest(op->u.get_para.scaling_available_governors,
-                scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
-    xfree(scaling_available_governors);
-    if ( ret )
-        return ret;
+        if ( ret )
+            return ret;
 
-    op->u.get_para.u.s.scaling_cur_freq = policy->cur;
-    op->u.get_para.u.s.scaling_max_freq = policy->max;
-    op->u.get_para.u.s.scaling_min_freq = policy->min;
+        op->u.get_para.u.s.scaling_cur_freq = policy->cur;
+        op->u.get_para.u.s.scaling_max_freq = policy->max;
+        op->u.get_para.u.s.scaling_min_freq = policy->min;
 
-    if ( policy->governor->name[0] )
-        strlcpy(op->u.get_para.u.s.scaling_governor,
-            policy->governor->name, CPUFREQ_NAME_LEN);
-    else
-        strlcpy(op->u.get_para.u.s.scaling_governor, "Unknown",
-                CPUFREQ_NAME_LEN);
+        if ( policy->governor->name[0] )
+            strlcpy(op->u.get_para.u.s.scaling_governor,
+                policy->governor->name, CPUFREQ_NAME_LEN);
+        else
+            strlcpy(op->u.get_para.u.s.scaling_governor, "Unknown",
+                    CPUFREQ_NAME_LEN);
 
-    /* governor specific para */
-    if ( !strncasecmp(op->u.get_para.u.s.scaling_governor,
-                      "userspace", CPUFREQ_NAME_LEN) )
-    {
-        op->u.get_para.u.s.u.userspace.scaling_setspeed = policy->cur;
-    }
+        /* governor specific para */
+        if ( !strncasecmp(op->u.get_para.u.s.scaling_governor,
+                          "userspace", CPUFREQ_NAME_LEN) )
+        {
+            op->u.get_para.u.s.u.userspace.scaling_setspeed = policy->cur;
+        }
 
-    if ( !strncasecmp(op->u.get_para.u.s.scaling_governor,
-                      "ondemand", CPUFREQ_NAME_LEN) )
-    {
-        ret = get_cpufreq_ondemand_para(
-            &op->u.get_para.u.s.u.ondemand.sampling_rate_max,
-            &op->u.get_para.u.s.u.ondemand.sampling_rate_min,
-            &op->u.get_para.u.s.u.ondemand.sampling_rate,
-            &op->u.get_para.u.s.u.ondemand.up_threshold);
+        if ( !strncasecmp(op->u.get_para.u.s.scaling_governor,
+                          "ondemand", CPUFREQ_NAME_LEN) )
+        {
+            ret = get_cpufreq_ondemand_para(
+                &op->u.get_para.u.s.u.ondemand.sampling_rate_max,
+                &op->u.get_para.u.s.u.ondemand.sampling_rate_min,
+                &op->u.get_para.u.s.u.ondemand.sampling_rate,
+                &op->u.get_para.u.s.u.ondemand.up_threshold);
+        }
     }
 
     return ret;
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 6ec7dbcbbb..fb655fac14 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -248,5 +248,7 @@  void intel_feature_detect(struct cpufreq_policy *policy);
 
 extern bool __initdata opt_cpufreq_hwp;
 int hwp_cmdline_parse(const char *s);
+int get_hwp_para(const unsigned int cpu,
+                 struct xen_cppc_para *cppc_para);
 
 #endif /* __XEN_CPUFREQ_PM_H__ */
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 08fe815329..5c9b60b5d0 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -296,6 +296,61 @@  struct xen_ondemand {
     uint32_t up_threshold;
 };
 
+struct xen_cppc_para {
+    /* OUT */
+    /* activity_window supported if 1 */
+#define XEN_SYSCTL_CPPC_FEAT_ACT_WINDOW  (1 << 0)
+    uint32_t features; /* bit flags for features */
+    /*
+     * See Intel SDM: HWP Performance Range and Dynamic Capabilities
+     *
+     * These four are 0-255 hardware-provided values.  They "continuous,
+     * abstract unit-less, performance" values.  smaller numbers are slower
+     * and larger ones are faster.
+     */
+    uint32_t lowest;
+    uint32_t lowest_nonlinear; /* most_efficient */
+    uint32_t nominal; /* guaranteed */
+    uint32_t highest;
+    /*
+     * See Intel SDM: IA32_HWP_REQUEST MSR (Address: 774H Logical Processor
+     * Scope)
+     *
+     * These are all hints, and the processor may deviate outside of them.
+     * Values below are 0-255.
+     *
+     * minimum and maximum can be set to the above hardware values to constrain
+     * operation.  The full range 0-255 is accepted and will be clipped by
+     * hardware.
+     */
+    uint32_t minimum;
+    uint32_t maximum;
+    /*
+     * Set an explicit performance hint, disabling hardware selection.
+     * 0 lets the hardware decide.
+     */
+    uint32_t desired;
+    /*
+     * Hint to hardware for energy/performance preference.
+     * 0:   Performance
+     * 128: Balance (Default)
+     * 255: Powersaving
+     */
+    uint32_t energy_perf;
+    /*
+     * Activity Window is a moving history window for the processor's operation
+     * calculations, controlling responsiveness.  Measured in microseconds
+     * encoded as:
+     *
+     * bits 6:0   - 7bit mantissa
+     * bits 9:7   - 3bit base-10 exponent
+     * btis 15:10 - Unused - must be 0
+     */
+#define XEN_CPPC_ACT_WINDOW_MANTISSA_MASK  0x07f
+#define XEN_CPPC_ACT_WINDOW_EXPONENT_MASK  0x380
+    uint32_t activity_window;
+};
+
 #define XEN_HWP_DRIVER "hwp"
 /*
  * cpufreq para name of this structure named
@@ -332,6 +387,7 @@  struct xen_get_cpufreq_para {
                 struct  xen_ondemand ondemand;
             } u;
         } s;
+        struct xen_cppc_para cppc_para;
     } u;
 
     int32_t turbo_enabled;