diff mbox series

[v2,02/11] xen/x86: introduce new sub-hypercall to propagate CPPC data

Message ID 20250206083255.1296363-3-Penny.Zheng@amd.com (mailing list archive)
State New
Headers show
Series amd-cppc CPU Performance Scaling Driver | expand

Commit Message

Penny, Zheng Feb. 6, 2025, 8:32 a.m. UTC
In order to provide backward compatibility with existing governors
that represent performance as frequencies, like ondemand, the _CPC
table can optionally provide processor frequency range values, Lowest
frequency and Norminal frequency, to let OS use Lowest Frequency/
Performance and Nominal Frequency/Performance as anchor points to
create linear mapping of CPPC abstract performance to CPU frequency.

As Xen is uncapable of parsing the ACPI dynamic table, this commit
introduces a new sub-hypercall to propagate required CPPC data from
dom0 kernel.

Signed-off-by: Penny Zheng <Penny.Zheng@amd.com>
---
v1 -> v2:
- Remove unnecessary figure braces
- Pointer-to-const for print_CPPC and set_cppc_pminfo
- Structure allocation shall use xvzalloc()
- Unnecessary memcpy(), and change it to a (type safe) structure assignment
- Add comment for struct xen_processor_cppc, and keep the chosen fields
in the order _CPC has them
- Obey to alphabetic sorting, and prefix compat structures with ? instead
of !
---
 xen/arch/x86/platform_hypercall.c         |  4 ++
 xen/arch/x86/x86_64/cpufreq.c             |  2 +
 xen/drivers/cpufreq/cpufreq.c             | 48 +++++++++++++++++++++++
 xen/include/acpi/cpufreq/processor_perf.h |  1 +
 xen/include/public/platform.h             | 16 ++++++++
 xen/include/xen/pmstat.h                  |  1 +
 xen/include/xlat.lst                      |  1 +
 7 files changed, 73 insertions(+)

Comments

Jan Beulich Feb. 11, 2025, 11:10 a.m. UTC | #1
On 06.02.2025 09:32, Penny Zheng wrote:
> --- a/xen/arch/x86/platform_hypercall.c
> +++ b/xen/arch/x86/platform_hypercall.c
> @@ -572,6 +572,10 @@ ret_t do_platform_op(
>              break;
>          }
>  
> +        case XEN_PM_CPPC:
> +            ret = set_cppc_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.u.cppc_data);

Nit: Too long line.

> --- a/xen/arch/x86/x86_64/cpufreq.c
> +++ b/xen/arch/x86/x86_64/cpufreq.c
> @@ -26,6 +26,8 @@
>  #include <xen/pmstat.h>
>  #include <compat/platform.h>
>  
> +CHECK_processor_cppc;
> +
>  CHECK_processor_px;

May I ask that you insert below the pre-existing CHECK_* rather than above?
Or wait - maybe you were aiming at sorting these alphabetically? That would
perhaps be fine then.

> @@ -458,6 +459,53 @@ static void print_PPC(unsigned int platform_limit)
>      printk("\t_PPC: %d\n", platform_limit);
>  }
>  
> +static void print_CPPC(const struct xen_processor_cppc *cppc_data)
> +{
> +    printk("\t_CPC: highest_perf=%u, lowest_perf=%u, "
> +           "nominal_perf=%u, lowest_nonlinear_perf=%u, "
> +           "nominal_freq=%uMhz, lowest_freq=%uMhz\n",

Nit: MHz please.

> +           cppc_data->highest_perf, cppc_data->lowest_perf,
> +           cppc_data->nominal_perf, cppc_data->lowest_nonlinear_perf,
> +           cppc_data->nominal_freq, cppc_data->lowest_freq);
> +}
> +
> +int set_cppc_pminfo(uint32_t acpi_id, const struct xen_processor_cppc *cppc_data)

Too long a line again.

Also while print_CPPC() is placed okay, this function wants to move
down, past set_px_pminfo().

> +{
> +    int ret = 0, cpuid;
> +    struct processor_pminfo *pm_info;
> +
> +    cpuid = get_cpu_id(acpi_id);
> +    if ( cpuid < 0 || !cppc_data )
> +    {
> +        ret = -EINVAL;
> +        goto out;
> +    }
> +    if ( cpufreq_verbose )
> +        printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
> +               acpi_id, cpuid);
> +
> +    pm_info = processor_pminfo[cpuid];
> +    if ( !pm_info )
> +    {
> +        pm_info = xvzalloc(struct processor_pminfo);
> +        if ( !pm_info )
> +        {
> +            ret = -ENOMEM;
> +            goto out;
> +        }
> +        processor_pminfo[cpuid] = pm_info;
> +    }
> +    pm_info->acpi_id = acpi_id;
> +    pm_info->id = cpuid;
> +    pm_info->cppc_data = *cppc_data;
> +
> +    if ( cpufreq_verbose )
> +        print_CPPC(&pm_info->cppc_data);
> +
> + out:
> +    return ret;
> +}

What's the interaction between the data set by set_px_pminfo() and the
data set here? In particular, what's going to happen if both functions
come into play for the same CPU? Shouldn't there be some sanity checks?
Will consumers be able to tell which of the two were correctly invoked,
before using respective data? Even in the event of no code changes at
all to address this, it will want discussing in the patch description.

> --- a/xen/include/xen/pmstat.h
> +++ b/xen/include/xen/pmstat.h
> @@ -5,6 +5,7 @@
>  #include <public/platform.h> /* for struct xen_processor_power */
>  #include <public/sysctl.h>   /* for struct pm_cx_stat */
>  
> +int set_cppc_pminfo(uint32_t cpu, const struct xen_processor_cppc *cppc_data);

Surprisingly this line is within limits, when the (supposedly) one char
shorter line at the definition site is not. Which points out a Misra
violation: Declaration and definition ought to agree in parameter names.

Jan
Penny, Zheng Feb. 17, 2025, 7:20 a.m. UTC | #2
[AMD Official Use Only - AMD Internal Distribution Only]

Hi,

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, February 11, 2025 7:10 PM
> To: Penny, Zheng <penny.zheng@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Andryuk, Jason
> <Jason.Andryuk@amd.com>; Andrew Cooper <andrew.cooper3@citrix.com>;
> Roger Pau Monné <roger.pau@citrix.com>; Anthony PERARD
> <anthony.perard@vates.tech>; Orzel, Michal <Michal.Orzel@amd.com>; Julien
> Grall <julien@xen.org>; Stefano Stabellini <sstabellini@kernel.org>; xen-
> devel@lists.xenproject.org
> Subject: Re: [PATCH v2 02/11] xen/x86: introduce new sub-hypercall to propagate
> CPPC data
>
> On 06.02.2025 09:32, Penny Zheng wrote:
> > --- a/xen/arch/x86/platform_hypercall.c
> > +++ b/xen/arch/x86/platform_hypercall.c
> > @@ -572,6 +572,10 @@ ret_t do_platform_op(
> >              break;
> >          }
> >
> > +        case XEN_PM_CPPC:
> > +            ret = set_cppc_pminfo(op->u.set_pminfo.id,
> > + &op->u.set_pminfo.u.cppc_data);
>
> Nit: Too long line.
>
> > --- a/xen/arch/x86/x86_64/cpufreq.c
> > +++ b/xen/arch/x86/x86_64/cpufreq.c
> > @@ -26,6 +26,8 @@
> >  #include <xen/pmstat.h>
> >  #include <compat/platform.h>
> >
> > +CHECK_processor_cppc;
> > +
> >  CHECK_processor_px;
>
> May I ask that you insert below the pre-existing CHECK_* rather than above?
> Or wait - maybe you were aiming at sorting these alphabetically? That would
> perhaps be fine then.
>

Yes, I intended to sort these alphabetically

> > +{
> > +    int ret = 0, cpuid;
> > +    struct processor_pminfo *pm_info;
> > +
> > +    cpuid = get_cpu_id(acpi_id);
> > +    if ( cpuid < 0 || !cppc_data )
> > +    {
> > +        ret = -EINVAL;
> > +        goto out;
> > +    }
> > +    if ( cpufreq_verbose )
> > +        printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
> > +               acpi_id, cpuid);
> > +
> > +    pm_info = processor_pminfo[cpuid];
> > +    if ( !pm_info )
> > +    {
> > +        pm_info = xvzalloc(struct processor_pminfo);
> > +        if ( !pm_info )
> > +        {
> > +            ret = -ENOMEM;
> > +            goto out;
> > +        }
> > +        processor_pminfo[cpuid] = pm_info;
> > +    }
> > +    pm_info->acpi_id = acpi_id;
> > +    pm_info->id = cpuid;
> > +    pm_info->cppc_data = *cppc_data;
> > +
> > +    if ( cpufreq_verbose )
> > +        print_CPPC(&pm_info->cppc_data);
> > +
> > + out:
> > +    return ret;
> > +}
>
> What's the interaction between the data set by set_px_pminfo() and the data set
> here? In particular, what's going to happen if both functions come into play for the
> same CPU? Shouldn't there be some sanity checks?

Yes, I've considered this and checked ACPI spec. I'll refer them here:
```
If the platform supports CPPC, the _CPC object must exist under all processor objects.
That is, OSPM is not expected to support mixed mode (CPPC & legacy PSS, _PCT, _PPC) operation.
```
See https://uefi.org/specs/ACPI/6.5/08_Processor_Configuration_and_Control.html?highlight=cppc#power-performance-and-throttling-state-dependencies
So CPUs could have both _CPC and legacy P-state info in ACPI for each entry, they just can't have mixed-mode
Maybe we shall add sanity check to see if _CPC exists, it shall exist for all pcpus?

> Will consumers be able to tell which of the two were correctly invoked, before using
> respective data? Even in the event of no code changes at all to address this, it will
> want discussing in the patch description.
>

I have checked the relevant spec. it shall be the following logic:
```
Software enables Collaborative Processor Performance Control by setting
CPPC_ENABLE[CPPC_En] (bit 0) = 1. Once it gets enabled, the processor ignores the legacy P-state control interface.
```
Hmmm, I shall add relevant comment in Doc?

>
> Jan

Many thanks,
Penny
Jan Beulich Feb. 17, 2025, 7:38 a.m. UTC | #3
On 17.02.2025 08:20, Penny, Zheng wrote:
> [AMD Official Use Only - AMD Internal Distribution Only]

Btw, boiler plates like this aren't really liked on public mailing lists,
for being contrary to the purpose of such lists.

>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Tuesday, February 11, 2025 7:10 PM
>>
>> On 06.02.2025 09:32, Penny Zheng wrote:
>>> +{
>>> +    int ret = 0, cpuid;
>>> +    struct processor_pminfo *pm_info;
>>> +
>>> +    cpuid = get_cpu_id(acpi_id);
>>> +    if ( cpuid < 0 || !cppc_data )
>>> +    {
>>> +        ret = -EINVAL;
>>> +        goto out;
>>> +    }
>>> +    if ( cpufreq_verbose )
>>> +        printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
>>> +               acpi_id, cpuid);
>>> +
>>> +    pm_info = processor_pminfo[cpuid];
>>> +    if ( !pm_info )
>>> +    {
>>> +        pm_info = xvzalloc(struct processor_pminfo);
>>> +        if ( !pm_info )
>>> +        {
>>> +            ret = -ENOMEM;
>>> +            goto out;
>>> +        }
>>> +        processor_pminfo[cpuid] = pm_info;
>>> +    }
>>> +    pm_info->acpi_id = acpi_id;
>>> +    pm_info->id = cpuid;
>>> +    pm_info->cppc_data = *cppc_data;
>>> +
>>> +    if ( cpufreq_verbose )
>>> +        print_CPPC(&pm_info->cppc_data);
>>> +
>>> + out:
>>> +    return ret;
>>> +}
>>
>> What's the interaction between the data set by set_px_pminfo() and the data set
>> here? In particular, what's going to happen if both functions come into play for the
>> same CPU? Shouldn't there be some sanity checks?
> 
> Yes, I've considered this and checked ACPI spec. I'll refer them here:
> ```
> If the platform supports CPPC, the _CPC object must exist under all processor objects.
> That is, OSPM is not expected to support mixed mode (CPPC & legacy PSS, _PCT, _PPC) operation.
> ```
> See https://uefi.org/specs/ACPI/6.5/08_Processor_Configuration_and_Control.html?highlight=cppc#power-performance-and-throttling-state-dependencies
> So CPUs could have both _CPC and legacy P-state info in ACPI for each entry, they just can't have mixed-mode
> Maybe we shall add sanity check to see if _CPC exists, it shall exist for all pcpus?

Maybe, but that wasn't the point of my remark.

Properly behaving Dom0 should probably be passing only one of the two
possible pieces of information. Yet maybe we'd better sanity check _that_?
(I don't recall seeing Linux kernel side patches yet; if they were posted
somewhere, they may at least partly address my concern.)

>> Will consumers be able to tell which of the two were correctly invoked, before using
>> respective data? Even in the event of no code changes at all to address this, it will
>> want discussing in the patch description.
>>
> 
> I have checked the relevant spec. it shall be the following logic:
> ```
> Software enables Collaborative Processor Performance Control by setting
> CPPC_ENABLE[CPPC_En] (bit 0) = 1. Once it gets enabled, the processor ignores the legacy P-state control interface.
> ```
> Hmmm, I shall add relevant comment in Doc?

Mentioning this in the description would help. Yet the processor ignoring
the other P-state control interface shouldn't mean we can nevertheless try
to drive it. It has to be clear (and at least halfway obvious) internally
to Xen that we only ever use one of the two. My present reading of the
patches suggests that this is all implicit (and maybe not even guaranteed)
right now.

Jan
Penny, Zheng Feb. 18, 2025, 6:05 a.m. UTC | #4
[AMD Official Use Only - AMD Internal Distribution Only]

Hi,

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Monday, February 17, 2025 3:39 PM
> To: Penny, Zheng <penny.zheng@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Andryuk, Jason
> <Jason.Andryuk@amd.com>; Andrew Cooper <andrew.cooper3@citrix.com>;
> Roger Pau Monné <roger.pau@citrix.com>; Anthony PERARD
> <anthony.perard@vates.tech>; Orzel, Michal <Michal.Orzel@amd.com>; Julien
> Grall <julien@xen.org>; Stefano Stabellini <sstabellini@kernel.org>; xen-
> devel@lists.xenproject.org
> Subject: Re: [PATCH v2 02/11] xen/x86: introduce new sub-hypercall to propagate
> CPPC data
>
> On 17.02.2025 08:20, Penny, Zheng wrote:
> > [AMD Official Use Only - AMD Internal Distribution Only]
>
> Btw, boiler plates like this aren't really liked on public mailing lists, for being contrary
> to the purpose of such lists.
>
> >> -----Original Message-----
> >> From: Jan Beulich <jbeulich@suse.com>
> >> Sent: Tuesday, February 11, 2025 7:10 PM
> >>
> >> On 06.02.2025 09:32, Penny Zheng wrote:
> >>> +{
> >>> +    int ret = 0, cpuid;
> >>> +    struct processor_pminfo *pm_info;
> >>> +
> >>> +    cpuid = get_cpu_id(acpi_id);
> >>> +    if ( cpuid < 0 || !cppc_data )
> >>> +    {
> >>> +        ret = -EINVAL;
> >>> +        goto out;
> >>> +    }
> >>> +    if ( cpufreq_verbose )
> >>> +        printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
> >>> +               acpi_id, cpuid);
> >>> +
> >>> +    pm_info = processor_pminfo[cpuid];
> >>> +    if ( !pm_info )
> >>> +    {
> >>> +        pm_info = xvzalloc(struct processor_pminfo);
> >>> +        if ( !pm_info )
> >>> +        {
> >>> +            ret = -ENOMEM;
> >>> +            goto out;
> >>> +        }
> >>> +        processor_pminfo[cpuid] = pm_info;
> >>> +    }
> >>> +    pm_info->acpi_id = acpi_id;
> >>> +    pm_info->id = cpuid;
> >>> +    pm_info->cppc_data = *cppc_data;
> >>> +
> >>> +    if ( cpufreq_verbose )
> >>> +        print_CPPC(&pm_info->cppc_data);
> >>> +
> >>> + out:
> >>> +    return ret;
> >>> +}
> >>
> >> What's the interaction between the data set by set_px_pminfo() and
> >> the data set here? In particular, what's going to happen if both
> >> functions come into play for the same CPU? Shouldn't there be some sanity
> checks?
> >
> > Yes, I've considered this and checked ACPI spec. I'll refer them here:
> > ```
> > If the platform supports CPPC, the _CPC object must exist under all processor
> objects.
> > That is, OSPM is not expected to support mixed mode (CPPC & legacy PSS,
> _PCT, _PPC) operation.
> > ```
> > See
> > https://uefi.org/specs/ACPI/6.5/08_Processor_Configuration_and_Control
> > .html?highlight=cppc#power-performance-and-throttling-state-dependenci
> > es So CPUs could have both _CPC and legacy P-state info in ACPI for
> > each entry, they just can't have mixed-mode Maybe we shall add sanity
> > check to see if _CPC exists, it shall exist for all pcpus?
>
> Maybe, but that wasn't the point of my remark.
>
> Properly behaving Dom0 should probably be passing only one of the two possible
> pieces of information. Yet maybe we'd better sanity check _that_?
> (I don't recall seeing Linux kernel side patches yet; if they were posted somewhere,
> they may at least partly address my concern.)
>

In my linux patch, https://lore.kernel.org/lkml/20241204082430.469092-1-Penny.Zheng@amd.com/T/
I only did zero-value check in xen_processor_get_perf_caps(), Do you think in that place, I shall add
more strict sanity check, like the register value shall not be zero and also must smaller than UINT8_T?
Or we just do the above check in Xen part when receiving the data?

> >> Will consumers be able to tell which of the two were correctly
> >> invoked, before using respective data? Even in the event of no code
> >> changes at all to address this, it will want discussing in the patch description.
> >>
> >
> > I have checked the relevant spec. it shall be the following logic:
> > ```
> > Software enables Collaborative Processor Performance Control by
> > setting CPPC_ENABLE[CPPC_En] (bit 0) = 1. Once it gets enabled, the
> processor ignores the legacy P-state control interface.
> > ```
> > Hmmm, I shall add relevant comment in Doc?
>
> Mentioning this in the description would help. Yet the processor ignoring the other P-
> state control interface shouldn't mean we can nevertheless try to drive it. It has to
> be clear (and at least halfway obvious) internally to Xen that we only ever use one
> of the two. My present reading of the patches suggests that this is all implicit (and
> maybe not even guaranteed) right now.

Understood!

>
> Jan
Jan Beulich Feb. 18, 2025, 2:48 p.m. UTC | #5
On 18.02.2025 07:05, Penny, Zheng wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Monday, February 17, 2025 3:39 PM
>>
>> On 17.02.2025 08:20, Penny, Zheng wrote:
>>> [AMD Official Use Only - AMD Internal Distribution Only]
>>
>> Btw, boiler plates like this aren't really liked on public mailing lists, for being contrary
>> to the purpose of such lists.

You did read this, didn't you? I ask because the same boilerplate keeps
appearing in your mails.

>>>> -----Original Message-----
>>>> From: Jan Beulich <jbeulich@suse.com>
>>>> Sent: Tuesday, February 11, 2025 7:10 PM
>>>>
>>>> On 06.02.2025 09:32, Penny Zheng wrote:
>>>>> +{
>>>>> +    int ret = 0, cpuid;
>>>>> +    struct processor_pminfo *pm_info;
>>>>> +
>>>>> +    cpuid = get_cpu_id(acpi_id);
>>>>> +    if ( cpuid < 0 || !cppc_data )
>>>>> +    {
>>>>> +        ret = -EINVAL;
>>>>> +        goto out;
>>>>> +    }
>>>>> +    if ( cpufreq_verbose )
>>>>> +        printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
>>>>> +               acpi_id, cpuid);
>>>>> +
>>>>> +    pm_info = processor_pminfo[cpuid];
>>>>> +    if ( !pm_info )
>>>>> +    {
>>>>> +        pm_info = xvzalloc(struct processor_pminfo);
>>>>> +        if ( !pm_info )
>>>>> +        {
>>>>> +            ret = -ENOMEM;
>>>>> +            goto out;
>>>>> +        }
>>>>> +        processor_pminfo[cpuid] = pm_info;
>>>>> +    }
>>>>> +    pm_info->acpi_id = acpi_id;
>>>>> +    pm_info->id = cpuid;
>>>>> +    pm_info->cppc_data = *cppc_data;
>>>>> +
>>>>> +    if ( cpufreq_verbose )
>>>>> +        print_CPPC(&pm_info->cppc_data);
>>>>> +
>>>>> + out:
>>>>> +    return ret;
>>>>> +}
>>>>
>>>> What's the interaction between the data set by set_px_pminfo() and
>>>> the data set here? In particular, what's going to happen if both
>>>> functions come into play for the same CPU? Shouldn't there be some sanity
>> checks?
>>>
>>> Yes, I've considered this and checked ACPI spec. I'll refer them here:
>>> ```
>>> If the platform supports CPPC, the _CPC object must exist under all processor
>> objects.
>>> That is, OSPM is not expected to support mixed mode (CPPC & legacy PSS,
>> _PCT, _PPC) operation.
>>> ```
>>> See
>>> https://uefi.org/specs/ACPI/6.5/08_Processor_Configuration_and_Control
>>> .html?highlight=cppc#power-performance-and-throttling-state-dependenci
>>> es So CPUs could have both _CPC and legacy P-state info in ACPI for
>>> each entry, they just can't have mixed-mode Maybe we shall add sanity
>>> check to see if _CPC exists, it shall exist for all pcpus?
>>
>> Maybe, but that wasn't the point of my remark.
>>
>> Properly behaving Dom0 should probably be passing only one of the two possible
>> pieces of information. Yet maybe we'd better sanity check _that_?
>> (I don't recall seeing Linux kernel side patches yet; if they were posted somewhere,
>> they may at least partly address my concern.)
>>
> 
> In my linux patch, https://lore.kernel.org/lkml/20241204082430.469092-1-Penny.Zheng@amd.com/T/
> I only did zero-value check in xen_processor_get_perf_caps(), Do you think in that place, I shall add
> more strict sanity check, like the register value shall not be zero and also must smaller than UINT8_T?
> Or we just do the above check in Xen part when receiving the data?

Value range checking is nice to have in Dom0, but the same checking
needs doing in the hypervisor anyway. But that isn't what my comment
was about. What I'm asking is how it is being made sure that we won't
have to deal with a mix of traditional and CPPC data in the hypervisor.

Jan
Penny, Zheng Feb. 19, 2025, 8:36 a.m. UTC | #6
[AMD Official Use Only - AMD Internal Distribution Only]

Hi,

> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, February 18, 2025 10:49 PM
> To: Penny, Zheng <penny.zheng@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Andryuk, Jason
> <Jason.Andryuk@amd.com>; Andrew Cooper <andrew.cooper3@citrix.com>;
> Roger Pau Monné <roger.pau@citrix.com>; Anthony PERARD
> <anthony.perard@vates.tech>; Orzel, Michal <Michal.Orzel@amd.com>; Julien
> Grall <julien@xen.org>; Stefano Stabellini <sstabellini@kernel.org>; xen-
> devel@lists.xenproject.org
> Subject: Re: [PATCH v2 02/11] xen/x86: introduce new sub-hypercall to propagate
> CPPC data
>
> On 18.02.2025 07:05, Penny, Zheng wrote:
> >> -----Original Message-----
> >> From: Jan Beulich <jbeulich@suse.com>
> >> Sent: Monday, February 17, 2025 3:39 PM
> >>
> >> On 17.02.2025 08:20, Penny, Zheng wrote:
> >>> [AMD Official Use Only - AMD Internal Distribution Only]
> >>
> >> Btw, boiler plates like this aren't really liked on public mailing
> >> lists, for being contrary to the purpose of such lists.
>
> You did read this, didn't you? I ask because the same boilerplate keeps appearing in
> your mails.
>
> >>>> -----Original Message-----
> >>>> From: Jan Beulich <jbeulich@suse.com>
> >>>> Sent: Tuesday, February 11, 2025 7:10 PM
> >>>>
> >>>> On 06.02.2025 09:32, Penny Zheng wrote:
> >>>>> +{
> >>>>> +    int ret = 0, cpuid;
> >>>>> +    struct processor_pminfo *pm_info;
> >>>>> +
> >>>>> +    cpuid = get_cpu_id(acpi_id);
> >>>>> +    if ( cpuid < 0 || !cppc_data )
> >>>>> +    {
> >>>>> +        ret = -EINVAL;
> >>>>> +        goto out;
> >>>>> +    }
> >>>>> +    if ( cpufreq_verbose )
> >>>>> +        printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
> >>>>> +               acpi_id, cpuid);
> >>>>> +
> >>>>> +    pm_info = processor_pminfo[cpuid];
> >>>>> +    if ( !pm_info )
> >>>>> +    {
> >>>>> +        pm_info = xvzalloc(struct processor_pminfo);
> >>>>> +        if ( !pm_info )
> >>>>> +        {
> >>>>> +            ret = -ENOMEM;
> >>>>> +            goto out;
> >>>>> +        }
> >>>>> +        processor_pminfo[cpuid] = pm_info;
> >>>>> +    }
> >>>>> +    pm_info->acpi_id = acpi_id;
> >>>>> +    pm_info->id = cpuid;
> >>>>> +    pm_info->cppc_data = *cppc_data;
> >>>>> +
> >>>>> +    if ( cpufreq_verbose )
> >>>>> +        print_CPPC(&pm_info->cppc_data);
> >>>>> +
> >>>>> + out:
> >>>>> +    return ret;
> >>>>> +}
> >>>>
> >>>> What's the interaction between the data set by set_px_pminfo() and
> >>>> the data set here? In particular, what's going to happen if both
> >>>> functions come into play for the same CPU? Shouldn't there be some
> >>>> sanity
> >> checks?
> >>>
> >>> Yes, I've considered this and checked ACPI spec. I'll refer them here:
> >>> ```
> >>> If the platform supports CPPC, the _CPC object must exist under all
> >>> processor
> >> objects.
> >>> That is, OSPM is not expected to support mixed mode (CPPC & legacy
> >>> PSS,
> >> _PCT, _PPC) operation.
> >>> ```
> >>> See
> >>> https://uefi.org/specs/ACPI/6.5/08_Processor_Configuration_and_Contr
> >>> ol
> >>> .html?highlight=cppc#power-performance-and-throttling-state-dependen
> >>> ci es So CPUs could have both _CPC and legacy P-state info in ACPI
> >>> for each entry, they just can't have mixed-mode Maybe we shall add
> >>> sanity check to see if _CPC exists, it shall exist for all pcpus?
> >>
> >> Maybe, but that wasn't the point of my remark.
> >>
> >> Properly behaving Dom0 should probably be passing only one of the two
> >> possible pieces of information. Yet maybe we'd better sanity check _that_?
> >> (I don't recall seeing Linux kernel side patches yet; if they were
> >> posted somewhere, they may at least partly address my concern.)
> >>
> >
> > In my linux patch,
> > https://lore.kernel.org/lkml/20241204082430.469092-1-Penny.Zheng@amd.c
> > om/T/ I only did zero-value check in xen_processor_get_perf_caps(), Do
> > you think in that place, I shall add more strict sanity check, like
> > the register value shall not be zero and also must smaller than UINT8_T?
> > Or we just do the above check in Xen part when receiving the data?
>
> Value range checking is nice to have in Dom0, but the same checking needs doing
> in the hypervisor anyway. But that isn't what my comment was about. What I'm
> asking is how it is being made sure that we won't have to deal with a mix of
> traditional and CPPC data in the hypervisor.
>

Are you suggesting that we only do either set_cppc_pminfo or set_px_pminfo?
Only one side data get set to avoid the consequence of mixture.

> Jan
Jan Beulich Feb. 19, 2025, 9:16 a.m. UTC | #7
On 19.02.2025 09:36, Penny, Zheng wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Tuesday, February 18, 2025 10:49 PM
>>
>> On 18.02.2025 07:05, Penny, Zheng wrote:
>>>> -----Original Message-----
>>>> From: Jan Beulich <jbeulich@suse.com>
>>>> Sent: Monday, February 17, 2025 3:39 PM
>>>>
>>>> On 17.02.2025 08:20, Penny, Zheng wrote:
>>>>> [AMD Official Use Only - AMD Internal Distribution Only]
>>>>
>>>> Btw, boiler plates like this aren't really liked on public mailing
>>>> lists, for being contrary to the purpose of such lists.
>>
>> You did read this, didn't you? I ask because the same boilerplate keeps appearing in
>> your mails.
>>
>>>>>> -----Original Message-----
>>>>>> From: Jan Beulich <jbeulich@suse.com>
>>>>>> Sent: Tuesday, February 11, 2025 7:10 PM
>>>>>>
>>>>>> On 06.02.2025 09:32, Penny Zheng wrote:
>>>>>>> +{
>>>>>>> +    int ret = 0, cpuid;
>>>>>>> +    struct processor_pminfo *pm_info;
>>>>>>> +
>>>>>>> +    cpuid = get_cpu_id(acpi_id);
>>>>>>> +    if ( cpuid < 0 || !cppc_data )
>>>>>>> +    {
>>>>>>> +        ret = -EINVAL;
>>>>>>> +        goto out;
>>>>>>> +    }
>>>>>>> +    if ( cpufreq_verbose )
>>>>>>> +        printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
>>>>>>> +               acpi_id, cpuid);
>>>>>>> +
>>>>>>> +    pm_info = processor_pminfo[cpuid];
>>>>>>> +    if ( !pm_info )
>>>>>>> +    {
>>>>>>> +        pm_info = xvzalloc(struct processor_pminfo);
>>>>>>> +        if ( !pm_info )
>>>>>>> +        {
>>>>>>> +            ret = -ENOMEM;
>>>>>>> +            goto out;
>>>>>>> +        }
>>>>>>> +        processor_pminfo[cpuid] = pm_info;
>>>>>>> +    }
>>>>>>> +    pm_info->acpi_id = acpi_id;
>>>>>>> +    pm_info->id = cpuid;
>>>>>>> +    pm_info->cppc_data = *cppc_data;
>>>>>>> +
>>>>>>> +    if ( cpufreq_verbose )
>>>>>>> +        print_CPPC(&pm_info->cppc_data);
>>>>>>> +
>>>>>>> + out:
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>
>>>>>> What's the interaction between the data set by set_px_pminfo() and
>>>>>> the data set here? In particular, what's going to happen if both
>>>>>> functions come into play for the same CPU? Shouldn't there be some
>>>>>> sanity
>>>> checks?
>>>>>
>>>>> Yes, I've considered this and checked ACPI spec. I'll refer them here:
>>>>> ```
>>>>> If the platform supports CPPC, the _CPC object must exist under all
>>>>> processor
>>>> objects.
>>>>> That is, OSPM is not expected to support mixed mode (CPPC & legacy
>>>>> PSS,
>>>> _PCT, _PPC) operation.
>>>>> ```
>>>>> See
>>>>> https://uefi.org/specs/ACPI/6.5/08_Processor_Configuration_and_Contr
>>>>> ol
>>>>> .html?highlight=cppc#power-performance-and-throttling-state-dependen
>>>>> ci es So CPUs could have both _CPC and legacy P-state info in ACPI
>>>>> for each entry, they just can't have mixed-mode Maybe we shall add
>>>>> sanity check to see if _CPC exists, it shall exist for all pcpus?
>>>>
>>>> Maybe, but that wasn't the point of my remark.
>>>>
>>>> Properly behaving Dom0 should probably be passing only one of the two
>>>> possible pieces of information. Yet maybe we'd better sanity check _that_?
>>>> (I don't recall seeing Linux kernel side patches yet; if they were
>>>> posted somewhere, they may at least partly address my concern.)
>>>>
>>>
>>> In my linux patch,
>>> https://lore.kernel.org/lkml/20241204082430.469092-1-Penny.Zheng@amd.c
>>> om/T/ I only did zero-value check in xen_processor_get_perf_caps(), Do
>>> you think in that place, I shall add more strict sanity check, like
>>> the register value shall not be zero and also must smaller than UINT8_T?
>>> Or we just do the above check in Xen part when receiving the data?
>>
>> Value range checking is nice to have in Dom0, but the same checking needs doing
>> in the hypervisor anyway. But that isn't what my comment was about. What I'm
>> asking is how it is being made sure that we won't have to deal with a mix of
>> traditional and CPPC data in the hypervisor.
> 
> Are you suggesting that we only do either set_cppc_pminfo or set_px_pminfo?
> Only one side data get set to avoid the consequence of mixture.

That's one of the possible ways to avoid mixing things. Then again Dom0
won't know what cpufreq= was passed to Xen, and hence it may not be in
the position to decide what to upload. It hence may need to be Xen to
simply ignore the uploading of data it's not going to use.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 67f851237d..735c71b0e7 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -572,6 +572,10 @@  ret_t do_platform_op(
             break;
         }
 
+        case XEN_PM_CPPC:
+            ret = set_cppc_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.u.cppc_data);
+            break;
+
         default:
             ret = -EINVAL;
             break;
diff --git a/xen/arch/x86/x86_64/cpufreq.c b/xen/arch/x86/x86_64/cpufreq.c
index e4f3d5b436..aa72037401 100644
--- a/xen/arch/x86/x86_64/cpufreq.c
+++ b/xen/arch/x86/x86_64/cpufreq.c
@@ -26,6 +26,8 @@ 
 #include <xen/pmstat.h>
 #include <compat/platform.h>
 
+CHECK_processor_cppc;
+
 CHECK_processor_px;
 
 DEFINE_XEN_GUEST_HANDLE(compat_processor_px_t);
diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 4a103c6de9..f5e8bfa09e 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -40,6 +40,7 @@ 
 #include <xen/domain.h>
 #include <xen/cpu.h>
 #include <xen/pmstat.h>
+#include <xen/xvmalloc.h>
 #include <asm/io.h>
 #include <asm/processor.h>
 
@@ -458,6 +459,53 @@  static void print_PPC(unsigned int platform_limit)
     printk("\t_PPC: %d\n", platform_limit);
 }
 
+static void print_CPPC(const struct xen_processor_cppc *cppc_data)
+{
+    printk("\t_CPC: highest_perf=%u, lowest_perf=%u, "
+           "nominal_perf=%u, lowest_nonlinear_perf=%u, "
+           "nominal_freq=%uMhz, lowest_freq=%uMhz\n",
+           cppc_data->highest_perf, cppc_data->lowest_perf,
+           cppc_data->nominal_perf, cppc_data->lowest_nonlinear_perf,
+           cppc_data->nominal_freq, cppc_data->lowest_freq);
+}
+
+int set_cppc_pminfo(uint32_t acpi_id, const struct xen_processor_cppc *cppc_data)
+{
+    int ret = 0, cpuid;
+    struct processor_pminfo *pm_info;
+
+    cpuid = get_cpu_id(acpi_id);
+    if ( cpuid < 0 || !cppc_data )
+    {
+        ret = -EINVAL;
+        goto out;
+    }
+    if ( cpufreq_verbose )
+        printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
+               acpi_id, cpuid);
+
+    pm_info = processor_pminfo[cpuid];
+    if ( !pm_info )
+    {
+        pm_info = xvzalloc(struct processor_pminfo);
+        if ( !pm_info )
+        {
+            ret = -ENOMEM;
+            goto out;
+        }
+        processor_pminfo[cpuid] = pm_info;
+    }
+    pm_info->acpi_id = acpi_id;
+    pm_info->id = cpuid;
+    pm_info->cppc_data = *cppc_data;
+
+    if ( cpufreq_verbose )
+        print_CPPC(&pm_info->cppc_data);
+
+ out:
+    return ret;
+}
+
 int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *perf)
 {
     int ret = 0, cpu;
diff --git a/xen/include/acpi/cpufreq/processor_perf.h b/xen/include/acpi/cpufreq/processor_perf.h
index 301104e16f..cfa0fed647 100644
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -37,6 +37,7 @@  struct processor_pminfo {
     uint32_t acpi_id;
     uint32_t id;
     struct processor_performance    perf;
+    struct xen_processor_cppc cppc_data;
 };
 
 extern struct processor_pminfo *processor_pminfo[NR_CPUS];
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 2725b8d104..b8daa8fc42 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -363,6 +363,7 @@  DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
 #define XEN_PM_PX   1
 #define XEN_PM_TX   2
 #define XEN_PM_PDC  3
+#define XEN_PM_CPPC 4
 
 /* Px sub info type */
 #define XEN_PX_PCT   1
@@ -432,6 +433,20 @@  struct xen_processor_px {
 typedef struct xen_processor_px xen_processor_px_t;
 DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t);
 
+/*
+ * Subset _CPC fields useful for CPPC-compatible cpufreq
+ * driver's initialization
+ */
+struct xen_processor_cppc {
+    uint32_t highest_perf;
+    uint32_t nominal_perf;
+    uint32_t lowest_nonlinear_perf;
+    uint32_t lowest_perf;
+    uint32_t lowest_freq;
+    uint32_t nominal_freq;
+};
+typedef struct xen_processor_cppc xen_processor_cppc_t;
+
 struct xen_psd_package {
     uint64_t num_entries;
     uint64_t revision;
@@ -465,6 +480,7 @@  struct xenpf_set_processor_pminfo {
         struct xen_processor_power          power;/* Cx: _CST/_CSD */
         struct xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */
         XEN_GUEST_HANDLE(uint32)            pdc;  /* _PDC */
+        xen_processor_cppc_t                cppc_data; /*_CPC */
     } u;
 };
 typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
diff --git a/xen/include/xen/pmstat.h b/xen/include/xen/pmstat.h
index 8350403e95..d2fe74ef0b 100644
--- a/xen/include/xen/pmstat.h
+++ b/xen/include/xen/pmstat.h
@@ -5,6 +5,7 @@ 
 #include <public/platform.h> /* for struct xen_processor_power */
 #include <public/sysctl.h>   /* for struct pm_cx_stat */
 
+int set_cppc_pminfo(uint32_t cpu, const struct xen_processor_cppc *cppc_data);
 int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *perf);
 long set_cx_pminfo(uint32_t acpi_id, struct xen_processor_power *power);
 
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 3c7b6c6830..20201c1667 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -162,6 +162,7 @@ 
 
 !	pct_register			platform.h
 !	power_register			platform.h
+?	processor_cppc			platform.h
 ?	processor_csd			platform.h
 !	processor_cx			platform.h
 !	processor_flags			platform.h