diff mbox series

[v3,1/2] xen+tools: Report Interrupt Controller Virtualization capabilities on x86

Message ID 20220218172943.12182-2-jane.malalane@citrix.com (mailing list archive)
State Superseded
Headers show
Series xen+tools: Report Interrupt Controller Virtualization capabilities on x86 | expand

Commit Message

Jane Malalane Feb. 18, 2022, 5:29 p.m. UTC
Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
and x2apic, on x86 hardware.
No such features are currently implemented on AMD hardware.

For that purpose, also add an arch-specific "capabilities" parameter
to struct xen_sysctl_physinfo.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
---
CC: Wei Liu <wl@xen.org>
CC: Anthony PERARD <anthony.perard@citrix.com>
CC: Juergen Gross <jgross@suse.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: George Dunlap <george.dunlap@citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Julien Grall <julien@xen.org>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
CC: "Roger Pau Monné" <roger.pau@citrix.com>

v3:
 * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
   set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
 * Have assisted_x2apic_available only depend on
   cpu_has_vmx_virtualize_x2apic_mode
v2:
 * Use one macro LIBXL_HAVE_PHYSINFO_ASSISTED_APIC instead of two
 * Pass xcpyshinfo as a pointer in libxl__arch_get_physinfo
 * Set assisted_x{2}apic_available to be conditional upon "bsp" and
   annotate it with __ro_after_init
 * Change XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_X{2}APIC to
   (...)_X86_ASSISTED_X{2}APIC
 * Keep XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X{2}APIC contained within
   sysctl.h
 * Fix padding introduced in struct xen_sysctl_physinfo and bump
   XEN_SYSCTL_INTERFACE_VERSION
---
 tools/golang/xenlight/helpers.gen.go |  4 ++++
 tools/golang/xenlight/types.gen.go   |  2 ++
 tools/include/libxl.h                |  7 +++++++
 tools/libs/light/libxl.c             |  3 +++
 tools/libs/light/libxl_arch.h        |  4 ++++
 tools/libs/light/libxl_arm.c         |  5 +++++
 tools/libs/light/libxl_types.idl     |  2 ++
 tools/libs/light/libxl_x86.c         | 11 +++++++++++
 tools/ocaml/libs/xc/xenctrl.ml       |  5 +++++
 tools/ocaml/libs/xc/xenctrl.mli      |  5 +++++
 tools/ocaml/libs/xc/xenctrl_stubs.c  | 14 +++++++++++---
 tools/xl/xl_info.c                   |  6 ++++--
 xen/arch/x86/hvm/vmx/vmcs.c          |  7 +++++++
 xen/arch/x86/include/asm/domain.h    |  3 +++
 xen/arch/x86/sysctl.c                |  7 +++++++
 xen/include/public/sysctl.h          | 11 ++++++++++-
 16 files changed, 90 insertions(+), 6 deletions(-)

Comments

Jan Beulich Feb. 24, 2022, 2:08 p.m. UTC | #1
On 18.02.2022 18:29, Jane Malalane wrote:
> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
> and x2apic, on x86 hardware.
> No such features are currently implemented on AMD hardware.
> 
> For that purpose, also add an arch-specific "capabilities" parameter
> to struct xen_sysctl_physinfo.
> 
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
> ---
> v3:
>  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>  * Have assisted_x2apic_available only depend on
>    cpu_has_vmx_virtualize_x2apic_mode

I understand this was the result from previous discussion, but this
needs justifying in the description. Not the least because it differs
from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
vmx_vlapic_msr_changed() does. The difference between those two is
probably intended (judging from a comment there), but the further
difference to what you add isn't obvious.

Which raises another thought: If that hypervisor leaf was part of the
HVM feature set, the tool stack could be able to obtain the wanted
information without altering sysctl (assuming the conditions to set
the respective bits were the same). And I would view it as generally
reasonable for there to be a way for tool stacks to know what
hypervisor leaves guests are going to get to see (at the maximum and
by default).

> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -35,7 +35,7 @@
>  #include "domctl.h"
>  #include "physdev.h"
>  
> -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014
> +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000015
>  
>  /*
>   * Read console content from Xen buffer ring.
> @@ -111,6 +111,13 @@ struct xen_sysctl_tbuf_op {
>  /* Max XEN_SYSCTL_PHYSCAP_* constant.  Used for ABI checking. */
>  #define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
>  
> +/* The platform supports x{2}apic hardware assisted emulation. */
> +#define XEN_SYSCTL_PHYSCAP_X86_ASSISTED_XAPIC  (1u << 0)
> +#define XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X2APIC (1u << 1)
> +
> +/* Max XEN_SYSCTL_PHYSCAP_X86{ARM}__* constant. Used for ABI checking. */
> +#define XEN_SYSCTL_PHYSCAP_ARCH_MAX XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X2APIC

Doesn't this then need to be a per-arch constant? The ABIs would differ
unless we required that every bit may only be used for a single purpose.
IOW it would want to be named XEN_SYSCTL_PHYSCAP_X86_MAX.

> @@ -120,6 +127,8 @@ struct xen_sysctl_physinfo {
>      uint32_t max_node_id; /* Largest possible node ID on this host */
>      uint32_t cpu_khz;
>      uint32_t capabilities;/* XEN_SYSCTL_PHYSCAP_??? */
> +    uint32_t arch_capabilities;/* XEN_SYSCTL_PHYSCAP_X86{ARM}_??? */
> +    uint32_t pad; /* Must be zero. */

If this was an input field (or could potentially become one), the
comment would be applicable. But the whole struct is OUT-only, so
either omit the comment or use e.g. "will" or better "reserved" (as
people shouldn't make themselves dependent on the field being zero).

Jan
Anthony PERARD Feb. 25, 2022, 1:08 p.m. UTC | #2
On Fri, Feb 18, 2022 at 05:29:42PM +0000, Jane Malalane wrote:
> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
> and x2apic, on x86 hardware.
> No such features are currently implemented on AMD hardware.
> 
> For that purpose, also add an arch-specific "capabilities" parameter
> to struct xen_sysctl_physinfo.
> 
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
> ---
> diff --git a/tools/include/libxl.h b/tools/include/libxl.h
> index 51a9b6cfac..333ffad38d 100644
> --- a/tools/include/libxl.h
> +++ b/tools/include/libxl.h
> @@ -528,6 +528,13 @@
>  #define LIBXL_HAVE_MAX_GRANT_VERSION 1
>  
>  /*
> + * LIBXL_HAVE_PHYSINFO_ASSISTED_APIC indicates that libxl_physinfo has
> + * cap_assisted_x{2}apic fields, which indicates the availability of x{2}APIC

I think I'd rather have both cap_assisted_xapic and cap_assisted_x2apic
spelled out in the comment as that would allow to grep for both string.

> + * hardware assisted virtualization.
> + */
> +#define LIBXL_HAVE_PHYSINFO_ASSISTED_APIC 1


Otherwise, tools/ side looks good.

Thanks,
Jane Malalane Feb. 25, 2022, 4:02 p.m. UTC | #3
On 24/02/2022 14:08, Jan Beulich wrote:
> [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe.
> 
> On 18.02.2022 18:29, Jane Malalane wrote:
>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>> and x2apic, on x86 hardware.
>> No such features are currently implemented on AMD hardware.
>>
>> For that purpose, also add an arch-specific "capabilities" parameter
>> to struct xen_sysctl_physinfo.
>>
>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>> ---
>> v3:
>>   * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>     set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>   * Have assisted_x2apic_available only depend on
>>     cpu_has_vmx_virtualize_x2apic_mode
> 
> I understand this was the result from previous discussion, but this
> needs justifying in the description. Not the least because it differs
> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
> vmx_vlapic_msr_changed() does. The difference between those two is
> probably intended (judging from a comment there), but the further
> difference to what you add isn't obvious.

Okay, I will make that explicit.

> Which raises another thought: If that hypervisor leaf was part of the
> HVM feature set, the tool stack could be able to obtain the wanted
> information without altering sysctl (assuming the conditions to set
> the respective bits were the same). And I would view it as generally
> reasonable for there to be a way for tool stacks to know what
> hypervisor leaves guests are going to get to see (at the maximum and
> by default).

Like the "cpuid" xtf test allows us to?
Makes sense to me. I'm happy to take that up after.

> 
>> --- a/xen/include/public/sysctl.h
>> +++ b/xen/include/public/sysctl.h
>> @@ -35,7 +35,7 @@
>>   #include "domctl.h"
>>   #include "physdev.h"
>>   
>> -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014
>> +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000015
>>   
>>   /*
>>    * Read console content from Xen buffer ring.
>> @@ -111,6 +111,13 @@ struct xen_sysctl_tbuf_op {
>>   /* Max XEN_SYSCTL_PHYSCAP_* constant.  Used for ABI checking. */
>>   #define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
>>   
>> +/* The platform supports x{2}apic hardware assisted emulation. */
>> +#define XEN_SYSCTL_PHYSCAP_X86_ASSISTED_XAPIC  (1u << 0)
>> +#define XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X2APIC (1u << 1)
>> +
>> +/* Max XEN_SYSCTL_PHYSCAP_X86{ARM}__* constant. Used for ABI checking. */
>> +#define XEN_SYSCTL_PHYSCAP_ARCH_MAX XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X2APIC
> 
> Doesn't this then need to be a per-arch constant? The ABIs would differ
> unless we required that every bit may only be used for a single purpose.
> IOW it would want to be named XEN_SYSCTL_PHYSCAP_X86_MAX.

Okay.

> 
>> @@ -120,6 +127,8 @@ struct xen_sysctl_physinfo {
>>       uint32_t max_node_id; /* Largest possible node ID on this host */
>>       uint32_t cpu_khz;
>>       uint32_t capabilities;/* XEN_SYSCTL_PHYSCAP_??? */
>> +    uint32_t arch_capabilities;/* XEN_SYSCTL_PHYSCAP_X86{ARM}_??? */
>> +    uint32_t pad; /* Must be zero. */
> 
> If this was an input field (or could potentially become one), the
> comment would be applicable. But the whole struct is OUT-only, so
> either omit the comment or use e.g. "will" or better "reserved" (as
> people shouldn't make themselves dependent on the field being zero).

Will ommit.

Thank you,

Jane.
Jan Beulich Feb. 28, 2022, 7:32 a.m. UTC | #4
On 25.02.2022 17:02, Jane Malalane wrote:
> On 24/02/2022 14:08, Jan Beulich wrote:
>> On 18.02.2022 18:29, Jane Malalane wrote:
>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>>> and x2apic, on x86 hardware.
>>> No such features are currently implemented on AMD hardware.
>>>
>>> For that purpose, also add an arch-specific "capabilities" parameter
>>> to struct xen_sysctl_physinfo.
>>>
>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>>> ---
>>> v3:
>>>   * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>>     set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>>   * Have assisted_x2apic_available only depend on
>>>     cpu_has_vmx_virtualize_x2apic_mode
>>
>> I understand this was the result from previous discussion, but this
>> needs justifying in the description. Not the least because it differs
>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
>> vmx_vlapic_msr_changed() does. The difference between those two is
>> probably intended (judging from a comment there), but the further
>> difference to what you add isn't obvious.
> 
> Okay, I will make that explicit.
> 
>> Which raises another thought: If that hypervisor leaf was part of the
>> HVM feature set, the tool stack could be able to obtain the wanted
>> information without altering sysctl (assuming the conditions to set
>> the respective bits were the same). And I would view it as generally
>> reasonable for there to be a way for tool stacks to know what
>> hypervisor leaves guests are going to get to see (at the maximum and
>> by default).
> 
> Like the "cpuid" xtf test allows us to?

I don't think I understand the question. That xtf test is concerned
about checking the CPUID output it gets to see itself. It doesn't care
about what other guests might get to see, nor the maximum and default.

> Makes sense to me. I'm happy to take that up after.

"After" what?

Jan
Roger Pau Monné Feb. 28, 2022, 10:59 a.m. UTC | #5
On Thu, Feb 24, 2022 at 03:08:41PM +0100, Jan Beulich wrote:
> On 18.02.2022 18:29, Jane Malalane wrote:
> > Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
> > XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
> > and x2apic, on x86 hardware.
> > No such features are currently implemented on AMD hardware.
> > 
> > For that purpose, also add an arch-specific "capabilities" parameter
> > to struct xen_sysctl_physinfo.
> > 
> > Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> > Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
> > ---
> > v3:
> >  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
> >    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
> >  * Have assisted_x2apic_available only depend on
> >    cpu_has_vmx_virtualize_x2apic_mode
> 
> I understand this was the result from previous discussion, but this
> needs justifying in the description. Not the least because it differs
> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
> vmx_vlapic_msr_changed() does. The difference between those two is
> probably intended (judging from a comment there), but the further
> difference to what you add isn't obvious.
> 
> Which raises another thought: If that hypervisor leaf was part of the
> HVM feature set, the tool stack could be able to obtain the wanted
> information without altering sysctl (assuming the conditions to set
> the respective bits were the same). And I would view it as generally
> reasonable for there to be a way for tool stacks to know what
> hypervisor leaves guests are going to get to see (at the maximum and
> by default).

I'm not sure using CPUID would be appropriate for this. Those fields
are supposed to be used by a guest to decide whether it should prefer
the x{2}APIC over PV alternatives for certain operations (ie: IPIs for
example), but the level of control we can provide with the sysctl is
more fine grained.

The current proposal is limited to the exposure and control of the
usage of APIC virtualization, but we could also expose availability
and per-domain enablement of APIC register virtualization and posted
interrupts.

Thanks, Roger.
Jane Malalane Feb. 28, 2022, 12:09 p.m. UTC | #6
On 28/02/2022 07:32, Jan Beulich wrote:
> [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe.
> 
> On 25.02.2022 17:02, Jane Malalane wrote:
>> On 24/02/2022 14:08, Jan Beulich wrote:
>>> On 18.02.2022 18:29, Jane Malalane wrote:
>>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>>>> and x2apic, on x86 hardware.
>>>> No such features are currently implemented on AMD hardware.
>>>>
>>>> For that purpose, also add an arch-specific "capabilities" parameter
>>>> to struct xen_sysctl_physinfo.
>>>>
>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>>>> ---
>>>> v3:
>>>>    * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>>>      set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>>>    * Have assisted_x2apic_available only depend on
>>>>      cpu_has_vmx_virtualize_x2apic_mode
>>>
>>> I understand this was the result from previous discussion, but this
>>> needs justifying in the description. Not the least because it differs
>>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
>>> vmx_vlapic_msr_changed() does. The difference between those two is
>>> probably intended (judging from a comment there), but the further
>>> difference to what you add isn't obvious.
>>
>> Okay, I will make that explicit.
>>
>>> Which raises another thought: If that hypervisor leaf was part of the
>>> HVM feature set, the tool stack could be able to obtain the wanted
>>> information without altering sysctl (assuming the conditions to set
>>> the respective bits were the same). And I would view it as generally
>>> reasonable for there to be a way for tool stacks to know what
>>> hypervisor leaves guests are going to get to see (at the maximum and
>>> by default).
>>
>> Like the "cpuid" xtf test allows us to?
> 
> I don't think I understand the question. That xtf test is concerned
> about checking the CPUID output it gets to see itself. It doesn't care
> about what other guests might get to see, nor the maximum and default.
> 
>> Makes sense to me. I'm happy to take that up after.
> 
> "After" what?
So I meant to say that I could add the Xen CPUID leaves (40000x...) to 
the policy so that toolstacks could know what hypervisor leaves guests 
are going to see - in a future patch, as this wouldn't just expose 
XEN_HVM_CPUID_APIC_ACCESS_VIRT and XEN_HVM_CPUID_X2APIC_VIRT 
(0x40000x04) but other features too.

But, at the same time, w.r.t. this patch in particular, using 
XEN_HVM_CPUID_APIC_ACCESS_VIRT and XEN_HVM_CPUID_X2APIC_VIRT to detect 
assisted APIC gives us less flexibility to add more fine grained 
controls in the future.

Thanks,

Jane.
Jane Malalane Feb. 28, 2022, 12:12 p.m. UTC | #7
On 28/02/2022 07:32, Jan Beulich wrote:
> [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe.
> 
> On 25.02.2022 17:02, Jane Malalane wrote:
>> On 24/02/2022 14:08, Jan Beulich wrote:
>>> On 18.02.2022 18:29, Jane Malalane wrote:
>>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>>>> and x2apic, on x86 hardware.
>>>> No such features are currently implemented on AMD hardware.
>>>>
>>>> For that purpose, also add an arch-specific "capabilities" parameter
>>>> to struct xen_sysctl_physinfo.
>>>>
>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>>>> ---
>>>> v3:
>>>>    * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>>>      set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>>>    * Have assisted_x2apic_available only depend on
>>>>      cpu_has_vmx_virtualize_x2apic_mode
>>>
>>> I understand this was the result from previous discussion, but this
>>> needs justifying in the description. Not the least because it differs
>>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
>>> vmx_vlapic_msr_changed() does. The difference between those two is
>>> probably intended (judging from a comment there), but the further
>>> difference to what you add isn't obvious.
>>
>> Okay, I will make that explicit.
>>
>>> Which raises another thought: If that hypervisor leaf was part of the
>>> HVM feature set, the tool stack could be able to obtain the wanted
>>> information without altering sysctl (assuming the conditions to set
>>> the respective bits were the same). And I would view it as generally
>>> reasonable for there to be a way for tool stacks to know what
>>> hypervisor leaves guests are going to get to see (at the maximum and
>>> by default).
>>
>> Like the "cpuid" xtf test allows us to?
> 
> I don't think I understand the question. That xtf test is concerned
> about checking the CPUID output it gets to see itself. It doesn't care
> about what other guests might get to see, nor the maximum and default.
> 
>> Makes sense to me. I'm happy to take that up after.
> 
> "After" what?
So I meant to say that I could add the Xen CPUID leaves (40000x...) to 
the policy so that toolstacks could know what hypervisor leaves guests 
are going to see - in a future patch, as this wouldn't just expose 
XEN_HVM_CPUID_APIC_ACCESS_VIRT and XEN_HVM_CPUID_X2APIC_VIRT 
(0x40000x04) but other features too.

But, at the same time, w.r.t. this patch in particular, using 
XEN_HVM_CPUID_APIC_ACCESS_VIRT and XEN_HVM_CPUID_X2APIC_VIRT to detect 
assisted APIC gives us less flexibility to add more fine grained 
controls in the future.

Thanks,

Jane.
Jan Beulich Feb. 28, 2022, 1:07 p.m. UTC | #8
On 28.02.2022 13:09, Jane Malalane wrote:
> On 28/02/2022 07:32, Jan Beulich wrote:
>> [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe.
>>
>> On 25.02.2022 17:02, Jane Malalane wrote:
>>> On 24/02/2022 14:08, Jan Beulich wrote:
>>>> On 18.02.2022 18:29, Jane Malalane wrote:
>>>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>>>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>>>>> and x2apic, on x86 hardware.
>>>>> No such features are currently implemented on AMD hardware.
>>>>>
>>>>> For that purpose, also add an arch-specific "capabilities" parameter
>>>>> to struct xen_sysctl_physinfo.
>>>>>
>>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>>>>> ---
>>>>> v3:
>>>>>    * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>>>>      set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>>>>    * Have assisted_x2apic_available only depend on
>>>>>      cpu_has_vmx_virtualize_x2apic_mode
>>>>
>>>> I understand this was the result from previous discussion, but this
>>>> needs justifying in the description. Not the least because it differs
>>>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
>>>> vmx_vlapic_msr_changed() does. The difference between those two is
>>>> probably intended (judging from a comment there), but the further
>>>> difference to what you add isn't obvious.
>>>
>>> Okay, I will make that explicit.
>>>
>>>> Which raises another thought: If that hypervisor leaf was part of the
>>>> HVM feature set, the tool stack could be able to obtain the wanted
>>>> information without altering sysctl (assuming the conditions to set
>>>> the respective bits were the same). And I would view it as generally
>>>> reasonable for there to be a way for tool stacks to know what
>>>> hypervisor leaves guests are going to get to see (at the maximum and
>>>> by default).
>>>
>>> Like the "cpuid" xtf test allows us to?
>>
>> I don't think I understand the question. That xtf test is concerned
>> about checking the CPUID output it gets to see itself. It doesn't care
>> about what other guests might get to see, nor the maximum and default.
>>
>>> Makes sense to me. I'm happy to take that up after.
>>
>> "After" what?
> So I meant to say that I could add the Xen CPUID leaves (40000x...) to 
> the policy so that toolstacks could know what hypervisor leaves guests 
> are going to see - in a future patch, as this wouldn't just expose 
> XEN_HVM_CPUID_APIC_ACCESS_VIRT and XEN_HVM_CPUID_X2APIC_VIRT 
> (0x40000x04) but other features too.

But doing this in a future patch (i.e. subsequent to this one) would
mean to first introduce the sysctl just to then rip it out again.
Hence my desire to consider the alternative before we settle on the
sysctl.

> But, at the same time, w.r.t. this patch in particular, using 
> XEN_HVM_CPUID_APIC_ACCESS_VIRT and XEN_HVM_CPUID_X2APIC_VIRT to detect 
> assisted APIC gives us less flexibility to add more fine grained 
> controls in the future.

I'm afraid I can't follow: All I'm talking about is how to expose the
same kind of information to the tool stack. I don't see how the
mechanism chosen would limit flexibility going forward.

Jan
Jan Beulich Feb. 28, 2022, 1:11 p.m. UTC | #9
On 28.02.2022 11:59, Roger Pau Monné wrote:
> On Thu, Feb 24, 2022 at 03:08:41PM +0100, Jan Beulich wrote:
>> On 18.02.2022 18:29, Jane Malalane wrote:
>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>>> and x2apic, on x86 hardware.
>>> No such features are currently implemented on AMD hardware.
>>>
>>> For that purpose, also add an arch-specific "capabilities" parameter
>>> to struct xen_sysctl_physinfo.
>>>
>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>>> ---
>>> v3:
>>>  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>>    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>>  * Have assisted_x2apic_available only depend on
>>>    cpu_has_vmx_virtualize_x2apic_mode
>>
>> I understand this was the result from previous discussion, but this
>> needs justifying in the description. Not the least because it differs
>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
>> vmx_vlapic_msr_changed() does. The difference between those two is
>> probably intended (judging from a comment there), but the further
>> difference to what you add isn't obvious.
>>
>> Which raises another thought: If that hypervisor leaf was part of the
>> HVM feature set, the tool stack could be able to obtain the wanted
>> information without altering sysctl (assuming the conditions to set
>> the respective bits were the same). And I would view it as generally
>> reasonable for there to be a way for tool stacks to know what
>> hypervisor leaves guests are going to get to see (at the maximum and
>> by default).
> 
> I'm not sure using CPUID would be appropriate for this. Those fields
> are supposed to be used by a guest to decide whether it should prefer
> the x{2}APIC over PV alternatives for certain operations (ie: IPIs for
> example), but the level of control we can provide with the sysctl is
> more fine grained.
> 
> The current proposal is limited to the exposure and control of the
> usage of APIC virtualization, but we could also expose availability
> and per-domain enablement of APIC register virtualization and posted
> interrupts.

But then I would still like to avoid duplication of information
exposure and expose through the featureset what can be exposed there
and limit sysctl to what cannot be expressed otherwise.

Jan
Roger Pau Monné Feb. 28, 2022, 3:36 p.m. UTC | #10
On Mon, Feb 28, 2022 at 02:11:04PM +0100, Jan Beulich wrote:
> On 28.02.2022 11:59, Roger Pau Monné wrote:
> > On Thu, Feb 24, 2022 at 03:08:41PM +0100, Jan Beulich wrote:
> >> On 18.02.2022 18:29, Jane Malalane wrote:
> >>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
> >>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
> >>> and x2apic, on x86 hardware.
> >>> No such features are currently implemented on AMD hardware.
> >>>
> >>> For that purpose, also add an arch-specific "capabilities" parameter
> >>> to struct xen_sysctl_physinfo.
> >>>
> >>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> >>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
> >>> ---
> >>> v3:
> >>>  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
> >>>    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
> >>>  * Have assisted_x2apic_available only depend on
> >>>    cpu_has_vmx_virtualize_x2apic_mode
> >>
> >> I understand this was the result from previous discussion, but this
> >> needs justifying in the description. Not the least because it differs
> >> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
> >> vmx_vlapic_msr_changed() does. The difference between those two is
> >> probably intended (judging from a comment there), but the further
> >> difference to what you add isn't obvious.
> >>
> >> Which raises another thought: If that hypervisor leaf was part of the
> >> HVM feature set, the tool stack could be able to obtain the wanted
> >> information without altering sysctl (assuming the conditions to set
> >> the respective bits were the same). And I would view it as generally
> >> reasonable for there to be a way for tool stacks to know what
> >> hypervisor leaves guests are going to get to see (at the maximum and
> >> by default).
> > 
> > I'm not sure using CPUID would be appropriate for this. Those fields
> > are supposed to be used by a guest to decide whether it should prefer
> > the x{2}APIC over PV alternatives for certain operations (ie: IPIs for
> > example), but the level of control we can provide with the sysctl is
> > more fine grained.
> > 
> > The current proposal is limited to the exposure and control of the
> > usage of APIC virtualization, but we could also expose availability
> > and per-domain enablement of APIC register virtualization and posted
> > interrupts.
> 
> But then I would still like to avoid duplication of information
> exposure and expose through the featureset what can be exposed there
> and limit sysctl to what cannot be expressed otherwise.

So you would rather prefer to expose this information in a synthetic
CPUID leaf?

I assume the duplication of information will depend on what we end up
exposing with the sysctl interface, whether it's just support for
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES and
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE or there's more to it.

Thanks, Roger.
Jan Beulich Feb. 28, 2022, 4:14 p.m. UTC | #11
On 28.02.2022 16:36, Roger Pau Monné wrote:
> On Mon, Feb 28, 2022 at 02:11:04PM +0100, Jan Beulich wrote:
>> On 28.02.2022 11:59, Roger Pau Monné wrote:
>>> On Thu, Feb 24, 2022 at 03:08:41PM +0100, Jan Beulich wrote:
>>>> On 18.02.2022 18:29, Jane Malalane wrote:
>>>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>>>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>>>>> and x2apic, on x86 hardware.
>>>>> No such features are currently implemented on AMD hardware.
>>>>>
>>>>> For that purpose, also add an arch-specific "capabilities" parameter
>>>>> to struct xen_sysctl_physinfo.
>>>>>
>>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>>>>> ---
>>>>> v3:
>>>>>  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>>>>    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>>>>  * Have assisted_x2apic_available only depend on
>>>>>    cpu_has_vmx_virtualize_x2apic_mode
>>>>
>>>> I understand this was the result from previous discussion, but this
>>>> needs justifying in the description. Not the least because it differs
>>>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
>>>> vmx_vlapic_msr_changed() does. The difference between those two is
>>>> probably intended (judging from a comment there), but the further
>>>> difference to what you add isn't obvious.
>>>>
>>>> Which raises another thought: If that hypervisor leaf was part of the
>>>> HVM feature set, the tool stack could be able to obtain the wanted
>>>> information without altering sysctl (assuming the conditions to set
>>>> the respective bits were the same). And I would view it as generally
>>>> reasonable for there to be a way for tool stacks to know what
>>>> hypervisor leaves guests are going to get to see (at the maximum and
>>>> by default).
>>>
>>> I'm not sure using CPUID would be appropriate for this. Those fields
>>> are supposed to be used by a guest to decide whether it should prefer
>>> the x{2}APIC over PV alternatives for certain operations (ie: IPIs for
>>> example), but the level of control we can provide with the sysctl is
>>> more fine grained.
>>>
>>> The current proposal is limited to the exposure and control of the
>>> usage of APIC virtualization, but we could also expose availability
>>> and per-domain enablement of APIC register virtualization and posted
>>> interrupts.
>>
>> But then I would still like to avoid duplication of information
>> exposure and expose through the featureset what can be exposed there
>> and limit sysctl to what cannot be expressed otherwise.
> 
> So you would rather prefer to expose this information in a synthetic
> CPUID leaf?

Depends on what you mean by "synthetic leaf". We already have a leaf.
What I'm suggesting to consider to the give that hypervisor leaf a
representation in the featureset.

Jan
Roger Pau Monné Feb. 28, 2022, 4:31 p.m. UTC | #12
On Mon, Feb 28, 2022 at 05:14:26PM +0100, Jan Beulich wrote:
> On 28.02.2022 16:36, Roger Pau Monné wrote:
> > On Mon, Feb 28, 2022 at 02:11:04PM +0100, Jan Beulich wrote:
> >> On 28.02.2022 11:59, Roger Pau Monné wrote:
> >>> On Thu, Feb 24, 2022 at 03:08:41PM +0100, Jan Beulich wrote:
> >>>> On 18.02.2022 18:29, Jane Malalane wrote:
> >>>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
> >>>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
> >>>>> and x2apic, on x86 hardware.
> >>>>> No such features are currently implemented on AMD hardware.
> >>>>>
> >>>>> For that purpose, also add an arch-specific "capabilities" parameter
> >>>>> to struct xen_sysctl_physinfo.
> >>>>>
> >>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> >>>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
> >>>>> ---
> >>>>> v3:
> >>>>>  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
> >>>>>    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
> >>>>>  * Have assisted_x2apic_available only depend on
> >>>>>    cpu_has_vmx_virtualize_x2apic_mode
> >>>>
> >>>> I understand this was the result from previous discussion, but this
> >>>> needs justifying in the description. Not the least because it differs
> >>>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
> >>>> vmx_vlapic_msr_changed() does. The difference between those two is
> >>>> probably intended (judging from a comment there), but the further
> >>>> difference to what you add isn't obvious.
> >>>>
> >>>> Which raises another thought: If that hypervisor leaf was part of the
> >>>> HVM feature set, the tool stack could be able to obtain the wanted
> >>>> information without altering sysctl (assuming the conditions to set
> >>>> the respective bits were the same). And I would view it as generally
> >>>> reasonable for there to be a way for tool stacks to know what
> >>>> hypervisor leaves guests are going to get to see (at the maximum and
> >>>> by default).
> >>>
> >>> I'm not sure using CPUID would be appropriate for this. Those fields
> >>> are supposed to be used by a guest to decide whether it should prefer
> >>> the x{2}APIC over PV alternatives for certain operations (ie: IPIs for
> >>> example), but the level of control we can provide with the sysctl is
> >>> more fine grained.
> >>>
> >>> The current proposal is limited to the exposure and control of the
> >>> usage of APIC virtualization, but we could also expose availability
> >>> and per-domain enablement of APIC register virtualization and posted
> >>> interrupts.
> >>
> >> But then I would still like to avoid duplication of information
> >> exposure and expose through the featureset what can be exposed there
> >> and limit sysctl to what cannot be expressed otherwise.
> > 
> > So you would rather prefer to expose this information in a synthetic
> > CPUID leaf?
> 
> Depends on what you mean by "synthetic leaf". We already have a leaf.
> What I'm suggesting to consider to the give that hypervisor leaf a
> representation in the featureset.

Hm, but then we won't be able to expose more fine grained controls,
ie: separate between basic APIC virtualization support, APIC register
virtualization and interrupt virtualization. We would need to keep the
meaning of XEN_HVM_CPUID_APIC_ACCESS_VIRT / XEN_HVM_CPUID_X2APIC_VIRT
(and exposing more fine grained features to guests make no sense).

Thanks, Roger.
Jan Beulich March 1, 2022, 7:51 a.m. UTC | #13
On 28.02.2022 17:31, Roger Pau Monné wrote:
> On Mon, Feb 28, 2022 at 05:14:26PM +0100, Jan Beulich wrote:
>> On 28.02.2022 16:36, Roger Pau Monné wrote:
>>> On Mon, Feb 28, 2022 at 02:11:04PM +0100, Jan Beulich wrote:
>>>> On 28.02.2022 11:59, Roger Pau Monné wrote:
>>>>> On Thu, Feb 24, 2022 at 03:08:41PM +0100, Jan Beulich wrote:
>>>>>> On 18.02.2022 18:29, Jane Malalane wrote:
>>>>>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>>>>>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>>>>>>> and x2apic, on x86 hardware.
>>>>>>> No such features are currently implemented on AMD hardware.
>>>>>>>
>>>>>>> For that purpose, also add an arch-specific "capabilities" parameter
>>>>>>> to struct xen_sysctl_physinfo.
>>>>>>>
>>>>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>>>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>>>>>>> ---
>>>>>>> v3:
>>>>>>>  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>>>>>>    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>>>>>>  * Have assisted_x2apic_available only depend on
>>>>>>>    cpu_has_vmx_virtualize_x2apic_mode
>>>>>>
>>>>>> I understand this was the result from previous discussion, but this
>>>>>> needs justifying in the description. Not the least because it differs
>>>>>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
>>>>>> vmx_vlapic_msr_changed() does. The difference between those two is
>>>>>> probably intended (judging from a comment there), but the further
>>>>>> difference to what you add isn't obvious.
>>>>>>
>>>>>> Which raises another thought: If that hypervisor leaf was part of the
>>>>>> HVM feature set, the tool stack could be able to obtain the wanted
>>>>>> information without altering sysctl (assuming the conditions to set
>>>>>> the respective bits were the same). And I would view it as generally
>>>>>> reasonable for there to be a way for tool stacks to know what
>>>>>> hypervisor leaves guests are going to get to see (at the maximum and
>>>>>> by default).
>>>>>
>>>>> I'm not sure using CPUID would be appropriate for this. Those fields
>>>>> are supposed to be used by a guest to decide whether it should prefer
>>>>> the x{2}APIC over PV alternatives for certain operations (ie: IPIs for
>>>>> example), but the level of control we can provide with the sysctl is
>>>>> more fine grained.
>>>>>
>>>>> The current proposal is limited to the exposure and control of the
>>>>> usage of APIC virtualization, but we could also expose availability
>>>>> and per-domain enablement of APIC register virtualization and posted
>>>>> interrupts.
>>>>
>>>> But then I would still like to avoid duplication of information
>>>> exposure and expose through the featureset what can be exposed there
>>>> and limit sysctl to what cannot be expressed otherwise.
>>>
>>> So you would rather prefer to expose this information in a synthetic
>>> CPUID leaf?
>>
>> Depends on what you mean by "synthetic leaf". We already have a leaf.
>> What I'm suggesting to consider to the give that hypervisor leaf a
>> representation in the featureset.
> 
> Hm, but then we won't be able to expose more fine grained controls,
> ie: separate between basic APIC virtualization support, APIC register
> virtualization and interrupt virtualization. We would need to keep the
> meaning of XEN_HVM_CPUID_APIC_ACCESS_VIRT / XEN_HVM_CPUID_X2APIC_VIRT
> (and exposing more fine grained features to guests make no sense).

I did say before that once (if ever) finer grained controls are wanted,
a sysctl like suggested would indeed look to be the way to report the
capability. But we aren't at that point.

Jan
Roger Pau Monné March 1, 2022, 2:19 p.m. UTC | #14
On Tue, Mar 01, 2022 at 08:51:59AM +0100, Jan Beulich wrote:
> On 28.02.2022 17:31, Roger Pau Monné wrote:
> > On Mon, Feb 28, 2022 at 05:14:26PM +0100, Jan Beulich wrote:
> >> On 28.02.2022 16:36, Roger Pau Monné wrote:
> >>> On Mon, Feb 28, 2022 at 02:11:04PM +0100, Jan Beulich wrote:
> >>>> On 28.02.2022 11:59, Roger Pau Monné wrote:
> >>>>> On Thu, Feb 24, 2022 at 03:08:41PM +0100, Jan Beulich wrote:
> >>>>>> On 18.02.2022 18:29, Jane Malalane wrote:
> >>>>>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
> >>>>>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
> >>>>>>> and x2apic, on x86 hardware.
> >>>>>>> No such features are currently implemented on AMD hardware.
> >>>>>>>
> >>>>>>> For that purpose, also add an arch-specific "capabilities" parameter
> >>>>>>> to struct xen_sysctl_physinfo.
> >>>>>>>
> >>>>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> >>>>>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
> >>>>>>> ---
> >>>>>>> v3:
> >>>>>>>  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
> >>>>>>>    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
> >>>>>>>  * Have assisted_x2apic_available only depend on
> >>>>>>>    cpu_has_vmx_virtualize_x2apic_mode
> >>>>>>
> >>>>>> I understand this was the result from previous discussion, but this
> >>>>>> needs justifying in the description. Not the least because it differs
> >>>>>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
> >>>>>> vmx_vlapic_msr_changed() does. The difference between those two is
> >>>>>> probably intended (judging from a comment there), but the further
> >>>>>> difference to what you add isn't obvious.
> >>>>>>
> >>>>>> Which raises another thought: If that hypervisor leaf was part of the
> >>>>>> HVM feature set, the tool stack could be able to obtain the wanted
> >>>>>> information without altering sysctl (assuming the conditions to set
> >>>>>> the respective bits were the same). And I would view it as generally
> >>>>>> reasonable for there to be a way for tool stacks to know what
> >>>>>> hypervisor leaves guests are going to get to see (at the maximum and
> >>>>>> by default).
> >>>>>
> >>>>> I'm not sure using CPUID would be appropriate for this. Those fields
> >>>>> are supposed to be used by a guest to decide whether it should prefer
> >>>>> the x{2}APIC over PV alternatives for certain operations (ie: IPIs for
> >>>>> example), but the level of control we can provide with the sysctl is
> >>>>> more fine grained.
> >>>>>
> >>>>> The current proposal is limited to the exposure and control of the
> >>>>> usage of APIC virtualization, but we could also expose availability
> >>>>> and per-domain enablement of APIC register virtualization and posted
> >>>>> interrupts.
> >>>>
> >>>> But then I would still like to avoid duplication of information
> >>>> exposure and expose through the featureset what can be exposed there
> >>>> and limit sysctl to what cannot be expressed otherwise.
> >>>
> >>> So you would rather prefer to expose this information in a synthetic
> >>> CPUID leaf?
> >>
> >> Depends on what you mean by "synthetic leaf". We already have a leaf.
> >> What I'm suggesting to consider to the give that hypervisor leaf a
> >> representation in the featureset.
> > 
> > Hm, but then we won't be able to expose more fine grained controls,
> > ie: separate between basic APIC virtualization support, APIC register
> > virtualization and interrupt virtualization. We would need to keep the
> > meaning of XEN_HVM_CPUID_APIC_ACCESS_VIRT / XEN_HVM_CPUID_X2APIC_VIRT
> > (and exposing more fine grained features to guests make no sense).
> 
> I did say before that once (if ever) finer grained controls are wanted,
> a sysctl like suggested would indeed look to be the way to report the
> capability. But we aren't at that point.

So we would expose everything in the 0x40000000 range, and caller
would figure out the position of the Xen leaves doing a signature
check until the Xen leaf is found?

Would we represent the max policy as having Viridian enabled (so Xen
leaves starting at 0x40000100)?

I would agree with this if the hypervisor leaves where already part of
the managed CPUID data, but the work required to expose the leaves in
the policy/featureset doesn't seem trivial. Making those leaves part
of the policy will likely be done at some point, and then we can
decide to drop the sysctl bits.

Thanks, Roger.
Jan Beulich March 1, 2022, 2:40 p.m. UTC | #15
On 01.03.2022 15:19, Roger Pau Monné wrote:
> On Tue, Mar 01, 2022 at 08:51:59AM +0100, Jan Beulich wrote:
>> On 28.02.2022 17:31, Roger Pau Monné wrote:
>>> On Mon, Feb 28, 2022 at 05:14:26PM +0100, Jan Beulich wrote:
>>>> On 28.02.2022 16:36, Roger Pau Monné wrote:
>>>>> On Mon, Feb 28, 2022 at 02:11:04PM +0100, Jan Beulich wrote:
>>>>>> On 28.02.2022 11:59, Roger Pau Monné wrote:
>>>>>>> On Thu, Feb 24, 2022 at 03:08:41PM +0100, Jan Beulich wrote:
>>>>>>>> On 18.02.2022 18:29, Jane Malalane wrote:
>>>>>>>>> Add XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_xapic and
>>>>>>>>> XEN_SYSCTL_PHYSCAP_ARCH_ASSISTED_x2apic to report accelerated xapic
>>>>>>>>> and x2apic, on x86 hardware.
>>>>>>>>> No such features are currently implemented on AMD hardware.
>>>>>>>>>
>>>>>>>>> For that purpose, also add an arch-specific "capabilities" parameter
>>>>>>>>> to struct xen_sysctl_physinfo.
>>>>>>>>>
>>>>>>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>>>>>>> Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
>>>>>>>>> ---
>>>>>>>>> v3:
>>>>>>>>>  * Define XEN_SYSCTL_PHYSCAP_ARCH_MAX for ABI checking and actually
>>>>>>>>>    set arch_capbilities, via a call to c_bitmap_to_ocaml_list()
>>>>>>>>>  * Have assisted_x2apic_available only depend on
>>>>>>>>>    cpu_has_vmx_virtualize_x2apic_mode
>>>>>>>>
>>>>>>>> I understand this was the result from previous discussion, but this
>>>>>>>> needs justifying in the description. Not the least because it differs
>>>>>>>> from when XEN_HVM_CPUID_X2APIC_VIRT would be set as well as from what
>>>>>>>> vmx_vlapic_msr_changed() does. The difference between those two is
>>>>>>>> probably intended (judging from a comment there), but the further
>>>>>>>> difference to what you add isn't obvious.
>>>>>>>>
>>>>>>>> Which raises another thought: If that hypervisor leaf was part of the
>>>>>>>> HVM feature set, the tool stack could be able to obtain the wanted
>>>>>>>> information without altering sysctl (assuming the conditions to set
>>>>>>>> the respective bits were the same). And I would view it as generally
>>>>>>>> reasonable for there to be a way for tool stacks to know what
>>>>>>>> hypervisor leaves guests are going to get to see (at the maximum and
>>>>>>>> by default).
>>>>>>>
>>>>>>> I'm not sure using CPUID would be appropriate for this. Those fields
>>>>>>> are supposed to be used by a guest to decide whether it should prefer
>>>>>>> the x{2}APIC over PV alternatives for certain operations (ie: IPIs for
>>>>>>> example), but the level of control we can provide with the sysctl is
>>>>>>> more fine grained.
>>>>>>>
>>>>>>> The current proposal is limited to the exposure and control of the
>>>>>>> usage of APIC virtualization, but we could also expose availability
>>>>>>> and per-domain enablement of APIC register virtualization and posted
>>>>>>> interrupts.
>>>>>>
>>>>>> But then I would still like to avoid duplication of information
>>>>>> exposure and expose through the featureset what can be exposed there
>>>>>> and limit sysctl to what cannot be expressed otherwise.
>>>>>
>>>>> So you would rather prefer to expose this information in a synthetic
>>>>> CPUID leaf?
>>>>
>>>> Depends on what you mean by "synthetic leaf". We already have a leaf.
>>>> What I'm suggesting to consider to the give that hypervisor leaf a
>>>> representation in the featureset.
>>>
>>> Hm, but then we won't be able to expose more fine grained controls,
>>> ie: separate between basic APIC virtualization support, APIC register
>>> virtualization and interrupt virtualization. We would need to keep the
>>> meaning of XEN_HVM_CPUID_APIC_ACCESS_VIRT / XEN_HVM_CPUID_X2APIC_VIRT
>>> (and exposing more fine grained features to guests make no sense).
>>
>> I did say before that once (if ever) finer grained controls are wanted,
>> a sysctl like suggested would indeed look to be the way to report the
>> capability. But we aren't at that point.
> 
> So we would expose everything in the 0x40000000 range, and caller
> would figure out the position of the Xen leaves doing a signature
> check until the Xen leaf is found?

The leaf number doesn't matter. The FEATURESET_* constants are part of
the ABI (just that the names we give them aren't in the public headers).
There would simply be a new FEATURESET_x4a.

> I would agree with this if the hypervisor leaves where already part of
> the managed CPUID data, but the work required to expose the leaves in
> the policy/featureset doesn't seem trivial. Making those leaves part
> of the policy will likely be done at some point, and then we can
> decide to drop the sysctl bits.

I may well be underestimating the amount of work involved. I wanted to
put this up as a possible alternative. Seeing that it's not liked, I'm
not going to insist to further pursue this.

Jan
diff mbox series

Patch

diff --git a/tools/golang/xenlight/helpers.gen.go b/tools/golang/xenlight/helpers.gen.go
index b746ff1081..dd4e6c9f14 100644
--- a/tools/golang/xenlight/helpers.gen.go
+++ b/tools/golang/xenlight/helpers.gen.go
@@ -3373,6 +3373,8 @@  x.CapVmtrace = bool(xc.cap_vmtrace)
 x.CapVpmu = bool(xc.cap_vpmu)
 x.CapGnttabV1 = bool(xc.cap_gnttab_v1)
 x.CapGnttabV2 = bool(xc.cap_gnttab_v2)
+x.CapAssistedXapic = bool(xc.cap_assisted_xapic)
+x.CapAssistedX2Apic = bool(xc.cap_assisted_x2apic)
 
  return nil}
 
@@ -3407,6 +3409,8 @@  xc.cap_vmtrace = C.bool(x.CapVmtrace)
 xc.cap_vpmu = C.bool(x.CapVpmu)
 xc.cap_gnttab_v1 = C.bool(x.CapGnttabV1)
 xc.cap_gnttab_v2 = C.bool(x.CapGnttabV2)
+xc.cap_assisted_xapic = C.bool(x.CapAssistedXapic)
+xc.cap_assisted_x2apic = C.bool(x.CapAssistedX2Apic)
 
  return nil
  }
diff --git a/tools/golang/xenlight/types.gen.go b/tools/golang/xenlight/types.gen.go
index b1e84d5258..87be46c745 100644
--- a/tools/golang/xenlight/types.gen.go
+++ b/tools/golang/xenlight/types.gen.go
@@ -1014,6 +1014,8 @@  CapVmtrace bool
 CapVpmu bool
 CapGnttabV1 bool
 CapGnttabV2 bool
+CapAssistedXapic bool
+CapAssistedX2Apic bool
 }
 
 type Connectorinfo struct {
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index 51a9b6cfac..333ffad38d 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -528,6 +528,13 @@ 
 #define LIBXL_HAVE_MAX_GRANT_VERSION 1
 
 /*
+ * LIBXL_HAVE_PHYSINFO_ASSISTED_APIC indicates that libxl_physinfo has
+ * cap_assisted_x{2}apic fields, which indicates the availability of x{2}APIC
+ * hardware assisted virtualization.
+ */
+#define LIBXL_HAVE_PHYSINFO_ASSISTED_APIC 1
+
+/*
  * libxl ABI compatibility
  *
  * The only guarantee which libxl makes regarding ABI compatibility
diff --git a/tools/libs/light/libxl.c b/tools/libs/light/libxl.c
index a0bf7d186f..6d699951e2 100644
--- a/tools/libs/light/libxl.c
+++ b/tools/libs/light/libxl.c
@@ -15,6 +15,7 @@ 
 #include "libxl_osdeps.h"
 
 #include "libxl_internal.h"
+#include "libxl_arch.h"
 
 int libxl_ctx_alloc(libxl_ctx **pctx, int version,
                     unsigned flags, xentoollog_logger * lg)
@@ -410,6 +411,8 @@  int libxl_get_physinfo(libxl_ctx *ctx, libxl_physinfo *physinfo)
     physinfo->cap_gnttab_v2 =
         !!(xcphysinfo.capabilities & XEN_SYSCTL_PHYSCAP_gnttab_v2);
 
+    libxl__arch_get_physinfo(physinfo, &xcphysinfo);
+
     GC_FREE;
     return 0;
 }
diff --git a/tools/libs/light/libxl_arch.h b/tools/libs/light/libxl_arch.h
index 1522ecb97f..207ceac6a1 100644
--- a/tools/libs/light/libxl_arch.h
+++ b/tools/libs/light/libxl_arch.h
@@ -86,6 +86,10 @@  int libxl__arch_extra_memory(libxl__gc *gc,
                              uint64_t *out);
 
 _hidden
+void libxl__arch_get_physinfo(libxl_physinfo *physinfo,
+                              const xc_physinfo_t *xcphysinfo);
+
+_hidden
 void libxl__arch_update_domain_config(libxl__gc *gc,
                                       libxl_domain_config *dst,
                                       const libxl_domain_config *src);
diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
index eef1de0939..39fdca1b49 100644
--- a/tools/libs/light/libxl_arm.c
+++ b/tools/libs/light/libxl_arm.c
@@ -1431,6 +1431,11 @@  int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
     return rc;
 }
 
+void libxl__arch_get_physinfo(libxl_physinfo *physinfo,
+                              const xc_physinfo_t *xcphysinfo)
+{
+}
+
 void libxl__arch_update_domain_config(libxl__gc *gc,
                                       libxl_domain_config *dst,
                                       const libxl_domain_config *src)
diff --git a/tools/libs/light/libxl_types.idl b/tools/libs/light/libxl_types.idl
index 2a42da2f7d..42ac6c357b 100644
--- a/tools/libs/light/libxl_types.idl
+++ b/tools/libs/light/libxl_types.idl
@@ -1068,6 +1068,8 @@  libxl_physinfo = Struct("physinfo", [
     ("cap_vpmu", bool),
     ("cap_gnttab_v1", bool),
     ("cap_gnttab_v2", bool),
+    ("cap_assisted_xapic", bool),
+    ("cap_assisted_x2apic", bool),
     ], dir=DIR_OUT)
 
 libxl_connectorinfo = Struct("connectorinfo", [
diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c
index 1feadebb18..e0a06ecfe3 100644
--- a/tools/libs/light/libxl_x86.c
+++ b/tools/libs/light/libxl_x86.c
@@ -866,6 +866,17 @@  int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
     return rc;
 }
 
+void libxl__arch_get_physinfo(libxl_physinfo *physinfo,
+                              const xc_physinfo_t *xcphysinfo)
+{
+    physinfo->cap_assisted_xapic =
+        !!(xcphysinfo->arch_capabilities &
+           XEN_SYSCTL_PHYSCAP_X86_ASSISTED_XAPIC);
+    physinfo->cap_assisted_x2apic =
+        !!(xcphysinfo->arch_capabilities &
+           XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X2APIC);
+}
+
 void libxl__arch_update_domain_config(libxl__gc *gc,
                                       libxl_domain_config *dst,
                                       const libxl_domain_config *src)
diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index 7503031d8f..21783d3622 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -127,6 +127,10 @@  type physinfo_cap_flag =
 	| CAP_Gnttab_v1
 	| CAP_Gnttab_v2
 
+type physinfo_arch_cap_flag =
+	| CAP_X86_ASSISTED_XAPIC
+	| CAP_X86_ASSISTED_X2APIC
+
 type physinfo =
 {
 	threads_per_core : int;
@@ -139,6 +143,7 @@  type physinfo =
 	scrub_pages      : nativeint;
 	(* XXX hw_cap *)
 	capabilities     : physinfo_cap_flag list;
+	arch_capabilities : physinfo_arch_cap_flag list;
 	max_nr_cpus      : int;
 }
 
diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
index d1d9c9247a..af6ba3d1a0 100644
--- a/tools/ocaml/libs/xc/xenctrl.mli
+++ b/tools/ocaml/libs/xc/xenctrl.mli
@@ -112,6 +112,10 @@  type physinfo_cap_flag =
   | CAP_Gnttab_v1
   | CAP_Gnttab_v2
 
+type physinfo_arch_cap_flag =
+  | CAP_X86_ASSISTED_XAPIC
+  | CAP_X86_ASSISTED_X2APIC
+
 type physinfo = {
   threads_per_core : int;
   cores_per_socket : int;
@@ -122,6 +126,7 @@  type physinfo = {
   free_pages       : nativeint;
   scrub_pages      : nativeint;
   capabilities     : physinfo_cap_flag list;
+  arch_capabilities : physinfo_arch_cap_flag list;
   max_nr_cpus      : int; (** compile-time max possible number of nr_cpus *)
 }
 type version = { major : int; minor : int; extra : string; }
diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
index 5b4fe72c8d..1fa5453043 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -712,7 +712,7 @@  CAMLprim value stub_xc_send_debug_keys(value xch, value keys)
 CAMLprim value stub_xc_physinfo(value xch)
 {
 	CAMLparam1(xch);
-	CAMLlocal2(physinfo, cap_list);
+	CAMLlocal3(physinfo, cap_list, arch_cap_list);
 	xc_physinfo_t c_physinfo;
 	int r;
 
@@ -730,8 +730,15 @@  CAMLprim value stub_xc_physinfo(value xch)
 		/* ! physinfo_cap_flag CAP_ lc */
 		/* ! XEN_SYSCTL_PHYSCAP_ XEN_SYSCTL_PHYSCAP_MAX max */
 		(c_physinfo.capabilities);
+	/*
+	 * arch_capabilities: physinfo_arch_cap_flag list;
+	 */
+	arch_cap_list = c_bitmap_to_ocaml_list
+		/* ! physinfo_arch_cap_flag CAP_ none */
+		/* ! XEN_SYSCTL_PHYSCAP_ XEN_SYSCTL_PHYSCAP_ARCH_MAX max */
+		(c_physinfo.arch_capabilities);
 
-	physinfo = caml_alloc_tuple(10);
+	physinfo = caml_alloc_tuple(11);
 	Store_field(physinfo, 0, Val_int(c_physinfo.threads_per_core));
 	Store_field(physinfo, 1, Val_int(c_physinfo.cores_per_socket));
 	Store_field(physinfo, 2, Val_int(c_physinfo.nr_cpus));
@@ -741,7 +748,8 @@  CAMLprim value stub_xc_physinfo(value xch)
 	Store_field(physinfo, 6, caml_copy_nativeint(c_physinfo.free_pages));
 	Store_field(physinfo, 7, caml_copy_nativeint(c_physinfo.scrub_pages));
 	Store_field(physinfo, 8, cap_list);
-	Store_field(physinfo, 9, Val_int(c_physinfo.max_cpu_id + 1));
+	Store_field(physinfo, 9, arch_cap_list);
+	Store_field(physinfo, 10, Val_int(c_physinfo.max_cpu_id + 1));
 
 	CAMLreturn(physinfo);
 }
diff --git a/tools/xl/xl_info.c b/tools/xl/xl_info.c
index 712b7638b0..3205270754 100644
--- a/tools/xl/xl_info.c
+++ b/tools/xl/xl_info.c
@@ -210,7 +210,7 @@  static void output_physinfo(void)
          info.hw_cap[4], info.hw_cap[5], info.hw_cap[6], info.hw_cap[7]
         );
 
-    maybe_printf("virt_caps              :%s%s%s%s%s%s%s%s%s%s%s\n",
+    maybe_printf("virt_caps              :%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
          info.cap_pv ? " pv" : "",
          info.cap_hvm ? " hvm" : "",
          info.cap_hvm && info.cap_hvm_directio ? " hvm_directio" : "",
@@ -221,7 +221,9 @@  static void output_physinfo(void)
          info.cap_vmtrace ? " vmtrace" : "",
          info.cap_vpmu ? " vpmu" : "",
          info.cap_gnttab_v1 ? " gnttab-v1" : "",
-         info.cap_gnttab_v2 ? " gnttab-v2" : ""
+         info.cap_gnttab_v2 ? " gnttab-v2" : "",
+         info.cap_assisted_xapic ? " assisted_xapic" : "",
+         info.cap_assisted_x2apic ? " assisted_x2apic" : ""
         );
 
     vinfo = libxl_get_version_info(ctx);
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 7ab15e07a0..be981e11bc 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -343,6 +343,13 @@  static int vmx_init_vmcs_config(bool bsp)
             MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch);
     }
 
+    /* Check whether hardware supports accelerated xapic and x2apic. */
+    if ( bsp )
+    {
+        assisted_xapic_available = cpu_has_vmx_virtualize_apic_accesses;
+        assisted_x2apic_available = cpu_has_vmx_virtualize_x2apic_mode;
+    }
+
     /* The IA32_VMX_EPT_VPID_CAP MSR exists only when EPT or VPID available */
     if ( _vmx_secondary_exec_control & (SECONDARY_EXEC_ENABLE_EPT |
                                         SECONDARY_EXEC_ENABLE_VPID) )
diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
index e62e109598..72431df26d 100644
--- a/xen/arch/x86/include/asm/domain.h
+++ b/xen/arch/x86/include/asm/domain.h
@@ -756,6 +756,9 @@  static inline void pv_inject_sw_interrupt(unsigned int vector)
                       : is_pv_32bit_domain(d) ? PV32_VM_ASSIST_MASK \
                                               : PV64_VM_ASSIST_MASK)
 
+extern bool assisted_xapic_available;
+extern bool assisted_x2apic_available;
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
index aff52a13f3..642cc96985 100644
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -69,6 +69,9 @@  struct l3_cache_info {
     unsigned long size;
 };
 
+bool __ro_after_init assisted_xapic_available;
+bool __ro_after_init assisted_x2apic_available;
+
 static void l3_cache_get(void *arg)
 {
     struct cpuid4_info info;
@@ -135,6 +138,10 @@  void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
         pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
     if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
         pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
+    if ( assisted_xapic_available )
+        pi->arch_capabilities |= XEN_SYSCTL_PHYSCAP_X86_ASSISTED_XAPIC;
+    if ( assisted_x2apic_available )
+        pi->arch_capabilities |= XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X2APIC;
 }
 
 long arch_do_sysctl(
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 55252e97f2..d38141a780 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -35,7 +35,7 @@ 
 #include "domctl.h"
 #include "physdev.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000015
 
 /*
  * Read console content from Xen buffer ring.
@@ -111,6 +111,13 @@  struct xen_sysctl_tbuf_op {
 /* Max XEN_SYSCTL_PHYSCAP_* constant.  Used for ABI checking. */
 #define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
 
+/* The platform supports x{2}apic hardware assisted emulation. */
+#define XEN_SYSCTL_PHYSCAP_X86_ASSISTED_XAPIC  (1u << 0)
+#define XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X2APIC (1u << 1)
+
+/* Max XEN_SYSCTL_PHYSCAP_X86{ARM}__* constant. Used for ABI checking. */
+#define XEN_SYSCTL_PHYSCAP_ARCH_MAX XEN_SYSCTL_PHYSCAP_X86_ASSISTED_X2APIC
+
 struct xen_sysctl_physinfo {
     uint32_t threads_per_core;
     uint32_t cores_per_socket;
@@ -120,6 +127,8 @@  struct xen_sysctl_physinfo {
     uint32_t max_node_id; /* Largest possible node ID on this host */
     uint32_t cpu_khz;
     uint32_t capabilities;/* XEN_SYSCTL_PHYSCAP_??? */
+    uint32_t arch_capabilities;/* XEN_SYSCTL_PHYSCAP_X86{ARM}_??? */
+    uint32_t pad; /* Must be zero. */
     uint64_aligned_t total_pages;
     uint64_aligned_t free_pages;
     uint64_aligned_t scrub_pages;