diff mbox series

[v3,2/2] x86/xen: Allow per-domain usage of hardware virtualized APIC

Message ID 20220218172943.12182-3-jane.malalane@citrix.com (mailing list archive)
State Superseded
Headers show
Series xen+tools: Report Interrupt Controller Virtualization capabilities on x86 | expand

Commit Message

Jane Malalane Feb. 18, 2022, 5:29 p.m. UTC
Introduce a new per-domain creation x86 specific flag to
select whether hardware assisted virtualization should be used for
x{2}APIC.

A per-domain option is added to xl in order to select the usage of
x{2}APIC hardware assisted vitualization, as well as a global
configuration option.

Having all APIC interaction exit to Xen for emulation is slow and can
induce much overhead. Hardware can speed up x{2}APIC by decoding the
APIC access and providing a VM exit with a more specific exit reason
than a regular EPT fault or by altogether avoiding a VM exit.

On the other hand, being able to disable x{2}APIC hardware assisted
vitualization can be useful for testing and debugging purposes.

Note: vmx_install_vlapic_mapping doesn't require modifications
regardless of whether the guest has virtualize_apic_accesses enabled
or not, i.e., setting the the APIC_ACCESS_ADDR VMCS field is fine so
long as virtualize_apic_accesses is supported by the CPU.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jane Malalane <jane.malalane@citrix.com>
---
CC: Wei Liu <wl@xen.org>
CC: Anthony PERARD <anthony.perard@citrix.com>
CC: Juergen Gross <jgross@suse.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: George Dunlap <george.dunlap@citrix.com>
CC: Jan Beulich <jbeulich@suse.com>
CC: Julien Grall <julien@xen.org>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Christian Lindig <christian.lindig@citrix.com>
CC: David Scott <dave@recoil.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: "Roger Pau Monné" <roger.pau@citrix.com>

v3:
 * Change info in xl.cfg to better express reality and fix
   capitalization of x{2}apic
 * Move "physinfo" variable definition to the beggining of
   libxl__domain_build_info_setdefault()
 * Reposition brackets in if statement to match libxl coding style
 * Shorten logic in libxl__arch_domain_build_info_setdefault()
 * Correct dprintk message in arch_sanitise_domain_config()
 * Make appropriate changes in vmx_vlapic_msr_changed() and
   cpuid_hypervisor_leaves() for amended "assisted_x2apic" bit
 * Remove unneeded parantheses

v2:
 * Add a LIBXL_HAVE_ASSISTED_APIC macro
 * Pass xcpyshinfo as a pointer in libxl__arch_get_physinfo
 * Add a return statement in now "int"
   libxl__arch_domain_build_info_setdefault()
 * Preserve libxl__arch_domain_build_info_setdefault 's location in
   libxl_create.c
 * Correct x{2}apic default setting logic in
   libxl__arch_domain_prepare_config()
 * Correct logic for parsing assisted_x{2}apic host/guest options in
   xl_parse.c and initialize them to -1 in xl.c
 * Use guest options directly in vmx_vlapic_msr_changed
 * Fix indentation of bool assisted_x{2}apic in struct hvm_domain
 * Add a change in xenctrl_stubs.c to pass xenctrl ABI checks
---
 docs/man/xl.cfg.5.pod.in              | 19 +++++++++++++++++++
 docs/man/xl.conf.5.pod.in             | 12 ++++++++++++
 tools/golang/xenlight/helpers.gen.go  | 12 ++++++++++++
 tools/golang/xenlight/types.gen.go    |  2 ++
 tools/include/libxl.h                 |  7 +++++++
 tools/libs/light/libxl_arch.h         |  5 +++--
 tools/libs/light/libxl_arm.c          |  7 +++++--
 tools/libs/light/libxl_create.c       | 22 +++++++++++++---------
 tools/libs/light/libxl_types.idl      |  2 ++
 tools/libs/light/libxl_x86.c          | 28 ++++++++++++++++++++++++++--
 tools/ocaml/libs/xc/xenctrl.ml        |  2 ++
 tools/ocaml/libs/xc/xenctrl.mli       |  2 ++
 tools/ocaml/libs/xc/xenctrl_stubs.c   |  2 +-
 tools/xl/xl.c                         |  8 ++++++++
 tools/xl/xl.h                         |  2 ++
 tools/xl/xl_parse.c                   | 16 ++++++++++++++++
 xen/arch/x86/domain.c                 | 28 +++++++++++++++++++++++++++-
 xen/arch/x86/hvm/vmx/vmcs.c           |  4 ++++
 xen/arch/x86/hvm/vmx/vmx.c            |  8 ++++----
 xen/arch/x86/include/asm/hvm/domain.h |  6 ++++++
 xen/arch/x86/traps.c                  |  8 ++++----
 xen/include/public/arch-x86/xen.h     |  2 ++
 22 files changed, 179 insertions(+), 25 deletions(-)

Comments

Jan Beulich Feb. 24, 2022, 2:16 p.m. UTC | #1
On 18.02.2022 18:29, Jane Malalane wrote:
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -3333,15 +3333,15 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
>  
>  void vmx_vlapic_msr_changed(struct vcpu *v)
>  {
> -    int virtualize_x2apic_mode;
> +    bool virtualize_x2apic_mode;
>      struct vlapic *vlapic = vcpu_vlapic(v);
>      unsigned int msr;
>  
>      virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
>                                  cpu_has_vmx_virtual_intr_delivery) &&
> -                               cpu_has_vmx_virtualize_x2apic_mode );
> +                               v->domain->arch.hvm.assisted_x2apic );

Following from my comment on patch 1, I'd expect this to become a simple
assignment of v->domain->arch.hvm.assisted_x2apic (at which point the
local variable could go away), just like ...

> -    if ( !cpu_has_vmx_virtualize_apic_accesses &&
> +    if ( !v->domain->arch.hvm.assisted_xapic &&
>           !virtualize_x2apic_mode )
>          return;

... here.

> @@ -1124,9 +1125,8 @@ void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
>           * and wrmsr in the guest will run without VMEXITs (see
>           * vmx_vlapic_msr_changed()).
>           */
> -        if ( cpu_has_vmx_virtualize_x2apic_mode &&
> -             cpu_has_vmx_apic_reg_virt &&
> -             cpu_has_vmx_virtual_intr_delivery )
> +        if ( cpu_has_vmx_apic_reg_virt && cpu_has_vmx_virtual_intr_delivery &&
> +             v->domain->arch.hvm.assisted_x2apic )
>              res->a |= XEN_HVM_CPUID_X2APIC_VIRT;

While within the 80 cols limit, I think it would help readability if you
kept it at one sub-condition per line.

Jan
Jane Malalane Feb. 24, 2022, 4:59 p.m. UTC | #2
On 24/02/2022 14:16, Jan Beulich wrote:
> [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe.
> 
> On 18.02.2022 18:29, Jane Malalane wrote:
>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>> @@ -3333,15 +3333,15 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
>>   
>>   void vmx_vlapic_msr_changed(struct vcpu *v)
>>   {
>> -    int virtualize_x2apic_mode;
>> +    bool virtualize_x2apic_mode;
>>       struct vlapic *vlapic = vcpu_vlapic(v);
>>       unsigned int msr;
>>   
>>       virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
>>                                   cpu_has_vmx_virtual_intr_delivery) &&
>> -                               cpu_has_vmx_virtualize_x2apic_mode );
>> +                               v->domain->arch.hvm.assisted_x2apic );
> 
> Following from my comment on patch 1, I'd expect this to become a simple
> assignment of v->domain->arch.hvm.assisted_x2apic (at which point the
> local variable could go away), just like ...
> 
>> -    if ( !cpu_has_vmx_virtualize_apic_accesses &&
>> +    if ( !v->domain->arch.hvm.assisted_xapic &&
>>            !virtualize_x2apic_mode )
>>           return;
> 
> ... here.
Previosuly we discussed setting v->domain->arch.hvm.assisted_xapic equal 
to only cpu_has_vmx_virtualize_x2apic_mode, that's why I have those 
additional checks as at least apic_reg_virt or virtual_intr_delivery are 
needed for the subsequent parts of this function. I might be 
misunderstanding your question.

Unless you mean that we should fallback to having 
v->domain->arch.hvm.assisted_xapic depend on those other features...?
> 
>> @@ -1124,9 +1125,8 @@ void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
>>            * and wrmsr in the guest will run without VMEXITs (see
>>            * vmx_vlapic_msr_changed()).
>>            */
>> -        if ( cpu_has_vmx_virtualize_x2apic_mode &&
>> -             cpu_has_vmx_apic_reg_virt &&
>> -             cpu_has_vmx_virtual_intr_delivery )
>> +        if ( cpu_has_vmx_apic_reg_virt && cpu_has_vmx_virtual_intr_delivery &&
>> +             v->domain->arch.hvm.assisted_x2apic )
>>               res->a |= XEN_HVM_CPUID_X2APIC_VIRT;
> 
> While within the 80 cols limit, I think it would help readability if you
> kept it at one sub-condition per line.
Sure.

Thank you,

Jane.
Jan Beulich Feb. 24, 2022, 5:04 p.m. UTC | #3
On 24.02.2022 17:59, Jane Malalane wrote:
> On 24/02/2022 14:16, Jan Beulich wrote:
>> [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe.
>>
>> On 18.02.2022 18:29, Jane Malalane wrote:
>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>> @@ -3333,15 +3333,15 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
>>>   
>>>   void vmx_vlapic_msr_changed(struct vcpu *v)
>>>   {
>>> -    int virtualize_x2apic_mode;
>>> +    bool virtualize_x2apic_mode;
>>>       struct vlapic *vlapic = vcpu_vlapic(v);
>>>       unsigned int msr;
>>>   
>>>       virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
>>>                                   cpu_has_vmx_virtual_intr_delivery) &&
>>> -                               cpu_has_vmx_virtualize_x2apic_mode );
>>> +                               v->domain->arch.hvm.assisted_x2apic );
>>
>> Following from my comment on patch 1, I'd expect this to become a simple
>> assignment of v->domain->arch.hvm.assisted_x2apic (at which point the
>> local variable could go away), just like ...
>>
>>> -    if ( !cpu_has_vmx_virtualize_apic_accesses &&
>>> +    if ( !v->domain->arch.hvm.assisted_xapic &&
>>>            !virtualize_x2apic_mode )
>>>           return;
>>
>> ... here.
> Previosuly we discussed setting v->domain->arch.hvm.assisted_xapic equal 
> to only cpu_has_vmx_virtualize_x2apic_mode, that's why I have those 
> additional checks as at least apic_reg_virt or virtual_intr_delivery are 
> needed for the subsequent parts of this function. I might be 
> misunderstanding your question.

My expectation would have been that assisted_x2apic_available is assigned
what is (in context above) assigned to virtualize_x2apic_mode (in patch 1).
Anything deviating from this needs, I think, explaining there.

> Unless you mean that we should fallback to having 
> v->domain->arch.hvm.assisted_xapic depend on those other features...?

No, xapic is fine afaic.

Jan
Anthony PERARD Feb. 25, 2022, 1:13 p.m. UTC | #4
On Fri, Feb 18, 2022 at 05:29:43PM +0000, Jane Malalane wrote:
> diff --git a/tools/include/libxl.h b/tools/include/libxl.h
> index 333ffad38d..1c83cae711 100644
> --- a/tools/include/libxl.h
> +++ b/tools/include/libxl.h
> @@ -535,6 +535,13 @@
>  #define LIBXL_HAVE_PHYSINFO_ASSISTED_APIC 1
>  
>  /*
> + * LIBXL_HAVE_ASSISTED_APIC indicates that libxl_domain_build_info has
> + * assisted_x{2}apic fields, for enabling hardware assisted virtualization for

Could you spell out both "assisted_xapic and assisted_x2apic" as that
would allow for grep to find both string.

> + * x{2}apic per domain.
> + */
> +#define LIBXL_HAVE_ASSISTED_APIC 1
> +
> +/*
> diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
> index 39fdca1b49..ba5b8f433f 100644
> --- a/tools/libs/light/libxl_arm.c
> +++ b/tools/libs/light/libxl_arm.c
> @@ -1384,8 +1384,9 @@ void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
>      }
>  }
>  
> -void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
> -                                              libxl_domain_build_info *b_info)
> +int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
> +                                             libxl_domain_build_info *b_info,
> +                                             const libxl_physinfo *physinfo)
>  {
>      /* ACPI is disabled by default */
>      libxl_defbool_setdefault(&b_info->acpi, false);
> @@ -1399,6 +1400,8 @@ void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,

There is another return in this function, which want to return 0 rather
than void.

>      memset(&b_info->u, '\0', sizeof(b_info->u));
>      b_info->type = LIBXL_DOMAIN_TYPE_INVALID;
>      libxl_domain_build_info_init_type(b_info, LIBXL_DOMAIN_TYPE_PVH);
> +
> +    return 0;
>  }
>  
>  int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
> diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c
> index e0a06ecfe3..c377d13b19 100644
> --- a/tools/libs/light/libxl_x86.c
> +++ b/tools/libs/light/libxl_x86.c
> @@ -819,11 +827,27 @@ void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
>  {
>  }
>  
> -void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
> -                                              libxl_domain_build_info *b_info)
> +int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
> +                                             libxl_domain_build_info *b_info,
> +                                             const libxl_physinfo *physinfo)
>  {
>      libxl_defbool_setdefault(&b_info->acpi, true);
>      libxl_defbool_setdefault(&b_info->arch_x86.msr_relaxed, false);
> +
> +    if (b_info->type != LIBXL_DOMAIN_TYPE_PV) {
> +        libxl_defbool_setdefault(&b_info->arch_x86.assisted_xapic,
> +                             physinfo->cap_assisted_xapic);
> +        libxl_defbool_setdefault(&b_info->arch_x86.assisted_x2apic,
> +                             physinfo->cap_assisted_x2apic);
> +    }
> +
> +    else if (!libxl_defbool_is_default(b_info->arch_x86.assisted_xapic) ||

This "else" needs to be on the same line as the "}" 2 lines above.

> +             !libxl_defbool_is_default(b_info->arch_x86.assisted_x2apic)) {
> +        LOG(ERROR, "Interrupt Controller Virtualization not supported for PV");
> +        return ERROR_INVAL;
> +    }
> +
> +    return 0;

Thanks,
Jane Malalane Feb. 25, 2022, 2:27 p.m. UTC | #5
On 24/02/2022 17:04, Jan Beulich wrote:
> On 24.02.2022 17:59, Jane Malalane wrote:
>> On 24/02/2022 14:16, Jan Beulich wrote:
>>> [CAUTION - EXTERNAL EMAIL] DO NOT reply, click links, or open attachments unless you have verified the sender and know the content is safe.
>>>
>>> On 18.02.2022 18:29, Jane Malalane wrote:
>>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>>> @@ -3333,15 +3333,15 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
>>>>    
>>>>    void vmx_vlapic_msr_changed(struct vcpu *v)
>>>>    {
>>>> -    int virtualize_x2apic_mode;
>>>> +    bool virtualize_x2apic_mode;
>>>>        struct vlapic *vlapic = vcpu_vlapic(v);
>>>>        unsigned int msr;
>>>>    
>>>>        virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
>>>>                                    cpu_has_vmx_virtual_intr_delivery) &&
>>>> -                               cpu_has_vmx_virtualize_x2apic_mode );
>>>> +                               v->domain->arch.hvm.assisted_x2apic );
>>>
>>> Following from my comment on patch 1, I'd expect this to become a simple
>>> assignment of v->domain->arch.hvm.assisted_x2apic (at which point the
>>> local variable could go away), just like ...
>>>
>>>> -    if ( !cpu_has_vmx_virtualize_apic_accesses &&
>>>> +    if ( !v->domain->arch.hvm.assisted_xapic &&
>>>>             !virtualize_x2apic_mode )
>>>>            return;
>>>
>>> ... here.
>> Previosuly we discussed setting v->domain->arch.hvm.assisted_xapic equal
>> to only cpu_has_vmx_virtualize_x2apic_mode, that's why I have those
>> additional checks as at least apic_reg_virt or virtual_intr_delivery are
>> needed for the subsequent parts of this function. I might be
>> misunderstanding your question.
> 
> My expectation would have been that assisted_x2apic_available is assigned
> what is (in context above) assigned to virtualize_x2apic_mode (in patch 1).
> Anything deviating from this needs, I think, explaining there.

Oh, sorry, I kept thinking you meant cpu_has_... instead of the local 
variable and it was just all confusing me. Would it help if I didn't use 
a local variable at all, or changed the name then. This is really just a 
check that is done before executing the code below in the function.

Thanks,

Jane.
Jane Malalane Feb. 25, 2022, 2:31 p.m. UTC | #6
On 25/02/2022 13:13, Anthony PERARD wrote:
> On Fri, Feb 18, 2022 at 05:29:43PM +0000, Jane Malalane wrote:
>> diff --git a/tools/include/libxl.h b/tools/include/libxl.h
>> index 333ffad38d..1c83cae711 100644
>> --- a/tools/include/libxl.h
>> +++ b/tools/include/libxl.h
>> @@ -535,6 +535,13 @@
>>   #define LIBXL_HAVE_PHYSINFO_ASSISTED_APIC 1
>>   
>>   /*
>> + * LIBXL_HAVE_ASSISTED_APIC indicates that libxl_domain_build_info has
>> + * assisted_x{2}apic fields, for enabling hardware assisted virtualization for
> 
> Could you spell out both "assisted_xapic and assisted_x2apic" as that
> would allow for grep to find both string.
Will do (for both cases).
> 
>> + * x{2}apic per domain.
>> + */
>> +#define LIBXL_HAVE_ASSISTED_APIC 1
>> +
>> +/*
>> diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
>> index 39fdca1b49..ba5b8f433f 100644
>> --- a/tools/libs/light/libxl_arm.c
>> +++ b/tools/libs/light/libxl_arm.c
>> @@ -1384,8 +1384,9 @@ void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
>>       }
>>   }
>>   
>> -void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
>> -                                              libxl_domain_build_info *b_info)
>> +int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
>> +                                             libxl_domain_build_info *b_info,
>> +                                             const libxl_physinfo *physinfo)
>>   {
>>       /* ACPI is disabled by default */
>>       libxl_defbool_setdefault(&b_info->acpi, false);
>> @@ -1399,6 +1400,8 @@ void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
> 
> There is another return in this function, which want to return 0 rather
> than void.
> 
>>       memset(&b_info->u, '\0', sizeof(b_info->u));
>>       b_info->type = LIBXL_DOMAIN_TYPE_INVALID;
>>       libxl_domain_build_info_init_type(b_info, LIBXL_DOMAIN_TYPE_PVH);
>> +
>> +    return 0;
>>   }
>>   
>>   int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
>> diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c
>> index e0a06ecfe3..c377d13b19 100644
>> --- a/tools/libs/light/libxl_x86.c
>> +++ b/tools/libs/light/libxl_x86.c
>> @@ -819,11 +827,27 @@ void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
>>   {
>>   }
>>   
>> -void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
>> -                                              libxl_domain_build_info *b_info)
>> +int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
>> +                                             libxl_domain_build_info *b_info,
>> +                                             const libxl_physinfo *physinfo)
>>   {
>>       libxl_defbool_setdefault(&b_info->acpi, true);
>>       libxl_defbool_setdefault(&b_info->arch_x86.msr_relaxed, false);
>> +
>> +    if (b_info->type != LIBXL_DOMAIN_TYPE_PV) {
>> +        libxl_defbool_setdefault(&b_info->arch_x86.assisted_xapic,
>> +                             physinfo->cap_assisted_xapic);
>> +        libxl_defbool_setdefault(&b_info->arch_x86.assisted_x2apic,
>> +                             physinfo->cap_assisted_x2apic);
>> +    }
>> +
>> +    else if (!libxl_defbool_is_default(b_info->arch_x86.assisted_xapic) ||
> 
> This "else" needs to be on the same line as the "}" 2 lines above.
Okay.

Thank you,

Jane.
Roger Pau Monné Feb. 28, 2022, 11:20 a.m. UTC | #7
On Thu, Feb 24, 2022 at 03:16:08PM +0100, Jan Beulich wrote:
> On 18.02.2022 18:29, Jane Malalane wrote:
> > --- a/xen/arch/x86/hvm/vmx/vmx.c
> > +++ b/xen/arch/x86/hvm/vmx/vmx.c
> > @@ -3333,15 +3333,15 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
> >  
> >  void vmx_vlapic_msr_changed(struct vcpu *v)
> >  {
> > -    int virtualize_x2apic_mode;
> > +    bool virtualize_x2apic_mode;
> >      struct vlapic *vlapic = vcpu_vlapic(v);
> >      unsigned int msr;
> >  
> >      virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
> >                                  cpu_has_vmx_virtual_intr_delivery) &&
> > -                               cpu_has_vmx_virtualize_x2apic_mode );
> > +                               v->domain->arch.hvm.assisted_x2apic );
> 
> Following from my comment on patch 1, I'd expect this to become a simple
> assignment of v->domain->arch.hvm.assisted_x2apic (at which point the
> local variable could go away), just like ...

I think we want to keep assisted_x{2}apic mapped to
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES and
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE respectively, so that in the
future we could add further controls for
SECONDARY_EXEC_APIC_REGISTER_VIRT and interrupt delivery.

Thanks, Roger.
Jan Beulich Feb. 28, 2022, 1:14 p.m. UTC | #8
On 28.02.2022 12:20, Roger Pau Monné wrote:
> On Thu, Feb 24, 2022 at 03:16:08PM +0100, Jan Beulich wrote:
>> On 18.02.2022 18:29, Jane Malalane wrote:
>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>> @@ -3333,15 +3333,15 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
>>>  
>>>  void vmx_vlapic_msr_changed(struct vcpu *v)
>>>  {
>>> -    int virtualize_x2apic_mode;
>>> +    bool virtualize_x2apic_mode;
>>>      struct vlapic *vlapic = vcpu_vlapic(v);
>>>      unsigned int msr;
>>>  
>>>      virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
>>>                                  cpu_has_vmx_virtual_intr_delivery) &&
>>> -                               cpu_has_vmx_virtualize_x2apic_mode );
>>> +                               v->domain->arch.hvm.assisted_x2apic );
>>
>> Following from my comment on patch 1, I'd expect this to become a simple
>> assignment of v->domain->arch.hvm.assisted_x2apic (at which point the
>> local variable could go away), just like ...
> 
> I think we want to keep assisted_x{2}apic mapped to
> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES and
> SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE respectively, so that in the
> future we could add further controls for
> SECONDARY_EXEC_APIC_REGISTER_VIRT and interrupt delivery.

If we want to be able to control more (most?) VMX sub-features, it
would seem to me as if this would better be modeled accordingly
right away. At that point there would likely need to be VMX and SVM
specific controls rather than general HVM ones. Plus then it might
make sense to match bit assignments in our interface with that in
the VT-x spec.

Jan
Roger Pau Monné Feb. 28, 2022, 3:48 p.m. UTC | #9
On Mon, Feb 28, 2022 at 02:14:26PM +0100, Jan Beulich wrote:
> On 28.02.2022 12:20, Roger Pau Monné wrote:
> > On Thu, Feb 24, 2022 at 03:16:08PM +0100, Jan Beulich wrote:
> >> On 18.02.2022 18:29, Jane Malalane wrote:
> >>> --- a/xen/arch/x86/hvm/vmx/vmx.c
> >>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> >>> @@ -3333,15 +3333,15 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
> >>>  
> >>>  void vmx_vlapic_msr_changed(struct vcpu *v)
> >>>  {
> >>> -    int virtualize_x2apic_mode;
> >>> +    bool virtualize_x2apic_mode;
> >>>      struct vlapic *vlapic = vcpu_vlapic(v);
> >>>      unsigned int msr;
> >>>  
> >>>      virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
> >>>                                  cpu_has_vmx_virtual_intr_delivery) &&
> >>> -                               cpu_has_vmx_virtualize_x2apic_mode );
> >>> +                               v->domain->arch.hvm.assisted_x2apic );
> >>
> >> Following from my comment on patch 1, I'd expect this to become a simple
> >> assignment of v->domain->arch.hvm.assisted_x2apic (at which point the
> >> local variable could go away), just like ...
> > 
> > I think we want to keep assisted_x{2}apic mapped to
> > SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES and
> > SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE respectively, so that in the
> > future we could add further controls for
> > SECONDARY_EXEC_APIC_REGISTER_VIRT and interrupt delivery.
> 
> If we want to be able to control more (most?) VMX sub-features, it
> would seem to me as if this would better be modeled accordingly
> right away. At that point there would likely need to be VMX and SVM
> specific controls rather than general HVM ones.

I would have to check the AMD interface for hardware APIC
virtualization support, I'm not sure how different the control values
are there.

> Plus then it might
> make sense to match bit assignments in our interface with that in
> the VT-x spec.

That could work for things in secondary_exec_control, but posted
interrupts are controlled in pin based exec control, so we would need
to expose two different fields? Not sure it's worth the extra effort
to match bit positions with the spec (or maybe I'm not understanding
this correctly).

Are you suggesting a (VMX) generic interface where the hypervisor
exposes the raw vmx_secondary_exec_control and possibly
vmx_pin_based_exec_control and let the toolstack play with it, setting
in the VMCS what it gets back from the toolstack?

That would imply quite a rework of the code in order to detect enabled
features based on domain specific VMX fields (instead of using the
global vmx_{secondary,pin_based}_exec_control variables)

Thanks, Roger.
Jan Beulich March 1, 2022, 7:54 a.m. UTC | #10
On 28.02.2022 16:48, Roger Pau Monné wrote:
> On Mon, Feb 28, 2022 at 02:14:26PM +0100, Jan Beulich wrote:
>> On 28.02.2022 12:20, Roger Pau Monné wrote:
>>> On Thu, Feb 24, 2022 at 03:16:08PM +0100, Jan Beulich wrote:
>>>> On 18.02.2022 18:29, Jane Malalane wrote:
>>>>> --- a/xen/arch/x86/hvm/vmx/vmx.c
>>>>> +++ b/xen/arch/x86/hvm/vmx/vmx.c
>>>>> @@ -3333,15 +3333,15 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
>>>>>  
>>>>>  void vmx_vlapic_msr_changed(struct vcpu *v)
>>>>>  {
>>>>> -    int virtualize_x2apic_mode;
>>>>> +    bool virtualize_x2apic_mode;
>>>>>      struct vlapic *vlapic = vcpu_vlapic(v);
>>>>>      unsigned int msr;
>>>>>  
>>>>>      virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
>>>>>                                  cpu_has_vmx_virtual_intr_delivery) &&
>>>>> -                               cpu_has_vmx_virtualize_x2apic_mode );
>>>>> +                               v->domain->arch.hvm.assisted_x2apic );
>>>>
>>>> Following from my comment on patch 1, I'd expect this to become a simple
>>>> assignment of v->domain->arch.hvm.assisted_x2apic (at which point the
>>>> local variable could go away), just like ...
>>>
>>> I think we want to keep assisted_x{2}apic mapped to
>>> SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES and
>>> SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE respectively, so that in the
>>> future we could add further controls for
>>> SECONDARY_EXEC_APIC_REGISTER_VIRT and interrupt delivery.
>>
>> If we want to be able to control more (most?) VMX sub-features, it
>> would seem to me as if this would better be modeled accordingly
>> right away. At that point there would likely need to be VMX and SVM
>> specific controls rather than general HVM ones.
> 
> I would have to check the AMD interface for hardware APIC
> virtualization support, I'm not sure how different the control values
> are there.
> 
>> Plus then it might
>> make sense to match bit assignments in our interface with that in
>> the VT-x spec.
> 
> That could work for things in secondary_exec_control, but posted
> interrupts are controlled in pin based exec control, so we would need
> to expose two different fields? Not sure it's worth the extra effort
> to match bit positions with the spec (or maybe I'm not understanding
> this correctly).
> 
> Are you suggesting a (VMX) generic interface where the hypervisor
> exposes the raw vmx_secondary_exec_control and possibly
> vmx_pin_based_exec_control and let the toolstack play with it, setting
> in the VMCS what it gets back from the toolstack?

Not necessarily all of them, but on a case by case basis. But _where_
a control bit would appear (if supported) would be well known up front.
This wouldn't be very different from VMX'es "supports the 1-setting of"
information provided via MSRs. The hypervisor would indicate which of
the bits can be controlled on a per-guest basis.

Jan
diff mbox series

Patch

diff --git a/docs/man/xl.cfg.5.pod.in b/docs/man/xl.cfg.5.pod.in
index b98d161398..dcca564a23 100644
--- a/docs/man/xl.cfg.5.pod.in
+++ b/docs/man/xl.cfg.5.pod.in
@@ -1862,6 +1862,25 @@  firmware tables when using certain older guest Operating
 Systems. These tables have been superseded by newer constructs within
 the ACPI tables.
 
+=item B<assisted_xapic=BOOLEAN>
+
+B<(x86 only)> Enables or disables hardware assisted virtualization for
+xAPIC. With this option enabled, a memory-mapped APIC access will be
+decoded by hardware and either issue a more specific VM exit than just
+an EPT fault, or altogether avoid a VM exit. Notice full
+virtualization for xAPIC can only be achieved if hardware supports
+“APIC-register virtualization” and “virtual-interrupt delivery”. The
+default is settable via L<xl.conf(5)>.
+
+=item B<assisted_x2apic=BOOLEAN>
+
+B<(x86 only)> Enables or disables hardware assisted virtualization for
+x2APIC. With this option enabled, an MSR-Based APIC access will
+either issue a VM exit or altogether avoid one. Notice full
+virtualization for x2APIC can only be achieved if hardware supports
+“APIC-register virtualization” and “virtual-interrupt delivery”. The
+default is settable via L<xl.conf(5)>.
+
 =item B<nx=BOOLEAN>
 
 B<(x86 only)> Hides or exposes the No-eXecute capability. This allows a guest
diff --git a/docs/man/xl.conf.5.pod.in b/docs/man/xl.conf.5.pod.in
index df20c08137..95d136d1ea 100644
--- a/docs/man/xl.conf.5.pod.in
+++ b/docs/man/xl.conf.5.pod.in
@@ -107,6 +107,18 @@  Sets the default value for the C<max_grant_version> domain config value.
 
 Default: maximum grant version supported by the hypervisor.
 
+=item B<assisted_xapic=BOOLEAN>
+
+If enabled, domains will use xAPIC hardware assisted virtualization by default.
+
+Default: enabled if supported.
+
+=item B<assisted_x2apic=BOOLEAN>
+
+If enabled, domains will use x2APIC hardware assisted virtualization by default.
+
+Default: enabled if supported.
+
 =item B<vif.default.script="PATH">
 
 Configures the default hotplug script used by virtual network devices.
diff --git a/tools/golang/xenlight/helpers.gen.go b/tools/golang/xenlight/helpers.gen.go
index dd4e6c9f14..dece545ee0 100644
--- a/tools/golang/xenlight/helpers.gen.go
+++ b/tools/golang/xenlight/helpers.gen.go
@@ -1120,6 +1120,12 @@  x.ArchArm.Vuart = VuartType(xc.arch_arm.vuart)
 if err := x.ArchX86.MsrRelaxed.fromC(&xc.arch_x86.msr_relaxed);err != nil {
 return fmt.Errorf("converting field ArchX86.MsrRelaxed: %v", err)
 }
+if err := x.ArchX86.AssistedXapic.fromC(&xc.arch_x86.assisted_xapic);err != nil {
+return fmt.Errorf("converting field ArchX86.AssistedXapic: %v", err)
+}
+if err := x.ArchX86.AssistedX2Apic.fromC(&xc.arch_x86.assisted_x2apic);err != nil {
+return fmt.Errorf("converting field ArchX86.AssistedX2Apic: %v", err)
+}
 x.Altp2M = Altp2MMode(xc.altp2m)
 x.VmtraceBufKb = int(xc.vmtrace_buf_kb)
 if err := x.Vpmu.fromC(&xc.vpmu);err != nil {
@@ -1605,6 +1611,12 @@  xc.arch_arm.vuart = C.libxl_vuart_type(x.ArchArm.Vuart)
 if err := x.ArchX86.MsrRelaxed.toC(&xc.arch_x86.msr_relaxed); err != nil {
 return fmt.Errorf("converting field ArchX86.MsrRelaxed: %v", err)
 }
+if err := x.ArchX86.AssistedXapic.toC(&xc.arch_x86.assisted_xapic); err != nil {
+return fmt.Errorf("converting field ArchX86.AssistedXapic: %v", err)
+}
+if err := x.ArchX86.AssistedX2Apic.toC(&xc.arch_x86.assisted_x2apic); err != nil {
+return fmt.Errorf("converting field ArchX86.AssistedX2Apic: %v", err)
+}
 xc.altp2m = C.libxl_altp2m_mode(x.Altp2M)
 xc.vmtrace_buf_kb = C.int(x.VmtraceBufKb)
 if err := x.Vpmu.toC(&xc.vpmu); err != nil {
diff --git a/tools/golang/xenlight/types.gen.go b/tools/golang/xenlight/types.gen.go
index 87be46c745..253c9ad93d 100644
--- a/tools/golang/xenlight/types.gen.go
+++ b/tools/golang/xenlight/types.gen.go
@@ -520,6 +520,8 @@  Vuart VuartType
 }
 ArchX86 struct {
 MsrRelaxed Defbool
+AssistedXapic Defbool
+AssistedX2Apic Defbool
 }
 Altp2M Altp2MMode
 VmtraceBufKb int
diff --git a/tools/include/libxl.h b/tools/include/libxl.h
index 333ffad38d..1c83cae711 100644
--- a/tools/include/libxl.h
+++ b/tools/include/libxl.h
@@ -535,6 +535,13 @@ 
 #define LIBXL_HAVE_PHYSINFO_ASSISTED_APIC 1
 
 /*
+ * LIBXL_HAVE_ASSISTED_APIC indicates that libxl_domain_build_info has
+ * assisted_x{2}apic fields, for enabling hardware assisted virtualization for
+ * x{2}apic per domain.
+ */
+#define LIBXL_HAVE_ASSISTED_APIC 1
+
+/*
  * libxl ABI compatibility
  *
  * The only guarantee which libxl makes regarding ABI compatibility
diff --git a/tools/libs/light/libxl_arch.h b/tools/libs/light/libxl_arch.h
index 207ceac6a1..03b89929e6 100644
--- a/tools/libs/light/libxl_arch.h
+++ b/tools/libs/light/libxl_arch.h
@@ -71,8 +71,9 @@  void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
                                                libxl_domain_create_info *c_info);
 
 _hidden
-void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
-                                              libxl_domain_build_info *b_info);
+int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
+                                             libxl_domain_build_info *b_info,
+                                             const libxl_physinfo *physinfo);
 
 _hidden
 int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
diff --git a/tools/libs/light/libxl_arm.c b/tools/libs/light/libxl_arm.c
index 39fdca1b49..ba5b8f433f 100644
--- a/tools/libs/light/libxl_arm.c
+++ b/tools/libs/light/libxl_arm.c
@@ -1384,8 +1384,9 @@  void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
     }
 }
 
-void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
-                                              libxl_domain_build_info *b_info)
+int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
+                                             libxl_domain_build_info *b_info,
+                                             const libxl_physinfo *physinfo)
 {
     /* ACPI is disabled by default */
     libxl_defbool_setdefault(&b_info->acpi, false);
@@ -1399,6 +1400,8 @@  void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
     memset(&b_info->u, '\0', sizeof(b_info->u));
     b_info->type = LIBXL_DOMAIN_TYPE_INVALID;
     libxl_domain_build_info_init_type(b_info, LIBXL_DOMAIN_TYPE_PVH);
+
+    return 0;
 }
 
 int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
index d7a40d7550..4043bc682f 100644
--- a/tools/libs/light/libxl_create.c
+++ b/tools/libs/light/libxl_create.c
@@ -75,6 +75,7 @@  int libxl__domain_build_info_setdefault(libxl__gc *gc,
                                         libxl_domain_build_info *b_info)
 {
     int i, rc;
+    libxl_physinfo info;
 
     if (b_info->type != LIBXL_DOMAIN_TYPE_HVM &&
         b_info->type != LIBXL_DOMAIN_TYPE_PV &&
@@ -264,7 +265,18 @@  int libxl__domain_build_info_setdefault(libxl__gc *gc,
     if (!b_info->event_channels)
         b_info->event_channels = 1023;
 
-    libxl__arch_domain_build_info_setdefault(gc, b_info);
+    rc = libxl_get_physinfo(CTX, &info);
+    if (rc) {
+        LOG(ERROR, "failed to get hypervisor info");
+        return rc;
+    }
+
+    rc = libxl__arch_domain_build_info_setdefault(gc, b_info, &info);
+    if (rc) {
+        LOG(ERROR, "unable to set domain arch build info defaults");
+        return rc;
+    }
+
     libxl_defbool_setdefault(&b_info->dm_restrict, false);
 
     if (b_info->iommu_memkb == LIBXL_MEMKB_DEFAULT)
@@ -457,14 +469,6 @@  int libxl__domain_build_info_setdefault(libxl__gc *gc,
     }
 
     if (b_info->max_grant_version == LIBXL_MAX_GRANT_DEFAULT) {
-        libxl_physinfo info;
-
-        rc = libxl_get_physinfo(CTX, &info);
-        if (rc) {
-            LOG(ERROR, "failed to get hypervisor info");
-            return rc;
-        }
-
         if (info.cap_gnttab_v2)
             b_info->max_grant_version = 2;
         else if (info.cap_gnttab_v1)
diff --git a/tools/libs/light/libxl_types.idl b/tools/libs/light/libxl_types.idl
index 42ac6c357b..db5eb0a0b3 100644
--- a/tools/libs/light/libxl_types.idl
+++ b/tools/libs/light/libxl_types.idl
@@ -648,6 +648,8 @@  libxl_domain_build_info = Struct("domain_build_info",[
                                ("vuart", libxl_vuart_type),
                               ])),
     ("arch_x86", Struct(None, [("msr_relaxed", libxl_defbool),
+                               ("assisted_xapic", libxl_defbool),
+                               ("assisted_x2apic", libxl_defbool),
                               ])),
     # Alternate p2m is not bound to any architecture or guest type, as it is
     # supported by x86 HVM and ARM support is planned.
diff --git a/tools/libs/light/libxl_x86.c b/tools/libs/light/libxl_x86.c
index e0a06ecfe3..c377d13b19 100644
--- a/tools/libs/light/libxl_x86.c
+++ b/tools/libs/light/libxl_x86.c
@@ -23,6 +23,14 @@  int libxl__arch_domain_prepare_config(libxl__gc *gc,
     if (libxl_defbool_val(d_config->b_info.arch_x86.msr_relaxed))
         config->arch.misc_flags |= XEN_X86_MSR_RELAXED;
 
+    if (d_config->c_info.type != LIBXL_DOMAIN_TYPE_PV)
+    {
+        if (libxl_defbool_val(d_config->b_info.arch_x86.assisted_xapic))
+            config->arch.misc_flags |= XEN_X86_ASSISTED_XAPIC;
+
+        if (libxl_defbool_val(d_config->b_info.arch_x86.assisted_x2apic))
+            config->arch.misc_flags |= XEN_X86_ASSISTED_X2APIC;
+    }
     return 0;
 }
 
@@ -819,11 +827,27 @@  void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
 {
 }
 
-void libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
-                                              libxl_domain_build_info *b_info)
+int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
+                                             libxl_domain_build_info *b_info,
+                                             const libxl_physinfo *physinfo)
 {
     libxl_defbool_setdefault(&b_info->acpi, true);
     libxl_defbool_setdefault(&b_info->arch_x86.msr_relaxed, false);
+
+    if (b_info->type != LIBXL_DOMAIN_TYPE_PV) {
+        libxl_defbool_setdefault(&b_info->arch_x86.assisted_xapic,
+                             physinfo->cap_assisted_xapic);
+        libxl_defbool_setdefault(&b_info->arch_x86.assisted_x2apic,
+                             physinfo->cap_assisted_x2apic);
+    }
+
+    else if (!libxl_defbool_is_default(b_info->arch_x86.assisted_xapic) ||
+             !libxl_defbool_is_default(b_info->arch_x86.assisted_x2apic)) {
+        LOG(ERROR, "Interrupt Controller Virtualization not supported for PV");
+        return ERROR_INVAL;
+    }
+
+    return 0;
 }
 
 int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index 21783d3622..672a11ceb6 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -50,6 +50,8 @@  type x86_arch_emulation_flags =
 
 type x86_arch_misc_flags =
 	| X86_MSR_RELAXED
+	| X86_ASSISTED_XAPIC
+	| X86_ASSISTED_X2APIC
 
 type xen_x86_arch_domainconfig =
 {
diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
index af6ba3d1a0..f9a6aa3a0f 100644
--- a/tools/ocaml/libs/xc/xenctrl.mli
+++ b/tools/ocaml/libs/xc/xenctrl.mli
@@ -44,6 +44,8 @@  type x86_arch_emulation_flags =
 
 type x86_arch_misc_flags =
   | X86_MSR_RELAXED
+  | X86_ASSISTED_XAPIC
+  | X86_ASSISTED_X2APIC
 
 type xen_x86_arch_domainconfig = {
   emulation_flags: x86_arch_emulation_flags list;
diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c b/tools/ocaml/libs/xc/xenctrl_stubs.c
index 1fa5453043..c0ef57d6b7 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -239,7 +239,7 @@  CAMLprim value stub_xc_domain_create(value xch, value wanted_domid, value config
 
 		cfg.arch.misc_flags = ocaml_list_to_c_bitmap
 			/* ! x86_arch_misc_flags X86_ none */
-			/* ! XEN_X86_ XEN_X86_MSR_RELAXED all */
+			/* ! XEN_X86_ XEN_X86_ASSISTED_X2APIC max */
 			(VAL_MISC_FLAGS);
 
 #undef VAL_MISC_FLAGS
diff --git a/tools/xl/xl.c b/tools/xl/xl.c
index 2d1ec18ea3..31eb223309 100644
--- a/tools/xl/xl.c
+++ b/tools/xl/xl.c
@@ -57,6 +57,8 @@  int max_grant_frames = -1;
 int max_maptrack_frames = -1;
 int max_grant_version = LIBXL_MAX_GRANT_DEFAULT;
 libxl_domid domid_policy = INVALID_DOMID;
+int assisted_xapic = -1;
+int assisted_x2apic = -1;
 
 xentoollog_level minmsglevel = minmsglevel_default;
 
@@ -201,6 +203,12 @@  static void parse_global_config(const char *configfile,
     if (!xlu_cfg_get_long (config, "claim_mode", &l, 0))
         claim_mode = l;
 
+    if (!xlu_cfg_get_long (config, "assisted_xapic", &l, 0))
+        assisted_xapic = l;
+
+    if (!xlu_cfg_get_long (config, "assisted_x2apic", &l, 0))
+        assisted_x2apic = l;
+
     xlu_cfg_replace_string (config, "remus.default.netbufscript",
         &default_remus_netbufscript, 0);
     xlu_cfg_replace_string (config, "colo.default.proxyscript",
diff --git a/tools/xl/xl.h b/tools/xl/xl.h
index c5c4bedbdd..528deb3feb 100644
--- a/tools/xl/xl.h
+++ b/tools/xl/xl.h
@@ -286,6 +286,8 @@  extern libxl_bitmap global_vm_affinity_mask;
 extern libxl_bitmap global_hvm_affinity_mask;
 extern libxl_bitmap global_pv_affinity_mask;
 extern libxl_domid domid_policy;
+extern int assisted_xapic;
+extern int assisted_x2apic;
 
 enum output_format {
     OUTPUT_FORMAT_JSON,
diff --git a/tools/xl/xl_parse.c b/tools/xl/xl_parse.c
index 117fcdcb2b..0ab9b145fe 100644
--- a/tools/xl/xl_parse.c
+++ b/tools/xl/xl_parse.c
@@ -1681,6 +1681,22 @@  void parse_config_data(const char *config_source,
         xlu_cfg_get_defbool(config, "vpt_align", &b_info->u.hvm.vpt_align, 0);
         xlu_cfg_get_defbool(config, "apic", &b_info->apic, 0);
 
+        e = xlu_cfg_get_long(config, "assisted_xapic", &l , 0);
+        if ((e == ESRCH && assisted_xapic != -1)) /* use global default if present */
+            libxl_defbool_set(&b_info->arch_x86.assisted_xapic, assisted_xapic);
+        else if (!e)
+            libxl_defbool_set(&b_info->arch_x86.assisted_xapic, l);
+        else
+            exit(1);
+
+        e = xlu_cfg_get_long(config, "assisted_x2apic", &l, 0);
+        if ((e == ESRCH && assisted_x2apic != -1)) /* use global default if present */
+            libxl_defbool_set(&b_info->arch_x86.assisted_x2apic, assisted_x2apic);
+        else if (!e)
+            libxl_defbool_set(&b_info->arch_x86.assisted_x2apic, l);
+        else
+            exit(1);
+
         switch (xlu_cfg_get_list(config, "viridian",
                                  &viridian, &num_viridian, 1))
         {
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 9835f90ea0..c239e55f12 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -619,6 +619,8 @@  int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
     bool hvm = config->flags & XEN_DOMCTL_CDF_hvm;
     bool hap = config->flags & XEN_DOMCTL_CDF_hap;
     bool nested_virt = config->flags & XEN_DOMCTL_CDF_nested_virt;
+    bool assisted_xapic = config->arch.misc_flags & XEN_X86_ASSISTED_XAPIC;
+    bool assisted_x2apic = config->arch.misc_flags & XEN_X86_ASSISTED_X2APIC;
     unsigned int max_vcpus;
 
     if ( hvm ? !hvm_enabled : !IS_ENABLED(CONFIG_PV) )
@@ -685,13 +687,31 @@  int arch_sanitise_domain_config(struct xen_domctl_createdomain *config)
         }
     }
 
-    if ( config->arch.misc_flags & ~XEN_X86_MSR_RELAXED )
+    if ( config->arch.misc_flags & ~(XEN_X86_MSR_RELAXED |
+                                     XEN_X86_ASSISTED_XAPIC |
+                                     XEN_X86_ASSISTED_X2APIC) )
     {
         dprintk(XENLOG_INFO, "Invalid arch misc flags %#x\n",
                 config->arch.misc_flags);
         return -EINVAL;
     }
 
+    if ( (assisted_xapic || assisted_x2apic) && !hvm )
+    {
+        dprintk(XENLOG_INFO,
+                "Interrupt Controller Virtualization not supported for PV\n");
+        return -EINVAL;
+    }
+
+    if ( (assisted_xapic && !assisted_xapic_available) ||
+         (assisted_x2apic && !assisted_x2apic_available) )
+    {
+        dprintk(XENLOG_INFO,
+                "Hardware assisted x%sAPIC requested but not available\n",
+                assisted_xapic && !assisted_xapic_available ? "" : "2");
+        return -EINVAL;
+    }
+
     return 0;
 }
 
@@ -864,6 +884,12 @@  int arch_domain_create(struct domain *d,
 
     d->arch.msr_relaxed = config->arch.misc_flags & XEN_X86_MSR_RELAXED;
 
+    d->arch.hvm.assisted_xapic =
+        config->arch.misc_flags & XEN_X86_ASSISTED_XAPIC;
+
+    d->arch.hvm.assisted_x2apic =
+        config->arch.misc_flags & XEN_X86_ASSISTED_X2APIC;
+
     return 0;
 
  fail:
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index be981e11bc..862b1d2126 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1155,6 +1155,10 @@  static int construct_vmcs(struct vcpu *v)
         __vmwrite(PLE_WINDOW, ple_window);
     }
 
+    if ( !v->domain->arch.hvm.assisted_xapic )
+        v->arch.hvm.vmx.secondary_exec_control &=
+            ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+
     if ( cpu_has_vmx_secondary_exec_control )
         __vmwrite(SECONDARY_VM_EXEC_CONTROL,
                   v->arch.hvm.vmx.secondary_exec_control);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 36c8a12cfe..2a0851c960 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3333,15 +3333,15 @@  static void vmx_install_vlapic_mapping(struct vcpu *v)
 
 void vmx_vlapic_msr_changed(struct vcpu *v)
 {
-    int virtualize_x2apic_mode;
+    bool virtualize_x2apic_mode;
     struct vlapic *vlapic = vcpu_vlapic(v);
     unsigned int msr;
 
     virtualize_x2apic_mode = ( (cpu_has_vmx_apic_reg_virt ||
                                 cpu_has_vmx_virtual_intr_delivery) &&
-                               cpu_has_vmx_virtualize_x2apic_mode );
+                               v->domain->arch.hvm.assisted_x2apic );
 
-    if ( !cpu_has_vmx_virtualize_apic_accesses &&
+    if ( !v->domain->arch.hvm.assisted_xapic &&
          !virtualize_x2apic_mode )
         return;
 
@@ -3373,7 +3373,7 @@  void vmx_vlapic_msr_changed(struct vcpu *v)
                 vmx_clear_msr_intercept(v, MSR_X2APIC_SELF, VMX_MSR_W);
             }
         }
-        else
+        else if ( v->domain->arch.hvm.assisted_xapic )
             v->arch.hvm.vmx.secondary_exec_control |=
                 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
     }
diff --git a/xen/arch/x86/include/asm/hvm/domain.h b/xen/arch/x86/include/asm/hvm/domain.h
index 698455444e..92bf53483c 100644
--- a/xen/arch/x86/include/asm/hvm/domain.h
+++ b/xen/arch/x86/include/asm/hvm/domain.h
@@ -117,6 +117,12 @@  struct hvm_domain {
 
     bool                   is_s3_suspended;
 
+    /* xAPIC hardware assisted virtualization. */
+    bool                   assisted_xapic;
+
+    /* x2APIC hardware assisted virtualization. */
+    bool                   assisted_x2apic;
+
     /* hypervisor intercepted msix table */
     struct list_head       msixtbl_list;
 
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 485bd66971..8f1c5efef7 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1115,7 +1115,8 @@  void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
         if ( !is_hvm_domain(d) || subleaf != 0 )
             break;
 
-        if ( cpu_has_vmx_apic_reg_virt )
+        if ( cpu_has_vmx_apic_reg_virt &&
+             v->domain->arch.hvm.assisted_xapic )
             res->a |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
 
         /*
@@ -1124,9 +1125,8 @@  void cpuid_hypervisor_leaves(const struct vcpu *v, uint32_t leaf,
          * and wrmsr in the guest will run without VMEXITs (see
          * vmx_vlapic_msr_changed()).
          */
-        if ( cpu_has_vmx_virtualize_x2apic_mode &&
-             cpu_has_vmx_apic_reg_virt &&
-             cpu_has_vmx_virtual_intr_delivery )
+        if ( cpu_has_vmx_apic_reg_virt && cpu_has_vmx_virtual_intr_delivery &&
+             v->domain->arch.hvm.assisted_x2apic )
             res->a |= XEN_HVM_CPUID_X2APIC_VIRT;
 
         /*
diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h
index 7acd94c8eb..9da32c6239 100644
--- a/xen/include/public/arch-x86/xen.h
+++ b/xen/include/public/arch-x86/xen.h
@@ -317,6 +317,8 @@  struct xen_arch_domainconfig {
  * doesn't allow the guest to read or write to the underlying MSR.
  */
 #define XEN_X86_MSR_RELAXED (1u << 0)
+#define XEN_X86_ASSISTED_XAPIC (1u << 1)
+#define XEN_X86_ASSISTED_X2APIC (1u << 2)
     uint32_t misc_flags;
 };