diff mbox

[v3] kvm:vmx: more complete state update on APICv on/off

Message ID 1463582900-22620-1-git-send-email-rkagan@virtuozzo.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roman Kagan May 18, 2016, 2:48 p.m. UTC
The function to update APICv on/off state (in particular, to deactivate
it when enabling Hyper-V SynIC), used to be incomplete: it didn't adjust
APICv-related fields among secondary processor-based VM-execution
controls.

As a result, Windows 2012 guests would get stuck when SynIC-based
auto-EOI interrupt intersected with e.g. an IPI in the guest.

In addition, the MSR intercept bitmap wasn't updated to correspond to
whether "virtualize x2APIC mode" was enabled.  This path used not to be
triggered, since Windows didn't use x2APIC but rather their own
synthetic APIC access MSRs; however it represented a security risk
because the guest running in a SynIC-enabled VM could switch to x2APIC
and thus obtain direct access to host APIC MSRs (thanks to Yang Zhang
<yang.zhang.wz@gmail.com> for spotting this).

The patch fixes those omissions.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
Cc: Steve Rutherford <srutherford@google.com>
Cc: Yang Zhang <yang.zhang.wz@gmail.com>
---
v2 -> v3:
 - only switch to x2apic msr bitmap if virtualize x2apic mode is on in vmcs

v1 -> v2:
 - only update relevant bits in the secondary exec control
 - update msr intercept bitmap (also make x2apic msr bitmap always
   correspond to APICv)

 arch/x86/kvm/vmx.c | 48 ++++++++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 18 deletions(-)

Comments

Steve Rutherford May 18, 2016, 9:49 p.m. UTC | #1
This patch looks good.

I don't believe you need to explicitly check virtualize x2apic mode,
given that `vcpu->arch.apic_base & X2APIC_ENABLE &&
kvm_vcpu_apicv_active(vcpu)` implies that virtualize x2apic mode is
enabled (because KVM currently never re-enables apicv after disabling
it with the SyncIC), but being explicit is probably easier to
maintain.

On Wed, May 18, 2016 at 7:48 AM, Roman Kagan <rkagan@virtuozzo.com> wrote:
> The function to update APICv on/off state (in particular, to deactivate
> it when enabling Hyper-V SynIC), used to be incomplete: it didn't adjust
> APICv-related fields among secondary processor-based VM-execution
> controls.
>
> As a result, Windows 2012 guests would get stuck when SynIC-based
> auto-EOI interrupt intersected with e.g. an IPI in the guest.
>
> In addition, the MSR intercept bitmap wasn't updated to correspond to
> whether "virtualize x2APIC mode" was enabled.  This path used not to be
> triggered, since Windows didn't use x2APIC but rather their own
> synthetic APIC access MSRs; however it represented a security risk
> because the guest running in a SynIC-enabled VM could switch to x2APIC
> and thus obtain direct access to host APIC MSRs (thanks to Yang Zhang
> <yang.zhang.wz@gmail.com> for spotting this).
>
> The patch fixes those omissions.
>
> Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> Cc: Steve Rutherford <srutherford@google.com>
> Cc: Yang Zhang <yang.zhang.wz@gmail.com>
> ---
> v2 -> v3:
>  - only switch to x2apic msr bitmap if virtualize x2apic mode is on in vmcs
>
> v1 -> v2:
>  - only update relevant bits in the secondary exec control
>  - update msr intercept bitmap (also make x2apic msr bitmap always
>    correspond to APICv)
>
>  arch/x86/kvm/vmx.c | 48 ++++++++++++++++++++++++++++++------------------
>  1 file changed, 30 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ee1c8a9..cef741a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2418,7 +2418,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
>
>         if (is_guest_mode(vcpu))
>                 msr_bitmap = vmx_msr_bitmap_nested;
> -       else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
> +       else if (cpu_has_secondary_exec_ctrls() &&
> +                (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
> +                 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
>                 if (is_long_mode(vcpu))
>                         msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
>                 else
> @@ -4783,6 +4785,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
>         struct vcpu_vmx *vmx = to_vmx(vcpu);
>
>         vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
> +       if (cpu_has_secondary_exec_ctrls()) {
> +               if (kvm_vcpu_apicv_active(vcpu))
> +                       vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
> +                                     SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +                                     SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> +               else
> +                       vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
> +                                       SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +                                       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> +       }
> +
> +       if (cpu_has_vmx_msr_bitmap())
> +               vmx_set_msr_bitmap(vcpu);
>  }
>
>  static u32 vmx_exec_control(struct vcpu_vmx *vmx)
> @@ -6329,23 +6344,20 @@ static __init int hardware_setup(void)
>
>         set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
>
> -       if (enable_apicv) {
> -               for (msr = 0x800; msr <= 0x8ff; msr++)
> -                       vmx_disable_intercept_msr_read_x2apic(msr);
> -
> -               /* According SDM, in x2apic mode, the whole id reg is used.
> -                * But in KVM, it only use the highest eight bits. Need to
> -                * intercept it */
> -               vmx_enable_intercept_msr_read_x2apic(0x802);
> -               /* TMCCT */
> -               vmx_enable_intercept_msr_read_x2apic(0x839);
> -               /* TPR */
> -               vmx_disable_intercept_msr_write_x2apic(0x808);
> -               /* EOI */
> -               vmx_disable_intercept_msr_write_x2apic(0x80b);
> -               /* SELF-IPI */
> -               vmx_disable_intercept_msr_write_x2apic(0x83f);
> -       }
> +       for (msr = 0x800; msr <= 0x8ff; msr++)
> +               vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +       /* According SDM, in x2apic mode, the whole id reg is used.  But in
> +        * KVM, it only use the highest eight bits. Need to intercept it */
> +       vmx_enable_intercept_msr_read_x2apic(0x802);
> +       /* TMCCT */
> +       vmx_enable_intercept_msr_read_x2apic(0x839);
> +       /* TPR */
> +       vmx_disable_intercept_msr_write_x2apic(0x808);
> +       /* EOI */
> +       vmx_disable_intercept_msr_write_x2apic(0x80b);
> +       /* SELF-IPI */
> +       vmx_disable_intercept_msr_write_x2apic(0x83f);
>
>         if (enable_ept) {
>                 kvm_mmu_set_mask_ptes(0ull,
> --
> 2.5.5
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang May 19, 2016, 1:38 a.m. UTC | #2
On 2016/5/18 22:48, Roman Kagan wrote:
> The function to update APICv on/off state (in particular, to deactivate
> it when enabling Hyper-V SynIC), used to be incomplete: it didn't adjust
> APICv-related fields among secondary processor-based VM-execution
> controls.
>
> As a result, Windows 2012 guests would get stuck when SynIC-based
> auto-EOI interrupt intersected with e.g. an IPI in the guest.
>
> In addition, the MSR intercept bitmap wasn't updated to correspond to
> whether "virtualize x2APIC mode" was enabled.  This path used not to be
> triggered, since Windows didn't use x2APIC but rather their own
> synthetic APIC access MSRs; however it represented a security risk
> because the guest running in a SynIC-enabled VM could switch to x2APIC
> and thus obtain direct access to host APIC MSRs (thanks to Yang Zhang
> <yang.zhang.wz@gmail.com> for spotting this).
>
> The patch fixes those omissions.
>
> Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> Cc: Steve Rutherford <srutherford@google.com>
> Cc: Yang Zhang <yang.zhang.wz@gmail.com>
> ---
> v2 -> v3:
>   - only switch to x2apic msr bitmap if virtualize x2apic mode is on in vmcs
>
> v1 -> v2:
>   - only update relevant bits in the secondary exec control
>   - update msr intercept bitmap (also make x2apic msr bitmap always
>     correspond to APICv)
>
>   arch/x86/kvm/vmx.c | 48 ++++++++++++++++++++++++++++++------------------
>   1 file changed, 30 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ee1c8a9..cef741a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2418,7 +2418,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
>
>   	if (is_guest_mode(vcpu))
>   		msr_bitmap = vmx_msr_bitmap_nested;
> -	else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
> +	else if (cpu_has_secondary_exec_ctrls() &&
> +		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
> +		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
>   		if (is_long_mode(vcpu))
>   			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
>   		else
> @@ -4783,6 +4785,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
>   	struct vcpu_vmx *vmx = to_vmx(vcpu);
>
>   	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
> +	if (cpu_has_secondary_exec_ctrls()) {
> +		if (kvm_vcpu_apicv_active(vcpu))
> +			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
> +				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> +		else
> +			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
> +					SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> +	}
> +
> +	if (cpu_has_vmx_msr_bitmap())
> +		vmx_set_msr_bitmap(vcpu);
>   }
>
>   static u32 vmx_exec_control(struct vcpu_vmx *vmx)
> @@ -6329,23 +6344,20 @@ static __init int hardware_setup(void)
>
>   	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
>
> -	if (enable_apicv) {
> -		for (msr = 0x800; msr <= 0x8ff; msr++)
> -			vmx_disable_intercept_msr_read_x2apic(msr);
> -
> -		/* According SDM, in x2apic mode, the whole id reg is used.
> -		 * But in KVM, it only use the highest eight bits. Need to
> -		 * intercept it */
> -		vmx_enable_intercept_msr_read_x2apic(0x802);
> -		/* TMCCT */
> -		vmx_enable_intercept_msr_read_x2apic(0x839);
> -		/* TPR */
> -		vmx_disable_intercept_msr_write_x2apic(0x808);
> -		/* EOI */
> -		vmx_disable_intercept_msr_write_x2apic(0x80b);
> -		/* SELF-IPI */
> -		vmx_disable_intercept_msr_write_x2apic(0x83f);
> -	}
> +	for (msr = 0x800; msr <= 0x8ff; msr++)
> +		vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +	/* According SDM, in x2apic mode, the whole id reg is used.  But in
> +	 * KVM, it only use the highest eight bits. Need to intercept it */
> +	vmx_enable_intercept_msr_read_x2apic(0x802);
> +	/* TMCCT */
> +	vmx_enable_intercept_msr_read_x2apic(0x839);
> +	/* TPR */
> +	vmx_disable_intercept_msr_write_x2apic(0x808);
> +	/* EOI */
> +	vmx_disable_intercept_msr_write_x2apic(0x80b);
> +	/* SELF-IPI */
> +	vmx_disable_intercept_msr_write_x2apic(0x83f);

In current KVM, it will enable virtualize x2apic mode only when 
enable_apicv is enabled. So this change seems unnecessary.

Except this minor comment, the other part is :

Reviewed-by: Yang Zhang <yang.zhang.wz@gmail.com>
Yang Zhang May 19, 2016, 2:01 a.m. UTC | #3
On 2016/5/18 22:48, Roman Kagan wrote:
> The function to update APICv on/off state (in particular, to deactivate
> it when enabling Hyper-V SynIC), used to be incomplete: it didn't adjust
> APICv-related fields among secondary processor-based VM-execution
> controls.

Hi Roman,

I have question about the performance between APICv and Hyper-V SynIC. 
As we known APICv is a hardware feature which including three features: 
APIC register virtualization, virtual interrupt delivery and Posted 
Interrupt. My gut feeling is that the average performance that improved 
by APICv should greater than Hyper-v SynIC. Am i right? If yes, current 
policy that disable the whole APICv seems too aggressive.

btw, do you have any performance data, not micro-level? Thanks.

>
> As a result, Windows 2012 guests would get stuck when SynIC-based
> auto-EOI interrupt intersected with e.g. an IPI in the guest.
>
> In addition, the MSR intercept bitmap wasn't updated to correspond to
> whether "virtualize x2APIC mode" was enabled.  This path used not to be
> triggered, since Windows didn't use x2APIC but rather their own
> synthetic APIC access MSRs; however it represented a security risk
> because the guest running in a SynIC-enabled VM could switch to x2APIC
> and thus obtain direct access to host APIC MSRs (thanks to Yang Zhang
> <yang.zhang.wz@gmail.com> for spotting this).
>
> The patch fixes those omissions.
>
> Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> Cc: Steve Rutherford <srutherford@google.com>
> Cc: Yang Zhang <yang.zhang.wz@gmail.com>
> ---
> v2 -> v3:
>   - only switch to x2apic msr bitmap if virtualize x2apic mode is on in vmcs
>
> v1 -> v2:
>   - only update relevant bits in the secondary exec control
>   - update msr intercept bitmap (also make x2apic msr bitmap always
>     correspond to APICv)
>
>   arch/x86/kvm/vmx.c | 48 ++++++++++++++++++++++++++++++------------------
>   1 file changed, 30 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ee1c8a9..cef741a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2418,7 +2418,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
>
>   	if (is_guest_mode(vcpu))
>   		msr_bitmap = vmx_msr_bitmap_nested;
> -	else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
> +	else if (cpu_has_secondary_exec_ctrls() &&
> +		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
> +		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
>   		if (is_long_mode(vcpu))
>   			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
>   		else
> @@ -4783,6 +4785,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
>   	struct vcpu_vmx *vmx = to_vmx(vcpu);
>
>   	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
> +	if (cpu_has_secondary_exec_ctrls()) {
> +		if (kvm_vcpu_apicv_active(vcpu))
> +			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
> +				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> +		else
> +			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
> +					SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> +	}
> +
> +	if (cpu_has_vmx_msr_bitmap())
> +		vmx_set_msr_bitmap(vcpu);
>   }
>
>   static u32 vmx_exec_control(struct vcpu_vmx *vmx)
> @@ -6329,23 +6344,20 @@ static __init int hardware_setup(void)
>
>   	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
>
> -	if (enable_apicv) {
> -		for (msr = 0x800; msr <= 0x8ff; msr++)
> -			vmx_disable_intercept_msr_read_x2apic(msr);
> -
> -		/* According SDM, in x2apic mode, the whole id reg is used.
> -		 * But in KVM, it only use the highest eight bits. Need to
> -		 * intercept it */
> -		vmx_enable_intercept_msr_read_x2apic(0x802);
> -		/* TMCCT */
> -		vmx_enable_intercept_msr_read_x2apic(0x839);
> -		/* TPR */
> -		vmx_disable_intercept_msr_write_x2apic(0x808);
> -		/* EOI */
> -		vmx_disable_intercept_msr_write_x2apic(0x80b);
> -		/* SELF-IPI */
> -		vmx_disable_intercept_msr_write_x2apic(0x83f);
> -	}
> +	for (msr = 0x800; msr <= 0x8ff; msr++)
> +		vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +	/* According SDM, in x2apic mode, the whole id reg is used.  But in
> +	 * KVM, it only use the highest eight bits. Need to intercept it */
> +	vmx_enable_intercept_msr_read_x2apic(0x802);
> +	/* TMCCT */
> +	vmx_enable_intercept_msr_read_x2apic(0x839);
> +	/* TPR */
> +	vmx_disable_intercept_msr_write_x2apic(0x808);
> +	/* EOI */
> +	vmx_disable_intercept_msr_write_x2apic(0x80b);
> +	/* SELF-IPI */
> +	vmx_disable_intercept_msr_write_x2apic(0x83f);
>
>   	if (enable_ept) {
>   		kvm_mmu_set_mask_ptes(0ull,
>
Denis V. Lunev May 19, 2016, 5:40 a.m. UTC | #4
On 05/19/2016 05:01 AM, Yang Zhang wrote:
> On 2016/5/18 22:48, Roman Kagan wrote:
>> The function to update APICv on/off state (in particular, to deactivate
>> it when enabling Hyper-V SynIC), used to be incomplete: it didn't adjust
>> APICv-related fields among secondary processor-based VM-execution
>> controls.
>
> Hi Roman,
>
> I have question about the performance between APICv and Hyper-V SynIC. 
> As we known APICv is a hardware feature which including three 
> features: APIC register virtualization, virtual interrupt delivery and 
> Posted Interrupt. My gut feeling is that the average performance that 
> improved by APICv should greater than Hyper-v SynIC. Am i right? If 
> yes, current policy that disable the whole APICv seems too aggressive.
>
Argh.. We have faced this situation in Parallels Desktop may be
3 years ago. Unfortunately, there is no data at the moment.
It was toooo old and made by other team. As far as I remember
(for that time), interrupt delivery becomes faster, but operations
with on of CR registers becomes much slower and general
performance score becomes lower.

The problem with SynIC is that it is mandatory prerequisite
to enable HyperV bus in the guest, which is our final goal.
Thus there is no other way for us.

> btw, do you have any performance data, not micro-level? Thanks.
>
not collected at the moment, especially with KVM.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roman Kagan May 19, 2016, 9:29 a.m. UTC | #5
On Thu, May 19, 2016 at 09:38:45AM +0800, Yang Zhang wrote:
> On 2016/5/18 22:48, Roman Kagan wrote:
> > @@ -6329,23 +6344,20 @@ static __init int hardware_setup(void)
> > 
> >   	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
> > 
> > -	if (enable_apicv) {
> > -		for (msr = 0x800; msr <= 0x8ff; msr++)
> > -			vmx_disable_intercept_msr_read_x2apic(msr);
> > -
> > -		/* According SDM, in x2apic mode, the whole id reg is used.
> > -		 * But in KVM, it only use the highest eight bits. Need to
> > -		 * intercept it */
> > -		vmx_enable_intercept_msr_read_x2apic(0x802);
> > -		/* TMCCT */
> > -		vmx_enable_intercept_msr_read_x2apic(0x839);
> > -		/* TPR */
> > -		vmx_disable_intercept_msr_write_x2apic(0x808);
> > -		/* EOI */
> > -		vmx_disable_intercept_msr_write_x2apic(0x80b);
> > -		/* SELF-IPI */
> > -		vmx_disable_intercept_msr_write_x2apic(0x83f);
> > -	}
> > +	for (msr = 0x800; msr <= 0x8ff; msr++)
> > +		vmx_disable_intercept_msr_read_x2apic(msr);
> > +
> > +	/* According SDM, in x2apic mode, the whole id reg is used.  But in
> > +	 * KVM, it only use the highest eight bits. Need to intercept it */
> > +	vmx_enable_intercept_msr_read_x2apic(0x802);
> > +	/* TMCCT */
> > +	vmx_enable_intercept_msr_read_x2apic(0x839);
> > +	/* TPR */
> > +	vmx_disable_intercept_msr_write_x2apic(0x808);
> > +	/* EOI */
> > +	vmx_disable_intercept_msr_write_x2apic(0x80b);
> > +	/* SELF-IPI */
> > +	vmx_disable_intercept_msr_write_x2apic(0x83f);
> 
> In current KVM, it will enable virtualize x2apic mode only when enable_apicv
> is enabled. So this change seems unnecessary.

Strictly speaking, it isn't, but I thought it was counter-intuitive the
old way.

Thanks,
Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Roman Kagan May 19, 2016, 9:34 a.m. UTC | #6
On Wed, May 18, 2016 at 02:49:48PM -0700, Steve Rutherford wrote:
> This patch looks good.
> 
> I don't believe you need to explicitly check virtualize x2apic mode,
> given that `vcpu->arch.apic_base & X2APIC_ENABLE &&
> kvm_vcpu_apicv_active(vcpu)` implies that virtualize x2apic mode is
> enabled (because KVM currently never re-enables apicv after disabling
> it with the SyncIC), but being explicit is probably easier to
> maintain.

This was exactly my intent.

Thanks,
Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang May 20, 2016, 1:15 a.m. UTC | #7
On 2016/5/19 13:40, Denis V. Lunev wrote:
> On 05/19/2016 05:01 AM, Yang Zhang wrote:
>> On 2016/5/18 22:48, Roman Kagan wrote:
>>> The function to update APICv on/off state (in particular, to deactivate
>>> it when enabling Hyper-V SynIC), used to be incomplete: it didn't adjust
>>> APICv-related fields among secondary processor-based VM-execution
>>> controls.
>>
>> Hi Roman,
>>
>> I have question about the performance between APICv and Hyper-V SynIC.
>> As we known APICv is a hardware feature which including three
>> features: APIC register virtualization, virtual interrupt delivery and
>> Posted Interrupt. My gut feeling is that the average performance that
>> improved by APICv should greater than Hyper-v SynIC. Am i right? If
>> yes, current policy that disable the whole APICv seems too aggressive.
>>
> Argh.. We have faced this situation in Parallels Desktop may be
> 3 years ago. Unfortunately, there is no data at the moment.
> It was toooo old and made by other team. As far as I remember
> (for that time), interrupt delivery becomes faster, but operations
> with on of CR registers becomes much slower and general
> performance score becomes lower.

I guess the data may be collected on KVM w/o APICv supporting. Normally, 
APICv provides the faster way to delivery interrupt than software solution.

>
> The problem with SynIC is that it is mandatory prerequisite
> to enable HyperV bus in the guest, which is our final goal.
> Thus there is no other way for us.

I see. Actually, we saw the performance improvement with SynIC timer but 
we don't want to turn off APICv since we think it may hurt the 
performance. Is it possible to turn on SynIC timer without disable APICv?

>
>> btw, do you have any performance data, not micro-level? Thanks.
>>
> not collected at the moment, especially with KVM.
Roman Kagan May 20, 2016, 6:32 a.m. UTC | #8
On Fri, May 20, 2016 at 09:15:31AM +0800, Yang Zhang wrote:
> I see. Actually, we saw the performance improvement with SynIC timer but we
> don't want to turn off APICv since we think it may hurt the performance. Is
> it possible to turn on SynIC timer without disable APICv?

We failed to come up with a sensible solution to make SynIC auto-EOI
interrupts coexist with APICv.

Roman.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Denis V. Lunev May 20, 2016, 6:38 a.m. UTC | #9
On 05/20/2016 04:15 AM, Yang Zhang wrote:
> On 2016/5/19 13:40, Denis V. Lunev wrote:
>> On 05/19/2016 05:01 AM, Yang Zhang wrote:
>>> On 2016/5/18 22:48, Roman Kagan wrote:
>>>> The function to update APICv on/off state (in particular, to 
>>>> deactivate
>>>> it when enabling Hyper-V SynIC), used to be incomplete: it didn't 
>>>> adjust
>>>> APICv-related fields among secondary processor-based VM-execution
>>>> controls.
>>>
>>> Hi Roman,
>>>
>>> I have question about the performance between APICv and Hyper-V SynIC.
>>> As we known APICv is a hardware feature which including three
>>> features: APIC register virtualization, virtual interrupt delivery and
>>> Posted Interrupt. My gut feeling is that the average performance that
>>> improved by APICv should greater than Hyper-v SynIC. Am i right? If
>>> yes, current policy that disable the whole APICv seems too aggressive.
>>>
>> Argh.. We have faced this situation in Parallels Desktop may be
>> 3 years ago. Unfortunately, there is no data at the moment.
>> It was toooo old and made by other team. As far as I remember
>> (for that time), interrupt delivery becomes faster, but operations
>> with on of CR registers becomes much slower and general
>> performance score becomes lower.
>
> I guess the data may be collected on KVM w/o APICv supporting. 
> Normally, APICv provides the faster way to delivery interrupt than 
> software solution.
>
this seems useless.

Once again, interrupt delivery with APICv will be faster,
this is out of question. Though integral tests can show
performance degradation. I know, this sounds silly
and this is test-dependent.

We are going to make this testing after implementing
of HyperV TSC page avoid extra VM exit on context
switch. This seems more beneficial at the moment.

For this reason APICv is disabled in Parallels Desktop
and Parallels Server v6, which are not KVM based.

>>
>> The problem with SynIC is that it is mandatory prerequisite
>> to enable HyperV bus in the guest, which is our final goal.
>> Thus there is no other way for us.
>
> I see. Actually, we saw the performance improvement with SynIC timer 
> but we don't want to turn off APICv since we think it may hurt the 
> performance. Is it possible to turn on SynIC timer without disable APICv?
>
unfortunately no. The problem is AutoEOI feature. At
least we have no idea how to do that.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang May 23, 2016, 1:34 a.m. UTC | #10
On 2016/5/20 14:38, Denis V. Lunev wrote:
> On 05/20/2016 04:15 AM, Yang Zhang wrote:
>> On 2016/5/19 13:40, Denis V. Lunev wrote:
>>> On 05/19/2016 05:01 AM, Yang Zhang wrote:
>>>> On 2016/5/18 22:48, Roman Kagan wrote:
>>>>> The function to update APICv on/off state (in particular, to
>>>>> deactivate
>>>>> it when enabling Hyper-V SynIC), used to be incomplete: it didn't
>>>>> adjust
>>>>> APICv-related fields among secondary processor-based VM-execution
>>>>> controls.
>>>>
>>>> Hi Roman,
>>>>
>>>> I have question about the performance between APICv and Hyper-V SynIC.
>>>> As we known APICv is a hardware feature which including three
>>>> features: APIC register virtualization, virtual interrupt delivery and
>>>> Posted Interrupt. My gut feeling is that the average performance that
>>>> improved by APICv should greater than Hyper-v SynIC. Am i right? If
>>>> yes, current policy that disable the whole APICv seems too aggressive.
>>>>
>>> Argh.. We have faced this situation in Parallels Desktop may be
>>> 3 years ago. Unfortunately, there is no data at the moment.
>>> It was toooo old and made by other team. As far as I remember
>>> (for that time), interrupt delivery becomes faster, but operations
>>> with on of CR registers becomes much slower and general
>>> performance score becomes lower.
>>
>> I guess the data may be collected on KVM w/o APICv supporting.
>> Normally, APICv provides the faster way to delivery interrupt than
>> software solution.
>>
> this seems useless.
>
> Once again, interrupt delivery with APICv will be faster,
> this is out of question. Though integral tests can show
> performance degradation. I know, this sounds silly
> and this is test-dependent.
>
> We are going to make this testing after implementing
> of HyperV TSC page avoid extra VM exit on context
> switch. This seems more beneficial at the moment.
>
> For this reason APICv is disabled in Parallels Desktop
> and Parallels Server v6, which are not KVM based.
>
>>>
>>> The problem with SynIC is that it is mandatory prerequisite
>>> to enable HyperV bus in the guest, which is our final goal.
>>> Thus there is no other way for us.
>>
>> I see. Actually, we saw the performance improvement with SynIC timer
>> but we don't want to turn off APICv since we think it may hurt the
>> performance. Is it possible to turn on SynIC timer without disable APICv?
>>
> unfortunately no. The problem is AutoEOI feature. At
> least we have no idea how to do that.

Ok. Thanks for your explanation.
Paolo Bonzini May 23, 2016, 2:18 p.m. UTC | #11
On 23/05/2016 03:34, Yang Zhang wrote:
> On 2016/5/20 14:38, Denis V. Lunev wrote:
>> On 05/20/2016 04:15 AM, Yang Zhang wrote:
>>> On 2016/5/19 13:40, Denis V. Lunev wrote:
>>>> On 05/19/2016 05:01 AM, Yang Zhang wrote:
>>>>> On 2016/5/18 22:48, Roman Kagan wrote:
>>>>>> The function to update APICv on/off state (in particular, to
>>>>>> deactivate
>>>>>> it when enabling Hyper-V SynIC), used to be incomplete: it didn't
>>>>>> adjust
>>>>>> APICv-related fields among secondary processor-based VM-execution
>>>>>> controls.
>>>>> Roman,
>>>>>
>>>>> I have question about the performance between APICv and Hyper-V SynIC.
>>>>> As we known APICv is a hardware feature which including three
>>>>> features: APIC register virtualization, virtual interrupt delivery and
>>>>> Posted Interrupt. My gut feeling is that the average performance that
>>>>> improved by APICv should greater than Hyper-v SynIC. Am i right? If
>>>>> yes, current policy that disable the whole APICv seems too aggressive.
>>>>>
>>>> Argh.. We have faced this situation in Parallels Desktop may be
>>>> 3 years ago. Unfortunately, there is no data at the moment.
>>>> It was toooo old and made by other team. As far as I remember
>>>> (for that time), interrupt delivery becomes faster, but operations
>>>> with on of CR registers becomes much slower and general
>>>> performance score becomes lower.
>>>
>>> I guess the data may be collected on KVM w/o APICv supporting.
>>> Normally, APICv provides the faster way to delivery interrupt than
>>> software solution.
>>>
>> this seems useless.
>>
>> Once again, interrupt delivery with APICv will be faster,
>> this is out of question. Though integral tests can show
>> performance degradation. I know, this sounds silly
>> and this is test-dependent.
>>
>> We are going to make this testing after implementing
>> of HyperV TSC page avoid extra VM exit on context
>> switch. This seems more beneficial at the moment.
>>
>> For this reason APICv is disabled in Parallels Desktop
>> and Parallels Server v6, which are not KVM based.
>>
>>>>
>>>> The problem with SynIC is that it is mandatory prerequisite
>>>> to enable HyperV bus in the guest, which is our final goal.
>>>> Thus there is no other way for us.
>>>
>>> I see. Actually, we saw the performance improvement with SynIC timer
>>> but we don't want to turn off APICv since we think it may hurt the
>>> performance. Is it possible to turn on SynIC timer without disable
>>> APICv?
>>>
>> unfortunately no. The problem is AutoEOI feature. At
>> least we have no idea how to do that.
> 
> Ok. Thanks for your explanation.

You can search the KVM mailing list archives; there are some discussions
between me, Andrey and Roman regarding APICv---when I tried their unit
tests on a Haswell-EP machine I found that they broke due to AutoEOI,
and we came up with the solution of disabling APICv.

For what it's worth, we've also seen only very small performance
improvements from APICv, with the exception of nested virtualization
where APICv's impact is large.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini May 23, 2016, 2:31 p.m. UTC | #12
On 18/05/2016 16:48, Roman Kagan wrote:
> The function to update APICv on/off state (in particular, to deactivate
> it when enabling Hyper-V SynIC), used to be incomplete: it didn't adjust
> APICv-related fields among secondary processor-based VM-execution
> controls.
> 
> As a result, Windows 2012 guests would get stuck when SynIC-based
> auto-EOI interrupt intersected with e.g. an IPI in the guest.
> 
> In addition, the MSR intercept bitmap wasn't updated to correspond to
> whether "virtualize x2APIC mode" was enabled.  This path used not to be
> triggered, since Windows didn't use x2APIC but rather their own
> synthetic APIC access MSRs; however it represented a security risk
> because the guest running in a SynIC-enabled VM could switch to x2APIC
> and thus obtain direct access to host APIC MSRs (thanks to Yang Zhang
> <yang.zhang.wz@gmail.com> for spotting this).
> 
> The patch fixes those omissions.
> 
> Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> Cc: Steve Rutherford <srutherford@google.com>
> Cc: Yang Zhang <yang.zhang.wz@gmail.com>

Thanks, I am queuing this patch for testing.  Just a little nit, commit 
messages usually refer to bugs in the present tense:

    kvm:vmx: more complete state update on APICv on/off
    
    The function to update APICv on/off state (in particular, to deactivate
    it when enabling Hyper-V SynIC) is incomplete: it doesn't adjust
    APICv-related fields among secondary processor-based VM-execution
    controls.  As a result, Windows 2012 guests get stuck when SynIC-based
    auto-EOI interrupt intersected with e.g. an IPI in the guest.
    
    In addition, the MSR intercept bitmap isn't updated every time "virtualize
    x2APIC mode" is toggled.  This path can only be triggered by a malicious
    guest, because Windows didn't use x2APIC but rather their own synthetic
    APIC access MSRs; however a guest running in a SynIC-enabled VM could
    switch to x2APIC and thus obtain direct access to host APIC MSRs.
    
    The patch fixes those omissions.

The idea is that the commit message is the body of an email message, and
therefore it describes the situation to the recipient before the change
is applied.

Thanks,

Paolo

> ---
> v2 -> v3:
>  - only switch to x2apic msr bitmap if virtualize x2apic mode is on in vmcs
> 
> v1 -> v2:
>  - only update relevant bits in the secondary exec control
>  - update msr intercept bitmap (also make x2apic msr bitmap always
>    correspond to APICv)
> 
>  arch/x86/kvm/vmx.c | 48 ++++++++++++++++++++++++++++++------------------
>  1 file changed, 30 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ee1c8a9..cef741a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2418,7 +2418,9 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
>  
>  	if (is_guest_mode(vcpu))
>  		msr_bitmap = vmx_msr_bitmap_nested;
> -	else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
> +	else if (cpu_has_secondary_exec_ctrls() &&
> +		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
> +		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
>  		if (is_long_mode(vcpu))
>  			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
>  		else
> @@ -4783,6 +4785,19 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  
>  	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
> +	if (cpu_has_secondary_exec_ctrls()) {
> +		if (kvm_vcpu_apicv_active(vcpu))
> +			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
> +				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> +		else
> +			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
> +					SECONDARY_EXEC_APIC_REGISTER_VIRT |
> +					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
> +	}
> +
> +	if (cpu_has_vmx_msr_bitmap())
> +		vmx_set_msr_bitmap(vcpu);
>  }
>  
>  static u32 vmx_exec_control(struct vcpu_vmx *vmx)
> @@ -6329,23 +6344,20 @@ static __init int hardware_setup(void)
>  
>  	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
>  
> -	if (enable_apicv) {
> -		for (msr = 0x800; msr <= 0x8ff; msr++)
> -			vmx_disable_intercept_msr_read_x2apic(msr);
> -
> -		/* According SDM, in x2apic mode, the whole id reg is used.
> -		 * But in KVM, it only use the highest eight bits. Need to
> -		 * intercept it */
> -		vmx_enable_intercept_msr_read_x2apic(0x802);
> -		/* TMCCT */
> -		vmx_enable_intercept_msr_read_x2apic(0x839);
> -		/* TPR */
> -		vmx_disable_intercept_msr_write_x2apic(0x808);
> -		/* EOI */
> -		vmx_disable_intercept_msr_write_x2apic(0x80b);
> -		/* SELF-IPI */
> -		vmx_disable_intercept_msr_write_x2apic(0x83f);
> -	}
> +	for (msr = 0x800; msr <= 0x8ff; msr++)
> +		vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +	/* According SDM, in x2apic mode, the whole id reg is used.  But in
> +	 * KVM, it only use the highest eight bits. Need to intercept it */
> +	vmx_enable_intercept_msr_read_x2apic(0x802);
> +	/* TMCCT */
> +	vmx_enable_intercept_msr_read_x2apic(0x839);
> +	/* TPR */
> +	vmx_disable_intercept_msr_write_x2apic(0x808);
> +	/* EOI */
> +	vmx_disable_intercept_msr_write_x2apic(0x80b);
> +	/* SELF-IPI */
> +	vmx_disable_intercept_msr_write_x2apic(0x83f);
>  
>  	if (enable_ept) {
>  		kvm_mmu_set_mask_ptes(0ull,
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang May 24, 2016, 1:23 a.m. UTC | #13
On 2016/5/23 22:18, Paolo Bonzini wrote:
>
>
> On 23/05/2016 03:34, Yang Zhang wrote:
>> On 2016/5/20 14:38, Denis V. Lunev wrote:
>>> On 05/20/2016 04:15 AM, Yang Zhang wrote:
>>>> On 2016/5/19 13:40, Denis V. Lunev wrote:
>>>>> On 05/19/2016 05:01 AM, Yang Zhang wrote:
>>>>>> On 2016/5/18 22:48, Roman Kagan wrote:
>>>>>>> The function to update APICv on/off state (in particular, to
>>>>>>> deactivate
>>>>>>> it when enabling Hyper-V SynIC), used to be incomplete: it didn't
>>>>>>> adjust
>>>>>>> APICv-related fields among secondary processor-based VM-execution
>>>>>>> controls.
>>>>>> Roman,
>>>>>>
>>>>>> I have question about the performance between APICv and Hyper-V SynIC.
>>>>>> As we known APICv is a hardware feature which including three
>>>>>> features: APIC register virtualization, virtual interrupt delivery and
>>>>>> Posted Interrupt. My gut feeling is that the average performance that
>>>>>> improved by APICv should greater than Hyper-v SynIC. Am i right? If
>>>>>> yes, current policy that disable the whole APICv seems too aggressive.
>>>>>>
>>>>> Argh.. We have faced this situation in Parallels Desktop may be
>>>>> 3 years ago. Unfortunately, there is no data at the moment.
>>>>> It was toooo old and made by other team. As far as I remember
>>>>> (for that time), interrupt delivery becomes faster, but operations
>>>>> with on of CR registers becomes much slower and general
>>>>> performance score becomes lower.
>>>>
>>>> I guess the data may be collected on KVM w/o APICv supporting.
>>>> Normally, APICv provides the faster way to delivery interrupt than
>>>> software solution.
>>>>
>>> this seems useless.
>>>
>>> Once again, interrupt delivery with APICv will be faster,
>>> this is out of question. Though integral tests can show
>>> performance degradation. I know, this sounds silly
>>> and this is test-dependent.
>>>
>>> We are going to make this testing after implementing
>>> of HyperV TSC page avoid extra VM exit on context
>>> switch. This seems more beneficial at the moment.
>>>
>>> For this reason APICv is disabled in Parallels Desktop
>>> and Parallels Server v6, which are not KVM based.
>>>
>>>>>
>>>>> The problem with SynIC is that it is mandatory prerequisite
>>>>> to enable HyperV bus in the guest, which is our final goal.
>>>>> Thus there is no other way for us.
>>>>
>>>> I see. Actually, we saw the performance improvement with SynIC timer
>>>> but we don't want to turn off APICv since we think it may hurt the
>>>> performance. Is it possible to turn on SynIC timer without disable
>>>> APICv?
>>>>
>>> unfortunately no. The problem is AutoEOI feature. At
>>> least we have no idea how to do that.
>>
>> Ok. Thanks for your explanation.
>
> You can search the KVM mailing list archives; there are some discussions
> between me, Andrey and Roman regarding APICv---when I tried their unit
> tests on a Haswell-EP machine I found that they broke due to AutoEOI,
> and we came up with the solution of disabling APICv.

Thanks. I will check it.

>
> For what it's worth, we've also seen only very small performance
> improvements from APICv, with the exception of nested virtualization
> where APICv's impact is large.

I think it depends on the workload. For some micro benchmarks, we have 
see more than 10% improvement, like ping latency testing. But for normal 
workload, you may only seen less than 2% improvement.

The reason i raise this concern is that VT-d PI also depends on APICv. 
This means all windows guests with SynIC enabled cannot benefit from 
VT-d PI.

>
> Paolo
>
Wincy Van May 24, 2016, 2:09 a.m. UTC | #14
On Tue, May 24, 2016 at 9:23 AM, Yang Zhang <yang.zhang.wz@gmail.com> wrote:
> On 2016/5/23 22:18, Paolo Bonzini wrote:
>>
>>
>>
>> On 23/05/2016 03:34, Yang Zhang wrote:
>>>
>>> On 2016/5/20 14:38, Denis V. Lunev wrote:
>>>>
>>>> On 05/20/2016 04:15 AM, Yang Zhang wrote:
>>>>>
>>>>> On 2016/5/19 13:40, Denis V. Lunev wrote:
>>>>>>
>>>>>> On 05/19/2016 05:01 AM, Yang Zhang wrote:
>>>>>>>
>>>>>>> On 2016/5/18 22:48, Roman Kagan wrote:
>>>>>>>>
>>>>>>>> The function to update APICv on/off state (in particular, to
>>>>>>>> deactivate
>>>>>>>> it when enabling Hyper-V SynIC), used to be incomplete: it didn't
>>>>>>>> adjust
>>>>>>>> APICv-related fields among secondary processor-based VM-execution
>>>>>>>> controls.
>>>>>>>
>>>>>>> Roman,
>>>>>>>
>>>>>>> I have question about the performance between APICv and Hyper-V
>>>>>>> SynIC.
>>>>>>> As we known APICv is a hardware feature which including three
>>>>>>> features: APIC register virtualization, virtual interrupt delivery
>>>>>>> and
>>>>>>> Posted Interrupt. My gut feeling is that the average performance that
>>>>>>> improved by APICv should greater than Hyper-v SynIC. Am i right? If
>>>>>>> yes, current policy that disable the whole APICv seems too
>>>>>>> aggressive.
>>>>>>>
>>>>>> Argh.. We have faced this situation in Parallels Desktop may be
>>>>>> 3 years ago. Unfortunately, there is no data at the moment.
>>>>>> It was toooo old and made by other team. As far as I remember
>>>>>> (for that time), interrupt delivery becomes faster, but operations
>>>>>> with on of CR registers becomes much slower and general
>>>>>> performance score becomes lower.
>>>>>
>>>>>
>>>>> I guess the data may be collected on KVM w/o APICv supporting.
>>>>> Normally, APICv provides the faster way to delivery interrupt than
>>>>> software solution.
>>>>>
>>>> this seems useless.
>>>>
>>>> Once again, interrupt delivery with APICv will be faster,
>>>> this is out of question. Though integral tests can show
>>>> performance degradation. I know, this sounds silly
>>>> and this is test-dependent.
>>>>
>>>> We are going to make this testing after implementing
>>>> of HyperV TSC page avoid extra VM exit on context
>>>> switch. This seems more beneficial at the moment.
>>>>
>>>> For this reason APICv is disabled in Parallels Desktop
>>>> and Parallels Server v6, which are not KVM based.
>>>>
>>>>>>
>>>>>> The problem with SynIC is that it is mandatory prerequisite
>>>>>> to enable HyperV bus in the guest, which is our final goal.
>>>>>> Thus there is no other way for us.
>>>>>
>>>>>
>>>>> I see. Actually, we saw the performance improvement with SynIC timer
>>>>> but we don't want to turn off APICv since we think it may hurt the
>>>>> performance. Is it possible to turn on SynIC timer without disable
>>>>> APICv?
>>>>>
>>>> unfortunately no. The problem is AutoEOI feature. At
>>>> least we have no idea how to do that.
>>>
>>>
>>> Ok. Thanks for your explanation.
>>
>>
>> You can search the KVM mailing list archives; there are some discussions
>> between me, Andrey and Roman regarding APICv---when I tried their unit
>> tests on a Haswell-EP machine I found that they broke due to AutoEOI,
>> and we came up with the solution of disabling APICv.
>
>
> Thanks. I will check it.
>
>>
>> For what it's worth, we've also seen only very small performance
>> improvements from APICv, with the exception of nested virtualization
>> where APICv's impact is large.
>
>
> I think it depends on the workload. For some micro benchmarks, we have see
> more than 10% improvement, like ping latency testing. But for normal
> workload, you may only seen less than 2% improvement.
>

APICv does have a big improvement for windows guest with some scenario.
One of our customers use Windows Server 2008 R2 to run a game server,
there are many small TCP packets transferring between the server and client.

The server side use kernel spin-lock frequently, NT kernel is
different with Linux,
it will raise TPR to 2 if a spin-lock accquired. We also see that the
context switch
rate on HOST is very high w/o APICv, and GUEST ping will raise up to ~2000ms,
even lost network respond. I think the frequently _tpr_below_threshold_ and
the _irq_window_ vmexits slow down the GUEST, and the interrupt of guest will
have a huge latency.

With APICv, the guest will run normally.


Thanks,
Wincy
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini May 24, 2016, 10:42 a.m. UTC | #15
On 24/05/2016 04:09, Wincy Van wrote:
> The server side use kernel spin-lock frequently, NT kernel is
> different with Linux,
> it will raise TPR to 2 if a spin-lock acquired. We also see that the
> context switch
> rate on HOST is very high w/o APICv, and GUEST ping will raise up to ~2000ms,
> even lost network respond. I think the frequently _tpr_below_threshold_ and
> the _irq_window_ vmexits slow down the GUEST, and the interrupt of guest will
> have a huge latency.
> 
> With APICv, the guest will run normally.

What actually makes a difference here is the self-IPI acceleration, not
simply raising the TPR.  Windows does a self-IPI when you schedule a DPC.

Thanks,

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wincy Van May 24, 2016, 12:03 p.m. UTC | #16
On Tue, May 24, 2016 at 6:42 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 24/05/2016 04:09, Wincy Van wrote:
>> The server side use kernel spin-lock frequently, NT kernel is
>> different with Linux,
>> it will raise TPR to 2 if a spin-lock acquired. We also see that the
>> context switch
>> rate on HOST is very high w/o APICv, and GUEST ping will raise up to ~2000ms,
>> even lost network respond. I think the frequently _tpr_below_threshold_ and
>> the _irq_window_ vmexits slow down the GUEST, and the interrupt of guest will
>> have a huge latency.
>>
>> With APICv, the guest will run normally.
>
> What actually makes a difference here is the self-IPI acceleration, not
> simply raising the TPR.  Windows does a self-IPI when you schedule a DPC.
>

OK, I see..
Thanks for your explanation :-)



Wincy
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee1c8a9..cef741a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2418,7 +2418,9 @@  static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
 
 	if (is_guest_mode(vcpu))
 		msr_bitmap = vmx_msr_bitmap_nested;
-	else if (vcpu->arch.apic_base & X2APIC_ENABLE) {
+	else if (cpu_has_secondary_exec_ctrls() &&
+		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
+		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
 		if (is_long_mode(vcpu))
 			msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
 		else
@@ -4783,6 +4785,19 @@  static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
+	if (cpu_has_secondary_exec_ctrls()) {
+		if (kvm_vcpu_apicv_active(vcpu))
+			vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
+				      SECONDARY_EXEC_APIC_REGISTER_VIRT |
+				      SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+		else
+			vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
+					SECONDARY_EXEC_APIC_REGISTER_VIRT |
+					SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
+	}
+
+	if (cpu_has_vmx_msr_bitmap())
+		vmx_set_msr_bitmap(vcpu);
 }
 
 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
@@ -6329,23 +6344,20 @@  static __init int hardware_setup(void)
 
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
-	if (enable_apicv) {
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			vmx_disable_intercept_msr_read_x2apic(msr);
-
-		/* According SDM, in x2apic mode, the whole id reg is used.
-		 * But in KVM, it only use the highest eight bits. Need to
-		 * intercept it */
-		vmx_enable_intercept_msr_read_x2apic(0x802);
-		/* TMCCT */
-		vmx_enable_intercept_msr_read_x2apic(0x839);
-		/* TPR */
-		vmx_disable_intercept_msr_write_x2apic(0x808);
-		/* EOI */
-		vmx_disable_intercept_msr_write_x2apic(0x80b);
-		/* SELF-IPI */
-		vmx_disable_intercept_msr_write_x2apic(0x83f);
-	}
+	for (msr = 0x800; msr <= 0x8ff; msr++)
+		vmx_disable_intercept_msr_read_x2apic(msr);
+
+	/* According SDM, in x2apic mode, the whole id reg is used.  But in
+	 * KVM, it only use the highest eight bits. Need to intercept it */
+	vmx_enable_intercept_msr_read_x2apic(0x802);
+	/* TMCCT */
+	vmx_enable_intercept_msr_read_x2apic(0x839);
+	/* TPR */
+	vmx_disable_intercept_msr_write_x2apic(0x808);
+	/* EOI */
+	vmx_disable_intercept_msr_write_x2apic(0x80b);
+	/* SELF-IPI */
+	vmx_disable_intercept_msr_write_x2apic(0x83f);
 
 	if (enable_ept) {
 		kvm_mmu_set_mask_ptes(0ull,