diff mbox

KVM: VMX: Enable MSR-BASED TPR shadow even if w/o APICv

Message ID 1473839936-3393-1-git-send-email-wanpeng.li@hotmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wanpeng Li Sept. 14, 2016, 7:58 a.m. UTC
From: Wanpeng Li <wanpeng.li@hotmail.com>

I observed that kvmvapic(to optimize flexpriority=N or AMD) is used 
to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542 
x86, apicv: add virtual x2apic support) disable virtual x2apic mode 
completely if w/o APICv, and the author also told me that windows guest
can't enter into x2apic mode when he developed the APICv feature several 
years ago. However, it is not truth currently, Interrupt Remapping and 
vIOMMU is added to qemu and the developers from Intel test windows 8 can 
work in x2apic mode w/ Interrupt Remapping enabled recently. 

This patch enables TPR shadow for virtual x2apic mode to boost 
windows guest in x2apic mode even if w/o APICv.

Can pass the kvm-unit-test.

Suggested-by: Wincy Van <fanwenyi0529@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Wincy Van <fanwenyi0529@gmail.com>
Cc: Yang Zhang <yang.zhang.wz@gmail.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 arch/x86/kvm/vmx.c | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

Comments

Paolo Bonzini Sept. 14, 2016, 9:40 a.m. UTC | #1
On 14/09/2016 09:58, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used 
> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542 
> x86, apicv: add virtual x2apic support) disable virtual x2apic mode 
> completely if w/o APICv, and the author also told me that windows guest
> can't enter into x2apic mode when he developed the APICv feature several 
> years ago. However, it is not truth currently, Interrupt Remapping and 
> vIOMMU is added to qemu and the developers from Intel test windows 8 can 
> work in x2apic mode w/ Interrupt Remapping enabled recently. 
> 
> This patch enables TPR shadow for virtual x2apic mode to boost 
> windows guest in x2apic mode even if w/o APICv.
> 
> Can pass the kvm-unit-test.

Ok, now I see what you meant; this actually makes sense.  I don't expect
much speedup though, because Linux doesn't touch the TPR and Windows is
likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
this reason I'm not sure if the patch is useful in practice.

To test this patch, you have to run kvm-unit-tests with Hyper-V
synthetic interrupt enabled.  Did you do this?

Paolo

> Suggested-by: Wincy Van <fanwenyi0529@gmail.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Wincy Van <fanwenyi0529@gmail.com>
> Cc: Yang Zhang <yang.zhang.wz@gmail.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---
>  arch/x86/kvm/vmx.c | 41 ++++++++++++++++++++++-------------------
>  1 file changed, 22 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5cede40..e703129 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6336,7 +6336,7 @@ static void wakeup_handler(void)
>  
>  static __init int hardware_setup(void)
>  {
> -	int r = -ENOMEM, i, msr;
> +	int r = -ENOMEM, i;
>  
>  	rdmsrl_safe(MSR_EFER, &host_efer);
>  
> @@ -6464,18 +6464,6 @@ static __init int hardware_setup(void)
>  
>  	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
>  
> -	for (msr = 0x800; msr <= 0x8ff; msr++)
> -		vmx_disable_intercept_msr_read_x2apic(msr);
> -
> -	/* TMCCT */
> -	vmx_enable_intercept_msr_read_x2apic(0x839);
> -	/* TPR */
> -	vmx_disable_intercept_msr_write_x2apic(0x808);
> -	/* EOI */
> -	vmx_disable_intercept_msr_write_x2apic(0x80b);
> -	/* SELF-IPI */
> -	vmx_disable_intercept_msr_write_x2apic(0x83f);
> -
>  	if (enable_ept) {
>  		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> @@ -8435,12 +8423,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>  		return;
>  	}
>  
> -	/*
> -	 * There is not point to enable virtualize x2apic without enable
> -	 * apicv
> -	 */
> -	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
> -				!kvm_vcpu_apicv_active(vcpu))
> +	if (!cpu_has_vmx_virtualize_x2apic_mode())
>  		return;
>  
>  	if (!cpu_need_tpr_shadow(vcpu))
> @@ -8449,8 +8432,28 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>  	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
>  
>  	if (set) {
> +		int msr;
> +
>  		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>  		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +
> +		if (kvm_vcpu_apicv_active(vcpu)) {
> +			for (msr = 0x800; msr <= 0x8ff; msr++)
> +				vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +			/* TMCCT */
> +			vmx_enable_intercept_msr_read_x2apic(0x839);
> +			/* TPR */
> +			vmx_disable_intercept_msr_write_x2apic(0x808);
> +			/* EOI */
> +			vmx_disable_intercept_msr_write_x2apic(0x80b);
> +			/* SELF-IPI */
> +			vmx_disable_intercept_msr_write_x2apic(0x83f);
> +		} else if (vmx_exec_control(to_vmx(vcpu)) & CPU_BASED_TPR_SHADOW) {
> +			/* TPR */
> +			vmx_disable_intercept_msr_read_x2apic(0x808);
> +			vmx_disable_intercept_msr_write_x2apic(0x808);
> +		}
>  	} else {
>  		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>  		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Sept. 14, 2016, 12:03 p.m. UTC | #2
2016-09-14 11:40+0200, Paolo Bonzini:
> On 14/09/2016 09:58, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>> 
>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used 
>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542 
>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode 
>> completely if w/o APICv, and the author also told me that windows guest
>> can't enter into x2apic mode when he developed the APICv feature several 
>> years ago. However, it is not truth currently, Interrupt Remapping and 
>> vIOMMU is added to qemu and the developers from Intel test windows 8 can 
>> work in x2apic mode w/ Interrupt Remapping enabled recently. 
>> 
>> This patch enables TPR shadow for virtual x2apic mode to boost 
>> windows guest in x2apic mode even if w/o APICv.
>> 
>> Can pass the kvm-unit-test.
> 
> Ok, now I see what you meant; this actually makes sense.  I don't expect
> much speedup though, because Linux doesn't touch the TPR and Windows is
> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
> this reason I'm not sure if the patch is useful in practice.

I agree with Paolo on the use case -- what configurations benefit from
this change?

> To test this patch, you have to run kvm-unit-tests with Hyper-V
> synthetic interrupt enabled.  Did you do this?

The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
guests used synic (=> disabled apicv) and one didn't.
You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
(or completely rewrite our management).
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 15, 2016, 12:07 a.m. UTC | #3
2016-09-14 17:40 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>
>
> On 14/09/2016 09:58, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>> completely if w/o APICv, and the author also told me that windows guest
>> can't enter into x2apic mode when he developed the APICv feature several
>> years ago. However, it is not truth currently, Interrupt Remapping and
>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>
>> This patch enables TPR shadow for virtual x2apic mode to boost
>> windows guest in x2apic mode even if w/o APICv.
>>
>> Can pass the kvm-unit-test.
>
> Ok, now I see what you meant; this actually makes sense.  I don't expect
> much speedup though, because Linux doesn't touch the TPR and Windows is
> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
> this reason I'm not sure if the patch is useful in practice.

We should use more newer windows guests which have Hyper-V synthetic
interrupt support, however, older windows guests can't get benefit.

>
> To test this patch, you have to run kvm-unit-tests with Hyper-V
> synthetic interrupt enabled.  Did you do this?

qemu-system-x86_64 -enable-kvm -machine kernel_irqchip=split -cpu
kvm64,hv_synic -device pc-testdev -device
isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device
pci-testdev -kernel x86/hyperv_synic.flat
enabling apic
paging enabled
cr0 = 80010011
cr3 = 7fff000
cr4 = 20
enabling apic
ncpus = 1
prepare
test 0 -> 0

I run ./x86-run x86/hyperv_synic.flat against latest linus tree, it
just stuck here.

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 15, 2016, 12:17 a.m. UTC | #4
2016-09-15 8:07 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
> 2016-09-14 17:40 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>>
>>
>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>> completely if w/o APICv, and the author also told me that windows guest
>>> can't enter into x2apic mode when he developed the APICv feature several
>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>
>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>> windows guest in x2apic mode even if w/o APICv.
>>>
>>> Can pass the kvm-unit-test.
>>
>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>> much speedup though, because Linux doesn't touch the TPR and Windows is
>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>> this reason I'm not sure if the patch is useful in practice.
>
> We should use more newer windows guests which have Hyper-V synthetic
> interrupt support, however, older windows guests can't get benefit.
>
>>
>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>> synthetic interrupt enabled.  Did you do this?
>
> qemu-system-x86_64 -enable-kvm -machine kernel_irqchip=split -cpu
> kvm64,hv_synic -device pc-testdev -device
> isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device
> pci-testdev -kernel x86/hyperv_synic.flat
> enabling apic
> paging enabled
> cr0 = 80010011
> cr3 = 7fff000
> cr4 = 20
> enabling apic
> ncpus = 1
> prepare
> test 0 -> 0
>
> I run ./x86-run x86/hyperv_synic.flat against latest linus tree, it
> just stuck here.

Oops, I miss the "-device hyperv-testdev" parameter. And the patch
passes the hyperv_sync.flat test case.

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 15, 2016, 1:19 a.m. UTC | #5
2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2016-09-14 11:40+0200, Paolo Bonzini:
>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>> completely if w/o APICv, and the author also told me that windows guest
>>> can't enter into x2apic mode when he developed the APICv feature several
>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>
>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>> windows guest in x2apic mode even if w/o APICv.
>>>
>>> Can pass the kvm-unit-test.
>>
>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>> much speedup though, because Linux doesn't touch the TPR and Windows is
>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>> this reason I'm not sure if the patch is useful in practice.
>
> I agree with Paolo on the use case -- what configurations benefit from
> this change?

Old windows guest w/o Hyper-V synthetic interrupt support.

>
>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>> synthetic interrupt enabled.  Did you do this?
>
> The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
> guests used synic (=> disabled apicv) and one didn't.
> You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
> (or completely rewrite our management).
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mika Penttilä Sept. 15, 2016, 4:08 a.m. UTC | #6
On 09/14/2016 10:58 AM, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used 
> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542 
> x86, apicv: add virtual x2apic support) disable virtual x2apic mode 
> completely if w/o APICv, and the author also told me that windows guest
> can't enter into x2apic mode when he developed the APICv feature several 
> years ago. However, it is not truth currently, Interrupt Remapping and 
> vIOMMU is added to qemu and the developers from Intel test windows 8 can 
> work in x2apic mode w/ Interrupt Remapping enabled recently. 
> 
> This patch enables TPR shadow for virtual x2apic mode to boost 
> windows guest in x2apic mode even if w/o APICv.
> 
> Can pass the kvm-unit-test.
> 

While at it, is the vmx flexpriotity stuff still valid code?
AFAICS it gets enabled iff TPR shadow is on. flexpriority
is on when :

(flexpriority_enabled && lapic_in_kernel && cpu_has_vmx_tpr_shadow && cpu_has_vmx_virtualize_apic_accesses)

But apic accesses to TPR mmio are not then trapped and TPR changes not reported because
the “use TPR shadow” VM-execution control is 1.

Thanks,
Mika


> Suggested-by: Wincy Van <fanwenyi0529@gmail.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Wincy Van <fanwenyi0529@gmail.com>
> Cc: Yang Zhang <yang.zhang.wz@gmail.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---
>  arch/x86/kvm/vmx.c | 41 ++++++++++++++++++++++-------------------
>  1 file changed, 22 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5cede40..e703129 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6336,7 +6336,7 @@ static void wakeup_handler(void)
>  
>  static __init int hardware_setup(void)
>  {
> -	int r = -ENOMEM, i, msr;
> +	int r = -ENOMEM, i;
>  
>  	rdmsrl_safe(MSR_EFER, &host_efer);
>  
> @@ -6464,18 +6464,6 @@ static __init int hardware_setup(void)
>  
>  	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
>  
> -	for (msr = 0x800; msr <= 0x8ff; msr++)
> -		vmx_disable_intercept_msr_read_x2apic(msr);
> -
> -	/* TMCCT */
> -	vmx_enable_intercept_msr_read_x2apic(0x839);
> -	/* TPR */
> -	vmx_disable_intercept_msr_write_x2apic(0x808);
> -	/* EOI */
> -	vmx_disable_intercept_msr_write_x2apic(0x80b);
> -	/* SELF-IPI */
> -	vmx_disable_intercept_msr_write_x2apic(0x83f);
> -
>  	if (enable_ept) {
>  		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
> @@ -8435,12 +8423,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>  		return;
>  	}
>  
> -	/*
> -	 * There is not point to enable virtualize x2apic without enable
> -	 * apicv
> -	 */
> -	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
> -				!kvm_vcpu_apicv_active(vcpu))
> +	if (!cpu_has_vmx_virtualize_x2apic_mode())
>  		return;
>  
>  	if (!cpu_need_tpr_shadow(vcpu))
> @@ -8449,8 +8432,28 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>  	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
>  
>  	if (set) {
> +		int msr;
> +
>  		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>  		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
> +
> +		if (kvm_vcpu_apicv_active(vcpu)) {
> +			for (msr = 0x800; msr <= 0x8ff; msr++)
> +				vmx_disable_intercept_msr_read_x2apic(msr);
> +
> +			/* TMCCT */
> +			vmx_enable_intercept_msr_read_x2apic(0x839);
> +			/* TPR */
> +			vmx_disable_intercept_msr_write_x2apic(0x808);
> +			/* EOI */
> +			vmx_disable_intercept_msr_write_x2apic(0x80b);
> +			/* SELF-IPI */
> +			vmx_disable_intercept_msr_write_x2apic(0x83f);
> +		} else if (vmx_exec_control(to_vmx(vcpu)) & CPU_BASED_TPR_SHADOW) {
> +			/* TPR */
> +			vmx_disable_intercept_msr_read_x2apic(0x808);
> +			vmx_disable_intercept_msr_write_x2apic(0x808);
> +		}
>  	} else {
>  		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>  		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 15, 2016, 4:25 a.m. UTC | #7
2016-09-15 12:08 GMT+08:00 Mika Penttilä <mika.penttila@nextfour.com>:
> On 09/14/2016 10:58 AM, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>> completely if w/o APICv, and the author also told me that windows guest
>> can't enter into x2apic mode when he developed the APICv feature several
>> years ago. However, it is not truth currently, Interrupt Remapping and
>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>
>> This patch enables TPR shadow for virtual x2apic mode to boost
>> windows guest in x2apic mode even if w/o APICv.
>>
>> Can pass the kvm-unit-test.
>>
>
> While at it, is the vmx flexpriotity stuff still valid code?
> AFAICS it gets enabled iff TPR shadow is on. flexpriority
> is on when :
>
> (flexpriority_enabled && lapic_in_kernel && cpu_has_vmx_tpr_shadow && cpu_has_vmx_virtualize_apic_accesses)
>
> But apic accesses to TPR mmio are not then trapped and TPR changes not reported because
> the “use TPR shadow” VM-execution control is 1.

Please note the patch is for MSR-BASED TPR shadow w/o APICv, TPR
shadow can work correctly in other configurations.

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mika Penttilä Sept. 15, 2016, 4:46 a.m. UTC | #8
On 09/15/2016 07:25 AM, Wanpeng Li wrote:
> 2016-09-15 12:08 GMT+08:00 Mika Penttilä <mika.penttila@nextfour.com>:
>> On 09/14/2016 10:58 AM, Wanpeng Li wrote:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>> completely if w/o APICv, and the author also told me that windows guest
>>> can't enter into x2apic mode when he developed the APICv feature several
>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>
>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>> windows guest in x2apic mode even if w/o APICv.
>>>
>>> Can pass the kvm-unit-test.
>>>
>>
>> While at it, is the vmx flexpriotity stuff still valid code?
>> AFAICS it gets enabled iff TPR shadow is on. flexpriority
>> is on when :
>>
>> (flexpriority_enabled && lapic_in_kernel && cpu_has_vmx_tpr_shadow && cpu_has_vmx_virtualize_apic_accesses)
>>
>> But apic accesses to TPR mmio are not then trapped and TPR changes not reported because
>> the “use TPR shadow” VM-execution control is 1.
> 
> Please note the patch is for MSR-BASED TPR shadow w/o APICv, TPR
> shadow can work correctly in other configurations.
> 
> Regards,
> Wanpeng Li
> 

Hi,

Yes I see, this is slightly offtopic but while at flexpriority, how is it relevant in current kvm?
In other words I see it as dead code, because it is enabled only when TPR shadow is enabled,
and as such ineffective because TPR shadow disables the wmexits tpr reporting uses.

Thanks,
Mika

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Sept. 15, 2016, 6:29 a.m. UTC | #9
On 15/09/2016 03:19, Wanpeng Li wrote:
> 2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>> 2016-09-14 11:40+0200, Paolo Bonzini:
>>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>
>>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>>> completely if w/o APICv, and the author also told me that windows guest
>>>> can't enter into x2apic mode when he developed the APICv feature several
>>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>>
>>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>>> windows guest in x2apic mode even if w/o APICv.
>>>>
>>>> Can pass the kvm-unit-test.
>>>
>>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>>> much speedup though, because Linux doesn't touch the TPR and Windows is
>>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>>> this reason I'm not sure if the patch is useful in practice.
>>
>> I agree with Paolo on the use case -- what configurations benefit from
>> this change?
> 
> Old windows guest w/o Hyper-V synthetic interrupt support.

... but with Hyper-V synthetic interrupt support enabled in the QEMU
command line.  Right?

Paolo

>>
>>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>>> synthetic interrupt enabled.  Did you do this?
>>
>> The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
>> guests used synic (=> disabled apicv) and one didn't.
>> You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
>> (or completely rewrite our management).
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Sept. 15, 2016, 6:30 a.m. UTC | #10
On 15/09/2016 06:08, Mika Penttilä wrote:
> On 09/14/2016 10:58 AM, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used 
>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542 
>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode 
>> completely if w/o APICv, and the author also told me that windows guest
>> can't enter into x2apic mode when he developed the APICv feature several 
>> years ago. However, it is not truth currently, Interrupt Remapping and 
>> vIOMMU is added to qemu and the developers from Intel test windows 8 can 
>> work in x2apic mode w/ Interrupt Remapping enabled recently. 
>>
>> This patch enables TPR shadow for virtual x2apic mode to boost 
>> windows guest in x2apic mode even if w/o APICv.
>>
>> Can pass the kvm-unit-test.
>>
> 
> While at it, is the vmx flexpriotity stuff still valid code?
> AFAICS it gets enabled iff TPR shadow is on.

flexpriority is an Intel commercial name for TPR shadow.

Paolo

 flexpriority
> is on when :
> 
> (flexpriority_enabled && lapic_in_kernel && cpu_has_vmx_tpr_shadow && cpu_has_vmx_virtualize_apic_accesses)
> 
> But apic accesses to TPR mmio are not then trapped and TPR changes not reported because
> the “use TPR shadow” VM-execution control is 1.
> 
> Thanks,
> Mika
> 
> 
>> Suggested-by: Wincy Van <fanwenyi0529@gmail.com>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Cc: Wincy Van <fanwenyi0529@gmail.com>
>> Cc: Yang Zhang <yang.zhang.wz@gmail.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>> ---
>>  arch/x86/kvm/vmx.c | 41 ++++++++++++++++++++++-------------------
>>  1 file changed, 22 insertions(+), 19 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 5cede40..e703129 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -6336,7 +6336,7 @@ static void wakeup_handler(void)
>>  
>>  static __init int hardware_setup(void)
>>  {
>> -	int r = -ENOMEM, i, msr;
>> +	int r = -ENOMEM, i;
>>  
>>  	rdmsrl_safe(MSR_EFER, &host_efer);
>>  
>> @@ -6464,18 +6464,6 @@ static __init int hardware_setup(void)
>>  
>>  	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
>>  
>> -	for (msr = 0x800; msr <= 0x8ff; msr++)
>> -		vmx_disable_intercept_msr_read_x2apic(msr);
>> -
>> -	/* TMCCT */
>> -	vmx_enable_intercept_msr_read_x2apic(0x839);
>> -	/* TPR */
>> -	vmx_disable_intercept_msr_write_x2apic(0x808);
>> -	/* EOI */
>> -	vmx_disable_intercept_msr_write_x2apic(0x80b);
>> -	/* SELF-IPI */
>> -	vmx_disable_intercept_msr_write_x2apic(0x83f);
>> -
>>  	if (enable_ept) {
>>  		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
>>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
>> @@ -8435,12 +8423,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>>  		return;
>>  	}
>>  
>> -	/*
>> -	 * There is not point to enable virtualize x2apic without enable
>> -	 * apicv
>> -	 */
>> -	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
>> -				!kvm_vcpu_apicv_active(vcpu))
>> +	if (!cpu_has_vmx_virtualize_x2apic_mode())
>>  		return;
>>  
>>  	if (!cpu_need_tpr_shadow(vcpu))
>> @@ -8449,8 +8432,28 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
>>  	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
>>  
>>  	if (set) {
>> +		int msr;
>> +
>>  		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>  		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>> +
>> +		if (kvm_vcpu_apicv_active(vcpu)) {
>> +			for (msr = 0x800; msr <= 0x8ff; msr++)
>> +				vmx_disable_intercept_msr_read_x2apic(msr);
>> +
>> +			/* TMCCT */
>> +			vmx_enable_intercept_msr_read_x2apic(0x839);
>> +			/* TPR */
>> +			vmx_disable_intercept_msr_write_x2apic(0x808);
>> +			/* EOI */
>> +			vmx_disable_intercept_msr_write_x2apic(0x80b);
>> +			/* SELF-IPI */
>> +			vmx_disable_intercept_msr_write_x2apic(0x83f);
>> +		} else if (vmx_exec_control(to_vmx(vcpu)) & CPU_BASED_TPR_SHADOW) {
>> +			/* TPR */
>> +			vmx_disable_intercept_msr_read_x2apic(0x808);
>> +			vmx_disable_intercept_msr_write_x2apic(0x808);
>> +		}
>>  	} else {
>>  		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
>>  		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>>
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 15, 2016, 6:40 a.m. UTC | #11
2016-09-15 14:29 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>
>
> On 15/09/2016 03:19, Wanpeng Li wrote:
>> 2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>>> 2016-09-14 11:40+0200, Paolo Bonzini:
>>>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>>
>>>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>>>> completely if w/o APICv, and the author also told me that windows guest
>>>>> can't enter into x2apic mode when he developed the APICv feature several
>>>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>>>
>>>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>>>> windows guest in x2apic mode even if w/o APICv.
>>>>>
>>>>> Can pass the kvm-unit-test.
>>>>
>>>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>>>> much speedup though, because Linux doesn't touch the TPR and Windows is
>>>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>>>> this reason I'm not sure if the patch is useful in practice.
>>>
>>> I agree with Paolo on the use case -- what configurations benefit from
>>> this change?
>>
>> Old windows guest w/o Hyper-V synthetic interrupt support.
>
> ... but with Hyper-V synthetic interrupt support enabled in the QEMU
> command line.  Right?

I think so. :)

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 15, 2016, 7:05 a.m. UTC | #12
2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2016-09-14 11:40+0200, Paolo Bonzini:
>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>> completely if w/o APICv, and the author also told me that windows guest
>>> can't enter into x2apic mode when he developed the APICv feature several
>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>
>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>> windows guest in x2apic mode even if w/o APICv.
>>>
>>> Can pass the kvm-unit-test.
>>
>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>> much speedup though, because Linux doesn't touch the TPR and Windows is
>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>> this reason I'm not sure if the patch is useful in practice.
>
> I agree with Paolo on the use case -- what configurations benefit from
> this change?
>
>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>> synthetic interrupt enabled.  Did you do this?
>
> The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
> guests used synic (=> disabled apicv) and one didn't.
> You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
> (or completely rewrite our management).

Do you think introduce per-VM x2apic msr bitmap make sense?

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Sept. 15, 2016, 3:58 p.m. UTC | #13
2016-09-15 15:05+0800, Wanpeng Li:
> 2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>> 2016-09-14 11:40+0200, Paolo Bonzini:
>>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>
>>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>>> completely if w/o APICv, and the author also told me that windows guest
>>>> can't enter into x2apic mode when he developed the APICv feature several
>>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>>
>>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>>> windows guest in x2apic mode even if w/o APICv.
>>>>
>>>> Can pass the kvm-unit-test.
>>>
>>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>>> much speedup though, because Linux doesn't touch the TPR and Windows is
>>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>>> this reason I'm not sure if the patch is useful in practice.
>>
>> I agree with Paolo on the use case -- what configurations benefit from
>> this change?
>>
>>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>>> synthetic interrupt enabled.  Did you do this?
>>
>> The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
>> guests used synic (=> disabled apicv) and one didn't.
>> You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
>> (or completely rewrite our management).
> 
> Do you think introduce per-VM x2apic msr bitmap make sense?

Not much.  It would still need different msr bitmaps for VCPUs in
various modes, so it would take more memory and be slower without giving
nicer code as we'd have to do pretty much the same as we do now.
We could improve clarity of the caching solution instead ...

Per-VCPU could allow a slightly clearer design, but that is very
wasteful -- the caching isn't that bad.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 18, 2016, 6:53 a.m. UTC | #14
2016-09-15 23:58 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2016-09-15 15:05+0800, Wanpeng Li:
>> 2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>>> 2016-09-14 11:40+0200, Paolo Bonzini:
>>>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>>
>>>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>>>> completely if w/o APICv, and the author also told me that windows guest
>>>>> can't enter into x2apic mode when he developed the APICv feature several
>>>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>>>
>>>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>>>> windows guest in x2apic mode even if w/o APICv.
>>>>>
>>>>> Can pass the kvm-unit-test.
>>>>
>>>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>>>> much speedup though, because Linux doesn't touch the TPR and Windows is
>>>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>>>> this reason I'm not sure if the patch is useful in practice.
>>>
>>> I agree with Paolo on the use case -- what configurations benefit from
>>> this change?
>>>
>>>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>>>> synthetic interrupt enabled.  Did you do this?
>>>
>>> The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
>>> guests used synic (=> disabled apicv) and one didn't.
>>> You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
>>> (or completely rewrite our management).
>>
>> Do you think introduce per-VM x2apic msr bitmap make sense?
>
> Not much.  It would still need different msr bitmaps for VCPUs in
> various modes, so it would take more memory and be slower without giving
> nicer code as we'd have to do pretty much the same as we do now.
> We could improve clarity of the caching solution instead ...
>
> Per-VCPU could allow a slightly clearer design, but that is very
> wasteful -- the caching isn't that bad.

Could you elaborate the caching design in your mind? :) In addition,
I'm not sure whether we still can get benefit from x2apic tpr shadow
w/ APICv since the overhead of the other bitmaps/caching.

Btw, I heard from Tianyu from Intel, you said there was a x2apic bug
in kvm forum and the bug maybe in kvm, I guess I meet the same bug
when run a windows guest(server version of windows 7, 2008 or 2012) w/
x2apic enabled in guest and -machine q35,kernel_irqchip=spit -device
intel-iommu, intremap=on in the QEMU command line.

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 18, 2016, 6:55 a.m. UTC | #15
2016-09-18 14:53 GMT+08:00 Wanpeng Li <kernellwp@gmail.com>:
> 2016-09-15 23:58 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>> 2016-09-15 15:05+0800, Wanpeng Li:
>>> 2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>>>> 2016-09-14 11:40+0200, Paolo Bonzini:
>>>>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>>>
>>>>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>>>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>>>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>>>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>>>>> completely if w/o APICv, and the author also told me that windows guest
>>>>>> can't enter into x2apic mode when he developed the APICv feature several
>>>>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>>>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>>>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>>>>
>>>>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>>>>> windows guest in x2apic mode even if w/o APICv.
>>>>>>
>>>>>> Can pass the kvm-unit-test.
>>>>>
>>>>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>>>>> much speedup though, because Linux doesn't touch the TPR and Windows is
>>>>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>>>>> this reason I'm not sure if the patch is useful in practice.
>>>>
>>>> I agree with Paolo on the use case -- what configurations benefit from
>>>> this change?
>>>>
>>>>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>>>>> synthetic interrupt enabled.  Did you do this?
>>>>
>>>> The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
>>>> guests used synic (=> disabled apicv) and one didn't.
>>>> You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
>>>> (or completely rewrite our management).
>>>
>>> Do you think introduce per-VM x2apic msr bitmap make sense?
>>
>> Not much.  It would still need different msr bitmaps for VCPUs in
>> various modes, so it would take more memory and be slower without giving
>> nicer code as we'd have to do pretty much the same as we do now.
>> We could improve clarity of the caching solution instead ...
>>
>> Per-VCPU could allow a slightly clearer design, but that is very
>> wasteful -- the caching isn't that bad.
>
> Could you elaborate the caching design in your mind? :) In addition,
> I'm not sure whether we still can get benefit from x2apic tpr shadow
> w/ APICv since the overhead of the other bitmaps/caching.

w/o

>
> Btw, I heard from Tianyu from Intel, you said there was a x2apic bug
> in kvm forum and the bug maybe in kvm, I guess I meet the same bug
> when run a windows guest(server version of windows 7, 2008 or 2012) w/
> x2apic enabled in guest and -machine q35,kernel_irqchip=spit -device
> intel-iommu, intremap=on in the QEMU command line.
>
> Regards,
> Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Sept. 19, 2016, 1:44 p.m. UTC | #16
2016-09-18 14:53+0800, Wanpeng Li:
> 2016-09-15 23:58 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>> 2016-09-15 15:05+0800, Wanpeng Li:
>>> 2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>>>> 2016-09-14 11:40+0200, Paolo Bonzini:
>>>>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>>>
>>>>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>>>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>>>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>>>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>>>>> completely if w/o APICv, and the author also told me that windows guest
>>>>>> can't enter into x2apic mode when he developed the APICv feature several
>>>>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>>>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>>>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>>>>
>>>>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>>>>> windows guest in x2apic mode even if w/o APICv.
>>>>>>
>>>>>> Can pass the kvm-unit-test.
>>>>>
>>>>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>>>>> much speedup though, because Linux doesn't touch the TPR and Windows is
>>>>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>>>>> this reason I'm not sure if the patch is useful in practice.
>>>>
>>>> I agree with Paolo on the use case -- what configurations benefit from
>>>> this change?
>>>>
>>>>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>>>>> synthetic interrupt enabled.  Did you do this?
>>>>
>>>> The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
>>>> guests used synic (=> disabled apicv) and one didn't.
>>>> You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
>>>> (or completely rewrite our management).
>>>
>>> Do you think introduce per-VM x2apic msr bitmap make sense?
>>
>> Not much.  It would still need different msr bitmaps for VCPUs in
>> various modes, so it would take more memory and be slower without giving
>> nicer code as we'd have to do pretty much the same as we do now.
>> We could improve clarity of the caching solution instead ...
>>
>> Per-VCPU could allow a slightly clearer design, but that is very
>> wasteful -- the caching isn't that bad.
> 
> Could you elaborate the caching design in your mind? :)

The one we already do -- precompute all possible bitmaps at KVM
initialization and assign the appropriate ones at runtime.

>                                                         In addition,
> I'm not sure whether we still can get benefit from x2apic tpr shadow
> w/o APICv since the overhead of the other bitmaps/caching.

Overhead from assigning a cached MSR bitmap should be less than one VM
exit caused by a TPR write when there are no pending interrupts.
Are there other sources of overhead?

> Btw, I heard from Tianyu from Intel, you said there was a x2apic bug
> in kvm forum and the bug maybe in kvm, I guess I meet the same bug
> when run a windows guest(server version of windows 7, 2008 or 2012) w/
> x2apic enabled in guest and -machine q35,kernel_irqchip=spit -device
> intel-iommu, intremap=on in the QEMU command line.

Does it happen when you are running less than 8 VCPUs (max APIC ID < 8)?
QEMU always enabled x2APIC support in IOMMU (EIME) even though it
doesn't work under some configurations.

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 20, 2016, 12:18 a.m. UTC | #17
2016-09-19 21:44 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2016-09-18 14:53+0800, Wanpeng Li:
>> 2016-09-15 23:58 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>>> 2016-09-15 15:05+0800, Wanpeng Li:
>>>> 2016-09-14 20:03 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
>>>>> 2016-09-14 11:40+0200, Paolo Bonzini:
>>>>>> On 14/09/2016 09:58, Wanpeng Li wrote:
>>>>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>>>>
>>>>>>> I observed that kvmvapic(to optimize flexpriority=N or AMD) is used
>>>>>>> to boost TPR access when testing kvm-unit-test/eventinj.flat tpr case
>>>>>>> on my haswell desktop (w/ flexpriority, w/o APICv). Commit (8d14695f9542
>>>>>>> x86, apicv: add virtual x2apic support) disable virtual x2apic mode
>>>>>>> completely if w/o APICv, and the author also told me that windows guest
>>>>>>> can't enter into x2apic mode when he developed the APICv feature several
>>>>>>> years ago. However, it is not truth currently, Interrupt Remapping and
>>>>>>> vIOMMU is added to qemu and the developers from Intel test windows 8 can
>>>>>>> work in x2apic mode w/ Interrupt Remapping enabled recently.
>>>>>>>
>>>>>>> This patch enables TPR shadow for virtual x2apic mode to boost
>>>>>>> windows guest in x2apic mode even if w/o APICv.
>>>>>>>
>>>>>>> Can pass the kvm-unit-test.
>>>>>>
>>>>>> Ok, now I see what you meant; this actually makes sense.  I don't expect
>>>>>> much speedup though, because Linux doesn't touch the TPR and Windows is
>>>>>> likely going to use the Hyper-V APIC MSRs when APICv is disabled.  For
>>>>>> this reason I'm not sure if the patch is useful in practice.
>>>>>
>>>>> I agree with Paolo on the use case -- what configurations benefit from
>>>>> this change?
>>>>>
>>>>>> To test this patch, you have to run kvm-unit-tests with Hyper-V
>>>>>> synthetic interrupt enabled.  Did you do this?
>>>>>
>>>>> The patch is buggy.  MSR bitmaps are global and we'd have a CVE if one
>>>>> guests used synic (=> disabled apicv) and one didn't.
>>>>> You'd want a new set of bitmaps and assign them in vmx_set_msr_bitmap()
>>>>> (or completely rewrite our management).
>>>>
>>>> Do you think introduce per-VM x2apic msr bitmap make sense?
>>>
>>> Not much.  It would still need different msr bitmaps for VCPUs in
>>> various modes, so it would take more memory and be slower without giving
>>> nicer code as we'd have to do pretty much the same as we do now.
>>> We could improve clarity of the caching solution instead ...
>>>
>>> Per-VCPU could allow a slightly clearer design, but that is very
>>> wasteful -- the caching isn't that bad.
>>
>> Could you elaborate the caching design in your mind? :)
>
> The one we already do -- precompute all possible bitmaps at KVM
> initialization and assign the appropriate ones at runtime.

I see. :)

>
>>                                                         In addition,
>> I'm not sure whether we still can get benefit from x2apic tpr shadow
>> w/o APICv since the overhead of the other bitmaps/caching.
>
> Overhead from assigning a cached MSR bitmap should be less than one VM
> exit caused by a TPR write when there are no pending interrupts.
> Are there other sources of overhead?

Then I understand what the cached MSR bitmap you mean instead of
introducing another caching.

>
>> Btw, I heard from Tianyu from Intel, you said there was a x2apic bug
>> in kvm forum and the bug maybe in kvm, I guess I meet the same bug
>> when run a windows guest(server version of windows 7, 2008 or 2012) w/
>> x2apic enabled in guest and -machine q35,kernel_irqchip=spit -device
>> intel-iommu, intremap=on in the QEMU command line.
>
> Does it happen when you are running less than 8 VCPUs (max APIC ID < 8)?
> QEMU always enabled x2APIC support in IOMMU (EIME) even though it
> doesn't work under some configurations.

Yes, less than 8 vCPUs in my testing and "bcdedit /set x2apicpolicy
enable" to enable x2apic in windows server guests, the windows guests
BSOD after reboot.

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Sept. 22, 2016, 6:48 a.m. UTC | #18
[...]
>
>> Btw, I heard from Tianyu from Intel, you said there was a x2apic bug
>> in kvm forum and the bug maybe in kvm, I guess I meet the same bug
>> when run a windows guest(server version of windows 7, 2008 or 2012) w/
>> x2apic enabled in guest and -machine q35,kernel_irqchip=spit -device
>> intel-iommu, intremap=on in the QEMU command line.
>
> Does it happen when you are running less than 8 VCPUs (max APIC ID < 8)?
> QEMU always enabled x2APIC support in IOMMU (EIME) even though it
> doesn't work under some configurations.

I'm digging  into the bug currently. :)

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5cede40..e703129 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6336,7 +6336,7 @@  static void wakeup_handler(void)
 
 static __init int hardware_setup(void)
 {
-	int r = -ENOMEM, i, msr;
+	int r = -ENOMEM, i;
 
 	rdmsrl_safe(MSR_EFER, &host_efer);
 
@@ -6464,18 +6464,6 @@  static __init int hardware_setup(void)
 
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
-	for (msr = 0x800; msr <= 0x8ff; msr++)
-		vmx_disable_intercept_msr_read_x2apic(msr);
-
-	/* TMCCT */
-	vmx_enable_intercept_msr_read_x2apic(0x839);
-	/* TPR */
-	vmx_disable_intercept_msr_write_x2apic(0x808);
-	/* EOI */
-	vmx_disable_intercept_msr_write_x2apic(0x80b);
-	/* SELF-IPI */
-	vmx_disable_intercept_msr_write_x2apic(0x83f);
-
 	if (enable_ept) {
 		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
 			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
@@ -8435,12 +8423,7 @@  static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 		return;
 	}
 
-	/*
-	 * There is not point to enable virtualize x2apic without enable
-	 * apicv
-	 */
-	if (!cpu_has_vmx_virtualize_x2apic_mode() ||
-				!kvm_vcpu_apicv_active(vcpu))
+	if (!cpu_has_vmx_virtualize_x2apic_mode())
 		return;
 
 	if (!cpu_need_tpr_shadow(vcpu))
@@ -8449,8 +8432,28 @@  static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 	sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
 
 	if (set) {
+		int msr;
+
 		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
 		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+
+		if (kvm_vcpu_apicv_active(vcpu)) {
+			for (msr = 0x800; msr <= 0x8ff; msr++)
+				vmx_disable_intercept_msr_read_x2apic(msr);
+
+			/* TMCCT */
+			vmx_enable_intercept_msr_read_x2apic(0x839);
+			/* TPR */
+			vmx_disable_intercept_msr_write_x2apic(0x808);
+			/* EOI */
+			vmx_disable_intercept_msr_write_x2apic(0x80b);
+			/* SELF-IPI */
+			vmx_disable_intercept_msr_write_x2apic(0x83f);
+		} else if (vmx_exec_control(to_vmx(vcpu)) & CPU_BASED_TPR_SHADOW) {
+			/* TPR */
+			vmx_disable_intercept_msr_read_x2apic(0x808);
+			vmx_disable_intercept_msr_write_x2apic(0x808);
+		}
 	} else {
 		sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
 		sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;