diff mbox

[v2] KVM: nVMX: Do not load EOI-exitmap while running L2

Message ID 1521593431-10278-2-git-send-email-liran.alon@oracle.com (mailing list archive)
State New, archived
Headers show

Commit Message

Liran Alon March 21, 2018, 12:50 a.m. UTC
When L1 IOAPIC redirection-table is written, a request of
KVM_REQ_SCAN_IOAPIC is set on all vCPUs. This is done such that
all vCPUs will now recalc their IOAPIC handled vectors and load
it to their EOI-exitmap.

However, it could be that one of the vCPUs is currently running
L2. In this case, load_eoi_exitmap() will be called which would
write to vmcs02->eoi_exit_bitmap, which is wrong because
vmcs02->eoi_exit_bitmap should always be equal to
vmcs12->eoi_exit_bitmap. Furthermore, at this point
KVM_REQ_SCAN_IOAPIC was already consumed and therefore we will
never update vmcs01->eoi_exit_bitmap. This could lead to remote_irr
of some IOAPIC level-triggered entry to remain set forever.

Fix this issue by delaying the load of EOI-exitmap to when vCPU
is running L1.

One may wonder why not just delay entire KVM_REQ_SCAN_IOAPIC
processing to when vCPU is running L1. This is done in order to handle
correctly the case where LAPIC & IO-APIC of L1 is passed through to
L2. In this case, vmcs12->virtual_interrupt_delivery should be 0. In
current nVMX implementation, that results in
vmcs02->virtual_interrupt_delivery to also be 0. Thus,
vmcs02->eoi_exit_bitmap is not used. Therefore, every L2 EOI causes
a #VMExit into L0 (either on MSR_WRITE to x2APIC MSR or
APIC_ACCESS/APIC_WRITE/EPT_MISCONFIG to APIC MMIO page).
In order for such L2 EOI to be broadcast, if needed, from LAPIC
to IO-APIC, vcpu->arch.ioapic_handled_vectors must be updated
while L2 is running. Therefore, the patch makes sure to delay only the
loading of EOI-exitmap but not the update of
vcpu->arch.ioapic_handled_vectors.

Signed-off-by: Liran Alon <liran.alon@oracle.com>
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/kvm_cache_regs.h   |  5 +++++
 arch/x86/kvm/x86.c              | 19 +++++++++++++++++--
 3 files changed, 24 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini March 21, 2018, 1:16 p.m. UTC | #1
On 21/03/2018 01:50, Liran Alon wrote:
> When L1 IOAPIC redirection-table is written, a request of
> KVM_REQ_SCAN_IOAPIC is set on all vCPUs. This is done such that
> all vCPUs will now recalc their IOAPIC handled vectors and load
> it to their EOI-exitmap.
> 
> However, it could be that one of the vCPUs is currently running
> L2. In this case, load_eoi_exitmap() will be called which would
> write to vmcs02->eoi_exit_bitmap, which is wrong because
> vmcs02->eoi_exit_bitmap should always be equal to
> vmcs12->eoi_exit_bitmap. Furthermore, at this point
> KVM_REQ_SCAN_IOAPIC was already consumed and therefore we will
> never update vmcs01->eoi_exit_bitmap. This could lead to remote_irr
> of some IOAPIC level-triggered entry to remain set forever.
> 
> Fix this issue by delaying the load of EOI-exitmap to when vCPU
> is running L1.
> 
> One may wonder why not just delay entire KVM_REQ_SCAN_IOAPIC
> processing to when vCPU is running L1. This is done in order to handle
> correctly the case where LAPIC & IO-APIC of L1 is pass-throughed into
> L2. In this case, vmcs12->virtual_interrupt_delivery should be 0. In
> current nVMX implementation, that results in
> vmcs02->virtual_interrupt_delivery to also be 0. Thus,
> vmcs02->eoi_exit_bitmap is not used. Therefore, every L2 EOI cause
> a #VMExit into L0 (either on MSR_WRITE to x2APIC MSR or
> APIC_ACCESS/APIC_WRITE/EPT_MISCONFIG to APIC MMIO page).
> In order for such L2 EOI to be broadcasted, if needed, from LAPIC
> to IO-APIC, vcpu->arch.ioapic_handled_vectors must be updated
> while L2 is running. Therefore, patch makes sure to delay only the
> loading of EOI-exitmap but not the update of
> vcpu->arch.ioapic_handled_vectors.
> 
> Signed-off-by: Liran Alon <liran.alon@oracle.com>
> Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
> Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/kvm_cache_regs.h   |  5 +++++
>  arch/x86/kvm/x86.c              | 19 +++++++++++++++++--
>  3 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b605a5b6a30c..e04e15898b5b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -73,6 +73,7 @@
>  #define KVM_REQ_HV_RESET		KVM_ARCH_REQ(20)
>  #define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
>  #define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
> +#define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
>  
>  #define CR0_RESERVED_BITS                                               \
>  	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
> @@ -498,6 +499,7 @@ struct kvm_vcpu_arch {
>  	u64 apic_base;
>  	struct kvm_lapic *apic;    /* kernel irqchip context */
>  	bool apicv_active;
> +	bool load_eoi_exitmap_pending;
>  	DECLARE_BITMAP(ioapic_handled_vectors, 256);
>  	unsigned long apic_attention;
>  	int32_t apic_arb_prio;
> diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
> index f500293dad8d..d1457bc63d1c 100644
> --- a/arch/x86/kvm/kvm_cache_regs.h
> +++ b/arch/x86/kvm/kvm_cache_regs.h
> @@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
>  static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
>  {
>  	vcpu->arch.hflags &= ~HF_GUEST_MASK;
> +
> +	if (vcpu->arch.load_eoi_exitmap_pending) {
> +		vcpu->arch.load_eoi_exitmap_pending = false;
> +		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
> +	}
>  }
>  
>  static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 18b5ca7a3197..0322aa60e191 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6985,8 +6985,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
>  
>  static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
>  {
> -	u64 eoi_exit_bitmap[4];
> -
>  	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
>  		return;
>  
> @@ -6999,6 +6997,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
>  			kvm_x86_ops->sync_pir_to_irr(vcpu);
>  		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
>  	}
> +
> +	if (is_guest_mode(vcpu))
> +		vcpu->arch.load_eoi_exitmap_pending = true;
> +	else
> +		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
> +}
> +
> +static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
> +{
> +	u64 eoi_exit_bitmap[4];
> +
> +	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
> +		return;
> +
>  	bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
>  		  vcpu_to_synic(vcpu)->vec_bitmap, 256);
>  	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
> @@ -7113,6 +7125,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		}
>  		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
>  			vcpu_scan_ioapic(vcpu);
> +		if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
> +			vcpu_load_eoi_exitmap(vcpu);
>  		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
>  			kvm_vcpu_reload_apic_access_page(vcpu);
>  		if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
> @@ -8338,6 +8352,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  	int r;
>  
>  	vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
> +	vcpu->arch.load_eoi_exitmap_pending = false;
>  	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
>  	if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
>  		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> 

This hunk should not be needed since the memory is zero-initialized.
Apart from this, I've queued the patch.

Paolo
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b605a5b6a30c..e04e15898b5b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -73,6 +73,7 @@ 
 #define KVM_REQ_HV_RESET		KVM_ARCH_REQ(20)
 #define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
 #define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
+#define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -498,6 +499,7 @@  struct kvm_vcpu_arch {
 	u64 apic_base;
 	struct kvm_lapic *apic;    /* kernel irqchip context */
 	bool apicv_active;
+	bool load_eoi_exitmap_pending;
 	DECLARE_BITMAP(ioapic_handled_vectors, 256);
 	unsigned long apic_attention;
 	int32_t apic_arb_prio;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f500293dad8d..d1457bc63d1c 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -93,6 +93,11 @@  static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
 static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hflags &= ~HF_GUEST_MASK;
+
+	if (vcpu->arch.load_eoi_exitmap_pending) {
+		vcpu->arch.load_eoi_exitmap_pending = false;
+		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+	}
 }
 
 static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 18b5ca7a3197..0322aa60e191 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6985,8 +6985,6 @@  void kvm_make_scan_ioapic_request(struct kvm *kvm)
 
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
-	u64 eoi_exit_bitmap[4];
-
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 		return;
 
@@ -6999,6 +6997,20 @@  static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->sync_pir_to_irr(vcpu);
 		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
+
+	if (is_guest_mode(vcpu))
+		vcpu->arch.load_eoi_exitmap_pending = true;
+	else
+		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+}
+
+static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+	u64 eoi_exit_bitmap[4];
+
+	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+		return;
+
 	bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
 		  vcpu_to_synic(vcpu)->vec_bitmap, 256);
 	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
@@ -7113,6 +7125,8 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
+		if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
+			vcpu_load_eoi_exitmap(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
 			kvm_vcpu_reload_apic_access_page(vcpu);
 		if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
@@ -8338,6 +8352,7 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	int r;
 
 	vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
+	vcpu->arch.load_eoi_exitmap_pending = false;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
 	if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;