
[5/6] KVM: x86: do not scan IRR twice on APICv vmentry

Message ID 1482164232-130035-6-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived

Commit Message

Paolo Bonzini Dec. 19, 2016, 4:17 p.m. UTC
Calls to apic_find_highest_irr scan the IRR twice, once
in vmx_sync_pir_to_irr and once in apic_search_irr.  Change
sync_pir_to_irr to get the new maximum IRR from kvm_apic_update_irr;
now that it does the computation, it can also do the RVI write.

In order to avoid complications in svm.c, make the callback optional.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/lapic.c            |  8 +++++---
 arch/x86/kvm/svm.c              |  6 ------
 arch/x86/kvm/vmx.c              | 30 ++++++++++++++++++------------
 arch/x86/kvm/x86.c              |  7 ++-----
 5 files changed, 26 insertions(+), 27 deletions(-)
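
To illustrate the single pass the commit message describes, here is a
minimal userspace sketch (an approximation, not the kernel's exact code:
the helper name and the plain loads stand in for kvm_apic_update_irr and
its READ_ONCE()/xchg() accesses).  It ORs the 256-bit posted-interrupt
request bitmap (PIR) into IRR while tracking the highest pending vector,
so the caller can program RVI without rescanning IRR:

	#include <stdint.h>

	#define VECTOR_WORDS 8	/* 256 vectors, 32 bits per word */

	/* Merge PIR into IRR; return the highest pending vector, or -1. */
	static int merge_pir_into_irr(const uint32_t *pir, uint32_t *irr)
	{
		int max_irr = -1;
		int i;

		for (i = 0; i < VECTOR_WORDS; i++) {
			irr[i] |= pir[i];
			if (irr[i])	/* highest nonzero word wins */
				max_irr = i * 32 + 31 - __builtin_clz(irr[i]);
		}
		return max_irr;	/* doubles as the value to write into RVI */
	}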

Comments

Radim Krčmář Feb. 7, 2017, 8:19 p.m. UTC | #1
2016-12-19 17:17+0100, Paolo Bonzini:
> Calls to apic_find_highest_irr scan the IRR twice, once
> in vmx_sync_pir_to_irr and once in apic_search_irr.  Change
> sync_pir_to_irr to get the new maximum IRR from kvm_apic_update_irr;
> now that it does the computation, it can also do the RVI write.
> 
> In order to avoid complications in svm.c, make the callback optional.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> @@ -8734,20 +8736,24 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
>  	}
>  }
>  
> -static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> +static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	int max_irr;
>  
> -	if (!pi_test_on(&vmx->pi_desc))
> -		return;
> -
> -	pi_clear_on(&vmx->pi_desc);
> -	/*
> -	 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
> -	 * But on x86 this is just a compiler barrier anyway.
> -	 */
> -	smp_mb__after_atomic();
> -	kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
> +	if (vcpu->arch.apicv_active && pi_test_on(&vmx->pi_desc)) {
> +		pi_clear_on(&vmx->pi_desc);
> +		/*
> +		 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
> +		 * But on x86 this is just a compiler barrier anyway.
> +		 */
> +		smp_mb__after_atomic();
> +		max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
> +	} else {
> +		max_irr = kvm_lapic_find_highest_irr(vcpu);
> +	}
> +	vmx_hwapic_irr_update(vcpu, max_irr);

Btw. a v1 discussion revolved around the need to have
vmx_hwapic_irr_update() here when the maximal IRR should always be in
RVI, and, uh, I didn't follow up (negligible attention span) ...

There is one place where that doesn't hold: we don't update RVI after an
EXTERNAL_INTERRUPT nested VM exit without VM_EXIT_ACK_INTR_ON_EXIT, but
IRR has likely changed.  Isn't that the problem?
Paolo Bonzini Feb. 8, 2017, 2:10 p.m. UTC | #2
On 07/02/2017 21:19, Radim Krčmář wrote:
> 2016-12-19 17:17+0100, Paolo Bonzini:
>> Calls to apic_find_highest_irr scan the IRR twice, once
>> in vmx_sync_pir_to_irr and once in apic_search_irr.  Change
>> sync_pir_to_irr to get the new maximum IRR from kvm_apic_update_irr;
>> now that it does the computation, it can also do the RVI write.
>>
>> In order to avoid complications in svm.c, make the callback optional.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> @@ -8734,20 +8736,24 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
>>  	}
>>  }
>>  
>> -static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>> +static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>>  {
>>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +	int max_irr;
>>  
>> -	if (!pi_test_on(&vmx->pi_desc))
>> -		return;
>> -
>> -	pi_clear_on(&vmx->pi_desc);
>> -	/*
>> -	 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
>> -	 * But on x86 this is just a compiler barrier anyway.
>> -	 */
>> -	smp_mb__after_atomic();
>> -	kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
>> +	if (vcpu->arch.apicv_active && pi_test_on(&vmx->pi_desc)) {
>> +		pi_clear_on(&vmx->pi_desc);
>> +		/*
>> +		 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
>> +		 * But on x86 this is just a compiler barrier anyway.
>> +		 */
>> +		smp_mb__after_atomic();
>> +		max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
>> +	} else {
>> +		max_irr = kvm_lapic_find_highest_irr(vcpu);
>> +	}
>> +	vmx_hwapic_irr_update(vcpu, max_irr);
> 
> Btw. a v1 discussion revolved around the need to have
> vmx_hwapic_irr_update() here when the maximal IRR should always be in
> RVI, and, uh, I didn't follow up (negligible attention span) ...
>
> There is one place where that doesn't hold: we don't update RVI after an
> EXTERNAL_INTERRUPT nested VM exit without VM_EXIT_ACK_INTR_ON_EXIT, but
> IRR has likely changed.  Isn't that the problem?

I'm not sure... there shouldn't be any issue with missed RVI updates in
this series, since it does

	if (kvm_lapic_enabled(vcpu)) {
		/*
		 * This handles the case where a posted interrupt was
		 * notified with kvm_vcpu_kick.
		 */
		if (kvm_x86_ops->sync_pir_to_irr)
			kvm_x86_ops->sync_pir_to_irr(vcpu);
	}

on every VM entry (and kvm_lapic_find_highest_irr inside the callback).
That is not something I really like, but it's no worse than what was
there before

                if (vcpu->arch.apicv_active)
                        kvm_x86_ops->hwapic_irr_update(vcpu,
                                kvm_lapic_find_highest_irr(vcpu));
        }

and obviously better than going unnecessarily through KVM_REQ_EVENT
processing.

Paolo
Radim Krčmář Feb. 8, 2017, 2:24 p.m. UTC | #3
2017-02-08 15:10+0100, Paolo Bonzini:
> 
> 
> On 07/02/2017 21:19, Radim Krčmář wrote:
> > 2016-12-19 17:17+0100, Paolo Bonzini:
> >> Calls to apic_find_highest_irr scan the IRR twice, once
> >> in vmx_sync_pir_to_irr and once in apic_search_irr.  Change
> >> sync_pir_to_irr to get the new maximum IRR from kvm_apic_update_irr;
> >> now that it does the computation, it can also do the RVI write.
> >>
> >> In order to avoid complications in svm.c, make the callback optional.
> >>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> >> ---
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> @@ -8734,20 +8736,24 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
> >>  	}
> >>  }
> >>  
> >> -static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> >> +static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
> >>  {
> >>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> >> +	int max_irr;
> >>  
> >> -	if (!pi_test_on(&vmx->pi_desc))
> >> -		return;
> >> -
> >> -	pi_clear_on(&vmx->pi_desc);
> >> -	/*
> >> -	 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
> >> -	 * But on x86 this is just a compiler barrier anyway.
> >> -	 */
> >> -	smp_mb__after_atomic();
> >> -	kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
> >> +	if (vcpu->arch.apicv_active && pi_test_on(&vmx->pi_desc)) {
> >> +		pi_clear_on(&vmx->pi_desc);
> >> +		/*
> >> +		 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
> >> +		 * But on x86 this is just a compiler barrier anyway.
> >> +		 */
> >> +		smp_mb__after_atomic();
> >> +		max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
> >> +	} else {
> >> +		max_irr = kvm_lapic_find_highest_irr(vcpu);
> >> +	}
> >> +	vmx_hwapic_irr_update(vcpu, max_irr);
> > 
> > Btw. a v1 discussion revolved around the need to have
> > vmx_hwapic_irr_update() here when the maximal IRR should always be in
> > RVI, and, uh, I didn't follow up (negligible attention span) ...
> >
> > There is one place where that doesn't hold: we don't update RVI after an
> > EXTERNAL_INTERRUPT nested VM exit without VM_EXIT_ACK_INTR_ON_EXIT, but
> > IRR has likely changed.  Isn't that the problem?
> 
> I'm not sure... there shouldn't be any issue with missed RVI updates in
> this series, since it does

> 	if (kvm_lapic_enabled(vcpu)) {
> 		/*
> 		 * This handles the case where a posted interrupt was
> 		 * notified with kvm_vcpu_kick.
> 		 */
> 		if (kvm_x86_ops->sync_pir_to_irr)
> 			kvm_x86_ops->sync_pir_to_irr(vcpu);
> 	}
> 
> on every VM entry (and kvm_lapic_find_highest_irr inside the callback).
> That is not something I really like, but it's no worse than what was
> there before
> 
>                 if (vcpu->arch.apicv_active)
>                         kvm_x86_ops->hwapic_irr_update(vcpu,
>                                 kvm_lapic_find_highest_irr(vcpu));
>         }
> 
> and obviously better than going unnecessarily through KVM_REQ_EVENT
> processing.

I agree.  I wanted to point out that we could get rid of the RVI update
on VM entry when PI.ON is clear, like you originally planned (because
RVI should already be the max IRR).

And the reason why v1 didn't work out was likely the missing RVI
update on nested VM exit, which forced v2 to have this work-around.
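
(For illustration, the follow-up Radim suggests would look roughly like
the sketch below.  This is hypothetical and was not part of the applied
patch; it assumes RVI already holds the maximum IRR whenever PI.ON is
clear, which in turn requires every nested VM exit that changes IRR to
refresh RVI.)

	static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
	{
		struct vcpu_vmx *vmx = to_vmx(vcpu);
		int max_irr;

		/* Nothing posted: assume RVI is already the max IRR. */
		if (!vcpu->arch.apicv_active || !pi_test_on(&vmx->pi_desc))
			return kvm_lapic_find_highest_irr(vcpu);

		pi_clear_on(&vmx->pi_desc);
		/*
		 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
		 * But on x86 this is just a compiler barrier anyway.
		 */
		smp_mb__after_atomic();
		max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
		/* The RVI write happens only when IRR may have changed. */
		vmx_hwapic_irr_update(vcpu, max_irr);
		return max_irr;
	}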

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4b820b2b6bfa..b182b0ee0997 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -965,7 +965,7 @@ struct kvm_x86_ops {
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
 	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
-	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
+	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4af0e105caad..f644dd1dbe71 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -515,6 +515,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 	 */
 	return apic_find_highest_irr(vcpu->arch.apic);
 }
+EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
 
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			     int vector, int level, int trig_mode,
@@ -580,9 +581,10 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 {
 	int highest_irr;
-	if (apic->vcpu->arch.apicv_active)
-		kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
-	highest_irr = apic_find_highest_irr(apic);
+	if (kvm_x86_ops->sync_pir_to_irr)
+		highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
+	else
+		highest_irr = apic_find_highest_irr(apic);
 	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 		return -1;
 	return highest_irr;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 08a4d3ab3455..dc4b8834caee 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4357,11 +4357,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 	return;
 }
 
-static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
-{
-	return;
-}
-
 static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
 {
 	kvm_lapic_set_irr(vec, vcpu->arch.apic);
@@ -5371,7 +5366,6 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
 	.get_enable_apicv = svm_get_enable_apicv,
 	.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
 	.load_eoi_exitmap = svm_load_eoi_exitmap,
-	.sync_pir_to_irr = svm_sync_pir_to_irr,
 	.hwapic_irr_update = svm_hwapic_irr_update,
 	.hwapic_isr_update = svm_hwapic_isr_update,
 	.apicv_post_state_restore = avic_post_state_restore,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index eab8ab023705..27e40b180242 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6660,8 +6660,10 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_ple())
 		ple_gap = 0;
 
-	if (!cpu_has_vmx_apicv())
+	if (!cpu_has_vmx_apicv()) {
 		enable_apicv = 0;
+		kvm_x86_ops->sync_pir_to_irr = NULL;
+	}
 
 	if (cpu_has_vmx_tsc_scaling()) {
 		kvm_has_tsc_control = true;
@@ -8734,20 +8736,24 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 	}
 }
 
-static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
+static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int max_irr;
 
-	if (!pi_test_on(&vmx->pi_desc))
-		return;
-
-	pi_clear_on(&vmx->pi_desc);
-	/*
-	 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
-	 * But on x86 this is just a compiler barrier anyway.
-	 */
-	smp_mb__after_atomic();
-	kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+	if (vcpu->arch.apicv_active && pi_test_on(&vmx->pi_desc)) {
+		pi_clear_on(&vmx->pi_desc);
+		/*
+		 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+		 * But on x86 this is just a compiler barrier anyway.
+		 */
+		smp_mb__after_atomic();
+		max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
+	} else {
+		max_irr = kvm_lapic_find_highest_irr(vcpu);
+	}
+	vmx_hwapic_irr_update(vcpu, max_irr);
+	return max_irr;
 }
 
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 641e9309ee08..c666414adc1d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2871,7 +2871,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
-	if (vcpu->arch.apicv_active)
+	if (kvm_x86_ops->sync_pir_to_irr)
 		kvm_x86_ops->sync_pir_to_irr(vcpu);
 
 	return kvm_apic_get_state(vcpu, s);
@@ -6719,11 +6719,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 * Update architecture specific hints for APIC
 		 * virtual interrupt delivery.
 		 */
-		if (vcpu->arch.apicv_active) {
+		if (kvm_x86_ops->sync_pir_to_irr)
 			kvm_x86_ops->sync_pir_to_irr(vcpu);
-			kvm_x86_ops->hwapic_irr_update(vcpu,
-				kvm_lapic_find_highest_irr(vcpu));
-		}
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {