
[v2,5/5] KVM: nVMX: Enable nested posted interrupt processing.

Message ID CACzj_yWxK9d_47a9ZHqtk2HSJBN6+LvtDScQqWd5znfSt8ia0Q@mail.gmail.com (mailing list archive)
State New, archived

Commit Message

Wincy Van Jan. 20, 2015, 8:48 a.m. UTC
If a vcpu has an interrupt pending while it is in VMX non-root mode, we
kick that vcpu so the interrupt is injected in a timely manner. With
posted interrupt processing, the kick is not needed: interrupts are
fully taken care of by hardware.

In nested VMX, this feature avoids far more vmexits than it does in the
non-nested case.

This patch uses L0's POSTED_INTR_NV to avoid unexpected interrupts when
L1's vector differs from L0's. If the target vcpu is running in hardware
non-root mode, we use a physical IPI to deliver the posted interrupt;
otherwise we deliver the interrupt to L1 and kick that vcpu out of
nested non-root mode.

Signed-off-by: Wincy Van <fanwenyi0529@gmail.com>
---
 arch/x86/kvm/vmx.c |  136 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 132 insertions(+), 4 deletions(-)

        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2362,6 +2375,9 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
        vmx->nested.nested_vmx_pinbased_ctls_high |=
                PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
                PIN_BASED_VMX_PREEMPTION_TIMER;
+       if (vmx_vm_has_apicv(vmx->vcpu.kvm))
+               vmx->nested.nested_vmx_pinbased_ctls_high |=
+                       PIN_BASED_POSTED_INTR;

        /* exit controls */
        rdmsr(MSR_IA32_VMX_EXIT_CTLS,
@@ -4267,6 +4283,46 @@ static int vmx_vm_has_apicv(struct kvm *kvm)
        return enable_apicv && irqchip_in_kernel(kvm);
 }

+static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
+                                               int vector)
+{
+       int r = 0;
+       struct vmcs12 *vmcs12;
+
+       /*
+        * Since posted intr delivery is async,
+        * we must acquire a spin-lock to avoid
+        * the race of vmcs12.
+        */
+       spin_lock(&to_vmx(vcpu)->nested.vmcs12_lock);
+       vmcs12 = get_vmcs12(vcpu);
+       if (!is_guest_mode(vcpu) || !vmcs12) {
+               r = -1;
+               goto out;
+       }
+       if (vector == vmcs12->posted_intr_nv &&
+           nested_cpu_has_posted_intr(vmcs12)) {
+               if (vcpu->mode == IN_GUEST_MODE)
+                       apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+                               POSTED_INTR_VECTOR);
+               else {
+                       r = -1;
+                       goto out;
+               }
+
+               /*
+                * if posted intr is done by hardware, the
+                * corresponding eoi was sent to L0. Thus
+                * we should send eoi to L1 manually.
+                */
+               kvm_apic_set_eoi_accelerated(vcpu,
+                       vmcs12->posted_intr_nv);
+       } else
+               r = -1;
+out:
+       spin_unlock(&to_vmx(vcpu)->nested.vmcs12_lock);
+       return r;
+}
 /*
  * Send interrupt to vcpu via posted interrupt way.
  * 1. If target vcpu is running(non-root mode), send posted interrupt
@@ -4279,6 +4335,10 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        int r;

+       r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
+       if (!r)
+               return;
+
        if (pi_test_and_set_pir(vector, &vmx->pi_desc))
                return;

@@ -6499,6 +6559,8 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
        if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
                return;

+       spin_lock(&vmx->nested.vmcs12_lock);
+
        if (enable_shadow_vmcs) {
                /* copy to memory all shadowed fields in case
                   they were modified */
@@ -6513,6 +6575,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
        nested_release_page(vmx->nested.current_vmcs12_page);
        vmx->nested.current_vmptr = -1ull;
        vmx->nested.current_vmcs12 = NULL;
+       spin_unlock(&vmx->nested.vmcs12_lock);
 }

 /*
@@ -6537,6 +6600,12 @@ static void free_nested(struct vcpu_vmx *vmx)
                nested_release_page(vmx->nested.virtual_apic_page);
                vmx->nested.virtual_apic_page = NULL;
        }
+       if (vmx->nested.pi_desc_page) {
+               kunmap(vmx->nested.pi_desc_page);
+               nested_release_page(vmx->nested.pi_desc_page);
+               vmx->nested.pi_desc = NULL;
+               vmx->nested.pi_desc_page = NULL;
+       }

        nested_free_all_saved_vmcss(vmx);
 }
@@ -8130,6 +8199,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        if (nested)
                nested_vmx_setup_ctls_msrs(vmx);

+       spin_lock_init(&vmx->nested.vmcs12_lock);
        vmx->nested.current_vmptr = -1ull;
        vmx->nested.current_vmcs12 = NULL;

@@ -8363,6 +8433,30 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
                        return false;
        }

+       if (nested_cpu_has_posted_intr(vmcs12)) {
+               if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
+                       return false;
+
+               if (vmx->nested.pi_desc_page) { /* shouldn't happen */
+                       kunmap(vmx->nested.pi_desc_page);
+                       nested_release_page(vmx->nested.pi_desc_page);
+               }
+               vmx->nested.pi_desc_page =
+                       nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
+               if (!vmx->nested.pi_desc_page)
+                       return false;
+
+               vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page);
+               if (!vmx->nested.pi_desc) {
+                       nested_release_page(vmx->nested.pi_desc_page);
+                       return false;
+               }
+               vmx->nested.pi_desc = (struct pi_desc *)
+                       ((unsigned long)vmx->nested.pi_desc +
+                       (unsigned long)(vmcs12->posted_intr_desc_addr &
+                       (PAGE_SIZE - 1)));
+       }
+
        return true;
 }

@@ -8404,20 +8498,38 @@ static inline int nested_vmx_check_vid(struct kvm_vcpu *vcpu,
        return 0;
 }

+static inline int nested_vmx_check_posted_intr(struct kvm_vcpu *vcpu,
+                                              struct vmcs12 *vmcs12)
+{
+       /*
+        * bits 15:8 should be zero in posted_intr_nv;
+        * the descriptor address has already been checked
+        * in nested_get_vmcs12_pages.
+        */
+       if (!nested_cpu_has_vid(vmcs12) ||
+           !nested_exit_intr_ack_set(vcpu) ||
+           vmcs12->posted_intr_nv & 0xff00)
+               return -EINVAL;
+       return 0;
+}
+
 static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
                                           struct vmcs12 *vmcs12)
 {
-       int r;
+       int r = 0;

        if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
            !nested_cpu_has_apic_reg_virt(vmcs12) &&
-           !nested_cpu_has_vid(vmcs12))
+           !nested_cpu_has_vid(vmcs12) &&
+           !nested_cpu_has_posted_intr(vmcs12))
                return 0;

        if (nested_cpu_has_virt_x2apic_mode(vmcs12))
                r = nested_vmx_check_virt_x2apic(vcpu, vmcs12);
        if (nested_cpu_has_vid(vmcs12))
                r |= nested_vmx_check_vid(vcpu, vmcs12);
+       if (nested_cpu_has_posted_intr(vmcs12))
+               r |= nested_vmx_check_posted_intr(vcpu, vmcs12);

        if (r)
                goto fail;
@@ -8669,8 +8781,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

        exec_control = vmcs12->pin_based_vm_exec_control;
        exec_control |= vmcs_config.pin_based_exec_ctrl;
-       exec_control &= ~(PIN_BASED_VMX_PREEMPTION_TIMER |
-                          PIN_BASED_POSTED_INTR);
+       exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
+       if (nested_cpu_has_posted_intr(vmcs12)) {
+               /* Note that we use L0's vector to avoid unexpected intr. */
+               vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR);
+               vmcs_write64(POSTED_INTR_DESC_ADDR,
+                       page_to_phys(vmx->nested.pi_desc_page) +
+                       (unsigned long)(vmcs12->posted_intr_desc_addr &
+                       (PAGE_SIZE - 1)));
+       } else
+               exec_control &= ~PIN_BASED_POSTED_INTR;
+
        vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);

        vmx->nested.preemption_timer_expired = false;
@@ -9579,6 +9701,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
                nested_release_page(vmx->nested.virtual_apic_page);
                vmx->nested.virtual_apic_page = NULL;
        }
+       if (vmx->nested.pi_desc_page) {
+               kunmap(vmx->nested.pi_desc_page);
+               nested_release_page(vmx->nested.pi_desc_page);
+               vmx->nested.pi_desc = NULL;
+               vmx->nested.pi_desc_page = NULL;
+       }

        /*
         * We are now running in L2, mmu_notifier will force to reload the
--
1.7.1

Comments

Paolo Bonzini Jan. 20, 2015, 9:54 a.m. UTC | #1
On 20/01/2015 09:48, Wincy Van wrote:
> +static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
> +                                               int vector)
> +{
> +       int r = 0;
> +       struct vmcs12 *vmcs12;
> +
> +       /*
> +        * Since posted intr delivery is async,
> +        * we must acquire a spin-lock to avoid
> +        * the race of vmcs12.
> +        */
> +       spin_lock(&to_vmx(vcpu)->nested.vmcs12_lock);
> +       vmcs12 = get_vmcs12(vcpu);
> +       if (!is_guest_mode(vcpu) || !vmcs12) {
> +               r = -1;
> +               goto out;
> +       }

is_guest_mode should be checked first outside the lock, to avoid
affecting the non-nested fast path.  You can then recheck it later
inside the lock.
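
A minimal sketch of the double-check being suggested here (illustration
only, not the code that was eventually merged):

	/* Fast path: non-nested vcpus never take the lock. */
	if (!is_guest_mode(vcpu))
		return -1;

	spin_lock(&to_vmx(vcpu)->nested.vmcs12_lock);
	vmcs12 = get_vmcs12(vcpu);
	/*
	 * Recheck under the lock: a concurrent nested vmexit may have
	 * torn down the nested state in the meantime.
	 */
	if (!is_guest_mode(vcpu) || !vmcs12) {
		r = -1;
		goto out;
	}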

Another way to avoid the spinlock: in prepare_vmcs02 or a similar place,
you can save vmcs12->posted_intr_nv in a new field
vmx->nested.posted_intr_nv; just set it to -1 if
!nested_cpu_has_posted_intr(vmcs12).  In vmclear, again you just set the
field to -1, and here you can do

	if (!is_guest_mode(vcpu) ||
	    vector != to_vmx(vcpu)->nested.posted_intr_nv) {
		r = -1;
		goto out;
	}

You don't need to access vmcs12, and while there is a race, it's okay
because there is no pointer access.
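
Roughly, the pieces of that lockless scheme would look like this (a
sketch of the suggestion only; vmx->nested.posted_intr_nv is the
proposed new field and does not exist in this patch):

	/* In prepare_vmcs02 (or a similar place): cache L1's vector. */
	vmx->nested.posted_intr_nv = nested_cpu_has_posted_intr(vmcs12) ?
				     vmcs12->posted_intr_nv : -1;

	/* In vmclear: invalidate the cached vector. */
	vmx->nested.posted_intr_nv = -1;

	/* In the delivery path: no vmcs12 dereference, so no lock. */
	if (!is_guest_mode(vcpu) ||
	    vector != to_vmx(vcpu)->nested.posted_intr_nv) {
		r = -1;
		goto out;
	}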

> 
> +               if (vcpu->mode == IN_GUEST_MODE)
> +                       apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
> +                               POSTED_INTR_VECTOR);

Please add a comment that PIR and ON have been set by the L1 hypervisor.

I'll do a full review of the other patches as soon as possible.

Paolo

> +               else {
> +                       r = -1;
> +                       goto out;
> +               }
> +
> +               /*
> +                * if posted intr is done by hardware, the
> +                * corresponding eoi was sent to L0. Thus
> +                * we should send eoi to L1 manually.
> +                */
> +               kvm_apic_set_eoi_accelerated(vcpu,
> +                       vmcs12->posted_intr_nv);
Wincy Van Jan. 20, 2015, 10:28 a.m. UTC | #2
On Tue, Jan 20, 2015 at 5:54 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 20/01/2015 09:48, Wincy Van wrote:
>> +static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
>> +                                               int vector)
>> +{
>> +       int r = 0;
>> +       struct vmcs12 *vmcs12;
>> +
>> +       /*
>> +        * Since posted intr delivery is async,
>> +        * we must acquire a spin-lock to avoid
>> +        * the race of vmcs12.
>> +        */
>> +       spin_lock(&to_vmx(vcpu)->nested.vmcs12_lock);
>> +       vmcs12 = get_vmcs12(vcpu);
>> +       if (!is_guest_mode(vcpu) || !vmcs12) {
>> +               r = -1;
>> +               goto out;
>> +       }
>
> is_guest_mode should be checked first outside the lock, to avoid
> affecting the non-nested fast path.  You can then recheck it later
> inside the lock.

Agreed, will do.

>
> Another way to avoid the spinlock: in prepare_vmcs02 or a similar place,
> you can save vmcs12->posted_intr_nv in a new field
> vmx->nested.posted_intr_nv; just set it to -1 if
> !nested_cpu_has_posted_intr(vmcs12).  In vmclear, again you just set the
> field to -1, and here you can do
>
>         if (!is_guest_mode(vcpu) ||
>             vector != to_vmx(vcpu)->nested.posted_intr_nv) {
>                 r = -1;
>                 goto out;
>         }
>
> You don't need to access vmcs12, and while there is a race, it's okay
> because there is no pointer access.

That's a good idea. I will apply it to the next version.

>
>>
>> +               if (vcpu->mode == IN_GUEST_MODE)
>> +                       apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>> +                               POSTED_INTR_VECTOR);
>
> Please add a comment that PIR and ON have been set by the L1 hypervisor.

Will do.

>
> I'll do a full review the other patches as soon as possible.
>

Thank you, I will send v3 after it is done.


Wincy
Zhang, Yang Z Jan. 21, 2015, 8:07 a.m. UTC | #3
Wincy Van wrote on 2015-01-20:
> [...]
>
> +       if (vector == vmcs12->posted_intr_nv &&
> +           nested_cpu_has_posted_intr(vmcs12)) {
> +               if (vcpu->mode == IN_GUEST_MODE)
> +                       apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
> +                               POSTED_INTR_VECTOR);
> +               else {
> +                       r = -1;
> +                       goto out;
> +               }
> +
> +               /*
> +                * if posted intr is done by hardware, the
> +                * corresponding eoi was sent to L0. Thus
> +                * we should send eoi to L1 manually.
> +                */
> +               kvm_apic_set_eoi_accelerated(vcpu,
> +                       vmcs12->posted_intr_nv);

Why is this necessary? As your comments mentioned, it is done by hardware, not by L1, so why should L1 be aware of it?

Best regards,
Yang
Wincy Van Jan. 21, 2015, 8:44 a.m. UTC | #4
On Wed, Jan 21, 2015 at 4:07 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>> +       if (vector == vmcs12->posted_intr_nv &&
>> +           nested_cpu_has_posted_intr(vmcs12)) {
>> +               if (vcpu->mode == IN_GUEST_MODE)
>> +                       apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>> +                               POSTED_INTR_VECTOR);
>> +               else {
>> +                       r = -1;
>> +                       goto out;
>> +               }
>> +
>> +               /*
>> +                * if posted intr is done by hardware, the
>> +                * corresponding eoi was sent to L0. Thus
>> +                * we should send eoi to L1 manually.
>> +                */
>> +               kvm_apic_set_eoi_accelerated(vcpu,
>> +                       vmcs12->posted_intr_nv);
>
> Why is this necessary? As your comments mentioned, it is done by hardware, not by L1, so why should L1 be aware of it?
>

According to SDM 29.6, if the processor recognizes a posted interrupt,
it will send an EOI to LAPIC.
If the posted intr is done by hardware, the processor will send eoi to
hardware LAPIC, not L1's, just like the non-nested case (the physical
interrupt is dismissed). So we should take care of the L1's LAPIC and
send an eoi to it.


Thanks,

Wincy
Zhang, Yang Z Jan. 21, 2015, 8:49 a.m. UTC | #5
Wincy Van wrote on 2015-01-21:
> On Wed, Jan 21, 2015 at 4:07 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>>> +       if (vector == vmcs12->posted_intr_nv &&
>>> +           nested_cpu_has_posted_intr(vmcs12)) {
>>> +               if (vcpu->mode == IN_GUEST_MODE)
>>> +                       apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>>> +                               POSTED_INTR_VECTOR);
>>> +               else {
>>> +                       r = -1;
>>> +                       goto out;
>>> +               }
>>> +
>>> +               /*
>>> +                * if posted intr is done by hardware, the
>>> +                * corresponding eoi was sent to L0. Thus
>>> +                * we should send eoi to L1 manually.
>>> +                */
>>> +               kvm_apic_set_eoi_accelerated(vcpu,
>>> +                       vmcs12->posted_intr_nv);
>>
>> Why is this necessary? As your comments mentioned, it is done by
>> hardware, not by L1, so why should L1 be aware of it?
>>
>
> According to SDM 29.6, if the processor recognizes a posted interrupt,
> it will send an EOI to LAPIC.
> If the posted intr is done by hardware, the processor will send eoi to
> hardware LAPIC, not L1's, just like the non-nested case (the physical
> interrupt is dismissed). So we should take care of the L1's LAPIC and
> send an eoi to it.

No. You are not emulating the PI feature. You just reuse the hardware's capability. So you don't need to let L1 know it.

>
> Thanks,
>
> Wincy

Best regards,
Yang
Wincy Van Jan. 21, 2015, 10:35 a.m. UTC | #6
On Wed, Jan 21, 2015 at 4:49 PM, Zhang, Yang Z <yang.z.zhang@intel.com> wrote:
>>>> +       if (vector == vmcs12->posted_intr_nv &&
>>>> +           nested_cpu_has_posted_intr(vmcs12)) {
>>>> +               if (vcpu->mode == IN_GUEST_MODE)
>>>> +                       apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
>>>> +                               POSTED_INTR_VECTOR);
>>>> +               else {
>>>> +                       r = -1;
>>>> +                       goto out;
>>>> +               }
>>>> +
>>>> +               /*
>>>> +                * if posted intr is done by hardware, the
>>>> +                * corresponding eoi was sent to L0. Thus
>>>> +                * we should send eoi to L1 manually.
>>>> +                */
>>>> +               kvm_apic_set_eoi_accelerated(vcpu,
>>>> +                       vmcs12->posted_intr_nv);
>>>
>>> Why is this necessary? As your comments mentioned, it is done by
>>> hardware, not by L1, so why should L1 be aware of it?
>>>
>>
>> According to SDM 29.6, if the processor recognizes a posted interrupt,
>> it will send an EOI to LAPIC.
>> If the posted intr is done by hardware, the processor will send eoi to
>> hardware LAPIC, not L1's, just like the non-nested case (the physical
>> interrupt is dismissed). So we should take care of the L1's LAPIC and send an eoi to it.
>
> No. You are not emulating the PI feature. You just reuse the hardware's capability. So you don't need to let L1 know it.
>

Agreed. I had thought we had already set L1's IRR before this; I was wrong.

BTW, I was trying to complete the nested posted intr manually if the
dest vcpu is in guest mode (is_guest_mode()) but not actually running
(vcpu->mode != IN_GUEST_MODE), but I found that it is difficult to set
the RVI of the destination vcpu in a timely manner, because we should
keep the RVI, PIR and ON in sync :(

I think it is better to do a nested vmexit in that case, rather than
emulate it, because that case is much less common than the
hardware-delivered one.


Thanks,

Wincy.

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ea56e9f..cda9133 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -215,6 +215,7 @@  struct __packed vmcs12 {
        u64 tsc_offset;
        u64 virtual_apic_page_addr;
        u64 apic_access_addr;
+       u64 posted_intr_desc_addr;
        u64 ept_pointer;
        u64 eoi_exit_bitmap0;
        u64 eoi_exit_bitmap1;
@@ -334,6 +335,7 @@  struct __packed vmcs12 {
        u32 vmx_preemption_timer_value;
        u32 padding32[7]; /* room for future expansion */
        u16 virtual_processor_id;
+       u16 posted_intr_nv;
        u16 guest_es_selector;
        u16 guest_cs_selector;
        u16 guest_ss_selector;
@@ -387,6 +389,7 @@  struct nested_vmx {
        /* The host-usable pointer to the above */
        struct page *current_vmcs12_page;
        struct vmcs12 *current_vmcs12;
+       spinlock_t vmcs12_lock;
        struct vmcs *current_shadow_vmcs;
        /*
         * Indicates if the shadow vmcs must be updated with the
@@ -406,6 +409,8 @@  struct nested_vmx {
         */
        struct page *apic_access_page;
        struct page *virtual_apic_page;
+       struct page *pi_desc_page;
+       struct pi_desc *pi_desc;
        u64 msr_ia32_feature_control;

        struct hrtimer preemption_timer;
@@ -621,6 +626,7 @@  static int max_shadow_read_write_fields =

 static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
+       FIELD(POSTED_INTR_NV, posted_intr_nv),
        FIELD(GUEST_ES_SELECTOR, guest_es_selector),
        FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
        FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
@@ -646,6 +652,7 @@  static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD64(TSC_OFFSET, tsc_offset),
        FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
        FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
+       FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
        FIELD64(EPT_POINTER, ept_pointer),
        FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
        FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
@@ -798,6 +805,7 @@  static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
 static bool vmx_xsaves_supported(void);
+static int vmx_vm_has_apicv(struct kvm *kvm);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
@@ -1159,6 +1167,11 @@  static inline bool nested_cpu_has_vid(struct vmcs12 *vmcs12)
        return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
 }

+static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
+{
+       return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
+}
+
 static inline bool is_exception(u32 intr_info)
 {