@@ -1838,7 +1838,8 @@ struct kvm_x86_ops {
int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
unsigned int host_irq, uint32_t guest_irq,
- struct kvm_kernel_irq_routing_entry *new);
+ struct kvm_kernel_irq_routing_entry *new,
+ struct kvm_vcpu *vcpu, u32 vector);
void (*pi_start_assignment)(struct kvm *kvm);
void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
@@ -812,52 +812,13 @@ static int svm_ir_list_add(struct vcpu_svm *svm,
return 0;
}
-/*
- * Note:
- * The HW cannot support posting multicast/broadcast
- * interrupts to a vCPU. So, we still use legacy interrupt
- * remapping for these kind of interrupts.
- *
- * For lowest-priority interrupts, we only support
- * those with single CPU as the destination, e.g. user
- * configures the interrupts via /proc/irq or uses
- * irqbalance to make the interrupts single-CPU.
- */
-static int
-get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
- struct vcpu_data *vcpu_info, struct kvm_vcpu **vcpu)
-{
- struct kvm_lapic_irq irq;
- *vcpu = NULL;
-
- kvm_set_msi_irq(kvm, e, &irq);
-
- if (!kvm_intr_is_single_vcpu(kvm, &irq, vcpu) ||
- !kvm_irq_is_postable(&irq)) {
- pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
- __func__, irq.vector);
- return -1;
- }
-
- pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
- irq.vector);
- vcpu_info->vector = irq.vector;
-
- return 0;
-}
-
int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
unsigned int host_irq, uint32_t guest_irq,
- struct kvm_kernel_irq_routing_entry *new)
+ struct kvm_kernel_irq_routing_entry *new,
+ struct kvm_vcpu *vcpu, u32 vector)
{
- bool enable_remapped_mode = true;
- struct vcpu_data vcpu_info;
- struct kvm_vcpu *vcpu = NULL;
int ret = 0;
- if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
- return 0;
-
/*
* If the IRQ was affined to a different vCPU, remove the IRTE metadata
* from the *previous* vCPU's list.
@@ -865,7 +826,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
svm_ir_list_del(irqfd);
pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
- __func__, host_irq, guest_irq, !!new);
+ __func__, host_irq, guest_irq, !!vcpu);
/**
* Here, we setup with legacy mode in the following cases:
@@ -874,23 +835,23 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
* 3. APIC virtualization is disabled for the vcpu.
* 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
*/
- if (new && new && new->type == KVM_IRQ_ROUTING_MSI &&
- !get_pi_vcpu_info(kvm, new, &vcpu_info, &vcpu) &&
- kvm_vcpu_apicv_active(vcpu)) {
- struct amd_iommu_pi_data pi;
-
- enable_remapped_mode = false;
-
- vcpu_info.pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu));
-
+ if (vcpu && kvm_vcpu_apicv_active(vcpu)) {
/*
* Try to enable guest_mode in IRTE. Note, the address
* of the vCPU's AVIC backing page is passed to the
* IOMMU via vcpu_info->pi_desc_addr.
*/
- pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id);
- pi.is_guest_mode = true;
- pi.vcpu_data = &vcpu_info;
+ struct vcpu_data vcpu_info = {
+ .pi_desc_addr = avic_get_backing_page_address(to_svm(vcpu)),
+ .vector = vector,
+ };
+
+ struct amd_iommu_pi_data pi = {
+ .ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_id),
+ .is_guest_mode = true,
+ .vcpu_data = &vcpu_info,
+ };
+
ret = irq_set_vcpu_affinity(host_irq, &pi);
/**
@@ -902,12 +863,11 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
*/
if (!ret)
ret = svm_ir_list_add(to_svm(vcpu), irqfd, &pi);
- }
- if (!ret && vcpu) {
- trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id,
- guest_irq, vcpu_info.vector,
- vcpu_info.pi_desc_addr, !!new);
+ trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
+ vector, vcpu_info.pi_desc_addr, true);
+ } else {
+ ret = irq_set_vcpu_affinity(host_irq, NULL);
}
if (ret < 0) {
@@ -915,10 +875,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
goto out;
}
- if (enable_remapped_mode)
- ret = irq_set_vcpu_affinity(host_irq, NULL);
- else
- ret = 0;
+ ret = 0;
out:
return ret;
}
@@ -741,7 +741,8 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
unsigned int host_irq, uint32_t guest_irq,
- struct kvm_kernel_irq_routing_entry *new);
+ struct kvm_kernel_irq_routing_entry *new,
+ struct kvm_vcpu *vcpu, u32 vector);
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
void avic_ring_doorbell(struct kvm_vcpu *vcpu);
@@ -266,46 +266,20 @@ void vmx_pi_start_assignment(struct kvm *kvm)
int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
unsigned int host_irq, uint32_t guest_irq,
- struct kvm_kernel_irq_routing_entry *new)
+ struct kvm_kernel_irq_routing_entry *new,
+ struct kvm_vcpu *vcpu, u32 vector)
{
- struct kvm_lapic_irq irq;
- struct kvm_vcpu *vcpu;
- struct vcpu_data vcpu_info;
-
- if (!vmx_can_use_vtd_pi(kvm))
- return 0;
-
- /*
- * VT-d PI cannot support posting multicast/broadcast
- * interrupts to a vCPU, we still use interrupt remapping
- * for these kind of interrupts.
- *
- * For lowest-priority interrupts, we only support
- * those with single CPU as the destination, e.g. user
- * configures the interrupts via /proc/irq or uses
- * irqbalance to make the interrupts single-CPU.
- *
- * We will support full lowest-priority interrupt later.
- *
- * In addition, we can only inject generic interrupts using
- * the PI mechanism, refuse to route others through it.
- */
- if (!new || new->type != KVM_IRQ_ROUTING_MSI)
- goto do_remapping;
-
- kvm_set_msi_irq(kvm, new, &irq);
-
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
- !kvm_irq_is_postable(&irq))
- goto do_remapping;
-
- vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
- vcpu_info.vector = irq.vector;
-
- trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
- vcpu_info.vector, vcpu_info.pi_desc_addr, true);
-
- return irq_set_vcpu_affinity(host_irq, &vcpu_info);
-do_remapping:
- return irq_set_vcpu_affinity(host_irq, NULL);
+ struct vcpu_data vcpu_info = {
+ .vector = vector,
+ };
+
+ /* The caller passes a NULL vCPU to request remapped (non-posted) mode. */
+ if (!vcpu)
+ return irq_set_vcpu_affinity(host_irq, NULL);
+
+ vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
+ trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, guest_irq,
+ vcpu_info.vector, vcpu_info.pi_desc_addr, true);
+
+ return irq_set_vcpu_affinity(host_irq, &vcpu_info);
}
@@ -15,7 +15,8 @@ void __init pi_init_cpu(int cpu);
bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
unsigned int host_irq, uint32_t guest_irq,
- struct kvm_kernel_irq_routing_entry *new);
+ struct kvm_kernel_irq_routing_entry *new,
+ struct kvm_vcpu *vcpu, u32 vector);
void vmx_pi_start_assignment(struct kvm *kvm);
static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
@@ -13567,6 +13567,43 @@ bool kvm_arch_has_irq_bypass(void)
}
EXPORT_SYMBOL_GPL(kvm_arch_has_irq_bypass);
+static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd,
+ struct kvm_kernel_irq_routing_entry *old,
+ struct kvm_kernel_irq_routing_entry *new)
+{
+ struct kvm *kvm = irqfd->kvm;
+ struct kvm_vcpu *vcpu = NULL;
+ struct kvm_lapic_irq irq = {}; /* zeroed: .vector is passed on even if @new isn't MSI */
+
+ if (!irqchip_in_kernel(kvm) ||
+ !kvm_arch_has_irq_bypass() ||
+ !kvm_arch_has_assigned_device(kvm))
+ return 0;
+
+ if (new && new->type == KVM_IRQ_ROUTING_MSI) {
+ kvm_set_msi_irq(kvm, new, &irq);
+
+ /*
+ * Force remapped mode if hardware doesn't support posting the
+ * virtual interrupt to a vCPU. Only IRQs are postable (NMIs,
+ * SMIs, etc. are not), and neither AMD nor Intel IOMMUs support
+ * posting multicast/broadcast IRQs. If the interrupt can't be
+ * posted, the device MSI needs to be routed to the host so that
+ * the guest's desired interrupt can be synthesized by KVM.
+ *
+ * This means that KVM can only post lowest-priority interrupts
+ * if they have a single CPU as the destination, e.g. only if
+ * the guest has affined the interrupt to a single vCPU.
+ */
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+ !kvm_irq_is_postable(&irq))
+ vcpu = NULL;
+ }
+
+ return kvm_x86_call(pi_update_irte)(irqfd, kvm, irqfd->producer->irq,
+ irqfd->gsi, new, vcpu, irq.vector);
+}
+
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
struct irq_bypass_producer *prod)
{
@@ -13581,8 +13618,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
irqfd->producer = prod;
if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
- ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
- irqfd->gsi, &irqfd->irq_entry);
+ ret = kvm_pi_update_irte(irqfd, NULL, &irqfd->irq_entry);
if (ret)
kvm_arch_end_assignment(irqfd->kvm);
}
@@ -13610,8 +13646,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
spin_lock_irq(&kvm->irqfds.lock);
if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
- ret = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, prod->irq,
- irqfd->gsi, NULL);
+ ret = kvm_pi_update_irte(irqfd, &irqfd->irq_entry, NULL);
if (ret)
pr_info("irq bypass consumer (token %p) unregistration fails: %d\n",
irqfd->consumer.token, ret);
@@ -13628,8 +13663,7 @@ int kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
struct kvm_kernel_irq_routing_entry *old,
struct kvm_kernel_irq_routing_entry *new)
{
- return kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, irqfd->producer->irq,
- irqfd->gsi, new);
+ return kvm_pi_update_irte(irqfd, old, new);
}
bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
Hoist the logic for identifying the target vCPU for a posted interrupt into
common x86.  The code is functionally identical between Intel and AMD.

Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +-
 arch/x86/kvm/svm/avic.c         | 83 ++++++++-------------------------
 arch/x86/kvm/svm/svm.h          |  3 +-
 arch/x86/kvm/vmx/posted_intr.c  | 56 ++++++----------------
 arch/x86/kvm/vmx/posted_intr.h  |  3 +-
 arch/x86/kvm/x86.c              | 46 +++++++++++++++---
 6 files changed, 81 insertions(+), 113 deletions(-)