diff mbox

[8/9] KVM: x86: Add EOI exit bitmap inference

Message ID 1438788228-34856-9-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paolo Bonzini Aug. 5, 2015, 3:23 p.m. UTC
From: Steve Rutherford <srutherford@google.com>

In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, for the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford <srutherford@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  9 ++++++---
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/ioapic.h             |  2 ++
 arch/x86/kvm/irq_comm.c           | 42 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.c              |  3 +--
 arch/x86/kvm/x86.c                |  9 ++++++++-
 include/linux/kvm_host.h          | 17 ++++++++++++++++
 virt/kvm/irqchip.c                | 12 ++---------
 8 files changed, 79 insertions(+), 16 deletions(-)

Comments

Wu, Feng Aug. 7, 2015, 5:43 a.m. UTC | #1
> -----Original Message-----
> From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On
> Behalf Of Paolo Bonzini
> Sent: Wednesday, August 05, 2015 11:24 PM
> To: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
> Cc: Steve Rutherford; rkrcmar@redhat.com
> Subject: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference
> 
> From: Steve Rutherford <srutherford@google.com>
> 
> In order to support a userspace IOAPIC interacting with an in kernel
> APIC, the EOI exit bitmaps need to be configurable.
> 
> If the IOAPIC is in userspace (i.e. the irqchip has been split), the
> EOI exit bitmaps will be set whenever the GSI Routes are configured.
> In particular, for the low MSI routes are reservable for userspace
> IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
> destination vector of the route will be set for the destination VCPU.
> 
> The intention is for the userspace IOAPICs to use the reservable MSI
> routes to inject interrupts into the guest.
> 
> This is a slight abuse of the notion of an MSI Route, given that MSIs
> classically bypass the IOAPIC. It might be worthwhile to add an
> additional route type to improve clarity.
> 
> Compile tested for Intel x86.
> 
> Signed-off-by: Steve Rutherford <srutherford@google.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  Documentation/virtual/kvm/api.txt |  9 ++++++---
>  arch/x86/include/asm/kvm_host.h   |  1 +
>  arch/x86/kvm/ioapic.h             |  2 ++
>  arch/x86/kvm/irq_comm.c           | 42
> +++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/lapic.c              |  3 +--
>  arch/x86/kvm/x86.c                |  9 ++++++++-
>  include/linux/kvm_host.h          | 17 ++++++++++++++++
>  virt/kvm/irqchip.c                | 12 ++---------
>  8 files changed, 79 insertions(+), 16 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt
> b/Documentation/virtual/kvm/api.txt
> index bda6cb747b23..dcd748e2d46d 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3635,7 +3635,7 @@ KVM handlers should exit to userspace with rc =
> -EREMOTE.
>  7.5 KVM_CAP_SPLIT_IRQCHIP
> 
>  Architectures: x86
> -Parameters: None
> +Parameters: args[0] - number of routes reserved for userspace IOAPICs
>  Returns: 0 on success, -1 on error
> 
>  Create a local apic for each processor in the kernel. This can be used
> @@ -3643,8 +3643,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace
> VMM wishes to emulate the
>  IOAPIC and PIC (and also the PIT, even though this has to be enabled
>  separately).
> 
> -This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in
> kernel
> -IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
> +This capability also enables in kernel routing of interrupt requests;
> +when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type
> are
> +used in the IRQ routing table.  The first args[0] MSI routes are reserved
> +for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
> +a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
> 
>  Fails if VCPU has already been created, or if the irqchip is already in the
>  kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 4294722dfd1d..4bc714f7b164 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -687,6 +687,7 @@ struct kvm_arch {
>  	u64 disabled_quirks;
> 
>  	bool irqchip_split;
> +	u8 nr_reserved_ioapic_pins;
>  };
> 
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
> index a8842c0dee73..084617d37c74 100644
> --- a/arch/x86/kvm/ioapic.h
> +++ b/arch/x86/kvm/ioapic.h
> @@ -9,6 +9,7 @@ struct kvm;
>  struct kvm_vcpu;
> 
>  #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
> +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
>  #define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
>  #define IOAPIC_EDGE_TRIG  0
>  #define IOAPIC_LEVEL_TRIG 1
> @@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
> kvm_lapic *src,
>  int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
>  int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
>  void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
> +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
> 
>  #endif
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 67f6b62a6814..177460998bb0 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
>  {
>  	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
>  }
> +
> +void kvm_arch_irq_routing_update(struct kvm *kvm)
> +{
> +	if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
> +		return;
> +	kvm_make_scan_ioapic_request(kvm);
> +}
> +
> +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_kernel_irq_routing_entry *entry;
> +	struct kvm_irq_routing_table *table;
> +	u32 i, nr_ioapic_pins;
> +	int idx;
> +
> +	/* kvm->irq_routing must be read after clearing
> +	 * KVM_SCAN_IOAPIC. */
> +	smp_mb();
> +	idx = srcu_read_lock(&kvm->irq_srcu);
> +	table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> +	nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
> +			       kvm->arch.nr_reserved_ioapic_pins);
> +	for (i = 0; i < nr_ioapic_pins; ++i) {
> +		hlist_for_each_entry(entry, &table->map[i], link) {
> +			u32 dest_id, dest_mode;
> +
> +			if (entry->type != KVM_IRQ_ROUTING_MSI)
> +				continue;
> +			dest_id = (entry->msi.address_lo >> 12) & 0xff;
> +			dest_mode = (entry->msi.address_lo >> 2) & 0x1;
> +			if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
> +						dest_mode)) {
> +				u32 vector = entry->msi.data & 0xff;
> +
> +				__set_bit(vector,
> +					  (unsigned long *) eoi_exit_bitmap);
> +			}
> +		}
> +	}
> +	srcu_read_unlock(&kvm->irq_srcu, idx);
> +}
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 14b5603ef6c5..38c580aa27c2 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -209,8 +209,7 @@ out:
>  	if (old)
>  		kfree_rcu(old, rcu);
> 
> -	if (ioapic_in_kernel(kvm))
> -		kvm_vcpu_request_scan_ioapic(kvm);
> +	kvm_make_scan_ioapic_request(kvm);
>  }
> 
>  static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 16e2f3c577c7..c9fb11491aa3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3571,6 +3571,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm
> *kvm,
>  		break;
>  	case KVM_CAP_SPLIT_IRQCHIP: {
>  		mutex_lock(&kvm->lock);
> +		r = -EINVAL;
> +		if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
> +			goto split_irqchip_unlock;
>  		r = -EEXIST;
>  		if (irqchip_in_kernel(kvm))
>  			goto split_irqchip_unlock;
> @@ -3582,6 +3585,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm
> *kvm,
>  		/* Pairs with irqchip_in_kernel. */
>  		smp_wmb();
>  		kvm->arch.irqchip_split = true;
> +		kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
>  		r = 0;
>  split_irqchip_unlock:
>  		mutex_unlock(&kvm->lock);
> @@ -6173,7 +6177,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu
> *vcpu)
> 
>  	memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
> 
> -	kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
> +	if (irqchip_split(vcpu->kvm))
> +		kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
> +	else
> +		kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
>  	kvm_x86_ops->load_eoi_exitmap(vcpu);
>  }
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 653d494e13d1..27ccdf91a465 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -329,6 +329,19 @@ struct kvm_kernel_irq_routing_entry {
>  	struct hlist_node link;
>  };
> 
> +#ifdef CONFIG_HAVE_KVM_IRQCHIP
> +struct kvm_irq_routing_table {
> +	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> +	struct kvm_kernel_irq_routing_entry *rt_entries;

This filed doesn't exist anymore. In fact, this changes is also in my
VT-d PI patches. If this series get merged first, I can rebase my
patches then.

Thanks,
Feng

> +	u32 nr_rt_entries;
> +	/*
> +	 * Array indexed by gsi. Each entry contains list of irq chips
> +	 * the gsi is connected to.
> +	 */
> +	struct hlist_head map[0];
> +};
> +#endif
> +
>  #ifndef KVM_PRIVATE_MEM_SLOTS
>  #define KVM_PRIVATE_MEM_SLOTS 0
>  #endif
> @@ -455,10 +468,14 @@ void vcpu_put(struct kvm_vcpu *vcpu);
> 
>  #ifdef __KVM_HAVE_IOAPIC
>  void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
> +void kvm_arch_irq_routing_update(struct kvm *kvm);
>  #else
>  static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
>  {
>  }
> +static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
> +{
> +}
>  #endif
> 
>  #ifdef CONFIG_HAVE_KVM_IRQFD
> diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> index 21c14244f4c4..4f85c6ee96dc 100644
> --- a/virt/kvm/irqchip.c
> +++ b/virt/kvm/irqchip.c
> @@ -31,16 +31,6 @@
>  #include <trace/events/kvm.h>
>  #include "irq.h"
> 
> -struct kvm_irq_routing_table {
> -	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> -	u32 nr_rt_entries;
> -	/*
> -	 * Array indexed by gsi. Each entry contains list of irq chips
> -	 * the gsi is connected to.
> -	 */
> -	struct hlist_head map[0];
> -};
> -
>  int kvm_irq_map_gsi(struct kvm *kvm,
>  		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
>  {
> @@ -227,6 +217,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
>  	kvm_irq_routing_update(kvm);
>  	mutex_unlock(&kvm->irq_lock);
> 
> +	kvm_arch_irq_routing_update(kvm);
> +
>  	synchronize_srcu_expedited(&kvm->irq_srcu);
> 
>  	new = old;
> --
> 1.8.3.1
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Aug. 7, 2015, 7:32 a.m. UTC | #2
On 07/08/2015 07:43, Wu, Feng wrote:
>> > +#ifdef CONFIG_HAVE_KVM_IRQCHIP
>> > +struct kvm_irq_routing_table {
>> > +	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
>> > +	struct kvm_kernel_irq_routing_entry *rt_entries;
> This filed doesn't exist anymore. In fact, this changes is also in my
> VT-d PI patches. If this series get merged first, I can rebase my
> patches then.

Thanks!

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Aug. 7, 2015, 7:46 a.m. UTC | #3
> -----Original Message-----
> From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On
> Behalf Of Paolo Bonzini
> Sent: Wednesday, August 05, 2015 11:24 PM
> To: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
> Cc: Steve Rutherford; rkrcmar@redhat.com
> Subject: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference
> 
> From: Steve Rutherford <srutherford@google.com>
> 
> In order to support a userspace IOAPIC interacting with an in kernel
> APIC, the EOI exit bitmaps need to be configurable.
> 
> If the IOAPIC is in userspace (i.e. the irqchip has been split), the
> EOI exit bitmaps will be set whenever the GSI Routes are configured.
> In particular, for the low MSI routes are reservable for userspace
> IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
> destination vector of the route will be set for the destination VCPU.
> 
> The intention is for the userspace IOAPICs to use the reservable MSI
> routes to inject interrupts into the guest.
> 
> This is a slight abuse of the notion of an MSI Route, given that MSIs
> classically bypass the IOAPIC. It might be worthwhile to add an
> additional route type to improve clarity.
> 
> Compile tested for Intel x86.
> 
> Signed-off-by: Steve Rutherford <srutherford@google.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  Documentation/virtual/kvm/api.txt |  9 ++++++---
>  arch/x86/include/asm/kvm_host.h   |  1 +
>  arch/x86/kvm/ioapic.h             |  2 ++
>  arch/x86/kvm/irq_comm.c           | 42
> +++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/lapic.c              |  3 +--
>  arch/x86/kvm/x86.c                |  9 ++++++++-
>  include/linux/kvm_host.h          | 17 ++++++++++++++++
>  virt/kvm/irqchip.c                | 12 ++---------
>  8 files changed, 79 insertions(+), 16 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt
> b/Documentation/virtual/kvm/api.txt
> index bda6cb747b23..dcd748e2d46d 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3635,7 +3635,7 @@ KVM handlers should exit to userspace with rc =
> -EREMOTE.
>  7.5 KVM_CAP_SPLIT_IRQCHIP
> 
>  Architectures: x86
> -Parameters: None
> +Parameters: args[0] - number of routes reserved for userspace IOAPICs
>  Returns: 0 on success, -1 on error
> 
>  Create a local apic for each processor in the kernel. This can be used
> @@ -3643,8 +3643,11 @@ instead of KVM_CREATE_IRQCHIP if the userspace
> VMM wishes to emulate the
>  IOAPIC and PIC (and also the PIT, even though this has to be enabled
>  separately).
> 
> -This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in
> kernel
> -IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
> +This capability also enables in kernel routing of interrupt requests;
> +when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type
> are
> +used in the IRQ routing table.  The first args[0] MSI routes are reserved
> +for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
> +a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
> 
>  Fails if VCPU has already been created, or if the irqchip is already in the
>  kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 4294722dfd1d..4bc714f7b164 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -687,6 +687,7 @@ struct kvm_arch {
>  	u64 disabled_quirks;
> 
>  	bool irqchip_split;
> +	u8 nr_reserved_ioapic_pins;
>  };
> 
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
> index a8842c0dee73..084617d37c74 100644
> --- a/arch/x86/kvm/ioapic.h
> +++ b/arch/x86/kvm/ioapic.h
> @@ -9,6 +9,7 @@ struct kvm;
>  struct kvm_vcpu;
> 
>  #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
> +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
>  #define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
>  #define IOAPIC_EDGE_TRIG  0
>  #define IOAPIC_LEVEL_TRIG 1
> @@ -121,5 +122,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct
> kvm_lapic *src,
>  int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
>  int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
>  void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
> +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
> 
>  #endif
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 67f6b62a6814..177460998bb0 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -335,3 +335,45 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
>  {
>  	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
>  }
> +
> +void kvm_arch_irq_routing_update(struct kvm *kvm)
> +{
> +	if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
> +		return;
> +	kvm_make_scan_ioapic_request(kvm);
> +}
> +
> +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_kernel_irq_routing_entry *entry;
> +	struct kvm_irq_routing_table *table;
> +	u32 i, nr_ioapic_pins;
> +	int idx;
> +
> +	/* kvm->irq_routing must be read after clearing
> +	 * KVM_SCAN_IOAPIC. */
> +	smp_mb();
> +	idx = srcu_read_lock(&kvm->irq_srcu);
> +	table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
> +	nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
> +			       kvm->arch.nr_reserved_ioapic_pins);
> +	for (i = 0; i < nr_ioapic_pins; ++i) {
> +		hlist_for_each_entry(entry, &table->map[i], link) {
> +			u32 dest_id, dest_mode;
> +
> +			if (entry->type != KVM_IRQ_ROUTING_MSI)
> +				continue;

If I understand it correctly, here you reserve the low part of the routing
table, and insert entries with KVM_IRQ_ROUTING_MSI type in them,
then you use this as a hint to KVM to set the EOI bit map. I have two
concerns:

- Currently, GSI 2 is used for MSI routing, I want to make sure after this
patch, whether GSI 2 can still be used for _real_ MSI routing, if it can,
does everything work correctly?
- Now, KVM_IRQ_ROUTING_MSI and KVM_IRQ_ROUTING_IRQCHIP
type entries cannot share the same map[gsi] (pls refer to the following
code), so where should be the IOAPIC entries exist in the map[] array?

static int setup_routing_entry(struct kvm_irq_routing_table *rt,
                               struct kvm_kernel_irq_routing_entry *e,
                               const struct kvm_irq_routing_entry *ue)
{

		......

        /*
         * Do not allow GSI to be mapped to the same irqchip more than once.
         * Allow only one to one mapping between GSI and MSI.
         */
        hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
                if (ei->type == KVM_IRQ_ROUTING_MSI ||
                    ue->type == KVM_IRQ_ROUTING_MSI ||
                    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
                        return r;

		......
}

Thanks,
Feng

> +			dest_id = (entry->msi.address_lo >> 12) & 0xff;
> +			dest_mode = (entry->msi.address_lo >> 2) & 0x1;
> +			if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
> +						dest_mode)) {
> +				u32 vector = entry->msi.data & 0xff;
> +
> +				__set_bit(vector,
> +					  (unsigned long *) eoi_exit_bitmap);
> +			}
> +		}
> +	}
> +	srcu_read_unlock(&kvm->irq_srcu, idx);
> +}
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 14b5603ef6c5..38c580aa27c2 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -209,8 +209,7 @@ out:
>  	if (old)
>  		kfree_rcu(old, rcu);
> 
> -	if (ioapic_in_kernel(kvm))
> -		kvm_vcpu_request_scan_ioapic(kvm);
> +	kvm_make_scan_ioapic_request(kvm);
>  }
> 
>  static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 16e2f3c577c7..c9fb11491aa3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3571,6 +3571,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm
> *kvm,
>  		break;
>  	case KVM_CAP_SPLIT_IRQCHIP: {
>  		mutex_lock(&kvm->lock);
> +		r = -EINVAL;
> +		if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
> +			goto split_irqchip_unlock;
>  		r = -EEXIST;
>  		if (irqchip_in_kernel(kvm))
>  			goto split_irqchip_unlock;
> @@ -3582,6 +3585,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm
> *kvm,
>  		/* Pairs with irqchip_in_kernel. */
>  		smp_wmb();
>  		kvm->arch.irqchip_split = true;
> +		kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
>  		r = 0;
>  split_irqchip_unlock:
>  		mutex_unlock(&kvm->lock);
> @@ -6173,7 +6177,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu
> *vcpu)
> 
>  	memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
> 
> -	kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
> +	if (irqchip_split(vcpu->kvm))
> +		kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
> +	else
> +		kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
>  	kvm_x86_ops->load_eoi_exitmap(vcpu);
>  }
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 653d494e13d1..27ccdf91a465 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -329,6 +329,19 @@ struct kvm_kernel_irq_routing_entry {
>  	struct hlist_node link;
>  };
> 
> +#ifdef CONFIG_HAVE_KVM_IRQCHIP
> +struct kvm_irq_routing_table {
> +	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> +	struct kvm_kernel_irq_routing_entry *rt_entries;
> +	u32 nr_rt_entries;
> +	/*
> +	 * Array indexed by gsi. Each entry contains list of irq chips
> +	 * the gsi is connected to.
> +	 */
> +	struct hlist_head map[0];
> +};
> +#endif
> +
>  #ifndef KVM_PRIVATE_MEM_SLOTS
>  #define KVM_PRIVATE_MEM_SLOTS 0
>  #endif
> @@ -455,10 +468,14 @@ void vcpu_put(struct kvm_vcpu *vcpu);
> 
>  #ifdef __KVM_HAVE_IOAPIC
>  void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
> +void kvm_arch_irq_routing_update(struct kvm *kvm);
>  #else
>  static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
>  {
>  }
> +static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
> +{
> +}
>  #endif
> 
>  #ifdef CONFIG_HAVE_KVM_IRQFD
> diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
> index 21c14244f4c4..4f85c6ee96dc 100644
> --- a/virt/kvm/irqchip.c
> +++ b/virt/kvm/irqchip.c
> @@ -31,16 +31,6 @@
>  #include <trace/events/kvm.h>
>  #include "irq.h"
> 
> -struct kvm_irq_routing_table {
> -	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
> -	u32 nr_rt_entries;
> -	/*
> -	 * Array indexed by gsi. Each entry contains list of irq chips
> -	 * the gsi is connected to.
> -	 */
> -	struct hlist_head map[0];
> -};
> -
>  int kvm_irq_map_gsi(struct kvm *kvm,
>  		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
>  {
> @@ -227,6 +217,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
>  	kvm_irq_routing_update(kvm);
>  	mutex_unlock(&kvm->irq_lock);
> 
> +	kvm_arch_irq_routing_update(kvm);
> +
>  	synchronize_srcu_expedited(&kvm->irq_srcu);
> 
>  	new = old;
> --
> 1.8.3.1
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Aug. 7, 2015, 10:16 a.m. UTC | #4
On 07/08/2015 09:46, Wu, Feng wrote:
> If I understand it correctly, here you reserve the low part of the routing
> table, and insert entries with KVM_IRQ_ROUTING_MSI type in them,
> then you use this as a hint to KVM to set the EOI bit map. I have two
> concerns:
> 
> - Currently, GSI 2 is used for MSI routing, I want to make sure after this
> patch, whether GSI 2 can still be used for _real_ MSI routing, if it can,
> does everything work correctly?

The patch has no effect if you use the in-kernel IOAPIC.  If you use a
userspace IOAPIC you won't be able to use GSI 2 for MSI routing because
it falls in the reserved range.

> - Now, KVM_IRQ_ROUTING_MSI and KVM_IRQ_ROUTING_IRQCHIP
> type entries cannot share the same map[gsi] (pls refer to the following
> code), so where should be the IOAPIC entries exist in the map[] array?

With split irqchip, only KVM_IRQ_ROUTING_MSI is used.  Does this answer
your question?

Paolo

> static int setup_routing_entry(struct kvm_irq_routing_table *rt,
>                                struct kvm_kernel_irq_routing_entry *e,
>                                const struct kvm_irq_routing_entry *ue)
> {
> 
> 		......
> 
>         /*
>          * Do not allow GSI to be mapped to the same irqchip more than once.
>          * Allow only one to one mapping between GSI and MSI.
>          */
>         hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
>                 if (ei->type == KVM_IRQ_ROUTING_MSI ||
>                     ue->type == KVM_IRQ_ROUTING_MSI ||
>                     ue->u.irqchip.irqchip == ei->irqchip.irqchip)
>                         return r;
> 
> 		......
> }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Aug. 7, 2015, 10:55 p.m. UTC | #5
> -----Original Message-----
> From: Paolo Bonzini [mailto:paolo.bonzini@gmail.com] On Behalf Of Paolo
> Bonzini
> Sent: Friday, August 07, 2015 6:17 PM
> To: Wu, Feng; linux-kernel@vger.kernel.org; kvm@vger.kernel.org
> Cc: Steve Rutherford; rkrcmar@redhat.com
> Subject: Re: [PATCH 8/9] KVM: x86: Add EOI exit bitmap inference
> 
> 
> 
> On 07/08/2015 09:46, Wu, Feng wrote:
> > If I understand it correctly, here you reserve the low part of the routing
> > table, and insert entries with KVM_IRQ_ROUTING_MSI type in them,
> > then you use this as a hint to KVM to set the EOI bit map. I have two
> > concerns:
> >
> > - Currently, GSI 2 is used for MSI routing, I want to make sure after this
> > patch, whether GSI 2 can still be used for _real_ MSI routing, if it can,
> > does everything work correctly?
> 
> The patch has no effect if you use the in-kernel IOAPIC.  If you use a
> userspace IOAPIC you won't be able to use GSI 2 for MSI routing because
> it falls in the reserved range.

Good to know this, it addresses my concern, thanks!

> 
> > - Now, KVM_IRQ_ROUTING_MSI and KVM_IRQ_ROUTING_IRQCHIP
> > type entries cannot share the same map[gsi] (pls refer to the following
> > code), so where should be the IOAPIC entries exist in the map[] array?
> 
> With split irqchip, only KVM_IRQ_ROUTING_MSI is used.  Does this answer
> your question?

Ah, I got it, since the IOAPIC is in userspace, there is no need to add the IOAPIC
routing information in the routing table. Thanks for the clarification!

Thanks,
Feng

> 
> Paolo
> 
> > static int setup_routing_entry(struct kvm_irq_routing_table *rt,
> >                                struct kvm_kernel_irq_routing_entry
> *e,
> >                                const struct kvm_irq_routing_entry
> *ue)
> > {
> >
> > 		......
> >
> >         /*
> >          * Do not allow GSI to be mapped to the same irqchip more than
> once.
> >          * Allow only one to one mapping between GSI and MSI.
> >          */
> >         hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
> >                 if (ei->type == KVM_IRQ_ROUTING_MSI ||
> >                     ue->type == KVM_IRQ_ROUTING_MSI ||
> >                     ue->u.irqchip.irqchip == ei->irqchip.irqchip)
> >                         return r;
> >
> > 		......
> > }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index bda6cb747b23..dcd748e2d46d 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3635,7 +3635,7 @@  KVM handlers should exit to userspace with rc = -EREMOTE.
 7.5 KVM_CAP_SPLIT_IRQCHIP
 
 Architectures: x86
-Parameters: None
+Parameters: args[0] - number of routes reserved for userspace IOAPICs
 Returns: 0 on success, -1 on error
 
 Create a local apic for each processor in the kernel. This can be used
@@ -3643,8 +3643,11 @@  instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
 IOAPIC and PIC (and also the PIT, even though this has to be enabled
 separately).
 
-This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel
-IOAPIC or PIC. This also enables in kernel routing of interrupt requests.
+This capability also enables in kernel routing of interrupt requests;
+when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type are
+used in the IRQ routing table.  The first args[0] MSI routes are reserved
+for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
+a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
 
 Fails if VCPU has already been created, or if the irqchip is already in the
 kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4294722dfd1d..4bc714f7b164 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -687,6 +687,7 @@  struct kvm_arch {
 	u64 disabled_quirks;
 
 	bool irqchip_split;
+	u8 nr_reserved_ioapic_pins;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index a8842c0dee73..084617d37c74 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -9,6 +9,7 @@  struct kvm;
 struct kvm_vcpu;
 
 #define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES
 #define IOAPIC_VERSION_ID 0x11	/* IOAPIC version */
 #define IOAPIC_EDGE_TRIG  0
 #define IOAPIC_LEVEL_TRIG 1
@@ -121,5 +122,6 @@  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 
 #endif
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 67f6b62a6814..177460998bb0 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -335,3 +335,45 @@  int kvm_setup_empty_irq_routing(struct kvm *kvm)
 {
 	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
+
+void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+	if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
+		return;
+	kvm_make_scan_ioapic_request(kvm);
+}
+
+void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_kernel_irq_routing_entry *entry;
+	struct kvm_irq_routing_table *table;
+	u32 i, nr_ioapic_pins;
+	int idx;
+
+	/* kvm->irq_routing must be read after clearing
+	 * KVM_SCAN_IOAPIC. */
+	smp_mb();
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
+			       kvm->arch.nr_reserved_ioapic_pins);
+	for (i = 0; i < nr_ioapic_pins; ++i) {
+		hlist_for_each_entry(entry, &table->map[i], link) {
+			u32 dest_id, dest_mode;
+
+			if (entry->type != KVM_IRQ_ROUTING_MSI)
+				continue;
+			dest_id = (entry->msi.address_lo >> 12) & 0xff;
+			dest_mode = (entry->msi.address_lo >> 2) & 0x1;
+			if (kvm_apic_match_dest(vcpu, NULL, 0, dest_id,
+						dest_mode)) {
+				u32 vector = entry->msi.data & 0xff;
+
+				__set_bit(vector,
+					  (unsigned long *) eoi_exit_bitmap);
+			}
+		}
+	}
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 14b5603ef6c5..38c580aa27c2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -209,8 +209,7 @@  out:
 	if (old)
 		kfree_rcu(old, rcu);
 
-	if (ioapic_in_kernel(kvm))
-		kvm_vcpu_request_scan_ioapic(kvm);
+	kvm_make_scan_ioapic_request(kvm);
 }
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 16e2f3c577c7..c9fb11491aa3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3571,6 +3571,9 @@  static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		break;
 	case KVM_CAP_SPLIT_IRQCHIP: {
 		mutex_lock(&kvm->lock);
+		r = -EINVAL;
+		if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
+			goto split_irqchip_unlock;
 		r = -EEXIST;
 		if (irqchip_in_kernel(kvm))
 			goto split_irqchip_unlock;
@@ -3582,6 +3585,7 @@  static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		/* Pairs with irqchip_in_kernel. */
 		smp_wmb();
 		kvm->arch.irqchip_split = true;
+		kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
 		r = 0;
 split_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
@@ -6173,7 +6177,10 @@  static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 
 	memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
 
-	kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
+	if (irqchip_split(vcpu->kvm))
+		kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
+	else
+		kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
 	kvm_x86_ops->load_eoi_exitmap(vcpu);
 }
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 653d494e13d1..27ccdf91a465 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -329,6 +329,19 @@  struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+struct kvm_irq_routing_table {
+	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+	struct kvm_kernel_irq_routing_entry *rt_entries;
+	u32 nr_rt_entries;
+	/*
+	 * Array indexed by gsi. Each entry contains list of irq chips
+	 * the gsi is connected to.
+	 */
+	struct hlist_head map[0];
+};
+#endif
+
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
@@ -455,10 +468,14 @@  void vcpu_put(struct kvm_vcpu *vcpu);
 
 #ifdef __KVM_HAVE_IOAPIC
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+void kvm_arch_irq_routing_update(struct kvm *kvm);
 #else
 static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 }
+static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+}
 #endif
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c14244f4c4..4f85c6ee96dc 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,16 +31,6 @@ 
 #include <trace/events/kvm.h>
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-	u32 nr_rt_entries;
-	/*
-	 * Array indexed by gsi. Each entry contains list of irq chips
-	 * the gsi is connected to.
-	 */
-	struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
 		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
@@ -227,6 +217,8 @@  int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
+	kvm_arch_irq_routing_update(kvm);
+
 	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;