[RFC,4/4] KVM: x86: Add support for local interrupt requests from userspace
diff mbox

Message ID 1431481652-27268-4-git-send-email-srutherford@google.com
State New
Headers show

Commit Message

Steve Rutherford May 13, 2015, 1:47 a.m. UTC
In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupt requests.

This adds the ioctl KVM_REQUEST_LOCAL_INTERRUPT and kvm exit
KVM_EXIT_GET_EXTINT.

The vm ioctl KVM_REQUEST_LOCAL_INTERRUPT makes a KVM_REQ_EVENT request
on the BSP, which causes the BSP to exit to userspace to fetch the
vector of the underlying external interrupt, which the BSP then
injects into the guest. This matches the PIC spec, and is necessary to
boot Windows.

Boots and passes the KVM unit tests on intel x86 with the
PIC/PIT/IOAPIC in userspace (under a non-QEMU VMM). Boots and passes
the KVM unit tests under normal conditions as well.

SVM support and device assignment are untested with this feature
enabled, but testing for both is in the works.

Compiles for ARM/x86/PPC.

Signed-off-by: Steve Rutherford <srutherford@google.com>
---
 Documentation/virtual/kvm/api.txt |  9 +++++++
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/irq.c                |  6 ++++-
 arch/x86/kvm/lapic.c              |  7 ++++++
 arch/x86/kvm/lapic.h              |  2 ++
 arch/x86/kvm/x86.c                | 53 ++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/kvm.h          |  6 +++++
 7 files changed, 80 insertions(+), 4 deletions(-)

Comments

Jan Kiszka May 13, 2015, 6:12 a.m. UTC | #1
On 2015-05-13 03:47, Steve Rutherford wrote:
> In order to enable userspace PIC support, the userspace PIC needs to
> be able to inject local interrupt requests.
> 
> This adds the ioctl KVM_REQUEST_LOCAL_INTERRUPT and kvm exit
> KVM_EXIT_GET_EXTINT.
> 
> The vm ioctl KVM_REQUEST_LOCAL_INTERRUPT makes a KVM_REQ_EVENT request
> on the BSP, which causes the BSP to exit to userspace to fetch the
> vector of the underlying external interrupt, which the BSP then
> injects into the guest. This matches the PIC spec, and is necessary to
> boot Windows.

The API name seems too generic, given the fairly specific use case. As
it only allows to kick the BSP, not any VCPU, that should be clarified.
Maybe call it KVM_REQUEST_PIC_INJECTION or so. Or allow userspace to
specify the target VCPU, then it's a bit more generic again.

Actually, when looking at the MultiProcessor spec, you will find
multiple models for injecting PIC interrupts into CPU APICs. Just in the
PIC Mode, the BSP is the only possible target. In the other modes, all
APICs can receive PIC interrupts, and either the IOAPIC or the local
APIC's LINT0 configuration decide about the effective target. We should
allow to model all modes, based on userspace decisions.

> 
> Boots and passes the KVM unit tests on intel x86 with the
> PIC/PIT/IOAPIC in userspace (under a non-QEMU VMM). Boots and passes
> the KVM unit tests under normal conditions as well.

Do you plan to provide a reference implementation for an open source
userspace VMM as well, once the kernel side is settled?

> 
> SVM support and device assignment are untested with this feature
> enabled, but testing for both is in the works.
> 
> Compiles for ARM/x86/PPC.
> 
> Signed-off-by: Steve Rutherford <srutherford@google.com>
> ---
>  Documentation/virtual/kvm/api.txt |  9 +++++++
>  arch/x86/include/asm/kvm_host.h   |  1 +
>  arch/x86/kvm/irq.c                |  6 ++++-
>  arch/x86/kvm/lapic.c              |  7 ++++++
>  arch/x86/kvm/lapic.h              |  2 ++
>  arch/x86/kvm/x86.c                | 53 ++++++++++++++++++++++++++++++++++++---
>  include/uapi/linux/kvm.h          |  6 +++++
>  7 files changed, 80 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index dd92996..a650321 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2993,6 +2993,15 @@ the ioapic nor the pic in the kernel. Also, enables in kernel routing of
>  interrupt requests. Fails if VCPU has already been created, or if the irqchip is
>  already in the kernel.
>  
> +4.97 KVM_REQUEST_LOCAL_INTERRUPT
> +
> +Capability: KVM_CAP_SPLIT_IRQCHIP
> +Type: VM ioctl
> +Parameters: none
> +Returns: 0 on success, -1 on error.
> +
> +Informs the kernel that userspace has a pending external interrupt.
> +
>  
>  5. The kvm_run structure
>  ------------------------
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b1978f1..602ea70 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -542,6 +542,7 @@ struct kvm_vcpu_arch {
>  
>  	u64 eoi_exit_bitmaps[4];
>  	int pending_ioapic_eoi;
> +	bool pending_external;
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index 706e47a..487b5f5 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -43,7 +43,11 @@ EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
>   */
>  static int kvm_cpu_has_extint(struct kvm_vcpu *v)
>  {
> -	if (kvm_apic_accept_pic_intr(v))
> +	u8 accept = kvm_apic_accept_pic_intr(v);
> +
> +	if (accept && irqchip_split(v->kvm))
> +		return v->arch.pending_external;
> +	else if (accept)
>  		return pic_irqchip(v->kvm)->output;	/* PIC */
>  	else
>  		return 0;
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 7533b87..9a021f7 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2089,3 +2089,10 @@ void kvm_lapic_init(void)
>  	jump_label_rate_limit(&apic_hw_disabled, HZ);
>  	jump_label_rate_limit(&apic_sw_disabled, HZ);
>  }
> +
> +void kvm_request_local_interrupt(struct kvm_vcpu *vcpu)
> +{
> +	vcpu->arch.pending_external = true;
> +	kvm_make_request(KVM_REQ_EVENT, vcpu);
> +	kvm_vcpu_kick(vcpu);
> +}
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 71b150c..66bb780 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -63,6 +63,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
>  		unsigned long *dest_map);
>  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
>  
> +void kvm_request_local_interrupt(struct kvm_vcpu *vcpu);
> +
>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>  		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6127fe7..b7ceff3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -65,6 +65,8 @@
>  #include <asm/pvclock.h>
>  #include <asm/div64.h>
>  
> +#define GET_VECTOR_FROM_USERSPACE 1
> +
>  #define MAX_IO_MSRS 256
>  #define KVM_MAX_MCE_BANKS 32
>  #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
> @@ -4146,6 +4148,30 @@ split_irqchip_unlock:
>  		mutex_unlock(&kvm->lock);
>  		break;
>  	}
> +	case KVM_REQUEST_LOCAL_INTERRUPT: {
> +		int i;
> +		struct kvm_vcpu *vcpu;
> +		struct kvm_vcpu *bsp_vcpu = NULL;
> +
> +		mutex_lock(&kvm->lock);
> +		r = -EEXIST;

Nitpicking, EEXIST seems confusing. I would go for EINVAL here as well.

> +		if (!irqchip_split(kvm))
> +			goto out;
> +		kvm_for_each_vcpu(i, vcpu, kvm) {
> +			if (kvm_vcpu_is_reset_bsp(vcpu)) {
> +				bsp_vcpu = vcpu;
> +				continue;
> +			}
> +		}
> +		r = -EINVAL;
> +		if (bsp_vcpu == NULL)
> +			goto interrupt_unlock;
> +		kvm_request_local_interrupt(bsp_vcpu);
> +		r = 0;
> +interrupt_unlock:
> +		mutex_unlock(&kvm->lock);
> +		break;
> +	}
>  
>  	default:
>  		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
> @@ -6123,6 +6149,16 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
>  	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
>  }
>  
> +static int pending_userspace_external_intr(struct kvm_vcpu *vcpu)
> +{
> +	if (vcpu->arch.pending_external && kvm_apic_accept_pic_intr(vcpu)) {
> +		vcpu->arch.pending_external = false;
> +		return true;
> +	} else
> +		return false;
> +
> +}
> +
>  static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>  {
>  	int r;
> @@ -6187,7 +6223,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>  				return r;
>  		}
>  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> +			if (irqchip_split(vcpu->kvm) &&
> +			    pending_userspace_external_intr(vcpu)) {
> +				return GET_VECTOR_FROM_USERSPACE;
> +			}
> +			kvm_queue_interrupt(vcpu,
> +					    kvm_cpu_get_interrupt(vcpu),
>  					    false);
>  			kvm_x86_ops->set_irq(vcpu);
>  		}
> @@ -6351,13 +6392,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	}
>  
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> +		int event;
>  		kvm_apic_accept_events(vcpu);
>  		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
>  			r = 1;
>  			goto out;
>  		}
> -
> -		if (inject_pending_event(vcpu, req_int_win) != 0)
> +		event = inject_pending_event(vcpu, req_int_win);
> +		if (event == GET_VECTOR_FROM_USERSPACE) {
> +			vcpu->run->exit_reason = KVM_EXIT_GET_EXTINT;
> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> +			r = 0;
> +			goto out;
> +		} else if (event != 0)
>  			req_immediate_exit = true;
>  		/* enable NMI/IRQ window open exits if needed */
>  		else if (vcpu->arch.nmi_pending)
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 2305948..368e80a 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -184,6 +184,7 @@ struct kvm_s390_skeys {
>  #define KVM_EXIT_SYSTEM_EVENT     24
>  #define KVM_EXIT_S390_STSI        25
>  #define KVM_EXIT_IOAPIC_EOI       26
> +#define KVM_EXIT_GET_EXTINT       27
>  
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  /* Emulate instruction failed. */
> @@ -334,6 +335,10 @@ struct kvm_run {
>  		struct {
>  			__u8 vector;
>  		} eoi;
> +		/* KVM_EXIT_GET_EXTINT */
> +		struct {
> +			__u8 vector;
> +		} extint;
>  		/* Fix the size of the union. */
>  		char padding[256];
>  	};
> @@ -1208,6 +1213,7 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
>  /* Available with KVM_CAP_SPLIT_IRQCHIP */
>  #define KVM_CREATE_SPLIT_IRQCHIP  _IO(KVMIO, 0xb7)
> +#define KVM_REQUEST_LOCAL_INTERRUPT _IO(KVMIO, 0xb8)
>  
>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
>  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
> 

Jan
Paolo Bonzini May 13, 2015, 7:22 a.m. UTC | #2
On 13/05/2015 03:47, Steve Rutherford wrote:
> In order to enable userspace PIC support, the userspace PIC needs to
> be able to inject local interrupt requests.
> 
> This adds the ioctl KVM_REQUEST_LOCAL_INTERRUPT and kvm exit
> KVM_EXIT_GET_EXTINT.
> 
> The vm ioctl KVM_REQUEST_LOCAL_INTERRUPT makes a KVM_REQ_EVENT request
> on the BSP, which causes the BSP to exit to userspace to fetch the
> vector of the underlying external interrupt, which the BSP then
> injects into the guest. This matches the PIC spec, and is necessary to
> boot Windows.
> 
> Boots and passes the KVM unit tests on intel x86 with the
> PIC/PIT/IOAPIC in userspace (under a non-QEMU VMM). Boots and passes
> the KVM unit tests under normal conditions as well.
> 
> SVM support and device assignment are untested with this feature
> enabled, but testing for both is in the works.
> 
> Compiles for ARM/x86/PPC.

This may be ENOCOFFEE, but where is extint.vector read?

Could you use KVM_INTERRUPT to set KVM_REQUEST_LOCAL_INTERRUPT _and_ the
interrupt vector at the same time?  This is exactly the logic that is
used for !irqchip, and it should work for the EXTINT case as well.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford May 13, 2015, 10:41 p.m. UTC | #3
On Wed, May 13, 2015 at 08:12:59AM +0200, Jan Kiszka wrote:
> On 2015-05-13 03:47, Steve Rutherford wrote:
> > In order to enable userspace PIC support, the userspace PIC needs to
> > be able to inject local interrupt requests.
> > 
> > This adds the ioctl KVM_REQUEST_LOCAL_INTERRUPT and kvm exit
> > KVM_EXIT_GET_EXTINT.
> > 
> > The vm ioctl KVM_REQUEST_LOCAL_INTERRUPT makes a KVM_REQ_EVENT request
> > on the BSP, which causes the BSP to exit to userspace to fetch the
> > vector of the underlying external interrupt, which the BSP then
> > injects into the guest. This matches the PIC spec, and is necessary to
> > boot Windows.
> 
> The API name seems too generic, given the fairly specific use case. As
> it only allows to kick the BSP, not any VCPU, that should be clarified.
> Maybe call it KVM_REQUEST_PIC_INJECTION or so. Or allow userspace to
> specify the target VCPU, then it's a bit more generic again.
> 
> Actually, when looking at the MultiProcessor spec, you will find
> multiple models for injecting PIC interrupts into CPU APICs. Just in the
> PIC Mode, the BSP is the only possible target. In the other modes, all
> APICs can receive PIC interrupts, and either the IOAPIC or the local
> APIC's LINT0 configuration decide about the effective target. We should
> allow to model all modes, based on userspace decisions.
> 

Supporting the other modes seems reasonable, but I'm not certain I have an outlet for testing them for correctness. I'm not even certain which OSes use the other modes. Unless there is a common OS that uses the other modes, and a straightforward way for me to test the other modes, I'd rather just rename the API to be less generic.

> > 
> > Boots and passes the KVM unit tests on intel x86 with the
> > PIC/PIT/IOAPIC in userspace (under a non-QEMU VMM). Boots and passes
> > the KVM unit tests under normal conditions as well.
> 
> Do you plan to provide a reference implementation for an open source
> userspace VMM as well, once the kernel side is settled?
> 

Not at the moment (given that I'm not all that familiar with QEMU). I'm definitely willing to help guide someone else through the process.

Steve
> > 
> > SVM support and device assignment are untested with this feature
> > enabled, but testing for both is in the works.
> > 
> > Compiles for ARM/x86/PPC.
> > 
> > Signed-off-by: Steve Rutherford <srutherford@google.com>
> > ---
> >  Documentation/virtual/kvm/api.txt |  9 +++++++
> >  arch/x86/include/asm/kvm_host.h   |  1 +
> >  arch/x86/kvm/irq.c                |  6 ++++-
> >  arch/x86/kvm/lapic.c              |  7 ++++++
> >  arch/x86/kvm/lapic.h              |  2 ++
> >  arch/x86/kvm/x86.c                | 53 ++++++++++++++++++++++++++++++++++++---
> >  include/uapi/linux/kvm.h          |  6 +++++
> >  7 files changed, 80 insertions(+), 4 deletions(-)
> > 
> > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> > index dd92996..a650321 100644
> > --- a/Documentation/virtual/kvm/api.txt
> > +++ b/Documentation/virtual/kvm/api.txt
> > @@ -2993,6 +2993,15 @@ the ioapic nor the pic in the kernel. Also, enables in kernel routing of
> >  interrupt requests. Fails if VCPU has already been created, or if the irqchip is
> >  already in the kernel.
> >  
> > +4.97 KVM_REQUEST_LOCAL_INTERRUPT
> > +
> > +Capability: KVM_CAP_SPLIT_IRQCHIP
> > +Type: VM ioctl
> > +Parameters: none
> > +Returns: 0 on success, -1 on error.
> > +
> > +Informs the kernel that userspace has a pending external interrupt.
> > +
> >  
> >  5. The kvm_run structure
> >  ------------------------
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index b1978f1..602ea70 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -542,6 +542,7 @@ struct kvm_vcpu_arch {
> >  
> >  	u64 eoi_exit_bitmaps[4];
> >  	int pending_ioapic_eoi;
> > +	bool pending_external;
> >  };
> >  
> >  struct kvm_lpage_info {
> > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > index 706e47a..487b5f5 100644
> > --- a/arch/x86/kvm/irq.c
> > +++ b/arch/x86/kvm/irq.c
> > @@ -43,7 +43,11 @@ EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
> >   */
> >  static int kvm_cpu_has_extint(struct kvm_vcpu *v)
> >  {
> > -	if (kvm_apic_accept_pic_intr(v))
> > +	u8 accept = kvm_apic_accept_pic_intr(v);
> > +
> > +	if (accept && irqchip_split(v->kvm))
> > +		return v->arch.pending_external;
> > +	else if (accept)
> >  		return pic_irqchip(v->kvm)->output;	/* PIC */
> >  	else
> >  		return 0;
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 7533b87..9a021f7 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -2089,3 +2089,10 @@ void kvm_lapic_init(void)
> >  	jump_label_rate_limit(&apic_hw_disabled, HZ);
> >  	jump_label_rate_limit(&apic_sw_disabled, HZ);
> >  }
> > +
> > +void kvm_request_local_interrupt(struct kvm_vcpu *vcpu)
> > +{
> > +	vcpu->arch.pending_external = true;
> > +	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > +	kvm_vcpu_kick(vcpu);
> > +}
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 71b150c..66bb780 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -63,6 +63,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
> >  		unsigned long *dest_map);
> >  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
> >  
> > +void kvm_request_local_interrupt(struct kvm_vcpu *vcpu);
> > +
> >  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> >  		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
> >  
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 6127fe7..b7ceff3 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -65,6 +65,8 @@
> >  #include <asm/pvclock.h>
> >  #include <asm/div64.h>
> >  
> > +#define GET_VECTOR_FROM_USERSPACE 1
> > +
> >  #define MAX_IO_MSRS 256
> >  #define KVM_MAX_MCE_BANKS 32
> >  #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
> > @@ -4146,6 +4148,30 @@ split_irqchip_unlock:
> >  		mutex_unlock(&kvm->lock);
> >  		break;
> >  	}
> > +	case KVM_REQUEST_LOCAL_INTERRUPT: {
> > +		int i;
> > +		struct kvm_vcpu *vcpu;
> > +		struct kvm_vcpu *bsp_vcpu = NULL;
> > +
> > +		mutex_lock(&kvm->lock);
> > +		r = -EEXIST;
> 
> Nitpicking, EEXIST seems confusing. I would go for EINVAL here as well.
> 
> > +		if (!irqchip_split(kvm))
> > +			goto out;
> > +		kvm_for_each_vcpu(i, vcpu, kvm) {
> > +			if (kvm_vcpu_is_reset_bsp(vcpu)) {
> > +				bsp_vcpu = vcpu;
> > +				continue;
> > +			}
> > +		}
> > +		r = -EINVAL;
> > +		if (bsp_vcpu == NULL)
> > +			goto interrupt_unlock;
> > +		kvm_request_local_interrupt(bsp_vcpu);
> > +		r = 0;
> > +interrupt_unlock:
> > +		mutex_unlock(&kvm->lock);
> > +		break;
> > +	}
> >  
> >  	default:
> >  		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
> > @@ -6123,6 +6149,16 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
> >  	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
> >  }
> >  
> > +static int pending_userspace_external_intr(struct kvm_vcpu *vcpu)
> > +{
> > +	if (vcpu->arch.pending_external && kvm_apic_accept_pic_intr(vcpu)) {
> > +		vcpu->arch.pending_external = false;
> > +		return true;
> > +	} else
> > +		return false;
> > +
> > +}
> > +
> >  static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> >  {
> >  	int r;
> > @@ -6187,7 +6223,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> >  				return r;
> >  		}
> >  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> > -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> > +			if (irqchip_split(vcpu->kvm) &&
> > +			    pending_userspace_external_intr(vcpu)) {
> > +				return GET_VECTOR_FROM_USERSPACE;
> > +			}
> > +			kvm_queue_interrupt(vcpu,
> > +					    kvm_cpu_get_interrupt(vcpu),
> >  					    false);
> >  			kvm_x86_ops->set_irq(vcpu);
> >  		}
> > @@ -6351,13 +6392,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  	}
> >  
> >  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > +		int event;
> >  		kvm_apic_accept_events(vcpu);
> >  		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
> >  			r = 1;
> >  			goto out;
> >  		}
> > -
> > -		if (inject_pending_event(vcpu, req_int_win) != 0)
> > +		event = inject_pending_event(vcpu, req_int_win);
> > +		if (event == GET_VECTOR_FROM_USERSPACE) {
> > +			vcpu->run->exit_reason = KVM_EXIT_GET_EXTINT;
> > +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> > +			r = 0;
> > +			goto out;
> > +		} else if (event != 0)
> >  			req_immediate_exit = true;
> >  		/* enable NMI/IRQ window open exits if needed */
> >  		else if (vcpu->arch.nmi_pending)
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 2305948..368e80a 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -184,6 +184,7 @@ struct kvm_s390_skeys {
> >  #define KVM_EXIT_SYSTEM_EVENT     24
> >  #define KVM_EXIT_S390_STSI        25
> >  #define KVM_EXIT_IOAPIC_EOI       26
> > +#define KVM_EXIT_GET_EXTINT       27
> >  
> >  /* For KVM_EXIT_INTERNAL_ERROR */
> >  /* Emulate instruction failed. */
> > @@ -334,6 +335,10 @@ struct kvm_run {
> >  		struct {
> >  			__u8 vector;
> >  		} eoi;
> > +		/* KVM_EXIT_GET_EXTINT */
> > +		struct {
> > +			__u8 vector;
> > +		} extint;
> >  		/* Fix the size of the union. */
> >  		char padding[256];
> >  	};
> > @@ -1208,6 +1213,7 @@ struct kvm_s390_ucas_mapping {
> >  #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
> >  /* Available with KVM_CAP_SPLIT_IRQCHIP */
> >  #define KVM_CREATE_SPLIT_IRQCHIP  _IO(KVMIO, 0xb7)
> > +#define KVM_REQUEST_LOCAL_INTERRUPT _IO(KVMIO, 0xb8)
> >  
> >  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> >  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
> > 
> 
> Jan
> 
> -- 
> Siemens AG, Corporate Technology, CT RTC ITP SES-DE
> Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford May 13, 2015, 11:13 p.m. UTC | #4
On Wed, May 13, 2015 at 09:22:21AM +0200, Paolo Bonzini wrote:
> 
> 
> On 13/05/2015 03:47, Steve Rutherford wrote:
> > In order to enable userspace PIC support, the userspace PIC needs to
> > be able to inject local interrupt requests.
> > 
> > This adds the ioctl KVM_REQUEST_LOCAL_INTERRUPT and kvm exit
> > KVM_EXIT_GET_EXTINT.
> > 
> > The vm ioctl KVM_REQUEST_LOCAL_INTERRUPT makes a KVM_REQ_EVENT request
> > on the BSP, which causes the BSP to exit to userspace to fetch the
> > vector of the underlying external interrupt, which the BSP then
> > injects into the guest. This matches the PIC spec, and is necessary to
> > boot Windows.
> > 
> > Boots and passes the KVM unit tests on intel x86 with the
> > PIC/PIT/IOAPIC in userspace (under a non-QEMU VMM). Boots and passes
> > the KVM unit tests under normal conditions as well.
> > 
> > SVM support and device assignment are untested with this feature
> > enabled, but testing for both is in the works.
> > 
> > Compiles for ARM/x86/PPC.
> 
> This may be ENOCOFFEE, but where is extint.vector read?

Uhhh. It's not. Something's wrong here. Looks like I dropped some lines in porting these patches. Somehow this still passes the KVM unit tests o.O

I'll track down the error. 

Steve
> 
> Could you use KVM_INTERRUPT to set KVM_REQUEST_LOCAL_INTERRUPT _and_ the
> interrupt vector at the same time?  This is exactly the logic that is
> used for !irqchip, and it should work for the EXTINT case as well.
> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka May 15, 2015, 1:27 p.m. UTC | #5
On 2015-05-14 00:41, Steve Rutherford wrote:
> On Wed, May 13, 2015 at 08:12:59AM +0200, Jan Kiszka wrote:
>> On 2015-05-13 03:47, Steve Rutherford wrote:
>>> In order to enable userspace PIC support, the userspace PIC needs to
>>> be able to inject local interrupt requests.
>>>
>>> This adds the ioctl KVM_REQUEST_LOCAL_INTERRUPT and kvm exit
>>> KVM_EXIT_GET_EXTINT.
>>>
>>> The vm ioctl KVM_REQUEST_LOCAL_INTERRUPT makes a KVM_REQ_EVENT request
>>> on the BSP, which causes the BSP to exit to userspace to fetch the
>>> vector of the underlying external interrupt, which the BSP then
>>> injects into the guest. This matches the PIC spec, and is necessary to
>>> boot Windows.
>>
>> The API name seems too generic, given the fairly specific use case. As
>> it only allows to kick the BSP, not any VCPU, that should be clarified.
>> Maybe call it KVM_REQUEST_PIC_INJECTION or so. Or allow userspace to
>> specify the target VCPU, then it's a bit more generic again.
>>
>> Actually, when looking at the MultiProcessor spec, you will find
>> multiple models for injecting PIC interrupts into CPU APICs. Just in the
>> PIC Mode, the BSP is the only possible target. In the other modes, all
>> APICs can receive PIC interrupts, and either the IOAPIC or the local
>> APIC's LINT0 configuration decide about the effective target. We should
>> allow to model all modes, based on userspace decisions.
>>
> 
> Supporting the other modes seems reasonable, but I'm not certain I have an outlet for testing them for correctness. I'm not even certain which OSes use the other modes. Unless there is a common OS that uses the other modes, and a straightforward way for me to test the other modes, I'd rather just rename the API to be less generic.

The OS has to configure what the hardware provides, I bet Windows does
so as well. This is a hardware property, thus something userspace (QEMU
& Co.) may want to configure.

> 
>>>
>>> Boots and passes the KVM unit tests on intel x86 with the
>>> PIC/PIT/IOAPIC in userspace (under a non-QEMU VMM). Boots and passes
>>> the KVM unit tests under normal conditions as well.
>>
>> Do you plan to provide a reference implementation for an open source
>> userspace VMM as well, once the kernel side is settled?
>>
> 
> Not at the moment (given that I'm not all that familiar with QEMU). I'm definitely willing to help guide someone else through the process.

It would be fairly valuable to have this tested against a known, public
machine emulator so that we can validate all needs before setting the
new kernel ABI in stone.

I do have an interest in this API as well, sitting on IRQ remapping
hacks and their lacking x2APIC support for too long, but I unfortunately
can't promise bandwidth either. :/

Jan

Patch
diff mbox

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index dd92996..a650321 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2993,6 +2993,15 @@  the ioapic nor the pic in the kernel. Also, enables in kernel routing of
 interrupt requests. Fails if VCPU has already been created, or if the irqchip is
 already in the kernel.
 
+4.97 KVM_REQUEST_LOCAL_INTERRUPT
+
+Capability: KVM_CAP_SPLIT_IRQCHIP
+Type: VM ioctl
+Parameters: none
+Returns: 0 on success, -1 on error.
+
+Informs the kernel that userspace has a pending external interrupt.
+
 
 5. The kvm_run structure
 ------------------------
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b1978f1..602ea70 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -542,6 +542,7 @@  struct kvm_vcpu_arch {
 
 	u64 eoi_exit_bitmaps[4];
 	int pending_ioapic_eoi;
+	bool pending_external;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 706e47a..487b5f5 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -43,7 +43,11 @@  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-	if (kvm_apic_accept_pic_intr(v))
+	u8 accept = kvm_apic_accept_pic_intr(v);
+
+	if (accept && irqchip_split(v->kvm))
+		return v->arch.pending_external;
+	else if (accept)
 		return pic_irqchip(v->kvm)->output;	/* PIC */
 	else
 		return 0;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 7533b87..9a021f7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2089,3 +2089,10 @@  void kvm_lapic_init(void)
 	jump_label_rate_limit(&apic_hw_disabled, HZ);
 	jump_label_rate_limit(&apic_sw_disabled, HZ);
 }
+
+void kvm_request_local_interrupt(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pending_external = true;
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	kvm_vcpu_kick(vcpu);
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 71b150c..66bb780 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -63,6 +63,8 @@  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+void kvm_request_local_interrupt(struct kvm_vcpu *vcpu);
+
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6127fe7..b7ceff3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -65,6 +65,8 @@ 
 #include <asm/pvclock.h>
 #include <asm/div64.h>
 
+#define GET_VECTOR_FROM_USERSPACE 1
+
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
@@ -4146,6 +4148,30 @@  split_irqchip_unlock:
 		mutex_unlock(&kvm->lock);
 		break;
 	}
+	case KVM_REQUEST_LOCAL_INTERRUPT: {
+		int i;
+		struct kvm_vcpu *vcpu;
+		struct kvm_vcpu *bsp_vcpu = NULL;
+
+		mutex_lock(&kvm->lock);
+		r = -EEXIST;
+		if (!irqchip_split(kvm))
+			goto out;
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (kvm_vcpu_is_reset_bsp(vcpu)) {
+				bsp_vcpu = vcpu;
+				continue;
+			}
+		}
+		r = -EINVAL;
+		if (bsp_vcpu == NULL)
+			goto interrupt_unlock;
+		kvm_request_local_interrupt(bsp_vcpu);
+		r = 0;
+interrupt_unlock:
+		mutex_unlock(&kvm->lock);
+		break;
+	}
 
 	default:
 		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
@@ -6123,6 +6149,16 @@  static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
+static int pending_userspace_external_intr(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.pending_external && kvm_apic_accept_pic_intr(vcpu)) {
+		vcpu->arch.pending_external = false;
+		return true;
+	} else
+		return false;
+
+}
+
 static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 {
 	int r;
@@ -6187,7 +6223,12 @@  static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 				return r;
 		}
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
+			if (irqchip_split(vcpu->kvm) &&
+			    pending_userspace_external_intr(vcpu)) {
+				return GET_VECTOR_FROM_USERSPACE;
+			}
+			kvm_queue_interrupt(vcpu,
+					    kvm_cpu_get_interrupt(vcpu),
 					    false);
 			kvm_x86_ops->set_irq(vcpu);
 		}
@@ -6351,13 +6392,19 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+		int event;
 		kvm_apic_accept_events(vcpu);
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 			r = 1;
 			goto out;
 		}
-
-		if (inject_pending_event(vcpu, req_int_win) != 0)
+		event = inject_pending_event(vcpu, req_int_win);
+		if (event == GET_VECTOR_FROM_USERSPACE) {
+			vcpu->run->exit_reason = KVM_EXIT_GET_EXTINT;
+			kvm_make_request(KVM_REQ_EVENT, vcpu);
+			r = 0;
+			goto out;
+		} else if (event != 0)
 			req_immediate_exit = true;
 		/* enable NMI/IRQ window open exits if needed */
 		else if (vcpu->arch.nmi_pending)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 2305948..368e80a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -184,6 +184,7 @@  struct kvm_s390_skeys {
 #define KVM_EXIT_SYSTEM_EVENT     24
 #define KVM_EXIT_S390_STSI        25
 #define KVM_EXIT_IOAPIC_EOI       26
+#define KVM_EXIT_GET_EXTINT       27
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -334,6 +335,10 @@  struct kvm_run {
 		struct {
 			__u8 vector;
 		} eoi;
+		/* KVM_EXIT_GET_EXTINT */
+		struct {
+			__u8 vector;
+		} extint;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1208,6 +1213,7 @@  struct kvm_s390_ucas_mapping {
 #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
 /* Available with KVM_CAP_SPLIT_IRQCHIP */
 #define KVM_CREATE_SPLIT_IRQCHIP  _IO(KVMIO, 0xb7)
+#define KVM_REQUEST_LOCAL_INTERRUPT _IO(KVMIO, 0xb8)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)