diff mbox

[v3,4/4] KVM: x86: Add support for local interrupt requests from userspace

Message ID 1433289107-20638-4-git-send-email-srutherford@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

Steve Rutherford June 2, 2015, 11:51 p.m. UTC
In order to enable userspace PIC support, the userspace PIC needs to
be able to inject local interrupt requests.

This adds the ioctl KVM_REQUEST_PIC_INJECTION and kvm exit
KVM_EXIT_GET_EXTINT.

The vm ioctl KVM_REQUEST_PIC_INJECTION makes a KVM_REQ_EVENT request
on the BSP, which causes the BSP to exit to userspace to fetch the
vector of the underlying external interrupt, which the BSP then
injects into the guest. This matches the PIC spec, and is necessary to
boot Windows.

Compiles for x86.

Update: Boots Windows and passes the KVM Unit Tests.

Signed-off-by: Steve Rutherford <srutherford@google.com>
---
 Documentation/virtual/kvm/api.txt |  9 ++++++
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/irq.c                | 22 +++++++++++++--
 arch/x86/kvm/lapic.c              |  7 +++++
 arch/x86/kvm/lapic.h              |  2 ++
 arch/x86/kvm/x86.c                | 59 +++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/kvm.h          |  7 +++++
 7 files changed, 103 insertions(+), 5 deletions(-)

Comments

Paolo Bonzini June 3, 2015, 9:38 a.m. UTC | #1
On 03/06/2015 01:51, Steve Rutherford wrote:
> In order to enable userspace PIC support, the userspace PIC needs to
> be able to inject local interrupt requests.
> 
> This adds the ioctl KVM_REQUEST_PIC_INJECTION and kvm exit
> KVM_EXIT_GET_EXTINT.
> 
> The vm ioctl KVM_REQUEST_PIC_INJECTION makes a KVM_REQ_EVENT request
> on the BSP, which causes the BSP to exit to userspace to fetch the
> vector of the underlying external interrupt, which the BSP then
> injects into the guest. This matches the PIC spec, and is necessary to
> boot Windows.
> 
> Compiles for x86.
> 
> Update: Boots Windows and passes the KVM Unit Tests.
> 
> Signed-off-by: Steve Rutherford <srutherford@google.com>
> ---
>  Documentation/virtual/kvm/api.txt |  9 ++++++
>  arch/x86/include/asm/kvm_host.h   |  2 ++
>  arch/x86/kvm/irq.c                | 22 +++++++++++++--
>  arch/x86/kvm/lapic.c              |  7 +++++
>  arch/x86/kvm/lapic.h              |  2 ++
>  arch/x86/kvm/x86.c                | 59 +++++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/kvm.h          |  7 +++++
>  7 files changed, 103 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 6ab2a3f7..b5d90cb 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2979,6 +2979,15 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
>  and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
>  which is the maximum number of possibly pending cpu-local interrupts.
>  
> +4.96 KVM_REQUEST_PIC_INJECTION
> +
> +Capability: KVM_CAP_SPLIT_IRQCHIP
> +Type: VM ioctl
> +Parameters: none
> +Returns: 0 on success, -1 on error.
> +
> +Informs the kernel that userspace has a pending external interrupt.
> +

Missing documentation for the new vmexit and kvm_run member.

However, why is the roundtrip to userspace necessary?  Could you pass
the extint index directly as an argument to KVM_INTERRUPT?  It's
backwards-compatible, because KVM_INTERRUPT so far could not be used
together with an in-kernel LAPIC.  If you could do that, you could also
avoid the new userspace_extint_available field.

Userspace can figure out who's the BSP.  The rendez-vous between the
irqchip and the BSP's VCPU thread is still needed, but it can be done
entirely in userspace.

You'd also need much fewer changes to irq.c.  Basically just something like

 int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
         int vector;

-        if (!irqchip_in_kernel(v->kvm))
+        if (!pic_in_kernel(v->kvm) && v->arch.interrupt.pending)
                return v->arch.interrupt.nr;

...

 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
-        if (!irqchip_in_kernel(v->kvm))
+        if (!pic_in_kernel(v->kvm))
                 return v->arch.interrupt.pending;

...

 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-        if (!irqchip_in_kernel(v->kvm))
+        if (!pic_in_kernel(v->kvm))
                 return v->arch.interrupt.pending;

More comments below.

>  5. The kvm_run structure
>  ------------------------
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 4f439ff..0e8b0fc 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -543,6 +543,8 @@ struct kvm_vcpu_arch {
>  
>  	u64 eoi_exit_bitmaps[4];
>  	int pending_ioapic_eoi;
> +	bool userspace_extint_available;
> +	int pending_external_vector;
>  };
>  

>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index 706e47a..1270b2a 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -38,12 +38,25 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
>  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
>  
>  /*
> + * check if there is a pending userspace external interrupt
> + */
> +static int pending_userspace_extint(struct kvm_vcpu *v)
> +{
> +	return v->arch.userspace_extint_available ||
> +	       v->arch.pending_external_vector != -1;
> +}
> +
> +/*
>   * check if there is pending interrupt from
>   * non-APIC source without intack.
>   */
>  static int kvm_cpu_has_extint(struct kvm_vcpu *v)
>  {
> -	if (kvm_apic_accept_pic_intr(v))
> +	u8 accept = kvm_apic_accept_pic_intr(v);
> +
> +	if (accept && irqchip_split(v->kvm))
> +		return pending_userspace_extint(v);
> +	else if (accept)
>  		return pic_irqchip(v->kvm)->output;	/* PIC */

	if (accept) {
		if (irqchip_split(v->kvm))
			return pending_userspace_extint(v);
		else
			return pic_irqchip(v->kvm)->output;
	}

	return 0;

>  	else
>  		return 0;
> @@ -91,7 +104,12 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
>   */
>  static int kvm_cpu_get_extint(struct kvm_vcpu *v)
>  {
> -	if (kvm_cpu_has_extint(v))
> +	if (irqchip_split(v->kvm) && kvm_cpu_has_extint(v)) {
> +		int vector = v->arch.pending_external_vector;
> +
> +		v->arch.pending_external_vector = -1;
> +		return vector;
> +	} else if (kvm_cpu_has_extint(v))
>  		return kvm_pic_read_irq(v->kvm); /* PIC */

Same as above.

>  	return -1;
>  }
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 766d297..012b56ee 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2094,3 +2094,10 @@ void kvm_lapic_init(void)
>  	jump_label_rate_limit(&apic_hw_disabled, HZ);
>  	jump_label_rate_limit(&apic_sw_disabled, HZ);
>  }
> +
> +void kvm_request_pic_injection(struct kvm_vcpu *vcpu)
> +{
> +	vcpu->arch.userspace_extint_available = true;
> +	kvm_make_request(KVM_REQ_EVENT, vcpu);
> +	kvm_vcpu_kick(vcpu);
> +}
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 71b150c..7831e4d 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -63,6 +63,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
>  		unsigned long *dest_map);
>  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
>  
> +void kvm_request_pic_injection(struct kvm_vcpu *vcpu);
> +
>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>  		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 35d13d4..40e7509 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -65,6 +65,8 @@
>  #include <asm/pvclock.h>
>  #include <asm/div64.h>
>  
> +#define GET_VECTOR_FROM_USERSPACE 1
> +
>  #define MAX_IO_MSRS 256
>  #define KVM_MAX_MCE_BANKS 32
>  #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
> @@ -4217,6 +4219,30 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
>  		break;
>  	}
> +	case KVM_REQUEST_PIC_INJECTION: {
> +		int i;
> +		struct kvm_vcpu *vcpu;
> +		struct kvm_vcpu *bsp_vcpu = NULL;
> +
> +		mutex_lock(&kvm->lock);
> +		r = -EEXIST;
> +		if (!irqchip_split(kvm))
> +			goto out;
> +		kvm_for_each_vcpu(i, vcpu, kvm) {
> +			if (kvm_vcpu_is_reset_bsp(vcpu)) {
> +				bsp_vcpu = vcpu;
> +				continue;
> +			}
> +		}
> +		r = -EINVAL;
> +		if (bsp_vcpu == NULL)
> +			goto interrupt_unlock;
> +		kvm_request_pic_injection(bsp_vcpu);
> +		r = 0;
> +interrupt_unlock:
> +		mutex_unlock(&kvm->lock);
> +		break;
> +	}
>  
>  	default:
>  		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
> @@ -6194,6 +6220,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
>  	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
>  }
>  
> +static int must_fetch_userspace_extint(struct kvm_vcpu *vcpu)

I would rename this to kvm_accept_request_pic_injection.

Paolo

> +{
> +	if (vcpu->arch.userspace_extint_available &&
> +	    kvm_apic_accept_pic_intr(vcpu)) {
> +		vcpu->arch.userspace_extint_available = false;
> +		return true;
> +	} else
> +		return false;
> +
> +}
> +
>  static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>  {
>  	int r;
> @@ -6258,7 +6295,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
>  				return r;
>  		}
>  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> +			if (irqchip_split(vcpu->kvm) &&
> +			    must_fetch_userspace_extint(vcpu)) {
> +				return GET_VECTOR_FROM_USERSPACE;
> +			}
> +			kvm_queue_interrupt(vcpu,
> +					    kvm_cpu_get_interrupt(vcpu),
>  					    false);
>  			kvm_x86_ops->set_irq(vcpu);
>  		}
> @@ -6424,13 +6466,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	}
>  
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> +		int event;
>  		kvm_apic_accept_events(vcpu);
>  		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
>  			r = 1;
>  			goto out;
>  		}
> -
> -		if (inject_pending_event(vcpu, req_int_win) != 0)
> +		event = inject_pending_event(vcpu, req_int_win);
> +		if (event == GET_VECTOR_FROM_USERSPACE) {
> +			vcpu->run->exit_reason = KVM_EXIT_GET_EXTINT;
> +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> +			r = 0;
> +			goto out;
> +		} else if (event != 0)
>  			req_immediate_exit = true;
>  		/* enable NMI/IRQ window open exits if needed */
>  		else if (vcpu->arch.nmi_pending)
> @@ -6747,6 +6795,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	if (vcpu->sigset_active)
>  		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
>  
> +	if (vcpu->run->exit_reason == KVM_EXIT_GET_EXTINT)
> +		vcpu->arch.pending_external_vector = vcpu->run->extint.vector;
> +
>  	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
>  		kvm_vcpu_block(vcpu);
>  		kvm_apic_accept_events(vcpu);
> @@ -7536,6 +7587,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  	kvm_async_pf_hash_reset(vcpu);
>  	kvm_pmu_init(vcpu);
>  
> +	vcpu->arch.pending_external_vector = -1;
> +
>  	return 0;
>  fail_free_wbinvd_dirty_mask:
>  	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 826a08d..0cf7ed6 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -184,6 +184,7 @@ struct kvm_s390_skeys {
>  #define KVM_EXIT_SYSTEM_EVENT     24
>  #define KVM_EXIT_S390_STSI        25
>  #define KVM_EXIT_IOAPIC_EOI       26
> +#define KVM_EXIT_GET_EXTINT       27
>  
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  /* Emulate instruction failed. */
> @@ -334,6 +335,10 @@ struct kvm_run {
>  		struct {
>  			__u8 vector;
>  		} eoi;
> +		/* KVM_EXIT_GET_EXTINT */
> +		struct {
> +			__u8 vector;
> +		} extint;
>  		/* Fix the size of the union. */
>  		char padding[256];
>  	};
> @@ -1206,6 +1211,8 @@ struct kvm_s390_ucas_mapping {
>  /* Available with KVM_CAP_S390_IRQ_STATE */
>  #define KVM_S390_SET_IRQ_STATE	  _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
>  #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
> +/* Available with KVM_CAP_SPLIT_IRQCHIP */
> +#define KVM_REQUEST_PIC_INJECTION _IO(KVMIO, 0xb7)
>  
>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
>  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford June 4, 2015, 8:21 p.m. UTC | #2
On Wed, Jun 03, 2015 at 11:38:21AM +0200, Paolo Bonzini wrote:
 
> However, why is the roundtrip to userspace necessary?  Could you pass
> the extint index directly as an argument to KVM_INTERRUPT?  It's
> backwards-compatible, because KVM_INTERRUPT so far could not be used
> together with an in-kernel LAPIC.  If you could do that, you could also
> avoid the new userspace_extint_available field.
This is possible, and definitely simpler, but not accurate to the spec.
In general, the PIC fires an INT, which leads to the CPU responding with
and INTA, and fetching the interrupt vector. It might not be strictly
necessary for this handshake to occur, but it is how the hardware did it
originally. 

In certain cases, having the interface modelled after the hardware is 
convenient. For example, devices can send external interrupt MSIs,
which require an Interrupt Ack to fetch the vector. They're a bit weird,
and I have absolutely no idea why someone would want these, but they are
a definitely a thing.

Looking back at KVM though, it doesn't look like KVM even supports these,
so this may not be a real issue. Eliding the roundtrip might be acceptable.
It's certainly simpler.

> Userspace can figure out who's the BSP.  The rendez-vous between the
> irqchip and the BSP's VCPU thread is still needed, but it can be done
> entirely in userspace.
Good point. I'll push this up into userspace. 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford June 20, 2015, 12:41 a.m. UTC | #3
On Wed, Jun 03, 2015 at 11:38:21AM +0200, Paolo Bonzini wrote:
> 
> 
> On 03/06/2015 01:51, Steve Rutherford wrote:
> > In order to enable userspace PIC support, the userspace PIC needs to
> > be able to inject local interrupt requests.
> > 
> > This adds the ioctl KVM_REQUEST_PIC_INJECTION and kvm exit
> > KVM_EXIT_GET_EXTINT.
> > 
> > The vm ioctl KVM_REQUEST_PIC_INJECTION makes a KVM_REQ_EVENT request
> > on the BSP, which causes the BSP to exit to userspace to fetch the
> > vector of the underlying external interrupt, which the BSP then
> > injects into the guest. This matches the PIC spec, and is necessary to
> > boot Windows.
> > 
> > Compiles for x86.
> > 
> > Update: Boots Windows and passes the KVM Unit Tests.
> > 
> > Signed-off-by: Steve Rutherford <srutherford@google.com>
> > ---
> >  Documentation/virtual/kvm/api.txt |  9 ++++++
> >  arch/x86/include/asm/kvm_host.h   |  2 ++
> >  arch/x86/kvm/irq.c                | 22 +++++++++++++--
> >  arch/x86/kvm/lapic.c              |  7 +++++
> >  arch/x86/kvm/lapic.h              |  2 ++
> >  arch/x86/kvm/x86.c                | 59 +++++++++++++++++++++++++++++++++++++--
> >  include/uapi/linux/kvm.h          |  7 +++++
> >  7 files changed, 103 insertions(+), 5 deletions(-)
> > 
> > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> > index 6ab2a3f7..b5d90cb 100644
> > --- a/Documentation/virtual/kvm/api.txt
> > +++ b/Documentation/virtual/kvm/api.txt
> > @@ -2979,6 +2979,15 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
> >  and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
> >  which is the maximum number of possibly pending cpu-local interrupts.
> >  
> > +4.96 KVM_REQUEST_PIC_INJECTION
> > +
> > +Capability: KVM_CAP_SPLIT_IRQCHIP
> > +Type: VM ioctl
> > +Parameters: none
> > +Returns: 0 on success, -1 on error.
> > +
> > +Informs the kernel that userspace has a pending external interrupt.
> > +
> 
> Missing documentation for the new vmexit and kvm_run member.
> 
> However, why is the roundtrip to userspace necessary?  Could you pass
> the extint index directly as an argument to KVM_INTERRUPT?  It's
> backwards-compatible, because KVM_INTERRUPT so far could not be used
> together with an in-kernel LAPIC.  If you could do that, you could also
> avoid the new userspace_extint_available field.
> 
> Userspace can figure out who's the BSP.  The rendez-vous between the
> irqchip and the BSP's VCPU thread is still needed, but it can be done
> entirely in userspace.
> 
> You'd also need much fewer changes to irq.c.  Basically just something like
> 
>  int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
>  {
>          int vector;
> 
> -        if (!irqchip_in_kernel(v->kvm))
> +        if (!pic_in_kernel(v->kvm) && v->arch.interrupt.pending)
>                 return v->arch.interrupt.nr;
> 
> ...
> 
>  int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
>  {
> -        if (!irqchip_in_kernel(v->kvm))
> +        if (!pic_in_kernel(v->kvm))
>                  return v->arch.interrupt.pending;
> 
> ...
> 
>  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>  {
> -        if (!irqchip_in_kernel(v->kvm))
> +        if (!pic_in_kernel(v->kvm))
>                  return v->arch.interrupt.pending;
> 
> More comments below.
> 
> >  5. The kvm_run structure
> >  ------------------------
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 4f439ff..0e8b0fc 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -543,6 +543,8 @@ struct kvm_vcpu_arch {
> >  
> >  	u64 eoi_exit_bitmaps[4];
> >  	int pending_ioapic_eoi;
> > +	bool userspace_extint_available;
> > +	int pending_external_vector;
> >  };
> >  
> 
> >  struct kvm_lpage_info {
> > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > index 706e47a..1270b2a 100644
> > --- a/arch/x86/kvm/irq.c
> > +++ b/arch/x86/kvm/irq.c
> > @@ -38,12 +38,25 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
> >  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
> >  
> >  /*
> > + * check if there is a pending userspace external interrupt
> > + */
> > +static int pending_userspace_extint(struct kvm_vcpu *v)
> > +{
> > +	return v->arch.userspace_extint_available ||
> > +	       v->arch.pending_external_vector != -1;
> > +}
> > +
> > +/*
> >   * check if there is pending interrupt from
> >   * non-APIC source without intack.
> >   */
> >  static int kvm_cpu_has_extint(struct kvm_vcpu *v)
> >  {
> > -	if (kvm_apic_accept_pic_intr(v))
> > +	u8 accept = kvm_apic_accept_pic_intr(v);
> > +
> > +	if (accept && irqchip_split(v->kvm))
> > +		return pending_userspace_extint(v);
> > +	else if (accept)
> >  		return pic_irqchip(v->kvm)->output;	/* PIC */
> 
> 	if (accept) {
> 		if (irqchip_split(v->kvm))
> 			return pending_userspace_extint(v);
> 		else
> 			return pic_irqchip(v->kvm)->output;
> 	}
> 
> 	return 0;
> 
> >  	else
> >  		return 0;
> > @@ -91,7 +104,12 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> >   */
> >  static int kvm_cpu_get_extint(struct kvm_vcpu *v)
> >  {
> > -	if (kvm_cpu_has_extint(v))
> > +	if (irqchip_split(v->kvm) && kvm_cpu_has_extint(v)) {
> > +		int vector = v->arch.pending_external_vector;
> > +
> > +		v->arch.pending_external_vector = -1;
> > +		return vector;
> > +	} else if (kvm_cpu_has_extint(v))
> >  		return kvm_pic_read_irq(v->kvm); /* PIC */
> 
> Same as above.
> 
> >  	return -1;
> >  }
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 766d297..012b56ee 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -2094,3 +2094,10 @@ void kvm_lapic_init(void)
> >  	jump_label_rate_limit(&apic_hw_disabled, HZ);
> >  	jump_label_rate_limit(&apic_sw_disabled, HZ);
> >  }
> > +
> > +void kvm_request_pic_injection(struct kvm_vcpu *vcpu)
> > +{
> > +	vcpu->arch.userspace_extint_available = true;
> > +	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > +	kvm_vcpu_kick(vcpu);
> > +}
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 71b150c..7831e4d 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -63,6 +63,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
> >  		unsigned long *dest_map);
> >  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
> >  
> > +void kvm_request_pic_injection(struct kvm_vcpu *vcpu);
> > +
> >  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> >  		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
> >  
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 35d13d4..40e7509 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -65,6 +65,8 @@
> >  #include <asm/pvclock.h>
> >  #include <asm/div64.h>
> >  
> > +#define GET_VECTOR_FROM_USERSPACE 1
> > +
> >  #define MAX_IO_MSRS 256
> >  #define KVM_MAX_MCE_BANKS 32
> >  #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
> > @@ -4217,6 +4219,30 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >  		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
> >  		break;
> >  	}
> > +	case KVM_REQUEST_PIC_INJECTION: {
> > +		int i;
> > +		struct kvm_vcpu *vcpu;
> > +		struct kvm_vcpu *bsp_vcpu = NULL;
> > +
> > +		mutex_lock(&kvm->lock);
> > +		r = -EEXIST;
> > +		if (!irqchip_split(kvm))
> > +			goto out;
> > +		kvm_for_each_vcpu(i, vcpu, kvm) {
> > +			if (kvm_vcpu_is_reset_bsp(vcpu)) {
> > +				bsp_vcpu = vcpu;
> > +				continue;
> > +			}
> > +		}
> > +		r = -EINVAL;
> > +		if (bsp_vcpu == NULL)
> > +			goto interrupt_unlock;
> > +		kvm_request_pic_injection(bsp_vcpu);
> > +		r = 0;
> > +interrupt_unlock:
> > +		mutex_unlock(&kvm->lock);
> > +		break;
> > +	}
> >  
> >  	default:
> >  		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
> > @@ -6194,6 +6220,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
> >  	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
> >  }
> >  
> > +static int must_fetch_userspace_extint(struct kvm_vcpu *vcpu)
> 
> I would rename this to kvm_accept_request_pic_injection.
> 
> Paolo
> 
> > +{
> > +	if (vcpu->arch.userspace_extint_available &&
> > +	    kvm_apic_accept_pic_intr(vcpu)) {
> > +		vcpu->arch.userspace_extint_available = false;
> > +		return true;
> > +	} else
> > +		return false;
> > +
> > +}
> > +
> >  static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> >  {
> >  	int r;
> > @@ -6258,7 +6295,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> >  				return r;
> >  		}
> >  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> > -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> > +			if (irqchip_split(vcpu->kvm) &&
> > +			    must_fetch_userspace_extint(vcpu)) {
> > +				return GET_VECTOR_FROM_USERSPACE;
> > +			}
> > +			kvm_queue_interrupt(vcpu,
> > +					    kvm_cpu_get_interrupt(vcpu),
> >  					    false);
> >  			kvm_x86_ops->set_irq(vcpu);
> >  		}
> > @@ -6424,13 +6466,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  	}
> >  
> >  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > +		int event;
> >  		kvm_apic_accept_events(vcpu);
> >  		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
> >  			r = 1;
> >  			goto out;
> >  		}
> > -
> > -		if (inject_pending_event(vcpu, req_int_win) != 0)
> > +		event = inject_pending_event(vcpu, req_int_win);
> > +		if (event == GET_VECTOR_FROM_USERSPACE) {
> > +			vcpu->run->exit_reason = KVM_EXIT_GET_EXTINT;
> > +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> > +			r = 0;
> > +			goto out;
> > +		} else if (event != 0)
> >  			req_immediate_exit = true;
> >  		/* enable NMI/IRQ window open exits if needed */
> >  		else if (vcpu->arch.nmi_pending)
> > @@ -6747,6 +6795,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  	if (vcpu->sigset_active)
> >  		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
> >  
> > +	if (vcpu->run->exit_reason == KVM_EXIT_GET_EXTINT)
> > +		vcpu->arch.pending_external_vector = vcpu->run->extint.vector;
> > +
> >  	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
> >  		kvm_vcpu_block(vcpu);
> >  		kvm_apic_accept_events(vcpu);
> > @@ -7536,6 +7587,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >  	kvm_async_pf_hash_reset(vcpu);
> >  	kvm_pmu_init(vcpu);
> >  
> > +	vcpu->arch.pending_external_vector = -1;
> > +
> >  	return 0;
> >  fail_free_wbinvd_dirty_mask:
> >  	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 826a08d..0cf7ed6 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -184,6 +184,7 @@ struct kvm_s390_skeys {
> >  #define KVM_EXIT_SYSTEM_EVENT     24
> >  #define KVM_EXIT_S390_STSI        25
> >  #define KVM_EXIT_IOAPIC_EOI       26
> > +#define KVM_EXIT_GET_EXTINT       27
> >  
> >  /* For KVM_EXIT_INTERNAL_ERROR */
> >  /* Emulate instruction failed. */
> > @@ -334,6 +335,10 @@ struct kvm_run {
> >  		struct {
> >  			__u8 vector;
> >  		} eoi;
> > +		/* KVM_EXIT_GET_EXTINT */
> > +		struct {
> > +			__u8 vector;
> > +		} extint;
> >  		/* Fix the size of the union. */
> >  		char padding[256];
> >  	};
> > @@ -1206,6 +1211,8 @@ struct kvm_s390_ucas_mapping {
> >  /* Available with KVM_CAP_S390_IRQ_STATE */
> >  #define KVM_S390_SET_IRQ_STATE	  _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
> >  #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
> > +/* Available with KVM_CAP_SPLIT_IRQCHIP */
> > +#define KVM_REQUEST_PIC_INJECTION _IO(KVMIO, 0xb7)
> >  
> >  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> >  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
> > 

Pinging this thread.

Should I go with skipping the round trip, and combining
KVM_REQUEST_PIC_INJECTION with the KVM_INTERRUPT (a VCPU IOCTL)?
[It's currently a VM IOCTL, which seems reasonable, given that the
PIC is a per VM device. When skipping the round trip, a VCPU Ioctl
seems sensible, given that an interrupt is associated with a specific
CPU.]

Steve
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
Paolo Bonzini June 21, 2015, 8:10 p.m. UTC | #4
On 20/06/2015 02:41, Steve Rutherford wrote:
> Pinging this thread.
> 
> Should I go with skipping the round trip, and combining
> KVM_REQUEST_PIC_INJECTION with the KVM_INTERRUPT (a VCPU IOCTL)?
> [It's currently a VM IOCTL, which seems reasonable, given that the
> PIC is a per VM device. When skipping the round trip, a VCPU Ioctl
> seems sensible, given that an interrupt is associated with a specific
> CPU.]

Yes, please.  Sorry for not answering, I didn't understand a question
was implied.  The roundtrip can be done in userspace.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
Steve Rutherford June 26, 2015, 12:26 a.m. UTC | #5
> However, why is the roundtrip to userspace necessary?  Could you pass
> the extint index directly as an argument to KVM_INTERRUPT?  It's
> backwards-compatible, because KVM_INTERRUPT so far could not be used
> together with an in-kernel LAPIC.  If you could do that, you could also
> avoid the new userspace_extint_available field.

Implemented a basic version of this, and ran into some potential
issues with this strategy. Supporting PIC masking/unmasking by the
CPU/APIC means that either:
A) PIC interrupts need to be bufferable in the kernel (with some way
   of comparing priorities).
B) the APIC state needs to be read in order to fetch the bit as to
   whether or not the PIC is being masked (which I believe can be done
   from userspace via the APIC state ioctl).
C) something hacky that doesn't conform to the PIC spec but still
   happens to boot common OSes (like buffering the interrupts and
   injecting them in the order of arrival (which is wrong)).

Steve
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini June 26, 2015, 8:49 a.m. UTC | #6
On 26/06/2015 02:26, Steve Rutherford wrote:
> Implemented a basic version of this, and ran into some potential
> issues with this strategy. Supporting PIC masking/unmasking by the
> CPU/APIC means that either:
> A) PIC interrupts need to be bufferable in the kernel (with some way
>    of comparing priorities).
> B) the APIC state needs to be read in order to fetch the bit as to
>    whether or not the PIC is being masked (which I believe can be done
>    from userspace via the APIC state ioctl).
> C) something hacky that doesn't conform to the PIC spec but still
>    happens to boot common OSes (like buffering the interrupts and
>    injecting them in the order of arrival (which is wrong)).

I think B is okay to do.  Later if needed we can make it possible for
userspace to mmap the LAPIC page instead of using the APIC state ioctl.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6ab2a3f7..b5d90cb 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2979,6 +2979,15 @@  len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
+4.96 KVM_REQUEST_PIC_INJECTION
+
+Capability: KVM_CAP_SPLIT_IRQCHIP
+Type: VM ioctl
+Parameters: none
+Returns: 0 on success, -1 on error.
+
+Informs the kernel that userspace has a pending external interrupt.
+
 
 5. The kvm_run structure
 ------------------------
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4f439ff..0e8b0fc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -543,6 +543,8 @@  struct kvm_vcpu_arch {
 
 	u64 eoi_exit_bitmaps[4];
 	int pending_ioapic_eoi;
+	bool userspace_extint_available;
+	int pending_external_vector;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 706e47a..1270b2a 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,12 +38,25 @@  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
+ * check if there is a pending userspace external interrupt
+ */
+static int pending_userspace_extint(struct kvm_vcpu *v)
+{
+	return v->arch.userspace_extint_available ||
+	       v->arch.pending_external_vector != -1;
+}
+
+/*
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
-	if (kvm_apic_accept_pic_intr(v))
+	u8 accept = kvm_apic_accept_pic_intr(v);
+
+	if (accept && irqchip_split(v->kvm))
+		return pending_userspace_extint(v);
+	else if (accept)
 		return pic_irqchip(v->kvm)->output;	/* PIC */
 	else
 		return 0;
@@ -91,7 +104,12 @@  EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-	if (kvm_cpu_has_extint(v))
+	if (irqchip_split(v->kvm) && kvm_cpu_has_extint(v)) {
+		int vector = v->arch.pending_external_vector;
+
+		v->arch.pending_external_vector = -1;
+		return vector;
+	} else if (kvm_cpu_has_extint(v))
 		return kvm_pic_read_irq(v->kvm); /* PIC */
 	return -1;
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 766d297..012b56ee 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2094,3 +2094,10 @@  void kvm_lapic_init(void)
 	jump_label_rate_limit(&apic_hw_disabled, HZ);
 	jump_label_rate_limit(&apic_sw_disabled, HZ);
 }
+
+void kvm_request_pic_injection(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.userspace_extint_available = true;
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	kvm_vcpu_kick(vcpu);
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 71b150c..7831e4d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -63,6 +63,8 @@  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+void kvm_request_pic_injection(struct kvm_vcpu *vcpu);
+
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 35d13d4..40e7509 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -65,6 +65,8 @@ 
 #include <asm/pvclock.h>
 #include <asm/div64.h>
 
+#define GET_VECTOR_FROM_USERSPACE 1
+
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
@@ -4217,6 +4219,30 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 		break;
 	}
+	case KVM_REQUEST_PIC_INJECTION: {
+		int i;
+		struct kvm_vcpu *vcpu;
+		struct kvm_vcpu *bsp_vcpu = NULL;
+
+		mutex_lock(&kvm->lock);
+		r = -EEXIST;
+		if (!irqchip_split(kvm))
+			goto out;
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			if (kvm_vcpu_is_reset_bsp(vcpu)) {
+				bsp_vcpu = vcpu;
+				continue;
+			}
+		}
+		r = -EINVAL;
+		if (bsp_vcpu == NULL)
+			goto interrupt_unlock;
+		kvm_request_pic_injection(bsp_vcpu);
+		r = 0;
+interrupt_unlock:
+		mutex_unlock(&kvm->lock);
+		break;
+	}
 
 	default:
 		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
@@ -6194,6 +6220,17 @@  static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
 }
 
+static int must_fetch_userspace_extint(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.userspace_extint_available &&
+	    kvm_apic_accept_pic_intr(vcpu)) {
+		vcpu->arch.userspace_extint_available = false;
+		return true;
+	} else
+		return false;
+
+}
+
 static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 {
 	int r;
@@ -6258,7 +6295,12 @@  static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 				return r;
 		}
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
+			if (irqchip_split(vcpu->kvm) &&
+			    must_fetch_userspace_extint(vcpu)) {
+				return GET_VECTOR_FROM_USERSPACE;
+			}
+			kvm_queue_interrupt(vcpu,
+					    kvm_cpu_get_interrupt(vcpu),
 					    false);
 			kvm_x86_ops->set_irq(vcpu);
 		}
@@ -6424,13 +6466,19 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+		int event;
 		kvm_apic_accept_events(vcpu);
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 			r = 1;
 			goto out;
 		}
-
-		if (inject_pending_event(vcpu, req_int_win) != 0)
+		event = inject_pending_event(vcpu, req_int_win);
+		if (event == GET_VECTOR_FROM_USERSPACE) {
+			vcpu->run->exit_reason = KVM_EXIT_GET_EXTINT;
+			kvm_make_request(KVM_REQ_EVENT, vcpu);
+			r = 0;
+			goto out;
+		} else if (event != 0)
 			req_immediate_exit = true;
 		/* enable NMI/IRQ window open exits if needed */
 		else if (vcpu->arch.nmi_pending)
@@ -6747,6 +6795,9 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
+	if (vcpu->run->exit_reason == KVM_EXIT_GET_EXTINT)
+		vcpu->arch.pending_external_vector = vcpu->run->extint.vector;
+
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		kvm_vcpu_block(vcpu);
 		kvm_apic_accept_events(vcpu);
@@ -7536,6 +7587,8 @@  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm_async_pf_hash_reset(vcpu);
 	kvm_pmu_init(vcpu);
 
+	vcpu->arch.pending_external_vector = -1;
+
 	return 0;
 fail_free_wbinvd_dirty_mask:
 	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 826a08d..0cf7ed6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -184,6 +184,7 @@  struct kvm_s390_skeys {
 #define KVM_EXIT_SYSTEM_EVENT     24
 #define KVM_EXIT_S390_STSI        25
 #define KVM_EXIT_IOAPIC_EOI       26
+#define KVM_EXIT_GET_EXTINT       27
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -334,6 +335,10 @@  struct kvm_run {
 		struct {
 			__u8 vector;
 		} eoi;
+		/* KVM_EXIT_GET_EXTINT */
+		struct {
+			__u8 vector;
+		} extint;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1206,6 +1211,8 @@  struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_S390_IRQ_STATE */
 #define KVM_S390_SET_IRQ_STATE	  _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
 #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
+/* Available with KVM_CAP_SPLIT_IRQCHIP */
+#define KVM_REQUEST_PIC_INJECTION _IO(KVMIO, 0xb7)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)