[RFC,2/4] KVM: x86: Add KVM exit for IOAPIC EOIs
diff mbox

Message ID 1431481652-27268-2-git-send-email-srutherford@google.com
State New
Headers show

Commit Message

Steve Rutherford May 13, 2015, 1:47 a.m. UTC
Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
userspace.

Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
to be informed (which is identical to the EOI_EXIT_BITMAP field used
by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
exits on older processors).

[Note: A prototype using ResampleFDs found that decoupling the EOI
from the VCPU's thread made it possible for the VCPU to not see a
recent EOI after reentering the guest. This does not match real
hardware.]

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford <srutherford@google.com>
---
 Documentation/virtual/kvm/api.txt | 10 ++++++++++
 arch/x86/include/asm/kvm_host.h   |  3 +++
 arch/x86/kvm/lapic.c              |  9 +++++++++
 arch/x86/kvm/x86.c                | 11 +++++++++++
 include/linux/kvm_host.h          |  1 +
 include/uapi/linux/kvm.h          |  5 +++++
 6 files changed, 39 insertions(+)

Comments

Paolo Bonzini May 13, 2015, 7:35 a.m. UTC | #1
On 13/05/2015 03:47, Steve Rutherford wrote:
> Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
> userspace.
> 
> Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
> to be informed (which is identical to the EOI_EXIT_BITMAP field used
> by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
> exits on older processors).
> 
> [Note: A prototype using ResampleFDs found that decoupling the EOI
> from the VCPU's thread made it possible for the VCPU to not see a
> recent EOI after reentering the guest. This does not match real
> hardware.]

Can you explain this better (and perhaps add a testcase to ioapic.c)?

Paolo

> Compile tested for Intel x86.
> 
> Signed-off-by: Steve Rutherford <srutherford@google.com>
> ---
>  Documentation/virtual/kvm/api.txt | 10 ++++++++++
>  arch/x86/include/asm/kvm_host.h   |  3 +++
>  arch/x86/kvm/lapic.c              |  9 +++++++++
>  arch/x86/kvm/x86.c                | 11 +++++++++++
>  include/linux/kvm_host.h          |  1 +
>  include/uapi/linux/kvm.h          |  5 +++++
>  6 files changed, 39 insertions(+)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 0744b4e..dd92996 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3285,6 +3285,16 @@ Valid values for 'type' are:
>  	 */
>  	__u64 kvm_valid_regs;
>  	__u64 kvm_dirty_regs;
> +
> +	/* KVM_EXIT_IOAPIC_EOI */
> +        struct {
> +	       __u8 vector;
> +        } eoi;
> +
> +Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
> +occurred, which should be handled by the userspace IOAPIC. Triggers when
> +the Irqchip has been split between userspace and the kernel.
> +
>  	union {
>  		struct kvm_sync_regs regs;
>  		char padding[1024];
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3ddc134..b1978f1 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -539,6 +539,9 @@ struct kvm_vcpu_arch {
>  	struct {
>  		bool pv_unhalted;
>  	} pv;
> +
> +	u64 eoi_exit_bitmaps[4];
> +	int pending_ioapic_eoi;
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index bc392a6..42fada6f 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -860,6 +860,15 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
>  
>  static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
>  {
> +	if (irqchip_split(apic->vcpu->kvm)) {
> +		if (test_bit(vector,
> +			     (void *) apic->vcpu->arch.eoi_exit_bitmaps)) {
> +			apic->vcpu->arch.pending_ioapic_eoi = vector;
> +			kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
> +		}
> +		return;
> +	}
> +
>  	if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
>  		int trigger_mode;
>  		if (apic_test_vector(vector, apic->regs + APIC_TMR))
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7505b39..cc27c35 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6324,6 +6324,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  			kvm_handle_pmu_event(vcpu);
>  		if (kvm_check_request(KVM_REQ_PMI, vcpu))
>  			kvm_deliver_pmi(vcpu);
> +		if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
> +			BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
> +			if (test_bit(vcpu->arch.pending_ioapic_eoi,
> +				     (void *) vcpu->arch.eoi_exit_bitmaps)) {
> +				vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
> +				vcpu->run->eoi.vector =
> +						vcpu->arch.pending_ioapic_eoi;
> +				r = 0;
> +				goto out;
> +			}
> +		}
>  		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
>  			vcpu_scan_ioapic(vcpu);
>  		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 277b7a1..cef20ad 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
>  #define KVM_REQ_ENABLE_IBS        23
>  #define KVM_REQ_DISABLE_IBS       24
>  #define KVM_REQ_APIC_PAGE_RELOAD  25
> +#define KVM_REQ_IOAPIC_EOI_EXIT   26
>  
>  #define KVM_USERSPACE_IRQ_SOURCE_ID		0
>  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7d06dc4..2305948 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -183,6 +183,7 @@ struct kvm_s390_skeys {
>  #define KVM_EXIT_EPR              23
>  #define KVM_EXIT_SYSTEM_EVENT     24
>  #define KVM_EXIT_S390_STSI        25
> +#define KVM_EXIT_IOAPIC_EOI       26
>  
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  /* Emulate instruction failed. */
> @@ -329,6 +330,10 @@ struct kvm_run {
>  			__u8 sel1;
>  			__u16 sel2;
>  		} s390_stsi;
> +		/* KVM_EXIT_IOAPIC_EOI */
> +		struct {
> +			__u8 vector;
> +		} eoi;
>  		/* Fix the size of the union. */
>  		char padding[256];
>  	};
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford May 13, 2015, 10:18 p.m. UTC | #2
On Wed, May 13, 2015 at 09:35:34AM +0200, Paolo Bonzini wrote:
> 
> 
> On 13/05/2015 03:47, Steve Rutherford wrote:
> > Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
> > userspace.
> > 
> > Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
> > to be informed (which is identical to the EOI_EXIT_BITMAP field used
> > by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
> > exits on older processors).
> > 
> > [Note: A prototype using ResampleFDs found that decoupling the EOI
> > from the VCPU's thread made it possible for the VCPU to not see a
> > recent EOI after reentering the guest. This does not match real
> > hardware.]
> 
> Can you explain this better (and perhaps add a testcase to ioapic.c)?

This is for passing EOIs to the IOAPIC for level-triggered IOAPIC interrupts. Note that this patch requires some other patch to set the bitmaps. 

The sequential level-triggered interrupts test in ioapic.c effectively tests one side of this (If an EOI fails to be delivered to the IOAPIC, then any subsequent interrupts on that EOI's vector will be coalesced, and the second interrupt in the test won't fire.). 
The other side, checking that the guest /only/ exits on the specified vectors, is harder to instrument from within the KVM unit tests.

Steve

> 
> Paolo
> 
> > Compile tested for Intel x86.
> > 
> > Signed-off-by: Steve Rutherford <srutherford@google.com>
> > ---
> >  Documentation/virtual/kvm/api.txt | 10 ++++++++++
> >  arch/x86/include/asm/kvm_host.h   |  3 +++
> >  arch/x86/kvm/lapic.c              |  9 +++++++++
> >  arch/x86/kvm/x86.c                | 11 +++++++++++
> >  include/linux/kvm_host.h          |  1 +
> >  include/uapi/linux/kvm.h          |  5 +++++
> >  6 files changed, 39 insertions(+)
> > 
> > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> > index 0744b4e..dd92996 100644
> > --- a/Documentation/virtual/kvm/api.txt
> > +++ b/Documentation/virtual/kvm/api.txt
> > @@ -3285,6 +3285,16 @@ Valid values for 'type' are:
> >  	 */
> >  	__u64 kvm_valid_regs;
> >  	__u64 kvm_dirty_regs;
> > +
> > +	/* KVM_EXIT_IOAPIC_EOI */
> > +        struct {
> > +	       __u8 vector;
> > +        } eoi;
> > +
> > +Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
> > +occurred, which should be handled by the userspace IOAPIC. Triggers when
> > +the Irqchip has been split between userspace and the kernel.
> > +
> >  	union {
> >  		struct kvm_sync_regs regs;
> >  		char padding[1024];
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 3ddc134..b1978f1 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -539,6 +539,9 @@ struct kvm_vcpu_arch {
> >  	struct {
> >  		bool pv_unhalted;
> >  	} pv;
> > +
> > +	u64 eoi_exit_bitmaps[4];
> > +	int pending_ioapic_eoi;
> >  };
> >  
> >  struct kvm_lpage_info {
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index bc392a6..42fada6f 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -860,6 +860,15 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
> >  
> >  static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
> >  {
> > +	if (irqchip_split(apic->vcpu->kvm)) {
> > +		if (test_bit(vector,
> > +			     (void *) apic->vcpu->arch.eoi_exit_bitmaps)) {
> > +			apic->vcpu->arch.pending_ioapic_eoi = vector;
> > +			kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
> > +		}
> > +		return;
> > +	}
> > +
> >  	if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
> >  		int trigger_mode;
> >  		if (apic_test_vector(vector, apic->regs + APIC_TMR))
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 7505b39..cc27c35 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -6324,6 +6324,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  			kvm_handle_pmu_event(vcpu);
> >  		if (kvm_check_request(KVM_REQ_PMI, vcpu))
> >  			kvm_deliver_pmi(vcpu);
> > +		if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
> > +			BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
> > +			if (test_bit(vcpu->arch.pending_ioapic_eoi,
> > +				     (void *) vcpu->arch.eoi_exit_bitmaps)) {
> > +				vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
> > +				vcpu->run->eoi.vector =
> > +						vcpu->arch.pending_ioapic_eoi;
> > +				r = 0;
> > +				goto out;
> > +			}
> > +		}
> >  		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
> >  			vcpu_scan_ioapic(vcpu);
> >  		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 277b7a1..cef20ad 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page)
> >  #define KVM_REQ_ENABLE_IBS        23
> >  #define KVM_REQ_DISABLE_IBS       24
> >  #define KVM_REQ_APIC_PAGE_RELOAD  25
> > +#define KVM_REQ_IOAPIC_EOI_EXIT   26
> >  
> >  #define KVM_USERSPACE_IRQ_SOURCE_ID		0
> >  #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 7d06dc4..2305948 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -183,6 +183,7 @@ struct kvm_s390_skeys {
> >  #define KVM_EXIT_EPR              23
> >  #define KVM_EXIT_SYSTEM_EVENT     24
> >  #define KVM_EXIT_S390_STSI        25
> > +#define KVM_EXIT_IOAPIC_EOI       26
> >  
> >  /* For KVM_EXIT_INTERNAL_ERROR */
> >  /* Emulate instruction failed. */
> > @@ -329,6 +330,10 @@ struct kvm_run {
> >  			__u8 sel1;
> >  			__u16 sel2;
> >  		} s390_stsi;
> > +		/* KVM_EXIT_IOAPIC_EOI */
> > +		struct {
> > +			__u8 vector;
> > +		} eoi;
> >  		/* Fix the size of the union. */
> >  		char padding[256];
> >  	};
> > 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity May 24, 2015, 4:46 p.m. UTC | #3
On 05/13/2015 04:47 AM, Steve Rutherford wrote:
> Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
> userspace.
>
> Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
> to be informed (which is identical to the EOI_EXIT_BITMAP field used
> by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
> exits on older processors).
>
> [Note: A prototype using ResampleFDs found that decoupling the EOI
> from the VCPU's thread made it possible for the VCPU to not see a
> recent EOI after reentering the guest. This does not match real
> hardware.]
>
> Compile tested for Intel x86.
>
> Signed-off-by: Steve Rutherford <srutherford@google.com>
> ---
>   Documentation/virtual/kvm/api.txt | 10 ++++++++++
>   arch/x86/include/asm/kvm_host.h   |  3 +++
>   arch/x86/kvm/lapic.c              |  9 +++++++++
>   arch/x86/kvm/x86.c                | 11 +++++++++++
>   include/linux/kvm_host.h          |  1 +
>   include/uapi/linux/kvm.h          |  5 +++++
>   6 files changed, 39 insertions(+)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 0744b4e..dd92996 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3285,6 +3285,16 @@ Valid values for 'type' are:
>   	 */
>   	__u64 kvm_valid_regs;
>   	__u64 kvm_dirty_regs;
> +
> +	/* KVM_EXIT_IOAPIC_EOI */
> +        struct {
> +	       __u8 vector;
> +        } eoi;
> +
> +Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
> +occurred, which should be handled by the userspace IOAPIC. Triggers when
> +the Irqchip has been split between userspace and the kernel.
> +

The ioapic is a global resource, so it doesn't make sense for 
information about it to be returned in a per-vcpu structure (or to block 
the vcpu while it is being processed).

The way I'd model it is to emulate the APIC bus that connects local 
APICs and the IOAPIC, using a socket pair.  When the user-space ioapic 
wants to inject an interrupt, it sends a message to the local APICs 
which then inject it, and when it's ack'ed the EOI is sent back on the 
same bus.

It's true that the APIC bus no longer exists, but modern processors 
still pretend it does.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford May 27, 2015, 2:06 a.m. UTC | #4
On Sun, May 24, 2015 at 07:46:03PM +0300, Avi Kivity wrote:
> On 05/13/2015 04:47 AM, Steve Rutherford wrote:
> >Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
> >userspace.
> >
> >Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
> >to be informed (which is identical to the EOI_EXIT_BITMAP field used
> >by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
> >exits on older processors).
> >
> >[Note: A prototype using ResampleFDs found that decoupling the EOI
> >from the VCPU's thread made it possible for the VCPU to not see a
> >recent EOI after reentering the guest. This does not match real
> >hardware.]
> >
> >Compile tested for Intel x86.
> >
> >Signed-off-by: Steve Rutherford <srutherford@google.com>
> >---
> >  Documentation/virtual/kvm/api.txt | 10 ++++++++++
> >  arch/x86/include/asm/kvm_host.h   |  3 +++
> >  arch/x86/kvm/lapic.c              |  9 +++++++++
> >  arch/x86/kvm/x86.c                | 11 +++++++++++
> >  include/linux/kvm_host.h          |  1 +
> >  include/uapi/linux/kvm.h          |  5 +++++
> >  6 files changed, 39 insertions(+)
> >
> >diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> >index 0744b4e..dd92996 100644
> >--- a/Documentation/virtual/kvm/api.txt
> >+++ b/Documentation/virtual/kvm/api.txt
> >@@ -3285,6 +3285,16 @@ Valid values for 'type' are:
> >  	 */
> >  	__u64 kvm_valid_regs;
> >  	__u64 kvm_dirty_regs;
> >+
> >+	/* KVM_EXIT_IOAPIC_EOI */
> >+        struct {
> >+	       __u8 vector;
> >+        } eoi;
> >+
> >+Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
> >+occurred, which should be handled by the userspace IOAPIC. Triggers when
> >+the Irqchip has been split between userspace and the kernel.
> >+
> 
> The ioapic is a global resource, so it doesn't make sense for
> information about it to be returned in a per-vcpu structure
EOI exits are a per-vcpu behavior, so this doesn't seem all that strange.

> (or to block the vcpu while it is being processed).

Blocking doesn't feel clean, but doesn't seem all that bad, given
that these operations are relatively rare on modern configurations.

> 
> The way I'd model it is to emulate the APIC bus that connects local
> APICs and the IOAPIC, using a socket pair.  When the user-space
> ioapic wants to inject an interrupt, it sends a message to the local
> APICs which then inject it, and when it's ack'ed the EOI is sent
> back on the same bus.
Although I'm not certain about this, it sounds to me like this would
require a kernel thread to be waiting (in some way) on this socket, which
seems rather heavy handed.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity May 27, 2015, 5:32 a.m. UTC | #5
On 05/27/2015 05:06 AM, Steve Rutherford wrote:
> On Sun, May 24, 2015 at 07:46:03PM +0300, Avi Kivity wrote:
>> On 05/13/2015 04:47 AM, Steve Rutherford wrote:
>>> Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
>>> userspace.
>>>
>>> Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
>>> to be informed (which is identical to the EOI_EXIT_BITMAP field used
>>> by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
>>> exits on older processors).
>>>
>>> [Note: A prototype using ResampleFDs found that decoupling the EOI
>> >from the VCPU's thread made it possible for the VCPU to not see a
>>> recent EOI after reentering the guest. This does not match real
>>> hardware.]
>>>
>>> Compile tested for Intel x86.
>>>
>>> Signed-off-by: Steve Rutherford <srutherford@google.com>
>>> ---
>>>   Documentation/virtual/kvm/api.txt | 10 ++++++++++
>>>   arch/x86/include/asm/kvm_host.h   |  3 +++
>>>   arch/x86/kvm/lapic.c              |  9 +++++++++
>>>   arch/x86/kvm/x86.c                | 11 +++++++++++
>>>   include/linux/kvm_host.h          |  1 +
>>>   include/uapi/linux/kvm.h          |  5 +++++
>>>   6 files changed, 39 insertions(+)
>>>
>>> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>>> index 0744b4e..dd92996 100644
>>> --- a/Documentation/virtual/kvm/api.txt
>>> +++ b/Documentation/virtual/kvm/api.txt
>>> @@ -3285,6 +3285,16 @@ Valid values for 'type' are:
>>>   	 */
>>>   	__u64 kvm_valid_regs;
>>>   	__u64 kvm_dirty_regs;
>>> +
>>> +	/* KVM_EXIT_IOAPIC_EOI */
>>> +        struct {
>>> +	       __u8 vector;
>>> +        } eoi;
>>> +
>>> +Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
>>> +occurred, which should be handled by the userspace IOAPIC. Triggers when
>>> +the Irqchip has been split between userspace and the kernel.
>>> +
>> The ioapic is a global resource, so it doesn't make sense for
>> information about it to be returned in a per-vcpu structure
> EOI exits are a per-vcpu behavior, so this doesn't seem all that strange.
>
>> (or to block the vcpu while it is being processed).
> Blocking doesn't feel clean, but doesn't seem all that bad, given
> that these operations are relatively rare on modern configurations.

Agree, maybe the realtime people have an interest here.

>> The way I'd model it is to emulate the APIC bus that connects local
>> APICs and the IOAPIC, using a socket pair.  When the user-space
>> ioapic wants to inject an interrupt, it sends a message to the local
>> APICs which then inject it, and when it's ack'ed the EOI is sent
>> back on the same bus.
> Although I'm not certain about this, it sounds to me like this would
> require a kernel thread to be waiting (in some way) on this socket, which
> seems rather heavy handed.

It's been a while since I did kernel programming, but I think you can 
queue a callback to be called when an I/O is ready, and not require a 
thread.  IIRC we do that with irqfd to cause an interrupt to be injected.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Rutherford May 28, 2015, 9:58 p.m. UTC | #6
On Wed, May 27, 2015 at 08:32:04AM +0300, Avi Kivity wrote:
> On 05/27/2015 05:06 AM, Steve Rutherford wrote:
> >On Sun, May 24, 2015 at 07:46:03PM +0300, Avi Kivity wrote:
> >>On 05/13/2015 04:47 AM, Steve Rutherford wrote:
> >>>Adds KVM_EXIT_IOAPIC_EOI which passes the interrupt vector up to
> >>>userspace.
> >>>
> >>>Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs
> >>>to be informed (which is identical to the EOI_EXIT_BITMAP field used
> >>>by modern x86 processors, but can also be used to elide kvm IOAPIC EOI
> >>>exits on older processors).
> >>>
> >>>[Note: A prototype using ResampleFDs found that decoupling the EOI
> >>>from the VCPU's thread made it possible for the VCPU to not see a
> >>>recent EOI after reentering the guest. This does not match real
> >>>hardware.]
> >>>
> >>>Compile tested for Intel x86.
> >>>
> >>>Signed-off-by: Steve Rutherford <srutherford@google.com>
> >>>---
> >>>  Documentation/virtual/kvm/api.txt | 10 ++++++++++
> >>>  arch/x86/include/asm/kvm_host.h   |  3 +++
> >>>  arch/x86/kvm/lapic.c              |  9 +++++++++
> >>>  arch/x86/kvm/x86.c                | 11 +++++++++++
> >>>  include/linux/kvm_host.h          |  1 +
> >>>  include/uapi/linux/kvm.h          |  5 +++++
> >>>  6 files changed, 39 insertions(+)
> >>>
> >>>diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> >>>index 0744b4e..dd92996 100644
> >>>--- a/Documentation/virtual/kvm/api.txt
> >>>+++ b/Documentation/virtual/kvm/api.txt
> >>>@@ -3285,6 +3285,16 @@ Valid values for 'type' are:
> >>>  	 */
> >>>  	__u64 kvm_valid_regs;
> >>>  	__u64 kvm_dirty_regs;
> >>>+
> >>>+	/* KVM_EXIT_IOAPIC_EOI */
> >>>+        struct {
> >>>+	       __u8 vector;
> >>>+        } eoi;
> >>>+
> >>>+Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
> >>>+occurred, which should be handled by the userspace IOAPIC. Triggers when
> >>>+the Irqchip has been split between userspace and the kernel.
> >>>+
> >>The ioapic is a global resource, so it doesn't make sense for
> >>information about it to be returned in a per-vcpu structure
> >EOI exits are a per-vcpu behavior, so this doesn't seem all that strange.
> >
> >>(or to block the vcpu while it is being processed).
> >Blocking doesn't feel clean, but doesn't seem all that bad, given
> >that these operations are relatively rare on modern configurations.
> 
> Agree, maybe the realtime people have an interest here.
> 
> >>The way I'd model it is to emulate the APIC bus that connects local
> >>APICs and the IOAPIC, using a socket pair.  When the user-space
> >>ioapic wants to inject an interrupt, it sends a message to the local
> >>APICs which then inject it, and when it's ack'ed the EOI is sent
> >>back on the same bus.
> >Although I'm not certain about this, it sounds to me like this would
> >require a kernel thread to be waiting (in some way) on this socket, which
> >seems rather heavy handed.
> 
> It's been a while since I did kernel programming, but I think you
> can queue a callback to be called when an I/O is ready, and not
> require a thread.  IIRC we do that with irqfd to cause an interrupt
> to be injected.
> 

This should be possible, but it's going to add a ton of complexity, and I don't really see any compelling benefits. If there is a compelling reason to switch to a socket based interface, I'm definitely willing to refactor.

Steve
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 0744b4e..dd92996 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3285,6 +3285,16 @@  Valid values for 'type' are:
 	 */
 	__u64 kvm_valid_regs;
 	__u64 kvm_dirty_regs;
+
+	/* KVM_EXIT_IOAPIC_EOI */
+        struct {
+	       __u8 vector;
+        } eoi;
+
+Indicates that an eoi of a level triggered IOAPIC interrupt on vector has
+occurred, which should be handled by the userspace IOAPIC. Triggers when
+the Irqchip has been split between userspace and the kernel.
+
 	union {
 		struct kvm_sync_regs regs;
 		char padding[1024];
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3ddc134..b1978f1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -539,6 +539,9 @@  struct kvm_vcpu_arch {
 	struct {
 		bool pv_unhalted;
 	} pv;
+
+	u64 eoi_exit_bitmaps[4];
+	int pending_ioapic_eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bc392a6..42fada6f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -860,6 +860,15 @@  int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
+	if (irqchip_split(apic->vcpu->kvm)) {
+		if (test_bit(vector,
+			     (void *) apic->vcpu->arch.eoi_exit_bitmaps)) {
+			apic->vcpu->arch.pending_ioapic_eoi = vector;
+			kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
+		}
+		return;
+	}
+
 	if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
 		int trigger_mode;
 		if (apic_test_vector(vector, apic->regs + APIC_TMR))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7505b39..cc27c35 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6324,6 +6324,17 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_handle_pmu_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
 			kvm_deliver_pmi(vcpu);
+		if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+			BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
+			if (test_bit(vcpu->arch.pending_ioapic_eoi,
+				     (void *) vcpu->arch.eoi_exit_bitmaps)) {
+				vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
+				vcpu->run->eoi.vector =
+						vcpu->arch.pending_ioapic_eoi;
+				r = 0;
+				goto out;
+			}
+		}
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 277b7a1..cef20ad 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,7 @@  static inline bool is_error_page(struct page *page)
 #define KVM_REQ_ENABLE_IBS        23
 #define KVM_REQ_DISABLE_IBS       24
 #define KVM_REQ_APIC_PAGE_RELOAD  25
+#define KVM_REQ_IOAPIC_EOI_EXIT   26
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7d06dc4..2305948 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -183,6 +183,7 @@  struct kvm_s390_skeys {
 #define KVM_EXIT_EPR              23
 #define KVM_EXIT_SYSTEM_EVENT     24
 #define KVM_EXIT_S390_STSI        25
+#define KVM_EXIT_IOAPIC_EOI       26
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -329,6 +330,10 @@  struct kvm_run {
 			__u8 sel1;
 			__u16 sel2;
 		} s390_stsi;
+		/* KVM_EXIT_IOAPIC_EOI */
+		struct {
+			__u8 vector;
+		} eoi;
 		/* Fix the size of the union. */
 		char padding[256];
 	};