diff mbox

[v3,7/8] Move IO APIC to its own lock.

Message ID 1250079442-5163-8-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov Aug. 12, 2009, 12:17 p.m. UTC
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c |    7 +---
 arch/x86/kvm/i8259.c     |   22 +++++++++---
 arch/x86/kvm/lapic.c     |    5 +--
 arch/x86/kvm/x86.c       |   10 +----
 virt/kvm/ioapic.c        |   80 +++++++++++++++++++++++++++++++++++-----------
 virt/kvm/ioapic.h        |    4 ++
 6 files changed, 86 insertions(+), 42 deletions(-)

Comments

Marcelo Tosatti Aug. 13, 2009, 9:13 a.m. UTC | #1
On Wed, Aug 12, 2009 at 03:17:21PM +0300, Gleb Natapov wrote:
> 
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/ia64/kvm/kvm-ia64.c |    7 +---
>  arch/x86/kvm/i8259.c     |   22 +++++++++---
>  arch/x86/kvm/lapic.c     |    5 +--
>  arch/x86/kvm/x86.c       |   10 +----
>  virt/kvm/ioapic.c        |   80 +++++++++++++++++++++++++++++++++++-----------
>  virt/kvm/ioapic.h        |    4 ++
>  6 files changed, 86 insertions(+), 42 deletions(-)
> 
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 0ad09f0..4a98314 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -851,8 +851,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
>  	r = 0;
>  	switch (chip->chip_id) {
>  	case KVM_IRQCHIP_IOAPIC:
> -		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
> -				sizeof(struct kvm_ioapic_state));
> +		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
>  		break;
>  	default:
>  		r = -EINVAL;
> @@ -868,9 +867,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
>  	r = 0;
>  	switch (chip->chip_id) {
>  	case KVM_IRQCHIP_IOAPIC:
> -		memcpy(ioapic_irqchip(kvm),
> -				&chip->chip.ioapic,
> -				sizeof(struct kvm_ioapic_state));
> +		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
>  		break;
>  	default:
>  		r = -EINVAL;
> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
> index eb2b8b7..96ea7f7 100644
> --- a/arch/x86/kvm/i8259.c
> +++ b/arch/x86/kvm/i8259.c
> @@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
>  	s->isr_ack |= (1 << irq);
>  	if (s != &s->pics_state->pics[0])
>  		irq += 8;
> +	/*
> +	 * We are dropping lock while calling ack notifiers since ack
> +	 * notifier callbacks for assigned devices call into PIC recursively.
> +	 * Other interrupt may be delivered to PIC while lock is dropped but
> +	 * it should be safe since PIC state is already updated at this stage.
> +	 */
> +	spin_unlock(&s->pics_state->lock);
>  	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
> +	spin_lock(&s->pics_state->lock);
>  }
>  
>  void kvm_pic_clear_isr_ack(struct kvm *kvm)
> @@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
>  static inline void pic_intack(struct kvm_kpic_state *s, int irq)
>  {
>  	s->isr |= 1 << irq;
> -	if (s->auto_eoi) {
> -		if (s->rotate_on_auto_eoi)
> -			s->priority_add = (irq + 1) & 7;
> -		pic_clear_isr(s, irq);
> -	}
>  	/*
>  	 * We don't clear a level sensitive interrupt here
>  	 */
>  	if (!(s->elcr & (1 << irq)))
>  		s->irr &= ~(1 << irq);
> +
> +	if (s->auto_eoi) {
> +		if (s->rotate_on_auto_eoi)
> +			s->priority_add = (irq + 1) & 7;
> +		pic_clear_isr(s, irq);
> +	}
> +
>  }
>  
>  int kvm_pic_read_irq(struct kvm *kvm)
> @@ -282,9 +292,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
>  				priority = get_priority(s, s->isr);
>  				if (priority != 8) {
>  					irq = (priority + s->priority_add) & 7;
> -					pic_clear_isr(s, irq);
>  					if (cmd == 5)
>  						s->priority_add = (irq + 1) & 7;
> +					pic_clear_isr(s, irq);
>  					pic_update_irq(s->pics_state);
>  				}
>  				break;
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index ce195f8..f24d4d0 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -471,11 +471,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
>  		trigger_mode = IOAPIC_LEVEL_TRIG;
>  	else
>  		trigger_mode = IOAPIC_EDGE_TRIG;
> -	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
> -		mutex_lock(&apic->vcpu->kvm->irq_lock);
> +	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
>  		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> -		mutex_unlock(&apic->vcpu->kvm->irq_lock);
> -	}
>  }
>  
>  static void apic_send_ipi(struct kvm_lapic *apic)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 850cf56..75dcdad 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2023,9 +2023,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
>  			sizeof(struct kvm_pic_state));
>  		break;
>  	case KVM_IRQCHIP_IOAPIC:
> -		memcpy(&chip->chip.ioapic,
> -			ioapic_irqchip(kvm),
> -			sizeof(struct kvm_ioapic_state));
> +		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
>  		break;
>  	default:
>  		r = -EINVAL;
> @@ -2055,11 +2053,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
>  		spin_unlock(&pic_irqchip(kvm)->lock);
>  		break;
>  	case KVM_IRQCHIP_IOAPIC:
> -		mutex_lock(&kvm->irq_lock);
> -		memcpy(ioapic_irqchip(kvm),
> -			&chip->chip.ioapic,
> -			sizeof(struct kvm_ioapic_state));
> -		mutex_unlock(&kvm->irq_lock);
> +		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
>  		break;
>  	default:
>  		r = -EINVAL;
> diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> index fa05f67..e9de458 100644
> --- a/virt/kvm/ioapic.c
> +++ b/virt/kvm/ioapic.c
> @@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
>  	union kvm_ioapic_redirect_entry entry;
>  	int ret = 1;
>  
> +	mutex_lock(&ioapic->lock);
>  	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
>  		entry = ioapic->redirtbl[irq];
>  		level ^= entry.fields.polarity;

But this is an RCU critical section now, right? 

If so, you can't sleep, must use a spinlock.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov Aug. 13, 2009, 9:48 a.m. UTC | #2
On Thu, Aug 13, 2009 at 06:13:30AM -0300, Marcelo Tosatti wrote:
> > +++ b/virt/kvm/ioapic.c
> > @@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
> >  	union kvm_ioapic_redirect_entry entry;
> >  	int ret = 1;
> >  
> > +	mutex_lock(&ioapic->lock);
> >  	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
> >  		entry = ioapic->redirtbl[irq];
> >  		level ^= entry.fields.polarity;
> 
> But this is an RCU critical section now, right? 
> 
Correct! Forget about that. It was spinlock, but Avi prefers mutexes.

> If so, you can't sleep, must use a spinlock.
Either that or I can collect callbacks in critical section and call them
afterwords.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 13, 2009, 9:49 a.m. UTC | #3
On 08/13/2009 12:48 PM, Gleb Natapov wrote:
> On Thu, Aug 13, 2009 at 06:13:30AM -0300, Marcelo Tosatti wrote:
>    
>>> +++ b/virt/kvm/ioapic.c
>>> @@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
>>>   	union kvm_ioapic_redirect_entry entry;
>>>   	int ret = 1;
>>>
>>> +	mutex_lock(&ioapic->lock);
>>>   	if (irq>= 0&&  irq<  IOAPIC_NUM_PINS) {
>>>   		entry = ioapic->redirtbl[irq];
>>>   		level ^= entry.fields.polarity;
>>>        
>> But this is an RCU critical section now, right?
>>
>>      
> Correct! Forget about that. It was spinlock, but Avi prefers mutexes.
>    

Well, I prefer correct code to mutexes.

>    
>> If so, you can't sleep, must use a spinlock.
>>      
> Either that or I can collect callbacks in critical section and call them
> afterwords.
>    

There's also srcu.
Gleb Natapov Aug. 13, 2009, 10:09 a.m. UTC | #4
On Thu, Aug 13, 2009 at 12:49:45PM +0300, Avi Kivity wrote:
> On 08/13/2009 12:48 PM, Gleb Natapov wrote:
> >On Thu, Aug 13, 2009 at 06:13:30AM -0300, Marcelo Tosatti wrote:
> >>>+++ b/virt/kvm/ioapic.c
> >>>@@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
> >>>  	union kvm_ioapic_redirect_entry entry;
> >>>  	int ret = 1;
> >>>
> >>>+	mutex_lock(&ioapic->lock);
> >>>  	if (irq>= 0&&  irq<  IOAPIC_NUM_PINS) {
> >>>  		entry = ioapic->redirtbl[irq];
> >>>  		level ^= entry.fields.polarity;
> >>But this is an RCU critical section now, right?
> >>
> >Correct! Forget about that. It was spinlock, but Avi prefers mutexes.
> 
> Well, I prefer correct code to mutexes.
> 
> >>If so, you can't sleep, must use a spinlock.
> >Either that or I can collect callbacks in critical section and call them
> >afterwords.
> 
> There's also srcu.
> 
What are the disadvantages? There should be some, otherwise why not use
it all the time.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 13, 2009, 10:44 a.m. UTC | #5
On 08/13/2009 01:09 PM, Gleb Natapov wrote:
>> There's also srcu.
>>
>>      
> What are the disadvantages? There should be some, otherwise why not use
> it all the time.
>    

I think it incurs an atomic op in the read path, but not much overhead 
otherwise.  Paul?
Paul E. McKenney Aug. 13, 2009, 3:15 p.m. UTC | #6
On Thu, Aug 13, 2009 at 01:44:06PM +0300, Avi Kivity wrote:
> On 08/13/2009 01:09 PM, Gleb Natapov wrote:
>>> There's also srcu.
>>>      
>> What are the disadvantages? There should be some, otherwise why not use
>> it all the time.
>
> I think it incurs an atomic op in the read path, but not much overhead 
> otherwise.  Paul?

There are not atomic operations in srcu_read_lock():

	int srcu_read_lock(struct srcu_struct *sp)
	{
		int idx;

		preempt_disable();
		idx = sp->completed & 0x1;
		barrier();  /* ensure compiler looks -once- at sp->completed. */
		per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++;
		srcu_barrier();  /* ensure compiler won't misorder critical section. */
		preempt_enable();
		return idx;
	}

There is a preempt_disable() and a preempt_enable(), which
non-atomically manipulate a field in the thread_info structure.
There is a barrier() and an srcu_barrier(), which are just compiler
directives (no code generated).  Other than that, simple arithmetic
and array accesses.  Shouldn't even be any cache misses in the common
case (the uncommon case being where synchronize_srcu() executing on
some other CPU).

There is even less in srcu_read_unlock():

	void srcu_read_unlock(struct srcu_struct *sp, int idx)
	{
		preempt_disable();
		srcu_barrier();  /* ensure compiler won't misorder critical section. */
		per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
		preempt_enable();
	}

So SRCU should have pretty low overhead.  And, as with other forms
of RCU, legal use of the read-side primitives cannot possibly
participate in deadlocks.

So, to answer the question above, what are the disadvantages?

o	On the update side, synchronize_srcu() does takes some time,
	mostly blocking in synchronize_sched().  So, like other
	forms of RCU, you would use SRCU in read-mostly situations.

o	Just as with RCU, reads and updates run concurrently, with
	all the good and bad that this implies.  For an example
	of the good, srcu_read_lock() executes deterministically,
	no blocking or spinning.  For an example of the bad, there
	is no way to shut down SRCU readers.  These are opposite
	sides of the same coin.  ;-)

o	Although srcu_read_lock() and srcu_read_unlock() are light
	weight, they are expensive compared to other forms of RCU.

o	In contrast to other forms of RCU, SRCU requires that the
	return value from srcu_read_lock() be passed into
	srcu_read_unlock().  Usually not a problem, but does place
	another constraint on the code.

Please keep in mind that I have no idea about what you are thinking of
using SRCU for, so the above advice is necessarily quite generic.  ;-)

							Thanx, Paul
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0ad09f0..4a98314 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -851,8 +851,7 @@  static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
 	r = 0;
 	switch (chip->chip_id) {
 	case KVM_IRQCHIP_IOAPIC:
-		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
-				sizeof(struct kvm_ioapic_state));
+		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
 		break;
 	default:
 		r = -EINVAL;
@@ -868,9 +867,7 @@  static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 	r = 0;
 	switch (chip->chip_id) {
 	case KVM_IRQCHIP_IOAPIC:
-		memcpy(ioapic_irqchip(kvm),
-				&chip->chip.ioapic,
-				sizeof(struct kvm_ioapic_state));
+		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
 		break;
 	default:
 		r = -EINVAL;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index eb2b8b7..96ea7f7 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -38,7 +38,15 @@  static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 	s->isr_ack |= (1 << irq);
 	if (s != &s->pics_state->pics[0])
 		irq += 8;
+	/*
+	 * We are dropping lock while calling ack notifiers since ack
+	 * notifier callbacks for assigned devices call into PIC recursively.
+	 * Other interrupt may be delivered to PIC while lock is dropped but
+	 * it should be safe since PIC state is already updated at this stage.
+	 */
+	spin_unlock(&s->pics_state->lock);
 	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
+	spin_lock(&s->pics_state->lock);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
@@ -176,16 +184,18 @@  int kvm_pic_set_irq(void *opaque, int irq, int level)
 static inline void pic_intack(struct kvm_kpic_state *s, int irq)
 {
 	s->isr |= 1 << irq;
-	if (s->auto_eoi) {
-		if (s->rotate_on_auto_eoi)
-			s->priority_add = (irq + 1) & 7;
-		pic_clear_isr(s, irq);
-	}
 	/*
 	 * We don't clear a level sensitive interrupt here
 	 */
 	if (!(s->elcr & (1 << irq)))
 		s->irr &= ~(1 << irq);
+
+	if (s->auto_eoi) {
+		if (s->rotate_on_auto_eoi)
+			s->priority_add = (irq + 1) & 7;
+		pic_clear_isr(s, irq);
+	}
+
 }
 
 int kvm_pic_read_irq(struct kvm *kvm)
@@ -282,9 +292,9 @@  static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 				priority = get_priority(s, s->isr);
 				if (priority != 8) {
 					irq = (priority + s->priority_add) & 7;
-					pic_clear_isr(s, irq);
 					if (cmd == 5)
 						s->priority_add = (irq + 1) & 7;
+					pic_clear_isr(s, irq);
 					pic_update_irq(s->pics_state);
 				}
 				break;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce195f8..f24d4d0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -471,11 +471,8 @@  static void apic_set_eoi(struct kvm_lapic *apic)
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
-	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
-		mutex_lock(&apic->vcpu->kvm->irq_lock);
+	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
 		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
-		mutex_unlock(&apic->vcpu->kvm->irq_lock);
-	}
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 850cf56..75dcdad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2023,9 +2023,7 @@  static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 			sizeof(struct kvm_pic_state));
 		break;
 	case KVM_IRQCHIP_IOAPIC:
-		memcpy(&chip->chip.ioapic,
-			ioapic_irqchip(kvm),
-			sizeof(struct kvm_ioapic_state));
+		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
 		break;
 	default:
 		r = -EINVAL;
@@ -2055,11 +2053,7 @@  static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 		spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
 	case KVM_IRQCHIP_IOAPIC:
-		mutex_lock(&kvm->irq_lock);
-		memcpy(ioapic_irqchip(kvm),
-			&chip->chip.ioapic,
-			sizeof(struct kvm_ioapic_state));
-		mutex_unlock(&kvm->irq_lock);
+		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
 		break;
 	default:
 		r = -EINVAL;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index fa05f67..e9de458 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -182,6 +182,7 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
+	mutex_lock(&ioapic->lock);
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
 		level ^= entry.fields.polarity;
@@ -196,34 +197,51 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 		}
 		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	}
+	mutex_unlock(&ioapic->lock);
+
 	return ret;
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
-				    int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
+				     int trigger_mode)
 {
-	union kvm_ioapic_redirect_entry *ent;
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
 
-	ent = &ioapic->redirtbl[pin];
+		if (ent->fields.vector != vector)
+			continue;
 
-	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
+		/*
+		 * We are dropping lock while calling ack notifiers because ack
+		 * notifier callbacks for assigned devices call into IOAPIC
+		 * recursively. Since remote_irr is cleared only after call
+		 * to notifiers if the same vector will be delivered while lock
+		 * is dropped it will be put into irr and will be delivered
+		 * after ack notifier returns.
+		 */
+		mutex_unlock(&ioapic->lock);
+		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
+		mutex_lock(&ioapic->lock);
+
+		if (trigger_mode != IOAPIC_LEVEL_TRIG)
+			continue;
 
-	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
-			ioapic_service(ioapic, pin);
+		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+			ioapic_service(ioapic, i);
 	}
 }
 
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	int i;
 
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		if (ioapic->redirtbl[i].fields.vector == vector)
-			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
+	mutex_lock(&ioapic->lock);
+	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+	mutex_unlock(&ioapic->lock);
 }
 
 static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
@@ -248,8 +266,8 @@  static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
-	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
+	mutex_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		result = ioapic->ioregsel;
@@ -263,6 +281,8 @@  static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 		result = 0;
 		break;
 	}
+	mutex_unlock(&ioapic->lock);
+
 	switch (len) {
 	case 8:
 		*(u64 *) val = result;
@@ -275,7 +295,6 @@  static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
-	mutex_unlock(&ioapic->kvm->irq_lock);
 	return 0;
 }
 
@@ -291,15 +310,15 @@  static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
-	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
 		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
-		goto unlock;
+		return 0;
 	}
 
 	addr &= 0xff;
+	mutex_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		ioapic->ioregsel = data;
@@ -310,15 +329,14 @@  static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		break;
 #ifdef	CONFIG_IA64
 	case IOAPIC_REG_EOI:
-		kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
+		__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
 		break;
 #endif
 
 	default:
 		break;
 	}
-unlock:
-	mutex_unlock(&ioapic->kvm->irq_lock);
+	mutex_unlock(&ioapic->lock);
 	return 0;
 }
 
@@ -347,6 +365,7 @@  int kvm_ioapic_init(struct kvm *kvm)
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 		return -ENOMEM;
+	mutex_init(&ioapic->lock);
 	kvm->arch.vioapic = ioapic;
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -358,3 +377,26 @@  int kvm_ioapic_init(struct kvm *kvm)
 	return ret;
 }
 
+int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+{
+	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+	if (!ioapic)
+		return -EINVAL;
+
+	mutex_lock(&ioapic->lock);
+	memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+	mutex_unlock(&ioapic->lock);
+	return 0;
+}
+
+int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+{
+	struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+	if (!ioapic)
+		return -EINVAL;
+
+	mutex_lock(&ioapic->lock);
+	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	mutex_unlock(&ioapic->lock);
+	return 0;
+}
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7080b71..9de8272 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -44,6 +44,7 @@  struct kvm_ioapic {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
+	struct mutex lock;
 };
 
 #ifdef DEBUG
@@ -73,4 +74,7 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq);
+int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+
 #endif