
[4/4] KVM: switch irq injection/acking to irq_lock

Message ID 20090518170855.597124743@localhost.localdomain (mailing list archive)
State New, archived

Commit Message

Marcelo Tosatti May 18, 2009, 4:56 p.m. UTC
Switch irq injection/acking to irq_lock, and change the PIO/MMIO paths
so that the device search is protected by kvm->lock, but the read/write
callbacks are not (locking there is the responsibility of the device).

Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
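
As an illustration of the new PIO/MMIO pattern, here is a condensed
sketch (the wrapper function is hypothetical; vcpu_find_mmio_dev() and
kvm_iodevice_read() are the helpers the patch actually touches):
kvm->lock now covers only the device search and is dropped before the
callback runs, so the device must do its own locking internally.

/* Hypothetical wrapper, for illustration of the locking pattern only. */
static int emulate_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa,
			     int bytes, void *val)
{
	struct kvm_io_device *mmio_dev;

	mutex_lock(&vcpu->kvm->lock);	/* protects the bus search only */
	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
	mutex_unlock(&vcpu->kvm->lock);	/* dropped before the callback */

	if (!mmio_dev)
		return -ENODEV;		/* no in-kernel device: exit to userspace */

	/* The device's read handler takes its own lock internally. */
	kvm_iodevice_read(mmio_dev, gpa, bytes, val);
	return 0;
}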

Comments

Gregory Haskins May 20, 2009, 12:15 p.m. UTC | #1
Marcelo Tosatti wrote:
> Switch irq injection/acking to irq_lock, and change the PIO/MMIO paths
> so that the device search is protected by kvm->lock, but the read/write
> callbacks are not (locking there is the responsibility of the device).
>
> Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>
> Index: kvm/arch/x86/kvm/i8254.c
> ===================================================================
> --- kvm.orig/arch/x86/kvm/i8254.c
> +++ kvm/arch/x86/kvm/i8254.c
> @@ -634,10 +634,10 @@ static void __inject_pit_timer_intr(stru
>  	struct kvm_vcpu *vcpu;
>  	int i;
>  
> -	mutex_lock(&kvm->lock);
> +	mutex_lock(&kvm->irq_lock);
>   

There would be advantages to making irq_lock an
interrupt/nonpreempt-friendly design, e.g. s/mutex/spinlock.  For
instance, irqfd could then inject the interrupt directly instead of
deferring to a workqueue.  I'm not sure if this is possible/easy, but
it's something to consider.

-Greg

>  	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
>  	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
> -	mutex_unlock(&kvm->lock);
> +	mutex_unlock(&kvm->irq_lock);
>  
>  	/*
>  	 * Provides NMI watchdog support via Virtual Wire mode.
> [remainder of quoted patch snipped; the full patch appears below]
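
To make the workqueue point concrete, here is a minimal sketch, assuming
a hypothetical my_irqfd context struct: kvm_set_irq() may sleep on the
irq_lock mutex, so the eventfd wakeup callback (which can run in atomic
context) cannot call it directly and instead schedules a work item.

#include <linux/kvm_host.h>
#include <linux/workqueue.h>

/* Hypothetical irqfd context, for illustration only. */
struct my_irqfd {
	struct kvm *kvm;
	int gsi;
	struct work_struct inject;
};

static void my_irqfd_inject(struct work_struct *work)
{
	struct my_irqfd *irqfd = container_of(work, struct my_irqfd, inject);

	/* irq_lock is a mutex, so this must run in process context;
	 * that is why the eventfd callback does
	 * schedule_work(&irqfd->inject) rather than injecting inline. */
	mutex_lock(&irqfd->kvm->irq_lock);
	kvm_set_irq(irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
	kvm_set_irq(irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
	mutex_unlock(&irqfd->kvm->irq_lock);
}

With a spinlock-based irq_lock, the two kvm_set_irq() calls could move
into the wakeup callback itself and the work item would disappear.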
Marcelo Tosatti May 20, 2009, 2:16 p.m. UTC | #2
On Wed, May 20, 2009 at 08:15:55AM -0400, Gregory Haskins wrote:
> Marcelo Tosatti wrote:
> > Switch irq injection/acking to irq_lock, and change the PIO/MMIO paths
> > so that the device search is protected by kvm->lock, but the read/write
> > callbacks are not (locking there is the responsibility of the device).
> >
> > Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.
> >
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> >
> > Index: kvm/arch/x86/kvm/i8254.c
> > ===================================================================
> > --- kvm.orig/arch/x86/kvm/i8254.c
> > +++ kvm/arch/x86/kvm/i8254.c
> > @@ -634,10 +634,10 @@ static void __inject_pit_timer_intr(stru
> >  	struct kvm_vcpu *vcpu;
> >  	int i;
> >  
> > -	mutex_lock(&kvm->lock);
> > +	mutex_lock(&kvm->irq_lock);
> >   
> 
> There would be advantages to making irq_lock an
> interrupt/nonpreempt-friendly design, e.g. s/mutex/spinlock.  For
> instance, irqfd could then inject the interrupt directly instead of
> deferring to a workqueue.  I'm not sure if this is possible/easy, but
> it's something to consider.

It is possible to convert it to a spinlock (which also requires
switching bus->devs[] to RCU protection, as discussed).

Now, whether it's worth making it interrupt-safe depends on how costly
disabling interrupts is on the paths that do not already run with
interrupts disabled.  I did see kvm_set_irq relatively high in a
profile yesterday, BTW, but that's just hand-waving...
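
A minimal sketch of what RCU-protected bus->devs[] could look like
(my_bus_read() and the my_dev_in_range() match helper are hypothetical;
kvm_iodevice_read() is from the patch): the lookup and the callback stay
inside one read-side critical section so the device cannot be freed
underneath the caller, and the update side would publish entries with
rcu_assign_pointer() and synchronize_rcu() before freeing old ones.
Callbacks that sleep would need SRCU instead.

#include <linux/kvm_host.h>
#include <linux/rcupdate.h>

static int my_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len,
		       void *val)
{
	int i, ret = -EOPNOTSUPP;

	rcu_read_lock();	/* no mutex: safe even in atomic context */
	for (i = 0; i < bus->dev_count; i++) {
		struct kvm_io_device *dev = rcu_dereference(bus->devs[i]);

		if (dev && my_dev_in_range(dev, addr, len)) {
			/* Callback runs inside the read-side section,
			 * so the device cannot vanish under us. */
			kvm_iodevice_read(dev, addr, len, val);
			ret = 0;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}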
Avi Kivity May 24, 2009, 2:53 p.m. UTC | #3
Gregory Haskins wrote:
> Marcelo Tosatti wrote:
>   
>> Switch irq injection/acking to irq_lock, and change the PIO/MMIO paths
>> so that the device search is protected by kvm->lock, but the read/write
>> callbacks are not (locking there is the responsibility of the device).
>>
>> Fix for http://article.gmane.org/gmane.comp.emulators.kvm.devel/32286.
>>
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> Index: kvm/arch/x86/kvm/i8254.c
>> ===================================================================
>> --- kvm.orig/arch/x86/kvm/i8254.c
>> +++ kvm/arch/x86/kvm/i8254.c
>> @@ -634,10 +634,10 @@ static void __inject_pit_timer_intr(stru
>>  	struct kvm_vcpu *vcpu;
>>  	int i;
>>  
>> -	mutex_lock(&kvm->lock);
>> +	mutex_lock(&kvm->irq_lock);
>>   
>>     
>
> There would be advantages to making irq_lock an
> interrupt/nonpreempt-friendly design, e.g. s/mutex/spinlock.  For
> instance, irqfd could then inject the interrupt directly instead of
> deferring to a workqueue.  I'm not sure if this is possible/easy, but
> it's something to consider.
>   

I agree, but let's make the first step splitting out the interrupt
lock, and a second step changing it to a spinlock.  It's scary enough
as is.

Patch

Index: kvm/arch/x86/kvm/i8254.c
===================================================================
--- kvm.orig/arch/x86/kvm/i8254.c
+++ kvm/arch/x86/kvm/i8254.c
@@ -634,10 +634,10 @@  static void __inject_pit_timer_intr(stru
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
 	kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
Index: kvm/arch/x86/kvm/x86.c
===================================================================
--- kvm.orig/arch/x86/kvm/x86.c
+++ kvm/arch/x86/kvm/x86.c
@@ -1909,10 +1909,10 @@  long kvm_arch_vm_ioctl(struct file *filp
 			goto out;
 		if (irqchip_in_kernel(kvm)) {
 			__s32 status;
-			mutex_lock(&kvm->lock);
+			mutex_lock(&kvm->irq_lock);
 			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
 					irq_event.irq, irq_event.level);
-			mutex_unlock(&kvm->lock);
+			mutex_unlock(&kvm->irq_lock);
 			if (ioctl == KVM_IRQ_LINE_STATUS) {
 				irq_event.status = status;
 				if (copy_to_user(argp, &irq_event,
@@ -2158,12 +2158,11 @@  mmio:
 	 */
 	mutex_lock(&vcpu->kvm->lock);
 	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (mmio_dev) {
 		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-		mutex_unlock(&vcpu->kvm->lock);
 		return X86EMUL_CONTINUE;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 
 	vcpu->mmio_needed = 1;

 	vcpu->mmio_phys_addr = gpa;
@@ -2541,7 +2540,6 @@  static void kernel_pio(struct kvm_io_dev
 {
 	/* TODO: String I/O for in kernel device */
 
-	mutex_lock(&vcpu->kvm->lock);
 	if (vcpu->arch.pio.in)
 		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
 				  vcpu->arch.pio.size,
@@ -2550,7 +2548,6 @@  static void kernel_pio(struct kvm_io_dev
 		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
 				   vcpu->arch.pio.size,
 				   pd);
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static void pio_string_write(struct kvm_io_device *pio_dev,
@@ -2560,14 +2557,12 @@  static void pio_string_write(struct kvm_
 	void *pd = vcpu->arch.pio_data;
 	int i;
 
-	mutex_lock(&vcpu->kvm->lock);
 	for (i = 0; i < io->cur_count; i++) {
 		kvm_iodevice_write(pio_dev, io->port,
 				   io->size,
 				   pd);
 		pd += io->size;
 	}
-	mutex_unlock(&vcpu->kvm->lock);
 }
 
 static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
@@ -2604,7 +2599,9 @@  int kvm_emulate_pio(struct kvm_vcpu *vcp
 	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	memcpy(vcpu->arch.pio_data, &val, 4);
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (pio_dev) {
 		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
 		complete_pio(vcpu);
@@ -2668,9 +2665,11 @@  int kvm_emulate_pio_string(struct kvm_vc
 
 	vcpu->arch.pio.guest_gva = address;
 
+	mutex_lock(&vcpu->kvm->lock);
 	pio_dev = vcpu_find_pio_dev(vcpu, port,
 				    vcpu->arch.pio.cur_count,
 				    !vcpu->arch.pio.in);
+	mutex_unlock(&vcpu->kvm->lock);
 	if (!vcpu->arch.pio.in) {
 		/* string PIO write */
 		ret = pio_copy_data(vcpu);
Index: kvm/virt/kvm/kvm_main.c
===================================================================
--- kvm.orig/virt/kvm/kvm_main.c
+++ kvm/virt/kvm/kvm_main.c
@@ -62,6 +62,12 @@ 
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
+/*
+ * Ordering of locks:
+ *
+ * 		kvm->lock --> kvm->irq_lock
+ */
+
 DEFINE_SPINLOCK(kvm_lock);
 LIST_HEAD(vm_list);
 
@@ -126,11 +132,7 @@  static void kvm_assigned_dev_interrupt_w
 				    interrupt_work);
 	kvm = assigned_dev->kvm;
 
-	/* This is taken to safely inject irq inside the guest. When
-	 * the interrupt injection (or the ioapic code) uses a
-	 * finer-grained lock, update this
-	 */
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	spin_lock_irq(&assigned_dev->assigned_dev_lock);
 	if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
 		struct kvm_guest_msix_entry *guest_entries =
@@ -159,7 +161,7 @@  static void kvm_assigned_dev_interrupt_w
 	}
 
 	spin_unlock_irq(&assigned_dev->assigned_dev_lock);
-	mutex_unlock(&assigned_dev->kvm->lock);
+	mutex_unlock(&assigned_dev->kvm->irq_lock);
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -215,7 +217,7 @@  static void kvm_assigned_dev_ack_irq(str
 static void deassign_guest_irq(struct kvm *kvm,
 			       struct kvm_assigned_dev_kernel *assigned_dev)
 {
-	kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+	kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
 	assigned_dev->ack_notifier.gsi = -1;
 
 	if (assigned_dev->irq_source_id != -1)
Index: kvm/virt/kvm/irq_comm.c
===================================================================
--- kvm.orig/virt/kvm/irq_comm.c
+++ kvm/virt/kvm/irq_comm.c
@@ -62,6 +62,8 @@  int kvm_irq_delivery_to_apic(struct kvm 
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_is_dm_lowest_prio(irq))
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
@@ -113,7 +115,7 @@  static int kvm_set_msi(struct kvm_kernel
 	return kvm_irq_delivery_to_apic(kvm, NULL, &irq);
 }
 
-/* This should be called with the kvm->lock mutex held
+/* This should be called with the kvm->irq_lock mutex held
  * Return value:
  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
  *  = 0   Interrupt was coalesced (previous irq is still pending)
@@ -125,6 +127,8 @@  int kvm_set_irq(struct kvm *kvm, int irq
 	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	if (irq < KVM_IOAPIC_NUM_PINS) {
 		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
 
@@ -174,19 +178,26 @@  void kvm_notify_acked_irq(struct kvm *kv
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del_init(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
-/* The caller must hold kvm->lock mutex */
 int kvm_request_irq_source_id(struct kvm *kvm)
 {
 	unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-	int irq_source_id = find_first_zero_bit(bitmap,
+	int irq_source_id;
+
+	mutex_lock(&kvm->irq_lock);
+	irq_source_id = find_first_zero_bit(bitmap,
 				sizeof(kvm->arch.irq_sources_bitmap));
 
 	if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
@@ -196,6 +207,7 @@  int kvm_request_irq_source_id(struct kvm
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 	set_bit(irq_source_id, bitmap);
+	mutex_unlock(&kvm->irq_lock);
 
 	return irq_source_id;
 }
@@ -206,6 +218,7 @@  void kvm_free_irq_source_id(struct kvm *
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
 
+	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
 	    irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
@@ -214,19 +227,24 @@  void kvm_free_irq_source_id(struct kvm *
 	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
 		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
 				    struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
 	hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn)
 {
+	mutex_lock(&kvm->irq_lock);
 	hlist_del(&kimn->link);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
@@ -234,6 +252,8 @@  void kvm_fire_mask_notifiers(struct kvm 
 	struct kvm_irq_mask_notifier *kimn;
 	struct hlist_node *n;
 
+	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
+
 	hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
 		if (kimn->irq == irq)
 			kimn->func(kimn, mask);
@@ -249,7 +269,9 @@  static void __kvm_free_irq_routing(struc
 
 void kvm_free_irq_routing(struct kvm *kvm)
 {
+	mutex_lock(&kvm->irq_lock);
 	__kvm_free_irq_routing(&kvm->irq_routing);
+	mutex_unlock(&kvm->irq_lock);
 }
 
 static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
@@ -323,13 +345,13 @@  int kvm_set_irq_routing(struct kvm *kvm,
 		e = NULL;
 	}
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->irq_lock);
 	list_splice(&kvm->irq_routing, &tmp);
 	INIT_LIST_HEAD(&kvm->irq_routing);
 	list_splice(&irq_list, &kvm->irq_routing);
 	INIT_LIST_HEAD(&irq_list);
 	list_splice(&tmp, &irq_list);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->irq_lock);
 
 	r = 0;
 
Index: kvm/arch/x86/kvm/lapic.c
===================================================================
--- kvm.orig/arch/x86/kvm/lapic.c
+++ kvm/arch/x86/kvm/lapic.c
@@ -425,7 +425,9 @@  static void apic_set_eoi(struct kvm_lapi
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
@@ -449,7 +451,9 @@  static void apic_send_ipi(struct kvm_lap
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
+	mutex_lock(&apic->vcpu->kvm->irq_lock);
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	mutex_unlock(&apic->vcpu->kvm->irq_lock);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
Index: kvm/include/linux/kvm_host.h
===================================================================
--- kvm.orig/include/linux/kvm_host.h
+++ kvm/include/linux/kvm_host.h
@@ -375,7 +375,8 @@  int kvm_set_irq(struct kvm *kvm, int irq
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
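
The ordering comment added to kvm_main.c (kvm->lock --> kvm->irq_lock)
means any path that needs both locks must take kvm->lock first.  A
minimal sketch, with a hypothetical function, of the only legal nesting:

/* Hypothetical example of the documented lock ordering; taking the
 * locks in the opposite order on another path would allow an ABBA
 * deadlock. */
static void my_locked_inject(struct kvm *kvm, int gsi, int level)
{
	mutex_lock(&kvm->lock);		/* outer lock first... */
	mutex_lock(&kvm->irq_lock);	/* ...then the irq lock */

	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, gsi, level);

	mutex_unlock(&kvm->irq_lock);
	mutex_unlock(&kvm->lock);
}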