diff mbox

Move irq sharing information to irqchip level.

Message ID 20090813171910.GA4636@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov Aug. 13, 2009, 5:19 p.m. UTC
This removes assumptions that max GSIs is smaller than number of pins.
Sharing is tracked on pin level not GSI level.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Avi Kivity Aug. 17, 2009, 9:35 a.m. UTC | #1
On 08/13/2009 08:19 PM, Gleb Natapov wrote:
> This removes assumptions that max GSIs is smaller than number of pins.
> Sharing is tracked on pin level not GSI level.
>
> Signed-off-by: Gleb Natapov<gleb@redhat.com>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b17d845..4c15bdd 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -413,7 +413,6 @@ struct kvm_arch{
>   	gpa_t ept_identity_map_addr;
>
>   	unsigned long irq_sources_bitmap;
> -	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
>   	u64 vm_init_tsc;
>   };
>
> diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> index 7d6058a..c025a23 100644
> --- a/arch/x86/kvm/irq.h
> +++ b/arch/x86/kvm/irq.h
> @@ -71,6 +71,7 @@ struct kvm_pic {
>   	int output;		/* intr from master PIC */
>   	struct kvm_io_device dev;
>   	void (*ack_notifier)(void *opaque, int irq);
> +	unsigned long irq_states[16];
>   };
>
>    

I think it's cleaner to move this into the routing table to avoid 
duplication.  Currently there is no object which is unique to a gsi, but 
after your array patch, it can be placed next to the hlist_head.
Gleb Natapov Aug. 21, 2009, 6:16 p.m. UTC | #2
On Mon, Aug 17, 2009 at 12:35:47PM +0300, Avi Kivity wrote:
> On 08/13/2009 08:19 PM, Gleb Natapov wrote:
> >This removes assumptions that max GSIs is smaller than number of pins.
> >Sharing is tracked on pin level not GSI level.
> >
> >Signed-off-by: Gleb Natapov<gleb@redhat.com>
> >diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >index b17d845..4c15bdd 100644
> >--- a/arch/x86/include/asm/kvm_host.h
> >+++ b/arch/x86/include/asm/kvm_host.h
> >@@ -413,7 +413,6 @@ struct kvm_arch{
> >  	gpa_t ept_identity_map_addr;
> >
> >  	unsigned long irq_sources_bitmap;
> >-	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
> >  	u64 vm_init_tsc;
> >  };
> >
> >diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> >index 7d6058a..c025a23 100644
> >--- a/arch/x86/kvm/irq.h
> >+++ b/arch/x86/kvm/irq.h
> >@@ -71,6 +71,7 @@ struct kvm_pic {
> >  	int output;		/* intr from master PIC */
> >  	struct kvm_io_device dev;
> >  	void (*ack_notifier)(void *opaque, int irq);
> >+	unsigned long irq_states[16];
> >  };
> >
> 
> I think it's cleaner to move this into the routing table to avoid
> duplication.  Currently there is no object which is unique to a gsi,
> but after your array patch, it can be placed next to the hlist_head.
> 
The problem is that at this level it is not known if GSI is MSI or not.
Current code dials with this by arbitrary assuming that GSI is MSI if it
is greater then KVM_IOAPIC_NUM_PINS, but this is not enforced when
routing table is installed and userspace is free to use any GSI as MSI.
Of cause this problem would not exist if MSIs were not linked to GSIs
in the first place.

The code duplication is very small (one line of code) and logically it
is more correct to track duplications at irqchip level. Theoretically
GSI may be shared when PIC is used and not shared when IOAPIC is used.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 23, 2009, 11:28 a.m. UTC | #3
On 08/21/2009 09:16 PM, Gleb Natapov wrote:
> On Mon, Aug 17, 2009 at 12:35:47PM +0300, Avi Kivity wrote:
>    
>> On 08/13/2009 08:19 PM, Gleb Natapov wrote:
>>      
>>> This removes assumptions that max GSIs is smaller than number of pins.
>>> Sharing is tracked on pin level not GSI level.
>>>
>>> Signed-off-by: Gleb Natapov<gleb@redhat.com>
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index b17d845..4c15bdd 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -413,7 +413,6 @@ struct kvm_arch{
>>>   	gpa_t ept_identity_map_addr;
>>>
>>>   	unsigned long irq_sources_bitmap;
>>> -	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
>>>   	u64 vm_init_tsc;
>>>   };
>>>
>>> diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
>>> index 7d6058a..c025a23 100644
>>> --- a/arch/x86/kvm/irq.h
>>> +++ b/arch/x86/kvm/irq.h
>>> @@ -71,6 +71,7 @@ struct kvm_pic {
>>>   	int output;		/* intr from master PIC */
>>>   	struct kvm_io_device dev;
>>>   	void (*ack_notifier)(void *opaque, int irq);
>>> +	unsigned long irq_states[16];
>>>   };
>>>
>>>        
>> I think it's cleaner to move this into the routing table to avoid
>> duplication.  Currently there is no object which is unique to a gsi,
>> but after your array patch, it can be placed next to the hlist_head.
>>
>>      
> The problem is that at this level it is not known if GSI is MSI or not.
> Current code dials with this by arbitrary assuming that GSI is MSI if it
> is greater then KVM_IOAPIC_NUM_PINS, but this is not enforced when
> routing table is installed and userspace is free to use any GSI as MSI.
> Of cause this problem would not exist if MSIs were not linked to GSIs
> in the first place.
>    

Does msi actually care about the states?  I don't think it does.

> The code duplication is very small (one line of code) and logically it
> is more correct to track duplications at irqchip level. Theoretically
> GSI may be shared when PIC is used and not shared when IOAPIC is used.
>
>    

No, it's more correct at the GSI level, since all interrupt lines 
connected to one GSI are shared.
Gleb Natapov Aug. 23, 2009, 11:36 a.m. UTC | #4
On Sun, Aug 23, 2009 at 02:28:24PM +0300, Avi Kivity wrote:
> On 08/21/2009 09:16 PM, Gleb Natapov wrote:
> >On Mon, Aug 17, 2009 at 12:35:47PM +0300, Avi Kivity wrote:
> >>On 08/13/2009 08:19 PM, Gleb Natapov wrote:
> >>>This removes assumptions that max GSIs is smaller than number of pins.
> >>>Sharing is tracked on pin level not GSI level.
> >>>
> >>>Signed-off-by: Gleb Natapov<gleb@redhat.com>
> >>>diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> >>>index b17d845..4c15bdd 100644
> >>>--- a/arch/x86/include/asm/kvm_host.h
> >>>+++ b/arch/x86/include/asm/kvm_host.h
> >>>@@ -413,7 +413,6 @@ struct kvm_arch{
> >>>  	gpa_t ept_identity_map_addr;
> >>>
> >>>  	unsigned long irq_sources_bitmap;
> >>>-	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
> >>>  	u64 vm_init_tsc;
> >>>  };
> >>>
> >>>diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
> >>>index 7d6058a..c025a23 100644
> >>>--- a/arch/x86/kvm/irq.h
> >>>+++ b/arch/x86/kvm/irq.h
> >>>@@ -71,6 +71,7 @@ struct kvm_pic {
> >>>  	int output;		/* intr from master PIC */
> >>>  	struct kvm_io_device dev;
> >>>  	void (*ack_notifier)(void *opaque, int irq);
> >>>+	unsigned long irq_states[16];
> >>>  };
> >>>
> >>I think it's cleaner to move this into the routing table to avoid
> >>duplication.  Currently there is no object which is unique to a gsi,
> >>but after your array patch, it can be placed next to the hlist_head.
> >>
> >The problem is that at this level it is not known if GSI is MSI or not.
> >Current code dials with this by arbitrary assuming that GSI is MSI if it
> >is greater then KVM_IOAPIC_NUM_PINS, but this is not enforced when
> >routing table is installed and userspace is free to use any GSI as MSI.
> >Of cause this problem would not exist if MSIs were not linked to GSIs
> >in the first place.
> 
> Does msi actually care about the states?  I don't think it does.
> 
That is the point. MSI doesn't care, but we don't know if GSI is MSI or
not.

> >The code duplication is very small (one line of code) and logically it
> >is more correct to track duplications at irqchip level. Theoretically
> >GSI may be shared when PIC is used and not shared when IOAPIC is used.
> >
> 
> No, it's more correct at the GSI level, since all interrupt lines
> connected to one GSI are shared.
> 
Why? All interrupts connected to the same irqchip pin (through pci irq
router or other onboard logic) are shared. GSI is pure logical
abstraction.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 23, 2009, 11:46 a.m. UTC | #5
On 08/23/2009 02:36 PM, Gleb Natapov wrote:
>>>
>>> The problem is that at this level it is not known if GSI is MSI or not.
>>> Current code dials with this by arbitrary assuming that GSI is MSI if it
>>> is greater then KVM_IOAPIC_NUM_PINS, but this is not enforced when
>>> routing table is installed and userspace is free to use any GSI as MSI.
>>> Of cause this problem would not exist if MSIs were not linked to GSIs
>>> in the first place.
>>>        
>> Does msi actually care about the states?  I don't think it does.
>>
>>      
> That is the point. MSI doesn't care, but we don't know if GSI is MSI or
> not.
>    

So we toggle the bits needlessly.  Just like with edge-triggered interrupts.
>> No, it's more correct at the GSI level, since all interrupt lines
>> connected to one GSI are shared.
>>
>>      
> Why? All interrupts connected to the same irqchip pin (through pci irq
> router or other onboard logic) are shared. GSI is pure logical
> abstraction.
>    

All interrupts connected through one GSI are shared, so why not keep the 
level information shared as well?
Gleb Natapov Aug. 23, 2009, 11:56 a.m. UTC | #6
On Sun, Aug 23, 2009 at 02:46:38PM +0300, Avi Kivity wrote:
> On 08/23/2009 02:36 PM, Gleb Natapov wrote:
> >>>
> >>>The problem is that at this level it is not known if GSI is MSI or not.
> >>>Current code dials with this by arbitrary assuming that GSI is MSI if it
> >>>is greater then KVM_IOAPIC_NUM_PINS, but this is not enforced when
> >>>routing table is installed and userspace is free to use any GSI as MSI.
> >>>Of cause this problem would not exist if MSIs were not linked to GSIs
> >>>in the first place.
> >>Does msi actually care about the states?  I don't think it does.
> >>
> >That is the point. MSI doesn't care, but we don't know if GSI is MSI or
> >not.
> 
> So we toggle the bits needlessly.  Just like with edge-triggered interrupts.
If we can avoid it why not?

> >>No, it's more correct at the GSI level, since all interrupt lines
> >>connected to one GSI are shared.
> >>
> >Why? All interrupts connected to the same irqchip pin (through pci irq
> >router or other onboard logic) are shared. GSI is pure logical
> >abstraction.
> 
> All interrupts connected through one GSI are shared, so why not keep
> the level information shared as well?
> 
If we will keep sharing info at irq routing table level how will we recalculate
sharing state when irq routing table changes?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 23, 2009, 12:02 p.m. UTC | #7
On 08/23/2009 02:56 PM, Gleb Natapov wrote:
>>>> Does msi actually care about the states?  I don't think it does.
>>>>          
>>> That is the point. MSI doesn't care, but we don't know if GSI is MSI or
>>> not.
>>>        
>> So we toggle the bits needlessly.  Just like with edge-triggered interrupts.
>>      
> If we can avoid it why not?
>    

It simplifies the code.  You do it at the kvm_set_irq() entry point 
regardless of whether the interrupt is level-triggered, edge-triggered, 
or msi.  As a bonus, you only do it once (not twice for pic/ioapic 
interrupts).

>> All interrupts connected through one GSI are shared, so why not keep
>> the level information shared as well?
>>
>>      
> If we will keep sharing info at irq routing table level how will we recalculate
> sharing state when irq routing table changes?
>    

Good question.  Move from old table to new table? but that interferes 
with rcu.

Okay, at irqchip level is better after all.
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b17d845..4c15bdd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -413,7 +413,6 @@  struct kvm_arch{
 	gpa_t ept_identity_map_addr;
 
 	unsigned long irq_sources_bitmap;
-	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
 	u64 vm_init_tsc;
 };
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 7d6058a..c025a23 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -71,6 +71,7 @@  struct kvm_pic {
 	int output;		/* intr from master PIC */
 	struct kvm_io_device dev;
 	void (*ack_notifier)(void *opaque, int irq);
+	unsigned long irq_states[16];
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f814512..beab24b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -121,7 +121,7 @@  struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
 	int (*set)(struct kvm_kernel_irq_routing_entry *e,
-		    struct kvm *kvm, int level);
+		   struct kvm *kvm, int irq_source_id, int level);
 	union {
 		struct {
 			unsigned irqchip;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7080b71..6e461ad 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -41,6 +41,7 @@  struct kvm_ioapic {
 	u32 irr;
 	u32 pad;
 	union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
+	unsigned long irq_states[IOAPIC_NUM_PINS];
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 001663f..11aa702 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -31,20 +31,39 @@ 
 
 #include "ioapic.h"
 
+static inline int kvm_irq_line_state(unsigned long *irq_state,
+				     int irq_source_id, int level)
+{
+	/* Logical OR for level trig interrupt */
+	if (level)
+		set_bit(irq_source_id, irq_state);
+	else
+		clear_bit(irq_source_id, irq_state);
+
+	return !!(*irq_state);
+}
+
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-			   struct kvm *kvm, int level)
+			   struct kvm *kvm, int irq_source_id, int level)
 {
 #ifdef CONFIG_X86
-	return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level);
+	struct kvm_pic *pic = pic_irqchip(kvm);
+	level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin],
+				   irq_source_id, level);
+	return kvm_pic_set_irq(pic, e->irqchip.pin, level);
 #else
 	return -1;
 #endif
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
-			      struct kvm *kvm, int level)
+			      struct kvm *kvm, int irq_source_id, int level)
 {
-	return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level);
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin],
+				   irq_source_id, level);
+
+	return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level);
 }
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
@@ -96,10 +115,13 @@  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 }
 
 static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		       struct kvm *kvm, int level)
+		       struct kvm *kvm, int irq_source_id, int level)
 {
 	struct kvm_lapic_irq irq;
 
+	if (!level)
+		return -1;
+
 	trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
 
 	irq.dest_id = (e->msi.address_lo &
@@ -125,34 +147,19 @@  static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level)
 {
 	struct kvm_kernel_irq_routing_entry *e;
-	unsigned long *irq_state, sig_level;
 	int ret = -1;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
 	WARN_ON(!mutex_is_locked(&kvm->irq_lock));
 
-	if (irq < KVM_IOAPIC_NUM_PINS) {
-		irq_state = (unsigned long *)&kvm->arch.irq_states[irq];
-
-		/* Logical OR for level trig interrupt */
-		if (level)
-			set_bit(irq_source_id, irq_state);
-		else
-			clear_bit(irq_source_id, irq_state);
-		sig_level = !!(*irq_state);
-	} else if (!level)
-		return ret;
-	else /* Deal with MSI/MSI-X */
-		sig_level = 1;
-
 	/* Not possible to detect if the guest uses the PIC or the
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
 	list_for_each_entry(e, &kvm->irq_routing, link)
 		if (e->gsi == irq) {
-			int r = e->set(e, kvm, sig_level);
+			int r = e->set(e, kvm, irq_source_id, level);
 			if (r < 0)
 				continue;
 
@@ -232,8 +239,12 @@  void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 		printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
 		return;
 	}
-	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
-		clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
+	for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) {
+		clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]);
+		if (i >= 16)
+			continue;
+		clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]);
+	}
 	clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
 	mutex_unlock(&kvm->irq_lock);
 }