[1/7] KVM: Add a route layer to convert MSI message to GSI
diff mbox

Message ID 1231411535-2461-2-git-send-email-sheng@linux.intel.com
State Accepted, archived
Headers show

Commit Message

Sheng Yang Jan. 8, 2009, 10:45 a.m. UTC
Avi's purpose, to use single kvm_set_irq() to deal with all interrupt, including
MSI. So here is it.

struct gsi_route_entry is a mapping from a special gsi(with KVM_GSI_MSG_MASK) to
MSI/MSI-X message address/data. And the struct can also be extended for other
purpose.

Now we support up to 256 gsi_route_entry mapping, and gsi is allocated by kernel and
provide two ioctls to userspace, which is more flexiable.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
---
 include/linux/kvm.h      |   26 +++++++++++
 include/linux/kvm_host.h |   20 +++++++++
 virt/kvm/irq_comm.c      |   70 ++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c      |  105 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 221 insertions(+), 0 deletions(-)

Comments

Marcelo Tosatti Jan. 8, 2009, 2:20 p.m. UTC | #1
On Thu, Jan 08, 2009 at 06:45:29PM +0800, Sheng Yang wrote:
>   * ioctls for VM fds
> @@ -433,6 +436,8 @@ struct kvm_trace_rec {
>  #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
>  			    struct kvm_assigned_irq)
>  #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
> +#define KVM_REQUEST_GSI_ROUTE	  _IOWR(KVMIO, 0x72, void *)
> +#define KVM_FREE_GSI_ROUTE	  _IOR(KVMIO, 0x73, void *)

Oh this slipped: should be struct kvm_gsi_route_guest.

>  /*
>   * ioctls for vcpu fds
> @@ -553,4 +558,25 @@ struct kvm_assigned_irq {
>  #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
>  #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
>  
> +struct kvm_gsi_route_guest {
> +	__u32 entries_nr;
> +	struct kvm_gsi_route_entry_guest *entries;
> +};

And you can use a zero sized array here.

Sorry :(
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Jan. 8, 2009, 3:08 p.m. UTC | #2
Sheng Yang wrote:
> Avi's purpose, to use single kvm_set_irq() to deal with all interrupt, including
> MSI. So here is it.
>
> struct gsi_route_entry is a mapping from a special gsi(with KVM_GSI_MSG_MASK) to
> MSI/MSI-X message address/data. And the struct can also be extended for other
> purpose.
>
> Now we support up to 256 gsi_route_entry mapping, and gsi is allocated by kernel and
> provide two ioctls to userspace, which is more flexiable.
>
> @@ -553,4 +558,25 @@ struct kvm_assigned_irq {
>  #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
>  #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
>  
> +struct kvm_gsi_route_guest {
> +	__u32 entries_nr;
>   

Need padding here otherwise offsetof(entries) will differ on 32-bit and 
64-bit kernels.

> +	struct kvm_gsi_route_entry_guest *entries;
>   

Like Marcelo says, zero sized array is better here.

> +};
> +
> +#define KVM_GSI_ROUTE_MSI	(1 << 0)
>   

This looks like a flag.  Shouldn't it be a type?

> +struct kvm_gsi_route_entry_guest {
>   
what does _guest mean here? almost all kvm stuff is _guest related.

> +	__u32 gsi;
> +	__u32 type;
> +	__u32 flags;
> +	__u32 reserved;
> +	union {
> +		struct {
> +			__u32 addr_lo;
> +			__u32 addr_hi;
> +			__u32 data;
> +		} msi;
> +		__u32 padding[8];
> +	};
> +};
> +
>   

Since we replace the entire table every time, how do ioapic/pic gsis work?

>  
>  /* The guest did something we don't support. */
> @@ -336,6 +339,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
>  				      struct kvm_irq_mask_notifier *kimn);
>  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
>  
> +#define KVM_GSI_ROUTE_MASK    0x1000000ull
> +struct kvm_gsi_route_entry {
> +	u32 gsi;
> +	u32 type;
> +	u32 flags;
> +	u32 reserved;
> +	union {
> +		struct msi_msg msi;
> +		u32 reserved[8];
>   

No need for reserved fields in kernel data.

> +	};
> +	struct hlist_node link;
> +};
> @@ -123,3 +123,73 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
>  			kimn->func(kimn, mask);
>  }
>  
> +int kvm_update_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry)
> +{
> +	struct kvm_gsi_route_entry *found_entry, *new_entry;
> +	int r, gsi;
> +
> +	mutex_lock(&kvm->lock);
> +	/* Find whether we need a update or a new entry */
> +	found_entry = kvm_find_gsi_route_entry(kvm, entry->gsi);
> +	if (found_entry)
> +		*found_entry = *entry;
> +	else {
> +		gsi = find_first_zero_bit(kvm->gsi_route_bitmap,
> +					  KVM_NR_GSI_ROUTE_ENTRIES);
> +		if (gsi >= KVM_NR_GSI_ROUTE_ENTRIES) {
> +			r = -ENOSPC;
> +			goto out;
> +		}
> +		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
> +		if (!new_entry) {
> +			r = -ENOMEM;
> +			goto out;
> +		}
> +		*new_entry = *entry;
> +		entry->gsi = gsi | KVM_GSI_ROUTE_MASK;
> +		__set_bit(gsi, kvm->gsi_route_bitmap);
> +		hlist_add_head(&new_entry->link, &kvm->gsi_route_list);
> +	}
> +	r = 0;
> +out:
> +	mutex_unlock(&kvm->lock);
> +	return r;
> +}
>   

Why not throw everything and set the new table?

I didn't see where you respond the new KVM_CAP.  It looks like a good 
place to return the maximum size of the table.
Sheng Yang Jan. 9, 2009, 1:51 a.m. UTC | #3
On Thursday 08 January 2009 22:20:22 Marcelo Tosatti wrote:
> On Thu, Jan 08, 2009 at 06:45:29PM +0800, Sheng Yang wrote:
> >   * ioctls for VM fds
> > @@ -433,6 +436,8 @@ struct kvm_trace_rec {
> >  #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
> >  			    struct kvm_assigned_irq)
> >  #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
> > +#define KVM_REQUEST_GSI_ROUTE	  _IOWR(KVMIO, 0x72, void *)
> > +#define KVM_FREE_GSI_ROUTE	  _IOR(KVMIO, 0x73, void *)
>
> Oh this slipped: should be struct kvm_gsi_route_guest.

Oh, yeah, forgot to change it back(I original purpose a array here...)
>
> >  /*
> >   * ioctls for vcpu fds
> > @@ -553,4 +558,25 @@ struct kvm_assigned_irq {
> >  #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
> >  #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
> >
> > +struct kvm_gsi_route_guest {
> > +	__u32 entries_nr;
> > +	struct kvm_gsi_route_entry_guest *entries;
> > +};
>
> And you can use a zero sized array here.

OK.
>
> Sorry :(

Oh, that's not necessary. :)
Sheng Yang Jan. 9, 2009, 2:50 a.m. UTC | #4
On Thursday 08 January 2009 23:08:25 Avi Kivity wrote:
> Sheng Yang wrote:
> > Avi's purpose, to use single kvm_set_irq() to deal with all interrupt,
> > including MSI. So here is it.
> >
> > struct gsi_route_entry is a mapping from a special gsi(with
> > KVM_GSI_MSG_MASK) to MSI/MSI-X message address/data. And the struct can
> > also be extended for other purpose.
> >
> > Now we support up to 256 gsi_route_entry mapping, and gsi is allocated by
> > kernel and provide two ioctls to userspace, which is more flexiable.
> >
> > @@ -553,4 +558,25 @@ struct kvm_assigned_irq {
> >  #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
> >  #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
> >
> > +struct kvm_gsi_route_guest {
> > +	__u32 entries_nr;
>
> Need padding here otherwise offsetof(entries) will differ on 32-bit and
> 64-bit kernels.

OK.

>
> > +	struct kvm_gsi_route_entry_guest *entries;
>
> Like Marcelo says, zero sized array is better here.
>
> > +};
> > +
> > +#define KVM_GSI_ROUTE_MSI	(1 << 0)
>
> This looks like a flag.  Shouldn't it be a type?

Oh, custom... Would update.

> > +struct kvm_gsi_route_entry_guest {
>
> what does _guest mean here? almost all kvm stuff is _guest related.

Because I can't think of a good name... kvm_gsi_route_entry_guest? 
kvm_gsi_kernel_route_entry? What's your favorite? :)

> > +	__u32 gsi;
> > +	__u32 type;
> > +	__u32 flags;
> > +	__u32 reserved;
> > +	union {
> > +		struct {
> > +			__u32 addr_lo;
> > +			__u32 addr_hi;
> > +			__u32 data;
> > +		} msi;
> > +		__u32 padding[8];
> > +	};
> > +};
> > +
>
> Since we replace the entire table every time, how do ioapic/pic gsis work?
> >  /* The guest did something we don't support. */
> > @@ -336,6 +339,19 @@ void kvm_unregister_irq_mask_notifier(struct kvm
> > *kvm, int irq, struct kvm_irq_mask_notifier *kimn);
> >  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
> >
> > +#define KVM_GSI_ROUTE_MASK    0x1000000ull
> > +struct kvm_gsi_route_entry {
> > +	u32 gsi;
> > +	u32 type;
> > +	u32 flags;
> > +	u32 reserved;
> > +	union {
> > +		struct msi_msg msi;
> > +		u32 reserved[8];
>
> No need for reserved fields in kernel data.

Yeah

> > +	};
> > +	struct hlist_node link;
> > +};
> > @@ -123,3 +123,73 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int
> > irq, bool mask) kimn->func(kimn, mask);
> >  }
> >
> > +int kvm_update_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry
> > *entry) +{
> > +	struct kvm_gsi_route_entry *found_entry, *new_entry;
> > +	int r, gsi;
> > +
> > +	mutex_lock(&kvm->lock);
> > +	/* Find whether we need a update or a new entry */
> > +	found_entry = kvm_find_gsi_route_entry(kvm, entry->gsi);
> > +	if (found_entry)
> > +		*found_entry = *entry;
> > +	else {
> > +		gsi = find_first_zero_bit(kvm->gsi_route_bitmap,
> > +					  KVM_NR_GSI_ROUTE_ENTRIES);
> > +		if (gsi >= KVM_NR_GSI_ROUTE_ENTRIES) {
> > +			r = -ENOSPC;
> > +			goto out;
> > +		}
> > +		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
> > +		if (!new_entry) {
> > +			r = -ENOMEM;
> > +			goto out;
> > +		}
> > +		*new_entry = *entry;
> > +		entry->gsi = gsi | KVM_GSI_ROUTE_MASK;
> > +		__set_bit(gsi, kvm->gsi_route_bitmap);
> > +		hlist_add_head(&new_entry->link, &kvm->gsi_route_list);
> > +	}
> > +	r = 0;
> > +out:
> > +	mutex_unlock(&kvm->lock);
> > +	return r;
> > +}
>
> Why not throw everything and set the new table?

Userspace to maintain a big route table? Just for MSI/MSI-X it's easy, but for 
others, a global one is needed, and need follow up more maintain functions. 
For kernel, a little more effect can archive this, like this. So I do it in 
this way.

> I didn't see where you respond the new KVM_CAP.  It looks like a good
> place to return the maximum size of the table.

I just use it as #ifdef in userspace now, for no user other than MSI/MSI-X 
now. And if we keep maintaining it in kernel, we would return free size 
instead of maximum size..
Avi Kivity Jan. 9, 2009, 6:06 p.m. UTC | #5
Sheng Yang wrote:
>>> +struct kvm_gsi_route_entry_guest {
>>>       
>> what does _guest mean here? almost all kvm stuff is _guest related.
>>     
>
> Because I can't think of a good name... kvm_gsi_route_entry_guest? 
> kvm_gsi_kernel_route_entry? What's your favorite? :)
>
>   

kvm_gsi_route_entry?

>>
>> Since we replace the entire table every time, how do ioapic/pic gsis work?
>>     

Missed this question...

>> Why not throw everything and set the new table?
>>     
>
> Userspace to maintain a big route table? Just for MSI/MSI-X it's easy, but for 
> others, a global one is needed, and need follow up more maintain functions. 
> For kernel, a little more effect can archive this, like this. So I do it in 
> this way.
>   

Sorry, I don't understand the reply.

>> I didn't see where you respond the new KVM_CAP.  It looks like a good
>> place to return the maximum size of the table.
>>     
>
> I just use it as #ifdef in userspace now, for no user other than MSI/MSI-X 
> now. And if we keep maintaining it in kernel, we would return free size 
> instead of maximum size..
>   

We need to allow userspace to change pic/ioapic routing for the HPET.

There are two styles of maintaining the table:

1. add/remove ioctls

The advantage is that very little work needs to be done when something 
changes, but the code size (and bug count) doubles.

2. replace everything ioctl

Smaller code size, but slower if there are high frequency changes

I don't think we'll see high frequency interrupt routing changes; we'll 
probably have one on setup (for HPET), another when switching from ACPI 
PIC mode to ACPI APIC mode, and one for every msi initialized.
Sheng Yang Jan. 10, 2009, 11:12 a.m. UTC | #6
On Fri, Jan 09, 2009 at 08:06:01PM +0200, Avi Kivity wrote:
> Sheng Yang wrote:
>>>> +struct kvm_gsi_route_entry_guest {
>>>>       
>>> what does _guest mean here? almost all kvm stuff is _guest related.
>>>     
>>
>> Because I can't think of a good name... kvm_gsi_route_entry_guest?  
>> kvm_gsi_kernel_route_entry? What's your favorite? :)
>>
>>   
>
> kvm_gsi_route_entry?

Which is used for kernel one now...
>
>>>
>>> Since we replace the entire table every time, how do ioapic/pic gsis work?
>>>     
>
> Missed this question...

My comments below...
>
>>> Why not throw everything and set the new table?
>>>     
>>
>> Userspace to maintain a big route table? Just for MSI/MSI-X it's easy, 
>> but for others, a global one is needed, and need follow up more 
>> maintain functions. For kernel, a little more effect can archive this, 
>> like this. So I do it in this way.
>>   
>
> Sorry, I don't understand the reply.
>
>>> I didn't see where you respond the new KVM_CAP.  It looks like a good
>>> place to return the maximum size of the table.
>>>     
>>
>> I just use it as #ifdef in userspace now, for no user other than 
>> MSI/MSI-X now. And if we keep maintaining it in kernel, we would return 
>> free size instead of maximum size..
>>   
>
> We need to allow userspace to change pic/ioapic routing for the HPET.
>
> There are two styles of maintaining the table:
>
> 1. add/remove ioctls
>
> The advantage is that very little work needs to be done when something  
> changes, but the code size (and bug count) doubles.
>
> 2. replace everything ioctl
>
> Smaller code size, but slower if there are high frequency changes
>
> I don't think we'll see high frequency interrupt routing changes; we'll  
> probably have one on setup (for HPET), another when switching from ACPI  
> PIC mode to ACPI APIC mode, and one for every msi initialized.

I come to option 2 mix with option 1 now. What I meant is, MSI part is in
device-assignment part, and HPET and others are in some other place, so a
global table should be maintained for these three across the parts of
userspace. I don't like the global gsi route table, and especially we
already got one in kernel space. We have to do some maintain work in the
kernel space, e.g. looking up a entry, so I think a little add-on can take
the job.

And now I think you original purpose is three different tables for MSI, HPET
and ACPI APIC mode? This also avoid global big table in userspace. But it
seems like a waste, and also too specific...

So how about this? One ioctl to replace everything, another(maybe two,
transfer entry number then table, or one with maximum entries number in
userspace) can export current gsi route table? This can avoid the global
route table, as well as reduce the complexity.

How do you think?
Sheng Yang Jan. 10, 2009, 12:28 p.m. UTC | #7
On Fri, Jan 09, 2009 at 08:06:01PM +0200, Avi Kivity wrote:
> Sheng Yang wrote:
>> I just use it as #ifdef in userspace now, for no user other than 
>> MSI/MSI-X now. And if we keep maintaining it in kernel, we would return 
>> free size instead of maximum size..
>>   
>
> We need to allow userspace to change pic/ioapic routing for the HPET.
>
> There are two styles of maintaining the table:
>
> 1. add/remove ioctls
>
> The advantage is that very little work needs to be done when something  
> changes, but the code size (and bug count) doubles.
>
> 2. replace everything ioctl
>
> Smaller code size, but slower if there are high frequency changes
>
> I don't think we'll see high frequency interrupt routing changes; we'll  
> probably have one on setup (for HPET), another when switching from ACPI  
> PIC mode to ACPI APIC mode, and one for every msi initialized.

Hi, Avi

After reconsidering, I must say I prefer add/remove ioctls.

About the code size, I don't think it would increase much. I've rewritten
the code twice, I think I know the difference is little.

For the option 2 route table ioctl, we got a array from userspace, and would
convert it to linked list and keep it in kernel. That's a kind of must(I
don't think you would prefer use a array in kernel), and it's very direct.

So, we have to insert/delete route entry for both. What's the real
difference we do it one by one or do it all at once. I don't think it is
much different on the code size. And it's indeed very clear and direct.

Beside this, option 2 seems strange. Why should we handle this table in this
way when it won't result in significant code reduce. Insert/delete entry it
there, look up entry is also there, not many things changed. And it's not
that direct as option 1, which also can be a source of bugs.

How do you think?
Avi Kivity Jan. 11, 2009, 9:34 a.m. UTC | #8
Sheng Yang wrote:
> On Fri, Jan 09, 2009 at 08:06:01PM +0200, Avi Kivity wrote:
>   
>> Sheng Yang wrote:
>>     
>>>>> +struct kvm_gsi_route_entry_guest {
>>>>>       
>>>>>           
>>>> what does _guest mean here? almost all kvm stuff is _guest related.
>>>>     
>>>>         
>>> Because I can't think of a good name... kvm_gsi_route_entry_guest?  
>>> kvm_gsi_kernel_route_entry? What's your favorite? :)
>>>
>>>   
>>>       
>> kvm_gsi_route_entry?
>>     
>
> Which is used for kernel one now...
>   

So add _kernel to that...

It's more important to keep the userspace interface clean.

>> We need to allow userspace to change pic/ioapic routing for the HPET.
>>
>> There are two styles of maintaining the table:
>>
>> 1. add/remove ioctls
>>
>> The advantage is that very little work needs to be done when something  
>> changes, but the code size (and bug count) doubles.
>>
>> 2. replace everything ioctl
>>
>> Smaller code size, but slower if there are high frequency changes
>>
>> I don't think we'll see high frequency interrupt routing changes; we'll  
>> probably have one on setup (for HPET), another when switching from ACPI  
>> PIC mode to ACPI APIC mode, and one for every msi initialized.
>>     
>
> I come to option 2 mix with option 1 now. What I meant is, MSI part is in
> device-assignment part, and HPET and others are in some other place, so a
> global table should be maintained for these three across the parts of
> userspace. I don't like the global gsi route table, and especially we
> already got one in kernel space. We have to do some maintain work in the
> kernel space, e.g. looking up a entry, so I think a little add-on can take
> the job.
>
> And now I think you original purpose is three different tables for MSI, HPET
> and ACPI APIC mode? This also avoid global big table in userspace. But it
> seems like a waste, and also too specific...
>
> So how about this? One ioctl to replace everything, another(maybe two,
> transfer entry number then table, or one with maximum entries number in
> userspace) can export current gsi route table? This can avoid the global
> route table, as well as reduce the complexity.
>   

I still don't understand.

We already have all of the information needed in userspace.  We know 
whether HPET is in use or not, whether PIC mode or APIC mode is enabled, 
and what MSIs are enabled.  We need all this at least for live migration.

In the kernel, we'll need a function to free the table (for VM 
shutdown), so adding a function to populate a new table seems to be the 
least amount of work.

But I think we're miscommunicating, maybe I'm misunderstanding what 
you're suggesting.
Avi Kivity Jan. 11, 2009, 9:38 a.m. UTC | #9
Sheng Yang wrote:
> After reconsidering, I must say I prefer add/remove ioctls.
>
> About the code size, I don't think it would increase much. I've rewritten
> the code twice, I think I know the difference is little.
>   

:( sorry about that.

> For the option 2 route table ioctl, we got a array from userspace, and would
> convert it to linked list and keep it in kernel. That's a kind of must(I
> don't think you would prefer use a array in kernel), and it's very direct.
>   

Actually, eventually we'd want an array indexed by gsi.  Each element 
would be a pointer to another array (one or two routing entries).

Certainly we don't want to iterate a list which could hold several 
hundred interrupts for a large guest.

It's okay to start with a linked list, but eventually we'll want 
something faster.

> So, we have to insert/delete route entry for both. What's the real
> difference we do it one by one or do it all at once. I don't think it is
> much different on the code size. And it's indeed very clear and direct.
>
> Beside this, option 2 seems strange. Why should we handle this table in this
> way when it won't result in significant code reduce. Insert/delete entry it
> there, look up entry is also there, not many things changed. And it's not
> that direct as option 1, which also can be a source of bugs.
>
> How do you think?
>   

I'm not convinced.  Please post your latest, and I will post a 
counter-proposal.
Avi Kivity Jan. 11, 2009, 9:40 a.m. UTC | #10
Avi Kivity wrote:
>
> 1. add/remove ioctls
>
> The advantage is that very little work needs to be done when something 
> changes, but the code size (and bug count) doubles.
>

One disadvantage of add/remove is that we cannot effect a change 
atomically.  Probably not a big deal, but something to keep in mind.
Sheng Yang Jan. 12, 2009, 6:53 a.m. UTC | #11
On Sunday 11 January 2009 17:38:22 Avi Kivity wrote:
> Sheng Yang wrote:
> > After reconsidering, I must say I prefer add/remove ioctls.
> >
> > About the code size, I don't think it would increase much. I've rewritten
> > the code twice, I think I know the difference is little.
> >
> :( sorry about that.
> :
> > For the option 2 route table ioctl, we got a array from userspace, and
> > would convert it to linked list and keep it in kernel. That's a kind of
> > must(I don't think you would prefer use a array in kernel), and it's very
> > direct.
>
> Actually, eventually we'd want an array indexed by gsi.  Each element
> would be a pointer to another array (one or two routing entries).
>
> Certainly we don't want to iterate a list which could hold several
> hundred interrupts for a large guest.
>
> It's okay to start with a linked list, but eventually we'll want
> something faster.

Oh, I see. What I means here is allocate/deallocate would cause some memory 
fragments, but seems not that critical here.
>
> > So, we have to insert/delete route entry for both. What's the real
> > difference we do it one by one or do it all at once. I don't think it is
> > much different on the code size. And it's indeed very clear and direct.
> >
> > Beside this, option 2 seems strange. Why should we handle this table in
> > this way when it won't result in significant code reduce. Insert/delete
> > entry it there, look up entry is also there, not many things changed. And
> > it's not that direct as option 1, which also can be a source of bugs.
> >
> > How do you think?
>
> I'm not convinced.  Please post your latest, and I will post a
> counter-proposal.

OK.

Patch
diff mbox

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 71c150f..bbefce6 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -399,6 +399,9 @@  struct kvm_trace_rec {
 #if defined(CONFIG_X86)
 #define KVM_CAP_REINJECT_CONTROL 24
 #endif
+#if defined(CONFIG_X86)
+#define KVM_CAP_GSI_ROUTE 25
+#endif
 
 /*
  * ioctls for VM fds
@@ -433,6 +436,8 @@  struct kvm_trace_rec {
 #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
 			    struct kvm_assigned_irq)
 #define KVM_REINJECT_CONTROL      _IO(KVMIO, 0x71)
+#define KVM_REQUEST_GSI_ROUTE	  _IOWR(KVMIO, 0x72, void *)
+#define KVM_FREE_GSI_ROUTE	  _IOR(KVMIO, 0x73, void *)
 
 /*
  * ioctls for vcpu fds
@@ -553,4 +558,25 @@  struct kvm_assigned_irq {
 #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION	KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
 #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI	(1 << 0)
 
+struct kvm_gsi_route_guest {
+	__u32 entries_nr;
+	struct kvm_gsi_route_entry_guest *entries;
+};
+
+#define KVM_GSI_ROUTE_MSI	(1 << 0)
+struct kvm_gsi_route_entry_guest {
+	__u32 gsi;
+	__u32 type;
+	__u32 flags;
+	__u32 reserved;
+	union {
+		struct {
+			__u32 addr_lo;
+			__u32 addr_hi;
+			__u32 data;
+		} msi;
+		__u32 padding[8];
+	};
+};
+
 #endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a8bcad0..6a00201 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -136,6 +136,9 @@  struct kvm {
 	unsigned long mmu_notifier_seq;
 	long mmu_notifier_count;
 #endif
+	struct hlist_head gsi_route_list;
+#define KVM_NR_GSI_ROUTE_ENTRIES    256
+	DECLARE_BITMAP(gsi_route_bitmap, KVM_NR_GSI_ROUTE_ENTRIES);
 };
 
 /* The guest did something we don't support. */
@@ -336,6 +339,19 @@  void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 				      struct kvm_irq_mask_notifier *kimn);
 void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
 
+#define KVM_GSI_ROUTE_MASK    0x1000000ull
+struct kvm_gsi_route_entry {
+	u32 gsi;
+	u32 type;
+	u32 flags;
+	u32 reserved;
+	union {
+		struct msi_msg msi;
+		u32 reserved[8];
+	};
+	struct hlist_node link;
+};
+
 void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -343,6 +359,10 @@  void kvm_register_irq_ack_notifier(struct kvm *kvm,
 void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
 int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
+int kvm_update_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry);
+struct kvm_gsi_route_entry *kvm_find_gsi_route_entry(struct kvm *kvm, u32 gsi);
+void kvm_free_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry);
+void kvm_free_gsi_route_list(struct kvm *kvm);
 
 #ifdef CONFIG_DMAR
 int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 5162a41..64ca4cf 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -123,3 +123,73 @@  void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
 			kimn->func(kimn, mask);
 }
 
+int kvm_update_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry)
+{
+	struct kvm_gsi_route_entry *found_entry, *new_entry;
+	int r, gsi;
+
+	mutex_lock(&kvm->lock);
+	/* Find whether we need a update or a new entry */
+	found_entry = kvm_find_gsi_route_entry(kvm, entry->gsi);
+	if (found_entry)
+		*found_entry = *entry;
+	else {
+		gsi = find_first_zero_bit(kvm->gsi_route_bitmap,
+					  KVM_NR_GSI_ROUTE_ENTRIES);
+		if (gsi >= KVM_NR_GSI_ROUTE_ENTRIES) {
+			r = -ENOSPC;
+			goto out;
+		}
+		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
+		if (!new_entry) {
+			r = -ENOMEM;
+			goto out;
+		}
+		*new_entry = *entry;
+		entry->gsi = gsi | KVM_GSI_ROUTE_MASK;
+		__set_bit(gsi, kvm->gsi_route_bitmap);
+		hlist_add_head(&new_entry->link, &kvm->gsi_route_list);
+	}
+	r = 0;
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
+/* Call with kvm->lock hold */
+struct kvm_gsi_route_entry *kvm_find_gsi_route_entry(struct kvm *kvm, u32 gsi)
+{
+	struct kvm_gsi_route_entry *entry;
+	struct hlist_node *n;
+
+	if (!(gsi & KVM_GSI_ROUTE_MASK))
+		return NULL;
+	hlist_for_each_entry(entry, n, &kvm->gsi_route_list, link)
+		if (entry->gsi == gsi)
+			goto out;
+	entry = NULL;
+out:
+	return entry;
+}
+
+/* Call with kvm->lock hold */
+void kvm_free_gsi_route(struct kvm *kvm, struct kvm_gsi_route_entry *entry)
+{
+	if (!entry)
+		return;
+	__clear_bit(entry->gsi & ~KVM_GSI_ROUTE_MASK, kvm->gsi_route_bitmap);
+	hlist_del(&entry->link);
+	kfree(entry);
+}
+
+void kvm_free_gsi_route_list(struct kvm *kvm)
+{
+	struct kvm_gsi_route_entry *entry;
+	struct hlist_node *pos, *n;
+
+	mutex_lock(&kvm->lock);
+	hlist_for_each_entry_safe(entry, pos, n, &kvm->gsi_route_list, link)
+		kvm_free_gsi_route(kvm, entry);
+	mutex_unlock(&kvm->lock);
+}
+
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 61688a6..8aa939c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -839,6 +839,7 @@  static struct kvm *kvm_create_vm(void)
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
 	kvm_coalesced_mmio_init(kvm);
 #endif
+	INIT_HLIST_HEAD(&kvm->gsi_route_list);
 out:
 	return kvm;
 }
@@ -877,6 +878,7 @@  static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
+	kvm_free_gsi_route_list(kvm);
 	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
@@ -1605,6 +1607,45 @@  static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
 	return 0;
 }
 
+static int kvm_vm_ioctl_request_gsi_route(struct kvm *kvm,
+			struct kvm_gsi_route_guest *gsi_route,
+			struct kvm_gsi_route_entry_guest *guest_entries)
+{
+	struct kvm_gsi_route_entry entry;
+	int r, i;
+
+	for (i = 0; i < gsi_route->entries_nr; i++) {
+		memcpy(&entry, &guest_entries[i], sizeof(entry));
+		r = kvm_update_gsi_route(kvm, &entry);
+		if (r == 0)
+			guest_entries[i].gsi = entry.gsi;
+		else
+			break;
+	}
+	return r;
+}
+
+static int kvm_vm_ioctl_free_gsi_route(struct kvm *kvm,
+			struct kvm_gsi_route_guest *gsi_route,
+			struct kvm_gsi_route_entry_guest *guest_entries)
+{
+	struct kvm_gsi_route_entry *entry;
+	int r, i;
+
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < gsi_route->entries_nr; i++) {
+		entry = kvm_find_gsi_route_entry(kvm, guest_entries[i].gsi);
+		if (!entry) {
+			r = -EINVAL;
+			goto out;
+		}
+		kvm_free_gsi_route(kvm, entry);
+	}
+out:
+	mutex_unlock(&kvm->lock);
+	return r;
+}
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -1803,6 +1844,7 @@  static long kvm_vm_ioctl(struct file *filp,
 {
 	struct kvm *kvm = filp->private_data;
 	void __user *argp = (void __user *)arg;
+	struct kvm_gsi_route_entry_guest *gsi_entries = NULL;
 	int r;
 
 	if (kvm->mm != current->mm)
@@ -1887,10 +1929,73 @@  static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif
+	case KVM_REQUEST_GSI_ROUTE: {
+		struct kvm_gsi_route_guest gsi_route;
+		r = -EFAULT;
+		if (copy_from_user(&gsi_route, argp, sizeof gsi_route))
+			goto out;
+		if (gsi_route.entries_nr == 0 ||
+			    gsi_route.entries_nr >= KVM_NR_GSI_ROUTE_ENTRIES) {
+			r = -EFAULT;
+			goto out;
+		}
+		gsi_entries = vmalloc(gsi_route.entries_nr *
+				      sizeof(struct kvm_gsi_route_entry_guest));
+		if (!gsi_entries) {
+			r = -ENOMEM;
+			goto out;
+		}
+		r = -EFAULT;
+		if (copy_from_user(gsi_entries,
+				   (void __user *)gsi_route.entries,
+				   gsi_route.entries_nr *
+				   sizeof(struct kvm_gsi_route_entry_guest)))
+			goto out;
+		r = kvm_vm_ioctl_request_gsi_route(kvm, &gsi_route,
+						   gsi_entries);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user((void __user *)gsi_route.entries,
+				gsi_entries,
+				gsi_route.entries_nr *
+				sizeof(struct kvm_gsi_route_entry_guest)))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_FREE_GSI_ROUTE: {
+		struct kvm_gsi_route_guest gsi_route;
+		r = -EFAULT;
+		if (copy_from_user(&gsi_route, argp, sizeof gsi_route))
+			goto out;
+		if (gsi_route.entries_nr == 0 ||
+			    gsi_route.entries_nr >= KVM_NR_GSI_ROUTE_ENTRIES) {
+			r = -EFAULT;
+			goto out;
+		}
+		gsi_entries = vmalloc(gsi_route.entries_nr *
+				      sizeof(struct kvm_gsi_route_entry_guest));
+		if (!gsi_entries) {
+			r = -ENOMEM;
+			goto out;
+		}
+		r = -EFAULT;
+		if (copy_from_user(gsi_entries,
+				   (void __user *)gsi_route.entries,
+				   gsi_route.entries_nr *
+				   sizeof(struct kvm_gsi_route_entry_guest)))
+			goto out;
+		r = kvm_vm_ioctl_free_gsi_route(kvm, &gsi_route, gsi_entries);
+		if (r)
+			goto out;
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
 out:
+	vfree(gsi_entries);
 	return r;
 }