Message ID | 20090625132832.26748.35406.stgit@dev.haskins.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Jun 25, 2009 at 09:28:32AM -0400, Gregory Haskins wrote: > DEASSIGN allows us to optionally disassociate an IRQFD from its underlying > eventfd without destroying the eventfd in the process. This is useful > for conditions like live-migration which may have an eventfd associated > with a device and an IRQFD. We need to be able to decouple the guest > from the event source while not perturbing the event source itself. > > Signed-off-by: Gregory Haskins <ghaskins@novell.com> > CC: Michael S. Tsirkin <mst@redhat.com> > --- > > include/linux/kvm.h | 2 ++ > virt/kvm/eventfd.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 56 insertions(+), 2 deletions(-) > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index 38ff31e..6710518 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -490,6 +490,8 @@ struct kvm_x86_mce { > }; > #endif > > +#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) > + > struct kvm_irqfd { > __u32 fd; > __u32 gsi; > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index ca21e8a..2d4549c 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -180,8 +180,8 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, > add_wait_queue(wqh, &irqfd->wait); > } > > -int > -kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > +static int > +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) > { > struct _irqfd *irqfd; > struct file *file = NULL; > @@ -303,6 +303,58 @@ irqfd_shutdown(struct _irqfd *irqfd) > irqfd_release(irqfd); > } > > +/* > + * assumes kvm->irqfds.lock is held > + */ > +static struct _irqfd * > +irqfd_find(struct kvm *kvm, int fd, int gsi) > +{ > + struct _irqfd *irqfd, *tmp, *ret = ERR_PTR(-ENOENT); > + struct eventfd_ctx *eventfd; > + > + eventfd = eventfd_ctx_fdget(fd); > + if (IS_ERR(eventfd)) > + return ERR_PTR(PTR_ERR(eventfd)); > + > + spin_lock_irq(&kvm->irqfds.lock); > + > + list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { > + if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) { > + irqfd_deactivate(irqfd); > + ret = irqfd; > + break; > + } > + } > + > + spin_unlock_irq(&kvm->irqfds.lock); > + eventfd_ctx_put(eventfd); > + > + return ret; > +} > + > +static int > +kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) > +{ > + struct _irqfd *irqfd; > + > + irqfd = irqfd_find(kvm, fd, gsi); > + if (IS_ERR(irqfd)) > + return PTR_ERR(irqfd); > + > + irqfd_shutdown(irqfd); > + > + return 0; > +} I think that, to make this work properly, you must add irqfd to list the last thing so do. As it is, when you assign irqfd, the last thing you do is irqfd->eventfd = eventfd; I think you should move this to within a spinlock. > + > +int > +kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) > +{ > + if (flags & KVM_IRQFD_FLAG_DEASSIGN) > + return kvm_irqfd_deassign(kvm, fd, gsi); > + > + return kvm_irqfd_assign(kvm, fd, gsi); > +} > + At some point we discussed limiting the number of irqfds that can be created in some way, so that userspace can not consume unlimited amount of kernel memory. What happened to that idea? This will happen naturally if - we keep fget on the file until irqfd goes away - we allow the same file be bound to only one irqfd but there might be other ways to do this
Michael S. Tsirkin wrote: > On Thu, Jun 25, 2009 at 09:28:32AM -0400, Gregory Haskins wrote: > >> DEASSIGN allows us to optionally disassociate an IRQFD from its underlying >> eventfd without destroying the eventfd in the process. This is useful >> for conditions like live-migration which may have an eventfd associated >> with a device and an IRQFD. We need to be able to decouple the guest >> from the event source while not perturbing the event source itself. >> >> Signed-off-by: Gregory Haskins <ghaskins@novell.com> >> CC: Michael S. Tsirkin <mst@redhat.com> >> --- >> >> include/linux/kvm.h | 2 ++ >> virt/kvm/eventfd.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++-- >> 2 files changed, 56 insertions(+), 2 deletions(-) >> >> diff --git a/include/linux/kvm.h b/include/linux/kvm.h >> index 38ff31e..6710518 100644 >> --- a/include/linux/kvm.h >> +++ b/include/linux/kvm.h >> @@ -490,6 +490,8 @@ struct kvm_x86_mce { >> }; >> #endif >> >> +#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) >> + >> struct kvm_irqfd { >> __u32 fd; >> __u32 gsi; >> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c >> index ca21e8a..2d4549c 100644 >> --- a/virt/kvm/eventfd.c >> +++ b/virt/kvm/eventfd.c >> @@ -180,8 +180,8 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, >> add_wait_queue(wqh, &irqfd->wait); >> } >> >> -int >> -kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) >> +static int >> +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) >> { >> struct _irqfd *irqfd; >> struct file *file = NULL; >> @@ -303,6 +303,58 @@ irqfd_shutdown(struct _irqfd *irqfd) >> irqfd_release(irqfd); >> } >> >> +/* >> + * assumes kvm->irqfds.lock is held >> + */ >> +static struct _irqfd * >> +irqfd_find(struct kvm *kvm, int fd, int gsi) >> +{ >> + struct _irqfd *irqfd, *tmp, *ret = ERR_PTR(-ENOENT); >> + struct eventfd_ctx *eventfd; >> + >> + eventfd = eventfd_ctx_fdget(fd); >> + if (IS_ERR(eventfd)) >> + return ERR_PTR(PTR_ERR(eventfd)); >> + >> + spin_lock_irq(&kvm->irqfds.lock); >> + >> + list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { >> + if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) { >> + irqfd_deactivate(irqfd); >> + ret = irqfd; >> + break; >> + } >> + } >> + >> + spin_unlock_irq(&kvm->irqfds.lock); >> + eventfd_ctx_put(eventfd); >> + >> + return ret; >> +} >> + >> +static int >> +kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) >> +{ >> + struct _irqfd *irqfd; >> + >> + irqfd = irqfd_find(kvm, fd, gsi); >> + if (IS_ERR(irqfd)) >> + return PTR_ERR(irqfd); >> + >> + irqfd_shutdown(irqfd); >> + >> + return 0; >> +} >> > > > I think that, to make this work properly, you must > add irqfd to list the last thing so do. > As it is, when you assign irqfd, the last thing you do is > > irqfd->eventfd = eventfd; > Yeah, I agree. I actually already replied to this effect on the thread for 3/4. ;) > I think you should move this to within a spinlock. > I think if I fix the ordering, the list spinlock should be sufficient. Am I missing something? > >> + >> +int >> +kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) >> +{ >> + if (flags & KVM_IRQFD_FLAG_DEASSIGN) >> + return kvm_irqfd_deassign(kvm, fd, gsi); >> + >> + return kvm_irqfd_assign(kvm, fd, gsi); >> +} >> + >> > > > At some point we discussed limiting the number of > irqfds that can be created in some way, so that userspace > can not consume unlimited amount of kernel memory. > > What happened to that idea? > Yeah, that is a good question. I thought I had already done that, too, but now I don't know what happened to the logic. Perhaps it got lost on a respin somewhere. I will look into this and add the feature. > This will happen naturally if > - we keep fget on the file until irqfd goes away > - we allow the same file be bound to only one irqfd > but there might be other ways to do this > >
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 38ff31e..6710518 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -490,6 +490,8 @@ struct kvm_x86_mce { }; #endif +#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) + struct kvm_irqfd { __u32 fd; __u32 gsi; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index ca21e8a..2d4549c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -180,8 +180,8 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, add_wait_queue(wqh, &irqfd->wait); } -int -kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) +static int +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) { struct _irqfd *irqfd; struct file *file = NULL; @@ -303,6 +303,58 @@ irqfd_shutdown(struct _irqfd *irqfd) irqfd_release(irqfd); } +/* + * assumes kvm->irqfds.lock is held + */ +static struct _irqfd * +irqfd_find(struct kvm *kvm, int fd, int gsi) +{ + struct _irqfd *irqfd, *tmp, *ret = ERR_PTR(-ENOENT); + struct eventfd_ctx *eventfd; + + eventfd = eventfd_ctx_fdget(fd); + if (IS_ERR(eventfd)) + return ERR_PTR(PTR_ERR(eventfd)); + + spin_lock_irq(&kvm->irqfds.lock); + + list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { + if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) { + irqfd_deactivate(irqfd); + ret = irqfd; + break; + } + } + + spin_unlock_irq(&kvm->irqfds.lock); + eventfd_ctx_put(eventfd); + + return ret; +} + +static int +kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) +{ + struct _irqfd *irqfd; + + irqfd = irqfd_find(kvm, fd, gsi); + if (IS_ERR(irqfd)) + return PTR_ERR(irqfd); + + irqfd_shutdown(irqfd); + + return 0; +} + +int +kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) +{ + if (flags & KVM_IRQFD_FLAG_DEASSIGN) + return kvm_irqfd_deassign(kvm, fd, gsi); + + return kvm_irqfd_assign(kvm, fd, gsi); +} + void kvm_irqfd_release(struct kvm *kvm) {
DEASSIGN allows us to optionally disassociate an IRQFD from its underlying eventfd without destroying the eventfd in the process. This is useful for conditions like live-migration which may have an eventfd associated with a device and an IRQFD. We need to be able to decouple the guest from the event source while not perturbing the event source itself. Signed-off-by: Gregory Haskins <ghaskins@novell.com> CC: Michael S. Tsirkin <mst@redhat.com> --- include/linux/kvm.h | 2 ++ virt/kvm/eventfd.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html