Message ID | 20171204190044.14125-3-rkagan@virtuozzo.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Roman Kagan <rkagan@virtuozzo.com> writes: > In Hyper-V, the fast guest->host notification mechanism is the > SIGNAL_EVENT hypercall, with a single parameter of the connection ID to > signal. (I may be missing something important...) I'm not sure how Windows does that but Linux Hyper-V drivers use hard-coded VMBUS_EVENT_CONNECTION_ID (2) for all HVCALL_SIGNAL_EVENT hypercalls. According to the spec, this id comes from HvConnectPort hypercall which is executed by the root partition -- not sure how it's supposed to be passed down to the guest. So in case the situation in Windows is not any different the connection ID parameter will always be 2 so we may as well just omit it) Or, alternatively, we can probably add both VMBUS_MESSAGE_CONNECTION_ID and VMBUS_MONITOR_CONNECTION_ID to the mechanism... > > Currently this hypercall incurs a user exit and requires the userspace > to decode the parameters and trigger the notification of the potentially > different I/O context. > > To avoid the costly user exit, process this hypercall and signal the > corresponding eventfd in KVM, similar to ioeventfd. The association > between the connection id and the eventfd is established via the newly > introduced KVM_HYPERV_EVENTFD ioctl, and maintained in an > (srcu-protected) IDR. > > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com> > --- > Documentation/virtual/kvm/api.txt | 23 ++++++++ > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/kvm/hyperv.h | 1 + > include/uapi/linux/kvm.h | 13 +++++ > arch/x86/kvm/hyperv.c | 115 +++++++++++++++++++++++++++++++++++++- > arch/x86/kvm/x86.c | 10 ++++ > 6 files changed, 163 insertions(+), 1 deletion(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index f670e4b9e7f3..e4f319add8b7 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -3394,6 +3394,29 @@ invalid, if invalid pages are written to (e.g. 
after the end of memory) > or if no page table is present for the addresses (e.g. when using > hugepages). > > +4.109 KVM_HYPERV_EVENTFD > + > +Capability: KVM_CAP_HYPERV_EVENTFD > +Architectures: x86 > +Type: vm ioctl > +Parameters: struct kvm_hyperv_eventfd (in) > +Returns: 0 on success, !0 on error > + > +This ioctl (un)registers an eventfd to receive notifications from the guest on > +the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without > +causing a user exit. > + > +struct kvm_hyperv_eventfd { > + __u32 conn_id; > + __s32 fd; > + __u32 flags; > + __u32 padding[3]; > +}; > + > +The acceptable values for the flags field: > + > +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) > + > 5. The kvm_run structure > ------------------------ > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 1bfb99770c34..0d37eb837991 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -739,6 +739,8 @@ struct kvm_hv { > u64 hv_crash_ctl; > > HV_REFERENCE_TSC_PAGE tsc_ref; > + > + struct idr conn_to_evt; > }; > > enum kvm_irqchip_mode { > diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h > index cc2468244ca2..837465d69c6d 100644 > --- a/arch/x86/kvm/hyperv.h > +++ b/arch/x86/kvm/hyperv.h > @@ -90,5 +90,6 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, > > void kvm_hv_init_vm(struct kvm *kvm); > void kvm_hv_destroy_vm(struct kvm *kvm); > +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); > > #endif > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 282d7613fce8..465f45c13cdc 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt { > #define KVM_CAP_HYPERV_SYNIC2 148 > #define KVM_CAP_HYPERV_VP_INDEX 149 > #define KVM_CAP_S390_AIS_MIGRATION 150 > +#define KVM_CAP_HYPERV_EVENTFD 151 > > #ifdef KVM_CAP_IRQ_ROUTING > > @@ -1359,6 +1360,8 @@ struct 
kvm_s390_ucas_mapping { > #define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log) > #define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) > > +#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xba, struct kvm_hyperv_eventfd) > + > #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) > #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) > #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) > @@ -1419,4 +1422,14 @@ struct kvm_assigned_msix_entry { > #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) > #define KVM_ARM_DEV_PMU (1 << 2) > > +struct kvm_hyperv_eventfd { > + __u32 conn_id; > + __s32 fd; > + __u32 flags; > + __u32 padding[3]; > +}; > + > +#define KVM_HYPERV_CONN_ID_BITS 24 > +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) > + > #endif /* __LINUX_KVM_H */ > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index 015fb06c7522..d2e8915546b1 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c > @@ -29,6 +29,7 @@ > #include <linux/kvm_host.h> > #include <linux/highmem.h> > #include <linux/sched/cputime.h> > +#include <linux/eventfd.h> > > #include <asm/apicdef.h> > #include <trace/events/kvm.h> > @@ -1226,6 +1227,54 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) > return 1; > } > > +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id) > +{ > + struct page *page; > + void *pg; > + struct hv_input_signal_event *msg; > + > + if ((gpa & (__alignof__(*msg) - 1)) || > + offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE) > + return HV_STATUS_INVALID_ALIGNMENT; > + > + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); > + if (is_error_page(page)) > + return HV_STATUS_INSUFFICIENT_MEMORY; > + > + pg = kmap_atomic(page); > + msg = pg + offset_in_page(gpa); > + *conn_id = msg->connectionid.u.id + msg->flag_number; > + kunmap_atomic(pg); > + return HV_STATUS_SUCCESS; > +} > + > +static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 ingpa) > +{ > + u16 ret; > + u32 conn_id; > + int idx; 
> + struct eventfd_ctx *eventfd; > + > + if (likely(fast)) > + conn_id = (ingpa & 0xffffffff) + ((ingpa >> 32) & 0xffff); > + else { > + ret = hvcall_sigevent_param(vcpu, ingpa, &conn_id); > + if (ret != HV_STATUS_SUCCESS) > + return ret; > + } > + > + if (conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1)) > + return HV_STATUS_INVALID_CONNECTION_ID; > + > + idx = srcu_read_lock(&vcpu->kvm->srcu); > + eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, conn_id); > + if (eventfd) > + eventfd_signal(eventfd, 1); > + srcu_read_unlock(&vcpu->kvm->srcu, idx); > + > + return eventfd ? HV_STATUS_SUCCESS : HV_STATUS_INVALID_CONNECTION_ID; > +} > + > int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > { > u64 param, ingpa, outgpa, ret; > @@ -1276,8 +1325,12 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > case HVCALL_NOTIFY_LONG_SPIN_WAIT: > kvm_vcpu_on_spin(vcpu, true); > break; > - case HVCALL_POST_MESSAGE: > case HVCALL_SIGNAL_EVENT: > + res = kvm_hvcall_signal_event(vcpu, fast, ingpa); > + if (res != HV_STATUS_INVALID_CONNECTION_ID) > + break; > + /* maybe userspace knows this conn_id: fall through */ > + case HVCALL_POST_MESSAGE: > /* don't bother userspace if it has no way to handle it */ > if (!vcpu_to_synic(vcpu)->active) { > res = HV_STATUS_INVALID_HYPERCALL_CODE; > @@ -1305,8 +1358,68 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > void kvm_hv_init_vm(struct kvm *kvm) > { > mutex_init(&kvm->arch.hyperv.hv_lock); > + idr_init(&kvm->arch.hyperv.conn_to_evt); > } > > void kvm_hv_destroy_vm(struct kvm *kvm) > { > + int i; > + struct eventfd_ctx *eventfd; > + > + idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) > + eventfd_ctx_put(eventfd); > + idr_destroy(&kvm->arch.hyperv.conn_to_evt); > +} > + > +static int kvm_hv_eventfd_assign(struct kvm *kvm, int conn_id, int fd) > +{ > + int ret; > + struct eventfd_ctx *eventfd; > + struct kvm_hv *hv = &kvm->arch.hyperv; > + > + eventfd = eventfd_ctx_fdget(fd); > + if (IS_ERR(eventfd)) > + return PTR_ERR(eventfd); > + > 
+ mutex_lock(&hv->hv_lock); > + ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, > + GFP_KERNEL); > + mutex_unlock(&hv->hv_lock); > + > + if (ret >= 0) > + return 0; > + > + if (ret == -ENOSPC) > + ret = -EEXIST; > + eventfd_ctx_put(eventfd); > + return ret; > +} > + > +static int kvm_hv_eventfd_deassign(struct kvm *kvm, int conn_id) > +{ > + int ret; > + struct eventfd_ctx *eventfd; > + struct kvm_hv *hv = &kvm->arch.hyperv; > + > + mutex_lock(&hv->hv_lock); > + eventfd = idr_remove(&hv->conn_to_evt, conn_id); > + mutex_unlock(&hv->hv_lock); > + > + if (!eventfd) > + return -ENOENT; > + > + synchronize_srcu(&kvm->srcu); > + eventfd_ctx_put(eventfd); > + return ret; > +} > + > +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) > +{ > + if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || > + (args->conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1))) > + return -EINVAL; > + > + return args->flags == KVM_HYPERV_EVENTFD_DEASSIGN ? > + kvm_hv_eventfd_deassign(kvm, args->conn_id) : > + kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); > } > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 0fe479d4b82c..2c786682f6f6 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2686,6 +2686,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_HYPERV_SYNIC: > case KVM_CAP_HYPERV_SYNIC2: > case KVM_CAP_HYPERV_VP_INDEX: > + case KVM_CAP_HYPERV_EVENTFD: > case KVM_CAP_PCI_SEGMENT: > case KVM_CAP_DEBUGREGS: > case KVM_CAP_X86_ROBUST_SINGLESTEP: > @@ -4281,6 +4282,15 @@ long kvm_arch_vm_ioctl(struct file *filp, > r = kvm_vm_ioctl_enable_cap(kvm, &cap); > break; > } > + case KVM_HYPERV_EVENTFD: { > + struct kvm_hyperv_eventfd hvevfd; > + > + r = -EFAULT; > + if (copy_from_user(&hvevfd, argp, sizeof(hvevfd))) > + goto out; > + r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); > + break; > + } > default: > r = -ENOTTY; > }
.snip.. > +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id) > +{ > + struct page *page; > + void *pg; > + struct hv_input_signal_event *msg; > + > + if ((gpa & (__alignof__(*msg) - 1)) || > + offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE) > + return HV_STATUS_INVALID_ALIGNMENT; > + > + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); > + if (is_error_page(page)) > + return HV_STATUS_INSUFFICIENT_MEMORY; > + > + pg = kmap_atomic(page); > + msg = pg + offset_in_page(gpa); > + *conn_id = msg->connectionid.u.id + msg->flag_number; Here it is u32.. > + kunmap_atomic(pg); > + return HV_STATUS_SUCCESS; > +} > + .. snip.. > +static int kvm_hv_eventfd_assign(struct kvm *kvm, int conn_id, int fd) It is uint32 in the hypercall, but here you are making it an 'int'. Is that intentional? Or could it be the same type?
On Wed, Dec 06, 2017 at 10:28:33AM -0500, Konrad Rzeszutek Wilk wrote: > .snip.. > > +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id) > > +{ > > + struct page *page; > > + void *pg; > > + struct hv_input_signal_event *msg; > > + > > + if ((gpa & (__alignof__(*msg) - 1)) || > > + offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE) > > + return HV_STATUS_INVALID_ALIGNMENT; > > + > > + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); > > + if (is_error_page(page)) > > + return HV_STATUS_INSUFFICIENT_MEMORY; > > + > > + pg = kmap_atomic(page); > > + msg = pg + offset_in_page(gpa); > > + *conn_id = msg->connectionid.u.id + msg->flag_number; > > Here it is u32.. > > > + kunmap_atomic(pg); > > + return HV_STATUS_SUCCESS; > > +} > > + > .. snip.. > > > +static int kvm_hv_eventfd_assign(struct kvm *kvm, int conn_id, int fd) > > It is uint32 in the hypercall, but here you are making it an 'int'. > > Is that intentional? Or could it be the same type? Yes, it should have been u32 everywhere. Thanks for spotting! (The actual values allowed there are only 24 bits wide; nonetheless, mixing different types is indeed confusing, so I'd better make them consistent.) Thanks, Roman.
On Wed, Dec 06, 2017 at 04:19:22PM +0100, Vitaly Kuznetsov wrote: > Roman Kagan <rkagan@virtuozzo.com> writes: > > > In Hyper-V, the fast guest->host notification mechanism is the > > SIGNAL_EVENT hypercall, with a single parameter of the connection ID to > > signal. > > (I may be missing something important...) > > I'm not sure how Windows does that but Linux Hyper-V drivers use > hard-coded VMBUS_EVENT_CONNECTION_ID (2) for all HVCALL_SIGNAL_EVENT > hypercalls. This is only true for VMBus protocol of w2008, where all channels use the same connection id, and use an additional "interrupt page" to sort out whose notification it is. Newer VMBus uses "dedicated interrupt" per channel, and Linux certainly does use that, too, if the hypervisor offers it. See vmbus_set_event(). > According to the spec, this id comes from HvConnectPort > hypercall which is executed by the root partition -- not sure how it's > supposed to be passed down to the guest. It comes in the channel offer. See vmbus_onoffer(). > So in case the situation in Windows is not any different the connection > ID parameter will always be 2 so we may as well just omit it) It is not different in Windows: the connection id varies there too. > Or, > alternatively, we can probably add both VMBUS_MESSAGE_CONNECTION_ID and > VMBUS_MONITOR_CONNECTION_ID to the mechanism... These two are not used with the SIGNAL_EVENT hypercall. Or are you suggesting to also handle the POST_MESSAGE hypercall in KVM? I don't see a compelling reason to do so, since this is a slow control mechanism and only used at setup/teardown, so handling it in userspace is good enough. Thanks, Roman.
Roman Kagan <rkagan@virtuozzo.com> writes: > On Wed, Dec 06, 2017 at 04:19:22PM +0100, Vitaly Kuznetsov wrote: >> Roman Kagan <rkagan@virtuozzo.com> writes: >> >> > In Hyper-V, the fast guest->host notification mechanism is the >> > SIGNAL_EVENT hypercall, with a single parameter of the connection ID to >> > signal. >> >> (I may be missing something important...) >> >> I'm not sure how Windows does that but Linux Hyper-V drivers use >> hard-coded VMBUS_EVENT_CONNECTION_ID (2) for all HVCALL_SIGNAL_EVENT >> hypercalls. > > This is only true for VMBus protocol of w2008, where all channels use > the same connection id, and use an additional "interrupt page" to sort > out whose notification it is. > > Newer VMBus uses "dedicated interrupt" per channel, and Linux certainly > does use that, too, if the hypervisor offers it. See vmbus_set_event(). > Ah, right, thanks! > >> Or, >> alternatively, we can probably add both VMBUS_MESSAGE_CONNECTION_ID and >> VMBUS_MONITOR_CONNECTION_ID to the mechanism... > > These two are not used with the SIGNAL_EVENT hypercall. Or are you > suggesting to also handle the POST_MESSAGE hypercall in KVM? I don't > see a compelling reason to do so, since this is a slow control mechanism > and only used at setup/teardown, so handling it in userspace is good > enough. Yes, it is good enough, but the new mechanism's name looks generic enough: KVM_HYPERV_EVENTFD, and it is unclear why only SIGNAL_EVENT is handled.
On Wed, Dec 06, 2017 at 06:09:55PM +0100, Vitaly Kuznetsov wrote: > Roman Kagan <rkagan@virtuozzo.com> writes: > > On Wed, Dec 06, 2017 at 04:19:22PM +0100, Vitaly Kuznetsov wrote: > >> Or, > >> alternatively, we can probably add both VMBUS_MESSAGE_CONNECTION_ID and > >> VMBUS_MONITOR_CONNECTION_ID to the mechanism... > > > > These two are not used with the SIGNAL_EVENT hypercall. Or are you > > suggesting to also handle the POST_MESSAGE hypercall in KVM? I don't > > see a compelling reason to do so, since this is a slow control mechanism > > and only used at setup/teardown, so handling it in userspace is good > > enough. > > Yes, it is good enough but the new mechanism's name look generic enough: > KVM_HYPERV_EVENTFD and it is unclear why only SIGNAL_EVENT is handled. Because SIGNAL_EVENT matches the eventfd semantics while POST_MESSAGE doesn't. Because POST_MESSAGE is, well, about posting messages. It bears up to 256 bytes of data which need to be copied aside before returning to the guest and then delivered somehow to the userspace for processing. Roman.
Hi Roman, Thank you for the patch! Yet something to improve: [auto build test ERROR on kvm/linux-next] [also build test ERROR on v4.15-rc2] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Roman-Kagan/x86-kvm-hyperv-guest-host-event-signaling-via-eventfd/20171207-065837 base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next config: i386-randconfig-x011-201749 (attached as .config) compiler: gcc-7 (Debian 7.2.0-12) 7.2.1 20171025 reproduce: # save the attached .config to linux build tree make ARCH=i386 All error/warnings (new ones prefixed by >>): In file included from include/linux/init.h:5:0, from arch/x86/include/asm/mem_encrypt.h:18, from include/linux/mem_encrypt.h:20, from arch/x86/include/asm/processor-flags.h:6, from arch/x86/include/asm/processor.h:5, from arch/x86//kvm/x86.h:5, from arch/x86//kvm/hyperv.c:24: arch/x86//kvm/hyperv.c: In function 'hvcall_sigevent_param': >> arch/x86//kvm/hyperv.c:1236:26: error: dereferencing pointer to incomplete type 'struct hv_input_signal_event' if ((gpa & (__alignof__(*msg) - 1)) || ^~ include/linux/compiler.h:58:30: note: in definition of macro '__trace_if' if (__builtin_constant_p(!!(cond)) ? 
!!(cond) : \ ^~~~ >> arch/x86//kvm/hyperv.c:1236:2: note: in expansion of macro 'if' if ((gpa & (__alignof__(*msg) - 1)) || ^~ vim +1236 arch/x86//kvm/hyperv.c 1229 1230 static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id) 1231 { 1232 struct page *page; 1233 void *pg; 1234 struct hv_input_signal_event *msg; 1235 > 1236 if ((gpa & (__alignof__(*msg) - 1)) || 1237 offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE) 1238 return HV_STATUS_INVALID_ALIGNMENT; 1239 1240 page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); 1241 if (is_error_page(page)) 1242 return HV_STATUS_INSUFFICIENT_MEMORY; 1243 1244 pg = kmap_atomic(page); 1245 msg = pg + offset_in_page(gpa); 1246 *conn_id = msg->connectionid.u.id + msg->flag_number; 1247 kunmap_atomic(pg); 1248 return HV_STATUS_SUCCESS; 1249 } 1250 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f670e4b9e7f3..e4f319add8b7 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3394,6 +3394,29 @@ invalid, if invalid pages are written to (e.g. after the end of memory) or if no page table is present for the addresses (e.g. when using hugepages). +4.109 KVM_HYPERV_EVENTFD + +Capability: KVM_CAP_HYPERV_EVENTFD +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_hyperv_eventfd (in) +Returns: 0 on success, !0 on error + +This ioctl (un)registers an eventfd to receive notifications from the guest on +the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without +causing a user exit. + +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +The acceptable values for the flags field: + +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + 5. The kvm_run structure ------------------------ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1bfb99770c34..0d37eb837991 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -739,6 +739,8 @@ struct kvm_hv { u64 hv_crash_ctl; HV_REFERENCE_TSC_PAGE tsc_ref; + + struct idr conn_to_evt; }; enum kvm_irqchip_mode { diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index cc2468244ca2..837465d69c6d 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -90,5 +90,6 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 282d7613fce8..465f45c13cdc 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_SYNIC2 148 #define KVM_CAP_HYPERV_VP_INDEX 149 #define 
KVM_CAP_S390_AIS_MIGRATION 150 +#define KVM_CAP_HYPERV_EVENTFD 151 #ifdef KVM_CAP_IRQ_ROUTING @@ -1359,6 +1360,8 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log) #define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) +#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xba, struct kvm_hyperv_eventfd) + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1419,4 +1422,14 @@ struct kvm_assigned_msix_entry { #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) #define KVM_ARM_DEV_PMU (1 << 2) +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_BITS 24 +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + #endif /* __LINUX_KVM_H */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 015fb06c7522..d2e8915546b1 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -29,6 +29,7 @@ #include <linux/kvm_host.h> #include <linux/highmem.h> #include <linux/sched/cputime.h> +#include <linux/eventfd.h> #include <asm/apicdef.h> #include <trace/events/kvm.h> @@ -1226,6 +1227,54 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return 1; } +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id) +{ + struct page *page; + void *pg; + struct hv_input_signal_event *msg; + + if ((gpa & (__alignof__(*msg) - 1)) || + offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE) + return HV_STATUS_INVALID_ALIGNMENT; + + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) + return HV_STATUS_INSUFFICIENT_MEMORY; + + pg = kmap_atomic(page); + msg = pg + offset_in_page(gpa); + *conn_id = msg->connectionid.u.id + msg->flag_number; + kunmap_atomic(pg); + return HV_STATUS_SUCCESS; +} + +static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 ingpa) +{ + u16 ret; + u32 conn_id; + int idx; + 
struct eventfd_ctx *eventfd; + + if (likely(fast)) + conn_id = (ingpa & 0xffffffff) + ((ingpa >> 32) & 0xffff); + else { + ret = hvcall_sigevent_param(vcpu, ingpa, &conn_id); + if (ret != HV_STATUS_SUCCESS) + return ret; + } + + if (conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1)) + return HV_STATUS_INVALID_CONNECTION_ID; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, conn_id); + if (eventfd) + eventfd_signal(eventfd, 1); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return eventfd ? HV_STATUS_SUCCESS : HV_STATUS_INVALID_CONNECTION_ID; +} + int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { u64 param, ingpa, outgpa, ret; @@ -1276,8 +1325,12 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) case HVCALL_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu, true); break; - case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: + res = kvm_hvcall_signal_event(vcpu, fast, ingpa); + if (res != HV_STATUS_INVALID_CONNECTION_ID) + break; + /* maybe userspace knows this conn_id: fall through */ + case HVCALL_POST_MESSAGE: /* don't bother userspace if it has no way to handle it */ if (!vcpu_to_synic(vcpu)->active) { res = HV_STATUS_INVALID_HYPERCALL_CODE; @@ -1305,8 +1358,68 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) void kvm_hv_init_vm(struct kvm *kvm) { mutex_init(&kvm->arch.hyperv.hv_lock); + idr_init(&kvm->arch.hyperv.conn_to_evt); } void kvm_hv_destroy_vm(struct kvm *kvm) { + int i; + struct eventfd_ctx *eventfd; + + idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) + eventfd_ctx_put(eventfd); + idr_destroy(&kvm->arch.hyperv.conn_to_evt); +} + +static int kvm_hv_eventfd_assign(struct kvm *kvm, int conn_id, int fd) +{ + int ret; + struct eventfd_ctx *eventfd; + struct kvm_hv *hv = &kvm->arch.hyperv; + + eventfd = eventfd_ctx_fdget(fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + mutex_lock(&hv->hv_lock); + ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, + GFP_KERNEL); + 
mutex_unlock(&hv->hv_lock); + + if (ret >= 0) + return 0; + + if (ret == -ENOSPC) + ret = -EEXIST; + eventfd_ctx_put(eventfd); + return ret; +} + +static int kvm_hv_eventfd_deassign(struct kvm *kvm, int conn_id) +{ + int ret; + struct eventfd_ctx *eventfd; + struct kvm_hv *hv = &kvm->arch.hyperv; + + mutex_lock(&hv->hv_lock); + eventfd = idr_remove(&hv->conn_to_evt, conn_id); + mutex_unlock(&hv->hv_lock); + + if (!eventfd) + return -ENOENT; + + synchronize_srcu(&kvm->srcu); + eventfd_ctx_put(eventfd); + return ret; +} + +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) +{ + if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || + (args->conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1))) + return -EINVAL; + + return args->flags == KVM_HYPERV_EVENTFD_DEASSIGN ? + kvm_hv_eventfd_deassign(kvm, args->conn_id) : + kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0fe479d4b82c..2c786682f6f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2686,6 +2686,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_SYNIC: case KVM_CAP_HYPERV_SYNIC2: case KVM_CAP_HYPERV_VP_INDEX: + case KVM_CAP_HYPERV_EVENTFD: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -4281,6 +4282,15 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_enable_cap(kvm, &cap); break; } + case KVM_HYPERV_EVENTFD: { + struct kvm_hyperv_eventfd hvevfd; + + r = -EFAULT; + if (copy_from_user(&hvevfd, argp, sizeof(hvevfd))) + goto out; + r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); + break; + } default: r = -ENOTTY; }
In Hyper-V, the fast guest->host notification mechanism is the SIGNAL_EVENT hypercall, with a single parameter of the connection ID to signal. Currently this hypercall incurs a user exit and requires the userspace to decode the parameters and trigger the notification of the potentially different I/O context. To avoid the costly user exit, process this hypercall and signal the corresponding eventfd in KVM, similar to ioeventfd. The association between the connection id and the eventfd is established via the newly introduced KVM_HYPERV_EVENTFD ioctl, and maintained in an (srcu-protected) IDR. Signed-off-by: Roman Kagan <rkagan@virtuozzo.com> --- Documentation/virtual/kvm/api.txt | 23 ++++++++ arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/hyperv.h | 1 + include/uapi/linux/kvm.h | 13 +++++ arch/x86/kvm/hyperv.c | 115 +++++++++++++++++++++++++++++++++++++- arch/x86/kvm/x86.c | 10 ++++ 6 files changed, 163 insertions(+), 1 deletion(-)