diff mbox

[2/2] x86:kvm:hyperv: guest->host event signaling via eventfd

Message ID 20171204190044.14125-3-rkagan@virtuozzo.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roman Kagan Dec. 4, 2017, 7 p.m. UTC
In Hyper-V, the fast guest->host notification mechanism is the
SIGNAL_EVENT hypercall, with a single parameter of the connection ID to
signal.

Currently this hypercall incurs a user exit and requires the userspace
to decode the parameters and trigger the notification of the potentially
different I/O context.

To avoid the costly user exit, process this hypercall and signal the
corresponding eventfd in KVM, similar to ioeventfd.  The association
between the connection id and the eventfd is established via the newly
introduced KVM_HYPERV_EVENTFD ioctl, and maintained in an
(srcu-protected) IDR.

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
---
 Documentation/virtual/kvm/api.txt |  23 ++++++++
 arch/x86/include/asm/kvm_host.h   |   2 +
 arch/x86/kvm/hyperv.h             |   1 +
 include/uapi/linux/kvm.h          |  13 +++++
 arch/x86/kvm/hyperv.c             | 115 +++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c                |  10 ++++
 6 files changed, 163 insertions(+), 1 deletion(-)

Comments

Vitaly Kuznetsov Dec. 6, 2017, 3:19 p.m. UTC | #1
Roman Kagan <rkagan@virtuozzo.com> writes:

> In Hyper-V, the fast guest->host notification mechanism is the
> SIGNAL_EVENT hypercall, with a single parameter of the connection ID to
> signal.

(I may be missing something important...)

I'm not sure how Windows does that but Linux Hyper-V drivers use
hard-coded VMBUS_EVENT_CONNECTION_ID (2) for all HVCALL_SIGNAL_EVENT
hypercalls. According to the spec, this id comes from HvConnectPort
hypercall which is executed by the root partition -- not sure how it's
supposed to be passed down to the guest.

So in case the situation in Windows is not any different the connection
ID parameter will always be 2 so we may as well just omit it) Or,
alternatively, we can probably add both VMBUS_MESSAGE_CONNECTION_ID and
VMBUS_MONITOR_CONNECTION_ID to the mechanism...

>
> Currently this hypercall incurs a user exit and requires the userspace
> to decode the parameters and trigger the notification of the potentially
> different I/O context.
>
> To avoid the costly user exit, process this hypercall and signal the
> corresponding eventfd in KVM, similar to ioeventfd.  The association
> between the connection id and the eventfd is established via the newly
> introduced KVM_HYPERV_EVENTFD ioctl, and maintained in an
> (srcu-protected) IDR.
>
> Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> ---
>  Documentation/virtual/kvm/api.txt |  23 ++++++++
>  arch/x86/include/asm/kvm_host.h   |   2 +
>  arch/x86/kvm/hyperv.h             |   1 +
>  include/uapi/linux/kvm.h          |  13 +++++
>  arch/x86/kvm/hyperv.c             | 115 +++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.c                |  10 ++++
>  6 files changed, 163 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index f670e4b9e7f3..e4f319add8b7 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3394,6 +3394,29 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
>  or if no page table is present for the addresses (e.g. when using
>  hugepages).
>
> +4.109 KVM_HYPERV_EVENTFD
> +
> +Capability: KVM_CAP_HYPERV_EVENTFD
> +Architectures: x86
> +Type: vm ioctl
> +Parameters: struct kvm_hyperv_eventfd (in)
> +Returns: 0 on success, !0 on error
> +
> +This ioctl (un)registers an eventfd to receive notifications from the guest on
> +the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without
> +causing a user exit.
> +
> +struct kvm_hyperv_eventfd {
> +	__u32 conn_id;
> +	__s32 fd;
> +	__u32 flags;
> +	__u32 padding[3];
> +};
> +
> +The acceptable values for the flags field:
> +
> +#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
> +
>  5. The kvm_run structure
>  ------------------------
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 1bfb99770c34..0d37eb837991 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -739,6 +739,8 @@ struct kvm_hv {
>  	u64 hv_crash_ctl;
>
>  	HV_REFERENCE_TSC_PAGE tsc_ref;
> +
> +	struct idr conn_to_evt;
>  };
>
>  enum kvm_irqchip_mode {
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index cc2468244ca2..837465d69c6d 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -90,5 +90,6 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
>
>  void kvm_hv_init_vm(struct kvm *kvm);
>  void kvm_hv_destroy_vm(struct kvm *kvm);
> +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
>
>  #endif
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 282d7613fce8..465f45c13cdc 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -932,6 +932,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_HYPERV_SYNIC2 148
>  #define KVM_CAP_HYPERV_VP_INDEX 149
>  #define KVM_CAP_S390_AIS_MIGRATION 150
> +#define KVM_CAP_HYPERV_EVENTFD 151
>
>  #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -1359,6 +1360,8 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
>  #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
>
> +#define KVM_HYPERV_EVENTFD	_IOW(KVMIO,  0xba, struct kvm_hyperv_eventfd)
> +
>  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
>  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
>  #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)
> @@ -1419,4 +1422,14 @@ struct kvm_assigned_msix_entry {
>  #define KVM_ARM_DEV_EL1_PTIMER		(1 << 1)
>  #define KVM_ARM_DEV_PMU			(1 << 2)
>
> +struct kvm_hyperv_eventfd {
> +	__u32 conn_id;
> +	__s32 fd;
> +	__u32 flags;
> +	__u32 padding[3];
> +};
> +
> +#define KVM_HYPERV_CONN_ID_BITS		24
> +#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
> +
>  #endif /* __LINUX_KVM_H */
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 015fb06c7522..d2e8915546b1 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -29,6 +29,7 @@
>  #include <linux/kvm_host.h>
>  #include <linux/highmem.h>
>  #include <linux/sched/cputime.h>
> +#include <linux/eventfd.h>
>
>  #include <asm/apicdef.h>
>  #include <trace/events/kvm.h>
> @@ -1226,6 +1227,54 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
>  	return 1;
>  }
>
> +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id)
> +{
> +	struct page *page;
> +	void *pg;
> +	struct hv_input_signal_event *msg;
> +
> +	if ((gpa & (__alignof__(*msg) - 1)) ||
> +	    offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE)
> +		return HV_STATUS_INVALID_ALIGNMENT;
> +
> +	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
> +	if (is_error_page(page))
> +		return HV_STATUS_INSUFFICIENT_MEMORY;
> +
> +	pg = kmap_atomic(page);
> +	msg = pg + offset_in_page(gpa);
> +	*conn_id = msg->connectionid.u.id + msg->flag_number;
> +	kunmap_atomic(pg);
> +	return HV_STATUS_SUCCESS;
> +}
> +
> +static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 ingpa)
> +{
> +	u16 ret;
> +	u32 conn_id;
> +	int idx;
> +	struct eventfd_ctx *eventfd;
> +
> +	if (likely(fast))
> +		conn_id = (ingpa & 0xffffffff) + ((ingpa >> 32) & 0xffff);
> +	else {
> +		ret = hvcall_sigevent_param(vcpu, ingpa, &conn_id);
> +		if (ret != HV_STATUS_SUCCESS)
> +			return ret;
> +	}
> +
> +	if (conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1))
> +		return HV_STATUS_INVALID_CONNECTION_ID;
> +
> +	idx = srcu_read_lock(&vcpu->kvm->srcu);
> +	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, conn_id);
> +	if (eventfd)
> +		eventfd_signal(eventfd, 1);
> +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +
> +	return eventfd ? HV_STATUS_SUCCESS : HV_STATUS_INVALID_CONNECTION_ID;
> +}
> +
>  int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>  {
>  	u64 param, ingpa, outgpa, ret;
> @@ -1276,8 +1325,12 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>  	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
>  		kvm_vcpu_on_spin(vcpu, true);
>  		break;
> -	case HVCALL_POST_MESSAGE:
>  	case HVCALL_SIGNAL_EVENT:
> +		res = kvm_hvcall_signal_event(vcpu, fast, ingpa);
> +		if (res != HV_STATUS_INVALID_CONNECTION_ID)
> +			break;
> +		/* maybe userspace knows this conn_id: fall through */
> +	case HVCALL_POST_MESSAGE:
>  		/* don't bother userspace if it has no way to handle it */
>  		if (!vcpu_to_synic(vcpu)->active) {
>  			res = HV_STATUS_INVALID_HYPERCALL_CODE;
> @@ -1305,8 +1358,68 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>  void kvm_hv_init_vm(struct kvm *kvm)
>  {
>  	mutex_init(&kvm->arch.hyperv.hv_lock);
> +	idr_init(&kvm->arch.hyperv.conn_to_evt);
>  }
>
>  void kvm_hv_destroy_vm(struct kvm *kvm)
>  {
> +	int i;
> +	struct eventfd_ctx *eventfd;
> +
> +	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
> +		eventfd_ctx_put(eventfd);
> +	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
> +}
> +
> +static int kvm_hv_eventfd_assign(struct kvm *kvm, int conn_id, int fd)
> +{
> +	int ret;
> +	struct eventfd_ctx *eventfd;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +
> +	eventfd = eventfd_ctx_fdget(fd);
> +	if (IS_ERR(eventfd))
> +		return PTR_ERR(eventfd);
> +
> +	mutex_lock(&hv->hv_lock);
> +	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
> +			GFP_KERNEL);
> +	mutex_unlock(&hv->hv_lock);
> +
> +	if (ret >= 0)
> +		return 0;
> +
> +	if (ret == -ENOSPC)
> +		ret = -EEXIST;
> +	eventfd_ctx_put(eventfd);
> +	return ret;
> +}
> +
> +static int kvm_hv_eventfd_deassign(struct kvm *kvm, int conn_id)
> +{
> +	int ret;
> +	struct eventfd_ctx *eventfd;
> +	struct kvm_hv *hv = &kvm->arch.hyperv;
> +
> +	mutex_lock(&hv->hv_lock);
> +	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
> +	mutex_unlock(&hv->hv_lock);
> +
> +	if (!eventfd)
> +		return -ENOENT;
> +
> +	synchronize_srcu(&kvm->srcu);
> +	eventfd_ctx_put(eventfd);
> +	return ret;
> +}
> +
> +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
> +{
> +	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
> +	    (args->conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1)))
> +		return -EINVAL;
> +
> +	return args->flags == KVM_HYPERV_EVENTFD_DEASSIGN ?
> +		kvm_hv_eventfd_deassign(kvm, args->conn_id) :
> +		kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
>  }
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0fe479d4b82c..2c786682f6f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2686,6 +2686,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_HYPERV_SYNIC:
>  	case KVM_CAP_HYPERV_SYNIC2:
>  	case KVM_CAP_HYPERV_VP_INDEX:
> +	case KVM_CAP_HYPERV_EVENTFD:
>  	case KVM_CAP_PCI_SEGMENT:
>  	case KVM_CAP_DEBUGREGS:
>  	case KVM_CAP_X86_ROBUST_SINGLESTEP:
> @@ -4281,6 +4282,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
>  		break;
>  	}
> +	case KVM_HYPERV_EVENTFD: {
> +		struct kvm_hyperv_eventfd hvevfd;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
> +			goto out;
> +		r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
> +		break;
> +	}
>  	default:
>  		r = -ENOTTY;
>  	}
Konrad Rzeszutek Wilk Dec. 6, 2017, 3:28 p.m. UTC | #2
.snip..
> +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id)
> +{
> +	struct page *page;
> +	void *pg;
> +	struct hv_input_signal_event *msg;
> +
> +	if ((gpa & (__alignof__(*msg) - 1)) ||
> +	    offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE)
> +		return HV_STATUS_INVALID_ALIGNMENT;
> +
> +	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
> +	if (is_error_page(page))
> +		return HV_STATUS_INSUFFICIENT_MEMORY;
> +
> +	pg = kmap_atomic(page);
> +	msg = pg + offset_in_page(gpa);
> +	*conn_id = msg->connectionid.u.id + msg->flag_number;

Here it is u32..

> +	kunmap_atomic(pg);
> +	return HV_STATUS_SUCCESS;
> +}
> +
.. snip..

> +static int kvm_hv_eventfd_assign(struct kvm *kvm, int conn_id, int fd)

It is uint32 in the hypercall, but here you are making it an 'int'.

Is that intentional?  Or could it be the same type?
Roman Kagan Dec. 6, 2017, 4:37 p.m. UTC | #3
On Wed, Dec 06, 2017 at 10:28:33AM -0500, Konrad Rzeszutek Wilk wrote:
> .snip..
> > +static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id)
> > +{
> > +	struct page *page;
> > +	void *pg;
> > +	struct hv_input_signal_event *msg;
> > +
> > +	if ((gpa & (__alignof__(*msg) - 1)) ||
> > +	    offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE)
> > +		return HV_STATUS_INVALID_ALIGNMENT;
> > +
> > +	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
> > +	if (is_error_page(page))
> > +		return HV_STATUS_INSUFFICIENT_MEMORY;
> > +
> > +	pg = kmap_atomic(page);
> > +	msg = pg + offset_in_page(gpa);
> > +	*conn_id = msg->connectionid.u.id + msg->flag_number;
> 
> Here it is u32..
> 
> > +	kunmap_atomic(pg);
> > +	return HV_STATUS_SUCCESS;
> > +}
> > +
> .. snip..
> 
> > +static int kvm_hv_eventfd_assign(struct kvm *kvm, int conn_id, int fd)
> 
> It is uint32 in the hypercall, but here you are making it an 'int'.
> 
> Is that intentional?  Or could it be the same type?

Yes it should have been u32 everywhere.  Thanks for spotting!
(The actual values allowed there are 24bit wide, nonetheless different
types are confusing indeed so I'd better make them consistent.)

Thanks,
Roman.
Roman Kagan Dec. 6, 2017, 5 p.m. UTC | #4
On Wed, Dec 06, 2017 at 04:19:22PM +0100, Vitaly Kuznetsov wrote:
> Roman Kagan <rkagan@virtuozzo.com> writes:
> 
> > In Hyper-V, the fast guest->host notification mechanism is the
> > SIGNAL_EVENT hypercall, with a single parameter of the connection ID to
> > signal.
> 
> (I may be missing something important...)
> 
> I'm not sure how Windows does that but Linux Hyper-V drivers use
> hard-coded VMBUS_EVENT_CONNECTION_ID (2) for all HVCALL_SIGNAL_EVENT
> hypercalls.

This is only true for VMBus protocol of w2008, where all channels use
the same connection id, and use an additional "interrupt page" to sort
out whose notification it is.

Newer VMBus uses "dedicated interrupt" per channel, and Linux certainly
does use that, too, if the hypervisor offers it.  See vmbus_set_event().

> According to the spec, this id comes from HvConnectPort
> hypercall which is executed by the root partition -- not sure how it's
> supposed to be passed down to the guest.

It comes in the channel offer.  See vmbus_onoffer().

> So in case the situation in Windows is not any different the connection
> ID parameter will always be 2 so we may as well just omit it)

It is not different in Windows: the connection id varies there too.

> Or,
> alternatively, we can probably add both VMBUS_MESSAGE_CONNECTION_ID and
> VMBUS_MONITOR_CONNECTION_ID to the mechanism...

These two are not used with the SIGNAL_EVENT hypercall.  Or are you
suggesting to also handle the POST_MESSAGE hypercall in KVM?  I don't
see a compelling reason to do so, since this is a slow control mechanism
and only used at setup/teardown, so handling it in userspace is good
enough.

Thanks,
Roman.
Vitaly Kuznetsov Dec. 6, 2017, 5:09 p.m. UTC | #5
Roman Kagan <rkagan@virtuozzo.com> writes:

> On Wed, Dec 06, 2017 at 04:19:22PM +0100, Vitaly Kuznetsov wrote:
>> Roman Kagan <rkagan@virtuozzo.com> writes:
>> 
>> > In Hyper-V, the fast guest->host notification mechanism is the
>> > SIGNAL_EVENT hypercall, with a single parameter of the connection ID to
>> > signal.
>> 
>> (I may be missing something important...)
>> 
>> I'm not sure how Windows does that but Linux Hyper-V drivers use
>> hard-coded VMBUS_EVENT_CONNECTION_ID (2) for all HVCALL_SIGNAL_EVENT
>> hypercalls.
>
> This is only true for VMBus protocol of w2008, where all channels use
> the same connection id, and use an additional "interrupt page" to sort
> out whose notification it is.
>
> Newer VMBus uses "dedicated interrupt" per channel, and Linux certainly
> does use that, too, if the hypervisor offers it.  See vmbus_set_event().
>

Ah, right, thanks!

>
>> Or,
>> alternatively, we can probably add both VMBUS_MESSAGE_CONNECTION_ID and
>> VMBUS_MONITOR_CONNECTION_ID to the mechanism...
>
> These two are not used with the SIGNAL_EVENT hypercall.  Or are you
> suggesting to also handle the POST_MESSAGE hypercall in KVM?  I don't
> see a compelling reason to do so, since this is a slow control mechanism
> and only used at setup/teardown, so handling it in userspace is good
> enough.

Yes, it is good enough but the new mechanism's name looks generic enough:
KVM_HYPERV_EVENTFD and it is unclear why only SIGNAL_EVENT is handled.
Roman Kagan Dec. 6, 2017, 5:32 p.m. UTC | #6
On Wed, Dec 06, 2017 at 06:09:55PM +0100, Vitaly Kuznetsov wrote:
> Roman Kagan <rkagan@virtuozzo.com> writes:
> > On Wed, Dec 06, 2017 at 04:19:22PM +0100, Vitaly Kuznetsov wrote:
> >> Or,
> >> alternatively, we can probably add both VMBUS_MESSAGE_CONNECTION_ID and
> >> VMBUS_MONITOR_CONNECTION_ID to the mechanism...
> >
> > These two are not used with the SIGNAL_EVENT hypercall.  Or are you
> > suggesting to also handle the POST_MESSAGE hypercall in KVM?  I don't
> > see a compelling reason to do so, since this is a slow control mechanism
> > and only used at setup/teardown, so handling it in userspace is good
> > enough.
> 
> Yes, it is good enough but the new mechanism's name look generic enough:
> KVM_HYPERV_EVENTFD and it is unclear why only SIGNAL_EVENT is handled.

Because SIGNAL_EVENT matches the eventfd semantics while POST_MESSAGE
doesn't.

Because POST_MESSAGE is, well, about posting messages.  It bears up to
256 bytes of data which need to be copied aside before returning to the
guest and then delivered somehow to the userspace for processing.

Roman.
kernel test robot Dec. 7, 2017, 4:31 a.m. UTC | #7
Hi Roman,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.15-rc2]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Roman-Kagan/x86-kvm-hyperv-guest-host-event-signaling-via-eventfd/20171207-065837
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: i386-randconfig-x011-201749 (attached as .config)
compiler: gcc-7 (Debian 7.2.0-12) 7.2.1 20171025
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/init.h:5:0,
                    from arch/x86/include/asm/mem_encrypt.h:18,
                    from include/linux/mem_encrypt.h:20,
                    from arch/x86/include/asm/processor-flags.h:6,
                    from arch/x86/include/asm/processor.h:5,
                    from arch/x86//kvm/x86.h:5,
                    from arch/x86//kvm/hyperv.c:24:
   arch/x86//kvm/hyperv.c: In function 'hvcall_sigevent_param':
>> arch/x86//kvm/hyperv.c:1236:26: error: dereferencing pointer to incomplete type 'struct hv_input_signal_event'
     if ((gpa & (__alignof__(*msg) - 1)) ||
                             ^~
   include/linux/compiler.h:58:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> arch/x86//kvm/hyperv.c:1236:2: note: in expansion of macro 'if'
     if ((gpa & (__alignof__(*msg) - 1)) ||
     ^~

vim +1236 arch/x86//kvm/hyperv.c

  1229	
  1230	static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id)
  1231	{
  1232		struct page *page;
  1233		void *pg;
  1234		struct hv_input_signal_event *msg;
  1235	
> 1236		if ((gpa & (__alignof__(*msg) - 1)) ||
  1237		    offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE)
  1238			return HV_STATUS_INVALID_ALIGNMENT;
  1239	
  1240		page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
  1241		if (is_error_page(page))
  1242			return HV_STATUS_INSUFFICIENT_MEMORY;
  1243	
  1244		pg = kmap_atomic(page);
  1245		msg = pg + offset_in_page(gpa);
  1246		*conn_id = msg->connectionid.u.id + msg->flag_number;
  1247		kunmap_atomic(pg);
  1248		return HV_STATUS_SUCCESS;
  1249	}
  1250	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index f670e4b9e7f3..e4f319add8b7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3394,6 +3394,29 @@  invalid, if invalid pages are written to (e.g. after the end of memory)
 or if no page table is present for the addresses (e.g. when using
 hugepages).
 
+4.109 KVM_HYPERV_EVENTFD
+
+Capability: KVM_CAP_HYPERV_EVENTFD
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_hyperv_eventfd (in)
+Returns: 0 on success, !0 on error
+
+This ioctl (un)registers an eventfd to receive notifications from the guest on
+the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without
+causing a user exit.
+
+struct kvm_hyperv_eventfd {
+	__u32 conn_id;
+	__s32 fd;
+	__u32 flags;
+	__u32 padding[3];
+};
+
+The acceptable values for the flags field:
+
+#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1bfb99770c34..0d37eb837991 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -739,6 +739,8 @@  struct kvm_hv {
 	u64 hv_crash_ctl;
 
 	HV_REFERENCE_TSC_PAGE tsc_ref;
+
+	struct idr conn_to_evt;
 };
 
 enum kvm_irqchip_mode {
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index cc2468244ca2..837465d69c6d 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -90,5 +90,6 @@  void kvm_hv_setup_tsc_page(struct kvm *kvm,
 
 void kvm_hv_init_vm(struct kvm *kvm);
 void kvm_hv_destroy_vm(struct kvm *kvm);
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
 
 #endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 282d7613fce8..465f45c13cdc 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -932,6 +932,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_SYNIC2 148
 #define KVM_CAP_HYPERV_VP_INDEX 149
 #define KVM_CAP_S390_AIS_MIGRATION 150
+#define KVM_CAP_HYPERV_EVENTFD 151
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1359,6 +1360,8 @@  struct kvm_s390_ucas_mapping {
 #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
 
+#define KVM_HYPERV_EVENTFD	_IOW(KVMIO,  0xba, struct kvm_hyperv_eventfd)
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)
@@ -1419,4 +1422,14 @@  struct kvm_assigned_msix_entry {
 #define KVM_ARM_DEV_EL1_PTIMER		(1 << 1)
 #define KVM_ARM_DEV_PMU			(1 << 2)
 
+struct kvm_hyperv_eventfd {
+	__u32 conn_id;
+	__s32 fd;
+	__u32 flags;
+	__u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_BITS		24
+#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
+
 #endif /* __LINUX_KVM_H */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 015fb06c7522..d2e8915546b1 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -29,6 +29,7 @@ 
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <linux/sched/cputime.h>
+#include <linux/eventfd.h>
 
 #include <asm/apicdef.h>
 #include <trace/events/kvm.h>
@@ -1226,6 +1227,54 @@  static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+/*
+ * Fetch the SIGNAL_EVENT input block from guest memory (non-fast hypercall).
+ *
+ * @vcpu:    vCPU that issued the hypercall
+ * @gpa:     guest physical address of a struct hv_input_signal_event
+ * @conn_id: on success, set to connectionid.u.id + flag_number
+ *
+ * The block has to satisfy the structure's alignment and must not straddle a
+ * page boundary; otherwise HV_STATUS_INVALID_ALIGNMENT is returned.  If the
+ * backing page cannot be obtained, HV_STATUS_INSUFFICIENT_MEMORY is returned
+ * and *conn_id is left untouched.
+ *
+ * Returns HV_STATUS_SUCCESS or one of the HV_STATUS_* errors above.
+ */
+static u16 hvcall_sigevent_param(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *conn_id)
+{
+	struct page *page;
+	void *pg;
+	struct hv_input_signal_event *msg;
+
+	if ((gpa & (__alignof__(*msg) - 1)) ||
+	    offset_in_page(gpa) + sizeof(*msg) > PAGE_SIZE)
+		return HV_STATUS_INVALID_ALIGNMENT;
+
+	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
+	if (is_error_page(page))
+		return HV_STATUS_INSUFFICIENT_MEMORY;
+
+	pg = kmap_atomic(page);
+	msg = pg + offset_in_page(gpa);
+	*conn_id = msg->connectionid.u.id + msg->flag_number;
+	kunmap_atomic(pg);
+	/*
+	 * The page was only read; drop the reference obtained by
+	 * kvm_vcpu_gfn_to_page() so it is not leaked on every call.
+	 */
+	kvm_release_page_clean(page);
+	return HV_STATUS_SUCCESS;
+}
+
+/*
+ * Handle the SIGNAL_EVENT hypercall in the kernel by signaling the eventfd
+ * registered for the requested connection id.
+ *
+ * @vcpu:  vCPU that issued the hypercall
+ * @fast:  true if the parameter is passed in registers; then @ingpa itself
+ *         carries the connection id (bits 0-31) and flag number (bits 32-47)
+ * @ingpa: the register parameter (fast) or the guest physical address of the
+ *         input block (non-fast)
+ *
+ * Returns HV_STATUS_SUCCESS if an eventfd was found and signaled,
+ * HV_STATUS_INVALID_CONNECTION_ID if the id is out of range or has no eventfd
+ * registered, or the error reported while reading the input block.
+ */
+static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 ingpa)
+{
+	u16 ret;
+	u32 conn_id;
+	int idx;
+	struct eventfd_ctx *eventfd;
+
+	if (likely(fast)) {
+		conn_id = (ingpa & 0xffffffff) + ((ingpa >> 32) & 0xffff);
+	} else {
+		ret = hvcall_sigevent_param(vcpu, ingpa, &conn_id);
+		if (ret != HV_STATUS_SUCCESS)
+			return ret;
+	}
+
+	if (conn_id & ~((1 << KVM_HYPERV_CONN_ID_BITS) - 1))
+		return HV_STATUS_INVALID_CONNECTION_ID;
+
+	/*
+	 * Two different protections are needed here: the eventfd context is
+	 * kept alive by kvm->srcu (deassign waits for SRCU readers before
+	 * dropping its reference), while the IDR's internal nodes are only
+	 * safe to walk locklessly inside an RCU read-side critical section.
+	 */
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	rcu_read_lock();
+	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, conn_id);
+	rcu_read_unlock();
+	if (eventfd)
+		eventfd_signal(eventfd, 1);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+	return eventfd ? HV_STATUS_SUCCESS : HV_STATUS_INVALID_CONNECTION_ID;
+}
+
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
 	u64 param, ingpa, outgpa, ret;
@@ -1276,8 +1325,12 @@  int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
 		kvm_vcpu_on_spin(vcpu, true);
 		break;
-	case HVCALL_POST_MESSAGE:
 	case HVCALL_SIGNAL_EVENT:
+		res = kvm_hvcall_signal_event(vcpu, fast, ingpa);
+		if (res != HV_STATUS_INVALID_CONNECTION_ID)
+			break;
+		/* maybe userspace knows this conn_id: fall through */
+	case HVCALL_POST_MESSAGE:
 		/* don't bother userspace if it has no way to handle it */
 		if (!vcpu_to_synic(vcpu)->active) {
 			res = HV_STATUS_INVALID_HYPERCALL_CODE;
@@ -1305,8 +1358,68 @@  int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 void kvm_hv_init_vm(struct kvm *kvm)
 {
 	mutex_init(&kvm->arch.hyperv.hv_lock);
+	idr_init(&kvm->arch.hyperv.conn_to_evt);
 }
 
 void kvm_hv_destroy_vm(struct kvm *kvm)
 {
+	int i;
+	struct eventfd_ctx *eventfd;
+
+	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
+		eventfd_ctx_put(eventfd);
+	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
+}
+
+/*
+ * Register the eventfd behind @fd for notifications on @conn_id.
+ *
+ * @kvm:     VM to register the association in
+ * @conn_id: Hyper-V connection id; same u32 type as the ABI field and the
+ *           hypercall parameter
+ * @fd:      file descriptor of the eventfd to signal
+ *
+ * On success the IDR keeps the eventfd reference obtained here; it is dropped
+ * on deassign or VM destruction.  On failure the reference is released.
+ *
+ * Returns 0 on success, the error from eventfd_ctx_fdget() if @fd cannot be
+ * resolved, -EEXIST if @conn_id already has an eventfd, or another negative
+ * error from idr_alloc().
+ */
+static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
+{
+	int ret;
+	struct eventfd_ctx *eventfd;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	eventfd = eventfd_ctx_fdget(fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	/* a one-slot range [conn_id, conn_id + 1) requests exactly this id */
+	mutex_lock(&hv->hv_lock);
+	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
+			GFP_KERNEL);
+	mutex_unlock(&hv->hv_lock);
+
+	if (ret >= 0)
+		return 0;
+
+	/* "no space" in a one-slot range means the id is already taken */
+	if (ret == -ENOSPC)
+		ret = -EEXIST;
+	eventfd_ctx_put(eventfd);
+	return ret;
+}
+
+/*
+ * Remove the eventfd registered for @conn_id, if any.
+ *
+ * @kvm:     VM holding the association
+ * @conn_id: Hyper-V connection id to unregister
+ *
+ * Returns 0 on success or -ENOENT if nothing is registered for @conn_id.
+ */
+static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
+{
+	struct eventfd_ctx *eventfd;
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+
+	mutex_lock(&hv->hv_lock);
+	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
+	mutex_unlock(&hv->hv_lock);
+
+	if (!eventfd)
+		return -ENOENT;
+
+	/*
+	 * The hypercall path signals the eventfd inside a kvm->srcu read-side
+	 * section; wait for those readers before dropping the reference.
+	 */
+	synchronize_srcu(&kvm->srcu);
+	eventfd_ctx_put(eventfd);
+	return 0;
+}
+
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
+{
+	const u32 conn_id_mask = (1 << KVM_HYPERV_CONN_ID_BITS) - 1;
+
+	/* only the DEASSIGN flag is defined; anything else is invalid */
+	if (args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN)
+		return -EINVAL;
+	/* connection ids are limited to KVM_HYPERV_CONN_ID_BITS bits */
+	if (args->conn_id & ~conn_id_mask)
+		return -EINVAL;
+
+	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
+		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
+
+	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0fe479d4b82c..2c786682f6f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2686,6 +2686,7 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_SYNIC:
 	case KVM_CAP_HYPERV_SYNIC2:
 	case KVM_CAP_HYPERV_VP_INDEX:
+	case KVM_CAP_HYPERV_EVENTFD:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -4281,6 +4282,15 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 		break;
 	}
+	case KVM_HYPERV_EVENTFD: {
+		struct kvm_hyperv_eventfd hvevfd;
+
+		r = -EFAULT;
+		if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
+			goto out;
+		r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}