@@ -190,3 +190,21 @@ the KVM_CAP_EXIT_HYPERCALL capability. Userspace must enable that capability
before advertising KVM_FEATURE_HC_MAP_GPA_RANGE in the guest CPUID. In
addition, if the guest supports KVM_FEATURE_MIGRATION_CONTROL, userspace
must also set up an MSR filter to process writes to MSR_KVM_MIGRATION_CONTROL.
+
+9. KVM_HC_HOST_SYSCALL
+----------------------
+:Architecture: x86
+:Status: active
+:Purpose: Execute a specified system call.
+
+- a0: pointer to a pt_regs structure in the host address space.
+
+This hypercall lets a guest execute host system calls. The first and only
+argument represents process registers that are used as input and output
+parameters.
+
+Returns 0 if the requested syscall has been executed. Otherwise, it returns an
+error code.
+
+**Implementation note**: The KVM_CAP_PV_HOST_SYSCALL capability has to be set
+to use this hypercall.
@@ -81,6 +81,7 @@
#include <asm/emulate_prefix.h>
#include <asm/sgx.h>
#include <clocksource/hyperv_timer.h>
+#include <asm/syscall.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -9253,6 +9254,27 @@ static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
+/*
+ * kvm_pv_host_syscall - back end of the KVM_HC_HOST_SYSCALL hypercall.
+ * @a0: host userspace address of a struct pt_regs, used for input and output.
+ *
+ * Copies the register set in from @a0, takes the system call number from its
+ * ax field, runs the call through do_ksyscall_64() and copies the whole
+ * register set back out to @a0.
+ *
+ * Apart from ax and orig_ax, every field of the register set is handed to
+ * do_ksyscall_64() exactly as userspace supplied it.
+ * NOTE(review): whether do_ksyscall_64() validates those fields is not
+ * visible from here - confirm.
+ *
+ * Return: 0 once the register set has been copied back, -EFAULT if @a0 cannot
+ * be read or written. The return value does not reflect the outcome of the
+ * system call itself; that is left in the copied-back registers.
+ */
+static int kvm_pv_host_syscall(unsigned long a0)
+{
+	/* @a0 is a userspace address: keep the __user annotation for sparse. */
+	struct pt_regs __user *uregs = (struct pt_regs __user *)a0;
+	struct pt_regs pt_regs = {};
+	unsigned long sysno;
+
+	if (copy_from_user(&pt_regs, uregs, sizeof(pt_regs)))
+		return -EFAULT;
+
+	/*
+	 * ax carries the syscall number on entry. Keep a copy in orig_ax and
+	 * preset ax to -ENOSYS before dispatching.
+	 * NOTE(review): presumably -ENOSYS is what the caller sees when the
+	 * number is not dispatched - confirm against do_ksyscall_64().
+	 */
+	sysno = pt_regs.ax;
+	pt_regs.orig_ax = pt_regs.ax;
+	pt_regs.ax = -ENOSYS;
+
+	do_ksyscall_64(sysno, &pt_regs);
+
+	/*
+	 * NOTE(review): orig_ax is reset to -1 before the registers are
+	 * returned, presumably so they no longer look like an in-progress
+	 * syscall - confirm.
+	 */
+	pt_regs.orig_ax = -1;
+	if (copy_to_user(uregs, &pt_regs, sizeof(pt_regs)))
+		return -EFAULT;
+
+	return 0;
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@@ -9318,6 +9340,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
kvm_sched_yield(vcpu, a0);
ret = 0;
break;
+
case KVM_HC_MAP_GPA_RANGE: {
u64 gpa = a0, npages = a1, attrs = a2;
@@ -9340,6 +9363,16 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
vcpu->arch.complete_userspace_io = complete_hypercall_exit;
return 0;
}
+
+ case KVM_HC_HOST_SYSCALL:
+ if (!guest_pv_has(vcpu, KVM_FEATURE_PV_HOST_SYSCALL))
+ break;
+
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ ret = kvm_pv_host_syscall(a0);
+ kvm_vcpu_srcu_read_lock(vcpu);
+ break;
+
default:
ret = -KVM_ENOSYS;
break;
@@ -30,6 +30,7 @@
#define KVM_HC_SEND_IPI 10
#define KVM_HC_SCHED_YIELD 11
#define KVM_HC_MAP_GPA_RANGE 12
+#define KVM_HC_HOST_SYSCALL 13
/*
* hypercalls use architecture specific
There is a class of applications that use KVM to manage multiple address spaces rather than as an isolation boundary. In all other respects, they are normal processes that execute system calls, handle signals, etc. Currently, each time such a process needs to interact with the operating system, it has to switch to the host and back to the guest. These full switches are expensive and significantly increase the overhead of system calls. The new hypercall reduces this overhead by more than a factor of two. The new hypercall allows a guest to execute host system calls. As with native calls, seccomp filters are executed before the calls. It takes one argument, a pointer to a pt_regs structure in the host address space, which provides the registers needed to execute a system call according to the calling convention: arguments are passed in %rdi, %rsi, %rdx, %r10, %r8 and %r9, and the return code is stored in %rax. The hypercall returns 0 if a system call has been executed. Otherwise, it returns an error code. Signed-off-by: Andrei Vagin <avagin@google.com> --- Documentation/virt/kvm/x86/hypercalls.rst | 18 +++++++++++++ arch/x86/kvm/x86.c | 33 +++++++++++++++++++++++ include/uapi/linux/kvm_para.h | 1 + 3 files changed, 52 insertions(+)