diff mbox series

[1/5] kernel: add a new helper to execute system calls from kernel code

Message ID 20220722230241.1944655-2-avagin@google.com (mailing list archive)
State New, archived
Headers show
Series KVM/x86: add a new hypercall to execute host system | expand

Commit Message

Andrei Vagin July 22, 2022, 11:02 p.m. UTC
This helper will be used to implement a KVM hypercall that executes host
system calls.

The new helper executes seccomp rules and calls the trace_sys_{enter,exit}
hooks. However, it intentionally doesn't call the ptrace hooks, because the
system calls being executed are not linked to the current process state.

Signed-off-by: Andrei Vagin <avagin@google.com>
---
 arch/x86/entry/common.c        | 50 ++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/syscall.h |  1 +
 2 files changed, 51 insertions(+)
diff mbox series

Patch

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6c2826417b33..7f4c172a9a4e 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -19,6 +19,7 @@ 
 #include <linux/nospec.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
+#include <trace/events/syscalls.h>
 
 #ifdef CONFIG_XEN_PV
 #include <xen/xen-ops.h>
@@ -37,6 +38,55 @@ 
 
 #ifdef CONFIG_X86_64
 
+/*
+ * do_ksyscall_64 - execute 64-bit system call @nr on behalf of kernel code.
+ * Arguments are read from @regs; the handler's result is stored in regs->ax.
+ * Seccomp and the sys_enter/sys_exit tracepoints run; ptrace hooks do not.
+ * Returns false, without invoking the handler, if the task has TS_COMPAT
+ * set, seccomp rejects the call, or @nr is out of range; true otherwise.
+ */
+bool do_ksyscall_64(int nr, struct pt_regs *regs)
+{
+	struct task_struct *task = current;
+	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+	/*
+	 * Convert negative numbers to very high and thus out of range
+	 * numbers for comparisons.
+	 */
+	unsigned int unr = nr;
+
+#ifdef CONFIG_IA32_EMULATION
+	if (task->thread_info.status & TS_COMPAT)
+		return false;
+#endif
+
+	if (work & SYSCALL_WORK_SECCOMP) {
+		struct seccomp_data sd;
+		unsigned long args[6];
+		unsigned int i;
+
+		sd.nr = nr;
+		sd.arch = AUDIT_ARCH_X86_64;
+		syscall_get_arguments(task, regs, args);
+		for (i = 0; i < ARRAY_SIZE(args); i++)
+			sd.args[i] = args[i];
+		sd.instruction_pointer = regs->ip;
+		if (__secure_computing(&sd) == -1)
+			return false;
+	}
+
+	if (unlikely(unr >= NR_syscalls))
+		return false;
+
+	unr = array_index_nospec(unr, NR_syscalls);
+
+	trace_sys_enter(regs, unr);
+	regs->ax = sys_call_table[unr](regs);
+	trace_sys_exit(regs, syscall_get_return_value(task, regs));
+	return true;
+}
+EXPORT_SYMBOL_GPL(do_ksyscall_64);
+
 static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
 {
 	/*
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 5b85987a5e97..6cde1ddeb50b 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -126,6 +126,7 @@  static inline int syscall_get_arch(struct task_struct *task)
 		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
+bool do_ksyscall_64(int nr, struct pt_regs *regs);
 void do_syscall_64(struct pt_regs *regs, int nr);
 void do_int80_syscall_32(struct pt_regs *regs);
 long do_fast_syscall_32(struct pt_regs *regs);