diff mbox series

[v3,1/3] ptrace,syscall_user_dispatch: Implement Syscall User Dispatch Suspension

Message ID 20230120144356.40717-2-gregory.price@memverge.com (mailing list archive)
State New
Headers show
Series Checkpoint Support for Syscall User Dispatch | expand

Commit Message

Gregory Price Jan. 20, 2023, 2:43 p.m. UTC
Add PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH to the ptrace options, and
modify Syscall User Dispatch to suspend interception when it is set.

This is modeled after the SUSPEND_SECCOMP feature, which suspends
SECCOMP interposition.  Without it, system calls that software like
CRIU injects into a process are intercepted by Syscall User Dispatch,
either causing a crash (due to blocked signals) or the delivery of
those signals to a ptracer (not the intended behavior).

Since Syscall User Dispatch is not a privileged feature, a check
for permissions is not required; however, attempting to set this
option when CONFIG_CHECKPOINT_RESTORE is not enabled should be
disallowed, as its intended use is checkpoint/restore.

Signed-off-by: Gregory Price <gregory.price@memverge.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/ptrace.h               | 2 ++
 include/uapi/linux/ptrace.h          | 6 +++++-
 kernel/entry/syscall_user_dispatch.c | 5 +++++
 kernel/ptrace.c                      | 4 ++++
 4 files changed, 16 insertions(+), 1 deletion(-)

Comments

Oleg Nesterov Jan. 20, 2023, 3:22 p.m. UTC | #1
Hi Gregory,

I'll try to read this series next Monday, I need to recall what does
syscall-user-dispatch actually do ;)

just one question for now,

On 01/20, Gregory Price wrote:
>
> --- a/kernel/ptrace.c
> +++ b/kernel/ptrace.c
> @@ -370,6 +370,10 @@ static int check_ptrace_options(unsigned long data)
>  	if (data & ~(unsigned long)PTRACE_O_MASK)
>  		return -EINVAL;
>  
> +	if (unlikely(data & PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH) &&
> +	    (!IS_ENABLED(CONFIG_CHECKPOINT_RESTART)))
> +			return -EINVAL;

Hmm? git grep CHECKPOINT_RESTART shows nothing.

Oleg.
Gregory Price Jan. 20, 2023, 3:49 p.m. UTC | #2
On Fri, Jan 20, 2023 at 04:22:51PM +0100, Oleg Nesterov wrote:
> Hi Gregory,
> 
> I'll try to read this series next Monday, I need to recall what does
> syscall-user-dispatch actually do ;)
> 
> just one question for now,
> 
> On 01/20, Gregory Price wrote:
> >
> > --- a/kernel/ptrace.c
> > +++ b/kernel/ptrace.c
> > @@ -370,6 +370,10 @@ static int check_ptrace_options(unsigned long data)
> >  	if (data & ~(unsigned long)PTRACE_O_MASK)
> >  		return -EINVAL;
> >  
> > +	if (unlikely(data & PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH) &&
> > +	    (!IS_ENABLED(CONFIG_CHECKPOINT_RESTART)))
> > +			return -EINVAL;
> 
> Hmm? git grep CHECKPOINT_RESTART shows nothing.
> 
> Oleg.
>

TIL the mailing lists don't like responses from proxy addresses.
Resending my response so it goes out to everyone.


Good catch, I always mix up RESTART/RESTORE.  This should be RESTORE.
Adjusted patch below; I will send a v4 tomorrow so as not to spam the
lists.  Attached an updated patch for the time being.



(brief syscall user dispatch overview)

syscall-user-dispatch is relatively simple: the goal is to implement
syscall interposition for foreign syscalls (Windows, non-POSIX,
whatever).  Since the ABI of these syscalls can't be trusted to be
anything like Linux's, syscall dispatch produces a SIGSYS before anything
else can do things like check register values.

How to use

1) User registers a SIGSYS signal handler
2) User does
   prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
	       <address>, <length>, char* selector)

3) All 'syscall' instructions *outside* the virtual address range
   [address, address+length) now produce a SIGSYS on the thread that
   executed the syscall.

   The byte pointed to by <selector> can be set to
   SYSCALL_DISPATCH_FILTER_ALLOW or SYSCALL_DISPATCH_FILTER_BLOCK to
   disable/enable (respectively) this signal production from userland
   without having to make kernel calls.

docs: https://docs.kernel.org/admin-guide/syscall-user-dispatch.html


Updated patch


diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index eaaef3ffec22..461ae5c99d57 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -45,6 +45,8 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,

 #define PT_EXITKILL            (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
 #define PT_SUSPEND_SECCOMP     (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
+#define PT_SUSPEND_SYSCALL_USER_DISPATCH \
+       (PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH << PT_OPT_FLAG_SHIFT)

 extern long arch_ptrace(struct task_struct *child, long request,
                        unsigned long addr, unsigned long data);
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 195ae64a8c87..ba9e3f19a22c 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -146,9 +146,13 @@ struct ptrace_rseq_configuration {
 /* eventless options */
 #define PTRACE_O_EXITKILL              (1 << 20)
 #define PTRACE_O_SUSPEND_SECCOMP       (1 << 21)
+#define PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH (1 << 22)

 #define PTRACE_O_MASK          (\
-       0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP)
+       0x000000ff | \
+       PTRACE_O_EXITKILL | \
+       PTRACE_O_SUSPEND_SECCOMP | \
+       PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)

 #include <asm/ptrace.h>

diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index 0b6379adff6b..b5ec75164805 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -8,6 +8,7 @@
 #include <linux/uaccess.h>
 #include <linux/signal.h>
 #include <linux/elf.h>
+#include <linux/ptrace.h>

 #include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
@@ -36,6 +37,10 @@ bool syscall_user_dispatch(struct pt_regs *regs)
        struct syscall_user_dispatch *sd = &current->syscall_dispatch;
        char state;

+       if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
+           unlikely(current->ptrace & PT_SUSPEND_SYSCALL_USER_DISPATCH))
+               return false;
+
        if (likely(instruction_pointer(regs) - sd->offset < sd->len))
                return false;

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 54482193e1ed..a348b68d07a2 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -370,6 +370,10 @@ static int check_ptrace_options(unsigned long data)
        if (data & ~(unsigned long)PTRACE_O_MASK)
                return -EINVAL;

+       if (unlikely(data & PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH) &&
+           (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE)))
+                       return -EINVAL;
+
        if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
                if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
                    !IS_ENABLED(CONFIG_SECCOMP))
diff mbox series

Patch

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index eaaef3ffec22..461ae5c99d57 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -45,6 +45,8 @@  extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
 
 #define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
 #define PT_SUSPEND_SECCOMP	(PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
+#define PT_SUSPEND_SYSCALL_USER_DISPATCH \
+	(PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH << PT_OPT_FLAG_SHIFT)
 
 extern long arch_ptrace(struct task_struct *child, long request,
 			unsigned long addr, unsigned long data);
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 195ae64a8c87..ba9e3f19a22c 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -146,9 +146,13 @@  struct ptrace_rseq_configuration {
 /* eventless options */
 #define PTRACE_O_EXITKILL		(1 << 20)
 #define PTRACE_O_SUSPEND_SECCOMP	(1 << 21)
+#define PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH	(1 << 22)
 
 #define PTRACE_O_MASK		(\
-	0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP)
+	0x000000ff | \
+	PTRACE_O_EXITKILL | \
+	PTRACE_O_SUSPEND_SECCOMP | \
+	PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH)
 
 #include <asm/ptrace.h>
 
diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index 0b6379adff6b..b5ec75164805 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -8,6 +8,7 @@ 
 #include <linux/uaccess.h>
 #include <linux/signal.h>
 #include <linux/elf.h>
+#include <linux/ptrace.h>
 
 #include <linux/sched/signal.h>
 #include <linux/sched/task_stack.h>
@@ -36,6 +37,10 @@  bool syscall_user_dispatch(struct pt_regs *regs)
 	struct syscall_user_dispatch *sd = &current->syscall_dispatch;
 	char state;
 
+	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
+	    unlikely(current->ptrace & PT_SUSPEND_SYSCALL_USER_DISPATCH))
+		return false;
+
 	if (likely(instruction_pointer(regs) - sd->offset < sd->len))
 		return false;
 
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 54482193e1ed..99467ba5f55b 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -370,6 +370,10 @@  static int check_ptrace_options(unsigned long data)
 	if (data & ~(unsigned long)PTRACE_O_MASK)
 		return -EINVAL;
 
+	if (unlikely(data & PTRACE_O_SUSPEND_SYSCALL_USER_DISPATCH) &&
+	    (!IS_ENABLED(CONFIG_CHECKPOINT_RESTART)))
+			return -EINVAL;
+
 	if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
 		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
 		    !IS_ENABLED(CONFIG_SECCOMP))