
[v1,7/8] tracing/perf: Add might_fault check to syscall probes

Message ID 20241003151638.1608537-8-mathieu.desnoyers@efficios.com
State Superseded
Series tracing: Allow system call tracepoints to handle page faults

Commit Message

Mathieu Desnoyers Oct. 3, 2024, 3:16 p.m. UTC
Add a might_fault() check to validate that the perf sys_enter/sys_exit
probe callbacks are indeed called from a context where page faults can
be handled.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
---
 include/trace/perf.h          | 1 +
 kernel/trace/trace_syscalls.c | 2 ++
 2 files changed, 3 insertions(+)
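
For reviewers unfamiliar with the annotation: might_fault() compiles away on
production kernels and only does work on debug builds (CONFIG_PROVE_LOCKING /
CONFIG_DEBUG_ATOMIC_SLEEP). A rough sketch of what the check amounts to —
simplified, not a literal copy of the __might_fault() code in mm/memory.c:

/*
 * Simplified sketch of the debug-build behaviour of might_fault().
 * Assumption: mirrors the usual __might_fault() logic, trimmed down.
 */
static void might_fault_sketch(void)
{
	/* Page faults were explicitly disabled; nothing to validate. */
	if (pagefault_disabled())
		return;

	/*
	 * Handling a user page fault may sleep, so complain (via the usual
	 * "sleeping function called from invalid context" splat) if we are
	 * in atomic context: preemption or interrupts disabled, inside a
	 * spinlock, etc.
	 */
	might_sleep();
}

Placing the call before guard(preempt_notrace)() therefore asserts that the
probe is entered from a faultable, preemptible context, while the data
recording that follows still runs with preemption disabled.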

Comments

Steven Rostedt Oct. 3, 2024, 10:37 p.m. UTC | #1
On Thu,  3 Oct 2024 11:16:37 -0400
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> Add a might_fault() check to validate that the perf sys_enter/sys_exit
> probe callbacks are indeed called from a context where page faults can
> be handled.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Michael Jeanson <mjeanson@efficios.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Masami Hiramatsu <mhiramat@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Yonghong Song <yhs@fb.com>
> Cc: Paul E. McKenney <paulmck@kernel.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> Cc: bpf@vger.kernel.org
> Cc: Joel Fernandes <joel@joelfernandes.org>
> ---
>  include/trace/perf.h          | 1 +
>  kernel/trace/trace_syscalls.c | 2 ++
>  2 files changed, 3 insertions(+)
> 
> diff --git a/include/trace/perf.h b/include/trace/perf.h
> index 5650c1bad088..321bfd7919f6 100644
> --- a/include/trace/perf.h
> +++ b/include/trace/perf.h
> @@ -84,6 +84,7 @@ perf_trace_##call(void *__data, proto)					\
>  	u64 __count __attribute__((unused));				\
>  	struct task_struct *__task __attribute__((unused));		\
>  									\
> +	might_fault();							\
>  	guard(preempt_notrace)();					\
>  	do_perf_trace_##call(__data, args);				\

Same for this. This is used for all tracepoints that perf hooks to.

-- Steve

>  }
> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> index 89d7e4c57b5b..0d42d6f293d6 100644
> --- a/kernel/trace/trace_syscalls.c
> +++ b/kernel/trace/trace_syscalls.c
> @@ -602,6 +602,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
>  	 * Syscall probe called with preemption enabled, but the ring
>  	 * buffer and per-cpu data require preemption to be disabled.
>  	 */
> +	might_fault();
>  	guard(preempt_notrace)();
>  
>  	syscall_nr = trace_get_syscall_nr(current, regs);
> @@ -710,6 +711,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
>  	 * Syscall probe called with preemption enabled, but the ring
>  	 * buffer and per-cpu data require preemption to be disabled.
>  	 */
> +	might_fault();
>  	guard(preempt_notrace)();
>  
>  	syscall_nr = trace_get_syscall_nr(current, regs);
Mathieu Desnoyers Oct. 4, 2024, 12:12 a.m. UTC | #2
On 2024-10-04 00:37, Steven Rostedt wrote:
> On Thu,  3 Oct 2024 11:16:37 -0400
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> 
>> Add a might_fault() check to validate that the perf sys_enter/sys_exit
>> probe callbacks are indeed called from a context where page faults can
>> be handled.
>>
>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>> Cc: Michael Jeanson <mjeanson@efficios.com>
>> Cc: Steven Rostedt <rostedt@goodmis.org>
>> Cc: Masami Hiramatsu <mhiramat@kernel.org>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Alexei Starovoitov <ast@kernel.org>
>> Cc: Yonghong Song <yhs@fb.com>
>> Cc: Paul E. McKenney <paulmck@kernel.org>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
>> Cc: Mark Rutland <mark.rutland@arm.com>
>> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
>> Cc: Namhyung Kim <namhyung@kernel.org>
>> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
>> Cc: bpf@vger.kernel.org
>> Cc: Joel Fernandes <joel@joelfernandes.org>
>> ---
>>   include/trace/perf.h          | 1 +
>>   kernel/trace/trace_syscalls.c | 2 ++
>>   2 files changed, 3 insertions(+)
>>
>> diff --git a/include/trace/perf.h b/include/trace/perf.h
>> index 5650c1bad088..321bfd7919f6 100644
>> --- a/include/trace/perf.h
>> +++ b/include/trace/perf.h
>> @@ -84,6 +84,7 @@ perf_trace_##call(void *__data, proto)					\
>>   	u64 __count __attribute__((unused));				\
>>   	struct task_struct *__task __attribute__((unused));		\
>>   									\
>> +	might_fault();							\
>>   	guard(preempt_notrace)();					\
>>   	do_perf_trace_##call(__data, args);				\
> 
> Same for this. This is used for all tracepoints that perf hooks to.

You're also missing the context:

#undef DECLARE_EVENT_SYSCALL_CLASS
#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \
__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
                       PARAMS(assign), PARAMS(print))                    \
static notrace void                                                     \
perf_trace_##call(void *__data, proto)                                  \
{                                                                       \
         u64 __count __attribute__((unused));                            \
         struct task_struct *__task __attribute__((unused));             \
                                                                         \
         might_fault();                                                  \
         guard(preempt_notrace)();                                       \
         do_perf_trace_##call(__data, args);                             \
}

Not an issue.
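
To spell out the contrast with a rough sketch (simplified; the only assumption
is that the generic DECLARE_EVENT_CLASS expansion is left untouched by this
patch):

/* DECLARE_EVENT_CLASS: every other tracepoint perf hooks into. */
static notrace void perf_trace_some_event(void *__data /* , proto */)
{
	/* ... unchanged by this patch: no might_fault() here ... */
	do_perf_trace_some_event(__data /* , args */);
}

/* DECLARE_EVENT_SYSCALL_CLASS: only the raw syscall events. */
static notrace void perf_trace_sys_enter(void *__data /* , proto */)
{
	might_fault();			/* may handle user page faults */
	guard(preempt_notrace)();
	do_perf_trace_sys_enter(__data /* , args */);
}

So the assertion only lands in the sys_enter/sys_exit perf probes, not in the
generic perf glue.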

Thanks,

Mathieu



Patch

diff --git a/include/trace/perf.h b/include/trace/perf.h
index 5650c1bad088..321bfd7919f6 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -84,6 +84,7 @@  perf_trace_##call(void *__data, proto)					\
 	u64 __count __attribute__((unused));				\
 	struct task_struct *__task __attribute__((unused));		\
 									\
+	might_fault();							\
 	guard(preempt_notrace)();					\
 	do_perf_trace_##call(__data, args);				\
 }
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 89d7e4c57b5b..0d42d6f293d6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -602,6 +602,7 @@  static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	 * Syscall probe called with preemption enabled, but the ring
 	 * buffer and per-cpu data require preemption to be disabled.
 	 */
+	might_fault();
 	guard(preempt_notrace)();
 
 	syscall_nr = trace_get_syscall_nr(current, regs);
@@ -710,6 +711,7 @@  static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	 * Syscall probe called with preemption enabled, but the ring
 	 * buffer and per-cpu data require preemption to be disabled.
 	 */
+	might_fault();
 	guard(preempt_notrace)();
 
 	syscall_nr = trace_get_syscall_nr(current, regs);