diff mbox series

[v5,24/38] kmsan: disable instrumentation of certain functions

Message ID 20200325161249.55095-25-glider@google.com (mailing list archive)
State New, archived
Headers show
Series Add KernelMemorySanitizer infrastructure | expand

Commit Message

Alexander Potapenko March 25, 2020, 4:12 p.m. UTC
Some functions are called from handwritten assembly, and therefore don't
have their arguments' metadata fully set up by the instrumentation code.
Mark them with __no_sanitize_memory to avoid false positives from
spreading further.
Certain functions perform task switching, so that the value of |current|
is different as they proceed. Because KMSAN state pointer is only read
once at the beginning of the function, touching it after |current| has
changed may be dangerous.

Signed-off-by: Alexander Potapenko <glider@google.com>
To: Alexander Potapenko <glider@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Marco Elver <elver@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: linux-mm@kvack.org
---
v3:
 - removed TODOs from comments

v4:
 - updated the comments, dropped __no_sanitize_memory from idle_cpu(),
   sched_init(), profile_tick()
 - split away the uprobes part as requested by Andrey Konovalov

Change-Id: I684d23dac5a22eb0a4cea71993cb934302b17cea
---
 arch/x86/entry/common.c                |  2 ++
 arch/x86/include/asm/irq_regs.h        |  2 ++
 arch/x86/include/asm/syscall_wrapper.h |  2 ++
 arch/x86/kernel/apic/apic.c            |  3 +++
 arch/x86/kernel/dumpstack_64.c         |  5 +++++
 arch/x86/kernel/process_64.c           |  5 +++++
 arch/x86/kernel/traps.c                | 13 +++++++++++--
 kernel/sched/core.c                    | 22 ++++++++++++++++++++++
 8 files changed, 52 insertions(+), 2 deletions(-)

Comments

Andrey Konovalov April 14, 2020, 3:04 p.m. UTC | #1
On Wed, Mar 25, 2020 at 5:14 PM <glider@google.com> wrote:
>
> Some functions are called from handwritten assembly, and therefore don't
> have their arguments' metadata fully set up by the instrumentation code.
> Mark them with __no_sanitize_memory to avoid false positives from
> spreading further.
> Certain functions perform task switching, so that the value of |current|
> is different as they proceed. Because KMSAN state pointer is only read
> once at the beginning of the function, touching it after |current| has
> changed may be dangerous.

I think that whenever a patch touches some subsystem, we need to
mention that in the commit title. This patch touches mostly x86, so
"kmsan, x86: ..." or something like that. This will probably make x86
maintainers more involved.

>
> Signed-off-by: Alexander Potapenko <glider@google.com>
> To: Alexander Potapenko <glider@google.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Vegard Nossum <vegard.nossum@oracle.com>
> Cc: Dmitry Vyukov <dvyukov@google.com>
> Cc: Marco Elver <elver@google.com>
> Cc: Andrey Konovalov <andreyknvl@google.com>
> Cc: linux-mm@kvack.org
> ---
> v3:
>  - removed TODOs from comments
>
> v4:
>  - updated the comments, dropped __no_sanitize_memory from idle_cpu(),
>    sched_init(), profile_tick()
>  - split away the uprobes part as requested by Andrey Konovalov
>
> Change-Id: I684d23dac5a22eb0a4cea71993cb934302b17cea
> ---
>  arch/x86/entry/common.c                |  2 ++
>  arch/x86/include/asm/irq_regs.h        |  2 ++
>  arch/x86/include/asm/syscall_wrapper.h |  2 ++
>  arch/x86/kernel/apic/apic.c            |  3 +++
>  arch/x86/kernel/dumpstack_64.c         |  5 +++++
>  arch/x86/kernel/process_64.c           |  5 +++++
>  arch/x86/kernel/traps.c                | 13 +++++++++++--
>  kernel/sched/core.c                    | 22 ++++++++++++++++++++++
>  8 files changed, 52 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index ec167d8c41cbd..5c3d0f3a14c37 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -280,6 +280,8 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
>  }
>
>  #ifdef CONFIG_X86_64
> +/* Tell KMSAN to not instrument this function and to initialize |regs|. */
> +__no_sanitize_memory
>  __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
>  {
>         struct thread_info *ti;
> diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
> index 187ce59aea28e..a6fc1641e2861 100644
> --- a/arch/x86/include/asm/irq_regs.h
> +++ b/arch/x86/include/asm/irq_regs.h
> @@ -14,6 +14,8 @@
>
>  DECLARE_PER_CPU(struct pt_regs *, irq_regs);
>
> +/* Tell KMSAN to return an initialized struct pt_regs. */
> +__no_sanitize_memory
>  static inline struct pt_regs *get_irq_regs(void)
>  {
>         return __this_cpu_read(irq_regs);
> diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
> index e2389ce9bf58a..098b1a8d6bc41 100644
> --- a/arch/x86/include/asm/syscall_wrapper.h
> +++ b/arch/x86/include/asm/syscall_wrapper.h
> @@ -196,6 +196,8 @@ struct pt_regs;
>         ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO);                  \
>         static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));     \
>         static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
> +       /* Tell KMSAN to initialize |regs|. */                          \
> +       __no_sanitize_memory                                            \
>         asmlinkage long __x64_sys##name(const struct pt_regs *regs)     \
>         {                                                               \
>                 return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
> diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
> index 5f973fed3c9ff..1f0250f14e462 100644
> --- a/arch/x86/kernel/apic/apic.c
> +++ b/arch/x86/kernel/apic/apic.c
> @@ -1127,6 +1127,9 @@ static void local_apic_timer_interrupt(void)
>   * [ if a single-CPU system runs an SMP kernel then we call the local
>   *   interrupt as well. Thus we cannot inline the local irq ... ]
>   */
> +
> +/* Tell KMSAN to initialize |regs|. */
> +__no_sanitize_memory
>  __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
>  {
>         struct pt_regs *old_regs = set_irq_regs(regs);
> diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
> index 87b97897a8810..3d1691f81cada 100644
> --- a/arch/x86/kernel/dumpstack_64.c
> +++ b/arch/x86/kernel/dumpstack_64.c
> @@ -150,6 +150,11 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
>         return true;
>  }
>
> +/*
> + * This function may touch stale uninitialized values on stack. Do not
> + * instrument it with KMSAN to avoid false positives.
> + */
> +__no_sanitize_memory
>  int get_stack_info(unsigned long *stack, struct task_struct *task,
>                    struct stack_info *info, unsigned long *visit_mask)
>  {
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index ffd497804dbc3..5e8c6767e9916 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -424,6 +424,11 @@ void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
>   * Kprobes not supported here. Set the probe on schedule instead.
>   * Function graph tracer not supported too.
>   */
> +/*
> + * Avoid touching KMSAN state or reporting anything here, as __switch_to() does
> + * weird things with tasks.
> + */
> +__no_sanitize_memory
>  __visible __notrace_funcgraph struct task_struct *
>  __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
>  {
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index d54cffdc7cac2..917268aee054e 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -638,7 +638,11 @@ NOKPROBE_SYMBOL(do_int3);
>   * Help handler running on a per-cpu (IST or entry trampoline) stack
>   * to switch to the normal thread stack if the interrupted code was in
>   * user mode. The actual stack switch is done in entry_64.S
> + *
>   */
> +
> +/* This function switches the registers - don't instrument it with KMSAN. */
> +__no_sanitize_memory
>  asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
>  {
>         struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
> @@ -654,6 +658,11 @@ struct bad_iret_stack {
>  };
>
>  asmlinkage __visible notrace
> +/*
> + * Dark magic happening here, let's not instrument this function.
> + * Also avoid copying any metadata by using raw __memmove().
> + */
> +__no_sanitize_memory
>  struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
>  {
>         /*
> @@ -668,10 +677,10 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
>                 (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
>
>         /* Copy the IRET target to the new stack. */
> -       memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
> +       __memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
>
>         /* Copy the remainder of the stack from the current stack. */
> -       memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
> +       __memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
>
>         BUG_ON(!user_mode(&new_stack->regs));
>         return new_stack;
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 1a5937936ac75..bb1b659c12f6a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -471,6 +471,11 @@ void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
>                 put_task_struct(task);
>  }
>
> +/*
> + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN
> + * instrumentation.
> + */
> +__no_sanitize_memory
>  void wake_up_q(struct wake_q_head *head)
>  {
>         struct wake_q_node *node = head->first;
> @@ -3217,6 +3222,12 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
>   * past. prev == current is still correct but we need to recalculate this_rq
>   * because prev may have moved to another CPU.
>   */
> +
> +/*
> + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN
> + * instrumentation.
> + */
> +__no_sanitize_memory
>  static struct rq *finish_task_switch(struct task_struct *prev)
>         __releases(rq->lock)
>  {
> @@ -4052,6 +4063,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
>   *
>   * WARNING: must be called with preemption disabled!
>   */
> +
> +/*
> + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN
> + * instrumentation.
> + */
> +__no_sanitize_memory
>  static void __sched notrace __schedule(bool preempt)
>  {
>         struct task_struct *prev, *next;
> @@ -6789,6 +6806,11 @@ static inline int preempt_count_equals(int preempt_offset)
>         return (nested == preempt_offset);
>  }
>
> +/*
> + * This function might be called from code that is not instrumented with KMSAN.
> + * Nevertheless, treat its arguments as initialized.
> + */
> +__no_sanitize_memory
>  void __might_sleep(const char *file, int line, int preempt_offset)
>  {
>         /*
> --
> 2.25.1.696.g5e7596f4ac-goog
>
diff mbox series

Patch

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec167d8c41cbd..5c3d0f3a14c37 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -280,6 +280,8 @@  __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_X86_64
+/* Tell KMSAN to not instrument this function and to initialize |regs|. */
+__no_sanitize_memory
 __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
 	struct thread_info *ti;
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 187ce59aea28e..a6fc1641e2861 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -14,6 +14,8 @@ 
 
 DECLARE_PER_CPU(struct pt_regs *, irq_regs);
 
+/* Tell KMSAN to return an initialized struct pt_regs. */
+__no_sanitize_memory
 static inline struct pt_regs *get_irq_regs(void)
 {
 	return __this_cpu_read(irq_regs);
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index e2389ce9bf58a..098b1a8d6bc41 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -196,6 +196,8 @@  struct pt_regs;
 	ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO);			\
 	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+	/* Tell KMSAN to initialize |regs|. */				\
+	__no_sanitize_memory						\
 	asmlinkage long __x64_sys##name(const struct pt_regs *regs)	\
 	{								\
 		return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 5f973fed3c9ff..1f0250f14e462 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1127,6 +1127,9 @@  static void local_apic_timer_interrupt(void)
  * [ if a single-CPU system runs an SMP kernel then we call the local
  *   interrupt as well. Thus we cannot inline the local irq ... ]
  */
+
+/* Tell KMSAN to initialize |regs|. */
+__no_sanitize_memory
 __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 87b97897a8810..3d1691f81cada 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -150,6 +150,11 @@  static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
 	return true;
 }
 
+/*
+ * This function may touch stale uninitialized values on stack. Do not
+ * instrument it with KMSAN to avoid false positives.
+ */
+__no_sanitize_memory
 int get_stack_info(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info, unsigned long *visit_mask)
 {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ffd497804dbc3..5e8c6767e9916 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -424,6 +424,11 @@  void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
  * Kprobes not supported here. Set the probe on schedule instead.
  * Function graph tracer not supported too.
  */
+/*
+ * Avoid touching KMSAN state or reporting anything here, as __switch_to() does
+ * weird things with tasks.
+ */
+__no_sanitize_memory
 __visible __notrace_funcgraph struct task_struct *
 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d54cffdc7cac2..917268aee054e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -638,7 +638,11 @@  NOKPROBE_SYMBOL(do_int3);
  * Help handler running on a per-cpu (IST or entry trampoline) stack
  * to switch to the normal thread stack if the interrupted code was in
  * user mode. The actual stack switch is done in entry_64.S
+ *
  */
+
+/* This function switches the registers - don't instrument it with KMSAN. */
+__no_sanitize_memory
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
@@ -654,6 +658,11 @@  struct bad_iret_stack {
 };
 
 asmlinkage __visible notrace
+/*
+ * Dark magic happening here, let's not instrument this function.
+ * Also avoid copying any metadata by using raw __memmove().
+ */
+__no_sanitize_memory
 struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
 {
 	/*
@@ -668,10 +677,10 @@  struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
 		(struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
 	/* Copy the IRET target to the new stack. */
-	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+	__memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
 
 	/* Copy the remainder of the stack from the current stack. */
-	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+	__memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
 
 	BUG_ON(!user_mode(&new_stack->regs));
 	return new_stack;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a5937936ac75..bb1b659c12f6a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -471,6 +471,11 @@  void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
 		put_task_struct(task);
 }
 
+/*
+ * Context switch here may lead to KMSAN task state corruption. Disable KMSAN
+ * instrumentation.
+ */
+__no_sanitize_memory
 void wake_up_q(struct wake_q_head *head)
 {
 	struct wake_q_node *node = head->first;
@@ -3217,6 +3222,12 @@  prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * past. prev == current is still correct but we need to recalculate this_rq
  * because prev may have moved to another CPU.
  */
+
+/*
+ * Context switch here may lead to KMSAN task state corruption. Disable KMSAN
+ * instrumentation.
+ */
+__no_sanitize_memory
 static struct rq *finish_task_switch(struct task_struct *prev)
 	__releases(rq->lock)
 {
@@ -4052,6 +4063,12 @@  pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  *
  * WARNING: must be called with preemption disabled!
  */
+
+/*
+ * Context switch here may lead to KMSAN task state corruption. Disable KMSAN
+ * instrumentation.
+ */
+__no_sanitize_memory
 static void __sched notrace __schedule(bool preempt)
 {
 	struct task_struct *prev, *next;
@@ -6789,6 +6806,11 @@  static inline int preempt_count_equals(int preempt_offset)
 	return (nested == preempt_offset);
 }
 
+/*
+ * This function might be called from code that is not instrumented with KMSAN.
+ * Nevertheless, treat its arguments as initialized.
+ */
+__no_sanitize_memory
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
 	/*