[v2,03/23] kcsan: Avoid checking scoped accesses from nested contexts

Message ID: 20211118081027.3175699-4-elver@google.com (mailing list archive)
State: New
Series: kcsan: Support detecting a subset of missing memory barriers

Commit Message

Marco Elver Nov. 18, 2021, 8:10 a.m. UTC
Avoid checking scoped accesses from nested contexts (such as nested
interrupts or in scheduler code) which share the same kcsan_ctx.

This is to avoid detecting false positive races of accesses in the same
thread with currently scoped accesses: consider setting up a watchpoint
for a non-scoped (normal) access that also "conflicts" with a current
scoped access. In a nested interrupt (or in the scheduler), which shares
the same kcsan_ctx, we cannot check scoped accesses set up in the parent
context -- simply ignore them in this case.

With the introduction of kcsan_ctx::disable_scoped, we can also clean up
kcsan_check_scoped_accesses()'s recursion guard, and do not need to
modify the list's prev pointer.

Signed-off-by: Marco Elver <elver@google.com>
---
 include/linux/kcsan.h |  1 +
 kernel/kcsan/core.c   | 18 +++++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

Comments

Boqun Feng Nov. 29, 2021, 8:47 a.m. UTC | #1
Hi Marco,

On Thu, Nov 18, 2021 at 09:10:07AM +0100, Marco Elver wrote:
> Avoid checking scoped accesses from nested contexts (such as nested
> interrupts or in scheduler code) which share the same kcsan_ctx.
> 
> This is to avoid detecting false positive races of accesses in the same

Could you provide an example of a false positive?

I think we do want to detect the following race:

	static int v = SOME_VALUE; // a percpu variable.
	static int other_v = ... ;

	void foo(..)
	{
		int tmp;
		int other_tmp;

		preempt_disable();
		{
			ASSERT_EXCLUSIVE_ACCESS_SCOPED(v);
			tmp = v;
			
			other_tmp = other_v; // int_handler() may run here
			
			v = tmp + 2;
		}
		preempt_enable();
	}

	void int_handler() // an interrupt handler
	{
		v++;
	}

If I understand correctly, we can detect this currently, but with this
patch, we cannot detect it if the interrupt happens while we're doing
the check for "other_tmp = other_v;", right? Of course, running tests
multiple times may eventually catch this, but I just want to understand
what this patch is for, thanks!

Regards,
Boqun

> [...]
Marco Elver Nov. 29, 2021, 10:57 a.m. UTC | #2
On Mon, Nov 29, 2021 at 04:47PM +0800, Boqun Feng wrote:
> Hi Marco,
> 
> On Thu, Nov 18, 2021 at 09:10:07AM +0100, Marco Elver wrote:
> > Avoid checking scoped accesses from nested contexts (such as nested
> > interrupts or in scheduler code) which share the same kcsan_ctx.
> > 
> > This is to avoid detecting false positive races of accesses in the same
> 
> Could you provide an example of a false positive?
> 
> I think we do want to detect the following race:
> 
> 	static int v = SOME_VALUE; // a percpu variable.
> 	static int other_v = ... ;
> 
> 	void foo(..)
> 	{
> 		int tmp;
> 		int other_tmp;
> 
> 		preempt_disable();
> 		{
> 			ASSERT_EXCLUSIVE_ACCESS_SCOPED(v);
> 			tmp = v;
> 			
> 			other_tmp = other_v; // int_handler() may run here
> 			
> 			v = tmp + 2;
> 		}
> 		preempt_enable();
> 	}
> 
> 	void int_handler() // an interrupt handler
> 	{
> 		v++;
> 	}
> 
> If I understand correctly, we can detect this currently, but with this
> patch, we cannot detect it if the interrupt happens while we're doing
> the check for "other_tmp = other_v;", right? Of course, running tests
> multiple times may eventually catch this, but I just want to understand
> what this patch is for, thanks!

The above will still be detected. Task and interrupt contexts in this
case are distinct, i.e. kcsan_ctx differ (see get_ctx()).
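
For reference, get_ctx() in kernel/kcsan/core.c is essentially:

	static __always_inline struct kcsan_ctx *get_ctx(void)
	{
		/* Task code uses the task's ctx; interrupts use a per-CPU ctx. */
		return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
	}

The watchpoint itself is global state, so the interrupt's write to 'v'
still conflicts with it regardless of which ctx the interrupt uses.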

But there are rare cases where kcsan_ctx is shared, such as nested
interrupts (NMI?), or when entering scheduler code -- which currently
has a KCSAN_SANITIZE := n, but I occasionally test it, which is how I
found this problem. The problem occurs frequently when enabling KCSAN in
kernel/sched and placing a random ASSERT_EXCLUSIVE_ACCESS_SCOPED() in
task context, or just enabling "weak memory modeling" without this fix.
You also need CONFIG_PREEMPT=y + CONFIG_KCSAN_INTERRUPT_WATCHER=y.
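
Roughly this config fragment, plus dropping the KCSAN_SANITIZE := n
from kernel/sched/Makefile by hand:

	CONFIG_KCSAN=y
	CONFIG_PREEMPT=y
	CONFIG_KCSAN_INTERRUPT_WATCHER=y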

The emphasis here really is on _shared kcsan_ctx_, which is not too
common. As noted in the commit description, the trigger is "[...] setting
up a watchpoint for a non-scoped (normal) access that also "conflicts"
with a current scoped access."

Consider this:

	static int v;
	int foo(..)
	{
		ASSERT_EXCLUSIVE_ACCESS_SCOPED(v);
		v++; // preempted during watchpoint for 'v++'
	}

Here we set up a scoped_access to be checked for v. Then on v++, a
watchpoint is set up for the normal access. While the watchpoint is set
up, the task is preempted and upon entering scheduler code, we're still
in_task() and 'current' is still the same, thus get_ctx() returns a
kcsan_ctx where the scoped_accesses list is non-empty containing the
scoped access for foo()'s ASSERT_EXCLUSIVE.
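
(Simplified from include/linux/kcsan-checks.h, the macro expands to
roughly:

	struct kcsan_scoped_access sa __kcsan_cleanup_scoped; /* unregistered at end of scope */
	kcsan_begin_scoped_access(&v, sizeof(v),
				  KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT, &sa);

i.e. kcsan_begin_scoped_access() adds &sa to get_ctx()->scoped_accesses,
and every later check_access() in that context may re-check 'v'.)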

That means, when instrumenting scheduler code or any other code called
by scheduler code or nested interrupts (anything where get_ctx() still
returns the same ctx as the parent context), it'd now perform checks
based on the parent context's scoped access; and because the parent
context also has a watchpoint set up on the variable that conflicts
with the scoped access, we'd report a nonsensical race.
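
(The sequence of events, roughly:

	instrumented access in scheduler code
	-> check_access()
	   -> kcsan_check_scoped_accesses()    /* shared ctx, list non-empty */
	      -> check_access() on the scoped 'v'
	         -> matches the parent context's watchpoint -> bogus report)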

This case is also possible:

	static int v;
	static int x;
	int foo(..)
	{
		ASSERT_EXCLUSIVE_ACCESS_SCOPED(v);
		x++; // preempted during watchpoint for 'v' after checking x++
	}

Here, all we need is for the scoped access to be checked after x++,
ending up with a watchpoint for 'v'; we then enter scheduler code, which
then checks 'v', sees the conflicting watchpoint, and reports a
nonsensical race again.

By disallowing scoped access checking for a kcsan_ctx, we simply make
sure that in such nested contexts where kcsan_ctx is shared, none of
these nonsensical races are detected or reported.

Hopefully that clarifies what this is about.

Thanks,
-- Marco
Boqun Feng Nov. 29, 2021, 2:26 p.m. UTC | #3
On Mon, Nov 29, 2021 at 11:57:30AM +0100, Marco Elver wrote:
> On Mon, Nov 29, 2021 at 04:47PM +0800, Boqun Feng wrote:
> > Hi Marco,
> > 
> > On Thu, Nov 18, 2021 at 09:10:07AM +0100, Marco Elver wrote:
> > > Avoid checking scoped accesses from nested contexts (such as nested
> > > interrupts or in scheduler code) which share the same kcsan_ctx.
> > > 
> > > This is to avoid detecting false positive races of accesses in the same
> > 
> > Could you provide an example of a false positive?
> > 
> > I think we do want to detect the following race:
> > 
> > 	static int v = SOME_VALUE; // a percpu variable.
> > 	static int other_v = ... ;
> > 
> > 	void foo(..)
> > 	{
> > 		int tmp;
> > 		int other_tmp;
> > 
> > 		preempt_disable();
> > 		{
> > 			ASSERT_EXCLUSIVE_ACCESS_SCOPED(v);
> > 			tmp = v;
> > 			
> > 			other_tmp = other_v; // int_handler() may run here
> > 			
> > 			v = tmp + 2;
> > 		}
> > 		preempt_enable();
> > 	}
> > 
> > 	void int_handler() // an interrupt handler
> > 	{
> > 		v++;
> > 	}
> > 
> > If I understand correctly, we can detect this currently, but with this
> > patch, we cannot detect it if the interrupt happens while we're doing
> > the check for "other_tmp = other_v;", right? Of course, running tests
> > multiple times may eventually catch this, but I just want to understand
> > what this patch is for, thanks!
> 
> The above will still be detected. Task and interrupt contexts in this
> case are distinct, i.e. kcsan_ctx differ (see get_ctx()).
> 

Ok, I was missing that.

> But there are rare cases where kcsan_ctx is shared, such as nested
> interrupts (NMI?), or when entering scheduler code -- which currently
> has a KCSAN_SANITIZE := n, but I occasionally test it, which is how I
> found this problem. The problem occurs frequently when enabling KCSAN in
> kernel/sched and placing a random ASSERT_EXCLUSIVE_ACCESS_SCOPED() in
> task context, or just enabling "weak memory modeling" without this fix.
> You also need CONFIG_PREEMPT=y + CONFIG_KCSAN_INTERRUPT_WATCHER=y.
> 

Thanks for the background, it's now clearer how the problem is
triggered ;-)

> The emphasis here really is on _shared kcsan_ctx_, which is not too
> common. As noted in the commit description, the trigger is "[...] setting
> up a watchpoint for a non-scoped (normal) access that also "conflicts"
> with a current scoped access."
> 
> Consider this:
> 
> 	static int v;
> 	int foo(..)
> 	{
> 		ASSERT_EXCLUSIVE_ACCESS_SCOPED(v);
> 		v++; // preempted during watchpoint for 'v++'
> 	}
> 
> Here we set up a scoped_access to be checked for v. Then on v++, a
> watchpoint is set up for the normal access. While the watchpoint is set
> up, the task is preempted and upon entering scheduler code, we're still
> in_task() and 'current' is still the same, thus get_ctx() returns a
> kcsan_ctx where the scoped_accesses list is non-empty containing the
> scoped access for foo()'s ASSERT_EXCLUSIVE.
> 
> That means, when instrumenting scheduler code or any other code called
> by scheduler code or nested interrupts (anything where get_ctx() still
> returns the same ctx as the parent context), it'd now perform checks
> based on the parent context's scoped access; and because the parent
> context also has a watchpoint set up on the variable that conflicts
> with the scoped access, we'd report a nonsensical race.
> 

Agreed.

> This case is also possible:
> 
> 	static int v;
> 	static int x;
> 	int foo(..)
> 	{
> 		ASSERT_EXCLUSIVE_ACCESS_SCOPED(v);
> 		x++; // preempted during watchpoint for 'v' after checking x++
> 	}
> 
> Here, all we need is for the scoped access to be checked after x++,
> ending up with a watchpoint for 'v'; we then enter scheduler code, which
> then checks 'v', sees the conflicting watchpoint, and reports a
> nonsensical race again.
> 

Just to be clear, in both examples, the assumption is that 'v' is a
variable that scheduler code doesn't access, right? Because if scheduler
code does access 'v', then it's a problem that KCSAN should report. Yes,
I don't know of any such variable that the scheduler exports; just
making sure here.

> By disallowing scoped access checking for a kcsan_ctx, we simply make
> sure that in such nested contexts where kcsan_ctx is shared, none of
> these nonsensical races are detected or reported.
> 
> Hopefully that clarifies what this is about.
> 

Makes sense to me, thanks.

Regards,
Boqun

> Thanks,
> -- Marco
Marco Elver Nov. 29, 2021, 2:42 p.m. UTC | #4
On Mon, 29 Nov 2021 at 15:27, Boqun Feng <boqun.feng@gmail.com> wrote:
[...]
> > This case is also possible:
> >
> >       static int v;
> >       static int x;
> >       int foo(..)
> >       {
> >               ASSERT_EXCLUSIVE_ACCESS_SCOPED(v);
> >               x++; // preempted during watchpoint for 'v' after checking x++
> >       }
> >
> > Here, all we need is for the scoped access to be checked after x++,
> > ending up with a watchpoint for 'v'; we then enter scheduler code, which
> > then checks 'v', sees the conflicting watchpoint, and reports a
> > nonsensical race again.
> >
>
> Just to be clear, in both examples, the assumption is that 'v' is a
> variable that scheduler code doesn't access, right? Because if scheduler
> code does access 'v', then it's a problem that KCSAN should report. Yes,
> I don't know of any such variable that the scheduler exports; just
> making sure here.

Right. We might miss such cases where an ASSERT_EXCLUSIVE*_SCOPED()
could have pointed out a legitimate race with a nested context that
shares ctx, like in the scheduler, where the only time to detect it is
if some state change later in the scope makes a concurrent access
possible from that point in the scope. I'm willing to bet that there's
an extremely small chance we'll ever encounter such a case (famous last
words ;-)), i.e. the initial check_access() in
kcsan_begin_scoped_access() wouldn't detect it, nor would the problem
manifest as a regular data race.
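
(The initial check mentioned is the first thing
kcsan_begin_scoped_access() does in kernel/kcsan/core.c, roughly:

	check_access(ptr, size, type, _RET_IP_); /* check once at setup */
	/* ... then add the access to ctx->scoped_accesses ... */

so a race that is already observable when the scope begins is still
caught at that point.)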

Thanks,
-- Marco

Patch

diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h
index fc266ecb2a4d..13cef3458fed 100644
--- a/include/linux/kcsan.h
+++ b/include/linux/kcsan.h
@@ -21,6 +21,7 @@
  */
 struct kcsan_ctx {
 	int disable_count; /* disable counter */
+	int disable_scoped; /* disable scoped access counter */
 	int atomic_next; /* number of following atomic ops */
 
 	/*
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index e34a1710b7bc..bd359f8ee63a 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -204,15 +204,17 @@ check_access(const volatile void *ptr, size_t size, int type, unsigned long ip);
 static noinline void kcsan_check_scoped_accesses(void)
 {
 	struct kcsan_ctx *ctx = get_ctx();
-	struct list_head *prev_save = ctx->scoped_accesses.prev;
 	struct kcsan_scoped_access *scoped_access;
 
-	ctx->scoped_accesses.prev = NULL;  /* Avoid recursion. */
+	if (ctx->disable_scoped)
+		return;
+
+	ctx->disable_scoped++;
 	list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) {
 		check_access(scoped_access->ptr, scoped_access->size,
 			     scoped_access->type, scoped_access->ip);
 	}
-	ctx->scoped_accesses.prev = prev_save;
+	ctx->disable_scoped--;
 }
 
 /* Rules for generic atomic accesses. Called from fast-path. */
@@ -465,6 +467,15 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned
 		goto out;
 	}
 
+	/*
+	 * Avoid races of scoped accesses from nested interrupts (or scheduler).
+	 * Assume setting up a watchpoint for a non-scoped (normal) access that
+	 * also conflicts with a current scoped access. In a nested interrupt,
+	 * which shares the context, it would check a conflicting scoped access.
+	 * To avoid, disable scoped access checking.
+	 */
+	ctx->disable_scoped++;
+
 	/*
 	 * Save and restore the IRQ state trace touched by KCSAN, since KCSAN's
 	 * runtime is entered for every memory access, and potentially useful
@@ -578,6 +589,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type, unsigned
 	if (!kcsan_interrupt_watcher)
 		local_irq_restore(irq_flags);
 	kcsan_restore_irqtrace(current);
+	ctx->disable_scoped--;
 out:
 	user_access_restore(ua_flags);
 }