diff mbox series

kcov: properly check for softirq context

Message ID 20240729022158.92059-1-andrey.konovalov@linux.dev (mailing list archive)
State Accepted
Commit 7d4df2dad312f270d62fecb0e5c8b086c6d7dcfc
Headers show
Series kcov: properly check for softirq context | expand

Commit Message

andrey.konovalov@linux.dev July 29, 2024, 2:21 a.m. UTC
From: Andrey Konovalov <andreyknvl@gmail.com>

When collecting coverage from softirqs, KCOV uses in_serving_softirq() to
check whether the code is running in the softirq context. Unfortunately,
in_serving_softirq() is > 0 even when the code is running in the hardirq
or NMI context for hardirqs and NMIs that happened during a softirq.

As a result, if a softirq handler contains a remote coverage collection
section and a hardirq with another remote coverage collection section
happens during handling the softirq, KCOV incorrectly detects a nested
softirq coverage collection section and prints a WARNING, as reported
by syzbot.

This issue was exposed by commit a7f3813e589f ("usb: gadget: dummy_hcd:
Switch to hrtimer transfer scheduler"), which switched dummy_hcd to using
hrtimer and made the timer's callback be executed in the hardirq context.

Change the related checks in KCOV to account for this behavior of
in_serving_softirq() and make KCOV ignore remote coverage collection
sections in the hardirq and NMI contexts.

This prevents the WARNING printed by syzbot but does not fix the inability
of KCOV to collect coverage from __usb_hcd_giveback_urb() when dummy_hcd
is in use (caused by a7f3813e589f); a separate patch is required for that.

Reported-by: syzbot+2388cdaeb6b10f0c13ac@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=2388cdaeb6b10f0c13ac
Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts")
Cc: stable@vger.kernel.org
Signed-off-by: Andrey Konovalov <andreyknvl@gmail.com>
---
 kernel/kcov.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

Comments

Marco Elver July 29, 2024, 9:42 a.m. UTC | #1
On Mon, 29 Jul 2024 at 04:22, <andrey.konovalov@linux.dev> wrote:
>
> From: Andrey Konovalov <andreyknvl@gmail.com>
>
> When collecting coverage from softirqs, KCOV uses in_serving_softirq() to
> check whether the code is running in the softirq context. Unfortunately,
> in_serving_softirq() is > 0 even when the code is running in the hardirq
> or NMI context for hardirqs and NMIs that happened during a softirq.
>
> As a result, if a softirq handler contains a remote coverage collection
> section and a hardirq with another remote coverage collection section
> happens during handling the softirq, KCOV incorrectly detects a nested
> softirq coverage collection section and prints a WARNING, as reported
> by syzbot.
>
> This issue was exposed by commit a7f3813e589f ("usb: gadget: dummy_hcd:
> Switch to hrtimer transfer scheduler"), which switched dummy_hcd to using
> hrtimer and made the timer's callback be executed in the hardirq context.
>
> Change the related checks in KCOV to account for this behavior of
> in_serving_softirq() and make KCOV ignore remote coverage collection
> sections in the hardirq and NMI contexts.
>
> This prevents the WARNING printed by syzbot but does not fix the inability
> of KCOV to collect coverage from the __usb_hcd_giveback_urb when dummy_hcd
> is in use (caused by a7f3813e589f); a separate patch is required for that.
>
> Reported-by: syzbot+2388cdaeb6b10f0c13ac@syzkaller.appspotmail.com
> Closes: https://syzkaller.appspot.com/bug?extid=2388cdaeb6b10f0c13ac
> Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts")
> Cc: stable@vger.kernel.org
> Signed-off-by: Andrey Konovalov <andreyknvl@gmail.com>
> ---
>  kernel/kcov.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/kcov.c b/kernel/kcov.c
> index f0a69d402066e..274b6b7c718de 100644
> --- a/kernel/kcov.c
> +++ b/kernel/kcov.c
> @@ -161,6 +161,15 @@ static void kcov_remote_area_put(struct kcov_remote_area *area,
>         kmsan_unpoison_memory(&area->list, sizeof(area->list));
>  }
>
> +/*
> + * Unlike in_serving_softirq(), this function returns false when called during
> + * a hardirq or an NMI that happened in the softirq context.
> + */
> +static inline bool in_softirq_really(void)
> +{
> +       return in_serving_softirq() && !in_hardirq() && !in_nmi();
> +}

Not sure you need this function. Check if just this will give you what you want:

  interrupt_context_level() == 1

I think the below condition could then also just become:

  if (interrupt_context_level() == 1 && t->kcov_softirq)

Although the softirq_count() helper has a special PREEMPT_RT variant,
and interrupt_context_level() doesn't, so it's not immediately obvious
to me if that's also ok on PREEMPT_RT kernels.

Maybe some RT folks can help confirm that using
interrupt_context_level()==1 does what your above function does also
on RT kernels.

>  static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
>  {
>         unsigned int mode;
> @@ -170,7 +179,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
>          * so we ignore code executed in interrupts, unless we are in a remote
>          * coverage collection section in a softirq.
>          */
> -       if (!in_task() && !(in_serving_softirq() && t->kcov_softirq))
> +       if (!in_task() && !(in_softirq_really() && t->kcov_softirq))
>                 return false;
>         mode = READ_ONCE(t->kcov_mode);
>         /*
> @@ -849,7 +858,7 @@ void kcov_remote_start(u64 handle)
>
>         if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
>                 return;
> -       if (!in_task() && !in_serving_softirq())
> +       if (!in_task() && !in_softirq_really())
>                 return;
>
>         local_lock_irqsave(&kcov_percpu_data.lock, flags);
> @@ -991,7 +1000,7 @@ void kcov_remote_stop(void)
>         int sequence;
>         unsigned long flags;
>
> -       if (!in_task() && !in_serving_softirq())
> +       if (!in_task() && !in_softirq_really())
>                 return;
>
>         local_lock_irqsave(&kcov_percpu_data.lock, flags);
> --
> 2.25.1
>
Marco Elver July 29, 2024, 2:49 p.m. UTC | #2
On Mon, 29 Jul 2024 at 11:42, Marco Elver <elver@google.com> wrote:
>
> On Mon, 29 Jul 2024 at 04:22, <andrey.konovalov@linux.dev> wrote:
> >
> > From: Andrey Konovalov <andreyknvl@gmail.com>
> >
> > When collecting coverage from softirqs, KCOV uses in_serving_softirq() to
> > check whether the code is running in the softirq context. Unfortunately,
> > in_serving_softirq() is > 0 even when the code is running in the hardirq
> > or NMI context for hardirqs and NMIs that happened during a softirq.
> >
> > As a result, if a softirq handler contains a remote coverage collection
> > section and a hardirq with another remote coverage collection section
> > happens during handling the softirq, KCOV incorrectly detects a nested
> > softirq coverage collection section and prints a WARNING, as reported
> > by syzbot.
> >
> > This issue was exposed by commit a7f3813e589f ("usb: gadget: dummy_hcd:
> > Switch to hrtimer transfer scheduler"), which switched dummy_hcd to using
> > hrtimer and made the timer's callback be executed in the hardirq context.
> >
> > Change the related checks in KCOV to account for this behavior of
> > in_serving_softirq() and make KCOV ignore remote coverage collection
> > sections in the hardirq and NMI contexts.
> >
> > This prevents the WARNING printed by syzbot but does not fix the inability
> > of KCOV to collect coverage from the __usb_hcd_giveback_urb when dummy_hcd
> > is in use (caused by a7f3813e589f); a separate patch is required for that.
> >
> > Reported-by: syzbot+2388cdaeb6b10f0c13ac@syzkaller.appspotmail.com
> > Closes: https://syzkaller.appspot.com/bug?extid=2388cdaeb6b10f0c13ac
> > Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts")
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Andrey Konovalov <andreyknvl@gmail.com>
> > ---
> >  kernel/kcov.c | 15 ++++++++++++---
> >  1 file changed, 12 insertions(+), 3 deletions(-)
> >
> > diff --git a/kernel/kcov.c b/kernel/kcov.c
> > index f0a69d402066e..274b6b7c718de 100644
> > --- a/kernel/kcov.c
> > +++ b/kernel/kcov.c
> > @@ -161,6 +161,15 @@ static void kcov_remote_area_put(struct kcov_remote_area *area,
> >         kmsan_unpoison_memory(&area->list, sizeof(area->list));
> >  }
> >
> > +/*
> > + * Unlike in_serving_softirq(), this function returns false when called during
> > + * a hardirq or an NMI that happened in the softirq context.
> > + */
> > +static inline bool in_softirq_really(void)
> > +{
> > +       return in_serving_softirq() && !in_hardirq() && !in_nmi();
> > +}
>
> Not sure you need this function. Check if just this will give you what you want:
>
>   interrupt_context_level() == 1
>
> I think the below condition could then also just become:
>
>   if (interrupt_context_level() == 1 && t->kcov_softirq)
>
> Although the softirq_count() helper has a special PREEMPT_RT variant,
> and interrupt_context_level() doesn't, so it's not immediately obvious
> to me if that's also ok on PREEMPT_RT kernels.
>
> Maybe some RT folks can help confirm that using
> interrupt_context_level()==1 does what your above function does also
> on RT kernels.

Hmm, so Thomas just told me that softirqs always run in threaded
context on RT and because there's no nesting,
interrupt_context_level() won't work for what I had imagined here.

So your current solution is fine.

Acked-by: Marco Elver <elver@google.com>

> >  static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
> >  {
> >         unsigned int mode;
> > @@ -170,7 +179,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
> >          * so we ignore code executed in interrupts, unless we are in a remote
> >          * coverage collection section in a softirq.
> >          */
> > -       if (!in_task() && !(in_serving_softirq() && t->kcov_softirq))
> > +       if (!in_task() && !(in_softirq_really() && t->kcov_softirq))
> >                 return false;
> >         mode = READ_ONCE(t->kcov_mode);
> >         /*
> > @@ -849,7 +858,7 @@ void kcov_remote_start(u64 handle)
> >
> >         if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
> >                 return;
> > -       if (!in_task() && !in_serving_softirq())
> > +       if (!in_task() && !in_softirq_really())
> >                 return;
> >
> >         local_lock_irqsave(&kcov_percpu_data.lock, flags);
> > @@ -991,7 +1000,7 @@ void kcov_remote_stop(void)
> >         int sequence;
> >         unsigned long flags;
> >
> > -       if (!in_task() && !in_serving_softirq())
> > +       if (!in_task() && !in_softirq_really())
> >                 return;
> >
> >         local_lock_irqsave(&kcov_percpu_data.lock, flags);
> > --
> > 2.25.1
> >
diff mbox series

Patch

diff --git a/kernel/kcov.c b/kernel/kcov.c
index f0a69d402066e..274b6b7c718de 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -161,6 +161,15 @@  static void kcov_remote_area_put(struct kcov_remote_area *area,
 	kmsan_unpoison_memory(&area->list, sizeof(area->list));
 }
 
+/*
+ * Unlike in_serving_softirq(), this function returns false when called during
+ * a hardirq or an NMI that happened in the softirq context.
+ */
+static inline bool in_softirq_really(void)
+{
+	return in_serving_softirq() && !in_hardirq() && !in_nmi();
+}
+
 static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
 {
 	unsigned int mode;
@@ -170,7 +179,7 @@  static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
 	 * so we ignore code executed in interrupts, unless we are in a remote
 	 * coverage collection section in a softirq.
 	 */
-	if (!in_task() && !(in_serving_softirq() && t->kcov_softirq))
+	if (!in_task() && !(in_softirq_really() && t->kcov_softirq))
 		return false;
 	mode = READ_ONCE(t->kcov_mode);
 	/*
@@ -849,7 +858,7 @@  void kcov_remote_start(u64 handle)
 
 	if (WARN_ON(!kcov_check_handle(handle, true, true, true)))
 		return;
-	if (!in_task() && !in_serving_softirq())
+	if (!in_task() && !in_softirq_really())
 		return;
 
 	local_lock_irqsave(&kcov_percpu_data.lock, flags);
@@ -991,7 +1000,7 @@  void kcov_remote_stop(void)
 	int sequence;
 	unsigned long flags;
 
-	if (!in_task() && !in_serving_softirq())
+	if (!in_task() && !in_softirq_really())
 		return;
 
 	local_lock_irqsave(&kcov_percpu_data.lock, flags);