
[v4,09/17] watchdog/hardlockup: Add a "cpu" param to watchdog_hardlockup_check()

Message ID 20230504151100.v4.9.I3a7d4dd8c23ac30ee0b607d77feb6646b64825c0@changeid (mailing list archive)
State New, archived
Series watchdog/hardlockup: Add the buddy hardlockup detector

Commit Message

Doug Anderson May 4, 2023, 10:13 p.m. UTC
In preparation for the buddy hardlockup detector where the CPU
checking for lockup might not be the currently running CPU, add a
"cpu" parameter to watchdog_hardlockup_check().

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

Changes in v4:
- ("Add a "cpu" param to watchdog_hardlockup_check()") new for v4.

 include/linux/nmi.h    |  2 +-
 kernel/watchdog.c      | 47 ++++++++++++++++++++++++++++--------------
 kernel/watchdog_perf.c |  2 +-
 3 files changed, 33 insertions(+), 18 deletions(-)

Comments

Doug Anderson May 19, 2023, 5:21 p.m. UTC | #1
Hi,

On Thu, May 11, 2023 at 7:14 AM Petr Mladek <pmladek@suse.com> wrote:
>
> On Thu 2023-05-04 15:13:41, Douglas Anderson wrote:
> > In preparation for the buddy hardlockup detector where the CPU
> > checking for lockup might not be the currently running CPU, add a
> > "cpu" parameter to watchdog_hardlockup_check().
> >
> > --- a/kernel/watchdog.c
> > +++ b/kernel/watchdog.c
> > @@ -92,14 +92,14 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
> >  static DEFINE_PER_CPU(bool, watchdog_hardlockup_processed);
> >  static unsigned long watchdog_hardlockup_dumped_stacks;
> >
> > -static bool watchdog_hardlockup_is_lockedup(void)
> > +static bool watchdog_hardlockup_is_lockedup(unsigned int cpu)
> >  {
> > -     unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
> > +     unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
>
> My radar tells me that this should be
> READ_ONCE(per_cpu(hrtimer_interrupts, cpu)) when the value might
> be modified on another CPU. Otherwise, the compiler is allowed
> to split the read into more instructions.
>
> It will be needed for the buddy detector. And it will also require
> incrementing the value in watchdog_hardlockup_interrupt_count()
> in an atomic way.
>
> Note that __this_cpu_inc_return() does not guarantee atomicity
> according to my understanding. In theory, the following should
> work because the counter will never be incremented in parallel:
>
> static unsigned long watchdog_hardlockup_interrupt_count(void)
> {
>         unsigned long count;
>
>         count = __this_cpu_read(hrtimer_interrupts);
>         count++;
>         WRITE_ONCE(*raw_cpu_ptr(&hrtimer_interrupts), count);
>
>         return count;
> }
>
> but it is nasty. A more elegant solution might be using atomic_t
> for hrtimer_interrupts counter.

I switched it over to atomic_t.
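
Roughly what that ends up looking like (just a sketch here -- the actual
v5 code isn't pasted in this thread, so take the names and details as
provisional):

static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts);

static void watchdog_hardlockup_interrupt_count(void)
{
	/* Only the CPU that owns the counter ever increments it. */
	atomic_inc(this_cpu_ptr(&hrtimer_interrupts));
}

static bool watchdog_hardlockup_is_lockedup(unsigned int cpu)
{
	/* May now be read from another (buddy) CPU, hence atomic_read(). */
	unsigned long hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu));

	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
		return true;

	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;

	return false;
}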


> > -     if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
> > +     if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
> >               return true;
> >
> > -     __this_cpu_write(hrtimer_interrupts_saved, hrint);
> > +     per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
>
> IMHO, hrtimer_interrupts_saved might be handled this way.
> The value is read/written only by this function.
>
> The buddy watchdog should see consistent values even when
> the buddy CPU goes offline. This check should never race
> because this CPU should get touched when another buddy
> gets assigned.
>
> Well, it would deserve a comment.

I spent a bunch of time thinking about this too and I agree that for
hrtimer_interrupts_saved we don't need atomic_t or even
READ_ONCE/WRITE_ONCE. I've added a comment and a note in the commit
message in v5.
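
Something along these lines for the comment (again only a sketch; the
exact wording in v5 may differ):

	/*
	 * hrtimer_interrupts_saved is written and read only from
	 * watchdog_hardlockup_is_lockedup().  Even with the buddy
	 * detector, only one CPU ever watches a given CPU at a time,
	 * and a CPU is touched whenever it gets a new watcher, so
	 * plain accesses (no atomic_t / READ_ONCE / WRITE_ONCE) are
	 * sufficient here.
	 */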

Patch

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index c6cb9bc5dc80..2c9ea1ba285c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -88,7 +88,7 @@  static inline void hardlockup_detector_disable(void) {}
 #endif
 
 #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
-void watchdog_hardlockup_check(struct pt_regs *regs);
+void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
 #endif
 
 #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f46669c1671d..367bea0167a5 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -92,14 +92,14 @@  static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(bool, watchdog_hardlockup_processed);
 static unsigned long watchdog_hardlockup_dumped_stacks;
 
-static bool watchdog_hardlockup_is_lockedup(void)
+static bool watchdog_hardlockup_is_lockedup(unsigned int cpu)
 {
-	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
+	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
 
-	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
 		return true;
 
-	__this_cpu_write(hrtimer_interrupts_saved, hrint);
+	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
 
 	return false;
 }
@@ -109,7 +109,7 @@  static void watchdog_hardlockup_interrupt_count(void)
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
-void watchdog_hardlockup_check(struct pt_regs *regs)
+void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
 {
 	/*
 	 * Check for a hardlockup by making sure the CPU's timer
@@ -117,35 +117,50 @@  void watchdog_hardlockup_check(struct pt_regs *regs)
 	 * fired multiple times before we overflow'd. If it hasn't
 	 * then this is a good indication the cpu is stuck
 	 */
-	if (watchdog_hardlockup_is_lockedup()) {
+	if (watchdog_hardlockup_is_lockedup(cpu)) {
 		unsigned int this_cpu = smp_processor_id();
+		struct cpumask backtrace_mask = *cpu_online_mask;
 
 		/* Only handle hardlockups once. */
-		if (__this_cpu_read(watchdog_hardlockup_processed))
+		if (per_cpu(watchdog_hardlockup_processed, cpu))
 			return;
 
-		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu);
+		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu);
 		print_modules();
 		print_irqtrace_events(current);
-		if (regs)
+		if (regs) {
 			show_regs(regs);
-		else
-			dump_stack();
+			cpumask_clear_cpu(cpu, &backtrace_mask);
+		} else {
+			/*
+			 * If the locked up CPU is different than the CPU we're
+			 * running on then we'll try to backtrace the CPU that
+			 * locked up and then exclude it from later backtraces.
+			 * If that fails or if we're running on the locked up
+			 * CPU, just do a normal backtrace.
+			 */
+			if (cpu != this_cpu && trigger_single_cpu_backtrace(cpu)) {
+				cpumask_clear_cpu(cpu, &backtrace_mask);
+			} else {
+				dump_stack();
+				cpumask_clear_cpu(this_cpu, &backtrace_mask);
+			}
+		}
 
 		/*
-		 * Perform all-CPU dump only once to avoid multiple hardlockups
-		 * generating interleaving traces
+		 * Perform multi-CPU dump only once to avoid multiple
+		 * hardlockups generating interleaving traces
 		 */
 		if (sysctl_hardlockup_all_cpu_backtrace &&
 		    !test_and_set_bit(0, &watchdog_hardlockup_dumped_stacks))
-			trigger_allbutself_cpu_backtrace();
+			trigger_cpumask_backtrace(&backtrace_mask);
 
 		if (hardlockup_panic)
 			nmi_panic(regs, "Hard LOCKUP");
 
-		__this_cpu_write(watchdog_hardlockup_processed, true);
+		per_cpu(watchdog_hardlockup_processed, cpu) = true;
 	} else {
-		__this_cpu_write(watchdog_hardlockup_processed, false);
+		per_cpu(watchdog_hardlockup_processed, cpu) = false;
 	}
 }
 
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index 5f3651b87ee7..9be90b2a2ea7 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -120,7 +120,7 @@  static void watchdog_overflow_callback(struct perf_event *event,
 	if (!watchdog_check_timestamp())
 		return;
 
-	watchdog_hardlockup_check(regs);
+	watchdog_hardlockup_check(smp_processor_id(), regs);
 }
 
 static int hardlockup_detector_event_create(void)
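
For reference on how the new "cpu" argument gets used: the perf path
above passes smp_processor_id(), while the buddy detector added later in
this series checks a different CPU. A paraphrased sketch of that caller
(the helper name and exact shape come from the later patch and may
differ):

/* Paraphrased from the later buddy-detector patch; not part of this patch. */
static void watchdog_buddy_check_hardlockup(void)
{
	unsigned int next_cpu = watchdog_next_cpu(smp_processor_id());

	if (next_cpu >= nr_cpu_ids)
		return;

	/*
	 * No pt_regs are available for a remote CPU, so pass NULL and let
	 * watchdog_hardlockup_check() fall back to a remote backtrace.
	 */
	watchdog_hardlockup_check(next_cpu, NULL);
}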