sched: watchdog: Touch kernel watchdog in sched code

Message ID 20200304213941.112303-1-xii@google.com (mailing list archive)
State New, archived
Series sched: watchdog: Touch kernel watchdog in sched code

Commit Message

Xi Wang March 4, 2020, 9:39 p.m. UTC
The main purpose of the kernel watchdog is to test whether the
scheduler can still schedule tasks on a cpu. To reduce the latency of
periodically invoking the watchdog reset in thread context, we can
simply touch the watchdog from pick_next_task in the scheduler.
Compared to actually resetting the watchdog from the cpu stop /
migration threads, we lose coverage of two steps: the migration
thread actually getting picked, and the context switch to the
migration thread actually happening. Both steps are heavily protected
by kernel locks and unlikely to fail silently. Thus the change should
provide the same level of protection with less overhead.

The new way vs the old way of touching the watchdog is configurable
via:

/proc/sys/kernel/watchdog_touch_in_thread_interval

The value means:
0: Always touch watchdog from pick_next_task
1: Always touch watchdog from migration thread
N (N>0): Touch watchdog from migration thread once in every N
         invocations, and touch watchdog from pick_next_task for
         other invocations.

Suggested-by: Paul Turner <pjt@google.com>
Signed-off-by: Xi Wang <xii@google.com>
---
 kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++++--
 kernel/sysctl.c     | 11 ++++++++++-
 kernel/watchdog.c   | 39 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 78 insertions(+), 8 deletions(-)
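
As a quick illustration of the sysctl semantics above, here is a
minimal standalone (userspace) sketch of the selection logic. It
mirrors the check the patch below adds to watchdog_timer_fn(), but the
names and structure here are purely illustrative:

#include <stdio.h>

/*
 * 0:      always touch from pick_next_task
 * 1:      always touch from the migration thread
 * N > 1:  migration thread once in every N timer invocations,
 *         pick_next_task for the rest
 */
static unsigned int touch_in_thread_interval = 10;
static unsigned int wakeups_skipped;

static const char *watchdog_tick(void)
{
	if (!touch_in_thread_interval ||
	    touch_in_thread_interval > wakeups_skipped + 1) {
		wakeups_skipped = touch_in_thread_interval ?
				  wakeups_skipped + 1 : 0;
		return "touch from pick_next_task";
	}
	wakeups_skipped = 0;
	return "touch from migration thread";
}

int main(void)
{
	for (int i = 1; i <= 20; i++)
		printf("tick %2d: %s\n", i, watchdog_tick());
	return 0;
}

With the patch's default interval of 10, this takes the pick_next_task
path for nine ticks and the migration-thread path on every tenth.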

Comments

Steven Rostedt March 5, 2020, 3:11 a.m. UTC | #1
On Wed,  4 Mar 2020 13:39:41 -0800
Xi Wang <xii@google.com> wrote:

> The main purpose of kernel watchdog is to test whether scheduler can
> still schedule tasks on a cpu. In order to reduce latency from
> periodically invoking watchdog reset in thread context, we can simply
> touch watchdog from pick_next_task in scheduler. Compared to actually
> resetting watchdog from cpu stop / migration threads, we lose coverage
> on: a migration thread actually get picked and we actually context
> switch to the migration thread. Both steps are heavily protected by
> kernel locks and unlikely to silently fail. Thus the change would
> provide the same level of protection with less overhead.

Have any measurements showing the drop in overhead?

> 
> The new way vs the old way to touch the watchdogs is configurable
> from:
> 
> /proc/sys/kernel/watchdog_touch_in_thread_interval
> 
> The value means:
> 0: Always touch watchdog from pick_next_task
> 1: Always touch watchdog from migration thread
> N (N>0): Touch watchdog from migration thread once in every N
>          invocations, and touch watchdog from pick_next_task for
>          other invocations.
> 
> Suggested-by: Paul Turner <pjt@google.com>
> Signed-off-by: Xi Wang <xii@google.com>
> ---
>  kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++++--
>  kernel/sysctl.c     | 11 ++++++++++-
>  kernel/watchdog.c   | 39 ++++++++++++++++++++++++++++++++++-----
>  3 files changed, 78 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 1a9983da4408..9d8e00760d1c 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3898,6 +3898,27 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
>  	schedstat_inc(this_rq()->sched_count);
>  }
>  
> +#ifdef CONFIG_SOFTLOCKUP_DETECTOR
> +
> +DEFINE_PER_CPU(bool, sched_should_touch_watchdog);
> +
> +void touch_watchdog_from_sched(void);
> +
> +/* Helper called by watchdog code */
> +void resched_for_watchdog(void)
> +{
> +	unsigned long flags;
> +	struct rq *rq = this_rq();
> +
> +	this_cpu_write(sched_should_touch_watchdog, true);

Perhaps we should have a preempt_disable, otherwise it is possible
to get preempted here.

-- Steve

> +	raw_spin_lock_irqsave(&rq->lock, flags);
> +	/* Trigger resched for code in pick_next_task to touch watchdog */
> +	resched_curr(rq);
> +	raw_spin_unlock_irqrestore(&rq->lock, flags);
> +}
> +
> +#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
> +
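
A minimal sketch of the guard being suggested here, assuming
resched_for_watchdog() could be reached from preemptible context (as
noted later in the thread it is called from the watchdog hrtimer,
where preemption is already disabled, so this may be unnecessary);
apart from the added pair of calls it follows the patch:

/*
 * Hypothetical variant with the suggested guard: keep the task pinned
 * to this CPU so the per-CPU flag, this_rq() and resched_curr() all
 * refer to the same runqueue.
 */
void resched_for_watchdog(void)
{
	unsigned long flags;
	struct rq *rq;

	preempt_disable();
	rq = this_rq();
	this_cpu_write(sched_should_touch_watchdog, true);
	raw_spin_lock_irqsave(&rq->lock, flags);
	/* Trigger resched so pick_next_task touches the watchdog */
	resched_curr(rq);
	raw_spin_unlock_irqrestore(&rq->lock, flags);
	preempt_enable();
}
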
Peter Zijlstra March 5, 2020, 7:57 a.m. UTC | #2
On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> The main purpose of kernel watchdog is to test whether scheduler can
> still schedule tasks on a cpu. In order to reduce latency from
> periodically invoking watchdog reset in thread context, we can simply
> touch watchdog from pick_next_task in scheduler. Compared to actually
> resetting watchdog from cpu stop / migration threads, we lose coverage
> on: a migration thread actually get picked and we actually context
> switch to the migration thread. Both steps are heavily protected by
> kernel locks and unlikely to silently fail. Thus the change would
> provide the same level of protection with less overhead.
> 
> The new way vs the old way to touch the watchdogs is configurable
> from:
> 
> /proc/sys/kernel/watchdog_touch_in_thread_interval
> 
> The value means:
> 0: Always touch watchdog from pick_next_task
> 1: Always touch watchdog from migration thread
> N (N>0): Touch watchdog from migration thread once in every N
>          invocations, and touch watchdog from pick_next_task for
>          other invocations.
> 

This is configurable madness. What are we really trying to do here?
Thomas Gleixner March 5, 2020, 6:07 p.m. UTC | #3
Peter Zijlstra <peterz@infradead.org> writes:

> On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
>> The main purpose of kernel watchdog is to test whether scheduler can
>> still schedule tasks on a cpu. In order to reduce latency from
>> periodically invoking watchdog reset in thread context, we can simply
>> touch watchdog from pick_next_task in scheduler. Compared to actually
>> resetting watchdog from cpu stop / migration threads, we lose coverage
>> on: a migration thread actually get picked and we actually context
>> switch to the migration thread. Both steps are heavily protected by
>> kernel locks and unlikely to silently fail. Thus the change would
>> provide the same level of protection with less overhead.
>> 
>> The new way vs the old way to touch the watchdogs is configurable
>> from:
>> 
>> /proc/sys/kernel/watchdog_touch_in_thread_interval
>> 
>> The value means:
>> 0: Always touch watchdog from pick_next_task
>> 1: Always touch watchdog from migration thread
>> N (N>0): Touch watchdog from migration thread once in every N
>>          invocations, and touch watchdog from pick_next_task for
>>          other invocations.
>> 
>
> This is configurable madness. What are we really trying to do here?

Create yet another knob which will be advertised in random web blogs to
solve all problems of the world and some more. Like the one which got
silently turned into a NOOP ~10 years ago :)
Xi Wang March 5, 2020, 9:41 p.m. UTC | #4
Measuring jitter with a userspace busy loop showed that a 4us peak in
the histogram was flattened (Cascade Lake). So the effect is likely a
reduction of overhead/jitter by about 4us.

The code in resched_for_watchdog should be OK, since it is always
called from the watchdog hrtimer function?

Why support the option to alternate between thread context and
touching in sched: it might be a little risky to completely switch to
the touch-in-sched method. Touching in sched 9 out of 10 times still
captures most of the latency benefit. I can remove the option or
change it to on/off if desired.

Advertising the knob on random blogs: Maybe I should create a blog :)

-Xi


On Thu, Mar 5, 2020 at 10:07 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Peter Zijlstra <peterz@infradead.org> writes:
>
> > On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> >> The main purpose of kernel watchdog is to test whether scheduler can
> >> still schedule tasks on a cpu. In order to reduce latency from
> >> periodically invoking watchdog reset in thread context, we can simply
> >> touch watchdog from pick_next_task in scheduler. Compared to actually
> >> resetting watchdog from cpu stop / migration threads, we lose coverage
> >> on: a migration thread actually get picked and we actually context
> >> switch to the migration thread. Both steps are heavily protected by
> >> kernel locks and unlikely to silently fail. Thus the change would
> >> provide the same level of protection with less overhead.
> >>
> >> The new way vs the old way to touch the watchdogs is configurable
> >> from:
> >>
> >> /proc/sys/kernel/watchdog_touch_in_thread_interval
> >>
> >> The value means:
> >> 0: Always touch watchdog from pick_next_task
> >> 1: Always touch watchdog from migration thread
> >> N (N>0): Touch watchdog from migration thread once in every N
> >>          invocations, and touch watchdog from pick_next_task for
> >>          other invocations.
> >>
> >
> > This is configurable madness. What are we really trying to do here?
>
> Create yet another knob which will be advertised in random web blogs to
> solve all problems of the world and some more. Like the one which got
> silently turned into a NOOP ~10 years ago :)
>
>
Paul Turner March 5, 2020, 10:07 p.m. UTC | #5
On Thu, Mar 5, 2020 at 10:07 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Peter Zijlstra <peterz@infradead.org> writes:
>
> > On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> >> The main purpose of kernel watchdog is to test whether scheduler can
> >> still schedule tasks on a cpu. In order to reduce latency from
> >> periodically invoking watchdog reset in thread context, we can simply
> >> touch watchdog from pick_next_task in scheduler. Compared to actually
> >> resetting watchdog from cpu stop / migration threads, we lose coverage
> >> on: a migration thread actually get picked and we actually context
> >> switch to the migration thread. Both steps are heavily protected by
> >> kernel locks and unlikely to silently fail. Thus the change would
> >> provide the same level of protection with less overhead.
> >>
> >> The new way vs the old way to touch the watchdogs is configurable
> >> from:
> >>
> >> /proc/sys/kernel/watchdog_touch_in_thread_interval
> >>
> >> The value means:
> >> 0: Always touch watchdog from pick_next_task
> >> 1: Always touch watchdog from migration thread
> >> N (N>0): Touch watchdog from migration thread once in every N
> >>          invocations, and touch watchdog from pick_next_task for
> >>          other invocations.
> >>
> >
> > This is configurable madness. What are we really trying to do here?
>
> Create yet another knob which will be advertised in random web blogs to
> solve all problems of the world and some more. Like the one which got
> silently turned into a NOOP ~10 years ago :)
>

The knob can obviously be removed; it's vestigial and reflects caution
from when we were implementing / rolling things over to it.  We have
default values that we know work at scale. I don't think this actually
needs or wants to be tunable beyond on or off (and even that could be
strictly compile- or boot-time only).
Paul Turner March 5, 2020, 10:11 p.m. UTC | #6
On Wed, Mar 4, 2020 at 11:57 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 04, 2020 at 01:39:41PM -0800, Xi Wang wrote:
> > The main purpose of kernel watchdog is to test whether scheduler can
> > still schedule tasks on a cpu. In order to reduce latency from
> > periodically invoking watchdog reset in thread context, we can simply
> > touch watchdog from pick_next_task in scheduler. Compared to actually
> > resetting watchdog from cpu stop / migration threads, we lose coverage
> > on: a migration thread actually get picked and we actually context
> > switch to the migration thread. Both steps are heavily protected by
> > kernel locks and unlikely to silently fail. Thus the change would
> > provide the same level of protection with less overhead.
> >
> > The new way vs the old way to touch the watchdogs is configurable
> > from:
> >
> > /proc/sys/kernel/watchdog_touch_in_thread_interval
> >
> > The value means:
> > 0: Always touch watchdog from pick_next_task
> > 1: Always touch watchdog from migration thread
> > N (N>0): Touch watchdog from migration thread once in every N
> >          invocations, and touch watchdog from pick_next_task for
> >          other invocations.
> >
>
> This is configurable madness. What are we really trying to do here?

See reply to Thomas, no config is actually required here.  Focusing on
the intended outcome:

The goal is to reduce jitter, since we're periodically preempting
other classes to run the watchdog.  Even on a single CPU this is
measurable as jitter in the us range.  What increases the motivation
is that this disruption has recently been magnified by CPU "gifts",
which require evicting the whole core when one of the siblings
schedules one of these watchdog threads.

The main outcome being asserted here is that we could actually
exercise pick_next_task if required -- there are other potential
things this will catch, but generally speaking they are much more
braindead (e.g. a bug in pick_next_task itself).
Peter Zijlstra March 6, 2020, 8:28 a.m. UTC | #7
A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?
A: Top-posting.
Q: What is the most annoying thing in e-mail?
Peter Zijlstra March 6, 2020, 8:40 a.m. UTC | #8
On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> The goal is to improve jitter since we're constantly periodically
> preempting other classes to run the watchdog.   Even on a single CPU
> this is measurable as jitter in the us range.  But, what increases the
> motivation is this disruption has been recently magnified by CPU
> "gifts" which require evicting the whole core when one of the siblings
> schedules one of these watchdog threads.
> 
> The majority outcome being asserted here is that we could actually
> exercise pick_next_task if required -- there are other potential
> things this will catch, but they are much more braindead generally
> speaking (e.g. a bug in pick_next_task itself).

I still utterly hate what the patch does though; there is no way I'll
have watchdog code hook in the scheduler like this. That's just asking
for trouble.

Why isn't it sufficient to sample the existing context switch counters
from the watchdog? And why can't we fix that?
Xi Wang March 6, 2020, 10:34 p.m. UTC | #9
On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > The goal is to improve jitter since we're constantly periodically
> > preempting other classes to run the watchdog.   Even on a single CPU
> > this is measurable as jitter in the us range.  But, what increases the
> > motivation is this disruption has been recently magnified by CPU
> > "gifts" which require evicting the whole core when one of the siblings
> > schedules one of these watchdog threads.
> >
> > The majority outcome being asserted here is that we could actually
> > exercise pick_next_task if required -- there are other potential
> > things this will catch, but they are much more braindead generally
> > speaking (e.g. a bug in pick_next_task itself).
>
> I still utterly hate what the patch does though; there is no way I'll
> have watchdog code hook in the scheduler like this. That's just asking
> for trouble.
>
> Why isn't it sufficient to sample the existing context switch counters
> from the watchdog? And why can't we fix that?

We could go into pick_next_task and repick the same task. There won't
be a context switch, but we still want to touch the watchdog. I assume
such a counter also needs to be per cpu and incremented inside the rq
lock. There doesn't seem to be an existing one that fits this purpose.
Peter Zijlstra Oct. 5, 2020, 11:19 a.m. UTC | #10
On Fri, Mar 06, 2020 at 02:34:20PM -0800, Xi Wang wrote:
> On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > > The goal is to improve jitter since we're constantly periodically
> > > preempting other classes to run the watchdog.   Even on a single CPU
> > > this is measurable as jitter in the us range.  But, what increases the
> > > motivation is this disruption has been recently magnified by CPU
> > > "gifts" which require evicting the whole core when one of the siblings
> > > schedules one of these watchdog threads.
> > >
> > > The majority outcome being asserted here is that we could actually
> > > exercise pick_next_task if required -- there are other potential
> > > things this will catch, but they are much more braindead generally
> > > speaking (e.g. a bug in pick_next_task itself).
> >
> > I still utterly hate what the patch does though; there is no way I'll
> > have watchdog code hook in the scheduler like this. That's just asking
> > for trouble.
> >
> > Why isn't it sufficient to sample the existing context switch counters
> > from the watchdog? And why can't we fix that?
> 
> We could go to pick next and repick the same task. There won't be a
> context switch but we still want to hold the watchdog. I assume such a
> counter also needs to be per cpu and inside the rq lock. There doesn't
> seem to be an existing one that fits this purpose.

Sorry, your reply got lost, but I just ran into something that reminded
me of this.

There's sched_count. That's currently schedstat, but if you can find a
spot in a hot cacheline (from schedule()'s perspective) then it
should be cheap to increment unconditionally.

If only someone were to write a useful cacheline perf tool (and no that
c2c trainwreck doesn't count).
Xi Wang Oct. 6, 2020, 2:21 a.m. UTC | #11
On Mon, Oct 5, 2020 at 4:19 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Fri, Mar 06, 2020 at 02:34:20PM -0800, Xi Wang wrote:
> > On Fri, Mar 6, 2020 at 12:40 AM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Thu, Mar 05, 2020 at 02:11:49PM -0800, Paul Turner wrote:
> > > > The goal is to improve jitter since we're constantly periodically
> > > > preempting other classes to run the watchdog.   Even on a single CPU
> > > > this is measurable as jitter in the us range.  But, what increases the
> > > > motivation is this disruption has been recently magnified by CPU
> > > > "gifts" which require evicting the whole core when one of the siblings
> > > > schedules one of these watchdog threads.
> > > >
> > > > The majority outcome being asserted here is that we could actually
> > > > exercise pick_next_task if required -- there are other potential
> > > > things this will catch, but they are much more braindead generally
> > > > speaking (e.g. a bug in pick_next_task itself).
> > >
> > > I still utterly hate what the patch does though; there is no way I'll
> > > have watchdog code hook in the scheduler like this. That's just asking
> > > for trouble.
> > >
> > > Why isn't it sufficient to sample the existing context switch counters
> > > from the watchdog? And why can't we fix that?
> >
> > We could go to pick next and repick the same task. There won't be a
> > context switch but we still want to hold the watchdog. I assume such a
> > counter also needs to be per cpu and inside the rq lock. There doesn't
> > seem to be an existing one that fits this purpose.
>
> Sorry, your reply got lost, but I just ran into something that reminded
> me of this.
>
> There's sched_count. That's currently schedstat, but if you can find a
> spot in a hot cacheline (from schedule()'s perspective) then it
> should be cheap to increment unconditionally.
>
> If only someone were to write a useful cacheline perf tool (and no that
> c2c trainwreck doesn't count).
>

Thanks, I'll try the alternative implementation.

-Xi
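
For reference, the alternative being discussed would have the watchdog
sample a scheduler-maintained counter instead of hooking
pick_next_task. A rough sketch, assuming __schedule() increments a
per-CPU/per-rq sched_count unconditionally rather than only under
schedstats; this is not the eventual upstream implementation, and
last_seen_sched_count, scheduler_made_progress() and
this_sched_count() are hypothetical names:

static DEFINE_PER_CPU(unsigned int, last_seen_sched_count);

static bool scheduler_made_progress(unsigned int count)
{
	/* Compare against the value seen on the previous watchdog tick */
	if (count != __this_cpu_read(last_seen_sched_count)) {
		__this_cpu_write(last_seen_sched_count, count);
		return true;
	}
	return false;
}

/* In watchdog_timer_fn(), with this_sched_count() as an assumed
 * accessor for the current CPU's counter:
 *
 *	if (scheduler_made_progress(this_sched_count()))
 *		__touch_watchdog();
 *	else
 *		queue the stop-work / let the softlockup logic fire
 */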

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a9983da4408..9d8e00760d1c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3898,6 +3898,27 @@  static inline void schedule_debug(struct task_struct *prev, bool preempt)
 	schedstat_inc(this_rq()->sched_count);
 }
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+DEFINE_PER_CPU(bool, sched_should_touch_watchdog);
+
+void touch_watchdog_from_sched(void);
+
+/* Helper called by watchdog code */
+void resched_for_watchdog(void)
+{
+	unsigned long flags;
+	struct rq *rq = this_rq();
+
+	this_cpu_write(sched_should_touch_watchdog, true);
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	/* Trigger resched for code in pick_next_task to touch watchdog */
+	resched_curr(rq);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
+
 /*
  * Pick up the highest-prio task:
  */
@@ -3927,7 +3948,7 @@  pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 			p = pick_next_task_idle(rq);
 		}
 
-		return p;
+		goto out;
 	}
 
 restart:
@@ -3951,11 +3972,22 @@  pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	for_each_class(class) {
 		p = class->pick_next_task(rq);
 		if (p)
-			return p;
+			goto out;
 	}
 
 	/* The idle class should always have a runnable task: */
 	BUG();
+
+out:
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+	if (this_cpu_read(sched_should_touch_watchdog)) {
+		touch_watchdog_from_sched();
+		this_cpu_write(sched_should_touch_watchdog, false);
+	}
+#endif
+
+	return p;
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ad5b88a53c5a..adb4b11fbccb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -118,6 +118,9 @@  extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
 #ifndef CONFIG_MMU
 extern int sysctl_nr_trim_pages;
 #endif
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+extern unsigned int sysctl_watchdog_touch_in_thread_interval;
+#endif
 
 /* Constants used for minimum and  maximum */
 #ifdef CONFIG_LOCKUP_DETECTOR
@@ -961,6 +964,13 @@  static struct ctl_table kern_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "watchdog_touch_in_thread_interval",
+		.data		= &sysctl_watchdog_touch_in_thread_interval,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_SMP
 	{
 		.procname	= "softlockup_all_cpu_backtrace",
@@ -996,7 +1006,6 @@  static struct ctl_table kern_table[] = {
 #endif /* CONFIG_SMP */
 #endif
 #endif
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname       = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b6b1f54a7837..f9138c29db48 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -49,6 +49,16 @@  static struct cpumask watchdog_allowed_mask __read_mostly;
 struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+/*
+ * 0: Always touch watchdog from pick_next_task
+ * 1: Always touch watchdog from migration thread
+ * N (N>0): Touch watchdog from migration thread once in every N invocations,
+ *          and touch watchdog from pick_next_task for other invocations.
+ */
+unsigned int sysctl_watchdog_touch_in_thread_interval = 10;
+#endif
+
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -356,6 +366,9 @@  static int softlockup_fn(void *data)
 	return 0;
 }
 
+static DEFINE_PER_CPU(unsigned int, num_watchdog_wakeup_skipped);
+void resched_for_watchdog(void);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -371,11 +384,20 @@  static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	if (completion_done(this_cpu_ptr(&softlockup_completion))) {
-		reinit_completion(this_cpu_ptr(&softlockup_completion));
-		stop_one_cpu_nowait(smp_processor_id(),
-				softlockup_fn, NULL,
-				this_cpu_ptr(&softlockup_stop_work));
+	if ((!sysctl_watchdog_touch_in_thread_interval ||
+	  sysctl_watchdog_touch_in_thread_interval > this_cpu_read(num_watchdog_wakeup_skipped) + 1)) {
+		this_cpu_write(num_watchdog_wakeup_skipped, sysctl_watchdog_touch_in_thread_interval ?
+		  this_cpu_read(num_watchdog_wakeup_skipped) + 1 : 0);
+		/* touch watchdog from pick_next_task */
+		resched_for_watchdog();
+	} else {
+		this_cpu_write(num_watchdog_wakeup_skipped, 0);
+		if (completion_done(this_cpu_ptr(&softlockup_completion))) {
+			reinit_completion(this_cpu_ptr(&softlockup_completion));
+			stop_one_cpu_nowait(smp_processor_id(),
+					softlockup_fn, NULL,
+					this_cpu_ptr(&softlockup_stop_work));
+		}
 	}
 
 	/* .. and repeat */
@@ -526,6 +548,13 @@  static int softlockup_start_fn(void *data)
 	return 0;
 }
 
+
+/* Similar to watchdog thread function but called from pick_next_task */
+void touch_watchdog_from_sched(void)
+{
+	__touch_watchdog();
+}
+
 static void softlockup_start_all(void)
 {
 	int cpu;