@@ -16,8 +16,9 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace;
extern int sysctl_hung_task_check_count;
extern unsigned int sysctl_hung_task_panic;
+extern unsigned long sysctl_hung_task_timeout_msecs;
extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
+extern unsigned long sysctl_hung_task_check_interval_msecs;
extern int sysctl_hung_task_warnings;
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -37,16 +37,23 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
* the RCU grace period. So it needs to be upper-bound.
*/
#define HUNG_TASK_LOCK_BREAK (HZ / 10)
+#define MSEC_PER_SEC 1000L
/*
- * Zero means infinite timeout - no checking done:
+ * Zero here, together with a zero sysctl_hung_task_timeout_msecs, means infinite timeout - no checking done:
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
/*
- * Zero (default value) means use sysctl_hung_task_timeout_secs:
+ * Zero (default value) means only use sysctl_hung_task_timeout_secs
*/
-unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
+unsigned long __read_mostly sysctl_hung_task_timeout_msecs;
+
+/*
+ * Zero (default value) means use
+ * sysctl_hung_task_timeout_secs * MSEC_PER_SEC + sysctl_hung_task_timeout_msecs
+ */
+unsigned long __read_mostly sysctl_hung_task_check_interval_msecs;
int __read_mostly sysctl_hung_task_warnings = 10;
@@ -108,7 +115,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
t->last_switch_time = jiffies;
return;
}
- if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+
+ if (time_is_after_jiffies(t->last_switch_time + msecs_to_jiffies(timeout)))
return;
trace_sched_process_hang(t);
@@ -126,13 +134,17 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
if (sysctl_hung_task_warnings) {
if (sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
- pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
- t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
+
+ pr_err("INFO: task %s:%d blocked for more than %ld.%03ld seconds.\n",
+ t->comm, t->pid,
+ jiffies_to_msecs(jiffies - t->last_switch_time) / MSEC_PER_SEC,
+ jiffies_to_msecs(jiffies - t->last_switch_time) % MSEC_PER_SEC);
pr_err(" %s %s %.*s\n",
print_tainted(), init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+ "\"echo 0 > /proc/sys/kernel/hung_task_timeout_msecs\""
" disables this message.\n");
sched_show_task(t);
hung_task_show_lock = true;
@@ -217,7 +229,7 @@ static long hung_timeout_jiffies(unsigned long last_checked,
unsigned long timeout)
{
/* timeout of 0 will disable the watchdog */
- return timeout ? last_checked - jiffies + timeout * HZ :
+ return timeout ? last_checked - jiffies + msecs_to_jiffies(timeout) :
MAX_SCHEDULE_TIMEOUT;
}
@@ -281,8 +293,9 @@ static int watchdog(void *dummy)
set_user_nice(current, 0);
for ( ; ; ) {
- unsigned long timeout = sysctl_hung_task_timeout_secs;
- unsigned long interval = sysctl_hung_task_check_interval_secs;
+ unsigned long timeout = sysctl_hung_task_timeout_secs * MSEC_PER_SEC +
+ sysctl_hung_task_timeout_msecs;
+ unsigned long interval = sysctl_hung_task_check_interval_msecs;
long t;
if (interval == 0)
@@ -2476,6 +2476,14 @@ static struct ctl_table kern_table[] = {
.extra1 = SYSCTL_ZERO,
},
{
+ .procname = "hung_task_timeout_msecs",
+ .data = &sysctl_hung_task_timeout_msecs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
+ },
+ {
.procname = "hung_task_timeout_secs",
.data = &sysctl_hung_task_timeout_secs,
.maxlen = sizeof(unsigned long),
@@ -2484,8 +2492,8 @@ static struct ctl_table kern_table[] = {
.extra2 = &hung_task_timeout_max,
},
{
- .procname = "hung_task_check_interval_secs",
- .data = &sysctl_hung_task_check_interval_secs,
+ .procname = "hung_task_check_interval_msecs",
+ .data = &sysctl_hung_task_check_interval_msecs,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_dohung_task_timeout_secs,
Currently hung_task_check_interval_secs and hung_task_timeout_secs only support whole seconds. In some cases a task stays in the TASK_UNINTERRUPTIBLE state for less than 1 second, yet we may still need the hung task detector to trigger a panic (to get a ramdump) or to print the tasks on all CPUs. Change hung_task_check_interval_secs to hung_task_check_interval_msecs so that the check interval is given in milliseconds. Add a hung_task_timeout_msecs file to set the millisecond part of the timeout: task timeout = hung_task_timeout_secs * 1000 + hung_task_timeout_msecs. Signed-off-by: yang che <chey84736@gmail.com> --- v2->v3: Fix some format issues. Use msecs_to_jiffies() and jiffies_to_msecs(). Because timeout = secs * 1000 + msecs, initializing sysctl_hung_task_timeout_msecs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT * MSEC_PER_SEC would make the default timeout double CONFIG_DEFAULT_HUNG_TASK_TIMEOUT, so it is left at zero. v1->v2: Add hung_task_check_interval_millisecs and hung_task_timeout_millisecs. Fix writing to the millisecond file silently overriding the setting in the seconds file. [1]https://lore.kernel.org/lkml/CAN_w4MWMfoDGfpON-bYHrU=KuJG2vpFj01ZbN4r-iwM4AyyuGw@mail.gmail.com [2]https://lore.kernel.org/lkml/20200705171633.GU25523@casper.infradead.org/ include/linux/sched/sysctl.h | 3 ++- kernel/hung_task.c | 31 ++++++++++++++++++++++--------- kernel/sysctl.c | 12 ++++++++++-- 3 files changed, 34 insertions(+), 12 deletions(-)