net/sched: taprio: fix CPU stuck due to the taprio hrtimer

Message ID	20240503085335.1160006-1-luyun@kylinos.cn (mailing list archive)
State	Superseded
Delegated to:	Netdev Maintainers
Headers	show Received: from mailgw.kylinos.cn (mailgw.kylinos.cn [124.126.103.232]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1F43542078 for <netdev@vger.kernel.org>; Fri, 3 May 2024 08:53:46 +0000 (UTC) From: Yun Lu <luyun@kylinos.cn> To: syzbot+1acbadd9f48eeeacda29@syzkaller.appspotmail.com Cc: syzkaller-bugs@googlegroups.com, netdev@vger.kernel.org Subject: [PATCH] net/sched: taprio: fix CPU stuck due to the taprio hrtimer Date: Fri, 3 May 2024 16:53:35 +0800 Message-Id: <20240503085335.1160006-1-luyun@kylinos.cn> In-Reply-To: <00000000000022a23c061604edb3@google.com> References: <00000000000022a23c061604edb3@google.com> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable
Series	net/sched: taprio: fix CPU stuck due to the taprio hrtimer \| expand net/sched: taprio: fix CPU stuck due to the taprio hrtimer

Message ID

20240503085335.1160006-1-luyun@kylinos.cn (mailing list archive)

State

Superseded

Delegated to:

Netdev Maintainers

Headers

From: Yun Lu <luyun@kylinos.cn>
To: syzbot+1acbadd9f48eeeacda29@syzkaller.appspotmail.com
Cc: syzkaller-bugs@googlegroups.com,
	netdev@vger.kernel.org
Subject: [PATCH] net/sched: taprio: fix CPU stuck due to the taprio hrtimer
Date: Fri,  3 May 2024 16:53:35 +0800
Message-Id: <20240503085335.1160006-1-luyun@kylinos.cn>
In-Reply-To: <00000000000022a23c061604edb3@google.com>
References: <00000000000022a23c061604edb3@google.com>
Precedence: bulk
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable

Series

net/sched: taprio: fix CPU stuck due to the taprio hrtimer | expand

Context	Check	Description
netdev/series_format	warning	Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection	success	Guessed tree name to be net-next
netdev/ynl	success	Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present	success	Fixes tag not required for -next series
netdev/header_inline	success	No static functions without inline keyword in header files
netdev/build_32bit	success	Errors and warnings before: 926 this patch: 926
netdev/build_tools	success	No tools touched, skip
netdev/cc_maintainers	fail	7 maintainers not CCed: pabeni@redhat.com edumazet@google.com xiyou.wangcong@gmail.com jiri@resnulli.us vinicius.gomes@intel.com kuba@kernel.org jhs@mojatatu.com
netdev/build_clang	success	Errors and warnings before: 937 this patch: 937
netdev/verify_signedoff	success	Signed-off-by tag matches author and committer
netdev/deprecated_api	success	None detected
netdev/check_selftest	success	No net selftest shell script
netdev/verify_fixes	success	No Fixes tag
netdev/build_allmodconfig_warn	success	Errors and warnings before: 937 this patch: 937
netdev/checkpatch	success	total: 0 errors, 0 warnings, 0 checks, 55 lines checked
netdev/build_clang_rust	success	No Rust files in patch. Skipping build
netdev/kdoc	success	Errors and warnings before: 0 this patch: 0
netdev/source_inline	success	Was 0 now: 0

Context

Check

Description

netdev/series_format

warning

Single patches do not need cover letters; Target tree name not specified in the subject

netdev/tree_selection

success

Guessed tree name to be net-next

netdev/ynl

success

Generated files up to date; no warnings/errors; no diff in generated;

netdev/fixes_present

success

Fixes tag not required for -next series

netdev/header_inline

success

No static functions without inline keyword in header files

netdev/build_32bit

success

Errors and warnings before: 926 this patch: 926

netdev/build_tools

success

No tools touched, skip

netdev/cc_maintainers

fail

7 maintainers not CCed: pabeni@redhat.com edumazet@google.com xiyou.wangcong@gmail.com jiri@resnulli.us vinicius.gomes@intel.com kuba@kernel.org jhs@mojatatu.com

netdev/build_clang

success

Errors and warnings before: 937 this patch: 937

netdev/verify_signedoff

success

Signed-off-by tag matches author and committer

netdev/deprecated_api

success

None detected

netdev/check_selftest

success

No net selftest shell script

netdev/verify_fixes

success

No Fixes tag

netdev/build_allmodconfig_warn

success

Errors and warnings before: 937 this patch: 937

netdev/checkpatch

success

total: 0 errors, 0 warnings, 0 checks, 55 lines checked

netdev/build_clang_rust

success

No Rust files in patch. Skipping build

netdev/kdoc

success

Errors and warnings before: 0 this patch: 0

netdev/source_inline

success

Was 0 now: 0

Commit Message

Yun Lu May 3, 2024, 8:53 a.m. UTC

Syzbot reported issues in which rcu_preempt detected stalls on CPUs, and
the call trace shows that the CPU is stuck in the taprio hrtimer [1] [2].

 rcu_lock_release include/linux/rcupdate.h:308 [inline]
 rcu_read_unlock include/linux/rcupdate.h:783 [inline]
 advance_sched+0xb37/0xca0 net/sched/sch_taprio.c:987
 __run_hrtimer kernel/time/hrtimer.c:1692 [inline]
 __hrtimer_run_queues+0x597/0xd00 kernel/time/hrtimer.c:1756
 hrtimer_interrupt+0x396/0x990 kernel/time/hrtimer.c:1818
 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1032 [inline]
 __sysvec_apic_timer_interrupt+0x109/0x3a0 arch/x86/kernel/apic/apic.c:1049
 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline]
 sysvec_apic_timer_interrupt+0x52/0xc0 arch/x86/kernel/apic/apic.c:1043
 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702
 ......

Assume the clockid of the hrtimer is set to REALTIME and the hrtimer has
been started. If the system time is then adjusted backwards by a
significant value, this adjustment is not taken into account in the
advance_sched() function, which still calculates the hrtimer expiry time
based on the previous end_time.

This results in the hrtimer expiry time being much smaller than the current
system time. Consequently, the hrtimer keeps getting inserted as the first
node in the timerqueue, causing the CPU to enter an infinite loop in the
__hrtimer_run_queues() function, where it gets stuck and is unable to exit
or respond to any interrupts.

To address this, when calculating the start time of the hrtimer, record
the offset between the current time of the configured clockid and the
monotonic time. Subsequently, when setting the hrtimer expiry time, check
whether this offset has changed. If it has, adjust the hrtimer expiry time
accordingly by adding the difference between the new and the old offset.

[1] https://lore.kernel.org/all/00000000000022a23c061604edb3@google.com/
[2] https://lore.kernel.org/all/000000000000d929dd0614a8ba8c@google.com/

Signed-off-by: Yun Lu <luyun@kylinos.cn>
Reported-by: syzbot+1acbadd9f48eeeacda29@syzkaller.appspotmail.com
---
 net/sched/sch_taprio.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index a0d54b422186..360778f65d9e 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -104,6 +104,7 @@  struct taprio_sched {
 	u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
 	u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
 	u32 txtime_delay;
+	ktime_t offset;
 };
 
 struct __tc_taprio_qopt_offload {
@@ -170,6 +171,13 @@  static ktime_t sched_base_time(const struct sched_gate_list *sched)
 	return ns_to_ktime(sched->base_time);
 }
 
+static ktime_t taprio_get_offset(enum tk_offsets tk_offset)
+{
+	ktime_t time = ktime_get();
+
+	return ktime_sub_ns(ktime_mono_to_any(time, tk_offset), time);
+}
+
 static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
 {
 	/* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
@@ -918,6 +926,8 @@  static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 	int num_tc = netdev_get_num_tc(dev);
 	struct sched_entry *entry, *next;
 	struct Qdisc *sch = q->root;
+	enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
+	ktime_t now_offset = taprio_get_offset(tk_offset);
 	ktime_t end_time;
 	int tc;
 
@@ -957,6 +967,14 @@  static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 	end_time = ktime_add_ns(entry->end_time, next->interval);
 	end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
 
+	if (q->offset != now_offset) {
+		ktime_t diff = ktime_sub_ns(now_offset, q->offset);
+
+		end_time = ktime_add_ns(end_time, diff);
+		oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time, diff);
+		q->offset = now_offset;
+	}
+
 	for (tc = 0; tc < num_tc; tc++) {
 		if (next->gate_duration[tc] == oper->cycle_time)
 			next->gate_close_time[tc] = KTIME_MAX;
@@ -1205,11 +1223,13 @@  static int taprio_get_start_time(struct Qdisc *sch,
 				 ktime_t *start)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
+	enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
 	ktime_t now, base, cycle;
 	s64 n;
 
 	base = sched_base_time(sched);
 	now = taprio_get_time(q);
+	q->offset = taprio_get_offset(tk_offset);
 
 	if (ktime_after(base, now)) {
 		*start = base;

net/sched: taprio: fix CPU stuck due to the taprio hrtimer

Checks

Commit Message

Patch