diff mbox series

net/sched: taprio: fix CPU stuck due to the taprio hrtimer

Message ID 20240503091844.1161175-1-luyun@kylinos.cn (mailing list archive)
State Not Applicable
Delegated to: Netdev Maintainers
Headers show
Series net/sched: taprio: fix CPU stuck due to the taprio hrtimer | expand

Checks

Context Check Description
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 926 this patch: 926
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers fail 7 maintainers not CCed: pabeni@redhat.com edumazet@google.com xiyou.wangcong@gmail.com jiri@resnulli.us vinicius.gomes@intel.com kuba@kernel.org jhs@mojatatu.com
netdev/build_clang success Errors and warnings before: 937 this patch: 937
netdev/verify_signedoff fail author Signed-off-by missing
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 937 this patch: 937
netdev/checkpatch warning WARNING: Commit log lines starting with '#' are dropped by git as comments
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-05-03--15-00 (tests: 1001)

Commit Message

Yun Lu May 3, 2024, 9:18 a.m. UTC
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git  master

---
 net/sched/sch_taprio.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Comments

syzbot May 3, 2024, 12:03 p.m. UTC | #1
Hello,

syzbot has tested the proposed patch but the reproducer is still triggering an issue:
UBSAN: array-index-out-of-bounds in ktime_mono_to_any

------------[ cut here ]------------
UBSAN: array-index-out-of-bounds in kernel/time/timekeeping.c:927:20
index 3 is out of range for type 'ktime_t *[3]' (aka 'long long *[3]')
CPU: 0 PID: 5514 Comm: syz-executor.0 Not tainted 6.9.0-rc6-syzkaller-00131-gf03359bca01b-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
 ubsan_epilogue lib/ubsan.c:231 [inline]
 __ubsan_handle_out_of_bounds+0x121/0x150 lib/ubsan.c:429
 ktime_mono_to_any+0xf7/0x100 kernel/time/timekeeping.c:927
 taprio_get_offset net/sched/sch_taprio.c:178 [inline]
 taprio_get_start_time+0xf6/0x2a0 net/sched/sch_taprio.c:1232
 taprio_change+0x30c5/0x42d0 net/sched/sch_taprio.c:1963
 taprio_init+0x9da/0xc80 net/sched/sch_taprio.c:2135
 qdisc_create+0x9d6/0x1190 net/sched/sch_api.c:1355
 tc_modify_qdisc+0xa26/0x1e40 net/sched/sch_api.c:1776
 rtnetlink_rcv_msg+0x89d/0x10d0 net/core/rtnetlink.c:6595
 netlink_rcv_skb+0x1e5/0x430 net/netlink/af_netlink.c:2559
 netlink_unicast_kernel net/netlink/af_netlink.c:1335 [inline]
 netlink_unicast+0x7ec/0x980 net/netlink/af_netlink.c:1361
 netlink_sendmsg+0x8e1/0xcb0 net/netlink/af_netlink.c:1905
 sock_sendmsg_nosec net/socket.c:730 [inline]
 __sock_sendmsg+0x223/0x270 net/socket.c:745
 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584
 ___sys_sendmsg net/socket.c:2638 [inline]
 __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fca4dc7de69
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fca4e9720c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007fca4ddabf80 RCX: 00007fca4dc7de69
RDX: 0000000000000000 RSI: 00000000200007c0 RDI: 0000000000000004
RBP: 00007fca4dcca47a R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 000000000000000b R14: 00007fca4ddabf80 R15: 00007fffc66d5928
 </TASK>
---[ end trace ]---


Tested on:

commit:         f03359bc Merge tag 'for-6.9-rc6-tag' of git://git.kern..
git tree:       https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
console output: https://syzkaller.appspot.com/x/log.txt?x=17aef62f180000
kernel config:  https://syzkaller.appspot.com/x/.config?x=38cdad974684e704
dashboard link: https://syzkaller.appspot.com/bug?extid=1acbadd9f48eeeacda29
compiler:       Debian clang version 15.0.6, GNU ld (GNU Binutils for Debian) 2.40
patch:          https://syzkaller.appspot.com/x/patch.diff?x=1665ecc0980000
Jakub Kicinski May 3, 2024, 9:34 p.m. UTC | #2
On Fri,  3 May 2024 17:18:44 +0800 Yun Lu wrote:
> #syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git  master

Why are you CCing netdev@ on this? 
diff mbox series

Patch

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index a0d54b422186..360778f65d9e 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -104,6 +104,7 @@  struct taprio_sched {
 	u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
 	u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
 	u32 txtime_delay;
+	ktime_t offset;
 };
 
 struct __tc_taprio_qopt_offload {
@@ -170,6 +171,28 @@  static ktime_t sched_base_time(const struct sched_gate_list *sched)
 	return ns_to_ktime(sched->base_time);
 }
 
+/*
+ * Return the current offset of the clock selected by @tk_offset relative to
+ * CLOCK_MONOTONIC, i.e. ktime_mono_to_any(now, tk_offset) - now.
+ *
+ * ktime_mono_to_any() uses @tk_offset as an array index, so a value at or
+ * above TK_OFFS_MAX must never reach it: syzbot reported an out-of-bounds
+ * index of 3 into a three-entry table through this very call. Such a value
+ * is treated as "no offset" and 0 is returned.
+ * NOTE(review): taprio presumably stores TK_OFFS_MAX in q->tk_offset to mean
+ * CLOCK_MONOTONIC, for which a zero offset is the right answer - confirm.
+ */
+static ktime_t taprio_get_offset(enum tk_offsets tk_offset)
+{
+	ktime_t time;
+
+	if (tk_offset >= TK_OFFS_MAX)
+		return 0;
+
+	time = ktime_get();
+
+	return ktime_sub_ns(ktime_mono_to_any(time, tk_offset), time);
+}
+
 static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
 {
 	/* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
@@ -918,6 +926,8 @@  static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 	int num_tc = netdev_get_num_tc(dev);
 	struct sched_entry *entry, *next;
 	struct Qdisc *sch = q->root;
+	enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
+	ktime_t now_offset = taprio_get_offset(tk_offset);
 	ktime_t end_time;
 	int tc;
 
@@ -957,6 +967,14 @@  static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 	end_time = ktime_add_ns(entry->end_time, next->interval);
 	end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
 
+	if (q->offset != now_offset) {
+		ktime_t diff = ktime_sub_ns(now_offset, q->offset);
+
+		end_time = ktime_add_ns(end_time, diff);
+		oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time, diff);
+		q->offset = now_offset;
+	}
+
 	for (tc = 0; tc < num_tc; tc++) {
 		if (next->gate_duration[tc] == oper->cycle_time)
 			next->gate_close_time[tc] = KTIME_MAX;
@@ -1205,11 +1223,13 @@  static int taprio_get_start_time(struct Qdisc *sch,
 				 ktime_t *start)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
+	enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
 	ktime_t now, base, cycle;
 	s64 n;
 
 	base = sched_base_time(sched);
 	now = taprio_get_time(q);
+	q->offset = taprio_get_offset(tk_offset);
 
 	if (ktime_after(base, now)) {
 		*start = base;