@@ -104,6 +104,7 @@ struct taprio_sched {
u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
u32 txtime_delay;
+ ktime_t offset;
};
struct __tc_taprio_qopt_offload {
@@ -170,6 +171,13 @@ static ktime_t sched_base_time(const struct sched_gate_list *sched)
return ns_to_ktime(sched->base_time);
}
+static ktime_t taprio_get_offset(enum tk_offsets tk_offset)
+{
+ ktime_t time = ktime_get();
+
+ return ktime_sub_ns(ktime_mono_to_any(time, tk_offset), time);
+}
+
static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
{
/* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
@@ -918,6 +926,8 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
int num_tc = netdev_get_num_tc(dev);
struct sched_entry *entry, *next;
struct Qdisc *sch = q->root;
+ enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
+ ktime_t now_offset = taprio_get_offset(tk_offset);
ktime_t end_time;
int tc;
@@ -957,6 +967,14 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
end_time = ktime_add_ns(entry->end_time, next->interval);
end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
+ if (q->offset != now_offset) {
+ ktime_t diff = ktime_sub_ns(now_offset, q->offset);
+
+ end_time = ktime_add_ns(end_time, diff);
+ oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time, diff);
+ q->offset = now_offset;
+ }
+
for (tc = 0; tc < num_tc; tc++) {
if (next->gate_duration[tc] == oper->cycle_time)
next->gate_close_time[tc] = KTIME_MAX;
@@ -1205,11 +1223,13 @@ static int taprio_get_start_time(struct Qdisc *sch,
ktime_t *start)
{
struct taprio_sched *q = qdisc_priv(sch);
+ enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
ktime_t now, base, cycle;
s64 n;
base = sched_base_time(sched);
now = taprio_get_time(q);
+ q->offset = taprio_get_offset(tk_offset);
if (ktime_after(base, now)) {
*start = base;
Syzbot reported an issue in which rcu_preempt detected stalls on CPUs, and the call trace shows that the CPU is stuck on the taprio hrtimer [1] [2]. rcu_lock_release include/linux/rcupdate.h:308 [inline] rcu_read_unlock include/linux/rcupdate.h:783 [inline] advance_sched+0xb37/0xca0 net/sched/sch_taprio.c:987 __run_hrtimer kernel/time/hrtimer.c:1692 [inline] __hrtimer_run_queues+0x597/0xd00 kernel/time/hrtimer.c:1756 hrtimer_interrupt+0x396/0x990 kernel/time/hrtimer.c:1818 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1032 [inline] __sysvec_apic_timer_interrupt+0x109/0x3a0 arch/x86/kernel/apic/apic.c:1049 instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline] sysvec_apic_timer_interrupt+0x52/0xc0 arch/x86/kernel/apic/apic.c:1043 asm_sysvec_apic_timer_interrupt+0x1a/0x20 arch/x86/include/asm/idtentry.h:702 ...... Assume the clockid of the hrtimer is set to REALTIME and the hrtimer has been started. If the system time is then adjusted backwards by a significant amount, this adjustment is not taken into account in the advance_sched() function, which still calculates the hrtimer expiry based on the previous end_time. As a result, the hrtimer expiry ends up far earlier than the current system time. Consequently, this hrtimer keeps getting re-inserted as the first node in the timerqueue, causing the CPU to enter an infinite loop in __hrtimer_run_queues(), where it is stuck and unable to exit or respond to any interrupts. To address this, when calculating the start time of the hrtimer, record the offset between the current time for the configured clockid and the monotonic time. Subsequently, when setting the hrtimer expiry, check whether this offset has changed; if it has, adjust the hrtimer expiry accordingly by adding the offset difference.
[1] https://lore.kernel.org/all/00000000000022a23c061604edb3@google.com/ [2] https://lore.kernel.org/all/000000000000d929dd0614a8ba8c@google.com/ Signed-off-by: Yun Lu <luyun@kylinos.cn> Reported-by: syzbot+1acbadd9f48eeeacda29@syzkaller.appspotmail.com --- net/sched/sch_taprio.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)