@@ -9,6 +9,7 @@
enum scx_internal_consts {
SCX_NR_ONLINE_OPS = SCX_OP_IDX(init),
SCX_DSP_DFL_MAX_BATCH = 32,
+ SCX_DSP_MAX_LOOPS = 32, /* balance_scx() dispatch-loop iterations before it kicks the CPU and bails out */
SCX_WATCHDOG_MAX_TIMEOUT = 30 * HZ,
};
@@ -168,6 +169,7 @@ static DEFINE_PER_CPU(struct scx_dsp_ctx, scx_dsp_ctx);
void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice,
u64 enq_flags);
+void scx_bpf_kick_cpu(s32 cpu, u64 flags);
struct scx_task_iter {
struct sched_ext_entity cursor;
@@ -1243,6 +1245,7 @@ static int balance_scx(struct rq *rq, struct task_struct *prev,
struct scx_rq *scx_rq = &rq->scx;
struct scx_dsp_ctx *dspc = this_cpu_ptr(&scx_dsp_ctx);
bool prev_on_scx = prev->sched_class == &ext_sched_class;
+ int nr_loops = SCX_DSP_MAX_LOOPS;
lockdep_assert_rq_held(rq);
@@ -1297,6 +1300,20 @@ static int balance_scx(struct rq *rq, struct task_struct *prev,
return 1;
if (consume_dispatch_q(rq, rf, &scx_dsq_global))
return 1;
+
+ /*
+ * ops.dispatch() can trap us in this loop by repeatedly
+ * dispatching ineligible tasks. Break out once in a while to
+ * allow the watchdog to run. As IRQ can't be enabled in
+ * balance(), we want to complete this scheduling cycle and then
+ * start a new one. IOW, we want to call resched_curr() on the
+ * next, most likely idle, task, not the current one. Use
+ * scx_bpf_kick_cpu() for deferred kicking.
+ */
+ if (unlikely(!--nr_loops)) {
+ scx_bpf_kick_cpu(cpu_of(rq), 0);
+ break;
+ }
} while (dspc->nr_tasks);
return 0;
@@ -25,6 +25,7 @@ const volatile u64 slice_ns = SCX_SLICE_DFL;
const volatile bool switch_all;
const volatile u32 stall_user_nth;
const volatile u32 stall_kernel_nth;
+const volatile u32 dsp_inf_loop_after;
const volatile s32 disallow_tgid;
u32 test_error_cnt;
@@ -184,6 +185,22 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
s32 pid;
int i;
+ if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) {
+ struct task_struct *p;
+
+ /*
+ * PID 2 should be kthreadd which should mostly be idle and off
+ * the scheduler. Let's keep dispatching it to force the kernel
+ * to call this function over and over again.
+ */
+ p = bpf_task_from_pid(2);
+ if (p) {
+ scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, slice_ns, 0);
+ bpf_task_release(p);
+ return;
+ }
+ }
+
if (!idx || !cnt) {
scx_bpf_error("failed to lookup idx[%p], cnt[%p]", idx, cnt);
return;
@@ -20,12 +20,13 @@ const char help_fmt[] =
"\n"
"See the top-level comment in .bpf.c for more details.\n"
"\n"
-"Usage: %s [-a] [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-d PID]\n"
+"Usage: %s [-a] [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-d PID]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n"
" -t COUNT Stall every COUNT'th user thread\n"
" -T COUNT Stall every COUNT'th kernel thread\n"
+" -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n"
" -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n"
" -h Display this help and exit\n";
@@ -50,7 +51,7 @@ int main(int argc, char **argv)
skel = scx_example_qmap__open();
assert(skel);
- while ((opt = getopt(argc, argv, "ahs:e:t:T:d:")) != -1) {
+ while ((opt = getopt(argc, argv, "ahs:e:t:T:l:d:")) != -1) {
switch (opt) {
case 'a':
skel->rodata->switch_all = true;
@@ -67,6 +68,9 @@ int main(int argc, char **argv)
case 'T':
skel->rodata->stall_kernel_nth = strtoul(optarg, NULL, 0);
break;
+ case 'l':
+ skel->rodata->dsp_inf_loop_after = strtoul(optarg, NULL, 0);
+ break;
case 'd':
skel->rodata->disallow_tgid = strtol(optarg, NULL, 0);
if (skel->rodata->disallow_tgid < 0)