@@ -3393,16 +3393,17 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
else
return prev_cpu;
} else {
- bool found;
s32 cpu;
- cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0, &found);
- p->scx.selected_cpu = cpu;
- if (found) {
+ cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
+ if (cpu >= 0) {
p->scx.slice = SCX_SLICE_DFL;
p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
+ } else {
+ cpu = prev_cpu;
}
+ p->scx.selected_cpu = cpu;
if (rq_bypass)
__scx_add_event(SCX_EV_BYPASS_DISPATCH, 1);
@@ -411,22 +411,26 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
*
* 5. Pick any idle CPU usable by the task.
*
- * Step 3 and 4 are performed only if the system has, respectively, multiple
- * LLC domains / multiple NUMA nodes (see scx_selcpu_topo_llc and
- * scx_selcpu_topo_numa).
+ * Step 3 and 4 are performed only if the system has, respectively,
+ * multiple LLCs / multiple NUMA nodes (see scx_selcpu_topo_llc and
+ * scx_selcpu_topo_numa) and they don't contain the same subset of CPUs.
+ *
+ * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, the search will always
+ * begin in @prev_cpu's node and proceed to other nodes in order of
+ * increasing distance.
+ *
+ * Return the picked CPU if idle, or a negative value otherwise.
*
* NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
* we never call ops.select_cpu() for them, see select_task_rq().
*/
-s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags, bool *found)
+s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags)
{
const struct cpumask *llc_cpus = NULL;
const struct cpumask *numa_cpus = NULL;
int node = scx_cpu_node_if_enabled(prev_cpu);
s32 cpu;
- *found = false;
-
/*
* This is necessary to protect llc_cpus.
*/
@@ -465,7 +469,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
if (cpus_share_cache(cpu, prev_cpu) &&
scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
- goto cpu_found;
+ goto out_unlock;
}
/*
@@ -487,7 +491,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
(!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
!cpumask_empty(idle_cpumask(waker_node)->cpu)) {
if (cpumask_test_cpu(cpu, p->cpus_ptr))
- goto cpu_found;
+ goto out_unlock;
}
}
@@ -502,7 +506,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
if (cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
- goto cpu_found;
+ goto out_unlock;
}
/*
@@ -511,7 +515,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
if (llc_cpus) {
cpu = pick_idle_cpu_in_node(llc_cpus, node, SCX_PICK_IDLE_CORE);
if (cpu >= 0)
- goto cpu_found;
+ goto out_unlock;
}
/*
@@ -520,7 +524,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
if (numa_cpus) {
cpu = pick_idle_cpu_in_node(numa_cpus, node, SCX_PICK_IDLE_CORE);
if (cpu >= 0)
- goto cpu_found;
+ goto out_unlock;
}
/*
@@ -533,7 +537,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
*/
cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags | SCX_PICK_IDLE_CORE);
if (cpu >= 0)
- goto cpu_found;
+ goto out_unlock;
/*
* Give up if we're strictly looking for a full-idle SMT
@@ -550,7 +554,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
*/
if (scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
- goto cpu_found;
+ goto out_unlock;
}
/*
@@ -559,7 +563,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
if (llc_cpus) {
cpu = pick_idle_cpu_in_node(llc_cpus, node, 0);
if (cpu >= 0)
- goto cpu_found;
+ goto out_unlock;
}
/*
@@ -568,7 +572,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
if (numa_cpus) {
cpu = pick_idle_cpu_in_node(numa_cpus, node, 0);
if (cpu >= 0)
- goto cpu_found;
+ goto out_unlock;
}
/*
@@ -581,13 +585,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
*/
cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
if (cpu >= 0)
- goto cpu_found;
-
- cpu = prev_cpu;
- goto out_unlock;
+ goto out_unlock;
-cpu_found:
- *found = true;
out_unlock:
rcu_read_unlock();
@@ -819,6 +818,9 @@ __bpf_kfunc int scx_bpf_cpu_node(s32 cpu)
__bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
u64 wake_flags, bool *is_idle)
{
+#ifdef CONFIG_SMP
+ s32 cpu;
+#endif
if (!ops_cpu_valid(prev_cpu, NULL))
goto prev_cpu;
@@ -829,7 +831,11 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
goto prev_cpu;
#ifdef CONFIG_SMP
- return scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0, is_idle);
+ cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
+ if (cpu >= 0) {
+ *is_idle = true;
+ return cpu;
+ }
#endif
prev_cpu:
@@ -27,7 +27,7 @@ static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node
}
#endif /* CONFIG_SMP */
-s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags, bool *found);
+s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags);
void scx_idle_enable(struct sched_ext_ops *ops);
void scx_idle_disable(void);
int scx_idle_init(void);
Make scx_select_cpu_dfl() more consistent with the other idle-related APIs by returning a negative value when an idle CPU isn't found. No functional changes, this is purely a refactoring. Signed-off-by: Andrea Righi <arighi@nvidia.com> --- kernel/sched/ext.c | 9 ++++---- kernel/sched/ext_idle.c | 50 +++++++++++++++++++++++------------------ kernel/sched/ext_idle.h | 2 +- 3 files changed, 34 insertions(+), 27 deletions(-)