
[06/10] sched_ext: Introduce SCX_OPS_BUILTIN_IDLE_PER_NODE

Message ID 20241220154107.287478-7-arighi@nvidia.com (mailing list archive)
State New
Series [01/10] sched/topology: introduce for_each_numa_hop_node() / sched_numa_hop_node()

Commit Message

Andrea Righi Dec. 20, 2024, 3:11 p.m. UTC
Add the new scheduler flag SCX_OPS_BUILTIN_IDLE_PER_NODE, which allows each
scx scheduler to choose between a single global flat idle cpumask and
multiple per-node idle cpumasks.
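
For example, a scheduler opts into the per-node cpumasks by setting this
flag in its struct sched_ext_ops. A minimal sketch, not taken from this
series (callback implementations omitted, the "pernode" names are
illustrative only):

  SEC(".struct_ops.link")
  struct sched_ext_ops pernode_ops = {
	/* Use per-node idle cpumasks instead of the single global one. */
	.flags	= SCX_OPS_BUILTIN_IDLE_PER_NODE,
	.name	= "pernode_idle",
  };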

Signed-off-by: Andrea Righi <arighi@nvidia.com>
---
 kernel/sched/ext.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

Patch

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 769e43fdea1e..148ec04d4a0a 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -124,6 +124,12 @@  enum scx_ops_flags {
 	 */
 	SCX_OPS_SWITCH_PARTIAL	= 1LLU << 3,
 
+	/*
+	 * If set, enable per-node idle cpumasks. If clear, use a single global
+	 * flat idle cpumask.
+	 */
+	SCX_OPS_BUILTIN_IDLE_PER_NODE = 1LLU << 4,
+
 	/*
 	 * CPU cgroup support flags
 	 */
@@ -133,6 +139,7 @@  enum scx_ops_flags {
 				  SCX_OPS_ENQ_LAST |
 				  SCX_OPS_ENQ_EXITING |
 				  SCX_OPS_SWITCH_PARTIAL |
+				  SCX_OPS_BUILTIN_IDLE_PER_NODE |
 				  SCX_OPS_HAS_CGROUP_WEIGHT,
 };
 
@@ -4974,6 +4981,16 @@  static int validate_ops(const struct sched_ext_ops *ops)
 		return -EINVAL;
 	}
 
+	/*
+	 * SCX_OPS_BUILTIN_IDLE_PER_NODE requires built-in CPU idle
+	 * selection policy to be enabled.
+	 */
+	if ((ops->flags & SCX_OPS_BUILTIN_IDLE_PER_NODE) &&
+	    (ops->update_idle && !(ops->flags & SCX_OPS_KEEP_BUILTIN_IDLE))) {
+		scx_ops_error("SCX_OPS_BUILTIN_IDLE_PER_NODE requires CPU idle selection enabled");
+		return -EINVAL;
+	}
+
 	return 0;
 }
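
The validate_ops() check above implies that a scheduler implementing
ops.update_idle() must also set SCX_OPS_KEEP_BUILTIN_IDLE in order to use
the per-node idle cpumasks. A hedged sketch of a flag combination that
passes this check, using the BPF_STRUCT_OPS() helper from the scx BPF
headers (names and the empty callback body are illustrative only):

  void BPF_STRUCT_OPS(example_update_idle, s32 cpu, bool idle)
  {
	/* Custom idle bookkeeping; the built-in tracking stays enabled. */
  }

  SEC(".struct_ops.link")
  struct sched_ext_ops example_ops = {
	.update_idle	= (void *)example_update_idle,
	.flags		= SCX_OPS_KEEP_BUILTIN_IDLE |
			  SCX_OPS_BUILTIN_IDLE_PER_NODE,
	.name		= "example",
  };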