[RFC,v3,08/10] sched/fair: Prepare migrate_task_rq_fair() for per-task throttling

Message ID 20240711130004.2157737-9-vschneid@redhat.com
State New, archived
Series sched/fair: Defer CFS throttle to user entry

Commit Message

Valentin Schneider July 11, 2024, 1 p.m. UTC
Later commits will change CFS bandwidth control throttling from a
per-cfs_rq basis to a per-task basis. This means special care needs to be
taken around any transition a task can make into or out of a cfs_rq.

To ease review, the transitions are patched with dummy helpers that are
implemented later in the series.

Add helpers to migrate_task_rq_fair() to cover CPU migration. Even if the
task stays within the same task group, each cfs_rq has its own runtime
accounting, so the task needs to be throttled or unthrottled according to
the throttle status of the destination cfs_rq.
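
A minimal sketch of what the migration-time check could look like once the
dummy helpers are implemented (illustrative only: it assumes the existing
task_group() accessor and the throttled_hierarchy() helper visible in the
first hunk below; the actual implementation arrives later in the series and
may differ):

	static inline bool
	task_needs_migrate_throttling(struct task_struct *p, unsigned int dst_cpu)
	{
		/*
		 * The task keeps its task_group across the migration, but the
		 * destination CPU has its own cfs_rq for that group, with its
		 * own locally-acquired runtime and hence its own throttle
		 * state.
		 */
		return throttled_hierarchy(task_group(p)->cfs_rq[dst_cpu]);
	}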

Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
 kernel/sched/fair.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ec4cf7308a586..b2242307677ca 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5694,8 +5694,11 @@ static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
 	return cfs_bandwidth_used() && cfs_rq->throttle_count;
 }
 
+static inline bool task_has_throttle_work(struct task_struct *p) { return false; }
 static inline bool task_needs_throttling(struct task_struct *p) { return false; }
+static inline bool task_needs_migrate_throttling(struct task_struct *p, unsigned int dst_cpu) { return false; }
 static inline void task_throttle_setup(struct task_struct *p) { }
+static inline void task_throttle_cancel_migrate(struct task_struct *p, int dst_cpu) { }
 static inline void task_throttle_cancel(struct task_struct *p) { }
 
 /*
@@ -6626,8 +6629,11 @@ static inline int throttled_lb_pair(struct task_group *tg,
 	return 0;
 }
 
+static inline bool task_has_throttle_work(struct task_struct *p) { return false; }
 static inline bool task_needs_throttling(struct task_struct *p) { return false; }
+static inline bool task_needs_migrate_throttling(struct task_struct *p, unsigned int dst_cpu) { return false; }
 static inline void task_throttle_setup(struct task_struct *p) { }
+static inline void task_throttle_cancel_migrate(struct task_struct *p, int dst_cpu) { }
 static inline void task_throttle_cancel(struct task_struct *p) { }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8308,6 +8314,24 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 	se->avg.last_update_time = 0;
 
 	update_scan_period(p, new_cpu);
+
+	if (!cfs_bandwidth_used())
+		return;
+	/*
+	 * When the runtime within a cfs_bandwidth is depleted, all underlying
+	 * cfs_rq's can have (approximately) sched_cfs_bandwidth_slice() runtime
+	 * remaining.
+	 *
+	 * This means all tg->cfs_rq[]'s do not get throttled at the exact same
+	 * time: some may still have a bit of runtime left. Thus, even if the
+	 * task is staying within the same cgroup, and under the same
+	 * cfs_bandwidth, the cfs_rq it migrates to might have a different
+	 * throttle status - resync is needed.
+	 */
+	if (task_needs_migrate_throttling(p, new_cpu))
+		task_throttle_setup(p);
+	else if (task_has_throttle_work(p))
+		task_throttle_cancel_migrate(p, new_cpu);
 }
 
 static void task_dead_fair(struct task_struct *p)
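
To make the comment in the last hunk concrete, consider a worked example
(assuming the default 5ms bandwidth slice, kernel.sched_cfs_bandwidth_slice_us
= 5000): a task group with quota = 10ms per period runs on two CPUs, and
each CPU's cfs_rq pulls a 5ms slice from the global pool, emptying it. If
CPU0 burns through its local 5ms first, CPU0's cfs_rq throttles while
CPU1's cfs_rq still has runtime left. A task migrating from CPU1 to CPU0
at that point moves from an unthrottled cfs_rq to a throttled one (and
vice versa in the other direction), hence the resync in
migrate_task_rq_fair() even when no cgroup change is involved.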