@@ -889,6 +889,10 @@ struct mm_struct {
* mm nr_cpus_allowed updates.
*/
raw_spinlock_t cpus_allowed_lock;
+ /*
+ * @cid_work: Work item to run the mm_cid scan.
+ */
+ struct work_struct cid_work;
#endif
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* size of all page tables */
@@ -1185,6 +1189,8 @@ enum mm_cid_state {
MM_CID_LAZY_PUT = (1U << 31),
};
+extern void task_mm_cid_work(struct work_struct *work);
+
static inline bool mm_cid_is_unset(int cid)
{
return cid == MM_CID_UNSET;
@@ -1257,12 +1263,15 @@ static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *
if (!mm->pcpu_cid)
return -ENOMEM;
mm_init_cid(mm, p);
+ INIT_WORK(&mm->cid_work, task_mm_cid_work);
return 0;
}
#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
static inline void mm_destroy_cid(struct mm_struct *mm)
{
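+	/* Queued scan work pins the mm, so none can be pending by now. */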
+ disable_work(&mm->cid_work);
free_percpu(mm->pcpu_cid);
mm->pcpu_cid = NULL;
}
@@ -1284,6 +1292,12 @@ static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumas
WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed));
raw_spin_unlock(&mm->cpus_allowed_lock);
}
+
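+/* True when the scan delay has elapsed and an mm_cid scan is due. */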
+static inline bool mm_cid_needs_scan(struct mm_struct *mm)
+{
+ return mm && !time_before(jiffies, READ_ONCE(mm->mm_cid_next_scan));
+}
#else /* CONFIG_SCHED_MM_CID */
static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { }
static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; }
@@ -1294,6 +1307,7 @@ static inline unsigned int mm_cid_size(void)
return 0;
}
static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { }
+static inline bool mm_cid_needs_scan(struct mm_struct *mm) { return false; }
#endif /* CONFIG_SCHED_MM_CID */
struct mmu_gather;
@@ -7,6 +7,13 @@
#include <linux/preempt.h>
#include <linux/sched.h>
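+/*
+ * Nanoseconds of uninterrupted runtime after which the scheduler tick
+ * raises an rseq preempt event, forcing long-running tasks through
+ * resume-to-user where a due mm_cid compaction scan can be queued.
+ */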
+#define RSEQ_UNPREEMPTED_THRESHOLD (100ULL * 1000000) /* 100ms */
+
/*
* Map the event mask on the user-space ABI enum rseq_cs_flags
* for direct mask checks.
@@ -54,6 +56,15 @@ static inline void rseq_preempt(struct task_struct *t)
rseq_set_notify_resume(t);
}
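+/* Called from the scheduler tick with the rq lock held. */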
+static inline void rseq_preempt_from_tick(struct task_struct *t)
+{
+ u64 rtime = t->se.sum_exec_runtime - t->se.prev_sum_exec_runtime;
+
+ if (rtime > RSEQ_UNPREEMPTED_THRESHOLD)
+ rseq_preempt(t);
+}
+
/* rseq_migrate() requires preemption to be disabled. */
static inline void rseq_migrate(struct task_struct *t)
{
@@ -104,6 +114,9 @@ static inline void rseq_signal_deliver(struct ksignal *ksig,
static inline void rseq_preempt(struct task_struct *t)
{
}
+static inline void rseq_preempt_from_tick(struct task_struct *t)
+{
+}
static inline void rseq_migrate(struct task_struct *t)
{
}
@@ -1397,7 +1397,6 @@ struct task_struct {
int last_mm_cid; /* Most recent cid in mm */
int migrate_from_cpu;
int mm_cid_active; /* Whether cid bitmap is active */
- struct callback_head cid_work;
#endif
struct tlbflush_unmap_batch tlb_ubc;
@@ -2254,4 +2253,10 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
#define alloc_tag_restore(_tag, _old) do {} while (0)
#endif
+#ifdef CONFIG_SCHED_MM_CID
+extern void task_queue_mm_cid(struct task_struct *curr);
+#else
+static inline void task_queue_mm_cid(struct task_struct *curr) { }
+#endif
+
#endif
@@ -419,6 +419,13 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
}
if (unlikely(rseq_update_cpu_node_id(t)))
goto error;
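+	/*
+	 * Queue a due mm_cid compaction scan; rseq_preempt_from_tick()
+	 * raises a preempt event for long-running tasks, so even tasks
+	 * that rarely sleep periodically reach this path.
+	 */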
+ if (mm_cid_needs_scan(t->mm))
+ task_queue_mm_cid(t);
return;
error:
@@ -5663,7 +5663,7 @@ void sched_tick(void)
resched_latency = cpu_resched_latency(rq);
calc_global_load_tick(rq);
sched_core_tick(rq);
- task_tick_mm_cid(rq, donor);
+ rseq_preempt_from_tick(donor);
scx_tick(rq);
rq_unlock(rq, &rf);
@@ -10530,22 +10530,16 @@ static void sched_mm_cid_remote_clear_weight(struct mm_struct *mm, int cpu,
sched_mm_cid_remote_clear(mm, pcpu_cid, cpu);
}
-static void task_mm_cid_work(struct callback_head *work)
+void task_mm_cid_work(struct work_struct *work)
{
unsigned long now = jiffies, old_scan, next_scan;
- struct task_struct *t = current;
struct cpumask *cidmask;
- struct mm_struct *mm;
+ struct mm_struct *mm = container_of(work, struct mm_struct, cid_work);
int weight, cpu;
- SCHED_WARN_ON(t != container_of(work, struct task_struct, cid_work));
-
- work->next = work; /* Prevent double-add */
- if (t->flags & PF_EXITING)
- return;
- mm = t->mm;
- if (!mm)
- return;
+	/* We hold the only remaining reference: the process has exited, skip the scan. */
+ if (atomic_read(&mm->mm_count) == 1)
+ goto out_drop;
old_scan = READ_ONCE(mm->mm_cid_next_scan);
next_scan = now + msecs_to_jiffies(MM_CID_SCAN_DELAY);
if (!old_scan) {
@@ -10558,9 +10552,9 @@ static void task_mm_cid_work(struct callback_head *work)
old_scan = next_scan;
}
if (time_before(now, old_scan))
- return;
+ goto out_drop;
if (!try_cmpxchg(&mm->mm_cid_next_scan, &old_scan, next_scan))
- return;
+ goto out_drop;
cidmask = mm_cidmask(mm);
/* Clear cids that were not recently used. */
for_each_possible_cpu(cpu)
@@ -10572,6 +10566,9 @@
*/
for_each_possible_cpu(cpu)
sched_mm_cid_remote_clear_weight(mm, cpu, weight);
+out_drop:
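+	/* Pairs with mmgrab() in task_queue_mm_cid(). */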
+ mmdrop(mm);
}
void init_sched_mm_cid(struct task_struct *t)
@@ -10584,23 +10580,15 @@ void init_sched_mm_cid(struct task_struct *t)
if (mm_users == 1)
mm->mm_cid_next_scan = jiffies + msecs_to_jiffies(MM_CID_SCAN_DELAY);
}
- t->cid_work.next = &t->cid_work; /* Protect against double add */
- init_task_work(&t->cid_work, task_mm_cid_work);
}
-void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
+/* Call only when curr is a user thread (curr->mm != NULL). */
+void task_queue_mm_cid(struct task_struct *curr)
{
- struct callback_head *work = &curr->cid_work;
- unsigned long now = jiffies;
-
- if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) ||
- work->next != work)
- return;
- if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
- return;
-
- /* No page allocation under rq lock */
- task_work_add(curr, work, TWA_RESUME);
+ /* Ensure the mm exists when we run. */
+ mmgrab(curr->mm);
+	if (!queue_work(system_unbound_wq, &curr->mm->cid_work))
+		mmdrop(curr->mm);	/* Already pending: drop the extra reference. */
}
void sched_mm_cid_exit_signals(struct task_struct *t)
@@ -3630,7 +3630,6 @@ extern int use_cid_lock;
extern void sched_mm_cid_migrate_from(struct task_struct *t);
extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t);
-extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
extern void init_sched_mm_cid(struct task_struct *t);
static inline void __mm_cid_put(struct mm_struct *mm, int cid)
@@ -3899,7 +3898,6 @@ static inline void switch_mm_cid(struct rq *rq,
static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { }
-static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
static inline void init_sched_mm_cid(struct task_struct *t) { }
#endif /* !CONFIG_SCHED_MM_CID */