@@ -425,6 +425,20 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
irqs disabled over activate_mm. Architectures that do IPI based TLB
shootdowns should enable this.
+# Use normal mm refcounting for MMU_LAZY_TLB kernel thread references.
+# MMU_LAZY_TLB_REFCOUNT=n can improve the scalability of context switching
+# to/from kernel threads when the same mm is running on a lot of CPUs (a large
+# multi-threaded application), by reducing contention on the mm refcount.
+#
+# This can be disabled if the architecture ensures no CPUs are using an mm as a
+# "lazy tlb" beyond its final refcount (i.e., by the time __mmdrop frees the mm
+# or its kernel page tables). This could be arranged by arch_exit_mmap(), or
+# final exit(2) TLB flush, for example. arch code must also ensure the
+# _lazy_tlb variants of mmgrab/mmdrop are used when dropping the lazy reference
+# to a kthread ->active_mm (non-arch code has been converted already).
+config MMU_LAZY_TLB_REFCOUNT
+ def_bool y
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
@@ -52,12 +52,22 @@ static inline void mmdrop(struct mm_stru
/* Helpers for lazy TLB mm refcounting */
static inline void mmgrab_lazy_tlb(struct mm_struct *mm)
{
- mmgrab(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT))
+ mmgrab(mm);
}
static inline void mmdrop_lazy_tlb(struct mm_struct *mm)
{
- mmdrop(mm);
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_REFCOUNT)) {
+ mmdrop(mm);
+ } else {
+ /*
+ * mmdrop_lazy_tlb must provide a full memory barrier, see the
+ * membarrier comment in finish_task_switch which relies on
+ * this.
+ */
+ smp_mb();
+ }
}
/**
@@ -4527,7 +4527,7 @@ static struct rq *finish_task_switch(str
__releases(rq->lock)
{
struct rq *rq = this_rq();
- struct mm_struct *mm = rq->prev_mm;
+ struct mm_struct *mm = NULL;
long prev_state;
/*
@@ -4546,7 +4546,10 @@ static struct rq *finish_task_switch(str
current->comm, current->pid, preempt_count()))
preempt_count_set(FORK_PREEMPT_COUNT);
- rq->prev_mm = NULL;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ mm = rq->prev_lazy_mm;
+ rq->prev_lazy_mm = NULL;
+#endif
/*
* A task struct has one reference for the use as "current".
@@ -4682,9 +4685,20 @@ context_switch(struct rq *rq, struct tas
switch_mm_irqs_off(prev->active_mm, next->mm, next);
if (!prev->mm) { // from kernel
- /* will mmdrop_lazy_tlb() in finish_task_switch(). */
- rq->prev_mm = prev->active_mm;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ /* Will mmdrop_lazy_tlb() in finish_task_switch(). */
+ rq->prev_lazy_mm = prev->active_mm;
prev->active_mm = NULL;
+#else
+ /*
+ * Without MMU_LAZY_TLB_REFCOUNT there is no lazy
+ * tracking (because no rq->prev_lazy_mm) in
+ * finish_task_switch, so no mmdrop_lazy_tlb(), so no
+ * memory barrier for membarrier (see the membarrier
+ * comment in finish_task_switch()). Do it here.
+ */
+ smp_mb();
+#endif
}
}
@@ -967,7 +967,9 @@ struct rq {
struct task_struct *idle;
struct task_struct *stop;
unsigned long next_balance;
- struct mm_struct *prev_mm;
+#ifdef CONFIG_MMU_LAZY_TLB_REFCOUNT
+ struct mm_struct *prev_lazy_mm;
+#endif
unsigned int clock_update_flags;
u64 clock;