--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -32,6 +32,16 @@
struct address_space;
struct mem_cgroup;
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+struct luf_batch {
+ struct tlbflush_unmap_batch batch;
+ unsigned long ugen;
+ rwlock_t lock;
+};
+#else
+struct luf_batch {};
+#endif
+
/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
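An aside, not part of the patch: the empty !CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH definition lets other code embed a struct luf_batch unconditionally; with the kernel's empty-struct behaviour it takes no space, and the fold helpers declared below compile down to no-ops. A minimal sketch, with a hypothetical embedding structure:

	struct example_holder {			/* hypothetical, for illustration only */
		struct luf_batch lb;		/* zero-sized when the config is off */
	};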
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1240,6 +1240,8 @@ extern struct workqueue_struct *mm_percpu_wq;
void try_to_unmap_flush(void);
void try_to_unmap_flush_dirty(void);
void flush_tlb_batched_pending(struct mm_struct *mm);
+void fold_batch(struct tlbflush_unmap_batch *dst, struct tlbflush_unmap_batch *src, bool reset);
+void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src);
#else
static inline void try_to_unmap_flush(void)
{
@@ -1250,6 +1252,12 @@ static inline void try_to_unmap_flush_dirty(void)
static inline void flush_tlb_batched_pending(struct mm_struct *mm)
{
}
+static inline void fold_batch(struct tlbflush_unmap_batch *dst, struct tlbflush_unmap_batch *src, bool reset)
+{
+}
+static inline void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src)
+{
+}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
extern const struct trace_print_flags pageflag_names[];
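A hedged usage sketch for fold_batch() (the accumulator and call site are hypothetical, not taken from this series): it merges the pending-flush state of src into dst and, when reset is true, clears src so it can be reused.

	struct tlbflush_unmap_batch acc = {};	/* hypothetical accumulator */
	struct tlbflush_unmap_batch *local = &current->tlb_ubc;

	fold_batch(&acc, local, true);		/* acc absorbs local; local is reset */
	if (acc.flush_required)
		arch_tlbbatch_flush(&acc.arch);	/* one flush covers everything folded in */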
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -641,7 +641,7 @@ struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio,
* function, ugen_before(), should be used to evaluate the temporal
* sequence of events because the number is designed to wraparound.
*/
-static atomic_long_t __maybe_unused luf_ugen = ATOMIC_LONG_INIT(LUF_UGEN_INIT);
+static atomic_long_t luf_ugen = ATOMIC_LONG_INIT(LUF_UGEN_INIT);
/*
* Don't return invalid luf_ugen, zero.
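An aside on the ugen_before() mentioned in the comment above (its definition is not shown in this hunk): a free-running generation counter is usually compared in a wraparound-safe way along these lines, though the real helper may differ.

	static inline bool ugen_before_sketch(unsigned long a, unsigned long b)
	{
		/* true iff a is older than b, even across counter wraparound */
		return (long)(a - b) < 0;
	}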
@@ -656,6 +656,122 @@ static unsigned long __maybe_unused new_luf_ugen(void)
return ugen;
}
+static void reset_batch(struct tlbflush_unmap_batch *batch)
+{
+ arch_tlbbatch_clear(&batch->arch);
+ batch->flush_required = false;
+ batch->writable = false;
+}
+
+void fold_batch(struct tlbflush_unmap_batch *dst,
+ struct tlbflush_unmap_batch *src, bool reset)
+{
+ if (!src->flush_required)
+ return;
+
+ /*
+ * Fold src to dst.
+ */
+ arch_tlbbatch_fold(&dst->arch, &src->arch);
+ dst->writable = dst->writable || src->writable;
+ dst->flush_required = true;
+
+ if (!reset)
+ return;
+
+ /*
+ * Reset src.
+ */
+ reset_batch(src);
+}
+
+/*
+ * The number of entries that luf_key, an 'unsigned short', can cover.
+ */
+#define NR_LUF_BATCH (1 << (sizeof(short) * 8))
+
+/*
+ * Use the 0th entry as the accumulated batch.
+ */
+static struct luf_batch luf_batch[NR_LUF_BATCH];
+
+static void luf_batch_init(struct luf_batch *lb)
+{
+ rwlock_init(&lb->lock);
+ reset_batch(&lb->batch);
+ lb->ugen = atomic_long_read(&luf_ugen) - 1;
+}
+
+static int __init luf_init(void)
+{
+ int i;
+
+ for (i = 0; i < NR_LUF_BATCH; i++)
+ luf_batch_init(&luf_batch[i]);
+
+ return 0;
+}
+early_initcall(luf_init);
+
+/*
+ * key used to index into the luf_batch array
+ *
+ * note: zero means invalid key
+ */
+static atomic_t luf_kgen = ATOMIC_INIT(1);
+
+/*
+ * Don't return invalid luf_key, zero.
+ */
+static unsigned short __maybe_unused new_luf_key(void)
+{
+ unsigned short luf_key = atomic_inc_return(&luf_kgen);
+
+ if (!luf_key)
+ luf_key = atomic_inc_return(&luf_kgen);
+
+ return luf_key;
+}
+
+static void __fold_luf_batch(struct luf_batch *dst_lb,
+ struct tlbflush_unmap_batch *src_batch,
+ unsigned long src_ugen)
+{
+ /*
+ * dst_lb->ugen represents the generation that still requires a tlb
+ * shootdown, that is, a sort of request number. The newer it is,
+ * the more tlb shootdowns might be needed to fulfill the newer
+ * request. Conservatively keep the newer one.
+ */
+ if (!dst_lb->ugen || ugen_before(dst_lb->ugen, src_ugen))
+ dst_lb->ugen = src_ugen;
+ fold_batch(&dst_lb->batch, src_batch, false);
+}
+
+void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src)
+{
+ unsigned long flags;
+
+ /*
+ * Exactly the same entry. Nothing to fold.
+ */
+ if (dst == src)
+ return;
+
+ if (&src->lock < &dst->lock) {
+ read_lock_irqsave(&src->lock, flags);
+ write_lock(&dst->lock);
+ } else {
+ write_lock_irqsave(&dst->lock, flags);
+ read_lock(&src->lock);
+ }
+
+ __fold_luf_batch(dst, &src->batch, src->ugen);
+
+ write_unlock(&dst->lock);
+ read_unlock_irqrestore(&src->lock, flags);
+}
+
/*
* Flush TLB entries for recently unmapped pages from remote CPUs. It is
* important if a PTE was dirty when it was unmapped that it's flushed
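A hedged sketch of how the pieces above fit together (the caller and the page_luf_key() accessor are hypothetical; the changelog only states that each page keeps its hash key): the key indexes luf_batch[], zero means nothing is pending, and entry 0 serves as the accumulated batch.

	static void fold_page_into_accumulated(struct page *page)
	{
		unsigned short key = page_luf_key(page);	/* hypothetical accessor */

		if (!key)			/* zero is the invalid key: nothing pending */
			return;

		/* Entry 0 accumulates everything; fold_luf_batch() does the locking. */
		fold_luf_batch(&luf_batch[0], &luf_batch[key]);
	}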
@@ -670,9 +786,7 @@ void try_to_unmap_flush(void)
return;
arch_tlbbatch_flush(&tlb_ubc->arch);
- arch_tlbbatch_clear(&tlb_ubc->arch);
- tlb_ubc->flush_required = false;
- tlb_ubc->writable = false;
+ reset_batch(tlb_ubc);
}
/* Flush iff there are potentially writable TLB entries that can race with IO */
Functionally, no change. This is a preparation for the luf mechanism,
which needs to keep luf metadata per page while the page stays in the
pcp or buddy allocator. The metadata includes the cpumask for tlb
shootdown and luf's request generation number. Since struct page
doesn't have enough room to store the luf metadata, this patch
introduces a hash table to store it and makes each page keep its hash
key instead. Since all the pages in pcp or buddy share the hash table,
collisions are inevitable, so care must be taken when reading or
updating an entry.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/mm_types.h |  10 ++++
 mm/internal.h            |   8 +++
 mm/rmap.c                | 122 +++++++++++++++++++++++++++++++++++++--
 3 files changed, 136 insertions(+), 4 deletions(-)
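A closing sketch of the collision handling called out above (the recording helper is hypothetical and not part of this patch): when two unrelated pages end up with the same luf_key, folding under the entry's write lock keeps a conservative superset, i.e. the union of CPUs and the newest ugen, trading extra flushing for correctness.

	static void record_pending_flush(unsigned short luf_key,
					 struct tlbflush_unmap_batch *pending,
					 unsigned long ugen)
	{
		struct luf_batch *lb = &luf_batch[luf_key];
		unsigned long flags;

		write_lock_irqsave(&lb->lock, flags);
		/* A colliding entry is merged into, never overwritten. */
		__fold_luf_batch(lb, pending, ugen);
		write_unlock_irqrestore(&lb->lock, flags);
	}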