@@ -552,7 +552,7 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm
/*
* Don't leave stale tlb entries for this vma.
*/
- luf_flush(0);
+ luf_flush_vma(vma);
if (tlb->fullmm)
return;
@@ -38,8 +38,10 @@ struct luf_batch {
unsigned long ugen;
rwlock_t lock;
};
+void luf_batch_init(struct luf_batch *lb);
#else
struct luf_batch {};
+static inline void luf_batch_init(struct luf_batch *lb) {}
#endif
/*
@@ -1022,6 +1024,9 @@ struct mm_struct {
* moving a PROT_NONE mapped page.
*/
atomic_t tlb_flush_pending;
+
+ /* luf batch for this mm */
+ struct luf_batch luf_batch;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/* See flush_tlb_batched_pending() */
atomic_t tlb_flush_batched;
@@ -1272,8 +1277,12 @@ extern void tlb_finish_mmu(struct mmu_gather *tlb);
#if defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH)
void luf_flush(unsigned short luf_key);
+void luf_flush_mm(struct mm_struct *mm);
+void luf_flush_vma(struct vm_area_struct *vma);
#else
static inline void luf_flush(unsigned short luf_key) {}
+static inline void luf_flush_mm(struct mm_struct *mm) {}
+static inline void luf_flush_vma(struct vm_area_struct *vma) {}
#endif
struct vm_fault;
@@ -1268,6 +1268,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
spin_lock_init(&mm->arg_lock);
+ luf_batch_init(&mm->luf_batch);
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
@@ -5225,7 +5225,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
if (mm) {
membarrier_mm_sync_core_before_usermode(mm);
mmdrop_lazy_tlb_sched(mm);
- luf_flush(0);
+ luf_flush_mm(mm);
}
if (unlikely(prev_state == TASK_DEAD)) {
@@ -6081,6 +6081,7 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
struct mm_struct *mm = vma->vm_mm;
vm_fault_t ret;
bool is_droppable;
+ struct address_space *mapping = NULL;
bool flush = false;
__set_current_state(TASK_RUNNING);
@@ -6112,9 +6113,17 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
* should be considered.
*/
if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE) ||
- flags & FAULT_FLAG_WRITE)
+ flags & FAULT_FLAG_WRITE) {
flush = true;
+ /*
+ * Don't care about the !VM_SHARED case because it won't
+ * update pages that might be shared with others.
+ */
+ if (vma->vm_flags & VM_SHARED && vma->vm_file)
+ mapping = vma->vm_file->f_mapping;
+ }
+
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
else
@@ -6149,8 +6158,15 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
/*
* Ensure to clean stale tlb entries for this vma.
*/
- if (flush)
- luf_flush(0);
+ if (flush) {
+ /*
+ * If it has a VM_SHARED mapping, all the mms involved
+ * should be luf_flush'ed.
+ */
+ if (mapping)
+ luf_flush(0);
+ luf_flush_mm(mm);
+ }
return ret;
}
@@ -100,7 +100,7 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
if (pte_accessible(mm, pte))
flush_tlb_page(vma, address);
else
- luf_flush(0);
+ luf_flush_vma(vma);
return pte;
}
#endif
@@ -695,7 +695,7 @@ void fold_batch(struct tlbflush_unmap_batch *dst,
*/
struct luf_batch luf_batch[NR_LUF_BATCH];
-static void luf_batch_init(struct luf_batch *lb)
+void luf_batch_init(struct luf_batch *lb)
{
rwlock_init(&lb->lock);
reset_batch(&lb->batch);
@@ -778,6 +778,31 @@ void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src)
read_unlock_irqrestore(&src->lock, flags);
}
+static void fold_luf_batch_mm(struct luf_batch *dst,
+ struct mm_struct *mm)
+{
+ unsigned long flags;
+ bool need_fold = false;
+
+ read_lock_irqsave(&dst->lock, flags);
+ if (arch_tlbbatch_need_fold(&dst->batch.arch, mm))
+ need_fold = true;
+ read_unlock(&dst->lock);
+
+ write_lock(&dst->lock);
+ if (unlikely(need_fold))
+ arch_tlbbatch_add_pending(&dst->batch.arch, mm, 0);
+
+ /*
+ * dst->ugen represents a request for tlb shootdown. The
+ * newer it is, the more tlb shootdown might be needed to
+ * fulfill it. Keep the newest one so as not to miss a
+ * necessary tlb shootdown.
+ */
+ dst->ugen = new_luf_ugen();
+ write_unlock_irqrestore(&dst->lock, flags);
+}
+
static unsigned long tlb_flush_start(void)
{
/*
@@ -894,6 +919,49 @@ void luf_flush(unsigned short luf_key)
}
EXPORT_SYMBOL(luf_flush);
+void luf_flush_vma(struct vm_area_struct *vma)
+{
+ struct mm_struct *mm;
+ struct address_space *mapping = NULL;
+
+ if (!vma)
+ return;
+
+ mm = vma->vm_mm;
+ /*
+ * Don't care about the !VM_SHARED case because it won't
+ * update pages that might be shared with others.
+ */
+ if (vma->vm_flags & VM_SHARED && vma->vm_file)
+ mapping = vma->vm_file->f_mapping;
+
+ if (mapping)
+ luf_flush(0);
+ luf_flush_mm(mm);
+}
+
+void luf_flush_mm(struct mm_struct *mm)
+{
+ struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+ struct luf_batch *lb;
+ unsigned long flags;
+ unsigned long lb_ugen;
+
+ if (!mm)
+ return;
+
+ lb = &mm->luf_batch;
+ read_lock_irqsave(&lb->lock, flags);
+ fold_batch(tlb_ubc, &lb->batch, false);
+ lb_ugen = lb->ugen;
+ read_unlock_irqrestore(&lb->lock, flags);
+
+ if (arch_tlbbatch_diet(&tlb_ubc->arch, lb_ugen))
+ return;
+
+ try_to_unmap_flush();
+}
+
/*
* Flush TLB entries for recently unmapped pages from remote CPUs. It is
* important if a PTE was dirty when it was unmapped that it's flushed
@@ -962,8 +1030,10 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval,
if (!can_luf_test())
tlb_ubc = &current->tlb_ubc;
- else
+ else {
tlb_ubc = &current->tlb_ubc_ro;
+ fold_luf_batch_mm(&mm->luf_batch, mm);
+ }
arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr);
tlb_ubc->flush_required = true;
The fault handler performs the tlb flush pended by luf when a new pte
gains write permission, no matter whether the required tlb flush has
already been performed or not. By storing the luf generation number,
luf_ugen, in struct mm_struct, we can skip unnecessary tlb flushes.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/asm-generic/tlb.h |  2 +-
 include/linux/mm_types.h  |  9 +++++
 kernel/fork.c             |  1 +
 kernel/sched/core.c       |  2 +-
 mm/memory.c               | 22 ++++++++++--
 mm/pgtable-generic.c      |  2 +-
 mm/rmap.c                 | 74 +++++++++++++++++++++++++++++++++++++--
 7 files changed, 104 insertions(+), 8 deletions(-)
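
As a rough illustration of the idea above (not part of the patch), the
following userspace model sketches how a per-mm generation number lets a
flush be skipped once a newer shootdown already covers it. All names here
(model_mm, model_defer_unmap, model_shootdown_done, model_luf_flush_mm,
next_ugen, done_ugen) are hypothetical stand-ins for the kernel's
luf_batch/ugen machinery.

#include <stdbool.h>
#include <stdio.h>

/* Global generation counters, standing in for luf's ugen machinery. */
static unsigned long next_ugen = 1;	/* monotonically increasing request gen */
static unsigned long done_ugen;		/* newest generation already flushed */

/* Simplified stand-in for struct mm_struct with its embedded luf batch. */
struct model_mm {
	unsigned long luf_ugen;	/* newest pending luf request for this mm */
	bool has_pending;	/* any unmap deferred without a flush? */
};

/* Record a deferred (luf'd) unmap against this mm. */
static void model_defer_unmap(struct model_mm *mm)
{
	mm->luf_ugen = next_ugen++;
	mm->has_pending = true;
}

/* A completed tlb shootdown covers every request up to and including ugen. */
static void model_shootdown_done(unsigned long ugen)
{
	if (ugen > done_ugen)
		done_ugen = ugen;
}

/*
 * Per-mm flush in the spirit of luf_flush_mm(): skip the expensive flush
 * when this mm's newest request is already covered by a completed shootdown.
 */
static bool model_luf_flush_mm(struct model_mm *mm)
{
	if (!mm->has_pending)
		return false;			/* nothing deferred at all */

	mm->has_pending = false;
	if (mm->luf_ugen <= done_ugen)
		return false;			/* already covered, skip */

	done_ugen = mm->luf_ugen;		/* pretend the flush happens now */
	return true;				/* a real flush would go here */
}

int main(void)
{
	struct model_mm mm = { 0 };

	model_defer_unmap(&mm);
	printf("flush needed after defer:     %d\n", model_luf_flush_mm(&mm)); /* 1 */
	printf("flush needed again:           %d\n", model_luf_flush_mm(&mm)); /* 0 */

	model_defer_unmap(&mm);
	model_shootdown_done(next_ugen - 1);	/* someone else flushed everything */
	printf("flush needed after shootdown: %d\n", model_luf_flush_mm(&mm)); /* 0 */
	return 0;
}

In the patch itself, the analogous decision is made by arch_tlbbatch_diet()
on the ugen read from mm->luf_batch, with try_to_unmap_flush() only called
when the pending batch cannot be dropped.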