@@ -25,7 +25,8 @@
void __tlb_remove_table(void *_table);
static inline void tlb_flush(struct mmu_gather *tlb);
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size);
+ struct page *page, int page_size,
+ unsigned int flags);
#define tlb_flush tlb_flush
#define pte_free_tlb pte_free_tlb
@@ -36,13 +37,24 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
#include <asm/tlbflush.h>
#include <asm-generic/tlb.h>
+/*
+ * s390 never needs to delay page_remove_rmap(), because
+ * ptep_get_and_clear_full() will already have flushed the
+ * TLB across CPUs. Hence this always returns false.
+ */
+static inline bool tlb_delay_rmap(struct mmu_gather *tlb)
+{
+ return false;
+}
+
/*
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
* has already been freed, so just do free_page_and_swap_cache.
*/
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
+ struct page *page, int page_size,
+ unsigned int flags)
{
free_page_and_swap_cache(page);
return false;
@@ -53,6 +65,11 @@ static inline void tlb_flush(struct mmu_gather *tlb)
__tlb_flush_mm_lazy(tlb->mm);
}
+static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ /* Nothing to do: tlb_delay_rmap() is always false on s390 */
+}
+
/*
* pte_free_tlb frees a pte table and clears the CRSTE for the
* page table from the tlb.
@@ -257,7 +257,15 @@ struct mmu_gather_batch {
#define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH)
extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
- int page_size);
+ int page_size, unsigned int flags);
+extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
+
+/*
+ * Sets 'delayed_rmap' in the gather and evaluates to true. It would be an
+ * inline function, except it is defined before 'struct mmu_gather' is.
+ */
+#define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true)
+
#endif
/*
@@ -290,6 +298,11 @@ struct mmu_gather {
*/
unsigned int freed_tables : 1;
+ /*
+ * Do we have pending delayed rmap removals?
+ */
+ unsigned int delayed_rmap : 1;
+
/*
* at which levels have we cleared entries?
*/
@@ -431,13 +444,13 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
static inline void tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size)
{
- if (__tlb_remove_page_size(tlb, page, page_size))
+ if (__tlb_remove_page_size(tlb, page, page_size, 0))
tlb_flush_mmu(tlb);
}
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags)
{
- return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
+ return __tlb_remove_page_size(tlb, page, PAGE_SIZE, flags);
}
/* tlb_remove_page
@@ -1432,6 +1432,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
break;
if (pte_present(ptent)) {
+ unsigned int delay_rmap;
+
page = vm_normal_page(vma, addr, ptent);
if (unlikely(!should_zap_page(details, page)))
continue;
@@ -1443,20 +1445,26 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
if (unlikely(!page))
continue;
+ delay_rmap = 0;
if (!PageAnon(page)) {
if (pte_dirty(ptent)) {
- force_flush = 1;
set_page_dirty(page);
+ if (tlb_delay_rmap(tlb)) {
+ delay_rmap = 1;
+ force_flush = 1;
+ }
}
if (pte_young(ptent) &&
likely(!(vma->vm_flags & VM_SEQ_READ)))
mark_page_accessed(page);
}
rss[mm_counter(page)]--;
- page_remove_rmap(page, vma, false);
- if (unlikely(page_mapcount(page) < 0))
- print_bad_pte(vma, addr, ptent, page);
- if (unlikely(__tlb_remove_page(tlb, page))) {
+ if (!delay_rmap) {
+ page_remove_rmap(page, vma, false);
+ if (unlikely(page_mapcount(page) < 0))
+ print_bad_pte(vma, addr, ptent, page);
+ }
+ if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
force_flush = 1;
addr += PAGE_SIZE;
break;
@@ -1513,8 +1521,11 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
arch_leave_lazy_mmu_mode();
/* Do the actual TLB flush before dropping ptl */
- if (force_flush)
+ if (force_flush) {
tlb_flush_mmu_tlbonly(tlb);
+ if (tlb->delayed_rmap)
+ tlb_flush_rmaps(tlb, vma);
+ }
pte_unmap_unlock(start_pte, ptl);
/*
@@ -9,6 +9,7 @@
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>
+#include <linux/rmap.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -19,6 +20,10 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
+ /* No more batching if we have delayed rmaps pending */
+ if (tlb->delayed_rmap)
+ return false;
+
batch = tlb->active;
if (batch->next) {
tlb->active = batch->next;
@@ -43,6 +48,31 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
return true;
}
+/**
+ * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB
+ * @tlb: the current mmu_gather
+ * @vma: the vma passed to page_remove_rmap() for each delayed page
+ *
+ * tlb_next_batch() above refuses to start a new batch while delayed
+ * rmaps are pending, so only the current active batch is walked.
+ */
+void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ struct mmu_gather_batch *batch;
+
+ batch = tlb->active;
+ for (int i = 0; i < batch->nr; i++) {
+ struct encoded_page *enc = batch->encoded_pages[i];
+
+ if (encoded_page_flags(enc)) {
+ struct page *page = encoded_page_ptr(enc);
+ page_remove_rmap(page, vma, false);
+ }
+ }
+
+ tlb->delayed_rmap = 0;
+}
+
static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
@@ -77,7 +107,7 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
tlb->local.next = NULL;
}
-bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
+bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size, unsigned int flags)
{
struct mmu_gather_batch *batch;
@@ -92,7 +122,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
* Add the page and check if we are full. If so
* force a flush.
*/
- batch->encoded_pages[batch->nr++] = encode_page(page, 0);
+ batch->encoded_pages[batch->nr++] = encode_page(page, flags);
if (batch->nr == batch->max) {
if (!tlb_next_batch(tlb))
return true;
@@ -286,6 +316,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
tlb->active = &tlb->local;
tlb->batch_count = 0;
#endif
+ tlb->delayed_rmap = 0;
tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE