@@ -75,6 +75,10 @@
* boolean indicating if the queue is (now) full and a call to
* tlb_flush_mmu() is required.
*
+ * tlb_reserve_space() attempts to preallocate space for nr pages and returns
+ * the minimum guaranteed number of pages that can be queued without overflow,
+ * which may be more or less than requested.
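+ * A caller that then queues at most that many pages is guaranteed that
+ * __tlb_remove_page() only reports a full queue for the final page, if at all.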
+ *
* tlb_remove_page() and tlb_remove_page_size() imply the call to
* tlb_flush_mmu() when required and has no return value.
*
@@ -263,6 +267,7 @@ struct mmu_gather_batch {
extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct encoded_page *page,
int page_size);
+extern unsigned int tlb_reserve_space(struct mmu_gather *tlb, unsigned int nr);
#ifdef CONFIG_SMP
/*
@@ -273,6 +278,12 @@ extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
#endif
+#else
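+/*
+ * With CONFIG_MMU_GATHER_NO_GATHER there is no page queue to reserve space
+ * in, so report a single guaranteed slot.
+ */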
+static inline unsigned int tlb_reserve_space(struct mmu_gather *tlb,
+ unsigned int nr)
+{
+ return 1;
+}
#endif
/*
@@ -601,6 +601,49 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
}
#endif
+#ifndef clear_ptes
+struct mm_struct;
+/**
+ * clear_ptes - Clear a consecutive range of ptes and return the previous value.
+ * @mm: Address space that the ptes map.
+ * @address: Address corresponding to the first pte to clear.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of ptes to clear.
+ * @full: True if systematically clearing all ptes for the address space.
+ *
+ * A batched version of ptep_get_and_clear_full(), which returns the old pte
+ * value for the first pte in the range, but with young and/or dirty set if any
+ * of the ptes in the range were young or dirty.
+ *
+ * May be overridden by the architecture; the generic version below is a loop
+ * over ptep_get_and_clear_full().
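+ *
+ * For example, zap_pte_range() uses this to clear every pte in a batch with a
+ * single call before queueing the corresponding pages in the mmu_gather.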
+ *
+ * Context: The caller holds the page table lock. The PTEs are all in the same
+ * PMD.
+ */
+static inline pte_t clear_ptes(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ unsigned int i;
+ pte_t pte;
+ pte_t orig_pte = ptep_get_and_clear_full(mm, address, ptep, full);
+
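+ /* Clear the remaining ptes, folding their young and dirty bits into orig_pte. */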
+ for (i = 1; i < nr; i++) {
+ address += PAGE_SIZE;
+ ptep++;
+ pte = ptep_get_and_clear_full(mm, address, ptep, full);
+
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte);
+
+ if (pte_young(pte))
+ orig_pte = pte_mkyoung(orig_pte);
+ }
+
+ return orig_pte;
+}
+#endif
/*
* If two threads concurrently fault at the same page, the thread that
@@ -1447,6 +1447,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
pte_t *start_pte;
pte_t *pte;
swp_entry_t entry;
+ int nr;
tlb_change_page_size(tlb, PAGE_SIZE);
init_rss_vec(rss);
@@ -1459,6 +1460,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
do {
pte_t ptent = ptep_get(pte);
struct page *page;
+ int i;
+
+ nr = 1;
if (pte_none(ptent))
continue;
@@ -1467,43 +1471,67 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
break;
if (pte_present(ptent)) {
- unsigned int delay_rmap;
+ unsigned int delay_rmap = 0;
+ struct folio *folio;
+ bool full = false;
+
+ /*
+ * The tlb_gather always has at least one free slot, so avoid
+ * calling tlb_reserve_space() when pte_batch_remaining() is
+ * the compile-time constant 1 (the default).
+ */
+ nr = pte_batch_remaining(ptent, addr, end);
+ if (unlikely(nr > 1))
+ nr = min_t(int, nr, tlb_reserve_space(tlb, nr));
page = vm_normal_page(vma, addr, ptent);
if (unlikely(!should_zap_page(details, page)))
continue;
- ptent = ptep_get_and_clear_full(mm, addr, pte,
- tlb->fullmm);
+ ptent = clear_ptes(mm, addr, pte, nr, tlb->fullmm);
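+ /*
+ * ptent is the first pte of the batch, with young and dirty
+ * set if any pte in the batch was young or dirty.
+ */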
arch_check_zapped_pte(vma, ptent);
- tlb_remove_tlb_entry(tlb, pte, addr);
- zap_install_uffd_wp_if_needed(vma, addr, pte, details,
- ptent);
+
+ for (i = 0; i < nr; i++) {
+ unsigned long subaddr = addr + PAGE_SIZE * i;
+
+ tlb_remove_tlb_entry(tlb, &pte[i], subaddr);
+ zap_install_uffd_wp_if_needed(vma, subaddr,
+ &pte[i], details, ptent);
+ }
if (unlikely(!page)) {
ksm_might_unmap_zero_page(mm, ptent);
continue;
}
- delay_rmap = 0;
- if (!PageAnon(page)) {
+ folio = page_folio(page);
+ if (!folio_test_anon(folio)) {
if (pte_dirty(ptent)) {
- set_page_dirty(page);
+ folio_mark_dirty(folio);
if (tlb_delay_rmap(tlb)) {
delay_rmap = 1;
force_flush = 1;
}
}
if (pte_young(ptent) && likely(vma_has_recency(vma)))
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
}
- rss[mm_counter(page)]--;
- if (!delay_rmap) {
- page_remove_rmap(page, vma, false);
- if (unlikely(page_mapcount(page) < 0))
- print_bad_pte(vma, addr, ptent, page);
+ rss[mm_counter(page)] -= nr;
+ for (i = 0; i < nr; i++, page++) {
+ if (!delay_rmap) {
+ page_remove_rmap(page, vma, false);
+ if (unlikely(page_mapcount(page) < 0))
+ print_bad_pte(vma, addr, ptent, page);
+ }
+
+ /*
+ * nr was calculated from the available queue space, so the
+ * queue can only become full on the final iteration.
+ */
+ VM_WARN_ON(full);
+ full = __tlb_remove_page(tlb, page, delay_rmap);
}
- if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
+ if (unlikely(full)) {
force_flush = 1;
- addr += PAGE_SIZE;
+ addr += PAGE_SIZE * nr;
break;
}
continue;
@@ -1557,7 +1585,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
}
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (pte += nr, addr += PAGE_SIZE * nr, addr != end);
add_mm_rss_vec(mm, rss);
arch_leave_lazy_mmu_mode();
@@ -47,6 +47,21 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
return true;
}
+unsigned int tlb_reserve_space(struct mmu_gather *tlb, unsigned int nr)
+{
+ struct mmu_gather_batch *batch = tlb->active;
+ unsigned int nr_alloc = batch->max - batch->nr;
+
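+ /* Allocate more batches until at least nr slots are guaranteed. */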
+ while (nr_alloc < nr) {
+ if (!tlb_next_batch(tlb))
+ break;
+ nr_alloc += tlb->active->max;
+ }
+
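+ /*
+ * Restore the originally active batch; __tlb_remove_page_size() will
+ * advance into the preallocated batches as they fill up.
+ */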
+ tlb->active = batch;
+ return nr_alloc;
+}
+
#ifdef CONFIG_SMP
static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma)
{