
[2/3] mm: Support tlbbatch flush for a range of PTEs

Message ID 20250106031711.82855-3-21cnbao@gmail.com
State New
Series mm: batched unmap lazyfree large folios during reclamation

Commit Message

Barry Song Jan. 6, 2025, 3:17 a.m. UTC
From: Barry Song <v-songbaohua@oppo.com>

This is a preparatory patch to support batch PTE unmapping in
`try_to_unmap_one`. It introduces range handling for the `tlbbatch`
flush; for now, every caller still passes a range of exactly
PAGE_SIZE.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Shaoqin Huang <shahuang@redhat.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
---
 arch/arm64/include/asm/tlbflush.h | 26 ++++++++++++++------------
 arch/arm64/mm/contpte.c           |  2 +-
 arch/x86/include/asm/tlbflush.h   |  3 ++-
 mm/rmap.c                         | 12 +++++++-----
 4 files changed, 24 insertions(+), 19 deletions(-)
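
The interface change is easiest to see from the caller's side. The
following is a sketch distilled from the diff below, not additional
code in this patch: set_tlb_ubc_flush_pending() and each
architecture's arch_tlbbatch_add_pending() gain an explicit size
argument, and on arm64 the batch hook becomes a thin wrapper around
the nosync range flush.

	/* mm/rmap.c: callers now pass an explicit range; today it is one page. */
	set_tlb_ubc_flush_pending(mm, pteval, address, PAGE_SIZE);

	/* arm64: the batch hook forwards the range to __flush_tlb_range_nosync(). */
	static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
						     struct mm_struct *mm,
						     unsigned long uaddr,
						     unsigned long size)
	{
		__flush_tlb_range_nosync(mm, uaddr, uaddr + size, PAGE_SIZE, true, 3);
	}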

Comments

kernel test robot Jan. 6, 2025, 8:22 a.m. UTC | #1
Hi Barry,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Barry-Song/mm-set-folio-swapbacked-iff-folios-are-dirty-in-try_to_unmap_one/20250106-112638
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20250106031711.82855-3-21cnbao%40gmail.com
patch subject: [PATCH 2/3] mm: Support tlbbatch flush for a range of PTEs
config: i386-buildonly-randconfig-002-20250106 (https://download.01.org/0day-ci/archive/20250106/202501061535.zx9E486H-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250106/202501061535.zx9E486H-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501061535.zx9E486H-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from arch/x86/include/asm/uaccess.h:17,
                    from include/linux/uaccess.h:12,
                    from include/linux/sched/task.h:13,
                    from include/linux/sched/signal.h:9,
                    from include/linux/rcuwait.h:6,
                    from include/linux/percpu-rwsem.h:7,
                    from include/linux/fs.h:33,
                    from include/linux/cgroup.h:17,
                    from include/linux/memcontrol.h:13,
                    from include/linux/swap.h:9,
                    from include/linux/suspend.h:5,
                    from arch/x86/kernel/asm-offsets.c:14:
>> arch/x86/include/asm/tlbflush.h:283:46: error: unknown type name 'unsignd'; did you mean 'unsigned'?
     283 |                                              unsignd long size)
         |                                              ^~~~~~~
         |                                              unsigned
   make[3]: *** [scripts/Makefile.build:102: arch/x86/kernel/asm-offsets.s] Error 1 shuffle=998720002
   make[3]: Target 'prepare' not remade because of errors.
   make[2]: *** [Makefile:1263: prepare0] Error 2 shuffle=998720002
   make[2]: Target 'prepare' not remade because of errors.
   make[1]: *** [Makefile:251: __sub-make] Error 2 shuffle=998720002
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:251: __sub-make] Error 2 shuffle=998720002
   make: Target 'prepare' not remade because of errors.


vim +283 arch/x86/include/asm/tlbflush.h

   279	
   280	static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
   281						     struct mm_struct *mm,
   282						     unsigned long uaddr,
 > 283						     unsignd long size)
   284	{
   285		inc_mm_tlb_gen(mm);
   286		cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
   287		mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
   288	}
   289
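
The i386 failure is a plain spelling mistake in the new parameter
('unsignd' for 'unsigned'), as the compiler itself suggests. The x86
stub should build once it is corrected, presumably in v2 along these
lines:

	/* arch/x86/include/asm/tlbflush.h with the typo fixed -- presumably the v2 form: */
	static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
						     struct mm_struct *mm,
						     unsigned long uaddr,
						     unsigned long size)
	{
		inc_mm_tlb_gen(mm);
		cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
		mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
	}
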
kernel test robot Jan. 6, 2025, 10:07 a.m. UTC | #2
Hi Barry,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Barry-Song/mm-set-folio-swapbacked-iff-folios-are-dirty-in-try_to_unmap_one/20250106-112638
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20250106031711.82855-3-21cnbao%40gmail.com
patch subject: [PATCH 2/3] mm: Support tlbbatch flush for a range of PTEs
config: riscv-randconfig-001-20250106 (https://download.01.org/0day-ci/archive/20250106/202501061736.FoHcInHJ-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250106/202501061736.FoHcInHJ-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202501061736.FoHcInHJ-lkp@intel.com/

All errors (new ones prefixed by >>):

   mm/rmap.c: In function 'set_tlb_ubc_flush_pending':
>> mm/rmap.c:685:9: error: too many arguments to function 'arch_tlbbatch_add_pending'
     685 |         arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr, size);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~
   In file included from arch/riscv/include/asm/pgtable.h:113,
                    from include/linux/pgtable.h:6,
                    from include/linux/mm.h:30,
                    from mm/rmap.c:55:
   arch/riscv/include/asm/tlbflush.h:62:6: note: declared here
      62 | void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~


vim +/arch_tlbbatch_add_pending +685 mm/rmap.c

   663	
   664	/*
   665	 * Bits 0-14 of mm->tlb_flush_batched record pending generations.
   666	 * Bits 16-30 of mm->tlb_flush_batched bit record flushed generations.
   667	 */
   668	#define TLB_FLUSH_BATCH_FLUSHED_SHIFT	16
   669	#define TLB_FLUSH_BATCH_PENDING_MASK			\
   670		((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1)
   671	#define TLB_FLUSH_BATCH_PENDING_LARGE			\
   672		(TLB_FLUSH_BATCH_PENDING_MASK / 2)
   673	
   674	static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval,
   675					      unsigned long uaddr,
   676					      unsigned long size)
   677	{
   678		struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
   679		int batch;
   680		bool writable = pte_dirty(pteval);
   681	
   682		if (!pte_accessible(mm, pteval))
   683			return;
   684	
 > 685		arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr, size);
   686		tlb_ubc->flush_required = true;
   687	
   688		/*
   689		 * Ensure compiler does not re-order the setting of tlb_flush_batched
   690		 * before the PTE is cleared.
   691		 */
   692		barrier();
   693		batch = atomic_read(&mm->tlb_flush_batched);
   694	retry:
   695		if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) > TLB_FLUSH_BATCH_PENDING_LARGE) {
   696			/*
   697			 * Prevent `pending' from catching up with `flushed' because of
   698			 * overflow.  Reset `pending' and `flushed' to be 1 and 0 if
   699			 * `pending' becomes large.
   700			 */
   701			if (!atomic_try_cmpxchg(&mm->tlb_flush_batched, &batch, 1))
   702				goto retry;
   703		} else {
   704			atomic_inc(&mm->tlb_flush_batched);
   705		}
   706	
   707		/*
   708		 * If the PTE was dirty then it's best to assume it's writable. The
   709		 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
   710		 * before the page is queued for IO.
   711		 */
   712		if (writable)
   713			tlb_ubc->writable = true;
   714	}
   715
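
Unlike the i386 failure, this one is a missing conversion rather than
a typo: only arm64 and x86 were updated, so riscv still declares the
three-argument form that the generic code no longer uses. A sketch of
the matching riscv fixup, assuming its prototype (and the definition
behind it) simply gains the same size parameter:

	/* arch/riscv/include/asm/tlbflush.h -- hypothetical v2 fixup (sketch): */
	void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
				       struct mm_struct *mm,
				       unsigned long uaddr,
				       unsigned long size);

Any other architecture that defines arch_tlbbatch_add_pending() needs
the same treatment, or its build breaks exactly as above.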

Patch

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bc94e036a26b..f34e4fab5aa2 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -322,13 +322,6 @@  static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
 	return true;
 }
 
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
-					     struct mm_struct *mm,
-					     unsigned long uaddr)
-{
-	__flush_tlb_page_nosync(mm, uaddr);
-}
-
 /*
  * If mprotect/munmap/etc occurs during TLB batched flushing, we need to
  * synchronise all the TLBI issued with a DSB to avoid the race mentioned in
@@ -448,7 +441,7 @@  static inline bool __flush_tlb_range_limit_excess(unsigned long start,
 	return false;
 }
 
-static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
+static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
 				     unsigned long start, unsigned long end,
 				     unsigned long stride, bool last_level,
 				     int tlb_level)
@@ -460,12 +453,12 @@  static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 	pages = (end - start) >> PAGE_SHIFT;
 
 	if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
-		flush_tlb_mm(vma->vm_mm);
+		flush_tlb_mm(mm);
 		return;
 	}
 
 	dsb(ishst);
-	asid = ASID(vma->vm_mm);
+	asid = ASID(mm);
 
 	if (last_level)
 		__flush_tlb_range_op(vale1is, start, pages, stride, asid,
@@ -474,7 +467,7 @@  static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
 		__flush_tlb_range_op(vae1is, start, pages, stride, asid,
 				     tlb_level, true, lpa2_is_enabled());
 
-	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
 }
 
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
@@ -482,7 +475,7 @@  static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long stride, bool last_level,
 				     int tlb_level)
 {
-	__flush_tlb_range_nosync(vma, start, end, stride,
+	__flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
 				 last_level, tlb_level);
 	dsb(ish);
 }
@@ -533,6 +526,15 @@  static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
 	dsb(ish);
 	isb();
 }
+
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					     struct mm_struct *mm,
+					     unsigned long uaddr,
+					     unsigned long size)
+{
+	__flush_tlb_range_nosync(mm, uaddr, uaddr + size,
+				 PAGE_SIZE, true, 3);
+}
 #endif
 
 #endif
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index 55107d27d3f8..bcac4f55f9c1 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -335,7 +335,7 @@  int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
 		 * eliding the trailing DSB applies here.
 		 */
 		addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
-		__flush_tlb_range_nosync(vma, addr, addr + CONT_PTE_SIZE,
+		__flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE,
 					 PAGE_SIZE, true, 3);
 	}
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 69e79fff41b8..cda35f53f544 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -279,7 +279,8 @@  static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 
 static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
 					     struct mm_struct *mm,
-					     unsigned long uaddr)
+					     unsigned long uaddr,
+					     unsignd long size)
 {
 	inc_mm_tlb_gen(mm);
 	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
diff --git a/mm/rmap.c b/mm/rmap.c
index de6b8c34e98c..365112af5291 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -672,7 +672,8 @@  void try_to_unmap_flush_dirty(void)
 	(TLB_FLUSH_BATCH_PENDING_MASK / 2)
 
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval,
-				      unsigned long uaddr)
+				      unsigned long uaddr,
+				      unsigned long size)
 {
 	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
 	int batch;
@@ -681,7 +682,7 @@  static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval,
 	if (!pte_accessible(mm, pteval))
 		return;
 
-	arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr);
+	arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr, size);
 	tlb_ubc->flush_required = true;
 
 	/*
@@ -757,7 +758,8 @@  void flush_tlb_batched_pending(struct mm_struct *mm)
 }
 #else
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval,
-				      unsigned long uaddr)
+				      unsigned long uaddr,
+				      unsigned long size)
 {
 }
 
@@ -1792,7 +1794,7 @@  static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 				 */
 				pteval = ptep_get_and_clear(mm, address, pvmw.pte);
 
-				set_tlb_ubc_flush_pending(mm, pteval, address);
+				set_tlb_ubc_flush_pending(mm, pteval, address, PAGE_SIZE);
 			} else {
 				pteval = ptep_clear_flush(vma, address, pvmw.pte);
 			}
@@ -2164,7 +2166,7 @@  static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 				 */
 				pteval = ptep_get_and_clear(mm, address, pvmw.pte);
 
-				set_tlb_ubc_flush_pending(mm, pteval, address);
+				set_tlb_ubc_flush_pending(mm, pteval, address, PAGE_SIZE);
 			} else {
 				pteval = ptep_clear_flush(vma, address, pvmw.pte);
 			}
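
With the range plumbed through, the final patch in this series can
clear all PTEs mapping a large folio and queue a single ranged flush
instead of one flush per page. Illustrative only -- a sketch of the
shape of such a caller, not the actual 3/3 hunk:

	/* Hypothetical batched path in try_to_unmap_one(): clear nr_pages
	 * contiguous PTEs, then record one pending flush for the whole range.
	 */
	unsigned int nr_pages = folio_nr_pages(folio);

	pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0);
	set_tlb_ubc_flush_pending(mm, pteval, address, nr_pages * PAGE_SIZE);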