[RFC,5/7] x86: mm: free page table pages by RCU instead of semi RCU

Message ID 1a27215790293face83242cfd703e910aa0c5ce8.1719570849.git.zhengqi.arch@bytedance.com (mailing list archive)
State New
Series synchronously scan and reclaim empty user PTE pages

Commit Message

Qi Zheng July 1, 2024, 8:46 a.m. UTC
Now, if CONFIG_MMU_GATHER_RCU_TABLE_FREE is selected, the page table pages
will be freed by semi RCU, that is:

 - batch table freeing: asynchronous free by RCU
 - single table freeing: IPI + synchronous free (sketched below)
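
For reference, the current single-table path looks roughly like the
following (simplified from mm/mmu_gather.c): the IPI broadcast only waits
for table walkers that run with IRQs disabled.

  static void tlb_remove_table_smp_sync(void *arg)
  {
  	/* Simply deliver the interrupt */
  }

  void tlb_remove_table_sync_one(void)
  {
  	/*
  	 * Not an RCU grace period: IPI all CPUs so that anyone walking
  	 * the tables with IRQs disabled has finished before the free.
  	 */
  	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
  }

  static void tlb_remove_table_one(void *table)
  {
  	tlb_remove_table_sync_one();
  	__tlb_remove_table(table);
  }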

In this way, the page table can be traversed locklessly by disabling IRQs in
paths such as fast GUP. But this is not enough to free empty PTE page table
pages in paths other than the munmap and exit_mmap paths, because the IPI
cannot be synchronized with rcu_read_lock() in pte_offset_map{_lock}().
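
As a minimal sketch (assuming the rcu_read_lock()-based implementation of
pte_offset_map() in mm/pgtable-generic.c; pmd and addr stand for the
walker's current position), such a reader is protected only by RCU and
never disables IRQs, so the IPI broadcast does not wait for it:

  	pte_t *pte = pte_offset_map(pmd, addr);	/* rcu_read_lock() inside */

  	if (pte) {
  		/* ... examine PTEs without disabling IRQs ... */
  		pte_unmap(pte);				/* rcu_read_unlock() */
  	}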

In preparation for supporting empty PTE page table page reclamation, let
single table freeing also go through RCU like batch table freeing. Then we
can also use pte_offset_map() etc. to prevent the PTE page from being freed.

Like pte_free_defer(), we can also safely use ptdesc->pt_rcu_head to free
the page table pages (see the ptdesc layout sketch after this list):

 - The pt_rcu_head is unioned with pt_list and pmd_huge_pte.

 - For pt_list, it is used to manage the PGD page in x86. Fortunately,
   tlb_remove_table() will not be used to free PGD pages, so it is safe
   to use pt_rcu_head.

 - For pmd_huge_pte, we will do zap_deposited_table() before freeing the
   PMD page, so it is also safe.
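
For reference, the relevant part of struct ptdesc (an excerpt from
include/linux/mm_types.h, trimmed to the union mentioned above):

  struct ptdesc {
  	unsigned long __page_flags;

  	union {
  		struct rcu_head pt_rcu_head;
  		struct list_head pt_list;
  		struct {
  			unsigned long _pt_pad_1;
  			pgtable_t pmd_huge_pte;
  		};
  	};
  	/* ... */
  };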

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 arch/x86/include/asm/tlb.h | 23 +++++++++++++++++++++++
 arch/x86/kernel/paravirt.c |  7 +++++++
 arch/x86/mm/pgtable.c      |  2 +-
 mm/mmu_gather.c            |  2 +-
 4 files changed, 32 insertions(+), 2 deletions(-)

Patch

diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 580636cdc257..9182db1e0264 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -34,4 +34,27 @@  static inline void __tlb_remove_table(void *table)
 	free_page_and_swap_cache(table);
 }
 
+#ifndef CONFIG_PT_RECLAIM
+static inline void __tlb_remove_table_one(void *table)
+{
+	free_page_and_swap_cache(table);
+}
+#else
+static inline void __tlb_remove_table_one_rcu(struct rcu_head *head)
+{
+	struct page *page;
+
+	page = container_of(head, struct page, rcu_head);
+	free_page_and_swap_cache(page);
+}
+
+static inline void __tlb_remove_table_one(void *table)
+{
+	struct page *page;
+
+	page = table;
+	call_rcu(&page->rcu_head, __tlb_remove_table_one_rcu);
+}
+#endif /* CONFIG_PT_RECLAIM */
+
 #endif /* _ASM_X86_TLB_H */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 5358d43886ad..199b9a3813b4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -60,10 +60,17 @@  void __init native_pv_lock_init(void)
 		static_branch_disable(&virt_spin_lock_key);
 }
 
+#ifndef CONFIG_PT_RECLAIM
 static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
 	tlb_remove_page(tlb, table);
 }
+#else
+static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+	tlb_remove_table(tlb, table);
+}
+#endif
 
 struct static_key paravirt_steal_enabled;
 struct static_key paravirt_steal_rq_enabled;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 93e54ba91fbf..cd5bf2157611 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -18,7 +18,7 @@  EXPORT_SYMBOL(physical_mask);
 #define PGTABLE_HIGHMEM 0
 #endif
 
-#ifndef CONFIG_PARAVIRT
+#if !defined(CONFIG_PARAVIRT) && !defined(CONFIG_PT_RECLAIM)
 static inline
 void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
 {
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 99b3e9408aa0..1a8f7b8781a2 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -314,7 +314,7 @@  static inline void tlb_table_invalidate(struct mmu_gather *tlb)
 static void tlb_remove_table_one(void *table)
 {
 	tlb_remove_table_sync_one();
-	__tlb_remove_table(table);
+	__tlb_remove_table_one(table);
 }
 
 static void tlb_table_flush(struct mmu_gather *tlb)