Message ID | 0e8f0b3835c15e99145e0006ac1020ae45a2b166.1735549103.git.zhengqi.arch@bytedance.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | move pagetable_*_dtor() to __tlb_remove_table() | expand |
On 30/12/2024 10:07, Qi Zheng wrote: > static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) > { > - if (riscv_use_sbi_for_rfence()) > + if (riscv_use_sbi_for_rfence()) { > tlb_remove_ptdesc(tlb, pt); > - else > + } else { > + pagetable_dtor(pt); > tlb_remove_page_ptdesc(tlb, pt); I find the imbalance pretty confusing: pagetable_dtor() is called explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages() to ensure that the dtor is always called just before freeing, and remove the extra handling from arch code? I may well be missing something, I'm not super familiar with the tlb handling code. - Kevin
Hi Kevin, On 2025/1/3 00:53, Kevin Brodsky wrote: > On 30/12/2024 10:07, Qi Zheng wrote: >> static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) >> { >> - if (riscv_use_sbi_for_rfence()) >> + if (riscv_use_sbi_for_rfence()) { >> tlb_remove_ptdesc(tlb, pt); >> - else >> + } else { >> + pagetable_dtor(pt); >> tlb_remove_page_ptdesc(tlb, pt); > > I find the imbalance pretty confusing: pagetable_dtor() is called > explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). > Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? > Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages() > to ensure that the dtor is always called just before freeing, and remove In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() and call pagetable_dtor() to dtor the page table pages. But __tlb_batch_free_encoded_pages() is also used to free normal pages (not page table pages), so I don't want to add overhead there. But now I think maybe we can do this in tlb_remove_page_ptdesc(), like this: diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index f1ce5b7b28f22..e45c7e91dcbf9 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) #define __pte_free_tlb(tlb, pte, address) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ } while (0) diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 40e42a0e71673..9903449f45cff 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -89,7 +89,6 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor((page_ptdesc(pte))); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 7211dff8c969e..de5b3f5c85d1c 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -57,7 +57,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) #define __pte_free_tlb(tlb, pte, address) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ } while (0) diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 2b626cb3ad0ae..731cc8f0731d3 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -19,7 +19,6 @@ extern const char bad_pmd_string[]; #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ } while (0) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 36d9805033c4b..964ad514be281 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -56,7 +56,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #define __pte_free_tlb(tlb, pte, address) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ } while (0) diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 12a536b7bfbd4..ef6b4b8301ac6 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -30,7 +30,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 596e2355824e3..9361205610910 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -68,7 +68,6 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index c8907b8317115..61c26576614da 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -25,12 +25,10 @@ */ static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { - if (riscv_use_sbi_for_rfence()) { + if (riscv_use_sbi_for_rfence()) tlb_remove_ptdesc(tlb, pt); - } else { - pagetable_dtor(pt); + else tlb_remove_page_ptdesc(tlb, pt); - } } static inline void pmd_populate_kernel(struct mm_struct *mm, diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 96d938fdf2244..5b5c73e9fdb4b 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -34,7 +34,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index f0af23c3aeb2b..4fd59fb184818 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -27,7 +27,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *); #define __pte_free_tlb(tlb, pte, address) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) @@ -35,7 +34,6 @@ do { \ #define __pmd_free_tlb(tlb, pmd, address) \ do { \ - pagetable_dtor(virt_to_ptdesc(pmd)); \ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ } while (0) @@ -43,7 +41,6 @@ do { \ #define __pud_free_tlb(tlb, pud, address) \ do { \ - pagetable_dtor(virt_to_ptdesc(pud)); \ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ } while (0) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index a96d4b440f3da..a59205863f431 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) /* Like tlb_remove_ptdesc, but for page-like page directories. */ static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) { + pagetable_dtor(pt); tlb_remove_page(tlb, ptdesc_page(pt)); } This avoids explicitly calling pagetable_dtor() in the architecture code. If that makes sense, I can send a formal separate patch to do this. Thanks, Qi > the extra handling from arch code? I may well be missing something, I'm > not super familiar with the tlb handling code > > - Kevin
On 03/01/2025 04:48, Qi Zheng wrote: > Hi Kevin, > > On 2025/1/3 00:53, Kevin Brodsky wrote: >> On 30/12/2024 10:07, Qi Zheng wrote: >>> static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, >>> void *pt) >>> { >>> - if (riscv_use_sbi_for_rfence()) >>> + if (riscv_use_sbi_for_rfence()) { >>> tlb_remove_ptdesc(tlb, pt); >>> - else >>> + } else { >>> + pagetable_dtor(pt); >>> tlb_remove_page_ptdesc(tlb, pt); >> >> I find the imbalance pretty confusing: pagetable_dtor() is called >> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). >> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? >> Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages() >> to ensure that the dtor is always called just before freeing, and remove > > In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() > and call pagetable_dtor() to dtor the page table pages. > But __tlb_batch_free_encoded_pages() is also used to free normal pages > (not page table pages), so I don't want to add overhead there. Interesting, can a tlb batch refer to pages than are not PTPs then? > > But now I think maybe we can do this in tlb_remove_page_ptdesc(), like > this: > > diff --git a/arch/csky/include/asm/pgalloc.h > b/arch/csky/include/asm/pgalloc.h > index f1ce5b7b28f22..e45c7e91dcbf9 100644 > --- a/arch/csky/include/asm/pgalloc.h > +++ b/arch/csky/include/asm/pgalloc.h > @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) > > #define __pte_free_tlb(tlb, pte, address) \ > do { \ > - pagetable_dtor(page_ptdesc(pte)); \ > tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ > } while (0) > > [...] > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index a96d4b440f3da..a59205863f431 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct > mmu_gather *tlb, void *pt) > /* Like tlb_remove_ptdesc, but for page-like page directories. */ > static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, > struct ptdesc *pt) > { > + pagetable_dtor(pt); > tlb_remove_page(tlb, ptdesc_page(pt)); > } I think this is an interesting idea, it does make arch code easier to follow. OTOH it would have been more natural to me to call pagetable_dtor() from tlb_remove_page(). I can however see that this doesn't work, because tlb_remove_table() is defined as tlb_remove_page() if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me back to my earlier question: in that case, aren't we missing a call to pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())? - Kevin
On 2025/1/3 16:02, Kevin Brodsky wrote: > On 03/01/2025 04:48, Qi Zheng wrote: >> Hi Kevin, >> >> On 2025/1/3 00:53, Kevin Brodsky wrote: >>> On 30/12/2024 10:07, Qi Zheng wrote: >>>> static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, >>>> void *pt) >>>> { >>>> - if (riscv_use_sbi_for_rfence()) >>>> + if (riscv_use_sbi_for_rfence()) { >>>> tlb_remove_ptdesc(tlb, pt); >>>> - else >>>> + } else { >>>> + pagetable_dtor(pt); >>>> tlb_remove_page_ptdesc(tlb, pt); >>> >>> I find the imbalance pretty confusing: pagetable_dtor() is called >>> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). >>> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? >>> Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages() >>> to ensure that the dtor is always called just before freeing, and remove >> >> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() >> and call pagetable_dtor() to dtor the page table pages. >> But __tlb_batch_free_encoded_pages() is also used to free normal pages >> (not page table pages), so I don't want to add overhead there. > > Interesting, can a tlb batch refer to pages than are not PTPs then? Yes, you can see the caller of __tlb_remove_folio_pages() or tlb_remove_page_size(). > >> >> But now I think maybe we can do this in tlb_remove_page_ptdesc(), like >> this: >> >> diff --git a/arch/csky/include/asm/pgalloc.h >> b/arch/csky/include/asm/pgalloc.h >> index f1ce5b7b28f22..e45c7e91dcbf9 100644 >> --- a/arch/csky/include/asm/pgalloc.h >> +++ b/arch/csky/include/asm/pgalloc.h >> @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) >> >> #define __pte_free_tlb(tlb, pte, address) \ >> do { \ >> - pagetable_dtor(page_ptdesc(pte)); \ >> tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ >> } while (0) >> >> [...] >> >> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >> index a96d4b440f3da..a59205863f431 100644 >> --- a/include/asm-generic/tlb.h >> +++ b/include/asm-generic/tlb.h >> @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct >> mmu_gather *tlb, void *pt) >> /* Like tlb_remove_ptdesc, but for page-like page directories. */ >> static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, >> struct ptdesc *pt) >> { >> + pagetable_dtor(pt); >> tlb_remove_page(tlb, ptdesc_page(pt)); >> } > > I think this is an interesting idea, it does make arch code easier to > follow. OTOH it would have been more natural to me to call > pagetable_dtor() from tlb_remove_page(). I can however see that this > doesn't work, because tlb_remove_table() is defined as tlb_remove_page() > if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me > back to my earlier question: in that case, aren't we missing a call to > pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())? Thank you for pointing this out! Now, there are the following architectures selected CONFIG_MMU_GATHER_RCU_TABLE_FREE: 1. arm: select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE 2. arm64: select MMU_GATHER_RCU_TABLE_FREE 3. powerpc: select MMU_GATHER_RCU_TABLE_FREE 4. riscv: select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU 5. s390: select MMU_GATHER_RCU_TABLE_FREE 6. sparc: select MMU_GATHER_RCU_TABLE_FREE if SMP 7. x86: select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT If CONFIG_MMU_GATHER_TABLE_FREE is selected, an architecture is expected to provide __tlb_remove_table(). This patch series modifies the __tlb_remove_table() in arm, arm64, riscv, s390 and x86. Among them, arm64 and s390 unconditionally select MMU_GATHER_RCU_TABLE_FREE, so we only need to double-check arm, riscv and x86. For x86, it was called tlb_remove_page() in the non-PARAVIRT case, and I added pagetable_dtor() for it explicitly (see patch #11), so this should be no problem. For riscv, it will only call tlb_remove_ptdesc() in the case of SMP && MMU, so this should be no problem. For arm, the call to pagetable_dtor() is indeed missed in the non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we can't fix this by adding pagetable_dtor() to tlb_remove_table(), because some architectures call tlb_remove_table() but don't support page table statistics, like sparc. So a more direct fix might be: diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index a59205863f431..0a131444a18ca 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -500,6 +500,9 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { +#ifndef CONFIG_MMU_GATHER_TABLE_FREE + pagetable_dtor(pt); +#endif tlb_remove_table(tlb, pt); } Or fix it directly in arm? Like the following: diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index ea4fbe7b17f6f..cf5d0ca021440 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -43,6 +43,9 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE); #endif +#ifndef CONFIG_MMU_GATHER_TABLE_FREE + pagetable_dtor(ptdesc); +#endif tlb_remove_ptdesc(tlb, ptdesc); } Thanks, Qi > > - Kevin
On 2025/1/3 17:13, Qi Zheng wrote: > > > On 2025/1/3 16:02, Kevin Brodsky wrote: >> On 03/01/2025 04:48, Qi Zheng wrote: >>> Hi Kevin, >>> >>> On 2025/1/3 00:53, Kevin Brodsky wrote: >>>> On 30/12/2024 10:07, Qi Zheng wrote: >>>>> static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, >>>>> void *pt) >>>>> { >>>>> - if (riscv_use_sbi_for_rfence()) >>>>> + if (riscv_use_sbi_for_rfence()) { >>>>> tlb_remove_ptdesc(tlb, pt); >>>>> - else >>>>> + } else { >>>>> + pagetable_dtor(pt); >>>>> tlb_remove_page_ptdesc(tlb, pt); >>>> >>>> I find the imbalance pretty confusing: pagetable_dtor() is called >>>> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). >>>> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? >>>> Could we not call pagetable_dtor() from >>>> __tlb_batch_free_encoded_pages() >>>> to ensure that the dtor is always called just before freeing, and >>>> remove >>> >>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() >>> and call pagetable_dtor() to dtor the page table pages. >>> But __tlb_batch_free_encoded_pages() is also used to free normal pages >>> (not page table pages), so I don't want to add overhead there. >> >> Interesting, can a tlb batch refer to pages than are not PTPs then? > > Yes, you can see the caller of __tlb_remove_folio_pages() or > tlb_remove_page_size(). > >> >>> >>> But now I think maybe we can do this in tlb_remove_page_ptdesc(), like >>> this: >>> >>> diff --git a/arch/csky/include/asm/pgalloc.h >>> b/arch/csky/include/asm/pgalloc.h >>> index f1ce5b7b28f22..e45c7e91dcbf9 100644 >>> --- a/arch/csky/include/asm/pgalloc.h >>> +++ b/arch/csky/include/asm/pgalloc.h >>> @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) >>> >>> #define __pte_free_tlb(tlb, pte, address) \ >>> do { \ >>> - pagetable_dtor(page_ptdesc(pte)); \ >>> tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ >>> } while (0) >>> >>> [...] >>> >>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >>> index a96d4b440f3da..a59205863f431 100644 >>> --- a/include/asm-generic/tlb.h >>> +++ b/include/asm-generic/tlb.h >>> @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct >>> mmu_gather *tlb, void *pt) >>> /* Like tlb_remove_ptdesc, but for page-like page directories. */ >>> static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, >>> struct ptdesc *pt) >>> { >>> + pagetable_dtor(pt); >>> tlb_remove_page(tlb, ptdesc_page(pt)); >>> } >> >> I think this is an interesting idea, it does make arch code easier to >> follow. OTOH it would have been more natural to me to call >> pagetable_dtor() from tlb_remove_page(). I can however see that this >> doesn't work, because tlb_remove_table() is defined as tlb_remove_page() >> if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me >> back to my earlier question: in that case, aren't we missing a call to >> pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())? > > Thank you for pointing this out! > > Now, there are the following architectures selected > CONFIG_MMU_GATHER_RCU_TABLE_FREE: > > 1. arm: select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE > 2. arm64: select MMU_GATHER_RCU_TABLE_FREE > 3. powerpc: select MMU_GATHER_RCU_TABLE_FREE > 4. riscv: select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU > 5. s390: select MMU_GATHER_RCU_TABLE_FREE > 6. sparc: select MMU_GATHER_RCU_TABLE_FREE if SMP > 7. x86: select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT > > If CONFIG_MMU_GATHER_TABLE_FREE is selected, an architecture is expected > to provide __tlb_remove_table(). This patch series modifies the > __tlb_remove_table() in arm, arm64, riscv, s390 and x86. Among them, > arm64 and s390 unconditionally select MMU_GATHER_RCU_TABLE_FREE, so we > only need to double-check arm, riscv and x86. > > For x86, it was called tlb_remove_page() in the non-PARAVIRT case, and I > added pagetable_dtor() for it explicitly (see patch #11), so this should > be no problem. > > For riscv, it will only call tlb_remove_ptdesc() in the case of > SMP && MMU, so this should be no problem. > > For arm, the call to pagetable_dtor() is indeed missed in the > non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we > can't fix this by adding pagetable_dtor() to tlb_remove_table(), > because some architectures call tlb_remove_table() but don't support > page table statistics, like sparc. > > So a more direct fix might be: > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index a59205863f431..0a131444a18ca 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -500,6 +500,9 @@ static inline void tlb_remove_page(struct mmu_gather > *tlb, struct page *page) > > static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) > { > +#ifndef CONFIG_MMU_GATHER_TABLE_FREE > + pagetable_dtor(pt); > +#endif > tlb_remove_table(tlb, pt); > } > > Or fix it directly in arm? Like the following: > > diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h > index ea4fbe7b17f6f..cf5d0ca021440 100644 > --- a/arch/arm/include/asm/tlb.h > +++ b/arch/arm/include/asm/tlb.h > @@ -43,6 +43,9 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, > unsigned long addr) > __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE); > #endif > > +#ifndef CONFIG_MMU_GATHER_TABLE_FREE > + pagetable_dtor(ptdesc); > +#endif > tlb_remove_ptdesc(tlb, ptdesc); > } Or can we just not let tlb_remove_table() fall back to tlb_remove_page()? Like the following: diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index a59205863f431..354ffaa4bd120 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -195,8 +195,6 @@ * various ptep_get_and_clear() functions. */ -#ifdef CONFIG_MMU_GATHER_TABLE_FREE - struct mmu_table_batch { #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE struct rcu_head rcu; @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table) extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ - -/* - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based - * page directories and we can use the normal page batching to free them. - */ -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) - -#endif /* CONFIG_MMU_GATHER_TABLE_FREE */ - #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE /* * This allows an architecture that does not use the linux page-tables for > > Thanks, > Qi > >> >> - Kevin
On 03/01/2025 10:35, Qi Zheng wrote: > On 2025/1/3 17:13, Qi Zheng wrote: >> On 2025/1/3 16:02, Kevin Brodsky wrote: >>> On 03/01/2025 04:48, Qi Zheng wrote: >>>> [...] >>>> >>>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() >>>> and call pagetable_dtor() to dtor the page table pages. >>>> But __tlb_batch_free_encoded_pages() is also used to free normal pages >>>> (not page table pages), so I don't want to add overhead there. >>> >>> Interesting, can a tlb batch refer to pages than are not PTPs then? >> >> Yes, you can see the caller of __tlb_remove_folio_pages() or >> tlb_remove_page_size(). I had a brief look but clearly not a good enough one! I hadn't realised that "table" in tlb_remove_table() means PTP, while "page" in tlb_remove_page() can mean any page, and it's making more sense now. [...] >> >> For arm, the call to pagetable_dtor() is indeed missed in the >> non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we >> can't fix this by adding pagetable_dtor() to tlb_remove_table(), >> because some architectures call tlb_remove_table() but don't support >> page table statistics, like sparc. When I investigated this for my own series, I found that the only case where ctor/dtor are not called for page-sized page tables is 32-bit sparc (see table at the end of [1]). However only 64-bit sparc makes use of tlb_remove_table() (at PTE level, where ctor/dtor are already called). So really calling pagetable_dtor() from tlb_remove_table() in the non-MMU_GATHER_TABLE_FREE case seems to be the obvious thing to do. Once this is done, we should be able to replace all those confusing calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove the explicit call to pagetable_dtor(). AIUI this is essentially what Peter suggested on v3 [2]. [1] https://lore.kernel.org/linux-mm/20241219164425.2277022-1-kevin.brodsky@arm.com/ [2] https://lore.kernel.org/linux-mm/20250103111457.GC22934@noisy.programming.kicks-ass.net/ [...] > Or can we just not let tlb_remove_table() fall back to > tlb_remove_page()? Like the following: > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index a59205863f431..354ffaa4bd120 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -195,8 +195,6 @@ > * various ptep_get_and_clear() functions. > */ > > -#ifdef CONFIG_MMU_GATHER_TABLE_FREE > - > struct mmu_table_batch { > #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE > struct rcu_head rcu; > @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table) > > extern void tlb_remove_table(struct mmu_gather *tlb, void *table); > > -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ > - > -/* > - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have > page based > - * page directories and we can use the normal page batching to free > them. > - */ > -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) We still need a different implementation of tlb_remove_table() in this case. We could define it inline here: static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct page *page = table; pagetable_dtor(page_ptdesc(page)); tlb_remove_page(page); } - Kevin
Hi Kevin, On 2025/1/3 21:27, Kevin Brodsky wrote: > On 03/01/2025 10:35, Qi Zheng wrote: >> On 2025/1/3 17:13, Qi Zheng wrote: >>> On 2025/1/3 16:02, Kevin Brodsky wrote: >>>> On 03/01/2025 04:48, Qi Zheng wrote: >>>>> [...] >>>>> >>>>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() >>>>> and call pagetable_dtor() to dtor the page table pages. >>>>> But __tlb_batch_free_encoded_pages() is also used to free normal pages >>>>> (not page table pages), so I don't want to add overhead there. >>>> >>>> Interesting, can a tlb batch refer to pages than are not PTPs then? >>> >>> Yes, you can see the caller of __tlb_remove_folio_pages() or >>> tlb_remove_page_size(). > > I had a brief look but clearly not a good enough one! I hadn't realised > that "table" in tlb_remove_table() means PTP, while "page" in > tlb_remove_page() can mean any page, and it's making more sense now. > > [...] > >>> >>> For arm, the call to pagetable_dtor() is indeed missed in the >>> non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we >>> can't fix this by adding pagetable_dtor() to tlb_remove_table(), >>> because some architectures call tlb_remove_table() but don't support >>> page table statistics, like sparc. > > When I investigated this for my own series, I found that the only case > where ctor/dtor are not called for page-sized page tables is 32-bit > sparc (see table at the end of [1]). However only 64-bit sparc makes use > of tlb_remove_table() (at PTE level, where ctor/dtor are already called). Thanks for providing this information. > > So really calling pagetable_dtor() from tlb_remove_table() in the > non-MMU_GATHER_TABLE_FREE case seems to be the obvious thing to do. Right. Currently, only powerpc, sparc and x86 will directly call tlb_remove_table(), and all of them are in the MMU_GATHER_TABLE_FREE case. Therefore, I think the modification you mentioned below is feasible. In summary, currently only arm calls tlb_remove_table() in the non-MMU_GATHER_RCU_TABLE_FREE case. So I think we can add this fix directly to patch #8. If I haven't missed anything, I'll send an updated patch #8. > > Once this is done, we should be able to replace all those confusing > calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove > the explicit call to pagetable_dtor(). AIUI this is essentially what > Peter suggested on v3 [2]. Since this patch series is mainly for bug fix, I think that these things can be done in separate patch series later. > > [1] > https://lore.kernel.org/linux-mm/20241219164425.2277022-1-kevin.brodsky@arm.com/ > [2] > https://lore.kernel.org/linux-mm/20250103111457.GC22934@noisy.programming.kicks-ass.net/ > > [...] > >> Or can we just not let tlb_remove_table() fall back to >> tlb_remove_page()? Like the following: >> >> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >> index a59205863f431..354ffaa4bd120 100644 >> --- a/include/asm-generic/tlb.h >> +++ b/include/asm-generic/tlb.h >> @@ -195,8 +195,6 @@ >> * various ptep_get_and_clear() functions. >> */ >> >> -#ifdef CONFIG_MMU_GATHER_TABLE_FREE >> - >> struct mmu_table_batch { >> #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE >> struct rcu_head rcu; >> @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table) >> >> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); >> >> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ >> - >> -/* >> - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have >> page based >> - * page directories and we can use the normal page batching to free >> them. >> - */ >> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) > > We still need a different implementation of tlb_remove_table() in this > case. We could define it inline here: > > static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) > { > struct page *page = table; > > pagetable_dtor(page_ptdesc(page)); > tlb_remove_page(page); > } Right. As I said above, will add this to the updated patch #8. Thanks! > > - Kevin
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index b6793c5c99296..c8907b8317115 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -15,12 +15,22 @@ #define __HAVE_ARCH_PUD_FREE #include <asm-generic/pgalloc.h> +/* + * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to + * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use + * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this + * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the + * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * for more details. + */ static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { - if (riscv_use_sbi_for_rfence()) + if (riscv_use_sbi_for_rfence()) { tlb_remove_ptdesc(tlb, pt); - else + } else { + pagetable_dtor(pt); tlb_remove_page_ptdesc(tlb, pt); + } } static inline void pmd_populate_kernel(struct mm_struct *mm, @@ -97,23 +107,15 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long addr) { - if (pgtable_l4_enabled) { - struct ptdesc *ptdesc = virt_to_ptdesc(pud); - - pagetable_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); - } + if (pgtable_l4_enabled) + riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr) { - if (pgtable_l5_enabled) { - struct ptdesc *ptdesc = virt_to_ptdesc(p4d); - - pagetable_dtor(ptdesc); + if (pgtable_l5_enabled) riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); - } } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -142,10 +144,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { - struct ptdesc *ptdesc = virt_to_ptdesc(pmd); - - pagetable_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -153,10 +152,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - struct ptdesc *ptdesc = page_ptdesc(pte); - - pagetable_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, page_ptdesc(pte)); } #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 1f6c38420d8e0..ded8724b3c4f7 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -11,19 +11,13 @@ struct mmu_gather; static void tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_MMU -#include <linux/swap.h> -/* - * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to - * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use - * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this - * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the - * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h - * for more details. - */ static inline void __tlb_remove_table(void *table) { - free_page_and_swap_cache(table); + struct ptdesc *ptdesc = (struct ptdesc *)table; + + pagetable_dtor(ptdesc); + pagetable_free(ptdesc); } #endif /* CONFIG_MMU */
Move pagetable_dtor() to __tlb_remove_table(), so that ptlock and page table pages can be freed together (regardless of whether RCU is used). This prevents the use-after-free problem where the ptlock is freed immediately but the page table pages is freed later via RCU. Page tables shouldn't have swap cache, so use pagetable_free() instead of free_page_and_swap_cache() to free page table pages. By the way, move the comment above __tlb_remove_table() to riscv_tlb_remove_ptdesc(), it will be more appropriate. Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: linux-riscv@lists.infradead.org --- arch/riscv/include/asm/pgalloc.h | 38 ++++++++++++++------------------ arch/riscv/include/asm/tlb.h | 14 ++++-------- 2 files changed, 21 insertions(+), 31 deletions(-)