Message ID | 0e8f0b3835c15e99145e0006ac1020ae45a2b166.1735549103.git.zhengqi.arch@bytedance.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | move pagetable_*_dtor() to __tlb_remove_table() | expand |
On 30/12/2024 10:07, Qi Zheng wrote: > static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) > { > - if (riscv_use_sbi_for_rfence()) > + if (riscv_use_sbi_for_rfence()) { > tlb_remove_ptdesc(tlb, pt); > - else > + } else { > + pagetable_dtor(pt); > tlb_remove_page_ptdesc(tlb, pt); I find the imbalance pretty confusing: pagetable_dtor() is called explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages() to ensure that the dtor is always called just before freeing, and remove the extra handling from arch code? I may well be missing something, I'm not super familiar with the tlb handling code. - Kevin
Hi Kevin, On 2025/1/3 00:53, Kevin Brodsky wrote: > On 30/12/2024 10:07, Qi Zheng wrote: >> static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) >> { >> - if (riscv_use_sbi_for_rfence()) >> + if (riscv_use_sbi_for_rfence()) { >> tlb_remove_ptdesc(tlb, pt); >> - else >> + } else { >> + pagetable_dtor(pt); >> tlb_remove_page_ptdesc(tlb, pt); > > I find the imbalance pretty confusing: pagetable_dtor() is called > explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). > Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? > Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages() > to ensure that the dtor is always called just before freeing, and remove In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() and call pagetable_dtor() to dtor the page table pages. But __tlb_batch_free_encoded_pages() is also used to free normal pages (not page table pages), so I don't want to add overhead there. But now I think maybe we can do this in tlb_remove_page_ptdesc(), like this: diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index f1ce5b7b28f22..e45c7e91dcbf9 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) #define __pte_free_tlb(tlb, pte, address) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ } while (0) diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 40e42a0e71673..9903449f45cff 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -89,7 +89,6 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor((page_ptdesc(pte))); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 7211dff8c969e..de5b3f5c85d1c 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -57,7 +57,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) #define __pte_free_tlb(tlb, pte, address) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ } while (0) diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 2b626cb3ad0ae..731cc8f0731d3 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -19,7 +19,6 @@ extern const char bad_pmd_string[]; #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ } while (0) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 36d9805033c4b..964ad514be281 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -56,7 +56,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #define __pte_free_tlb(tlb, pte, address) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), page_ptdesc(pte)); \ } while (0) diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 12a536b7bfbd4..ef6b4b8301ac6 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -30,7 +30,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 596e2355824e3..9361205610910 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -68,7 +68,6 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index c8907b8317115..61c26576614da 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -25,12 +25,10 @@ */ static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { - if (riscv_use_sbi_for_rfence()) { + if (riscv_use_sbi_for_rfence()) tlb_remove_ptdesc(tlb, pt); - } else { - pagetable_dtor(pt); + else tlb_remove_page_ptdesc(tlb, pt); - } } static inline void pmd_populate_kernel(struct mm_struct *mm, diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 96d938fdf2244..5b5c73e9fdb4b 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -34,7 +34,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, #define __pte_free_tlb(tlb, pte, addr) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index f0af23c3aeb2b..4fd59fb184818 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -27,7 +27,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *); #define __pte_free_tlb(tlb, pte, address) \ do { \ - pagetable_dtor(page_ptdesc(pte)); \ tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte))); \ } while (0) @@ -35,7 +34,6 @@ do { \ #define __pmd_free_tlb(tlb, pmd, address) \ do { \ - pagetable_dtor(virt_to_ptdesc(pmd)); \ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ } while (0) @@ -43,7 +41,6 @@ do { \ #define __pud_free_tlb(tlb, pud, address) \ do { \ - pagetable_dtor(virt_to_ptdesc(pud)); \ tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ } while (0) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index a96d4b440f3da..a59205863f431 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) /* Like tlb_remove_ptdesc, but for page-like page directories. */ static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) { + pagetable_dtor(pt); tlb_remove_page(tlb, ptdesc_page(pt)); } This avoids explicitly calling pagetable_dtor() in the architecture code. If that makes sense, I can send a formal separate patch to do this. Thanks, Qi > the extra handling from arch code? I may well be missing something, I'm > not super familiar with the tlb handling code > > - Kevin
On 03/01/2025 04:48, Qi Zheng wrote: > Hi Kevin, > > On 2025/1/3 00:53, Kevin Brodsky wrote: >> On 30/12/2024 10:07, Qi Zheng wrote: >>> static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, >>> void *pt) >>> { >>> - if (riscv_use_sbi_for_rfence()) >>> + if (riscv_use_sbi_for_rfence()) { >>> tlb_remove_ptdesc(tlb, pt); >>> - else >>> + } else { >>> + pagetable_dtor(pt); >>> tlb_remove_page_ptdesc(tlb, pt); >> >> I find the imbalance pretty confusing: pagetable_dtor() is called >> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). >> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? >> Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages() >> to ensure that the dtor is always called just before freeing, and remove > > In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() > and call pagetable_dtor() to dtor the page table pages. > But __tlb_batch_free_encoded_pages() is also used to free normal pages > (not page table pages), so I don't want to add overhead there. Interesting, can a tlb batch refer to pages than are not PTPs then? > > But now I think maybe we can do this in tlb_remove_page_ptdesc(), like > this: > > diff --git a/arch/csky/include/asm/pgalloc.h > b/arch/csky/include/asm/pgalloc.h > index f1ce5b7b28f22..e45c7e91dcbf9 100644 > --- a/arch/csky/include/asm/pgalloc.h > +++ b/arch/csky/include/asm/pgalloc.h > @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) > > #define __pte_free_tlb(tlb, pte, address) \ > do { \ > - pagetable_dtor(page_ptdesc(pte)); \ > tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ > } while (0) > > [...] > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index a96d4b440f3da..a59205863f431 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct > mmu_gather *tlb, void *pt) > /* Like tlb_remove_ptdesc, but for page-like page directories. */ > static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, > struct ptdesc *pt) > { > + pagetable_dtor(pt); > tlb_remove_page(tlb, ptdesc_page(pt)); > } I think this is an interesting idea, it does make arch code easier to follow. OTOH it would have been more natural to me to call pagetable_dtor() from tlb_remove_page(). I can however see that this doesn't work, because tlb_remove_table() is defined as tlb_remove_page() if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me back to my earlier question: in that case, aren't we missing a call to pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())? - Kevin
On 2025/1/3 16:02, Kevin Brodsky wrote: > On 03/01/2025 04:48, Qi Zheng wrote: >> Hi Kevin, >> >> On 2025/1/3 00:53, Kevin Brodsky wrote: >>> On 30/12/2024 10:07, Qi Zheng wrote: >>>> static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, >>>> void *pt) >>>> { >>>> - if (riscv_use_sbi_for_rfence()) >>>> + if (riscv_use_sbi_for_rfence()) { >>>> tlb_remove_ptdesc(tlb, pt); >>>> - else >>>> + } else { >>>> + pagetable_dtor(pt); >>>> tlb_remove_page_ptdesc(tlb, pt); >>> >>> I find the imbalance pretty confusing: pagetable_dtor() is called >>> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). >>> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? >>> Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages() >>> to ensure that the dtor is always called just before freeing, and remove >> >> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() >> and call pagetable_dtor() to dtor the page table pages. >> But __tlb_batch_free_encoded_pages() is also used to free normal pages >> (not page table pages), so I don't want to add overhead there. > > Interesting, can a tlb batch refer to pages than are not PTPs then? Yes, you can see the caller of __tlb_remove_folio_pages() or tlb_remove_page_size(). > >> >> But now I think maybe we can do this in tlb_remove_page_ptdesc(), like >> this: >> >> diff --git a/arch/csky/include/asm/pgalloc.h >> b/arch/csky/include/asm/pgalloc.h >> index f1ce5b7b28f22..e45c7e91dcbf9 100644 >> --- a/arch/csky/include/asm/pgalloc.h >> +++ b/arch/csky/include/asm/pgalloc.h >> @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) >> >> #define __pte_free_tlb(tlb, pte, address) \ >> do { \ >> - pagetable_dtor(page_ptdesc(pte)); \ >> tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ >> } while (0) >> >> [...] >> >> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >> index a96d4b440f3da..a59205863f431 100644 >> --- a/include/asm-generic/tlb.h >> +++ b/include/asm-generic/tlb.h >> @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct >> mmu_gather *tlb, void *pt) >> /* Like tlb_remove_ptdesc, but for page-like page directories. */ >> static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, >> struct ptdesc *pt) >> { >> + pagetable_dtor(pt); >> tlb_remove_page(tlb, ptdesc_page(pt)); >> } > > I think this is an interesting idea, it does make arch code easier to > follow. OTOH it would have been more natural to me to call > pagetable_dtor() from tlb_remove_page(). I can however see that this > doesn't work, because tlb_remove_table() is defined as tlb_remove_page() > if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me > back to my earlier question: in that case, aren't we missing a call to > pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())? Thank you for pointing this out! Now, there are the following architectures selected CONFIG_MMU_GATHER_RCU_TABLE_FREE: 1. arm: select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE 2. arm64: select MMU_GATHER_RCU_TABLE_FREE 3. powerpc: select MMU_GATHER_RCU_TABLE_FREE 4. riscv: select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU 5. s390: select MMU_GATHER_RCU_TABLE_FREE 6. sparc: select MMU_GATHER_RCU_TABLE_FREE if SMP 7. x86: select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT If CONFIG_MMU_GATHER_TABLE_FREE is selected, an architecture is expected to provide __tlb_remove_table(). This patch series modifies the __tlb_remove_table() in arm, arm64, riscv, s390 and x86. Among them, arm64 and s390 unconditionally select MMU_GATHER_RCU_TABLE_FREE, so we only need to double-check arm, riscv and x86. For x86, it was called tlb_remove_page() in the non-PARAVIRT case, and I added pagetable_dtor() for it explicitly (see patch #11), so this should be no problem. For riscv, it will only call tlb_remove_ptdesc() in the case of SMP && MMU, so this should be no problem. For arm, the call to pagetable_dtor() is indeed missed in the non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we can't fix this by adding pagetable_dtor() to tlb_remove_table(), because some architectures call tlb_remove_table() but don't support page table statistics, like sparc. So a more direct fix might be: diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index a59205863f431..0a131444a18ca 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -500,6 +500,9 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { +#ifndef CONFIG_MMU_GATHER_TABLE_FREE + pagetable_dtor(pt); +#endif tlb_remove_table(tlb, pt); } Or fix it directly in arm? Like the following: diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index ea4fbe7b17f6f..cf5d0ca021440 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -43,6 +43,9 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE); #endif +#ifndef CONFIG_MMU_GATHER_TABLE_FREE + pagetable_dtor(ptdesc); +#endif tlb_remove_ptdesc(tlb, ptdesc); } Thanks, Qi > > - Kevin
On 2025/1/3 17:13, Qi Zheng wrote: > > > On 2025/1/3 16:02, Kevin Brodsky wrote: >> On 03/01/2025 04:48, Qi Zheng wrote: >>> Hi Kevin, >>> >>> On 2025/1/3 00:53, Kevin Brodsky wrote: >>>> On 30/12/2024 10:07, Qi Zheng wrote: >>>>> static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, >>>>> void *pt) >>>>> { >>>>> - if (riscv_use_sbi_for_rfence()) >>>>> + if (riscv_use_sbi_for_rfence()) { >>>>> tlb_remove_ptdesc(tlb, pt); >>>>> - else >>>>> + } else { >>>>> + pagetable_dtor(pt); >>>>> tlb_remove_page_ptdesc(tlb, pt); >>>> >>>> I find the imbalance pretty confusing: pagetable_dtor() is called >>>> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc(). >>>> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected? >>>> Could we not call pagetable_dtor() from >>>> __tlb_batch_free_encoded_pages() >>>> to ensure that the dtor is always called just before freeing, and >>>> remove >>> >>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() >>> and call pagetable_dtor() to dtor the page table pages. >>> But __tlb_batch_free_encoded_pages() is also used to free normal pages >>> (not page table pages), so I don't want to add overhead there. >> >> Interesting, can a tlb batch refer to pages than are not PTPs then? > > Yes, you can see the caller of __tlb_remove_folio_pages() or > tlb_remove_page_size(). > >> >>> >>> But now I think maybe we can do this in tlb_remove_page_ptdesc(), like >>> this: >>> >>> diff --git a/arch/csky/include/asm/pgalloc.h >>> b/arch/csky/include/asm/pgalloc.h >>> index f1ce5b7b28f22..e45c7e91dcbf9 100644 >>> --- a/arch/csky/include/asm/pgalloc.h >>> +++ b/arch/csky/include/asm/pgalloc.h >>> @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) >>> >>> #define __pte_free_tlb(tlb, pte, address) \ >>> do { \ >>> - pagetable_dtor(page_ptdesc(pte)); \ >>> tlb_remove_page_ptdesc(tlb, page_ptdesc(pte)); \ >>> } while (0) >>> >>> [...] >>> >>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >>> index a96d4b440f3da..a59205863f431 100644 >>> --- a/include/asm-generic/tlb.h >>> +++ b/include/asm-generic/tlb.h >>> @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct >>> mmu_gather *tlb, void *pt) >>> /* Like tlb_remove_ptdesc, but for page-like page directories. */ >>> static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, >>> struct ptdesc *pt) >>> { >>> + pagetable_dtor(pt); >>> tlb_remove_page(tlb, ptdesc_page(pt)); >>> } >> >> I think this is an interesting idea, it does make arch code easier to >> follow. OTOH it would have been more natural to me to call >> pagetable_dtor() from tlb_remove_page(). I can however see that this >> doesn't work, because tlb_remove_table() is defined as tlb_remove_page() >> if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me >> back to my earlier question: in that case, aren't we missing a call to >> pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())? > > Thank you for pointing this out! > > Now, there are the following architectures selected > CONFIG_MMU_GATHER_RCU_TABLE_FREE: > > 1. arm: select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE > 2. arm64: select MMU_GATHER_RCU_TABLE_FREE > 3. powerpc: select MMU_GATHER_RCU_TABLE_FREE > 4. riscv: select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU > 5. s390: select MMU_GATHER_RCU_TABLE_FREE > 6. sparc: select MMU_GATHER_RCU_TABLE_FREE if SMP > 7. x86: select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT > > If CONFIG_MMU_GATHER_TABLE_FREE is selected, an architecture is expected > to provide __tlb_remove_table(). This patch series modifies the > __tlb_remove_table() in arm, arm64, riscv, s390 and x86. Among them, > arm64 and s390 unconditionally select MMU_GATHER_RCU_TABLE_FREE, so we > only need to double-check arm, riscv and x86. > > For x86, it was called tlb_remove_page() in the non-PARAVIRT case, and I > added pagetable_dtor() for it explicitly (see patch #11), so this should > be no problem. > > For riscv, it will only call tlb_remove_ptdesc() in the case of > SMP && MMU, so this should be no problem. > > For arm, the call to pagetable_dtor() is indeed missed in the > non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we > can't fix this by adding pagetable_dtor() to tlb_remove_table(), > because some architectures call tlb_remove_table() but don't support > page table statistics, like sparc. > > So a more direct fix might be: > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index a59205863f431..0a131444a18ca 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -500,6 +500,9 @@ static inline void tlb_remove_page(struct mmu_gather > *tlb, struct page *page) > > static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) > { > +#ifndef CONFIG_MMU_GATHER_TABLE_FREE > + pagetable_dtor(pt); > +#endif > tlb_remove_table(tlb, pt); > } > > Or fix it directly in arm? Like the following: > > diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h > index ea4fbe7b17f6f..cf5d0ca021440 100644 > --- a/arch/arm/include/asm/tlb.h > +++ b/arch/arm/include/asm/tlb.h > @@ -43,6 +43,9 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, > unsigned long addr) > __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE); > #endif > > +#ifndef CONFIG_MMU_GATHER_TABLE_FREE > + pagetable_dtor(ptdesc); > +#endif > tlb_remove_ptdesc(tlb, ptdesc); > } Or can we just not let tlb_remove_table() fall back to tlb_remove_page()? Like the following: diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index a59205863f431..354ffaa4bd120 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -195,8 +195,6 @@ * various ptep_get_and_clear() functions. */ -#ifdef CONFIG_MMU_GATHER_TABLE_FREE - struct mmu_table_batch { #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE struct rcu_head rcu; @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table) extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ - -/* - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based - * page directories and we can use the normal page batching to free them. - */ -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) - -#endif /* CONFIG_MMU_GATHER_TABLE_FREE */ - #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE /* * This allows an architecture that does not use the linux page-tables for > > Thanks, > Qi > >> >> - Kevin
On 03/01/2025 10:35, Qi Zheng wrote: > On 2025/1/3 17:13, Qi Zheng wrote: >> On 2025/1/3 16:02, Kevin Brodsky wrote: >>> On 03/01/2025 04:48, Qi Zheng wrote: >>>> [...] >>>> >>>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() >>>> and call pagetable_dtor() to dtor the page table pages. >>>> But __tlb_batch_free_encoded_pages() is also used to free normal pages >>>> (not page table pages), so I don't want to add overhead there. >>> >>> Interesting, can a tlb batch refer to pages than are not PTPs then? >> >> Yes, you can see the caller of __tlb_remove_folio_pages() or >> tlb_remove_page_size(). I had a brief look but clearly not a good enough one! I hadn't realised that "table" in tlb_remove_table() means PTP, while "page" in tlb_remove_page() can mean any page, and it's making more sense now. [...] >> >> For arm, the call to pagetable_dtor() is indeed missed in the >> non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we >> can't fix this by adding pagetable_dtor() to tlb_remove_table(), >> because some architectures call tlb_remove_table() but don't support >> page table statistics, like sparc. When I investigated this for my own series, I found that the only case where ctor/dtor are not called for page-sized page tables is 32-bit sparc (see table at the end of [1]). However only 64-bit sparc makes use of tlb_remove_table() (at PTE level, where ctor/dtor are already called). So really calling pagetable_dtor() from tlb_remove_table() in the non-MMU_GATHER_TABLE_FREE case seems to be the obvious thing to do. Once this is done, we should be able to replace all those confusing calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove the explicit call to pagetable_dtor(). AIUI this is essentially what Peter suggested on v3 [2]. [1] https://lore.kernel.org/linux-mm/20241219164425.2277022-1-kevin.brodsky@arm.com/ [2] https://lore.kernel.org/linux-mm/20250103111457.GC22934@noisy.programming.kicks-ass.net/ [...] > Or can we just not let tlb_remove_table() fall back to > tlb_remove_page()? Like the following: > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index a59205863f431..354ffaa4bd120 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -195,8 +195,6 @@ > * various ptep_get_and_clear() functions. > */ > > -#ifdef CONFIG_MMU_GATHER_TABLE_FREE > - > struct mmu_table_batch { > #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE > struct rcu_head rcu; > @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table) > > extern void tlb_remove_table(struct mmu_gather *tlb, void *table); > > -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ > - > -/* > - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have > page based > - * page directories and we can use the normal page batching to free > them. > - */ > -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) We still need a different implementation of tlb_remove_table() in this case. We could define it inline here: static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct page *page = table; pagetable_dtor(page_ptdesc(page)); tlb_remove_page(page); } - Kevin
Hi Kevin, On 2025/1/3 21:27, Kevin Brodsky wrote: > On 03/01/2025 10:35, Qi Zheng wrote: >> On 2025/1/3 17:13, Qi Zheng wrote: >>> On 2025/1/3 16:02, Kevin Brodsky wrote: >>>> On 03/01/2025 04:48, Qi Zheng wrote: >>>>> [...] >>>>> >>>>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable() >>>>> and call pagetable_dtor() to dtor the page table pages. >>>>> But __tlb_batch_free_encoded_pages() is also used to free normal pages >>>>> (not page table pages), so I don't want to add overhead there. >>>> >>>> Interesting, can a tlb batch refer to pages than are not PTPs then? >>> >>> Yes, you can see the caller of __tlb_remove_folio_pages() or >>> tlb_remove_page_size(). > > I had a brief look but clearly not a good enough one! I hadn't realised > that "table" in tlb_remove_table() means PTP, while "page" in > tlb_remove_page() can mean any page, and it's making more sense now. > > [...] > >>> >>> For arm, the call to pagetable_dtor() is indeed missed in the >>> non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we >>> can't fix this by adding pagetable_dtor() to tlb_remove_table(), >>> because some architectures call tlb_remove_table() but don't support >>> page table statistics, like sparc. > > When I investigated this for my own series, I found that the only case > where ctor/dtor are not called for page-sized page tables is 32-bit > sparc (see table at the end of [1]). However only 64-bit sparc makes use > of tlb_remove_table() (at PTE level, where ctor/dtor are already called). Thanks for providing this information. > > So really calling pagetable_dtor() from tlb_remove_table() in the > non-MMU_GATHER_TABLE_FREE case seems to be the obvious thing to do. Right. Currently, only powerpc, sparc and x86 will directly call tlb_remove_table(), and all of them are in the MMU_GATHER_TABLE_FREE case. Therefore, I think the modification you mentioned below is feasible. In summary, currently only arm calls tlb_remove_table() in the non-MMU_GATHER_RCU_TABLE_FREE case. So I think we can add this fix directly to patch #8. If I haven't missed anything, I'll send an updated patch #8. > > Once this is done, we should be able to replace all those confusing > calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove > the explicit call to pagetable_dtor(). AIUI this is essentially what > Peter suggested on v3 [2]. Since this patch series is mainly for bug fix, I think that these things can be done in separate patch series later. > > [1] > https://lore.kernel.org/linux-mm/20241219164425.2277022-1-kevin.brodsky@arm.com/ > [2] > https://lore.kernel.org/linux-mm/20250103111457.GC22934@noisy.programming.kicks-ass.net/ > > [...] > >> Or can we just not let tlb_remove_table() fall back to >> tlb_remove_page()? Like the following: >> >> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >> index a59205863f431..354ffaa4bd120 100644 >> --- a/include/asm-generic/tlb.h >> +++ b/include/asm-generic/tlb.h >> @@ -195,8 +195,6 @@ >> * various ptep_get_and_clear() functions. >> */ >> >> -#ifdef CONFIG_MMU_GATHER_TABLE_FREE >> - >> struct mmu_table_batch { >> #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE >> struct rcu_head rcu; >> @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table) >> >> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); >> >> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ >> - >> -/* >> - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have >> page based >> - * page directories and we can use the normal page batching to free >> them. >> - */ >> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) > > We still need a different implementation of tlb_remove_table() in this > case. We could define it inline here: > > static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) > { > struct page *page = table; > > pagetable_dtor(page_ptdesc(page)); > tlb_remove_page(page); > } Right. As I said above, will add this to the updated patch #8. Thanks! > > - Kevin
On 06/01/2025 04:49, Qi Zheng wrote: > [...] > >> Once this is done, we should be able to replace all those confusing >> calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove >> the explicit call to pagetable_dtor(). AIUI this is essentially what >> Peter suggested on v3 [2]. > > Since this patch series is mainly for bug fix, I think that these things > can be done in separate patch series later. Sure that's fair. > >> >> [...] >> >>> Or can we just not let tlb_remove_table() fall back to >>> tlb_remove_page()? Like the following: >>> >>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >>> index a59205863f431..354ffaa4bd120 100644 >>> --- a/include/asm-generic/tlb.h >>> +++ b/include/asm-generic/tlb.h >>> @@ -195,8 +195,6 @@ >>> * various ptep_get_and_clear() functions. >>> */ >>> >>> -#ifdef CONFIG_MMU_GATHER_TABLE_FREE >>> - >>> struct mmu_table_batch { >>> #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE >>> struct rcu_head rcu; >>> @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table) >>> >>> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); >>> >>> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ >>> - >>> -/* >>> - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have >>> page based >>> - * page directories and we can use the normal page batching to free >>> them. >>> - */ >>> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) >> >> We still need a different implementation of tlb_remove_table() in this >> case. We could define it inline here: >> >> static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) >> { >> struct page *page = table; >> >> pagetable_dtor(page_ptdesc(page)); >> tlb_remove_page(page); >> } > > Right. As I said above, will add this to the updated patch #8. I think it would be preferable to make it a standalone patch, because this is a change to generic code that could in principle impact other arch's too. - Kevin
On 2025/1/7 17:57, Kevin Brodsky wrote: > On 06/01/2025 04:49, Qi Zheng wrote: >> [...] >> >>> Once this is done, we should be able to replace all those confusing >>> calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove >>> the explicit call to pagetable_dtor(). AIUI this is essentially what >>> Peter suggested on v3 [2]. >> >> Since this patch series is mainly for bug fix, I think that these things >> can be done in separate patch series later. > > Sure that's fair. > >> >>> >>> [...] >>> >>>> Or can we just not let tlb_remove_table() fall back to >>>> tlb_remove_page()? Like the following: >>>> >>>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >>>> index a59205863f431..354ffaa4bd120 100644 >>>> --- a/include/asm-generic/tlb.h >>>> +++ b/include/asm-generic/tlb.h >>>> @@ -195,8 +195,6 @@ >>>> * various ptep_get_and_clear() functions. >>>> */ >>>> >>>> -#ifdef CONFIG_MMU_GATHER_TABLE_FREE >>>> - >>>> struct mmu_table_batch { >>>> #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE >>>> struct rcu_head rcu; >>>> @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table) >>>> >>>> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); >>>> >>>> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ >>>> - >>>> -/* >>>> - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have >>>> page based >>>> - * page directories and we can use the normal page batching to free >>>> them. >>>> - */ >>>> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) >>> >>> We still need a different implementation of tlb_remove_table() in this >>> case. We could define it inline here: >>> >>> static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) >>> { >>> struct page *page = table; >>> >>> pagetable_dtor(page_ptdesc(page)); >>> tlb_remove_page(page); >>> } >> >> Right. As I said above, will add this to the updated patch #8. > > I think it would be preferable to make it a standalone patch, because > this is a change to generic code that could in principle impact other > arch's too. Agree, I have done that: ``` Author: Qi Zheng <zhengqi.arch@bytedance.com> Date: Fri Dec 13 17:13:48 2024 +0800 mm: pgtable: completely move pagetable_dtor() to generic tlb_remove_table() For the generic tlb_remove_table(), it is implemented in the following two forms: 1) CONFIG_MMU_GATHER_TABLE_FREE is enabled tlb_remove_table --> generic __tlb_remove_table() 2) CONFIG_MMU_GATHER_TABLE_FREE is disabled tlb_remove_table --> tlb_remove_page For case 1), the pagetable_dtor() has already been moved to generic __tlb_remove_table(). For case 2), now only arm will call tlb_remove_table()/tlb_remove_ptdesc() when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Let's move pagetable_dtor() completely to generic tlb_remove_table(), so that the architectures can follow more easily. Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index b8eebdb598631..ea4fbe7b17f6f 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -34,10 +34,6 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { struct ptdesc *ptdesc = page_ptdesc(pte); -#ifndef CONFIG_MMU_GATHER_TABLE_FREE - pagetable_dtor(ptdesc); -#endif - #ifndef CONFIG_ARM_LPAE /* * With the classic ARM MMU, a pte page has two corresponding pmd diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 69de47c7ef3c5..53ae7748f555b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -220,14 +220,20 @@ static inline void __tlb_remove_table(void *table) extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */ +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page); /* * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based * page directories and we can use the normal page batching to free them. */ -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) +static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct page *page = (struct page *)table; + pagetable_dtor(page_ptdesc(page)); + tlb_remove_page(tlb, page); +} #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE ``` and will send v5 later. Thanks! > > - Kevin
On 07/01/2025 11:51, Qi Zheng wrote: > [...] > > Author: Qi Zheng <zhengqi.arch@bytedance.com> > Date: Fri Dec 13 17:13:48 2024 +0800 > > mm: pgtable: completely move pagetable_dtor() to generic > tlb_remove_table() > > For the generic tlb_remove_table(), it is implemented in the > following two > forms: > > 1) CONFIG_MMU_GATHER_TABLE_FREE is enabled > > tlb_remove_table > --> generic __tlb_remove_table() > > 2) CONFIG_MMU_GATHER_TABLE_FREE is disabled > > tlb_remove_table > --> tlb_remove_page > > For case 1), the pagetable_dtor() has already been moved to generic > __tlb_remove_table(). > > For case 2), now only arm will call > tlb_remove_table()/tlb_remove_ptdesc() > when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Let's move > pagetable_dtor() > completely to generic tlb_remove_table(), so that the > architectures can > follow more easily. > > Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> > > diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h > index b8eebdb598631..ea4fbe7b17f6f 100644 > --- a/arch/arm/include/asm/tlb.h > +++ b/arch/arm/include/asm/tlb.h > @@ -34,10 +34,6 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t > pte, unsigned long addr) > { > struct ptdesc *ptdesc = page_ptdesc(pte); > > -#ifndef CONFIG_MMU_GATHER_TABLE_FREE > - pagetable_dtor(ptdesc); > -#endif I guess this hunk will disappear since this call isn't present to start with. > - > #ifndef CONFIG_ARM_LPAE > /* > * With the classic ARM MMU, a pte page has two corresponding pmd > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index 69de47c7ef3c5..53ae7748f555b 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -220,14 +220,20 @@ static inline void __tlb_remove_table(void *table) > > extern void tlb_remove_table(struct mmu_gather *tlb, void *table); > > -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ > +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */ Good catch! > > +static inline void tlb_remove_page(struct mmu_gather *tlb, struct > page *page); Nit: might be better to move the declaration up, e.g. above #ifdef CONFIG_MMU_GATHER_TABLE_FREE. > /* > * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have > page based > * page directories and we can use the normal page batching to free > them. > */ > -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) > +static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) > +{ > + struct page *page = (struct page *)table; > > + pagetable_dtor(page_ptdesc(page)); > + tlb_remove_page(tlb, page); > +} > #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ > > #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE Looks good to me otherwise. - Kevin
On 2025/1/7 19:58, Kevin Brodsky wrote: > On 07/01/2025 11:51, Qi Zheng wrote: >> [...] >> >> Author: Qi Zheng <zhengqi.arch@bytedance.com> >> Date: Fri Dec 13 17:13:48 2024 +0800 >> >> mm: pgtable: completely move pagetable_dtor() to generic >> tlb_remove_table() >> >> For the generic tlb_remove_table(), it is implemented in the >> following two >> forms: >> >> 1) CONFIG_MMU_GATHER_TABLE_FREE is enabled >> >> tlb_remove_table >> --> generic __tlb_remove_table() >> >> 2) CONFIG_MMU_GATHER_TABLE_FREE is disabled >> >> tlb_remove_table >> --> tlb_remove_page >> >> For case 1), the pagetable_dtor() has already been moved to generic >> __tlb_remove_table(). >> >> For case 2), now only arm will call >> tlb_remove_table()/tlb_remove_ptdesc() >> when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Let's move >> pagetable_dtor() >> completely to generic tlb_remove_table(), so that the >> architectures can >> follow more easily. >> >> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> I missed your Suggested-by, will add it in v5. >> >> diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h >> index b8eebdb598631..ea4fbe7b17f6f 100644 >> --- a/arch/arm/include/asm/tlb.h >> +++ b/arch/arm/include/asm/tlb.h >> @@ -34,10 +34,6 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t >> pte, unsigned long addr) >> { >> struct ptdesc *ptdesc = page_ptdesc(pte); >> >> -#ifndef CONFIG_MMU_GATHER_TABLE_FREE >> - pagetable_dtor(ptdesc); >> -#endif > > I guess this hunk will disappear since this call isn't present to start > with. Yes, I plan to add this in the patch #8, and remove it in this patch. > >> - >> #ifndef CONFIG_ARM_LPAE >> /* >> * With the classic ARM MMU, a pte page has two corresponding pmd >> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >> index 69de47c7ef3c5..53ae7748f555b 100644 >> --- a/include/asm-generic/tlb.h >> +++ b/include/asm-generic/tlb.h >> @@ -220,14 +220,20 @@ static inline void __tlb_remove_table(void *table) >> >> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); >> >> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ >> +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */ > > Good catch! > >> >> +static inline void tlb_remove_page(struct mmu_gather *tlb, struct >> page *page); > > Nit: might be better to move the declaration up, e.g. above #ifdef > CONFIG_MMU_GATHER_TABLE_FREE. Now only the tlb_remove_table() below calls it, maybe it's better to keep the impact to minimum? > >> /* >> * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have >> page based >> * page directories and we can use the normal page batching to free >> them. >> */ >> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) >> +static inline void tlb_remove_table(struct mmu_gather *tlb, void *table) >> +{ >> + struct page *page = (struct page *)table; >> >> + pagetable_dtor(page_ptdesc(page)); >> + tlb_remove_page(tlb, page); >> +} >> #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ >> >> #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE > > Looks good to me otherwise. I will add your Reviewed-by to all patches (except yours) in v5, can I also add it to this new added patch? (if we agree with the discussion above) ;) Thanks! > > - Kevin
On 07/01/2025 13:31, Qi Zheng wrote: > On 2025/1/7 19:58, Kevin Brodsky wrote: >> On 07/01/2025 11:51, Qi Zheng wrote: >>> [...] >>> >>> Author: Qi Zheng <zhengqi.arch@bytedance.com> >>> Date: Fri Dec 13 17:13:48 2024 +0800 >>> >>> mm: pgtable: completely move pagetable_dtor() to generic >>> tlb_remove_table() >>> >>> For the generic tlb_remove_table(), it is implemented in the >>> following two >>> forms: >>> >>> 1) CONFIG_MMU_GATHER_TABLE_FREE is enabled >>> >>> tlb_remove_table >>> --> generic __tlb_remove_table() >>> >>> 2) CONFIG_MMU_GATHER_TABLE_FREE is disabled >>> >>> tlb_remove_table >>> --> tlb_remove_page >>> >>> For case 1), the pagetable_dtor() has already been moved to >>> generic >>> __tlb_remove_table(). >>> >>> For case 2), now only arm will call >>> tlb_remove_table()/tlb_remove_ptdesc() >>> when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Let's move >>> pagetable_dtor() >>> completely to generic tlb_remove_table(), so that the >>> architectures can >>> follow more easily. >>> >>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> > > I missed your Suggested-by, will add it in v5. Ah yes thanks! > >>> >>> diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h >>> index b8eebdb598631..ea4fbe7b17f6f 100644 >>> --- a/arch/arm/include/asm/tlb.h >>> +++ b/arch/arm/include/asm/tlb.h >>> @@ -34,10 +34,6 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t >>> pte, unsigned long addr) >>> { >>> struct ptdesc *ptdesc = page_ptdesc(pte); >>> >>> -#ifndef CONFIG_MMU_GATHER_TABLE_FREE >>> - pagetable_dtor(ptdesc); >>> -#endif >> >> I guess this hunk will disappear since this call isn't present to start >> with. > > Yes, I plan to add this in the patch #8, and remove it in this patch. Right I guess this is required to keep patch 8 self-contained, makes sense. > >> >>> - >>> #ifndef CONFIG_ARM_LPAE >>> /* >>> * With the classic ARM MMU, a pte page has two >>> corresponding pmd >>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h >>> index 69de47c7ef3c5..53ae7748f555b 100644 >>> --- a/include/asm-generic/tlb.h >>> +++ b/include/asm-generic/tlb.h >>> @@ -220,14 +220,20 @@ static inline void __tlb_remove_table(void >>> *table) >>> >>> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); >>> >>> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ >>> +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */ >> >> Good catch! >> >>> >>> +static inline void tlb_remove_page(struct mmu_gather *tlb, struct >>> page *page); >> >> Nit: might be better to move the declaration up, e.g. above #ifdef >> CONFIG_MMU_GATHER_TABLE_FREE. > > Now only the tlb_remove_table() below calls it, maybe it's better to > keep the impact to minimum? I feel it might be better to make the declaration unconditional, but this is really a detail, I don't mind either way. > >> >>> /* >>> * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have >>> page based >>> * page directories and we can use the normal page batching to free >>> them. >>> */ >>> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page)) >>> +static inline void tlb_remove_table(struct mmu_gather *tlb, void >>> *table) >>> +{ >>> + struct page *page = (struct page *)table; >>> >>> + pagetable_dtor(page_ptdesc(page)); >>> + tlb_remove_page(tlb, page); >>> +} >>> #endif /* CONFIG_MMU_GATHER_TABLE_FREE */ >>> >>> #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE >> >> Looks good to me otherwise. > > I will add your Reviewed-by to all patches (except yours) in v5, can > I also add it to this new added patch? (if we agree with the discussion > above) ;) Yes please do, thanks! - Kevin
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index b6793c5c99296..c8907b8317115 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -15,12 +15,22 @@ #define __HAVE_ARCH_PUD_FREE #include <asm-generic/pgalloc.h> +/* + * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to + * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use + * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this + * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the + * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * for more details. + */ static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { - if (riscv_use_sbi_for_rfence()) + if (riscv_use_sbi_for_rfence()) { tlb_remove_ptdesc(tlb, pt); - else + } else { + pagetable_dtor(pt); tlb_remove_page_ptdesc(tlb, pt); + } } static inline void pmd_populate_kernel(struct mm_struct *mm, @@ -97,23 +107,15 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, unsigned long addr) { - if (pgtable_l4_enabled) { - struct ptdesc *ptdesc = virt_to_ptdesc(pud); - - pagetable_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); - } + if (pgtable_l4_enabled) + riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud)); } static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr) { - if (pgtable_l5_enabled) { - struct ptdesc *ptdesc = virt_to_ptdesc(p4d); - - pagetable_dtor(ptdesc); + if (pgtable_l5_enabled) riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); - } } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -142,10 +144,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { - struct ptdesc *ptdesc = virt_to_ptdesc(pmd); - - pagetable_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd)); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -153,10 +152,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - struct ptdesc *ptdesc = page_ptdesc(pte); - - pagetable_dtor(ptdesc); - riscv_tlb_remove_ptdesc(tlb, ptdesc); + riscv_tlb_remove_ptdesc(tlb, page_ptdesc(pte)); } #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 1f6c38420d8e0..ded8724b3c4f7 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -11,19 +11,13 @@ struct mmu_gather; static void tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_MMU -#include <linux/swap.h> -/* - * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to - * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use - * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this - * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the - * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h - * for more details. - */ static inline void __tlb_remove_table(void *table) { - free_page_and_swap_cache(table); + struct ptdesc *ptdesc = (struct ptdesc *)table; + + pagetable_dtor(ptdesc); + pagetable_free(ptdesc); } #endif /* CONFIG_MMU */
Move pagetable_dtor() to __tlb_remove_table(), so that ptlock and page table pages can be freed together (regardless of whether RCU is used). This prevents the use-after-free problem where the ptlock is freed immediately but the page table pages is freed later via RCU. Page tables shouldn't have swap cache, so use pagetable_free() instead of free_page_and_swap_cache() to free page table pages. By the way, move the comment above __tlb_remove_table() to riscv_tlb_remove_ptdesc(), it will be more appropriate. Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: linux-riscv@lists.infradead.org --- arch/riscv/include/asm/pgalloc.h | 38 ++++++++++++++------------------ arch/riscv/include/asm/tlb.h | 14 ++++-------- 2 files changed, 21 insertions(+), 31 deletions(-)