Message ID | 6a6a768634b9ce8537154264e35e6a66a79b6ca8.1656586863.git.baolin.wang@linux.alibaba.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add PUD and kernel PTE level pagetable account | expand |
On Thu, Jun 30, 2022 at 07:11:15PM +0800, Baolin Wang wrote: > Now the PUD level ptes are always protected by mm->page_table_lock, > which means no split pagetable lock needed. So the generic PUD level > pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(), > that means we will miss to account PUD level pagetable pages. > > Adding pagetable account by calling pgtable_page_inc() or > pgtable_page_dec() when allocating or freeing PUD level pagetable > pages to help to get an accurate pagetable accounting. > > Moreover this patch will also mark the PUD level pagetable with PG_table > flag, which will help to do sanity validation in unpoison_memory() and > get more accurate pagetable accounting by /proc/kpageflags interface. > > Meanwhile converting the architectures with using generic PUD pagatable > allocation to add corresponding pgtable_page_inc() or pgtable_page_dec() > to account PUD level pagetable. > > Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> > --- > arch/arm64/include/asm/tlb.h | 5 ++++- > arch/loongarch/include/asm/pgalloc.h | 11 ++++++++--- > arch/mips/include/asm/pgalloc.h | 11 ++++++++--- > arch/s390/include/asm/tlb.h | 1 + > arch/x86/mm/pgtable.c | 5 ++++- > include/asm-generic/pgalloc.h | 12 ++++++++++-- > 6 files changed, 35 insertions(+), 10 deletions(-) > > diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h > index c995d1f..1772df9 100644 > --- a/arch/arm64/include/asm/tlb.h > +++ b/arch/arm64/include/asm/tlb.h > @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, > static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, > unsigned long addr) > { > - tlb_remove_table(tlb, virt_to_page(pudp)); > + struct page *page = virt_to_page(pudp); > + > + pgtable_page_dec(page); Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be consistent with what we currently have for PTEs and PMDs. This applies to all the additions of pgtable_page_dec() and pgtable_page_inc(). > + tlb_remove_table(tlb, page); > } > #endif > > diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h > index b0a57b2..19bfe14 100644 > --- a/arch/loongarch/include/asm/pgalloc.h > +++ b/arch/loongarch/include/asm/pgalloc.h > @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) > { > pud_t *pud; > + struct page *page; > > - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); > - if (pud) > - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > + page = alloc_pages(GFP_KERNEL, PUD_ORDER); > + if (!page) > + return NULL; > + > + pgtable_page_inc(page); > + pud = (pud_t *)page_address(page); > + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > return pud; > } > > diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h > index 867e9c3..990f614 100644 > --- a/arch/mips/include/asm/pgalloc.h > +++ b/arch/mips/include/asm/pgalloc.h > @@ -89,11 +89,16 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) > > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) > { > + struct page *page; > pud_t *pud; > > - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); > - if (pud) > - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > + page = alloc_pages(GFP_KERNEL, PUD_ORDER); > + if (!page) > + return NULL; > + > + pgtable_page_inc(page); > + pud = (pud_t *)page_address(page); > + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > return pud; > } > > diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h > index fe6407f..744e2d7 100644 > --- a/arch/s390/include/asm/tlb.h > +++ b/arch/s390/include/asm/tlb.h > @@ -125,6 +125,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, > { > if (mm_pud_folded(tlb->mm)) > return; > + pgtable_page_dec(virt_to_page(pud)); > tlb->mm->context.flush_mm = 1; > tlb->freed_tables = 1; > tlb->cleared_p4ds = 1; > diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c > index a932d77..5e46e31 100644 > --- a/arch/x86/mm/pgtable.c > +++ b/arch/x86/mm/pgtable.c > @@ -76,8 +76,11 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) > #if CONFIG_PGTABLE_LEVELS > 3 > void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) > { > + struct page *page = virt_to_page(pud); > + > + pgtable_page_dec(page); > paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); > - paravirt_tlb_remove_table(tlb, virt_to_page(pud)); > + paravirt_tlb_remove_table(tlb, page); > } > > #if CONFIG_PGTABLE_LEVELS > 4 > diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h > index 977bea1..11350f7 100644 > --- a/include/asm-generic/pgalloc.h > +++ b/include/asm-generic/pgalloc.h > @@ -149,11 +149,16 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) > > static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) > { > + struct page *page; > gfp_t gfp = GFP_PGTABLE_USER; > > if (mm == &init_mm) > gfp = GFP_PGTABLE_KERNEL; > - return (pud_t *)get_zeroed_page(gfp); > + page = alloc_pages(gfp, 0); > + if (!page) > + return NULL; > + pgtable_page_inc(page); > + return (pud_t *)page_address(page); > } > > #ifndef __HAVE_ARCH_PUD_ALLOC_ONE > @@ -174,8 +179,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > > static inline void __pud_free(struct mm_struct *mm, pud_t *pud) > { > + struct page *page = virt_to_page(pud); > + > BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); > - free_page((unsigned long)pud); > + pgtable_page_dec(page); > + __free_page(page); > } > > #ifndef __HAVE_ARCH_PUD_FREE > -- > 1.8.3.1 >
On 6/30/2022 10:17 PM, Mike Rapoport wrote: > On Thu, Jun 30, 2022 at 07:11:15PM +0800, Baolin Wang wrote: >> Now the PUD level ptes are always protected by mm->page_table_lock, >> which means no split pagetable lock needed. So the generic PUD level >> pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(), >> that means we will miss to account PUD level pagetable pages. >> >> Adding pagetable account by calling pgtable_page_inc() or >> pgtable_page_dec() when allocating or freeing PUD level pagetable >> pages to help to get an accurate pagetable accounting. >> >> Moreover this patch will also mark the PUD level pagetable with PG_table >> flag, which will help to do sanity validation in unpoison_memory() and >> get more accurate pagetable accounting by /proc/kpageflags interface. >> >> Meanwhile converting the architectures with using generic PUD pagatable >> allocation to add corresponding pgtable_page_inc() or pgtable_page_dec() >> to account PUD level pagetable. >> >> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> >> --- >> arch/arm64/include/asm/tlb.h | 5 ++++- >> arch/loongarch/include/asm/pgalloc.h | 11 ++++++++--- >> arch/mips/include/asm/pgalloc.h | 11 ++++++++--- >> arch/s390/include/asm/tlb.h | 1 + >> arch/x86/mm/pgtable.c | 5 ++++- >> include/asm-generic/pgalloc.h | 12 ++++++++++-- >> 6 files changed, 35 insertions(+), 10 deletions(-) >> >> diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h >> index c995d1f..1772df9 100644 >> --- a/arch/arm64/include/asm/tlb.h >> +++ b/arch/arm64/include/asm/tlb.h >> @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, >> static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, >> unsigned long addr) >> { >> - tlb_remove_table(tlb, virt_to_page(pudp)); >> + struct page *page = virt_to_page(pudp); >> + >> + pgtable_page_dec(page); > > Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be > consistent with what we currently have for PTEs and PMDs. > > This applies to all the additions of pgtable_page_dec() and > pgtable_page_inc(). OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to keep consistent, which are just wrappers of pgtable_page_inc() and pgtable_page_dec(). > >> + tlb_remove_table(tlb, page); >> } >> #endif >> >> diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h >> index b0a57b2..19bfe14 100644 >> --- a/arch/loongarch/include/asm/pgalloc.h >> +++ b/arch/loongarch/include/asm/pgalloc.h >> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) >> static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) >> { >> pud_t *pud; >> + struct page *page; >> >> - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); >> - if (pud) >> - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); >> + page = alloc_pages(GFP_KERNEL, PUD_ORDER); >> + if (!page) >> + return NULL; >> + >> + pgtable_page_inc(page); >> + pud = (pud_t *)page_address(page); >> + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); >> return pud; >> } >> >> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h >> index 867e9c3..990f614 100644 >> --- a/arch/mips/include/asm/pgalloc.h >> +++ b/arch/mips/include/asm/pgalloc.h >> @@ -89,11 +89,16 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) >> >> static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) >> { >> + struct page *page; >> pud_t *pud; >> >> - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); >> - if (pud) >> - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); >> + page = alloc_pages(GFP_KERNEL, PUD_ORDER); >> + if (!page) >> + return NULL; >> + >> + pgtable_page_inc(page); >> + pud = (pud_t *)page_address(page); >> + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); >> return pud; >> } >> >> diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h >> index fe6407f..744e2d7 100644 >> --- a/arch/s390/include/asm/tlb.h >> +++ b/arch/s390/include/asm/tlb.h >> @@ -125,6 +125,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, >> { >> if (mm_pud_folded(tlb->mm)) >> return; >> + pgtable_page_dec(virt_to_page(pud)); >> tlb->mm->context.flush_mm = 1; >> tlb->freed_tables = 1; >> tlb->cleared_p4ds = 1; >> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c >> index a932d77..5e46e31 100644 >> --- a/arch/x86/mm/pgtable.c >> +++ b/arch/x86/mm/pgtable.c >> @@ -76,8 +76,11 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) >> #if CONFIG_PGTABLE_LEVELS > 3 >> void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) >> { >> + struct page *page = virt_to_page(pud); >> + >> + pgtable_page_dec(page); >> paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); >> - paravirt_tlb_remove_table(tlb, virt_to_page(pud)); >> + paravirt_tlb_remove_table(tlb, page); >> } >> >> #if CONFIG_PGTABLE_LEVELS > 4 >> diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h >> index 977bea1..11350f7 100644 >> --- a/include/asm-generic/pgalloc.h >> +++ b/include/asm-generic/pgalloc.h >> @@ -149,11 +149,16 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) >> >> static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) >> { >> + struct page *page; >> gfp_t gfp = GFP_PGTABLE_USER; >> >> if (mm == &init_mm) >> gfp = GFP_PGTABLE_KERNEL; >> - return (pud_t *)get_zeroed_page(gfp); >> + page = alloc_pages(gfp, 0); >> + if (!page) >> + return NULL; >> + pgtable_page_inc(page); >> + return (pud_t *)page_address(page); >> } >> >> #ifndef __HAVE_ARCH_PUD_ALLOC_ONE >> @@ -174,8 +179,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) >> >> static inline void __pud_free(struct mm_struct *mm, pud_t *pud) >> { >> + struct page *page = virt_to_page(pud); >> + >> BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); >> - free_page((unsigned long)pud); >> + pgtable_page_dec(page); >> + __free_page(page); >> } >> >> #ifndef __HAVE_ARCH_PUD_FREE >> -- >> 1.8.3.1 >> >
On Fri, Jul 01, 2022 at 04:04:21PM +0800, Baolin Wang wrote: > > Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be > > consistent with what we currently have for PTEs and PMDs. > > > > This applies to all the additions of pgtable_page_dec() and > > pgtable_page_inc(). > > OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to > keep consistent, which are just wrappers of pgtable_page_inc() and > pgtable_page_dec(). I think you misunderstand Mike. Don't add pgtable_page_inc() and pgtable_page_dec(). Just add pgtable_pud_page_ctor() and pgtable_pud_page_dtor(). At least, that was what I said last time yo uposted these patches.
On Thu, Jun 30, 2022 at 07:11:15PM +0800, Baolin Wang wrote: > +++ b/arch/loongarch/include/asm/pgalloc.h > @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) > { > pud_t *pud; > + struct page *page; > > - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); > - if (pud) > - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > + page = alloc_pages(GFP_KERNEL, PUD_ORDER); Argh. I just got rid of PMD_ORDER from PA-RISC. Now Loongstupid has reintroduced it, and worse introduced PUD_ORDER. See commit 7bf82eb3873f.
On 7/3/2022 11:40 AM, Matthew Wilcox wrote: > On Fri, Jul 01, 2022 at 04:04:21PM +0800, Baolin Wang wrote: >>> Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be >>> consistent with what we currently have for PTEs and PMDs. >>> >>> This applies to all the additions of pgtable_page_dec() and >>> pgtable_page_inc(). >> >> OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to >> keep consistent, which are just wrappers of pgtable_page_inc() and >> pgtable_page_dec(). > > I think you misunderstand Mike. > > Don't add pgtable_page_inc() and pgtable_page_dec(). Just add > pgtable_pud_page_ctor() and pgtable_pud_page_dtor(). At least, that > was what I said last time yo uposted these patches. My concern is that I need another helpers for kernel page table allocation helpers, if only adding pgtable_pud_page_ctor() and pgtable_pud_page_dtor() like below: static inline void pgtable_pud_page_ctor(struct page *page) { __SetPageTable(page); inc_lruvec_page_state(page, NR_PAGETABLE); } static inline void pgtable_pud_page_dtor(struct page *page) { __ClearPageTable(page); dec_lruvec_page_state(page, NR_PAGETABLE); } So for kernel pte page table allocation, I need another similar helpers like below. However they do the samething with pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good for adding these duplicate code. static inline void pgtable_kernel_pte_page_ctor(struct page *page) { __SetPageTable(page); inc_lruvec_page_state(page, NR_PAGETABLE); } static inline void pgtable_kernel_pte_page_dtor(struct page *page) { __ClearPageTable(page); dec_lruvec_page_state(page, NR_PAGETABLE); } Instead adding a common helpers seems more readable to me, which can also simplify original pgtable_pmd_page_dtor()/pgtable_pmd_page_ctor(). Something like below. static inline void pgtable_page_inc(struct page *page) { __SetPageTable(page); inc_lruvec_page_state(page, NR_PAGETABLE); } static inline void pgtable_page_dec(struct page *page) { __ClearPageTable(page); dec_lruvec_page_state(page, NR_PAGETABLE); } static inline void pgtable_pud_page_ctor(struct page *page) { pgtable_page_inc(page); } static inline void pgtable_pud_page_dtor(struct page *page) { pgtable_page_dec(page); } For kernel pte page table, we can just use pgtable_page_inc/pgtable_page_dec(), or adding pgtable_kernel_pte_page_ctor/pgtable_kernel_pte_page_dtor, which just wrappers of pgtable_page_inc() and pgtable_page_dec(). Matthew and Mike, how do you think? Thanks.
On Sun, Jul 03, 2022 at 04:47:07AM +0100, Matthew Wilcox wrote: > On Thu, Jun 30, 2022 at 07:11:15PM +0800, Baolin Wang wrote: > > +++ b/arch/loongarch/include/asm/pgalloc.h > > @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) > > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) > > { > > pud_t *pud; > > + struct page *page; > > > > - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); > > - if (pud) > > - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > > + page = alloc_pages(GFP_KERNEL, PUD_ORDER); > > Argh. I just got rid of PMD_ORDER from PA-RISC. Now Loongstupid has > reintroduced it, and worse introduced PUD_ORDER. See commit > 7bf82eb3873f. Let's try and deal with all those PxD_ORDERs for once. https://lore.kernel.org/linux-arch/20220703141203.147893-1-rppt@kernel.org/
On Sun, Jul 03, 2022 at 10:06:32PM +0800, Baolin Wang wrote: > > > On 7/3/2022 11:40 AM, Matthew Wilcox wrote: > > On Fri, Jul 01, 2022 at 04:04:21PM +0800, Baolin Wang wrote: > > > > Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be > > > > consistent with what we currently have for PTEs and PMDs. > > > > > > > > This applies to all the additions of pgtable_page_dec() and > > > > pgtable_page_inc(). > > > > > > OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to > > > keep consistent, which are just wrappers of pgtable_page_inc() and > > > pgtable_page_dec(). > > > > I think you misunderstand Mike. > > > > Don't add pgtable_page_inc() and pgtable_page_dec(). Just add > > pgtable_pud_page_ctor() and pgtable_pud_page_dtor(). At least, that > > was what I said last time you posted these patches. > > My concern is that I need another helpers for kernel page table allocation > helpers, if only adding pgtable_pud_page_ctor() and pgtable_pud_page_dtor() > like below: > > static inline void pgtable_pud_page_ctor(struct page *page) > { > __SetPageTable(page); > inc_lruvec_page_state(page, NR_PAGETABLE); > } > > static inline void pgtable_pud_page_dtor(struct page *page) > { > __ClearPageTable(page); > dec_lruvec_page_state(page, NR_PAGETABLE); > } > > So for kernel pte page table allocation, I need another similar helpers like > below. However they do the samething with > pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good > for adding these duplicate code. > > static inline void pgtable_kernel_pte_page_ctor(struct page *page) > { > __SetPageTable(page); > inc_lruvec_page_state(page, NR_PAGETABLE); > } > > static inline void pgtable_kernel_pte_page_dtor(struct page *page) > { > __ClearPageTable(page); > dec_lruvec_page_state(page, NR_PAGETABLE); > } > > Instead adding a common helpers seems more readable to me, which can also > simplify original pgtable_pmd_page_dtor()/pgtable_pmd_page_ctor(). Something > like below. > > static inline void pgtable_page_inc(struct page *page) > { > __SetPageTable(page); > inc_lruvec_page_state(page, NR_PAGETABLE); > } > > static inline void pgtable_page_dec(struct page *page) > { > __ClearPageTable(page); > dec_lruvec_page_state(page, NR_PAGETABLE); > } > > static inline void pgtable_pud_page_ctor(struct page *page) > { > pgtable_page_inc(page); > } > > static inline void pgtable_pud_page_dtor(struct page *page) > { > pgtable_page_dec(page); > } > > For kernel pte page table, we can just use > pgtable_page_inc/pgtable_page_dec(), or adding > pgtable_kernel_pte_page_ctor/pgtable_kernel_pte_page_dtor, which just > wrappers of pgtable_page_inc() and pgtable_page_dec(). > > Matthew and Mike, how do you think? Thanks. I actually meant to add pgtable_pud_page_ctor/dtor() as a wrapper for the new helper to keep pud tables allocation consistent with pmd and pte and as a provision for the time we'll have per-page pud locks. For the accounting of the kernel page tables a new helper does make sense because there are no locks to initialize for the kernel page tables. I can't say that I'm happy with the pgtable_page_inc/dec names, though. Maybe page_{set,clear}_pgtable()?
On 7/3/2022 10:28 PM, Mike Rapoport wrote: > On Sun, Jul 03, 2022 at 10:06:32PM +0800, Baolin Wang wrote: >> >> >> On 7/3/2022 11:40 AM, Matthew Wilcox wrote: >>> On Fri, Jul 01, 2022 at 04:04:21PM +0800, Baolin Wang wrote: >>>>> Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be >>>>> consistent with what we currently have for PTEs and PMDs. >>>>> >>>>> This applies to all the additions of pgtable_page_dec() and >>>>> pgtable_page_inc(). >>>> >>>> OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to >>>> keep consistent, which are just wrappers of pgtable_page_inc() and >>>> pgtable_page_dec(). >>> >>> I think you misunderstand Mike. >>> >>> Don't add pgtable_page_inc() and pgtable_page_dec(). Just add >>> pgtable_pud_page_ctor() and pgtable_pud_page_dtor(). At least, that >>> was what I said last time you posted these patches. >> >> My concern is that I need another helpers for kernel page table allocation >> helpers, if only adding pgtable_pud_page_ctor() and pgtable_pud_page_dtor() >> like below: >> >> static inline void pgtable_pud_page_ctor(struct page *page) >> { >> __SetPageTable(page); >> inc_lruvec_page_state(page, NR_PAGETABLE); >> } >> >> static inline void pgtable_pud_page_dtor(struct page *page) >> { >> __ClearPageTable(page); >> dec_lruvec_page_state(page, NR_PAGETABLE); >> } >> >> So for kernel pte page table allocation, I need another similar helpers like >> below. However they do the samething with >> pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good >> for adding these duplicate code. >> >> static inline void pgtable_kernel_pte_page_ctor(struct page *page) >> { >> __SetPageTable(page); >> inc_lruvec_page_state(page, NR_PAGETABLE); >> } >> >> static inline void pgtable_kernel_pte_page_dtor(struct page *page) >> { >> __ClearPageTable(page); >> dec_lruvec_page_state(page, NR_PAGETABLE); >> } >> >> Instead adding a common helpers seems more readable to me, which can also >> simplify original pgtable_pmd_page_dtor()/pgtable_pmd_page_ctor(). Something >> like below. >> >> static inline void pgtable_page_inc(struct page *page) >> { >> __SetPageTable(page); >> inc_lruvec_page_state(page, NR_PAGETABLE); >> } >> >> static inline void pgtable_page_dec(struct page *page) >> { >> __ClearPageTable(page); >> dec_lruvec_page_state(page, NR_PAGETABLE); >> } >> >> static inline void pgtable_pud_page_ctor(struct page *page) >> { >> pgtable_page_inc(page); >> } >> >> static inline void pgtable_pud_page_dtor(struct page *page) >> { >> pgtable_page_dec(page); >> } >> >> For kernel pte page table, we can just use >> pgtable_page_inc/pgtable_page_dec(), or adding >> pgtable_kernel_pte_page_ctor/pgtable_kernel_pte_page_dtor, which just >> wrappers of pgtable_page_inc() and pgtable_page_dec(). >> >> Matthew and Mike, how do you think? Thanks. > > I actually meant to add pgtable_pud_page_ctor/dtor() as a wrapper for the > new helper to keep pud tables allocation consistent with pmd and pte and > as a provision for the time we'll have per-page pud locks. > > For the accounting of the kernel page tables a new helper does make sense > because there are no locks to initialize for the kernel page tables. Thanks for clarification. That is also my thought. > > I can't say that I'm happy with the pgtable_page_inc/dec names, though. > > Maybe page_{set,clear}_pgtable()? Sounds better than pgtable_page_inc/dec() for me. I will use them in next version if no other objections. Thanks.
On Sun, Jul 03, 2022 at 10:06:32PM +0800, Baolin Wang wrote: > So for kernel pte page table allocation, I need another similar helpers like > below. However they do the samething with > pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good > for adding these duplicate code. Why do we want to account kernel PTE page tables in NR_PAGETABLE? I think that's confusing.
On 7/3/2022 10:52 PM, Matthew Wilcox wrote: > On Sun, Jul 03, 2022 at 10:06:32PM +0800, Baolin Wang wrote: >> So for kernel pte page table allocation, I need another similar helpers like >> below. However they do the samething with >> pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good >> for adding these duplicate code. > > Why do we want to account kernel PTE page tables in NR_PAGETABLE? > I think that's confusing. Why this will confuse you? I think it is inconsistent that kernel PTE page tables are not accounted, because we will account PMD/PUD level page tables no matter they are userspace pagetable pages or kernel pagetable pages. Moreover the the vmalloc()/vmap() can consume some kernel pagetable pages, which should be accounted.
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index c995d1f..1772df9 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - tlb_remove_table(tlb, virt_to_page(pudp)); + struct page *page = virt_to_page(pudp); + + pgtable_page_dec(page); + tlb_remove_table(tlb, page); } #endif diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index b0a57b2..19bfe14 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t *pud; + struct page *page; - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); - if (pud) - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); + page = alloc_pages(GFP_KERNEL, PUD_ORDER); + if (!page) + return NULL; + + pgtable_page_inc(page); + pud = (pud_t *)page_address(page); + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); return pud; } diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 867e9c3..990f614 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -89,11 +89,16 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { + struct page *page; pud_t *pud; - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); - if (pud) - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); + page = alloc_pages(GFP_KERNEL, PUD_ORDER); + if (!page) + return NULL; + + pgtable_page_inc(page); + pud = (pud_t *)page_address(page); + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); return pud; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index fe6407f..744e2d7 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -125,6 +125,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, { if (mm_pud_folded(tlb->mm)) return; + pgtable_page_dec(virt_to_page(pud)); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index a932d77..5e46e31 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -76,8 +76,11 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { + struct page *page = virt_to_page(pud); + + pgtable_page_dec(page); paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); - paravirt_tlb_remove_table(tlb, virt_to_page(pud)); + paravirt_tlb_remove_table(tlb, page); } #if CONFIG_PGTABLE_LEVELS > 4 diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 977bea1..11350f7 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -149,11 +149,16 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) { + struct page *page; gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; - return (pud_t *)get_zeroed_page(gfp); + page = alloc_pages(gfp, 0); + if (!page) + return NULL; + pgtable_page_inc(page); + return (pud_t *)page_address(page); } #ifndef __HAVE_ARCH_PUD_ALLOC_ONE @@ -174,8 +179,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) static inline void __pud_free(struct mm_struct *mm, pud_t *pud) { + struct page *page = virt_to_page(pud); + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); - free_page((unsigned long)pud); + pgtable_page_dec(page); + __free_page(page); } #ifndef __HAVE_ARCH_PUD_FREE
Now the PUD level ptes are always protected by mm->page_table_lock, which means no split pagetable lock needed. So the generic PUD level pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(), that means we will miss to account PUD level pagetable pages. Adding pagetable account by calling pgtable_page_inc() or pgtable_page_dec() when allocating or freeing PUD level pagetable pages to help to get an accurate pagetable accounting. Moreover this patch will also mark the PUD level pagetable with PG_table flag, which will help to do sanity validation in unpoison_memory() and get more accurate pagetable accounting by /proc/kpageflags interface. Meanwhile converting the architectures with using generic PUD pagatable allocation to add corresponding pgtable_page_inc() or pgtable_page_dec() to account PUD level pagetable. Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> --- arch/arm64/include/asm/tlb.h | 5 ++++- arch/loongarch/include/asm/pgalloc.h | 11 ++++++++--- arch/mips/include/asm/pgalloc.h | 11 ++++++++--- arch/s390/include/asm/tlb.h | 1 + arch/x86/mm/pgtable.c | 5 ++++- include/asm-generic/pgalloc.h | 12 ++++++++++-- 6 files changed, 35 insertions(+), 10 deletions(-)