diff mbox series

[v4,10/15] riscv: pgtable: move pagetable_dtor() to __tlb_remove_table()

Message ID 0e8f0b3835c15e99145e0006ac1020ae45a2b166.1735549103.git.zhengqi.arch@bytedance.com (mailing list archive)
State New
Headers show
Series move pagetable_*_dtor() to __tlb_remove_table() | expand

Commit Message

Qi Zheng Dec. 30, 2024, 9:07 a.m. UTC
Move pagetable_dtor() to __tlb_remove_table(), so that ptlock and page
table pages can be freed together (regardless of whether RCU is used).
This prevents the use-after-free problem where the ptlock is freed
immediately but the page table pages is freed later via RCU.

Page tables shouldn't have swap cache, so use pagetable_free() instead of
free_page_and_swap_cache() to free page table pages.

By the way, move the comment above __tlb_remove_table() to
riscv_tlb_remove_ptdesc(), it will be more appropriate.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-riscv@lists.infradead.org
---
 arch/riscv/include/asm/pgalloc.h | 38 ++++++++++++++------------------
 arch/riscv/include/asm/tlb.h     | 14 ++++--------
 2 files changed, 21 insertions(+), 31 deletions(-)

Comments

Kevin Brodsky Jan. 2, 2025, 4:53 p.m. UTC | #1
On 30/12/2024 10:07, Qi Zheng wrote:
>  static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
>  {
> -	if (riscv_use_sbi_for_rfence())
> +	if (riscv_use_sbi_for_rfence()) {
>  		tlb_remove_ptdesc(tlb, pt);
> -	else
> +	} else {
> +		pagetable_dtor(pt);
>  		tlb_remove_page_ptdesc(tlb, pt);

I find the imbalance pretty confusing: pagetable_dtor() is called
explicitly before using tlb_remove_page() but not tlb_remove_ptdesc().
Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected?
Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages()
to ensure that the dtor is always called just before freeing, and remove
the extra handling from arch code? I may well be missing something, I'm
not super familiar with the tlb handling code.

- Kevin
Qi Zheng Jan. 3, 2025, 3:48 a.m. UTC | #2
Hi Kevin,

On 2025/1/3 00:53, Kevin Brodsky wrote:
> On 30/12/2024 10:07, Qi Zheng wrote:
>>   static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
>>   {
>> -	if (riscv_use_sbi_for_rfence())
>> +	if (riscv_use_sbi_for_rfence()) {
>>   		tlb_remove_ptdesc(tlb, pt);
>> -	else
>> +	} else {
>> +		pagetable_dtor(pt);
>>   		tlb_remove_page_ptdesc(tlb, pt);
> 
> I find the imbalance pretty confusing: pagetable_dtor() is called
> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc().
> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected?
> Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages()
> to ensure that the dtor is always called just before freeing, and remove

In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable()
and call pagetable_dtor() to dtor the page table pages.
But __tlb_batch_free_encoded_pages() is also used to free normal pages
(not page table pages), so I don't want to add overhead there.

But now I think maybe we can do this in tlb_remove_page_ptdesc(), like
this:

diff --git a/arch/csky/include/asm/pgalloc.h 
b/arch/csky/include/asm/pgalloc.h
index f1ce5b7b28f22..e45c7e91dcbf9 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)

  #define __pte_free_tlb(tlb, pte, address)              \
  do {                                                   \
-       pagetable_dtor(page_ptdesc(pte));               \
         tlb_remove_page_ptdesc(tlb, page_ptdesc(pte));  \
  } while (0)

diff --git a/arch/hexagon/include/asm/pgalloc.h 
b/arch/hexagon/include/asm/pgalloc.h
index 40e42a0e71673..9903449f45cff 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -89,7 +89,6 @@ static inline void pmd_populate_kernel(struct 
mm_struct *mm, pmd_t *pmd,

  #define __pte_free_tlb(tlb, pte, addr)                         \
  do {                                                           \
-       pagetable_dtor((page_ptdesc(pte)));                     \
         tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte)));      \
  } while (0)

diff --git a/arch/loongarch/include/asm/pgalloc.h 
b/arch/loongarch/include/asm/pgalloc.h
index 7211dff8c969e..de5b3f5c85d1c 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -57,7 +57,6 @@ static inline pte_t *pte_alloc_one_kernel(struct 
mm_struct *mm)

  #define __pte_free_tlb(tlb, pte, address)                      \
  do {                                                           \
-       pagetable_dtor(page_ptdesc(pte));                       \
         tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));        \
  } while (0)

diff --git a/arch/m68k/include/asm/sun3_pgalloc.h 
b/arch/m68k/include/asm/sun3_pgalloc.h
index 2b626cb3ad0ae..731cc8f0731d3 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -19,7 +19,6 @@ extern const char bad_pmd_string[];

  #define __pte_free_tlb(tlb, pte, addr)                         \
  do {                                                           \
-       pagetable_dtor(page_ptdesc(pte));                       \
         tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));        \
  } while (0)

diff --git a/arch/mips/include/asm/pgalloc.h 
b/arch/mips/include/asm/pgalloc.h
index 36d9805033c4b..964ad514be281 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -56,7 +56,6 @@ static inline void pgd_free(struct mm_struct *mm, 
pgd_t *pgd)

  #define __pte_free_tlb(tlb, pte, address)                      \
  do {                                                           \
-       pagetable_dtor(page_ptdesc(pte));                       \
         tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));        \
  } while (0)

diff --git a/arch/nios2/include/asm/pgalloc.h 
b/arch/nios2/include/asm/pgalloc.h
index 12a536b7bfbd4..ef6b4b8301ac6 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -30,7 +30,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm);

  #define __pte_free_tlb(tlb, pte, addr)                                 \
         do {                                                            \
-               pagetable_dtor(page_ptdesc(pte));                       \
                 tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte)));      \
         } while (0)

diff --git a/arch/openrisc/include/asm/pgalloc.h 
b/arch/openrisc/include/asm/pgalloc.h
index 596e2355824e3..9361205610910 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -68,7 +68,6 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);

  #define __pte_free_tlb(tlb, pte, addr)                         \
  do {                                                           \
-       pagetable_dtor(page_ptdesc(pte));                       \
         tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte)));      \
  } while (0)

diff --git a/arch/riscv/include/asm/pgalloc.h 
b/arch/riscv/include/asm/pgalloc.h
index c8907b8317115..61c26576614da 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -25,12 +25,10 @@
   */
  static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, 
void *pt)
  {
-       if (riscv_use_sbi_for_rfence()) {
+       if (riscv_use_sbi_for_rfence())
                 tlb_remove_ptdesc(tlb, pt);
-       } else {
-               pagetable_dtor(pt);
+       else
                 tlb_remove_page_ptdesc(tlb, pt);
-       }
  }

  static inline void pmd_populate_kernel(struct mm_struct *mm,
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 96d938fdf2244..5b5c73e9fdb4b 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -34,7 +34,6 @@ static inline void pmd_populate(struct mm_struct *mm, 
pmd_t *pmd,

  #define __pte_free_tlb(tlb, pte, addr)                         \
  do {                                                           \
-       pagetable_dtor(page_ptdesc(pte));                       \
         tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte)));      \
  } while (0)

diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index f0af23c3aeb2b..4fd59fb184818 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -27,7 +27,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *);

  #define __pte_free_tlb(tlb, pte, address)                      \
  do {                                                           \
-       pagetable_dtor(page_ptdesc(pte));                       \
         tlb_remove_page_ptdesc((tlb), (page_ptdesc(pte)));      \
  } while (0)

@@ -35,7 +34,6 @@ do { 
        \

  #define __pmd_free_tlb(tlb, pmd, address)                      \
  do {                                                           \
-       pagetable_dtor(virt_to_ptdesc(pmd));                    \
         tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));     \
  } while (0)

@@ -43,7 +41,6 @@ do { 
        \

  #define __pud_free_tlb(tlb, pud, address)                      \
  do {                                                           \
-       pagetable_dtor(virt_to_ptdesc(pud));            \
         tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud));     \
  } while (0)

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index a96d4b440f3da..a59205863f431 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct 
mmu_gather *tlb, void *pt)
  /* Like tlb_remove_ptdesc, but for page-like page directories. */
  static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, 
struct ptdesc *pt)
  {
+       pagetable_dtor(pt);
         tlb_remove_page(tlb, ptdesc_page(pt));
  }

This avoids explicitly calling pagetable_dtor() in the architecture
code. If that makes sense, I can send a formal separate patch to do
this.

Thanks,
Qi


> the extra handling from arch code? I may well be missing something, I'm
> not super familiar with the tlb handling code >
> - Kevin
Kevin Brodsky Jan. 3, 2025, 8:02 a.m. UTC | #3
On 03/01/2025 04:48, Qi Zheng wrote:
> Hi Kevin,
>
> On 2025/1/3 00:53, Kevin Brodsky wrote:
>> On 30/12/2024 10:07, Qi Zheng wrote:
>>>   static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb,
>>> void *pt)
>>>   {
>>> -    if (riscv_use_sbi_for_rfence())
>>> +    if (riscv_use_sbi_for_rfence()) {
>>>           tlb_remove_ptdesc(tlb, pt);
>>> -    else
>>> +    } else {
>>> +        pagetable_dtor(pt);
>>>           tlb_remove_page_ptdesc(tlb, pt);
>>
>> I find the imbalance pretty confusing: pagetable_dtor() is called
>> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc().
>> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected?
>> Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages()
>> to ensure that the dtor is always called just before freeing, and remove
>
> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable()
> and call pagetable_dtor() to dtor the page table pages.
> But __tlb_batch_free_encoded_pages() is also used to free normal pages
> (not page table pages), so I don't want to add overhead there.

Interesting, can a tlb batch refer to pages than are not PTPs then?

>
> But now I think maybe we can do this in tlb_remove_page_ptdesc(), like
> this:
>
> diff --git a/arch/csky/include/asm/pgalloc.h
> b/arch/csky/include/asm/pgalloc.h
> index f1ce5b7b28f22..e45c7e91dcbf9 100644
> --- a/arch/csky/include/asm/pgalloc.h
> +++ b/arch/csky/include/asm/pgalloc.h
> @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>
>  #define __pte_free_tlb(tlb, pte, address)              \
>  do {                                                   \
> -       pagetable_dtor(page_ptdesc(pte));               \
>         tlb_remove_page_ptdesc(tlb, page_ptdesc(pte));  \
>  } while (0)
>
> [...]
>
> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
> index a96d4b440f3da..a59205863f431 100644
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct
> mmu_gather *tlb, void *pt)
>  /* Like tlb_remove_ptdesc, but for page-like page directories. */
>  static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb,
> struct ptdesc *pt)
>  {
> +       pagetable_dtor(pt);
>         tlb_remove_page(tlb, ptdesc_page(pt));
>  }

I think this is an interesting idea, it does make arch code easier to
follow. OTOH it would have been more natural to me to call
pagetable_dtor() from tlb_remove_page(). I can however see that this
doesn't work, because tlb_remove_table() is defined as tlb_remove_page()
if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me
back to my earlier question: in that case, aren't we missing a call to
pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())?

- Kevin
Qi Zheng Jan. 3, 2025, 9:13 a.m. UTC | #4
On 2025/1/3 16:02, Kevin Brodsky wrote:
> On 03/01/2025 04:48, Qi Zheng wrote:
>> Hi Kevin,
>>
>> On 2025/1/3 00:53, Kevin Brodsky wrote:
>>> On 30/12/2024 10:07, Qi Zheng wrote:
>>>>    static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb,
>>>> void *pt)
>>>>    {
>>>> -    if (riscv_use_sbi_for_rfence())
>>>> +    if (riscv_use_sbi_for_rfence()) {
>>>>            tlb_remove_ptdesc(tlb, pt);
>>>> -    else
>>>> +    } else {
>>>> +        pagetable_dtor(pt);
>>>>            tlb_remove_page_ptdesc(tlb, pt);
>>>
>>> I find the imbalance pretty confusing: pagetable_dtor() is called
>>> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc().
>>> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected?
>>> Could we not call pagetable_dtor() from __tlb_batch_free_encoded_pages()
>>> to ensure that the dtor is always called just before freeing, and remove
>>
>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable()
>> and call pagetable_dtor() to dtor the page table pages.
>> But __tlb_batch_free_encoded_pages() is also used to free normal pages
>> (not page table pages), so I don't want to add overhead there.
> 
> Interesting, can a tlb batch refer to pages than are not PTPs then?

Yes, you can see the caller of __tlb_remove_folio_pages() or 
tlb_remove_page_size().

> 
>>
>> But now I think maybe we can do this in tlb_remove_page_ptdesc(), like
>> this:
>>
>> diff --git a/arch/csky/include/asm/pgalloc.h
>> b/arch/csky/include/asm/pgalloc.h
>> index f1ce5b7b28f22..e45c7e91dcbf9 100644
>> --- a/arch/csky/include/asm/pgalloc.h
>> +++ b/arch/csky/include/asm/pgalloc.h
>> @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>>
>>   #define __pte_free_tlb(tlb, pte, address)              \
>>   do {                                                   \
>> -       pagetable_dtor(page_ptdesc(pte));               \
>>          tlb_remove_page_ptdesc(tlb, page_ptdesc(pte));  \
>>   } while (0)
>>
>> [...]
>>
>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
>> index a96d4b440f3da..a59205863f431 100644
>> --- a/include/asm-generic/tlb.h
>> +++ b/include/asm-generic/tlb.h
>> @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct
>> mmu_gather *tlb, void *pt)
>>   /* Like tlb_remove_ptdesc, but for page-like page directories. */
>>   static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb,
>> struct ptdesc *pt)
>>   {
>> +       pagetable_dtor(pt);
>>          tlb_remove_page(tlb, ptdesc_page(pt));
>>   }
> 
> I think this is an interesting idea, it does make arch code easier to
> follow. OTOH it would have been more natural to me to call
> pagetable_dtor() from tlb_remove_page(). I can however see that this
> doesn't work, because tlb_remove_table() is defined as tlb_remove_page()
> if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me
> back to my earlier question: in that case, aren't we missing a call to
> pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())?

Thank you for pointing this out!

Now, there are the following architectures selected 
CONFIG_MMU_GATHER_RCU_TABLE_FREE:

1. arm: select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
2. arm64: select MMU_GATHER_RCU_TABLE_FREE
3. powerpc: select MMU_GATHER_RCU_TABLE_FREE
4. riscv: select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU
5. s390: select MMU_GATHER_RCU_TABLE_FREE
6. sparc: select MMU_GATHER_RCU_TABLE_FREE if SMP
7. x86: select MMU_GATHER_RCU_TABLE_FREE	if PARAVIRT

If CONFIG_MMU_GATHER_TABLE_FREE is selected, an architecture is expected
to provide __tlb_remove_table(). This patch series modifies the
__tlb_remove_table() in arm, arm64, riscv, s390 and x86. Among them,
arm64 and s390 unconditionally select MMU_GATHER_RCU_TABLE_FREE, so we
only need to double-check arm, riscv and x86.

For x86, it was called tlb_remove_page() in the non-PARAVIRT case, and I
added pagetable_dtor() for it explicitly (see patch #11), so this should
be no problem.

For riscv, it will only call tlb_remove_ptdesc() in the case of
SMP && MMU, so this should be no problem.

For arm, the call to pagetable_dtor() is indeed missed in the
non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we
can't fix this by adding pagetable_dtor() to tlb_remove_table(),
because some architectures call tlb_remove_table() but don't support
page table statistics, like sparc.

So a more direct fix might be:

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index a59205863f431..0a131444a18ca 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -500,6 +500,9 @@ static inline void tlb_remove_page(struct mmu_gather 
*tlb, struct page *page)

  static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
  {
+#ifndef CONFIG_MMU_GATHER_TABLE_FREE
+       pagetable_dtor(pt);
+#endif
         tlb_remove_table(tlb, pt);
  }

Or fix it directly in arm? Like the following:

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index ea4fbe7b17f6f..cf5d0ca021440 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -43,6 +43,9 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, 
unsigned long addr)
         __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE);
  #endif

+#ifndef CONFIG_MMU_GATHER_TABLE_FREE
+       pagetable_dtor(ptdesc);
+#endif
         tlb_remove_ptdesc(tlb, ptdesc);
  }

Thanks,
Qi

> 
> - Kevin
Qi Zheng Jan. 3, 2025, 9:35 a.m. UTC | #5
On 2025/1/3 17:13, Qi Zheng wrote:
> 
> 
> On 2025/1/3 16:02, Kevin Brodsky wrote:
>> On 03/01/2025 04:48, Qi Zheng wrote:
>>> Hi Kevin,
>>>
>>> On 2025/1/3 00:53, Kevin Brodsky wrote:
>>>> On 30/12/2024 10:07, Qi Zheng wrote:
>>>>>    static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb,
>>>>> void *pt)
>>>>>    {
>>>>> -    if (riscv_use_sbi_for_rfence())
>>>>> +    if (riscv_use_sbi_for_rfence()) {
>>>>>            tlb_remove_ptdesc(tlb, pt);
>>>>> -    else
>>>>> +    } else {
>>>>> +        pagetable_dtor(pt);
>>>>>            tlb_remove_page_ptdesc(tlb, pt);
>>>>
>>>> I find the imbalance pretty confusing: pagetable_dtor() is called
>>>> explicitly before using tlb_remove_page() but not tlb_remove_ptdesc().
>>>> Doesn't that assume that CONFIG_MMU_GATHER_HAVE_TABLE_FREE is selected?
>>>> Could we not call pagetable_dtor() from 
>>>> __tlb_batch_free_encoded_pages()
>>>> to ensure that the dtor is always called just before freeing, and 
>>>> remove
>>>
>>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable()
>>> and call pagetable_dtor() to dtor the page table pages.
>>> But __tlb_batch_free_encoded_pages() is also used to free normal pages
>>> (not page table pages), so I don't want to add overhead there.
>>
>> Interesting, can a tlb batch refer to pages than are not PTPs then?
> 
> Yes, you can see the caller of __tlb_remove_folio_pages() or 
> tlb_remove_page_size().
> 
>>
>>>
>>> But now I think maybe we can do this in tlb_remove_page_ptdesc(), like
>>> this:
>>>
>>> diff --git a/arch/csky/include/asm/pgalloc.h
>>> b/arch/csky/include/asm/pgalloc.h
>>> index f1ce5b7b28f22..e45c7e91dcbf9 100644
>>> --- a/arch/csky/include/asm/pgalloc.h
>>> +++ b/arch/csky/include/asm/pgalloc.h
>>> @@ -63,7 +63,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>>>
>>>   #define __pte_free_tlb(tlb, pte, address)              \
>>>   do {                                                   \
>>> -       pagetable_dtor(page_ptdesc(pte));               \
>>>          tlb_remove_page_ptdesc(tlb, page_ptdesc(pte));  \
>>>   } while (0)
>>>
>>> [...]
>>>
>>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
>>> index a96d4b440f3da..a59205863f431 100644
>>> --- a/include/asm-generic/tlb.h
>>> +++ b/include/asm-generic/tlb.h
>>> @@ -506,6 +506,7 @@ static inline void tlb_remove_ptdesc(struct
>>> mmu_gather *tlb, void *pt)
>>>   /* Like tlb_remove_ptdesc, but for page-like page directories. */
>>>   static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb,
>>> struct ptdesc *pt)
>>>   {
>>> +       pagetable_dtor(pt);
>>>          tlb_remove_page(tlb, ptdesc_page(pt));
>>>   }
>>
>> I think this is an interesting idea, it does make arch code easier to
>> follow. OTOH it would have been more natural to me to call
>> pagetable_dtor() from tlb_remove_page(). I can however see that this
>> doesn't work, because tlb_remove_table() is defined as tlb_remove_page()
>> if CONFIG_MMU_GATHER_HAVE_TABLE_FREE isn't selected. Which brings me
>> back to my earlier question: in that case, aren't we missing a call to
>> pagetable_dtor() when using tlb_remove_table() (or tlb_remove_ptdesc())?
> 
> Thank you for pointing this out!
> 
> Now, there are the following architectures selected 
> CONFIG_MMU_GATHER_RCU_TABLE_FREE:
> 
> 1. arm: select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
> 2. arm64: select MMU_GATHER_RCU_TABLE_FREE
> 3. powerpc: select MMU_GATHER_RCU_TABLE_FREE
> 4. riscv: select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU
> 5. s390: select MMU_GATHER_RCU_TABLE_FREE
> 6. sparc: select MMU_GATHER_RCU_TABLE_FREE if SMP
> 7. x86: select MMU_GATHER_RCU_TABLE_FREE    if PARAVIRT
> 
> If CONFIG_MMU_GATHER_TABLE_FREE is selected, an architecture is expected
> to provide __tlb_remove_table(). This patch series modifies the
> __tlb_remove_table() in arm, arm64, riscv, s390 and x86. Among them,
> arm64 and s390 unconditionally select MMU_GATHER_RCU_TABLE_FREE, so we
> only need to double-check arm, riscv and x86.
> 
> For x86, it was called tlb_remove_page() in the non-PARAVIRT case, and I
> added pagetable_dtor() for it explicitly (see patch #11), so this should
> be no problem.
> 
> For riscv, it will only call tlb_remove_ptdesc() in the case of
> SMP && MMU, so this should be no problem.
> 
> For arm, the call to pagetable_dtor() is indeed missed in the
> non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we
> can't fix this by adding pagetable_dtor() to tlb_remove_table(),
> because some architectures call tlb_remove_table() but don't support
> page table statistics, like sparc.
> 
> So a more direct fix might be:
> 
> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
> index a59205863f431..0a131444a18ca 100644
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -500,6 +500,9 @@ static inline void tlb_remove_page(struct mmu_gather 
> *tlb, struct page *page)
> 
>   static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
>   {
> +#ifndef CONFIG_MMU_GATHER_TABLE_FREE
> +       pagetable_dtor(pt);
> +#endif
>          tlb_remove_table(tlb, pt);
>   }
> 
> Or fix it directly in arm? Like the following:
> 
> diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
> index ea4fbe7b17f6f..cf5d0ca021440 100644
> --- a/arch/arm/include/asm/tlb.h
> +++ b/arch/arm/include/asm/tlb.h
> @@ -43,6 +43,9 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, 
> unsigned long addr)
>          __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE);
>   #endif
> 
> +#ifndef CONFIG_MMU_GATHER_TABLE_FREE
> +       pagetable_dtor(ptdesc);
> +#endif
>          tlb_remove_ptdesc(tlb, ptdesc);
>   }

Or can we just not let tlb_remove_table() fall back to
tlb_remove_page()? Like the following:

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index a59205863f431..354ffaa4bd120 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -195,8 +195,6 @@
   *  various ptep_get_and_clear() functions.
   */

-#ifdef CONFIG_MMU_GATHER_TABLE_FREE
-
  struct mmu_table_batch {
  #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
         struct rcu_head         rcu;
@@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table)

  extern void tlb_remove_table(struct mmu_gather *tlb, void *table);

-#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
-
-/*
- * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have 
page based
- * page directories and we can use the normal page batching to free them.
- */
-#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
-
-#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
-
  #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
  /*
   * This allows an architecture that does not use the linux page-tables for

> 
> Thanks,
> Qi
> 
>>
>> - Kevin
Kevin Brodsky Jan. 3, 2025, 1:27 p.m. UTC | #6
On 03/01/2025 10:35, Qi Zheng wrote:
> On 2025/1/3 17:13, Qi Zheng wrote:
>> On 2025/1/3 16:02, Kevin Brodsky wrote:
>>> On 03/01/2025 04:48, Qi Zheng wrote:
>>>> [...]
>>>>
>>>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable()
>>>> and call pagetable_dtor() to dtor the page table pages.
>>>> But __tlb_batch_free_encoded_pages() is also used to free normal pages
>>>> (not page table pages), so I don't want to add overhead there.
>>>
>>> Interesting, can a tlb batch refer to pages than are not PTPs then?
>>
>> Yes, you can see the caller of __tlb_remove_folio_pages() or
>> tlb_remove_page_size().

I had a brief look but clearly not a good enough one! I hadn't realised
that "table" in tlb_remove_table() means PTP, while "page" in
tlb_remove_page() can mean any page, and it's making more sense now.

[...]

>>
>> For arm, the call to pagetable_dtor() is indeed missed in the
>> non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we
>> can't fix this by adding pagetable_dtor() to tlb_remove_table(),
>> because some architectures call tlb_remove_table() but don't support
>> page table statistics, like sparc.

When I investigated this for my own series, I found that the only case
where ctor/dtor are not called for page-sized page tables is 32-bit
sparc (see table at the end of [1]). However only 64-bit sparc makes use
of tlb_remove_table() (at PTE level, where ctor/dtor are already called).

So really calling pagetable_dtor() from tlb_remove_table() in the
non-MMU_GATHER_TABLE_FREE case seems to be the obvious thing to do.

Once this is done, we should be able to replace all those confusing
calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove
the explicit call to pagetable_dtor(). AIUI this is essentially what
Peter suggested on v3 [2].

[1]
https://lore.kernel.org/linux-mm/20241219164425.2277022-1-kevin.brodsky@arm.com/
[2]
https://lore.kernel.org/linux-mm/20250103111457.GC22934@noisy.programming.kicks-ass.net/

[...]

> Or can we just not let tlb_remove_table() fall back to
> tlb_remove_page()? Like the following:
>
> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
> index a59205863f431..354ffaa4bd120 100644
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -195,8 +195,6 @@
>   *  various ptep_get_and_clear() functions.
>   */
>
> -#ifdef CONFIG_MMU_GATHER_TABLE_FREE
> -
>  struct mmu_table_batch {
>  #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
>         struct rcu_head         rcu;
> @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table)
>
>  extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>
> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
> -
> -/*
> - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have
> page based
> - * page directories and we can use the normal page batching to free
> them.
> - */
> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))

We still need a different implementation of tlb_remove_table() in this
case. We could define it inline here:

static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
    struct page *page = table;

    pagetable_dtor(page_ptdesc(page));
    tlb_remove_page(page);
}

- Kevin
Qi Zheng Jan. 6, 2025, 3:49 a.m. UTC | #7
Hi Kevin,

On 2025/1/3 21:27, Kevin Brodsky wrote:
> On 03/01/2025 10:35, Qi Zheng wrote:
>> On 2025/1/3 17:13, Qi Zheng wrote:
>>> On 2025/1/3 16:02, Kevin Brodsky wrote:
>>>> On 03/01/2025 04:48, Qi Zheng wrote:
>>>>> [...]
>>>>>
>>>>> In __tlb_batch_free_encoded_pages(), we can indeed detect PageTable()
>>>>> and call pagetable_dtor() to dtor the page table pages.
>>>>> But __tlb_batch_free_encoded_pages() is also used to free normal pages
>>>>> (not page table pages), so I don't want to add overhead there.
>>>>
>>>> Interesting, can a tlb batch refer to pages than are not PTPs then?
>>>
>>> Yes, you can see the caller of __tlb_remove_folio_pages() or
>>> tlb_remove_page_size().
> 
> I had a brief look but clearly not a good enough one! I hadn't realised
> that "table" in tlb_remove_table() means PTP, while "page" in
> tlb_remove_page() can mean any page, and it's making more sense now.
> 
> [...]
> 
>>>
>>> For arm, the call to pagetable_dtor() is indeed missed in the
>>> non-MMU_GATHER_RCU_TABLE_FREE case. This needs to be fixed. But we
>>> can't fix this by adding pagetable_dtor() to tlb_remove_table(),
>>> because some architectures call tlb_remove_table() but don't support
>>> page table statistics, like sparc.
> 
> When I investigated this for my own series, I found that the only case
> where ctor/dtor are not called for page-sized page tables is 32-bit
> sparc (see table at the end of [1]). However only 64-bit sparc makes use
> of tlb_remove_table() (at PTE level, where ctor/dtor are already called).

Thanks for providing this information.

> 
> So really calling pagetable_dtor() from tlb_remove_table() in the
> non-MMU_GATHER_TABLE_FREE case seems to be the obvious thing to do.

Right. Currently, only powerpc, sparc and x86 will directly call
tlb_remove_table(), and all of them are in the MMU_GATHER_TABLE_FREE
case. Therefore, I think the modification you mentioned below is
feasible.

In summary, currently only arm calls tlb_remove_table() in the
non-MMU_GATHER_RCU_TABLE_FREE case. So I think we can add this fix
directly to patch #8. If I haven't missed anything, I'll send an
updated patch #8.

> 
> Once this is done, we should be able to replace all those confusing
> calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove
> the explicit call to pagetable_dtor(). AIUI this is essentially what
> Peter suggested on v3 [2].

Since this patch series is mainly for bug fix, I think that these things
can be done in separate patch series later.

> 
> [1]
> https://lore.kernel.org/linux-mm/20241219164425.2277022-1-kevin.brodsky@arm.com/
> [2]
> https://lore.kernel.org/linux-mm/20250103111457.GC22934@noisy.programming.kicks-ass.net/
> 
> [...]
> 
>> Or can we just not let tlb_remove_table() fall back to
>> tlb_remove_page()? Like the following:
>>
>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
>> index a59205863f431..354ffaa4bd120 100644
>> --- a/include/asm-generic/tlb.h
>> +++ b/include/asm-generic/tlb.h
>> @@ -195,8 +195,6 @@
>>    *  various ptep_get_and_clear() functions.
>>    */
>>
>> -#ifdef CONFIG_MMU_GATHER_TABLE_FREE
>> -
>>   struct mmu_table_batch {
>>   #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
>>          struct rcu_head         rcu;
>> @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table)
>>
>>   extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>>
>> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
>> -
>> -/*
>> - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have
>> page based
>> - * page directories and we can use the normal page batching to free
>> them.
>> - */
>> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
> 
> We still need a different implementation of tlb_remove_table() in this
> case. We could define it inline here:
> 
> static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
> {
>      struct page *page = table;
> 
>      pagetable_dtor(page_ptdesc(page));
>      tlb_remove_page(page);
> }

Right. As I said above, will add this to the updated patch #8.

Thanks!

> 
> - Kevin
Kevin Brodsky Jan. 7, 2025, 9:57 a.m. UTC | #8
On 06/01/2025 04:49, Qi Zheng wrote:
> [...]
>
>> Once this is done, we should be able to replace all those confusing
>> calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove
>> the explicit call to pagetable_dtor(). AIUI this is essentially what
>> Peter suggested on v3 [2].
>
> Since this patch series is mainly for bug fix, I think that these things
> can be done in separate patch series later.

Sure that's fair.

>
>>
>> [...]
>>
>>> Or can we just not let tlb_remove_table() fall back to
>>> tlb_remove_page()? Like the following:
>>>
>>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
>>> index a59205863f431..354ffaa4bd120 100644
>>> --- a/include/asm-generic/tlb.h
>>> +++ b/include/asm-generic/tlb.h
>>> @@ -195,8 +195,6 @@
>>>    *  various ptep_get_and_clear() functions.
>>>    */
>>>
>>> -#ifdef CONFIG_MMU_GATHER_TABLE_FREE
>>> -
>>>   struct mmu_table_batch {
>>>   #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
>>>          struct rcu_head         rcu;
>>> @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table)
>>>
>>>   extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>>>
>>> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
>>> -
>>> -/*
>>> - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have
>>> page based
>>> - * page directories and we can use the normal page batching to free
>>> them.
>>> - */
>>> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
>>
>> We still need a different implementation of tlb_remove_table() in this
>> case. We could define it inline here:
>>
>> static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
>> {
>>      struct page *page = table;
>>
>>      pagetable_dtor(page_ptdesc(page));
>>      tlb_remove_page(page);
>> }
>
> Right. As I said above, will add this to the updated patch #8.

I think it would be preferable to make it a standalone patch, because
this is a change to generic code that could in principle impact other
arch's too.

- Kevin
Qi Zheng Jan. 7, 2025, 10:51 a.m. UTC | #9
On 2025/1/7 17:57, Kevin Brodsky wrote:
> On 06/01/2025 04:49, Qi Zheng wrote:
>> [...]
>>
>>> Once this is done, we should be able to replace all those confusing
>>> calls to tlb_remove_page() on PTPs with tlb_remove_table() and remove
>>> the explicit call to pagetable_dtor(). AIUI this is essentially what
>>> Peter suggested on v3 [2].
>>
>> Since this patch series is mainly for bug fix, I think that these things
>> can be done in separate patch series later.
> 
> Sure that's fair.
> 
>>
>>>
>>> [...]
>>>
>>>> Or can we just not let tlb_remove_table() fall back to
>>>> tlb_remove_page()? Like the following:
>>>>
>>>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
>>>> index a59205863f431..354ffaa4bd120 100644
>>>> --- a/include/asm-generic/tlb.h
>>>> +++ b/include/asm-generic/tlb.h
>>>> @@ -195,8 +195,6 @@
>>>>     *  various ptep_get_and_clear() functions.
>>>>     */
>>>>
>>>> -#ifdef CONFIG_MMU_GATHER_TABLE_FREE
>>>> -
>>>>    struct mmu_table_batch {
>>>>    #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
>>>>           struct rcu_head         rcu;
>>>> @@ -219,16 +217,6 @@ static inline void __tlb_remove_table(void *table)
>>>>
>>>>    extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>>>>
>>>> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
>>>> -
>>>> -/*
>>>> - * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have
>>>> page based
>>>> - * page directories and we can use the normal page batching to free
>>>> them.
>>>> - */
>>>> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
>>>
>>> We still need a different implementation of tlb_remove_table() in this
>>> case. We could define it inline here:
>>>
>>> static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
>>> {
>>>       struct page *page = table;
>>>
>>>       pagetable_dtor(page_ptdesc(page));
>>>       tlb_remove_page(page);
>>> }
>>
>> Right. As I said above, will add this to the updated patch #8.
> 
> I think it would be preferable to make it a standalone patch, because
> this is a change to generic code that could in principle impact other
> arch's too.

Agree, I have done that:

```
Author: Qi Zheng <zhengqi.arch@bytedance.com>
Date:   Fri Dec 13 17:13:48 2024 +0800

     mm: pgtable: completely move pagetable_dtor() to generic 
tlb_remove_table()

     For the generic tlb_remove_table(), it is implemented in the 
following two
     forms:

     1) CONFIG_MMU_GATHER_TABLE_FREE is enabled

     tlb_remove_table
     --> generic __tlb_remove_table()

     2) CONFIG_MMU_GATHER_TABLE_FREE is disabled

     tlb_remove_table
     --> tlb_remove_page

     For case 1), the pagetable_dtor() has already been moved to generic
     __tlb_remove_table().

     For case 2), now only arm will call 
tlb_remove_table()/tlb_remove_ptdesc()
     when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Let's move 
pagetable_dtor()
     completely to generic tlb_remove_table(), so that the architectures can
     follow more easily.

     Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index b8eebdb598631..ea4fbe7b17f6f 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -34,10 +34,6 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, 
unsigned long addr)
  {
         struct ptdesc *ptdesc = page_ptdesc(pte);

-#ifndef CONFIG_MMU_GATHER_TABLE_FREE
-       pagetable_dtor(ptdesc);
-#endif
-
  #ifndef CONFIG_ARM_LPAE
         /*
          * With the classic ARM MMU, a pte page has two corresponding pmd
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 69de47c7ef3c5..53ae7748f555b 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -220,14 +220,20 @@ static inline void __tlb_remove_table(void *table)

  extern void tlb_remove_table(struct mmu_gather *tlb, void *table);

-#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
+#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page 
*page);
  /*
   * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have 
page based
   * page directories and we can use the normal page batching to free them.
   */
-#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
+static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+       struct page *page = (struct page *)table;

+       pagetable_dtor(page_ptdesc(page));
+       tlb_remove_page(tlb, page);
+}
  #endif /* CONFIG_MMU_GATHER_TABLE_FREE */

  #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
```

and will send v5 later.

Thanks!

> 
> - Kevin
Kevin Brodsky Jan. 7, 2025, 11:58 a.m. UTC | #10
On 07/01/2025 11:51, Qi Zheng wrote:
> [...]
>
> Author: Qi Zheng <zhengqi.arch@bytedance.com>
> Date:   Fri Dec 13 17:13:48 2024 +0800
>
>     mm: pgtable: completely move pagetable_dtor() to generic
> tlb_remove_table()
>
>     For the generic tlb_remove_table(), it is implemented in the
> following two
>     forms:
>
>     1) CONFIG_MMU_GATHER_TABLE_FREE is enabled
>
>     tlb_remove_table
>     --> generic __tlb_remove_table()
>
>     2) CONFIG_MMU_GATHER_TABLE_FREE is disabled
>
>     tlb_remove_table
>     --> tlb_remove_page
>
>     For case 1), the pagetable_dtor() has already been moved to generic
>     __tlb_remove_table().
>
>     For case 2), now only arm will call
> tlb_remove_table()/tlb_remove_ptdesc()
>     when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Let's move
> pagetable_dtor()
>     completely to generic tlb_remove_table(), so that the
> architectures can
>     follow more easily.
>
>     Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>
> diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
> index b8eebdb598631..ea4fbe7b17f6f 100644
> --- a/arch/arm/include/asm/tlb.h
> +++ b/arch/arm/include/asm/tlb.h
> @@ -34,10 +34,6 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t
> pte, unsigned long addr)
>  {
>         struct ptdesc *ptdesc = page_ptdesc(pte);
>
> -#ifndef CONFIG_MMU_GATHER_TABLE_FREE
> -       pagetable_dtor(ptdesc);
> -#endif

I guess this hunk will disappear since this call isn't present to start
with.

> -
>  #ifndef CONFIG_ARM_LPAE
>         /*
>          * With the classic ARM MMU, a pte page has two corresponding pmd
> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
> index 69de47c7ef3c5..53ae7748f555b 100644
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -220,14 +220,20 @@ static inline void __tlb_remove_table(void *table)
>
>  extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>
> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
> +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

Good catch!

>
> +static inline void tlb_remove_page(struct mmu_gather *tlb, struct
> page *page);

Nit: might be better to move the declaration up, e.g. above #ifdef
CONFIG_MMU_GATHER_TABLE_FREE.

>  /*
>   * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have
> page based
>   * page directories and we can use the normal page batching to free
> them.
>   */
> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
> +static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
> +{
> +       struct page *page = (struct page *)table;
>
> +       pagetable_dtor(page_ptdesc(page));
> +       tlb_remove_page(tlb, page);
> +}
>  #endif /* CONFIG_MMU_GATHER_TABLE_FREE */
>
>  #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE

Looks good to me otherwise.

- Kevin
Qi Zheng Jan. 7, 2025, 12:31 p.m. UTC | #11
On 2025/1/7 19:58, Kevin Brodsky wrote:
> On 07/01/2025 11:51, Qi Zheng wrote:
>> [...]
>>
>> Author: Qi Zheng <zhengqi.arch@bytedance.com>
>> Date:   Fri Dec 13 17:13:48 2024 +0800
>>
>>      mm: pgtable: completely move pagetable_dtor() to generic
>> tlb_remove_table()
>>
>>      For the generic tlb_remove_table(), it is implemented in the
>> following two
>>      forms:
>>
>>      1) CONFIG_MMU_GATHER_TABLE_FREE is enabled
>>
>>      tlb_remove_table
>>      --> generic __tlb_remove_table()
>>
>>      2) CONFIG_MMU_GATHER_TABLE_FREE is disabled
>>
>>      tlb_remove_table
>>      --> tlb_remove_page
>>
>>      For case 1), the pagetable_dtor() has already been moved to generic
>>      __tlb_remove_table().
>>
>>      For case 2), now only arm will call
>> tlb_remove_table()/tlb_remove_ptdesc()
>>      when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Let's move
>> pagetable_dtor()
>>      completely to generic tlb_remove_table(), so that the
>> architectures can
>>      follow more easily.
>>
>>      Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

I missed your Suggested-by, will add it in v5.

>>
>> diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
>> index b8eebdb598631..ea4fbe7b17f6f 100644
>> --- a/arch/arm/include/asm/tlb.h
>> +++ b/arch/arm/include/asm/tlb.h
>> @@ -34,10 +34,6 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t
>> pte, unsigned long addr)
>>   {
>>          struct ptdesc *ptdesc = page_ptdesc(pte);
>>
>> -#ifndef CONFIG_MMU_GATHER_TABLE_FREE
>> -       pagetable_dtor(ptdesc);
>> -#endif
> 
> I guess this hunk will disappear since this call isn't present to start
> with.

Yes, I plan to add this in the patch #8, and remove it in this patch.

> 
>> -
>>   #ifndef CONFIG_ARM_LPAE
>>          /*
>>           * With the classic ARM MMU, a pte page has two corresponding pmd
>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
>> index 69de47c7ef3c5..53ae7748f555b 100644
>> --- a/include/asm-generic/tlb.h
>> +++ b/include/asm-generic/tlb.h
>> @@ -220,14 +220,20 @@ static inline void __tlb_remove_table(void *table)
>>
>>   extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>>
>> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
>> +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */
> 
> Good catch!
> 
>>
>> +static inline void tlb_remove_page(struct mmu_gather *tlb, struct
>> page *page);
> 
> Nit: might be better to move the declaration up, e.g. above #ifdef
> CONFIG_MMU_GATHER_TABLE_FREE.

Now only the tlb_remove_table() below calls it, maybe it's better to
keep the impact to minimum?

> 
>>   /*
>>    * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have
>> page based
>>    * page directories and we can use the normal page batching to free
>> them.
>>    */
>> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
>> +static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
>> +{
>> +       struct page *page = (struct page *)table;
>>
>> +       pagetable_dtor(page_ptdesc(page));
>> +       tlb_remove_page(tlb, page);
>> +}
>>   #endif /* CONFIG_MMU_GATHER_TABLE_FREE */
>>
>>   #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
> 
> Looks good to me otherwise.

I will add your Reviewed-by to all patches (except yours) in v5, can
I also add it to this new added patch? (if we agree with the discussion
above) ;)

Thanks!

> 
> - Kevin
Kevin Brodsky Jan. 7, 2025, 2:17 p.m. UTC | #12
On 07/01/2025 13:31, Qi Zheng wrote:
> On 2025/1/7 19:58, Kevin Brodsky wrote:
>> On 07/01/2025 11:51, Qi Zheng wrote:
>>> [...]
>>>
>>> Author: Qi Zheng <zhengqi.arch@bytedance.com>
>>> Date:   Fri Dec 13 17:13:48 2024 +0800
>>>
>>>      mm: pgtable: completely move pagetable_dtor() to generic
>>> tlb_remove_table()
>>>
>>>      For the generic tlb_remove_table(), it is implemented in the
>>> following two
>>>      forms:
>>>
>>>      1) CONFIG_MMU_GATHER_TABLE_FREE is enabled
>>>
>>>      tlb_remove_table
>>>      --> generic __tlb_remove_table()
>>>
>>>      2) CONFIG_MMU_GATHER_TABLE_FREE is disabled
>>>
>>>      tlb_remove_table
>>>      --> tlb_remove_page
>>>
>>>      For case 1), the pagetable_dtor() has already been moved to
>>> generic
>>>      __tlb_remove_table().
>>>
>>>      For case 2), now only arm will call
>>> tlb_remove_table()/tlb_remove_ptdesc()
>>>      when CONFIG_MMU_GATHER_TABLE_FREE is disabled. Let's move
>>> pagetable_dtor()
>>>      completely to generic tlb_remove_table(), so that the
>>> architectures can
>>>      follow more easily.
>>>
>>>      Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>
> I missed your Suggested-by, will add it in v5.

Ah yes thanks!

>
>>>
>>> diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
>>> index b8eebdb598631..ea4fbe7b17f6f 100644
>>> --- a/arch/arm/include/asm/tlb.h
>>> +++ b/arch/arm/include/asm/tlb.h
>>> @@ -34,10 +34,6 @@ __pte_free_tlb(struct mmu_gather *tlb, pgtable_t
>>> pte, unsigned long addr)
>>>   {
>>>          struct ptdesc *ptdesc = page_ptdesc(pte);
>>>
>>> -#ifndef CONFIG_MMU_GATHER_TABLE_FREE
>>> -       pagetable_dtor(ptdesc);
>>> -#endif
>>
>> I guess this hunk will disappear since this call isn't present to start
>> with.
>
> Yes, I plan to add this in the patch #8, and remove it in this patch.

Right I guess this is required to keep patch 8 self-contained, makes sense.

>
>>
>>> -
>>>   #ifndef CONFIG_ARM_LPAE
>>>          /*
>>>           * With the classic ARM MMU, a pte page has two
>>> corresponding pmd
>>> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
>>> index 69de47c7ef3c5..53ae7748f555b 100644
>>> --- a/include/asm-generic/tlb.h
>>> +++ b/include/asm-generic/tlb.h
>>> @@ -220,14 +220,20 @@ static inline void __tlb_remove_table(void
>>> *table)
>>>
>>>   extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>>>
>>> -#else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */
>>> +#else /* !CONFIG_MMU_GATHER_TABLE_FREE */
>>
>> Good catch!
>>
>>>
>>> +static inline void tlb_remove_page(struct mmu_gather *tlb, struct
>>> page *page);
>>
>> Nit: might be better to move the declaration up, e.g. above #ifdef
>> CONFIG_MMU_GATHER_TABLE_FREE.
>
> Now only the tlb_remove_table() below calls it, maybe it's better to
> keep the impact to minimum?

I feel it might be better to make the declaration unconditional, but
this is really a detail, I don't mind either way.

>
>>
>>>   /*
>>>    * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have
>>> page based
>>>    * page directories and we can use the normal page batching to free
>>> them.
>>>    */
>>> -#define tlb_remove_table(tlb, page) tlb_remove_page((tlb), (page))
>>> +static inline void tlb_remove_table(struct mmu_gather *tlb, void
>>> *table)
>>> +{
>>> +       struct page *page = (struct page *)table;
>>>
>>> +       pagetable_dtor(page_ptdesc(page));
>>> +       tlb_remove_page(tlb, page);
>>> +}
>>>   #endif /* CONFIG_MMU_GATHER_TABLE_FREE */
>>>
>>>   #ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
>>
>> Looks good to me otherwise.
>
> I will add your Reviewed-by to all patches (except yours) in v5, can
> I also add it to this new added patch? (if we agree with the discussion
> above) ;)

Yes please do, thanks!

- Kevin
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index b6793c5c99296..c8907b8317115 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -15,12 +15,22 @@ 
 #define __HAVE_ARCH_PUD_FREE
 #include <asm-generic/pgalloc.h>
 
+/*
+ * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to
+ * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use
+ * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this
+ * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the
+ * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h
+ * for more details.
+ */
 static inline void riscv_tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt)
 {
-	if (riscv_use_sbi_for_rfence())
+	if (riscv_use_sbi_for_rfence()) {
 		tlb_remove_ptdesc(tlb, pt);
-	else
+	} else {
+		pagetable_dtor(pt);
 		tlb_remove_page_ptdesc(tlb, pt);
+	}
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -97,23 +107,15 @@  static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 				  unsigned long addr)
 {
-	if (pgtable_l4_enabled) {
-		struct ptdesc *ptdesc = virt_to_ptdesc(pud);
-
-		pagetable_dtor(ptdesc);
-		riscv_tlb_remove_ptdesc(tlb, ptdesc);
-	}
+	if (pgtable_l4_enabled)
+		riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pud));
 }
 
 static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 				  unsigned long addr)
 {
-	if (pgtable_l5_enabled) {
-		struct ptdesc *ptdesc = virt_to_ptdesc(p4d);
-
-		pagetable_dtor(ptdesc);
+	if (pgtable_l5_enabled)
 		riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
-	}
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
@@ -142,10 +144,7 @@  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 				  unsigned long addr)
 {
-	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
-
-	pagetable_dtor(ptdesc);
-	riscv_tlb_remove_ptdesc(tlb, ptdesc);
+	riscv_tlb_remove_ptdesc(tlb, virt_to_ptdesc(pmd));
 }
 
 #endif /* __PAGETABLE_PMD_FOLDED */
@@ -153,10 +152,7 @@  static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				  unsigned long addr)
 {
-	struct ptdesc *ptdesc = page_ptdesc(pte);
-
-	pagetable_dtor(ptdesc);
-	riscv_tlb_remove_ptdesc(tlb, ptdesc);
+	riscv_tlb_remove_ptdesc(tlb, page_ptdesc(pte));
 }
 #endif /* CONFIG_MMU */
 
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
index 1f6c38420d8e0..ded8724b3c4f7 100644
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -11,19 +11,13 @@  struct mmu_gather;
 static void tlb_flush(struct mmu_gather *tlb);
 
 #ifdef CONFIG_MMU
-#include <linux/swap.h>
 
-/*
- * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to
- * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use
- * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this
- * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the
- * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h
- * for more details.
- */
 static inline void __tlb_remove_table(void *table)
 {
-	free_page_and_swap_cache(table);
+	struct ptdesc *ptdesc = (struct ptdesc *)table;
+
+	pagetable_dtor(ptdesc);
+	pagetable_free(ptdesc);
 }
 
 #endif /* CONFIG_MMU */