diff mbox series

[v4,12/12] mm/debug_vm_pgtable: Fix corrupted page flag

Message ID 20210727061401.592616-13-gshan@redhat.com (mailing list archive)
State New
Headers show
Series mm/debug_vm_pgtable: Enhancements | expand

Commit Message

Gavin Shan July 27, 2021, 6:14 a.m. UTC
In the page table entry modifying tests, set_xxx_at() is used to populate
the page table entries. On ARM64, the PG_arch_1 (PG_dcache_clean) flag is
set in the target page's flags if execution permission is given. This logic
has existed since commit 4f04d8f00545 ("arm64: MMU definitions"). The page
flag is kept when the page is freed to the buddy's free area list. However,
it triggers a page checking failure when the page is pulled from the buddy's
free area list again, as the following warning messages indicate.

   BUG: Bad page state in process memhog  pfn:08000
   page:0000000015c0a628 refcount:0 mapcount:0 \
        mapping:0000000000000000 index:0x1 pfn:0x8000
   flags: 0x7ffff8000000800(arch_1|node=0|zone=0|lastcpupid=0xfffff)
   raw: 07ffff8000000800 dead000000000100 dead000000000122 0000000000000000
   raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
   page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag(s) set

Fix the issue by clearing PG_arch_1 through flush_dcache_page() after
set_xxx_at() is called. For architectures other than ARM64, the
additional overhead of cache flushing is acceptable.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 mm/debug_vm_pgtable.c | 55 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 4 deletions(-)

Comments

Christophe Leroy July 28, 2021, 7:53 a.m. UTC | #1
Gavin Shan <gshan@redhat.com> a écrit :

> In page table entry modifying tests, set_xxx_at() are used to populate
> the page table entries. On ARM64, PG_arch_1 (PG_dcache_clean) flag is
> set to the target page flag if execution permission is given. The logic
> exits since commit 4f04d8f00545 ("arm64: MMU definitions"). The page
> flag is kept when the page is free'd to buddy's free area list. However,
> it will trigger page checking failure when it's pulled from the buddy's
> free area list, as the following warning messages indicate.
>
>    BUG: Bad page state in process memhog  pfn:08000
>    page:0000000015c0a628 refcount:0 mapcount:0 \
>         mapping:0000000000000000 index:0x1 pfn:0x8000
>    flags: 0x7ffff8000000800(arch_1|node=0|zone=0|lastcpupid=0xfffff)
>    raw: 07ffff8000000800 dead000000000100 dead000000000122 0000000000000000
>    raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
>    page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag(s) set
>
> This fixes the issue by clearing PG_arch_1 through flush_dcache_page()
> after set_xxx_at() is called. For architectures other than ARM64, the
> unexpected overhead of cache flushing is acceptable.
>
> Signed-off-by: Gavin Shan <gshan@redhat.com>

Maybe a Fixes: tag would be good to have

And would it be possible to have this fix as the first patch of the series
so that it can be applied to stable without applying the whole series?

Christophe


> ---
>  mm/debug_vm_pgtable.c | 55 +++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 162ff6329f7b..d2c2d23e542e 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -29,6 +29,8 @@
>  #include <linux/start_kernel.h>
>  #include <linux/sched/mm.h>
>  #include <linux/io.h>
> +
> +#include <asm/cacheflush.h>
>  #include <asm/pgalloc.h>
>  #include <asm/tlbflush.h>
>
> @@ -119,19 +121,28 @@ static void __init pte_basic_tests(struct  
> pgtable_debug_args *args, int idx)
>
>  static void __init pte_advanced_tests(struct pgtable_debug_args *args)
>  {
> +	struct page *page;
>  	pte_t pte;
>
>  	/*
>  	 * Architectures optimize set_pte_at by avoiding TLB flush.
>  	 * This requires set_pte_at to be not used to update an
>  	 * existing pte entry. Clear pte before we do set_pte_at
> +	 *
> +	 * flush_dcache_page() is called after set_pte_at() to clear
> +	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
> +	 * when it's released and page allocation check will fail when
> +	 * the page is allocated again. For architectures other than ARM64,
> +	 * the unexpected overhead of cache flushing is acceptable.
>  	 */
> -	if (args->pte_pfn == ULONG_MAX)
> +	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
> +	if (!page)
>  		return;
>
>  	pr_debug("Validating PTE advanced\n");
>  	pte = pfn_pte(args->pte_pfn, args->page_prot);
>  	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
> +	flush_dcache_page(page);
>  	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
>  	pte = ptep_get(args->ptep);
>  	WARN_ON(pte_write(pte));
> @@ -143,6 +154,7 @@ static void __init pte_advanced_tests(struct  
> pgtable_debug_args *args)
>  	pte = pte_wrprotect(pte);
>  	pte = pte_mkclean(pte);
>  	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
> +	flush_dcache_page(page);
>  	pte = pte_mkwrite(pte);
>  	pte = pte_mkdirty(pte);
>  	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
> @@ -155,6 +167,7 @@ static void __init pte_advanced_tests(struct  
> pgtable_debug_args *args)
>  	pte = pfn_pte(args->pte_pfn, args->page_prot);
>  	pte = pte_mkyoung(pte);
>  	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
> +	flush_dcache_page(page);
>  	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
>  	pte = ptep_get(args->ptep);
>  	WARN_ON(pte_young(pte));
> @@ -213,15 +226,24 @@ static void __init pmd_basic_tests(struct  
> pgtable_debug_args *args, int idx)
>
>  static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
>  {
> +	struct page *page;
>  	pmd_t pmd;
>  	unsigned long vaddr = args->vaddr;
>
>  	if (!has_transparent_hugepage())
>  		return;
>
> -	if (args->pmd_pfn == ULONG_MAX)
> +	page = (args->pmd_pfn != ULONG_MAX) ? pfn_to_page(args->pmd_pfn) : NULL;
> +	if (!page)
>  		return;
>
> +	/*
> +	 * flush_dcache_page() is called after set_pmd_at() to clear
> +	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
> +	 * when it's released and page allocation check will fail when
> +	 * the page is allocated again. For architectures other than ARM64,
> +	 * the unexpected overhead of cache flushing is acceptable.
> +	 */
>  	pr_debug("Validating PMD advanced\n");
>  	/* Align the address wrt HPAGE_PMD_SIZE */
>  	vaddr &= HPAGE_PMD_MASK;
> @@ -230,6 +252,7 @@ static void __init pmd_advanced_tests(struct  
> pgtable_debug_args *args)
>
>  	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
>  	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
> +	flush_dcache_page(page);
>  	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
>  	pmd = READ_ONCE(*args->pmdp);
>  	WARN_ON(pmd_write(pmd));
> @@ -241,6 +264,7 @@ static void __init pmd_advanced_tests(struct  
> pgtable_debug_args *args)
>  	pmd = pmd_wrprotect(pmd);
>  	pmd = pmd_mkclean(pmd);
>  	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
> +	flush_dcache_page(page);
>  	pmd = pmd_mkwrite(pmd);
>  	pmd = pmd_mkdirty(pmd);
>  	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
> @@ -253,6 +277,7 @@ static void __init pmd_advanced_tests(struct  
> pgtable_debug_args *args)
>  	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
>  	pmd = pmd_mkyoung(pmd);
>  	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
> +	flush_dcache_page(page);
>  	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
>  	pmd = READ_ONCE(*args->pmdp);
>  	WARN_ON(pmd_young(pmd));
> @@ -339,21 +364,31 @@ static void __init pud_basic_tests(struct  
> pgtable_debug_args *args, int idx)
>
>  static void __init pud_advanced_tests(struct pgtable_debug_args *args)
>  {
> +	struct page *page;
>  	unsigned long vaddr = args->vaddr;
>  	pud_t pud;
>
>  	if (!has_transparent_hugepage())
>  		return;
>
> -	if (args->pud_pfn == ULONG_MAX)
> +	page = (args->pud_pfn != ULONG_MAX) ? pfn_to_page(args->pud_pfn) : NULL;
> +	if (!page)
>  		return;
>
> +	/*
> +	 * flush_dcache_page() is called after set_pud_at() to clear
> +	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
> +	 * when it's released and page allocation check will fail when
> +	 * the page is allocated again. For architectures other than ARM64,
> +	 * the unexpected overhead of cache flushing is acceptable.
> +	 */
>  	pr_debug("Validating PUD advanced\n");
>  	/* Align the address wrt HPAGE_PUD_SIZE */
>  	vaddr &= HPAGE_PUD_MASK;
>
>  	pud = pfn_pud(args->pud_pfn, args->page_prot);
>  	set_pud_at(args->mm, vaddr, args->pudp, pud);
> +	flush_dcache_page(page);
>  	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
>  	pud = READ_ONCE(*args->pudp);
>  	WARN_ON(pud_write(pud));
> @@ -367,6 +402,7 @@ static void __init pud_advanced_tests(struct  
> pgtable_debug_args *args)
>  	pud = pud_wrprotect(pud);
>  	pud = pud_mkclean(pud);
>  	set_pud_at(args->mm, vaddr, args->pudp, pud);
> +	flush_dcache_page(page);
>  	pud = pud_mkwrite(pud);
>  	pud = pud_mkdirty(pud);
>  	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
> @@ -382,6 +418,7 @@ static void __init pud_advanced_tests(struct  
> pgtable_debug_args *args)
>  	pud = pfn_pud(args->pud_pfn, args->page_prot);
>  	pud = pud_mkyoung(pud);
>  	set_pud_at(args->mm, vaddr, args->pudp, pud);
> +	flush_dcache_page(page);
>  	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
>  	pud = READ_ONCE(*args->pudp);
>  	WARN_ON(pud_young(pud));
> @@ -594,16 +631,26 @@ static void __init pgd_populate_tests(struct  
> pgtable_debug_args *args) { }
>
>  static void __init pte_clear_tests(struct pgtable_debug_args *args)
>  {
> +	struct page *page;
>  	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);
>
> -	if (args->pte_pfn == ULONG_MAX)
> +	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
> +	if (!page)
>  		return;
>
> +	/*
> +	 * flush_dcache_page() is called after set_pte_at() to clear
> +	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
> +	 * when it's released and page allocation check will fail when
> +	 * the page is allocated again. For architectures other than ARM64,
> +	 * the unexpected overhead of cache flushing is acceptable.
> +	 */
>  	pr_debug("Validating PTE clear\n");
>  #ifndef CONFIG_RISCV
>  	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
>  #endif
>  	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
> +	flush_dcache_page(page);
>  	barrier();
>  	pte_clear(args->mm, args->vaddr, args->ptep);
>  	pte = ptep_get(args->ptep);
> --
> 2.23.0
Anshuman Khandual July 28, 2021, 10:05 a.m. UTC | #2
On 7/28/21 1:23 PM, Christophe Leroy wrote:
> Gavin Shan <gshan@redhat.com> a écrit :
> 
>> In page table entry modifying tests, set_xxx_at() are used to populate
>> the page table entries. On ARM64, PG_arch_1 (PG_dcache_clean) flag is
>> set to the target page flag if execution permission is given. The logic
>> exits since commit 4f04d8f00545 ("arm64: MMU definitions"). The page
>> flag is kept when the page is free'd to buddy's free area list. However,
>> it will trigger page checking failure when it's pulled from the buddy's
>> free area list, as the following warning messages indicate.
>>
>>    BUG: Bad page state in process memhog  pfn:08000
>>    page:0000000015c0a628 refcount:0 mapcount:0 \
>>         mapping:0000000000000000 index:0x1 pfn:0x8000
>>    flags: 0x7ffff8000000800(arch_1|node=0|zone=0|lastcpupid=0xfffff)
>>    raw: 07ffff8000000800 dead000000000100 dead000000000122 0000000000000000
>>    raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
>>    page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag(s) set
>>
>> This fixes the issue by clearing PG_arch_1 through flush_dcache_page()
>> after set_xxx_at() is called. For architectures other than ARM64, the
>> unexpected overhead of cache flushing is acceptable.
>>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
> 
> Maybe a Fixes: tag would be good to have

Agreed.

Fixes: a5c3b9ffb0f4 ("mm/debug_vm_pgtable: add tests validating advanced arch page table helpers")

> 
> And would it be possible to have this fix as first patch of the series so that it can be applied to stable without applying the whole series ?
Changing the allocation scheme does solve another problem (using non-owned pages),
but that is only achieved once the entire series is applied. This particular patch,
however, could be moved to the beginning without much problem.
Gavin Shan July 29, 2021, midnight UTC | #3
Hi Christophe and Anshuman,

On 7/28/21 8:05 PM, Anshuman Khandual wrote:
> On 7/28/21 1:23 PM, Christophe Leroy wrote:
>> Gavin Shan <gshan@redhat.com> a écrit :
>>> In page table entry modifying tests, set_xxx_at() are used to populate
>>> the page table entries. On ARM64, PG_arch_1 (PG_dcache_clean) flag is
>>> set to the target page flag if execution permission is given. The logic
>>> exits since commit 4f04d8f00545 ("arm64: MMU definitions"). The page
>>> flag is kept when the page is free'd to buddy's free area list. However,
>>> it will trigger page checking failure when it's pulled from the buddy's
>>> free area list, as the following warning messages indicate.
>>>
>>>     BUG: Bad page state in process memhog  pfn:08000
>>>     page:0000000015c0a628 refcount:0 mapcount:0 \
>>>          mapping:0000000000000000 index:0x1 pfn:0x8000
>>>     flags: 0x7ffff8000000800(arch_1|node=0|zone=0|lastcpupid=0xfffff)
>>>     raw: 07ffff8000000800 dead000000000100 dead000000000122 0000000000000000
>>>     raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
>>>     page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag(s) set
>>>
>>> This fixes the issue by clearing PG_arch_1 through flush_dcache_page()
>>> after set_xxx_at() is called. For architectures other than ARM64, the
>>> unexpected overhead of cache flushing is acceptable.
>>>
>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>
>> Maybe a Fixes: tag would be good to have
> 
> Agreed.
> 
> Fixes: a5c3b9ffb0f4 ("mm/debug_vm_pgtable: add tests validating advanced arch page table helpers")
> 

Yep, I will add the tag in v5.

>>
>> And would it be possible to have this fix as first patch of the series so that it can be applied to stable without applying the whole series ?
> Changing the allocation scheme does solve another problem (using non-owned pages)
> but is achieved via the entire series applied. But this particular patch could be
> moved to the beginning without much problem.
> 

I prefer to keep the current layout, as explained before. Firstly, all
code changes included in this series affect only one source
file. It's hard to apply the whole series to a stable kernel. I also
need to apply this series to our downstream kernel once it hits upstream.
Secondly, applying PATCH[v4 12/12] alone can't resolve all the issues we have.
It means we need to apply the whole series to resolve all issues.
Lastly, moving PATCH[v4 12/12] to PATCH[v4 01/12] would introduce
unnecessary code for the subsequent patches. I don't think it's worth
doing.

So let's keep the current layout we have.

Thanks,
Gavin
Anshuman Khandual July 29, 2021, 6:05 a.m. UTC | #4
On 7/27/21 11:44 AM, Gavin Shan wrote:
> In page table entry modifying tests, set_xxx_at() are used to populate
> the page table entries. On ARM64, PG_arch_1 (PG_dcache_clean) flag is
> set to the target page flag if execution permission is given. The logic
> exits since commit 4f04d8f00545 ("arm64: MMU definitions"). The page
> flag is kept when the page is free'd to buddy's free area list. However,
> it will trigger page checking failure when it's pulled from the buddy's
> free area list, as the following warning messages indicate.
> 
>    BUG: Bad page state in process memhog  pfn:08000
>    page:0000000015c0a628 refcount:0 mapcount:0 \
>         mapping:0000000000000000 index:0x1 pfn:0x8000
>    flags: 0x7ffff8000000800(arch_1|node=0|zone=0|lastcpupid=0xfffff)
>    raw: 07ffff8000000800 dead000000000100 dead000000000122 0000000000000000
>    raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000
>    page dumped because: PAGE_FLAGS_CHECK_AT_PREP flag(s) set
> 
> This fixes the issue by clearing PG_arch_1 through flush_dcache_page()
> after set_xxx_at() is called. For architectures other than ARM64, the
> unexpected overhead of cache flushing is acceptable.
> 
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>  mm/debug_vm_pgtable.c | 55 +++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 51 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 162ff6329f7b..d2c2d23e542e 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -29,6 +29,8 @@
>  #include <linux/start_kernel.h>
>  #include <linux/sched/mm.h>
>  #include <linux/io.h>
> +
> +#include <asm/cacheflush.h>
>  #include <asm/pgalloc.h>
>  #include <asm/tlbflush.h>
>  
> @@ -119,19 +121,28 @@ static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
>  
>  static void __init pte_advanced_tests(struct pgtable_debug_args *args)
>  {
> +	struct page *page;
>  	pte_t pte;
>  
>  	/*
>  	 * Architectures optimize set_pte_at by avoiding TLB flush.
>  	 * This requires set_pte_at to be not used to update an
>  	 * existing pte entry. Clear pte before we do set_pte_at
> +	 *
> +	 * flush_dcache_page() is called after set_pte_at() to clear
> +	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
> +	 * when it's released and page allocation check will fail when
> +	 * the page is allocated again. For architectures other than ARM64,
> +	 * the unexpected overhead of cache flushing is acceptable.
>  	 */
> -	if (args->pte_pfn == ULONG_MAX)
> +	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
> +	if (!page)
>  		return;
>  
>  	pr_debug("Validating PTE advanced\n");
>  	pte = pfn_pte(args->pte_pfn, args->page_prot);
>  	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
> +	flush_dcache_page(page);
>  	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
>  	pte = ptep_get(args->ptep);
>  	WARN_ON(pte_write(pte));
> @@ -143,6 +154,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
>  	pte = pte_wrprotect(pte);
>  	pte = pte_mkclean(pte);
>  	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
> +	flush_dcache_page(page);
>  	pte = pte_mkwrite(pte);
>  	pte = pte_mkdirty(pte);
>  	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
> @@ -155,6 +167,7 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
>  	pte = pfn_pte(args->pte_pfn, args->page_prot);
>  	pte = pte_mkyoung(pte);
>  	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
> +	flush_dcache_page(page);
>  	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
>  	pte = ptep_get(args->ptep);
>  	WARN_ON(pte_young(pte));
> @@ -213,15 +226,24 @@ static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
>  
>  static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
>  {
> +	struct page *page;
>  	pmd_t pmd;
>  	unsigned long vaddr = args->vaddr;
>  
>  	if (!has_transparent_hugepage())
>  		return;
>  
> -	if (args->pmd_pfn == ULONG_MAX)
> +	page = (args->pmd_pfn != ULONG_MAX) ? pfn_to_page(args->pmd_pfn) : NULL;
> +	if (!page)
>  		return;
>  
> +	/*
> +	 * flush_dcache_page() is called after set_pmd_at() to clear
> +	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
> +	 * when it's released and page allocation check will fail when
> +	 * the page is allocated again. For architectures other than ARM64,
> +	 * the unexpected overhead of cache flushing is acceptable.
> +	 */
>  	pr_debug("Validating PMD advanced\n");
>  	/* Align the address wrt HPAGE_PMD_SIZE */
>  	vaddr &= HPAGE_PMD_MASK;
> @@ -230,6 +252,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
>  
>  	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
>  	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
> +	flush_dcache_page(page);
>  	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
>  	pmd = READ_ONCE(*args->pmdp);
>  	WARN_ON(pmd_write(pmd));
> @@ -241,6 +264,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
>  	pmd = pmd_wrprotect(pmd);
>  	pmd = pmd_mkclean(pmd);
>  	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
> +	flush_dcache_page(page);
>  	pmd = pmd_mkwrite(pmd);
>  	pmd = pmd_mkdirty(pmd);
>  	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
> @@ -253,6 +277,7 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
>  	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
>  	pmd = pmd_mkyoung(pmd);
>  	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
> +	flush_dcache_page(page);
>  	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
>  	pmd = READ_ONCE(*args->pmdp);
>  	WARN_ON(pmd_young(pmd));
> @@ -339,21 +364,31 @@ static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx)
>  
>  static void __init pud_advanced_tests(struct pgtable_debug_args *args)
>  {
> +	struct page *page;
>  	unsigned long vaddr = args->vaddr;
>  	pud_t pud;
>  
>  	if (!has_transparent_hugepage())
>  		return;
>  
> -	if (args->pud_pfn == ULONG_MAX)
> +	page = (args->pud_pfn != ULONG_MAX) ? pfn_to_page(args->pud_pfn) : NULL;
> +	if (!page)
>  		return;
>  
> +	/*
> +	 * flush_dcache_page() is called after set_pud_at() to clear
> +	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
> +	 * when it's released and page allocation check will fail when
> +	 * the page is allocated again. For architectures other than ARM64,
> +	 * the unexpected overhead of cache flushing is acceptable.
> +	 */
>  	pr_debug("Validating PUD advanced\n");
>  	/* Align the address wrt HPAGE_PUD_SIZE */
>  	vaddr &= HPAGE_PUD_MASK;
>  
>  	pud = pfn_pud(args->pud_pfn, args->page_prot);
>  	set_pud_at(args->mm, vaddr, args->pudp, pud);
> +	flush_dcache_page(page);
>  	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
>  	pud = READ_ONCE(*args->pudp);
>  	WARN_ON(pud_write(pud));
> @@ -367,6 +402,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
>  	pud = pud_wrprotect(pud);
>  	pud = pud_mkclean(pud);
>  	set_pud_at(args->mm, vaddr, args->pudp, pud);
> +	flush_dcache_page(page);
>  	pud = pud_mkwrite(pud);
>  	pud = pud_mkdirty(pud);
>  	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
> @@ -382,6 +418,7 @@ static void __init pud_advanced_tests(struct pgtable_debug_args *args)
>  	pud = pfn_pud(args->pud_pfn, args->page_prot);
>  	pud = pud_mkyoung(pud);
>  	set_pud_at(args->mm, vaddr, args->pudp, pud);
> +	flush_dcache_page(page);
>  	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
>  	pud = READ_ONCE(*args->pudp);
>  	WARN_ON(pud_young(pud));
> @@ -594,16 +631,26 @@ static void __init pgd_populate_tests(struct pgtable_debug_args *args) { }
>  
>  static void __init pte_clear_tests(struct pgtable_debug_args *args)
>  {
> +	struct page *page;
>  	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);
>  
> -	if (args->pte_pfn == ULONG_MAX)
> +	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
> +	if (!page)
>  		return;
>  
> +	/*
> +	 * flush_dcache_page() is called after set_pte_at() to clear
> +	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
> +	 * when it's released and page allocation check will fail when
> +	 * the page is allocated again. For architectures other than ARM64,
> +	 * the unexpected overhead of cache flushing is acceptable.
> +	 */
>  	pr_debug("Validating PTE clear\n");
>  #ifndef CONFIG_RISCV
>  	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
>  #endif
>  	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
> +	flush_dcache_page(page);
>  	barrier();
>  	pte_clear(args->mm, args->vaddr, args->ptep);
>  	pte = ptep_get(args->ptep);
> 

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
diff mbox series

Patch

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 162ff6329f7b..d2c2d23e542e 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -29,6 +29,8 @@ 
 #include <linux/start_kernel.h>
 #include <linux/sched/mm.h>
 #include <linux/io.h>
+
+#include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
@@ -119,19 +121,28 @@  static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx)
 
 static void __init pte_advanced_tests(struct pgtable_debug_args *args)
 {
+	struct page *page;
 	pte_t pte;
 
 	/*
 	 * Architectures optimize set_pte_at by avoiding TLB flush.
 	 * This requires set_pte_at to be not used to update an
 	 * existing pte entry. Clear pte before we do set_pte_at
+	 *
+	 * flush_dcache_page() is called after set_pte_at() to clear
+	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
+	 * when it's released and page allocation check will fail when
+	 * the page is allocated again. For architectures other than ARM64,
+	 * the unexpected overhead of cache flushing is acceptable.
 	 */
-	if (args->pte_pfn == ULONG_MAX)
+	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
+	if (!page)
 		return;
 
 	pr_debug("Validating PTE advanced\n");
 	pte = pfn_pte(args->pte_pfn, args->page_prot);
 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+	flush_dcache_page(page);
 	ptep_set_wrprotect(args->mm, args->vaddr, args->ptep);
 	pte = ptep_get(args->ptep);
 	WARN_ON(pte_write(pte));
@@ -143,6 +154,7 @@  static void __init pte_advanced_tests(struct pgtable_debug_args *args)
 	pte = pte_wrprotect(pte);
 	pte = pte_mkclean(pte);
 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+	flush_dcache_page(page);
 	pte = pte_mkwrite(pte);
 	pte = pte_mkdirty(pte);
 	ptep_set_access_flags(args->vma, args->vaddr, args->ptep, pte, 1);
@@ -155,6 +167,7 @@  static void __init pte_advanced_tests(struct pgtable_debug_args *args)
 	pte = pfn_pte(args->pte_pfn, args->page_prot);
 	pte = pte_mkyoung(pte);
 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+	flush_dcache_page(page);
 	ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep);
 	pte = ptep_get(args->ptep);
 	WARN_ON(pte_young(pte));
@@ -213,15 +226,24 @@  static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
 
 static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
 {
+	struct page *page;
 	pmd_t pmd;
 	unsigned long vaddr = args->vaddr;
 
 	if (!has_transparent_hugepage())
 		return;
 
-	if (args->pmd_pfn == ULONG_MAX)
+	page = (args->pmd_pfn != ULONG_MAX) ? pfn_to_page(args->pmd_pfn) : NULL;
+	if (!page)
 		return;
 
+	/*
+	 * flush_dcache_page() is called after set_pmd_at() to clear
+	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
+	 * when it's released and page allocation check will fail when
+	 * the page is allocated again. For architectures other than ARM64,
+	 * the unexpected overhead of cache flushing is acceptable.
+	 */
 	pr_debug("Validating PMD advanced\n");
 	/* Align the address wrt HPAGE_PMD_SIZE */
 	vaddr &= HPAGE_PMD_MASK;
@@ -230,6 +252,7 @@  static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
 
 	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
+	flush_dcache_page(page);
 	pmdp_set_wrprotect(args->mm, vaddr, args->pmdp);
 	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(pmd_write(pmd));
@@ -241,6 +264,7 @@  static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
 	pmd = pmd_wrprotect(pmd);
 	pmd = pmd_mkclean(pmd);
 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
+	flush_dcache_page(page);
 	pmd = pmd_mkwrite(pmd);
 	pmd = pmd_mkdirty(pmd);
 	pmdp_set_access_flags(args->vma, vaddr, args->pmdp, pmd, 1);
@@ -253,6 +277,7 @@  static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
 	pmd = pmd_mkhuge(pfn_pmd(args->pmd_pfn, args->page_prot));
 	pmd = pmd_mkyoung(pmd);
 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
+	flush_dcache_page(page);
 	pmdp_test_and_clear_young(args->vma, vaddr, args->pmdp);
 	pmd = READ_ONCE(*args->pmdp);
 	WARN_ON(pmd_young(pmd));
@@ -339,21 +364,31 @@  static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx)
 
 static void __init pud_advanced_tests(struct pgtable_debug_args *args)
 {
+	struct page *page;
 	unsigned long vaddr = args->vaddr;
 	pud_t pud;
 
 	if (!has_transparent_hugepage())
 		return;
 
-	if (args->pud_pfn == ULONG_MAX)
+	page = (args->pud_pfn != ULONG_MAX) ? pfn_to_page(args->pud_pfn) : NULL;
+	if (!page)
 		return;
 
+	/*
+	 * flush_dcache_page() is called after set_pud_at() to clear
+	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
+	 * when it's released and page allocation check will fail when
+	 * the page is allocated again. For architectures other than ARM64,
+	 * the unexpected overhead of cache flushing is acceptable.
+	 */
 	pr_debug("Validating PUD advanced\n");
 	/* Align the address wrt HPAGE_PUD_SIZE */
 	vaddr &= HPAGE_PUD_MASK;
 
 	pud = pfn_pud(args->pud_pfn, args->page_prot);
 	set_pud_at(args->mm, vaddr, args->pudp, pud);
+	flush_dcache_page(page);
 	pudp_set_wrprotect(args->mm, vaddr, args->pudp);
 	pud = READ_ONCE(*args->pudp);
 	WARN_ON(pud_write(pud));
@@ -367,6 +402,7 @@  static void __init pud_advanced_tests(struct pgtable_debug_args *args)
 	pud = pud_wrprotect(pud);
 	pud = pud_mkclean(pud);
 	set_pud_at(args->mm, vaddr, args->pudp, pud);
+	flush_dcache_page(page);
 	pud = pud_mkwrite(pud);
 	pud = pud_mkdirty(pud);
 	pudp_set_access_flags(args->vma, vaddr, args->pudp, pud, 1);
@@ -382,6 +418,7 @@  static void __init pud_advanced_tests(struct pgtable_debug_args *args)
 	pud = pfn_pud(args->pud_pfn, args->page_prot);
 	pud = pud_mkyoung(pud);
 	set_pud_at(args->mm, vaddr, args->pudp, pud);
+	flush_dcache_page(page);
 	pudp_test_and_clear_young(args->vma, vaddr, args->pudp);
 	pud = READ_ONCE(*args->pudp);
 	WARN_ON(pud_young(pud));
@@ -594,16 +631,26 @@  static void __init pgd_populate_tests(struct pgtable_debug_args *args) { }
 
 static void __init pte_clear_tests(struct pgtable_debug_args *args)
 {
+	struct page *page;
 	pte_t pte = pfn_pte(args->pte_pfn, args->page_prot);
 
-	if (args->pte_pfn == ULONG_MAX)
+	page = (args->pte_pfn != ULONG_MAX) ? pfn_to_page(args->pte_pfn) : NULL;
+	if (!page)
 		return;
 
+	/*
+	 * flush_dcache_page() is called after set_pte_at() to clear
+	 * PG_arch_1 for the page on ARM64. The page flag isn't cleared
+	 * when it's released and page allocation check will fail when
+	 * the page is allocated again. For architectures other than ARM64,
+	 * the unexpected overhead of cache flushing is acceptable.
+	 */
 	pr_debug("Validating PTE clear\n");
 #ifndef CONFIG_RISCV
 	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
 #endif
 	set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+	flush_dcache_page(page);
 	barrier();
 	pte_clear(args->mm, args->vaddr, args->ptep);
 	pte = ptep_get(args->ptep);