diff mbox series

[v4,13/13] mm/debug_vm_pgtable: Avoid none pte in pte_clear_test

Message ID 20200902114222.181353-14-aneesh.kumar@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series mm/debug_vm_pgtable fixes | expand

Commit Message

Aneesh Kumar K.V Sept. 2, 2020, 11:42 a.m. UTC
pte_clear_tests() operates on an existing pte entry. Make sure that
entry is not a none pte.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 mm/debug_vm_pgtable.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

Comments

Nathan Chancellor Sept. 11, 2020, 2:13 a.m. UTC | #1
On Wed, Sep 02, 2020 at 05:12:22PM +0530, Aneesh Kumar K.V wrote:
> pte_clear_tests operate on an existing pte entry. Make sure that
> is not a none pte entry.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  mm/debug_vm_pgtable.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 9afa1354326b..c36530c69e33 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -542,9 +542,10 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
>  #endif /* PAGETABLE_P4D_FOLDED */
>  
>  static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
> -				   unsigned long vaddr)
> +				   unsigned long pfn, unsigned long vaddr,
> +				   pgprot_t prot)
>  {
> -	pte_t pte = ptep_get(ptep);
> +	pte_t pte = pfn_pte(pfn, prot);
>  
>  	pr_debug("Validating PTE clear\n");
>  	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
> @@ -1049,7 +1050,7 @@ static int __init debug_vm_pgtable(void)
>  
>  	ptl = pte_lockptr(mm, pmdp);
>  	spin_lock(ptl);
> -	pte_clear_tests(mm, ptep, vaddr);
> +	pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot);
>  	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
>  	pte_unmap_unlock(ptep, ptl);
>  
> -- 
> 2.26.2
> 
> 

This patch causes a panic at boot for RISC-V defconfig. The rootfs is here if it is needed:
https://github.com/ClangBuiltLinux/boot-utils/blob/3b21a5b71451742866349ba4f18638c5a754e660/images/riscv/rootfs.cpio.zst

$ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- O=out/riscv distclean defconfig Image

$ qemu-system-riscv64 -bios default -M virt -display none -initrd rootfs.cpio -kernel Image -m 512m -nodefaults -serial mon:stdio
...

OpenSBI v0.6
   ____                    _____ ____ _____
  / __ \                  / ____|  _ \_   _|
 | |  | |_ __   ___ _ __ | (___ | |_) || |
 | |  | | '_ \ / _ \ '_ \ \___ \|  _ < | |
 | |__| | |_) |  __/ | | |____) | |_) || |_
  \____/| .__/ \___|_| |_|_____/|____/_____|
        | |
        |_|

Platform Name          : QEMU Virt Machine
Platform HART Features : RV64ACDFIMSU
Platform Max HARTs     : 8
Current Hart           : 0
Firmware Base          : 0x80000000
Firmware Size          : 120 KB
Runtime SBI Version    : 0.2

MIDELEG : 0x0000000000000222
MEDELEG : 0x000000000000b109
PMP0    : 0x0000000080000000-0x000000008001ffff (A)
PMP1    : 0x0000000000000000-0xffffffffffffffff (A,R,W,X)
[    0.000000] Linux version 5.9.0-rc4-next-20200910 (nathan@ubuntu-n2-xlarge-x86) (riscv64-linux-gcc (GCC) 10.2.0, GNU ld (GNU Binutils) 2.35) #1 SMP Thu Sep 10 19:10:43 MST 2020
...
[    0.294593] NET: Registered protocol family 17
[    0.295781] 9pnet: Installing 9P2000 support
[    0.296153] Key type dns_resolver registered
[    0.296694] debug_vm_pgtable: [debug_vm_pgtable         ]: Validating architecture page table helpers
[    0.297635] Unable to handle kernel paging request at virtual address 0a7fffe01dafefc8
[    0.298029] Oops [#1]
[    0.298153] Modules linked in:
[    0.298433] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc4-next-20200910 #1
[    0.298792] epc: ffffffe000205afc ra : ffffffe0008be0aa sp : ffffffe01ae73d40
[    0.299078]  gp : ffffffe0010b9b48 tp : ffffffe01ae68000 t0 : ffffffe008152000
[    0.299362]  t1 : 0000000000000000 t2 : 0000000000000000 s0 : ffffffe01ae73d60
[    0.299648]  s1 : bffffffffffffffb a0 : 0a7fffe01dafefc8 a1 : bffffffffffffffb
[    0.299948]  a2 : ffffffe0010a2698 a3 : 0000000000000001 a4 : 0000000000000003
[    0.300231]  a5 : 0000000000000800 a6 : fffffffff0000080 a7 : 000000001b642000
[    0.300521]  s2 : ffffffe0081517b8 s3 : ffffffe008150a80 s4 : ffffffe01af30000
[    0.300806]  s5 : ffffffe01f8ca9b8 s6 : ffffffe008150000 s7 : ffffffe0010bb100
[    0.301161]  s8 : ffffffe0010bb108 s9 : 0000000000080202 s10: ffffffe0010bb928
[    0.301481]  s11: 000000002008085b t3 : 0000000000000000 t4 : 0000000000000000
[    0.301722]  t5 : 0000000000000000 t6 : ffffffe008150000
[    0.301947] status: 0000000000000120 badaddr: 0a7fffe01dafefc8 cause: 000000000000000f
[    0.302569] ---[ end trace 7ffb153d816164cf ]---
[    0.302797] note: swapper/0[1] exited with preempt_count 1
[    0.303101] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[    0.303614] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

$ git bisect log
# bad: [7ce53e3a447bced7b85ed181c4d027e93c062e07] Add linux-next specific files for 20200910
# good: [34d4ddd359dbcdf6c5fb3f85a179243d7a1cb7f8] Merge tag 'linux-kselftest-5.9-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest
git bisect start '7ce53e3a447bced7b85ed181c4d027e93c062e07' '34d4ddd359dbcdf6c5fb3f85a179243d7a1cb7f8'
# good: [bb1f09d126618aa1ec776d87d7f85136edbed485] Merge remote-tracking branch 'crypto/master' into master
git bisect good bb1f09d126618aa1ec776d87d7f85136edbed485
# good: [1fb7e980a0f9a0aa4c7daba1a7e35d12c97820ea] Merge remote-tracking branch 'audit/next' into master
git bisect good 1fb7e980a0f9a0aa4c7daba1a7e35d12c97820ea
# good: [afdf05baff78a658843c1013855985e7a6871406] Merge remote-tracking branch 'thunderbolt/next' into master
git bisect good afdf05baff78a658843c1013855985e7a6871406
# good: [c5f3c031bd4b8ef5fb6b07352abc284603c3edee] Merge remote-tracking branch 'kselftest/next' into master
git bisect good c5f3c031bd4b8ef5fb6b07352abc284603c3edee
# bad: [671aca25e253f2773850aefb0837a225c691e336] lib: bitmap: delete duplicated words
git bisect bad 671aca25e253f2773850aefb0837a225c691e336
# bad: [e42ac710a849403b7fe582cc555dc3b7bf5b6fa9] tools/testing/selftests/vm/hmm-tests.c: use the new SKIP() macro
git bisect bad e42ac710a849403b7fe582cc555dc3b7bf5b6fa9
# good: [1443e3384317a9dfaf1381e8134a69c1e3fc7130] device-dax: kill dax_kmem_res
git bisect good 1443e3384317a9dfaf1381e8134a69c1e3fc7130
# good: [e2aad6f1d232b457ea6a3194992dd4c0a83534a5] mm/debug_vm_pgtable/locks: take correct page table lock
git bisect good e2aad6f1d232b457ea6a3194992dd4c0a83534a5
# bad: [08075d21b791241ba9f1366f814afa4a77372250] mm: workingset: ignore slab memory size when calculating shadows pressure
git bisect bad 08075d21b791241ba9f1366f814afa4a77372250
# bad: [42e9e63856020953cc3645a2032a70179364a1d8] mm-gup-dont-permit-users-to-call-get_user_pages-with-foll_longterm-fix
git bisect bad 42e9e63856020953cc3645a2032a70179364a1d8
# good: [b77bfa2ce4c2e30c1b1d1b3eb18c5b57397277f9] mm/debug_vm_pgtable/hugetlb: disable hugetlb test on ppc64
git bisect good b77bfa2ce4c2e30c1b1d1b3eb18c5b57397277f9
# bad: [90b612d56df39666f0f6fa6a033b4a7ec2e0a16c] mm/gup_benchmark: use pin_user_pages for FOLL_LONGTERM flag
git bisect bad 90b612d56df39666f0f6fa6a033b4a7ec2e0a16c
# bad: [060e70ecf865e55d82282995b4cb478126a1163c] mm/debug_vm_pgtable: avoid none pte in pte_clear_test
git bisect bad 060e70ecf865e55d82282995b4cb478126a1163c
# first bad commit: [060e70ecf865e55d82282995b4cb478126a1163c] mm/debug_vm_pgtable: avoid none pte in pte_clear_test

Cheers,
Nathan
Aneesh Kumar K.V Sept. 11, 2020, 5:21 a.m. UTC | #2
Nathan Chancellor <natechancellor@gmail.com> writes:

> On Wed, Sep 02, 2020 at 05:12:22PM +0530, Aneesh Kumar K.V wrote:
>> pte_clear_tests operate on an existing pte entry. Make sure that
>> is not a none pte entry.
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>  mm/debug_vm_pgtable.c | 7 ++++---
>>  1 file changed, 4 insertions(+), 3 deletions(-)
>> 
>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>> index 9afa1354326b..c36530c69e33 100644
>> --- a/mm/debug_vm_pgtable.c
>> +++ b/mm/debug_vm_pgtable.c
>> @@ -542,9 +542,10 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
>>  #endif /* PAGETABLE_P4D_FOLDED */
>>  
>>  static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
>> -				   unsigned long vaddr)
>> +				   unsigned long pfn, unsigned long vaddr,
>> +				   pgprot_t prot)
>>  {
>> -	pte_t pte = ptep_get(ptep);
>> +	pte_t pte = pfn_pte(pfn, prot);
>>  
>>  	pr_debug("Validating PTE clear\n");
>>  	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
>> @@ -1049,7 +1050,7 @@ static int __init debug_vm_pgtable(void)
>>  
>>  	ptl = pte_lockptr(mm, pmdp);
>>  	spin_lock(ptl);
>> -	pte_clear_tests(mm, ptep, vaddr);
>> +	pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot);
>>  	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
>>  	pte_unmap_unlock(ptep, ptl);
>>  
>> -- 
> This patch causes a panic at boot for RISC-V defconfig. The rootfs is here if it is needed:
> https://github.com/ClangBuiltLinux/boot-utils/blob/3b21a5b71451742866349ba4f18638c5a754e660/images/riscv/rootfs.cpio.zst
>
> $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- O=out/riscv distclean defconfig Image
>
> $ qemu-system-riscv64 -bios default -M virt -display none -initrd rootfs.cpio -kernel Image -m 512m -nodefaults -serial mon:stdio
> ...
>
> OpenSBI v0.6
>    ____                    _____ ____ _____
>   / __ \                  / ____|  _ \_   _|
>  | |  | |_ __   ___ _ __ | (___ | |_) || |
>  | |  | | '_ \ / _ \ '_ \ \___ \|  _ < | |
>  | |__| | |_) |  __/ | | |____) | |_) || |_
>   \____/| .__/ \___|_| |_|_____/|____/_____|
>         | |
>         |_|
>
> Platform Name          : QEMU Virt Machine
> Platform HART Features : RV64ACDFIMSU
> Platform Max HARTs     : 8
> Current Hart           : 0
> Firmware Base          : 0x80000000
> Firmware Size          : 120 KB
> Runtime SBI Version    : 0.2
>
> MIDELEG : 0x0000000000000222
> MEDELEG : 0x000000000000b109
> PMP0    : 0x0000000080000000-0x000000008001ffff (A)
> PMP1    : 0x0000000000000000-0xffffffffffffffff (A,R,W,X)
> [    0.000000] Linux version 5.9.0-rc4-next-20200910 (nathan@ubuntu-n2-xlarge-x86) (riscv64-linux-gcc (GCC) 10.2.0, GNU ld (GNU Binutils) 2.35) #1 SMP Thu Sep 10 19:10:43 MST 2020
> ...
> [    0.294593] NET: Registered protocol family 17
> [    0.295781] 9pnet: Installing 9P2000 support
> [    0.296153] Key type dns_resolver registered
> [    0.296694] debug_vm_pgtable: [debug_vm_pgtable         ]: Validating architecture page table helpers
> [    0.297635] Unable to handle kernel paging request at virtual address 0a7fffe01dafefc8
> [    0.298029] Oops [#1]
> [    0.298153] Modules linked in:
> [    0.298433] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc4-next-20200910 #1
> [    0.298792] epc: ffffffe000205afc ra : ffffffe0008be0aa sp : ffffffe01ae73d40
> [    0.299078]  gp : ffffffe0010b9b48 tp : ffffffe01ae68000 t0 : ffffffe008152000
> [    0.299362]  t1 : 0000000000000000 t2 : 0000000000000000 s0 : ffffffe01ae73d60
> [    0.299648]  s1 : bffffffffffffffb a0 : 0a7fffe01dafefc8 a1 : bffffffffffffffb
> [    0.299948]  a2 : ffffffe0010a2698 a3 : 0000000000000001 a4 : 0000000000000003
> [    0.300231]  a5 : 0000000000000800 a6 : fffffffff0000080 a7 : 000000001b642000
> [    0.300521]  s2 : ffffffe0081517b8 s3 : ffffffe008150a80 s4 : ffffffe01af30000
> [    0.300806]  s5 : ffffffe01f8ca9b8 s6 : ffffffe008150000 s7 : ffffffe0010bb100
> [    0.301161]  s8 : ffffffe0010bb108 s9 : 0000000000080202 s10: ffffffe0010bb928
> [    0.301481]  s11: 000000002008085b t3 : 0000000000000000 t4 : 0000000000000000
> [    0.301722]  t5 : 0000000000000000 t6 : ffffffe008150000
> [    0.301947] status: 0000000000000120 badaddr: 0a7fffe01dafefc8 cause: 000000000000000f
> [    0.302569] ---[ end trace 7ffb153d816164cf ]---
> [    0.302797] note: swapper/0[1] exited with preempt_count 1
> [    0.303101] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> [    0.303614] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---


I guess it is the combination of a valid pte and usage of
RANDOM_ORVALUE. The below change gets the kernel to boot. Can somebody
familiar with riscv pte format take a look at the RANDOM_ORVALUE?

modified   mm/debug_vm_pgtable.c
@@ -548,7 +548,7 @@ static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
 	pte_t pte = pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE clear\n");
-	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
+//	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
 	set_pte_at(mm, vaddr, ptep, pte);
 	barrier();
 	pte_clear(mm, vaddr, ptep);
Anshuman Khandual Sept. 23, 2020, 3:14 a.m. UTC | #3
On 09/11/2020 10:51 AM, Aneesh Kumar K.V wrote:
> Nathan Chancellor <natechancellor@gmail.com> writes:
> 
>> On Wed, Sep 02, 2020 at 05:12:22PM +0530, Aneesh Kumar K.V wrote:
>>> pte_clear_tests operate on an existing pte entry. Make sure that
>>> is not a none pte entry.
>>>
>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>> ---
>>>  mm/debug_vm_pgtable.c | 7 ++++---
>>>  1 file changed, 4 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>>> index 9afa1354326b..c36530c69e33 100644
>>> --- a/mm/debug_vm_pgtable.c
>>> +++ b/mm/debug_vm_pgtable.c
>>> @@ -542,9 +542,10 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
>>>  #endif /* PAGETABLE_P4D_FOLDED */
>>>  
>>>  static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
>>> -				   unsigned long vaddr)
>>> +				   unsigned long pfn, unsigned long vaddr,
>>> +				   pgprot_t prot)
>>>  {
>>> -	pte_t pte = ptep_get(ptep);
>>> +	pte_t pte = pfn_pte(pfn, prot);
>>>  
>>>  	pr_debug("Validating PTE clear\n");
>>>  	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
>>> @@ -1049,7 +1050,7 @@ static int __init debug_vm_pgtable(void)
>>>  
>>>  	ptl = pte_lockptr(mm, pmdp);
>>>  	spin_lock(ptl);
>>> -	pte_clear_tests(mm, ptep, vaddr);
>>> +	pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot);
>>>  	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
>>>  	pte_unmap_unlock(ptep, ptl);
>>>  
>>> -- 
>> This patch causes a panic at boot for RISC-V defconfig. The rootfs is here if it is needed:
>> https://github.com/ClangBuiltLinux/boot-utils/blob/3b21a5b71451742866349ba4f18638c5a754e660/images/riscv/rootfs.cpio.zst
>>
>> $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- O=out/riscv distclean defconfig Image
>>
>> $ qemu-system-riscv64 -bios default -M virt -display none -initrd rootfs.cpio -kernel Image -m 512m -nodefaults -serial mon:stdio
>> ...
>>
>> OpenSBI v0.6
>>    ____                    _____ ____ _____
>>   / __ \                  / ____|  _ \_   _|
>>  | |  | |_ __   ___ _ __ | (___ | |_) || |
>>  | |  | | '_ \ / _ \ '_ \ \___ \|  _ < | |
>>  | |__| | |_) |  __/ | | |____) | |_) || |_
>>   \____/| .__/ \___|_| |_|_____/|____/_____|
>>         | |
>>         |_|
>>
>> Platform Name          : QEMU Virt Machine
>> Platform HART Features : RV64ACDFIMSU
>> Platform Max HARTs     : 8
>> Current Hart           : 0
>> Firmware Base          : 0x80000000
>> Firmware Size          : 120 KB
>> Runtime SBI Version    : 0.2
>>
>> MIDELEG : 0x0000000000000222
>> MEDELEG : 0x000000000000b109
>> PMP0    : 0x0000000080000000-0x000000008001ffff (A)
>> PMP1    : 0x0000000000000000-0xffffffffffffffff (A,R,W,X)
>> [    0.000000] Linux version 5.9.0-rc4-next-20200910 (nathan@ubuntu-n2-xlarge-x86) (riscv64-linux-gcc (GCC) 10.2.0, GNU ld (GNU Binutils) 2.35) #1 SMP Thu Sep 10 19:10:43 MST 2020
>> ...
>> [    0.294593] NET: Registered protocol family 17
>> [    0.295781] 9pnet: Installing 9P2000 support
>> [    0.296153] Key type dns_resolver registered
>> [    0.296694] debug_vm_pgtable: [debug_vm_pgtable         ]: Validating architecture page table helpers
>> [    0.297635] Unable to handle kernel paging request at virtual address 0a7fffe01dafefc8
>> [    0.298029] Oops [#1]
>> [    0.298153] Modules linked in:
>> [    0.298433] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc4-next-20200910 #1
>> [    0.298792] epc: ffffffe000205afc ra : ffffffe0008be0aa sp : ffffffe01ae73d40
>> [    0.299078]  gp : ffffffe0010b9b48 tp : ffffffe01ae68000 t0 : ffffffe008152000
>> [    0.299362]  t1 : 0000000000000000 t2 : 0000000000000000 s0 : ffffffe01ae73d60
>> [    0.299648]  s1 : bffffffffffffffb a0 : 0a7fffe01dafefc8 a1 : bffffffffffffffb
>> [    0.299948]  a2 : ffffffe0010a2698 a3 : 0000000000000001 a4 : 0000000000000003
>> [    0.300231]  a5 : 0000000000000800 a6 : fffffffff0000080 a7 : 000000001b642000
>> [    0.300521]  s2 : ffffffe0081517b8 s3 : ffffffe008150a80 s4 : ffffffe01af30000
>> [    0.300806]  s5 : ffffffe01f8ca9b8 s6 : ffffffe008150000 s7 : ffffffe0010bb100
>> [    0.301161]  s8 : ffffffe0010bb108 s9 : 0000000000080202 s10: ffffffe0010bb928
>> [    0.301481]  s11: 000000002008085b t3 : 0000000000000000 t4 : 0000000000000000
>> [    0.301722]  t5 : 0000000000000000 t6 : ffffffe008150000
>> [    0.301947] status: 0000000000000120 badaddr: 0a7fffe01dafefc8 cause: 000000000000000f
>> [    0.302569] ---[ end trace 7ffb153d816164cf ]---
>> [    0.302797] note: swapper/0[1] exited with preempt_count 1
>> [    0.303101] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
>> [    0.303614] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
> 
> 
> I guess it is the combination of a valid pte and usage of
> RANDOM_ORVALUE. The below change gets the kernel to boot. Can somebody
> familiar with riscv pte format take a look at the RANDOM_ORVALUE?
> 
> modified   mm/debug_vm_pgtable.c
> @@ -548,7 +548,7 @@ static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
>  	pte_t pte = pfn_pte(pfn, prot);
>  
>  	pr_debug("Validating PTE clear\n");
> -	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
> +//	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
>  	set_pte_at(mm, vaddr, ptep, pte);
>  	barrier();
>  	pte_clear(mm, vaddr, ptep);

Do we have a fix for this problem? Otherwise we just risk going into
the next release with this regression on riscv platforms.
Guenter Roeck Oct. 11, 2020, 8:02 p.m. UTC | #4
On Wed, Sep 02, 2020 at 05:12:22PM +0530, Aneesh Kumar K.V wrote:
> pte_clear_tests operate on an existing pte entry. Make sure that
> is not a none pte entry.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

This patch causes all riscv64 images to crash. Reverting it
as well as the follow-up patch fixes the problem, but there are
still several warning messages starting with
	BUG kmem_cache (Not tainted): Freechain corrupt
I did not try to track down this other problem.

A detailed crash log is at
	https://kerneltests.org/builders/qemu-riscv64-next/builds/523/steps/qemubuildcommand/logs/stdio

Bisect log is attached.

Guenter

---
# bad: [d67bc7812221606e1886620a357b13f906814af7] Add linux-next specific files for 20201009
# good: [549738f15da0e5a00275977623be199fbbf7df50] Linux 5.9-rc8
git bisect start 'HEAD' 'v5.9-rc8'
# good: [b71be15b496cc71a3434a198fc1a1b9e08af6c57] Merge remote-tracking branch 'bpf-next/master' into master
git bisect good b71be15b496cc71a3434a198fc1a1b9e08af6c57
# good: [3542e5a87341bdca83e3d7f061b0f4e0f4c23f73] Merge remote-tracking branch 'spi/for-next' into master
git bisect good 3542e5a87341bdca83e3d7f061b0f4e0f4c23f73
# good: [65f9c957115c749ba79fb469083caf14101a93bb] Merge remote-tracking branch 'char-misc/char-misc-next' into master
git bisect good 65f9c957115c749ba79fb469083caf14101a93bb
# good: [aadfe5ecb55641d5994a5f3b27f074beead8f49b] Merge remote-tracking branch 'scsi-mkp/for-next' into master
git bisect good aadfe5ecb55641d5994a5f3b27f074beead8f49b
# good: [060196553d119d5c252f09ed5b0316929cc25983] Merge remote-tracking branch 'memblock/for-next' into master
git bisect good 060196553d119d5c252f09ed5b0316929cc25983
# bad: [d2340d89ffa6d2643f0689600dbf0969d86fdd3c] x86/setup: simplify initrd relocation and reservation
git bisect bad d2340d89ffa6d2643f0689600dbf0969d86fdd3c
# bad: [4b23734f5ba1a0487b2b569a9e64e4e5360009c1] mm/swapfile.c: remove unnecessary goto out in _swap_info_get()
git bisect bad 4b23734f5ba1a0487b2b569a9e64e4e5360009c1
# good: [b30f5277e97be65a99ca5af3d4337cb9acbf8fa7] device-dax: introduce 'struct dev_dax' typed-driver operations
git bisect good b30f5277e97be65a99ca5af3d4337cb9acbf8fa7
# good: [8c2075296dbbbce685fa14bbf46d29fba54f8a62] mm/debug_vm_pgtable: drop hugetlb_advanced_tests()
git bisect good 8c2075296dbbbce685fa14bbf46d29fba54f8a62
# bad: [52ce97889b3c85826995d22a690cd1430c14f316] mm/filemap: fix filemap_map_pages for THP
git bisect bad 52ce97889b3c85826995d22a690cd1430c14f316
# bad: [1ddc0b707be99faece6cdd77f34544f408c6617d] proc: optimise smaps for shmem entries
git bisect bad 1ddc0b707be99faece6cdd77f34544f408c6617d
# bad: [5d685be3785638202430b6baa2ecde482b52c41e] mm: factor find_get_incore_page out of mincore_page
git bisect bad 5d685be3785638202430b6baa2ecde482b52c41e
# bad: [0797f84d689b9f1e7256d954280b28bfeaf5b1fc] mm/debug_vm_pgtable: avoid doing memory allocation with pgtable_t mapped.
git bisect bad 0797f84d689b9f1e7256d954280b28bfeaf5b1fc
# bad: [4f9a78e6bcd60a25b187adc1526ab3815fc40dae] mm/debug_vm_pgtable: avoid none pte in pte_clear_test
git bisect bad 4f9a78e6bcd60a25b187adc1526ab3815fc40dae
# first bad commit: [4f9a78e6bcd60a25b187adc1526ab3815fc40dae] mm/debug_vm_pgtable: avoid none pte in pte_clear_test
Aneesh Kumar K.V Oct. 12, 2020, 4:29 a.m. UTC | #5
Guenter Roeck <linux@roeck-us.net> writes:

> On Wed, Sep 02, 2020 at 05:12:22PM +0530, Aneesh Kumar K.V wrote:
>> pte_clear_tests operate on an existing pte entry. Make sure that
>> is not a none pte entry.
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>
> This patch causes all riscv64 images to crash. Reverting it
> as well as the follow-up patch fixes the problem, but there are
> still several warning messages starting with
> 	BUG kmem_cache (Not tainted): Freechain corrupt
> I did not try to track down this other problem.
>
> A detailed crash log is at
> 	https://kerneltests.org/builders/qemu-riscv64-next/builds/523/steps/qemubuildcommand/logs/stdio
>
> Bisect log is attached.


https://lore.kernel.org/linux-mm/87zh5wx51b.fsf@linux.ibm.com

This was mentioned earlier. The RANDOM_ORVALUE used is interacting with
some of the riscv page table accessors.

-aneesh
diff mbox series

Patch

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 9afa1354326b..c36530c69e33 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -542,9 +542,10 @@  static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
 #endif /* PAGETABLE_P4D_FOLDED */
 
 static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
-				   unsigned long vaddr)
+				   unsigned long pfn, unsigned long vaddr,
+				   pgprot_t prot)
 {
-	pte_t pte = ptep_get(ptep);
+	pte_t pte = pfn_pte(pfn, prot);
 
 	pr_debug("Validating PTE clear\n");
 	pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
@@ -1049,7 +1050,7 @@  static int __init debug_vm_pgtable(void)
 
 	ptl = pte_lockptr(mm, pmdp);
 	spin_lock(ptl);
-	pte_clear_tests(mm, ptep, vaddr);
+	pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot);
 	pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
 	pte_unmap_unlock(ptep, ptl);