diff mbox series

[1/2] arm64: mm: drop VM_FAULT_BADMAP/VM_FAULT_BADACCESS

Message ID 20240407081211.2292362-2-wangkefeng.wang@huawei.com (mailing list archive)
State New
Series mm: remove arch's private VM_FAULT_BADMAP/BADACCESS

Commit Message

Kefeng Wang April 7, 2024, 8:12 a.m. UTC
If the map or access is bad, directly set si_code to SEGV_MAPERR or
SEGV_ACCERR, set fault to 0 and go to the error handling path; this lets
us drop the arch's private vm_fault reasons.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

Comments

Catalin Marinas April 9, 2024, 2:28 p.m. UTC | #1
Hi Kefeng,

On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 405f9aa831bd..61a2acae0dca 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -500,9 +500,6 @@ static bool is_write_abort(unsigned long esr)
>  	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
>  }
>  
> -#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
> -#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
> -
>  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  				   struct pt_regs *regs)
>  {
> @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>  	unsigned long addr = untagged_addr(far);
>  	struct vm_area_struct *vma;
> +	int si_code;

I think we should initialise this to 0. Currently all paths seem to set
si_code to something meaningful, but I'm not sure the last 'else' clause
in this patch is guaranteed to always cover exactly those earlier code
paths that update si_code. I'm not talking about the 'goto bad_area'
paths, since they set 'fault' to 0, but about the fall-through after the
second (under the mm lock) handle_mm_fault().
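
To make the hazard concrete, here is a minimal standalone sketch (userspace
stand-ins, not the kernel API; handle_mm_fault_stub() and the flag values
are hypothetical) of the fall-through being described:

  #include <stdio.h>

  /* Stand-ins for the real vm_fault_t error bits; values illustrative only. */
  #define VM_FAULT_ERROR   0x01
  #define VM_FAULT_SIGSEGV 0x01

  /* Pretend the second handle_mm_fault() reported an error. */
  static int handle_mm_fault_stub(void)
  {
          return VM_FAULT_SIGSEGV;
  }

  static int do_page_fault_sketch(void)
  {
          int si_code;    /* uninitialised, as in the patch as posted */
          int fault;

          /* Second handle_mm_fault(): no 'goto bad_area' on this path,
           * so si_code is never assigned. */
          fault = handle_mm_fault_stub();

          /* done: */
          if (!(fault & VM_FAULT_ERROR))
                  return 0;

          /* bad_area: si_code is read uninitialised here -- exactly the
           * fall-through in question (undefined behaviour in this sketch). */
          printf("arm64_force_sig_fault(SIGSEGV, si_code=%d, ...)\n", si_code);
          return 0;
  }

  int main(void)
  {
          return do_page_fault_sketch();
  }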

>  	if (kprobe_page_fault(regs, esr))
>  		return 0;
> @@ -572,9 +570,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  
>  	if (!(vma->vm_flags & vm_flags)) {
>  		vma_end_read(vma);
> -		fault = VM_FAULT_BADACCESS;
> +		fault = 0;
> +		si_code = SEGV_ACCERR;
>  		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> -		goto done;
> +		goto bad_area;
>  	}
>  	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
>  	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> @@ -599,15 +598,18 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  retry:
>  	vma = lock_mm_and_find_vma(mm, addr, regs);
>  	if (unlikely(!vma)) {
> -		fault = VM_FAULT_BADMAP;
> -		goto done;
> +		fault = 0;
> +		si_code = SEGV_MAPERR;
> +		goto bad_area;
>  	}
>  
> -	if (!(vma->vm_flags & vm_flags))
> -		fault = VM_FAULT_BADACCESS;
> -	else
> -		fault = handle_mm_fault(vma, addr, mm_flags, regs);
> +	if (!(vma->vm_flags & vm_flags)) {
> +		fault = 0;
> +		si_code = SEGV_ACCERR;
> +		goto bad_area;
> +	}

What releases the mm lock here? Prior to this change, we either fell
through to mmap_read_unlock() below or handle_mm_fault() released the
lock (VM_FAULT_RETRY, VM_FAULT_COMPLETED).
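
For reference, the incremental fix Kefeng posts later in this thread answers
this by unlocking before the jump; a sketch of the corrected hunk, using the
patch's naming:

          if (!(vma->vm_flags & vm_flags)) {
                  mmap_read_unlock(mm);   /* balance lock_mm_and_find_vma() */
                  fault = 0;
                  si_code = SEGV_ACCERR;
                  goto bad_area;
          }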

>  
> +	fault = handle_mm_fault(vma, addr, mm_flags, regs);
>  	/* Quick path to respond to signals */
>  	if (fault_signal_pending(fault, regs)) {
>  		if (!user_mode(regs))
> @@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	mmap_read_unlock(mm);
>  
>  done:
> -	/*
> -	 * Handle the "normal" (no error) case first.
> -	 */
> -	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
> -			      VM_FAULT_BADACCESS))))
> +	/* Handle the "normal" (no error) case first. */
> +	if (likely(!(fault & VM_FAULT_ERROR)))
>  		return 0;
>  
> +bad_area:
>  	/*
>  	 * If we are in kernel mode at this point, we have no context to
>  	 * handle this fault with.
> @@ -667,13 +667,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  
>  		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
>  	} else {
> -		/*
> -		 * Something tried to access memory that isn't in our memory
> -		 * map.
> -		 */
> -		arm64_force_sig_fault(SIGSEGV,
> -				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
> -				      far, inf->name);
> +		/* Something tried to access memory outside our memory map */
> +		arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
>  	}

We can get to the 'else' clause after the second handle_mm_fault(). Do we
guarantee that 'fault == 0' in this last block? If not, maybe add a warning
and some safe initialisation for 'si_code' to avoid leaking stack data.
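
A sketch of that suggestion (hypothetical; not what the thread ultimately
settles on, which is defaulting si_code to SEGV_MAPERR instead):

          } else {
                  /*
                   * Warn if we got here from a handle_mm_fault() error
                   * rather than a 'goto bad_area' path, and pick a safe
                   * si_code so no stack garbage reaches userspace.
                   */
                  if (WARN_ON_ONCE(fault != 0))
                          si_code = SEGV_MAPERR;
                  arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
          }
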
Kefeng Wang April 10, 2024, 1:30 a.m. UTC | #2
On 2024/4/9 22:28, Catalin Marinas wrote:
> Hi Kefeng,
> 
> On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index 405f9aa831bd..61a2acae0dca 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> @@ -500,9 +500,6 @@ static bool is_write_abort(unsigned long esr)
>>   	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
>>   }
>>   
>> -#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
>> -#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
>> -
>>   static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>   				   struct pt_regs *regs)
>>   {
>> @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>   	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>>   	unsigned long addr = untagged_addr(far);
>>   	struct vm_area_struct *vma;
>> +	int si_code;
> 
> I think we should initialise this to 0. Currently all paths seem to set
> si_code to something meaningful, but I'm not sure the last 'else' clause
> in this patch is guaranteed to always cover exactly those earlier code
> paths that update si_code. I'm not talking about the 'goto bad_area'
> paths, since they set 'fault' to 0, but about the fall-through after the
> second (under the mm lock) handle_mm_fault().

Rechecking it: without this patch, the second handle_mm_fault() never
returns VM_FAULT_BADACCESS, but it could return VM_FAULT_SIGSEGV (and
maybe others), which is not handled in the other error paths:

  handle_mm_fault()
     ret = sanitize_fault_flags(vma, &flags);
     if (!arch_vma_access_permitted())
         ret = VM_FAULT_SIGSEGV;

so the original logic would set si_code to SEGV_MAPERR:

   fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,

therefore, I think we should set the default si_code to SEGV_MAPERR.
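
For context, the entry checks in handle_mm_fault() look approximately like
this (paraphrased from mm/memory.c of that era, not verbatim, so treat names
and flag details as approximate):

  vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
                             unsigned int flags, struct pt_regs *regs)
  {
          vm_fault_t ret;

          /* Can fail with VM_FAULT_SIGSEGV before any fault handling runs. */
          ret = sanitize_fault_flags(vma, &flags);
          if (ret)
                  return ret;

          if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
                                         flags & FAULT_FLAG_INSTRUCTION,
                                         flags & FAULT_FLAG_REMOTE))
                  return VM_FAULT_SIGSEGV;
          ...
  }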


> 
>>   	if (kprobe_page_fault(regs, esr))
>>   		return 0;
>> @@ -572,9 +570,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>   
>>   	if (!(vma->vm_flags & vm_flags)) {
>>   		vma_end_read(vma);
>> -		fault = VM_FAULT_BADACCESS;
>> +		fault = 0;
>> +		si_code = SEGV_ACCERR;
>>   		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>> -		goto done;
>> +		goto bad_area;
>>   	}
>>   	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
>>   	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>> @@ -599,15 +598,18 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>   retry:
>>   	vma = lock_mm_and_find_vma(mm, addr, regs);
>>   	if (unlikely(!vma)) {
>> -		fault = VM_FAULT_BADMAP;
>> -		goto done;
>> +		fault = 0;
>> +		si_code = SEGV_MAPERR;
>> +		goto bad_area;
>>   	}
>>   
>> -	if (!(vma->vm_flags & vm_flags))
>> -		fault = VM_FAULT_BADACCESS;
>> -	else
>> -		fault = handle_mm_fault(vma, addr, mm_flags, regs);
>> +	if (!(vma->vm_flags & vm_flags)) {
>> +		fault = 0;
>> +		si_code = SEGV_ACCERR;
>> +		goto bad_area;
>> +	}
> 
> What releases the mm lock here? Prior to this change, we either fell
> through to mmap_read_unlock() below or handle_mm_fault() released the
> lock (VM_FAULT_RETRY, VM_FAULT_COMPLETED).

Indeed, will fix,

> 
>>   
>> +	fault = handle_mm_fault(vma, addr, mm_flags, regs);
>>   	/* Quick path to respond to signals */
>>   	if (fault_signal_pending(fault, regs)) {
>>   		if (!user_mode(regs))
>> @@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>   	mmap_read_unlock(mm);
>>   
>>   done:
>> -	/*
>> -	 * Handle the "normal" (no error) case first.
>> -	 */
>> -	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
>> -			      VM_FAULT_BADACCESS))))
>> +	/* Handle the "normal" (no error) case first. */
>> +	if (likely(!(fault & VM_FAULT_ERROR)))
>>   		return 0;
>>   
>> +bad_area:
>>   	/*
>>   	 * If we are in kernel mode at this point, we have no context to
>>   	 * handle this fault with.
>> @@ -667,13 +667,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>   
>>   		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
>>   	} else {
>> -		/*
>> -		 * Something tried to access memory that isn't in our memory
>> -		 * map.
>> -		 */
>> -		arm64_force_sig_fault(SIGSEGV,
>> -				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
>> -				      far, inf->name);
>> +		/* Something tried to access memory outside our memory map */
>> +		arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
>>   	}
> 
> We can get to the 'else' clause after the second handle_mm_fault(). Do we
> guarantee that 'fault == 0' in this last block? If not, maybe add a warning
> and some safe initialisation for 'si_code' to avoid leaking stack data.

As analyzed above, it is sufficient to make si_code SEGV_MAPERR by
default, right?

Thanks.


>
Kefeng Wang April 10, 2024, 10:58 a.m. UTC | #3
On 2024/4/10 9:30, Kefeng Wang wrote:
> 
> 
> On 2024/4/9 22:28, Catalin Marinas wrote:
>> Hi Kefeng,
>>
>> On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
>>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>>> index 405f9aa831bd..61a2acae0dca 100644
>>> --- a/arch/arm64/mm/fault.c
>>> +++ b/arch/arm64/mm/fault.c
>>> @@ -500,9 +500,6 @@ static bool is_write_abort(unsigned long esr)
>>>       return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
>>>   }
>>> -#define VM_FAULT_BADMAP        ((__force vm_fault_t)0x010000)
>>> -#define VM_FAULT_BADACCESS    ((__force vm_fault_t)0x020000)
>>> -
>>>   static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>                      struct pt_regs *regs)
>>>   {
>>> @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>       unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>>>       unsigned long addr = untagged_addr(far);
>>>       struct vm_area_struct *vma;
>>> +    int si_code;
>>
>> I think we should initialise this to 0. Currently all paths seem to set
>> si_code to something meaningful, but I'm not sure the last 'else' clause
>> in this patch is guaranteed to always cover exactly those earlier code
>> paths that update si_code. I'm not talking about the 'goto bad_area'
>> paths, since they set 'fault' to 0, but about the fall-through after the
>> second (under the mm lock) handle_mm_fault().
> 
> Rechecking it: without this patch, the second handle_mm_fault() never
> returns VM_FAULT_BADACCESS, but it could return VM_FAULT_SIGSEGV (and
> maybe others), which is not handled in the other error paths:
>
>   handle_mm_fault()
>      ret = sanitize_fault_flags(vma, &flags);
>      if (!arch_vma_access_permitted())
>          ret = VM_FAULT_SIGSEGV;
>
> so the original logic would set si_code to SEGV_MAPERR:
>
>    fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
>
> therefore, I think we should set the default si_code to SEGV_MAPERR.
> 
> 
>>
>>>       if (kprobe_page_fault(regs, esr))
>>>           return 0;
>>> @@ -572,9 +570,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>       if (!(vma->vm_flags & vm_flags)) {
>>>           vma_end_read(vma);
>>> -        fault = VM_FAULT_BADACCESS;
>>> +        fault = 0;
>>> +        si_code = SEGV_ACCERR;
>>>           count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>>> -        goto done;
>>> +        goto bad_area;
>>>       }
>>>       fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
>>>       if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>>> @@ -599,15 +598,18 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>   retry:
>>>       vma = lock_mm_and_find_vma(mm, addr, regs);
>>>       if (unlikely(!vma)) {
>>> -        fault = VM_FAULT_BADMAP;
>>> -        goto done;
>>> +        fault = 0;
>>> +        si_code = SEGV_MAPERR;
>>> +        goto bad_area;
>>>       }
>>> -    if (!(vma->vm_flags & vm_flags))
>>> -        fault = VM_FAULT_BADACCESS;
>>> -    else
>>> -        fault = handle_mm_fault(vma, addr, mm_flags, regs);
>>> +    if (!(vma->vm_flags & vm_flags)) {
>>> +        fault = 0;
>>> +        si_code = SEGV_ACCERR;
>>> +        goto bad_area;
>>> +    }
>>
>> What releases the mm lock here? Prior to this change, we either fell
>> through to mmap_read_unlock() below or handle_mm_fault() released the
>> lock (VM_FAULT_RETRY, VM_FAULT_COMPLETED).
> 
> Indeed, will fix,
> 
>>
>>> +    fault = handle_mm_fault(vma, addr, mm_flags, regs);
>>>       /* Quick path to respond to signals */
>>>       if (fault_signal_pending(fault, regs)) {
>>>           if (!user_mode(regs))
>>> @@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>       mmap_read_unlock(mm);
>>>   done:
>>> -    /*
>>> -     * Handle the "normal" (no error) case first.
>>> -     */
>>> -    if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
>>> -                  VM_FAULT_BADACCESS))))
>>> +    /* Handle the "normal" (no error) case first. */
>>> +    if (likely(!(fault & VM_FAULT_ERROR)))
>>>           return 0;

Another choice: set si_code = SEGV_MAPERR here, since the normal page
fault path doesn't use si_code; only the error path needs it initialized.
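
Concretely (this matches the incremental diff Kefeng posts later in the
thread):

  done:
          /* Handle the "normal" (no error) case first. */
          if (likely(!(fault & VM_FAULT_ERROR)))
                  return 0;

          si_code = SEGV_MAPERR;  /* default for handle_mm_fault() errors */
  bad_area: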


>>> +bad_area:
>>>       /*
>>>        * If we are in kernel mode at this point, we have no context to
>>>        * handle this fault with.
>>> @@ -667,13 +667,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>           arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
>>>       } else {
>>> -        /*
>>> -         * Something tried to access memory that isn't in our memory
>>> -         * map.
>>> -         */
>>> -        arm64_force_sig_fault(SIGSEGV,
>>> -                      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
>>> -                      far, inf->name);
>>> +        /* Something tried to access memory outside our memory map */
>>> +        arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
>>>       }
>>
>> We can get to the 'else' clause after the second handle_mm_fault(). Do we
>> guarantee that 'fault == 0' in this last block? If not, maybe add a warning
>> and some safe initialisation for 'si_code' to avoid leaking stack data.
> 
> As analyzed above, it is sufficient to make si_code SEGV_MAPERR by
> default, right?
> 
> Thanks.
> 
> 
>>
>
Aishwarya TCV April 10, 2024, 11:24 a.m. UTC | #4
On 07/04/2024 09:12, Kefeng Wang wrote:
> If the map or access is bad, directly set si_code to SEGV_MAPERR or
> SEGV_ACCERR, set fault to 0 and go to the error handling path; this lets
> us drop the arch's private vm_fault reasons.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
>  1 file changed, 18 insertions(+), 23 deletions(-)

Hi Kefeng,

While running the LTP test suite, I observed that a few test cases are
unable to kill the test executable when run against the next-master
(next-20240409) kernel with Arm64 on JUNO in our CI. I can send the full
logs if required, but they don't say much.

A bisect identified cf0049a15207a5a78798105eff789c2025bcf652 as the
first bad commit. Bisected it on the tag "next-20240409" at repo
"https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".

This works fine on Linux version v6.9-rc3

Here are some of the failing test cases in LTP quickhit:
-----------------------------
chdir01
link04
select03
unlink07


Failure log:
-----------
06:06:49.288908  Test timeouted, sending SIGKILL!
06:06:54.293806  Test timeouted, sending SIGKILL!
06:06:59.296872  Test timeouted, sending SIGKILL!
06:07:04.292044  Test timeouted, sending SIGKILL!
06:07:09.290185  Test timeouted, sending SIGKILL!
06:07:14.295134  Test timeouted, sending SIGKILL!
06:07:19.293279  Test timeouted, sending SIGKILL!
06:07:24.292405  Test timeouted, sending SIGKILL!
06:07:24.292790  Cannot kill test processes!
06:07:24.296564  Congratulation, likely test hit a kernel bug.
06:07:24.301315  Exiting uncleanly...


Bisect log:
----------
git bisect start
# good: [fec50db7033ea478773b159e0e2efb135270e3b7] Linux 6.9-rc3
git bisect good fec50db7033ea478773b159e0e2efb135270e3b7
# bad: [a053fd3ca5d1b927a8655f239c84b0d790218fda] Add linux-next
specific files for 20240409
git bisect bad a053fd3ca5d1b927a8655f239c84b0d790218fda
# bad: [4eb0063b031ea720cd8971e3e3d2426d27c5d7a6] Merge branch
'mtd/next' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git
git bisect bad 4eb0063b031ea720cd8971e3e3d2426d27c5d7a6
# bad: [24dde11bfadd5f38c6cca3cea6f16971bd10dc86] Merge branch
'for-next' of git://github.com/Xilinx/linux-xlnx.git
git bisect bad 24dde11bfadd5f38c6cca3cea6f16971bd10dc86
# bad: [bef23348d7e75c502399ba1a24627aa447b816dc] Merge branch
'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
git bisect bad bef23348d7e75c502399ba1a24627aa447b816dc
# good: [3317f7faabc24b500c26d02615ac75ca2786e272] mm: swap:
free_swap_and_cache_nr() as batched free_swap_and_cache()
git bisect good 3317f7faabc24b500c26d02615ac75ca2786e272
# good: [541970e62546ff5c96622669f2796d43b1a406e3] Merge branch
'gpio/for-current' of
git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
git bisect good 541970e62546ff5c96622669f2796d43b1a406e3
# bad: [1df265717e0f9d96079073189f6e6c52a48e493c]
__mod_memcg_lruvec_state-enhance-diagnostics-fix
git bisect bad 1df265717e0f9d96079073189f6e6c52a48e493c
# bad: [19ab4054346474c2b456f9bff6a98e41e5e46224] hugetlb: Simplify
hugetlb_wp() arguments
git bisect bad 19ab4054346474c2b456f9bff6a98e41e5e46224
# good: [75ef450aa9828ca20817c46c5b99f131156f6eee] x86: mm: accelerate
pagefault when badaccess
git bisect good 75ef450aa9828ca20817c46c5b99f131156f6eee
# bad: [c6b5a19e679bb713efd57d82f9080f780e0bb60a] mm/treewide: rename
CONFIG_HAVE_FAST_GUP to CONFIG_HAVE_GUP_FAST
git bisect bad c6b5a19e679bb713efd57d82f9080f780e0bb60a
# bad: [01446d1e9e627945664f2a1daa53e8720946d0cd] mm: remove struct page
from get_shadow_from_swap_cache
git bisect bad 01446d1e9e627945664f2a1daa53e8720946d0cd
# bad: [8f9d6a30dd992c44debea3161083a6c2cd3ad87f] arm: mm: drop
VM_FAULT_BADMAP/VM_FAULT_BADACCESS
git bisect bad 8f9d6a30dd992c44debea3161083a6c2cd3ad87f
# bad: [cf0049a15207a5a78798105eff789c2025bcf652] arm64: mm: drop
VM_FAULT_BADMAP/VM_FAULT_BADACCESS
git bisect bad cf0049a15207a5a78798105eff789c2025bcf652
# first bad commit: [cf0049a15207a5a78798105eff789c2025bcf652] arm64:
mm: drop VM_FAULT_BADMAP/VM_FAULT_BADACCESS


Thanks,
Aishwarya
Kefeng Wang April 10, 2024, 11:53 a.m. UTC | #5
On 2024/4/10 19:24, Aishwarya TCV wrote:
> 
> 
> On 07/04/2024 09:12, Kefeng Wang wrote:
>> If the map or access is bad, directly set si_code to SEGV_MAPERR or
>> SEGV_ACCERR, set fault to 0 and go to the error handling path; this lets
>> us drop the arch's private vm_fault reasons.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
>>   1 file changed, 18 insertions(+), 23 deletions(-)
> 
> Hi Kefeng,
> 
> While running the LTP test suite, I observed that a few test cases are
> unable to kill the test executable when run against the next-master
> (next-20240409) kernel with Arm64 on JUNO in our CI. I can send the full
> logs if required, but they don't say much.

Sorry about that. As Catalin pointed out, there is an issue in this patch,
and I replied today; could you help test with the following changes? Many thanks.

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 61a2acae0dca..451ba7cbd5ad 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -604,6 +604,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
         }

         if (!(vma->vm_flags & vm_flags)) {
+               mmap_read_unlock(mm);
                 fault = 0;
                 si_code = SEGV_ACCERR;
                 goto bad_area;
@@ -632,6 +633,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
         if (likely(!(fault & VM_FAULT_ERROR)))
                 return 0;

+       si_code = SEGV_MAPERR;
  bad_area:
         /*
          * If we are in kernel mode at this point, we have no context to

> 
> A bisect identified cf0049a15207a5a78798105eff789c2025bcf652 as the
> first bad commit. Bisected it on the tag "next-20240409" at repo
> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
> 
> This works fine on Linux version v6.9-rc3
> 
> Here are some of the failing test cases in LTP quickhit:
> -----------------------------
> chdir01
> link04
> select03
> unlink07
> 
> 
> Failure log:
> -----------
> 06:06:49.288908  Test timeouted, sending SIGKILL!
> 06:06:54.293806  Test timeouted, sending SIGKILL!
> 06:06:59.296872  Test timeouted, sending SIGKILL!
> 06:07:04.292044  Test timeouted, sending SIGKILL!
> 06:07:09.290185  Test timeouted, sending SIGKILL!
> 06:07:14.295134  Test timeouted, sending SIGKILL!
> 06:07:19.293279  Test timeouted, sending SIGKILL!
> 06:07:24.292405  Test timeouted, sending SIGKILL!
> 06:07:24.292790  Cannot kill test processes!
> 06:07:24.296564  Congratulation, likely test hit a kernel bug.
> 06:07:24.301315  Exiting uncleanly...
> 
> 
> Bisect log:
> ----------
> git bisect start
> # good: [fec50db7033ea478773b159e0e2efb135270e3b7] Linux 6.9-rc3
> git bisect good fec50db7033ea478773b159e0e2efb135270e3b7
> # bad: [a053fd3ca5d1b927a8655f239c84b0d790218fda] Add linux-next
> specific files for 20240409
> git bisect bad a053fd3ca5d1b927a8655f239c84b0d790218fda
> # bad: [4eb0063b031ea720cd8971e3e3d2426d27c5d7a6] Merge branch
> 'mtd/next' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git
> git bisect bad 4eb0063b031ea720cd8971e3e3d2426d27c5d7a6
> # bad: [24dde11bfadd5f38c6cca3cea6f16971bd10dc86] Merge branch
> 'for-next' of git://github.com/Xilinx/linux-xlnx.git
> git bisect bad 24dde11bfadd5f38c6cca3cea6f16971bd10dc86
> # bad: [bef23348d7e75c502399ba1a24627aa447b816dc] Merge branch
> 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> git bisect bad bef23348d7e75c502399ba1a24627aa447b816dc
> # good: [3317f7faabc24b500c26d02615ac75ca2786e272] mm: swap:
> free_swap_and_cache_nr() as batched free_swap_and_cache()
> git bisect good 3317f7faabc24b500c26d02615ac75ca2786e272
> # good: [541970e62546ff5c96622669f2796d43b1a406e3] Merge branch
> 'gpio/for-current' of
> git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
> git bisect good 541970e62546ff5c96622669f2796d43b1a406e3
> # bad: [1df265717e0f9d96079073189f6e6c52a48e493c]
> __mod_memcg_lruvec_state-enhance-diagnostics-fix
> git bisect bad 1df265717e0f9d96079073189f6e6c52a48e493c
> # bad: [19ab4054346474c2b456f9bff6a98e41e5e46224] hugetlb: Simplify
> hugetlb_wp() arguments
> git bisect bad 19ab4054346474c2b456f9bff6a98e41e5e46224
> # good: [75ef450aa9828ca20817c46c5b99f131156f6eee] x86: mm: accelerate
> pagefault when badaccess
> git bisect good 75ef450aa9828ca20817c46c5b99f131156f6eee
> # bad: [c6b5a19e679bb713efd57d82f9080f780e0bb60a] mm/treewide: rename
> CONFIG_HAVE_FAST_GUP to CONFIG_HAVE_GUP_FAST
> git bisect bad c6b5a19e679bb713efd57d82f9080f780e0bb60a
> # bad: [01446d1e9e627945664f2a1daa53e8720946d0cd] mm: remove struct page
> from get_shadow_from_swap_cache
> git bisect bad 01446d1e9e627945664f2a1daa53e8720946d0cd
> # bad: [8f9d6a30dd992c44debea3161083a6c2cd3ad87f] arm: mm: drop
> VM_FAULT_BADMAP/VM_FAULT_BADACCESS
> git bisect bad 8f9d6a30dd992c44debea3161083a6c2cd3ad87f
> # bad: [cf0049a15207a5a78798105eff789c2025bcf652] arm64: mm: drop
> VM_FAULT_BADMAP/VM_FAULT_BADACCESS
> git bisect bad cf0049a15207a5a78798105eff789c2025bcf652
> # first bad commit: [cf0049a15207a5a78798105eff789c2025bcf652] arm64:
> mm: drop VM_FAULT_BADMAP/VM_FAULT_BADACCESS
> 
> 
> Thanks,
> Aishwarya
Cristian Marussi April 10, 2024, 12:39 p.m. UTC | #6
On Wed, Apr 10, 2024 at 07:53:21PM +0800, Kefeng Wang wrote:
> 
> 
> On 2024/4/10 19:24, Aishwarya TCV wrote:
> > 

Hi,

> > 
> > On 07/04/2024 09:12, Kefeng Wang wrote:
> > > If the map or access is bad, directly set si_code to SEGV_MAPERR or
> > > SEGV_ACCERR, set fault to 0 and go to the error handling path; this
> > > lets us drop the arch's private vm_fault reasons.
> > > 
> > > Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> > > ---
> > >   arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
> > >   1 file changed, 18 insertions(+), 23 deletions(-)
> > 
> > Hi Kefeng,
> > 
> > While running the LTP test suite, I observed that a few test cases are
> > unable to kill the test executable when run against the next-master
> > (next-20240409) kernel with Arm64 on JUNO in our CI. I can send the
> > full logs if required, but they don't say much.
> 
> Sorry about that. As Catalin pointed out, there is an issue in this patch,
> and I replied today; could you help test with the following changes? Many thanks.
> 
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 61a2acae0dca..451ba7cbd5ad 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -604,6 +604,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>         }
> 
>         if (!(vma->vm_flags & vm_flags)) {
> +               mmap_read_unlock(mm);
>                 fault = 0;
>                 si_code = SEGV_ACCERR;
>                 goto bad_area;
> @@ -632,6 +633,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>         if (likely(!(fault & VM_FAULT_ERROR)))
>                 return 0;
> 
> +       si_code = SEGV_MAPERR;
>  bad_area:
>         /*
>          * If we are in kernel mode at this point, we have no context to
> 

On my JUNO setup, the above patch solves the 'un-killable tests' issue and
all the previously failing tests pass. (I have NOT run the full LTP
test-suite, though, only replayed the tests that were failing before, as
mentioned by Aishwarya.)

Thanks,
Cristian
Kefeng Wang April 10, 2024, 12:48 p.m. UTC | #7
On 2024/4/10 20:39, Cristian Marussi wrote:
> On Wed, Apr 10, 2024 at 07:53:21PM +0800, Kefeng Wang wrote:
>>
>>
>> On 2024/4/10 19:24, Aishwarya TCV wrote:
>>>
> 
> Hi,
> 
>>>
>>> On 07/04/2024 09:12, Kefeng Wang wrote:
>>>> If the map or access is bad, directly set si_code to SEGV_MAPERR or
>>>> SEGV_ACCERR, set fault to 0 and go to the error handling path; this
>>>> lets us drop the arch's private vm_fault reasons.
>>>>
>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>>> ---
>>>>    arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
>>>>    1 file changed, 18 insertions(+), 23 deletions(-)
>>>
>>> Hi Kefeng,
>>>
>>> While running the LTP test suite, I observed that a few test cases are
>>> unable to kill the test executable when run against the next-master
>>> (next-20240409) kernel with Arm64 on JUNO in our CI. I can send the
>>> full logs if required, but they don't say much.
>>
>> Sorry about that. As Catalin pointed out, there is an issue in this patch,
>> and I replied today; could you help test with the following changes? Many thanks.
>>
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index 61a2acae0dca..451ba7cbd5ad 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> @@ -604,6 +604,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>          }
>>
>>          if (!(vma->vm_flags & vm_flags)) {
>> +               mmap_read_unlock(mm);
>>                  fault = 0;
>>                  si_code = SEGV_ACCERR;
>>                  goto bad_area;
>> @@ -632,6 +633,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>          if (likely(!(fault & VM_FAULT_ERROR)))
>>                  return 0;
>>
>> +       si_code = SEGV_MAPERR;
>>   bad_area:
>>          /*
>>           * If we are in kernel mode at this point, we have no context to
>>
> 
> On my JUNO setup, the above patch solves the 'un-killable tests' issue and
> all the previously failing tests pass. (I have NOT run the full LTP
> test-suite, though, only replayed the tests that were failing before, as
> mentioned by Aishwarya.)

Thanks, I will resend v2, but let's wait for Catalin in case he has any
other opinion.
> 
> Thanks,
> Cristian
Andrew Morton April 10, 2024, 8:18 p.m. UTC | #8
On Wed, 10 Apr 2024 20:48:28 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:

> > On my JUNO setup, the above patch solves the 'un-killable tests' issue and
> > all the previously failing tests pass. (I have NOT run the full LTP
> > test-suite, though, only replayed the tests that were failing before, as
> > mentioned by Aishwarya.)
> 
> Thanks, I will resend v2, but let's wait for Catalin in case he has any
> other opinion.

Thanks, I dropped v1.
Catalin Marinas April 11, 2024, 9:59 a.m. UTC | #9
On Wed, Apr 10, 2024 at 06:58:27PM +0800, Kefeng Wang wrote:
> On 2024/4/10 9:30, Kefeng Wang wrote:
> > On 2024/4/9 22:28, Catalin Marinas wrote:
> > > On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
> > > > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > > > index 405f9aa831bd..61a2acae0dca 100644
> > > > --- a/arch/arm64/mm/fault.c
> > > > +++ b/arch/arm64/mm/fault.c
> > > > @@ -500,9 +500,6 @@ static bool is_write_abort(unsigned long esr)
> > > >       return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
> > > >   }
> > > > -#define VM_FAULT_BADMAP        ((__force vm_fault_t)0x010000)
> > > > -#define VM_FAULT_BADACCESS    ((__force vm_fault_t)0x020000)
> > > > -
> > > >   static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
> > > >                      struct pt_regs *regs)
> > > >   {
> > > > @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
> > > >       unsigned int mm_flags = FAULT_FLAG_DEFAULT;
> > > >       unsigned long addr = untagged_addr(far);
> > > >       struct vm_area_struct *vma;
> > > > +    int si_code;
> > > 
> > > I think we should initialise this to 0. Currently all paths seem to set
> > > si_code to something meaningful, but I'm not sure the last 'else' clause
> > > in this patch is guaranteed to always cover exactly those earlier code
> > > paths that update si_code. I'm not talking about the 'goto bad_area'
> > > paths, since they set 'fault' to 0, but about the fall-through after the
> > > second (under the mm lock) handle_mm_fault().
[...]
> > > > +    fault = handle_mm_fault(vma, addr, mm_flags, regs);
> > > >       /* Quick path to respond to signals */
> > > >       if (fault_signal_pending(fault, regs)) {
> > > >           if (!user_mode(regs))
> > > > @@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
> > > >       mmap_read_unlock(mm);
> > > >   done:
> > > > -    /*
> > > > -     * Handle the "normal" (no error) case first.
> > > > -     */
> > > > -    if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
> > > > -                  VM_FAULT_BADACCESS))))
> > > > +    /* Handle the "normal" (no error) case first. */
> > > > +    if (likely(!(fault & VM_FAULT_ERROR)))
> > > >           return 0;
> 
> Another choice: set si_code = SEGV_MAPERR here, since the normal page
> fault path doesn't use si_code; only the error path needs it initialized.

Yes, I think initialising it here would be fine. That's the fall-through
case I was concerned about. All the other goto bad_area places already
initialise si_code.
Kefeng Wang April 11, 2024, 11:11 a.m. UTC | #10
On 2024/4/11 17:59, Catalin Marinas wrote:
> On Wed, Apr 10, 2024 at 06:58:27PM +0800, Kefeng Wang wrote:
>> On 2024/4/10 9:30, Kefeng Wang wrote:
>>> On 2024/4/9 22:28, Catalin Marinas wrote:
>>>> On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
>>>>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>>>>> index 405f9aa831bd..61a2acae0dca 100644
>>>>> --- a/arch/arm64/mm/fault.c
>>>>> +++ b/arch/arm64/mm/fault.c
>>>>> @@ -500,9 +500,6 @@ static bool is_write_abort(unsigned long esr)
>>>>>        return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
>>>>>    }
>>>>> -#define VM_FAULT_BADMAP        ((__force vm_fault_t)0x010000)
>>>>> -#define VM_FAULT_BADACCESS    ((__force vm_fault_t)0x020000)
>>>>> -
>>>>>    static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>>>                       struct pt_regs *regs)
>>>>>    {
>>>>> @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>>>        unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>>>>>        unsigned long addr = untagged_addr(far);
>>>>>        struct vm_area_struct *vma;
>>>>> +    int si_code;
>>>>
>>>> I think we should initialise this to 0. Currently all paths seem to set
>>>> si_code to something meaningful, but I'm not sure the last 'else' clause
>>>> in this patch is guaranteed to always cover exactly those earlier code
>>>> paths that update si_code. I'm not talking about the 'goto bad_area'
>>>> paths, since they set 'fault' to 0, but about the fall-through after the
>>>> second (under the mm lock) handle_mm_fault().
> [...]
>>>>> +    fault = handle_mm_fault(vma, addr, mm_flags, regs);
>>>>>        /* Quick path to respond to signals */
>>>>>        if (fault_signal_pending(fault, regs)) {
>>>>>            if (!user_mode(regs))
>>>>> @@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>>>        mmap_read_unlock(mm);
>>>>>    done:
>>>>> -    /*
>>>>> -     * Handle the "normal" (no error) case first.
>>>>> -     */
>>>>> -    if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
>>>>> -                  VM_FAULT_BADACCESS))))
>>>>> +    /* Handle the "normal" (no error) case first. */
>>>>> +    if (likely(!(fault & VM_FAULT_ERROR)))
>>>>>            return 0;
>>
>> Another choice: set si_code = SEGV_MAPERR here, since the normal page
>> fault path doesn't use si_code; only the error path needs it initialized.
> 
> Yes, I think initialising it here would be fine. That's the fall-through
> case I was concerned about. All the other goto bad_area places already
> initialise si_code.
> 
Thanks for confirming, will send v2 soon.

Patch

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 405f9aa831bd..61a2acae0dca 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -500,9 +500,6 @@  static bool is_write_abort(unsigned long esr)
 	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
 }
 
-#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
-#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
-
 static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 				   struct pt_regs *regs)
 {
@@ -513,6 +510,7 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
 	unsigned long addr = untagged_addr(far);
 	struct vm_area_struct *vma;
+	int si_code;
 
 	if (kprobe_page_fault(regs, esr))
 		return 0;
@@ -572,9 +570,10 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 
 	if (!(vma->vm_flags & vm_flags)) {
 		vma_end_read(vma);
-		fault = VM_FAULT_BADACCESS;
+		fault = 0;
+		si_code = SEGV_ACCERR;
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
-		goto done;
+		goto bad_area;
 	}
 	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
 	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -599,15 +598,18 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 retry:
 	vma = lock_mm_and_find_vma(mm, addr, regs);
 	if (unlikely(!vma)) {
-		fault = VM_FAULT_BADMAP;
-		goto done;
+		fault = 0;
+		si_code = SEGV_MAPERR;
+		goto bad_area;
 	}
 
-	if (!(vma->vm_flags & vm_flags))
-		fault = VM_FAULT_BADACCESS;
-	else
-		fault = handle_mm_fault(vma, addr, mm_flags, regs);
+	if (!(vma->vm_flags & vm_flags)) {
+		fault = 0;
+		si_code = SEGV_ACCERR;
+		goto bad_area;
+	}
 
+	fault = handle_mm_fault(vma, addr, mm_flags, regs);
 	/* Quick path to respond to signals */
 	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
@@ -626,13 +628,11 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	mmap_read_unlock(mm);
 
 done:
-	/*
-	 * Handle the "normal" (no error) case first.
-	 */
-	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
-			      VM_FAULT_BADACCESS))))
+	/* Handle the "normal" (no error) case first. */
+	if (likely(!(fault & VM_FAULT_ERROR)))
 		return 0;
 
+bad_area:
 	/*
 	 * If we are in kernel mode at this point, we have no context to
 	 * handle this fault with.
@@ -667,13 +667,8 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 
 		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
 	} else {
-		/*
-		 * Something tried to access memory that isn't in our memory
-		 * map.
-		 */
-		arm64_force_sig_fault(SIGSEGV,
-				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
-				      far, inf->name);
+		/* Something tried to access memory outside our memory map */
+		arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
 	}
 
 	return 0;