Message ID | 20240407081211.2292362-2-wangkefeng.wang@huawei.com (mailing list archive)
---|---
State | New, archived
Series | mm: remove arch's private VM_FAULT_BADMAP/BADACCESS
Hi Kefeng,

On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 405f9aa831bd..61a2acae0dca 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -500,9 +500,6 @@ static bool is_write_abort(unsigned long esr)
>  	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
>  }
>  
> -#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
> -#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
> -
>  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  				   struct pt_regs *regs)
>  {
> @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>  	unsigned long addr = untagged_addr(far);
>  	struct vm_area_struct *vma;
> +	int si_code;

I think we should initialise this to 0. Currently all paths seem to set
si_code to something meaningful but I'm not sure the last 'else' clause
in this patch is guaranteed to always cover exactly those earlier code
paths updating si_code. I'm not talking about the 'goto bad_area' paths
since they set 'fault' to 0 but the fall-through after the second (under
the mm lock) handle_mm_fault().

>  	if (kprobe_page_fault(regs, esr))
>  		return 0;
> @@ -572,9 +570,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  
>  	if (!(vma->vm_flags & vm_flags)) {
>  		vma_end_read(vma);
> -		fault = VM_FAULT_BADACCESS;
> +		fault = 0;
> +		si_code = SEGV_ACCERR;
>  		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> -		goto done;
> +		goto bad_area;
>  	}
>  	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
>  	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> @@ -599,15 +598,18 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  retry:
>  	vma = lock_mm_and_find_vma(mm, addr, regs);
>  	if (unlikely(!vma)) {
> -		fault = VM_FAULT_BADMAP;
> -		goto done;
> +		fault = 0;
> +		si_code = SEGV_MAPERR;
> +		goto bad_area;
>  	}
>  
> -	if (!(vma->vm_flags & vm_flags))
> -		fault = VM_FAULT_BADACCESS;
> -	else
> -		fault = handle_mm_fault(vma, addr, mm_flags, regs);
> +	if (!(vma->vm_flags & vm_flags)) {
> +		fault = 0;
> +		si_code = SEGV_ACCERR;
> +		goto bad_area;
> +	}

What's releasing the mm lock here? Prior to this change, it is falling
through to mmap_read_unlock() below or handle_mm_fault() was releasing
the lock (VM_FAULT_RETRY, VM_FAULT_COMPLETED).

>  
> +	fault = handle_mm_fault(vma, addr, mm_flags, regs);
>  	/* Quick path to respond to signals */
>  	if (fault_signal_pending(fault, regs)) {
>  		if (!user_mode(regs))
> @@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	mmap_read_unlock(mm);
>  
>  done:
> -	/*
> -	 * Handle the "normal" (no error) case first.
> -	 */
> -	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
> -			      VM_FAULT_BADACCESS))))
> +	/* Handle the "normal" (no error) case first. */
> +	if (likely(!(fault & VM_FAULT_ERROR)))
>  		return 0;
>  
> +bad_area:
>  	/*
>  	 * If we are in kernel mode at this point, we have no context to
>  	 * handle this fault with.
> @@ -667,13 +667,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  
>  		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
>  	} else {
> -		/*
> -		 * Something tried to access memory that isn't in our memory
> -		 * map.
> -		 */
> -		arm64_force_sig_fault(SIGSEGV,
> -				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
> -				      far, inf->name);
> +		/* Something tried to access memory that out of memory map */
> +		arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
>  	}

We can get to the 'else' clause after the second handle_mm_fault(). Do we
guarantee that 'fault == 0' in this last block? If not, maybe a warning
and some safe initialisation for 'si_code' to avoid leaking stack data.
On 2024/4/9 22:28, Catalin Marinas wrote:
> Hi Kefeng,
>
> On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index 405f9aa831bd..61a2acae0dca 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> [...]
>> @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>>  	unsigned long addr = untagged_addr(far);
>>  	struct vm_area_struct *vma;
>> +	int si_code;
>
> I think we should initialise this to 0. Currently all paths seem to set
> si_code to something meaningful but I'm not sure the last 'else' clause
> in this patch is guaranteed to always cover exactly those earlier code
> paths updating si_code. I'm not talking about the 'goto bad_area' paths
> since they set 'fault' to 0 but the fall-through after the second (under
> the mm lock) handle_mm_fault().

Rechecked it: without this patch, the second handle_mm_fault() never
returns VM_FAULT_BADACCESS, but it could return VM_FAULT_SIGSEGV (maybe
others), which is not handled in the other error path:

	handle_mm_fault
		ret = sanitize_fault_flags(vma, &flags);
		if (!arch_vma_access_permitted())
			ret = VM_FAULT_SIGSEGV;

so the original logic would set si_code to SEGV_MAPERR:

	fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,

therefore, I think we should set the default si_code to SEGV_MAPERR.

>> [...]
>> @@ -599,15 +598,18 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>  retry:
>>  	vma = lock_mm_and_find_vma(mm, addr, regs);
>>  	if (unlikely(!vma)) {
>> -		fault = VM_FAULT_BADMAP;
>> -		goto done;
>> +		fault = 0;
>> +		si_code = SEGV_MAPERR;
>> +		goto bad_area;
>>  	}
>>  
>> -	if (!(vma->vm_flags & vm_flags))
>> -		fault = VM_FAULT_BADACCESS;
>> -	else
>> -		fault = handle_mm_fault(vma, addr, mm_flags, regs);
>> +	if (!(vma->vm_flags & vm_flags)) {
>> +		fault = 0;
>> +		si_code = SEGV_ACCERR;
>> +		goto bad_area;
>> +	}
>
> What's releasing the mm lock here? Prior to this change, it is falling
> through to mmap_read_unlock() below or handle_mm_fault() was releasing
> the lock (VM_FAULT_RETRY, VM_FAULT_COMPLETED).

Indeed, will fix.

>> [...]
>> @@ -667,13 +667,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>  
>>  		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
>>  	} else {
>> -		/*
>> -		 * Something tried to access memory that isn't in our memory
>> -		 * map.
>> -		 */
>> -		arm64_force_sig_fault(SIGSEGV,
>> -				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
>> -				      far, inf->name);
>> +		/* Something tried to access memory that out of memory map */
>> +		arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
>>  	}
>
> We can get to the 'else' clause after the second handle_mm_fault(). Do we
> guarantee that 'fault == 0' in this last block? If not, maybe a warning
> and some safe initialisation for 'si_code' to avoid leaking stack data.

As analyzed above, it is sufficient to make si_code SEGV_MAPERR by
default, right?

Thanks.
On 2024/4/10 9:30, Kefeng Wang wrote:
> On 2024/4/9 22:28, Catalin Marinas wrote:
>> Hi Kefeng,
>>
>> On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
>>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>>> index 405f9aa831bd..61a2acae0dca 100644
>>> --- a/arch/arm64/mm/fault.c
>>> +++ b/arch/arm64/mm/fault.c
>>> [...]
>>> @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>>>  	unsigned long addr = untagged_addr(far);
>>>  	struct vm_area_struct *vma;
>>> +	int si_code;
>>
>> I think we should initialise this to 0. Currently all paths seem to set
>> si_code to something meaningful but I'm not sure the last 'else' clause
>> in this patch is guaranteed to always cover exactly those earlier code
>> paths updating si_code. I'm not talking about the 'goto bad_area' paths
>> since they set 'fault' to 0 but the fall-through after the second (under
>> the mm lock) handle_mm_fault().
>
> Rechecked it: without this patch, the second handle_mm_fault() never
> returns VM_FAULT_BADACCESS, but it could return VM_FAULT_SIGSEGV (maybe
> others), which is not handled in the other error path:
>
> 	handle_mm_fault
> 		ret = sanitize_fault_flags(vma, &flags);
> 		if (!arch_vma_access_permitted())
> 			ret = VM_FAULT_SIGSEGV;
>
> so the original logic would set si_code to SEGV_MAPERR:
>
> 	fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
>
> therefore, I think we should set the default si_code to SEGV_MAPERR.
>
>>> [...]
>>> @@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>>>  	mmap_read_unlock(mm);
>>>  
>>>  done:
>>> -	/*
>>> -	 * Handle the "normal" (no error) case first.
>>> -	 */
>>> -	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
>>> -			      VM_FAULT_BADACCESS))))
>>> +	/* Handle the "normal" (no error) case first. */
>>> +	if (likely(!(fault & VM_FAULT_ERROR)))
>>>  		return 0;

Another choice: we could set si_code = SEGV_MAPERR here, since a normal
page fault doesn't use si_code; only the error path needs to initialise
it.

>>> +bad_area:
>>> [...]
>>
>> We can get to the 'else' clause after the second handle_mm_fault(). Do we
>> guarantee that 'fault == 0' in this last block? If not, maybe a warning
>> and some safe initialisation for 'si_code' to avoid leaking stack data.
>
> As analyzed above, it is sufficient to make si_code SEGV_MAPERR by
> default, right?
>
> Thanks.
On 07/04/2024 09:12, Kefeng Wang wrote:
> If bad map or access, directly set si_code to SEGV_MAPERR or SEGV_ACCERR,
> also set fault to 0 and goto the error handling, which allows us to drop
> the arch's special vm fault reasons.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
>  1 file changed, 18 insertions(+), 23 deletions(-)

Hi Kefeng,

While running the LTP test suite, I observed that a few test cases are
unable to kill the test executable when run against the next-master
(next-20240409) kernel with Arm64 on JUNO in our CI. I can send the full
logs if required, but they don't say much.

A bisect identified cf0049a15207a5a78798105eff789c2025bcf652 as the
first bad commit. Bisected it on the tag "next-20240409" at repo
"https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".

This works fine on Linux version v6.9-rc3.

Here are some of the failing test cases in LTP quickhit:
-----------------------------
chdir01
link04
select03
unlink07

Failure log:
-----------
06:06:49.288908 Test timeouted, sending SIGKILL!
06:06:54.293806 Test timeouted, sending SIGKILL!
06:06:59.296872 Test timeouted, sending SIGKILL!
06:07:04.292044 Test timeouted, sending SIGKILL!
06:07:09.290185 Test timeouted, sending SIGKILL!
06:07:14.295134 Test timeouted, sending SIGKILL!
06:07:19.293279 Test timeouted, sending SIGKILL!
06:07:24.292405 Test timeouted, sending SIGKILL!
06:07:24.292790 Cannot kill test processes!
06:07:24.296564 Congratulation, likely test hit a kernel bug.
06:07:24.301315 Exiting uncleanly...

Bisect log:
----------
git bisect start
# good: [fec50db7033ea478773b159e0e2efb135270e3b7] Linux 6.9-rc3
git bisect good fec50db7033ea478773b159e0e2efb135270e3b7
# bad: [a053fd3ca5d1b927a8655f239c84b0d790218fda] Add linux-next specific files for 20240409
git bisect bad a053fd3ca5d1b927a8655f239c84b0d790218fda
# bad: [4eb0063b031ea720cd8971e3e3d2426d27c5d7a6] Merge branch 'mtd/next' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git
git bisect bad 4eb0063b031ea720cd8971e3e3d2426d27c5d7a6
# bad: [24dde11bfadd5f38c6cca3cea6f16971bd10dc86] Merge branch 'for-next' of git://github.com/Xilinx/linux-xlnx.git
git bisect bad 24dde11bfadd5f38c6cca3cea6f16971bd10dc86
# bad: [bef23348d7e75c502399ba1a24627aa447b816dc] Merge branch 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
git bisect bad bef23348d7e75c502399ba1a24627aa447b816dc
# good: [3317f7faabc24b500c26d02615ac75ca2786e272] mm: swap: free_swap_and_cache_nr() as batched free_swap_and_cache()
git bisect good 3317f7faabc24b500c26d02615ac75ca2786e272
# good: [541970e62546ff5c96622669f2796d43b1a406e3] Merge branch 'gpio/for-current' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
git bisect good 541970e62546ff5c96622669f2796d43b1a406e3
# bad: [1df265717e0f9d96079073189f6e6c52a48e493c] __mod_memcg_lruvec_state-enhance-diagnostics-fix
git bisect bad 1df265717e0f9d96079073189f6e6c52a48e493c
# bad: [19ab4054346474c2b456f9bff6a98e41e5e46224] hugetlb: Simplify hugetlb_wp() arguments
git bisect bad 19ab4054346474c2b456f9bff6a98e41e5e46224
# good: [75ef450aa9828ca20817c46c5b99f131156f6eee] x86: mm: accelerate pagefault when badaccess
git bisect good 75ef450aa9828ca20817c46c5b99f131156f6eee
# bad: [c6b5a19e679bb713efd57d82f9080f780e0bb60a] mm/treewide: rename CONFIG_HAVE_FAST_GUP to CONFIG_HAVE_GUP_FAST
git bisect bad c6b5a19e679bb713efd57d82f9080f780e0bb60a
# bad: [01446d1e9e627945664f2a1daa53e8720946d0cd] mm: remove struct page from get_shadow_from_swap_cache
git bisect bad 01446d1e9e627945664f2a1daa53e8720946d0cd
# bad: [8f9d6a30dd992c44debea3161083a6c2cd3ad87f] arm: mm: drop VM_FAULT_BADMAP/VM_FAULT_BADACCESS
git bisect bad 8f9d6a30dd992c44debea3161083a6c2cd3ad87f
# bad: [cf0049a15207a5a78798105eff789c2025bcf652] arm64: mm: drop VM_FAULT_BADMAP/VM_FAULT_BADACCESS
git bisect bad cf0049a15207a5a78798105eff789c2025bcf652
# first bad commit: [cf0049a15207a5a78798105eff789c2025bcf652] arm64: mm: drop VM_FAULT_BADMAP/VM_FAULT_BADACCESS

Thanks,
Aishwarya
On 2024/4/10 19:24, Aishwarya TCV wrote:
> On 07/04/2024 09:12, Kefeng Wang wrote:
>> If bad map or access, directly set si_code to SEGV_MAPERR or SEGV_ACCERR,
>> also set fault to 0 and goto the error handling, which allows us to drop
>> the arch's special vm fault reasons.
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>  arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
>>  1 file changed, 18 insertions(+), 23 deletions(-)
>
> Hi Kefeng,
>
> While running the LTP test suite, I observed that a few test cases are
> unable to kill the test executable when run against the next-master
> (next-20240409) kernel with Arm64 on JUNO in our CI. I can send the full
> logs if required, but they don't say much.

Sorry about that. As Catalin pointed out, there is an issue in this
patch, and I replied today; could you help test with the following
changes? Many thanks.

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 61a2acae0dca..451ba7cbd5ad 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -604,6 +604,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	}
 
 	if (!(vma->vm_flags & vm_flags)) {
+		mmap_read_unlock(mm);
 		fault = 0;
 		si_code = SEGV_ACCERR;
 		goto bad_area;
@@ -632,6 +633,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	if (likely(!(fault & VM_FAULT_ERROR)))
 		return 0;
 
+	si_code = SEGV_MAPERR;
 bad_area:
 	/*
 	 * If we are in kernel mode at this point, we have no context to

> A bisect identified cf0049a15207a5a78798105eff789c2025bcf652 as the
> first bad commit. Bisected it on the tag "next-20240409" at repo
> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>
> This works fine on Linux version v6.9-rc3.
>
> Here are some of the failing test cases in LTP quickhit:
> -----------------------------
> chdir01
> link04
> select03
> unlink07
>
> Failure log:
> -----------
> [...]
>
> Bisect log:
> ----------
> [...]
>
> Thanks,
> Aishwarya
On Wed, Apr 10, 2024 at 07:53:21PM +0800, Kefeng Wang wrote:
> On 2024/4/10 19:24, Aishwarya TCV wrote:
> > Hi,
> >
> > On 07/04/2024 09:12, Kefeng Wang wrote:
> > > If bad map or access, directly set si_code to SEGV_MAPERR or SEGV_ACCERR,
> > > also set fault to 0 and goto the error handling, which allows us to drop
> > > the arch's special vm fault reasons.
> > >
> > > Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> > > ---
> > >  arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
> > >  1 file changed, 18 insertions(+), 23 deletions(-)
> >
> > Hi Kefeng,
> >
> > While running the LTP test suite, I observed that a few test cases are
> > unable to kill the test executable when run against the next-master
> > (next-20240409) kernel with Arm64 on JUNO in our CI. I can send the full
> > logs if required, but they don't say much.
>
> Sorry about that. As Catalin pointed out, there is an issue in this
> patch, and I replied today; could you help test with the following
> changes? Many thanks.
>
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 61a2acae0dca..451ba7cbd5ad 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -604,6 +604,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	}
>  
>  	if (!(vma->vm_flags & vm_flags)) {
> +		mmap_read_unlock(mm);
>  		fault = 0;
>  		si_code = SEGV_ACCERR;
>  		goto bad_area;
> @@ -632,6 +633,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	if (likely(!(fault & VM_FAULT_ERROR)))
>  		return 0;
>  
> +	si_code = SEGV_MAPERR;
> bad_area:
>  	/*
>  	 * If we are in kernel mode at this point, we have no context to

On my JUNO setup, the above patch solves the 'un-killable tests' issues
and all the previously failing tests pass. (I have NOT run the full LTP
test-suite, though, only replayed the tests that were failing before as
mentioned by Aishwarya.)

Thanks,
Cristian
On 2024/4/10 20:39, Cristian Marussi wrote:
> On Wed, Apr 10, 2024 at 07:53:21PM +0800, Kefeng Wang wrote:
>> On 2024/4/10 19:24, Aishwarya TCV wrote:
>> [...]
>>
>> Sorry about that. As Catalin pointed out, there is an issue in this
>> patch, and I replied today; could you help test with the following
>> changes? Many thanks.
>>
>> [...]
>
> On my JUNO setup, the above patch solves the 'un-killable tests' issues
> and all the previously failing tests pass. (I have NOT run the full LTP
> test-suite, though, only replayed the tests that were failing before as
> mentioned by Aishwarya.)

Thanks, I will resend v2, but let's wait for Catalin to see if there are
any other opinions.

> Thanks,
> Cristian
On Wed, 10 Apr 2024 20:48:28 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:

> > On my JUNO setup, the above patch solves the 'un-killable tests' issues
> > and all the previously failing tests pass. (I have NOT run the full LTP
> > test-suite, though, only replayed the tests that were failing before as
> > mentioned by Aishwarya.)
>
> Thanks, I will resend v2, but let's wait for Catalin to see if there are
> any other opinions.

Thanks, I dropped v1.
On Wed, Apr 10, 2024 at 06:58:27PM +0800, Kefeng Wang wrote:
> On 2024/4/10 9:30, Kefeng Wang wrote:
> > On 2024/4/9 22:28, Catalin Marinas wrote:
> > > On Sun, Apr 07, 2024 at 04:12:10PM +0800, Kefeng Wang wrote:
> > > > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > > > index 405f9aa831bd..61a2acae0dca 100644
> > > > --- a/arch/arm64/mm/fault.c
> > > > +++ b/arch/arm64/mm/fault.c
> > > > @@ -500,9 +500,6 @@ static bool is_write_abort(unsigned long esr)
> > > >  	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
> > > >  }
> > > >  
> > > > -#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
> > > > -#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
> > > > -
> > > >  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
> > > >  				   struct pt_regs *regs)
> > > >  {
> > > > @@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
> > > >  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
> > > >  	unsigned long addr = untagged_addr(far);
> > > >  	struct vm_area_struct *vma;
> > > > +	int si_code;
> > >
> > > I think we should initialise this to 0. Currently all paths seem to set
> > > si_code to something meaningful but I'm not sure the last 'else' clause
> > > in this patch is guaranteed to always cover exactly those earlier code
> > > paths updating si_code. I'm not talking about the 'goto bad_area' paths
> > > since they set 'fault' to 0 but the fall-through after the second (under
> > > the mm lock) handle_mm_fault().
[...]
> > > > +	fault = handle_mm_fault(vma, addr, mm_flags, regs);
> > > >  	/* Quick path to respond to signals */
> > > >  	if (fault_signal_pending(fault, regs)) {
> > > >  		if (!user_mode(regs))
> > > > @@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
> > > >  	mmap_read_unlock(mm);
> > > >  
> > > > done:
> > > > -	/*
> > > > -	 * Handle the "normal" (no error) case first.
> > > > -	 */
> > > > -	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
> > > > -			      VM_FAULT_BADACCESS))))
> > > > +	/* Handle the "normal" (no error) case first. */
> > > > +	if (likely(!(fault & VM_FAULT_ERROR)))
> > > >  		return 0;
>
> Another choice: we could set si_code = SEGV_MAPERR here, since a normal
> page fault doesn't use si_code; only the error path needs to initialise
> it.

Yes, I think initialising it here would be fine. That's the fall-through
case I was concerned about. All the other goto bad_area places already
initialise si_code.
On 2024/4/11 17:59, Catalin Marinas wrote:
> On Wed, Apr 10, 2024 at 06:58:27PM +0800, Kefeng Wang wrote:
>> On 2024/4/10 9:30, Kefeng Wang wrote:
>>> On 2024/4/9 22:28, Catalin Marinas wrote:
>>>> [...]
>>>>
>>>> I think we should initialise this to 0. Currently all paths seem to set
>>>> si_code to something meaningful but I'm not sure the last 'else' clause
>>>> in this patch is guaranteed to always cover exactly those earlier code
>>>> paths updating si_code. [...]
> [...]
>> Another choice: we could set si_code = SEGV_MAPERR here, since a normal
>> page fault doesn't use si_code; only the error path needs to initialise
>> it.
>
> Yes, I think initialising it here would be fine. That's the fall-through
> case I was concerned about. All the other goto bad_area places already
> initialise si_code.

Thanks for confirming, will send v2 soon.
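As a side note on testing: a quick standalone check for the 'un-killable process' symptom, outside LTP, could look like the sketch below. Two caveats, both assumptions rather than facts from this thread: the program is illustrative only (it is not one of the failing LTP cases), and a fresh single-threaded fault may be served via the per-VMA-lock path, which already dropped its lock in v1, so the sketch is not guaranteed to reach the buggy mmap-lock fallback deterministically.

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t pid = fork();

	if (pid == 0) {
		/* Mapped but inaccessible: exercises the SEGV_ACCERR path. */
		char *p = mmap(NULL, 4096, PROT_NONE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			_exit(2);
		*(volatile char *)p = 1;	/* should raise SIGSEGV */
		_exit(0);			/* not reached */
	}

	sleep(2);	/* give the child time to fault and die */

	if (waitpid(pid, &status, WNOHANG) == pid &&
	    WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV) {
		printf("ok: child killed by SIGSEGV\n");
		return 0;
	}

	/* Mirror the LTP symptom: try SIGKILL and see if the child reaps. */
	kill(pid, SIGKILL);
	sleep(1);
	if (waitpid(pid, &status, WNOHANG) != pid)
		printf("FAIL: child is unkillable (leaked mmap lock?)\n");
	else
		printf("FAIL: child did not die with SIGSEGV\n");
	return 1;
}

On a fixed kernel the child should die promptly with SIGSEGV; a child that survives SIGKILL mirrors the "Cannot kill test processes!" output in Aishwarya's failure log.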
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 405f9aa831bd..61a2acae0dca 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -500,9 +500,6 @@ static bool is_write_abort(unsigned long esr)
 	return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
 }
 
-#define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
-#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
-
 static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 				   struct pt_regs *regs)
 {
@@ -513,6 +510,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
 	unsigned long addr = untagged_addr(far);
 	struct vm_area_struct *vma;
+	int si_code;
 
 	if (kprobe_page_fault(regs, esr))
 		return 0;
@@ -572,9 +570,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 
 	if (!(vma->vm_flags & vm_flags)) {
 		vma_end_read(vma);
-		fault = VM_FAULT_BADACCESS;
+		fault = 0;
+		si_code = SEGV_ACCERR;
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
-		goto done;
+		goto bad_area;
 	}
 	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
 	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
@@ -599,15 +598,18 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 retry:
 	vma = lock_mm_and_find_vma(mm, addr, regs);
 	if (unlikely(!vma)) {
-		fault = VM_FAULT_BADMAP;
-		goto done;
+		fault = 0;
+		si_code = SEGV_MAPERR;
+		goto bad_area;
 	}
 
-	if (!(vma->vm_flags & vm_flags))
-		fault = VM_FAULT_BADACCESS;
-	else
-		fault = handle_mm_fault(vma, addr, mm_flags, regs);
+	if (!(vma->vm_flags & vm_flags)) {
+		fault = 0;
+		si_code = SEGV_ACCERR;
+		goto bad_area;
+	}
 
+	fault = handle_mm_fault(vma, addr, mm_flags, regs);
 	/* Quick path to respond to signals */
 	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
@@ -626,13 +628,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	mmap_read_unlock(mm);
 
 done:
-	/*
-	 * Handle the "normal" (no error) case first.
-	 */
-	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
-			      VM_FAULT_BADACCESS))))
+	/* Handle the "normal" (no error) case first. */
+	if (likely(!(fault & VM_FAULT_ERROR)))
 		return 0;
 
+bad_area:
 	/*
 	 * If we are in kernel mode at this point, we have no context to
 	 * handle this fault with.
@@ -667,13 +667,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 
 		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
 	} else {
-		/*
-		 * Something tried to access memory that isn't in our memory
-		 * map.
-		 */
-		arm64_force_sig_fault(SIGSEGV,
-				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
-				      far, inf->name);
+		/* Something tried to access memory that out of memory map */
+		arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
 	}
 
 	return 0;
If bad map or access, directly set si_code to SEGV_MAPERR or SEGV_ACCERR,
also set fault to 0 and goto the error handling, which allows us to drop
the arch's special vm fault reasons.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/arm64/mm/fault.c | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)
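For completeness, the user-visible contract the patch keeps intact — SEGV_MAPERR when no VMA covers the faulting address, SEGV_ACCERR when a VMA exists but forbids the access — can be observed from userspace with a small program like the sketch below (illustrative only; not part of the patch or of LTP):

#define _GNU_SOURCE
#include <signal.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *info, void *ctx)
{
	/* Only async-signal-safe calls here. */
	const char *msg =
		info->si_code == SEGV_MAPERR ? "SEGV_MAPERR\n" :
		info->si_code == SEGV_ACCERR ? "SEGV_ACCERR\n" : "other\n";

	(void)sig;
	(void)ctx;
	write(STDOUT_FILENO, msg, strlen(msg));
	_exit(0);
}

int main(int argc, char **argv)
{
	struct sigaction sa = {
		.sa_sigaction = handler,
		.sa_flags = SA_SIGINFO,
	};
	sigaction(SIGSEGV, &sa, NULL);

	if (argc > 1 && !strcmp(argv[1], "accerr")) {
		/* Mapped but PROT_NONE: the !(vma->vm_flags & vm_flags) path. */
		char *p = mmap(NULL, 4096, PROT_NONE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p != MAP_FAILED)
			*(volatile char *)p = 1;
	} else {
		/* No VMA at all: the !vma path. */
		*(volatile char *)8 = 1;
	}
	return 1;
}

Run with no argument it should print SEGV_MAPERR; run with "accerr" it should print SEGV_ACCERR, matching the si_code values that do_page_fault() now passes explicitly to arm64_force_sig_fault() instead of deriving them from the removed VM_FAULT_BADMAP/VM_FAULT_BADACCESS bits.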