Message ID | 20230227173632.3292573-30-surenb@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Per-VMA locks | expand |
Hi, On 27. 02. 23, 18:36, Suren Baghdasaryan wrote: > Attempt VMA lock-based page fault handling first, and fall back to the > existing mmap_lock-based handling if that fails. > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> > --- > arch/x86/Kconfig | 1 + > arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ > 2 files changed, 37 insertions(+) > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index a825bf031f49..df21fba77db1 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -27,6 +27,7 @@ config X86_64 > # Options that are inherently 64-bit kernel only: > select ARCH_HAS_GIGANTIC_PAGE > select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 > + select ARCH_SUPPORTS_PER_VMA_LOCK > select ARCH_USE_CMPXCHG_LOCKREF > select HAVE_ARCH_SOFT_DIRTY > select MODULES_USE_ELF_RELA > diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c > index a498ae1fbe66..e4399983c50c 100644 > --- a/arch/x86/mm/fault.c > +++ b/arch/x86/mm/fault.c > @@ -19,6 +19,7 @@ > #include <linux/uaccess.h> /* faulthandler_disabled() */ > #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ > #include <linux/mm_types.h> > +#include <linux/mm.h> /* find_and_lock_vma() */ > > #include <asm/cpufeature.h> /* boot_cpu_has, ... */ > #include <asm/traps.h> /* dotraplinkage, ... */ > @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, > } > #endif > > +#ifdef CONFIG_PER_VMA_LOCK > + if (!(flags & FAULT_FLAG_USER)) > + goto lock_mmap; > + > + vma = lock_vma_under_rcu(mm, address); > + if (!vma) > + goto lock_mmap; > + > + if (unlikely(access_error(error_code, vma))) { > + vma_end_read(vma); > + goto lock_mmap; > + } > + fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); > + vma_end_read(vma); > + > + if (!(fault & VM_FAULT_RETRY)) { > + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); > + goto done; > + } > + count_vm_vma_lock_event(VMA_LOCK_RETRY); This is apparently not strong enough as it causes go build failures like: [ 409s] strconv [ 409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2 [ 409s] fatal error: releasep: invalid p state [ 409s] [ 325s] hash/adler32 [ 325s] hash/crc32 [ 325s] cmd/internal/codesign [ 336s] fatal error: runtime: out of memory There are many kinds of similar errors. It happens in 1-3 out of 20 builds only. If I revert the commit on top of 6.4, they all dismiss. Any idea? The downstream report: https://bugzilla.suse.com/show_bug.cgi?id=1212775 > + > + /* Quick path to respond to signals */ > + if (fault_signal_pending(fault, regs)) { > + if (!user_mode(regs)) > + kernelmode_fixup_or_oops(regs, error_code, address, > + SIGBUS, BUS_ADRERR, > + ARCH_DEFAULT_PKEY); > + return; > + } > +lock_mmap: > +#endif /* CONFIG_PER_VMA_LOCK */ > + > /* > * Kernel-mode access to the user address space should only occur > * on well-defined single instructions listed in the exception > @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs, > } > > mmap_read_unlock(mm); > +#ifdef CONFIG_PER_VMA_LOCK > +done: > +#endif > if (likely(!(fault & VM_FAULT_ERROR))) > return; > thanks,
On Thu, Jun 29, 2023 at 7:40 AM Jiri Slaby <jirislaby@kernel.org> wrote: > > Hi, > > On 27. 02. 23, 18:36, Suren Baghdasaryan wrote: > > Attempt VMA lock-based page fault handling first, and fall back to the > > existing mmap_lock-based handling if that fails. > > > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> > > --- > > arch/x86/Kconfig | 1 + > > arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ > > 2 files changed, 37 insertions(+) > > > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > > index a825bf031f49..df21fba77db1 100644 > > --- a/arch/x86/Kconfig > > +++ b/arch/x86/Kconfig > > @@ -27,6 +27,7 @@ config X86_64 > > # Options that are inherently 64-bit kernel only: > > select ARCH_HAS_GIGANTIC_PAGE > > select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 > > + select ARCH_SUPPORTS_PER_VMA_LOCK > > select ARCH_USE_CMPXCHG_LOCKREF > > select HAVE_ARCH_SOFT_DIRTY > > select MODULES_USE_ELF_RELA > > diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c > > index a498ae1fbe66..e4399983c50c 100644 > > --- a/arch/x86/mm/fault.c > > +++ b/arch/x86/mm/fault.c > > @@ -19,6 +19,7 @@ > > #include <linux/uaccess.h> /* faulthandler_disabled() */ > > #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ > > #include <linux/mm_types.h> > > +#include <linux/mm.h> /* find_and_lock_vma() */ > > > > #include <asm/cpufeature.h> /* boot_cpu_has, ... */ > > #include <asm/traps.h> /* dotraplinkage, ... */ > > @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, > > } > > #endif > > > > +#ifdef CONFIG_PER_VMA_LOCK > > + if (!(flags & FAULT_FLAG_USER)) > > + goto lock_mmap; > > + > > + vma = lock_vma_under_rcu(mm, address); > > + if (!vma) > > + goto lock_mmap; > > + > > + if (unlikely(access_error(error_code, vma))) { > > + vma_end_read(vma); > > + goto lock_mmap; > > + } > > + fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); > > + vma_end_read(vma); > > + > > + if (!(fault & VM_FAULT_RETRY)) { > > + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); > > + goto done; > > + } > > + count_vm_vma_lock_event(VMA_LOCK_RETRY); > > This is apparently not strong enough as it causes go build failures like: > > [ 409s] strconv > [ 409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2 > [ 409s] fatal error: releasep: invalid p state > [ 409s] > > [ 325s] hash/adler32 > [ 325s] hash/crc32 > [ 325s] cmd/internal/codesign > [ 336s] fatal error: runtime: out of memory Hi Jiri, Thanks for reporting! I'm not familiar with go builds. Could you please explain the error to me or point me to some documentation to decipher that error? Thanks, Suren. > > There are many kinds of similar errors. It happens in 1-3 out of 20 > builds only. > > If I revert the commit on top of 6.4, they all dismiss. Any idea? > > The downstream report: > https://bugzilla.suse.com/show_bug.cgi?id=1212775 > > > + > > + /* Quick path to respond to signals */ > > + if (fault_signal_pending(fault, regs)) { > > + if (!user_mode(regs)) > > + kernelmode_fixup_or_oops(regs, error_code, address, > > + SIGBUS, BUS_ADRERR, > > + ARCH_DEFAULT_PKEY); > > + return; > > + } > > +lock_mmap: > > +#endif /* CONFIG_PER_VMA_LOCK */ > > + > > /* > > * Kernel-mode access to the user address space should only occur > > * on well-defined single instructions listed in the exception > > @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs, > > } > > > > mmap_read_unlock(mm); > > +#ifdef CONFIG_PER_VMA_LOCK > > +done: > > +#endif > > if (likely(!(fault & VM_FAULT_ERROR))) > > return; > > > > thanks, > -- > js > suse labs >
[CCing the regression list, as it should be in the loop for regressions: https://docs.kernel.org/admin-guide/reporting-regressions.html] On 29.06.23 16:40, Jiri Slaby wrote: > > On 27. 02. 23, 18:36, Suren Baghdasaryan wrote: >> Attempt VMA lock-based page fault handling first, and fall back to the >> existing mmap_lock-based handling if that fails. > [...] >> + fault = handle_mm_fault(vma, address, flags | >> FAULT_FLAG_VMA_LOCK, regs); >> + vma_end_read(vma); >> + >> + if (!(fault & VM_FAULT_RETRY)) { >> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); >> + goto done; >> + } >> + count_vm_vma_lock_event(VMA_LOCK_RETRY); > > This is apparently not strong enough as it causes go build failures like: > > [ 409s] strconv > [ 409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2 > [ 409s] fatal error: releasep: invalid p state > [ 409s] > > [ 325s] hash/adler32 > [ 325s] hash/crc32 > [ 325s] cmd/internal/codesign > [ 336s] fatal error: runtime: out of memory > > There are many kinds of similar errors. It happens in 1-3 out of 20 > builds only. > > If I revert the commit on top of 6.4, they all dismiss. Any idea? > > The downstream report: > https://bugzilla.suse.com/show_bug.cgi?id=1212775 > [...] Thanks for the report. To be sure the issue doesn't fall through the cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression tracking bot: #regzbot ^introduced 0bff0aaea03e2a3ed6bfa3021 https://bugzilla.suse.com/show_bug.cgi?id=1212775 #regzbot title mm: failures when building go in 1-3 out of 20 builds #regzbot ignore-activity Developers: When fixing the issue, remember to add 'Link:' tags pointing to the report (the parent of this mail). See page linked in footer for details. Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat) -- Everything you wanna know about Linux kernel regression tracking: https://linux-regtracking.leemhuis.info/about/#tldr That page also explains what to do if mails like this annoy you.
On 29. 06. 23, 17:30, Suren Baghdasaryan wrote: > On Thu, Jun 29, 2023 at 7:40 AM Jiri Slaby <jirislaby@kernel.org> wrote: >> >> Hi, >> >> On 27. 02. 23, 18:36, Suren Baghdasaryan wrote: >>> Attempt VMA lock-based page fault handling first, and fall back to the >>> existing mmap_lock-based handling if that fails. >>> >>> Signed-off-by: Suren Baghdasaryan <surenb@google.com> >>> --- >>> arch/x86/Kconfig | 1 + >>> arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ >>> 2 files changed, 37 insertions(+) >>> >>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig >>> index a825bf031f49..df21fba77db1 100644 >>> --- a/arch/x86/Kconfig >>> +++ b/arch/x86/Kconfig >>> @@ -27,6 +27,7 @@ config X86_64 >>> # Options that are inherently 64-bit kernel only: >>> select ARCH_HAS_GIGANTIC_PAGE >>> select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 >>> + select ARCH_SUPPORTS_PER_VMA_LOCK >>> select ARCH_USE_CMPXCHG_LOCKREF >>> select HAVE_ARCH_SOFT_DIRTY >>> select MODULES_USE_ELF_RELA >>> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c >>> index a498ae1fbe66..e4399983c50c 100644 >>> --- a/arch/x86/mm/fault.c >>> +++ b/arch/x86/mm/fault.c >>> @@ -19,6 +19,7 @@ >>> #include <linux/uaccess.h> /* faulthandler_disabled() */ >>> #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ >>> #include <linux/mm_types.h> >>> +#include <linux/mm.h> /* find_and_lock_vma() */ >>> >>> #include <asm/cpufeature.h> /* boot_cpu_has, ... */ >>> #include <asm/traps.h> /* dotraplinkage, ... */ >>> @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, >>> } >>> #endif >>> >>> +#ifdef CONFIG_PER_VMA_LOCK >>> + if (!(flags & FAULT_FLAG_USER)) >>> + goto lock_mmap; >>> + >>> + vma = lock_vma_under_rcu(mm, address); >>> + if (!vma) >>> + goto lock_mmap; >>> + >>> + if (unlikely(access_error(error_code, vma))) { >>> + vma_end_read(vma); >>> + goto lock_mmap; >>> + } >>> + fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); >>> + vma_end_read(vma); >>> + >>> + if (!(fault & VM_FAULT_RETRY)) { >>> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); >>> + goto done; >>> + } >>> + count_vm_vma_lock_event(VMA_LOCK_RETRY); >> >> This is apparently not strong enough as it causes go build failures like: >> >> [ 409s] strconv >> [ 409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2 >> [ 409s] fatal error: releasep: invalid p state >> [ 409s] >> >> [ 325s] hash/adler32 >> [ 325s] hash/crc32 >> [ 325s] cmd/internal/codesign >> [ 336s] fatal error: runtime: out of memory > > Hi Jiri, > Thanks for reporting! I'm not familiar with go builds. Could you > please explain the error to me or point me to some documentation to > decipher that error? Sorry, we are on the same boat -- me neither. It only popped up in our (openSUSE) build system and I only tracked it down by bisection. Let me know if I can try something (like a patch or gathering some debug info). thanks,
On 30. 06. 23, 8:35, Jiri Slaby wrote: > On 29. 06. 23, 17:30, Suren Baghdasaryan wrote: >> On Thu, Jun 29, 2023 at 7:40 AM Jiri Slaby <jirislaby@kernel.org> wrote: >>> >>> Hi, >>> >>> On 27. 02. 23, 18:36, Suren Baghdasaryan wrote: >>>> Attempt VMA lock-based page fault handling first, and fall back to the >>>> existing mmap_lock-based handling if that fails. >>>> >>>> Signed-off-by: Suren Baghdasaryan <surenb@google.com> >>>> --- >>>> arch/x86/Kconfig | 1 + >>>> arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ >>>> 2 files changed, 37 insertions(+) >>>> >>>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig >>>> index a825bf031f49..df21fba77db1 100644 >>>> --- a/arch/x86/Kconfig >>>> +++ b/arch/x86/Kconfig >>>> @@ -27,6 +27,7 @@ config X86_64 >>>> # Options that are inherently 64-bit kernel only: >>>> select ARCH_HAS_GIGANTIC_PAGE >>>> select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 >>>> + select ARCH_SUPPORTS_PER_VMA_LOCK >>>> select ARCH_USE_CMPXCHG_LOCKREF >>>> select HAVE_ARCH_SOFT_DIRTY >>>> select MODULES_USE_ELF_RELA >>>> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c >>>> index a498ae1fbe66..e4399983c50c 100644 >>>> --- a/arch/x86/mm/fault.c >>>> +++ b/arch/x86/mm/fault.c >>>> @@ -19,6 +19,7 @@ >>>> #include <linux/uaccess.h> /* >>>> faulthandler_disabled() */ >>>> #include <linux/efi.h> /* >>>> efi_crash_gracefully_on_page_fault()*/ >>>> #include <linux/mm_types.h> >>>> +#include <linux/mm.h> /* find_and_lock_vma() */ >>>> >>>> #include <asm/cpufeature.h> /* boot_cpu_has, >>>> ... */ >>>> #include <asm/traps.h> /* dotraplinkage, >>>> ... */ >>>> @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, >>>> } >>>> #endif >>>> >>>> +#ifdef CONFIG_PER_VMA_LOCK >>>> + if (!(flags & FAULT_FLAG_USER)) >>>> + goto lock_mmap; >>>> + >>>> + vma = lock_vma_under_rcu(mm, address); >>>> + if (!vma) >>>> + goto lock_mmap; >>>> + >>>> + if (unlikely(access_error(error_code, vma))) { >>>> + vma_end_read(vma); >>>> + goto lock_mmap; >>>> + } >>>> + fault = handle_mm_fault(vma, address, flags | >>>> FAULT_FLAG_VMA_LOCK, regs); >>>> + vma_end_read(vma); >>>> + >>>> + if (!(fault & VM_FAULT_RETRY)) { >>>> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); >>>> + goto done; >>>> + } >>>> + count_vm_vma_lock_event(VMA_LOCK_RETRY); >>> >>> This is apparently not strong enough as it causes go build failures >>> like: >>> >>> [ 409s] strconv >>> [ 409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2 >>> [ 409s] fatal error: releasep: invalid p state >>> [ 409s] >>> >>> [ 325s] hash/adler32 >>> [ 325s] hash/crc32 >>> [ 325s] cmd/internal/codesign >>> [ 336s] fatal error: runtime: out of memory >> >> Hi Jiri, >> Thanks for reporting! I'm not familiar with go builds. Could you >> please explain the error to me or point me to some documentation to >> decipher that error? > > Sorry, we are on the same boat -- me neither. It only popped up in our > (openSUSE) build system and I only tracked it down by bisection. Let me > know if I can try something (like a patch or gathering some debug info). FWIW, a failed build log: https://decibel.fi.muni.cz/~xslaby/n/vma/log.txt and a strace for it: https://decibel.fi.muni.cz/~xslaby/n/vma/strace.txt An excerpt from the log: [ 55s] runtime: marked free object in span 0x7fca6824bec8, elemsize=192 freeindex=0 (bad use of unsafe.Pointer? try -d=checkptr) [ 55s] 0xc0002f2000 alloc marked [ 55s] 0xc0002f20c0 alloc marked [ 55s] 0xc0002f2180 alloc marked [ 55s] 0xc0002f2240 free unmarked [ 55s] 0xc0002f2300 alloc marked [ 55s] 0xc0002f23c0 alloc marked [ 55s] 0xc0002f2480 alloc marked [ 55s] 0xc0002f2540 alloc marked [ 55s] 0xc0002f2600 alloc marked [ 55s] 0xc0002f26c0 alloc marked [ 55s] 0xc0002f2780 alloc marked [ 55s] 0xc0002f2840 alloc marked [ 55s] 0xc0002f2900 alloc marked [ 55s] 0xc0002f29c0 free unmarked [ 55s] 0xc0002f2a80 alloc marked [ 55s] 0xc0002f2b40 alloc marked [ 55s] 0xc0002f2c00 alloc marked [ 55s] 0xc0002f2cc0 alloc marked [ 55s] 0xc0002f2d80 alloc marked [ 55s] 0xc0002f2e40 alloc marked [ 55s] 0xc0002f2f00 alloc marked [ 55s] 0xc0002f2fc0 alloc marked [ 55s] 0xc0002f3080 alloc marked [ 55s] 0xc0002f3140 alloc marked [ 55s] 0xc0002f3200 alloc marked [ 55s] 0xc0002f32c0 alloc marked [ 55s] 0xc0002f3380 free unmarked [ 55s] 0xc0002f3440 free marked zombie An excerpt from strace: > 2348 clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa6a1b990, parent_tid=0x7fcaa6a1b990, exit_signal=0, stack=0x7fcaa621b000, stack_size=0x7ffe00, tls=0x7fcaa6a1b6c0} => {parent_tid=[2350]}, 88) = 2350 > 2348 clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> > 2370 <... mmap resumed>) = 0x7fca68249000 > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished ...> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some reason 0x7fca6824bec8 in that region is "bad". > thanks,--
On 30. 06. 23, 10:28, Jiri Slaby wrote: > > 2348 > clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 > > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 > > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 > > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 > > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 > > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 > > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> > > 2370 <... mmap resumed>) = 0x7fca68249000 > > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 > > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 > > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 > > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 > > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished > ...> > > I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON > 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some > reason 0x7fca6824bec8 in that region is "bad". As I was noticed, this might be as well be a fail of the go's inter-thread communication (or alike) too. It might now be only more exposed with vma-based locks as we can do more parallelism now. There are older hard to reproduce bugs in go with similar symptoms (we see this error sometimes now too): https://github.com/golang/go/issues/15246 Or this 2016 bug is a red herring. Hard to tell... >> thanks,
On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: > > On 30. 06. 23, 10:28, Jiri Slaby wrote: > > > 2348 > > clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 > > > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 > > > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 > > > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 > > > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 > > > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 > > > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, > > MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> > > > 2370 <... mmap resumed>) = 0x7fca68249000 > > > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 > > > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 > > > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 > > > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 > > > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished > > ...> > > > > I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON > > 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some > > reason 0x7fca6824bec8 in that region is "bad". Thanks for the analysis Jiri. Is it possible from these logs to identify whether 2370 finished the mmap operation before 2395 tried to access 0x7fca6824bec8? That access has to happen only after mmap finishes mapping the region. > > As I was noticed, this might be as well be a fail of the go's > inter-thread communication (or alike) too. It might now be only more > exposed with vma-based locks as we can do more parallelism now. Yes, with multithreaded processes like these where threads are mapping and accessing memory areas, per-VMA locks should allow for greater parallelism. So, if there is a race like the one I asked above, it might become more pronounced with per-VMA locks. I'll double check the code, but from Kernel's POV mmap would take the mmap_lock for write then will lock the VMA lock for write. That should prevent any page fault handlers from accessing this VMA in parallel until writers release the locks. Page fault path would try to find the VMA without any lock and then will try to read-lock that VMA. If it fails it will fall back to mmap_lock. So, if the writer started first and obtained the VMA lock, the reader will fall back to mmap_lock and will block until the writer releases the mmap_lock. If the reader got VMA read lock first then the writer will block while obtaining the VMA's write lock. However for your scenario, the reader (page fault) might be getting here before the writer (mmap) and upon not finding the VMA it is looking for, it will fail. Please let me know if you can verify this scenario. Thanks, Suren. > > There are older hard to reproduce bugs in go with similar symptoms (we > see this error sometimes now too): > https://github.com/golang/go/issues/15246 > > Or this 2016 bug is a red herring. Hard to tell... > > >> thanks, > -- > js > suse labs >
On 29.06.23 16:40, Jiri Slaby wrote: > On 27. 02. 23, 18:36, Suren Baghdasaryan wrote: >> Attempt VMA lock-based page fault handling first, and fall back to the >> existing mmap_lock-based handling if that fails. >> >> Signed-off-by: Suren Baghdasaryan <surenb@google.com> >> --- >> arch/x86/Kconfig | 1 + >> arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ >> 2 files changed, 37 insertions(+) >> >> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig >> index a825bf031f49..df21fba77db1 100644 >> --- a/arch/x86/Kconfig >> +++ b/arch/x86/Kconfig >> @@ -27,6 +27,7 @@ config X86_64 >> # Options that are inherently 64-bit kernel only: >> select ARCH_HAS_GIGANTIC_PAGE >> select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 >> + select ARCH_SUPPORTS_PER_VMA_LOCK >> select ARCH_USE_CMPXCHG_LOCKREF >> select HAVE_ARCH_SOFT_DIRTY >> select MODULES_USE_ELF_RELA >> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c >> index a498ae1fbe66..e4399983c50c 100644 >> --- a/arch/x86/mm/fault.c >> +++ b/arch/x86/mm/fault.c >> @@ -19,6 +19,7 @@ >> #include <linux/uaccess.h> /* faulthandler_disabled() */ >> #include <linux/efi.h> /* >> efi_crash_gracefully_on_page_fault()*/ >> #include <linux/mm_types.h> >> +#include <linux/mm.h> /* find_and_lock_vma() */ >> #include <asm/cpufeature.h> /* boot_cpu_has, ... */ >> #include <asm/traps.h> /* dotraplinkage, ... */ >> @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, >> } >> #endif >> +#ifdef CONFIG_PER_VMA_LOCK >> + if (!(flags & FAULT_FLAG_USER)) >> + goto lock_mmap; >> + >> + vma = lock_vma_under_rcu(mm, address); >> + if (!vma) >> + goto lock_mmap; >> + >> + if (unlikely(access_error(error_code, vma))) { >> + vma_end_read(vma); >> + goto lock_mmap; >> + } >> + fault = handle_mm_fault(vma, address, flags | >> FAULT_FLAG_VMA_LOCK, regs); >> + vma_end_read(vma); >> + >> + if (!(fault & VM_FAULT_RETRY)) { >> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); >> + goto done; >> + } >> + count_vm_vma_lock_event(VMA_LOCK_RETRY); > > This is apparently not strong enough as it causes go build failures like: TWIMC & for the record: there is another report about trouble caused by this change; for details see https://bugzilla.kernel.org/show_bug.cgi?id=217624 And a "forward to devs and lists" thread about that report: https://lore.kernel.org/all/facbfec3-837a-51ed-85fa-31021c17d6ef@gmail.com/ Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat) -- Everything you wanna know about Linux kernel regression tracking: https://linux-regtracking.leemhuis.info/about/#tldr If I did something stupid, please tell me, as explained on that page. > [ 409s] strconv > [ 409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2 > [ 409s] fatal error: releasep: invalid p state > [ 409s] > > [ 325s] hash/adler32 > [ 325s] hash/crc32 > [ 325s] cmd/internal/codesign > [ 336s] fatal error: runtime: out of memory > > There are many kinds of similar errors. It happens in 1-3 out of 20 > builds only. > > If I revert the commit on top of 6.4, they all dismiss. Any idea? > > The downstream report: > https://bugzilla.suse.com/show_bug.cgi?id=1212775 > >> + >> + /* Quick path to respond to signals */ >> + if (fault_signal_pending(fault, regs)) { >> + if (!user_mode(regs)) >> + kernelmode_fixup_or_oops(regs, error_code, address, >> + SIGBUS, BUS_ADRERR, >> + ARCH_DEFAULT_PKEY); >> + return; >> + } >> +lock_mmap: >> +#endif /* CONFIG_PER_VMA_LOCK */ >> + >> /* >> * Kernel-mode access to the user address space should only occur >> * on well-defined single instructions listed in the exception >> @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs, >> } >> mmap_read_unlock(mm); >> +#ifdef CONFIG_PER_VMA_LOCK >> +done: >> +#endif >> if (likely(!(fault & VM_FAULT_ERROR))) >> return; >> > > thanks,
Cc Jacob Young (from kernel bugzilla) On 30. 06. 23, 19:40, Suren Baghdasaryan wrote: > On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: >> >> On 30. 06. 23, 10:28, Jiri Slaby wrote: >>> > 2348 >>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 >>> > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 >>> > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 >>> > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 >>> > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 >>> > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 >>> > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, >>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> >>> > 2370 <... mmap resumed>) = 0x7fca68249000 >>> > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 >>> > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 >>> > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 >>> > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 >>> > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished >>> ...> >>> >>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON >>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some >>> reason 0x7fca6824bec8 in that region is "bad". > > Thanks for the analysis Jiri. > Is it possible from these logs to identify whether 2370 finished the > mmap operation before 2395 tried to access 0x7fca6824bec8? That access > has to happen only after mmap finishes mapping the region. Hi, it's hard to tell, but I assume so. For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in: https://bugzilla.kernel.org/show_bug.cgi?id=217624 ;) FWIW, I can reproduce using the test case too. thanks,
On 2023-07-03 12:47, Jiri Slaby wrote: > Cc Jacob Young (from kernel bugzilla) > > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote: >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: >>> >>> On 30. 06. 23, 10:28, Jiri Slaby wrote: >>>> > 2348 >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 >>>> > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 >>>> > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 >>>> > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 >>>> > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 >>>> > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 >>>> > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> >>>> > 2370 <... mmap resumed>) = 0x7fca68249000 >>>> > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 >>>> > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 >>>> > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 >>>> > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 >>>> > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished >>>> ...> >>>> >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some >>>> reason 0x7fca6824bec8 in that region is "bad". >> >> Thanks for the analysis Jiri. >> Is it possible from these logs to identify whether 2370 finished the >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access >> has to happen only after mmap finishes mapping the region. > > Hi, > > it's hard to tell, but I assume so. > > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in: > https://bugzilla.kernel.org/show_bug.cgi?id=217624 > ;) > > FWIW, I can reproduce using the test case too. > > thanks, As another (admittedly correlation-only) data point, I noticed at least hourly crashes of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x. After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up commits in 6.4.1 - it has been rock stable again, for several hours now. cheers Holger
On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte <holger@applied-asynchrony.com> wrote: > > On 2023-07-03 12:47, Jiri Slaby wrote: > > Cc Jacob Young (from kernel bugzilla) > > > > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote: > >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: > >>> > >>> On 30. 06. 23, 10:28, Jiri Slaby wrote: > >>>> > 2348 > >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 > >>>> > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 > >>>> > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 > >>>> > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 > >>>> > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 > >>>> > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 > >>>> > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, > >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> > >>>> > 2370 <... mmap resumed>) = 0x7fca68249000 > >>>> > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 > >>>> > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 > >>>> > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 > >>>> > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 > >>>> > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished > >>>> ...> > >>>> > >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON > >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some > >>>> reason 0x7fca6824bec8 in that region is "bad". > >> > >> Thanks for the analysis Jiri. > >> Is it possible from these logs to identify whether 2370 finished the > >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access > >> has to happen only after mmap finishes mapping the region. > > > > Hi, > > > > it's hard to tell, but I assume so. > > > > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in: > > https://bugzilla.kernel.org/show_bug.cgi?id=217624 > > ;) > > > > FWIW, I can reproduce using the test case too. Thanks for the reproducer, Jiri! Let me try it and see if I can figure this one out. > > > > thanks, > > As another (admittedly correlation-only) data point, I noticed at least hourly crashes > of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x. > After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up > commits in 6.4.1 - it has been rock stable again, for several hours now. > > cheers > Holger
On Mon, Jul 3, 2023 at 2:45 PM Suren Baghdasaryan <surenb@google.com> wrote: > > On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte > <holger@applied-asynchrony.com> wrote: > > > > On 2023-07-03 12:47, Jiri Slaby wrote: > > > Cc Jacob Young (from kernel bugzilla) > > > > > > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote: > > >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: > > >>> > > >>> On 30. 06. 23, 10:28, Jiri Slaby wrote: > > >>>> > 2348 > > >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 > > >>>> > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 > > >>>> > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 > > >>>> > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 > > >>>> > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 > > >>>> > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 > > >>>> > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, > > >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> > > >>>> > 2370 <... mmap resumed>) = 0x7fca68249000 > > >>>> > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 > > >>>> > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 > > >>>> > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 > > >>>> > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 > > >>>> > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished > > >>>> ...> > > >>>> > > >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON > > >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some > > >>>> reason 0x7fca6824bec8 in that region is "bad". > > >> > > >> Thanks for the analysis Jiri. > > >> Is it possible from these logs to identify whether 2370 finished the > > >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access > > >> has to happen only after mmap finishes mapping the region. > > > > > > Hi, > > > > > > it's hard to tell, but I assume so. > > > > > > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in: > > > https://bugzilla.kernel.org/show_bug.cgi?id=217624 > > > ;) > > > > > > FWIW, I can reproduce using the test case too. > > Thanks for the reproducer, Jiri! > Let me try it and see if I can figure this one out. Interestingly I can't reproduce it with qemu emulator (reproducer returns 1) but my host machine with the same kernel reproduces it every time. Will try tracing the major code paths to see what's going on. I have to leave for a day but will resume in the evening once I'm home. Thanks, Suren. > > > > > > > thanks, > > > > As another (admittedly correlation-only) data point, I noticed at least hourly crashes > > of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x. > > After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up > > commits in 6.4.1 - it has been rock stable again, for several hours now. > > > > cheers > > Holger
On Mon, Jul 3, 2023 at 8:24 AM Suren Baghdasaryan <surenb@google.com> wrote: > > On Mon, Jul 3, 2023 at 2:45 PM Suren Baghdasaryan <surenb@google.com> wrote: > > > > On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte > > <holger@applied-asynchrony.com> wrote: > > > > > > On 2023-07-03 12:47, Jiri Slaby wrote: > > > > Cc Jacob Young (from kernel bugzilla) > > > > > > > > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote: > > > >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: > > > >>> > > > >>> On 30. 06. 23, 10:28, Jiri Slaby wrote: > > > >>>> > 2348 > > > >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 > > > >>>> > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 > > > >>>> > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 > > > >>>> > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 > > > >>>> > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 > > > >>>> > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 > > > >>>> > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, > > > >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> > > > >>>> > 2370 <... mmap resumed>) = 0x7fca68249000 > > > >>>> > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 > > > >>>> > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 > > > >>>> > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 > > > >>>> > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 > > > >>>> > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished > > > >>>> ...> > > > >>>> > > > >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON > > > >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some > > > >>>> reason 0x7fca6824bec8 in that region is "bad". > > > >> > > > >> Thanks for the analysis Jiri. > > > >> Is it possible from these logs to identify whether 2370 finished the > > > >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access > > > >> has to happen only after mmap finishes mapping the region. > > > > > > > > Hi, > > > > > > > > it's hard to tell, but I assume so. > > > > > > > > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in: > > > > https://bugzilla.kernel.org/show_bug.cgi?id=217624 > > > > ;) > > > > > > > > FWIW, I can reproduce using the test case too. > > > > Thanks for the reproducer, Jiri! > > Let me try it and see if I can figure this one out. > > Interestingly I can't reproduce it with qemu emulator (reproducer > returns 1) but my host machine with the same kernel reproduces it > every time. Will try tracing the major code paths to see what's going > on. > I have to leave for a day but will resume in the evening once I'm home. I posted a patch to disable per-VMA locks by default for now: https://lore.kernel.org/all/20230703182150.2193578-1-surenb@google.com/ Will re-enable them once we figure this issue out. Thanks, Suren. > Thanks, > Suren. > > > > > > > > > > > thanks, > > > > > > As another (admittedly correlation-only) data point, I noticed at least hourly crashes > > > of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x. > > > After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up > > > commits in 6.4.1 - it has been rock stable again, for several hours now. > > > > > > cheers > > > Holger
On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte <holger@applied-asynchrony.com> wrote: > > On 2023-07-03 12:47, Jiri Slaby wrote: > > Cc Jacob Young (from kernel bugzilla) > > > > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote: > >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: > >>> > >>> On 30. 06. 23, 10:28, Jiri Slaby wrote: > >>>> > 2348 > >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 > >>>> > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 > >>>> > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 > >>>> > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 > >>>> > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 > >>>> > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 > >>>> > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, > >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> > >>>> > 2370 <... mmap resumed>) = 0x7fca68249000 > >>>> > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 > >>>> > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 > >>>> > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 > >>>> > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 > >>>> > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished > >>>> ...> > >>>> > >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON > >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some > >>>> reason 0x7fca6824bec8 in that region is "bad". > >> > >> Thanks for the analysis Jiri. > >> Is it possible from these logs to identify whether 2370 finished the > >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access > >> has to happen only after mmap finishes mapping the region. > > > > Hi, > > > > it's hard to tell, but I assume so. > > > > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in: > > https://bugzilla.kernel.org/show_bug.cgi?id=217624 > > ;) > > > > FWIW, I can reproduce using the test case too. > > > > thanks, > > As another (admittedly correlation-only) data point, I noticed at least hourly crashes > of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x. > After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up > commits in 6.4.1 - it has been rock stable again, for several hours now. Jiri, Holger, would you be able to try https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/ and see if your issues still exist? > > cheers > Holger
On 2023-07-06 00:15, Suren Baghdasaryan wrote: > On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte > <holger@applied-asynchrony.com> wrote: >> >> On 2023-07-03 12:47, Jiri Slaby wrote: >>> Cc Jacob Young (from kernel bugzilla) >>> >>> On 30. 06. 23, 19:40, Suren Baghdasaryan wrote: >>>> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: >>>>> >>>>> On 30. 06. 23, 10:28, Jiri Slaby wrote: >>>>>> > 2348 >>>>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 >>>>>> > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 >>>>>> > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 >>>>>> > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 >>>>>> > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 >>>>>> > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 >>>>>> > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, >>>>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> >>>>>> > 2370 <... mmap resumed>) = 0x7fca68249000 >>>>>> > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 >>>>>> > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 >>>>>> > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 >>>>>> > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 >>>>>> > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished >>>>>> ...> >>>>>> >>>>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON >>>>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some >>>>>> reason 0x7fca6824bec8 in that region is "bad". >>>> >>>> Thanks for the analysis Jiri. >>>> Is it possible from these logs to identify whether 2370 finished the >>>> mmap operation before 2395 tried to access 0x7fca6824bec8? That access >>>> has to happen only after mmap finishes mapping the region. >>> >>> Hi, >>> >>> it's hard to tell, but I assume so. >>> >>> For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in: >>> https://bugzilla.kernel.org/show_bug.cgi?id=217624 >>> ;) >>> >>> FWIW, I can reproduce using the test case too. >>> >>> thanks, >> >> As another (admittedly correlation-only) data point, I noticed at least hourly crashes >> of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x. >> After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up >> commits in 6.4.1 - it has been rock stable again, for several hours now. > > Jiri, Holger, would you be able to try > https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/ > and see if your issues still exist? Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of your patches on a second machine (old Intel Sandy Bridge workstation) to be my crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work (but no exhaustive tests yet). I will also rerun a few reboot laps, just to exercise this a bit harder and see if something comes up. Tomorrow I'll also try again on my Zen2 Thinkpad and will report back. Fingers crossed! cheers Holger
On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte <holger@applied-asynchrony.com> wrote: > > On 2023-07-06 00:15, Suren Baghdasaryan wrote: > > On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte > > <holger@applied-asynchrony.com> wrote: > >> > >> On 2023-07-03 12:47, Jiri Slaby wrote: > >>> Cc Jacob Young (from kernel bugzilla) > >>> > >>> On 30. 06. 23, 19:40, Suren Baghdasaryan wrote: > >>>> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote: > >>>>> > >>>>> On 30. 06. 23, 10:28, Jiri Slaby wrote: > >>>>>> > 2348 > >>>>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351 > >>>>>> > 2350 <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372 > >>>>>> > 2351 <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354 > >>>>>> > 2351 <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357 > >>>>>> > 2354 <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355 > >>>>>> > 2355 <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370 > >>>>>> > 2370 mmap(NULL, 262144, PROT_READ|PROT_WRITE, > >>>>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...> > >>>>>> > 2370 <... mmap resumed>) = 0x7fca68249000 > >>>>>> > 2372 <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384 > >>>>>> > 2384 <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388 > >>>>>> > 2388 <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392 > >>>>>> > 2392 <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395 > >>>>>> > 2395 write(2, "runtime: marked free object in s"..., 36 <unfinished > >>>>>> ...> > >>>>>> > >>>>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON > >>>>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some > >>>>>> reason 0x7fca6824bec8 in that region is "bad". > >>>> > >>>> Thanks for the analysis Jiri. > >>>> Is it possible from these logs to identify whether 2370 finished the > >>>> mmap operation before 2395 tried to access 0x7fca6824bec8? That access > >>>> has to happen only after mmap finishes mapping the region. > >>> > >>> Hi, > >>> > >>> it's hard to tell, but I assume so. > >>> > >>> For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in: > >>> https://bugzilla.kernel.org/show_bug.cgi?id=217624 > >>> ;) > >>> > >>> FWIW, I can reproduce using the test case too. > >>> > >>> thanks, > >> > >> As another (admittedly correlation-only) data point, I noticed at least hourly crashes > >> of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x. > >> After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up > >> commits in 6.4.1 - it has been rock stable again, for several hours now. > > > > Jiri, Holger, would you be able to try > > https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/ > > and see if your issues still exist? > > Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of > your patches on a second machine (old Intel Sandy Bridge workstation) to be my > crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set > PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work > (but no exhaustive tests yet). I will also rerun a few reboot laps, just to > exercise this a bit harder and see if something comes up. > > Tomorrow I'll also try again on my Zen2 Thinkpad and will report back. > > Fingers crossed! Thanks! This is promising. > > cheers > Holger
On 2023-07-06 00:55, Suren Baghdasaryan wrote: > On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte > <holger@applied-asynchrony.com> wrote: >>> Jiri, Holger, would you be able to try >>> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/ >>> and see if your issues still exist? >> >> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of >> your patches on a second machine (old Intel Sandy Bridge workstation) to be my >> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set >> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work >> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to >> exercise this a bit harder and see if something comes up. >> >> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back. >> >> Fingers crossed! > > Thanks! This is promising. Indeed it was, and still is. :) This morning I wrangled 6.4.2 + v4 of the patches into all my machines, enabled PER_VMA_LOCK=y, removed BROKEN and so far everything has been humming along just fine. One machine has been compiling for several hours without issue. My Zen2 thinkpad - which was previously really unhappy with enabled PER_VMA_LOCK - has booted a few times without hiccup, and Firefox has been happily roaming the interwebs for several hours as well. \o/ cheers Holger
On Thu, Jul 6, 2023 at 7:27 AM Holger Hoffstätte <holger@applied-asynchrony.com> wrote: > > On 2023-07-06 00:55, Suren Baghdasaryan wrote: > > On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte > > <holger@applied-asynchrony.com> wrote: > >>> Jiri, Holger, would you be able to try > >>> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/ > >>> and see if your issues still exist? > >> > >> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of > >> your patches on a second machine (old Intel Sandy Bridge workstation) to be my > >> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set > >> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work > >> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to > >> exercise this a bit harder and see if something comes up. > >> > >> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back. > >> > >> Fingers crossed! > > > > Thanks! This is promising. > > Indeed it was, and still is. :) > > This morning I wrangled 6.4.2 + v4 of the patches into all my machines, > enabled PER_VMA_LOCK=y, removed BROKEN and so far everything has been humming > along just fine. One machine has been compiling for several hours without issue. > My Zen2 thinkpad - which was previously really unhappy with enabled PER_VMA_LOCK - > has booted a few times without hiccup, and Firefox has been happily roaming the > interwebs for several hours as well. \o/ This is great! Thanks so much for verifying! Andrew, if it's not too late, maybe we can drop the BROKEN dependency now that we have this confirmation? > > cheers > Holger
On Thu, Jul 6, 2023 at 9:11 AM Suren Baghdasaryan <surenb@google.com> wrote: > > On Thu, Jul 6, 2023 at 7:27 AM Holger Hoffstätte > <holger@applied-asynchrony.com> wrote: > > > > On 2023-07-06 00:55, Suren Baghdasaryan wrote: > > > On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte > > > <holger@applied-asynchrony.com> wrote: > > >>> Jiri, Holger, would you be able to try > > >>> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/ > > >>> and see if your issues still exist? > > >> > > >> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of > > >> your patches on a second machine (old Intel Sandy Bridge workstation) to be my > > >> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set > > >> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work > > >> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to > > >> exercise this a bit harder and see if something comes up. > > >> > > >> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back. > > >> > > >> Fingers crossed! > > > > > > Thanks! This is promising. > > > > Indeed it was, and still is. :) > > > > This morning I wrangled 6.4.2 + v4 of the patches into all my machines, > > enabled PER_VMA_LOCK=y, removed BROKEN and so far everything has been humming > > along just fine. One machine has been compiling for several hours without issue. > > My Zen2 thinkpad - which was previously really unhappy with enabled PER_VMA_LOCK - > > has booted a few times without hiccup, and Firefox has been happily roaming the > > interwebs for several hours as well. \o/ > > This is great! Thanks so much for verifying! > Andrew, if it's not too late, maybe we can drop the BROKEN dependency > now that we have this confirmation? Liam pointed me to another possible issue with per-VMA locks due to the recent changes in stack expansion locking rules. So, it's probably safer to keep it marked BROKEN. I'll post a simple fix for that shortly. > > > > > cheers > > Holger
On Thu, Jul 6, 2023 at 7:23 PM Suren Baghdasaryan <surenb@google.com> wrote: > > On Thu, Jul 6, 2023 at 9:11 AM Suren Baghdasaryan <surenb@google.com> wrote: > > > > On Thu, Jul 6, 2023 at 7:27 AM Holger Hoffstätte > > <holger@applied-asynchrony.com> wrote: > > > > > > On 2023-07-06 00:55, Suren Baghdasaryan wrote: > > > > On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte > > > > <holger@applied-asynchrony.com> wrote: > > > >>> Jiri, Holger, would you be able to try > > > >>> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/ > > > >>> and see if your issues still exist? > > > >> > > > >> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of > > > >> your patches on a second machine (old Intel Sandy Bridge workstation) to be my > > > >> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set > > > >> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work > > > >> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to > > > >> exercise this a bit harder and see if something comes up. > > > >> > > > >> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back. > > > >> > > > >> Fingers crossed! > > > > > > > > Thanks! This is promising. > > > > > > Indeed it was, and still is. :) > > > > > > This morning I wrangled 6.4.2 + v4 of the patches into all my machines, > > > enabled PER_VMA_LOCK=y, removed BROKEN and so far everything has been humming > > > along just fine. One machine has been compiling for several hours without issue. > > > My Zen2 thinkpad - which was previously really unhappy with enabled PER_VMA_LOCK - > > > has booted a few times without hiccup, and Firefox has been happily roaming the > > > interwebs for several hours as well. \o/ > > > > This is great! Thanks so much for verifying! > > Andrew, if it's not too late, maybe we can drop the BROKEN dependency > > now that we have this confirmation? > > Liam pointed me to another possible issue with per-VMA locks due to > the recent changes in stack expansion locking rules. So, it's probably > safer to keep it marked BROKEN. I'll post a simple fix for that > shortly. The fix along with another less critical one is posted at https://lore.kernel.org/all/20230707043211.3682710-1-surenb@google.com/ > > > > > > > > > cheers > > > Holger
On 06. 07. 23, 0:15, Suren Baghdasaryan wrote: > Jiri, Holger, would you be able to try > https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/ > and see if your issues still exist? FWIW 6.4.3 (contains the series) is fine again. thanks,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a825bf031f49..df21fba77db1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86_64 # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a498ae1fbe66..e4399983c50c 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -19,6 +19,7 @@ #include <linux/uaccess.h> /* faulthandler_disabled() */ #include <linux/efi.h> /* efi_crash_gracefully_on_page_fault()*/ #include <linux/mm_types.h> +#include <linux/mm.h> /* find_and_lock_vma() */ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/traps.h> /* dotraplinkage, ... */ @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs, } #endif +#ifdef CONFIG_PER_VMA_LOCK + if (!(flags & FAULT_FLAG_USER)) + goto lock_mmap; + + vma = lock_vma_under_rcu(mm, address); + if (!vma) + goto lock_mmap; + + if (unlikely(access_error(error_code, vma))) { + vma_end_read(vma); + goto lock_mmap; + } + fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); + vma_end_read(vma); + + if (!(fault & VM_FAULT_RETRY)) { + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; + } + count_vm_vma_lock_event(VMA_LOCK_RETRY); + + /* Quick path to respond to signals */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + kernelmode_fixup_or_oops(regs, error_code, address, + SIGBUS, BUS_ADRERR, + ARCH_DEFAULT_PKEY); + return; + } +lock_mmap: +#endif /* CONFIG_PER_VMA_LOCK */ + /* * Kernel-mode access to the user address space should only occur * on well-defined single instructions listed in the exception @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs, } mmap_read_unlock(mm); +#ifdef CONFIG_PER_VMA_LOCK +done: +#endif if (likely(!(fault & VM_FAULT_ERROR))) return;
Attempt VMA lock-based page fault handling first, and fall back to the existing mmap_lock-based handling if that fails. Signed-off-by: Suren Baghdasaryan <surenb@google.com> --- arch/x86/Kconfig | 1 + arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+)