diff mbox series

[v4,29/33] x86/mm: try VMA lock-based page fault handling first

Message ID 20230227173632.3292573-30-surenb@google.com (mailing list archive)
State New
Headers show
Series Per-VMA locks | expand

Commit Message

Suren Baghdasaryan Feb. 27, 2023, 5:36 p.m. UTC
Attempt VMA lock-based page fault handling first, and fall back to the
existing mmap_lock-based handling if that fails.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 arch/x86/Kconfig    |  1 +
 arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

Comments

Jiri Slaby June 29, 2023, 2:40 p.m. UTC | #1
Hi,

On 27. 02. 23, 18:36, Suren Baghdasaryan wrote:
> Attempt VMA lock-based page fault handling first, and fall back to the
> existing mmap_lock-based handling if that fails.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
>   arch/x86/Kconfig    |  1 +
>   arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++
>   2 files changed, 37 insertions(+)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index a825bf031f49..df21fba77db1 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -27,6 +27,7 @@ config X86_64
>   	# Options that are inherently 64-bit kernel only:
>   	select ARCH_HAS_GIGANTIC_PAGE
>   	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
> +	select ARCH_SUPPORTS_PER_VMA_LOCK
>   	select ARCH_USE_CMPXCHG_LOCKREF
>   	select HAVE_ARCH_SOFT_DIRTY
>   	select MODULES_USE_ELF_RELA
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index a498ae1fbe66..e4399983c50c 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -19,6 +19,7 @@
>   #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
>   #include <linux/efi.h>			/* efi_crash_gracefully_on_page_fault()*/
>   #include <linux/mm_types.h>
> +#include <linux/mm.h>			/* find_and_lock_vma() */
>   
>   #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
>   #include <asm/traps.h>			/* dotraplinkage, ...		*/
> @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs,
>   	}
>   #endif
>   
> +#ifdef CONFIG_PER_VMA_LOCK
> +	if (!(flags & FAULT_FLAG_USER))
> +		goto lock_mmap;
> +
> +	vma = lock_vma_under_rcu(mm, address);
> +	if (!vma)
> +		goto lock_mmap;
> +
> +	if (unlikely(access_error(error_code, vma))) {
> +		vma_end_read(vma);
> +		goto lock_mmap;
> +	}
> +	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
> +	vma_end_read(vma);
> +
> +	if (!(fault & VM_FAULT_RETRY)) {
> +		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> +		goto done;
> +	}
> +	count_vm_vma_lock_event(VMA_LOCK_RETRY);

This is apparently not strong enough as it causes go build failures like:

[  409s] strconv
[  409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2
[  409s] fatal error: releasep: invalid p state
[  409s]

[  325s] hash/adler32
[  325s] hash/crc32
[  325s] cmd/internal/codesign
[  336s] fatal error: runtime: out of memory

There are many kinds of similar errors. It happens in 1-3 out of 20 
builds only.

If I revert the commit on top of 6.4, they all dismiss. Any idea?

The downstream report:
https://bugzilla.suse.com/show_bug.cgi?id=1212775

> +
> +	/* Quick path to respond to signals */
> +	if (fault_signal_pending(fault, regs)) {
> +		if (!user_mode(regs))
> +			kernelmode_fixup_or_oops(regs, error_code, address,
> +						 SIGBUS, BUS_ADRERR,
> +						 ARCH_DEFAULT_PKEY);
> +		return;
> +	}
> +lock_mmap:
> +#endif /* CONFIG_PER_VMA_LOCK */
> +
>   	/*
>   	 * Kernel-mode access to the user address space should only occur
>   	 * on well-defined single instructions listed in the exception
> @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs,
>   	}
>   
>   	mmap_read_unlock(mm);
> +#ifdef CONFIG_PER_VMA_LOCK
> +done:
> +#endif
>   	if (likely(!(fault & VM_FAULT_ERROR)))
>   		return;
>   

thanks,
Suren Baghdasaryan June 29, 2023, 3:30 p.m. UTC | #2
On Thu, Jun 29, 2023 at 7:40 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>
> Hi,
>
> On 27. 02. 23, 18:36, Suren Baghdasaryan wrote:
> > Attempt VMA lock-based page fault handling first, and fall back to the
> > existing mmap_lock-based handling if that fails.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> > ---
> >   arch/x86/Kconfig    |  1 +
> >   arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++
> >   2 files changed, 37 insertions(+)
> >
> > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> > index a825bf031f49..df21fba77db1 100644
> > --- a/arch/x86/Kconfig
> > +++ b/arch/x86/Kconfig
> > @@ -27,6 +27,7 @@ config X86_64
> >       # Options that are inherently 64-bit kernel only:
> >       select ARCH_HAS_GIGANTIC_PAGE
> >       select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
> > +     select ARCH_SUPPORTS_PER_VMA_LOCK
> >       select ARCH_USE_CMPXCHG_LOCKREF
> >       select HAVE_ARCH_SOFT_DIRTY
> >       select MODULES_USE_ELF_RELA
> > diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> > index a498ae1fbe66..e4399983c50c 100644
> > --- a/arch/x86/mm/fault.c
> > +++ b/arch/x86/mm/fault.c
> > @@ -19,6 +19,7 @@
> >   #include <linux/uaccess.h>          /* faulthandler_disabled()      */
> >   #include <linux/efi.h>                      /* efi_crash_gracefully_on_page_fault()*/
> >   #include <linux/mm_types.h>
> > +#include <linux/mm.h>                        /* find_and_lock_vma() */
> >
> >   #include <asm/cpufeature.h>         /* boot_cpu_has, ...            */
> >   #include <asm/traps.h>                      /* dotraplinkage, ...           */
> > @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs,
> >       }
> >   #endif
> >
> > +#ifdef CONFIG_PER_VMA_LOCK
> > +     if (!(flags & FAULT_FLAG_USER))
> > +             goto lock_mmap;
> > +
> > +     vma = lock_vma_under_rcu(mm, address);
> > +     if (!vma)
> > +             goto lock_mmap;
> > +
> > +     if (unlikely(access_error(error_code, vma))) {
> > +             vma_end_read(vma);
> > +             goto lock_mmap;
> > +     }
> > +     fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
> > +     vma_end_read(vma);
> > +
> > +     if (!(fault & VM_FAULT_RETRY)) {
> > +             count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> > +             goto done;
> > +     }
> > +     count_vm_vma_lock_event(VMA_LOCK_RETRY);
>
> This is apparently not strong enough as it causes go build failures like:
>
> [  409s] strconv
> [  409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2
> [  409s] fatal error: releasep: invalid p state
> [  409s]
>
> [  325s] hash/adler32
> [  325s] hash/crc32
> [  325s] cmd/internal/codesign
> [  336s] fatal error: runtime: out of memory

Hi Jiri,
Thanks for reporting! I'm not familiar with go builds. Could you
please explain the error to me or point me to some documentation to
decipher that error?
Thanks,
Suren.

>
> There are many kinds of similar errors. It happens in 1-3 out of 20
> builds only.
>
> If I revert the commit on top of 6.4, they all dismiss. Any idea?
>
> The downstream report:
> https://bugzilla.suse.com/show_bug.cgi?id=1212775
>
> > +
> > +     /* Quick path to respond to signals */
> > +     if (fault_signal_pending(fault, regs)) {
> > +             if (!user_mode(regs))
> > +                     kernelmode_fixup_or_oops(regs, error_code, address,
> > +                                              SIGBUS, BUS_ADRERR,
> > +                                              ARCH_DEFAULT_PKEY);
> > +             return;
> > +     }
> > +lock_mmap:
> > +#endif /* CONFIG_PER_VMA_LOCK */
> > +
> >       /*
> >        * Kernel-mode access to the user address space should only occur
> >        * on well-defined single instructions listed in the exception
> > @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs,
> >       }
> >
> >       mmap_read_unlock(mm);
> > +#ifdef CONFIG_PER_VMA_LOCK
> > +done:
> > +#endif
> >       if (likely(!(fault & VM_FAULT_ERROR)))
> >               return;
> >
>
> thanks,
> --
> js
> suse labs
>
Linux regression tracking (Thorsten Leemhuis) June 29, 2023, 5:06 p.m. UTC | #3
[CCing the regression list, as it should be in the loop for regressions:
https://docs.kernel.org/admin-guide/reporting-regressions.html]

On 29.06.23 16:40, Jiri Slaby wrote:
> 
> On 27. 02. 23, 18:36, Suren Baghdasaryan wrote:
>> Attempt VMA lock-based page fault handling first, and fall back to the
>> existing mmap_lock-based handling if that fails.
> [...]
>> +    fault = handle_mm_fault(vma, address, flags |
>> FAULT_FLAG_VMA_LOCK, regs);
>> +    vma_end_read(vma);
>> +
>> +    if (!(fault & VM_FAULT_RETRY)) {
>> +        count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>> +        goto done;
>> +    }
>> +    count_vm_vma_lock_event(VMA_LOCK_RETRY);
> 
> This is apparently not strong enough as it causes go build failures like:
> 
> [  409s] strconv
> [  409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2
> [  409s] fatal error: releasep: invalid p state
> [  409s]
> 
> [  325s] hash/adler32
> [  325s] hash/crc32
> [  325s] cmd/internal/codesign
> [  336s] fatal error: runtime: out of memory
> 
> There are many kinds of similar errors. It happens in 1-3 out of 20
> builds only.
> 
> If I revert the commit on top of 6.4, they all dismiss. Any idea?
> 
> The downstream report:
> https://bugzilla.suse.com/show_bug.cgi?id=1212775
> [...]

Thanks for the report. To be sure the issue doesn't fall through the
cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
tracking bot:

#regzbot ^introduced 0bff0aaea03e2a3ed6bfa3021
https://bugzilla.suse.com/show_bug.cgi?id=1212775
#regzbot title mm: failures when building go in 1-3 out of 20 builds
#regzbot ignore-activity

Developers: When fixing the issue, remember to add 'Link:' tags pointing
to the report (the parent of this mail). See page linked in footer for
details.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.
Jiri Slaby June 30, 2023, 6:35 a.m. UTC | #4
On 29. 06. 23, 17:30, Suren Baghdasaryan wrote:
> On Thu, Jun 29, 2023 at 7:40 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>>
>> Hi,
>>
>> On 27. 02. 23, 18:36, Suren Baghdasaryan wrote:
>>> Attempt VMA lock-based page fault handling first, and fall back to the
>>> existing mmap_lock-based handling if that fails.
>>>
>>> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
>>> ---
>>>    arch/x86/Kconfig    |  1 +
>>>    arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++
>>>    2 files changed, 37 insertions(+)
>>>
>>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>>> index a825bf031f49..df21fba77db1 100644
>>> --- a/arch/x86/Kconfig
>>> +++ b/arch/x86/Kconfig
>>> @@ -27,6 +27,7 @@ config X86_64
>>>        # Options that are inherently 64-bit kernel only:
>>>        select ARCH_HAS_GIGANTIC_PAGE
>>>        select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
>>> +     select ARCH_SUPPORTS_PER_VMA_LOCK
>>>        select ARCH_USE_CMPXCHG_LOCKREF
>>>        select HAVE_ARCH_SOFT_DIRTY
>>>        select MODULES_USE_ELF_RELA
>>> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
>>> index a498ae1fbe66..e4399983c50c 100644
>>> --- a/arch/x86/mm/fault.c
>>> +++ b/arch/x86/mm/fault.c
>>> @@ -19,6 +19,7 @@
>>>    #include <linux/uaccess.h>          /* faulthandler_disabled()      */
>>>    #include <linux/efi.h>                      /* efi_crash_gracefully_on_page_fault()*/
>>>    #include <linux/mm_types.h>
>>> +#include <linux/mm.h>                        /* find_and_lock_vma() */
>>>
>>>    #include <asm/cpufeature.h>         /* boot_cpu_has, ...            */
>>>    #include <asm/traps.h>                      /* dotraplinkage, ...           */
>>> @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs,
>>>        }
>>>    #endif
>>>
>>> +#ifdef CONFIG_PER_VMA_LOCK
>>> +     if (!(flags & FAULT_FLAG_USER))
>>> +             goto lock_mmap;
>>> +
>>> +     vma = lock_vma_under_rcu(mm, address);
>>> +     if (!vma)
>>> +             goto lock_mmap;
>>> +
>>> +     if (unlikely(access_error(error_code, vma))) {
>>> +             vma_end_read(vma);
>>> +             goto lock_mmap;
>>> +     }
>>> +     fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
>>> +     vma_end_read(vma);
>>> +
>>> +     if (!(fault & VM_FAULT_RETRY)) {
>>> +             count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>>> +             goto done;
>>> +     }
>>> +     count_vm_vma_lock_event(VMA_LOCK_RETRY);
>>
>> This is apparently not strong enough as it causes go build failures like:
>>
>> [  409s] strconv
>> [  409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2
>> [  409s] fatal error: releasep: invalid p state
>> [  409s]
>>
>> [  325s] hash/adler32
>> [  325s] hash/crc32
>> [  325s] cmd/internal/codesign
>> [  336s] fatal error: runtime: out of memory
> 
> Hi Jiri,
> Thanks for reporting! I'm not familiar with go builds. Could you
> please explain the error to me or point me to some documentation to
> decipher that error?

Sorry, we are on the same boat -- me neither. It only popped up in our 
(openSUSE) build system and I only tracked it down by bisection. Let me 
know if I can try something (like a patch or gathering some debug info).

thanks,
Jiri Slaby June 30, 2023, 8:28 a.m. UTC | #5
On 30. 06. 23, 8:35, Jiri Slaby wrote:
> On 29. 06. 23, 17:30, Suren Baghdasaryan wrote:
>> On Thu, Jun 29, 2023 at 7:40 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>>>
>>> Hi,
>>>
>>> On 27. 02. 23, 18:36, Suren Baghdasaryan wrote:
>>>> Attempt VMA lock-based page fault handling first, and fall back to the
>>>> existing mmap_lock-based handling if that fails.
>>>>
>>>> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
>>>> ---
>>>>    arch/x86/Kconfig    |  1 +
>>>>    arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++
>>>>    2 files changed, 37 insertions(+)
>>>>
>>>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>>>> index a825bf031f49..df21fba77db1 100644
>>>> --- a/arch/x86/Kconfig
>>>> +++ b/arch/x86/Kconfig
>>>> @@ -27,6 +27,7 @@ config X86_64
>>>>        # Options that are inherently 64-bit kernel only:
>>>>        select ARCH_HAS_GIGANTIC_PAGE
>>>>        select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
>>>> +     select ARCH_SUPPORTS_PER_VMA_LOCK
>>>>        select ARCH_USE_CMPXCHG_LOCKREF
>>>>        select HAVE_ARCH_SOFT_DIRTY
>>>>        select MODULES_USE_ELF_RELA
>>>> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
>>>> index a498ae1fbe66..e4399983c50c 100644
>>>> --- a/arch/x86/mm/fault.c
>>>> +++ b/arch/x86/mm/fault.c
>>>> @@ -19,6 +19,7 @@
>>>>    #include <linux/uaccess.h>          /* 
>>>> faulthandler_disabled()      */
>>>>    #include <linux/efi.h>                      /* 
>>>> efi_crash_gracefully_on_page_fault()*/
>>>>    #include <linux/mm_types.h>
>>>> +#include <linux/mm.h>                        /* find_and_lock_vma() */
>>>>
>>>>    #include <asm/cpufeature.h>         /* boot_cpu_has, 
>>>> ...            */
>>>>    #include <asm/traps.h>                      /* dotraplinkage, 
>>>> ...           */
>>>> @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs,
>>>>        }
>>>>    #endif
>>>>
>>>> +#ifdef CONFIG_PER_VMA_LOCK
>>>> +     if (!(flags & FAULT_FLAG_USER))
>>>> +             goto lock_mmap;
>>>> +
>>>> +     vma = lock_vma_under_rcu(mm, address);
>>>> +     if (!vma)
>>>> +             goto lock_mmap;
>>>> +
>>>> +     if (unlikely(access_error(error_code, vma))) {
>>>> +             vma_end_read(vma);
>>>> +             goto lock_mmap;
>>>> +     }
>>>> +     fault = handle_mm_fault(vma, address, flags | 
>>>> FAULT_FLAG_VMA_LOCK, regs);
>>>> +     vma_end_read(vma);
>>>> +
>>>> +     if (!(fault & VM_FAULT_RETRY)) {
>>>> +             count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>>>> +             goto done;
>>>> +     }
>>>> +     count_vm_vma_lock_event(VMA_LOCK_RETRY);
>>>
>>> This is apparently not strong enough as it causes go build failures 
>>> like:
>>>
>>> [  409s] strconv
>>> [  409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2
>>> [  409s] fatal error: releasep: invalid p state
>>> [  409s]
>>>
>>> [  325s] hash/adler32
>>> [  325s] hash/crc32
>>> [  325s] cmd/internal/codesign
>>> [  336s] fatal error: runtime: out of memory
>>
>> Hi Jiri,
>> Thanks for reporting! I'm not familiar with go builds. Could you
>> please explain the error to me or point me to some documentation to
>> decipher that error?
> 
> Sorry, we are on the same boat -- me neither. It only popped up in our 
> (openSUSE) build system and I only tracked it down by bisection. Let me 
> know if I can try something (like a patch or gathering some debug info).

FWIW, a failed build log:
https://decibel.fi.muni.cz/~xslaby/n/vma/log.txt

and a strace for it:
https://decibel.fi.muni.cz/~xslaby/n/vma/strace.txt

An excerpt from the log:

[   55s] runtime: marked free object in span 0x7fca6824bec8, 
elemsize=192 freeindex=0 (bad use of unsafe.Pointer? try -d=checkptr)
[   55s] 0xc0002f2000 alloc marked
[   55s] 0xc0002f20c0 alloc marked
[   55s] 0xc0002f2180 alloc marked
[   55s] 0xc0002f2240 free  unmarked
[   55s] 0xc0002f2300 alloc marked
[   55s] 0xc0002f23c0 alloc marked
[   55s] 0xc0002f2480 alloc marked
[   55s] 0xc0002f2540 alloc marked
[   55s] 0xc0002f2600 alloc marked
[   55s] 0xc0002f26c0 alloc marked
[   55s] 0xc0002f2780 alloc marked
[   55s] 0xc0002f2840 alloc marked
[   55s] 0xc0002f2900 alloc marked
[   55s] 0xc0002f29c0 free  unmarked
[   55s] 0xc0002f2a80 alloc marked
[   55s] 0xc0002f2b40 alloc marked
[   55s] 0xc0002f2c00 alloc marked
[   55s] 0xc0002f2cc0 alloc marked
[   55s] 0xc0002f2d80 alloc marked
[   55s] 0xc0002f2e40 alloc marked
[   55s] 0xc0002f2f00 alloc marked
[   55s] 0xc0002f2fc0 alloc marked
[   55s] 0xc0002f3080 alloc marked
[   55s] 0xc0002f3140 alloc marked
[   55s] 0xc0002f3200 alloc marked
[   55s] 0xc0002f32c0 alloc marked
[   55s] 0xc0002f3380 free  unmarked
[   55s] 0xc0002f3440 free  marked   zombie


An excerpt from strace:
 > 2348 
clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, 
child_tid=0x7fcaa6a1b990, parent_tid=0x7fcaa6a1b990, exit_signal=0, 
stack=0x7fcaa621b000, stack_size=0x7ffe00, tls=0x7fcaa6a1b6c0} => 
{parent_tid=[2350]}, 88) = 2350

 > 2348 
clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, 
child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, 
stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => 
{parent_tid=[2351]}, 88) = 2351
 > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
 > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
 > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
 > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
 > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
 > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE, 
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
 > 2370  <... mmap resumed>)               = 0x7fca68249000
 > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
 > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
 > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
 > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
 > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished ...>

I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON 
0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some 
reason 0x7fca6824bec8 in that region is "bad".

> thanks,--
Jiri Slaby June 30, 2023, 8:43 a.m. UTC | #6
On 30. 06. 23, 10:28, Jiri Slaby wrote:
>  > 2348 
> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
>  > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
>  > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
>  > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
>  > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
>  > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
>  > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE, 
> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
>  > 2370  <... mmap resumed>)               = 0x7fca68249000
>  > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
>  > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
>  > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
>  > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
>  > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished 
> ...>
> 
> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON 
> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some 
> reason 0x7fca6824bec8 in that region is "bad".

As I was noticed, this might be as well be a fail of the go's 
inter-thread communication (or alike) too. It might now be only more 
exposed with vma-based locks as we can do more parallelism now.

There are older hard to reproduce bugs in go with similar symptoms (we 
see this error sometimes now too):
https://github.com/golang/go/issues/15246

Or this 2016 bug is a red herring. Hard to tell...

>> thanks,
Suren Baghdasaryan June 30, 2023, 5:40 p.m. UTC | #7
On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>
> On 30. 06. 23, 10:28, Jiri Slaby wrote:
> >  > 2348
> > clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
> >  > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
> >  > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
> >  > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
> >  > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
> >  > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
> >  > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
> > MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
> >  > 2370  <... mmap resumed>)               = 0x7fca68249000
> >  > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
> >  > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
> >  > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
> >  > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
> >  > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
> > ...>
> >
> > I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
> > 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
> > reason 0x7fca6824bec8 in that region is "bad".

Thanks for the analysis Jiri.
Is it possible from these logs to identify whether 2370 finished the
mmap operation before 2395 tried to access 0x7fca6824bec8? That access
has to happen only after mmap finishes mapping the region.

>
> As I was noticed, this might be as well be a fail of the go's
> inter-thread communication (or alike) too. It might now be only more
> exposed with vma-based locks as we can do more parallelism now.

Yes, with multithreaded processes like these where threads are mapping
and accessing memory areas, per-VMA locks should allow for greater
parallelism. So, if there is a race like the one I asked above, it
might become more pronounced with per-VMA locks.

I'll double check the code, but from Kernel's POV mmap would take the
mmap_lock for write then will lock the VMA lock for write. That should
prevent any page fault handlers from accessing this VMA in parallel
until writers release the locks. Page fault path would try to find the
VMA without any lock and then will try to read-lock that VMA. If it
fails it will fall back to mmap_lock. So, if the writer started first
and obtained the VMA lock, the reader will fall back to mmap_lock and
will block until the writer releases the mmap_lock. If the reader got
VMA read lock first then the writer will block while obtaining the
VMA's write lock. However for your scenario, the reader (page fault)
might be getting here before the writer (mmap) and upon not finding
the VMA it is looking for, it will fail.
Please let me know if you can verify this scenario.
Thanks,
Suren.

>
> There are older hard to reproduce bugs in go with similar symptoms (we
> see this error sometimes now too):
> https://github.com/golang/go/issues/15246
>
> Or this 2016 bug is a red herring. Hard to tell...
>
> >> thanks,
> --
> js
> suse labs
>
Linux regression tracking (Thorsten Leemhuis) July 3, 2023, 9:58 a.m. UTC | #8
On 29.06.23 16:40, Jiri Slaby wrote:
> On 27. 02. 23, 18:36, Suren Baghdasaryan wrote:
>> Attempt VMA lock-based page fault handling first, and fall back to the
>> existing mmap_lock-based handling if that fails.
>>
>> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
>> ---
>>   arch/x86/Kconfig    |  1 +
>>   arch/x86/mm/fault.c | 36 ++++++++++++++++++++++++++++++++++++
>>   2 files changed, 37 insertions(+)
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index a825bf031f49..df21fba77db1 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -27,6 +27,7 @@ config X86_64
>>       # Options that are inherently 64-bit kernel only:
>>       select ARCH_HAS_GIGANTIC_PAGE
>>       select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
>> +    select ARCH_SUPPORTS_PER_VMA_LOCK
>>       select ARCH_USE_CMPXCHG_LOCKREF
>>       select HAVE_ARCH_SOFT_DIRTY
>>       select MODULES_USE_ELF_RELA
>> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
>> index a498ae1fbe66..e4399983c50c 100644
>> --- a/arch/x86/mm/fault.c
>> +++ b/arch/x86/mm/fault.c
>> @@ -19,6 +19,7 @@
>>   #include <linux/uaccess.h>        /* faulthandler_disabled()    */
>>   #include <linux/efi.h>            /*
>> efi_crash_gracefully_on_page_fault()*/
>>   #include <linux/mm_types.h>
>> +#include <linux/mm.h>            /* find_and_lock_vma() */
>>     #include <asm/cpufeature.h>        /* boot_cpu_has, ...        */
>>   #include <asm/traps.h>            /* dotraplinkage, ...        */
>> @@ -1333,6 +1334,38 @@ void do_user_addr_fault(struct pt_regs *regs,
>>       }
>>   #endif
>>   +#ifdef CONFIG_PER_VMA_LOCK
>> +    if (!(flags & FAULT_FLAG_USER))
>> +        goto lock_mmap;
>> +
>> +    vma = lock_vma_under_rcu(mm, address);
>> +    if (!vma)
>> +        goto lock_mmap;
>> +
>> +    if (unlikely(access_error(error_code, vma))) {
>> +        vma_end_read(vma);
>> +        goto lock_mmap;
>> +    }
>> +    fault = handle_mm_fault(vma, address, flags |
>> FAULT_FLAG_VMA_LOCK, regs);
>> +    vma_end_read(vma);
>> +
>> +    if (!(fault & VM_FAULT_RETRY)) {
>> +        count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>> +        goto done;
>> +    }
>> +    count_vm_vma_lock_event(VMA_LOCK_RETRY);
> 
> This is apparently not strong enough as it causes go build failures like:

TWIMC & for the record: there is another report about trouble caused by
this change; for details see

https://bugzilla.kernel.org/show_bug.cgi?id=217624

And a "forward to devs and lists" thread about that report:

https://lore.kernel.org/all/facbfec3-837a-51ed-85fa-31021c17d6ef@gmail.com/

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
If I did something stupid, please tell me, as explained on that page.

> [  409s] strconv
> [  409s] releasep: m=0x579e2000 m->p=0x5781c600 p->m=0x0 p->status=2
> [  409s] fatal error: releasep: invalid p state
> [  409s]
> 
> [  325s] hash/adler32
> [  325s] hash/crc32
> [  325s] cmd/internal/codesign
> [  336s] fatal error: runtime: out of memory
> 
> There are many kinds of similar errors. It happens in 1-3 out of 20
> builds only.
> 
> If I revert the commit on top of 6.4, they all dismiss. Any idea?
> 
> The downstream report:
> https://bugzilla.suse.com/show_bug.cgi?id=1212775
> 
>> +
>> +    /* Quick path to respond to signals */
>> +    if (fault_signal_pending(fault, regs)) {
>> +        if (!user_mode(regs))
>> +            kernelmode_fixup_or_oops(regs, error_code, address,
>> +                         SIGBUS, BUS_ADRERR,
>> +                         ARCH_DEFAULT_PKEY);
>> +        return;
>> +    }
>> +lock_mmap:
>> +#endif /* CONFIG_PER_VMA_LOCK */
>> +
>>       /*
>>        * Kernel-mode access to the user address space should only occur
>>        * on well-defined single instructions listed in the exception
>> @@ -1433,6 +1466,9 @@ void do_user_addr_fault(struct pt_regs *regs,
>>       }
>>         mmap_read_unlock(mm);
>> +#ifdef CONFIG_PER_VMA_LOCK
>> +done:
>> +#endif
>>       if (likely(!(fault & VM_FAULT_ERROR)))
>>           return;
>>   
> 
> thanks,
Jiri Slaby July 3, 2023, 10:47 a.m. UTC | #9
Cc Jacob Young (from kernel bugzilla)

On 30. 06. 23, 19:40, Suren Baghdasaryan wrote:
> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>>
>> On 30. 06. 23, 10:28, Jiri Slaby wrote:
>>>   > 2348
>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
>>>   > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
>>>   > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
>>>   > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
>>>   > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
>>>   > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
>>>   > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
>>>   > 2370  <... mmap resumed>)               = 0x7fca68249000
>>>   > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
>>>   > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
>>>   > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
>>>   > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
>>>   > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
>>> ...>
>>>
>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
>>> reason 0x7fca6824bec8 in that region is "bad".
> 
> Thanks for the analysis Jiri.
> Is it possible from these logs to identify whether 2370 finished the
> mmap operation before 2395 tried to access 0x7fca6824bec8? That access
> has to happen only after mmap finishes mapping the region.

Hi,

it's hard to tell, but I assume so.

For now, forget about this go's overly complicated, hard to reproduce 
case and concentrate on the very nice reduced testcase in:
  https://bugzilla.kernel.org/show_bug.cgi?id=217624
;)

FWIW, I can reproduce using the test case too.

thanks,
Holger Hoffstätte July 3, 2023, 1:52 p.m. UTC | #10
On 2023-07-03 12:47, Jiri Slaby wrote:
> Cc Jacob Young (from kernel bugzilla)
> 
> On 30. 06. 23, 19:40, Suren Baghdasaryan wrote:
>> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>>>
>>> On 30. 06. 23, 10:28, Jiri Slaby wrote:
>>>>   > 2348
>>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
>>>>   > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
>>>>   > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
>>>>   > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
>>>>   > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
>>>>   > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
>>>>   > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
>>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
>>>>   > 2370  <... mmap resumed>)               = 0x7fca68249000
>>>>   > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
>>>>   > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
>>>>   > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
>>>>   > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
>>>>   > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
>>>> ...>
>>>>
>>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
>>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
>>>> reason 0x7fca6824bec8 in that region is "bad".
>>
>> Thanks for the analysis Jiri.
>> Is it possible from these logs to identify whether 2370 finished the
>> mmap operation before 2395 tried to access 0x7fca6824bec8? That access
>> has to happen only after mmap finishes mapping the region.
> 
> Hi,
> 
> it's hard to tell, but I assume so.
> 
> For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in:
>   https://bugzilla.kernel.org/show_bug.cgi?id=217624
> ;)
> 
> FWIW, I can reproduce using the test case too.
> 
> thanks,

As another (admittedly correlation-only) data point, I noticed at least hourly crashes
of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x.
After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up
commits in 6.4.1 - it has been rock stable again, for several hours now.

cheers
Holger
Suren Baghdasaryan July 3, 2023, 2:45 p.m. UTC | #11
On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte
<holger@applied-asynchrony.com> wrote:
>
> On 2023-07-03 12:47, Jiri Slaby wrote:
> > Cc Jacob Young (from kernel bugzilla)
> >
> > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote:
> >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
> >>>
> >>> On 30. 06. 23, 10:28, Jiri Slaby wrote:
> >>>>   > 2348
> >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
> >>>>   > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
> >>>>   > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
> >>>>   > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
> >>>>   > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
> >>>>   > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
> >>>>   > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
> >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
> >>>>   > 2370  <... mmap resumed>)               = 0x7fca68249000
> >>>>   > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
> >>>>   > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
> >>>>   > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
> >>>>   > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
> >>>>   > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
> >>>> ...>
> >>>>
> >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
> >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
> >>>> reason 0x7fca6824bec8 in that region is "bad".
> >>
> >> Thanks for the analysis Jiri.
> >> Is it possible from these logs to identify whether 2370 finished the
> >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access
> >> has to happen only after mmap finishes mapping the region.
> >
> > Hi,
> >
> > it's hard to tell, but I assume so.
> >
> > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in:
> >   https://bugzilla.kernel.org/show_bug.cgi?id=217624
> > ;)
> >
> > FWIW, I can reproduce using the test case too.

Thanks for the reproducer, Jiri!
Let me try it and see if I can figure this one out.

> >
> > thanks,
>
> As another (admittedly correlation-only) data point, I noticed at least hourly crashes
> of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x.
> After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up
> commits in 6.4.1 - it has been rock stable again, for several hours now.
>
> cheers
> Holger
Suren Baghdasaryan July 3, 2023, 3:24 p.m. UTC | #12
On Mon, Jul 3, 2023 at 2:45 PM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte
> <holger@applied-asynchrony.com> wrote:
> >
> > On 2023-07-03 12:47, Jiri Slaby wrote:
> > > Cc Jacob Young (from kernel bugzilla)
> > >
> > > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote:
> > >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
> > >>>
> > >>> On 30. 06. 23, 10:28, Jiri Slaby wrote:
> > >>>>   > 2348
> > >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
> > >>>>   > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
> > >>>>   > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
> > >>>>   > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
> > >>>>   > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
> > >>>>   > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
> > >>>>   > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
> > >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
> > >>>>   > 2370  <... mmap resumed>)               = 0x7fca68249000
> > >>>>   > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
> > >>>>   > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
> > >>>>   > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
> > >>>>   > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
> > >>>>   > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
> > >>>> ...>
> > >>>>
> > >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
> > >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
> > >>>> reason 0x7fca6824bec8 in that region is "bad".
> > >>
> > >> Thanks for the analysis Jiri.
> > >> Is it possible from these logs to identify whether 2370 finished the
> > >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access
> > >> has to happen only after mmap finishes mapping the region.
> > >
> > > Hi,
> > >
> > > it's hard to tell, but I assume so.
> > >
> > > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in:
> > >   https://bugzilla.kernel.org/show_bug.cgi?id=217624
> > > ;)
> > >
> > > FWIW, I can reproduce using the test case too.
>
> Thanks for the reproducer, Jiri!
> Let me try it and see if I can figure this one out.

Interestingly I can't reproduce it with qemu emulator (reproducer
returns 1) but my host machine with the same kernel reproduces it
every time. Will try tracing the major code paths to see what's going
on.
I have to leave for a day but will resume in the evening once I'm home.
Thanks,
Suren.

>
> > >
> > > thanks,
> >
> > As another (admittedly correlation-only) data point, I noticed at least hourly crashes
> > of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x.
> > After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up
> > commits in 6.4.1 - it has been rock stable again, for several hours now.
> >
> > cheers
> > Holger
Suren Baghdasaryan July 3, 2023, 6:28 p.m. UTC | #13
On Mon, Jul 3, 2023 at 8:24 AM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Mon, Jul 3, 2023 at 2:45 PM Suren Baghdasaryan <surenb@google.com> wrote:
> >
> > On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte
> > <holger@applied-asynchrony.com> wrote:
> > >
> > > On 2023-07-03 12:47, Jiri Slaby wrote:
> > > > Cc Jacob Young (from kernel bugzilla)
> > > >
> > > > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote:
> > > >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
> > > >>>
> > > >>> On 30. 06. 23, 10:28, Jiri Slaby wrote:
> > > >>>>   > 2348
> > > >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
> > > >>>>   > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
> > > >>>>   > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
> > > >>>>   > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
> > > >>>>   > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
> > > >>>>   > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
> > > >>>>   > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
> > > >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
> > > >>>>   > 2370  <... mmap resumed>)               = 0x7fca68249000
> > > >>>>   > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
> > > >>>>   > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
> > > >>>>   > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
> > > >>>>   > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
> > > >>>>   > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
> > > >>>> ...>
> > > >>>>
> > > >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
> > > >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
> > > >>>> reason 0x7fca6824bec8 in that region is "bad".
> > > >>
> > > >> Thanks for the analysis Jiri.
> > > >> Is it possible from these logs to identify whether 2370 finished the
> > > >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access
> > > >> has to happen only after mmap finishes mapping the region.
> > > >
> > > > Hi,
> > > >
> > > > it's hard to tell, but I assume so.
> > > >
> > > > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in:
> > > >   https://bugzilla.kernel.org/show_bug.cgi?id=217624
> > > > ;)
> > > >
> > > > FWIW, I can reproduce using the test case too.
> >
> > Thanks for the reproducer, Jiri!
> > Let me try it and see if I can figure this one out.
>
> Interestingly I can't reproduce it with qemu emulator (reproducer
> returns 1) but my host machine with the same kernel reproduces it
> every time. Will try tracing the major code paths to see what's going
> on.
> I have to leave for a day but will resume in the evening once I'm home.

I posted a patch to disable per-VMA locks by default for now:
https://lore.kernel.org/all/20230703182150.2193578-1-surenb@google.com/
Will re-enable them once we figure this issue out.
Thanks,
Suren.

> Thanks,
> Suren.
>
> >
> > > >
> > > > thanks,
> > >
> > > As another (admittedly correlation-only) data point, I noticed at least hourly crashes
> > > of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x.
> > > After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up
> > > commits in 6.4.1 - it has been rock stable again, for several hours now.
> > >
> > > cheers
> > > Holger
Suren Baghdasaryan July 5, 2023, 10:15 p.m. UTC | #14
On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte
<holger@applied-asynchrony.com> wrote:
>
> On 2023-07-03 12:47, Jiri Slaby wrote:
> > Cc Jacob Young (from kernel bugzilla)
> >
> > On 30. 06. 23, 19:40, Suren Baghdasaryan wrote:
> >> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
> >>>
> >>> On 30. 06. 23, 10:28, Jiri Slaby wrote:
> >>>>   > 2348
> >>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
> >>>>   > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
> >>>>   > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
> >>>>   > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
> >>>>   > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
> >>>>   > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
> >>>>   > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
> >>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
> >>>>   > 2370  <... mmap resumed>)               = 0x7fca68249000
> >>>>   > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
> >>>>   > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
> >>>>   > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
> >>>>   > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
> >>>>   > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
> >>>> ...>
> >>>>
> >>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
> >>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
> >>>> reason 0x7fca6824bec8 in that region is "bad".
> >>
> >> Thanks for the analysis Jiri.
> >> Is it possible from these logs to identify whether 2370 finished the
> >> mmap operation before 2395 tried to access 0x7fca6824bec8? That access
> >> has to happen only after mmap finishes mapping the region.
> >
> > Hi,
> >
> > it's hard to tell, but I assume so.
> >
> > For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in:
> >   https://bugzilla.kernel.org/show_bug.cgi?id=217624
> > ;)
> >
> > FWIW, I can reproduce using the test case too.
> >
> > thanks,
>
> As another (admittedly correlation-only) data point, I noticed at least hourly crashes
> of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x.
> After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up
> commits in 6.4.1 - it has been rock stable again, for several hours now.

Jiri, Holger, would you be able to try
https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/
and see if your issues still exist?

>
> cheers
> Holger
Holger Hoffstätte July 5, 2023, 10:37 p.m. UTC | #15
On 2023-07-06 00:15, Suren Baghdasaryan wrote:
> On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte
> <holger@applied-asynchrony.com> wrote:
>>
>> On 2023-07-03 12:47, Jiri Slaby wrote:
>>> Cc Jacob Young (from kernel bugzilla)
>>>
>>> On 30. 06. 23, 19:40, Suren Baghdasaryan wrote:
>>>> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>>>>>
>>>>> On 30. 06. 23, 10:28, Jiri Slaby wrote:
>>>>>>    > 2348
>>>>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
>>>>>>    > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
>>>>>>    > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
>>>>>>    > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
>>>>>>    > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
>>>>>>    > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
>>>>>>    > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
>>>>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
>>>>>>    > 2370  <... mmap resumed>)               = 0x7fca68249000
>>>>>>    > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
>>>>>>    > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
>>>>>>    > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
>>>>>>    > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
>>>>>>    > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
>>>>>> ...>
>>>>>>
>>>>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
>>>>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
>>>>>> reason 0x7fca6824bec8 in that region is "bad".
>>>>
>>>> Thanks for the analysis Jiri.
>>>> Is it possible from these logs to identify whether 2370 finished the
>>>> mmap operation before 2395 tried to access 0x7fca6824bec8? That access
>>>> has to happen only after mmap finishes mapping the region.
>>>
>>> Hi,
>>>
>>> it's hard to tell, but I assume so.
>>>
>>> For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in:
>>>    https://bugzilla.kernel.org/show_bug.cgi?id=217624
>>> ;)
>>>
>>> FWIW, I can reproduce using the test case too.
>>>
>>> thanks,
>>
>> As another (admittedly correlation-only) data point, I noticed at least hourly crashes
>> of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x.
>> After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up
>> commits in 6.4.1 - it has been rock stable again, for several hours now.
> 
> Jiri, Holger, would you be able to try
> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/
> and see if your issues still exist?

Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of
your patches on a second machine (old Intel Sandy Bridge workstation) to be my
crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set
PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work
(but no exhaustive tests yet). I will also rerun a few reboot laps, just to
exercise this a bit harder and see if something comes up.

Tomorrow I'll also try again on my Zen2 Thinkpad and will report back.

Fingers crossed!

cheers
Holger
Suren Baghdasaryan July 5, 2023, 10:55 p.m. UTC | #16
On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte
<holger@applied-asynchrony.com> wrote:
>
> On 2023-07-06 00:15, Suren Baghdasaryan wrote:
> > On Mon, Jul 3, 2023 at 6:52 AM Holger Hoffstätte
> > <holger@applied-asynchrony.com> wrote:
> >>
> >> On 2023-07-03 12:47, Jiri Slaby wrote:
> >>> Cc Jacob Young (from kernel bugzilla)
> >>>
> >>> On 30. 06. 23, 19:40, Suren Baghdasaryan wrote:
> >>>> On Fri, Jun 30, 2023 at 1:43 AM Jiri Slaby <jirislaby@kernel.org> wrote:
> >>>>>
> >>>>> On 30. 06. 23, 10:28, Jiri Slaby wrote:
> >>>>>>    > 2348
> >>>>>> clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7fcaa5882990, parent_tid=0x7fcaa5882990, exit_signal=0, stack=0x7fcaa5082000, stack_size=0x7ffe00, tls=0x7fcaa58826c0} => {parent_tid=[2351]}, 88) = 2351
> >>>>>>    > 2350  <... clone3 resumed> => {parent_tid=[2372]}, 88) = 2372
> >>>>>>    > 2351  <... clone3 resumed> => {parent_tid=[2354]}, 88) = 2354
> >>>>>>    > 2351  <... clone3 resumed> => {parent_tid=[2357]}, 88) = 2357
> >>>>>>    > 2354  <... clone3 resumed> => {parent_tid=[2355]}, 88) = 2355
> >>>>>>    > 2355  <... clone3 resumed> => {parent_tid=[2370]}, 88) = 2370
> >>>>>>    > 2370  mmap(NULL, 262144, PROT_READ|PROT_WRITE,
> >>>>>> MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 <unfinished ...>
> >>>>>>    > 2370  <... mmap resumed>)               = 0x7fca68249000
> >>>>>>    > 2372  <... clone3 resumed> => {parent_tid=[2384]}, 88) = 2384
> >>>>>>    > 2384  <... clone3 resumed> => {parent_tid=[2388]}, 88) = 2388
> >>>>>>    > 2388  <... clone3 resumed> => {parent_tid=[2392]}, 88) = 2392
> >>>>>>    > 2392  <... clone3 resumed> => {parent_tid=[2395]}, 88) = 2395
> >>>>>>    > 2395  write(2, "runtime: marked free object in s"..., 36 <unfinished
> >>>>>> ...>
> >>>>>>
> >>>>>> I.e. IIUC, all are threads (CLONE_VM) and thread 2370 mapped ANON
> >>>>>> 0x7fca68249000 - 0x7fca6827ffff and go in thread 2395 thinks for some
> >>>>>> reason 0x7fca6824bec8 in that region is "bad".
> >>>>
> >>>> Thanks for the analysis Jiri.
> >>>> Is it possible from these logs to identify whether 2370 finished the
> >>>> mmap operation before 2395 tried to access 0x7fca6824bec8? That access
> >>>> has to happen only after mmap finishes mapping the region.
> >>>
> >>> Hi,
> >>>
> >>> it's hard to tell, but I assume so.
> >>>
> >>> For now, forget about this go's overly complicated, hard to reproduce case and concentrate on the very nice reduced testcase in:
> >>>    https://bugzilla.kernel.org/show_bug.cgi?id=217624
> >>> ;)
> >>>
> >>> FWIW, I can reproduce using the test case too.
> >>>
> >>> thanks,
> >>
> >> As another (admittedly correlation-only) data point, I noticed at least hourly crashes
> >> of Firefox-114 after upgrading to 6.4.1, which had never happened before with 6.3.x.
> >> After reverting 0bff0aaea03e2a3ed6 - with a bit of context fixup due to follow-up
> >> commits in 6.4.1 - it has been rock stable again, for several hours now.
> >
> > Jiri, Holger, would you be able to try
> > https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/
> > and see if your issues still exist?
>
> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of
> your patches on a second machine (old Intel Sandy Bridge workstation) to be my
> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set
> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work
> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to
> exercise this a bit harder and see if something comes up.
>
> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back.
>
> Fingers crossed!

Thanks! This is promising.

>
> cheers
> Holger
Holger Hoffstätte July 6, 2023, 2:27 p.m. UTC | #17
On 2023-07-06 00:55, Suren Baghdasaryan wrote:
> On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte
> <holger@applied-asynchrony.com> wrote:
>>> Jiri, Holger, would you be able to try
>>> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/
>>> and see if your issues still exist?
>>
>> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of
>> your patches on a second machine (old Intel Sandy Bridge workstation) to be my
>> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set
>> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work
>> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to
>> exercise this a bit harder and see if something comes up.
>>
>> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back.
>>
>> Fingers crossed!
> 
> Thanks! This is promising.

Indeed it was, and still is. :)

This morning I wrangled 6.4.2 + v4 of the patches into all my machines,
enabled PER_VMA_LOCK=y, removed BROKEN and so far everything has been humming
along just fine. One machine has been compiling for several hours without issue.
My Zen2 thinkpad - which was previously really unhappy with enabled PER_VMA_LOCK -
has booted a few times without hiccup, and Firefox has been happily roaming the
interwebs for several hours as well. \o/

cheers
Holger
Suren Baghdasaryan July 6, 2023, 4:11 p.m. UTC | #18
On Thu, Jul 6, 2023 at 7:27 AM Holger Hoffstätte
<holger@applied-asynchrony.com> wrote:
>
> On 2023-07-06 00:55, Suren Baghdasaryan wrote:
> > On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte
> > <holger@applied-asynchrony.com> wrote:
> >>> Jiri, Holger, would you be able to try
> >>> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/
> >>> and see if your issues still exist?
> >>
> >> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of
> >> your patches on a second machine (old Intel Sandy Bridge workstation) to be my
> >> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set
> >> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work
> >> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to
> >> exercise this a bit harder and see if something comes up.
> >>
> >> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back.
> >>
> >> Fingers crossed!
> >
> > Thanks! This is promising.
>
> Indeed it was, and still is. :)
>
> This morning I wrangled 6.4.2 + v4 of the patches into all my machines,
> enabled PER_VMA_LOCK=y, removed BROKEN and so far everything has been humming
> along just fine. One machine has been compiling for several hours without issue.
> My Zen2 thinkpad - which was previously really unhappy with enabled PER_VMA_LOCK -
> has booted a few times without hiccup, and Firefox has been happily roaming the
> interwebs for several hours as well. \o/

This is great! Thanks so much for verifying!
Andrew, if it's not too late, maybe we can drop the BROKEN dependency
now that we have this confirmation?

>
> cheers
> Holger
Suren Baghdasaryan July 7, 2023, 2:23 a.m. UTC | #19
On Thu, Jul 6, 2023 at 9:11 AM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Thu, Jul 6, 2023 at 7:27 AM Holger Hoffstätte
> <holger@applied-asynchrony.com> wrote:
> >
> > On 2023-07-06 00:55, Suren Baghdasaryan wrote:
> > > On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte
> > > <holger@applied-asynchrony.com> wrote:
> > >>> Jiri, Holger, would you be able to try
> > >>> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/
> > >>> and see if your issues still exist?
> > >>
> > >> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of
> > >> your patches on a second machine (old Intel Sandy Bridge workstation) to be my
> > >> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set
> > >> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work
> > >> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to
> > >> exercise this a bit harder and see if something comes up.
> > >>
> > >> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back.
> > >>
> > >> Fingers crossed!
> > >
> > > Thanks! This is promising.
> >
> > Indeed it was, and still is. :)
> >
> > This morning I wrangled 6.4.2 + v4 of the patches into all my machines,
> > enabled PER_VMA_LOCK=y, removed BROKEN and so far everything has been humming
> > along just fine. One machine has been compiling for several hours without issue.
> > My Zen2 thinkpad - which was previously really unhappy with enabled PER_VMA_LOCK -
> > has booted a few times without hiccup, and Firefox has been happily roaming the
> > interwebs for several hours as well. \o/
>
> This is great! Thanks so much for verifying!
> Andrew, if it's not too late, maybe we can drop the BROKEN dependency
> now that we have this confirmation?

Liam pointed me to another possible issue with per-VMA locks due to
the recent changes in stack expansion locking rules. So, it's probably
safer to keep it marked BROKEN. I'll post a simple fix for that
shortly.

>
> >
> > cheers
> > Holger
Suren Baghdasaryan July 7, 2023, 4:40 a.m. UTC | #20
On Thu, Jul 6, 2023 at 7:23 PM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Thu, Jul 6, 2023 at 9:11 AM Suren Baghdasaryan <surenb@google.com> wrote:
> >
> > On Thu, Jul 6, 2023 at 7:27 AM Holger Hoffstätte
> > <holger@applied-asynchrony.com> wrote:
> > >
> > > On 2023-07-06 00:55, Suren Baghdasaryan wrote:
> > > > On Wed, Jul 5, 2023 at 3:37 PM Holger Hoffstätte
> > > > <holger@applied-asynchrony.com> wrote:
> > > >>> Jiri, Holger, would you be able to try
> > > >>> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/
> > > >>> and see if your issues still exist?
> > > >>
> > > >> Just in time! Not 2 minutes ago I finished rebuilding 6.4.2 + the last version of
> > > >> your patches on a second machine (old Intel Sandy Bridge workstation) to be my
> > > >> crash test dummy. I removed the BROKEN dependency in mm/Kconfig, manually set
> > > >> PER_VMA_LOCK=y and ... it seems to work?! Boots fine, Firefox seems to work
> > > >> (but no exhaustive tests yet). I will also rerun a few reboot laps, just to
> > > >> exercise this a bit harder and see if something comes up.
> > > >>
> > > >> Tomorrow I'll also try again on my Zen2 Thinkpad and will report back.
> > > >>
> > > >> Fingers crossed!
> > > >
> > > > Thanks! This is promising.
> > >
> > > Indeed it was, and still is. :)
> > >
> > > This morning I wrangled 6.4.2 + v4 of the patches into all my machines,
> > > enabled PER_VMA_LOCK=y, removed BROKEN and so far everything has been humming
> > > along just fine. One machine has been compiling for several hours without issue.
> > > My Zen2 thinkpad - which was previously really unhappy with enabled PER_VMA_LOCK -
> > > has booted a few times without hiccup, and Firefox has been happily roaming the
> > > interwebs for several hours as well. \o/
> >
> > This is great! Thanks so much for verifying!
> > Andrew, if it's not too late, maybe we can drop the BROKEN dependency
> > now that we have this confirmation?
>
> Liam pointed me to another possible issue with per-VMA locks due to
> the recent changes in stack expansion locking rules. So, it's probably
> safer to keep it marked BROKEN. I'll post a simple fix for that
> shortly.

The fix along with another less critical one is posted at
https://lore.kernel.org/all/20230707043211.3682710-1-surenb@google.com/

>
> >
> > >
> > > cheers
> > > Holger
Jiri Slaby July 11, 2023, 6:20 a.m. UTC | #21
On 06. 07. 23, 0:15, Suren Baghdasaryan wrote:
> Jiri, Holger, would you be able to try
> https://lore.kernel.org/all/20230705171213.2843068-2-surenb@google.com/
> and see if your issues still exist?

FWIW 6.4.3 (contains the series) is fine again.

thanks,
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a825bf031f49..df21fba77db1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -27,6 +27,7 @@  config X86_64
 	# Options that are inherently 64-bit kernel only:
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
+	select ARCH_SUPPORTS_PER_VMA_LOCK
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select HAVE_ARCH_SOFT_DIRTY
 	select MODULES_USE_ELF_RELA
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a498ae1fbe66..e4399983c50c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -19,6 +19,7 @@ 
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 #include <linux/efi.h>			/* efi_crash_gracefully_on_page_fault()*/
 #include <linux/mm_types.h>
+#include <linux/mm.h>			/* find_and_lock_vma() */
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -1333,6 +1334,38 @@  void do_user_addr_fault(struct pt_regs *regs,
 	}
 #endif
 
+#ifdef CONFIG_PER_VMA_LOCK
+	if (!(flags & FAULT_FLAG_USER))
+		goto lock_mmap;
+
+	vma = lock_vma_under_rcu(mm, address);
+	if (!vma)
+		goto lock_mmap;
+
+	if (unlikely(access_error(error_code, vma))) {
+		vma_end_read(vma);
+		goto lock_mmap;
+	}
+	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
+	vma_end_read(vma);
+
+	if (!(fault & VM_FAULT_RETRY)) {
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		goto done;
+	}
+	count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
+	/* Quick path to respond to signals */
+	if (fault_signal_pending(fault, regs)) {
+		if (!user_mode(regs))
+			kernelmode_fixup_or_oops(regs, error_code, address,
+						 SIGBUS, BUS_ADRERR,
+						 ARCH_DEFAULT_PKEY);
+		return;
+	}
+lock_mmap:
+#endif /* CONFIG_PER_VMA_LOCK */
+
 	/*
 	 * Kernel-mode access to the user address space should only occur
 	 * on well-defined single instructions listed in the exception
@@ -1433,6 +1466,9 @@  void do_user_addr_fault(struct pt_regs *regs,
 	}
 
 	mmap_read_unlock(mm);
+#ifdef CONFIG_PER_VMA_LOCK
+done:
+#endif
 	if (likely(!(fault & VM_FAULT_ERROR)))
 		return;