diff mbox

kernel BUG at mm/gup.c:LINE!

Message ID 20180704121107.GL22503@dhcp22.suse.cz (mailing list archive)
State New, archived
Headers show

Commit Message

Michal Hocko July 4, 2018, 12:11 p.m. UTC
On Wed 04-07-18 07:48:27, Zi Yan wrote:
> On 4 Jul 2018, at 7:17, Michal Hocko wrote:
> 
> > On Wed 04-07-18 19:01:51, Tetsuo Handa wrote:
> >> +Michal Hocko
> >>
> >> On 2018/07/04 13:19, syzbot wrote:
> >>> Hello,
> >>>
> >>> syzbot found the following crash on:
> >>>
> >>> HEAD commit:    d3bc0e67f852 Merge tag 'for-4.18-rc2-tag' of git://git.ker..
> >>> git tree:       upstream
> >>> console output: https://syzkaller.appspot.com/x/log.txt?x=1000077c400000
> >>> kernel config:  https://syzkaller.appspot.com/x/.config?x=a63be0c83e84d370
> >>> dashboard link: https://syzkaller.appspot.com/bug?extid=5dcb560fe12aa5091c06
> >>> compiler:       gcc (GCC) 8.0.1 20180413 (experimental)
> >>> userspace arch: i386
> >>> syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=158577a2400000
> >>
> >> Here is C reproducer made from syz reproducer. mlockall(MCL_FUTURE) is involved.
> >>
> >> This problem is triggerable by an unprivileged user.
> >> Shows different result on x86_64 (crash) and x86_32 (stall).
> >>
> >> ------------------------------------------------------------
> >> /* Need to compile using "-m32" option if host is 64bit. */
> >> #include <sys/types.h>
> >> #include <sys/stat.h>
> >> #include <fcntl.h>
> >> #include <unistd.h>
> >> #include <sys/mman.h>
> >> int uselib(const char *library);
> >>
> >> int main(int argc, char *argv[])
> >> {
> >> 	int fd = open("file", O_WRONLY | O_CREAT, 0644);
> >> 	write(fd, "\x7f\x45\x4c\x46\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02"
> >> 	      "\x00\x06\x00\xca\x3f\x8b\xca\x00\x00\x00\x00\x38\x00\x00\x00\x00\x00"
> >> 	      "\x00\xf7\xff\xff\xff\xff\xff\xff\x1f\x00\x02\x00\x00\x00\x00\x00\x00"
> >> 	      "\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x7b"
> >> 	      "\x66\xff\x00\x00\x05\x00\x00\x00\x76\x86\x00\x00\x00\x00\x00\x00\x00"
> >> 	      "\x00\x00\x00\x31\x0f\xf3\xee\xc1\xb0\x00\x0c\x08\x53\x55\xbe\x88\x47"
> >> 	      "\xc2\x2e\x30\xf5\x62\x82\xc6\x2c\x95\x72\x3f\x06\x8f\xe4\x2d\x27\x96"
> >> 	      "\xcc", 120);
> >> 	fchmod(fd, 0755);
> >> 	close(fd);
> >> 	mlockall(MCL_FUTURE); /* Removing this line avoids the bug. */
> >> 	uselib("file");
> >> 	return 0;
> >> }
> >> ------------------------------------------------------------
> >>
> >> ------------------------------------------------------------
> >> CentOS Linux 7 (Core)
> >> Kernel 4.18.0-rc3 on an x86_64
> >>
> >> localhost login: [   81.210241] emacs (9634) used greatest stack depth: 10416 bytes left
> >> [  140.099935] ------------[ cut here ]------------
> >> [  140.101904] kernel BUG at mm/gup.c:1242!
> >
> > Is this
> > 	VM_BUG_ON(len != PAGE_ALIGN(len));
> > in __mm_populate? I do not really get why we should VM_BUG_ON when the
> > len is not page aligned to be honest. The library is probably containing
> > some funky setup but if we simply cannot round up to the next PAGE_SIZE
> > boundary then we should probably just error out and fail. This is an
> > area I am really familiar with so I cannot really judge.
> 
> A strange thing is that __mm_populate() is only called by do_mlock() from mm/mlock.c,
> which makes len PAGE_ALIGN already. That VM_BUG_ON should not be triggered.

Not really. vm_brk_flags does call mm_populate for mlocked brk which is
the case for mlockall. I do not see any len sanitization in that path.
Well do_brk_flags does the roundup. I think we should simply remove the
bug on and round up there. mm_populate is an internal API and we should
trust our callers.

Anyway, the minimum fix seems to be the following (untested):

Comments

Oscar Salvador July 4, 2018, 3:15 p.m. UTC | #1
> 
> Not really. vm_brk_flags does call mm_populate for mlocked brk which is
> the case for mlockall. I do not see any len sanitization in that path.
> Well do_brk_flags does the roundup. I think we should simply remove the
> bug on and round up there. mm_populate is an internal API and we should
> trust our callers.
> 
> Anyway, the minimum fix seems to be the following (untested):
> 
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 9859cd4e19b9..56ad19cf2aea 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -186,8 +186,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
>  	return next;
>  }
>  
> -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
> -
> +static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
> +		struct list_head *uf);
>  SYSCALL_DEFINE1(brk, unsigned long, brk)
>  {
>  	unsigned long retval;
> @@ -245,7 +245,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
>  		goto out;
>  
>  	/* Ok, looks good - let it rip. */
> -	if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
> +	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
>  		goto out;
>  
>  set_brk:
> @@ -2939,12 +2939,6 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
>  	pgoff_t pgoff = addr >> PAGE_SHIFT;
>  	int error;
>  
> -	len = PAGE_ALIGN(request);
> -	if (len < request)
> -		return -ENOMEM;
> -	if (!len)
> -		return 0;
> -
>  	/* Until we need other flags, refuse anything except VM_EXEC. */
>  	if ((flags & (~VM_EXEC)) != 0)
>  		return -EINVAL;
> @@ -3016,18 +3010,20 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
>  	return 0;
>  }
>  
> -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
> -{
> -	return do_brk_flags(addr, len, 0, uf);
> -}
> -
> -int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
> +int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
>  {
>  	struct mm_struct *mm = current->mm;
> +	unsigned long len;
>  	int ret;
>  	bool populate;
>  	LIST_HEAD(uf);
>  
> +	len = PAGE_ALIGN(request);
> +	if (len < request)
> +		return -ENOMEM;
> +	if (!len)
> +		return 0;
> +
>  	if (down_write_killable(&mm->mmap_sem))
>  		return -EINTR;

I gave this patch a try but the system doesn't boot.
Unfortunately, I don't have the stacktrace on hand, but I'll get back to it tomorrow.

Anyway, I just gave it a try, and making sure that bss gets page aligned seems to
"fix" the issue (at the process doesn't hang anymore):

-       bss = eppnt->p_memsz + eppnt->p_vaddr;
+       bss = ELF_PAGESTART(eppnt->p_memsz + eppnt->p_vaddr);
	if (bss > len) {
                error = vm_brk(len, bss - len);

Although I'm not sure about the correctness of this.
Tetsuo Handa July 5, 2018, 12:35 a.m. UTC | #2
Oscar Salvador wrote:
> Anyway, I just gave it a try, and making sure that bss gets page aligned seems to
> "fix" the issue (at the process doesn't hang anymore):
> 
> -       bss = eppnt->p_memsz + eppnt->p_vaddr;
> +       bss = ELF_PAGESTART(eppnt->p_memsz + eppnt->p_vaddr);
> 	if (bss > len) {
>                 error = vm_brk(len, bss - len);
> 
> Although I'm not sure about the correctness of this.

static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                int error = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        return error;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
(...snipped...)
        /*
         * Next, align both the file and mem bss up to the page size,
         * since this is where elf_bss was just zeroed up to, and where
         * last_bss will end after the vm_brk_flags() below.
         */
        elf_bss = ELF_PAGEALIGN(elf_bss);
        last_bss = ELF_PAGEALIGN(last_bss);
        /* Finally, if there is still more bss to allocate, do it. */
        if (last_bss > elf_bss) {
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        goto out;
        }
(...snipped...)
}

static int load_elf_library(struct file *file)
{
(...snipped...)
        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                error = vm_brk(len, bss - len);
                if (error)
                        goto out_free_ph;
        }
(...snipped...)
}

So, indeed "bss" needs to be aligned.
But ELF_PAGESTART() or ELF_PAGEALIGN(), which one to use?

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

Is

-	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
-			    ELF_MIN_ALIGN - 1);
+	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);

suggesting that

-	bss = eppnt->p_memsz + eppnt->p_vaddr;
+	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);

is the right choice? I don't know...
Michal Hocko July 5, 2018, 6:44 a.m. UTC | #3
On Wed 04-07-18 17:15:29, Oscar Salvador wrote:
> > 
> > Not really. vm_brk_flags does call mm_populate for mlocked brk which is
> > the case for mlockall. I do not see any len sanitization in that path.
> > Well do_brk_flags does the roundup. I think we should simply remove the
> > bug on and round up there. mm_populate is an internal API and we should
> > trust our callers.
> > 
> > Anyway, the minimum fix seems to be the following (untested):
> > 
> > diff --git a/mm/mmap.c b/mm/mmap.c
> > index 9859cd4e19b9..56ad19cf2aea 100644
> > --- a/mm/mmap.c
> > +++ b/mm/mmap.c
> > @@ -186,8 +186,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
> >  	return next;
> >  }
> >  
> > -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
> > -
> > +static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
> > +		struct list_head *uf);
> >  SYSCALL_DEFINE1(brk, unsigned long, brk)
> >  {
> >  	unsigned long retval;
> > @@ -245,7 +245,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
> >  		goto out;
> >  
> >  	/* Ok, looks good - let it rip. */
> > -	if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
> > +	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
> >  		goto out;
> >  
> >  set_brk:
> > @@ -2939,12 +2939,6 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
> >  	pgoff_t pgoff = addr >> PAGE_SHIFT;
> >  	int error;
> >  
> > -	len = PAGE_ALIGN(request);
> > -	if (len < request)
> > -		return -ENOMEM;
> > -	if (!len)
> > -		return 0;
> > -
> >  	/* Until we need other flags, refuse anything except VM_EXEC. */
> >  	if ((flags & (~VM_EXEC)) != 0)
> >  		return -EINVAL;
> > @@ -3016,18 +3010,20 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
> >  	return 0;
> >  }
> >  
> > -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
> > -{
> > -	return do_brk_flags(addr, len, 0, uf);
> > -}
> > -
> > -int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
> > +int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
> >  {
> >  	struct mm_struct *mm = current->mm;
> > +	unsigned long len;
> >  	int ret;
> >  	bool populate;
> >  	LIST_HEAD(uf);
> >  
> > +	len = PAGE_ALIGN(request);
> > +	if (len < request)
> > +		return -ENOMEM;
> > +	if (!len)
> > +		return 0;
> > +
> >  	if (down_write_killable(&mm->mmap_sem))
> >  		return -EINTR;
> 
> I gave this patch a try but the system doesn't boot.
> Unfortunately, I don't have the stacktrace on hand, but I'll get back to it tomorrow.

This is more than unexpected. The patch merely move the alignment check
up. I will try to investigate some more but I am off for next four days
and won't be online most of the time.

Btw. does the same happen if you keep do_brk helper and add the length
sanitization there as well?
Oscar Salvador July 5, 2018, 7:18 a.m. UTC | #4
> So, indeed "bss" needs to be aligned.
> But ELF_PAGESTART() or ELF_PAGEALIGN(), which one to use?
> 
> #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
> #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
> 
> Is
> 
> -	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
> -			    ELF_MIN_ALIGN - 1);
> +	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
> 
> suggesting that
> 
> -	bss = eppnt->p_memsz + eppnt->p_vaddr;
> +	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
> 
> is the right choice? I don't know...

Yes, I think that ELF_PAGEALIGN is the right choice here.
Given that bss is 0x7bf88676, using ELF_PAGESTART aligns it but backwards, while ELF_PAGEALIGN does
the right thing:

bss = 0x7bf88676
ELF_PAGESTART (bss) = 0x7bf88000
ELF_PAGEALIGN (bss) = 0x7bf89000
Oscar Salvador July 5, 2018, 7:18 a.m. UTC | #5
> This is more than unexpected. The patch merely move the alignment check
> up. I will try to investigate some more but I am off for next four days
> and won't be online most of the time.
> 
> Btw. does the same happen if you keep do_brk helper and add the length
> sanitization there as well?

I will give it a try and I will let you know.
diff mbox

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index 9859cd4e19b9..56ad19cf2aea 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -186,8 +186,8 @@  static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	return next;
 }
 
-static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
-
+static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
+		struct list_head *uf);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
 	unsigned long retval;
@@ -245,7 +245,7 @@  SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
+	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
 		goto out;
 
 set_brk:
@@ -2939,12 +2939,6 @@  static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
 	int error;
 
-	len = PAGE_ALIGN(request);
-	if (len < request)
-		return -ENOMEM;
-	if (!len)
-		return 0;
-
 	/* Until we need other flags, refuse anything except VM_EXEC. */
 	if ((flags & (~VM_EXEC)) != 0)
 		return -EINVAL;
@@ -3016,18 +3010,20 @@  static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
 	return 0;
 }
 
-static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
-{
-	return do_brk_flags(addr, len, 0, uf);
-}
-
-int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
+int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
+	unsigned long len;
 	int ret;
 	bool populate;
 	LIST_HEAD(uf);
 
+	len = PAGE_ALIGN(request);
+	if (len < request)
+		return -ENOMEM;
+	if (!len)
+		return 0;
+
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;