diff mbox series

kernel/fs: last check for exec credentials on NOEXEC mount

Message ID 20240801120745.13318-1-wojciech.gladysz@infogain.com (mailing list archive)
State New
Headers show
Series kernel/fs: last check for exec credentials on NOEXEC mount | expand

Commit Message

Wojciech Gładysz Aug. 1, 2024, 12:07 p.m. UTC
Test case: thread mounts NOEXEC fuse to a file being executed.
WARN_ON_ONCE is triggered yielding panic for some config.
Add a check to security_bprm_creds_for_exec(bprm).

Stack trace:
------------[ cut here ]------------
WARNING: CPU: 0 PID: 2736 at fs/exec.c:933 do_open_execat+0x311/0x710 fs/exec.c:932
Modules linked in:
CPU: 0 PID: 2736 Comm: syz-executor384 Not tainted 5.10.0-syzkaller #0
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
RIP: 0010:do_open_execat+0x311/0x710 fs/exec.c:932
Code: 89 de e8 02 b1 a1 ff 31 ff 89 de e8 f9 b0 a1 ff 45 84 ff 75 2e 45 85 ed 0f 8f ed 03 00 00 e8 56 ae a1 ff eb bd e8 4f ae a1 ff <0f> 0b 48 c7 c3 f3 ff ff ff 4c 89 f7 e8 9e cb fe ff 49 89 de e9 2d
RSP: 0018:ffffc90008e07c20 EFLAGS: 00010293
RAX: ffffffff82131ac6 RBX: 0000000000000004 RCX: ffff88801a6611c0
RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000000
RBP: ffffc90008e07cf0 R08: ffffffff8213173f R09: ffffc90008e07aa0
R10: 0000000000000000 R11: dffffc0000000001 R12: ffff8880115810e0
R13: dffffc0000000000 R14: ffff88801122c040 R15: ffffc90008e07c60
FS:  00007f9e283ce6c0(0000) GS:ffff888058a00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f9e2848600a CR3: 00000000139de000 CR4: 0000000000352ef0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 bprm_execve+0x60b/0x1c40 fs/exec.c:1939
 do_execveat_common+0x5a6/0x770 fs/exec.c:2077
 do_execve fs/exec.c:2147 [inline]
 __do_sys_execve fs/exec.c:2223 [inline]
 __se_sys_execve fs/exec.c:2218 [inline]
 __x64_sys_execve+0x92/0xb0 fs/exec.c:2218
 do_syscall_64+0x6d/0xa0 arch/x86/entry/common.c:62
 entry_SYSCALL_64_after_hwframe+0x61/0xcb
RIP: 0033:0x7f9e2842f299
Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f9e283ce218 EFLAGS: 00000246 ORIG_RAX: 000000000000003b
RAX: ffffffffffffffda RBX: 00007f9e284bd3f8 RCX: 00007f9e2842f299
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000400
RBP: 00007f9e284bd3f0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9e2848a134
R13: 0030656c69662f2e R14: 00007ffc819a23d0 R15: 00007f9e28488130

Signed-off-by: Wojciech Gładysz <wojciech.gladysz@infogain.com>
---
 fs/exec.c | 42 +++++++++++++++++++-----------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

Comments

Josef Bacik Aug. 1, 2024, 2:07 p.m. UTC | #1
On Thu, Aug 01, 2024 at 02:07:45PM +0200, Wojciech Gładysz wrote:
> Test case: thread mounts NOEXEC fuse to a file being executed.
> WARN_ON_ONCE is triggered yielding panic for some config.
> Add a check to security_bprm_creds_for_exec(bprm).
> 

Need more detail here, a script or something to describe the series of events
that gets us here, I can't quite figure out how to do this.

> Stack trace:
> ------------[ cut here ]------------
> WARNING: CPU: 0 PID: 2736 at fs/exec.c:933 do_open_execat+0x311/0x710 fs/exec.c:932
> Modules linked in:
> CPU: 0 PID: 2736 Comm: syz-executor384 Not tainted 5.10.0-syzkaller #0
> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
> RIP: 0010:do_open_execat+0x311/0x710 fs/exec.c:932
> Code: 89 de e8 02 b1 a1 ff 31 ff 89 de e8 f9 b0 a1 ff 45 84 ff 75 2e 45 85 ed 0f 8f ed 03 00 00 e8 56 ae a1 ff eb bd e8 4f ae a1 ff <0f> 0b 48 c7 c3 f3 ff ff ff 4c 89 f7 e8 9e cb fe ff 49 89 de e9 2d
> RSP: 0018:ffffc90008e07c20 EFLAGS: 00010293
> RAX: ffffffff82131ac6 RBX: 0000000000000004 RCX: ffff88801a6611c0
> RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000000
> RBP: ffffc90008e07cf0 R08: ffffffff8213173f R09: ffffc90008e07aa0
> R10: 0000000000000000 R11: dffffc0000000001 R12: ffff8880115810e0
> R13: dffffc0000000000 R14: ffff88801122c040 R15: ffffc90008e07c60
> FS:  00007f9e283ce6c0(0000) GS:ffff888058a00000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007f9e2848600a CR3: 00000000139de000 CR4: 0000000000352ef0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> Call Trace:
>  bprm_execve+0x60b/0x1c40 fs/exec.c:1939
>  do_execveat_common+0x5a6/0x770 fs/exec.c:2077
>  do_execve fs/exec.c:2147 [inline]
>  __do_sys_execve fs/exec.c:2223 [inline]
>  __se_sys_execve fs/exec.c:2218 [inline]
>  __x64_sys_execve+0x92/0xb0 fs/exec.c:2218
>  do_syscall_64+0x6d/0xa0 arch/x86/entry/common.c:62
>  entry_SYSCALL_64_after_hwframe+0x61/0xcb
> RIP: 0033:0x7f9e2842f299
> Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
> RSP: 002b:00007f9e283ce218 EFLAGS: 00000246 ORIG_RAX: 000000000000003b
> RAX: ffffffffffffffda RBX: 00007f9e284bd3f8 RCX: 00007f9e2842f299
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000400
> RBP: 00007f9e284bd3f0 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9e2848a134
> R13: 0030656c69662f2e R14: 00007ffc819a23d0 R15: 00007f9e28488130
> 
> Signed-off-by: Wojciech Gładysz <wojciech.gladysz@infogain.com>
> ---
>  fs/exec.c | 42 +++++++++++++++++++-----------------------
>  1 file changed, 19 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index a126e3d1cacb..0cc6a7d033a1 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -953,8 +953,6 @@ EXPORT_SYMBOL(transfer_args_to_stack);
>   */
>  static struct file *do_open_execat(int fd, struct filename *name, int flags)
>  {
> -	struct file *file;
> -	int err;
>  	struct open_flags open_exec_flags = {
>  		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
>  		.acc_mode = MAY_EXEC,
> @@ -969,26 +967,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
>  	if (flags & AT_EMPTY_PATH)
>  		open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
>  
> -	file = do_filp_open(fd, name, &open_exec_flags);
> -	if (IS_ERR(file))
> -		goto out;
> -
> -	/*
> -	 * may_open() has already checked for this, so it should be
> -	 * impossible to trip now. But we need to be extra cautious
> -	 * and check again at the very end too.
> -	 */
> -	err = -EACCES;
> -	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
> -			 path_noexec(&file->f_path)))
> -		goto exit;
> -

This still needs to be left here to catch any bad actors in the future.  Thanks,

Josef
Mateusz Guzik Aug. 1, 2024, 3:15 p.m. UTC | #2
On Thu, Aug 01, 2024 at 10:07:39AM -0400, Josef Bacik wrote:
> On Thu, Aug 01, 2024 at 02:07:45PM +0200, Wojciech Gładysz wrote:
> > Test case: thread mounts NOEXEC fuse to a file being executed.
> > WARN_ON_ONCE is triggered yielding panic for some config.
> > Add a check to security_bprm_creds_for_exec(bprm).
> > 
> 
> Need more detail here, a script or something to describe the series of events
> that gets us here, I can't quite figure out how to do this.
> 
> > Stack trace:
> > ------------[ cut here ]------------
> > WARNING: CPU: 0 PID: 2736 at fs/exec.c:933 do_open_execat+0x311/0x710 fs/exec.c:932
> > Modules linked in:
> > CPU: 0 PID: 2736 Comm: syz-executor384 Not tainted 5.10.0-syzkaller #0
> > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
> > RIP: 0010:do_open_execat+0x311/0x710 fs/exec.c:932
> > Code: 89 de e8 02 b1 a1 ff 31 ff 89 de e8 f9 b0 a1 ff 45 84 ff 75 2e 45 85 ed 0f 8f ed 03 00 00 e8 56 ae a1 ff eb bd e8 4f ae a1 ff <0f> 0b 48 c7 c3 f3 ff ff ff 4c 89 f7 e8 9e cb fe ff 49 89 de e9 2d
> > RSP: 0018:ffffc90008e07c20 EFLAGS: 00010293
> > RAX: ffffffff82131ac6 RBX: 0000000000000004 RCX: ffff88801a6611c0
> > RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000000
> > RBP: ffffc90008e07cf0 R08: ffffffff8213173f R09: ffffc90008e07aa0
> > R10: 0000000000000000 R11: dffffc0000000001 R12: ffff8880115810e0
> > R13: dffffc0000000000 R14: ffff88801122c040 R15: ffffc90008e07c60
> > FS:  00007f9e283ce6c0(0000) GS:ffff888058a00000(0000) knlGS:0000000000000000
> > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > CR2: 00007f9e2848600a CR3: 00000000139de000 CR4: 0000000000352ef0
> > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > Call Trace:
> >  bprm_execve+0x60b/0x1c40 fs/exec.c:1939
> >  do_execveat_common+0x5a6/0x770 fs/exec.c:2077
> >  do_execve fs/exec.c:2147 [inline]
> >  __do_sys_execve fs/exec.c:2223 [inline]
> >  __se_sys_execve fs/exec.c:2218 [inline]
> >  __x64_sys_execve+0x92/0xb0 fs/exec.c:2218
> >  do_syscall_64+0x6d/0xa0 arch/x86/entry/common.c:62
> >  entry_SYSCALL_64_after_hwframe+0x61/0xcb
> > RIP: 0033:0x7f9e2842f299
> > Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
> > RSP: 002b:00007f9e283ce218 EFLAGS: 00000246 ORIG_RAX: 000000000000003b
> > RAX: ffffffffffffffda RBX: 00007f9e284bd3f8 RCX: 00007f9e2842f299
> > RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000400
> > RBP: 00007f9e284bd3f0 R08: 0000000000000000 R09: 0000000000000000
> > R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9e2848a134
> > R13: 0030656c69662f2e R14: 00007ffc819a23d0 R15: 00007f9e28488130
> > 
> > Signed-off-by: Wojciech Gładysz <wojciech.gladysz@infogain.com>
> > ---
> >  fs/exec.c | 42 +++++++++++++++++++-----------------------
> >  1 file changed, 19 insertions(+), 23 deletions(-)
> > 
> > diff --git a/fs/exec.c b/fs/exec.c
> > index a126e3d1cacb..0cc6a7d033a1 100644
> > --- a/fs/exec.c
> > +++ b/fs/exec.c
> > @@ -953,8 +953,6 @@ EXPORT_SYMBOL(transfer_args_to_stack);
> >   */
> >  static struct file *do_open_execat(int fd, struct filename *name, int flags)
> >  {
> > -	struct file *file;
> > -	int err;
> >  	struct open_flags open_exec_flags = {
> >  		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
> >  		.acc_mode = MAY_EXEC,
> > @@ -969,26 +967,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
> >  	if (flags & AT_EMPTY_PATH)
> >  		open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
> >  
> > -	file = do_filp_open(fd, name, &open_exec_flags);
> > -	if (IS_ERR(file))
> > -		goto out;
> > -
> > -	/*
> > -	 * may_open() has already checked for this, so it should be
> > -	 * impossible to trip now. But we need to be extra cautious
> > -	 * and check again at the very end too.
> > -	 */
> > -	err = -EACCES;
> > -	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
> > -			 path_noexec(&file->f_path)))
> > -		goto exit;
> > -
> 
> This still needs to be left here to catch any bad actors in the future.  Thanks,
> 

This check is fundamentally racy.

path_noexec expands to the following:
        return (path->mnt->mnt_flags & MNT_NOEXEC) ||
               (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);

An exec racing against remount setting the noexec flag can correctly
conclude the file can be execed and then trip over the check later if
the flag showed up in the meantime.

This is not fuse-specific and I disagree with the posted patch as well.

The snippet here tries to validate that permissions were correctly checked
at some point, but it fails that goal in 2 ways:
- the inode + fs combo might just happen to be fine for exec, even if
  may_open *was not issued*
- there is the aforementioned race

If this thing here is supposed to stay, it instead needs to be
reimplemented with may_open setting a marker "checking for exec was
performed and execing is allowed" somewhere in struct file.

I'm not confident this is particularly valuable, but if it is, it
probably should hide behind some debug flags.
Kees Cook Aug. 2, 2024, 3:28 a.m. UTC | #3
On August 1, 2024 5:07:45 AM PDT, "Wojciech Gładysz" <wojciech.gladysz@infogain.com> wrote:
>Test case: thread mounts NOEXEC fuse to a file being executed.
>WARN_ON_ONCE is triggered yielding panic for some config.
>Add a check to security_bprm_creds_for_exec(bprm).

As others have noted, this is racy. I would still like to keep the redundant check as-is, but let's lower it from WARN to pr_warn_ratelimited, since it's a known race that can be reached from userspace.
Josef Bacik Aug. 2, 2024, 3:58 p.m. UTC | #4
On Thu, Aug 01, 2024 at 05:15:06PM +0200, Mateusz Guzik wrote:
> On Thu, Aug 01, 2024 at 10:07:39AM -0400, Josef Bacik wrote:
> > On Thu, Aug 01, 2024 at 02:07:45PM +0200, Wojciech Gładysz wrote:
> > > Test case: thread mounts NOEXEC fuse to a file being executed.
> > > WARN_ON_ONCE is triggered yielding panic for some config.
> > > Add a check to security_bprm_creds_for_exec(bprm).
> > > 
> > 
> > Need more detail here, a script or something to describe the series of events
> > that gets us here, I can't quite figure out how to do this.
> > 
> > > Stack trace:
> > > ------------[ cut here ]------------
> > > WARNING: CPU: 0 PID: 2736 at fs/exec.c:933 do_open_execat+0x311/0x710 fs/exec.c:932
> > > Modules linked in:
> > > CPU: 0 PID: 2736 Comm: syz-executor384 Not tainted 5.10.0-syzkaller #0
> > > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014
> > > RIP: 0010:do_open_execat+0x311/0x710 fs/exec.c:932
> > > Code: 89 de e8 02 b1 a1 ff 31 ff 89 de e8 f9 b0 a1 ff 45 84 ff 75 2e 45 85 ed 0f 8f ed 03 00 00 e8 56 ae a1 ff eb bd e8 4f ae a1 ff <0f> 0b 48 c7 c3 f3 ff ff ff 4c 89 f7 e8 9e cb fe ff 49 89 de e9 2d
> > > RSP: 0018:ffffc90008e07c20 EFLAGS: 00010293
> > > RAX: ffffffff82131ac6 RBX: 0000000000000004 RCX: ffff88801a6611c0
> > > RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000000
> > > RBP: ffffc90008e07cf0 R08: ffffffff8213173f R09: ffffc90008e07aa0
> > > R10: 0000000000000000 R11: dffffc0000000001 R12: ffff8880115810e0
> > > R13: dffffc0000000000 R14: ffff88801122c040 R15: ffffc90008e07c60
> > > FS:  00007f9e283ce6c0(0000) GS:ffff888058a00000(0000) knlGS:0000000000000000
> > > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > CR2: 00007f9e2848600a CR3: 00000000139de000 CR4: 0000000000352ef0
> > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > > Call Trace:
> > >  bprm_execve+0x60b/0x1c40 fs/exec.c:1939
> > >  do_execveat_common+0x5a6/0x770 fs/exec.c:2077
> > >  do_execve fs/exec.c:2147 [inline]
> > >  __do_sys_execve fs/exec.c:2223 [inline]
> > >  __se_sys_execve fs/exec.c:2218 [inline]
> > >  __x64_sys_execve+0x92/0xb0 fs/exec.c:2218
> > >  do_syscall_64+0x6d/0xa0 arch/x86/entry/common.c:62
> > >  entry_SYSCALL_64_after_hwframe+0x61/0xcb
> > > RIP: 0033:0x7f9e2842f299
> > > Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48
> > > RSP: 002b:00007f9e283ce218 EFLAGS: 00000246 ORIG_RAX: 000000000000003b
> > > RAX: ffffffffffffffda RBX: 00007f9e284bd3f8 RCX: 00007f9e2842f299
> > > RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000400
> > > RBP: 00007f9e284bd3f0 R08: 0000000000000000 R09: 0000000000000000
> > > R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9e2848a134
> > > R13: 0030656c69662f2e R14: 00007ffc819a23d0 R15: 00007f9e28488130
> > > 
> > > Signed-off-by: Wojciech Gładysz <wojciech.gladysz@infogain.com>
> > > ---
> > >  fs/exec.c | 42 +++++++++++++++++++-----------------------
> > >  1 file changed, 19 insertions(+), 23 deletions(-)
> > > 
> > > diff --git a/fs/exec.c b/fs/exec.c
> > > index a126e3d1cacb..0cc6a7d033a1 100644
> > > --- a/fs/exec.c
> > > +++ b/fs/exec.c
> > > @@ -953,8 +953,6 @@ EXPORT_SYMBOL(transfer_args_to_stack);
> > >   */
> > >  static struct file *do_open_execat(int fd, struct filename *name, int flags)
> > >  {
> > > -	struct file *file;
> > > -	int err;
> > >  	struct open_flags open_exec_flags = {
> > >  		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
> > >  		.acc_mode = MAY_EXEC,
> > > @@ -969,26 +967,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
> > >  	if (flags & AT_EMPTY_PATH)
> > >  		open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
> > >  
> > > -	file = do_filp_open(fd, name, &open_exec_flags);
> > > -	if (IS_ERR(file))
> > > -		goto out;
> > > -
> > > -	/*
> > > -	 * may_open() has already checked for this, so it should be
> > > -	 * impossible to trip now. But we need to be extra cautious
> > > -	 * and check again at the very end too.
> > > -	 */
> > > -	err = -EACCES;
> > > -	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
> > > -			 path_noexec(&file->f_path)))
> > > -		goto exit;
> > > -
> > 
> > This still needs to be left here to catch any bad actors in the future.  Thanks,
> > 
> 
> This check is fundamentally racy.
> 
> path_noexec expands to the following:
>         return (path->mnt->mnt_flags & MNT_NOEXEC) ||
>                (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
> 
> An exec racing against remount setting the noexec flag can correctly
> conclude the file can be execed and then trip over the check later if
> the flag showed up in the meantime.
> 
> This is not fuse-specific and I disagree with the posted patch as well.
> 
> The snippet here tries to validate that permissions were correctly checked
> at some point, but it fails that goal in 2 ways:
> - the inode + fs combo might just happen to be fine for exec, even if
>   may_open *was not issued*
> - there is the aforementioned race
> 
> If this thing here is supposed to stay, it instead needs to be
> reimplemented with may_open setting a marker "checking for exec was
> performed and execing is allowed" somewhere in struct file.

This sounds like a reasonable alternative solution.

> 
> I'm not confident this is particularly valuable, but if it is, it
> probably should hide behind some debug flags.

I'm still going to disagree here, putting it behind a debug flag means it'll
never get caught, and it obviously proved valuable because we're discussing this
particular case.

Is it racy? Yup sure.  I think that your solution is the right way to fix it,
and then we can have a 

WARN_ON(!(file->f_mode & FMODE_NO_EXEC_CHECKED));

or however we choose to flag the file, that way we are no longer racing with the
mount flags and only validating that a check that should have already occurred
has in fact occurred.  Thanks,

Josef
Mateusz Guzik Aug. 3, 2024, 6:29 a.m. UTC | #5
On Fri, Aug 2, 2024 at 5:59 PM Josef Bacik <josef@toxicpanda.com> wrote:
>
> On Thu, Aug 01, 2024 at 05:15:06PM +0200, Mateusz Guzik wrote:
> > I'm not confident this is particularly valuable, but if it is, it
> > probably should hide behind some debug flags.
>
> I'm still going to disagree here, putting it behind a debug flag means it'll
> never get caught, and it obviously proved valuable because we're discussing this
> particular case.
>
> Is it racy? Yup sure.  I think that your solution is the right way to fix it,
> and then we can have a
>
> WARN_ON(!(file->f_mode & FMODE_NO_EXEC_CHECKED));
>
> or however we choose to flag the file, that way we are no longer racing with the
> mount flags and only validating that a check that should have already occurred
> has in fact occurred.  Thanks,
>

To my understanding the submitter ran into the thing tripping over the
racy check, so this check did not find a real bug elsewhere in this
instance.

The only case that I know of where this fired and found a real problem
was after ntfs constructed a bogus inode:
https://lore.kernel.org/linux-fsdevel/20230818191239.3cprv2wncyyy5yxj@f/

But that is a deficiency in debug facilities in the vfs layer -- this
only tripped over because syzkaller tried to exec a sufficiently bogus
inode, while the vfs layer should have prevented that from happening
to begin with.

There should be well-defined spot where the filesystem claims the
inode in fully constructed at which the vfs layer verifies its state
(thus in particular i_mode). If implemented it would have caught the
problem before the inode escaped ntfs and presumably would find some
other problems which the kernel as is does not correctly report. This
in part depends on someone(tm) implementing VFS_* debug macros first,
preferably in a way which can dump inode info on assertion failure.

I have this at the bottom on a TODO list for a rainy day.

However, it's not my call to make as to what to do here. I outlined my
$0,04 and I'm buggering off.
Christian Brauner Aug. 5, 2024, 9:26 a.m. UTC | #6
On Sat, Aug 03, 2024 at 08:29:17AM GMT, Mateusz Guzik wrote:
> On Fri, Aug 2, 2024 at 5:59 PM Josef Bacik <josef@toxicpanda.com> wrote:
> >
> > On Thu, Aug 01, 2024 at 05:15:06PM +0200, Mateusz Guzik wrote:
> > > I'm not confident this is particularly valuable, but if it is, it
> > > probably should hide behind some debug flags.
> >
> > I'm still going to disagree here, putting it behind a debug flag means it'll
> > never get caught, and it obviously proved valuable because we're discussing this
> > particular case.
> >
> > Is it racy? Yup sure.  I think that your solution is the right way to fix it,
> > and then we can have a
> >
> > WARN_ON(!(file->f_mode & FMODE_NO_EXEC_CHECKED));
> >
> > or however we choose to flag the file, that way we are no longer racing with the
> > mount flags and only validating that a check that should have already occurred
> > has in fact occurred.  Thanks,
> >
> 
> To my understanding the submitter ran into the thing tripping over the
> racy check, so this check did not find a real bug elsewhere in this
> instance.

Mateusz is right. That check is mostly nonsensical. Nothing will protect
against mount properties being changed if the caller isn't claiming
write access to the mount. Which this code doesn't do (And can't do (or
anything like it) because it would cause spurious remount failures just
because someone execs something.).

So 0fd338b2d2cd ("exec: move path_noexec() check earlier") introduced
WARN_ON_ONCE(). I suspect that it simply wasn't clear that mount
properties can change while a is file held open if no write access was
claimed.

Stuff like noexec, nodev or whatever can change anytime if e.g., just
read access was requested. In other words, successful permission
checking during path lookup doesn't mean that permission checking
wouldn't fail if one redid the checks immediately after.

I think it probably never triggered because noexec -> exec remounts are
rarely done and the timing would have to be rather precise.

I think the immediate solution is to limit the scope of the
WARN_ON_ONCE() to the ->i_mode check.

diff --git a/fs/exec.c b/fs/exec.c
index a126e3d1cacb..12914e14132d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -145,13 +145,14 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
                goto out;

        /*
-        * may_open() has already checked for this, so it should be
-        * impossible to trip now. But we need to be extra cautious
-        * and check again at the very end too.
+        * Safety paranoia: Redo the check whether the mount isn't
+        * noexec so it's as close the the actual open() as possible.
+        * may_open() has already check this but the mount properties
+        * may have already changed since then.
         */
-       error = -EACCES;
-       if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
-                        path_noexec(&file->f_path)))
+       err = -EACCES;
+       if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
+           path_noexec(&file->f_path))
                goto exit;

        error = -ENOEXEC;
@@ -974,13 +975,14 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
                goto out;

        /*
-        * may_open() has already checked for this, so it should be
-        * impossible to trip now. But we need to be extra cautious
-        * and check again at the very end too.
+        * Safety paranoia: Redo the check whether the mount isn't
+        * noexec so it's as close the the actual open() as possible.
+        * may_open() has already check this but the mount properties
+        * may have already changed since then.
         */
        err = -EACCES;
-       if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
-                        path_noexec(&file->f_path)))
+       if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
+           path_noexec(&file->f_path))
                goto exit;

 out:
diff mbox series

Patch

diff --git a/fs/exec.c b/fs/exec.c
index a126e3d1cacb..0cc6a7d033a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -953,8 +953,6 @@  EXPORT_SYMBOL(transfer_args_to_stack);
  */
 static struct file *do_open_execat(int fd, struct filename *name, int flags)
 {
-	struct file *file;
-	int err;
 	struct open_flags open_exec_flags = {
 		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
 		.acc_mode = MAY_EXEC,
@@ -969,26 +967,7 @@  static struct file *do_open_execat(int fd, struct filename *name, int flags)
 	if (flags & AT_EMPTY_PATH)
 		open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
 
-	file = do_filp_open(fd, name, &open_exec_flags);
-	if (IS_ERR(file))
-		goto out;
-
-	/*
-	 * may_open() has already checked for this, so it should be
-	 * impossible to trip now. But we need to be extra cautious
-	 * and check again at the very end too.
-	 */
-	err = -EACCES;
-	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
-			 path_noexec(&file->f_path)))
-		goto exit;
-
-out:
-	return file;
-
-exit:
-	fput(file);
-	return ERR_PTR(err);
+	return do_filp_open(fd, name, &open_exec_flags);
 }
 
 /**
@@ -1730,6 +1709,23 @@  static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
 	}
 }
 
+static int bprm_creds_for_exec(struct linux_binprm *bprm)
+{
+	struct file *file = bprm->file;
+
+	/*
+	 * Do not execute a regular file on NOEXEC mount.
+	 * May_open() has already checked for this but a NOEXEC mount
+	 * operation may have happened to the file since then (fuse).
+	 * This is the last check point.
+	 */
+	if (!S_ISREG(file_inode(file)->i_mode) ||
+			path_noexec(&file->f_path))
+		return -EACCES;
+
+	return security_bprm_creds_for_exec(bprm);
+}
+
 /*
  * Compute brpm->cred based upon the final binary.
  */
@@ -1907,7 +1903,7 @@  static int bprm_execve(struct linux_binprm *bprm)
 	sched_exec();
 
 	/* Set the unchanging part of bprm->cred */
-	retval = security_bprm_creds_for_exec(bprm);
+	retval = bprm_creds_for_exec(bprm);
 	if (retval)
 		goto out;