diff mbox series

[RFC] fs: Add vfs_masks_device_ioctl*() helpers

Message ID 20240219183539.2926165-1-mic@digikod.net (mailing list archive)
State Handled Elsewhere
Headers show
Series [RFC] fs: Add vfs_masks_device_ioctl*() helpers | expand

Commit Message

Mickaël Salaün Feb. 19, 2024, 6:35 p.m. UTC
vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful
to differenciate between device driver IOCTL implementations and
filesystem ones.  The goal is to be able to filter well-defined IOCTLs
from per-device (i.e. namespaced) IOCTLs and control such access.

Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap
compat_ioctl() calls and handle error conversions.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Günther Noack <gnoack@google.com>
---
 fs/ioctl.c         | 101 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/fs.h |  12 ++++++
 2 files changed, 105 insertions(+), 8 deletions(-)

Comments

Mickaël Salaün March 1, 2024, 1:42 p.m. UTC | #1
Arnd, Christian, are you OK with this approach to identify VFS IOCTLs?

If yes, Günther should include it in his next patch series.

On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote:
> vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful
> to differenciate between device driver IOCTL implementations and
> filesystem ones.  The goal is to be able to filter well-defined IOCTLs
> from per-device (i.e. namespaced) IOCTLs and control such access.
> 
> Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap
> compat_ioctl() calls and handle error conversions.
> 
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Günther Noack <gnoack@google.com>
> ---
>  fs/ioctl.c         | 101 +++++++++++++++++++++++++++++++++++++++++----
>  include/linux/fs.h |  12 ++++++
>  2 files changed, 105 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/ioctl.c b/fs/ioctl.c
> index 76cf22ac97d7..f72c8da47d21 100644
> --- a/fs/ioctl.c
> +++ b/fs/ioctl.c
> @@ -763,6 +763,38 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp)
>  	return err;
>  }
>  
> +/*
> + * Safeguard to maintain a list of valid IOCTLs handled by do_vfs_ioctl()
> + * instead of def_blk_fops or def_chr_fops (see init_special_inode).
> + */
> +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd)
> +{
> +	switch (cmd) {
> +	case FIOCLEX:
> +	case FIONCLEX:
> +	case FIONBIO:
> +	case FIOASYNC:
> +	case FIOQSIZE:
> +	case FIFREEZE:
> +	case FITHAW:
> +	case FS_IOC_FIEMAP:
> +	case FIGETBSZ:
> +	case FICLONE:
> +	case FICLONERANGE:
> +	case FIDEDUPERANGE:
> +	/* FIONREAD is forwarded to device implementations. */
> +	case FS_IOC_GETFLAGS:
> +	case FS_IOC_SETFLAGS:
> +	case FS_IOC_FSGETXATTR:
> +	case FS_IOC_FSSETXATTR:
> +	/* file_ioctl()'s IOCTLs are forwarded to device implementations. */
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +EXPORT_SYMBOL(vfs_masked_device_ioctl);
> +
>  /*
>   * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
>   * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
> @@ -858,6 +890,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
>  {
>  	struct fd f = fdget(fd);
>  	int error;
> +	const struct inode *inode;
> +	bool is_device;
>  
>  	if (!f.file)
>  		return -EBADF;
> @@ -866,9 +900,18 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
>  	if (error)
>  		goto out;
>  
> +	inode = file_inode(f.file);
> +	is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
> +	if (is_device && !vfs_masked_device_ioctl(cmd)) {
> +		error = vfs_ioctl(f.file, cmd, arg);
> +		goto out;
> +	}
> +
>  	error = do_vfs_ioctl(f.file, fd, cmd, arg);
> -	if (error == -ENOIOCTLCMD)
> +	if (error == -ENOIOCTLCMD) {
> +		WARN_ON_ONCE(is_device);
>  		error = vfs_ioctl(f.file, cmd, arg);
> +	}
>  
>  out:
>  	fdput(f);
> @@ -911,11 +954,49 @@ long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  }
>  EXPORT_SYMBOL(compat_ptr_ioctl);
>  
> +static long ioctl_compat(struct file *filp, unsigned int cmd,
> +			 compat_ulong_t arg)
> +{
> +	int error = -ENOTTY;
> +
> +	if (!filp->f_op->compat_ioctl)
> +		goto out;
> +
> +	error = filp->f_op->compat_ioctl(filp, cmd, arg);
> +	if (error == -ENOIOCTLCMD)
> +		error = -ENOTTY;
> +
> +out:
> +	return error;
> +}
> +
> +__attribute_const__ bool vfs_masked_device_ioctl_compat(const unsigned int cmd)
> +{
> +	switch (cmd) {
> +	case FICLONE:
> +#if defined(CONFIG_X86_64)
> +	case FS_IOC_RESVSP_32:
> +	case FS_IOC_RESVSP64_32:
> +	case FS_IOC_UNRESVSP_32:
> +	case FS_IOC_UNRESVSP64_32:
> +	case FS_IOC_ZERO_RANGE_32:
> +#endif
> +	case FS_IOC32_GETFLAGS:
> +	case FS_IOC32_SETFLAGS:
> +		return true;
> +	default:
> +		return vfs_masked_device_ioctl(cmd);
> +	}
> +}
> +EXPORT_SYMBOL(vfs_masked_device_ioctl_compat);
> +
>  COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
>  		       compat_ulong_t, arg)
>  {
>  	struct fd f = fdget(fd);
>  	int error;
> +	const struct inode *inode;
> +	bool is_device;
>  
>  	if (!f.file)
>  		return -EBADF;
> @@ -924,6 +1005,13 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
>  	if (error)
>  		goto out;
>  
> +	inode = file_inode(f.file);
> +	is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
> +	if (is_device && !vfs_masked_device_ioctl_compat(cmd)) {
> +		error = ioctl_compat(f.file, cmd, arg);
> +		goto out;
> +	}
> +
>  	switch (cmd) {
>  	/* FICLONE takes an int argument, so don't use compat_ptr() */
>  	case FICLONE:
> @@ -964,13 +1052,10 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
>  	default:
>  		error = do_vfs_ioctl(f.file, fd, cmd,
>  				     (unsigned long)compat_ptr(arg));
> -		if (error != -ENOIOCTLCMD)
> -			break;
> -
> -		if (f.file->f_op->compat_ioctl)
> -			error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
> -		if (error == -ENOIOCTLCMD)
> -			error = -ENOTTY;
> +		if (error == -ENOIOCTLCMD) {
> +			WARN_ON_ONCE(is_device);
> +			error = ioctl_compat(f.file, cmd, arg);
> +		}
>  		break;
>  	}
>  
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ed5966a70495..b620d0c00e16 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1902,6 +1902,18 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
>  #define compat_ptr_ioctl NULL
>  #endif
>  
> +extern __attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd);
> +#ifdef CONFIG_COMPAT
> +extern __attribute_const__ bool
> +vfs_masked_device_ioctl_compat(const unsigned int cmd);
> +#else /* CONFIG_COMPAT */
> +static inline __attribute_const__ bool
> +vfs_masked_device_ioctl_compat(const unsigned int cmd)
> +{
> +	return vfs_masked_device_ioctl(cmd);
> +}
> +#endif /* CONFIG_COMPAT */
> +
>  /*
>   * VFS file helper functions.
>   */
> -- 
> 2.43.0
> 
>
Arnd Bergmann March 1, 2024, 4:24 p.m. UTC | #2
On Mon, Feb 19, 2024, at 19:35, Mickaël Salaün wrote:
> vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful
> to differenciate between device driver IOCTL implementations and
> filesystem ones.  The goal is to be able to filter well-defined IOCTLs
> from per-device (i.e. namespaced) IOCTLs and control such access.
>
> Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap
> compat_ioctl() calls and handle error conversions.

I'm still a bit confused by what your goal is here. I see
the code getting more complex but don't see the payoff in this
patch.

> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Günther Noack <gnoack@google.com>

I assume the missing Signed-off-by is intentional while you are
still gathering feedback?

> +/*
> + * Safeguard to maintain a list of valid IOCTLs handled by 
> do_vfs_ioctl()
> + * instead of def_blk_fops or def_chr_fops (see init_special_inode).
> + */
> +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int 
> cmd)
> +{
> +	switch (cmd) {
> +	case FIOCLEX:
> +	case FIONCLEX:
> +	case FIONBIO:
> +	case FIOASYNC:
> +	case FIOQSIZE:
> +	case FIFREEZE:
> +	case FITHAW:
> +	case FS_IOC_FIEMAP:
> +	case FIGETBSZ:
> +	case FICLONE:
> +	case FICLONERANGE:
> +	case FIDEDUPERANGE:
> +	/* FIONREAD is forwarded to device implementations. */
> +	case FS_IOC_GETFLAGS:
> +	case FS_IOC_SETFLAGS:
> +	case FS_IOC_FSGETXATTR:
> +	case FS_IOC_FSSETXATTR:
> +	/* file_ioctl()'s IOCTLs are forwarded to device implementations. */
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +EXPORT_SYMBOL(vfs_masked_device_ioctl);

It looks like this gets added into the hot path of every
ioctl command, which is not ideal, especially when this
no longer gets inlined into the caller.

> +	inode = file_inode(f.file);
> +	is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
> +	if (is_device && !vfs_masked_device_ioctl(cmd)) {
> +		error = vfs_ioctl(f.file, cmd, arg);
> +		goto out;
> +	}

The S_ISBLK() || S_ISCHR() check here looks like it changes
behavior, at least for sockets. If that is intentional,
it should probably be a separate patch with a detailed
explanation.

>  	error = do_vfs_ioctl(f.file, fd, cmd, arg);
> -	if (error == -ENOIOCTLCMD)
> +	if (error == -ENOIOCTLCMD) {
> +		WARN_ON_ONCE(is_device);
>  		error = vfs_ioctl(f.file, cmd, arg);
> +	}

The WARN_ON_ONCE() looks like it can be triggered from
userspace, which is generally a bad idea.
 
> +extern __attribute_const__ bool vfs_masked_device_ioctl(const unsigned 
> int cmd);
> +#ifdef CONFIG_COMPAT
> +extern __attribute_const__ bool
> +vfs_masked_device_ioctl_compat(const unsigned int cmd);
> +#else /* CONFIG_COMPAT */
> +static inline __attribute_const__ bool
> +vfs_masked_device_ioctl_compat(const unsigned int cmd)
> +{
> +	return vfs_masked_device_ioctl(cmd);
> +}
> +#endif /* CONFIG_COMPAT */

I don't understand the purpose of the #else path here,
this should not be needed.

      Arnd
Mickaël Salaün March 1, 2024, 6:35 p.m. UTC | #3
On Fri, Mar 01, 2024 at 05:24:52PM +0100, Arnd Bergmann wrote:
> On Mon, Feb 19, 2024, at 19:35, Mickaël Salaün wrote:
> > vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful
> > to differenciate between device driver IOCTL implementations and
> > filesystem ones.  The goal is to be able to filter well-defined IOCTLs
> > from per-device (i.e. namespaced) IOCTLs and control such access.
> >
> > Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap
> > compat_ioctl() calls and handle error conversions.
> 
> I'm still a bit confused by what your goal is here. I see
> the code getting more complex but don't see the payoff in this
> patch.

The main idea is to be able to identify if an IOCTL is handled by a
device driver (i.e. block or character devices) or not.  This would be
used at least by Landlock to control such IOCTL (according to the
char/block device file, which can already be identified) while allowing
other VFS/FS IOCTLs.

> 
> > Cc: Arnd Bergmann <arnd@arndb.de>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Günther Noack <gnoack@google.com>
> 
> I assume the missing Signed-off-by is intentional while you are
> still gathering feedback?

No, I sent it too quickly and forgot to add it.

Signed-off-by: Mickaël Salaün <mic@digikod.net>

> 
> > +/*
> > + * Safeguard to maintain a list of valid IOCTLs handled by 
> > do_vfs_ioctl()
> > + * instead of def_blk_fops or def_chr_fops (see init_special_inode).
> > + */
> > +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int 
> > cmd)
> > +{
> > +	switch (cmd) {
> > +	case FIOCLEX:
> > +	case FIONCLEX:
> > +	case FIONBIO:
> > +	case FIOASYNC:
> > +	case FIOQSIZE:
> > +	case FIFREEZE:
> > +	case FITHAW:
> > +	case FS_IOC_FIEMAP:
> > +	case FIGETBSZ:
> > +	case FICLONE:
> > +	case FICLONERANGE:
> > +	case FIDEDUPERANGE:
> > +	/* FIONREAD is forwarded to device implementations. */
> > +	case FS_IOC_GETFLAGS:
> > +	case FS_IOC_SETFLAGS:
> > +	case FS_IOC_FSGETXATTR:
> > +	case FS_IOC_FSSETXATTR:
> > +	/* file_ioctl()'s IOCTLs are forwarded to device implementations. */
> > +		return true;
> > +	default:
> > +		return false;
> > +	}
> > +}
> > +EXPORT_SYMBOL(vfs_masked_device_ioctl);
> 
> It looks like this gets added into the hot path of every
> ioctl command, which is not ideal, especially when this
> no longer gets inlined into the caller.

I'm looking for a way to guarantee that the list of IOCTLs in this
helper will be kept up-to-date. This kind of run time check might be
too much though. Do you have other suggestions? Do you think a simple
comment to remind contributors to update this helper would be enough?
I guess VFS IOCTLs should not be added often, but I'm worried that this
list could get out of sync...

> 
> > +	inode = file_inode(f.file);
> > +	is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
> > +	if (is_device && !vfs_masked_device_ioctl(cmd)) {
> > +		error = vfs_ioctl(f.file, cmd, arg);
> > +		goto out;
> > +	}
> 
> The S_ISBLK() || S_ISCHR() check here looks like it changes
> behavior, at least for sockets. If that is intentional,
> it should probably be a separate patch with a detailed
> explanation.

I don't think this changes the behavior for sockets, and at least that's
not intentionnal.  This patch should not change the current behavior at
all.

The path to reach socket IOCTLs goes through a vfs_ioctl() call, which
is...

> 
> >  	error = do_vfs_ioctl(f.file, fd, cmd, arg);
> > -	if (error == -ENOIOCTLCMD)
> > +	if (error == -ENOIOCTLCMD) {
> > +		WARN_ON_ONCE(is_device);
> >  		error = vfs_ioctl(f.file, cmd, arg);

...here!

> > +	}
> 
> The WARN_ON_ONCE() looks like it can be triggered from
> userspace, which is generally a bad idea.

This WARN_ON_ONCE() should never be triggered because if the it is a
device IOCTL it goes through the previous vfs_ioctl() (for
device-specific command) call or through this do_vfs_ioctl() call (for
VFS-specific command).

>  
> > +extern __attribute_const__ bool vfs_masked_device_ioctl(const unsigned 
> > int cmd);
> > +#ifdef CONFIG_COMPAT
> > +extern __attribute_const__ bool
> > +vfs_masked_device_ioctl_compat(const unsigned int cmd);
> > +#else /* CONFIG_COMPAT */
> > +static inline __attribute_const__ bool
> > +vfs_masked_device_ioctl_compat(const unsigned int cmd)
> > +{
> > +	return vfs_masked_device_ioctl(cmd);
> > +}
> > +#endif /* CONFIG_COMPAT */
> 
> I don't understand the purpose of the #else path here,
> this should not be needed.

Correct, this else branch is useless.

> 
>       Arnd
>
Günther Noack March 5, 2024, 6:13 p.m. UTC | #4
Hello!

More questions than answers in this code review, but maybe this discusison will
help to get a clearer picture about what we are going for here.

On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote:
> vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful
> to differenciate between device driver IOCTL implementations and
> filesystem ones.  The goal is to be able to filter well-defined IOCTLs
> from per-device (i.e. namespaced) IOCTLs and control such access.
> 
> Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap
> compat_ioctl() calls and handle error conversions.
> 
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Günther Noack <gnoack@google.com>
> ---
>  fs/ioctl.c         | 101 +++++++++++++++++++++++++++++++++++++++++----
>  include/linux/fs.h |  12 ++++++
>  2 files changed, 105 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/ioctl.c b/fs/ioctl.c
> index 76cf22ac97d7..f72c8da47d21 100644
> --- a/fs/ioctl.c
> +++ b/fs/ioctl.c
> @@ -763,6 +763,38 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp)
>  	return err;
>  }
>  
> +/*
> + * Safeguard to maintain a list of valid IOCTLs handled by do_vfs_ioctl()
> + * instead of def_blk_fops or def_chr_fops (see init_special_inode).
> + */
> +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd)
> +{
> +	switch (cmd) {
> +	case FIOCLEX:
> +	case FIONCLEX:
> +	case FIONBIO:
> +	case FIOASYNC:
> +	case FIOQSIZE:
> +	case FIFREEZE:
> +	case FITHAW:
> +	case FS_IOC_FIEMAP:
> +	case FIGETBSZ:
> +	case FICLONE:
> +	case FICLONERANGE:
> +	case FIDEDUPERANGE:
> +	/* FIONREAD is forwarded to device implementations. */
> +	case FS_IOC_GETFLAGS:
> +	case FS_IOC_SETFLAGS:
> +	case FS_IOC_FSGETXATTR:
> +	case FS_IOC_FSSETXATTR:
> +	/* file_ioctl()'s IOCTLs are forwarded to device implementations. */
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +EXPORT_SYMBOL(vfs_masked_device_ioctl);

[
Technical implementation notes about this function: the list of IOCTLs here are
the same ones which do_vfs_ioctl() implements directly.

There are only two cases in which do_vfs_ioctl() does more complicated handling:

(1) FIONREAD falls back to the device's ioctl implemenetation.
    Therefore, we omit FIONREAD in our own list - we do not want to allow that.
(2) The default case falls back to the file_ioctl() function, but *only* for
    S_ISREG() files, so it does not matter for the Landlock case.
]


## What we are actually trying to do (?)

Let me try to take a step back and paraphrase what I think we are *actually*
trying to do here -- please correct me if I am wrong about that:

I think what we *really* are trying to do is to control from the Landlock LSM
whether the filp->f_op->unlocked_ioctl() or filp->f_op->ioctl_compat()
operations are getting called for device files.

So in a world where we cared only about correctness, we could create a new LSM
hook security_file_vfs_ioctl(), which gets checked just before these two f_op
operations get called.  With that, we could permit all IOCTLs that are
implemented in fs/ioctl.c, and we could deny all IOCTL commands that are
implemented in the device implementation.

I guess the reasons why we are not using that approach are performance, and that
it might mess up the LSM hook interface with special cases that only Landlcok
needs?  But it seems like it would be easier to reason about..?  Or maybe we can
find a middle ground, where we have the existing hook return a special value
with the meaning "permit this IOCTL, but do not invoke the f_op hook"?


## What we implemented

Of course, the existing security_file_ioctl LSM hook works differently, and so
with that hook, we need to make our blocking decision purely based on the struct
file*, the IOCTL command number and the IOCTL argument.

So in order to make that decision correctly based on that information, we end up
listing all the IOCTLs which are directly(!) implemented in do_vfs_ioctl(),
because for Landlock, this is the list of IOCTL commands which is safe to permit
on device files.  And we need to keep that list in sync with fs/ioctl.c, which
is why it ended up in the same place in this commit.


(Is it maybe possible to check with a KUnit test whether such lists are in sync?
It sounds superficially like it should be feasible to create a device file which
records whether its ioctl implementation was called.  So we could at least check
that the Landlock command list is a subset of the do_vfs_ioctl() one.)


> +
>  /*
>   * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
>   * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
> @@ -858,6 +890,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
>  {
>  	struct fd f = fdget(fd);
>  	int error;
> +	const struct inode *inode;
> +	bool is_device;
>  
>  	if (!f.file)
>  		return -EBADF;
> @@ -866,9 +900,18 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
>  	if (error)
>  		goto out;
>  
> +	inode = file_inode(f.file);
> +	is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
> +	if (is_device && !vfs_masked_device_ioctl(cmd)) {
> +		error = vfs_ioctl(f.file, cmd, arg);
> +		goto out;
> +	}
> +
>  	error = do_vfs_ioctl(f.file, fd, cmd, arg);
> -	if (error == -ENOIOCTLCMD)
> +	if (error == -ENOIOCTLCMD) {
> +		WARN_ON_ONCE(is_device);
>  		error = vfs_ioctl(f.file, cmd, arg);
> +	}

It is not obvious at first that adding this list requires a change to the ioctl
syscall implementations.  If I understand this right, the idea is that you want
to be 100% sure that we are not calling vfs_ioctl() for the commands in that
list.  And there is a scenario where this could potentially happen:

do_vfs_ioctl() implements most things like this:

static int do_vfs_ioctl(...) {
	switch (cmd) {
	/* many cases like the following: */
	case FITHAW:
		return ioctl_fsthaw(filp);
	/* ... */
	}
	return -ENOIOCTLCMD;
}

So I believe the scenario you want to avoid is the one where ioctl_fsthaw() or
one of the other functions return -ENOIOCTLCMD by accident, and where that will
then make the surrounding syscall implementation fall back to vfs_ioctl()
despite the cmd being listed as safe for Landlock?  Is that right?

Looking at do_vfs_ioctl() and its helper functions, I am getting the impression
that -ENOIOCTLCMD is only supposed to be returned at the very end of it, but not
by any of the helper functions?  If that were the case, we could maybe just as
well just solve that problem local to do_vfs_ioctl()?

A bit inelegant maybe, but just to get the idea across:

static int sanitize_enoioctlcmd(int res) {
	if (res == -ENOIOCTLCMD)
		return ENOTTY;
	return res;
}

static int do_vfs_ioctl(...) {
	switch (cmd) {
	/* many cases like the following: */
	case FITHAW:
		return sanitize_enoioctlcmd(ioctl_fsthaw(filp));
	/* ... */
	}
	return -ENOIOCTLCMD;
}

Would that be better?

—Günther
Mickaël Salaün March 6, 2024, 1:47 p.m. UTC | #5
On Tue, Mar 05, 2024 at 07:13:33PM +0100, Günther Noack wrote:
> Hello!
> 
> More questions than answers in this code review, but maybe this discusison will
> help to get a clearer picture about what we are going for here.
> 
> On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote:
> > vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful
> > to differenciate between device driver IOCTL implementations and
> > filesystem ones.  The goal is to be able to filter well-defined IOCTLs
> > from per-device (i.e. namespaced) IOCTLs and control such access.
> > 
> > Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap
> > compat_ioctl() calls and handle error conversions.
> > 
> > Cc: Arnd Bergmann <arnd@arndb.de>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Cc: Günther Noack <gnoack@google.com>
> > ---
> >  fs/ioctl.c         | 101 +++++++++++++++++++++++++++++++++++++++++----
> >  include/linux/fs.h |  12 ++++++
> >  2 files changed, 105 insertions(+), 8 deletions(-)
> > 
> > diff --git a/fs/ioctl.c b/fs/ioctl.c
> > index 76cf22ac97d7..f72c8da47d21 100644
> > --- a/fs/ioctl.c
> > +++ b/fs/ioctl.c
> > @@ -763,6 +763,38 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp)
> >  	return err;
> >  }
> >  
> > +/*
> > + * Safeguard to maintain a list of valid IOCTLs handled by do_vfs_ioctl()
> > + * instead of def_blk_fops or def_chr_fops (see init_special_inode).
> > + */
> > +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd)
> > +{
> > +	switch (cmd) {
> > +	case FIOCLEX:
> > +	case FIONCLEX:
> > +	case FIONBIO:
> > +	case FIOASYNC:
> > +	case FIOQSIZE:
> > +	case FIFREEZE:
> > +	case FITHAW:
> > +	case FS_IOC_FIEMAP:
> > +	case FIGETBSZ:
> > +	case FICLONE:
> > +	case FICLONERANGE:
> > +	case FIDEDUPERANGE:
> > +	/* FIONREAD is forwarded to device implementations. */
> > +	case FS_IOC_GETFLAGS:
> > +	case FS_IOC_SETFLAGS:
> > +	case FS_IOC_FSGETXATTR:
> > +	case FS_IOC_FSSETXATTR:
> > +	/* file_ioctl()'s IOCTLs are forwarded to device implementations. */
> > +		return true;
> > +	default:
> > +		return false;
> > +	}
> > +}
> > +EXPORT_SYMBOL(vfs_masked_device_ioctl);
> 
> [
> Technical implementation notes about this function: the list of IOCTLs here are
> the same ones which do_vfs_ioctl() implements directly.
> 
> There are only two cases in which do_vfs_ioctl() does more complicated handling:
> 
> (1) FIONREAD falls back to the device's ioctl implemenetation.
>     Therefore, we omit FIONREAD in our own list - we do not want to allow that.
> (2) The default case falls back to the file_ioctl() function, but *only* for
>     S_ISREG() files, so it does not matter for the Landlock case.
> ]
> 
> 
> ## What we are actually trying to do (?)
> 
> Let me try to take a step back and paraphrase what I think we are *actually*
> trying to do here -- please correct me if I am wrong about that:
> 
> I think what we *really* are trying to do is to control from the Landlock LSM
> whether the filp->f_op->unlocked_ioctl() or filp->f_op->ioctl_compat()
> operations are getting called for device files.
> 
> So in a world where we cared only about correctness, we could create a new LSM
> hook security_file_vfs_ioctl(), which gets checked just before these two f_op
> operations get called.  With that, we could permit all IOCTLs that are
> implemented in fs/ioctl.c, and we could deny all IOCTL commands that are
> implemented in the device implementation.
> 
> I guess the reasons why we are not using that approach are performance, and that
> it might mess up the LSM hook interface with special cases that only Landlcok
> needs?  But it seems like it would be easier to reason about..?  Or maybe we can
> find a middle ground, where we have the existing hook return a special value
> with the meaning "permit this IOCTL, but do not invoke the f_op hook"?

Your security_file_vfs_ioctl() approach is simpler and better, I like
it!  From a performance point of view it should not change much because
either an LSM would use the current IOCTL hook or this new one.  Using a
flag with the current IOCTL hook would be a missed opportunity for
performance improvements because this hook could be called even if it is
not needed.

I don't think it would be worth it to create a new hook for compat and
non-compat mode because we want to control these IOCTLs the same way for
now, so it would not have a performance impact, but for consistency with
the current IOCTL hooks I guess Paul would prefer two new hooks:
security_file_vfs_ioctl() and security_file_vfs_ioctl_compat()?

Another approach would be to split the IOCTL hook into two: one for the
VFS layer and another for the underlying implementations.  However, it
looks like a difficult and brittle approach according to the current
IOCTL implementations.

Arnd, Christian, Paul, are you OK with this new hook proposal?

> 
> 
> ## What we implemented
> 
> Of course, the existing security_file_ioctl LSM hook works differently, and so
> with that hook, we need to make our blocking decision purely based on the struct
> file*, the IOCTL command number and the IOCTL argument.
> 
> So in order to make that decision correctly based on that information, we end up
> listing all the IOCTLs which are directly(!) implemented in do_vfs_ioctl(),
> because for Landlock, this is the list of IOCTL commands which is safe to permit
> on device files.  And we need to keep that list in sync with fs/ioctl.c, which
> is why it ended up in the same place in this commit.
> 
> 
> (Is it maybe possible to check with a KUnit test whether such lists are in sync?
> It sounds superficially like it should be feasible to create a device file which
> records whether its ioctl implementation was called.  So we could at least check
> that the Landlock command list is a subset of the do_vfs_ioctl() one.)
> 
> 
> > +
> >  /*
> >   * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
> >   * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
> > @@ -858,6 +890,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
> >  {
> >  	struct fd f = fdget(fd);
> >  	int error;
> > +	const struct inode *inode;
> > +	bool is_device;
> >  
> >  	if (!f.file)
> >  		return -EBADF;
> > @@ -866,9 +900,18 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
> >  	if (error)
> >  		goto out;
> >  
> > +	inode = file_inode(f.file);
> > +	is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
> > +	if (is_device && !vfs_masked_device_ioctl(cmd)) {
> > +		error = vfs_ioctl(f.file, cmd, arg);
> > +		goto out;
> > +	}
> > +
> >  	error = do_vfs_ioctl(f.file, fd, cmd, arg);
> > -	if (error == -ENOIOCTLCMD)
> > +	if (error == -ENOIOCTLCMD) {
> > +		WARN_ON_ONCE(is_device);
> >  		error = vfs_ioctl(f.file, cmd, arg);
> > +	}
> 
> It is not obvious at first that adding this list requires a change to the ioctl
> syscall implementations.  If I understand this right, the idea is that you want
> to be 100% sure that we are not calling vfs_ioctl() for the commands in that
> list.

Correct

> And there is a scenario where this could potentially happen:
> 
> do_vfs_ioctl() implements most things like this:
> 
> static int do_vfs_ioctl(...) {
> 	switch (cmd) {
> 	/* many cases like the following: */
> 	case FITHAW:
> 		return ioctl_fsthaw(filp);
> 	/* ... */
> 	}
> 	return -ENOIOCTLCMD;
> }
> 
> So I believe the scenario you want to avoid is the one where ioctl_fsthaw() or
> one of the other functions return -ENOIOCTLCMD by accident, and where that will
> then make the surrounding syscall implementation fall back to vfs_ioctl()
> despite the cmd being listed as safe for Landlock?  Is that right?

Yes

> 
> Looking at do_vfs_ioctl() and its helper functions, I am getting the impression
> that -ENOIOCTLCMD is only supposed to be returned at the very end of it, but not
> by any of the helper functions?  If that were the case, we could maybe just as
> well just solve that problem local to do_vfs_ioctl()?
> 
> A bit inelegant maybe, but just to get the idea across:
> 
> static int sanitize_enoioctlcmd(int res) {
> 	if (res == -ENOIOCTLCMD)
> 		return ENOTTY;
> 	return res;
> }
> 
> static int do_vfs_ioctl(...) {
> 	switch (cmd) {
> 	/* many cases like the following: */
> 	case FITHAW:
> 		return sanitize_enoioctlcmd(ioctl_fsthaw(filp));
> 	/* ... */
> 	}
> 	return -ENOIOCTLCMD;
> }
> 
> Would that be better?

I guess so, but a bit more intrusive. Anyway, the new LSM hook would be
much cleaner and would require less intrusive changes in fs/ioctl.c

The ioctl_compat() helper from this patch could still be useful though.

> 
> —Günther
> 
>
Arnd Bergmann March 6, 2024, 3:18 p.m. UTC | #6
On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote:
> On Tue, Mar 05, 2024 at 07:13:33PM +0100, Günther Noack wrote:
>> On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote:

>> > +	case FS_IOC_FSGETXATTR:
>> > +	case FS_IOC_FSSETXATTR:
>> > +	/* file_ioctl()'s IOCTLs are forwarded to device implementations. */
>> > +		return true;
>> > +	default:
>> > +		return false;
>> > +	}
>> > +}
>> > +EXPORT_SYMBOL(vfs_masked_device_ioctl);
>> 
>> [
>> Technical implementation notes about this function: the list of IOCTLs here are
>> the same ones which do_vfs_ioctl() implements directly.
>> 
>> There are only two cases in which do_vfs_ioctl() does more complicated handling:
>> 
>> (1) FIONREAD falls back to the device's ioctl implemenetation.
>>     Therefore, we omit FIONREAD in our own list - we do not want to allow that.

>> (2) The default case falls back to the file_ioctl() function, but *only* for
>>     S_ISREG() files, so it does not matter for the Landlock case.

How about changing do_vfs_ioctl() to return -ENOIOCTLCMD for
FIONREAD on special files? That way, the two cases become the
same.

>> I guess the reasons why we are not using that approach are performance, and that
>> it might mess up the LSM hook interface with special cases that only Landlcok
>> needs?  But it seems like it would be easier to reason about..?  Or maybe we can
>> find a middle ground, where we have the existing hook return a special value
>> with the meaning "permit this IOCTL, but do not invoke the f_op hook"?
>
> Your security_file_vfs_ioctl() approach is simpler and better, I like
> it!  From a performance point of view it should not change much because
> either an LSM would use the current IOCTL hook or this new one.  Using a
> flag with the current IOCTL hook would be a missed opportunity for
> performance improvements because this hook could be called even if it is
> not needed.
>
> I don't think it would be worth it to create a new hook for compat and
> non-compat mode because we want to control these IOCTLs the same way for
> now, so it would not have a performance impact, but for consistency with
> the current IOCTL hooks I guess Paul would prefer two new hooks:
> security_file_vfs_ioctl() and security_file_vfs_ioctl_compat()?
>
> Another approach would be to split the IOCTL hook into two: one for the
> VFS layer and another for the underlying implementations.  However, it
> looks like a difficult and brittle approach according to the current
> IOCTL implementations.
>
> Arnd, Christian, Paul, are you OK with this new hook proposal?

I think this sounds better. It would fit more closely into
the overall structure of the ioctl handlers with their multiple
levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl,
you have the same structure for sockets and blockdev, and
then additional levels below that and some weirdness for
things like tty, scsi or cdrom.

>> And there is a scenario where this could potentially happen:
>> 
>> do_vfs_ioctl() implements most things like this:
>> 
>> static int do_vfs_ioctl(...) {
>> 	switch (cmd) {
>> 	/* many cases like the following: */
>> 	case FITHAW:
>> 		return ioctl_fsthaw(filp);
>> 	/* ... */
>> 	}
>> 	return -ENOIOCTLCMD;
>> }
>> 
>> So I believe the scenario you want to avoid is the one where ioctl_fsthaw() or
>> one of the other functions return -ENOIOCTLCMD by accident, and where that will
>> then make the surrounding syscall implementation fall back to vfs_ioctl()
>> despite the cmd being listed as safe for Landlock?  Is that right?
>
> Yes

This does go against the normal structure a bit then, where
any of the commands is allowed to return -ENOIOCTLCMD specifically
for the purpose of passing control to the next level of
callbacks. Having the landlock hook explicitly at the
place where the callback is entered, as Günther suggested makes
much more sense to me then.

      Arnd
Christian Brauner March 7, 2024, 12:15 p.m. UTC | #7
On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote:
> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote:
> > On Tue, Mar 05, 2024 at 07:13:33PM +0100, Günther Noack wrote:
> >> On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote:
> 
> >> > +	case FS_IOC_FSGETXATTR:
> >> > +	case FS_IOC_FSSETXATTR:
> >> > +	/* file_ioctl()'s IOCTLs are forwarded to device implementations. */
> >> > +		return true;
> >> > +	default:
> >> > +		return false;
> >> > +	}
> >> > +}
> >> > +EXPORT_SYMBOL(vfs_masked_device_ioctl);
> >> 
> >> [
> >> Technical implementation notes about this function: the list of IOCTLs here are
> >> the same ones which do_vfs_ioctl() implements directly.
> >> 
> >> There are only two cases in which do_vfs_ioctl() does more complicated handling:
> >> 
> >> (1) FIONREAD falls back to the device's ioctl implemenetation.
> >>     Therefore, we omit FIONREAD in our own list - we do not want to allow that.
> 
> >> (2) The default case falls back to the file_ioctl() function, but *only* for
> >>     S_ISREG() files, so it does not matter for the Landlock case.
> 
> How about changing do_vfs_ioctl() to return -ENOIOCTLCMD for
> FIONREAD on special files? That way, the two cases become the
> same.
> 
> >> I guess the reasons why we are not using that approach are performance, and that
> >> it might mess up the LSM hook interface with special cases that only Landlcok
> >> needs?  But it seems like it would be easier to reason about..?  Or maybe we can
> >> find a middle ground, where we have the existing hook return a special value
> >> with the meaning "permit this IOCTL, but do not invoke the f_op hook"?
> >
> > Your security_file_vfs_ioctl() approach is simpler and better, I like
> > it!  From a performance point of view it should not change much because
> > either an LSM would use the current IOCTL hook or this new one.  Using a
> > flag with the current IOCTL hook would be a missed opportunity for
> > performance improvements because this hook could be called even if it is
> > not needed.
> >
> > I don't think it would be worth it to create a new hook for compat and
> > non-compat mode because we want to control these IOCTLs the same way for
> > now, so it would not have a performance impact, but for consistency with
> > the current IOCTL hooks I guess Paul would prefer two new hooks:
> > security_file_vfs_ioctl() and security_file_vfs_ioctl_compat()?
> >
> > Another approach would be to split the IOCTL hook into two: one for the
> > VFS layer and another for the underlying implementations.  However, it
> > looks like a difficult and brittle approach according to the current
> > IOCTL implementations.
> >
> > Arnd, Christian, Paul, are you OK with this new hook proposal?
> 
> I think this sounds better. It would fit more closely into
> the overall structure of the ioctl handlers with their multiple
> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl,
> you have the same structure for sockets and blockdev, and
> then additional levels below that and some weirdness for
> things like tty, scsi or cdrom.

So an additional security hook called from tty, scsi, or cdrom?
And the original hook is left where it is right now?
Arnd Bergmann March 7, 2024, 12:21 p.m. UTC | #8
On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote:
> On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote:
>> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote:
>> >
>> > Arnd, Christian, Paul, are you OK with this new hook proposal?
>> 
>> I think this sounds better. It would fit more closely into
>> the overall structure of the ioctl handlers with their multiple
>> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl,
>> you have the same structure for sockets and blockdev, and
>> then additional levels below that and some weirdness for
>> things like tty, scsi or cdrom.
>
> So an additional security hook called from tty, scsi, or cdrom?
> And the original hook is left where it is right now?

For the moment, I think adding another hook in vfs_ioctl()
and the corresponding compat path would do what Mickaël
wants. Beyond that, we could consider having hooks in
socket and block ioctls if needed as they are easy to
filter out based on inode->i_mode.

The tty/scsi/cdrom hooks would be harder to do, let's assume
for now that we don't need them.

      Arnd
Günther Noack March 7, 2024, 12:57 p.m. UTC | #9
On Thu, Mar 07, 2024 at 01:21:48PM +0100, Arnd Bergmann wrote:
> On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote:
> > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote:
> >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote:
> >> >
> >> > Arnd, Christian, Paul, are you OK with this new hook proposal?
> >> 
> >> I think this sounds better. It would fit more closely into
> >> the overall structure of the ioctl handlers with their multiple
> >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl,
> >> you have the same structure for sockets and blockdev, and
> >> then additional levels below that and some weirdness for
> >> things like tty, scsi or cdrom.
> >
> > So an additional security hook called from tty, scsi, or cdrom?
> > And the original hook is left where it is right now?
> 
> For the moment, I think adding another hook in vfs_ioctl()
> and the corresponding compat path would do what Mickaël
> wants. Beyond that, we could consider having hooks in
> socket and block ioctls if needed as they are easy to
> filter out based on inode->i_mode.
> 
> The tty/scsi/cdrom hooks would be harder to do, let's assume
> for now that we don't need them.

Thank you all for the help!

Yes, tty/scsi/cdrom are just examples.  We do not need special features for
these for Landlock right now.

What I would do is to invoke the new LSM hook in the following two places in
fs/ioctl.c:

1) at the top of vfs_ioctl()
2) at the top of ioctl_compat()

(Both of these functions are just invoking the f_op->unlocked_ioctl() and
f_op->compat_ioctl() operations with a safeguard for that being a NULL pointer.)

The intent is that the new hook gets called everytime before an ioctl is sent to
these IOCTL operations in f_op, so that the LSM can distinguish cleanly between
the "safe" IOCTLs that are implemented fully within fs/ioctl.c and the
"potentially unsafe" IOCTLs which are implemented by these hooks (as it is
unrealistic for us to holistically reason about the safety of all possible
implementations).

The alternative approach where we try to do the same based on the existing LSM
IOCTL hook resulted in the patch further up in this mail thread - it involves
maintaining a list of "safe" IOCTL commands, and it is difficult to guarantee
that these lists of IOCTL commands stay in sync.

Christian, does that make sense in your mind?

Thanks,
—Günther
Paul Moore March 7, 2024, 8:40 p.m. UTC | #10
On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> On Thu, Mar 07, 2024 at 01:21:48PM +0100, Arnd Bergmann wrote:
> > On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote:
> > > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote:
> > >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote:
> > >> >
> > >> > Arnd, Christian, Paul, are you OK with this new hook proposal?
> > >>
> > >> I think this sounds better. It would fit more closely into
> > >> the overall structure of the ioctl handlers with their multiple
> > >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl,
> > >> you have the same structure for sockets and blockdev, and
> > >> then additional levels below that and some weirdness for
> > >> things like tty, scsi or cdrom.
> > >
> > > So an additional security hook called from tty, scsi, or cdrom?
> > > And the original hook is left where it is right now?
> >
> > For the moment, I think adding another hook in vfs_ioctl()
> > and the corresponding compat path would do what Mickaël
> > wants. Beyond that, we could consider having hooks in
> > socket and block ioctls if needed as they are easy to
> > filter out based on inode->i_mode.
> >
> > The tty/scsi/cdrom hooks would be harder to do, let's assume
> > for now that we don't need them.
>
> Thank you all for the help!
>
> Yes, tty/scsi/cdrom are just examples.  We do not need special features for
> these for Landlock right now.
>
> What I would do is to invoke the new LSM hook in the following two places in
> fs/ioctl.c:
>
> 1) at the top of vfs_ioctl()
> 2) at the top of ioctl_compat()
>
> (Both of these functions are just invoking the f_op->unlocked_ioctl() and
> f_op->compat_ioctl() operations with a safeguard for that being a NULL pointer.)
>
> The intent is that the new hook gets called everytime before an ioctl is sent to
> these IOCTL operations in f_op, so that the LSM can distinguish cleanly between
> the "safe" IOCTLs that are implemented fully within fs/ioctl.c and the
> "potentially unsafe" IOCTLs which are implemented by these hooks (as it is
> unrealistic for us to holistically reason about the safety of all possible
> implementations).
>
> The alternative approach where we try to do the same based on the existing LSM
> IOCTL hook resulted in the patch further up in this mail thread - it involves
> maintaining a list of "safe" IOCTL commands, and it is difficult to guarantee
> that these lists of IOCTL commands stay in sync.

I need some more convincing as to why we need to introduce these new
hooks, or even the vfs_masked_device_ioctl() classifier as originally
proposed at the top of this thread.  I believe I understand why
Landlock wants this, but I worry that we all might have different
definitions of a "safe" ioctl list, and encoding a definition into the
LSM hooks seems like a bad idea to me.

At this point in time, I think I'd rather see LSMs that care about
ioctls maintaining their own list of "safe" ioctls and after a while
if it looks like everyone is in agreement (VFS folks, individual LSMs,
etc.) we can look into either an ioctl classifier or multiple LSM
ioctl hooks focused on different categories of ioctls.
Dave Chinner March 7, 2024, 11:09 p.m. UTC | #11
On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> > On Thu, Mar 07, 2024 at 01:21:48PM +0100, Arnd Bergmann wrote:
> > > On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote:
> > > > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote:
> > > >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote:
> > > >> >
> > > >> > Arnd, Christian, Paul, are you OK with this new hook proposal?
> > > >>
> > > >> I think this sounds better. It would fit more closely into
> > > >> the overall structure of the ioctl handlers with their multiple
> > > >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl,
> > > >> you have the same structure for sockets and blockdev, and
> > > >> then additional levels below that and some weirdness for
> > > >> things like tty, scsi or cdrom.
> > > >
> > > > So an additional security hook called from tty, scsi, or cdrom?
> > > > And the original hook is left where it is right now?
> > >
> > > For the moment, I think adding another hook in vfs_ioctl()
> > > and the corresponding compat path would do what Mickaël
> > > wants. Beyond that, we could consider having hooks in
> > > socket and block ioctls if needed as they are easy to
> > > filter out based on inode->i_mode.
> > >
> > > The tty/scsi/cdrom hooks would be harder to do, let's assume
> > > for now that we don't need them.
> >
> > Thank you all for the help!
> >
> > Yes, tty/scsi/cdrom are just examples.  We do not need special features for
> > these for Landlock right now.
> >
> > What I would do is to invoke the new LSM hook in the following two places in
> > fs/ioctl.c:
> >
> > 1) at the top of vfs_ioctl()
> > 2) at the top of ioctl_compat()
> >
> > (Both of these functions are just invoking the f_op->unlocked_ioctl() and
> > f_op->compat_ioctl() operations with a safeguard for that being a NULL pointer.)
> >
> > The intent is that the new hook gets called everytime before an ioctl is sent to
> > these IOCTL operations in f_op, so that the LSM can distinguish cleanly between
> > the "safe" IOCTLs that are implemented fully within fs/ioctl.c and the
> > "potentially unsafe" IOCTLs which are implemented by these hooks (as it is
> > unrealistic for us to holistically reason about the safety of all possible
> > implementations).
> >
> > The alternative approach where we try to do the same based on the existing LSM
> > IOCTL hook resulted in the patch further up in this mail thread - it involves
> > maintaining a list of "safe" IOCTL commands, and it is difficult to guarantee
> > that these lists of IOCTL commands stay in sync.
> 
> I need some more convincing as to why we need to introduce these new
> hooks, or even the vfs_masked_device_ioctl() classifier as originally
> proposed at the top of this thread.  I believe I understand why
> Landlock wants this, but I worry that we all might have different
> definitions of a "safe" ioctl list, and encoding a definition into the
> LSM hooks seems like a bad idea to me.

I have no idea what a "safe" ioctl means here. Subsystems already
restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
"safe" clearly means something different here.

> At this point in time, I think I'd rather see LSMs that care about
> ioctls maintaining their own list of "safe" ioctls and after a while
> if it looks like everyone is in agreement (VFS folks, individual LSMs,
> etc.) we can look into either an ioctl classifier or multiple LSM
> ioctl hooks focused on different categories of ioctls.

From the perspective of a VFS and subsystem developer, I really have
no clue what would make a "safe" ioctl from a LSM perspective, and I
very much doubt an LSM developer has any clue whether deep, dark
subsystem ioctls are "safe" to allow, or even what would stop
working if they decided something was not "safe".

This just seems like a complex recipe for creating unusable and/or
impossible to configure/secure systems to me.

-Dave.
Paul Moore March 7, 2024, 11:35 p.m. UTC | #12
On Thu, Mar 7, 2024 at 6:09 PM Dave Chinner <david@fromorbit.com> wrote:
> On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
> > On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> > > On Thu, Mar 07, 2024 at 01:21:48PM +0100, Arnd Bergmann wrote:
> > > > On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote:
> > > > > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote:
> > > > >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote:
> > > > >> >
> > > > >> > Arnd, Christian, Paul, are you OK with this new hook proposal?
> > > > >>
> > > > >> I think this sounds better. It would fit more closely into
> > > > >> the overall structure of the ioctl handlers with their multiple
> > > > >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl,
> > > > >> you have the same structure for sockets and blockdev, and
> > > > >> then additional levels below that and some weirdness for
> > > > >> things like tty, scsi or cdrom.
> > > > >
> > > > > So an additional security hook called from tty, scsi, or cdrom?
> > > > > And the original hook is left where it is right now?
> > > >
> > > > For the moment, I think adding another hook in vfs_ioctl()
> > > > and the corresponding compat path would do what Mickaël
> > > > wants. Beyond that, we could consider having hooks in
> > > > socket and block ioctls if needed as they are easy to
> > > > filter out based on inode->i_mode.
> > > >
> > > > The tty/scsi/cdrom hooks would be harder to do, let's assume
> > > > for now that we don't need them.
> > >
> > > Thank you all for the help!
> > >
> > > Yes, tty/scsi/cdrom are just examples.  We do not need special features for
> > > these for Landlock right now.
> > >
> > > What I would do is to invoke the new LSM hook in the following two places in
> > > fs/ioctl.c:
> > >
> > > 1) at the top of vfs_ioctl()
> > > 2) at the top of ioctl_compat()
> > >
> > > (Both of these functions are just invoking the f_op->unlocked_ioctl() and
> > > f_op->compat_ioctl() operations with a safeguard for that being a NULL pointer.)
> > >
> > > The intent is that the new hook gets called everytime before an ioctl is sent to
> > > these IOCTL operations in f_op, so that the LSM can distinguish cleanly between
> > > the "safe" IOCTLs that are implemented fully within fs/ioctl.c and the
> > > "potentially unsafe" IOCTLs which are implemented by these hooks (as it is
> > > unrealistic for us to holistically reason about the safety of all possible
> > > implementations).
> > >
> > > The alternative approach where we try to do the same based on the existing LSM
> > > IOCTL hook resulted in the patch further up in this mail thread - it involves
> > > maintaining a list of "safe" IOCTL commands, and it is difficult to guarantee
> > > that these lists of IOCTL commands stay in sync.
> >
> > I need some more convincing as to why we need to introduce these new
> > hooks, or even the vfs_masked_device_ioctl() classifier as originally
> > proposed at the top of this thread.  I believe I understand why
> > Landlock wants this, but I worry that we all might have different
> > definitions of a "safe" ioctl list, and encoding a definition into the
> > LSM hooks seems like a bad idea to me.
>
> I have no idea what a "safe" ioctl means here. Subsystems already
> restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> "safe" clearly means something different here.

That's the point I was trying to make.  I'm not sure exactly what
Günther meant either (I was simply copying his idea of a "safe" ioctl,
complete with all of the associations around the double quotes), which
helps underscore the idea that different groups are likely to have
different ideas of what ioctls they want to allow based on their
security model, environment, etc.

> > At this point in time, I think I'd rather see LSMs that care about
> > ioctls maintaining their own list of "safe" ioctls and after a while
> > if it looks like everyone is in agreement (VFS folks, individual LSMs,
> > etc.) we can look into either an ioctl classifier or multiple LSM
> > ioctl hooks focused on different categories of ioctls.
>
> From the perspective of a VFS and subsystem developer, I really have
> no clue what would make a "safe" ioctl from a LSM perspective ...

We also need to keep in mind that we have multiple LSM implementations
and we need to support different ideas around how to control access to
ioctls, including which ioctls are "safe" for multiple definitions of
the word.

> ... and I
> very much doubt an LSM developer has any clue whether deep, dark
> subsystem ioctls are "safe" to allow, or even what would stop
> working if they decided something was not "safe".

... or for those LSMs with configurable security policies, which
ioctls are allowed by the LSM's policy developer to fit their
particular needs.

> This just seems like a complex recipe for creating unusable and/or
> impossible to configure/secure systems to me.

FWIW, Android has been using the existing LSM ioctl controls for
several years now to help increase the security of Android devices.

https://security.googleblog.com/2016/07/protecting-android-with-more-linux.html
https://kernsec.org/files/lss2015/vanderstoep.pdf
Arnd Bergmann March 8, 2024, 7:02 a.m. UTC | #13
On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
>> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
>> I need some more convincing as to why we need to introduce these new
>> hooks, or even the vfs_masked_device_ioctl() classifier as originally
>> proposed at the top of this thread.  I believe I understand why
>> Landlock wants this, but I worry that we all might have different
>> definitions of a "safe" ioctl list, and encoding a definition into the
>> LSM hooks seems like a bad idea to me.
>
> I have no idea what a "safe" ioctl means here. Subsystems already
> restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> "safe" clearly means something different here.

That was my problem with the first version as well, but I think
drawing the line between "implemented in fs/ioctl.c" and
"implemented in a random device driver fops->unlock_ioctl()"
seems like a more helpful definition.

This won't just protect from calling into drivers that are lacking
a CAP_SYS_ADMIN check, but also from those that end up being
harmful regardless of the ioctl command code passed into them
because of stupid driver bugs.

      Arnd
Mickaël Salaün March 8, 2024, 9:29 a.m. UTC | #14
On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
> >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> >> I need some more convincing as to why we need to introduce these new
> >> hooks, or even the vfs_masked_device_ioctl() classifier as originally
> >> proposed at the top of this thread.  I believe I understand why
> >> Landlock wants this, but I worry that we all might have different
> >> definitions of a "safe" ioctl list, and encoding a definition into the
> >> LSM hooks seems like a bad idea to me.
> >
> > I have no idea what a "safe" ioctl means here. Subsystems already
> > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > "safe" clearly means something different here.
> 
> That was my problem with the first version as well, but I think
> drawing the line between "implemented in fs/ioctl.c" and
> "implemented in a random device driver fops->unlock_ioctl()"
> seems like a more helpful definition.
> 
> This won't just protect from calling into drivers that are lacking
> a CAP_SYS_ADMIN check, but also from those that end up being
> harmful regardless of the ioctl command code passed into them
> because of stupid driver bugs.

Indeed.

"safe" is definitely not the right word, it is too broad, relative to
use cases and threat models.  There is no "safe" IOCTL.

Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock
sandbox".

Our assumptions are (in the context of Landlock):

1. There are IOCTLs tied to file types (e.g. block device with
   major/minor) that can easily be identified from user space (e.g. with
   the path name and file's metadata).  /dev/* files make sense for user
   space and they scope to a specific use case (with relative
   privileges).  This category of IOCTLs is implemented in standalone
   device drivers (for most of them).

2. Most user space processes should not be denied access to IOCTLs that
   are managed by the VFS layer or the underlying filesystem
   implementations.  For instance, the do_vfs_ioctl()'s ones (e.g.
   FIOCLEX, FIONREAD) should always be allowed because they may be
   required to legitimately use files, and for performance and security
   reasons (e.g. fs-crypt, fsverity implemented at the filesystem layer).
   Moreover, these IOCTLs should already check the read/write permission
   (on the related FD), which is not the case for most block/char device
   IOCTL.

3. IOCTLs to pipes and sockets are out of scope.  They should always be
   allowed for now because they don't directly expose files' data but
   IPCs instead, and we are focusing on FS access rights for now.

We want to add a new LANDLOCK_ACCESS_FS_IOCTL_DEV right that could match
on char/block device's specific IOCTLs, but it would not have any impact
on other IOCTLs which would then always be allowed (if the sandboxed
process is allowed to open the file).

Because IOCTLs are implemented in layers and all IOCTLs commands live in
the same 32-bit namespace, we need a way to identify the layer
implemented by block and character devices.  The new LSM hook proposal
enables us to cleanly and efficiently identify the char/block device
IOCTL layer with an additional check on the file type.
Günther Noack March 8, 2024, 11:03 a.m. UTC | #15
On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
> >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> >> I need some more convincing as to why we need to introduce these new
> >> hooks, or even the vfs_masked_device_ioctl() classifier as originally
> >> proposed at the top of this thread.  I believe I understand why
> >> Landlock wants this, but I worry that we all might have different
> >> definitions of a "safe" ioctl list, and encoding a definition into the
> >> LSM hooks seems like a bad idea to me.
> >
> > I have no idea what a "safe" ioctl means here. Subsystems already
> > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > "safe" clearly means something different here.
> 
> That was my problem with the first version as well, but I think
> drawing the line between "implemented in fs/ioctl.c" and
> "implemented in a random device driver fops->unlock_ioctl()"
> seems like a more helpful definition.

Yes, sorry for the confusion - that is exactly what I meant to say with "safe".:

Those are the IOCTL commands implemented in fs/ioctl.c which do not go through
f_ops->unlocked_ioctl (or the compat equivalent).

We want to give people a way with Landlock so that they can restrict the use of
device-driver implemented IOCTLs, but where they can keep using the bulk of
more harmless IOCTLs in fs/ioctl.c.

> This won't just protect from calling into drivers that are lacking
> a CAP_SYS_ADMIN check, but also from those that end up being
> harmful regardless of the ioctl command code passed into them
> because of stupid driver bugs.

Exactly -- there is a surprising number of f_ops->unlocked_ioctl implementations
that already run various resource management and locking logic before even
looking at the command number.  That means that even the command numbers that
are not implemented there are executing code in the driver layer, before the
IOCTL returns with an error.

So the f_ops->unlocked_ioctl() invocation is in itself increasing the surface of
exposed functionality, even completely independent of the command number.  Which
makes the invocation of f_ops->unlocked_ioctl() a security boundary that we
would like to restrict.

—Günther
Paul Moore March 8, 2024, 7:22 p.m. UTC | #16
On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote:
> On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
> > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> > >> I need some more convincing as to why we need to introduce these new
> > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally
> > >> proposed at the top of this thread.  I believe I understand why
> > >> Landlock wants this, but I worry that we all might have different
> > >> definitions of a "safe" ioctl list, and encoding a definition into the
> > >> LSM hooks seems like a bad idea to me.
> > >
> > > I have no idea what a "safe" ioctl means here. Subsystems already
> > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > > "safe" clearly means something different here.
> >
> > That was my problem with the first version as well, but I think
> > drawing the line between "implemented in fs/ioctl.c" and
> > "implemented in a random device driver fops->unlock_ioctl()"
> > seems like a more helpful definition.
> >
> > This won't just protect from calling into drivers that are lacking
> > a CAP_SYS_ADMIN check, but also from those that end up being
> > harmful regardless of the ioctl command code passed into them
> > because of stupid driver bugs.
>
> Indeed.
>
> "safe" is definitely not the right word, it is too broad, relative to
> use cases and threat models.  There is no "safe" IOCTL.
>
> Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock
> sandbox".

Which is a problem from a LSM perspective as we want to avoid hooks
which are tightly bound to a single LSM or security model.  It's okay
if a new hook only has a single LSM implementation, but the hook's
definition should be such that it is reasonably generalized to support
multiple LSM/models.

> Our assumptions are (in the context of Landlock):
>
> 1. There are IOCTLs tied to file types (e.g. block device with
>    major/minor) that can easily be identified from user space (e.g. with
>    the path name and file's metadata).  /dev/* files make sense for user
>    space and they scope to a specific use case (with relative
>    privileges).  This category of IOCTLs is implemented in standalone
>    device drivers (for most of them).
>
> 2. Most user space processes should not be denied access to IOCTLs that
>    are managed by the VFS layer or the underlying filesystem
>    implementations.  For instance, the do_vfs_ioctl()'s ones (e.g.
>    FIOCLEX, FIONREAD) should always be allowed because they may be
>    required to legitimately use files, and for performance and security
>    reasons (e.g. fs-crypt, fsverity implemented at the filesystem layer).
>    Moreover, these IOCTLs should already check the read/write permission
>    (on the related FD), which is not the case for most block/char device
>    IOCTL.
>
> 3. IOCTLs to pipes and sockets are out of scope.  They should always be
>    allowed for now because they don't directly expose files' data but
>    IPCs instead, and we are focusing on FS access rights for now.
>
> We want to add a new LANDLOCK_ACCESS_FS_IOCTL_DEV right that could match
> on char/block device's specific IOCTLs, but it would not have any impact
> on other IOCTLs which would then always be allowed (if the sandboxed
> process is allowed to open the file).
>
> Because IOCTLs are implemented in layers and all IOCTLs commands live in
> the same 32-bit namespace, we need a way to identify the layer
> implemented by block and character devices.  The new LSM hook proposal
> enables us to cleanly and efficiently identify the char/block device
> IOCTL layer with an additional check on the file type.

I guess I should wait until there is an actual patch, but as of right
now a VFS ioctl specific LSM hook looks far too limited to me and
isn't something I can support at this point in time.  It's obviously
limited to only a subset of the ioctls, meaning that in order to have
comprehensive coverage we would either need to implement a full range
of subsystem ioctl hooks (ugh), or just use the existing
security_file_ioctl().  I understand that this makes things a bit more
complicated for Landlock's initial ioctl implementation, but
considering my thoughts above and the fact that Landlock's ioctl
protections are still evolving I'd rather not add a lot of extra hooks
right now.
Mickaël Salaün March 8, 2024, 8:12 p.m. UTC | #17
On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote:
> On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote:
> > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
> > > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> > > >> I need some more convincing as to why we need to introduce these new
> > > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally
> > > >> proposed at the top of this thread.  I believe I understand why
> > > >> Landlock wants this, but I worry that we all might have different
> > > >> definitions of a "safe" ioctl list, and encoding a definition into the
> > > >> LSM hooks seems like a bad idea to me.
> > > >
> > > > I have no idea what a "safe" ioctl means here. Subsystems already
> > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > > > "safe" clearly means something different here.
> > >
> > > That was my problem with the first version as well, but I think
> > > drawing the line between "implemented in fs/ioctl.c" and
> > > "implemented in a random device driver fops->unlock_ioctl()"
> > > seems like a more helpful definition.
> > >
> > > This won't just protect from calling into drivers that are lacking
> > > a CAP_SYS_ADMIN check, but also from those that end up being
> > > harmful regardless of the ioctl command code passed into them
> > > because of stupid driver bugs.
> >
> > Indeed.
> >
> > "safe" is definitely not the right word, it is too broad, relative to
> > use cases and threat models.  There is no "safe" IOCTL.
> >
> > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock
> > sandbox".
> 
> Which is a problem from a LSM perspective as we want to avoid hooks
> which are tightly bound to a single LSM or security model.  It's okay
> if a new hook only has a single LSM implementation, but the hook's
> definition should be such that it is reasonably generalized to support
> multiple LSM/models.

As any new hook, there is a first user.  Obviously this new hook would
not be restricted to Landlock, it is a generic approach.  I'm pretty
sure a few hooks are only used by one LSM though. ;)

> 
> > Our assumptions are (in the context of Landlock):
> >
> > 1. There are IOCTLs tied to file types (e.g. block device with
> >    major/minor) that can easily be identified from user space (e.g. with
> >    the path name and file's metadata).  /dev/* files make sense for user
> >    space and they scope to a specific use case (with relative
> >    privileges).  This category of IOCTLs is implemented in standalone
> >    device drivers (for most of them).
> >
> > 2. Most user space processes should not be denied access to IOCTLs that
> >    are managed by the VFS layer or the underlying filesystem
> >    implementations.  For instance, the do_vfs_ioctl()'s ones (e.g.
> >    FIOCLEX, FIONREAD) should always be allowed because they may be
> >    required to legitimately use files, and for performance and security
> >    reasons (e.g. fs-crypt, fsverity implemented at the filesystem layer).
> >    Moreover, these IOCTLs should already check the read/write permission
> >    (on the related FD), which is not the case for most block/char device
> >    IOCTL.
> >
> > 3. IOCTLs to pipes and sockets are out of scope.  They should always be
> >    allowed for now because they don't directly expose files' data but
> >    IPCs instead, and we are focusing on FS access rights for now.
> >
> > We want to add a new LANDLOCK_ACCESS_FS_IOCTL_DEV right that could match
> > on char/block device's specific IOCTLs, but it would not have any impact
> > on other IOCTLs which would then always be allowed (if the sandboxed
> > process is allowed to open the file).
> >
> > Because IOCTLs are implemented in layers and all IOCTLs commands live in
> > the same 32-bit namespace, we need a way to identify the layer
> > implemented by block and character devices.  The new LSM hook proposal
> > enables us to cleanly and efficiently identify the char/block device
> > IOCTL layer with an additional check on the file type.
> 
> I guess I should wait until there is an actual patch, but as of right
> now a VFS ioctl specific LSM hook looks far too limited to me and
> isn't something I can support at this point in time.  It's obviously
> limited to only a subset of the ioctls, meaning that in order to have
> comprehensive coverage we would either need to implement a full range
> of subsystem ioctl hooks (ugh), or just use the existing
> security_file_ioctl().

I think there is a misunderstanding.  The subset of IOCTL commands the
new hook will see would be 99% of them (i.e. all except those
implemented in fs/ioctl.c).  Being able to only handle this (big) subset
would empower LSMs to control IOCTL commands without collision (e.g. the
same command/value may have different meanings according to the
implementation/layer), which is not currently possible (without manual
tweaking).

This proposal is to add a new hook for the layer just beneath the VFS
catch-all IOCTL implementation.  This layer can then differentiate
between the underlying implementation according to the file properties.
There is no need for additional hooks for other layers/subsystems.

The existing security_file_ioctl() hook is useful to catch all IOCTL
commands, but it doesn't enable to identify the underlying target and
then the semantic of the command.  Furthermore, as Günther said, an
IOCTL call can already do kernel operations without looking at the
command, but we would then be able to identify that by looking at the
char/block device file for instance.

> I understand that this makes things a bit more
> complicated for Landlock's initial ioctl implementation, but
> considering my thoughts above and the fact that Landlock's ioctl
> protections are still evolving I'd rather not add a lot of extra hooks
> right now.

Without this hook, we'll need to rely on a list of allowed IOCTLs, which
will be out-of-sync eventually.  It would be a maintenance burden and an
hacky approach.

We're definitely open to new proposals, but until now this is the best
approach we found from a maintenance, performance, and security point of
view.
Casey Schaufler March 8, 2024, 10:04 p.m. UTC | #18
On 3/8/2024 12:12 PM, Mickaël Salaün wrote:
> On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote:
>> On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote:
>>> On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
>>>> On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
>>>>> On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
>>>>>> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
>>>>>> I need some more convincing as to why we need to introduce these new
>>>>>> hooks, or even the vfs_masked_device_ioctl() classifier as originally
>>>>>> proposed at the top of this thread.  I believe I understand why
>>>>>> Landlock wants this, but I worry that we all might have different
>>>>>> definitions of a "safe" ioctl list, and encoding a definition into the
>>>>>> LSM hooks seems like a bad idea to me.
>>>>> I have no idea what a "safe" ioctl means here. Subsystems already
>>>>> restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
>>>>> "safe" clearly means something different here.
>>>> That was my problem with the first version as well, but I think
>>>> drawing the line between "implemented in fs/ioctl.c" and
>>>> "implemented in a random device driver fops->unlock_ioctl()"
>>>> seems like a more helpful definition.
>>>>
>>>> This won't just protect from calling into drivers that are lacking
>>>> a CAP_SYS_ADMIN check, but also from those that end up being
>>>> harmful regardless of the ioctl command code passed into them
>>>> because of stupid driver bugs.
>>> Indeed.
>>>
>>> "safe" is definitely not the right word, it is too broad, relative to
>>> use cases and threat models.  There is no "safe" IOCTL.
>>>
>>> Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock
>>> sandbox".
>> Which is a problem from a LSM perspective as we want to avoid hooks
>> which are tightly bound to a single LSM or security model.  It's okay
>> if a new hook only has a single LSM implementation, but the hook's
>> definition should be such that it is reasonably generalized to support
>> multiple LSM/models.

I've been watching this thread with some interest, as one of my side projects
has been an attempt to address the "CAP_SYS_ADMIN problem", and there looks
to be a lot of similarity between that and the "ioctl problem". In both cases
it comes down to a matter of:
	1. uniquely identifying the action
	2. providing the information to code that can act upon it
	3. providing "policy" to determine what to do about it

My thought for the CAP_SYS_ADMIN case was to provide a new LSM hook
security_sysadmin() that takes a single parameter which is the action ID.
I called the action ID a "chit", because it's short and all the good,
more descriptive words where taken. Calls to cap_able(CAP_SYS_ADMIN) could
be replaced by calls to security_sysadmin(chit). security_sysadmin() would
first call cap_able(CAP_SYSADMIN) and, if that succeeded, allow LSMs with
registered hooks (selinux_sysadmin() etc) the opportunity to disallow the
operation. I planned to include a small LSM (chits) that would allow the
operation only if the process had the chit on its chit list. Landlock could
add policy to deal with chits if so inclined.

A generalization of this scheme would be to leave the cap_able(CAP_SYS_ADMIN)
checks as they are and add an optional security_chit() hook for places where
additional enforcement information is desired. Adding

	security_chit(CHIT_IOCTL_TTY_SOMETHING)

to an ioctl would allow any LSM to make policy decisions about that ioctl
operation. Adding

	security_chit(CHIT_ERASE_TAPE_REGISTERS)

after a cap_able(CAP_SYS_ADMIN) could appease the driver writer who would
otherwise be begging for CAP_ERASE_TAPE_REGISTERS. My biggest concern with
this scheme is the management of chit values, which would have to be kept
in a uapi header.

A major advantage of this is that the security_chit() calls would only have
to be added where someone wants to take advantage of the mechanism. People
who are happy with CAP_SYS_ADMIN or ioctl as it is don't have to do anything,
and their code won't get churned for the new world order. The downside is the
potentially onerous process of deciding if an LSM cares about an action known
only by its number.

I have patches close to ready. The chit LSM isn't passing the laugh test quite
yet, so I'm holding it back for now. I wanted to bring this up before we go too
far down a more complicated path.
Paul Moore March 8, 2024, 10:25 p.m. UTC | #19
On Fri, Mar 8, 2024 at 3:12 PM Mickaël Salaün <mic@digikod.net> wrote:
> On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote:
> > On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> > > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > > > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
> > > > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> > > > >> I need some more convincing as to why we need to introduce these new
> > > > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally
> > > > >> proposed at the top of this thread.  I believe I understand why
> > > > >> Landlock wants this, but I worry that we all might have different
> > > > >> definitions of a "safe" ioctl list, and encoding a definition into the
> > > > >> LSM hooks seems like a bad idea to me.
> > > > >
> > > > > I have no idea what a "safe" ioctl means here. Subsystems already
> > > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > > > > "safe" clearly means something different here.
> > > >
> > > > That was my problem with the first version as well, but I think
> > > > drawing the line between "implemented in fs/ioctl.c" and
> > > > "implemented in a random device driver fops->unlock_ioctl()"
> > > > seems like a more helpful definition.
> > > >
> > > > This won't just protect from calling into drivers that are lacking
> > > > a CAP_SYS_ADMIN check, but also from those that end up being
> > > > harmful regardless of the ioctl command code passed into them
> > > > because of stupid driver bugs.
> > >
> > > Indeed.
> > >
> > > "safe" is definitely not the right word, it is too broad, relative to
> > > use cases and threat models.  There is no "safe" IOCTL.
> > >
> > > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock
> > > sandbox".
> >
> > Which is a problem from a LSM perspective as we want to avoid hooks
> > which are tightly bound to a single LSM or security model.  It's okay
> > if a new hook only has a single LSM implementation, but the hook's
> > definition should be such that it is reasonably generalized to support
> > multiple LSM/models.
>
> As any new hook, there is a first user.  Obviously this new hook would
> not be restricted to Landlock, it is a generic approach.  I'm pretty
> sure a few hooks are only used by one LSM though. ;)

Sure, as I said above, it's okay for there to only be a single LSM
implementation, but the basic idea behind the hook needs to have some
hope of being generic.  Your "let's redefine a safe ioctl as 'IOCTL
always allowed in a Landlock sandbox'" doesn't fill me with confidence
about the hook being generic; who knows, maybe it will be, but in the
absence of a patch, I'm left with descriptions like those.

> > > Our assumptions are (in the context of Landlock):
> > >
> > > 1. There are IOCTLs tied to file types (e.g. block device with
> > >    major/minor) that can easily be identified from user space (e.g. with
> > >    the path name and file's metadata).  /dev/* files make sense for user
> > >    space and they scope to a specific use case (with relative
> > >    privileges).  This category of IOCTLs is implemented in standalone
> > >    device drivers (for most of them).
> > >
> > > 2. Most user space processes should not be denied access to IOCTLs that
> > >    are managed by the VFS layer or the underlying filesystem
> > >    implementations.  For instance, the do_vfs_ioctl()'s ones (e.g.
> > >    FIOCLEX, FIONREAD) should always be allowed because they may be
> > >    required to legitimately use files, and for performance and security
> > >    reasons (e.g. fs-crypt, fsverity implemented at the filesystem layer).
> > >    Moreover, these IOCTLs should already check the read/write permission
> > >    (on the related FD), which is not the case for most block/char device
> > >    IOCTL.
> > >
> > > 3. IOCTLs to pipes and sockets are out of scope.  They should always be
> > >    allowed for now because they don't directly expose files' data but
> > >    IPCs instead, and we are focusing on FS access rights for now.
> > >
> > > We want to add a new LANDLOCK_ACCESS_FS_IOCTL_DEV right that could match
> > > on char/block device's specific IOCTLs, but it would not have any impact
> > > on other IOCTLs which would then always be allowed (if the sandboxed
> > > process is allowed to open the file).
> > >
> > > Because IOCTLs are implemented in layers and all IOCTLs commands live in
> > > the same 32-bit namespace, we need a way to identify the layer
> > > implemented by block and character devices.  The new LSM hook proposal
> > > enables us to cleanly and efficiently identify the char/block device
> > > IOCTL layer with an additional check on the file type.
> >
> > I guess I should wait until there is an actual patch, but as of right
> > now a VFS ioctl specific LSM hook looks far too limited to me and
> > isn't something I can support at this point in time.  It's obviously
> > limited to only a subset of the ioctls, meaning that in order to have
> > comprehensive coverage we would either need to implement a full range
> > of subsystem ioctl hooks (ugh), or just use the existing
> > security_file_ioctl().
>
> I think there is a misunderstanding.  The subset of IOCTL commands the
> new hook will see would be 99% of them (i.e. all except those
> implemented in fs/ioctl.c).

*cough* 99% != 100% *cough*

> Being able to only handle this (big) subset
> would empower LSMs to control IOCTL commands without collision (e.g. the
> same command/value may have different meanings according to the
> implementation/layer), which is not currently possible (without manual
> tweaking).
>
> This proposal is to add a new hook for the layer just beneath the VFS
> catch-all IOCTL implementation.  This layer can then differentiate
> between the underlying implementation according to the file properties.
> There is no need for additional hooks for other layers/subsystems.

I'm not sure how you reconcile less than 100% coverage, the need for a
generic hook, and the idea that there will not be a need for
additional hooks.  That still seems like a problem to me.

> The existing security_file_ioctl() hook is useful to catch all IOCTL
> commands, but it doesn't enable to identify the underlying target and
> then the semantic of the command.

The LSM hook gets the file pointer, the command, and the argument, how
is a LSM not able to identify the underlying target?

> Furthermore, as Günther said, an
> IOCTL call can already do kernel operations without looking at the
> command, but we would then be able to identify that by looking at the
> char/block device file for instance.
>
> > I understand that this makes things a bit more
> > complicated for Landlock's initial ioctl implementation, but
> > considering my thoughts above and the fact that Landlock's ioctl
> > protections are still evolving I'd rather not add a lot of extra hooks
> > right now.
>
> Without this hook, we'll need to rely on a list of allowed IOCTLs, which
> will be out-of-sync eventually.  It would be a maintenance burden and an
> hacky approach.

Welcome to the painful world of a LSM developer, ioctls are not the
only place where this is a problem, and it should be easy enough to
watch for changes in the ioctl list and update your favorite LSM
accordingly.  Honestly, I think that is kinda the right thing anyway,
I'm skeptical that one could have a generic solution that would
automatically allow or disallow a new ioctl without potentially
breaking your favorite LSM's security model.  If a new ioctl is
introduced it seems like having someone manually review it's impact on
your LSM would be a good idea.

> We're definitely open to new proposals, but until now this is the best
> approach we found from a maintenance, performance, and security point of
> view.

At this point it's probably a good idea to post another RFC patch with
your revised idea, if nothing else it will help rule out any
confusion.  While I remain skeptical, perhaps I am misunderstanding
the design and you'll get my apology and an ACK, but be warned that as
of right now I'm not convinced.

--
paul-moore.com
Günther Noack March 9, 2024, 8:14 a.m. UTC | #20
On Fri, Mar 08, 2024 at 05:25:21PM -0500, Paul Moore wrote:
> On Fri, Mar 8, 2024 at 3:12 PM Mickaël Salaün <mic@digikod.net> wrote:
> > On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote:
> > > On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote:
> > > > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock
> > > > sandbox".
> > >
> > > Which is a problem from a LSM perspective as we want to avoid hooks
> > > which are tightly bound to a single LSM or security model.  It's okay
> > > if a new hook only has a single LSM implementation, but the hook's
> > > definition should be such that it is reasonably generalized to support
> > > multiple LSM/models.
> >
> > As any new hook, there is a first user.  Obviously this new hook would
> > not be restricted to Landlock, it is a generic approach.  I'm pretty
> > sure a few hooks are only used by one LSM though. ;)
> 
> Sure, as I said above, it's okay for there to only be a single LSM
> implementation, but the basic idea behind the hook needs to have some
> hope of being generic.  Your "let's redefine a safe ioctl as 'IOCTL
> always allowed in a Landlock sandbox'" doesn't fill me with confidence
> about the hook being generic; who knows, maybe it will be, but in the
> absence of a patch, I'm left with descriptions like those.

FWIW, the existing IOCTL hook is used in the following places:

* TOMOYO: seemingly configurable per IOCTL command?  (I did not dig deeper)
* SELinux: has a hardcoded switch of IOCTL commands, some with special checks.
  These are also a subset of the do_vfs_ioctl() commands,
  plus KDSKBENT, KDSKBSENT (from ioctl_console(2)).
* Smack: Decomposes the IOCTL command number to look at the _IOC_WRITE and
  _IOC_READ bits. (This is a known problematic approach, because (1) these bits
  describe whether the argument is getting read or written, not whether the
  operation is a mutating one, and (2) some IOCTL commands do not adhere to the
  convention and don't use these macros)

AppArmor does not use the LSM IOCTL hook.


> > > I understand that this makes things a bit more
> > > complicated for Landlock's initial ioctl implementation, but
> > > considering my thoughts above and the fact that Landlock's ioctl
> > > protections are still evolving I'd rather not add a lot of extra hooks
> > > right now.
> >
> > Without this hook, we'll need to rely on a list of allowed IOCTLs, which
> > will be out-of-sync eventually.  It would be a maintenance burden and an
> > hacky approach.
> 
> Welcome to the painful world of a LSM developer, ioctls are not the
> only place where this is a problem, and it should be easy enough to
> watch for changes in the ioctl list and update your favorite LSM
> accordingly.  Honestly, I think that is kinda the right thing anyway,
> I'm skeptical that one could have a generic solution that would
> automatically allow or disallow a new ioctl without potentially
> breaking your favorite LSM's security model.  If a new ioctl is
> introduced it seems like having someone manually review it's impact on
> your LSM would be a good idea.

We are concerned that we will miss a change in do_vfs_ioctl(), which we would
like to reflect in the matching Landlock code.  Do other LSMs have any
approaches for that which go beyond just watching the do_vfs_ioctl()
implementation for changes?


> > We're definitely open to new proposals, but until now this is the best
> > approach we found from a maintenance, performance, and security point of
> > view.
> 
> At this point it's probably a good idea to post another RFC patch with
> your revised idea, if nothing else it will help rule out any
> confusion.  While I remain skeptical, perhaps I am misunderstanding
> the design and you'll get my apology and an ACK, but be warned that as
> of right now I'm not convinced.

Thanks you for your feedback!

Here is V10 with the approach where we use a new LSM hook:
https://lore.kernel.org/all/20240309075320.160128-1-gnoack@google.com/

I hope this helps to clarify the approach a bit.  I'm explaining it in more
detail again in the commit which adds the LSM hook, including a call graph, and
avoiding the word "safe" this time ;-)

Let me know what you think!

Thanks!
—Günther
Casey Schaufler March 9, 2024, 5:41 p.m. UTC | #21
On 3/9/2024 12:14 AM, Günther Noack wrote:
> On Fri, Mar 08, 2024 at 05:25:21PM -0500, Paul Moore wrote:
>> On Fri, Mar 8, 2024 at 3:12 PM Mickaël Salaün <mic@digikod.net> wrote:
>>> On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote:
>>>> On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote:
>>>>> Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock
>>>>> sandbox".
>>>> Which is a problem from a LSM perspective as we want to avoid hooks
>>>> which are tightly bound to a single LSM or security model.  It's okay
>>>> if a new hook only has a single LSM implementation, but the hook's
>>>> definition should be such that it is reasonably generalized to support
>>>> multiple LSM/models.
>>> As any new hook, there is a first user.  Obviously this new hook would
>>> not be restricted to Landlock, it is a generic approach.  I'm pretty
>>> sure a few hooks are only used by one LSM though. ;)
>> Sure, as I said above, it's okay for there to only be a single LSM
>> implementation, but the basic idea behind the hook needs to have some
>> hope of being generic.  Your "let's redefine a safe ioctl as 'IOCTL
>> always allowed in a Landlock sandbox'" doesn't fill me with confidence
>> about the hook being generic; who knows, maybe it will be, but in the
>> absence of a patch, I'm left with descriptions like those.
> FWIW, the existing IOCTL hook is used in the following places:
>
> * TOMOYO: seemingly configurable per IOCTL command?  (I did not dig deeper)
> * SELinux: has a hardcoded switch of IOCTL commands, some with special checks.
>   These are also a subset of the do_vfs_ioctl() commands,
>   plus KDSKBENT, KDSKBSENT (from ioctl_console(2)).
> * Smack: Decomposes the IOCTL command number to look at the _IOC_WRITE and
>   _IOC_READ bits. (This is a known problematic approach, because (1) these bits
>   describe whether the argument is getting read or written, not whether the
>   operation is a mutating one, and (2) some IOCTL commands do not adhere to the
>   convention and don't use these macros)

These shortcomings are well understood. It's a whole lot better than what was
done originally, but definitely not up to formal scrutiny. Back in the bad old
days of UNIX security evaluations we spent as much time on ioctl() as we did
on the rest of the system. Or so it seemed.

>
> AppArmor does not use the LSM IOCTL hook.
>
>
>>>> I understand that this makes things a bit more
>>>> complicated for Landlock's initial ioctl implementation, but
>>>> considering my thoughts above and the fact that Landlock's ioctl
>>>> protections are still evolving I'd rather not add a lot of extra hooks
>>>> right now.
>>> Without this hook, we'll need to rely on a list of allowed IOCTLs, which
>>> will be out-of-sync eventually.  It would be a maintenance burden and an
>>> hacky approach.
>> Welcome to the painful world of a LSM developer, ioctls are not the
>> only place where this is a problem, and it should be easy enough to
>> watch for changes in the ioctl list and update your favorite LSM
>> accordingly.  Honestly, I think that is kinda the right thing anyway,
>> I'm skeptical that one could have a generic solution that would
>> automatically allow or disallow a new ioctl without potentially
>> breaking your favorite LSM's security model.  If a new ioctl is
>> introduced it seems like having someone manually review it's impact on
>> your LSM would be a good idea.
> We are concerned that we will miss a change in do_vfs_ioctl(), which we would
> like to reflect in the matching Landlock code.  Do other LSMs have any
> approaches for that which go beyond just watching the do_vfs_ioctl()
> implementation for changes?
>
>
>>> We're definitely open to new proposals, but until now this is the best
>>> approach we found from a maintenance, performance, and security point of
>>> view.
>> At this point it's probably a good idea to post another RFC patch with
>> your revised idea, if nothing else it will help rule out any
>> confusion.  While I remain skeptical, perhaps I am misunderstanding
>> the design and you'll get my apology and an ACK, but be warned that as
>> of right now I'm not convinced.
> Thanks you for your feedback!
>
> Here is V10 with the approach where we use a new LSM hook:
> https://lore.kernel.org/all/20240309075320.160128-1-gnoack@google.com/
>
> I hope this helps to clarify the approach a bit.  I'm explaining it in more
> detail again in the commit which adds the LSM hook, including a call graph, and
> avoiding the word "safe" this time ;-)
>
> Let me know what you think!
>
> Thanks!
> —Günther
>
Dave Chinner March 11, 2024, 1:03 a.m. UTC | #22
On Fri, Mar 08, 2024 at 12:03:01PM +0100, Günther Noack wrote:
> On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote:
> > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote:
> > >> I need some more convincing as to why we need to introduce these new
> > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally
> > >> proposed at the top of this thread.  I believe I understand why
> > >> Landlock wants this, but I worry that we all might have different
> > >> definitions of a "safe" ioctl list, and encoding a definition into the
> > >> LSM hooks seems like a bad idea to me.
> > >
> > > I have no idea what a "safe" ioctl means here. Subsystems already
> > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > > "safe" clearly means something different here.
> > 
> > That was my problem with the first version as well, but I think
> > drawing the line between "implemented in fs/ioctl.c" and
> > "implemented in a random device driver fops->unlock_ioctl()"
> > seems like a more helpful definition.
> 
> Yes, sorry for the confusion - that is exactly what I meant to say with "safe".:
> 
> Those are the IOCTL commands implemented in fs/ioctl.c which do not go through
> f_ops->unlocked_ioctl (or the compat equivalent).

Which means all the ioctls we wrequire for to manage filesystems are
going to be considered "unsafe" and barred, yes?

That means you'll break basic commands like 'xfs_info' that tell you
the configuration of the filesystem. It will prevent things like
online growing and shrinking, online defrag, fstrim, online
scrubbing and repair, etc will not worki anymore. It will break
backup utilities like xfsdump, and break -all- the device management
of btrfs and bcachefs filesystems.

Further, all the setup and management of -VFS functionality- like
fsverity and fscrypt is actually done at the filesystem level (i.e
through ->unlocked_ioctl, no do_vfs_ioctl()) so those are all going
to get broken as well despite them being "vfs features".

Hence from a filesystem perspective, this is a fundamentally
unworkable definition of "safe".

> We want to give people a way with Landlock so that they can restrict the use of
> device-driver implemented IOCTLs, but where they can keep using the bulk of
> more harmless IOCTLs in fs/ioctl.c.

Hah! There's plenty of "harm" that can be done through those ioctls.
It's the entry point for things like filesystem freeze/thaw, FIEMAP
(returns physical data location information), file cloning,
deduplication and per-inode feature manipulation. Lots of this stuff
is under CAP_SYS_ADMIN because they aren't safe for to be exposed to
general users...

So, yeah, I don't think this definition of "safe" is actually useful
in any way. It's arbitrary, and will require both widespread
whitelisting of ioctls to maintain a useful working system and
widespread blacklisting to create a secure system....

-Dave.
Günther Noack March 11, 2024, 9:01 a.m. UTC | #23
On Mon, Mar 11, 2024 at 12:03:13PM +1100, Dave Chinner wrote:
> On Fri, Mar 08, 2024 at 12:03:01PM +0100, Günther Noack wrote:
> > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > > > I have no idea what a "safe" ioctl means here. Subsystems already
> > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > > > "safe" clearly means something different here.
> > > 
> > > That was my problem with the first version as well, but I think
> > > drawing the line between "implemented in fs/ioctl.c" and
> > > "implemented in a random device driver fops->unlock_ioctl()"
> > > seems like a more helpful definition.
> > 
> > Yes, sorry for the confusion - that is exactly what I meant to say with "safe".:
> > 
> > Those are the IOCTL commands implemented in fs/ioctl.c which do not go through
> > f_ops->unlocked_ioctl (or the compat equivalent).
> 
> Which means all the ioctls we wrequire for to manage filesystems are
> going to be considered "unsafe" and barred, yes?
> 
> That means you'll break basic commands like 'xfs_info' that tell you
> the configuration of the filesystem. It will prevent things like
> online growing and shrinking, online defrag, fstrim, online
> scrubbing and repair, etc will not worki anymore. It will break
> backup utilities like xfsdump, and break -all- the device management
> of btrfs and bcachefs filesystems.
> 
> Further, all the setup and management of -VFS functionality- like
> fsverity and fscrypt is actually done at the filesystem level (i.e
> through ->unlocked_ioctl, no do_vfs_ioctl()) so those are all going
> to get broken as well despite them being "vfs features".
> 
> Hence from a filesystem perspective, this is a fundamentally
> unworkable definition of "safe".

As discussed further up in this thread[1], we want to only apply the IOCTL
command filtering to block and character devices.  I think this should resolve
your concerns about file system specific IOCTLs?  This is implemented in patch
V10 going forward[2].

[1] https://lore.kernel.org/all/20240219.chu4Yeegh3oo@digikod.net/
[2] https://lore.kernel.org/all/20240309075320.160128-1-gnoack@google.com/


> > We want to give people a way with Landlock so that they can restrict the use of
> > device-driver implemented IOCTLs, but where they can keep using the bulk of
> > more harmless IOCTLs in fs/ioctl.c.
> 
> Hah! There's plenty of "harm" that can be done through those ioctls.
> It's the entry point for things like filesystem freeze/thaw, FIEMAP
> (returns physical data location information), file cloning,
> deduplication and per-inode feature manipulation. Lots of this stuff
> is under CAP_SYS_ADMIN because they aren't safe for to be exposed to
> general users...

The operations themselves are not all harmless, but they are harmless to permit
from the Landlock perspective, because (as you point out as well) their use is
already adequately controlled in their existing implementations.

The proposed patch v10 only influences IOCTL operations on device files,
so the "reflink" deduplication IOCTLs, FIEMAP, etc. should not matter.

—Günther
Paul Moore March 11, 2024, 7:04 p.m. UTC | #24
On Sat, Mar 9, 2024 at 3:14 AM Günther Noack <gnoack@google.com> wrote:
> On Fri, Mar 08, 2024 at 05:25:21PM -0500, Paul Moore wrote:
> > On Fri, Mar 8, 2024 at 3:12 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote:
> > > > On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote:
> > > > > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock
> > > > > sandbox".
> > > >
> > > > Which is a problem from a LSM perspective as we want to avoid hooks
> > > > which are tightly bound to a single LSM or security model.  It's okay
> > > > if a new hook only has a single LSM implementation, but the hook's
> > > > definition should be such that it is reasonably generalized to support
> > > > multiple LSM/models.
> > >
> > > As any new hook, there is a first user.  Obviously this new hook would
> > > not be restricted to Landlock, it is a generic approach.  I'm pretty
> > > sure a few hooks are only used by one LSM though. ;)
> >
> > Sure, as I said above, it's okay for there to only be a single LSM
> > implementation, but the basic idea behind the hook needs to have some
> > hope of being generic.  Your "let's redefine a safe ioctl as 'IOCTL
> > always allowed in a Landlock sandbox'" doesn't fill me with confidence
> > about the hook being generic; who knows, maybe it will be, but in the
> > absence of a patch, I'm left with descriptions like those.
>
> FWIW, the existing IOCTL hook is used in the following places:
>
> * TOMOYO: seemingly configurable per IOCTL command?  (I did not dig deeper)
> * SELinux: has a hardcoded switch of IOCTL commands, some with special checks.
>   These are also a subset of the do_vfs_ioctl() commands,
>   plus KDSKBENT, KDSKBSENT (from ioctl_console(2)).

One should be careful using the term "hardcoded" here as I believe it
is misleading in the SELinux case.  SELinux has 11 explicitly defined
ioctls, with an additional two configurable on a per-policy basis
depending on the state of the SELinux IOCTL_SKIP_CLOEXEC policy
capability.  The security policy associated with these explicit ioctl
checks is not hardcoded into the kernel, it is defined as part of the
greater SELinux security policy.  One could make an argument that
FIONBIO and FIOASYNC look a bit hardcoded, but there is some subtlety
there that is probably not worth exploring further in this context but
I'm happy to discuss in a different thread if that is helpful.

All the ioctls that are not explicitly defined in the SELinux code,
are still subject to SELinux policy through both the file/ioctl
permission and the extended permission (xperm) functionality.  The
SELinux xperm functionality, when tied to an ioctl operation, allows
policy developers to allow or deny specific ioctl operations on a
per-domain basis.

> * Smack: Decomposes the IOCTL command number to look at the _IOC_WRITE and
>   _IOC_READ bits. (This is a known problematic approach, because (1) these bits
>   describe whether the argument is getting read or written, not whether the
>   operation is a mutating one, and (2) some IOCTL commands do not adhere to the
>   convention and don't use these macros)
>
> AppArmor does not use the LSM IOCTL hook.
Dave Chinner March 11, 2024, 10:12 p.m. UTC | #25
On Mon, Mar 11, 2024 at 10:01:33AM +0100, Günther Noack wrote:
> On Mon, Mar 11, 2024 at 12:03:13PM +1100, Dave Chinner wrote:
> > On Fri, Mar 08, 2024 at 12:03:01PM +0100, Günther Noack wrote:
> > > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> > > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > > > > I have no idea what a "safe" ioctl means here. Subsystems already
> > > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > > > > "safe" clearly means something different here.
> > > > 
> > > > That was my problem with the first version as well, but I think
> > > > drawing the line between "implemented in fs/ioctl.c" and
> > > > "implemented in a random device driver fops->unlock_ioctl()"
> > > > seems like a more helpful definition.
> > > 
> > > Yes, sorry for the confusion - that is exactly what I meant to say with "safe".:
> > > 
> > > Those are the IOCTL commands implemented in fs/ioctl.c which do not go through
> > > f_ops->unlocked_ioctl (or the compat equivalent).
> > 
> > Which means all the ioctls we wrequire for to manage filesystems are
> > going to be considered "unsafe" and barred, yes?
> > 
> > That means you'll break basic commands like 'xfs_info' that tell you
> > the configuration of the filesystem. It will prevent things like
> > online growing and shrinking, online defrag, fstrim, online
> > scrubbing and repair, etc will not worki anymore. It will break
> > backup utilities like xfsdump, and break -all- the device management
> > of btrfs and bcachefs filesystems.
> > 
> > Further, all the setup and management of -VFS functionality- like
> > fsverity and fscrypt is actually done at the filesystem level (i.e
> > through ->unlocked_ioctl, no do_vfs_ioctl()) so those are all going
> > to get broken as well despite them being "vfs features".
> > 
> > Hence from a filesystem perspective, this is a fundamentally
> > unworkable definition of "safe".
> 
> As discussed further up in this thread[1], we want to only apply the IOCTL
> command filtering to block and character devices.  I think this should resolve
> your concerns about file system specific IOCTLs?  This is implemented in patch
> V10 going forward[2].

I think you misunderstand. I used filesystem ioctls as an obvious
counter argument to this "VFS-only ioctls are safe" proposal to show
that it fundamentally breaks core filesystem boot and management
interfaces. Operations to prepare filesystems for mount may require
block device ioctls to be run. i.e. block device ioctls are required
core boot and management interfaces.

Disallowing ioctls on block devices will break udev rules that set
up block devices on kernel device instantiation events. It will
break partitioning tools that need to read/modify/rescan the
partition table. This will prevent discard, block zeroing and
*secure erase* operations. It may prevent libblkid from reporting
optimal device IO parameters to filesystem utilities like mkfs. You
won't be able to mark block devices as read only.  Management of
zoned block devices will be impossible.

Then stuff like DM and MD devices (e.g. LVM, RAID, etc) simply won't
appear on the system because they can't be scanned, configured,
assembled, etc.

And so on.

The fundamental fact is that system critical block device ioctls are
implemented by generic infrastructure below the VFS layer. They have
their own generic ioctl layer - blkdev_ioctl() is equivalent of
do_vfs_ioctl() for the block layer.  But if we cut off everything
below ->unlocked_ioctl() at the VFS, then we simply can't run any
of these generic block device ioctls.

As I said: this proposal is fundamentally unworkable without
extensive white- and black-listing of individual ioctls in the
security policies. That's not really a viable situation, because
we're going to change code and hence likely silently break those
security policy lists regularly....

-Dave.
Mickaël Salaün March 12, 2024, 10:58 a.m. UTC | #26
On Tue, Mar 12, 2024 at 09:12:28AM +1100, Dave Chinner wrote:
> On Mon, Mar 11, 2024 at 10:01:33AM +0100, Günther Noack wrote:
> > On Mon, Mar 11, 2024 at 12:03:13PM +1100, Dave Chinner wrote:
> > > On Fri, Mar 08, 2024 at 12:03:01PM +0100, Günther Noack wrote:
> > > > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote:
> > > > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote:
> > > > > > I have no idea what a "safe" ioctl means here. Subsystems already
> > > > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so
> > > > > > "safe" clearly means something different here.
> > > > > 
> > > > > That was my problem with the first version as well, but I think
> > > > > drawing the line between "implemented in fs/ioctl.c" and
> > > > > "implemented in a random device driver fops->unlock_ioctl()"
> > > > > seems like a more helpful definition.
> > > > 
> > > > Yes, sorry for the confusion - that is exactly what I meant to say with "safe".:
> > > > 
> > > > Those are the IOCTL commands implemented in fs/ioctl.c which do not go through
> > > > f_ops->unlocked_ioctl (or the compat equivalent).
> > > 
> > > Which means all the ioctls we wrequire for to manage filesystems are
> > > going to be considered "unsafe" and barred, yes?
> > > 
> > > That means you'll break basic commands like 'xfs_info' that tell you
> > > the configuration of the filesystem. It will prevent things like
> > > online growing and shrinking, online defrag, fstrim, online
> > > scrubbing and repair, etc will not worki anymore. It will break
> > > backup utilities like xfsdump, and break -all- the device management
> > > of btrfs and bcachefs filesystems.
> > > 
> > > Further, all the setup and management of -VFS functionality- like
> > > fsverity and fscrypt is actually done at the filesystem level (i.e
> > > through ->unlocked_ioctl, no do_vfs_ioctl()) so those are all going
> > > to get broken as well despite them being "vfs features".
> > > 
> > > Hence from a filesystem perspective, this is a fundamentally
> > > unworkable definition of "safe".
> > 
> > As discussed further up in this thread[1], we want to only apply the IOCTL
> > command filtering to block and character devices.  I think this should resolve
> > your concerns about file system specific IOCTLs?  This is implemented in patch
> > V10 going forward[2].
> 
> I think you misunderstand. I used filesystem ioctls as an obvious
> counter argument to this "VFS-only ioctls are safe" proposal to show
> that it fundamentally breaks core filesystem boot and management
> interfaces. Operations to prepare filesystems for mount may require
> block device ioctls to be run. i.e. block device ioctls are required
> core boot and management interfaces.
> 
> Disallowing ioctls on block devices will break udev rules that set
> up block devices on kernel device instantiation events. It will
> break partitioning tools that need to read/modify/rescan the
> partition table. This will prevent discard, block zeroing and
> *secure erase* operations. It may prevent libblkid from reporting
> optimal device IO parameters to filesystem utilities like mkfs. You
> won't be able to mark block devices as read only.  Management of
> zoned block devices will be impossible.
> 
> Then stuff like DM and MD devices (e.g. LVM, RAID, etc) simply won't
> appear on the system because they can't be scanned, configured,
> assembled, etc.
> 
> And so on.
> 
> The fundamental fact is that system critical block device ioctls are
> implemented by generic infrastructure below the VFS layer. They have
> their own generic ioctl layer - blkdev_ioctl() is equivalent of
> do_vfs_ioctl() for the block layer.  But if we cut off everything
> below ->unlocked_ioctl() at the VFS, then we simply can't run any
> of these generic block device ioctls.
> 
> As I said: this proposal is fundamentally unworkable without
> extensive white- and black-listing of individual ioctls in the
> security policies. That's not really a viable situation, because
> we're going to change code and hence likely silently break those
> security policy lists regularly....

Landlock is an optional sandboxing mechanism targeting unprivileged
users/processes (even if it can of course be used by privileged ones).
This means that there is no global security policy for the whole system
(unlike SELinux, AppArmor...).  System administrators that need to
manage a file system or any block devices would just not sandbox
themselves.  Moreover, most block devices should only be accessible to
the root user (which makes root the only one able to send IOCTL commands
to these block devices).  In a nutshell, processes using boot and
management interfaces are already privileged and they don't use
Landlock.  For instance, a landlocked process cannot do any mount
action, which is documented and it makes sense for the sandboxing use
case (to avoid sandbox bypass).

However, it would be interesting to know if unprivileged users can
request legitimate IOCTL commands on block devices (on a generic
distro), and if this is required for a common file system use (i.e.
excluding administration tasks).  I think all required IOCTL for common
file system use are available through the file system, not block devices,
but please correct me if I'm wrong.  What is nice with this
LANDLOCK_ACCESS_FS_IOCTL_DEV approach is that user space can identify
(with path and dev major/minor) on which device IOCTLs should be
allowed.  This is simple to understand and the information to identify
such devices is already well known.  We can also allow IOCTLs on a set
of devices, e.g. /dev/snd/.

The goal of this patch series is to enable applications to sandbox
themselves and avoid an attacker (exploiting a bug in this application)
to send arbitrary IOCTL commands to any devices available to the user
running this application.  For this sandboxing use case, I think it
wouldn't be useful to differentiate between blkdev_ioctl()'s commands
and device-specific commands because we want to either allow all IOCTL
on a block device or deny most of them (not those handled by
do_vfs_ioctl(), e.g. FIOCLEX, but that's a detail because of the file
access rights).  This is a trade off to ease sandboxing while being able
to limit access to unneeded features (which could potentially be used to
bypass the sandbox, e.g. TTY's IOCTLs).
diff mbox series

Patch

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 76cf22ac97d7..f72c8da47d21 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -763,6 +763,38 @@  static int ioctl_fssetxattr(struct file *file, void __user *argp)
 	return err;
 }
 
+/*
+ * Safeguard to maintain a list of valid IOCTLs handled by do_vfs_ioctl()
+ * instead of def_blk_fops or def_chr_fops (see init_special_inode).
+ */
+__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd)
+{
+	switch (cmd) {
+	case FIOCLEX:
+	case FIONCLEX:
+	case FIONBIO:
+	case FIOASYNC:
+	case FIOQSIZE:
+	case FIFREEZE:
+	case FITHAW:
+	case FS_IOC_FIEMAP:
+	case FIGETBSZ:
+	case FICLONE:
+	case FICLONERANGE:
+	case FIDEDUPERANGE:
+	/* FIONREAD is forwarded to device implementations. */
+	case FS_IOC_GETFLAGS:
+	case FS_IOC_SETFLAGS:
+	case FS_IOC_FSGETXATTR:
+	case FS_IOC_FSSETXATTR:
+	/* file_ioctl()'s IOCTLs are forwarded to device implementations. */
+		return true;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL(vfs_masked_device_ioctl);
+
 /*
  * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
  * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
@@ -858,6 +890,8 @@  SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
 {
 	struct fd f = fdget(fd);
 	int error;
+	const struct inode *inode;
+	bool is_device;
 
 	if (!f.file)
 		return -EBADF;
@@ -866,9 +900,18 @@  SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
 	if (error)
 		goto out;
 
+	inode = file_inode(f.file);
+	is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
+	if (is_device && !vfs_masked_device_ioctl(cmd)) {
+		error = vfs_ioctl(f.file, cmd, arg);
+		goto out;
+	}
+
 	error = do_vfs_ioctl(f.file, fd, cmd, arg);
-	if (error == -ENOIOCTLCMD)
+	if (error == -ENOIOCTLCMD) {
+		WARN_ON_ONCE(is_device);
 		error = vfs_ioctl(f.file, cmd, arg);
+	}
 
 out:
 	fdput(f);
@@ -911,11 +954,49 @@  long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(compat_ptr_ioctl);
 
+static long ioctl_compat(struct file *filp, unsigned int cmd,
+			 compat_ulong_t arg)
+{
+	int error = -ENOTTY;
+
+	if (!filp->f_op->compat_ioctl)
+		goto out;
+
+	error = filp->f_op->compat_ioctl(filp, cmd, arg);
+	if (error == -ENOIOCTLCMD)
+		error = -ENOTTY;
+
+out:
+	return error;
+}
+
+__attribute_const__ bool vfs_masked_device_ioctl_compat(const unsigned int cmd)
+{
+	switch (cmd) {
+	case FICLONE:
+#if defined(CONFIG_X86_64)
+	case FS_IOC_RESVSP_32:
+	case FS_IOC_RESVSP64_32:
+	case FS_IOC_UNRESVSP_32:
+	case FS_IOC_UNRESVSP64_32:
+	case FS_IOC_ZERO_RANGE_32:
+#endif
+	case FS_IOC32_GETFLAGS:
+	case FS_IOC32_SETFLAGS:
+		return true;
+	default:
+		return vfs_masked_device_ioctl(cmd);
+	}
+}
+EXPORT_SYMBOL(vfs_masked_device_ioctl_compat);
+
 COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
 		       compat_ulong_t, arg)
 {
 	struct fd f = fdget(fd);
 	int error;
+	const struct inode *inode;
+	bool is_device;
 
 	if (!f.file)
 		return -EBADF;
@@ -924,6 +1005,13 @@  COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
 	if (error)
 		goto out;
 
+	inode = file_inode(f.file);
+	is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode);
+	if (is_device && !vfs_masked_device_ioctl_compat(cmd)) {
+		error = ioctl_compat(f.file, cmd, arg);
+		goto out;
+	}
+
 	switch (cmd) {
 	/* FICLONE takes an int argument, so don't use compat_ptr() */
 	case FICLONE:
@@ -964,13 +1052,10 @@  COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
 	default:
 		error = do_vfs_ioctl(f.file, fd, cmd,
 				     (unsigned long)compat_ptr(arg));
-		if (error != -ENOIOCTLCMD)
-			break;
-
-		if (f.file->f_op->compat_ioctl)
-			error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
-		if (error == -ENOIOCTLCMD)
-			error = -ENOTTY;
+		if (error == -ENOIOCTLCMD) {
+			WARN_ON_ONCE(is_device);
+			error = ioctl_compat(f.file, cmd, arg);
+		}
 		break;
 	}
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a70495..b620d0c00e16 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1902,6 +1902,18 @@  extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
 #define compat_ptr_ioctl NULL
 #endif
 
+extern __attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd);
+#ifdef CONFIG_COMPAT
+extern __attribute_const__ bool
+vfs_masked_device_ioctl_compat(const unsigned int cmd);
+#else /* CONFIG_COMPAT */
+static inline __attribute_const__ bool
+vfs_masked_device_ioctl_compat(const unsigned int cmd)
+{
+	return vfs_masked_device_ioctl(cmd);
+}
+#endif /* CONFIG_COMPAT */
+
 /*
  * VFS file helper functions.
  */