diff mbox series

[v2,2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check

Message ID 20241113-pidfs_fh-v2-2-9a4d28155a37@e43.eu (mailing list archive)
State New
Headers show
Series [v2,1/3] pseudofs: add support for export_ops | expand

Commit Message

Erin Shepherd Nov. 13, 2024, 5:55 p.m. UTC
For pidfs, there is no reason to restrict file handle decoding by
CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate
this

Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
---
 fs/fhandle.c             | 36 +++++++++++++++++++++---------------
 include/linux/exportfs.h |  3 +++
 2 files changed, 24 insertions(+), 15 deletions(-)

Comments

kernel test robot Nov. 13, 2024, 10:50 p.m. UTC | #1
Hi Erin,

kernel test robot noticed the following build errors:

[auto build test ERROR on 14b6320953a3f856a3f93bf9a0e423395baa593d]

url:    https://github.com/intel-lab-lkp/linux/commits/Erin-Shepherd/pseudofs-add-support-for-export_ops/20241114-020539
base:   14b6320953a3f856a3f93bf9a0e423395baa593d
patch link:    https://lore.kernel.org/r/20241113-pidfs_fh-v2-2-9a4d28155a37%40e43.eu
patch subject: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20241114/202411140603.E03vXsdz-lkp@intel.com/config)
compiler: clang version 19.1.3 (https://github.com/llvm/llvm-project ab51eccf88f5321e7c60591c5546b254b6afab99)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241114/202411140603.E03vXsdz-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411140603.E03vXsdz-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from fs/fhandle.c:2:
   In file included from include/linux/syscalls.h:93:
   In file included from include/trace/syscall.h:7:
   In file included from include/linux/trace_events.h:6:
   In file included from include/linux/ring_buffer.h:5:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> fs/fhandle.c:242:28: error: initializing 'struct export_operations *' with an expression of type 'const struct export_operations *' discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers]
     242 |         struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
         |                                   ^     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1 warning and 1 error generated.


vim +242 fs/fhandle.c

   237	
   238	static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
   239					 unsigned int o_flags)
   240	{
   241		struct path *root = &ctx->root;
 > 242		struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
   243	
   244		if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
   245			return true;
   246	
   247		if (capable(CAP_DAC_READ_SEARCH))
   248			return true;
   249	
   250		/*
   251		 * Allow relaxed permissions of file handles if the caller has the
   252		 * ability to mount the filesystem or create a bind-mount of the
   253		 * provided @mountdirfd.
   254		 *
   255		 * In both cases the caller may be able to get an unobstructed way to
   256		 * the encoded file handle. If the caller is only able to create a
   257		 * bind-mount we need to verify that there are no locked mounts on top
   258		 * of it that could prevent us from getting to the encoded file.
   259		 *
   260		 * In principle, locked mounts can prevent the caller from mounting the
   261		 * filesystem but that only applies to procfs and sysfs neither of which
   262		 * support decoding file handles.
   263		 *
   264		 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
   265		 * confusing api in the face of disconnected non-dir dentries.
   266		 *
   267		 * There's only one dentry for each directory inode (VFS rule)...
   268		 */
   269		if (!(o_flags & O_DIRECTORY))
   270			return false;
   271	
   272		if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
   273			ctx->flags = HANDLE_CHECK_PERMS;
   274		else if (is_mounted(root->mnt) &&
   275			 ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
   276				    CAP_SYS_ADMIN) &&
   277			 !has_locked_children(real_mount(root->mnt), root->dentry))
   278			ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
   279		else
   280			return false;
   281	
   282		/* Are we able to override DAC permissions? */
   283		if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
   284			return false;
   285	
   286		ctx->fh_flags = EXPORT_FH_DIR_ONLY;
   287		return true;
   288	}
   289
kernel test robot Nov. 14, 2024, 1:29 a.m. UTC | #2
Hi Erin,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 14b6320953a3f856a3f93bf9a0e423395baa593d]

url:    https://github.com/intel-lab-lkp/linux/commits/Erin-Shepherd/pseudofs-add-support-for-export_ops/20241114-020539
base:   14b6320953a3f856a3f93bf9a0e423395baa593d
patch link:    https://lore.kernel.org/r/20241113-pidfs_fh-v2-2-9a4d28155a37%40e43.eu
patch subject: [PATCH v2 2/3] exportfs: allow fs to disable CAP_DAC_READ_SEARCH check
config: openrisc-defconfig (https://download.01.org/0day-ci/archive/20241114/202411140905.a0ntnQQG-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241114/202411140905.a0ntnQQG-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202411140905.a0ntnQQG-lkp@intel.com/

All warnings (new ones prefixed by >>):

   fs/fhandle.c: In function 'may_decode_fh':
>> fs/fhandle.c:242:41: warning: initialization discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers]
     242 |         struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
         |                                         ^~~~


vim +/const +242 fs/fhandle.c

   237	
   238	static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
   239					 unsigned int o_flags)
   240	{
   241		struct path *root = &ctx->root;
 > 242		struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
   243	
   244		if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
   245			return true;
   246	
   247		if (capable(CAP_DAC_READ_SEARCH))
   248			return true;
   249	
   250		/*
   251		 * Allow relaxed permissions of file handles if the caller has the
   252		 * ability to mount the filesystem or create a bind-mount of the
   253		 * provided @mountdirfd.
   254		 *
   255		 * In both cases the caller may be able to get an unobstructed way to
   256		 * the encoded file handle. If the caller is only able to create a
   257		 * bind-mount we need to verify that there are no locked mounts on top
   258		 * of it that could prevent us from getting to the encoded file.
   259		 *
   260		 * In principle, locked mounts can prevent the caller from mounting the
   261		 * filesystem but that only applies to procfs and sysfs neither of which
   262		 * support decoding file handles.
   263		 *
   264		 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
   265		 * confusing api in the face of disconnected non-dir dentries.
   266		 *
   267		 * There's only one dentry for each directory inode (VFS rule)...
   268		 */
   269		if (!(o_flags & O_DIRECTORY))
   270			return false;
   271	
   272		if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
   273			ctx->flags = HANDLE_CHECK_PERMS;
   274		else if (is_mounted(root->mnt) &&
   275			 ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
   276				    CAP_SYS_ADMIN) &&
   277			 !has_locked_children(real_mount(root->mnt), root->dentry))
   278			ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
   279		else
   280			return false;
   281	
   282		/* Are we able to override DAC permissions? */
   283		if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
   284			return false;
   285	
   286		ctx->fh_flags = EXPORT_FH_DIR_ONLY;
   287		return true;
   288	}
   289
Christoph Hellwig Nov. 14, 2024, 4:37 a.m. UTC | #3
On Wed, Nov 13, 2024 at 05:55:24PM +0000, Erin Shepherd wrote:
> For pidfs, there is no reason to restrict file handle decoding by
> CAP_DAC_READ_SEARCH.

Why is there no reason, i.e. why do you think it is safe.

>Introduce an export_ops flag that can indicate
> this

Also why is is desirable?

To be this looks more than sketchy with the actual exporting hat on,
but I guess that's now how the cool kids use open by handle these days.
Amir Goldstein Nov. 14, 2024, 6:37 a.m. UTC | #4
On Wed, Nov 13, 2024 at 8:11 PM Erin Shepherd <erin.shepherd@e43.eu> wrote:
>
> For pidfs, there is no reason to restrict file handle decoding by
> CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate
> this
>
> Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
> ---
>  fs/fhandle.c             | 36 +++++++++++++++++++++---------------
>  include/linux/exportfs.h |  3 +++
>  2 files changed, 24 insertions(+), 15 deletions(-)
>
> diff --git a/fs/fhandle.c b/fs/fhandle.c
> index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644
> --- a/fs/fhandle.c
> +++ b/fs/fhandle.c
> @@ -235,26 +235,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
>         return 0;
>  }
>
> -/*
> - * Allow relaxed permissions of file handles if the caller has the
> - * ability to mount the filesystem or create a bind-mount of the
> - * provided @mountdirfd.
> - *
> - * In both cases the caller may be able to get an unobstructed way to
> - * the encoded file handle. If the caller is only able to create a
> - * bind-mount we need to verify that there are no locked mounts on top
> - * of it that could prevent us from getting to the encoded file.
> - *
> - * In principle, locked mounts can prevent the caller from mounting the
> - * filesystem but that only applies to procfs and sysfs neither of which
> - * support decoding file handles.
> - */
>  static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
>                                  unsigned int o_flags)
>  {
>         struct path *root = &ctx->root;
> +       struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
> +
> +       if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
> +               return true;
> +
> +       if (capable(CAP_DAC_READ_SEARCH))
> +               return true;
>
>         /*
> +        * Allow relaxed permissions of file handles if the caller has the
> +        * ability to mount the filesystem or create a bind-mount of the
> +        * provided @mountdirfd.
> +        *
> +        * In both cases the caller may be able to get an unobstructed way to
> +        * the encoded file handle. If the caller is only able to create a
> +        * bind-mount we need to verify that there are no locked mounts on top
> +        * of it that could prevent us from getting to the encoded file.
> +        *
> +        * In principle, locked mounts can prevent the caller from mounting the
> +        * filesystem but that only applies to procfs and sysfs neither of which
> +        * support decoding file handles.
> +        *
>          * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
>          * confusing api in the face of disconnected non-dir dentries.
>          *
> @@ -293,7 +299,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
>         if (retval)
>                 goto out_err;
>
> -       if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
> +       if (!may_decode_fh(&ctx, o_flags)) {
>                 retval = -EPERM;
>                 goto out_path;
>         }
> diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
> index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644
> --- a/include/linux/exportfs.h
> +++ b/include/linux/exportfs.h
> @@ -247,6 +247,9 @@ struct export_operations {
>                                                 */
>  #define EXPORT_OP_FLUSH_ON_CLOSE       (0x20) /* fs flushes file data on close */
>  #define EXPORT_OP_ASYNC_LOCK           (0x40) /* fs can do async lock request */
> +#define EXPORT_OP_UNRESTRICTED_OPEN    (0x80) /* FS allows open_by_handle_at
> +                                                 without CAP_DAC_READ_SEARCH
> +                                               */

Don't love the name, but I wonder, isn't SB_NOUSER already a good
enough indication that CAP_DAC_READ_SEARCH is irrelevant?

Essentially, mnt_fd is the user's proof that they can access the mount
and CAP_DAC_READ_SEARCH is the legacy "proof" that the user can
reach from mount the inode by path lookup.

Which reminds me, what is the mnt_fd expected for opening a pidfd
file by handle?

Thanks,
Amir.
Erin Shepherd Nov. 14, 2024, 12:56 p.m. UTC | #5
On 14/11/2024 05:37, Christoph Hellwig wrote:
> On Wed, Nov 13, 2024 at 05:55:24PM +0000, Erin Shepherd wrote:
>> For pidfs, there is no reason to restrict file handle decoding by
>> CAP_DAC_READ_SEARCH.
> Why is there no reason, i.e. why do you think it is safe.

A process can use both open_by_handle_at to open the exact same set of
pidfds as they can by pidfd_open. i.e. there is no reason to additionally
restrict access to the former API.

>> Introduce an export_ops flag that can indicate
>> this
> Also why is is desirable?
>
> To be this looks more than sketchy with the actual exporting hat on,
> but I guess that's now how the cool kids use open by handle these days.
Right - we have a bunch of API file systems where userspace wants stable
non-reused file references for the same reasons network filesystems do.
The first example of this was cgroupfs, but the same rationale exists for
pidfs and process tracking.
Christian Brauner Nov. 14, 2024, 2:16 p.m. UTC | #6
On Thu, Nov 14, 2024 at 07:37:19AM +0100, Amir Goldstein wrote:
> On Wed, Nov 13, 2024 at 8:11 PM Erin Shepherd <erin.shepherd@e43.eu> wrote:
> >
> > For pidfs, there is no reason to restrict file handle decoding by
> > CAP_DAC_READ_SEARCH. Introduce an export_ops flag that can indicate
> > this
> >
> > Signed-off-by: Erin Shepherd <erin.shepherd@e43.eu>
> > ---
> >  fs/fhandle.c             | 36 +++++++++++++++++++++---------------
> >  include/linux/exportfs.h |  3 +++
> >  2 files changed, 24 insertions(+), 15 deletions(-)
> >
> > diff --git a/fs/fhandle.c b/fs/fhandle.c
> > index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644
> > --- a/fs/fhandle.c
> > +++ b/fs/fhandle.c
> > @@ -235,26 +235,32 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path,
> >         return 0;
> >  }
> >
> > -/*
> > - * Allow relaxed permissions of file handles if the caller has the
> > - * ability to mount the filesystem or create a bind-mount of the
> > - * provided @mountdirfd.
> > - *
> > - * In both cases the caller may be able to get an unobstructed way to
> > - * the encoded file handle. If the caller is only able to create a
> > - * bind-mount we need to verify that there are no locked mounts on top
> > - * of it that could prevent us from getting to the encoded file.
> > - *
> > - * In principle, locked mounts can prevent the caller from mounting the
> > - * filesystem but that only applies to procfs and sysfs neither of which
> > - * support decoding file handles.
> > - */
> >  static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
> >                                  unsigned int o_flags)
> >  {
> >         struct path *root = &ctx->root;
> > +       struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
> > +
> > +       if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
> > +               return true;
> > +
> > +       if (capable(CAP_DAC_READ_SEARCH))
> > +               return true;
> >
> >         /*
> > +        * Allow relaxed permissions of file handles if the caller has the
> > +        * ability to mount the filesystem or create a bind-mount of the
> > +        * provided @mountdirfd.
> > +        *
> > +        * In both cases the caller may be able to get an unobstructed way to
> > +        * the encoded file handle. If the caller is only able to create a
> > +        * bind-mount we need to verify that there are no locked mounts on top
> > +        * of it that could prevent us from getting to the encoded file.
> > +        *
> > +        * In principle, locked mounts can prevent the caller from mounting the
> > +        * filesystem but that only applies to procfs and sysfs neither of which
> > +        * support decoding file handles.
> > +        *
> >          * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
> >          * confusing api in the face of disconnected non-dir dentries.
> >          *
> > @@ -293,7 +299,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
> >         if (retval)
> >                 goto out_err;
> >
> > -       if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
> > +       if (!may_decode_fh(&ctx, o_flags)) {
> >                 retval = -EPERM;
> >                 goto out_path;
> >         }
> > diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
> > index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644
> > --- a/include/linux/exportfs.h
> > +++ b/include/linux/exportfs.h
> > @@ -247,6 +247,9 @@ struct export_operations {
> >                                                 */
> >  #define EXPORT_OP_FLUSH_ON_CLOSE       (0x20) /* fs flushes file data on close */
> >  #define EXPORT_OP_ASYNC_LOCK           (0x40) /* fs can do async lock request */
> > +#define EXPORT_OP_UNRESTRICTED_OPEN    (0x80) /* FS allows open_by_handle_at
> > +                                                 without CAP_DAC_READ_SEARCH
> > +                                               */
> 
> Don't love the name, but I wonder, isn't SB_NOUSER already a good
> enough indication that CAP_DAC_READ_SEARCH is irrelevant?
> 
> Essentially, mnt_fd is the user's proof that they can access the mount
> and CAP_DAC_READ_SEARCH is the legacy "proof" that the user can
> reach from mount the inode by path lookup.
> 
> Which reminds me, what is the mnt_fd expected for opening a pidfd
> file by handle?

int pidfd_self = pidfd_open(getpid(), 0);
open_by_handle_at(pidfd_self, ...);

is sufficient.
diff mbox series

Patch

diff --git a/fs/fhandle.c b/fs/fhandle.c
index 82df28d45cd70a7df525f50bbb398d646110cd99..056116e58f43983bc7bb86da170fb554c7a2fac7 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -235,26 +235,32 @@  static int do_handle_to_path(struct file_handle *handle, struct path *path,
 	return 0;
 }
 
-/*
- * Allow relaxed permissions of file handles if the caller has the
- * ability to mount the filesystem or create a bind-mount of the
- * provided @mountdirfd.
- *
- * In both cases the caller may be able to get an unobstructed way to
- * the encoded file handle. If the caller is only able to create a
- * bind-mount we need to verify that there are no locked mounts on top
- * of it that could prevent us from getting to the encoded file.
- *
- * In principle, locked mounts can prevent the caller from mounting the
- * filesystem but that only applies to procfs and sysfs neither of which
- * support decoding file handles.
- */
 static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
 				 unsigned int o_flags)
 {
 	struct path *root = &ctx->root;
+	struct export_operations *nop = root->mnt->mnt_sb->s_export_op;
+
+	if (nop && nop->flags & EXPORT_OP_UNRESTRICTED_OPEN)
+		return true;
+
+	if (capable(CAP_DAC_READ_SEARCH))
+		return true;
 
 	/*
+	 * Allow relaxed permissions of file handles if the caller has the
+	 * ability to mount the filesystem or create a bind-mount of the
+	 * provided @mountdirfd.
+	 *
+	 * In both cases the caller may be able to get an unobstructed way to
+	 * the encoded file handle. If the caller is only able to create a
+	 * bind-mount we need to verify that there are no locked mounts on top
+	 * of it that could prevent us from getting to the encoded file.
+	 *
+	 * In principle, locked mounts can prevent the caller from mounting the
+	 * filesystem but that only applies to procfs and sysfs neither of which
+	 * support decoding file handles.
+	 *
 	 * Restrict to O_DIRECTORY to provide a deterministic API that avoids a
 	 * confusing api in the face of disconnected non-dir dentries.
 	 *
@@ -293,7 +299,7 @@  static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
 	if (retval)
 		goto out_err;
 
-	if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
+	if (!may_decode_fh(&ctx, o_flags)) {
 		retval = -EPERM;
 		goto out_path;
 	}
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 893a1d21dc1c4abc7e52325d7a4cf0adb407f039..459508b53e77ed0597cee217ffe3d82cc7cc11a4 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -247,6 +247,9 @@  struct export_operations {
 						*/
 #define EXPORT_OP_FLUSH_ON_CLOSE	(0x20) /* fs flushes file data on close */
 #define EXPORT_OP_ASYNC_LOCK		(0x40) /* fs can do async lock request */
+#define EXPORT_OP_UNRESTRICTED_OPEN	(0x80) /* FS allows open_by_handle_at
+						  without CAP_DAC_READ_SEARCH
+						*/
 	unsigned long	flags;
 };