diff mbox series

statmount: add a new supported_mask field

Message ID 20250203-statmount-v1-1-871fa7e61f69@kernel.org (mailing list archive)
State New
Headers show
Series statmount: add a new supported_mask field | expand

Commit Message

Jeff Layton Feb. 3, 2025, 5:09 p.m. UTC
Some of the fields in the statmount() reply can be optional. If the
kernel has nothing to emit in that field, then it doesn't set the flag
in the reply. This presents a problem: There is currently no way to
know what mask flags the kernel supports since you can't always count on
them being in the reply.

Add a new STATMOUNT_SUPPORTED_MASK flag and field that the kernel can
set in the reply. Userland can use this to determine if the fields it
requires from the kernel are supported. This also gives us a way to
deprecate fields in the future, if that should become necessary.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
I ran into this problem recently. We have a variety of kernels running
that have varying levels of support of statmount(), and I need to be
able to fall back to /proc scraping if support for everything isn't
present. This is difficult currently since statmount() doesn't set the
flag in the return mask if the field is empty.
---
 fs/namespace.c             | 18 ++++++++++++++++++
 include/uapi/linux/mount.h |  4 +++-
 2 files changed, 21 insertions(+), 1 deletion(-)


---
base-commit: 57c64cb6ddfb6c79a6c3fc2e434303c40f700964
change-id: 20250203-statmount-f7da9bec0f23

Best regards,

Comments

Christian Brauner Feb. 4, 2025, 11:07 a.m. UTC | #1
On Mon, Feb 03, 2025 at 12:09:48PM -0500, Jeff Layton wrote:
> Some of the fields in the statmount() reply can be optional. If the
> kernel has nothing to emit in that field, then it doesn't set the flag
> in the reply. This presents a problem: There is currently no way to
> know what mask flags the kernel supports since you can't always count on
> them being in the reply.
> 
> Add a new STATMOUNT_SUPPORTED_MASK flag and field that the kernel can
> set in the reply. Userland can use this to determine if the fields it
> requires from the kernel are supported. This also gives us a way to
> deprecate fields in the future, if that should become necessary.
> 
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
> I ran into this problem recently. We have a variety of kernels running
> that have varying levels of support of statmount(), and I need to be
> able to fall back to /proc scraping if support for everything isn't
> present. This is difficult currently since statmount() doesn't set the
> flag in the return mask if the field is empty.
> ---
>  fs/namespace.c             | 18 ++++++++++++++++++
>  include/uapi/linux/mount.h |  4 +++-
>  2 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index a3ed3f2980cbae6238cda09874e2dac146080eb6..7ec5fc436c4ff300507c4ed71a757f5d75a4d520 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -5317,6 +5317,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
>  	return 0;
>  }
>  
> +/* This must be updated whenever a new flag is added */
> +#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \
> +			     STATMOUNT_MNT_BASIC | \
> +			     STATMOUNT_PROPAGATE_FROM | \
> +			     STATMOUNT_MNT_ROOT | \
> +			     STATMOUNT_MNT_POINT | \
> +			     STATMOUNT_FS_TYPE | \
> +			     STATMOUNT_MNT_NS_ID | \
> +			     STATMOUNT_MNT_OPTS | \
> +			     STATMOUNT_FS_SUBTYPE | \
> +			     STATMOUNT_SB_SOURCE | \
> +			     STATMOUNT_OPT_ARRAY | \
> +			     STATMOUNT_OPT_SEC_ARRAY | \
> +			     STATMOUNT_SUPPORTED_MASK)

Hm, do we need a separate bit for STATMOUNT_SUPPORTED_MASK? Afaiu, this
is more of a convenience thing but then maybe we just do:

#define STATMOUNT_SUPPORTED_MASK STATMOUNT_MNT_BASIC

and be done with it?

Otherwise I think it is worth having support for this.

> +
>  static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
>  			struct mnt_namespace *ns)
>  {
> @@ -5386,6 +5401,9 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
>  	if (!err && s->mask & STATMOUNT_MNT_NS_ID)
>  		statmount_mnt_ns_id(s, ns);
>  
> +	if (!err && s->mask & STATMOUNT_SUPPORTED_MASK)
> +		s->sm.supported_mask = STATMOUNT_SUPPORTED_MASK;
> +
>  	if (err)
>  		return err;
>  
> diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
> index c07008816acae89cbea3087caf50d537d4e78298..c553dc4ba68407ee38c27238e9bdec2ebf5e2457 100644
> --- a/include/uapi/linux/mount.h
> +++ b/include/uapi/linux/mount.h
> @@ -179,7 +179,8 @@ struct statmount {
>  	__u32 opt_array;	/* [str] Array of nul terminated fs options */
>  	__u32 opt_sec_num;	/* Number of security options */
>  	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
> -	__u64 __spare2[46];
> +	__u64 supported_mask;	/* Mask flags that this kernel supports */
> +	__u64 __spare2[45];
>  	char str[];		/* Variable size part containing strings */
>  };
>  
> @@ -217,6 +218,7 @@ struct mnt_id_req {
>  #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
>  #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
>  #define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
> +#define STATMOUNT_SUPPORTED_MASK	0x00001000U	/* Want/got supported mask flags */
>  
>  /*
>   * Special @mnt_id values that can be passed to listmount
> 
> ---
> base-commit: 57c64cb6ddfb6c79a6c3fc2e434303c40f700964
> change-id: 20250203-statmount-f7da9bec0f23
> 
> Best regards,
> -- 
> Jeff Layton <jlayton@kernel.org>
>
Jeff Layton Feb. 4, 2025, 12:28 p.m. UTC | #2
On Tue, 2025-02-04 at 12:07 +0100, Christian Brauner wrote:
> On Mon, Feb 03, 2025 at 12:09:48PM -0500, Jeff Layton wrote:
> > Some of the fields in the statmount() reply can be optional. If the
> > kernel has nothing to emit in that field, then it doesn't set the flag
> > in the reply. This presents a problem: There is currently no way to
> > know what mask flags the kernel supports since you can't always count on
> > them being in the reply.
> > 
> > Add a new STATMOUNT_SUPPORTED_MASK flag and field that the kernel can
> > set in the reply. Userland can use this to determine if the fields it
> > requires from the kernel are supported. This also gives us a way to
> > deprecate fields in the future, if that should become necessary.
> > 
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> > I ran into this problem recently. We have a variety of kernels running
> > that have varying levels of support of statmount(), and I need to be
> > able to fall back to /proc scraping if support for everything isn't
> > present. This is difficult currently since statmount() doesn't set the
> > flag in the return mask if the field is empty.
> > ---
> >  fs/namespace.c             | 18 ++++++++++++++++++
> >  include/uapi/linux/mount.h |  4 +++-
> >  2 files changed, 21 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/namespace.c b/fs/namespace.c
> > index a3ed3f2980cbae6238cda09874e2dac146080eb6..7ec5fc436c4ff300507c4ed71a757f5d75a4d520 100644
> > --- a/fs/namespace.c
> > +++ b/fs/namespace.c
> > @@ -5317,6 +5317,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
> >  	return 0;
> >  }
> >  
> > +/* This must be updated whenever a new flag is added */
> > +#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \
> > +			     STATMOUNT_MNT_BASIC | \
> > +			     STATMOUNT_PROPAGATE_FROM | \
> > +			     STATMOUNT_MNT_ROOT | \
> > +			     STATMOUNT_MNT_POINT | \
> > +			     STATMOUNT_FS_TYPE | \
> > +			     STATMOUNT_MNT_NS_ID | \
> > +			     STATMOUNT_MNT_OPTS | \
> > +			     STATMOUNT_FS_SUBTYPE | \
> > +			     STATMOUNT_SB_SOURCE | \
> > +			     STATMOUNT_OPT_ARRAY | \
> > +			     STATMOUNT_OPT_SEC_ARRAY | \
> > +			     STATMOUNT_SUPPORTED_MASK)
> 
> Hm, do we need a separate bit for STATMOUNT_SUPPORTED_MASK? Afaiu, this
> is more of a convenience thing but then maybe we just do:
> 
> #define STATMOUNT_SUPPORTED_MASK STATMOUNT_MNT_BASIC
> 
> and be done with it?
> 
> Otherwise I think it is worth having support for this.
> 

Are you suggesting that we should just add the ->supported_mask field
without a declaring a bit for it? If so, how would that work on old
kernels? You'd never know if you could trust the contents of that field
since the return mask wouldn't indicate any difference.

> > +
> >  static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
> >  			struct mnt_namespace *ns)
> >  {
> > @@ -5386,6 +5401,9 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
> >  	if (!err && s->mask & STATMOUNT_MNT_NS_ID)
> >  		statmount_mnt_ns_id(s, ns);
> >  
> > +	if (!err && s->mask & STATMOUNT_SUPPORTED_MASK)
> > +		s->sm.supported_mask = STATMOUNT_SUPPORTED_MASK;
> > +
> >  	if (err)
> >  		return err;
> >  
> > diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
> > index c07008816acae89cbea3087caf50d537d4e78298..c553dc4ba68407ee38c27238e9bdec2ebf5e2457 100644
> > --- a/include/uapi/linux/mount.h
> > +++ b/include/uapi/linux/mount.h
> > @@ -179,7 +179,8 @@ struct statmount {
> >  	__u32 opt_array;	/* [str] Array of nul terminated fs options */
> >  	__u32 opt_sec_num;	/* Number of security options */
> >  	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
> > -	__u64 __spare2[46];
> > +	__u64 supported_mask;	/* Mask flags that this kernel supports */
> > +	__u64 __spare2[45];
> >  	char str[];		/* Variable size part containing strings */
> >  };
> >  
> > @@ -217,6 +218,7 @@ struct mnt_id_req {
> >  #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
> >  #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
> >  #define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
> > +#define STATMOUNT_SUPPORTED_MASK	0x00001000U	/* Want/got supported mask flags */
> >  
> >  /*
> >   * Special @mnt_id values that can be passed to listmount
> > 
> > ---
> > base-commit: 57c64cb6ddfb6c79a6c3fc2e434303c40f700964
> > change-id: 20250203-statmount-f7da9bec0f23
> > 
> > Best regards,
> > -- 
> > Jeff Layton <jlayton@kernel.org>
> >
Christian Brauner Feb. 4, 2025, 3:09 p.m. UTC | #3
On Tue, Feb 04, 2025 at 07:28:20AM -0500, Jeff Layton wrote:
> On Tue, 2025-02-04 at 12:07 +0100, Christian Brauner wrote:
> > On Mon, Feb 03, 2025 at 12:09:48PM -0500, Jeff Layton wrote:
> > > Some of the fields in the statmount() reply can be optional. If the
> > > kernel has nothing to emit in that field, then it doesn't set the flag
> > > in the reply. This presents a problem: There is currently no way to
> > > know what mask flags the kernel supports since you can't always count on
> > > them being in the reply.
> > > 
> > > Add a new STATMOUNT_SUPPORTED_MASK flag and field that the kernel can
> > > set in the reply. Userland can use this to determine if the fields it
> > > requires from the kernel are supported. This also gives us a way to
> > > deprecate fields in the future, if that should become necessary.
> > > 
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > > I ran into this problem recently. We have a variety of kernels running
> > > that have varying levels of support of statmount(), and I need to be
> > > able to fall back to /proc scraping if support for everything isn't
> > > present. This is difficult currently since statmount() doesn't set the
> > > flag in the return mask if the field is empty.
> > > ---
> > >  fs/namespace.c             | 18 ++++++++++++++++++
> > >  include/uapi/linux/mount.h |  4 +++-
> > >  2 files changed, 21 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/fs/namespace.c b/fs/namespace.c
> > > index a3ed3f2980cbae6238cda09874e2dac146080eb6..7ec5fc436c4ff300507c4ed71a757f5d75a4d520 100644
> > > --- a/fs/namespace.c
> > > +++ b/fs/namespace.c
> > > @@ -5317,6 +5317,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
> > >  	return 0;
> > >  }
> > >  
> > > +/* This must be updated whenever a new flag is added */
> > > +#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \
> > > +			     STATMOUNT_MNT_BASIC | \
> > > +			     STATMOUNT_PROPAGATE_FROM | \
> > > +			     STATMOUNT_MNT_ROOT | \
> > > +			     STATMOUNT_MNT_POINT | \
> > > +			     STATMOUNT_FS_TYPE | \
> > > +			     STATMOUNT_MNT_NS_ID | \
> > > +			     STATMOUNT_MNT_OPTS | \
> > > +			     STATMOUNT_FS_SUBTYPE | \
> > > +			     STATMOUNT_SB_SOURCE | \
> > > +			     STATMOUNT_OPT_ARRAY | \
> > > +			     STATMOUNT_OPT_SEC_ARRAY | \
> > > +			     STATMOUNT_SUPPORTED_MASK)
> > 
> > Hm, do we need a separate bit for STATMOUNT_SUPPORTED_MASK? Afaiu, this
> > is more of a convenience thing but then maybe we just do:
> > 
> > #define STATMOUNT_SUPPORTED_MASK STATMOUNT_MNT_BASIC
> > 
> > and be done with it?
> > 
> > Otherwise I think it is worth having support for this.
> > 
> 
> Are you suggesting that we should just add the ->supported_mask field
> without a declaring a bit for it? If so, how would that work on old
> kernels? You'd never know if you could trust the contents of that field
> since the return mask wouldn't indicate any difference.

What I didn't realize because I hadn't read carefully enough in your
patch was that STATMOUNT_SUPPORTED_MASK is raised in ->mask and only
then is ->supported_mask filled in.

My thinking had been that ->supported_mask will simply always be filled
in by the kernel going forward. Which is arguably not ideal but would
work:

So the kernel guarantees since the introduction of statmount() that when
we copy out to userspace that anything the kernel doesn't know will be
copied back zeroed. So any unknown fields are zero.

(1) Say userspace passes a struct statmount with ->supported_mask to the
    kernel - even if it has put garbage in there or intentionally raised
    valid flags in there - the old kernel will copy over this and set it
    to zero.

(2) If you're on a new kernel but pass an old struct the kernel will
    fill in ->supported_mask. Imho, that's fine. Userspace simply will
    not know about it.

But we can leave the explicit request in!

> 
> > > +
> > >  static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
> > >  			struct mnt_namespace *ns)
> > >  {
> > > @@ -5386,6 +5401,9 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
> > >  	if (!err && s->mask & STATMOUNT_MNT_NS_ID)
> > >  		statmount_mnt_ns_id(s, ns);
> > >  
> > > +	if (!err && s->mask & STATMOUNT_SUPPORTED_MASK)
> > > +		s->sm.supported_mask = STATMOUNT_SUPPORTED_MASK;
> > > +
> > >  	if (err)
> > >  		return err;
> > >  
> > > diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
> > > index c07008816acae89cbea3087caf50d537d4e78298..c553dc4ba68407ee38c27238e9bdec2ebf5e2457 100644
> > > --- a/include/uapi/linux/mount.h
> > > +++ b/include/uapi/linux/mount.h
> > > @@ -179,7 +179,8 @@ struct statmount {
> > >  	__u32 opt_array;	/* [str] Array of nul terminated fs options */
> > >  	__u32 opt_sec_num;	/* Number of security options */
> > >  	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
> > > -	__u64 __spare2[46];
> > > +	__u64 supported_mask;	/* Mask flags that this kernel supports */
> > > +	__u64 __spare2[45];
> > >  	char str[];		/* Variable size part containing strings */
> > >  };
> > >  
> > > @@ -217,6 +218,7 @@ struct mnt_id_req {
> > >  #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
> > >  #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
> > >  #define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
> > > +#define STATMOUNT_SUPPORTED_MASK	0x00001000U	/* Want/got supported mask flags */
> > >  
> > >  /*
> > >   * Special @mnt_id values that can be passed to listmount
> > > 
> > > ---
> > > base-commit: 57c64cb6ddfb6c79a6c3fc2e434303c40f700964
> > > change-id: 20250203-statmount-f7da9bec0f23
> > > 
> > > Best regards,
> > > -- 
> > > Jeff Layton <jlayton@kernel.org>
> > > 
> 
> -- 
> Jeff Layton <jlayton@kernel.org>
Jeff Layton Feb. 4, 2025, 3:57 p.m. UTC | #4
On Tue, 2025-02-04 at 16:09 +0100, Christian Brauner wrote:
> On Tue, Feb 04, 2025 at 07:28:20AM -0500, Jeff Layton wrote:
> > On Tue, 2025-02-04 at 12:07 +0100, Christian Brauner wrote:
> > > On Mon, Feb 03, 2025 at 12:09:48PM -0500, Jeff Layton wrote:
> > > > Some of the fields in the statmount() reply can be optional. If the
> > > > kernel has nothing to emit in that field, then it doesn't set the flag
> > > > in the reply. This presents a problem: There is currently no way to
> > > > know what mask flags the kernel supports since you can't always count on
> > > > them being in the reply.
> > > > 
> > > > Add a new STATMOUNT_SUPPORTED_MASK flag and field that the kernel can
> > > > set in the reply. Userland can use this to determine if the fields it
> > > > requires from the kernel are supported. This also gives us a way to
> > > > deprecate fields in the future, if that should become necessary.
> > > > 
> > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > ---
> > > > I ran into this problem recently. We have a variety of kernels running
> > > > that have varying levels of support of statmount(), and I need to be
> > > > able to fall back to /proc scraping if support for everything isn't
> > > > present. This is difficult currently since statmount() doesn't set the
> > > > flag in the return mask if the field is empty.
> > > > ---
> > > >  fs/namespace.c             | 18 ++++++++++++++++++
> > > >  include/uapi/linux/mount.h |  4 +++-
> > > >  2 files changed, 21 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/fs/namespace.c b/fs/namespace.c
> > > > index a3ed3f2980cbae6238cda09874e2dac146080eb6..7ec5fc436c4ff300507c4ed71a757f5d75a4d520 100644
> > > > --- a/fs/namespace.c
> > > > +++ b/fs/namespace.c
> > > > @@ -5317,6 +5317,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +/* This must be updated whenever a new flag is added */
> > > > +#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \
> > > > +			     STATMOUNT_MNT_BASIC | \
> > > > +			     STATMOUNT_PROPAGATE_FROM | \
> > > > +			     STATMOUNT_MNT_ROOT | \
> > > > +			     STATMOUNT_MNT_POINT | \
> > > > +			     STATMOUNT_FS_TYPE | \
> > > > +			     STATMOUNT_MNT_NS_ID | \
> > > > +			     STATMOUNT_MNT_OPTS | \
> > > > +			     STATMOUNT_FS_SUBTYPE | \
> > > > +			     STATMOUNT_SB_SOURCE | \
> > > > +			     STATMOUNT_OPT_ARRAY | \
> > > > +			     STATMOUNT_OPT_SEC_ARRAY | \
> > > > +			     STATMOUNT_SUPPORTED_MASK)
> > > 
> > > Hm, do we need a separate bit for STATMOUNT_SUPPORTED_MASK? Afaiu, this
> > > is more of a convenience thing but then maybe we just do:
> > > 
> > > #define STATMOUNT_SUPPORTED_MASK STATMOUNT_MNT_BASIC
> > > 
> > > and be done with it?
> > > 
> > > Otherwise I think it is worth having support for this.
> > > 
> > 
> > Are you suggesting that we should just add the ->supported_mask field
> > without a declaring a bit for it? If so, how would that work on old
> > kernels? You'd never know if you could trust the contents of that field
> > since the return mask wouldn't indicate any difference.
> 
> What I didn't realize because I hadn't read carefully enough in your
> patch was that STATMOUNT_SUPPORTED_MASK is raised in ->mask and only
> then is ->supported_mask filled in.
> 
> My thinking had been that ->supported_mask will simply always be filled
> in by the kernel going forward. Which is arguably not ideal but would
> work:
> 
> So the kernel guarantees since the introduction of statmount() that when
> we copy out to userspace that anything the kernel doesn't know will be
> copied back zeroed. So any unknown fields are zero.
> 
> (1) Say userspace passes a struct statmount with ->supported_mask to the
>     kernel - even if it has put garbage in there or intentionally raised
>     valid flags in there - the old kernel will copy over this and set it
>     to zero.
> 
> (2) If you're on a new kernel but pass an old struct the kernel will
>     fill in ->supported_mask. Imho, that's fine. Userspace simply will
>     not know about it.
> 
> But we can leave the explicit request in!
> 


I can respin without STATMOUNT_SUPPORTED_MASK. I was thinking it left
that part of the return buffer untouched, but if it's zeroed, then that
works as well.

If you see a supported_mask of 0, you know the kernel didn't fill it in
(since it should at least support _something_). That'll need to be
carefully documented though.

> > 
> > > > +
> > > >  static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
> > > >  			struct mnt_namespace *ns)
> > > >  {
> > > > @@ -5386,6 +5401,9 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
> > > >  	if (!err && s->mask & STATMOUNT_MNT_NS_ID)
> > > >  		statmount_mnt_ns_id(s, ns);
> > > >  
> > > > +	if (!err && s->mask & STATMOUNT_SUPPORTED_MASK)
> > > > +		s->sm.supported_mask = STATMOUNT_SUPPORTED_MASK;
> > > > +
> > > >  	if (err)
> > > >  		return err;
> > > >  
> > > > diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
> > > > index c07008816acae89cbea3087caf50d537d4e78298..c553dc4ba68407ee38c27238e9bdec2ebf5e2457 100644
> > > > --- a/include/uapi/linux/mount.h
> > > > +++ b/include/uapi/linux/mount.h
> > > > @@ -179,7 +179,8 @@ struct statmount {
> > > >  	__u32 opt_array;	/* [str] Array of nul terminated fs options */
> > > >  	__u32 opt_sec_num;	/* Number of security options */
> > > >  	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
> > > > -	__u64 __spare2[46];
> > > > +	__u64 supported_mask;	/* Mask flags that this kernel supports */
> > > > +	__u64 __spare2[45];
> > > >  	char str[];		/* Variable size part containing strings */
> > > >  };
> > > >  
> > > > @@ -217,6 +218,7 @@ struct mnt_id_req {
> > > >  #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
> > > >  #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
> > > >  #define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
> > > > +#define STATMOUNT_SUPPORTED_MASK	0x00001000U	/* Want/got supported mask flags */
> > > >  
> > > >  /*
> > > >   * Special @mnt_id values that can be passed to listmount
> > > > 
> > > > ---
> > > > base-commit: 57c64cb6ddfb6c79a6c3fc2e434303c40f700964
> > > > change-id: 20250203-statmount-f7da9bec0f23
> > > > 
> > > > Best regards,
> > > > -- 
> > > > Jeff Layton <jlayton@kernel.org>
> > > > 
> > 
> > -- 
> > Jeff Layton <jlayton@kernel.org>
Christian Brauner Feb. 4, 2025, 4:45 p.m. UTC | #5
On Tue, Feb 04, 2025 at 10:57:39AM -0500, Jeff Layton wrote:
> On Tue, 2025-02-04 at 16:09 +0100, Christian Brauner wrote:
> > On Tue, Feb 04, 2025 at 07:28:20AM -0500, Jeff Layton wrote:
> > > On Tue, 2025-02-04 at 12:07 +0100, Christian Brauner wrote:
> > > > On Mon, Feb 03, 2025 at 12:09:48PM -0500, Jeff Layton wrote:
> > > > > Some of the fields in the statmount() reply can be optional. If the
> > > > > kernel has nothing to emit in that field, then it doesn't set the flag
> > > > > in the reply. This presents a problem: There is currently no way to
> > > > > know what mask flags the kernel supports since you can't always count on
> > > > > them being in the reply.
> > > > > 
> > > > > Add a new STATMOUNT_SUPPORTED_MASK flag and field that the kernel can
> > > > > set in the reply. Userland can use this to determine if the fields it
> > > > > requires from the kernel are supported. This also gives us a way to
> > > > > deprecate fields in the future, if that should become necessary.
> > > > > 
> > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > ---
> > > > > I ran into this problem recently. We have a variety of kernels running
> > > > > that have varying levels of support of statmount(), and I need to be
> > > > > able to fall back to /proc scraping if support for everything isn't
> > > > > present. This is difficult currently since statmount() doesn't set the
> > > > > flag in the return mask if the field is empty.
> > > > > ---
> > > > >  fs/namespace.c             | 18 ++++++++++++++++++
> > > > >  include/uapi/linux/mount.h |  4 +++-
> > > > >  2 files changed, 21 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/fs/namespace.c b/fs/namespace.c
> > > > > index a3ed3f2980cbae6238cda09874e2dac146080eb6..7ec5fc436c4ff300507c4ed71a757f5d75a4d520 100644
> > > > > --- a/fs/namespace.c
> > > > > +++ b/fs/namespace.c
> > > > > @@ -5317,6 +5317,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > +/* This must be updated whenever a new flag is added */
> > > > > +#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \
> > > > > +			     STATMOUNT_MNT_BASIC | \
> > > > > +			     STATMOUNT_PROPAGATE_FROM | \
> > > > > +			     STATMOUNT_MNT_ROOT | \
> > > > > +			     STATMOUNT_MNT_POINT | \
> > > > > +			     STATMOUNT_FS_TYPE | \
> > > > > +			     STATMOUNT_MNT_NS_ID | \
> > > > > +			     STATMOUNT_MNT_OPTS | \
> > > > > +			     STATMOUNT_FS_SUBTYPE | \
> > > > > +			     STATMOUNT_SB_SOURCE | \
> > > > > +			     STATMOUNT_OPT_ARRAY | \
> > > > > +			     STATMOUNT_OPT_SEC_ARRAY | \
> > > > > +			     STATMOUNT_SUPPORTED_MASK)
> > > > 
> > > > Hm, do we need a separate bit for STATMOUNT_SUPPORTED_MASK? Afaiu, this
> > > > is more of a convenience thing but then maybe we just do:
> > > > 
> > > > #define STATMOUNT_SUPPORTED_MASK STATMOUNT_MNT_BASIC
> > > > 
> > > > and be done with it?
> > > > 
> > > > Otherwise I think it is worth having support for this.
> > > > 
> > > 
> > > Are you suggesting that we should just add the ->supported_mask field
> > > without a declaring a bit for it? If so, how would that work on old
> > > kernels? You'd never know if you could trust the contents of that field
> > > since the return mask wouldn't indicate any difference.
> > 
> > What I didn't realize because I hadn't read carefully enough in your
> > patch was that STATMOUNT_SUPPORTED_MASK is raised in ->mask and only
> > then is ->supported_mask filled in.
> > 
> > My thinking had been that ->supported_mask will simply always be filled
> > in by the kernel going forward. Which is arguably not ideal but would
> > work:
> > 
> > So the kernel guarantees since the introduction of statmount() that when
> > we copy out to userspace that anything the kernel doesn't know will be
> > copied back zeroed. So any unknown fields are zero.
> > 
> > (1) Say userspace passes a struct statmount with ->supported_mask to the
> >     kernel - even if it has put garbage in there or intentionally raised
> >     valid flags in there - the old kernel will copy over this and set it
> >     to zero.
> > 
> > (2) If you're on a new kernel but pass an old struct the kernel will
> >     fill in ->supported_mask. Imho, that's fine. Userspace simply will
> >     not know about it.
> > 
> > But we can leave the explicit request in!
> > 
> 
> 
> I can respin without STATMOUNT_SUPPORTED_MASK. I was thinking it left
> that part of the return buffer untouched, but if it's zeroed, then that
> works as well.
> 
> If you see a supported_mask of 0, you know the kernel didn't fill it in
> (since it should at least support _something_). That'll need to be
> carefully documented though.

It's probably easier for userspace if that flag must be specifically raised.
Jeff Layton Feb. 4, 2025, 4:58 p.m. UTC | #6
On Tue, 2025-02-04 at 17:45 +0100, Christian Brauner wrote:
> On Tue, Feb 04, 2025 at 10:57:39AM -0500, Jeff Layton wrote:
> > On Tue, 2025-02-04 at 16:09 +0100, Christian Brauner wrote:
> > > On Tue, Feb 04, 2025 at 07:28:20AM -0500, Jeff Layton wrote:
> > > > On Tue, 2025-02-04 at 12:07 +0100, Christian Brauner wrote:
> > > > > On Mon, Feb 03, 2025 at 12:09:48PM -0500, Jeff Layton wrote:
> > > > > > Some of the fields in the statmount() reply can be optional. If the
> > > > > > kernel has nothing to emit in that field, then it doesn't set the flag
> > > > > > in the reply. This presents a problem: There is currently no way to
> > > > > > know what mask flags the kernel supports since you can't always count on
> > > > > > them being in the reply.
> > > > > > 
> > > > > > Add a new STATMOUNT_SUPPORTED_MASK flag and field that the kernel can
> > > > > > set in the reply. Userland can use this to determine if the fields it
> > > > > > requires from the kernel are supported. This also gives us a way to
> > > > > > deprecate fields in the future, if that should become necessary.
> > > > > > 
> > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > > > ---
> > > > > > I ran into this problem recently. We have a variety of kernels running
> > > > > > that have varying levels of support of statmount(), and I need to be
> > > > > > able to fall back to /proc scraping if support for everything isn't
> > > > > > present. This is difficult currently since statmount() doesn't set the
> > > > > > flag in the return mask if the field is empty.
> > > > > > ---
> > > > > >  fs/namespace.c             | 18 ++++++++++++++++++
> > > > > >  include/uapi/linux/mount.h |  4 +++-
> > > > > >  2 files changed, 21 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > diff --git a/fs/namespace.c b/fs/namespace.c
> > > > > > index a3ed3f2980cbae6238cda09874e2dac146080eb6..7ec5fc436c4ff300507c4ed71a757f5d75a4d520 100644
> > > > > > --- a/fs/namespace.c
> > > > > > +++ b/fs/namespace.c
> > > > > > @@ -5317,6 +5317,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
> > > > > >  	return 0;
> > > > > >  }
> > > > > >  
> > > > > > +/* This must be updated whenever a new flag is added */
> > > > > > +#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \
> > > > > > +			     STATMOUNT_MNT_BASIC | \
> > > > > > +			     STATMOUNT_PROPAGATE_FROM | \
> > > > > > +			     STATMOUNT_MNT_ROOT | \
> > > > > > +			     STATMOUNT_MNT_POINT | \
> > > > > > +			     STATMOUNT_FS_TYPE | \
> > > > > > +			     STATMOUNT_MNT_NS_ID | \
> > > > > > +			     STATMOUNT_MNT_OPTS | \
> > > > > > +			     STATMOUNT_FS_SUBTYPE | \
> > > > > > +			     STATMOUNT_SB_SOURCE | \
> > > > > > +			     STATMOUNT_OPT_ARRAY | \
> > > > > > +			     STATMOUNT_OPT_SEC_ARRAY | \
> > > > > > +			     STATMOUNT_SUPPORTED_MASK)
> > > > > 
> > > > > Hm, do we need a separate bit for STATMOUNT_SUPPORTED_MASK? Afaiu, this
> > > > > is more of a convenience thing but then maybe we just do:
> > > > > 
> > > > > #define STATMOUNT_SUPPORTED_MASK STATMOUNT_MNT_BASIC
> > > > > 
> > > > > and be done with it?
> > > > > 
> > > > > Otherwise I think it is worth having support for this.
> > > > > 
> > > > 
> > > > Are you suggesting that we should just add the ->supported_mask field
> > > > without a declaring a bit for it? If so, how would that work on old
> > > > kernels? You'd never know if you could trust the contents of that field
> > > > since the return mask wouldn't indicate any difference.
> > > 
> > > What I didn't realize because I hadn't read carefully enough in your
> > > patch was that STATMOUNT_SUPPORTED_MASK is raised in ->mask and only
> > > then is ->supported_mask filled in.
> > > 
> > > My thinking had been that ->supported_mask will simply always be filled
> > > in by the kernel going forward. Which is arguably not ideal but would
> > > work:
> > > 
> > > So the kernel guarantees since the introduction of statmount() that when
> > > we copy out to userspace that anything the kernel doesn't know will be
> > > copied back zeroed. So any unknown fields are zero.
> > > 
> > > (1) Say userspace passes a struct statmount with ->supported_mask to the
> > >     kernel - even if it has put garbage in there or intentionally raised
> > >     valid flags in there - the old kernel will copy over this and set it
> > >     to zero.
> > > 
> > > (2) If you're on a new kernel but pass an old struct the kernel will
> > >     fill in ->supported_mask. Imho, that's fine. Userspace simply will
> > >     not know about it.
> > > 
> > > But we can leave the explicit request in!
> > > 
> > 
> > 
> > I can respin without STATMOUNT_SUPPORTED_MASK. I was thinking it left
> > that part of the return buffer untouched, but if it's zeroed, then that
> > works as well.
> > 
> > If you see a supported_mask of 0, you know the kernel didn't fill it in
> > (since it should at least support _something_). That'll need to be
> > carefully documented though.
> 
> It's probably easier for userspace if that flag must be specifically raised.

Agreed. Should I assume you'll take this one as-is then? If you want to
change it to always set the field though, then that's fine with me.
diff mbox series

Patch

diff --git a/fs/namespace.c b/fs/namespace.c
index a3ed3f2980cbae6238cda09874e2dac146080eb6..7ec5fc436c4ff300507c4ed71a757f5d75a4d520 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5317,6 +5317,21 @@  static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
 	return 0;
 }
 
+/* This must be updated whenever a new flag is added */
+#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \
+			     STATMOUNT_MNT_BASIC | \
+			     STATMOUNT_PROPAGATE_FROM | \
+			     STATMOUNT_MNT_ROOT | \
+			     STATMOUNT_MNT_POINT | \
+			     STATMOUNT_FS_TYPE | \
+			     STATMOUNT_MNT_NS_ID | \
+			     STATMOUNT_MNT_OPTS | \
+			     STATMOUNT_FS_SUBTYPE | \
+			     STATMOUNT_SB_SOURCE | \
+			     STATMOUNT_OPT_ARRAY | \
+			     STATMOUNT_OPT_SEC_ARRAY | \
+			     STATMOUNT_SUPPORTED_MASK)
+
 static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
 			struct mnt_namespace *ns)
 {
@@ -5386,6 +5401,9 @@  static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
 	if (!err && s->mask & STATMOUNT_MNT_NS_ID)
 		statmount_mnt_ns_id(s, ns);
 
+	if (!err && s->mask & STATMOUNT_SUPPORTED_MASK)
+		s->sm.supported_mask = STATMOUNT_SUPPORTED_MASK;
+
 	if (err)
 		return err;
 
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index c07008816acae89cbea3087caf50d537d4e78298..c553dc4ba68407ee38c27238e9bdec2ebf5e2457 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -179,7 +179,8 @@  struct statmount {
 	__u32 opt_array;	/* [str] Array of nul terminated fs options */
 	__u32 opt_sec_num;	/* Number of security options */
 	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
-	__u64 __spare2[46];
+	__u64 supported_mask;	/* Mask flags that this kernel supports */
+	__u64 __spare2[45];
 	char str[];		/* Variable size part containing strings */
 };
 
@@ -217,6 +218,7 @@  struct mnt_id_req {
 #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
 #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
 #define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK	0x00001000U	/* Want/got supported mask flags */
 
 /*
  * Special @mnt_id values that can be passed to listmount