diff mbox series

[RFC,1/3] add unique mount ID

Message ID 20230913152238.905247-2-mszeredi@redhat.com (mailing list archive)
State Changes Requested
Delegated to: Paul Moore
Headers show
Series quering mount attributes | expand

Commit Message

Miklos Szeredi Sept. 13, 2023, 3:22 p.m. UTC
If a mount is released then it's mnt_id can immediately be reused.  This is
bad news for user interfaces that want to uniquely identify a mount.

Implementing a unique mount ID is trivial (use a 64bit counter).
Unfortunately userspace assumes 32bit size and would overflow after the
counter reaches 2^32.

Introduce a new 64bit ID alongside the old one.  Allow new interfaces to
work on both the old and new IDs by starting the counter from 2^32.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/mount.h                | 3 ++-
 fs/namespace.c            | 4 ++++
 fs/stat.c                 | 9 +++++++--
 include/uapi/linux/stat.h | 1 +
 4 files changed, 14 insertions(+), 3 deletions(-)

Comments

Christian Brauner Sept. 14, 2023, 9:03 a.m. UTC | #1
On Wed, Sep 13, 2023 at 05:22:34PM +0200, Miklos Szeredi wrote:
> If a mount is released then it's mnt_id can immediately be reused.  This is
> bad news for user interfaces that want to uniquely identify a mount.
> 
> Implementing a unique mount ID is trivial (use a 64bit counter).
> Unfortunately userspace assumes 32bit size and would overflow after the
> counter reaches 2^32.
> 
> Introduce a new 64bit ID alongside the old one.  Allow new interfaces to
> work on both the old and new IDs by starting the counter from 2^32.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  fs/mount.h                | 3 ++-
>  fs/namespace.c            | 4 ++++
>  fs/stat.c                 | 9 +++++++--
>  include/uapi/linux/stat.h | 1 +
>  4 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/mount.h b/fs/mount.h
> index 130c07c2f8d2..a14f762b3f29 100644
> --- a/fs/mount.h
> +++ b/fs/mount.h
> @@ -72,7 +72,8 @@ struct mount {
>  	struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
>  	__u32 mnt_fsnotify_mask;
>  #endif
> -	int mnt_id;			/* mount identifier */
> +	int mnt_id;			/* mount identifier, reused */
> +	u64 mnt_id_unique;		/* mount ID unique until reboot */
>  	int mnt_group_id;		/* peer group identifier */
>  	int mnt_expiry_mark;		/* true if marked for expiry */
>  	struct hlist_head mnt_pins;
> diff --git a/fs/namespace.c b/fs/namespace.c
> index e157efc54023..de47c5f66e17 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -68,6 +68,9 @@ static u64 event;
>  static DEFINE_IDA(mnt_id_ida);
>  static DEFINE_IDA(mnt_group_ida);
>  
> +/* Don't allow confusion with mount ID allocated wit IDA */
> +static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32);

Hm, is your concern that userspace confuses these two values? If so, I
think we shouldn't worry about this.

If a userspace program retrieves a mntid and then confuses itself about
what mnt id they're talking about something's very wrong anyway. So I'd
rather not see us waste 32 bits just for that. Other than that this
seems to implement what we agreed on at LSFMM so my hope is that this is
fairly uncontroversial.
Miklos Szeredi Sept. 14, 2023, 9:30 a.m. UTC | #2
On Thu, 14 Sept 2023 at 11:04, Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Sep 13, 2023 at 05:22:34PM +0200, Miklos Szeredi wrote:
> > If a mount is released then it's mnt_id can immediately be reused.  This is
> > bad news for user interfaces that want to uniquely identify a mount.
> >
> > Implementing a unique mount ID is trivial (use a 64bit counter).
> > Unfortunately userspace assumes 32bit size and would overflow after the
> > counter reaches 2^32.
> >
> > Introduce a new 64bit ID alongside the old one.  Allow new interfaces to
> > work on both the old and new IDs by starting the counter from 2^32.
> >
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> >  fs/mount.h                | 3 ++-
> >  fs/namespace.c            | 4 ++++
> >  fs/stat.c                 | 9 +++++++--
> >  include/uapi/linux/stat.h | 1 +
> >  4 files changed, 14 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/mount.h b/fs/mount.h
> > index 130c07c2f8d2..a14f762b3f29 100644
> > --- a/fs/mount.h
> > +++ b/fs/mount.h
> > @@ -72,7 +72,8 @@ struct mount {
> >       struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
> >       __u32 mnt_fsnotify_mask;
> >  #endif
> > -     int mnt_id;                     /* mount identifier */
> > +     int mnt_id;                     /* mount identifier, reused */
> > +     u64 mnt_id_unique;              /* mount ID unique until reboot */
> >       int mnt_group_id;               /* peer group identifier */
> >       int mnt_expiry_mark;            /* true if marked for expiry */
> >       struct hlist_head mnt_pins;
> > diff --git a/fs/namespace.c b/fs/namespace.c
> > index e157efc54023..de47c5f66e17 100644
> > --- a/fs/namespace.c
> > +++ b/fs/namespace.c
> > @@ -68,6 +68,9 @@ static u64 event;
> >  static DEFINE_IDA(mnt_id_ida);
> >  static DEFINE_IDA(mnt_group_ida);
> >
> > +/* Don't allow confusion with mount ID allocated wit IDA */
> > +static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32);
>
> Hm, is your concern that userspace confuses these two values? If so, I
> think we shouldn't worry about this.

Yes, one concern is that humans confuse the old and the new ID.

I also think it makes sense to allow the new interfaces to look up the
mount based on either the old or the new ID.   But I could be wrong
there, since that might encourage bad code.  Maybe the new interface
should only use take the new ID, which means no mixed use of
/proc/$$/mountinfo and statmnt/listmnt.

>
> If a userspace program retrieves a mntid and then confuses itself about
> what mnt id they're talking about something's very wrong anyway. So I'd
> rather not see us waste 32 bits just for that.

This is wasting a quarter of a billionth of the ID space.  We are
surely not concerned about that.

Thanks,
Miklos
Christian Brauner Sept. 14, 2023, 9:36 a.m. UTC | #3
> Yes, one concern is that humans confuse the old and the new ID.
> 
> I also think it makes sense to allow the new interfaces to look up the
> mount based on either the old or the new ID.   But I could be wrong

Hm, mount id recycling may happen so quickly that for service restarts
with a lot of mounts this becomes mostly useless...

> there, since that might encourage bad code.  Maybe the new interface
> should only use take the new ID, which means no mixed use of
> /proc/$$/mountinfo and statmnt/listmnt.

... so I think that is indeed the better way of doing things. There's no
need to encourage userspace to mix both identifiers.
Miklos Szeredi Sept. 14, 2023, 9:43 a.m. UTC | #4
On Thu, 14 Sept 2023 at 11:36, Christian Brauner <brauner@kernel.org> wrote:
>
> > Yes, one concern is that humans confuse the old and the new ID.
> >
> > I also think it makes sense to allow the new interfaces to look up the
> > mount based on either the old or the new ID.   But I could be wrong
>
> Hm, mount id recycling may happen so quickly that for service restarts
> with a lot of mounts this becomes mostly useless...

Agreed.  The old ID is mostly useful for human interaction.

>
> > there, since that might encourage bad code.  Maybe the new interface
> > should only use take the new ID, which means no mixed use of
> > /proc/$$/mountinfo and statmnt/listmnt.
>
> ... so I think that is indeed the better way of doing things. There's no
> need to encourage userspace to mix both identifiers.

Okay.

But I'd still leave the 2^32 offset for human confusion avoidance.

Thanks,
Miklos
Christian Brauner Sept. 14, 2023, 10:06 a.m. UTC | #5
> But I'd still leave the 2^32 offset for human confusion avoidance.

Sure, it's really not worth arguing about.
Ian Kent Sept. 15, 2023, 1:31 a.m. UTC | #6
On 14/9/23 17:43, Miklos Szeredi wrote:
> On Thu, 14 Sept 2023 at 11:36, Christian Brauner <brauner@kernel.org> wrote:
>>> Yes, one concern is that humans confuse the old and the new ID.
>>>
>>> I also think it makes sense to allow the new interfaces to look up the
>>> mount based on either the old or the new ID.   But I could be wrong
>> Hm, mount id recycling may happen so quickly that for service restarts
>> with a lot of mounts this becomes mostly useless...
> Agreed.  The old ID is mostly useful for human interaction.
>
>>> there, since that might encourage bad code.  Maybe the new interface
>>> should only use take the new ID, which means no mixed use of
>>> /proc/$$/mountinfo and statmnt/listmnt.
>> ... so I think that is indeed the better way of doing things. There's no
>> need to encourage userspace to mix both identifiers.
> Okay.

I think having both is leaving more opportunity for confusion and the new

mount id has a different name so I think start the move to using that now

and only allow the new one for lookups.


>
> But I'd still leave the 2^32 offset for human confusion avoidance.

Yep, agreed.


Ian
diff mbox series

Patch

diff --git a/fs/mount.h b/fs/mount.h
index 130c07c2f8d2..a14f762b3f29 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -72,7 +72,8 @@  struct mount {
 	struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
 	__u32 mnt_fsnotify_mask;
 #endif
-	int mnt_id;			/* mount identifier */
+	int mnt_id;			/* mount identifier, reused */
+	u64 mnt_id_unique;		/* mount ID unique until reboot */
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	struct hlist_head mnt_pins;
diff --git a/fs/namespace.c b/fs/namespace.c
index e157efc54023..de47c5f66e17 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -68,6 +68,9 @@  static u64 event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
 
+/* Don't allow confusion with mount ID allocated wit IDA */
+static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32);
+
 static struct hlist_head *mount_hashtable __read_mostly;
 static struct hlist_head *mountpoint_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
@@ -131,6 +134,7 @@  static int mnt_alloc_id(struct mount *mnt)
 	if (res < 0)
 		return res;
 	mnt->mnt_id = res;
+	mnt->mnt_id_unique = atomic64_inc_return(&mnt_id_ctr);
 	return 0;
 }
 
diff --git a/fs/stat.c b/fs/stat.c
index 6822ac77aec2..46d901b6b2de 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -280,8 +280,13 @@  static int vfs_statx(int dfd, struct filename *filename, int flags,
 
 	error = vfs_getattr(&path, stat, request_mask, flags);
 
-	stat->mnt_id = real_mount(path.mnt)->mnt_id;
-	stat->result_mask |= STATX_MNT_ID;
+	if (request_mask & STATX_MNT_ID_UNIQUE) {
+		stat->mnt_id = real_mount(path.mnt)->mnt_id_unique;
+		stat->result_mask |= STATX_MNT_ID_UNIQUE;
+	} else {
+		stat->mnt_id = real_mount(path.mnt)->mnt_id;
+		stat->result_mask |= STATX_MNT_ID;
+	}
 
 	if (path.mnt->mnt_root == path.dentry)
 		stat->attributes |= STATX_ATTR_MOUNT_ROOT;
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 7cab2c65d3d7..2f2ee82d5517 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -154,6 +154,7 @@  struct statx {
 #define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
 #define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
 #define STATX_DIOALIGN		0x00002000U	/* Want/got direct I/O alignment info */
+#define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */