diff mbox series

[2/3] vfs: Use the mounts_by_id xarray to do /proc/mounts and co.

Message ID 161581007628.2850696.11692651942358302102.stgit@warthog.procyon.org.uk (mailing list archive)
State New
Headers show
Series vfs: Use an xarray instead of inserted bookmarks to scan mount list | expand

Commit Message

David Howells March 15, 2021, 12:07 p.m. UTC
Use the mounts_by_id xarray added to the mount namespace to perform
iteration over the mounts in a namespace on behalf of /proc/mounts and
similar.

Since the iteration no longer trawls a standard list_head, but rather uses
an xarray, it could in future be done under the RCU read lock only.  To do
that, we would need to hide mounts that are in the process of being inserted
into the tree by marking them in the xarray itself or using a mount flag.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Alexander Viro <viro@zeniv.linux.org.uk>
cc: Miklos Szeredi <miklos@szeredi.hu>
cc: Matthew Wilcox <willy@infradead.org>
---

 fs/mount.h          |    2 +-
 fs/namespace.c      |   40 +++++++++++++++++-----------------------
 fs/proc_namespace.c |    3 ---
 3 files changed, 18 insertions(+), 27 deletions(-)

Comments

Matthew Wilcox (Oracle) March 15, 2021, 12:54 p.m. UTC | #1
On Mon, Mar 15, 2021 at 12:07:56PM +0000, David Howells wrote:
> Use the mounts_to_id xarray added to the mount namespace to perform

You called it mounts_by_id in the last patch ...

> Since it doesn't trawl a standard list_head, but rather uses xarray, this
> could be done under the RCU read lock only.  To do this, we would need to
> hide mounts that are in the process of being inserted into the tree by
> marking them in the xarray itself or using a mount flag.

>  /* iterator; we want it to have access to namespace_sem, thus here... */
>  static void *m_start(struct seq_file *m, loff_t *pos)
>  {
> -	struct proc_mounts *p = m->private;
> -	struct list_head *prev;
> +	struct proc_mounts *state = m->private;
> +	void *entry;
>  
>  	down_read(&namespace_sem);
> -	if (!*pos) {
> -		prev = &p->ns->list;
> -	} else {
> -		prev = &p->cursor.mnt_list;
> +	state->xas = (struct xa_state) __XA_STATE(&state->ns->mounts_by_id, *pos, 0, 0);
>  
> -		/* Read after we'd reached the end? */
> -		if (list_empty(prev))
> -			return NULL;
> -	}
> +	entry = xas_find(&state->xas, ULONG_MAX);

I know you haven't enabled enough debugging because this will assert
that either the RCU read lock or the xa_lock is held to prevent xa_nodes
from disappearing underneath us.

Why do you want to use an xa_state for this?  This is /proc, so efficiency
isn't the highest priority.  I'd just use xa_find(), and then you don't
need to care about an xa_state or locking -- it handles taking the rcu
read lock for you.

> +	while (entry && xas_invalid(entry))

I've never seen anybody make that mistake before.  Good one.  Not sure
if there's anything I can do to prevent it in future.

> +		entry = xas_next_entry(&state->xas, ULONG_MAX);
diff mbox series

Patch

diff --git a/fs/mount.h b/fs/mount.h
index 455f4d293a65..114e7d603995 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -130,7 +130,7 @@  struct proc_mounts {
 	struct mnt_namespace *ns;
 	struct path root;
 	int (*show)(struct seq_file *, struct vfsmount *);
-	struct mount cursor;
+	struct xa_state xas;
 };
 
 extern const struct seq_operations mounts_op;
diff --git a/fs/namespace.c b/fs/namespace.c
index 5c9bcaeac4de..d19fde0654f7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1334,6 +1334,7 @@  struct vfsmount *mnt_clone_internal(const struct path *path)
 }
 
 #ifdef CONFIG_PROC_FS
+#if 0
 static struct mount *mnt_list_next(struct mnt_namespace *ns,
 				   struct list_head *p)
 {
@@ -1351,47 +1352,40 @@  static struct mount *mnt_list_next(struct mnt_namespace *ns,
 
 	return ret;
 }
+#endif
 
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-	struct proc_mounts *p = m->private;
-	struct list_head *prev;
+	struct proc_mounts *state = m->private;
+	void *entry;
 
 	down_read(&namespace_sem);
-	if (!*pos) {
-		prev = &p->ns->list;
-	} else {
-		prev = &p->cursor.mnt_list;
+	state->xas = (struct xa_state) __XA_STATE(&state->ns->mounts_by_id, *pos, 0, 0);
 
-		/* Read after we'd reached the end? */
-		if (list_empty(prev))
-			return NULL;
-	}
+	entry = xas_find(&state->xas, ULONG_MAX);
+	while (entry && xas_invalid(entry))
+		entry = xas_next_entry(&state->xas, ULONG_MAX);
 
-	return mnt_list_next(p->ns, prev);
+	return entry;
 }
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct proc_mounts *p = m->private;
+	struct proc_mounts *state = m->private;
 	struct mount *mnt = v;
+	void *entry;
+
+	*pos = mnt->mnt_id + 1;
+	entry = xas_next_entry(&state->xas, ULONG_MAX);
+	while (entry && xas_invalid(entry))
+		entry = xas_next_entry(&state->xas, ULONG_MAX);
 
-	++*pos;
-	return mnt_list_next(p->ns, &mnt->mnt_list);
+	return entry;
 }
 
 static void m_stop(struct seq_file *m, void *v)
 {
-	struct proc_mounts *p = m->private;
-	struct mount *mnt = v;
-
-	lock_ns_list(p->ns);
-	if (mnt)
-		list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
-	else
-		list_del_init(&p->cursor.mnt_list);
-	unlock_ns_list(p->ns);
 	up_read(&namespace_sem);
 }
 
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 392ef5162655..9ae07f1904e6 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -283,8 +283,6 @@  static int mounts_open_common(struct inode *inode, struct file *file,
 	p->ns = ns;
 	p->root = root;
 	p->show = show;
-	INIT_LIST_HEAD(&p->cursor.mnt_list);
-	p->cursor.mnt.mnt_flags = MNT_CURSOR;
 
 	return 0;
 
@@ -301,7 +299,6 @@  static int mounts_release(struct inode *inode, struct file *file)
 	struct seq_file *m = file->private_data;
 	struct proc_mounts *p = m->private;
 	path_put(&p->root);
-	mnt_cursor_del(p->ns, &p->cursor);
 	put_mnt_ns(p->ns);
 	return seq_release_private(inode, file);
 }