diff mbox series

[v10,4/8,RFC] Allow atomic_open() on positive dentry (w/o O_CREAT)

Message ID 20231023183035.11035-5-bschubert@ddn.com (mailing list archive)
State New, archived
Headers show
Series fuse: full atomic open and atomic-open-revalidate | expand

Commit Message

Bernd Schubert Oct. 23, 2023, 6:30 p.m. UTC
Previous patch allowed atomic-open on a positive dentry when
O_CREAT was set (in lookup_open). This adds in atomic-open
when O_CREAT is not set.

Code wise it would be possible to just drop the dentry in
open_last_lookups and then fall through to lookup_open.
But then this would add some overhead for dentry drop,
re-lookup and actually also call into d_revalidate.
So as suggested by Miklos, this adds a helper function
(atomic_revalidate_open) to immediately open the dentry
with atomic_open.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Dharmendra Singh <dsingh@ddn.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
---
 fs/namei.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 3 deletions(-)

Comments

Bernd Schubert Oct. 24, 2023, 1:46 p.m. UTC | #1
On 10/23/23 20:30, Bernd Schubert wrote:
> Previous patch allowed atomic-open on a positive dentry when
> O_CREAT was set (in lookup_open). This adds in atomic-open
> when O_CREAT is not set.
> 
> Code wise it would be possible to just drop the dentry in
> open_last_lookups and then fall through to lookup_open.
> But then this would add some overhead for dentry drop,
> re-lookup and actually also call into d_revalidate.
> So as suggested by Miklos, this adds a helper function
> (atomic_revalidate_open) to immediately open the dentry
> with atomic_open.
> 
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> Cc: Miklos Szeredi <miklos@szeredi.hu>
> Cc: Dharmendra Singh <dsingh@ddn.com>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Amir Goldstein <amir73il@gmail.com>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: linux-fsdevel@vger.kernel.org
> ---
>   fs/namei.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>   1 file changed, 63 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/namei.c b/fs/namei.c
> index ff913e6b12b4..5e2d569ffe38 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1614,10 +1614,11 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
>   }
>   EXPORT_SYMBOL(lookup_one_qstr_excl);
>   
> -static struct dentry *lookup_fast(struct nameidata *nd)
> +static struct dentry *lookup_fast(struct nameidata *nd, bool *atomic_revalidate)
>   {
>   	struct dentry *dentry, *parent = nd->path.dentry;
>   	int status = 1;
> +	*atomic_revalidate = false;
>   
>   	/*
>   	 * Rename seqlock is not required here because in the off chance
> @@ -1659,6 +1660,10 @@ static struct dentry *lookup_fast(struct nameidata *nd)
>   		dput(dentry);
>   		return ERR_PTR(status);
>   	}
> +
> +	if (status == D_REVALIDATE_ATOMIC)
> +		*atomic_revalidate = true;
> +
>   	return dentry;
>   }
>   
> @@ -1984,6 +1989,7 @@ static const char *handle_dots(struct nameidata *nd, int type)
>   static const char *walk_component(struct nameidata *nd, int flags)
>   {
>   	struct dentry *dentry;
> +	bool atomic_revalidate;
>   	/*
>   	 * "." and ".." are special - ".." especially so because it has
>   	 * to be able to know about the current root directory and
> @@ -1994,7 +2000,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
>   			put_link(nd);
>   		return handle_dots(nd, nd->last_type);
>   	}
> -	dentry = lookup_fast(nd);
> +	dentry = lookup_fast(nd, &atomic_revalidate);
>   	if (IS_ERR(dentry))
>   		return ERR_CAST(dentry);
>   	if (unlikely(!dentry)) {
> @@ -2002,6 +2008,9 @@ static const char *walk_component(struct nameidata *nd, int flags)
>   		if (IS_ERR(dentry))
>   			return ERR_CAST(dentry);
>   	}
> +
> +	WARN_ON_ONCE(atomic_revalidate);
> +
>   	if (!(flags & WALK_MORE) && nd->depth)
>   		put_link(nd);
>   	return step_into(nd, flags, dentry);
> @@ -3383,6 +3392,42 @@ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
>   	return dentry;
>   }
>   
> +static struct dentry *atomic_revalidate_open(struct dentry *dentry,
> +					     struct nameidata *nd,
> +					     struct file *file,
> +					     const struct open_flags *op,
> +					     bool *got_write)
> +{
> +	struct mnt_idmap *idmap;

As kernel test robot noticed, idmap is unused in this function.
Al Viro Oct. 28, 2023, 4:46 a.m. UTC | #2
On Mon, Oct 23, 2023 at 08:30:31PM +0200, Bernd Schubert wrote:

> Previous patch allowed atomic-open on a positive dentry when
> O_CREAT was set (in lookup_open). This adds in atomic-open
> when O_CREAT is not set.
> 
> Code wise it would be possible to just drop the dentry in
> open_last_lookups and then fall through to lookup_open.
> But then this would add some overhead for dentry drop,
> re-lookup and actually also call into d_revalidate.
> So as suggested by Miklos, this adds a helper function
> (atomic_revalidate_open) to immediately open the dentry
> with atomic_open.
> 
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> Cc: Miklos Szeredi <miklos@szeredi.hu>
> Cc: Dharmendra Singh <dsingh@ddn.com>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Amir Goldstein <amir73il@gmail.com>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: linux-fsdevel@vger.kernel.org
> ---
>  fs/namei.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 63 insertions(+), 3 deletions(-)

This is bloody awful.
 
> diff --git a/fs/namei.c b/fs/namei.c
> index ff913e6b12b4..5e2d569ffe38 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1614,10 +1614,11 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
>  }
>  EXPORT_SYMBOL(lookup_one_qstr_excl);
>  
> -static struct dentry *lookup_fast(struct nameidata *nd)
> +static struct dentry *lookup_fast(struct nameidata *nd, bool *atomic_revalidate)

Yechhh...  Note that absolute majority of calls will be nowhere near
the case when that atomic_revalidate thing might possibly be set.

>  {
>  	struct dentry *dentry, *parent = nd->path.dentry;
>  	int status = 1;
> +	*atomic_revalidate = false;
>  
>  	/*
>  	 * Rename seqlock is not required here because in the off chance
> @@ -1659,6 +1660,10 @@ static struct dentry *lookup_fast(struct nameidata *nd)
>  		dput(dentry);
>  		return ERR_PTR(status);
>  	}
> +
> +	if (status == D_REVALIDATE_ATOMIC)
> +		*atomic_revalidate = true;
> +
>  	return dentry;
>  }

 
> @@ -1984,6 +1989,7 @@ static const char *handle_dots(struct nameidata *nd, int type)
>  static const char *walk_component(struct nameidata *nd, int flags)
>  {
>  	struct dentry *dentry;
> +	bool atomic_revalidate;
>  	/*
>  	 * "." and ".." are special - ".." especially so because it has
>  	 * to be able to know about the current root directory and
> @@ -1994,7 +2000,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
>  			put_link(nd);
>  		return handle_dots(nd, nd->last_type);
>  	}
> -	dentry = lookup_fast(nd);
> +	dentry = lookup_fast(nd, &atomic_revalidate);
>  	if (IS_ERR(dentry))
>  		return ERR_CAST(dentry);
>  	if (unlikely(!dentry)) {
> @@ -2002,6 +2008,9 @@ static const char *walk_component(struct nameidata *nd, int flags)
>  		if (IS_ERR(dentry))
>  			return ERR_CAST(dentry);
>  	}
> +
> +	WARN_ON_ONCE(atomic_revalidate);
> +
>  	if (!(flags & WALK_MORE) && nd->depth)
>  		put_link(nd);
>  	return step_into(nd, flags, dentry);
> @@ -3383,6 +3392,42 @@ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
>  	return dentry;
>  }
  
> +static struct dentry *atomic_revalidate_open(struct dentry *dentry,
> +					     struct nameidata *nd,
> +					     struct file *file,
> +					     const struct open_flags *op,
> +					     bool *got_write)
> +{
> +	struct mnt_idmap *idmap;
> +	struct dentry *dir = nd->path.dentry;
> +	struct inode *dir_inode = dir->d_inode;
> +	int open_flag = op->open_flag;
> +	umode_t mode = op->mode;
> +
> +	if (unlikely(IS_DEADDIR(dir_inode)))
> +		return ERR_PTR(-ENOENT);

What's the point of doing that check when there's nothing to stop
directory from being removed right under you?  Note that similar
check in lookup_open() is done after the caller has locked the
damn thing.

> +	file->f_mode &= ~FMODE_CREATED;
> +
> +	if (WARN_ON_ONCE(open_flag & O_CREAT))
> +		return ERR_PTR(-EINVAL);

Really.  With the only caller being under

        int open_flag = op->open_flag;
	...
	if (!(open_flag & O_CREAT)) {

> +
> +	if (open_flag & (O_TRUNC | O_WRONLY | O_RDWR))
> +		*got_write = !mnt_want_write(nd->path.mnt);
> +	else
> +		*got_write = false;
> +
> +	if (!*got_write)
> +		open_flag &= ~O_TRUNC;
> +
> +	inode_lock_shared(dir->d_inode);
> +	dentry = atomic_open(nd, dentry, file, open_flag, mode);
> +	inode_unlock_shared(dir->d_inode);

What will happen if you get that thing called with NULL ->i_op->atomic_open()?
> +
> +	return dentry;
> +
> +}
> +
>  /*
>   * Look up and maybe create and open the last component.
>   *
> @@ -3527,12 +3572,26 @@ static const char *open_last_lookups(struct nameidata *nd,
>  	}
>  
>  	if (!(open_flag & O_CREAT)) {
> +		bool atomic_revalidate;
> +
>  		if (nd->last.name[nd->last.len])
>  			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
>  		/* we _can_ be in RCU mode here */
> -		dentry = lookup_fast(nd);
> +		dentry = lookup_fast(nd, &atomic_revalidate);
>  		if (IS_ERR(dentry))
>  			return ERR_CAST(dentry);
> +		if (dentry && unlikely(atomic_revalidate)) {
> +			/* The file system shall not claim to support atomic
> +			 * revalidate in RCU mode
> +			 */
> +			if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) {
> +				dput(dentry);

dput() under rcu_read_lock()?  For one thing, it's completely wrong
as far as recovery strategy goes; we do *NOT* grab references under
LOOKUP_RCU, so whatever we got here is not a counting reference.
What's more, your comment is actively misleading - you set that
atomic_revalidate thing in the very end of lookup_fast() and
there is no way to get there with LOOKUP_RCU.  Look:

static struct dentry *lookup_fast(struct nameidata *nd)
{
        ...
        if (nd->flags & LOOKUP_RCU) {
		...
                status = d_revalidate(dentry, nd->flags);
                if (likely(status > 0))
                        return dentry;
That's where we leave if we'd found and successfully
revalidated a dentry in RCU mode.
                if (!try_to_unlazy_next(nd, dentry))
                        return ERR_PTR(-ECHILD);
... and this is where we'd already left the RCU mode.
                if (status == -ECHILD)
                        /* we'd been told to redo it in non-rcu mode */
                        status = d_revalidate(dentry, nd->flags);
	} else {
here we hadn't been in RCU mode to start with and we *never*
switch from non-RCU to RCU.
		...
	}
	// and here you set that flag of yours.

So no matter what your ->d_revalidate() returns, you are
not going to see atomic_... shite set in RCU mode.  It's not
a matter of filesystem behaviour, contrary to your comment.

> +				return ERR_PTR(-ECHILD);
> +			}
> +			dentry = atomic_revalidate_open(dentry, nd, file, op,
> +							&got_write);
> +			goto drop_write;
> +		}
>  		if (likely(dentry))
>  			goto finish_lookup;
>  
> @@ -3569,6 +3628,7 @@ static const char *open_last_lookups(struct nameidata *nd,
>  	else
>  		inode_unlock_shared(dir->d_inode);
>  
> +drop_write:
>  	if (got_write)
>  		mnt_drop_write(nd->path.mnt);

That helper of yours is a bad idea.  Control flow in that area is
messy and hard to follow as it is, and we had _MANY_ bugs stemming
from that.  You are making it harder to follow; this stuff really
should've gone into lookup_open().

And I really hate that 'atomic_revalidate' thing of yours.
Especially since the reader gets to do major head-scratching about
the WARN_ON_ONCE(atomic_revalidate) in walk_component().  Takes
guessing that it's probably a matter of LOOKUP_OPEN *not* being
there in walk_component() and always being there in the
open_last_lookups() (we never get there for O_PATH opens, so
op->intent will have it).  So at a guess you mean to have
->d_revalidate() only return that magical value if LOOKUP_OPEN
is present in flags.  Which seems to be the case, judging by
the subsequent patches in the series.

_IF_ we want to go in that direction, at least make it
	if (status == THAT_MAGIC_VALUE) {
		if (unlikely(!atomic_revalidate)) {
			if (WARN_ON_ONCE(nd->flags & LOOKUP_OPEN))
				// insane caller
				;
			else
				// insane ->d_revalidate() instance
				WARN_ON_ONCE(1);
		} else {
			*atomic_revalidate = true;
		}
	}
and pass it NULL when calling it from walk_component().

Again, I'm not at all sure it's a good idea to start with.  Hard to
tell without seeing how it'll look after massage that would move
that new call of atomic_open() down into lookup_open().
diff mbox series

Patch

diff --git a/fs/namei.c b/fs/namei.c
index ff913e6b12b4..5e2d569ffe38 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1614,10 +1614,11 @@  struct dentry *lookup_one_qstr_excl(const struct qstr *name,
 }
 EXPORT_SYMBOL(lookup_one_qstr_excl);
 
-static struct dentry *lookup_fast(struct nameidata *nd)
+static struct dentry *lookup_fast(struct nameidata *nd, bool *atomic_revalidate)
 {
 	struct dentry *dentry, *parent = nd->path.dentry;
 	int status = 1;
+	*atomic_revalidate = false;
 
 	/*
 	 * Rename seqlock is not required here because in the off chance
@@ -1659,6 +1660,10 @@  static struct dentry *lookup_fast(struct nameidata *nd)
 		dput(dentry);
 		return ERR_PTR(status);
 	}
+
+	if (status == D_REVALIDATE_ATOMIC)
+		*atomic_revalidate = true;
+
 	return dentry;
 }
 
@@ -1984,6 +1989,7 @@  static const char *handle_dots(struct nameidata *nd, int type)
 static const char *walk_component(struct nameidata *nd, int flags)
 {
 	struct dentry *dentry;
+	bool atomic_revalidate;
 	/*
 	 * "." and ".." are special - ".." especially so because it has
 	 * to be able to know about the current root directory and
@@ -1994,7 +2000,7 @@  static const char *walk_component(struct nameidata *nd, int flags)
 			put_link(nd);
 		return handle_dots(nd, nd->last_type);
 	}
-	dentry = lookup_fast(nd);
+	dentry = lookup_fast(nd, &atomic_revalidate);
 	if (IS_ERR(dentry))
 		return ERR_CAST(dentry);
 	if (unlikely(!dentry)) {
@@ -2002,6 +2008,9 @@  static const char *walk_component(struct nameidata *nd, int flags)
 		if (IS_ERR(dentry))
 			return ERR_CAST(dentry);
 	}
+
+	WARN_ON_ONCE(atomic_revalidate);
+
 	if (!(flags & WALK_MORE) && nd->depth)
 		put_link(nd);
 	return step_into(nd, flags, dentry);
@@ -3383,6 +3392,42 @@  static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	return dentry;
 }
 
+static struct dentry *atomic_revalidate_open(struct dentry *dentry,
+					     struct nameidata *nd,
+					     struct file *file,
+					     const struct open_flags *op,
+					     bool *got_write)
+{
+	struct mnt_idmap *idmap;
+	struct dentry *dir = nd->path.dentry;
+	struct inode *dir_inode = dir->d_inode;
+	int open_flag = op->open_flag;
+	umode_t mode = op->mode;
+
+	if (unlikely(IS_DEADDIR(dir_inode)))
+		return ERR_PTR(-ENOENT);
+
+	file->f_mode &= ~FMODE_CREATED;
+
+	if (WARN_ON_ONCE(open_flag & O_CREAT))
+		return ERR_PTR(-EINVAL);
+
+	if (open_flag & (O_TRUNC | O_WRONLY | O_RDWR))
+		*got_write = !mnt_want_write(nd->path.mnt);
+	else
+		*got_write = false;
+
+	if (!*got_write)
+		open_flag &= ~O_TRUNC;
+
+	inode_lock_shared(dir->d_inode);
+	dentry = atomic_open(nd, dentry, file, open_flag, mode);
+	inode_unlock_shared(dir->d_inode);
+
+	return dentry;
+
+}
+
 /*
  * Look up and maybe create and open the last component.
  *
@@ -3527,12 +3572,26 @@  static const char *open_last_lookups(struct nameidata *nd,
 	}
 
 	if (!(open_flag & O_CREAT)) {
+		bool atomic_revalidate;
+
 		if (nd->last.name[nd->last.len])
 			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
 		/* we _can_ be in RCU mode here */
-		dentry = lookup_fast(nd);
+		dentry = lookup_fast(nd, &atomic_revalidate);
 		if (IS_ERR(dentry))
 			return ERR_CAST(dentry);
+		if (dentry && unlikely(atomic_revalidate)) {
+			/* The file system shall not claim to support atomic
+			 * revalidate in RCU mode
+			 */
+			if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) {
+				dput(dentry);
+				return ERR_PTR(-ECHILD);
+			}
+			dentry = atomic_revalidate_open(dentry, nd, file, op,
+							&got_write);
+			goto drop_write;
+		}
 		if (likely(dentry))
 			goto finish_lookup;
 
@@ -3569,6 +3628,7 @@  static const char *open_last_lookups(struct nameidata *nd,
 	else
 		inode_unlock_shared(dir->d_inode);
 
+drop_write:
 	if (got_write)
 		mnt_drop_write(nd->path.mnt);