diff mbox series

[10/19] VFS: introduce inode flags to report locking needs for directory ops

Message ID 20250206054504.2950516-11-neilb@suse.de
State New
Headers show
Series RFC: Allow concurrent and async changes in a directory | expand

Commit Message

NeilBrown Feb. 6, 2025, 5:42 a.m. UTC
If a filesystem supports _async ops for some directory ops we can take a
"shared" lock on i_rwsem otherwise we must take an "exclusive" lock.  As
the filesystem may support some async ops but not others we need to
easily determine which.

With this patch we group the ops into 4 groups that are likely be
supported together:

CREATE: create, link, mkdir, mknod
REMOVE: rmdir, unlink
RENAME: rename
OPEN: atomic_open, create

and set S_ASYNC_XXX for each when the inode in initialised.

We also add a LOOKUP_REMOVE intent flag which will be used by locking
interfaces to help know which group is being used.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/dcache.c           | 24 ++++++++++++++++++++++++
 include/linux/fs.h    |  5 +++++
 include/linux/namei.h |  5 +++--
 3 files changed, 32 insertions(+), 2 deletions(-)

Comments

Christian Brauner Feb. 6, 2025, 1:22 p.m. UTC | #1
On Thu, Feb 06, 2025 at 04:42:47PM +1100, NeilBrown wrote:
> If a filesystem supports _async ops for some directory ops we can take a
> "shared" lock on i_rwsem otherwise we must take an "exclusive" lock.  As
> the filesystem may support some async ops but not others we need to
> easily determine which.
> 
> With this patch we group the ops into 4 groups that are likely be
> supported together:
> 
> CREATE: create, link, mkdir, mknod
> REMOVE: rmdir, unlink
> RENAME: rename
> OPEN: atomic_open, create
> 
> and set S_ASYNC_XXX for each when the inode in initialised.
> 
> We also add a LOOKUP_REMOVE intent flag which will be used by locking
> interfaces to help know which group is being used.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/dcache.c           | 24 ++++++++++++++++++++++++
>  include/linux/fs.h    |  5 +++++
>  include/linux/namei.h |  5 +++--
>  3 files changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/dcache.c b/fs/dcache.c
> index e49607d00d2d..37c0f655166d 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -384,6 +384,27 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
>  	smp_store_release(&dentry->d_flags, flags);
>  }
>  
> +static void set_inode_flags(struct inode *inode)
> +{
> +	const struct inode_operations *i_op = inode->i_op;
> +
> +	lockdep_assert_held(&inode->i_lock);
> +	if ((i_op->create_async || !i_op->create) &&
> +	    (i_op->link_async || !i_op->link) &&
> +	    (i_op->symlink_async || !i_op->symlink) &&
> +	    (i_op->mkdir_async || !i_op->mkdir) &&
> +	    (i_op->mknod_async || !i_op->mknod))
> +		inode->i_flags |= S_ASYNC_CREATE;
> +	if ((i_op->unlink_async || !i_op->unlink) &&
> +	    (i_op->mkdir_async || !i_op->mkdir))
> +		inode->i_flags |= S_ASYNC_REMOVE;
> +	if (i_op->rename_async)
> +		inode->i_flags |= S_ASYNC_RENAME;
> +	if (i_op->atomic_open_async ||
> +	    (!i_op->atomic_open && i_op->create_async))
> +		inode->i_flags |= S_ASYNC_OPEN;
> +}

I think this is unpleasant. As I said we should fold _async into the
normal methods. Then we can add:

diff --git a/include/linux/fs.h b/include/linux/fs.h
index be3ad155ec9f..1d19f72448fc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2186,6 +2186,7 @@ int wrap_directory_iterator(struct file *, struct dir_context *,
        { return wrap_directory_iterator(file, ctx, x); }

 struct inode_operations {
+       iop_flags_t iop_flags;
        struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
        const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
        int (*permission) (struct mnt_idmap *, struct inode *, int);

which is similar to what I did for

struct file_operations {
        struct module *owner;
        fop_flags_t fop_flags;

and introduce

IOP_ASYNC_CREATE
IOP_ASYNC_OPEN

etc and then filesystems can just do:

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index df9669d4ded7..90c7aeb49466 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10859,6 +10859,7 @@ static void nfs4_disable_swap(struct inode *inode)
 }

 static const struct inode_operations nfs4_dir_inode_operations = {
+       .iop_flags      = IOP_ASYNC_CREATE | IOP_ASYNC_OPEN,
        .create         = nfs_create,
        .lookup         = nfs_lookup,
        .atomic_open    = nfs_atomic_open,

and then you can raise S_ASYNC_OPEN and so on based on the flags, not
the individual methods.

> +
>  static inline void __d_clear_type_and_inode(struct dentry *dentry)
>  {
>  	unsigned flags = READ_ONCE(dentry->d_flags);
> @@ -1893,6 +1914,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
>  	raw_write_seqcount_begin(&dentry->d_seq);
>  	__d_set_inode_and_type(dentry, inode, add_flags);
>  	raw_write_seqcount_end(&dentry->d_seq);
> +	set_inode_flags(inode);
>  	fsnotify_update_flags(dentry);
>  	spin_unlock(&dentry->d_lock);
>  }
> @@ -1999,6 +2021,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
>  
>  		spin_lock(&new->d_lock);
>  		__d_set_inode_and_type(new, inode, add_flags);
> +		set_inode_flags(inode);
>  		hlist_add_head(&new->d_u.d_alias, &inode->i_dentry);
>  		if (!disconnected) {
>  			hlist_bl_lock(&sb->s_roots);
> @@ -2701,6 +2724,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
>  		raw_write_seqcount_begin(&dentry->d_seq);
>  		__d_set_inode_and_type(dentry, inode, add_flags);
>  		raw_write_seqcount_end(&dentry->d_seq);
> +		set_inode_flags(inode);
>  		fsnotify_update_flags(dentry);
>  	}
>  	__d_rehash(dentry);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index e414400c2487..9a9282fef347 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2361,6 +2361,11 @@ struct super_operations {
>  #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
>  #define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
>  
> +#define S_ASYNC_CREATE	BIT(18)	/* create, link, symlink, mkdir, mknod all _async */
> +#define S_ASYNC_REMOVE	BIT(19)	/* unlink, mkdir both _async */
> +#define S_ASYNC_RENAME	BIT(20) /* rename_async supported */
> +#define S_ASYNC_OPEN	BIT(21) /* atomic_open_async or create_async supported */
> +
>  /*
>   * Note that nosuid etc flags are inode-specific: setting some file-system
>   * flags just means all the inodes inherit those flags by default. It might be
> diff --git a/include/linux/namei.h b/include/linux/namei.h
> index 76c587a5ec3a..72e351640406 100644
> --- a/include/linux/namei.h
> +++ b/include/linux/namei.h
> @@ -40,10 +40,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
>  #define LOOKUP_CREATE		BIT(17)	/* ... in object creation */
>  #define LOOKUP_EXCL		BIT(18)	/* ... in target must not exist */
>  #define LOOKUP_RENAME_TARGET	BIT(19)	/* ... in destination of rename() */
> +#define LOOKUP_REMOVE		BIT(20)	/* ... in target of object removal */
>  
>  #define LOOKUP_INTENT_FLAGS	(LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL |	\
> -				 LOOKUP_RENAME_TARGET)
> -/* 4 spare bits for intent */
> +				 LOOKUP_RENAME_TARGET | LOOKUP_REMOVE)
> +/* 3 spare bits for intent */
>  
>  /* Scoping flags for lookup. */
>  #define LOOKUP_NO_SYMLINKS	BIT(24) /* No symlink crossing. */
> -- 
> 2.47.1
>
NeilBrown Feb. 7, 2025, 2:01 a.m. UTC | #2
On Fri, 07 Feb 2025, Christian Brauner wrote:
> On Thu, Feb 06, 2025 at 04:42:47PM +1100, NeilBrown wrote:
> > If a filesystem supports _async ops for some directory ops we can take a
> > "shared" lock on i_rwsem otherwise we must take an "exclusive" lock.  As
> > the filesystem may support some async ops but not others we need to
> > easily determine which.
> > 
> > With this patch we group the ops into 4 groups that are likely be
> > supported together:
> > 
> > CREATE: create, link, mkdir, mknod
> > REMOVE: rmdir, unlink
> > RENAME: rename
> > OPEN: atomic_open, create
> > 
> > and set S_ASYNC_XXX for each when the inode in initialised.
> > 
> > We also add a LOOKUP_REMOVE intent flag which will be used by locking
> > interfaces to help know which group is being used.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/dcache.c           | 24 ++++++++++++++++++++++++
> >  include/linux/fs.h    |  5 +++++
> >  include/linux/namei.h |  5 +++--
> >  3 files changed, 32 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/dcache.c b/fs/dcache.c
> > index e49607d00d2d..37c0f655166d 100644
> > --- a/fs/dcache.c
> > +++ b/fs/dcache.c
> > @@ -384,6 +384,27 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
> >  	smp_store_release(&dentry->d_flags, flags);
> >  }
> >  
> > +static void set_inode_flags(struct inode *inode)
> > +{
> > +	const struct inode_operations *i_op = inode->i_op;
> > +
> > +	lockdep_assert_held(&inode->i_lock);
> > +	if ((i_op->create_async || !i_op->create) &&
> > +	    (i_op->link_async || !i_op->link) &&
> > +	    (i_op->symlink_async || !i_op->symlink) &&
> > +	    (i_op->mkdir_async || !i_op->mkdir) &&
> > +	    (i_op->mknod_async || !i_op->mknod))
> > +		inode->i_flags |= S_ASYNC_CREATE;
> > +	if ((i_op->unlink_async || !i_op->unlink) &&
> > +	    (i_op->mkdir_async || !i_op->mkdir))
> > +		inode->i_flags |= S_ASYNC_REMOVE;
> > +	if (i_op->rename_async)
> > +		inode->i_flags |= S_ASYNC_RENAME;
> > +	if (i_op->atomic_open_async ||
> > +	    (!i_op->atomic_open && i_op->create_async))
> > +		inode->i_flags |= S_ASYNC_OPEN;
> > +}
> 
> I think this is unpleasant. As I said we should fold _async into the
> normal methods. Then we can add:
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index be3ad155ec9f..1d19f72448fc 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2186,6 +2186,7 @@ int wrap_directory_iterator(struct file *, struct dir_context *,
>         { return wrap_directory_iterator(file, ctx, x); }
> 
>  struct inode_operations {
> +       iop_flags_t iop_flags;
>         struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
>         const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
>         int (*permission) (struct mnt_idmap *, struct inode *, int);
> 
> which is similar to what I did for
> 
> struct file_operations {
>         struct module *owner;
>         fop_flags_t fop_flags;
> 
> and introduce
> 
> IOP_ASYNC_CREATE
> IOP_ASYNC_OPEN
> 

Ahh - I see where you are going.  Interesting.
The iop_flags effectively provides versioning for the functions so we
don't have to embed the version in the name.  That would work.

I guess we would handle the mkdir change by changing every current mkdir
to return ERR_PTR() of the current return value and the vfs_mkdir_xx
caller checks if that is NULL and the original dentry is still negative,
and then performs the lookup.

Thanks,
NeilBrown


> etc and then filesystems can just do:
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index df9669d4ded7..90c7aeb49466 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -10859,6 +10859,7 @@ static void nfs4_disable_swap(struct inode *inode)
>  }
> 
>  static const struct inode_operations nfs4_dir_inode_operations = {
> +       .iop_flags      = IOP_ASYNC_CREATE | IOP_ASYNC_OPEN,
>         .create         = nfs_create,
>         .lookup         = nfs_lookup,
>         .atomic_open    = nfs_atomic_open,
> 
> and then you can raise S_ASYNC_OPEN and so on based on the flags, not
> the individual methods.
> 
> > +
> >  static inline void __d_clear_type_and_inode(struct dentry *dentry)
> >  {
> >  	unsigned flags = READ_ONCE(dentry->d_flags);
> > @@ -1893,6 +1914,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
> >  	raw_write_seqcount_begin(&dentry->d_seq);
> >  	__d_set_inode_and_type(dentry, inode, add_flags);
> >  	raw_write_seqcount_end(&dentry->d_seq);
> > +	set_inode_flags(inode);
> >  	fsnotify_update_flags(dentry);
> >  	spin_unlock(&dentry->d_lock);
> >  }
> > @@ -1999,6 +2021,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
> >  
> >  		spin_lock(&new->d_lock);
> >  		__d_set_inode_and_type(new, inode, add_flags);
> > +		set_inode_flags(inode);
> >  		hlist_add_head(&new->d_u.d_alias, &inode->i_dentry);
> >  		if (!disconnected) {
> >  			hlist_bl_lock(&sb->s_roots);
> > @@ -2701,6 +2724,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
> >  		raw_write_seqcount_begin(&dentry->d_seq);
> >  		__d_set_inode_and_type(dentry, inode, add_flags);
> >  		raw_write_seqcount_end(&dentry->d_seq);
> > +		set_inode_flags(inode);
> >  		fsnotify_update_flags(dentry);
> >  	}
> >  	__d_rehash(dentry);
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index e414400c2487..9a9282fef347 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -2361,6 +2361,11 @@ struct super_operations {
> >  #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
> >  #define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
> >  
> > +#define S_ASYNC_CREATE	BIT(18)	/* create, link, symlink, mkdir, mknod all _async */
> > +#define S_ASYNC_REMOVE	BIT(19)	/* unlink, mkdir both _async */
> > +#define S_ASYNC_RENAME	BIT(20) /* rename_async supported */
> > +#define S_ASYNC_OPEN	BIT(21) /* atomic_open_async or create_async supported */
> > +
> >  /*
> >   * Note that nosuid etc flags are inode-specific: setting some file-system
> >   * flags just means all the inodes inherit those flags by default. It might be
> > diff --git a/include/linux/namei.h b/include/linux/namei.h
> > index 76c587a5ec3a..72e351640406 100644
> > --- a/include/linux/namei.h
> > +++ b/include/linux/namei.h
> > @@ -40,10 +40,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
> >  #define LOOKUP_CREATE		BIT(17)	/* ... in object creation */
> >  #define LOOKUP_EXCL		BIT(18)	/* ... in target must not exist */
> >  #define LOOKUP_RENAME_TARGET	BIT(19)	/* ... in destination of rename() */
> > +#define LOOKUP_REMOVE		BIT(20)	/* ... in target of object removal */
> >  
> >  #define LOOKUP_INTENT_FLAGS	(LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL |	\
> > -				 LOOKUP_RENAME_TARGET)
> > -/* 4 spare bits for intent */
> > +				 LOOKUP_RENAME_TARGET | LOOKUP_REMOVE)
> > +/* 3 spare bits for intent */
> >  
> >  /* Scoping flags for lookup. */
> >  #define LOOKUP_NO_SYMLINKS	BIT(24) /* No symlink crossing. */
> > -- 
> > 2.47.1
> > 
>
diff mbox series

Patch

diff --git a/fs/dcache.c b/fs/dcache.c
index e49607d00d2d..37c0f655166d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -384,6 +384,27 @@  static inline void __d_set_inode_and_type(struct dentry *dentry,
 	smp_store_release(&dentry->d_flags, flags);
 }
 
+static void set_inode_flags(struct inode *inode)
+{
+	const struct inode_operations *i_op = inode->i_op;
+
+	lockdep_assert_held(&inode->i_lock);
+	if ((i_op->create_async || !i_op->create) &&
+	    (i_op->link_async || !i_op->link) &&
+	    (i_op->symlink_async || !i_op->symlink) &&
+	    (i_op->mkdir_async || !i_op->mkdir) &&
+	    (i_op->mknod_async || !i_op->mknod))
+		inode->i_flags |= S_ASYNC_CREATE;
+	if ((i_op->unlink_async || !i_op->unlink) &&
+	    (i_op->mkdir_async || !i_op->mkdir))
+		inode->i_flags |= S_ASYNC_REMOVE;
+	if (i_op->rename_async)
+		inode->i_flags |= S_ASYNC_RENAME;
+	if (i_op->atomic_open_async ||
+	    (!i_op->atomic_open && i_op->create_async))
+		inode->i_flags |= S_ASYNC_OPEN;
+}
+
 static inline void __d_clear_type_and_inode(struct dentry *dentry)
 {
 	unsigned flags = READ_ONCE(dentry->d_flags);
@@ -1893,6 +1914,7 @@  static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 	raw_write_seqcount_begin(&dentry->d_seq);
 	__d_set_inode_and_type(dentry, inode, add_flags);
 	raw_write_seqcount_end(&dentry->d_seq);
+	set_inode_flags(inode);
 	fsnotify_update_flags(dentry);
 	spin_unlock(&dentry->d_lock);
 }
@@ -1999,6 +2021,7 @@  static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
 
 		spin_lock(&new->d_lock);
 		__d_set_inode_and_type(new, inode, add_flags);
+		set_inode_flags(inode);
 		hlist_add_head(&new->d_u.d_alias, &inode->i_dentry);
 		if (!disconnected) {
 			hlist_bl_lock(&sb->s_roots);
@@ -2701,6 +2724,7 @@  static inline void __d_add(struct dentry *dentry, struct inode *inode)
 		raw_write_seqcount_begin(&dentry->d_seq);
 		__d_set_inode_and_type(dentry, inode, add_flags);
 		raw_write_seqcount_end(&dentry->d_seq);
+		set_inode_flags(inode);
 		fsnotify_update_flags(dentry);
 	}
 	__d_rehash(dentry);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e414400c2487..9a9282fef347 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2361,6 +2361,11 @@  struct super_operations {
 #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
 #define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
 
+#define S_ASYNC_CREATE	BIT(18)	/* create, link, symlink, mkdir, mknod all _async */
+#define S_ASYNC_REMOVE	BIT(19)	/* unlink, mkdir both _async */
+#define S_ASYNC_RENAME	BIT(20) /* rename_async supported */
+#define S_ASYNC_OPEN	BIT(21) /* atomic_open_async or create_async supported */
+
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
  * flags just means all the inodes inherit those flags by default. It might be
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 76c587a5ec3a..72e351640406 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -40,10 +40,11 @@  enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
 #define LOOKUP_CREATE		BIT(17)	/* ... in object creation */
 #define LOOKUP_EXCL		BIT(18)	/* ... in target must not exist */
 #define LOOKUP_RENAME_TARGET	BIT(19)	/* ... in destination of rename() */
+#define LOOKUP_REMOVE		BIT(20)	/* ... in target of object removal */
 
 #define LOOKUP_INTENT_FLAGS	(LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL |	\
-				 LOOKUP_RENAME_TARGET)
-/* 4 spare bits for intent */
+				 LOOKUP_RENAME_TARGET | LOOKUP_REMOVE)
+/* 3 spare bits for intent */
 
 /* Scoping flags for lookup. */
 #define LOOKUP_NO_SYMLINKS	BIT(24) /* No symlink crossing. */