Message ID | 20250206054504.2950516-11-neilb@suse.de |
---|---|
State | New |
Headers | show |
Series | RFC: Allow concurrent and async changes in a directory | expand |
On Thu, Feb 06, 2025 at 04:42:47PM +1100, NeilBrown wrote: > If a filesystem supports _async ops for some directory ops we can take a > "shared" lock on i_rwsem otherwise we must take an "exclusive" lock. As > the filesystem may support some async ops but not others we need to > easily determine which. > > With this patch we group the ops into 4 groups that are likely be > supported together: > > CREATE: create, link, mkdir, mknod > REMOVE: rmdir, unlink > RENAME: rename > OPEN: atomic_open, create > > and set S_ASYNC_XXX for each when the inode in initialised. > > We also add a LOOKUP_REMOVE intent flag which will be used by locking > interfaces to help know which group is being used. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > fs/dcache.c | 24 ++++++++++++++++++++++++ > include/linux/fs.h | 5 +++++ > include/linux/namei.h | 5 +++-- > 3 files changed, 32 insertions(+), 2 deletions(-) > > diff --git a/fs/dcache.c b/fs/dcache.c > index e49607d00d2d..37c0f655166d 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -384,6 +384,27 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, > smp_store_release(&dentry->d_flags, flags); > } > > +static void set_inode_flags(struct inode *inode) > +{ > + const struct inode_operations *i_op = inode->i_op; > + > + lockdep_assert_held(&inode->i_lock); > + if ((i_op->create_async || !i_op->create) && > + (i_op->link_async || !i_op->link) && > + (i_op->symlink_async || !i_op->symlink) && > + (i_op->mkdir_async || !i_op->mkdir) && > + (i_op->mknod_async || !i_op->mknod)) > + inode->i_flags |= S_ASYNC_CREATE; > + if ((i_op->unlink_async || !i_op->unlink) && > + (i_op->mkdir_async || !i_op->mkdir)) > + inode->i_flags |= S_ASYNC_REMOVE; > + if (i_op->rename_async) > + inode->i_flags |= S_ASYNC_RENAME; > + if (i_op->atomic_open_async || > + (!i_op->atomic_open && i_op->create_async)) > + inode->i_flags |= S_ASYNC_OPEN; > +} I think this is unpleasant. As I said we should fold _async into the normal methods. Then we can add: diff --git a/include/linux/fs.h b/include/linux/fs.h index be3ad155ec9f..1d19f72448fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2186,6 +2186,7 @@ int wrap_directory_iterator(struct file *, struct dir_context *, { return wrap_directory_iterator(file, ctx, x); } struct inode_operations { + iop_flags_t iop_flags; struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct mnt_idmap *, struct inode *, int); which is similar to what I did for struct file_operations { struct module *owner; fop_flags_t fop_flags; and introduce IOP_ASYNC_CREATE IOP_ASYNC_OPEN etc and then filesystems can just do: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index df9669d4ded7..90c7aeb49466 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10859,6 +10859,7 @@ static void nfs4_disable_swap(struct inode *inode) } static const struct inode_operations nfs4_dir_inode_operations = { + .iop_flags = IOP_ASYNC_CREATE | IOP_ASYNC_OPEN, .create = nfs_create, .lookup = nfs_lookup, .atomic_open = nfs_atomic_open, and then you can raise S_ASYNC_OPEN and so on based on the flags, not the individual methods. > + > static inline void __d_clear_type_and_inode(struct dentry *dentry) > { > unsigned flags = READ_ONCE(dentry->d_flags); > @@ -1893,6 +1914,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) > raw_write_seqcount_begin(&dentry->d_seq); > __d_set_inode_and_type(dentry, inode, add_flags); > raw_write_seqcount_end(&dentry->d_seq); > + set_inode_flags(inode); > fsnotify_update_flags(dentry); > spin_unlock(&dentry->d_lock); > } > @@ -1999,6 +2021,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) > > spin_lock(&new->d_lock); > __d_set_inode_and_type(new, inode, add_flags); > + set_inode_flags(inode); > hlist_add_head(&new->d_u.d_alias, &inode->i_dentry); > if (!disconnected) { > hlist_bl_lock(&sb->s_roots); > @@ -2701,6 +2724,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) > raw_write_seqcount_begin(&dentry->d_seq); > __d_set_inode_and_type(dentry, inode, add_flags); > raw_write_seqcount_end(&dentry->d_seq); > + set_inode_flags(inode); > fsnotify_update_flags(dentry); > } > __d_rehash(dentry); > diff --git a/include/linux/fs.h b/include/linux/fs.h > index e414400c2487..9a9282fef347 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2361,6 +2361,11 @@ struct super_operations { > #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ > #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ > > +#define S_ASYNC_CREATE BIT(18) /* create, link, symlink, mkdir, mknod all _async */ > +#define S_ASYNC_REMOVE BIT(19) /* unlink, mkdir both _async */ > +#define S_ASYNC_RENAME BIT(20) /* rename_async supported */ > +#define S_ASYNC_OPEN BIT(21) /* atomic_open_async or create_async supported */ > + > /* > * Note that nosuid etc flags are inode-specific: setting some file-system > * flags just means all the inodes inherit those flags by default. It might be > diff --git a/include/linux/namei.h b/include/linux/namei.h > index 76c587a5ec3a..72e351640406 100644 > --- a/include/linux/namei.h > +++ b/include/linux/namei.h > @@ -40,10 +40,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; > #define LOOKUP_CREATE BIT(17) /* ... in object creation */ > #define LOOKUP_EXCL BIT(18) /* ... in target must not exist */ > #define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */ > +#define LOOKUP_REMOVE BIT(20) /* ... in target of object removal */ > > #define LOOKUP_INTENT_FLAGS (LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL | \ > - LOOKUP_RENAME_TARGET) > -/* 4 spare bits for intent */ > + LOOKUP_RENAME_TARGET | LOOKUP_REMOVE) > +/* 3 spare bits for intent */ > > /* Scoping flags for lookup. */ > #define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */ > -- > 2.47.1 >
On Fri, 07 Feb 2025, Christian Brauner wrote: > On Thu, Feb 06, 2025 at 04:42:47PM +1100, NeilBrown wrote: > > If a filesystem supports _async ops for some directory ops we can take a > > "shared" lock on i_rwsem otherwise we must take an "exclusive" lock. As > > the filesystem may support some async ops but not others we need to > > easily determine which. > > > > With this patch we group the ops into 4 groups that are likely be > > supported together: > > > > CREATE: create, link, mkdir, mknod > > REMOVE: rmdir, unlink > > RENAME: rename > > OPEN: atomic_open, create > > > > and set S_ASYNC_XXX for each when the inode in initialised. > > > > We also add a LOOKUP_REMOVE intent flag which will be used by locking > > interfaces to help know which group is being used. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > fs/dcache.c | 24 ++++++++++++++++++++++++ > > include/linux/fs.h | 5 +++++ > > include/linux/namei.h | 5 +++-- > > 3 files changed, 32 insertions(+), 2 deletions(-) > > > > diff --git a/fs/dcache.c b/fs/dcache.c > > index e49607d00d2d..37c0f655166d 100644 > > --- a/fs/dcache.c > > +++ b/fs/dcache.c > > @@ -384,6 +384,27 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, > > smp_store_release(&dentry->d_flags, flags); > > } > > > > +static void set_inode_flags(struct inode *inode) > > +{ > > + const struct inode_operations *i_op = inode->i_op; > > + > > + lockdep_assert_held(&inode->i_lock); > > + if ((i_op->create_async || !i_op->create) && > > + (i_op->link_async || !i_op->link) && > > + (i_op->symlink_async || !i_op->symlink) && > > + (i_op->mkdir_async || !i_op->mkdir) && > > + (i_op->mknod_async || !i_op->mknod)) > > + inode->i_flags |= S_ASYNC_CREATE; > > + if ((i_op->unlink_async || !i_op->unlink) && > > + (i_op->mkdir_async || !i_op->mkdir)) > > + inode->i_flags |= S_ASYNC_REMOVE; > > + if (i_op->rename_async) > > + inode->i_flags |= S_ASYNC_RENAME; > > + if (i_op->atomic_open_async || > > + (!i_op->atomic_open && i_op->create_async)) > > + inode->i_flags |= S_ASYNC_OPEN; > > +} > > I think this is unpleasant. As I said we should fold _async into the > normal methods. Then we can add: > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index be3ad155ec9f..1d19f72448fc 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2186,6 +2186,7 @@ int wrap_directory_iterator(struct file *, struct dir_context *, > { return wrap_directory_iterator(file, ctx, x); } > > struct inode_operations { > + iop_flags_t iop_flags; > struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); > const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); > int (*permission) (struct mnt_idmap *, struct inode *, int); > > which is similar to what I did for > > struct file_operations { > struct module *owner; > fop_flags_t fop_flags; > > and introduce > > IOP_ASYNC_CREATE > IOP_ASYNC_OPEN > Ahh - I see where you are going. Interesting. The iop_flags effectively provides versioning for the functions so we don't have to embed the version in the name. That would work. I guess we would handle the mkdir change by changing every current mkdir to return ERR_PTR() of the current return value and the vfs_mkdir_xx caller checks if that is NULL and the original dentry is still negative, and then performs the lookup. Thanks, NeilBrown > etc and then filesystems can just do: > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > index df9669d4ded7..90c7aeb49466 100644 > --- a/fs/nfs/nfs4proc.c > +++ b/fs/nfs/nfs4proc.c > @@ -10859,6 +10859,7 @@ static void nfs4_disable_swap(struct inode *inode) > } > > static const struct inode_operations nfs4_dir_inode_operations = { > + .iop_flags = IOP_ASYNC_CREATE | IOP_ASYNC_OPEN, > .create = nfs_create, > .lookup = nfs_lookup, > .atomic_open = nfs_atomic_open, > > and then you can raise S_ASYNC_OPEN and so on based on the flags, not > the individual methods. > > > + > > static inline void __d_clear_type_and_inode(struct dentry *dentry) > > { > > unsigned flags = READ_ONCE(dentry->d_flags); > > @@ -1893,6 +1914,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) > > raw_write_seqcount_begin(&dentry->d_seq); > > __d_set_inode_and_type(dentry, inode, add_flags); > > raw_write_seqcount_end(&dentry->d_seq); > > + set_inode_flags(inode); > > fsnotify_update_flags(dentry); > > spin_unlock(&dentry->d_lock); > > } > > @@ -1999,6 +2021,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) > > > > spin_lock(&new->d_lock); > > __d_set_inode_and_type(new, inode, add_flags); > > + set_inode_flags(inode); > > hlist_add_head(&new->d_u.d_alias, &inode->i_dentry); > > if (!disconnected) { > > hlist_bl_lock(&sb->s_roots); > > @@ -2701,6 +2724,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) > > raw_write_seqcount_begin(&dentry->d_seq); > > __d_set_inode_and_type(dentry, inode, add_flags); > > raw_write_seqcount_end(&dentry->d_seq); > > + set_inode_flags(inode); > > fsnotify_update_flags(dentry); > > } > > __d_rehash(dentry); > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > index e414400c2487..9a9282fef347 100644 > > --- a/include/linux/fs.h > > +++ b/include/linux/fs.h > > @@ -2361,6 +2361,11 @@ struct super_operations { > > #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ > > #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ > > > > +#define S_ASYNC_CREATE BIT(18) /* create, link, symlink, mkdir, mknod all _async */ > > +#define S_ASYNC_REMOVE BIT(19) /* unlink, mkdir both _async */ > > +#define S_ASYNC_RENAME BIT(20) /* rename_async supported */ > > +#define S_ASYNC_OPEN BIT(21) /* atomic_open_async or create_async supported */ > > + > > /* > > * Note that nosuid etc flags are inode-specific: setting some file-system > > * flags just means all the inodes inherit those flags by default. It might be > > diff --git a/include/linux/namei.h b/include/linux/namei.h > > index 76c587a5ec3a..72e351640406 100644 > > --- a/include/linux/namei.h > > +++ b/include/linux/namei.h > > @@ -40,10 +40,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; > > #define LOOKUP_CREATE BIT(17) /* ... in object creation */ > > #define LOOKUP_EXCL BIT(18) /* ... in target must not exist */ > > #define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */ > > +#define LOOKUP_REMOVE BIT(20) /* ... in target of object removal */ > > > > #define LOOKUP_INTENT_FLAGS (LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL | \ > > - LOOKUP_RENAME_TARGET) > > -/* 4 spare bits for intent */ > > + LOOKUP_RENAME_TARGET | LOOKUP_REMOVE) > > +/* 3 spare bits for intent */ > > > > /* Scoping flags for lookup. */ > > #define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */ > > -- > > 2.47.1 > > >
diff --git a/fs/dcache.c b/fs/dcache.c index e49607d00d2d..37c0f655166d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -384,6 +384,27 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, smp_store_release(&dentry->d_flags, flags); } +static void set_inode_flags(struct inode *inode) +{ + const struct inode_operations *i_op = inode->i_op; + + lockdep_assert_held(&inode->i_lock); + if ((i_op->create_async || !i_op->create) && + (i_op->link_async || !i_op->link) && + (i_op->symlink_async || !i_op->symlink) && + (i_op->mkdir_async || !i_op->mkdir) && + (i_op->mknod_async || !i_op->mknod)) + inode->i_flags |= S_ASYNC_CREATE; + if ((i_op->unlink_async || !i_op->unlink) && + (i_op->mkdir_async || !i_op->mkdir)) + inode->i_flags |= S_ASYNC_REMOVE; + if (i_op->rename_async) + inode->i_flags |= S_ASYNC_RENAME; + if (i_op->atomic_open_async || + (!i_op->atomic_open && i_op->create_async)) + inode->i_flags |= S_ASYNC_OPEN; +} + static inline void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); @@ -1893,6 +1914,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); + set_inode_flags(inode); fsnotify_update_flags(dentry); spin_unlock(&dentry->d_lock); } @@ -1999,6 +2021,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) spin_lock(&new->d_lock); __d_set_inode_and_type(new, inode, add_flags); + set_inode_flags(inode); hlist_add_head(&new->d_u.d_alias, &inode->i_dentry); if (!disconnected) { hlist_bl_lock(&sb->s_roots); @@ -2701,6 +2724,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); + set_inode_flags(inode); fsnotify_update_flags(dentry); } __d_rehash(dentry); diff --git a/include/linux/fs.h b/include/linux/fs.h index e414400c2487..9a9282fef347 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2361,6 +2361,11 @@ struct super_operations { #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ +#define S_ASYNC_CREATE BIT(18) /* create, link, symlink, mkdir, mknod all _async */ +#define S_ASYNC_REMOVE BIT(19) /* unlink, mkdir both _async */ +#define S_ASYNC_RENAME BIT(20) /* rename_async supported */ +#define S_ASYNC_OPEN BIT(21) /* atomic_open_async or create_async supported */ + /* * Note that nosuid etc flags are inode-specific: setting some file-system * flags just means all the inodes inherit those flags by default. It might be diff --git a/include/linux/namei.h b/include/linux/namei.h index 76c587a5ec3a..72e351640406 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -40,10 +40,11 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; #define LOOKUP_CREATE BIT(17) /* ... in object creation */ #define LOOKUP_EXCL BIT(18) /* ... in target must not exist */ #define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */ +#define LOOKUP_REMOVE BIT(20) /* ... in target of object removal */ #define LOOKUP_INTENT_FLAGS (LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_EXCL | \ - LOOKUP_RENAME_TARGET) -/* 4 spare bits for intent */ + LOOKUP_RENAME_TARGET | LOOKUP_REMOVE) +/* 3 spare bits for intent */ /* Scoping flags for lookup. */ #define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */
If a filesystem supports _async ops for some directory ops we can take a "shared" lock on i_rwsem otherwise we must take an "exclusive" lock. As the filesystem may support some async ops but not others we need to easily determine which. With this patch we group the ops into 4 groups that are likely be supported together: CREATE: create, link, mkdir, mknod REMOVE: rmdir, unlink RENAME: rename OPEN: atomic_open, create and set S_ASYNC_XXX for each when the inode in initialised. We also add a LOOKUP_REMOVE intent flag which will be used by locking interfaces to help know which group is being used. Signed-off-by: NeilBrown <neilb@suse.de> --- fs/dcache.c | 24 ++++++++++++++++++++++++ include/linux/fs.h | 5 +++++ include/linux/namei.h | 5 +++-- 3 files changed, 32 insertions(+), 2 deletions(-)