Message ID | 20250206054504.2950516-2-neilb@suse.de (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | RFC: Allow concurrent and async changes in a directory | expand |
On Thu, Feb 06, 2025 at 04:42:38PM +1100, NeilBrown wrote: > vfs_mkdir() does not guarantee to make the child dentry positive on > success. It may leave it negative and then the caller needs to perform a > lookup to find the target dentry. > > This patch introduced vfs_mkdir_return() which performs the lookup if > needed so that this code is centralised. > > This prepares for a new inode operation which will perform mkdir and > returns the correct dentry. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > fs/cachefiles/namei.c | 7 +--- > fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/vfs.c | 21 ++---------- > fs/overlayfs/dir.c | 33 +------------------ > fs/overlayfs/overlayfs.h | 10 +++--- > fs/overlayfs/super.c | 2 +- > fs/smb/server/vfs.c | 24 +++----------- > include/linux/fs.h | 2 ++ > 8 files changed, 86 insertions(+), 82 deletions(-) > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > index 7cf59713f0f7..3c866c3b9534 100644 > --- a/fs/cachefiles/namei.c > +++ b/fs/cachefiles/namei.c > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > /* search the current directory for the element name */ > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > -retry: > ret = cachefiles_inject_read_error(); > if (ret == 0) > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > goto mkdir_error; > ret = cachefiles_inject_write_error(); > if (ret == 0) > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); > if (ret < 0) { > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > cachefiles_trace_mkdir_error); > @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > } > trace_cachefiles_mkdir(dir, subdir); > > - if (unlikely(d_unhashed(subdir))) { > - cachefiles_put_directory(subdir); > - goto retry; > - } > ASSERT(d_backing_inode(subdir)); > > _debug("mkdir -> %pd{ino=%lu}", > diff --git a/fs/namei.c b/fs/namei.c > index 3ab9440c5b93..d98caf36e867 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > } > EXPORT_SYMBOL(vfs_mkdir); > > +/** > + * vfs_mkdir_return - create directory returning correct dentry > + * @idmap: idmap of the mount the inode was found from > + * @dir: inode of the parent directory > + * @dentryp: pointer to dentry of the child directory > + * @mode: mode of the child directory > + * > + * Create a directory. > + * > + * If the inode has been found through an idmapped mount the idmap of > + * the vfsmount must be passed through @idmap. This function will then take > + * care to map the inode according to @idmap before checking permissions. > + * On non-idmapped mounts or if permission checking is to be performed on the > + * raw inode simply pass @nop_mnt_idmap. > + * > + * The filesystem may not use the dentry that was passed in. In that case > + * the passed-in dentry is put and a new one is placed in *@dentryp; > + * So on successful return *@dentryp will always be positive. > + */ > +int vfs_mkdir_return(struct mnt_idmap *idmap, struct inode *dir, > + struct dentry **dentryp, umode_t mode) > +{ I think this is misnamed. Maybe vfs_mkdir_positive() is better here. It also be nice to have a comment on vfs_mkdir() as well pointing out that the returned dentry might be negative. And is there a particular reason to not have it return the new dentry? That seems clearer than using the argument as a return value. > + struct dentry *dentry = *dentryp; > + int error; > + unsigned max_links = dir->i_sb->s_max_links; > + > + error = may_create(idmap, dir, dentry); > + if (error) > + return error; > + > + if (!dir->i_op->mkdir) > + return -EPERM; > + > + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); > + error = security_inode_mkdir(dir, dentry, mode); > + if (error) > + return error; > + > + if (max_links && dir->i_nlink >= max_links) > + return -EMLINK; > + > + error = dir->i_op->mkdir(idmap, dir, dentry, mode); Why isn't this calling vfs_mkdir() and then only starts differing afterwards? > + if (!error) { > + fsnotify_mkdir(dir, dentry); > + if (unlikely(d_unhashed(dentry))) { > + struct dentry *d; > + /* Need a "const" pointer. We know d_name is const > + * because we hold an exclusive lock on i_rwsem > + * in d_parent. > + */ > + const struct qstr *d_name = (void*)&dentry->d_name; > + d = lookup_dcache(d_name, dentry->d_parent, 0); > + if (!d) > + d = __lookup_slow(d_name, dentry->d_parent, 0); Quite a few caller's use lookup_one() here which calls inode_permission() on @dir again. Are we guaranteed that the permission check would always pass? > + if (IS_ERR(d)) { > + error = PTR_ERR(d); > + } else if (unlikely(d_is_negative(d))) { > + dput(d); > + error = -ENOENT; > + } else { > + dput(dentry); > + *dentryp = d; > + } > + } > + } > + return error; > +} > +EXPORT_SYMBOL(vfs_mkdir_return); > + > int do_mkdirat(int dfd, struct filename *name, umode_t mode) > { > struct dentry *dentry; > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 29cb7b812d71..740332413138 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1488,26 +1488,11 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > nfsd_check_ignore_resizing(iap); > break; > case S_IFDIR: > - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); > - if (!host_err && unlikely(d_unhashed(dchild))) { > - struct dentry *d; > - d = lookup_one_len(dchild->d_name.name, > - dchild->d_parent, > - dchild->d_name.len); > - if (IS_ERR(d)) { > - host_err = PTR_ERR(d); > - break; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = nfserr_serverfault; > - goto out; > - } > + host_err = vfs_mkdir_return(&nop_mnt_idmap, dirp, &dchild, iap->ia_mode); > + if (!host_err && unlikely(dchild != resfhp->fh_dentry)) { > dput(resfhp->fh_dentry); > - resfhp->fh_dentry = dget(d); > + resfhp->fh_dentry = dget(dchild); > err = fh_update(resfhp); > - dput(dchild); > - dchild = d; > if (err) > goto out; > } > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > index c9993ff66fc2..e6c54c6ef0f5 100644 > --- a/fs/overlayfs/dir.c > +++ b/fs/overlayfs/dir.c > @@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, > goto out; > } > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode) > -{ > - int err; > - struct dentry *d, *dentry = *newdentry; > - > - err = ovl_do_mkdir(ofs, dir, dentry, mode); > - if (err) > - return err; > - > - if (likely(!d_unhashed(dentry))) > - return 0; > - > - /* > - * vfs_mkdir() may succeed and leave the dentry passed > - * to it unhashed and negative. If that happens, try to > - * lookup a new hashed and positive dentry. > - */ > - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", > - dentry, err); > - return PTR_ERR(d); > - } > - dput(dentry); > - *newdentry = d; > - > - return 0; > -} > - > struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > struct dentry *newdentry, struct ovl_cattr *attr) > { > @@ -191,7 +160,7 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > > case S_IFDIR: > /* mkdir is special... */ > - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); > + err = ovl_do_mkdir(ofs, dir, &newdentry, attr->mode); > break; > > case S_IFCHR: > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > index 0021e2025020..967870f12482 100644 > --- a/fs/overlayfs/overlayfs.h > +++ b/fs/overlayfs/overlayfs.h > @@ -242,11 +242,11 @@ static inline int ovl_do_create(struct ovl_fs *ofs, > } > > static inline int ovl_do_mkdir(struct ovl_fs *ofs, > - struct inode *dir, struct dentry *dentry, > + struct inode *dir, struct dentry **dentry, > umode_t mode) > { > - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); > + int err = vfs_mkdir_return(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > + pr_debug("mkdir(%pd2, 0%o) = %i\n", *dentry, mode, err); > return err; > } > > @@ -838,8 +838,8 @@ struct ovl_cattr { > > #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode); > +int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, > + struct dentry **newdentry, umode_t mode); > struct dentry *ovl_create_real(struct ovl_fs *ofs, > struct inode *dir, struct dentry *newdentry, > struct ovl_cattr *attr); > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index 86ae6f6da36b..06ca8b01c336 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -327,7 +327,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > goto retry; > } > > - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); > + err = ovl_do_mkdir(ofs, dir, &work, attr.ia_mode); > if (err) > goto out_dput; > > diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c > index 6890016e1923..4e580bb7baf8 100644 > --- a/fs/smb/server/vfs.c > +++ b/fs/smb/server/vfs.c > @@ -211,7 +211,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > { > struct mnt_idmap *idmap; > struct path path; > - struct dentry *dentry; > + struct dentry *dentry, *d; > int err; > > dentry = ksmbd_vfs_kern_path_create(work, name, > @@ -227,27 +227,11 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > > idmap = mnt_idmap(path.mnt); > mode |= S_IFDIR; > - err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); > - if (!err && d_unhashed(dentry)) { > - struct dentry *d; > - > - d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - err = PTR_ERR(d); > - goto out_err; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = -ENOENT; > - goto out_err; > - } > - > + d = dentry; > + err = vfs_mkdir_return(idmap, d_inode(path.dentry), &dentry, mode); > + if (!err && dentry != d) > ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); > - dput(d); > - } > > -out_err: > done_path_create(&path, dentry); > if (err) > pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); > diff --git a/include/linux/fs.h b/include/linux/fs.h > index be3ad155ec9f..f81d6bc65fe4 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1971,6 +1971,8 @@ int vfs_create(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t, bool); > int vfs_mkdir(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t); > +int vfs_mkdir_return(struct mnt_idmap *, struct inode *, > + struct dentry **, umode_t); > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, > umode_t, dev_t); > int vfs_symlink(struct mnt_idmap *, struct inode *, > -- > 2.47.1 >
On Thu, 2025-02-06 at 16:42 +1100, NeilBrown wrote: > vfs_mkdir() does not guarantee to make the child dentry positive on > success. It may leave it negative and then the caller needs to perform a > lookup to find the target dentry. > > This patch introduced vfs_mkdir_return() which performs the lookup if > needed so that this code is centralised. > > This prepares for a new inode operation which will perform mkdir and > returns the correct dentry. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > fs/cachefiles/namei.c | 7 +--- > fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/vfs.c | 21 ++---------- > fs/overlayfs/dir.c | 33 +------------------ > fs/overlayfs/overlayfs.h | 10 +++--- > fs/overlayfs/super.c | 2 +- > fs/smb/server/vfs.c | 24 +++----------- > include/linux/fs.h | 2 ++ > 8 files changed, 86 insertions(+), 82 deletions(-) > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > index 7cf59713f0f7..3c866c3b9534 100644 > --- a/fs/cachefiles/namei.c > +++ b/fs/cachefiles/namei.c > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > /* search the current directory for the element name */ > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > -retry: > ret = cachefiles_inject_read_error(); > if (ret == 0) > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > goto mkdir_error; > ret = cachefiles_inject_write_error(); > if (ret == 0) > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); > if (ret < 0) { > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > cachefiles_trace_mkdir_error); > @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > } > trace_cachefiles_mkdir(dir, subdir); > > - if (unlikely(d_unhashed(subdir))) { > - cachefiles_put_directory(subdir); > - goto retry; > - } > ASSERT(d_backing_inode(subdir)); > > _debug("mkdir -> %pd{ino=%lu}", > diff --git a/fs/namei.c b/fs/namei.c > index 3ab9440c5b93..d98caf36e867 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > } > EXPORT_SYMBOL(vfs_mkdir); > > +/** > + * vfs_mkdir_return - create directory returning correct dentry > + * @idmap: idmap of the mount the inode was found from > + * @dir: inode of the parent directory > + * @dentryp: pointer to dentry of the child directory > + * @mode: mode of the child directory > + * > + * Create a directory. > + * > + * If the inode has been found through an idmapped mount the idmap of > + * the vfsmount must be passed through @idmap. This function will then take > + * care to map the inode according to @idmap before checking permissions. > + * On non-idmapped mounts or if permission checking is to be performed on the > + * raw inode simply pass @nop_mnt_idmap. > + * > + * The filesystem may not use the dentry that was passed in. In that case > + * the passed-in dentry is put and a new one is placed in *@dentryp; This sounds like the filesystem is not allowed to use the dentry that we're passing it. Maybe something like this: "In the event that the filesystem doesn't use *@dentryp, the dentry is put and a new one is placed in *@dentryp;" > + * So on successful return *@dentryp will always be positive. > + */ > +int vfs_mkdir_return(struct mnt_idmap *idmap, struct inode *dir, > + struct dentry **dentryp, umode_t mode) > +{ > + struct dentry *dentry = *dentryp; > + int error; > + unsigned max_links = dir->i_sb->s_max_links; > + > + error = may_create(idmap, dir, dentry); > + if (error) > + return error; > + > + if (!dir->i_op->mkdir) > + return -EPERM; > + > + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); > + error = security_inode_mkdir(dir, dentry, mode); > + if (error) > + return error; > + > + if (max_links && dir->i_nlink >= max_links) > + return -EMLINK; > + > + error = dir->i_op->mkdir(idmap, dir, dentry, mode); > + if (!error) { > + fsnotify_mkdir(dir, dentry); > + if (unlikely(d_unhashed(dentry))) { > + struct dentry *d; > + /* Need a "const" pointer. We know d_name is const > + * because we hold an exclusive lock on i_rwsem > + * in d_parent. > + */ > + const struct qstr *d_name = (void*)&dentry->d_name; > + d = lookup_dcache(d_name, dentry->d_parent, 0); > + if (!d) > + d = __lookup_slow(d_name, dentry->d_parent, 0); > + if (IS_ERR(d)) { > + error = PTR_ERR(d); > + } else if (unlikely(d_is_negative(d))) { > + dput(d); > + error = -ENOENT; > + } else { > + dput(dentry); > + *dentryp = d; > + } > + } > + } > + return error; > +} > +EXPORT_SYMBOL(vfs_mkdir_return); > + > int do_mkdirat(int dfd, struct filename *name, umode_t mode) > { > struct dentry *dentry; > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 29cb7b812d71..740332413138 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1488,26 +1488,11 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > nfsd_check_ignore_resizing(iap); > break; > case S_IFDIR: > - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); > - if (!host_err && unlikely(d_unhashed(dchild))) { > - struct dentry *d; > - d = lookup_one_len(dchild->d_name.name, > - dchild->d_parent, > - dchild->d_name.len); > - if (IS_ERR(d)) { > - host_err = PTR_ERR(d); > - break; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = nfserr_serverfault; > - goto out; > - } > + host_err = vfs_mkdir_return(&nop_mnt_idmap, dirp, &dchild, iap->ia_mode); > + if (!host_err && unlikely(dchild != resfhp->fh_dentry)) { > dput(resfhp->fh_dentry); > - resfhp->fh_dentry = dget(d); > + resfhp->fh_dentry = dget(dchild); > err = fh_update(resfhp); > - dput(dchild); > - dchild = d; > if (err) > goto out; > } > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > index c9993ff66fc2..e6c54c6ef0f5 100644 > --- a/fs/overlayfs/dir.c > +++ b/fs/overlayfs/dir.c > @@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, > goto out; > } > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode) > -{ > - int err; > - struct dentry *d, *dentry = *newdentry; > - > - err = ovl_do_mkdir(ofs, dir, dentry, mode); > - if (err) > - return err; > - > - if (likely(!d_unhashed(dentry))) > - return 0; > - > - /* > - * vfs_mkdir() may succeed and leave the dentry passed > - * to it unhashed and negative. If that happens, try to > - * lookup a new hashed and positive dentry. > - */ > - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", > - dentry, err); > - return PTR_ERR(d); > - } > - dput(dentry); > - *newdentry = d; > - > - return 0; > -} > - > struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > struct dentry *newdentry, struct ovl_cattr *attr) > { > @@ -191,7 +160,7 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > > case S_IFDIR: > /* mkdir is special... */ > - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); > + err = ovl_do_mkdir(ofs, dir, &newdentry, attr->mode); > break; > > case S_IFCHR: > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > index 0021e2025020..967870f12482 100644 > --- a/fs/overlayfs/overlayfs.h > +++ b/fs/overlayfs/overlayfs.h > @@ -242,11 +242,11 @@ static inline int ovl_do_create(struct ovl_fs *ofs, > } > > static inline int ovl_do_mkdir(struct ovl_fs *ofs, > - struct inode *dir, struct dentry *dentry, > + struct inode *dir, struct dentry **dentry, > umode_t mode) > { > - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); > + int err = vfs_mkdir_return(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > + pr_debug("mkdir(%pd2, 0%o) = %i\n", *dentry, mode, err); > return err; > } > > @@ -838,8 +838,8 @@ struct ovl_cattr { > > #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode); > +int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, > + struct dentry **newdentry, umode_t mode); > struct dentry *ovl_create_real(struct ovl_fs *ofs, > struct inode *dir, struct dentry *newdentry, > struct ovl_cattr *attr); > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index 86ae6f6da36b..06ca8b01c336 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -327,7 +327,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > goto retry; > } > > - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); > + err = ovl_do_mkdir(ofs, dir, &work, attr.ia_mode); > if (err) > goto out_dput; > > diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c > index 6890016e1923..4e580bb7baf8 100644 > --- a/fs/smb/server/vfs.c > +++ b/fs/smb/server/vfs.c > @@ -211,7 +211,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > { > struct mnt_idmap *idmap; > struct path path; > - struct dentry *dentry; > + struct dentry *dentry, *d; > int err; > > dentry = ksmbd_vfs_kern_path_create(work, name, > @@ -227,27 +227,11 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > > idmap = mnt_idmap(path.mnt); > mode |= S_IFDIR; > - err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); > - if (!err && d_unhashed(dentry)) { > - struct dentry *d; > - > - d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - err = PTR_ERR(d); > - goto out_err; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = -ENOENT; > - goto out_err; > - } > - > + d = dentry; > + err = vfs_mkdir_return(idmap, d_inode(path.dentry), &dentry, mode); > + if (!err && dentry != d) > ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); > - dput(d); > - } > > -out_err: > done_path_create(&path, dentry); > if (err) > pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); > diff --git a/include/linux/fs.h b/include/linux/fs.h > index be3ad155ec9f..f81d6bc65fe4 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1971,6 +1971,8 @@ int vfs_create(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t, bool); > int vfs_mkdir(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t); > +int vfs_mkdir_return(struct mnt_idmap *, struct inode *, > + struct dentry **, umode_t); > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, > umode_t, dev_t); > int vfs_symlink(struct mnt_idmap *, struct inode *,
On Thu, 06 Feb 2025, Christian Brauner wrote: > On Thu, Feb 06, 2025 at 04:42:38PM +1100, NeilBrown wrote: > > vfs_mkdir() does not guarantee to make the child dentry positive on > > success. It may leave it negative and then the caller needs to perform a > > lookup to find the target dentry. > > > > This patch introduced vfs_mkdir_return() which performs the lookup if > > needed so that this code is centralised. > > > > This prepares for a new inode operation which will perform mkdir and > > returns the correct dentry. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > fs/cachefiles/namei.c | 7 +--- > > fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ > > fs/nfsd/vfs.c | 21 ++---------- > > fs/overlayfs/dir.c | 33 +------------------ > > fs/overlayfs/overlayfs.h | 10 +++--- > > fs/overlayfs/super.c | 2 +- > > fs/smb/server/vfs.c | 24 +++----------- > > include/linux/fs.h | 2 ++ > > 8 files changed, 86 insertions(+), 82 deletions(-) > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > index 7cf59713f0f7..3c866c3b9534 100644 > > --- a/fs/cachefiles/namei.c > > +++ b/fs/cachefiles/namei.c > > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > /* search the current directory for the element name */ > > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > > > -retry: > > ret = cachefiles_inject_read_error(); > > if (ret == 0) > > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > > @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > goto mkdir_error; > > ret = cachefiles_inject_write_error(); > > if (ret == 0) > > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > > + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); > > if (ret < 0) { > > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > > cachefiles_trace_mkdir_error); > > @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > } > > trace_cachefiles_mkdir(dir, subdir); > > > > - if (unlikely(d_unhashed(subdir))) { > > - cachefiles_put_directory(subdir); > > - goto retry; > > - } > > ASSERT(d_backing_inode(subdir)); > > > > _debug("mkdir -> %pd{ino=%lu}", > > diff --git a/fs/namei.c b/fs/namei.c > > index 3ab9440c5b93..d98caf36e867 100644 > > --- a/fs/namei.c > > +++ b/fs/namei.c > > @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > } > > EXPORT_SYMBOL(vfs_mkdir); > > > > +/** > > + * vfs_mkdir_return - create directory returning correct dentry > > + * @idmap: idmap of the mount the inode was found from > > + * @dir: inode of the parent directory > > + * @dentryp: pointer to dentry of the child directory > > + * @mode: mode of the child directory > > + * > > + * Create a directory. > > + * > > + * If the inode has been found through an idmapped mount the idmap of > > + * the vfsmount must be passed through @idmap. This function will then take > > + * care to map the inode according to @idmap before checking permissions. > > + * On non-idmapped mounts or if permission checking is to be performed on the > > + * raw inode simply pass @nop_mnt_idmap. > > + * > > + * The filesystem may not use the dentry that was passed in. In that case > > + * the passed-in dentry is put and a new one is placed in *@dentryp; > > + * So on successful return *@dentryp will always be positive. > > + */ > > +int vfs_mkdir_return(struct mnt_idmap *idmap, struct inode *dir, > > + struct dentry **dentryp, umode_t mode) > > +{ > > I think this is misnamed. Maybe vfs_mkdir_positive() is better here. > It also be nice to have a comment on vfs_mkdir() as well pointing out > that the returned dentry might be negative. While I'm not particularly fond of vfs_mkdir_return(), I don't see that vfs_mkdir_positive() is an improvement. I cannot find any relevant precedent in the kernel to guide. Most _return and _positive functions are for low-level counting primitives :-) I'm tempted to add another arg to vfs_mkdir() instead of adding a new function. That would solve one problem by introducing another: what arg? Maybe pass both a 'struct dentry *' and a 'struct dentry **' and if the latter is not NULL, it gets filled with the new dentry if there is one. > > And is there a particular reason to not have it return the new dentry? > That seems clearer than using the argument as a return value. If I did that then every caller would need to check if the return value was not IS_ERR_OR_NULL() and if so, dput() the original dentry and keep the new one - just like current callers of ->lookup need to. It seems cleaner to do that once in vfs_mkdir_return() rather than in all the callers. I guess we could *always* return the dentry on success and dput the old one if it was different or if there were an error. So dentry = vfs_mkdir_return(idmap, inode, dentry, mode) would be the common pattern. Would you be OK with that? > > > + struct dentry *dentry = *dentryp; > > + int error; > > + unsigned max_links = dir->i_sb->s_max_links; > > + > > + error = may_create(idmap, dir, dentry); > > + if (error) > > + return error; > > + > > + if (!dir->i_op->mkdir) > > + return -EPERM; > > + > > + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); > > + error = security_inode_mkdir(dir, dentry, mode); > > + if (error) > > + return error; > > + > > + if (max_links && dir->i_nlink >= max_links) > > + return -EMLINK; > > + > > + error = dir->i_op->mkdir(idmap, dir, dentry, mode); > > Why isn't this calling vfs_mkdir() and then only starts differing afterwards? Because once we introduce the new ->mkdir_async which returns a dentry the two functions start to diverge more. I could have a __vfs_mkdir() which does both and has a bool arg to tell it if we want the return value. That would avoid code duplication. > > > + if (!error) { > > + fsnotify_mkdir(dir, dentry); > > + if (unlikely(d_unhashed(dentry))) { > > + struct dentry *d; > > + /* Need a "const" pointer. We know d_name is const > > + * because we hold an exclusive lock on i_rwsem > > + * in d_parent. > > + */ > > + const struct qstr *d_name = (void*)&dentry->d_name; > > + d = lookup_dcache(d_name, dentry->d_parent, 0); > > + if (!d) > > + d = __lookup_slow(d_name, dentry->d_parent, 0); > > Quite a few caller's use lookup_one() here which calls > inode_permission() on @dir again. Are we guaranteed that the permission > check would always pass? I think they use lookup_one() because that was the easiest, not because they need all the functionality. If the process had permission to create a directory with a given name but now doesn't have permission to look up that same name, then something is weird. Maybe a race with permission changing could do that. But I think the process should have the right to hold the dentry that it has just successfully created. The lookup is hopefully just a work-around until the new improved interface is used by all relevant filesystems. Thanks, NeilBrown
On Fri, 07 Feb 2025, Jeff Layton wrote: > On Thu, 2025-02-06 at 16:42 +1100, NeilBrown wrote: > > vfs_mkdir() does not guarantee to make the child dentry positive on > > success. It may leave it negative and then the caller needs to perform a > > lookup to find the target dentry. > > > > This patch introduced vfs_mkdir_return() which performs the lookup if > > needed so that this code is centralised. > > > > This prepares for a new inode operation which will perform mkdir and > > returns the correct dentry. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > fs/cachefiles/namei.c | 7 +--- > > fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ > > fs/nfsd/vfs.c | 21 ++---------- > > fs/overlayfs/dir.c | 33 +------------------ > > fs/overlayfs/overlayfs.h | 10 +++--- > > fs/overlayfs/super.c | 2 +- > > fs/smb/server/vfs.c | 24 +++----------- > > include/linux/fs.h | 2 ++ > > 8 files changed, 86 insertions(+), 82 deletions(-) > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > index 7cf59713f0f7..3c866c3b9534 100644 > > --- a/fs/cachefiles/namei.c > > +++ b/fs/cachefiles/namei.c > > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > /* search the current directory for the element name */ > > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > > > -retry: > > ret = cachefiles_inject_read_error(); > > if (ret == 0) > > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > > @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > goto mkdir_error; > > ret = cachefiles_inject_write_error(); > > if (ret == 0) > > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > > + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); > > if (ret < 0) { > > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > > cachefiles_trace_mkdir_error); > > @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > } > > trace_cachefiles_mkdir(dir, subdir); > > > > - if (unlikely(d_unhashed(subdir))) { > > - cachefiles_put_directory(subdir); > > - goto retry; > > - } > > ASSERT(d_backing_inode(subdir)); > > > > _debug("mkdir -> %pd{ino=%lu}", > > diff --git a/fs/namei.c b/fs/namei.c > > index 3ab9440c5b93..d98caf36e867 100644 > > --- a/fs/namei.c > > +++ b/fs/namei.c > > @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > } > > EXPORT_SYMBOL(vfs_mkdir); > > > > +/** > > + * vfs_mkdir_return - create directory returning correct dentry > > + * @idmap: idmap of the mount the inode was found from > > + * @dir: inode of the parent directory > > + * @dentryp: pointer to dentry of the child directory > > + * @mode: mode of the child directory > > + * > > + * Create a directory. > > + * > > + * If the inode has been found through an idmapped mount the idmap of > > + * the vfsmount must be passed through @idmap. This function will then take > > + * care to map the inode according to @idmap before checking permissions. > > + * On non-idmapped mounts or if permission checking is to be performed on the > > + * raw inode simply pass @nop_mnt_idmap. > > + * > > + * The filesystem may not use the dentry that was passed in. In that case > > + * the passed-in dentry is put and a new one is placed in *@dentryp; > > This sounds like the filesystem is not allowed to use the dentry that > we're passing it. Maybe something like this: > > "In the event that the filesystem doesn't use *@dentryp, the dentry is > put and a new one is placed in *@dentryp;" Good catch - thanks. I've updated my patch you use your test, except I decided on "dput()" rather than "put". Thanks, NeilBrown
On Thu, Feb 06, 2025 at 04:42:38PM +1100, NeilBrown wrote: > vfs_mkdir() does not guarantee to make the child dentry positive on > success. It may leave it negative and then the caller needs to perform a > lookup to find the target dentry. > > This patch introduced vfs_mkdir_return() which performs the lookup if > needed so that this code is centralised. > > This prepares for a new inode operation which will perform mkdir and > returns the correct dentry. * Calling conventions stink; make it _consume_ dentry reference and return dentry reference or ERR_PTR(). Callers will be happier that way (check it). * Calling conventions should be documented in commit message *and* in D/f/porting * devpts, nfs4recover and xfs might as well convert (not going to hit the "need a lookup" case anyway) * that + /* Need a "const" pointer. We know d_name is const + * because we hold an exclusive lock on i_rwsem + * in d_parent. + */ + const struct qstr *d_name = (void*)&dentry->d_name; + d = lookup_dcache(d_name, dentry->d_parent, 0); + if (!d) + d = __lookup_slow(d_name, dentry->d_parent, 0); doesn't need a cast. C is perfectly fine with T *x = foo(); const T *y = x; You are not allowed to _strip_ qualifiers; adding them is fine. Same reason why you are allowed to pass char * to strlen() without any casts whatsoever. Comment re stability is fine; the cast is pure WTF material.
On Sat, 08 Feb 2025, Al Viro wrote: > On Thu, Feb 06, 2025 at 04:42:38PM +1100, NeilBrown wrote: > > vfs_mkdir() does not guarantee to make the child dentry positive on > > success. It may leave it negative and then the caller needs to perform a > > lookup to find the target dentry. > > > > This patch introduced vfs_mkdir_return() which performs the lookup if > > needed so that this code is centralised. > > > > This prepares for a new inode operation which will perform mkdir and > > returns the correct dentry. > > * Calling conventions stink; make it _consume_ dentry reference and > return dentry reference or ERR_PTR(). Callers will be happier that way > (check it). With later patches it will need to consume the lock on the dentry as well, and either transfer it to the new one or (for error) unlock it. We need to have the result dentry still locked for fsnotify_mkdir(). Transferring the dentry lock would have to be done in d_splice_alias(). The __d_unalias() branch should be ok because I already trylock in there and fail if I can't get the lock. For the IS_ROOT branch .... I think it is safe to fail if a trylock doesn't succeed. So I can probably make that work - thanks. Hmm... kernfs reportedly can leave the mkdir dentry negative and fill in the inode later. How does that work? I assume it will still be hashed so mkdir won't try the lookup. done_path_create() will need to accept an IS_ERR() dentry. > > * Calling conventions should be documented in commit message *and* in > D/f/porting What is the scope of "porting" ? IT seems to be mostly about _operations interfaces, but I do see other things in there. I'll try to remember that - thanks. > > * devpts, nfs4recover and xfs might as well convert (not going to hit > the "need a lookup" case anyway) good point - avoiding the lookup when not requested is a pointless optimisation because it is hardly every needed and should always be cheap - we expect it to be in the dcache. > > * that > + /* Need a "const" pointer. We know d_name is const > + * because we hold an exclusive lock on i_rwsem > + * in d_parent. > + */ > + const struct qstr *d_name = (void*)&dentry->d_name; > + d = lookup_dcache(d_name, dentry->d_parent, 0); > + if (!d) > + d = __lookup_slow(d_name, dentry->d_parent, 0); > doesn't need a cast. C is perfectly fine with > T *x = foo(); > const T *y = x; > > You are not allowed to _strip_ qualifiers; adding them is fine. > Same reason why you are allowed to pass char * to strlen() without > any casts whatsoever. hmm.. I thought I had tried that. Maybe I didn't try hard enough. Thanks for the guidance. > > Comment re stability is fine; the cast is pure WTF material. > Thanks, NeilBrown
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7cf59713f0f7..3c866c3b9534 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* search the current directory for the element name */ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); -retry: ret = cachefiles_inject_read_error(); if (ret == 0) subdir = lookup_one_len(dirname, dir, strlen(dirname)); @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, goto mkdir_error; ret = cachefiles_inject_write_error(); if (ret == 0) - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); if (ret < 0) { trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, cachefiles_trace_mkdir_error); @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, } trace_cachefiles_mkdir(dir, subdir); - if (unlikely(d_unhashed(subdir))) { - cachefiles_put_directory(subdir); - goto retry; - } ASSERT(d_backing_inode(subdir)); _debug("mkdir -> %pd{ino=%lu}", diff --git a/fs/namei.c b/fs/namei.c index 3ab9440c5b93..d98caf36e867 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, } EXPORT_SYMBOL(vfs_mkdir); +/** + * vfs_mkdir_return - create directory returning correct dentry + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentryp: pointer to dentry of the child directory + * @mode: mode of the child directory + * + * Create a directory. + * + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. + * On non-idmapped mounts or if permission checking is to be performed on the + * raw inode simply pass @nop_mnt_idmap. + * + * The filesystem may not use the dentry that was passed in. In that case + * the passed-in dentry is put and a new one is placed in *@dentryp; + * So on successful return *@dentryp will always be positive. + */ +int vfs_mkdir_return(struct mnt_idmap *idmap, struct inode *dir, + struct dentry **dentryp, umode_t mode) +{ + struct dentry *dentry = *dentryp; + int error; + unsigned max_links = dir->i_sb->s_max_links; + + error = may_create(idmap, dir, dentry); + if (error) + return error; + + if (!dir->i_op->mkdir) + return -EPERM; + + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); + error = security_inode_mkdir(dir, dentry, mode); + if (error) + return error; + + if (max_links && dir->i_nlink >= max_links) + return -EMLINK; + + error = dir->i_op->mkdir(idmap, dir, dentry, mode); + if (!error) { + fsnotify_mkdir(dir, dentry); + if (unlikely(d_unhashed(dentry))) { + struct dentry *d; + /* Need a "const" pointer. We know d_name is const + * because we hold an exclusive lock on i_rwsem + * in d_parent. + */ + const struct qstr *d_name = (void*)&dentry->d_name; + d = lookup_dcache(d_name, dentry->d_parent, 0); + if (!d) + d = __lookup_slow(d_name, dentry->d_parent, 0); + if (IS_ERR(d)) { + error = PTR_ERR(d); + } else if (unlikely(d_is_negative(d))) { + dput(d); + error = -ENOENT; + } else { + dput(dentry); + *dentryp = d; + } + } + } + return error; +} +EXPORT_SYMBOL(vfs_mkdir_return); + int do_mkdirat(int dfd, struct filename *name, umode_t mode) { struct dentry *dentry; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 29cb7b812d71..740332413138 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1488,26 +1488,11 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); - if (!host_err && unlikely(d_unhashed(dchild))) { - struct dentry *d; - d = lookup_one_len(dchild->d_name.name, - dchild->d_parent, - dchild->d_name.len); - if (IS_ERR(d)) { - host_err = PTR_ERR(d); - break; - } - if (unlikely(d_is_negative(d))) { - dput(d); - err = nfserr_serverfault; - goto out; - } + host_err = vfs_mkdir_return(&nop_mnt_idmap, dirp, &dchild, iap->ia_mode); + if (!host_err && unlikely(dchild != resfhp->fh_dentry)) { dput(resfhp->fh_dentry); - resfhp->fh_dentry = dget(d); + resfhp->fh_dentry = dget(dchild); err = fh_update(resfhp); - dput(dchild); - dchild = d; if (err) goto out; } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index c9993ff66fc2..e6c54c6ef0f5 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, goto out; } -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, - struct dentry **newdentry, umode_t mode) -{ - int err; - struct dentry *d, *dentry = *newdentry; - - err = ovl_do_mkdir(ofs, dir, dentry, mode); - if (err) - return err; - - if (likely(!d_unhashed(dentry))) - return 0; - - /* - * vfs_mkdir() may succeed and leave the dentry passed - * to it unhashed and negative. If that happens, try to - * lookup a new hashed and positive dentry. - */ - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, - dentry->d_name.len); - if (IS_ERR(d)) { - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", - dentry, err); - return PTR_ERR(d); - } - dput(dentry); - *newdentry = d; - - return 0; -} - struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr) { @@ -191,7 +160,7 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, case S_IFDIR: /* mkdir is special... */ - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); + err = ovl_do_mkdir(ofs, dir, &newdentry, attr->mode); break; case S_IFCHR: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0021e2025020..967870f12482 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -242,11 +242,11 @@ static inline int ovl_do_create(struct ovl_fs *ofs, } static inline int ovl_do_mkdir(struct ovl_fs *ofs, - struct inode *dir, struct dentry *dentry, + struct inode *dir, struct dentry **dentry, umode_t mode) { - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); + int err = vfs_mkdir_return(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + pr_debug("mkdir(%pd2, 0%o) = %i\n", *dentry, mode, err); return err; } @@ -838,8 +838,8 @@ struct ovl_cattr { #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, - struct dentry **newdentry, umode_t mode); +int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, + struct dentry **newdentry, umode_t mode); struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 86ae6f6da36b..06ca8b01c336 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -327,7 +327,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, goto retry; } - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); + err = ovl_do_mkdir(ofs, dir, &work, attr.ia_mode); if (err) goto out_dput; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 6890016e1923..4e580bb7baf8 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -211,7 +211,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) { struct mnt_idmap *idmap; struct path path; - struct dentry *dentry; + struct dentry *dentry, *d; int err; dentry = ksmbd_vfs_kern_path_create(work, name, @@ -227,27 +227,11 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; - err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); - if (!err && d_unhashed(dentry)) { - struct dentry *d; - - d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, - dentry->d_name.len); - if (IS_ERR(d)) { - err = PTR_ERR(d); - goto out_err; - } - if (unlikely(d_is_negative(d))) { - dput(d); - err = -ENOENT; - goto out_err; - } - + d = dentry; + err = vfs_mkdir_return(idmap, d_inode(path.dentry), &dentry, mode); + if (!err && dentry != d) ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); - dput(d); - } -out_err: done_path_create(&path, dentry); if (err) pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); diff --git a/include/linux/fs.h b/include/linux/fs.h index be3ad155ec9f..f81d6bc65fe4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1971,6 +1971,8 @@ int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); int vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); +int vfs_mkdir_return(struct mnt_idmap *, struct inode *, + struct dentry **, umode_t); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *,
vfs_mkdir() does not guarantee to make the child dentry positive on success. It may leave it negative and then the caller needs to perform a lookup to find the target dentry. This patch introduced vfs_mkdir_return() which performs the lookup if needed so that this code is centralised. This prepares for a new inode operation which will perform mkdir and returns the correct dentry. Signed-off-by: NeilBrown <neilb@suse.de> --- fs/cachefiles/namei.c | 7 +--- fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ fs/nfsd/vfs.c | 21 ++---------- fs/overlayfs/dir.c | 33 +------------------ fs/overlayfs/overlayfs.h | 10 +++--- fs/overlayfs/super.c | 2 +- fs/smb/server/vfs.c | 24 +++----------- include/linux/fs.h | 2 ++ 8 files changed, 86 insertions(+), 82 deletions(-)