Message ID | 20250206054504.2950516-2-neilb@suse.de |
---|---|
State | New |
Headers | show |
Series | RFC: Allow concurrent and async changes in a directory | expand |
On Thu, Feb 06, 2025 at 04:42:38PM +1100, NeilBrown wrote: > vfs_mkdir() does not guarantee to make the child dentry positive on > success. It may leave it negative and then the caller needs to perform a > lookup to find the target dentry. > > This patch introduced vfs_mkdir_return() which performs the lookup if > needed so that this code is centralised. > > This prepares for a new inode operation which will perform mkdir and > returns the correct dentry. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > fs/cachefiles/namei.c | 7 +--- > fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/vfs.c | 21 ++---------- > fs/overlayfs/dir.c | 33 +------------------ > fs/overlayfs/overlayfs.h | 10 +++--- > fs/overlayfs/super.c | 2 +- > fs/smb/server/vfs.c | 24 +++----------- > include/linux/fs.h | 2 ++ > 8 files changed, 86 insertions(+), 82 deletions(-) > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > index 7cf59713f0f7..3c866c3b9534 100644 > --- a/fs/cachefiles/namei.c > +++ b/fs/cachefiles/namei.c > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > /* search the current directory for the element name */ > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > -retry: > ret = cachefiles_inject_read_error(); > if (ret == 0) > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > goto mkdir_error; > ret = cachefiles_inject_write_error(); > if (ret == 0) > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); > if (ret < 0) { > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > cachefiles_trace_mkdir_error); > @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > } > trace_cachefiles_mkdir(dir, subdir); > > - if (unlikely(d_unhashed(subdir))) { > - cachefiles_put_directory(subdir); > - goto retry; > - } > ASSERT(d_backing_inode(subdir)); > > _debug("mkdir -> %pd{ino=%lu}", > diff --git a/fs/namei.c b/fs/namei.c > index 3ab9440c5b93..d98caf36e867 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > } > EXPORT_SYMBOL(vfs_mkdir); > > +/** > + * vfs_mkdir_return - create directory returning correct dentry > + * @idmap: idmap of the mount the inode was found from > + * @dir: inode of the parent directory > + * @dentryp: pointer to dentry of the child directory > + * @mode: mode of the child directory > + * > + * Create a directory. > + * > + * If the inode has been found through an idmapped mount the idmap of > + * the vfsmount must be passed through @idmap. This function will then take > + * care to map the inode according to @idmap before checking permissions. > + * On non-idmapped mounts or if permission checking is to be performed on the > + * raw inode simply pass @nop_mnt_idmap. > + * > + * The filesystem may not use the dentry that was passed in. In that case > + * the passed-in dentry is put and a new one is placed in *@dentryp; > + * So on successful return *@dentryp will always be positive. > + */ > +int vfs_mkdir_return(struct mnt_idmap *idmap, struct inode *dir, > + struct dentry **dentryp, umode_t mode) > +{ I think this is misnamed. Maybe vfs_mkdir_positive() is better here. It also be nice to have a comment on vfs_mkdir() as well pointing out that the returned dentry might be negative. And is there a particular reason to not have it return the new dentry? That seems clearer than using the argument as a return value. > + struct dentry *dentry = *dentryp; > + int error; > + unsigned max_links = dir->i_sb->s_max_links; > + > + error = may_create(idmap, dir, dentry); > + if (error) > + return error; > + > + if (!dir->i_op->mkdir) > + return -EPERM; > + > + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); > + error = security_inode_mkdir(dir, dentry, mode); > + if (error) > + return error; > + > + if (max_links && dir->i_nlink >= max_links) > + return -EMLINK; > + > + error = dir->i_op->mkdir(idmap, dir, dentry, mode); Why isn't this calling vfs_mkdir() and then only starts differing afterwards? > + if (!error) { > + fsnotify_mkdir(dir, dentry); > + if (unlikely(d_unhashed(dentry))) { > + struct dentry *d; > + /* Need a "const" pointer. We know d_name is const > + * because we hold an exclusive lock on i_rwsem > + * in d_parent. > + */ > + const struct qstr *d_name = (void*)&dentry->d_name; > + d = lookup_dcache(d_name, dentry->d_parent, 0); > + if (!d) > + d = __lookup_slow(d_name, dentry->d_parent, 0); Quite a few caller's use lookup_one() here which calls inode_permission() on @dir again. Are we guaranteed that the permission check would always pass? > + if (IS_ERR(d)) { > + error = PTR_ERR(d); > + } else if (unlikely(d_is_negative(d))) { > + dput(d); > + error = -ENOENT; > + } else { > + dput(dentry); > + *dentryp = d; > + } > + } > + } > + return error; > +} > +EXPORT_SYMBOL(vfs_mkdir_return); > + > int do_mkdirat(int dfd, struct filename *name, umode_t mode) > { > struct dentry *dentry; > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 29cb7b812d71..740332413138 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1488,26 +1488,11 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > nfsd_check_ignore_resizing(iap); > break; > case S_IFDIR: > - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); > - if (!host_err && unlikely(d_unhashed(dchild))) { > - struct dentry *d; > - d = lookup_one_len(dchild->d_name.name, > - dchild->d_parent, > - dchild->d_name.len); > - if (IS_ERR(d)) { > - host_err = PTR_ERR(d); > - break; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = nfserr_serverfault; > - goto out; > - } > + host_err = vfs_mkdir_return(&nop_mnt_idmap, dirp, &dchild, iap->ia_mode); > + if (!host_err && unlikely(dchild != resfhp->fh_dentry)) { > dput(resfhp->fh_dentry); > - resfhp->fh_dentry = dget(d); > + resfhp->fh_dentry = dget(dchild); > err = fh_update(resfhp); > - dput(dchild); > - dchild = d; > if (err) > goto out; > } > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > index c9993ff66fc2..e6c54c6ef0f5 100644 > --- a/fs/overlayfs/dir.c > +++ b/fs/overlayfs/dir.c > @@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, > goto out; > } > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode) > -{ > - int err; > - struct dentry *d, *dentry = *newdentry; > - > - err = ovl_do_mkdir(ofs, dir, dentry, mode); > - if (err) > - return err; > - > - if (likely(!d_unhashed(dentry))) > - return 0; > - > - /* > - * vfs_mkdir() may succeed and leave the dentry passed > - * to it unhashed and negative. If that happens, try to > - * lookup a new hashed and positive dentry. > - */ > - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", > - dentry, err); > - return PTR_ERR(d); > - } > - dput(dentry); > - *newdentry = d; > - > - return 0; > -} > - > struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > struct dentry *newdentry, struct ovl_cattr *attr) > { > @@ -191,7 +160,7 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > > case S_IFDIR: > /* mkdir is special... */ > - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); > + err = ovl_do_mkdir(ofs, dir, &newdentry, attr->mode); > break; > > case S_IFCHR: > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > index 0021e2025020..967870f12482 100644 > --- a/fs/overlayfs/overlayfs.h > +++ b/fs/overlayfs/overlayfs.h > @@ -242,11 +242,11 @@ static inline int ovl_do_create(struct ovl_fs *ofs, > } > > static inline int ovl_do_mkdir(struct ovl_fs *ofs, > - struct inode *dir, struct dentry *dentry, > + struct inode *dir, struct dentry **dentry, > umode_t mode) > { > - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); > + int err = vfs_mkdir_return(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > + pr_debug("mkdir(%pd2, 0%o) = %i\n", *dentry, mode, err); > return err; > } > > @@ -838,8 +838,8 @@ struct ovl_cattr { > > #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode); > +int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, > + struct dentry **newdentry, umode_t mode); > struct dentry *ovl_create_real(struct ovl_fs *ofs, > struct inode *dir, struct dentry *newdentry, > struct ovl_cattr *attr); > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index 86ae6f6da36b..06ca8b01c336 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -327,7 +327,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > goto retry; > } > > - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); > + err = ovl_do_mkdir(ofs, dir, &work, attr.ia_mode); > if (err) > goto out_dput; > > diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c > index 6890016e1923..4e580bb7baf8 100644 > --- a/fs/smb/server/vfs.c > +++ b/fs/smb/server/vfs.c > @@ -211,7 +211,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > { > struct mnt_idmap *idmap; > struct path path; > - struct dentry *dentry; > + struct dentry *dentry, *d; > int err; > > dentry = ksmbd_vfs_kern_path_create(work, name, > @@ -227,27 +227,11 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > > idmap = mnt_idmap(path.mnt); > mode |= S_IFDIR; > - err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); > - if (!err && d_unhashed(dentry)) { > - struct dentry *d; > - > - d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - err = PTR_ERR(d); > - goto out_err; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = -ENOENT; > - goto out_err; > - } > - > + d = dentry; > + err = vfs_mkdir_return(idmap, d_inode(path.dentry), &dentry, mode); > + if (!err && dentry != d) > ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); > - dput(d); > - } > > -out_err: > done_path_create(&path, dentry); > if (err) > pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); > diff --git a/include/linux/fs.h b/include/linux/fs.h > index be3ad155ec9f..f81d6bc65fe4 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1971,6 +1971,8 @@ int vfs_create(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t, bool); > int vfs_mkdir(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t); > +int vfs_mkdir_return(struct mnt_idmap *, struct inode *, > + struct dentry **, umode_t); > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, > umode_t, dev_t); > int vfs_symlink(struct mnt_idmap *, struct inode *, > -- > 2.47.1 >
On Thu, 2025-02-06 at 16:42 +1100, NeilBrown wrote: > vfs_mkdir() does not guarantee to make the child dentry positive on > success. It may leave it negative and then the caller needs to perform a > lookup to find the target dentry. > > This patch introduced vfs_mkdir_return() which performs the lookup if > needed so that this code is centralised. > > This prepares for a new inode operation which will perform mkdir and > returns the correct dentry. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > fs/cachefiles/namei.c | 7 +--- > fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/vfs.c | 21 ++---------- > fs/overlayfs/dir.c | 33 +------------------ > fs/overlayfs/overlayfs.h | 10 +++--- > fs/overlayfs/super.c | 2 +- > fs/smb/server/vfs.c | 24 +++----------- > include/linux/fs.h | 2 ++ > 8 files changed, 86 insertions(+), 82 deletions(-) > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > index 7cf59713f0f7..3c866c3b9534 100644 > --- a/fs/cachefiles/namei.c > +++ b/fs/cachefiles/namei.c > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > /* search the current directory for the element name */ > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > -retry: > ret = cachefiles_inject_read_error(); > if (ret == 0) > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > goto mkdir_error; > ret = cachefiles_inject_write_error(); > if (ret == 0) > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); > if (ret < 0) { > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > cachefiles_trace_mkdir_error); > @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > } > trace_cachefiles_mkdir(dir, subdir); > > - if (unlikely(d_unhashed(subdir))) { > - cachefiles_put_directory(subdir); > - goto retry; > - } > ASSERT(d_backing_inode(subdir)); > > _debug("mkdir -> %pd{ino=%lu}", > diff --git a/fs/namei.c b/fs/namei.c > index 3ab9440c5b93..d98caf36e867 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > } > EXPORT_SYMBOL(vfs_mkdir); > > +/** > + * vfs_mkdir_return - create directory returning correct dentry > + * @idmap: idmap of the mount the inode was found from > + * @dir: inode of the parent directory > + * @dentryp: pointer to dentry of the child directory > + * @mode: mode of the child directory > + * > + * Create a directory. > + * > + * If the inode has been found through an idmapped mount the idmap of > + * the vfsmount must be passed through @idmap. This function will then take > + * care to map the inode according to @idmap before checking permissions. > + * On non-idmapped mounts or if permission checking is to be performed on the > + * raw inode simply pass @nop_mnt_idmap. > + * > + * The filesystem may not use the dentry that was passed in. In that case > + * the passed-in dentry is put and a new one is placed in *@dentryp; This sounds like the filesystem is not allowed to use the dentry that we're passing it. Maybe something like this: "In the event that the filesystem doesn't use *@dentryp, the dentry is put and a new one is placed in *@dentryp;" > + * So on successful return *@dentryp will always be positive. > + */ > +int vfs_mkdir_return(struct mnt_idmap *idmap, struct inode *dir, > + struct dentry **dentryp, umode_t mode) > +{ > + struct dentry *dentry = *dentryp; > + int error; > + unsigned max_links = dir->i_sb->s_max_links; > + > + error = may_create(idmap, dir, dentry); > + if (error) > + return error; > + > + if (!dir->i_op->mkdir) > + return -EPERM; > + > + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); > + error = security_inode_mkdir(dir, dentry, mode); > + if (error) > + return error; > + > + if (max_links && dir->i_nlink >= max_links) > + return -EMLINK; > + > + error = dir->i_op->mkdir(idmap, dir, dentry, mode); > + if (!error) { > + fsnotify_mkdir(dir, dentry); > + if (unlikely(d_unhashed(dentry))) { > + struct dentry *d; > + /* Need a "const" pointer. We know d_name is const > + * because we hold an exclusive lock on i_rwsem > + * in d_parent. > + */ > + const struct qstr *d_name = (void*)&dentry->d_name; > + d = lookup_dcache(d_name, dentry->d_parent, 0); > + if (!d) > + d = __lookup_slow(d_name, dentry->d_parent, 0); > + if (IS_ERR(d)) { > + error = PTR_ERR(d); > + } else if (unlikely(d_is_negative(d))) { > + dput(d); > + error = -ENOENT; > + } else { > + dput(dentry); > + *dentryp = d; > + } > + } > + } > + return error; > +} > +EXPORT_SYMBOL(vfs_mkdir_return); > + > int do_mkdirat(int dfd, struct filename *name, umode_t mode) > { > struct dentry *dentry; > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 29cb7b812d71..740332413138 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1488,26 +1488,11 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > nfsd_check_ignore_resizing(iap); > break; > case S_IFDIR: > - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); > - if (!host_err && unlikely(d_unhashed(dchild))) { > - struct dentry *d; > - d = lookup_one_len(dchild->d_name.name, > - dchild->d_parent, > - dchild->d_name.len); > - if (IS_ERR(d)) { > - host_err = PTR_ERR(d); > - break; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = nfserr_serverfault; > - goto out; > - } > + host_err = vfs_mkdir_return(&nop_mnt_idmap, dirp, &dchild, iap->ia_mode); > + if (!host_err && unlikely(dchild != resfhp->fh_dentry)) { > dput(resfhp->fh_dentry); > - resfhp->fh_dentry = dget(d); > + resfhp->fh_dentry = dget(dchild); > err = fh_update(resfhp); > - dput(dchild); > - dchild = d; > if (err) > goto out; > } > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > index c9993ff66fc2..e6c54c6ef0f5 100644 > --- a/fs/overlayfs/dir.c > +++ b/fs/overlayfs/dir.c > @@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, > goto out; > } > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode) > -{ > - int err; > - struct dentry *d, *dentry = *newdentry; > - > - err = ovl_do_mkdir(ofs, dir, dentry, mode); > - if (err) > - return err; > - > - if (likely(!d_unhashed(dentry))) > - return 0; > - > - /* > - * vfs_mkdir() may succeed and leave the dentry passed > - * to it unhashed and negative. If that happens, try to > - * lookup a new hashed and positive dentry. > - */ > - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", > - dentry, err); > - return PTR_ERR(d); > - } > - dput(dentry); > - *newdentry = d; > - > - return 0; > -} > - > struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > struct dentry *newdentry, struct ovl_cattr *attr) > { > @@ -191,7 +160,7 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > > case S_IFDIR: > /* mkdir is special... */ > - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); > + err = ovl_do_mkdir(ofs, dir, &newdentry, attr->mode); > break; > > case S_IFCHR: > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > index 0021e2025020..967870f12482 100644 > --- a/fs/overlayfs/overlayfs.h > +++ b/fs/overlayfs/overlayfs.h > @@ -242,11 +242,11 @@ static inline int ovl_do_create(struct ovl_fs *ofs, > } > > static inline int ovl_do_mkdir(struct ovl_fs *ofs, > - struct inode *dir, struct dentry *dentry, > + struct inode *dir, struct dentry **dentry, > umode_t mode) > { > - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); > + int err = vfs_mkdir_return(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > + pr_debug("mkdir(%pd2, 0%o) = %i\n", *dentry, mode, err); > return err; > } > > @@ -838,8 +838,8 @@ struct ovl_cattr { > > #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode); > +int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, > + struct dentry **newdentry, umode_t mode); > struct dentry *ovl_create_real(struct ovl_fs *ofs, > struct inode *dir, struct dentry *newdentry, > struct ovl_cattr *attr); > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index 86ae6f6da36b..06ca8b01c336 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -327,7 +327,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > goto retry; > } > > - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); > + err = ovl_do_mkdir(ofs, dir, &work, attr.ia_mode); > if (err) > goto out_dput; > > diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c > index 6890016e1923..4e580bb7baf8 100644 > --- a/fs/smb/server/vfs.c > +++ b/fs/smb/server/vfs.c > @@ -211,7 +211,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > { > struct mnt_idmap *idmap; > struct path path; > - struct dentry *dentry; > + struct dentry *dentry, *d; > int err; > > dentry = ksmbd_vfs_kern_path_create(work, name, > @@ -227,27 +227,11 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > > idmap = mnt_idmap(path.mnt); > mode |= S_IFDIR; > - err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); > - if (!err && d_unhashed(dentry)) { > - struct dentry *d; > - > - d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - err = PTR_ERR(d); > - goto out_err; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = -ENOENT; > - goto out_err; > - } > - > + d = dentry; > + err = vfs_mkdir_return(idmap, d_inode(path.dentry), &dentry, mode); > + if (!err && dentry != d) > ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); > - dput(d); > - } > > -out_err: > done_path_create(&path, dentry); > if (err) > pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); > diff --git a/include/linux/fs.h b/include/linux/fs.h > index be3ad155ec9f..f81d6bc65fe4 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1971,6 +1971,8 @@ int vfs_create(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t, bool); > int vfs_mkdir(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t); > +int vfs_mkdir_return(struct mnt_idmap *, struct inode *, > + struct dentry **, umode_t); > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, > umode_t, dev_t); > int vfs_symlink(struct mnt_idmap *, struct inode *,
On Thu, 06 Feb 2025, Christian Brauner wrote: > On Thu, Feb 06, 2025 at 04:42:38PM +1100, NeilBrown wrote: > > vfs_mkdir() does not guarantee to make the child dentry positive on > > success. It may leave it negative and then the caller needs to perform a > > lookup to find the target dentry. > > > > This patch introduced vfs_mkdir_return() which performs the lookup if > > needed so that this code is centralised. > > > > This prepares for a new inode operation which will perform mkdir and > > returns the correct dentry. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > fs/cachefiles/namei.c | 7 +--- > > fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ > > fs/nfsd/vfs.c | 21 ++---------- > > fs/overlayfs/dir.c | 33 +------------------ > > fs/overlayfs/overlayfs.h | 10 +++--- > > fs/overlayfs/super.c | 2 +- > > fs/smb/server/vfs.c | 24 +++----------- > > include/linux/fs.h | 2 ++ > > 8 files changed, 86 insertions(+), 82 deletions(-) > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > index 7cf59713f0f7..3c866c3b9534 100644 > > --- a/fs/cachefiles/namei.c > > +++ b/fs/cachefiles/namei.c > > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > /* search the current directory for the element name */ > > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > > > -retry: > > ret = cachefiles_inject_read_error(); > > if (ret == 0) > > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > > @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > goto mkdir_error; > > ret = cachefiles_inject_write_error(); > > if (ret == 0) > > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > > + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); > > if (ret < 0) { > > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > > cachefiles_trace_mkdir_error); > > @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > } > > trace_cachefiles_mkdir(dir, subdir); > > > > - if (unlikely(d_unhashed(subdir))) { > > - cachefiles_put_directory(subdir); > > - goto retry; > > - } > > ASSERT(d_backing_inode(subdir)); > > > > _debug("mkdir -> %pd{ino=%lu}", > > diff --git a/fs/namei.c b/fs/namei.c > > index 3ab9440c5b93..d98caf36e867 100644 > > --- a/fs/namei.c > > +++ b/fs/namei.c > > @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > } > > EXPORT_SYMBOL(vfs_mkdir); > > > > +/** > > + * vfs_mkdir_return - create directory returning correct dentry > > + * @idmap: idmap of the mount the inode was found from > > + * @dir: inode of the parent directory > > + * @dentryp: pointer to dentry of the child directory > > + * @mode: mode of the child directory > > + * > > + * Create a directory. > > + * > > + * If the inode has been found through an idmapped mount the idmap of > > + * the vfsmount must be passed through @idmap. This function will then take > > + * care to map the inode according to @idmap before checking permissions. > > + * On non-idmapped mounts or if permission checking is to be performed on the > > + * raw inode simply pass @nop_mnt_idmap. > > + * > > + * The filesystem may not use the dentry that was passed in. In that case > > + * the passed-in dentry is put and a new one is placed in *@dentryp; > > + * So on successful return *@dentryp will always be positive. > > + */ > > +int vfs_mkdir_return(struct mnt_idmap *idmap, struct inode *dir, > > + struct dentry **dentryp, umode_t mode) > > +{ > > I think this is misnamed. Maybe vfs_mkdir_positive() is better here. > It also be nice to have a comment on vfs_mkdir() as well pointing out > that the returned dentry might be negative. While I'm not particularly fond of vfs_mkdir_return(), I don't see that vfs_mkdir_positive() is an improvement. I cannot find any relevant precedent in the kernel to guide. Most _return and _positive functions are for low-level counting primitives :-) I'm tempted to add another arg to vfs_mkdir() instead of adding a new function. That would solve one problem by introducing another: what arg? Maybe pass both a 'struct dentry *' and a 'struct dentry **' and if the latter is not NULL, it gets filled with the new dentry if there is one. > > And is there a particular reason to not have it return the new dentry? > That seems clearer than using the argument as a return value. If I did that then every caller would need to check if the return value was not IS_ERR_OR_NULL() and if so, dput() the original dentry and keep the new one - just like current callers of ->lookup need to. It seems cleaner to do that once in vfs_mkdir_return() rather than in all the callers. I guess we could *always* return the dentry on success and dput the old one if it was different or if there were an error. So dentry = vfs_mkdir_return(idmap, inode, dentry, mode) would be the common pattern. Would you be OK with that? > > > + struct dentry *dentry = *dentryp; > > + int error; > > + unsigned max_links = dir->i_sb->s_max_links; > > + > > + error = may_create(idmap, dir, dentry); > > + if (error) > > + return error; > > + > > + if (!dir->i_op->mkdir) > > + return -EPERM; > > + > > + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); > > + error = security_inode_mkdir(dir, dentry, mode); > > + if (error) > > + return error; > > + > > + if (max_links && dir->i_nlink >= max_links) > > + return -EMLINK; > > + > > + error = dir->i_op->mkdir(idmap, dir, dentry, mode); > > Why isn't this calling vfs_mkdir() and then only starts differing afterwards? Because once we introduce the new ->mkdir_async which returns a dentry the two functions start to diverge more. I could have a __vfs_mkdir() which does both and has a bool arg to tell it if we want the return value. That would avoid code duplication. > > > + if (!error) { > > + fsnotify_mkdir(dir, dentry); > > + if (unlikely(d_unhashed(dentry))) { > > + struct dentry *d; > > + /* Need a "const" pointer. We know d_name is const > > + * because we hold an exclusive lock on i_rwsem > > + * in d_parent. > > + */ > > + const struct qstr *d_name = (void*)&dentry->d_name; > > + d = lookup_dcache(d_name, dentry->d_parent, 0); > > + if (!d) > > + d = __lookup_slow(d_name, dentry->d_parent, 0); > > Quite a few caller's use lookup_one() here which calls > inode_permission() on @dir again. Are we guaranteed that the permission > check would always pass? I think they use lookup_one() because that was the easiest, not because they need all the functionality. If the process had permission to create a directory with a given name but now doesn't have permission to look up that same name, then something is weird. Maybe a race with permission changing could do that. But I think the process should have the right to hold the dentry that it has just successfully created. The lookup is hopefully just a work-around until the new improved interface is used by all relevant filesystems. Thanks, NeilBrown
On Fri, 07 Feb 2025, Jeff Layton wrote: > On Thu, 2025-02-06 at 16:42 +1100, NeilBrown wrote: > > vfs_mkdir() does not guarantee to make the child dentry positive on > > success. It may leave it negative and then the caller needs to perform a > > lookup to find the target dentry. > > > > This patch introduced vfs_mkdir_return() which performs the lookup if > > needed so that this code is centralised. > > > > This prepares for a new inode operation which will perform mkdir and > > returns the correct dentry. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > fs/cachefiles/namei.c | 7 +--- > > fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ > > fs/nfsd/vfs.c | 21 ++---------- > > fs/overlayfs/dir.c | 33 +------------------ > > fs/overlayfs/overlayfs.h | 10 +++--- > > fs/overlayfs/super.c | 2 +- > > fs/smb/server/vfs.c | 24 +++----------- > > include/linux/fs.h | 2 ++ > > 8 files changed, 86 insertions(+), 82 deletions(-) > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > index 7cf59713f0f7..3c866c3b9534 100644 > > --- a/fs/cachefiles/namei.c > > +++ b/fs/cachefiles/namei.c > > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > /* search the current directory for the element name */ > > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > > > -retry: > > ret = cachefiles_inject_read_error(); > > if (ret == 0) > > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > > @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > goto mkdir_error; > > ret = cachefiles_inject_write_error(); > > if (ret == 0) > > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > > + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); > > if (ret < 0) { > > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > > cachefiles_trace_mkdir_error); > > @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > } > > trace_cachefiles_mkdir(dir, subdir); > > > > - if (unlikely(d_unhashed(subdir))) { > > - cachefiles_put_directory(subdir); > > - goto retry; > > - } > > ASSERT(d_backing_inode(subdir)); > > > > _debug("mkdir -> %pd{ino=%lu}", > > diff --git a/fs/namei.c b/fs/namei.c > > index 3ab9440c5b93..d98caf36e867 100644 > > --- a/fs/namei.c > > +++ b/fs/namei.c > > @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > } > > EXPORT_SYMBOL(vfs_mkdir); > > > > +/** > > + * vfs_mkdir_return - create directory returning correct dentry > > + * @idmap: idmap of the mount the inode was found from > > + * @dir: inode of the parent directory > > + * @dentryp: pointer to dentry of the child directory > > + * @mode: mode of the child directory > > + * > > + * Create a directory. > > + * > > + * If the inode has been found through an idmapped mount the idmap of > > + * the vfsmount must be passed through @idmap. This function will then take > > + * care to map the inode according to @idmap before checking permissions. > > + * On non-idmapped mounts or if permission checking is to be performed on the > > + * raw inode simply pass @nop_mnt_idmap. > > + * > > + * The filesystem may not use the dentry that was passed in. In that case > > + * the passed-in dentry is put and a new one is placed in *@dentryp; > > This sounds like the filesystem is not allowed to use the dentry that > we're passing it. Maybe something like this: > > "In the event that the filesystem doesn't use *@dentryp, the dentry is > put and a new one is placed in *@dentryp;" Good catch - thanks. I've updated my patch you use your test, except I decided on "dput()" rather than "put". Thanks, NeilBrown
On Thu, Feb 06, 2025 at 04:42:38PM +1100, NeilBrown wrote: > vfs_mkdir() does not guarantee to make the child dentry positive on > success. It may leave it negative and then the caller needs to perform a > lookup to find the target dentry. > > This patch introduced vfs_mkdir_return() which performs the lookup if > needed so that this code is centralised. > > This prepares for a new inode operation which will perform mkdir and > returns the correct dentry. * Calling conventions stink; make it _consume_ dentry reference and return dentry reference or ERR_PTR(). Callers will be happier that way (check it). * Calling conventions should be documented in commit message *and* in D/f/porting * devpts, nfs4recover and xfs might as well convert (not going to hit the "need a lookup" case anyway) * that + /* Need a "const" pointer. We know d_name is const + * because we hold an exclusive lock on i_rwsem + * in d_parent. + */ + const struct qstr *d_name = (void*)&dentry->d_name; + d = lookup_dcache(d_name, dentry->d_parent, 0); + if (!d) + d = __lookup_slow(d_name, dentry->d_parent, 0); doesn't need a cast. C is perfectly fine with T *x = foo(); const T *y = x; You are not allowed to _strip_ qualifiers; adding them is fine. Same reason why you are allowed to pass char * to strlen() without any casts whatsoever. Comment re stability is fine; the cast is pure WTF material.
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7cf59713f0f7..3c866c3b9534 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* search the current directory for the element name */ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); -retry: ret = cachefiles_inject_read_error(); if (ret == 0) subdir = lookup_one_len(dirname, dir, strlen(dirname)); @@ -130,7 +129,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, goto mkdir_error; ret = cachefiles_inject_write_error(); if (ret == 0) - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); + ret = vfs_mkdir_return(&nop_mnt_idmap, d_inode(dir), &subdir, 0700); if (ret < 0) { trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, cachefiles_trace_mkdir_error); @@ -138,10 +137,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, } trace_cachefiles_mkdir(dir, subdir); - if (unlikely(d_unhashed(subdir))) { - cachefiles_put_directory(subdir); - goto retry; - } ASSERT(d_backing_inode(subdir)); _debug("mkdir -> %pd{ino=%lu}", diff --git a/fs/namei.c b/fs/namei.c index 3ab9440c5b93..d98caf36e867 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4317,6 +4317,75 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, } EXPORT_SYMBOL(vfs_mkdir); +/** + * vfs_mkdir_return - create directory returning correct dentry + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentryp: pointer to dentry of the child directory + * @mode: mode of the child directory + * + * Create a directory. + * + * If the inode has been found through an idmapped mount the idmap of + * the vfsmount must be passed through @idmap. This function will then take + * care to map the inode according to @idmap before checking permissions. + * On non-idmapped mounts or if permission checking is to be performed on the + * raw inode simply pass @nop_mnt_idmap. + * + * The filesystem may not use the dentry that was passed in. In that case + * the passed-in dentry is put and a new one is placed in *@dentryp; + * So on successful return *@dentryp will always be positive. + */ +int vfs_mkdir_return(struct mnt_idmap *idmap, struct inode *dir, + struct dentry **dentryp, umode_t mode) +{ + struct dentry *dentry = *dentryp; + int error; + unsigned max_links = dir->i_sb->s_max_links; + + error = may_create(idmap, dir, dentry); + if (error) + return error; + + if (!dir->i_op->mkdir) + return -EPERM; + + mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); + error = security_inode_mkdir(dir, dentry, mode); + if (error) + return error; + + if (max_links && dir->i_nlink >= max_links) + return -EMLINK; + + error = dir->i_op->mkdir(idmap, dir, dentry, mode); + if (!error) { + fsnotify_mkdir(dir, dentry); + if (unlikely(d_unhashed(dentry))) { + struct dentry *d; + /* Need a "const" pointer. We know d_name is const + * because we hold an exclusive lock on i_rwsem + * in d_parent. + */ + const struct qstr *d_name = (void*)&dentry->d_name; + d = lookup_dcache(d_name, dentry->d_parent, 0); + if (!d) + d = __lookup_slow(d_name, dentry->d_parent, 0); + if (IS_ERR(d)) { + error = PTR_ERR(d); + } else if (unlikely(d_is_negative(d))) { + dput(d); + error = -ENOENT; + } else { + dput(dentry); + *dentryp = d; + } + } + } + return error; +} +EXPORT_SYMBOL(vfs_mkdir_return); + int do_mkdirat(int dfd, struct filename *name, umode_t mode) { struct dentry *dentry; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 29cb7b812d71..740332413138 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1488,26 +1488,11 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); - if (!host_err && unlikely(d_unhashed(dchild))) { - struct dentry *d; - d = lookup_one_len(dchild->d_name.name, - dchild->d_parent, - dchild->d_name.len); - if (IS_ERR(d)) { - host_err = PTR_ERR(d); - break; - } - if (unlikely(d_is_negative(d))) { - dput(d); - err = nfserr_serverfault; - goto out; - } + host_err = vfs_mkdir_return(&nop_mnt_idmap, dirp, &dchild, iap->ia_mode); + if (!host_err && unlikely(dchild != resfhp->fh_dentry)) { dput(resfhp->fh_dentry); - resfhp->fh_dentry = dget(d); + resfhp->fh_dentry = dget(dchild); err = fh_update(resfhp); - dput(dchild); - dchild = d; if (err) goto out; } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index c9993ff66fc2..e6c54c6ef0f5 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, goto out; } -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, - struct dentry **newdentry, umode_t mode) -{ - int err; - struct dentry *d, *dentry = *newdentry; - - err = ovl_do_mkdir(ofs, dir, dentry, mode); - if (err) - return err; - - if (likely(!d_unhashed(dentry))) - return 0; - - /* - * vfs_mkdir() may succeed and leave the dentry passed - * to it unhashed and negative. If that happens, try to - * lookup a new hashed and positive dentry. - */ - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, - dentry->d_name.len); - if (IS_ERR(d)) { - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", - dentry, err); - return PTR_ERR(d); - } - dput(dentry); - *newdentry = d; - - return 0; -} - struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr) { @@ -191,7 +160,7 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, case S_IFDIR: /* mkdir is special... */ - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); + err = ovl_do_mkdir(ofs, dir, &newdentry, attr->mode); break; case S_IFCHR: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0021e2025020..967870f12482 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -242,11 +242,11 @@ static inline int ovl_do_create(struct ovl_fs *ofs, } static inline int ovl_do_mkdir(struct ovl_fs *ofs, - struct inode *dir, struct dentry *dentry, + struct inode *dir, struct dentry **dentry, umode_t mode) { - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); + int err = vfs_mkdir_return(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + pr_debug("mkdir(%pd2, 0%o) = %i\n", *dentry, mode, err); return err; } @@ -838,8 +838,8 @@ struct ovl_cattr { #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, - struct dentry **newdentry, umode_t mode); +int ovl_do_mkdir(struct ovl_fs *ofs, struct inode *dir, + struct dentry **newdentry, umode_t mode); struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 86ae6f6da36b..06ca8b01c336 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -327,7 +327,7 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, goto retry; } - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); + err = ovl_do_mkdir(ofs, dir, &work, attr.ia_mode); if (err) goto out_dput; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 6890016e1923..4e580bb7baf8 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -211,7 +211,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) { struct mnt_idmap *idmap; struct path path; - struct dentry *dentry; + struct dentry *dentry, *d; int err; dentry = ksmbd_vfs_kern_path_create(work, name, @@ -227,27 +227,11 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; - err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); - if (!err && d_unhashed(dentry)) { - struct dentry *d; - - d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, - dentry->d_name.len); - if (IS_ERR(d)) { - err = PTR_ERR(d); - goto out_err; - } - if (unlikely(d_is_negative(d))) { - dput(d); - err = -ENOENT; - goto out_err; - } - + d = dentry; + err = vfs_mkdir_return(idmap, d_inode(path.dentry), &dentry, mode); + if (!err && dentry != d) ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); - dput(d); - } -out_err: done_path_create(&path, dentry); if (err) pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); diff --git a/include/linux/fs.h b/include/linux/fs.h index be3ad155ec9f..f81d6bc65fe4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1971,6 +1971,8 @@ int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); int vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); +int vfs_mkdir_return(struct mnt_idmap *, struct inode *, + struct dentry **, umode_t); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *,
vfs_mkdir() does not guarantee to make the child dentry positive on success. It may leave it negative and then the caller needs to perform a lookup to find the target dentry. This patch introduced vfs_mkdir_return() which performs the lookup if needed so that this code is centralised. This prepares for a new inode operation which will perform mkdir and returns the correct dentry. Signed-off-by: NeilBrown <neilb@suse.de> --- fs/cachefiles/namei.c | 7 +--- fs/namei.c | 69 ++++++++++++++++++++++++++++++++++++++++ fs/nfsd/vfs.c | 21 ++---------- fs/overlayfs/dir.c | 33 +------------------ fs/overlayfs/overlayfs.h | 10 +++--- fs/overlayfs/super.c | 2 +- fs/smb/server/vfs.c | 24 +++----------- include/linux/fs.h | 2 ++ 8 files changed, 86 insertions(+), 82 deletions(-)