Message ID | 20190214234908.GA6474@magnolia (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfs: don't decrement i_nlink in d_tmpfile | expand |
On Fri, Feb 15, 2019 at 4:23 AM Darrick J. Wong <darrick.wong@oracle.com> wrote: > > From: Darrick J. Wong <darrick.wong@oracle.com> > > d_tmpfile was introduced to instantiate an inode in the dentry cache as > a temporary file. This helper decrements the inode's nlink count and > dirties the inode, presumably so that filesystems could call new_inode > to create a new inode with nlink == 1 and then call d_tmpfile which will > decrement nlink. > > However, this doesn't play well with XFS, which needs to allocate, > initialize, and insert a tempfile inode on its unlinked list in a single > transaction. In order to maintain referential integrity of the XFS > metadata, we cannot have an inode on the unlinked list with nlink >= 1. > > XFS and btrfs hack around d_tmpfile's behavior by creating the inode > with nlink == 0 and then incrementing it just prior to calling > d_tmpfile, anticipating that it will be reset to 0. > > Everywhere else outside of d_tmpfile, it appears that nlink updates and > persistence is the responsibility of individual filesystems. Therefore, > move the nlink decrement out of d_tmpfile into the callers, and require > that callers only pass in inodes with nlink already set to 0. > > Signed-off-by: Darrick J. 
Wong <darrick.wong@oracle.com> > --- > fs/btrfs/inode.c | 8 -------- > fs/dcache.c | 8 ++++++-- > fs/ext2/namei.c | 2 +- > fs/ext4/namei.c | 1 + > fs/f2fs/namei.c | 1 + > fs/minix/namei.c | 2 +- > fs/ubifs/dir.c | 1 + > fs/udf/namei.c | 2 +- > fs/xfs/xfs_iops.c | 13 ++----------- > mm/shmem.c | 1 + > 10 files changed, 15 insertions(+), 24 deletions(-) > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > index 5c349667c761..bd189fc50f83 100644 > --- a/fs/btrfs/inode.c > +++ b/fs/btrfs/inode.c > @@ -10382,14 +10382,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) > if (ret) > goto out; > > - /* > - * We set number of links to 0 in btrfs_new_inode(), and here we set > - * it to 1 because d_tmpfile() will issue a warning if the count is 0, > - * through: > - * > - * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() > - */ > - set_nlink(inode, 1); > d_tmpfile(dentry, inode); > unlock_new_inode(inode); > mark_inode_dirty(inode); > diff --git a/fs/dcache.c b/fs/dcache.c > index aac41adf4743..5fb4ecce2589 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -3042,12 +3042,16 @@ void d_genocide(struct dentry *parent) > > EXPORT_SYMBOL(d_genocide); > > +/* > + * Instantiate an inode in the dentry cache as a temporary file. Callers must > + * ensure that @inode has a zero link count. > + */ > void d_tmpfile(struct dentry *dentry, struct inode *inode) > { > - inode_dec_link_count(inode); > BUG_ON(dentry->d_name.name != dentry->d_iname || > !hlist_unhashed(&dentry->d_u.d_alias) || > - !d_unlinked(dentry)); > + !d_unlinked(dentry) || > + inode->i_nlink != 0); You've just promoted i_nlink filesystem accounting error (which are not that rare) from WARN_ON() to BUG_ON(), not to mention Linus' objection to any use of BUG_ON() at all. !hlist_unhashed is anyway checked again in d_instantiate(). !d_unlinked is not a reason to break the machine. The name check is really not a reason to break the machine. 
You can probably make the tmp name code conditional on WARN_ON().

Thanks,
Amir.
On Fri, Feb 15, 2019 at 10:04:12AM +0200, Amir Goldstein wrote: > On Fri, Feb 15, 2019 at 4:23 AM Darrick J. Wong <darrick.wong@oracle.com> wrote: > > > > From: Darrick J. Wong <darrick.wong@oracle.com> > > > > d_tmpfile was introduced to instantiate an inode in the dentry cache as > > a temporary file. This helper decrements the inode's nlink count and > > dirties the inode, presumably so that filesystems could call new_inode > > to create a new inode with nlink == 1 and then call d_tmpfile which will > > decrement nlink. > > > > However, this doesn't play well with XFS, which needs to allocate, > > initialize, and insert a tempfile inode on its unlinked list in a single > > transaction. In order to maintain referential integrity of the XFS > > metadata, we cannot have an inode on the unlinked list with nlink >= 1. > > > > XFS and btrfs hack around d_tmpfile's behavior by creating the inode > > with nlink == 0 and then incrementing it just prior to calling > > d_tmpfile, anticipating that it will be reset to 0. > > > > Everywhere else outside of d_tmpfile, it appears that nlink updates and > > persistence is the responsibility of individual filesystems. Therefore, > > move the nlink decrement out of d_tmpfile into the callers, and require > > that callers only pass in inodes with nlink already set to 0. > > > > Signed-off-by: Darrick J. 
Wong <darrick.wong@oracle.com> > > --- > > fs/btrfs/inode.c | 8 -------- > > fs/dcache.c | 8 ++++++-- > > fs/ext2/namei.c | 2 +- > > fs/ext4/namei.c | 1 + > > fs/f2fs/namei.c | 1 + > > fs/minix/namei.c | 2 +- > > fs/ubifs/dir.c | 1 + > > fs/udf/namei.c | 2 +- > > fs/xfs/xfs_iops.c | 13 ++----------- > > mm/shmem.c | 1 + > > 10 files changed, 15 insertions(+), 24 deletions(-) > > > > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c > > index 5c349667c761..bd189fc50f83 100644 > > --- a/fs/btrfs/inode.c > > +++ b/fs/btrfs/inode.c > > @@ -10382,14 +10382,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) > > if (ret) > > goto out; > > > > - /* > > - * We set number of links to 0 in btrfs_new_inode(), and here we set > > - * it to 1 because d_tmpfile() will issue a warning if the count is 0, > > - * through: > > - * > > - * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() > > - */ > > - set_nlink(inode, 1); > > d_tmpfile(dentry, inode); > > unlock_new_inode(inode); > > mark_inode_dirty(inode); > > diff --git a/fs/dcache.c b/fs/dcache.c > > index aac41adf4743..5fb4ecce2589 100644 > > --- a/fs/dcache.c > > +++ b/fs/dcache.c > > @@ -3042,12 +3042,16 @@ void d_genocide(struct dentry *parent) > > > > EXPORT_SYMBOL(d_genocide); > > > > +/* > > + * Instantiate an inode in the dentry cache as a temporary file. Callers must > > + * ensure that @inode has a zero link count. > > + */ > > void d_tmpfile(struct dentry *dentry, struct inode *inode) > > { > > - inode_dec_link_count(inode); > > BUG_ON(dentry->d_name.name != dentry->d_iname || > > !hlist_unhashed(&dentry->d_u.d_alias) || > > - !d_unlinked(dentry)); > > + !d_unlinked(dentry) || > > + inode->i_nlink != 0); > > You've just promoted i_nlink filesystem accounting error (which > are not that rare) from WARN_ON() to BUG_ON(), not to mention > Linus' objection to any use of BUG_ON() at all. > > !hlist_unhashed is anyway checked again in d_instantiate(). 
> !d_unlinked is not a reason to break the machine.
> The name check is really not a reason to break the machine.
> Can probably make tmp name code conditional to WARN_ON().

Fair enough, I'll remove the redundant checks and downgrade that to a WARN_ON, if nobody else objects...

--D

> Thanks,
> Amir.
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5c349667c761..bd189fc50f83 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10382,14 +10382,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (ret) goto out; - /* - * We set number of links to 0 in btrfs_new_inode(), and here we set - * it to 1 because d_tmpfile() will issue a warning if the count is 0, - * through: - * - * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() - */ - set_nlink(inode, 1); d_tmpfile(dentry, inode); unlock_new_inode(inode); mark_inode_dirty(inode); diff --git a/fs/dcache.c b/fs/dcache.c index aac41adf4743..5fb4ecce2589 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3042,12 +3042,16 @@ void d_genocide(struct dentry *parent) EXPORT_SYMBOL(d_genocide); +/* + * Instantiate an inode in the dentry cache as a temporary file. Callers must + * ensure that @inode has a zero link count. + */ void d_tmpfile(struct dentry *dentry, struct inode *inode) { - inode_dec_link_count(inode); BUG_ON(dentry->d_name.name != dentry->d_iname || !hlist_unhashed(&dentry->d_u.d_alias) || - !d_unlinked(dentry)); + !d_unlinked(dentry) || + inode->i_nlink != 0); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry->d_name.len = sprintf(dentry->d_iname, "#%llu", diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 0c26dcc5d850..8542e9ce9677 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -117,7 +117,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) return PTR_ERR(inode); ext2_set_file_ops(inode); - mark_inode_dirty(inode); + inode_dec_link_count(inode); d_tmpfile(dentry, inode); unlock_new_inode(inode); return 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2b928eb07fa2..7502432f9816 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2517,6 +2517,7 @@ static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = 
&ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); + inode_dec_link_count(inode); d_tmpfile(dentry, inode); err = ext4_orphan_add(handle, inode); if (err) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 62d9829f3a6a..31a556af5f3a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -780,6 +780,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, f2fs_i_links_write(inode, false); *whiteout = inode; } else { + inode_dec_link_count(inode); d_tmpfile(dentry, inode); } /* link_count was changed by d_tmpfile as well. */ diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 1a6084d2b02e..3249f86c476a 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -57,7 +57,7 @@ static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode = minix_new_inode(dir, mode, &error); if (inode) { minix_set_inode(inode, 0); - mark_inode_dirty(inode); + inode_dec_link_count(inode); d_tmpfile(dentry, inode); } return error; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5767b373a8ff..7187e4fd7561 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -419,6 +419,7 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, drop_nlink(inode); *whiteout = inode; } else { + inode_dec_link_count(inode); d_tmpfile(dentry, inode); } ubifs_assert(c, ui->dirty); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 58cc2414992b..38bd021f9673 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -652,7 +652,7 @@ static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_data.a_ops = &udf_aops; inode->i_op = &udf_file_inode_operations; inode->i_fop = &udf_file_operations; - mark_inode_dirty(inode); + inode_dec_link_count(inode); d_tmpfile(dentry, inode); unlock_new_inode(inode); return 0; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1efef69a7f1c..f48ffd7a8d3e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -191,18 +191,9 @@ xfs_generic_create( 
xfs_setup_iops(ip); - if (tmpfile) { - /* - * The VFS requires that any inode fed to d_tmpfile must have - * nlink == 1 so that it can decrement the nlink in d_tmpfile. - * However, we created the temp file with nlink == 0 because - * we're not allowed to put an inode with nlink > 0 on the - * unlinked list. Therefore we have to set nlink to 1 so that - * d_tmpfile can immediately set it back to zero. - */ - set_nlink(inode, 1); + if (tmpfile) d_tmpfile(dentry, inode); - } else + else d_instantiate(dentry, inode); xfs_finish_inode_setup(ip); diff --git a/mm/shmem.c b/mm/shmem.c index 6ece1e2fe76e..4a7810093561 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2818,6 +2818,7 @@ shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) error = simple_acl_create(dir, inode); if (error) goto out_iput; + inode_dec_link_count(inode); d_tmpfile(dentry, inode); } return error;