Message ID | d41ffa26e20b15b12895812c3cad7c91a6a59bc6.1572949325.git.mbobrowski@mbobrowski.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ext4: port direct I/O to iomap infrastructure | expand |
On Tue, Nov 05, 2019 at 11:01:51PM +1100, Matthew Bobrowski wrote: > In preparation for implementing the iomap direct I/O modifications, > the inode extension/truncate code needs to be moved out from the > ext4_iomap_end() callback. For direct I/O, if the current code > remained, it would behave incorrrectly. Updating the inode size prior > to converting unwritten extents would potentially allow a racing > direct I/O read to find unwritten extents before being converted > correctly. > > The inode extension/truncate code now resides within a new helper > ext4_handle_inode_extension(). This function has been designed so that > it can accommodate for both DAX and direct I/O extension/truncate > operations. > > Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org> > Reviewed-by: Jan Kara <jack@suse.cz> > Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com> > --- > fs/ext4/file.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++- > fs/ext4/inode.c | 48 +------------------------- > 2 files changed, 89 insertions(+), 48 deletions(-) > > diff --git a/fs/ext4/file.c b/fs/ext4/file.c > index 440f4c6ba4ee..ec54fec96a81 100644 > --- a/fs/ext4/file.c > +++ b/fs/ext4/file.c > @@ -33,6 +33,7 @@ > #include "ext4_jbd2.h" > #include "xattr.h" > #include "acl.h" > +#include "truncate.h" > > static bool ext4_dio_supported(struct inode *inode) > { > @@ -234,12 +235,95 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) > return iov_iter_count(from); > } > > +static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, > + ssize_t written, size_t count) > +{ > + handle_t *handle; > + bool truncate = false; > + u8 blkbits = inode->i_blkbits; > + ext4_lblk_t written_blk, end_blk; > + > + /* > + * Note that EXT4_I(inode)->i_disksize can get extended up to > + * inode->i_size while the I/O was running due to writeback of delalloc > + * blocks. But, the code in ext4_iomap_alloc() is careful to use > + * zeroed/unwritten extents if this is possible; thus we won't leave > + * uninitialized blocks in a file even if we didn't succeed in writing > + * as much as we intended. > + */ > + WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize); > + if (offset + count <= EXT4_I(inode)->i_disksize) { > + /* > + * We need to ensure that the inode is removed from the orphan > + * list if it has been added prematurely, due to writeback of > + * delalloc blocks. > + */ > + if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { > + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); > + > + if (IS_ERR(handle)) { > + ext4_orphan_del(NULL, inode); > + return PTR_ERR(handle); > + } > + > + ext4_orphan_del(handle, inode); > + ext4_journal_stop(handle); I keep seeing this chunk (and the ext4_orphan_add chunk) bouncing around through this patchset, which causes me to wonder -- would it be useful to refactor these into small helpers? Or is it really just the same two orphan_add/del chunks bouncing around multiple places? --D > + } > + > + return written; > + } > + > + if (written < 0) > + goto truncate; > + > + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); > + if (IS_ERR(handle)) { > + written = PTR_ERR(handle); > + goto truncate; > + } > + > + if (ext4_update_inode_size(inode, offset + written)) > + ext4_mark_inode_dirty(handle, inode); > + > + /* > + * We may need to truncate allocated but not written blocks beyond EOF. > + */ > + written_blk = ALIGN(offset + written, 1 << blkbits); > + end_blk = ALIGN(offset + count, 1 << blkbits); > + if (written_blk < end_blk && ext4_can_truncate(inode)) > + truncate = true; > + > + /* > + * Remove the inode from the orphan list if it has been extended and > + * everything went OK. > + */ > + if (!truncate && inode->i_nlink) > + ext4_orphan_del(handle, inode); > + ext4_journal_stop(handle); > + > + if (truncate) { > +truncate: > + ext4_truncate_failed_write(inode); > + /* > + * If the truncate operation failed early, then the inode may > + * still be on the orphan list. In that case, we need to try > + * remove the inode from the in-memory linked list. > + */ > + if (inode->i_nlink) > + ext4_orphan_del(NULL, inode); > + } > + > + return written; > +} > + > #ifdef CONFIG_FS_DAX > static ssize_t > ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) > { > - struct inode *inode = file_inode(iocb->ki_filp); > ssize_t ret; > + size_t count; > + loff_t offset; > + struct inode *inode = file_inode(iocb->ki_filp); > > if (!inode_trylock(inode)) { > if (iocb->ki_flags & IOCB_NOWAIT) > @@ -256,7 +340,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) > if (ret) > goto out; > > + offset = iocb->ki_pos; > + count = iov_iter_count(from); > ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); > + ret = ext4_handle_inode_extension(inode, offset, ret, count); > out: > inode_unlock(inode); > if (ret > 0) > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 9bd80df6b856..071a1f976aab 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -3583,53 +3583,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, > static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, > ssize_t written, unsigned flags, struct iomap *iomap) > { > - int ret = 0; > - handle_t *handle; > - int blkbits = inode->i_blkbits; > - bool truncate = false; > - > - if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) > - return 0; > - > - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); > - if (IS_ERR(handle)) { > - ret = PTR_ERR(handle); > - goto orphan_del; > - } > - if (ext4_update_inode_size(inode, offset + written)) > - ext4_mark_inode_dirty(handle, inode); > - /* > - * We may need to truncate allocated but not written blocks beyond EOF. > - */ > - if (iomap->offset + iomap->length > > - ALIGN(inode->i_size, 1 << blkbits)) { > - ext4_lblk_t written_blk, end_blk; > - > - written_blk = (offset + written) >> blkbits; > - end_blk = (offset + length) >> blkbits; > - if (written_blk < end_blk && ext4_can_truncate(inode)) > - truncate = true; > - } > - /* > - * Remove inode from orphan list if we were extending a inode and > - * everything went fine. > - */ > - if (!truncate && inode->i_nlink && > - !list_empty(&EXT4_I(inode)->i_orphan)) > - ext4_orphan_del(handle, inode); > - ext4_journal_stop(handle); > - if (truncate) { > - ext4_truncate_failed_write(inode); > -orphan_del: > - /* > - * If truncate failed early the inode might still be on the > - * orphan list; we need to make sure the inode is removed from > - * the orphan list in that case. > - */ > - if (inode->i_nlink) > - ext4_orphan_del(NULL, inode); > - } > - return ret; > + return 0; > } > > const struct iomap_ops ext4_iomap_ops = { > -- > 2.20.1 >
On Tue, Nov 05, 2019 at 07:49:50AM -0800, Darrick J. Wong wrote: > On Tue, Nov 05, 2019 at 11:01:51PM +1100, Matthew Bobrowski wrote: > > In preparation for implementing the iomap direct I/O modifications, > > the inode extension/truncate code needs to be moved out from the > > ext4_iomap_end() callback. For direct I/O, if the current code > > remained, it would behave incorrrectly. Updating the inode size prior > > to converting unwritten extents would potentially allow a racing > > direct I/O read to find unwritten extents before being converted > > correctly. > > > > The inode extension/truncate code now resides within a new helper > > ext4_handle_inode_extension(). This function has been designed so that > > it can accommodate for both DAX and direct I/O extension/truncate > > operations. > > > > Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org> > > Reviewed-by: Jan Kara <jack@suse.cz> > > Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com> > > --- > > fs/ext4/file.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++- > > fs/ext4/inode.c | 48 +------------------------- > > 2 files changed, 89 insertions(+), 48 deletions(-) > > > > diff --git a/fs/ext4/file.c b/fs/ext4/file.c > > index 440f4c6ba4ee..ec54fec96a81 100644 > > --- a/fs/ext4/file.c > > +++ b/fs/ext4/file.c > > @@ -33,6 +33,7 @@ > > #include "ext4_jbd2.h" > > #include "xattr.h" > > #include "acl.h" > > +#include "truncate.h" > > > > static bool ext4_dio_supported(struct inode *inode) > > { > > @@ -234,12 +235,95 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) > > return iov_iter_count(from); > > } > > > > +static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, > > + ssize_t written, size_t count) > > +{ > > + handle_t *handle; > > + bool truncate = false; > > + u8 blkbits = inode->i_blkbits; > > + ext4_lblk_t written_blk, end_blk; > > + > > + /* > > + * Note that EXT4_I(inode)->i_disksize can get extended up to > > + * inode->i_size while the I/O was running due to writeback of delalloc > > + * blocks. But, the code in ext4_iomap_alloc() is careful to use > > + * zeroed/unwritten extents if this is possible; thus we won't leave > > + * uninitialized blocks in a file even if we didn't succeed in writing > > + * as much as we intended. > > + */ > > + WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize); > > + if (offset + count <= EXT4_I(inode)->i_disksize) { > > + /* > > + * We need to ensure that the inode is removed from the orphan > > + * list if it has been added prematurely, due to writeback of > > + * delalloc blocks. > > + */ > > + if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { > > + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); > > + > > + if (IS_ERR(handle)) { > > + ext4_orphan_del(NULL, inode); > > + return PTR_ERR(handle); > > + } > > + > > + ext4_orphan_del(handle, inode); > > + ext4_journal_stop(handle); > > I keep seeing this chunk (and the ext4_orphan_add chunk) bouncing around > through this patchset, which causes me to wonder -- would it be useful > to refactor these into small helpers? Or is it really just the same two > orphan_add/del chunks bouncing around multiple places? No, you're right. This and the other pattern is sprayed throughout the patchset, but also possibly throughout some of the other chunks of EXT4 code (I think), which I haven't touched here. So, my thought process was to actually introduce a small separate cleanup patchset that does exactly that i.e. moves out these duplicate chunks orphan_add/orphan_del into small helpers. /M
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 440f4c6ba4ee..ec54fec96a81 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -33,6 +33,7 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" +#include "truncate.h" static bool ext4_dio_supported(struct inode *inode) { @@ -234,12 +235,95 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) return iov_iter_count(from); } +static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, + ssize_t written, size_t count) +{ + handle_t *handle; + bool truncate = false; + u8 blkbits = inode->i_blkbits; + ext4_lblk_t written_blk, end_blk; + + /* + * Note that EXT4_I(inode)->i_disksize can get extended up to + * inode->i_size while the I/O was running due to writeback of delalloc + * blocks. But, the code in ext4_iomap_alloc() is careful to use + * zeroed/unwritten extents if this is possible; thus we won't leave + * uninitialized blocks in a file even if we didn't succeed in writing + * as much as we intended. + */ + WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize); + if (offset + count <= EXT4_I(inode)->i_disksize) { + /* + * We need to ensure that the inode is removed from the orphan + * list if it has been added prematurely, due to writeback of + * delalloc blocks. + */ + if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + + if (IS_ERR(handle)) { + ext4_orphan_del(NULL, inode); + return PTR_ERR(handle); + } + + ext4_orphan_del(handle, inode); + ext4_journal_stop(handle); + } + + return written; + } + + if (written < 0) + goto truncate; + + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) { + written = PTR_ERR(handle); + goto truncate; + } + + if (ext4_update_inode_size(inode, offset + written)) + ext4_mark_inode_dirty(handle, inode); + + /* + * We may need to truncate allocated but not written blocks beyond EOF. + */ + written_blk = ALIGN(offset + written, 1 << blkbits); + end_blk = ALIGN(offset + count, 1 << blkbits); + if (written_blk < end_blk && ext4_can_truncate(inode)) + truncate = true; + + /* + * Remove the inode from the orphan list if it has been extended and + * everything went OK. + */ + if (!truncate && inode->i_nlink) + ext4_orphan_del(handle, inode); + ext4_journal_stop(handle); + + if (truncate) { +truncate: + ext4_truncate_failed_write(inode); + /* + * If the truncate operation failed early, then the inode may + * still be on the orphan list. In that case, we need to try + * remove the inode from the in-memory linked list. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } + + return written; +} + #ifdef CONFIG_FS_DAX static ssize_t ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; + size_t count; + loff_t offset; + struct inode *inode = file_inode(iocb->ki_filp); if (!inode_trylock(inode)) { if (iocb->ki_flags & IOCB_NOWAIT) @@ -256,7 +340,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out; + offset = iocb->ki_pos; + count = iov_iter_count(from); ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); + ret = ext4_handle_inode_extension(inode, offset, ret, count); out: inode_unlock(inode); if (ret > 0) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9bd80df6b856..071a1f976aab 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3583,53 +3583,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { - int ret = 0; - handle_t *handle; - int blkbits = inode->i_blkbits; - bool truncate = false; - - if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) - return 0; - - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto orphan_del; - } - if (ext4_update_inode_size(inode, offset + written)) - ext4_mark_inode_dirty(handle, inode); - /* - * We may need to truncate allocated but not written blocks beyond EOF. - */ - if (iomap->offset + iomap->length > - ALIGN(inode->i_size, 1 << blkbits)) { - ext4_lblk_t written_blk, end_blk; - - written_blk = (offset + written) >> blkbits; - end_blk = (offset + length) >> blkbits; - if (written_blk < end_blk && ext4_can_truncate(inode)) - truncate = true; - } - /* - * Remove inode from orphan list if we were extending a inode and - * everything went fine. - */ - if (!truncate && inode->i_nlink && - !list_empty(&EXT4_I(inode)->i_orphan)) - ext4_orphan_del(handle, inode); - ext4_journal_stop(handle); - if (truncate) { - ext4_truncate_failed_write(inode); -orphan_del: - /* - * If truncate failed early the inode might still be on the - * orphan list; we need to make sure the inode is removed from - * the orphan list in that case. - */ - if (inode->i_nlink) - ext4_orphan_del(NULL, inode); - } - return ret; + return 0; } const struct iomap_ops ext4_iomap_ops = {