[v7,08/11] ext4: move inode extension/truncate code out from ->iomap_end() callback
diff mbox series

Message ID d41ffa26e20b15b12895812c3cad7c91a6a59bc6.1572949325.git.mbobrowski@mbobrowski.org
State New
Headers show
Series
  • ext4: port direct I/O to iomap infrastructure
Related show

Commit Message

Matthew Bobrowski Nov. 5, 2019, 12:01 p.m. UTC
In preparation for implementing the iomap direct I/O modifications,
the inode extension/truncate code needs to be moved out from the
ext4_iomap_end() callback. For direct I/O, if the current code
remained, it would behave incorrrectly. Updating the inode size prior
to converting unwritten extents would potentially allow a racing
direct I/O read to find unwritten extents before being converted
correctly.

The inode extension/truncate code now resides within a new helper
ext4_handle_inode_extension(). This function has been designed so that
it can accommodate for both DAX and direct I/O extension/truncate
operations.

Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
---
 fs/ext4/file.c  | 89 ++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/inode.c | 48 +-------------------------
 2 files changed, 89 insertions(+), 48 deletions(-)

Comments

Darrick J. Wong Nov. 5, 2019, 3:49 p.m. UTC | #1
On Tue, Nov 05, 2019 at 11:01:51PM +1100, Matthew Bobrowski wrote:
> In preparation for implementing the iomap direct I/O modifications,
> the inode extension/truncate code needs to be moved out from the
> ext4_iomap_end() callback. For direct I/O, if the current code
> remained, it would behave incorrrectly. Updating the inode size prior
> to converting unwritten extents would potentially allow a racing
> direct I/O read to find unwritten extents before being converted
> correctly.
> 
> The inode extension/truncate code now resides within a new helper
> ext4_handle_inode_extension(). This function has been designed so that
> it can accommodate for both DAX and direct I/O extension/truncate
> operations.
> 
> Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
> Reviewed-by: Jan Kara <jack@suse.cz>
> Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
> ---
>  fs/ext4/file.c  | 89 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/ext4/inode.c | 48 +-------------------------
>  2 files changed, 89 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index 440f4c6ba4ee..ec54fec96a81 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -33,6 +33,7 @@
>  #include "ext4_jbd2.h"
>  #include "xattr.h"
>  #include "acl.h"
> +#include "truncate.h"
>  
>  static bool ext4_dio_supported(struct inode *inode)
>  {
> @@ -234,12 +235,95 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
>  	return iov_iter_count(from);
>  }
>  
> +static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
> +					   ssize_t written, size_t count)
> +{
> +	handle_t *handle;
> +	bool truncate = false;
> +	u8 blkbits = inode->i_blkbits;
> +	ext4_lblk_t written_blk, end_blk;
> +
> +	/*
> +	 * Note that EXT4_I(inode)->i_disksize can get extended up to
> +	 * inode->i_size while the I/O was running due to writeback of delalloc
> +	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
> +	 * zeroed/unwritten extents if this is possible; thus we won't leave
> +	 * uninitialized blocks in a file even if we didn't succeed in writing
> +	 * as much as we intended.
> +	 */
> +	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
> +	if (offset + count <= EXT4_I(inode)->i_disksize) {
> +		/*
> +		 * We need to ensure that the inode is removed from the orphan
> +		 * list if it has been added prematurely, due to writeback of
> +		 * delalloc blocks.
> +		 */
> +		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
> +			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
> +
> +			if (IS_ERR(handle)) {
> +				ext4_orphan_del(NULL, inode);
> +				return PTR_ERR(handle);
> +			}
> +
> +			ext4_orphan_del(handle, inode);
> +			ext4_journal_stop(handle);

I keep seeing this chunk (and the ext4_orphan_add chunk) bouncing around
through this patchset, which causes me to wonder -- would it be useful
to refactor these into small helpers?  Or is it really just the same two
orphan_add/del chunks bouncing around multiple places?

--D

> +		}
> +
> +		return written;
> +	}
> +
> +	if (written < 0)
> +		goto truncate;
> +
> +	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
> +	if (IS_ERR(handle)) {
> +		written = PTR_ERR(handle);
> +		goto truncate;
> +	}
> +
> +	if (ext4_update_inode_size(inode, offset + written))
> +		ext4_mark_inode_dirty(handle, inode);
> +
> +	/*
> +	 * We may need to truncate allocated but not written blocks beyond EOF.
> +	 */
> +	written_blk = ALIGN(offset + written, 1 << blkbits);
> +	end_blk = ALIGN(offset + count, 1 << blkbits);
> +	if (written_blk < end_blk && ext4_can_truncate(inode))
> +		truncate = true;
> +
> +	/*
> +	 * Remove the inode from the orphan list if it has been extended and
> +	 * everything went OK.
> +	 */
> +	if (!truncate && inode->i_nlink)
> +		ext4_orphan_del(handle, inode);
> +	ext4_journal_stop(handle);
> +
> +	if (truncate) {
> +truncate:
> +		ext4_truncate_failed_write(inode);
> +		/*
> +		 * If the truncate operation failed early, then the inode may
> +		 * still be on the orphan list. In that case, we need to try
> +		 * remove the inode from the in-memory linked list.
> +		 */
> +		if (inode->i_nlink)
> +			ext4_orphan_del(NULL, inode);
> +	}
> +
> +	return written;
> +}
> +
>  #ifdef CONFIG_FS_DAX
>  static ssize_t
>  ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  {
> -	struct inode *inode = file_inode(iocb->ki_filp);
>  	ssize_t ret;
> +	size_t count;
> +	loff_t offset;
> +	struct inode *inode = file_inode(iocb->ki_filp);
>  
>  	if (!inode_trylock(inode)) {
>  		if (iocb->ki_flags & IOCB_NOWAIT)
> @@ -256,7 +340,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  	if (ret)
>  		goto out;
>  
> +	offset = iocb->ki_pos;
> +	count = iov_iter_count(from);
>  	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
> +	ret = ext4_handle_inode_extension(inode, offset, ret, count);
>  out:
>  	inode_unlock(inode);
>  	if (ret > 0)
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 9bd80df6b856..071a1f976aab 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3583,53 +3583,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>  static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
>  			  ssize_t written, unsigned flags, struct iomap *iomap)
>  {
> -	int ret = 0;
> -	handle_t *handle;
> -	int blkbits = inode->i_blkbits;
> -	bool truncate = false;
> -
> -	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
> -		return 0;
> -
> -	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
> -	if (IS_ERR(handle)) {
> -		ret = PTR_ERR(handle);
> -		goto orphan_del;
> -	}
> -	if (ext4_update_inode_size(inode, offset + written))
> -		ext4_mark_inode_dirty(handle, inode);
> -	/*
> -	 * We may need to truncate allocated but not written blocks beyond EOF.
> -	 */
> -	if (iomap->offset + iomap->length > 
> -	    ALIGN(inode->i_size, 1 << blkbits)) {
> -		ext4_lblk_t written_blk, end_blk;
> -
> -		written_blk = (offset + written) >> blkbits;
> -		end_blk = (offset + length) >> blkbits;
> -		if (written_blk < end_blk && ext4_can_truncate(inode))
> -			truncate = true;
> -	}
> -	/*
> -	 * Remove inode from orphan list if we were extending a inode and
> -	 * everything went fine.
> -	 */
> -	if (!truncate && inode->i_nlink &&
> -	    !list_empty(&EXT4_I(inode)->i_orphan))
> -		ext4_orphan_del(handle, inode);
> -	ext4_journal_stop(handle);
> -	if (truncate) {
> -		ext4_truncate_failed_write(inode);
> -orphan_del:
> -		/*
> -		 * If truncate failed early the inode might still be on the
> -		 * orphan list; we need to make sure the inode is removed from
> -		 * the orphan list in that case.
> -		 */
> -		if (inode->i_nlink)
> -			ext4_orphan_del(NULL, inode);
> -	}
> -	return ret;
> +	return 0;
>  }
>  
>  const struct iomap_ops ext4_iomap_ops = {
> -- 
> 2.20.1
>
Matthew Bobrowski Nov. 5, 2019, 9:10 p.m. UTC | #2
On Tue, Nov 05, 2019 at 07:49:50AM -0800, Darrick J. Wong wrote:
> On Tue, Nov 05, 2019 at 11:01:51PM +1100, Matthew Bobrowski wrote:
> > In preparation for implementing the iomap direct I/O modifications,
> > the inode extension/truncate code needs to be moved out from the
> > ext4_iomap_end() callback. For direct I/O, if the current code
> > remained, it would behave incorrrectly. Updating the inode size prior
> > to converting unwritten extents would potentially allow a racing
> > direct I/O read to find unwritten extents before being converted
> > correctly.
> > 
> > The inode extension/truncate code now resides within a new helper
> > ext4_handle_inode_extension(). This function has been designed so that
> > it can accommodate for both DAX and direct I/O extension/truncate
> > operations.
> > 
> > Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
> > Reviewed-by: Jan Kara <jack@suse.cz>
> > Reviewed-by: Ritesh Harjani <riteshh@linux.ibm.com>
> > ---
> >  fs/ext4/file.c  | 89 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >  fs/ext4/inode.c | 48 +-------------------------
> >  2 files changed, 89 insertions(+), 48 deletions(-)
> > 
> > diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> > index 440f4c6ba4ee..ec54fec96a81 100644
> > --- a/fs/ext4/file.c
> > +++ b/fs/ext4/file.c
> > @@ -33,6 +33,7 @@
> >  #include "ext4_jbd2.h"
> >  #include "xattr.h"
> >  #include "acl.h"
> > +#include "truncate.h"
> >  
> >  static bool ext4_dio_supported(struct inode *inode)
> >  {
> > @@ -234,12 +235,95 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
> >  	return iov_iter_count(from);
> >  }
> >  
> > +static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
> > +					   ssize_t written, size_t count)
> > +{
> > +	handle_t *handle;
> > +	bool truncate = false;
> > +	u8 blkbits = inode->i_blkbits;
> > +	ext4_lblk_t written_blk, end_blk;
> > +
> > +	/*
> > +	 * Note that EXT4_I(inode)->i_disksize can get extended up to
> > +	 * inode->i_size while the I/O was running due to writeback of delalloc
> > +	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
> > +	 * zeroed/unwritten extents if this is possible; thus we won't leave
> > +	 * uninitialized blocks in a file even if we didn't succeed in writing
> > +	 * as much as we intended.
> > +	 */
> > +	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
> > +	if (offset + count <= EXT4_I(inode)->i_disksize) {
> > +		/*
> > +		 * We need to ensure that the inode is removed from the orphan
> > +		 * list if it has been added prematurely, due to writeback of
> > +		 * delalloc blocks.
> > +		 */
> > +		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
> > +			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
> > +
> > +			if (IS_ERR(handle)) {
> > +				ext4_orphan_del(NULL, inode);
> > +				return PTR_ERR(handle);
> > +			}
> > +
> > +			ext4_orphan_del(handle, inode);
> > +			ext4_journal_stop(handle);
> 
> I keep seeing this chunk (and the ext4_orphan_add chunk) bouncing around
> through this patchset, which causes me to wonder -- would it be useful
> to refactor these into small helpers?  Or is it really just the same two
> orphan_add/del chunks bouncing around multiple places?

No, you're right. This and the other pattern is sprayed throughout the
patchset, but also possibly throughout some of the other chunks of
EXT4 code (I think), which I haven't touched here. So, my thought
process was to actually introduce a small separate cleanup patchset
that does exactly that i.e. moves out these duplicate chunks
orphan_add/orphan_del into small helpers.

/M

Patch
diff mbox series

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 440f4c6ba4ee..ec54fec96a81 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -33,6 +33,7 @@ 
 #include "ext4_jbd2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "truncate.h"
 
 static bool ext4_dio_supported(struct inode *inode)
 {
@@ -234,12 +235,95 @@  static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	return iov_iter_count(from);
 }
 
+static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
+					   ssize_t written, size_t count)
+{
+	handle_t *handle;
+	bool truncate = false;
+	u8 blkbits = inode->i_blkbits;
+	ext4_lblk_t written_blk, end_blk;
+
+	/*
+	 * Note that EXT4_I(inode)->i_disksize can get extended up to
+	 * inode->i_size while the I/O was running due to writeback of delalloc
+	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
+	 * zeroed/unwritten extents if this is possible; thus we won't leave
+	 * uninitialized blocks in a file even if we didn't succeed in writing
+	 * as much as we intended.
+	 */
+	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
+	if (offset + count <= EXT4_I(inode)->i_disksize) {
+		/*
+		 * We need to ensure that the inode is removed from the orphan
+		 * list if it has been added prematurely, due to writeback of
+		 * delalloc blocks.
+		 */
+		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
+			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+
+			if (IS_ERR(handle)) {
+				ext4_orphan_del(NULL, inode);
+				return PTR_ERR(handle);
+			}
+
+			ext4_orphan_del(handle, inode);
+			ext4_journal_stop(handle);
+		}
+
+		return written;
+	}
+
+	if (written < 0)
+		goto truncate;
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+	if (IS_ERR(handle)) {
+		written = PTR_ERR(handle);
+		goto truncate;
+	}
+
+	if (ext4_update_inode_size(inode, offset + written))
+		ext4_mark_inode_dirty(handle, inode);
+
+	/*
+	 * We may need to truncate allocated but not written blocks beyond EOF.
+	 */
+	written_blk = ALIGN(offset + written, 1 << blkbits);
+	end_blk = ALIGN(offset + count, 1 << blkbits);
+	if (written_blk < end_blk && ext4_can_truncate(inode))
+		truncate = true;
+
+	/*
+	 * Remove the inode from the orphan list if it has been extended and
+	 * everything went OK.
+	 */
+	if (!truncate && inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+	ext4_journal_stop(handle);
+
+	if (truncate) {
+truncate:
+		ext4_truncate_failed_write(inode);
+		/*
+		 * If the truncate operation failed early, then the inode may
+		 * still be on the orphan list. In that case, we need to try
+		 * remove the inode from the in-memory linked list.
+		 */
+		if (inode->i_nlink)
+			ext4_orphan_del(NULL, inode);
+	}
+
+	return written;
+}
+
 #ifdef CONFIG_FS_DAX
 static ssize_t
 ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
 	ssize_t ret;
+	size_t count;
+	loff_t offset;
+	struct inode *inode = file_inode(iocb->ki_filp);
 
 	if (!inode_trylock(inode)) {
 		if (iocb->ki_flags & IOCB_NOWAIT)
@@ -256,7 +340,10 @@  ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (ret)
 		goto out;
 
+	offset = iocb->ki_pos;
+	count = iov_iter_count(from);
 	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+	ret = ext4_handle_inode_extension(inode, offset, ret, count);
 out:
 	inode_unlock(inode);
 	if (ret > 0)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9bd80df6b856..071a1f976aab 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3583,53 +3583,7 @@  static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 			  ssize_t written, unsigned flags, struct iomap *iomap)
 {
-	int ret = 0;
-	handle_t *handle;
-	int blkbits = inode->i_blkbits;
-	bool truncate = false;
-
-	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
-		return 0;
-
-	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto orphan_del;
-	}
-	if (ext4_update_inode_size(inode, offset + written))
-		ext4_mark_inode_dirty(handle, inode);
-	/*
-	 * We may need to truncate allocated but not written blocks beyond EOF.
-	 */
-	if (iomap->offset + iomap->length > 
-	    ALIGN(inode->i_size, 1 << blkbits)) {
-		ext4_lblk_t written_blk, end_blk;
-
-		written_blk = (offset + written) >> blkbits;
-		end_blk = (offset + length) >> blkbits;
-		if (written_blk < end_blk && ext4_can_truncate(inode))
-			truncate = true;
-	}
-	/*
-	 * Remove inode from orphan list if we were extending a inode and
-	 * everything went fine.
-	 */
-	if (!truncate && inode->i_nlink &&
-	    !list_empty(&EXT4_I(inode)->i_orphan))
-		ext4_orphan_del(handle, inode);
-	ext4_journal_stop(handle);
-	if (truncate) {
-		ext4_truncate_failed_write(inode);
-orphan_del:
-		/*
-		 * If truncate failed early the inode might still be on the
-		 * orphan list; we need to make sure the inode is removed from
-		 * the orphan list in that case.
-		 */
-		if (inode->i_nlink)
-			ext4_orphan_del(NULL, inode);
-	}
-	return ret;
+	return 0;
 }
 
 const struct iomap_ops ext4_iomap_ops = {