diff mbox series

[v2,07/14] fs: iomap: Sub-extent zeroing

Message ID 20240304130428.13026-8-john.g.garry@oracle.com (mailing list archive)
State New
Headers show
Series block atomic writes for XFS | expand

Commit Message

John Garry March 4, 2024, 1:04 p.m. UTC
For FS_XFLAG_FORCEALIGN support, we want to treat any sub-extent IO like
sub-fsblock DIO, in that we will zero the sub-extent when the mapping is
unwritten.

This will be important for atomic writes support, in that atomically
writing over a partially written extent would mean that we would need to
do the unwritten extent conversion write separately, and the write could
no longer be atomic.

It is the task of the FS to set iomap.extent_shift per iter to indicate
sub-extent zeroing required.

Maybe a macro like i_blocksize() should be introduced for extent sizes,
instead of using extent_shift. It would also eliminate excessive use
of xfs_get_extss() for XFS in future.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/iomap/direct-io.c  | 14 ++++++++------
 include/linux/iomap.h |  1 +
 2 files changed, 9 insertions(+), 6 deletions(-)

Comments

Dave Chinner March 6, 2024, 9:14 p.m. UTC | #1
On Mon, Mar 04, 2024 at 01:04:21PM +0000, John Garry wrote:
> For FS_XFLAG_FORCEALIGN support, we want to treat any sub-extent IO like
> sub-fsblock DIO, in that we will zero the sub-extent when the mapping is
> unwritten.
> 
> This will be important for atomic writes support, in that atomically
> writing over a partially written extent would mean that we would need to
> do the unwritten extent conversion write separately, and the write could
> no longer be atomic.
> 
> It is the task of the FS to set iomap.extent_shift per iter to indicate
> sub-extent zeroing required.
> 
> Maybe a macro like i_blocksize() should be introduced for extent sizes,
> instead of using extent_shift. It would also eliminate excessive use
> of xfs_get_extss() for XFS in future.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/iomap/direct-io.c  | 14 ++++++++------
>  include/linux/iomap.h |  1 +
>  2 files changed, 9 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index bcd3f8cf5ea4..733f83f839b6 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -277,7 +277,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  {
>  	const struct iomap *iomap = &iter->iomap;
>  	struct inode *inode = iter->inode;
> -	unsigned int fs_block_size = i_blocksize(inode), pad;
> +	unsigned int zeroing_size, pad;
>  	loff_t length = iomap_length(iter);
>  	loff_t pos = iter->pos;
>  	blk_opf_t bio_opf;
> @@ -288,6 +288,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>  	size_t copied = 0;
>  	size_t orig_count;
>  
> +	zeroing_size = i_blocksize(inode) << iomap->extent_shift;

The iomap interfaces use units of bytes for offsets, sizes, ranges,
etc. Using shifts to define a granularity value seems like a
throwback to decades old XFS code and just a bit weird nowdays.  Can
we just pass this as a byte count? i.e.:

	zeroing_size = i_blocksize(inode);
	if (iomap->extent_size)
		zeroing_size = iomap->extent_size;

Cheers,

Dave.
John Garry March 7, 2024, 11:51 a.m. UTC | #2
On 06/03/2024 21:14, Dave Chinner wrote:
>> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
>> index bcd3f8cf5ea4..733f83f839b6 100644
>> --- a/fs/iomap/direct-io.c
>> +++ b/fs/iomap/direct-io.c
>> @@ -277,7 +277,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>>   {
>>   	const struct iomap *iomap = &iter->iomap;
>>   	struct inode *inode = iter->inode;
>> -	unsigned int fs_block_size = i_blocksize(inode), pad;
>> +	unsigned int zeroing_size, pad;
>>   	loff_t length = iomap_length(iter);
>>   	loff_t pos = iter->pos;
>>   	blk_opf_t bio_opf;
>> @@ -288,6 +288,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
>>   	size_t copied = 0;
>>   	size_t orig_count;
>>   
>> +	zeroing_size = i_blocksize(inode) << iomap->extent_shift;
> The iomap interfaces use units of bytes for offsets, sizes, ranges,
> etc. Using shifts to define a granularity value seems like a
> throwback to decades old XFS code and just a bit weird nowdays.  Can
> we just pass this as a byte count? i.e.:
> 
> 	zeroing_size = i_blocksize(inode);
> 	if (iomap->extent_size)
> 		zeroing_size = iomap->extent_size;

Fine

I was also thinking of something like i_extentsize() for vfs, which 
would save requiring adding iomap->extent_size, but decided to limit vfs 
changes.

Thanks,
John
diff mbox series

Patch

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index bcd3f8cf5ea4..733f83f839b6 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -277,7 +277,7 @@  static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 {
 	const struct iomap *iomap = &iter->iomap;
 	struct inode *inode = iter->inode;
-	unsigned int fs_block_size = i_blocksize(inode), pad;
+	unsigned int zeroing_size, pad;
 	loff_t length = iomap_length(iter);
 	loff_t pos = iter->pos;
 	blk_opf_t bio_opf;
@@ -288,6 +288,8 @@  static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	size_t copied = 0;
 	size_t orig_count;
 
+	zeroing_size = i_blocksize(inode) << iomap->extent_shift;
+
 	if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
 	    !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
 		return -EINVAL;
@@ -354,8 +356,8 @@  static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		dio->iocb->ki_flags &= ~IOCB_HIPRI;
 
 	if (need_zeroout) {
-		/* zero out from the start of the block to the write offset */
-		pad = pos & (fs_block_size - 1);
+		/* zero out from the start of the region to the write offset */
+		pad = pos & (zeroing_size - 1);
 		if (pad)
 			iomap_dio_zero(iter, dio, pos - pad, pad);
 	}
@@ -427,10 +429,10 @@  static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 zero_tail:
 	if (need_zeroout ||
 	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
-		/* zero out from the end of the write to the end of the block */
-		pad = pos & (fs_block_size - 1);
+		/* zero out from the end of the write to the end of the region */
+		pad = pos & (zeroing_size - 1);
 		if (pad)
-			iomap_dio_zero(iter, dio, pos, fs_block_size - pad);
+			iomap_dio_zero(iter, dio, pos, zeroing_size - pad);
 	}
 out:
 	/* Undo iter limitation to current extent */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 96dd0acbba44..89cd3dcbb8ec 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -97,6 +97,7 @@  struct iomap {
 	u64			length;	/* length of mapping, bytes */
 	u16			type;	/* type of mapping */
 	u16			flags;	/* flags for mapping */
+	unsigned int		extent_shift;
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
 	void			*inline_data;