diff mbox series

[v2,12/14] fs: xfs: Support atomic write for statx

Message ID 20240304130428.13026-13-john.g.garry@oracle.com (mailing list archive)
State New, archived
Headers show
Series block atomic writes for XFS | expand

Commit Message

John Garry March 4, 2024, 1:04 p.m. UTC
Support providing info on atomic write unit min and max for an inode.

For simplicity, currently we limit the min at the FS block size, but a
lower limit could be supported in future. This is required by iomap
DIO.

The atomic write unit min and max is limited by the guaranteed extent
alignment for the inode.

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 fs/xfs/xfs_iops.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

Comments

Dave Chinner March 6, 2024, 9:31 p.m. UTC | #1
On Mon, Mar 04, 2024 at 01:04:26PM +0000, John Garry wrote:
> Support providing info on atomic write unit min and max for an inode.
> 
> For simplicity, currently we limit the min at the FS block size, but a
> lower limit could be supported in future. This is required by iomap
> DIO.
> 
> The atomic write unit min and max is limited by the guaranteed extent
> alignment for the inode.
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  fs/xfs/xfs_iops.c | 38 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
> 
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index a0d77f5f512e..6316448083d2 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -546,6 +546,37 @@ xfs_stat_blksize(
>  	return PAGE_SIZE;
>  }
>  
> +static void
> +xfs_get_atomic_write_attr(
> +	struct xfs_inode	*ip,
> +	unsigned int		*unit_min,
> +	unsigned int		*unit_max)
> +{
> +	xfs_extlen_t		extsz = xfs_get_extsz(ip);
> +	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
> +	struct block_device	*bdev = target->bt_bdev;
> +	struct request_queue	*q = bdev->bd_queue;
> +	struct xfs_mount	*mp = ip->i_mount;
> +	struct xfs_sb		*sbp = &mp->m_sb;
> +	unsigned int		awu_min, awu_max;
> +	unsigned int		extsz_bytes = XFS_FSB_TO_B(mp, extsz);
> +
> +	awu_min = queue_atomic_write_unit_min_bytes(q);
> +	awu_max = queue_atomic_write_unit_max_bytes(q);

We really should be storing these in the xfs_buftarg at mount time,
like we do logical and physical sector sizes. Similar to sector
sizes, they *must not change* once the filesystem has been created
on the device, let alone during an active mount. The whole point of
the xfs_buftarg is to store the information the filesystem
needs to do IO to the underlying block device so we don't have to
chase pointers deep into the block device whenever we need to use
static geometry information.....

> +	if (sbp->sb_blocksize > awu_max || awu_min > sbp->sb_blocksize ||
> +	    !xfs_inode_atomicwrites(ip)) {
> +		*unit_min = 0;
> +		*unit_max = 0;
> +		return;
> +	}

Again, this is comparing static geometry - if the block size doesn't
allow atomic writes, then the inode flag should never be set. i.e.
geometry is checked when configuring atomic writes, not in every
place we need to check if atomic writes are supported. Hence this
should simply be:

	if (!xfs_inode_has_atomic_writes(ip)) {
		*unit_min = 0;
		*unit_max = 0;
		return;
	}

before we even look at the xfs_buftarg to get the supported min/max
values for the given device.

Cheers,

Dave.
John Garry March 7, 2024, 10:35 a.m. UTC | #2
On 06/03/2024 21:31, Dave Chinner wrote:
>> +	xfs_extlen_t		extsz = xfs_get_extsz(ip);
>> +	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
>> +	struct block_device	*bdev = target->bt_bdev;
>> +	struct request_queue	*q = bdev->bd_queue;
>> +	struct xfs_mount	*mp = ip->i_mount;
>> +	struct xfs_sb		*sbp = &mp->m_sb;
>> +	unsigned int		awu_min, awu_max;
>> +	unsigned int		extsz_bytes = XFS_FSB_TO_B(mp, extsz);
>> +
>> +	awu_min = queue_atomic_write_unit_min_bytes(q);
>> +	awu_max = queue_atomic_write_unit_max_bytes(q);
> We really should be storing these in the xfs_buftarg at mount time,
> like we do logical and physical sector sizes. 

This has been mentioned previously, and Darrick thought that it was not 
safe. Please see first response in:
https://lore.kernel.org/linux-xfs/20231003161029.GG21298@frogsfrogsfrogs/#t

So if this really is true, then I'll stick with something like what I 
have here and add a comment on that.

However, in this series the block layer does check for out-of-range 
atomic write BIOs in 1/14. So we could store the values in xfs_buftarg, 
as you suggest for the lookup here. If the bdev geometry does really 
change beneath us, the worst thing that happens is that we may report 
incorrect values for statx.

> Similar to sector
> sizes, they *must not change* once the filesystem has been created
> on the device, let alone during an active mount. The whole point of
> the xfs_buftarg is to store the information the filesystem
> needs to do IO to the underlying block device so we don't have to
> chase pointers deep into the block device whenever we need to use
> static geometry information.....
> 
>> +	if (sbp->sb_blocksize > awu_max || awu_min > sbp->sb_blocksize ||
>> +	    !xfs_inode_atomicwrites(ip)) {
>> +		*unit_min = 0;
>> +		*unit_max = 0;
>> +		return;
>> +	}
> Again, this is comparing static geometry - if the block size doesn't
> allow atomic writes, then the inode flag should never be set. i.e.
> geometry is checked when configuring atomic writes, not in every
> place we need to check if atomic writes are supported. Hence this
> should simply be:
> 
> 	if (!xfs_inode_has_atomic_writes(ip)) {
> 		*unit_min = 0;
> 		*unit_max = 0;
> 		return;
> 	}
>
> before we even look at the xfs_buftarg to get the supported min/max
> values for the given device.

Thanks,
John
diff mbox series

Patch

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index a0d77f5f512e..6316448083d2 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -546,6 +546,37 @@  xfs_stat_blksize(
 	return PAGE_SIZE;
 }
 
+static void
+xfs_get_atomic_write_attr(
+	struct xfs_inode	*ip,
+	unsigned int		*unit_min,
+	unsigned int		*unit_max)
+{
+	xfs_extlen_t		extsz = xfs_get_extsz(ip);
+	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+	struct block_device	*bdev = target->bt_bdev;
+	struct request_queue	*q = bdev->bd_queue;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_sb		*sbp = &mp->m_sb;
+	unsigned int		awu_min, awu_max;
+	unsigned int		extsz_bytes = XFS_FSB_TO_B(mp, extsz);
+
+	awu_min = queue_atomic_write_unit_min_bytes(q);
+	awu_max = queue_atomic_write_unit_max_bytes(q);
+
+	if (sbp->sb_blocksize > awu_max || awu_min > sbp->sb_blocksize ||
+	    !xfs_inode_atomicwrites(ip)) {
+		*unit_min = 0;
+		*unit_max = 0;
+		return;
+	}
+
+	/* Floor at FS block size */
+	*unit_min = max(sbp->sb_blocksize, awu_min);
+
+	*unit_max = min(extsz_bytes, awu_max);
+}
+
 STATIC int
 xfs_vn_getattr(
 	struct mnt_idmap	*idmap,
@@ -619,6 +650,13 @@  xfs_vn_getattr(
 			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
 			stat->dio_offset_align = bdev_logical_block_size(bdev);
 		}
+		if (request_mask & STATX_WRITE_ATOMIC) {
+			unsigned int unit_min, unit_max;
+
+			xfs_get_atomic_write_attr(ip, &unit_min, &unit_max);
+			generic_fill_statx_atomic_writes(stat,
+				unit_min, unit_max);
+		}
 		fallthrough;
 	default:
 		stat->blksize = xfs_stat_blksize(ip);