diff mbox series

[v6,01/12] fs: add atomic write unit max opt to statx

Message ID 20250408104209.1852036-2-john.g.garry@oracle.com (mailing list archive)
State New
Headers show
Series large atomic writes for xfs | expand

Commit Message

John Garry April 8, 2025, 10:41 a.m. UTC
XFS will be able to support large atomic writes (atomic write > 1x block)
in the future. This will be achieved by using different operating methods,
depending on the size of the write.

Specifically, a new method of operation based on FS atomic extent remapping
will be supported in addition to the current HW offload-based method.

The FS method will generally be appreciably slower than the
HW-offload method. However, the FS method will typically be able to
contribute to achieving a larger atomic write unit max limit.

XFS will support a hybrid mode, where the HW offload method will be used when
possible, i.e. HW offload is used when the length of the write is
supported, and at other times FS-based atomic writes will be used.

As such, there is an atomic write length above which the user may experience
appreciably slower performance.

Advertise this limit in a new statx field, stx_atomic_write_unit_max_opt.

When zero, it means that there is no such performance boundary.

Masks STATX{_ATTR}_WRITE_ATOMIC can be used to get this new field. This is
OK for older kernels which don't support this new field, as they would
already report 0 in this field (from zeroing in cp_statx()). Furthermore,
those older kernels don't support large atomic writes - apart from block
fops, but there would be consistent performance there for atomic writes
in the range [unit min, unit max].

Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 block/bdev.c              | 3 ++-
 fs/ext4/inode.c           | 2 +-
 fs/stat.c                 | 6 +++++-
 fs/xfs/xfs_iops.c         | 2 +-
 include/linux/fs.h        | 3 ++-
 include/linux/stat.h      | 1 +
 include/uapi/linux/stat.h | 8 ++++++--
 7 files changed, 18 insertions(+), 7 deletions(-)

Comments

Darrick J. Wong April 9, 2025, 2:23 a.m. UTC | #1
This probably should have cc'd linux-api...

On Tue, Apr 08, 2025 at 10:41:58AM +0000, John Garry wrote:
> XFS will be able to support large atomic writes (atomic write > 1x block)
> in future. This will be achieved by using different operating methods,
> depending on the size of the write.
> 
> Specifically a new method of operation based in FS atomic extent remapping
> will be supported in addition to the current HW offload-based method.
> 
> The FS method will generally be appreciably slower performing than the
> HW-offload method. However the FS method will be typically able to
> contribute to achieving a larger atomic write unit max limit.
> 
> XFS will support a hybrid mode, where HW offload method will be used when
> possible, i.e. HW offload is used when the length of the write is
> supported, and for other times FS-based atomic writes will be used.
> 
> As such, there is an atomic write length at which the user may experience
> appreciably slower performance.
> 
> Advertise this limit in a new statx field, stx_atomic_write_unit_max_opt.
> 
> When zero, it means that there is no such performance boundary.
> 
> Masks STATX{_ATTR}_WRITE_ATOMIC can be used to get this new field. This is
> ok for older kernels which don't support this new field, as they would
> report 0 in this field (from zeroing in cp_statx()) already. Furthermore
> those older kernels don't support large atomic writes - apart from block
> fops, but there would be consistent performance there for atomic writes
> in range [unit min, unit max].
> 
> Signed-off-by: John Garry <john.g.garry@oracle.com>

Seems fine to me, but I imagine others have stronger opinions.
Acked-by: "Darrick J. Wong" <djwong@kernel.org>

--D

> ---
>  block/bdev.c              | 3 ++-
>  fs/ext4/inode.c           | 2 +-
>  fs/stat.c                 | 6 +++++-
>  fs/xfs/xfs_iops.c         | 2 +-
>  include/linux/fs.h        | 3 ++-
>  include/linux/stat.h      | 1 +
>  include/uapi/linux/stat.h | 8 ++++++--
>  7 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/block/bdev.c b/block/bdev.c
> index 4844d1e27b6f..b4afc1763e8e 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -1301,7 +1301,8 @@ void bdev_statx(struct path *path, struct kstat *stat,
>  
>  		generic_fill_statx_atomic_writes(stat,
>  			queue_atomic_write_unit_min_bytes(bd_queue),
> -			queue_atomic_write_unit_max_bytes(bd_queue));
> +			queue_atomic_write_unit_max_bytes(bd_queue),
> +			0);
>  	}
>  
>  	stat->blksize = bdev_io_min(bdev);
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 1dc09ed5d403..51a45699112c 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -5663,7 +5663,7 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
>  			awu_max = sbi->s_awu_max;
>  		}
>  
> -		generic_fill_statx_atomic_writes(stat, awu_min, awu_max);
> +		generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0);
>  	}
>  
>  	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
> diff --git a/fs/stat.c b/fs/stat.c
> index f13308bfdc98..c41855f62d22 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -136,13 +136,15 @@ EXPORT_SYMBOL(generic_fill_statx_attr);
>   * @stat:	Where to fill in the attribute flags
>   * @unit_min:	Minimum supported atomic write length in bytes
>   * @unit_max:	Maximum supported atomic write length in bytes
> + * @unit_max_opt: Optimised maximum supported atomic write length in bytes
>   *
>   * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
>   * atomic write unit_min and unit_max values.
>   */
>  void generic_fill_statx_atomic_writes(struct kstat *stat,
>  				      unsigned int unit_min,
> -				      unsigned int unit_max)
> +				      unsigned int unit_max,
> +				      unsigned int unit_max_opt)
>  {
>  	/* Confirm that the request type is known */
>  	stat->result_mask |= STATX_WRITE_ATOMIC;
> @@ -153,6 +155,7 @@ void generic_fill_statx_atomic_writes(struct kstat *stat,
>  	if (unit_min) {
>  		stat->atomic_write_unit_min = unit_min;
>  		stat->atomic_write_unit_max = unit_max;
> +		stat->atomic_write_unit_max_opt = unit_max_opt;
>  		/* Initially only allow 1x segment */
>  		stat->atomic_write_segments_max = 1;
>  
> @@ -732,6 +735,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
>  	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
>  	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
>  	tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max;
> +	tmp.stx_atomic_write_unit_max_opt = stat->atomic_write_unit_max_opt;
>  
>  	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
>  }
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index 756bd3ca8e00..f0e5d83195df 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -610,7 +610,7 @@ xfs_report_atomic_write(
>  
>  	if (xfs_inode_can_atomicwrite(ip))
>  		unit_min = unit_max = ip->i_mount->m_sb.sb_blocksize;
> -	generic_fill_statx_atomic_writes(stat, unit_min, unit_max);
> +	generic_fill_statx_atomic_writes(stat, unit_min, unit_max, 0);
>  }
>  
>  STATIC int
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 016b0fe1536e..7b19d8f99aff 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3475,7 +3475,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
>  void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
>  void generic_fill_statx_atomic_writes(struct kstat *stat,
>  				      unsigned int unit_min,
> -				      unsigned int unit_max);
> +				      unsigned int unit_max,
> +				      unsigned int unit_max_opt);
>  extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
>  extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
>  void __inode_add_bytes(struct inode *inode, loff_t bytes);
> diff --git a/include/linux/stat.h b/include/linux/stat.h
> index be7496a6a0dd..e3d00e7bb26d 100644
> --- a/include/linux/stat.h
> +++ b/include/linux/stat.h
> @@ -57,6 +57,7 @@ struct kstat {
>  	u32		dio_read_offset_align;
>  	u32		atomic_write_unit_min;
>  	u32		atomic_write_unit_max;
> +	u32		atomic_write_unit_max_opt;
>  	u32		atomic_write_segments_max;
>  };
>  
> diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> index f78ee3670dd5..1686861aae20 100644
> --- a/include/uapi/linux/stat.h
> +++ b/include/uapi/linux/stat.h
> @@ -182,8 +182,12 @@ struct statx {
>  	/* File offset alignment for direct I/O reads */
>  	__u32	stx_dio_read_offset_align;
>  
> -	/* 0xb8 */
> -	__u64	__spare3[9];	/* Spare space for future expansion */
> +	/* Optimised max atomic write unit in bytes */
> +	__u32	stx_atomic_write_unit_max_opt;
> +	__u32	__spare2[1];
> +
> +	/* 0xc0 */
> +	__u64	__spare3[8];	/* Spare space for future expansion */
>  
>  	/* 0x100 */
>  };
> -- 
> 2.31.1
> 
>
Christoph Hellwig April 9, 2025, 10:45 a.m. UTC | #2
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/block/bdev.c b/block/bdev.c
index 4844d1e27b6f..b4afc1763e8e 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1301,7 +1301,8 @@  void bdev_statx(struct path *path, struct kstat *stat,
 
 		generic_fill_statx_atomic_writes(stat,
 			queue_atomic_write_unit_min_bytes(bd_queue),
-			queue_atomic_write_unit_max_bytes(bd_queue));
+			queue_atomic_write_unit_max_bytes(bd_queue),
+			0);
 	}
 
 	stat->blksize = bdev_io_min(bdev);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1dc09ed5d403..51a45699112c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5663,7 +5663,7 @@  int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
 			awu_max = sbi->s_awu_max;
 		}
 
-		generic_fill_statx_atomic_writes(stat, awu_min, awu_max);
+		generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0);
 	}
 
 	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
diff --git a/fs/stat.c b/fs/stat.c
index f13308bfdc98..c41855f62d22 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -136,13 +136,15 @@  EXPORT_SYMBOL(generic_fill_statx_attr);
  * @stat:	Where to fill in the attribute flags
  * @unit_min:	Minimum supported atomic write length in bytes
  * @unit_max:	Maximum supported atomic write length in bytes
+ * @unit_max_opt: Optimised maximum supported atomic write length in bytes
  *
  * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
  * atomic write unit_min and unit_max values.
  */
 void generic_fill_statx_atomic_writes(struct kstat *stat,
 				      unsigned int unit_min,
-				      unsigned int unit_max)
+				      unsigned int unit_max,
+				      unsigned int unit_max_opt)
 {
 	/* Confirm that the request type is known */
 	stat->result_mask |= STATX_WRITE_ATOMIC;
@@ -153,6 +155,7 @@  void generic_fill_statx_atomic_writes(struct kstat *stat,
 	if (unit_min) {
 		stat->atomic_write_unit_min = unit_min;
 		stat->atomic_write_unit_max = unit_max;
+		stat->atomic_write_unit_max_opt = unit_max_opt;
 		/* Initially only allow 1x segment */
 		stat->atomic_write_segments_max = 1;
 
@@ -732,6 +735,7 @@  cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
 	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
 	tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max;
+	tmp.stx_atomic_write_unit_max_opt = stat->atomic_write_unit_max_opt;
 
 	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 756bd3ca8e00..f0e5d83195df 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -610,7 +610,7 @@  xfs_report_atomic_write(
 
 	if (xfs_inode_can_atomicwrite(ip))
 		unit_min = unit_max = ip->i_mount->m_sb.sb_blocksize;
-	generic_fill_statx_atomic_writes(stat, unit_min, unit_max);
+	generic_fill_statx_atomic_writes(stat, unit_min, unit_max, 0);
 }
 
 STATIC int
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 016b0fe1536e..7b19d8f99aff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3475,7 +3475,8 @@  void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
 void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 void generic_fill_statx_atomic_writes(struct kstat *stat,
 				      unsigned int unit_min,
-				      unsigned int unit_max);
+				      unsigned int unit_max,
+				      unsigned int unit_max_opt);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/linux/stat.h b/include/linux/stat.h
index be7496a6a0dd..e3d00e7bb26d 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -57,6 +57,7 @@  struct kstat {
 	u32		dio_read_offset_align;
 	u32		atomic_write_unit_min;
 	u32		atomic_write_unit_max;
+	u32		atomic_write_unit_max_opt;
 	u32		atomic_write_segments_max;
 };
 
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index f78ee3670dd5..1686861aae20 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -182,8 +182,12 @@  struct statx {
 	/* File offset alignment for direct I/O reads */
 	__u32	stx_dio_read_offset_align;
 
-	/* 0xb8 */
-	__u64	__spare3[9];	/* Spare space for future expansion */
+	/* Optimised max atomic write unit in bytes */
+	__u32	stx_atomic_write_unit_max_opt;
+	__u32	__spare2[1];
+
+	/* 0xc0 */
+	__u64	__spare3[8];	/* Spare space for future expansion */
 
 	/* 0x100 */
 };