diff mbox series

[2/3] fs: add STATX_DIO_READ_ALIGN

Message ID 20240828051149.1897291-3-hch@lst.de (mailing list archive)
State New
Headers show
Series [1/3] fs: reformat the statx definition | expand

Commit Message

Christoph Hellwig Aug. 28, 2024, 5:11 a.m. UTC
Add a separate dio read align field, as many out of place write
file systems can easily do reads aligned to the device sector size,
but require bigger alignment for writes.

This is usually papered over by falling back to buffered I/O for smaller
writes and doing read-modify-write cycles, but performance for this
sucks, so applications benefit from knowing the actual write alignment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/stat.c                 | 1 +
 include/linux/stat.h      | 1 +
 include/uapi/linux/stat.h | 4 +++-
 3 files changed, 5 insertions(+), 1 deletion(-)

Comments

Darrick J. Wong Aug. 28, 2024, 4:24 p.m. UTC | #1
On Wed, Aug 28, 2024 at 08:11:02AM +0300, Christoph Hellwig wrote:
> Add a separate dio read align field, as many out of place write
> file systems can easily do reads aligned to the device sector size,
> but require bigger alignment for writes.
> 
> This is usually papered over by falling back to buffered I/O for smaller
> writes and doing read-modify-write cycles, but performance for this
> sucks, so applications benefit from knowing the actual write alignment.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks fine to me...
Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> ---
>  fs/stat.c                 | 1 +
>  include/linux/stat.h      | 1 +
>  include/uapi/linux/stat.h | 4 +++-
>  3 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/stat.c b/fs/stat.c
> index 89ce1be563108c..044e7ad9f3abc2 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -700,6 +700,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
>  	tmp.stx_mnt_id = stat->mnt_id;
>  	tmp.stx_dio_mem_align = stat->dio_mem_align;
>  	tmp.stx_dio_offset_align = stat->dio_offset_align;
> +	tmp.stx_dio_read_offset_align = stat->dio_read_offset_align;
>  	tmp.stx_subvol = stat->subvol;
>  	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
>  	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
> diff --git a/include/linux/stat.h b/include/linux/stat.h
> index 3d900c86981c5b..9d8382e23a9c69 100644
> --- a/include/linux/stat.h
> +++ b/include/linux/stat.h
> @@ -52,6 +52,7 @@ struct kstat {
>  	u64		mnt_id;
>  	u32		dio_mem_align;
>  	u32		dio_offset_align;
> +	u32		dio_read_offset_align;
>  	u64		change_cookie;
>  	u64		subvol;
>  	u32		atomic_write_unit_min;
> diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> index 8b35d7d511a287..f78ee3670dd5d7 100644
> --- a/include/uapi/linux/stat.h
> +++ b/include/uapi/linux/stat.h
> @@ -179,7 +179,8 @@ struct statx {
>  	/* Max atomic write segment count */
>  	__u32   stx_atomic_write_segments_max;
>  
> -	__u32   __spare1[1];
> +	/* File offset alignment for direct I/O reads */
> +	__u32	stx_dio_read_offset_align;
>  
>  	/* 0xb8 */
>  	__u64	__spare3[9];	/* Spare space for future expansion */
> @@ -213,6 +214,7 @@ struct statx {
>  #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
>  #define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
>  #define STATX_WRITE_ATOMIC	0x00010000U	/* Want/got atomic_write_* fields */
> +#define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */
>  
>  #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
>  
> -- 
> 2.43.0
> 
>
Eric Biggers Aug. 28, 2024, 11:52 p.m. UTC | #2
On Wed, Aug 28, 2024 at 08:11:02AM +0300, Christoph Hellwig wrote:
> Add a separate dio read align field, as many out of place write
> file systems can easily do reads aligned to the device sector size,
> but require bigger alignment for writes.
> 
> This is usually papered over by falling back to buffered I/O for smaller
> writes and doing read-modify-write cycles, but performance for this
> sucks, so applications benefit from knowing the actual write alignment.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/stat.c                 | 1 +
>  include/linux/stat.h      | 1 +
>  include/uapi/linux/stat.h | 4 +++-
>  3 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/stat.c b/fs/stat.c
> index 89ce1be563108c..044e7ad9f3abc2 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -700,6 +700,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
>  	tmp.stx_mnt_id = stat->mnt_id;
>  	tmp.stx_dio_mem_align = stat->dio_mem_align;
>  	tmp.stx_dio_offset_align = stat->dio_offset_align;
> +	tmp.stx_dio_read_offset_align = stat->dio_read_offset_align;
>  	tmp.stx_subvol = stat->subvol;
>  	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
>  	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
> diff --git a/include/linux/stat.h b/include/linux/stat.h
> index 3d900c86981c5b..9d8382e23a9c69 100644
> --- a/include/linux/stat.h
> +++ b/include/linux/stat.h
> @@ -52,6 +52,7 @@ struct kstat {
>  	u64		mnt_id;
>  	u32		dio_mem_align;
>  	u32		dio_offset_align;
> +	u32		dio_read_offset_align;
>  	u64		change_cookie;
>  	u64		subvol;
>  	u32		atomic_write_unit_min;
> diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
> index 8b35d7d511a287..f78ee3670dd5d7 100644
> --- a/include/uapi/linux/stat.h
> +++ b/include/uapi/linux/stat.h
> @@ -179,7 +179,8 @@ struct statx {
>  	/* Max atomic write segment count */
>  	__u32   stx_atomic_write_segments_max;
>  
> -	__u32   __spare1[1];
> +	/* File offset alignment for direct I/O reads */
> +	__u32	stx_dio_read_offset_align;
>  
>  	/* 0xb8 */
>  	__u64	__spare3[9];	/* Spare space for future expansion */
> @@ -213,6 +214,7 @@ struct statx {
>  #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
>  #define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
>  #define STATX_WRITE_ATOMIC	0x00010000U	/* Want/got atomic_write_* fields */
> +#define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */
>  
>  #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */

Thanks.  We maybe should have included read/write separation in STATX_DIOALIGN,
but at the time the only case that was brought up was "DIO reads are supported
but DIO writes are not" which people had argued was not useful.

Is this patch meant to support that case, or just the case where DIO in both
directions is supported but with different alignments?  Is that different file
offset alignments, different memory alignments, or both?  This patch doesn't add
a stx_dio_read_mem_align field, so it's still assumed that both directions share
the existing stx_dio_mem_align property, including whether DIO is supported at
all (0 vs. nonzero).  So as proposed, the only case it helps with is where DIO
in both directions is supported with the same memory alignment but different
file offset alignments.  Maybe that is intended, but it's not clear to me.

Are there specific userspace applications that would like to take advantage of a
smaller value of stx_dio_read_offset_align compared to the existing
stx_dio_offset_align?  Even in the case of a discrepancy between reads and
writes, applications are supposed to be able to just use stx_dio_offset_align
since it has to be the greater of the two alignments.

And as always, new UAPIs need to be accompanied by documentation.  In this case
that's an update to the statx man page.

- Eric
Christoph Hellwig Aug. 29, 2024, 3:44 a.m. UTC | #3
On Wed, Aug 28, 2024 at 11:52:27PM +0000, Eric Biggers wrote:
> Thanks.  We maybe should have included read/write separation in STATX_DIOALIGN,
> but at the time the only case that was brought up was "DIO reads are supported
> but DIO writes are not" which people had argued was not useful.
> 
> Is this patch meant to support that case,

Why would anyone support direct I/O reads but not writes?  That seems
really weird, but maybe I'm missing something important.

> or just the case where DIO in both
> directions is supported but with different alignments?  Is that different file
> offset alignments, different memory alignments, or both?  This patch doesn't add
> a stx_dio_read_mem_align field, so it's still assumed that both directions share
> the existing stx_dio_mem_align property, including whether DIO is supported at
> all (0 vs. nonzero).

Yes.  The memory alignment really is dependent on the underlying storage
hardware DMA engine, which doesn't distinguish between reads and writes.

> So as proposed, the only case it helps with is where DIO
> in both directions is supported with the same memory alignment but different
> file offset alignments.

Yes.

> Maybe that is intended, but it's not clear to me.

Well, that's good feedback to make it more clear.

> Are there specific userspace applications that would like to take advantage of a
> smaller value of stx_dio_read_offset_align compared to the existing
> stx_dio_offset_align?

There are a lot of read-heavy workloads where smaller reads do make
a difference.
diff mbox series

Patch

diff --git a/fs/stat.c b/fs/stat.c
index 89ce1be563108c..044e7ad9f3abc2 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -700,6 +700,7 @@  cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_mnt_id = stat->mnt_id;
 	tmp.stx_dio_mem_align = stat->dio_mem_align;
 	tmp.stx_dio_offset_align = stat->dio_offset_align;
+	tmp.stx_dio_read_offset_align = stat->dio_read_offset_align;
 	tmp.stx_subvol = stat->subvol;
 	tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
 	tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 3d900c86981c5b..9d8382e23a9c69 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -52,6 +52,7 @@  struct kstat {
 	u64		mnt_id;
 	u32		dio_mem_align;
 	u32		dio_offset_align;
+	u32		dio_read_offset_align;
 	u64		change_cookie;
 	u64		subvol;
 	u32		atomic_write_unit_min;
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 8b35d7d511a287..f78ee3670dd5d7 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -179,7 +179,8 @@  struct statx {
 	/* Max atomic write segment count */
 	__u32   stx_atomic_write_segments_max;
 
-	__u32   __spare1[1];
+	/* File offset alignment for direct I/O reads */
+	__u32	stx_dio_read_offset_align;
 
 	/* 0xb8 */
 	__u64	__spare3[9];	/* Spare space for future expansion */
@@ -213,6 +214,7 @@  struct statx {
 #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
 #define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
 #define STATX_WRITE_ATOMIC	0x00010000U	/* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */