diff mbox series

[3/7] xfs: remove the m_readio_log field from struct xfs_mount

Message ID 20191025174026.31878-4-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/7] xfs: cleanup stat blksize reporting | expand

Commit Message

Christoph Hellwig Oct. 25, 2019, 5:40 p.m. UTC
The m_readio_log is only used for reporting the blksize (aka preferred
I/O size) in struct stat.  For all cases but a file system that does not
use stripe alignment, but which has the wsync and largeio mount option
set the value is the same as the write I/O size.

Remove the field and report a smaller preferred I/O size for that corner
case, which actually is the right thing to do for that case (except for
the fact that is probably is entirely unused).

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_iops.c  |  2 +-
 fs/xfs/xfs_mount.c | 17 ++++-------------
 fs/xfs/xfs_mount.h | 15 ++++-----------
 fs/xfs/xfs_super.c |  1 -
 4 files changed, 9 insertions(+), 26 deletions(-)

Comments

Eric Sandeen Oct. 25, 2019, 6:26 p.m. UTC | #1
On 10/25/19 12:40 PM, Christoph Hellwig wrote:
> The m_readio_log is only used for reporting the blksize (aka preferred
> I/O size) in struct stat.  For all cases but a file system that does not
> use stripe alignment, but which has the wsync and largeio mount option
> set the value is the same as the write I/O size.
> 
> Remove the field and report a smaller preferred I/O size for that corner
> case, which actually is the right thing to do for that case (except for
> the fact that is probably is entirely unused).

hm, I wonder what the history of the WSYNC_ sizes are, tbh.  So while I can't
speak to the need for a separate READIO_LOG or not, this doesn't seem 
too far fetched...

If Dave remembers something about NFS behavior, he can nak my rvb :)

Reviewed-by: Eric Sandeen <sandeen@redhat.com>
Dave Chinner Oct. 25, 2019, 8:43 p.m. UTC | #2
On Fri, Oct 25, 2019 at 01:26:05PM -0500, Eric Sandeen wrote:
> On 10/25/19 12:40 PM, Christoph Hellwig wrote:
> > The m_readio_log is only used for reporting the blksize (aka preferred
> > I/O size) in struct stat.  For all cases but a file system that does not
> > use stripe alignment, but which has the wsync and largeio mount option
> > set the value is the same as the write I/O size.
> > 
> > Remove the field and report a smaller preferred I/O size for that corner
> > case, which actually is the right thing to do for that case (except for
> > the fact that is probably is entirely unused).
> 
> hm, I wonder what the history of the WSYNC_ sizes are, tbh.  So while I can't
> speak to the need for a separate READIO_LOG or not, this doesn't seem 
> too far fetched...

NFSv2 had a maximum client IO size of 8kB and writes were
synchronous. The Irix NFS server had some magic in it (enabled by
the filesystem wsync mount option) that allowed clients to have two
sequential 8k writes in flight at once, allowing XFS to optimise for
16KB write IOs instead of the normal default of 64kB. This
optimisation was the reason that, at the time (early-mid 90s), SGI
machines had double the NFS write throughput of any other Unix
systems.

I'm surprised we still support NFSv2 at all in this day and age - I
suspect we should just kill NFSv2 altogether. We need to keep the
wsync option around for HA systems serving files to NFS and CIFS
clients, but the 8kB IO size optimisations can certainly die....

Cheers,

Dave.
Christoph Hellwig Oct. 26, 2019, 5:47 a.m. UTC | #3
On Sat, Oct 26, 2019 at 07:43:29AM +1100, Dave Chinner wrote:
> NFSv2 had a maximum client IO size of 8kB and writes were
> synchronous. The Irix NFS server had some magic in it (enabled by
> the filesystem wsync mount option) that allowed clients to have two
> sequential 8k writes in flight at once, allowing XFS to optimise for
> 16KB write IOs instead of the normal default of 64kB. This
> optimisation was the reason that, at the time (early-mid 90s), SGI
> machines had double the NFS write throughput of any other Unix
> systems.
> 
> I'm surprised we still support NFSv2 at all in this day and age - I
> suspect we should just kill NFSv2 altogether. We need to keep the
> wsync option around for HA systems serving files to NFS and CIFS
> clients, but the 8kB IO size optimisations can certainly die....

Last time I talked to the NFS folks there still were some very obscure
v2 use cases left (embedded devices that can't be upgraded).

But yeah, I'll add a patch to stop overriding rsize/wsize with the
sync option.
diff mbox series

Patch

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index b6dbfd8eb6a1..271fcbe04d48 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -514,7 +514,7 @@  xfs_stat_blksize(
 		if (mp->m_swidth)
 			return mp->m_swidth << mp->m_sb.sb_blocklog;
 		if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-			return 1U << max(mp->m_readio_log, mp->m_writeio_log);
+			return 1U << mp->m_writeio_log;
 	}
 
 	return PAGE_SIZE;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 18af97512aec..9800401a7d6f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -435,26 +435,17 @@  STATIC void
 xfs_set_rw_sizes(xfs_mount_t *mp)
 {
 	xfs_sb_t	*sbp = &(mp->m_sb);
-	int		readio_log, writeio_log;
+	int		writeio_log;
 
 	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
-		if (mp->m_flags & XFS_MOUNT_WSYNC) {
-			readio_log = XFS_WSYNC_READIO_LOG;
-			writeio_log = XFS_WSYNC_WRITEIO_LOG;
-		} else {
-			readio_log = XFS_READIO_LOG_LARGE;
+		if (mp->m_flags & XFS_MOUNT_WSYNC)
+			writeio_log = XFS_WRITEIO_LOG_WSYNC;
+		else
 			writeio_log = XFS_WRITEIO_LOG_LARGE;
-		}
 	} else {
-		readio_log = mp->m_readio_log;
 		writeio_log = mp->m_writeio_log;
 	}
 
-	if (sbp->sb_blocklog > readio_log) {
-		mp->m_readio_log = sbp->sb_blocklog;
-	} else {
-		mp->m_readio_log = readio_log;
-	}
 	if (sbp->sb_blocklog > writeio_log) {
 		mp->m_writeio_log = sbp->sb_blocklog;
 	} else {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index ecde5b3828c8..fb3a36a048cc 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -98,7 +98,6 @@  typedef struct xfs_mount {
 	xfs_agnumber_t		m_agirotor;	/* last ag dir inode alloced */
 	spinlock_t		m_agirotor_lock;/* .. and lock protecting it */
 	xfs_agnumber_t		m_maxagi;	/* highest inode alloc group */
-	uint			m_readio_log;	/* min read size log bytes */
 	uint			m_writeio_log;	/* min write size log bytes */
 	uint			m_writeio_blocks; /* min write size blocks */
 	struct xfs_da_geometry	*m_dir_geo;	/* directory block geometry */
@@ -247,10 +246,11 @@  typedef struct xfs_mount {
 
 
 /*
- * Default minimum read and write sizes.
+ * Default minimum write size.  The smaller sync value is better for
+ * NFSv2 wsync file systems.
  */
-#define XFS_READIO_LOG_LARGE	16
-#define XFS_WRITEIO_LOG_LARGE	16
+#define XFS_WRITEIO_LOG_WSYNC	14	/* 16k */
+#define XFS_WRITEIO_LOG_LARGE	16	/* 64k */
 
 /*
  * Max and min values for mount-option defined I/O
@@ -259,13 +259,6 @@  typedef struct xfs_mount {
 #define XFS_MAX_IO_LOG		30	/* 1G */
 #define XFS_MIN_IO_LOG		PAGE_SHIFT
 
-/*
- * Synchronous read and write sizes.  This should be
- * better for NFSv2 wsync filesystems.
- */
-#define	XFS_WSYNC_READIO_LOG	15	/* 32k */
-#define	XFS_WSYNC_WRITEIO_LOG	14	/* 16k */
-
 #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)	\
 				((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
 #define XFS_FORCED_SHUTDOWN(mp)	((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0a8cf6b87a21..4ededdbed5a4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -418,7 +418,6 @@  xfs_parseargs(
 		}
 
 		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-		mp->m_readio_log = iosizelog;
 		mp->m_writeio_log = iosizelog;
 	}