diff mbox series

nfs: add 'noalignwrite' option for lock-less 'lost writes' prevention

Message ID 20240627100129.2482408-1-dan.aloni@vastdata.com (mailing list archive)
State New
Headers show
Series nfs: add 'noalignwrite' option for lock-less 'lost writes' prevention | expand

Commit Message

Dan Aloni June 27, 2024, 10:01 a.m. UTC
There are some applications that write to predefined non-overlapping
file offsets from multiple clients and therefore don't need to rely on
file locking. However, if these applications want non-aligned offsets
and sizes they need to either use locks or risk data corruption, as the
NFS client defaults to extending writes to whole pages.

This commit adds a new mount option `noalignwrite`, which allows to turn
that off and avoid the need of locking, as long as these applications
don't overlap on offsets.

Signed-off-by: Dan Aloni <dan.aloni@vastdata.com>
---
 fs/nfs/fs_context.c       | 8 ++++++++
 fs/nfs/super.c            | 3 +++
 fs/nfs/write.c            | 3 +++
 include/linux/nfs_fs_sb.h | 1 +
 4 files changed, 15 insertions(+)

Comments

Jeff Layton June 27, 2024, 11:06 a.m. UTC | #1
On Thu, 2024-06-27 at 13:01 +0300, Dan Aloni wrote:
> There are some applications that write to predefined non-overlapping
> file offsets from multiple clients and therefore don't need to rely
> on
> file locking. However, if these applications want non-aligned offsets
> and sizes they need to either use locks or risk data corruption, as
> the
> NFS client defaults to extending writes to whole pages.
> 
> This commit adds a new mount option `noalignwrite`, which allows to
> turn
> that off and avoid the need of locking, as long as these applications
> don't overlap on offsets.
> 
> Signed-off-by: Dan Aloni <dan.aloni@vastdata.com>
> ---
>  fs/nfs/fs_context.c       | 8 ++++++++
>  fs/nfs/super.c            | 3 +++
>  fs/nfs/write.c            | 3 +++
>  include/linux/nfs_fs_sb.h | 1 +
>  4 files changed, 15 insertions(+)
> 
> diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
> index 6c9f3f6645dd..7e000d782e28 100644
> --- a/fs/nfs/fs_context.c
> +++ b/fs/nfs/fs_context.c
> @@ -49,6 +49,7 @@ enum nfs_param {
>  	Opt_bsize,
>  	Opt_clientaddr,
>  	Opt_cto,
> +	Opt_alignwrite,
>  	Opt_fg,
>  	Opt_fscache,
>  	Opt_fscache_flag,
> @@ -149,6 +150,7 @@ static const struct fs_parameter_spec
> nfs_fs_parameters[] = {
>  	fsparam_u32   ("bsize",		Opt_bsize),
>  	fsparam_string("clientaddr",	Opt_clientaddr),
>  	fsparam_flag_no("cto",		Opt_cto),
> +	fsparam_flag_no("alignwrite",	Opt_alignwrite),
>  	fsparam_flag  ("fg",		Opt_fg),
>  	fsparam_flag_no("fsc",		Opt_fscache_flag),
>  	fsparam_string("fsc",		Opt_fscache),
> @@ -592,6 +594,12 @@ static int nfs_fs_context_parse_param(struct
> fs_context *fc,
>  		else
>  			ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
>  		break;
> +	case Opt_alignwrite:
> +		if (result.negated)
> +			ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE;
> +		else
> +			ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE;
> +		break;
>  	case Opt_ac:
>  		if (result.negated)
>  			ctx->flags |= NFS_MOUNT_NOAC;
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index cbbd4866b0b7..1382ae19bba4 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -549,6 +549,9 @@ static void nfs_show_mount_options(struct
> seq_file *m, struct nfs_server *nfss,
>  	else
>  		seq_puts(m, ",local_lock=posix");
>  
> +	if (nfss->flags & NFS_MOUNT_NO_ALIGNWRITE)
> +		seq_puts(m, ",noalignwrite");
> +
>  	if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
>  		if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
>  			seq_puts(m, ",write=wait");
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 2329cbb0e446..cfe8061bf005 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -1315,7 +1315,10 @@ static int nfs_can_extend_write(struct file
> *file, struct folio *folio,
>  	struct file_lock_context *flctx =
> locks_inode_context(inode);
>  	struct file_lock *fl;
>  	int ret;
> +	unsigned int mntflags = NFS_SERVER(inode)->flags;
>  
> +	if (mntflags & NFS_MOUNT_NO_ALIGNWRITE)
> +		return 0;
>  	if (file->f_flags & O_DSYNC)
>  		return 0;
>  	if (!nfs_folio_write_uptodate(folio, pagelen))
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 92de074e63b9..4d28b4a328a7 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -157,6 +157,7 @@ struct nfs_server {
>  #define NFS_MOUNT_WRITE_WAIT		0x02000000
>  #define NFS_MOUNT_TRUNK_DISCOVERY	0x04000000
>  #define NFS_MOUNT_SHUTDOWN			0x08000000
> +#define NFS_MOUNT_NO_ALIGNWRITE		0x10000000
>  
>  	unsigned int		fattr_valid;	/* Valid attributes
> */
>  	unsigned int		caps;		/* server
> capabilities */

While I hate that we need a new mount option for this, I do understand
the need. I do wonder if we'd be better served by a new fadvise64()
flag or something. Either way, this would probably be needed in the
interim, so...

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Sagi Grimberg July 3, 2024, 7:09 p.m. UTC | #2
On 27/06/2024 13:01, Dan Aloni wrote:
> There are some applications that write to predefined non-overlapping
> file offsets from multiple clients and therefore don't need to rely on
> file locking. However, if these applications want non-aligned offsets
> and sizes they need to either use locks or risk data corruption, as the
> NFS client defaults to extending writes to whole pages.
>
> This commit adds a new mount option `noalignwrite`, which allows to turn
> that off and avoid the need of locking, as long as these applications
> don't overlap on offsets.
>
> Signed-off-by: Dan Aloni <dan.aloni@vastdata.com>

The new name seems a better choice.
I'm also on Christoph's camp that having this behavior be the default makes
sense given that we have large folios.

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>


Trond, Anna,
What are your thoughts on this?
Dan Aloni July 11, 2024, 8:06 a.m. UTC | #3
On 2024-07-03 22:09:58, Sagi Grimberg wrote:
> 
> 
> On 27/06/2024 13:01, Dan Aloni wrote:
> > There are some applications that write to predefined non-overlapping
> > file offsets from multiple clients and therefore don't need to rely on
> > file locking. However, if these applications want non-aligned offsets
> > and sizes they need to either use locks or risk data corruption, as the
> > NFS client defaults to extending writes to whole pages.
> > 
> > This commit adds a new mount option `noalignwrite`, which allows to turn
> > that off and avoid the need of locking, as long as these applications
> > don't overlap on offsets.
> > 
> > Signed-off-by: Dan Aloni <dan.aloni@vastdata.com>
> 
> The new name seems a better choice.
> I'm also on Christoph's camp that having this behavior be the default makes
> sense given that we have large folios.
> 
> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
> 
> 
> Trond, Anna,
> What are your thoughts on this?

Pinging. Do we need to do anything further? I can prepare the nfs-utils
patch too.
diff mbox series

Patch

diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 6c9f3f6645dd..7e000d782e28 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -49,6 +49,7 @@  enum nfs_param {
 	Opt_bsize,
 	Opt_clientaddr,
 	Opt_cto,
+	Opt_alignwrite,
 	Opt_fg,
 	Opt_fscache,
 	Opt_fscache_flag,
@@ -149,6 +150,7 @@  static const struct fs_parameter_spec nfs_fs_parameters[] = {
 	fsparam_u32   ("bsize",		Opt_bsize),
 	fsparam_string("clientaddr",	Opt_clientaddr),
 	fsparam_flag_no("cto",		Opt_cto),
+	fsparam_flag_no("alignwrite",	Opt_alignwrite),
 	fsparam_flag  ("fg",		Opt_fg),
 	fsparam_flag_no("fsc",		Opt_fscache_flag),
 	fsparam_string("fsc",		Opt_fscache),
@@ -592,6 +594,12 @@  static int nfs_fs_context_parse_param(struct fs_context *fc,
 		else
 			ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
 		break;
+	case Opt_alignwrite:
+		if (result.negated)
+			ctx->flags |= NFS_MOUNT_NO_ALIGNWRITE;
+		else
+			ctx->flags &= ~NFS_MOUNT_NO_ALIGNWRITE;
+		break;
 	case Opt_ac:
 		if (result.negated)
 			ctx->flags |= NFS_MOUNT_NOAC;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index cbbd4866b0b7..1382ae19bba4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -549,6 +549,9 @@  static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	else
 		seq_puts(m, ",local_lock=posix");
 
+	if (nfss->flags & NFS_MOUNT_NO_ALIGNWRITE)
+		seq_puts(m, ",noalignwrite");
+
 	if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
 		if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
 			seq_puts(m, ",write=wait");
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2329cbb0e446..cfe8061bf005 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1315,7 +1315,10 @@  static int nfs_can_extend_write(struct file *file, struct folio *folio,
 	struct file_lock_context *flctx = locks_inode_context(inode);
 	struct file_lock *fl;
 	int ret;
+	unsigned int mntflags = NFS_SERVER(inode)->flags;
 
+	if (mntflags & NFS_MOUNT_NO_ALIGNWRITE)
+		return 0;
 	if (file->f_flags & O_DSYNC)
 		return 0;
 	if (!nfs_folio_write_uptodate(folio, pagelen))
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 92de074e63b9..4d28b4a328a7 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -157,6 +157,7 @@  struct nfs_server {
 #define NFS_MOUNT_WRITE_WAIT		0x02000000
 #define NFS_MOUNT_TRUNK_DISCOVERY	0x04000000
 #define NFS_MOUNT_SHUTDOWN			0x08000000
+#define NFS_MOUNT_NO_ALIGNWRITE		0x10000000
 
 	unsigned int		fattr_valid;	/* Valid attributes */
 	unsigned int		caps;		/* server capabilities */