diff mbox series

[v6,2/7] fuse: make DAX mount option a tri-state

Message ID 20211011030052.98923-3-jefflexu@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series fuse,virtiofs: support per-file DAX | expand

Commit Message

Jingbo Xu Oct. 11, 2021, 3 a.m. UTC
We add 'always', 'never', and 'inode' (default). '-o dax' continues to
operate as before, which is equivalent to 'always'. To be consistent with
ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
option is specified, the default behaviour is equal to 'inode'.

As of this patch, 'inode' mode actually behaves the same as 'always'
mode; this remains so until the per-file DAX flag is introduced in the
following patch.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 fs/fuse/dax.c       | 19 ++++++++++++++++---
 fs/fuse/fuse_i.h    | 14 ++++++++++++--
 fs/fuse/inode.c     | 10 +++++++---
 fs/fuse/virtio_fs.c | 16 ++++++++++++++--
 4 files changed, 49 insertions(+), 10 deletions(-)

Comments

Vivek Goyal Oct. 18, 2021, 2:10 p.m. UTC | #1
On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> operate the same which is equivalent to 'always'. To be consistemt with
> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
> option is specified, the default behaviour is equal to 'inode'.

Hi Jeffle,

I am not sure when "-o dax=inode" is used as a default. If the user
specifies "-o dax", then it is equal to "-o dax=always"; otherwise the
user will explicitly specify "-o dax=always/never/inode". So when
is dax=inode used as the default?

> 
> By the time this patch is applied, 'inode' mode is actually equal to
> 'always' mode, before the per-file DAX flag is introduced in the
> following patch.
> 
> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> ---
>  fs/fuse/dax.c       | 19 ++++++++++++++++---
>  fs/fuse/fuse_i.h    | 14 ++++++++++++--
>  fs/fuse/inode.c     | 10 +++++++---
>  fs/fuse/virtio_fs.c | 16 ++++++++++++++--
>  4 files changed, 49 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
> index 1eb6538bf1b2..4c6c64efc950 100644
> --- a/fs/fuse/dax.c
> +++ b/fs/fuse/dax.c
> @@ -1284,11 +1284,14 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
>  	return ret;
>  }
>  
> -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
> +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode,
> +			struct dax_device *dax_dev)
>  {
>  	struct fuse_conn_dax *fcd;
>  	int err;
>  
> +	fc->dax_mode = dax_mode;
> +
>  	if (!dax_dev)
>  		return 0;
>  
> @@ -1335,11 +1338,21 @@ static const struct address_space_operations fuse_dax_file_aops  = {
>  static bool fuse_should_enable_dax(struct inode *inode)
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
> +	unsigned int dax_mode = fc->dax_mode;
> +
> +	if (dax_mode == FUSE_DAX_NEVER)
> +		return false;
>  
> -	if (fc->dax)
> +	/*
> +	 * If 'dax=always/inode', fc->dax couldn't be NULL even when fuse
> +	 * daemon doesn't support DAX, since the mount routine will fail
> +	 * early in this case.
> +	 */
> +	if (dax_mode == FUSE_DAX_ALWAYS)
>  		return true;
>  
> -	return false;
> +	/* dax_mode == FUSE_DAX_INODE */
> +	return true;

So as of this patch, every mode except FUSE_DAX_NEVER returns true, and
this will change in later patches for FUSE_DAX_INODE? If that's the case,
keep it simple in this patch and change it later in the patch series.

fuse_should_enable_dax()
{
	if (dax_mode == FUSE_DAX_NEVER)
		return false;
	return true;
}

>  }
>  
>  void fuse_dax_inode_init(struct inode *inode)
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 319596df5dc6..5abf9749923f 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -480,6 +480,12 @@ struct fuse_dev {
>  	struct list_head entry;
>  };
>  
> +enum fuse_dax_mode {
> +	FUSE_DAX_INODE,
> +	FUSE_DAX_ALWAYS,
> +	FUSE_DAX_NEVER,
> +};
> +
>  struct fuse_fs_context {
>  	int fd;
>  	struct file *file;
> @@ -497,7 +503,7 @@ struct fuse_fs_context {
>  	bool no_control:1;
>  	bool no_force_umount:1;
>  	bool legacy_opts_show:1;
> -	bool dax:1;
> +	enum fuse_dax_mode dax_mode;
>  	unsigned int max_read;
>  	unsigned int blksize;
>  	const char *subtype;
> @@ -802,6 +808,9 @@ struct fuse_conn {
>  	struct list_head devices;
>  
>  #ifdef CONFIG_FUSE_DAX
> +	/* dax mode: FUSE_DAX_* (always, never or per-file) */
> +	enum fuse_dax_mode dax_mode;
> +
>  	/* Dax specific conn data, non-NULL if DAX is enabled */
>  	struct fuse_conn_dax *dax;
>  #endif
> @@ -1255,7 +1264,8 @@ ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
>  ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
>  int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
>  int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
> -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
> +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode,
> +			struct dax_device *dax_dev);
>  void fuse_dax_conn_free(struct fuse_conn *fc);
>  bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
>  void fuse_dax_inode_init(struct inode *inode);
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 36cd03114b6d..b4b41683e97e 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -742,8 +742,12 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
>  			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
>  	}
>  #ifdef CONFIG_FUSE_DAX
> -	if (fc->dax)
> -		seq_puts(m, ",dax");
> +	if (fc->dax_mode == FUSE_DAX_ALWAYS)
> +		seq_puts(m, ",dax=always");

So if somebody mounts with "-o dax", then a kernel previous to this change
will show "dax" and a kernel after this change will show "dax=always"?

How about not changing the behavior? Keep a mode, say FUSE_DAX_LEGACY, which
will be set when the user specifies "-o dax". Internally, FUSE_DAX_LEGACY
and FUSE_DAX_ALWAYS will be the same.

	if (fc->dax_mode == FUSE_DAX_LEGACY)
		seq_puts(m, ",dax");


Thanks
Vivek

> +	else if (fc->dax_mode == FUSE_DAX_NEVER)
> +		seq_puts(m, ",dax=never");
> +	else if (fc->dax_mode == FUSE_DAX_INODE)
> +		seq_puts(m, ",dax=inode");
>  #endif
>  
>  	return 0;
> @@ -1493,7 +1497,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>  	sb->s_subtype = ctx->subtype;
>  	ctx->subtype = NULL;
>  	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
> -		err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
> +		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
>  		if (err)
>  			goto err;
>  	}
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 0ad89c6629d7..58cfbaeb4a7d 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -88,12 +88,21 @@ struct virtio_fs_req_work {
>  static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
>  				 struct fuse_req *req, bool in_flight);
>  
> +static const struct constant_table dax_param_enums[] = {
> +	{"inode",	FUSE_DAX_INODE },
> +	{"always",	FUSE_DAX_ALWAYS },
> +	{"never",	FUSE_DAX_NEVER },
> +	{}
> +};
> +
>  enum {
>  	OPT_DAX,
> +	OPT_DAX_ENUM,
>  };
>  
>  static const struct fs_parameter_spec virtio_fs_parameters[] = {
>  	fsparam_flag("dax", OPT_DAX),
> +	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
>  	{}
>  };
>  
> @@ -110,7 +119,10 @@ static int virtio_fs_parse_param(struct fs_context *fsc,
>  
>  	switch (opt) {
>  	case OPT_DAX:
> -		ctx->dax = 1;
> +		ctx->dax_mode = FUSE_DAX_ALWAYS;
> +		break;
> +	case OPT_DAX_ENUM:
> +		ctx->dax_mode = result.uint_32;
>  		break;
>  	default:
>  		return -EINVAL;
> @@ -1326,7 +1338,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
>  
>  	/* virtiofs allocates and installs its own fuse devices */
>  	ctx->fudptr = NULL;
> -	if (ctx->dax) {
> +	if (ctx->dax_mode != FUSE_DAX_NEVER) {
>  		if (!fs->dax_dev) {
>  			err = -EINVAL;
>  			pr_err("virtio-fs: dax can't be enabled as filesystem"
> -- 
> 2.27.0
>
Jingbo Xu Oct. 20, 2021, 2:52 a.m. UTC | #2
On 10/18/21 10:10 PM, Vivek Goyal wrote:
> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
>> operate the same which is equivalent to 'always'. To be consistemt with
>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
>> option is specified, the default behaviour is equal to 'inode'.
> 
> Hi Jeffle,
> 
> I am not sure when  -o "dax=inode"  is used as a default? If user
> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
> user will explicitly specify "-o dax=always/never/inode". So when
> is dax=inode is used as default?

That means when neither '-o dax' nor '-o dax=always/never/inode' is
specified, it is actually equal to '-o dax=inode', which is also how
per-file DAX on ext4/xfs works.

This default behaviour for local filesystems, e.g. ext4/xfs, may be
straightforward, since the disk inode will be read into memory during
inode instantiation, and checking for the persistent inode attribute
should be relatively cheap, except that the default behaviour has
changed from 'dax=never' to 'dax=inode'.

Coming back to virtiofs: when neither '-o dax' nor '-o
dax=always/never/inode' is specified, it actually behaves as '-o
dax=inode'. As long as the '-o dax=server/attr' option is not specified
for virtiofsd, virtiofsd will always clear FUSE_ATTR_DAX and thus the guest
will always disable DAX. In other words, the guest virtiofs actually behaves
as '-o dax=never' when neither '-o dax' nor '-o dax=always/never/inode' is
specified and the '-o dax=server/attr' option is not specified for virtiofsd.

But I'm okay if we need to change the default behaviour for virtiofs.


> 
>>
>> By the time this patch is applied, 'inode' mode is actually equal to
>> 'always' mode, before the per-file DAX flag is introduced in the
>> following patch.
>>
>> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
>> ---
>>  fs/fuse/dax.c       | 19 ++++++++++++++++---
>>  fs/fuse/fuse_i.h    | 14 ++++++++++++--
>>  fs/fuse/inode.c     | 10 +++++++---
>>  fs/fuse/virtio_fs.c | 16 ++++++++++++++--
>>  4 files changed, 49 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
>> index 1eb6538bf1b2..4c6c64efc950 100644
>> --- a/fs/fuse/dax.c
>> +++ b/fs/fuse/dax.c
>> @@ -1284,11 +1284,14 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
>>  	return ret;
>>  }
>>  
>> -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
>> +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode,
>> +			struct dax_device *dax_dev)
>>  {
>>  	struct fuse_conn_dax *fcd;
>>  	int err;
>>  
>> +	fc->dax_mode = dax_mode;
>> +
>>  	if (!dax_dev)
>>  		return 0;
>>  
>> @@ -1335,11 +1338,21 @@ static const struct address_space_operations fuse_dax_file_aops  = {
>>  static bool fuse_should_enable_dax(struct inode *inode)
>>  {
>>  	struct fuse_conn *fc = get_fuse_conn(inode);
>> +	unsigned int dax_mode = fc->dax_mode;
>> +
>> +	if (dax_mode == FUSE_DAX_NEVER)
>> +		return false;
>>  
>> -	if (fc->dax)
>> +	/*
>> +	 * If 'dax=always/inode', fc->dax couldn't be NULL even when fuse
>> +	 * daemon doesn't support DAX, since the mount routine will fail
>> +	 * early in this case.
>> +	 */
>> +	if (dax_mode == FUSE_DAX_ALWAYS)
>>  		return true;
>>  
>> -	return false;
>> +	/* dax_mode == FUSE_DAX_INODE */
>> +	return true;
> 
> So as of this patch except FUSE_DAX_NEVER return true and this will
> change in later patches for FUSE_DAX_INODE? If that's the case, keep
> it simple in this patch and change it later in the patch series.
> 
> fuse_should_enable_dax()
> {
> 	if (dax_mode == FUSE_DAX_NEVER)
> 		return false;
> 	return true;
> }
> 
>>  }
>>  
>>  void fuse_dax_inode_init(struct inode *inode)
>> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
>> index 319596df5dc6..5abf9749923f 100644
>> --- a/fs/fuse/fuse_i.h
>> +++ b/fs/fuse/fuse_i.h
>> @@ -480,6 +480,12 @@ struct fuse_dev {
>>  	struct list_head entry;
>>  };
>>  
>> +enum fuse_dax_mode {
>> +	FUSE_DAX_INODE,
>> +	FUSE_DAX_ALWAYS,
>> +	FUSE_DAX_NEVER,
>> +};
>> +
>>  struct fuse_fs_context {
>>  	int fd;
>>  	struct file *file;
>> @@ -497,7 +503,7 @@ struct fuse_fs_context {
>>  	bool no_control:1;
>>  	bool no_force_umount:1;
>>  	bool legacy_opts_show:1;
>> -	bool dax:1;
>> +	enum fuse_dax_mode dax_mode;
>>  	unsigned int max_read;
>>  	unsigned int blksize;
>>  	const char *subtype;
>> @@ -802,6 +808,9 @@ struct fuse_conn {
>>  	struct list_head devices;
>>  
>>  #ifdef CONFIG_FUSE_DAX
>> +	/* dax mode: FUSE_DAX_* (always, never or per-file) */
>> +	enum fuse_dax_mode dax_mode;
>> +
>>  	/* Dax specific conn data, non-NULL if DAX is enabled */
>>  	struct fuse_conn_dax *dax;
>>  #endif
>> @@ -1255,7 +1264,8 @@ ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
>>  ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
>>  int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
>>  int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
>> -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
>> +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode,
>> +			struct dax_device *dax_dev);
>>  void fuse_dax_conn_free(struct fuse_conn *fc);
>>  bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
>>  void fuse_dax_inode_init(struct inode *inode);
>> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
>> index 36cd03114b6d..b4b41683e97e 100644
>> --- a/fs/fuse/inode.c
>> +++ b/fs/fuse/inode.c
>> @@ -742,8 +742,12 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
>>  			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
>>  	}
>>  #ifdef CONFIG_FUSE_DAX
>> -	if (fc->dax)
>> -		seq_puts(m, ",dax");
>> +	if (fc->dax_mode == FUSE_DAX_ALWAYS)
>> +		seq_puts(m, ",dax=always");
> 
> So if somebody mounts with "-o dax" then kernel previous to this change
> will show "dax" and kernel after this change will show "dax=always"?

Yes. It's actually how per-file DAX on ext4/xfs behaves.

> 
> How about not change the behavior. Keep a mode say FUSE_DAX_LEGACY which
> will be set when user specifies "-o dax". Internally FUSE_DAX_LEGACY
> and FUSE_DAX_ALWAYS will be same.
> 
> 	if (fc->dax_mode == FUSE_DAX_LEGACY)
> 		seq_puts(m, ",dax");
> 




> 
>> +	else if (fc->dax_mode == FUSE_DAX_NEVER)
>> +		seq_puts(m, ",dax=never");
>> +	else if (fc->dax_mode == FUSE_DAX_INODE)
>> +		seq_puts(m, ",dax=inode");
>>  #endif
>>  
>>  	return 0;
>> @@ -1493,7 +1497,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>>  	sb->s_subtype = ctx->subtype;
>>  	ctx->subtype = NULL;
>>  	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
>> -		err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
>> +		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
>>  		if (err)
>>  			goto err;
>>  	}
>> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
>> index 0ad89c6629d7..58cfbaeb4a7d 100644
>> --- a/fs/fuse/virtio_fs.c
>> +++ b/fs/fuse/virtio_fs.c
>> @@ -88,12 +88,21 @@ struct virtio_fs_req_work {
>>  static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
>>  				 struct fuse_req *req, bool in_flight);
>>  
>> +static const struct constant_table dax_param_enums[] = {
>> +	{"inode",	FUSE_DAX_INODE },
>> +	{"always",	FUSE_DAX_ALWAYS },
>> +	{"never",	FUSE_DAX_NEVER },
>> +	{}
>> +};
>> +
>>  enum {
>>  	OPT_DAX,
>> +	OPT_DAX_ENUM,
>>  };
>>  
>>  static const struct fs_parameter_spec virtio_fs_parameters[] = {
>>  	fsparam_flag("dax", OPT_DAX),
>> +	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
>>  	{}
>>  };
>>  
>> @@ -110,7 +119,10 @@ static int virtio_fs_parse_param(struct fs_context *fsc,
>>  
>>  	switch (opt) {
>>  	case OPT_DAX:
>> -		ctx->dax = 1;
>> +		ctx->dax_mode = FUSE_DAX_ALWAYS;
>> +		break;
>> +	case OPT_DAX_ENUM:
>> +		ctx->dax_mode = result.uint_32;
>>  		break;
>>  	default:
>>  		return -EINVAL;
>> @@ -1326,7 +1338,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
>>  
>>  	/* virtiofs allocates and installs its own fuse devices */
>>  	ctx->fudptr = NULL;
>> -	if (ctx->dax) {
>> +	if (ctx->dax_mode != FUSE_DAX_NEVER) {
>>  		if (!fs->dax_dev) {
>>  			err = -EINVAL;
>>  			pr_err("virtio-fs: dax can't be enabled as filesystem"
>> -- 
>> 2.27.0
>>
Vivek Goyal Oct. 20, 2021, 2:48 p.m. UTC | #3
On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
> 
> 
> On 10/18/21 10:10 PM, Vivek Goyal wrote:
> > On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
> >> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> >> operate the same which is equivalent to 'always'. To be consistemt with
> >> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
> >> option is specified, the default behaviour is equal to 'inode'.
> > 
> > Hi Jeffle,
> > 
> > I am not sure when  -o "dax=inode"  is used as a default? If user
> > specifies, "-o dax" then it is equal to "-o dax=always", otherwise
> > user will explicitly specify "-o dax=always/never/inode". So when
> > is dax=inode is used as default?
> 
> That means when neither '-o dax' nor '-o dax=always/never/inode' is
> specified, it is actually equal to '-o dax=inode', which is also how
> per-file DAX on ext4/xfs works.
> 
> This default behaviour for local filesystem, e.g. ext4/xfs, may be
> straightforward, since the disk inode will be read into memory during
> the inode instantiation, and checking for persistent inode attribute
> shall be realatively cheap, except that the default behaviour has
> changed from 'dax=never' to 'dax=inode'.

Interesting that ext4/xfs allowed for this behavior change.

> 
> Come back to virtiofs, when neither '-o dax' nor '-o
> dax=always/never/inode' is specified, and it actually behaves as '-o
> dax=inode', as long as '-o dax=server/attr' option is not specified for
> virtiofsd, virtiofsd will always clear FUSE_ATTR_DAX and thus guest will
> always disable DAX. IOWs, the guest virtiofs atually behaves as '-o
> dax=never' when neither '-o dax' nor '-o dax=always/never/inode' is
> specified, and '-o dax=server/attr' option is not specified for virtiofsd.
> 
> But I'm okay if we need to change the default behaviour for virtiofs.

This is a change of behavior from the client's perspective. Even if the
client did not opt in to DAX, DAX can be enabled based on the server's
setting. Not that there is anything wrong with it, but the
change-of-behavior part concerns me.

In the case of virtiofs, we control a lot of features from the server.
The client typically just calls "mount", and there are not many options
users can specify for mount.

Given that we already let the client make a choice about DAX behavior,
I would feel more comfortable if we don't change that: let the client
request a specific DAX mode, and if the client does not specify anything,
then DAX is not enabled.

Vivek
> 
> 
> > 
> >>
> >> By the time this patch is applied, 'inode' mode is actually equal to
> >> 'always' mode, before the per-file DAX flag is introduced in the
> >> following patch.
> >>
> >> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> >> ---
> >>  fs/fuse/dax.c       | 19 ++++++++++++++++---
> >>  fs/fuse/fuse_i.h    | 14 ++++++++++++--
> >>  fs/fuse/inode.c     | 10 +++++++---
> >>  fs/fuse/virtio_fs.c | 16 ++++++++++++++--
> >>  4 files changed, 49 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
> >> index 1eb6538bf1b2..4c6c64efc950 100644
> >> --- a/fs/fuse/dax.c
> >> +++ b/fs/fuse/dax.c
> >> @@ -1284,11 +1284,14 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
> >>  	return ret;
> >>  }
> >>  
> >> -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
> >> +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode,
> >> +			struct dax_device *dax_dev)
> >>  {
> >>  	struct fuse_conn_dax *fcd;
> >>  	int err;
> >>  
> >> +	fc->dax_mode = dax_mode;
> >> +
> >>  	if (!dax_dev)
> >>  		return 0;
> >>  
> >> @@ -1335,11 +1338,21 @@ static const struct address_space_operations fuse_dax_file_aops  = {
> >>  static bool fuse_should_enable_dax(struct inode *inode)
> >>  {
> >>  	struct fuse_conn *fc = get_fuse_conn(inode);
> >> +	unsigned int dax_mode = fc->dax_mode;
> >> +
> >> +	if (dax_mode == FUSE_DAX_NEVER)
> >> +		return false;
> >>  
> >> -	if (fc->dax)
> >> +	/*
> >> +	 * If 'dax=always/inode', fc->dax couldn't be NULL even when fuse
> >> +	 * daemon doesn't support DAX, since the mount routine will fail
> >> +	 * early in this case.
> >> +	 */
> >> +	if (dax_mode == FUSE_DAX_ALWAYS)
> >>  		return true;
> >>  
> >> -	return false;
> >> +	/* dax_mode == FUSE_DAX_INODE */
> >> +	return true;
> > 
> > So as of this patch except FUSE_DAX_NEVER return true and this will
> > change in later patches for FUSE_DAX_INODE? If that's the case, keep
> > it simple in this patch and change it later in the patch series.
> > 
> > fuse_should_enable_dax()
> > {
> > 	if (dax_mode == FUSE_DAX_NEVER)
> > 		return false;
> > 	return true;
> > }
> > 
> >>  }
> >>  
> >>  void fuse_dax_inode_init(struct inode *inode)
> >> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> >> index 319596df5dc6..5abf9749923f 100644
> >> --- a/fs/fuse/fuse_i.h
> >> +++ b/fs/fuse/fuse_i.h
> >> @@ -480,6 +480,12 @@ struct fuse_dev {
> >>  	struct list_head entry;
> >>  };
> >>  
> >> +enum fuse_dax_mode {
> >> +	FUSE_DAX_INODE,
> >> +	FUSE_DAX_ALWAYS,
> >> +	FUSE_DAX_NEVER,
> >> +};
> >> +
> >>  struct fuse_fs_context {
> >>  	int fd;
> >>  	struct file *file;
> >> @@ -497,7 +503,7 @@ struct fuse_fs_context {
> >>  	bool no_control:1;
> >>  	bool no_force_umount:1;
> >>  	bool legacy_opts_show:1;
> >> -	bool dax:1;
> >> +	enum fuse_dax_mode dax_mode;
> >>  	unsigned int max_read;
> >>  	unsigned int blksize;
> >>  	const char *subtype;
> >> @@ -802,6 +808,9 @@ struct fuse_conn {
> >>  	struct list_head devices;
> >>  
> >>  #ifdef CONFIG_FUSE_DAX
> >> +	/* dax mode: FUSE_DAX_* (always, never or per-file) */
> >> +	enum fuse_dax_mode dax_mode;
> >> +
> >>  	/* Dax specific conn data, non-NULL if DAX is enabled */
> >>  	struct fuse_conn_dax *dax;
> >>  #endif
> >> @@ -1255,7 +1264,8 @@ ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
> >>  ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
> >>  int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
> >>  int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
> >> -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
> >> +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode,
> >> +			struct dax_device *dax_dev);
> >>  void fuse_dax_conn_free(struct fuse_conn *fc);
> >>  bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
> >>  void fuse_dax_inode_init(struct inode *inode);
> >> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> >> index 36cd03114b6d..b4b41683e97e 100644
> >> --- a/fs/fuse/inode.c
> >> +++ b/fs/fuse/inode.c
> >> @@ -742,8 +742,12 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
> >>  			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
> >>  	}
> >>  #ifdef CONFIG_FUSE_DAX
> >> -	if (fc->dax)
> >> -		seq_puts(m, ",dax");
> >> +	if (fc->dax_mode == FUSE_DAX_ALWAYS)
> >> +		seq_puts(m, ",dax=always");
> > 
> > So if somebody mounts with "-o dax" then kernel previous to this change
> > will show "dax" and kernel after this change will show "dax=always"?
> 
> Yes. It's actually how per-file DAX on ext4/xfs behaves.
> 
> > 
> > How about not change the behavior. Keep a mode say FUSE_DAX_LEGACY which
> > will be set when user specifies "-o dax". Internally FUSE_DAX_LEGACY
> > and FUSE_DAX_ALWAYS will be same.
> > 
> > 	if (fc->dax_mode == FUSE_DAX_LEGACY)
> > 		seq_puts(m, ",dax");
> > 
> 
> 
> 
> 
> > 
> >> +	else if (fc->dax_mode == FUSE_DAX_NEVER)
> >> +		seq_puts(m, ",dax=never");
> >> +	else if (fc->dax_mode == FUSE_DAX_INODE)
> >> +		seq_puts(m, ",dax=inode");
> >>  #endif
> >>  
> >>  	return 0;
> >> @@ -1493,7 +1497,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
> >>  	sb->s_subtype = ctx->subtype;
> >>  	ctx->subtype = NULL;
> >>  	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
> >> -		err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
> >> +		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
> >>  		if (err)
> >>  			goto err;
> >>  	}
> >> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> >> index 0ad89c6629d7..58cfbaeb4a7d 100644
> >> --- a/fs/fuse/virtio_fs.c
> >> +++ b/fs/fuse/virtio_fs.c
> >> @@ -88,12 +88,21 @@ struct virtio_fs_req_work {
> >>  static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
> >>  				 struct fuse_req *req, bool in_flight);
> >>  
> >> +static const struct constant_table dax_param_enums[] = {
> >> +	{"inode",	FUSE_DAX_INODE },
> >> +	{"always",	FUSE_DAX_ALWAYS },
> >> +	{"never",	FUSE_DAX_NEVER },
> >> +	{}
> >> +};
> >> +
> >>  enum {
> >>  	OPT_DAX,
> >> +	OPT_DAX_ENUM,
> >>  };
> >>  
> >>  static const struct fs_parameter_spec virtio_fs_parameters[] = {
> >>  	fsparam_flag("dax", OPT_DAX),
> >> +	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
> >>  	{}
> >>  };
> >>  
> >> @@ -110,7 +119,10 @@ static int virtio_fs_parse_param(struct fs_context *fsc,
> >>  
> >>  	switch (opt) {
> >>  	case OPT_DAX:
> >> -		ctx->dax = 1;
> >> +		ctx->dax_mode = FUSE_DAX_ALWAYS;
> >> +		break;
> >> +	case OPT_DAX_ENUM:
> >> +		ctx->dax_mode = result.uint_32;
> >>  		break;
> >>  	default:
> >>  		return -EINVAL;
> >> @@ -1326,7 +1338,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
> >>  
> >>  	/* virtiofs allocates and installs its own fuse devices */
> >>  	ctx->fudptr = NULL;
> >> -	if (ctx->dax) {
> >> +	if (ctx->dax_mode != FUSE_DAX_NEVER) {
> >>  		if (!fs->dax_dev) {
> >>  			err = -EINVAL;
> >>  			pr_err("virtio-fs: dax can't be enabled as filesystem"
> >> -- 
> >> 2.27.0
> >>
> 
> -- 
> Thanks,
> Jeffle
>
Vivek Goyal Oct. 20, 2021, 3:17 p.m. UTC | #4
On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
> 
> 
> On 10/18/21 10:10 PM, Vivek Goyal wrote:
> > On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
> >> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> >> operate the same which is equivalent to 'always'. To be consistemt with
> >> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
> >> option is specified, the default behaviour is equal to 'inode'.
> > 
> > Hi Jeffle,
> > 
> > I am not sure when  -o "dax=inode"  is used as a default? If user
> > specifies, "-o dax" then it is equal to "-o dax=always", otherwise
> > user will explicitly specify "-o dax=always/never/inode". So when
> > is dax=inode is used as default?
> 
> That means when neither '-o dax' nor '-o dax=always/never/inode' is
> specified, it is actually equal to '-o dax=inode', which is also how
> per-file DAX on ext4/xfs works.

[ CC dave chinner] 

Is this not a change of default behavior for ext4/xfs as well? My
understanding is that prior to these new dax options, "-o dax" enabled
DAX on the filesystem, and if the user did not specify it, DAX was
disabled by default.

Now, after the introduction of "-o dax=always/never/inode", if
"-o dax=inode" suddenly became the default when the user did not specify
anything, that's a change of behavior. Is that intentional? If given a
choice, I would rather not change the default, and instead ask the user
to opt in to the appropriate dax functionality.

Dave, you might have thoughts on this. It makes me uncomfortable to
change the virtiofs dax default now just because other filesystems did it.

Thanks
Vivek

> 
> This default behaviour for local filesystem, e.g. ext4/xfs, may be
> straightforward, since the disk inode will be read into memory during
> the inode instantiation, and checking for persistent inode attribute
> shall be realatively cheap, except that the default behaviour has
> changed from 'dax=never' to 'dax=inode'.
> 
> Come back to virtiofs, when neither '-o dax' nor '-o
> dax=always/never/inode' is specified, and it actually behaves as '-o
> dax=inode', as long as '-o dax=server/attr' option is not specified for
> virtiofsd, virtiofsd will always clear FUSE_ATTR_DAX and thus guest will
> always disable DAX. IOWs, the guest virtiofs atually behaves as '-o
> dax=never' when neither '-o dax' nor '-o dax=always/never/inode' is
> specified, and '-o dax=server/attr' option is not specified for virtiofsd.
> 
> But I'm okay if we need to change the default behaviour for virtiofs.
> 
> 
> > 
> >>
> >> By the time this patch is applied, 'inode' mode is actually equal to
> >> 'always' mode, before the per-file DAX flag is introduced in the
> >> following patch.
> >>
> >> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> >> ---
> >>  fs/fuse/dax.c       | 19 ++++++++++++++++---
> >>  fs/fuse/fuse_i.h    | 14 ++++++++++++--
> >>  fs/fuse/inode.c     | 10 +++++++---
> >>  fs/fuse/virtio_fs.c | 16 ++++++++++++++--
> >>  4 files changed, 49 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
> >> index 1eb6538bf1b2..4c6c64efc950 100644
> >> --- a/fs/fuse/dax.c
> >> +++ b/fs/fuse/dax.c
> >> @@ -1284,11 +1284,14 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
> >>  	return ret;
> >>  }
> >>  
> >> -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
> >> +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode,
> >> +			struct dax_device *dax_dev)
> >>  {
> >>  	struct fuse_conn_dax *fcd;
> >>  	int err;
> >>  
> >> +	fc->dax_mode = dax_mode;
> >> +
> >>  	if (!dax_dev)
> >>  		return 0;
> >>  
> >> @@ -1335,11 +1338,21 @@ static const struct address_space_operations fuse_dax_file_aops  = {
> >>  static bool fuse_should_enable_dax(struct inode *inode)
> >>  {
> >>  	struct fuse_conn *fc = get_fuse_conn(inode);
> >> +	unsigned int dax_mode = fc->dax_mode;
> >> +
> >> +	if (dax_mode == FUSE_DAX_NEVER)
> >> +		return false;
> >>  
> >> -	if (fc->dax)
> >> +	/*
> >> +	 * If 'dax=always/inode', fc->dax couldn't be NULL even when fuse
> >> +	 * daemon doesn't support DAX, since the mount routine will fail
> >> +	 * early in this case.
> >> +	 */
> >> +	if (dax_mode == FUSE_DAX_ALWAYS)
> >>  		return true;
> >>  
> >> -	return false;
> >> +	/* dax_mode == FUSE_DAX_INODE */
> >> +	return true;
> > 
> > So as of this patch except FUSE_DAX_NEVER return true and this will
> > change in later patches for FUSE_DAX_INODE? If that's the case, keep
> > it simple in this patch and change it later in the patch series.
> > 
> > fuse_should_enable_dax()
> > {
> > 	if (dax_mode == FUSE_DAX_NEVER)
> > 		return false;
> > 	return true;
> > }
> > 
> >>  }
> >>  
> >>  void fuse_dax_inode_init(struct inode *inode)
> >> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> >> index 319596df5dc6..5abf9749923f 100644
> >> --- a/fs/fuse/fuse_i.h
> >> +++ b/fs/fuse/fuse_i.h
> >> @@ -480,6 +480,12 @@ struct fuse_dev {
> >>  	struct list_head entry;
> >>  };
> >>  
> >> +enum fuse_dax_mode {
> >> +	FUSE_DAX_INODE,
> >> +	FUSE_DAX_ALWAYS,
> >> +	FUSE_DAX_NEVER,
> >> +};
> >> +
> >>  struct fuse_fs_context {
> >>  	int fd;
> >>  	struct file *file;
> >> @@ -497,7 +503,7 @@ struct fuse_fs_context {
> >>  	bool no_control:1;
> >>  	bool no_force_umount:1;
> >>  	bool legacy_opts_show:1;
> >> -	bool dax:1;
> >> +	enum fuse_dax_mode dax_mode;
> >>  	unsigned int max_read;
> >>  	unsigned int blksize;
> >>  	const char *subtype;
> >> @@ -802,6 +808,9 @@ struct fuse_conn {
> >>  	struct list_head devices;
> >>  
> >>  #ifdef CONFIG_FUSE_DAX
> >> +	/* dax mode: FUSE_DAX_* (always, never or per-file) */
> >> +	enum fuse_dax_mode dax_mode;
> >> +
> >>  	/* Dax specific conn data, non-NULL if DAX is enabled */
> >>  	struct fuse_conn_dax *dax;
> >>  #endif
> >> @@ -1255,7 +1264,8 @@ ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
> >>  ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
> >>  int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
> >>  int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
> >> -int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
> >> +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode,
> >> +			struct dax_device *dax_dev);
> >>  void fuse_dax_conn_free(struct fuse_conn *fc);
> >>  bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
> >>  void fuse_dax_inode_init(struct inode *inode);
> >> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> >> index 36cd03114b6d..b4b41683e97e 100644
> >> --- a/fs/fuse/inode.c
> >> +++ b/fs/fuse/inode.c
> >> @@ -742,8 +742,12 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
> >>  			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
> >>  	}
> >>  #ifdef CONFIG_FUSE_DAX
> >> -	if (fc->dax)
> >> -		seq_puts(m, ",dax");
> >> +	if (fc->dax_mode == FUSE_DAX_ALWAYS)
> >> +		seq_puts(m, ",dax=always");
> > 
> > So if somebody mounts with "-o dax" then kernel previous to this change
> > will show "dax" and kernel after this change will show "dax=always"?
> 
> Yes. It's actually how per-file DAX on ext4/xfs behaves.
> 
> > 
> > How about not change the behavior. Keep a mode say FUSE_DAX_LEGACY which
> > will be set when user specifies "-o dax". Internally FUSE_DAX_LEGACY
> > and FUSE_DAX_ALWAYS will be same.
> > 
> > 	if (fc->dax_mode == FUSE_DAX_LEGACY)
> > 		seq_puts(m, ",dax");
> > 
> 
> 
> 
> 
> > 
> >> +	else if (fc->dax_mode == FUSE_DAX_NEVER)
> >> +		seq_puts(m, ",dax=never");
> >> +	else if (fc->dax_mode == FUSE_DAX_INODE)
> >> +		seq_puts(m, ",dax=inode");
> >>  #endif
> >>  
> >>  	return 0;
> >> @@ -1493,7 +1497,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
> >>  	sb->s_subtype = ctx->subtype;
> >>  	ctx->subtype = NULL;
> >>  	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
> >> -		err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
> >> +		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
> >>  		if (err)
> >>  			goto err;
> >>  	}
> >> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> >> index 0ad89c6629d7..58cfbaeb4a7d 100644
> >> --- a/fs/fuse/virtio_fs.c
> >> +++ b/fs/fuse/virtio_fs.c
> >> @@ -88,12 +88,21 @@ struct virtio_fs_req_work {
> >>  static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
> >>  				 struct fuse_req *req, bool in_flight);
> >>  
> >> +static const struct constant_table dax_param_enums[] = {
> >> +	{"inode",	FUSE_DAX_INODE },
> >> +	{"always",	FUSE_DAX_ALWAYS },
> >> +	{"never",	FUSE_DAX_NEVER },
> >> +	{}
> >> +};
> >> +
> >>  enum {
> >>  	OPT_DAX,
> >> +	OPT_DAX_ENUM,
> >>  };
> >>  
> >>  static const struct fs_parameter_spec virtio_fs_parameters[] = {
> >>  	fsparam_flag("dax", OPT_DAX),
> >> +	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
> >>  	{}
> >>  };
> >>  
> >> @@ -110,7 +119,10 @@ static int virtio_fs_parse_param(struct fs_context *fsc,
> >>  
> >>  	switch (opt) {
> >>  	case OPT_DAX:
> >> -		ctx->dax = 1;
> >> +		ctx->dax_mode = FUSE_DAX_ALWAYS;
> >> +		break;
> >> +	case OPT_DAX_ENUM:
> >> +		ctx->dax_mode = result.uint_32;
> >>  		break;
> >>  	default:
> >>  		return -EINVAL;
> >> @@ -1326,7 +1338,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
> >>  
> >>  	/* virtiofs allocates and installs its own fuse devices */
> >>  	ctx->fudptr = NULL;
> >> -	if (ctx->dax) {
> >> +	if (ctx->dax_mode != FUSE_DAX_NEVER) {
> >>  		if (!fs->dax_dev) {
> >>  			err = -EINVAL;
> >>  			pr_err("virtio-fs: dax can't be enabled as filesystem"
> >> -- 
> >> 2.27.0
> >>
> 
> -- 
> Thanks,
> Jeffle
>
Jingbo Xu Oct. 22, 2021, 6:54 a.m. UTC | #5
cc [Ira Weiny], author of per inode DAX on xfs/ext4

On 10/20/21 11:17 PM, Vivek Goyal wrote:
> On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
>>
>>
>> On 10/18/21 10:10 PM, Vivek Goyal wrote:
>>> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
>>>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
>>>> operate the same which is equivalent to 'always'. To be consistemt with
>>>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
>>>> option is specified, the default behaviour is equal to 'inode'.
>>>
>>> Hi Jeffle,
>>>
>>> I am not sure when  -o "dax=inode"  is used as a default? If user
>>> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
>>> user will explicitly specify "-o dax=always/never/inode". So when
>>> is dax=inode is used as default?
>>
>> That means when neither '-o dax' nor '-o dax=always/never/inode' is
>> specified, it is actually equal to '-o dax=inode', which is also how
>> per-file DAX on ext4/xfs works.
> 
> [ CC dave chinner] 
> 
> Is it not change of default behavior for ext4/xfs as well. My
> understanding is that prior to this new dax options, "-o dax" enabled
> dax on filesystem and if user did not specify it, DAX is disbaled
> by default.
> 
> Now after introduction of "-o dax=always/never/inode", if suddenly
> "-o dax=inode" became the default if user did not specify anything,
> that's change of behavior. Is that intentional. If given a choice,
> I would rather not change default and ask user to opt-in for
> appropriate dax functionality.
> 
> Dave, you might have thoughts on this. It makes me uncomfortable to
> change virtiofs dax default now just because other filesytems did it.
> 

I can only find the following discussions about the earliest record on
this tri-state mount option:

https://lore.kernel.org/lkml/20200316095509.GA13788@lst.de/
https://lore.kernel.org/lkml/20200401040021.GC56958@magnolia/


Hi, Ira Weiny,

Do you have any thought on this, i.e. why the default behavior has
changed after introduction of per inode dax?
Ira Weiny Oct. 25, 2021, 5:52 p.m. UTC | #6
On Fri, Oct 22, 2021 at 02:54:03PM +0800, JeffleXu wrote:
> cc [Ira Weiny], author of per inode DAX on xfs/ext4
> 
> On 10/20/21 11:17 PM, Vivek Goyal wrote:
> > On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
> >>
> >>
> >> On 10/18/21 10:10 PM, Vivek Goyal wrote:
> >>> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
> >>>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> >>>> operate the same which is equivalent to 'always'. To be consistemt with
> >>>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
> >>>> option is specified, the default behaviour is equal to 'inode'.
> >>>
> >>> Hi Jeffle,
> >>>
> >>> I am not sure when  -o "dax=inode"  is used as a default? If user
> >>> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
> >>> user will explicitly specify "-o dax=always/never/inode". So when
> >>> is dax=inode is used as default?
> >>
> >> That means when neither '-o dax' nor '-o dax=always/never/inode' is
> >> specified, it is actually equal to '-o dax=inode', which is also how
> >> per-file DAX on ext4/xfs works.
> > 

It's been a while so I'm fuzzy on the details of the discussions but yes that
is the way things are now in the code.

> > [ CC dave chinner] 
> > 
> > Is it not change of default behavior for ext4/xfs as well. My
> > understanding is that prior to this new dax options, "-o dax" enabled
> > dax on filesystem and if user did not specify it, DAX is disbaled
> > by default.

Technically it does change default behavior...  However, NOT in a way which
breaks anything.  See below.

> > 
> > Now after introduction of "-o dax=always/never/inode", if suddenly
> > "-o dax=inode" became the default if user did not specify anything,
> > that's change of behavior.

Technically yes but not in a broken way.

> >
> > Is that intentional. If given a choice,
> > I would rather not change default and ask user to opt-in for
> > appropriate dax functionality.

There is no need for this.

> > 
> > Dave, you might have thoughts on this. It makes me uncomfortable to
> > change virtiofs dax default now just because other filesytems did it.
> > 
> 
> I can only find the following discussions about the earliest record on
> this tri-state mount option:
> 
> https://lore.kernel.org/lkml/20200316095509.GA13788@lst.de/
> https://lore.kernel.org/lkml/20200401040021.GC56958@magnolia/
> 
> 
> Hi, Ira Weiny,
> 
> Do you have any thought on this, i.e. why the default behavior has
> changed after introduction of per inode dax?

While this is 'technically' different behavior the end user does not see any
difference in behavior if they continue without software changes.  Specifically
specifying nothing continues to operate with all the files on the FS to be
'_not_ DAX'.  While specifying '-o dax' forces DAX on all files.

This expands the default behavior in a backwards compatible manner.  The user
can now enable DAX on some files.  But this is an opt-in on the part of the
user of the FS and again does not change with existing software/scripts/etc.

Does that make sense?

Ira
Vivek Goyal Oct. 25, 2021, 6:12 p.m. UTC | #7
On Mon, Oct 25, 2021 at 10:52:51AM -0700, Ira Weiny wrote:
> On Fri, Oct 22, 2021 at 02:54:03PM +0800, JeffleXu wrote:
> > cc [Ira Weiny], author of per inode DAX on xfs/ext4
> > 
> > On 10/20/21 11:17 PM, Vivek Goyal wrote:
> > > On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
> > >>
> > >>
> > >> On 10/18/21 10:10 PM, Vivek Goyal wrote:
> > >>> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
> > >>>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> > >>>> operate the same which is equivalent to 'always'. To be consistemt with
> > >>>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
> > >>>> option is specified, the default behaviour is equal to 'inode'.
> > >>>
> > >>> Hi Jeffle,
> > >>>
> > >>> I am not sure when  -o "dax=inode"  is used as a default? If user
> > >>> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
> > >>> user will explicitly specify "-o dax=always/never/inode". So when
> > >>> is dax=inode is used as default?
> > >>
> > >> That means when neither '-o dax' nor '-o dax=always/never/inode' is
> > >> specified, it is actually equal to '-o dax=inode', which is also how
> > >> per-file DAX on ext4/xfs works.
> > > 
> 
> It's been a while so I'm fuzzy on the details of the discussions but yes that
> is the way things are now in the code.
> 
> > > [ CC dave chinner] 
> > > 
> > > Is it not change of default behavior for ext4/xfs as well. My
> > > understanding is that prior to this new dax options, "-o dax" enabled
> > > dax on filesystem and if user did not specify it, DAX is disbaled
> > > by default.
> 
> Technically it does change default behavior...  However, NOT in a way which
> breaks anything.  See below.
> 
> > > 
> > > Now after introduction of "-o dax=always/never/inode", if suddenly
> > > "-o dax=inode" became the default if user did not specify anything,
> > > that's change of behavior.
> 
> Technically yes but not in a broken way.
> 
> > >
> > > Is that intentional. If given a choice,
> > > I would rather not change default and ask user to opt-in for
> > > appropriate dax functionality.
> 
> There is no need for this.
> 
> > > 
> > > Dave, you might have thoughts on this. It makes me uncomfortable to
> > > change virtiofs dax default now just because other filesytems did it.
> > > 
> > 
> > I can only find the following discussions about the earliest record on
> > this tri-state mount option:
> > 
> > https://lore.kernel.org/lkml/20200316095509.GA13788@lst.de/
> > https://lore.kernel.org/lkml/20200401040021.GC56958@magnolia/
> > 
> > 
> > Hi, Ira Weiny,
> > 
> > Do you have any thought on this, i.e. why the default behavior has
> > changed after introduction of per inode dax?
> 
> While this is 'technically' different behavior the end user does not see any
> difference in behavior if they continue without software changes.  Specifically
> specifying nothing continues to operate with all the files on the FS to be
> '_not_ DAX'.  While specifying '-o dax' forces DAX on all files.
> 
> This expands the default behavior in a backwards compatible manner.

This is backward compatible in a sense that if somebody upgrades to new
kernel, things will still be same. 

I think a slightly problematic change is this: say I bring in persistent
memory from another system (which has FS_XFLAG_DAX set on some inodes)
and then mount it without any of the dax mount options. Then per-inode
dax will be enabled unexpectedly if I boot with newer kernels,
but it will be disabled if I mount with older kernels. Do I understand it
right?

> The user
> can now enable DAX on some files.  But this is an opt-in on the part of the
> user of the FS and again does not change with existing software/scripts/etc.

I don't understand this "opt-in" bit. If the user mounts an fs without
specifying any of the dax options, then per-inode dax will still be
enabled if the inode has the correct flag set. So is setting the flag being
considered as the opt-in (instead of the mount option)?

If setting of flag is being considered as opt-in, that probably will not
work very well with virtiofs. Because server can enforce a different
policy for enabling per file dax (instead of FS_XFLAG_DAX).

And given there are two entities here (client and server), I think it
will be good if we give the client a chance as well to decide whether
it wants to enable per-file dax or not. I know it can always do
"dax=never", but it can still be broken if the client software remains
the same but the host/server software is upgraded or its command line is changed.

So for virtiofs, I think better behavior is to continue to not enable
any dax until and unless user opts-in using "-o dax=foo" options.

Thanks
Vivek



> 
> Does that make sense?
> 
> Ira
>
Ira Weiny Oct. 25, 2021, 7:02 p.m. UTC | #8
On Mon, Oct 25, 2021 at 02:12:10PM -0400, Vivek Goyal wrote:
> On Mon, Oct 25, 2021 at 10:52:51AM -0700, Ira Weiny wrote:
> > On Fri, Oct 22, 2021 at 02:54:03PM +0800, JeffleXu wrote:
> > > cc [Ira Weiny], author of per inode DAX on xfs/ext4
> > > 
> > > On 10/20/21 11:17 PM, Vivek Goyal wrote:
> > > > On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
> > > >>
> > > >>
> > > >> On 10/18/21 10:10 PM, Vivek Goyal wrote:
> > > >>> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
> > > >>>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> > > >>>> operate the same which is equivalent to 'always'. To be consistemt with
> > > >>>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
> > > >>>> option is specified, the default behaviour is equal to 'inode'.
> > > >>>
> > > >>> Hi Jeffle,
> > > >>>
> > > >>> I am not sure when  -o "dax=inode"  is used as a default? If user
> > > >>> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
> > > >>> user will explicitly specify "-o dax=always/never/inode". So when
> > > >>> is dax=inode is used as default?
> > > >>
> > > >> That means when neither '-o dax' nor '-o dax=always/never/inode' is
> > > >> specified, it is actually equal to '-o dax=inode', which is also how
> > > >> per-file DAX on ext4/xfs works.
> > > > 
> > 
> > It's been a while so I'm fuzzy on the details of the discussions but yes that
> > is the way things are now in the code.
> > 
> > > > [ CC dave chinner] 
> > > > 
> > > > Is it not change of default behavior for ext4/xfs as well. My
> > > > understanding is that prior to this new dax options, "-o dax" enabled
> > > > dax on filesystem and if user did not specify it, DAX is disbaled
> > > > by default.
> > 
> > Technically it does change default behavior...  However, NOT in a way which
> > breaks anything.  See below.
> > 
> > > > 
> > > > Now after introduction of "-o dax=always/never/inode", if suddenly
> > > > "-o dax=inode" became the default if user did not specify anything,
> > > > that's change of behavior.
> > 
> > Technically yes but not in a broken way.
> > 
> > > >
> > > > Is that intentional. If given a choice,
> > > > I would rather not change default and ask user to opt-in for
> > > > appropriate dax functionality.
> > 
> > There is no need for this.
> > 
> > > > 
> > > > Dave, you might have thoughts on this. It makes me uncomfortable to
> > > > change virtiofs dax default now just because other filesytems did it.
> > > > 
> > > 
> > > I can only find the following discussions about the earliest record on
> > > this tri-state mount option:
> > > 
> > > https://lore.kernel.org/lkml/20200316095509.GA13788@lst.de/
> > > https://lore.kernel.org/lkml/20200401040021.GC56958@magnolia/
> > > 
> > > 
> > > Hi, Ira Weiny,
> > > 
> > > Do you have any thought on this, i.e. why the default behavior has
> > > changed after introduction of per inode dax?
> > 
> > While this is 'technically' different behavior the end user does not see any
> > difference in behavior if they continue without software changes.  Specifically
> > specifying nothing continues to operate with all the files on the FS to be
> > '_not_ DAX'.  While specifying '-o dax' forces DAX on all files.
> > 
> > This expands the default behavior in a backwards compatible manner.
> 
> This is backward compatible in a sense that if somebody upgrades to new
> kernel, things will still be same. 
> 
> I think little problematic change is that say I bring in persistent
> memory from another system (which has FS_XFLAGS_DAX set on some inodes)
> and then mount it without andy of the dax mount options, then per
> inode dax will be enabled unexpectedly if I boot with newer kernels
> but it will be disable if I mount with older kernels. Do I understand it
> right.

Indeed that will happen.  However, wouldn't the users (software) of those files
have knowledge that those files were DAX and want to continue with them in that
mode?

> 
> > The user
> > can now enable DAX on some files.  But this is an opt-in on the part of the
> > user of the FS and again does not change with existing software/scripts/etc.
> 
> Don't understand this "opt-in" bit. If user mounts an fs without
> specifying any of the dax options, then per inode dax will still be
> enabled if inode has the correct flag set.

But only users who actually set that flag 'opt-in'.

> So is setting of flag being
> considered as opt-in (insted of mount option).

Yes.

> 
> If setting of flag is being considered as opt-in, that probably will not
> work very well with virtiofs. Because server can enforce a different
> policy for enabling per file dax (instead of FS_XFLAG_DAX).

I'm not sure I understand how this happens?  I think the server probably has to
enable per INODE by default to allow the client to do what the end user wants.

I agree that if the end user is expecting DAX and the server disables it then
that is a problem but couldn't that happen before?  Maybe I'm getting confused
because I'm not familiar enough with virtiofs.

> 
> And given there are two entities here (client and server), I think it
> will be good if if we give client a chance as well to decide whether
> it wants to enable per file dax or not. I know it can alwasy do 
> "dax=never" but it can still be broken if client software remains
> same but host/server software is upgraded or commnad line changed.

But the files are 'owned' by a single user or group of users who must have
placed the file in DAX mode at some point right?

> 
> So for virtiofs, I think better behavior is to continue to not enable
> any dax until and unless user opts-in using "-o dax=foo" options.

I'm not sure, maybe.

Ira
Vivek Goyal Oct. 25, 2021, 7:33 p.m. UTC | #9
On Mon, Oct 25, 2021 at 12:02:01PM -0700, Ira Weiny wrote:
> On Mon, Oct 25, 2021 at 02:12:10PM -0400, Vivek Goyal wrote:
> > On Mon, Oct 25, 2021 at 10:52:51AM -0700, Ira Weiny wrote:
> > > On Fri, Oct 22, 2021 at 02:54:03PM +0800, JeffleXu wrote:
> > > > cc [Ira Weiny], author of per inode DAX on xfs/ext4
> > > > 
> > > > On 10/20/21 11:17 PM, Vivek Goyal wrote:
> > > > > On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
> > > > >>
> > > > >>
> > > > >> On 10/18/21 10:10 PM, Vivek Goyal wrote:
> > > > >>> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
> > > > >>>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> > > > >>>> operate the same which is equivalent to 'always'. To be consistemt with
> > > > >>>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
> > > > >>>> option is specified, the default behaviour is equal to 'inode'.
> > > > >>>
> > > > >>> Hi Jeffle,
> > > > >>>
> > > > >>> I am not sure when  -o "dax=inode"  is used as a default? If user
> > > > >>> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
> > > > >>> user will explicitly specify "-o dax=always/never/inode". So when
> > > > >>> is dax=inode is used as default?
> > > > >>
> > > > >> That means when neither '-o dax' nor '-o dax=always/never/inode' is
> > > > >> specified, it is actually equal to '-o dax=inode', which is also how
> > > > >> per-file DAX on ext4/xfs works.
> > > > > 
> > > 
> > > It's been a while so I'm fuzzy on the details of the discussions but yes that
> > > is the way things are now in the code.
> > > 
> > > > > [ CC dave chinner] 
> > > > > 
> > > > > Is it not change of default behavior for ext4/xfs as well. My
> > > > > understanding is that prior to this new dax options, "-o dax" enabled
> > > > > dax on filesystem and if user did not specify it, DAX is disbaled
> > > > > by default.
> > > 
> > > Technically it does change default behavior...  However, NOT in a way which
> > > breaks anything.  See below.
> > > 
> > > > > 
> > > > > Now after introduction of "-o dax=always/never/inode", if suddenly
> > > > > "-o dax=inode" became the default if user did not specify anything,
> > > > > that's change of behavior.
> > > 
> > > Technically yes but not in a broken way.
> > > 
> > > > >
> > > > > Is that intentional. If given a choice,
> > > > > I would rather not change default and ask user to opt-in for
> > > > > appropriate dax functionality.
> > > 
> > > There is no need for this.
> > > 
> > > > > 
> > > > > Dave, you might have thoughts on this. It makes me uncomfortable to
> > > > > change virtiofs dax default now just because other filesytems did it.
> > > > > 
> > > > 
> > > > I can only find the following discussions about the earliest record on
> > > > this tri-state mount option:
> > > > 
> > > > https://lore.kernel.org/lkml/20200316095509.GA13788@lst.de/
> > > > https://lore.kernel.org/lkml/20200401040021.GC56958@magnolia/
> > > > 
> > > > 
> > > > Hi, Ira Weiny,
> > > > 
> > > > Do you have any thought on this, i.e. why the default behavior has
> > > > changed after introduction of per inode dax?
> > > 
> > > While this is 'technically' different behavior the end user does not see any
> > > difference in behavior if they continue without software changes.  Specifically
> > > specifying nothing continues to operate with all the files on the FS to be
> > > '_not_ DAX'.  While specifying '-o dax' forces DAX on all files.
> > > 
> > > This expands the default behavior in a backwards compatible manner.
> > 
> > This is backward compatible in a sense that if somebody upgrades to new
> > kernel, things will still be same. 
> > 
> > I think little problematic change is that say I bring in persistent
> > memory from another system (which has FS_XFLAGS_DAX set on some inodes)
> > and then mount it without andy of the dax mount options, then per
> > inode dax will be enabled unexpectedly if I boot with newer kernels
> > but it will be disable if I mount with older kernels. Do I understand it
> > right.
> 
> Indeed that will happen.  However, wouldn't the users (software) of those files
> have knowledge that those files were DAX and want to continue with them in that
> mode?

I am not sure. Say that before the per-inode dax feature, I had written a script
which walks through all the mount points and figures out if dax is enabled
or not. I could simply look at the mount options and tell if dax could be
enabled or not.

But now same script will give false results as per inode dax could
still be enabled.

> 
> > 
> > > The user
> > > can now enable DAX on some files.  But this is an opt-in on the part of the
> > > user of the FS and again does not change with existing software/scripts/etc.
> > 
> > Don't understand this "opt-in" bit. If user mounts an fs without
> > specifying any of the dax options, then per inode dax will still be
> > enabled if inode has the correct flag set.
> 
> But only users who actually set that flag 'opt-in'.
> 
> > So is setting of flag being
> > considered as opt-in (insted of mount option).
> 
> Yes.
> 
> > 
> > If setting of flag is being considered as opt-in, that probably will not
> > work very well with virtiofs. Because server can enforce a different
> > policy for enabling per file dax (instead of FS_XFLAG_DAX).
> 
> I'm not sure I understand how this happens?  I think the server probably has to
> enable per INODE by default to allow the client to do what the end users wants.
> 

Server can have either per inode disabled or enabled. If enabled, it could
determine DAX status of file based on FS_XFLAG_DAX or based on something
else depending on server policy. Users want to be able to determine
DAX status of file based on say file size.

> I agree that if the end user is expecting DAX and the server disables it then
> that is a problem but couldn't that happen before?

If the end user expects to enable DAX and the server can't enable it, then the
mount fails. So currently if you mount with "-o dax" and the server does not
support DAX, the mount will fail.

I think the same should happen when per-inode DAX is introduced for virtiofs.
If the server does not support per-inode dax and the user mounts with "-o
dax=inode", then the mount should fail.

In fact, this is another reason that probably "dax=inode" should not be
default. Say client is new and server is old and does not support
per inode dax, then client might start failing mount after client
upgrade, and that's not good.

The more I think about it, the more it feels like "dax=never" should be
the default if the user has not specified any of the dax options. This
probably will introduce the least amount of surprise, at least for virtiofs.
IMHO, it probably would have made sense even for ext4/xfs but that
ship has already sailed.

> Maybe I'm getting confused
> because I'm not familiar enough with virtiofs.
> 
> > 
> > And given there are two entities here (client and server), I think it
> > will be good if if we give client a chance as well to decide whether
> > it wants to enable per file dax or not. I know it can alwasy do 
> > "dax=never" but it can still be broken if client software remains
> > same but host/server software is upgraded or commnad line changed.
> 
> But the files are 'owned' by a single user or group of users who must have
> placed the file in DAX mode at some point right?

Yes, either users/groups/admin might have set FS_XFLAG_DAX on inodes. But
now there is another controller (virtiofs server) which determines whether
that flag takes effect or not (based on server settings).

We did not have this server scenario in case of local filesystems.

Thanks
Vivek
>
> > 
> > So for virtiofs, I think better behavior is to continue to not enable
> > any dax until and unless user opts-in using "-o dax=foo" options.
> 
> I'm not sure, maybe.
> 
> Ira
>
Ira Weiny Oct. 25, 2021, 8:41 p.m. UTC | #10
On Mon, Oct 25, 2021 at 03:33:31PM -0400, Vivek Goyal wrote:

[snip]

> > > > > > 
> > > > > 
> > > > > I can only find the following discussions about the earliest record on
> > > > > this tri-state mount option:
> > > > > 
> > > > > https://lore.kernel.org/lkml/20200316095509.GA13788@lst.de/
> > > > > https://lore.kernel.org/lkml/20200401040021.GC56958@magnolia/
> > > > > 
> > > > > 
> > > > > Hi, Ira Weiny,
> > > > > 
> > > > > Do you have any thought on this, i.e. why the default behavior has
> > > > > changed after introduction of per inode dax?
> > > > 
> > > > While this is 'technically' different behavior the end user does not see any
> > > > difference in behavior if they continue without software changes.  Specifically
> > > > specifying nothing continues to operate with all the files on the FS to be
> > > > '_not_ DAX'.  While specifying '-o dax' forces DAX on all files.
> > > > 
> > > > This expands the default behavior in a backwards compatible manner.
> > > 
> > > This is backward compatible in a sense that if somebody upgrades to new
> > > kernel, things will still be same. 
> > > 
> > > I think little problematic change is that say I bring in persistent
> > > memory from another system (which has FS_XFLAGS_DAX set on some inodes)
> > > and then mount it without andy of the dax mount options, then per
> > > inode dax will be enabled unexpectedly if I boot with newer kernels
> > > but it will be disable if I mount with older kernels. Do I understand it
> > > right.
> > 
> > Indeed that will happen.  However, wouldn't the users (software) of those files
> > have knowledge that those files were DAX and want to continue with them in that
> > mode?
> 
> I am not sure. Say before per-inode dax feature, I had written a script
> which walks though all the mount points and figure out if dax is enabled
> or not. I could simply look at mount options and tell if dax could be
> enabled or not.
> 
> But now same script will give false results as per inode dax could
> still be enabled.

The mount option is being deprecated.  So it is best to start to phase out
scripts like that.

> 
> > 
> > > 
> > > > The user
> > > > can now enable DAX on some files.  But this is an opt-in on the part of the
> > > > user of the FS and again does not change with existing software/scripts/etc.
> > > 
> > > Don't understand this "opt-in" bit. If user mounts an fs without
> > > specifying any of the dax options, then per inode dax will still be
> > > enabled if inode has the correct flag set.
> > 
> > But only users who actually set that flag 'opt-in'.
> > 
> > > So is setting of flag being
> > > considered as opt-in (insted of mount option).
> > 
> > Yes.
> > 
> > > 
> > > If setting of flag is being considered as opt-in, that probably will not
> > > work very well with virtiofs. Because server can enforce a different
> > > policy for enabling per file dax (instead of FS_XFLAG_DAX).
> > 
> > I'm not sure I understand how this happens?  I think the server probably has to
> > enable per INODE by default to allow the client to do what the end users wants.
> > 
> 
> Server can have either per inode disabled or enabled. If enabled, it could
> determine DAX status of file based on FS_XFLAG_DAX or based on something
> else depending on server policy. Users want to be able to determine
> DAX status of file based on say file size.

'file size'?  I'm not sure how that would work.  Did you mean something else?

> 
> > I agree that if the end user is expecting DAX and the server disables it then
> > that is a problem but couldn't that happen before?
> 
> If end user expects to enable DAX and sever can't enable it, then mount
> fails. So currently if you mount "-o dax" and server does not support
> DAX, mount will fail.

The same could happen on a server where the underlying device does not support
DAX.  What if the server was mounted without '-o dax'?  Wouldn't a client mount
with '-o dax' fail now?  So why can't the same be true with the new set of
options?

> 
> I think same should happen when per inode DAX is introduced for virtiofs.
> If sever does not support per inode dax and user mounts with "-o
> dax=inode", then mount should fail.

I think that is reasonable.  The client can't mount with something the server
can't support.

> 
> In fact, this is another reason that probably "dax=inode" should not be
> default. Say client is new and server is old and does not support
> per inode dax, then client might start failing mount after client
> upgrade, and that's not good.

Shouldn't the client fall back to whatever the server supports?  It is the same
as the client wanting DAX now without server and/or device support.  It just
can't get it.  Right?

> 
> More I think about it, more it feels like that "dax=never" should be
> the default if user has not specified any of the dax options. This
> probably will introduce least amount of surprise. Atleast for virtiofs.
> IMHO, it probably would have made sense even for ext4/xfs but that
> ship has already sailed.

I disagree because dax=never is backwards from what we really want for the
future.  'dax=inode' is the most flexible setting.  In fact that setting is
best for the server by default which allows more control to be in the clients
hands.  Would you agree?

> 
> > Maybe I'm getting confused
> > because I'm not familiar enough with virtiofs.
> > 
> > > 
> > > And given there are two entities here (client and server), I think it
> > > will be good if if we give client a chance as well to decide whether
> > > it wants to enable per file dax or not. I know it can alwasy do 
> > > "dax=never" but it can still be broken if client software remains
> > > same but host/server software is upgraded or commnad line changed.
> > 
> > But the files are 'owned' by a single user or group of users who must have
> > placed the file in DAX mode at some point right?
> 
> Yes, either users/groups/admin might have set FS_XFLAG_DAX on inodes. But
> now there is another controller (virtiofs server) which determines whether
> that flag takes affect or not (based on server settings).

I think this is just like the file being on a device which does not support
DAX.  The file inode flag can be set but the file will not be in DAX mode on a
non-dax device.  So in this case the server is a non-dax device.

Ira

> 
> We did not have this server scenario in case of local filesystems.
> 
> Thanks
> Vivek
> >
> > > 
> > > So for virtiofs, I think better behavior is to continue to not enable
> > > any dax until and unless user opts-in using "-o dax=foo" options.
> > 
> > I'm not sure, maybe.
> > 
> > Ira
> > 
>
Vivek Goyal Oct. 26, 2021, 1:45 p.m. UTC | #11
On Mon, Oct 25, 2021 at 01:41:45PM -0700, Ira Weiny wrote:
> On Mon, Oct 25, 2021 at 03:33:31PM -0400, Vivek Goyal wrote:
> 
> [snip]
> 
> > > > > > > 
> > > > > > 
> > > > > > I can only find the following discussions about the earliest record on
> > > > > > this tri-state mount option:
> > > > > > 
> > > > > > https://lore.kernel.org/lkml/20200316095509.GA13788@lst.de/
> > > > > > https://lore.kernel.org/lkml/20200401040021.GC56958@magnolia/
> > > > > > 
> > > > > > 
> > > > > > Hi, Ira Weiny,
> > > > > > 
> > > > > > Do you have any thought on this, i.e. why the default behavior has
> > > > > > changed after introduction of per inode dax?
> > > > > 
> > > > > While this is 'technically' different behavior the end user does not see any
> > > > > difference in behavior if they continue without software changes.  Specifically
> > > > > specifying nothing continues to operate with all the files on the FS to be
> > > > > '_not_ DAX'.  While specifying '-o dax' forces DAX on all files.
> > > > > 
> > > > > This expands the default behavior in a backwards compatible manner.
> > > > 
> > > > This is backward compatible in a sense that if somebody upgrades to new
> > > > kernel, things will still be same. 
> > > > 
> > > > I think little problematic change is that say I bring in persistent
> > > > memory from another system (which has FS_XFLAGS_DAX set on some inodes)
> > > > and then mount it without andy of the dax mount options, then per
> > > > inode dax will be enabled unexpectedly if I boot with newer kernels
> > > > but it will be disable if I mount with older kernels. Do I understand it
> > > > right.
> > > 
> > > Indeed that will happen.  However, wouldn't the users (software) of those files
> > > have knowledge that those files were DAX and want to continue with them in that
> > > mode?
> > 
> > I am not sure. Say before per-inode dax feature, I had written a script
> > which walks though all the mount points and figure out if dax is enabled
> > or not. I could simply look at mount options and tell if dax could be
> > enabled or not.
> > 
> > But now same script will give false results as per inode dax could
> > still be enabled.
> 
> The mount option is being deprecated.  So it is best to start to phase out
> scripts like that.

Sure. But this change does break such scripts (if there are any). I am
just responding to previous comments that existing software/scripts
should not be broken. 

> 
> > 
> > > 
> > > > 
> > > > > The user
> > > > > can now enable DAX on some files.  But this is an opt-in on the part of the
> > > > > user of the FS and again does not change with existing software/scripts/etc.
> > > > 
> > > > Don't understand this "opt-in" bit. If user mounts an fs without
> > > > specifying any of the dax options, then per inode dax will still be
> > > > enabled if inode has the correct flag set.
> > > 
> > > But only users who actually set that flag 'opt-in'.
> > > 
> > > > So is setting of flag being
> > > > considered as opt-in (insted of mount option).
> > > 
> > > Yes.
> > > 
> > > > 
> > > > If setting of flag is being considered as opt-in, that probably will not
> > > > work very well with virtiofs. Because server can enforce a different
> > > > policy for enabling per file dax (instead of FS_XFLAG_DAX).
> > > 
> > > I'm not sure I understand how this happens?  I think the server probably has to
> > > enable per INODE by default to allow the client to do what the end users wants.
> > > 
> > 
> > Server can have either per inode disabled or enabled. If enabled, it could
> > determine DAX status of file based on FS_XFLAG_DAX or based on something
> > else depending on server policy. Users want to be able to determine
> > DAX status of file based on say file size.
> 
> 'file size'?  I'm not sure how that would work.  Did you mean something else?

So virtiofs uses DAX only to bypass page cache in guest. virtiofs pci
device advertizes a range of memory which is directly accessed using
dax. We use a chunk size of 2MB. That means for every 2MB chunk, there
will be around 512 pages. Each struct page will consume around 64 bytes
of RAM in guest. So for every 2MB chunk of file, RAM usage in guest
is around 512 * 64 = 32768 bytes (32 KiB).

So there are users who claim that for smaller files say 4K or 8K in size,
it is probably better to not use DAX at all. In that case we will use
say 4K of page cache and leave DAX memory to be used for larger files.
(This will be useful only if virtiofs cache memory is in short supply). 

Hence the idea that why not use per inode dax and enable dax selectively
on files as needed. Given we have a remote server running, it gives
extra capability that we can take this DAX decision dynamically based
on some server policy (and not necessarily rely on FS_XFLAG_DAX stuff).

So one such policy is a file-size-based policy, where if a file is
small, the server might not want to use DAX on that file. There could
be many more such policies depending on where DAX is most useful
in the context of virtiofs.

> 
> > 
> > > I agree that if the end user is expecting DAX and the server disables it then
> > > that is a problem but couldn't that happen before?
> > 
> > If end user expects to enable DAX and sever can't enable it, then mount
> > fails. So currently if you mount "-o dax" and server does not support
> > DAX, mount will fail.
> 
> The same could happen on a server where the underlying device does not support
> DAX.  What if the server was mounted without '-o dax'?

In general, there is no connection between DAX in guest and device on
host enabling DAX. We can very well enable DAX in guest without having
any DAX enabled on host device. From virtiofs perspective, we are just
mmapping host files in qemu address space and that works both with
dax enabled/disabled devices on host.

> Wouldn't a client mount
> with '-o dax' fail now?  So why can't the same be true with the new set of
> options?

So yes, if server does not support DAX and client asks for DAX, mount
will fail. (As it should fail).

Problem with enabling "dax=inode" by default is that if a client
is mounted without any dax option, then dax is disabled. Now if a server
is upgraded and restarted with some dax policy enabled, suddenly dax
will be enabled in client without it opting in for anything and client
might be surprised.

Now one argument can be hey, we have FS_XFLAG_DAX set on inode, so it
is ok to turn on dax. Maybe. But the virtiofs server can have its own
dax policies (like file size based policy), and it can ignore
FS_XFLAG_DAX completely. In that case enabling per inode dax by default
(without client opting in), seems contrary to what we are doing now.

Hence, I think not having "dax=inode" as the default is the path of least
surprise for an existing user. A user can easily tell whether dax
is being used or not just by looking at the filesystem mount options.

> 
> > 
> > I think same should happen when per inode DAX is introduced for virtiofs.
> > If sever does not support per inode dax and user mounts with "-o
> > dax=inode", then mount should fail.
> 
> I think that is reasonable.  The client can't mount with something the server
> can't support.
> 
> > 
> > In fact, this is another reason that probably "dax=inode" should not be
> > default. Say client is new and server is old and does not support
> > per inode dax, then client might start failing mount after client
> > upgrade, and that's not good.
> 
> Shouldn't the client fall back to whatever the server supports?  It is the same
> as the client wanting DAX now without server and/or device support.  It just
> can't get it.  Right?

Well, current model is that fail the operation and let user try mount
again without DAX.

If we were to design a fallback, then the question will be how the user will
know that the server does not support DAX and that we fell back to non-dax.
Also, it will be a change of behavior from the existing non-fallback semantics.

I guess one could argue that if you are moving to new dax options
(-o dax=inode/always/never), then this is an opportunity to move to
fallback model. My concern remains, though, that if the user specified
"dax=inode or dax=always" and the server does not support it, how will the
user know we are not using dax?

Not sure there is a good answer here. In some cases users like to
see explicit failure if some option can't be supported. IIRC, in case
of overlayfs, if users passed in "-o metacopy=on" and overlayfs
can't enable it, then users expected a failure (instead of silently
ignoring metacopy).

So choosing not to fall back seems ok to me. Nobody has complained so far.

> 
> > 
> > More I think about it, more it feels like that "dax=never" should be
> > the default if user has not specified any of the dax options. This
> > probably will introduce least amount of surprise. Atleast for virtiofs.
> > IMHO, it probably would have made sense even for ext4/xfs but that
> > ship has already sailed.
> 
> I disagree because dax=never is backwards from what we really want for the
> future.  'dax=inode' is the most flexible setting.

If your goal is to enable dax by default if FS_XFLAG_DAX is set, then
yes, dax=inode default makes sense. I was only complaining about the change
of behavior in some cases. I mean one could argue the same thing for
dax=always. If block device supports dax, then enable dax by default
until and unless user specifies "-o dax=never". But previous options
were not designed that way. A user had to opt-in for DAX behavior
even if device had the capability to support DAX.

And in the same line, I am arguing a user should opt-in for per inode
DAX, even if inode has the capability to be used as DAX inode.

And I don't mind "dax=inode" being default if that's deemed more useful.
My concern there is only change of behavior by default.

> In fact that setting is
> best for the server by default which allows more control to be in the clients
> hands.  Would you agree?

"dax=inode" on server default makes sense to me (as long as client asked
for dax=inode). Should it be enabled by default in client, I am still
afraid of change of behavior from existing dax mount options and having
to explain and justify change of behavior to users.

> 
> > 
> > > Maybe I'm getting confused
> > > because I'm not familiar enough with virtiofs.
> > > 
> > > > 
> > > > And given there are two entities here (client and server), I think it
> > > > will be good if if we give client a chance as well to decide whether
> > > > it wants to enable per file dax or not. I know it can alwasy do 
> > > > "dax=never" but it can still be broken if client software remains
> > > > same but host/server software is upgraded or commnad line changed.
> > > 
> > > But the files are 'owned' by a single user or group of users who must have
> > > placed the file in DAX mode at some point right?
> > 
> > Yes, either users/groups/admin might have set FS_XFLAG_DAX on inodes. But
> > now there is another controller (virtiofs server) which determines whether
> > that flag takes affect or not (based on server settings).
> 
> I think this is just like the file being on a device which does not support
> DAX.  The file inode flag can be set but the file will not be in DAX mode on a
> non-dax device.  So in this case the server is a non-dax device.

So if I mount with "dax=inode or dax=always" and the block device does not
support DAX, what happens? Does the mount fail, or does it fall back silently
to non-dax mode?

I suspect that in new dax options it falls back to non-dax mode. And
your argument seems to be that user should stat every file and
query for STATX_ATTR_DAX to determine if dax is enabled on file
or not.

On one hand, I am not too fond of these new semantics of automatic fallback
and dax=inode default, and on the other hand, I want to be as close
as possible to ext4/xfs semantics so that there is less confusion for
users.

Vivek
Jingbo Xu Oct. 27, 2021, 6 a.m. UTC | #12
Thanks for your reply, Ira Weiny.


On 10/26/21 3:02 AM, Ira Weiny wrote:
> [snippet]
>>>> Hi, Ira Weiny,
>>>>
>>>> Do you have any thought on this, i.e. why the default behavior has
>>>> changed after introduction of per inode dax?
>>>
>>> While this is 'technically' different behavior the end user does not see any
>>> difference in behavior if they continue without software changes.  Specifically
>>> specifying nothing continues to operate with all the files on the FS to be
>>> '_not_ DAX'.  While specifying '-o dax' forces DAX on all files.
>>>
>>> This expands the default behavior in a backwards compatible manner.
>>
>> This is backward compatible in a sense that if somebody upgrades to new
>> kernel, things will still be same. 
>>
>> I think little problematic change is that say I bring in persistent
>> memory from another system (which has FS_XFLAGS_DAX set on some inodes)
>> and then mount it without andy of the dax mount options, then per
>> inode dax will be enabled unexpectedly if I boot with newer kernels
>> but it will be disable if I mount with older kernels. Do I understand it
>> right.
> 
> Indeed that will happen.  However, wouldn't the users (software) of those files
> have knowledge that those files were DAX and want to continue with them in that
> mode?
> 
>>
>>> The user
>>> can now enable DAX on some files.  But this is an opt-in on the part of the
>>> user of the FS and again does not change with existing software/scripts/etc.
>>
>> Don't understand this "opt-in" bit. If user mounts an fs without
>> specifying any of the dax options, then per inode dax will still be
>> enabled if inode has the correct flag set.
> 
> But only users who actually set that flag 'opt-in'.
> 
>> So is setting of flag being
>> considered as opt-in (insted of mount option).
> 
> Yes.
> 
>>
>> If setting of flag is being considered as opt-in, that probably will not
>> work very well with virtiofs. Because server can enforce a different
>> policy for enabling per file dax (instead of FS_XFLAG_DAX).
> 
> I'm not sure I understand how this happens?  I think the server probably has to
> enable per INODE by default to allow the client to do what the end users wants.
> 
> I agree that if the end user is expecting DAX and the server disables it then
> that is a problem but couldn't that happen before?  Maybe I'm getting confused
> because I'm not familiar enough with virtiofs.
> 
>>
>> And given there are two entities here (client and server), I think it
>> will be good if if we give client a chance as well to decide whether
>> it wants to enable per file dax or not. I know it can alwasy do 
>> "dax=never" but it can still be broken if client software remains
>> same but host/server software is upgraded or commnad line changed.
> 
> But the files are 'owned' by a single user or group of users who must have
> placed the file in DAX mode at some point right?

So this is the essence of this issue, i.e. whether those who mount the
filesystem (responsible for specifying mount options) and those who set
the persistent inode flag are the same group of people.

For a local filesystem like ext4/xfs, these two entities are most likely
the same group of people, so we can say that 'the default behavior is still
backward compatible'.

However this semantic can be challenged a little by the example exposed
by Vivek, that these two entities may not be one group even in local
filesystem. Though this case may be rare in real world.

But for a remote filesystem like virtiofs, the deviation between these two
entities can be larger. For example, if the exported directory on the host
is shared by two guests and guest A sets the persistent inode flag for
one file, then guest B will also see that DAX is enabled for this file
when the virtiofs is mounted with the default option inside guest B. In
this case, the persistent inode flag is set neither by guest B itself nor
by the server, and it may break the expectation of guest B.

> 
>>
>> So for virtiofs, I think better behavior is to continue to not enable
>> any dax until and unless user opts-in using "-o dax=foo" options.
> 

I also prefer keeping the 'dax=never' default behavior for virtiofs.
Jingbo Xu Oct. 29, 2021, 8:33 a.m. UTC | #13
On 10/20/21 10:48 PM, Vivek Goyal wrote:
> On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
>>
>>
>> On 10/18/21 10:10 PM, Vivek Goyal wrote:
>>> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
>>>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
>>>> operate the same which is equivalent to 'always'. To be consistemt with
>>>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
>>>> option is specified, the default behaviour is equal to 'inode'.
>>>
>>> Hi Jeffle,
>>>
>>> I am not sure when  -o "dax=inode"  is used as a default? If user
>>> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
>>> user will explicitly specify "-o dax=always/never/inode". So when
>>> is dax=inode is used as default?
>>
>> That means when neither '-o dax' nor '-o dax=always/never/inode' is
>> specified, it is actually equal to '-o dax=inode', which is also how
>> per-file DAX on ext4/xfs works.
>>
>> This default behaviour for local filesystem, e.g. ext4/xfs, may be
>> straightforward, since the disk inode will be read into memory during
>> the inode instantiation, and checking for persistent inode attribute
>> shall be realatively cheap, except that the default behaviour has
>> changed from 'dax=never' to 'dax=inode'.
> 
> Interesting that ext4/xfs allowed for this behavior change.
> 
>>
>> Come back to virtiofs, when neither '-o dax' nor '-o
>> dax=always/never/inode' is specified, and it actually behaves as '-o
>> dax=inode', as long as '-o dax=server/attr' option is not specified for
>> virtiofsd, virtiofsd will always clear FUSE_ATTR_DAX and thus guest will
>> always disable DAX. IOWs, the guest virtiofs atually behaves as '-o
>> dax=never' when neither '-o dax' nor '-o dax=always/never/inode' is
>> specified, and '-o dax=server/attr' option is not specified for virtiofsd.
>>
>> But I'm okay if we need to change the default behaviour for virtiofs.
> 
> This is change of behavior from client's perspective. Even if client
> did not opt-in for DAX, DAX can be enabled based on server's setting.
> Not that there is anything wrong with it, but change of behavior part
> concerns me.
> 
> In case of virtiofs, lot of features we are controlling from server.
> Client typically just calls "mount" and there are not many options
> users can specify for mount.  
> 
> Given we already allowed to make client a choice about DAX behavior,
> I will feel more comfortable that we don't change it and let client
> request a specific DAX mode and if client does not specify anything,
> then DAX is not enabled.
> 

Hi Vivek,

How about the following design about the default behavior to move this
patchset forward, considering the discussion on another thread [1]?

- guest side: '-o dax=inode' acts as the default, keeping consistent
with xfs/ext4
- virtiofsd: the default behavior is like, neither file size based
policy ('dax=server') nor persistent inode flags based policy
('dax=attr') is used, though virtiofsd indeed advertises that
it supports per inode DAX feature (so that it won't fail FUSE_INIT
negotiation phase when guest advertises dax=inode by default)... In
fact, it acts like 'dax=never' in this case.

Then when guest opts-in and specifies '-o dax=inode' manually, then it
shall realize that proper configuration for virtiofsd is also needed (-o
dax=server|attr).

[1] https://www.spinics.net/lists/linux-xfs/msg56642.html
Vivek Goyal Oct. 29, 2021, 1:03 p.m. UTC | #14
On Fri, Oct 29, 2021 at 04:33:06PM +0800, JeffleXu wrote:
> 
> 
> On 10/20/21 10:48 PM, Vivek Goyal wrote:
> > On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
> >>
> >>
> >> On 10/18/21 10:10 PM, Vivek Goyal wrote:
> >>> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
> >>>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
> >>>> operate the same which is equivalent to 'always'. To be consistemt with
> >>>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
> >>>> option is specified, the default behaviour is equal to 'inode'.
> >>>
> >>> Hi Jeffle,
> >>>
> >>> I am not sure when  -o "dax=inode"  is used as a default? If user
> >>> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
> >>> user will explicitly specify "-o dax=always/never/inode". So when
> >>> is dax=inode is used as default?
> >>
> >> That means when neither '-o dax' nor '-o dax=always/never/inode' is
> >> specified, it is actually equal to '-o dax=inode', which is also how
> >> per-file DAX on ext4/xfs works.
> >>
> >> This default behaviour for local filesystem, e.g. ext4/xfs, may be
> >> straightforward, since the disk inode will be read into memory during
> >> the inode instantiation, and checking for persistent inode attribute
> >> shall be realatively cheap, except that the default behaviour has
> >> changed from 'dax=never' to 'dax=inode'.
> > 
> > Interesting that ext4/xfs allowed for this behavior change.
> > 
> >>
> >> Come back to virtiofs, when neither '-o dax' nor '-o
> >> dax=always/never/inode' is specified, and it actually behaves as '-o
> >> dax=inode', as long as '-o dax=server/attr' option is not specified for
> >> virtiofsd, virtiofsd will always clear FUSE_ATTR_DAX and thus guest will
> >> always disable DAX. IOWs, the guest virtiofs atually behaves as '-o
> >> dax=never' when neither '-o dax' nor '-o dax=always/never/inode' is
> >> specified, and '-o dax=server/attr' option is not specified for virtiofsd.
> >>
> >> But I'm okay if we need to change the default behaviour for virtiofs.
> > 
> > This is change of behavior from client's perspective. Even if client
> > did not opt-in for DAX, DAX can be enabled based on server's setting.
> > Not that there is anything wrong with it, but change of behavior part
> > concerns me.
> > 
> > In case of virtiofs, lot of features we are controlling from server.
> > Client typically just calls "mount" and there are not many options
> > users can specify for mount.  
> > 
> > Given we already allowed to make client a choice about DAX behavior,
> > I will feel more comfortable that we don't change it and let client
> > request a specific DAX mode and if client does not specify anything,
> > then DAX is not enabled.
> > 
> 
> Hi Vivek,
> 
> How about the following design about the default behavior to move this
> patchset forward, considering the discussion on another thread [1]?
> 
> - guest side: '-o dax=inode' acts as the default, keeping consistent
> with xfs/ext4

This sounds good.

> - virtiofsd: the default behavior is like, neither file size based
> policy ('dax=server') nor persistent inode flags based policy
> ('dax=attr') is used, though virtiofsd indeed advertises that
> it supports per inode DAX feature (so that it won't fail FUSE_INIT
> negotiation phase when guest advertises dax=inode by default)... In
> fact, it acts like 'dax=never' in this case.

Not sure why virtiofsd needs to advertise that it supports per inode
DAX even if no per inode dax policy is in affect. Guest will know that
server is not supporting per inode DAX. But it will not return an
error to user space (because dax=inode seems to be advisory).

IOW, this is very similar to the case of using dax=inode on a block
device which does not support DAX. No errors and no warnings.

> 
> Then when guest opts-in and specifies '-o dax=inode' manually, then it
> shall realize that proper configuration for virtiofsd is also needed (-o
> dax=server|attr).

I gave some comments w.r.t dax=server naming in your posting. Not sure if
you got a chance to look at it.

Thanks
Vivek

> 
> [1] https://www.spinics.net/lists/linux-xfs/msg56642.html
> 
> -- 
> Thanks,
> Jeffle
>
Jingbo Xu Nov. 1, 2021, 8:21 a.m. UTC | #15
On 10/29/21 9:03 PM, Vivek Goyal wrote:
> On Fri, Oct 29, 2021 at 04:33:06PM +0800, JeffleXu wrote:
>>
>>
>> On 10/20/21 10:48 PM, Vivek Goyal wrote:
>>> On Wed, Oct 20, 2021 at 10:52:38AM +0800, JeffleXu wrote:
>>>>
>>>>
>>>> On 10/18/21 10:10 PM, Vivek Goyal wrote:
>>>>> On Mon, Oct 11, 2021 at 11:00:47AM +0800, Jeffle Xu wrote:
>>>>>> We add 'always', 'never', and 'inode' (default). '-o dax' continues to
>>>>>> operate the same which is equivalent to 'always'. To be consistemt with
>>>>>> ext4/xfs's tri-state mount option, when neither '-o dax' nor '-o dax='
>>>>>> option is specified, the default behaviour is equal to 'inode'.
>>>>>
>>>>> Hi Jeffle,
>>>>>
>>>>> I am not sure when  -o "dax=inode"  is used as a default? If user
>>>>> specifies, "-o dax" then it is equal to "-o dax=always", otherwise
>>>>> user will explicitly specify "-o dax=always/never/inode". So when
>>>>> is dax=inode is used as default?
>>>>
>>>> That means when neither '-o dax' nor '-o dax=always/never/inode' is
>>>> specified, it is actually equal to '-o dax=inode', which is also how
>>>> per-file DAX on ext4/xfs works.
>>>>
>>>> This default behaviour for local filesystem, e.g. ext4/xfs, may be
>>>> straightforward, since the disk inode will be read into memory during
>>>> the inode instantiation, and checking for persistent inode attribute
>>>> shall be realatively cheap, except that the default behaviour has
>>>> changed from 'dax=never' to 'dax=inode'.
>>>
>>> Interesting that ext4/xfs allowed for this behavior change.
>>>
>>>>
>>>> Come back to virtiofs, when neither '-o dax' nor '-o
>>>> dax=always/never/inode' is specified, and it actually behaves as '-o
>>>> dax=inode', as long as '-o dax=server/attr' option is not specified for
>>>> virtiofsd, virtiofsd will always clear FUSE_ATTR_DAX and thus guest will
>>>> always disable DAX. IOWs, the guest virtiofs atually behaves as '-o
>>>> dax=never' when neither '-o dax' nor '-o dax=always/never/inode' is
>>>> specified, and '-o dax=server/attr' option is not specified for virtiofsd.
>>>>
>>>> But I'm okay if we need to change the default behaviour for virtiofs.
>>>
>>> This is change of behavior from client's perspective. Even if client
>>> did not opt-in for DAX, DAX can be enabled based on server's setting.
>>> Not that there is anything wrong with it, but change of behavior part
>>> concerns me.
>>>
>>> In case of virtiofs, lot of features we are controlling from server.
>>> Client typically just calls "mount" and there are not many options
>>> users can specify for mount.  
>>>
>>> Given we already allowed to make client a choice about DAX behavior,
>>> I will feel more comfortable that we don't change it and let client
>>> request a specific DAX mode and if client does not specify anything,
>>> then DAX is not enabled.
>>>
>>
>> Hi Vivek,
>>
>> How about the following design about the default behavior to move this
>> patchset forward, considering the discussion on another thread [1]?
>>
>> - guest side: '-o dax=inode' acts as the default, keeping consistent
>> with xfs/ext4
> 
> This sounds good.
> 
>> - virtiofsd: the default behavior is like, neither file size based
>> policy ('dax=server') nor persistent inode flags based policy
>> ('dax=attr') is used, though virtiofsd indeed advertises that
>> it supports per inode DAX feature (so that it won't fail FUSE_INIT
>> negotiation phase when guest advertises dax=inode by default)... In
>> fact, it acts like 'dax=never' in this case.
> 
> Not sure why virtiofsd needs to advertise that it supports per inode
> DAX even if no per inode dax policy is in affect. Guest will know that
> server is not supporting per inode DAX. But it will not return an
> error to user space (because dax=inode seems to be advisory).
> 
> IOW, this is very similar to the case of using dax=inode on a block
> device which does not support DAX. No errors and no warnings.

OK. I will adopt this behavior. That is, if virtiofsd is not specified
with 'dax=server|attr' option, it won't advertise support for per inode
DAX in FUSE_INIT either. And then the client will fall back to 'dax=never'
even if it is mounted with 'dax=inode'.

> 
>>
>> Then when guest opts-in and specifies '-o dax=inode' manually, then it
>> shall realize that proper configuration for virtiofsd is also needed (-o
>> dax=server|attr).
> 
> I gave some comments w.r.t dax=server naming in your posting. Not sure if
> you got a chance to look at it.
> 
> Thanks
> Vivek
> 
>>
>> [1] https://www.spinics.net/lists/linux-xfs/msg56642.html
>>
>> -- 
>> Thanks,
>> Jeffle
>>
diff mbox series

Patch

diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 1eb6538bf1b2..4c6c64efc950 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1284,11 +1284,14 @@  static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
 	return ret;
 }
 
-int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev)
+int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode,
+			struct dax_device *dax_dev)
 {
 	struct fuse_conn_dax *fcd;
 	int err;
 
+	fc->dax_mode = dax_mode;
+
 	if (!dax_dev)
 		return 0;
 
@@ -1335,11 +1338,21 @@  static const struct address_space_operations fuse_dax_file_aops  = {
 static bool fuse_should_enable_dax(struct inode *inode)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	unsigned int dax_mode = fc->dax_mode;
+
+	if (dax_mode == FUSE_DAX_NEVER)
+		return false;
 
-	if (fc->dax)
+	/*
+	 * If 'dax=always/inode', fc->dax couldn't be NULL even when fuse
+	 * daemon doesn't support DAX, since the mount routine will fail
+	 * early in this case.
+	 */
+	if (dax_mode == FUSE_DAX_ALWAYS)
 		return true;
 
-	return false;
+	/* dax_mode == FUSE_DAX_INODE */
+	return true;
 }
 
 void fuse_dax_inode_init(struct inode *inode)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 319596df5dc6..5abf9749923f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -480,6 +480,12 @@  struct fuse_dev {
 	struct list_head entry;
 };
 
+enum fuse_dax_mode {
+	FUSE_DAX_INODE,
+	FUSE_DAX_ALWAYS,
+	FUSE_DAX_NEVER,
+};
+
 struct fuse_fs_context {
 	int fd;
 	struct file *file;
@@ -497,7 +503,7 @@  struct fuse_fs_context {
 	bool no_control:1;
 	bool no_force_umount:1;
 	bool legacy_opts_show:1;
-	bool dax:1;
+	enum fuse_dax_mode dax_mode;
 	unsigned int max_read;
 	unsigned int blksize;
 	const char *subtype;
@@ -802,6 +808,9 @@  struct fuse_conn {
 	struct list_head devices;
 
 #ifdef CONFIG_FUSE_DAX
+	/* dax mode: FUSE_DAX_* (always, never or per-file) */
+	enum fuse_dax_mode dax_mode;
+
 	/* Dax specific conn data, non-NULL if DAX is enabled */
 	struct fuse_conn_dax *dax;
 #endif
@@ -1255,7 +1264,8 @@  ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to);
 ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from);
 int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma);
 int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end);
-int fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev);
+int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode,
+			struct dax_device *dax_dev);
 void fuse_dax_conn_free(struct fuse_conn *fc);
 bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi);
 void fuse_dax_inode_init(struct inode *inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 36cd03114b6d..b4b41683e97e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -742,8 +742,12 @@  static int fuse_show_options(struct seq_file *m, struct dentry *root)
 			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
 	}
 #ifdef CONFIG_FUSE_DAX
-	if (fc->dax)
-		seq_puts(m, ",dax");
+	if (fc->dax_mode == FUSE_DAX_ALWAYS)
+		seq_puts(m, ",dax=always");
+	else if (fc->dax_mode == FUSE_DAX_NEVER)
+		seq_puts(m, ",dax=never");
+	else if (fc->dax_mode == FUSE_DAX_INODE)
+		seq_puts(m, ",dax=inode");
 #endif
 
 	return 0;
@@ -1493,7 +1497,7 @@  int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 	sb->s_subtype = ctx->subtype;
 	ctx->subtype = NULL;
 	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
-		err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
+		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
 		if (err)
 			goto err;
 	}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 0ad89c6629d7..58cfbaeb4a7d 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -88,12 +88,21 @@  struct virtio_fs_req_work {
 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 				 struct fuse_req *req, bool in_flight);
 
+static const struct constant_table dax_param_enums[] = {
+	{"inode",	FUSE_DAX_INODE },
+	{"always",	FUSE_DAX_ALWAYS },
+	{"never",	FUSE_DAX_NEVER },
+	{}
+};
+
 enum {
 	OPT_DAX,
+	OPT_DAX_ENUM,
 };
 
 static const struct fs_parameter_spec virtio_fs_parameters[] = {
 	fsparam_flag("dax", OPT_DAX),
+	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
 	{}
 };
 
@@ -110,7 +119,10 @@  static int virtio_fs_parse_param(struct fs_context *fsc,
 
 	switch (opt) {
 	case OPT_DAX:
-		ctx->dax = 1;
+		ctx->dax_mode = FUSE_DAX_ALWAYS;
+		break;
+	case OPT_DAX_ENUM:
+		ctx->dax_mode = result.uint_32;
 		break;
 	default:
 		return -EINVAL;
@@ -1326,7 +1338,7 @@  static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
 
 	/* virtiofs allocates and installs its own fuse devices */
 	ctx->fudptr = NULL;
-	if (ctx->dax) {
+	if (ctx->dax_mode != FUSE_DAX_NEVER) {
 		if (!fs->dax_dev) {
 			err = -EINVAL;
 			pr_err("virtio-fs: dax can't be enabled as filesystem"