diff mbox series

[virtiofsd,v4,3/4] virtiofsd: support per-file DAX negotiation in FUSE_INIT

Message ID 20210817022347.18098-4-jefflexu@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series virtiofsd: support per-file DAX | expand

Commit Message

Jingbo Xu Aug. 17, 2021, 2:23 a.m. UTC
In FUSE_INIT negotiating phase, server/client should advertise if it
supports per-file DAX.

Once advertising support for per-file DAX feature, virtiofsd should
support storing FS_DAX_FL flag persistently passed by
FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR ioctl, and set FUSE_ATTR_DAX in
FUSE_LOOKUP accordingly if the file is capable of per-file DAX.

Currently only ext4/xfs since linux kernel v5.8 support storing
FS_DAX_FL flag persistently, and thus advertise support for per-file
DAX feature only when the backend fs type is ext4 and xfs.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 tools/virtiofsd/fuse_common.h    |  5 +++++
 tools/virtiofsd/fuse_lowlevel.c  |  6 ++++++
 tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

Comments

Dr. David Alan Gilbert Aug. 17, 2021, 5:15 p.m. UTC | #1
* Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
> In FUSE_INIT negotiating phase, server/client should advertise if it
> supports per-file DAX.
> 
> Once advertising support for per-file DAX feature, virtiofsd should
> support storing FS_DAX_FL flag persistently passed by
> FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR ioctl, and set FUSE_ATTR_DAX in
> FUSE_LOOKUP accordingly if the file is capable of per-file DAX.
> 
> Currently only ext4/xfs since linux kernel v5.8 support storing
> FS_DAX_FL flag persistently, and thus advertise support for per-file
> DAX feature only when the backend fs type is ext4 and xfs.

I'm a little worried about the meaning of the flags we're storing and
the fact we're storing them in the normal host DAX flags.

Doesn't this mean that we're using a single host flag to mean:
  a) It can be mapped as DAX on the host if it was a real DAX device
  b) We can map it as DAX inside the guest with virtiofs?

what happens when we're using usernamespaces for the guest?

Dave


> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> ---
>  tools/virtiofsd/fuse_common.h    |  5 +++++
>  tools/virtiofsd/fuse_lowlevel.c  |  6 ++++++
>  tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++++++++++
>  3 files changed, 40 insertions(+)
> 
> diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
> index 8a75729be9..ee6fc64c23 100644
> --- a/tools/virtiofsd/fuse_common.h
> +++ b/tools/virtiofsd/fuse_common.h
> @@ -372,6 +372,11 @@ struct fuse_file_info {
>   */
>  #define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
>  
> +/**
> + * Indicates support for per-file DAX.
> + */
> +#define FUSE_CAP_PERFILE_DAX (1 << 29)
> +
>  /**
>   * Ioctl flags
>   *
> diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
> index 50fc5c8d5a..04a4f17423 100644
> --- a/tools/virtiofsd/fuse_lowlevel.c
> +++ b/tools/virtiofsd/fuse_lowlevel.c
> @@ -2065,6 +2065,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
>      if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
>          se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
>      }
> +    if (arg->flags & FUSE_PERFILE_DAX) {
> +        se->conn.capable |= FUSE_CAP_PERFILE_DAX;
> +    }
>  #ifdef HAVE_SPLICE
>  #ifdef HAVE_VMSPLICE
>      se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
> @@ -2180,6 +2183,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
>      if (se->conn.want & FUSE_CAP_POSIX_ACL) {
>          outarg.flags |= FUSE_POSIX_ACL;
>      }
> +    if (se->op.ioctl && (se->conn.want & FUSE_CAP_PERFILE_DAX)) {
> +        outarg.flags |= FUSE_PERFILE_DAX;
> +    }
>      outarg.max_readahead = se->conn.max_readahead;
>      outarg.max_write = se->conn.max_write;
>      if (se->conn.max_background >= (1 << 16)) {
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index e170b17adb..5b6228210f 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -53,8 +53,10 @@
>  #include <sys/syscall.h>
>  #include <sys/wait.h>
>  #include <sys/xattr.h>
> +#include <sys/vfs.h>
>  #include <syslog.h>
>  #include <linux/fs.h>
> +#include <linux/magic.h>
>  
>  #include "qemu/cutils.h"
>  #include "passthrough_helpers.h"
> @@ -136,6 +138,13 @@ enum {
>      SANDBOX_CHROOT,
>  };
>  
> +/* capability of storing DAX flag persistently */
> +enum {
> +    DAX_CAP_NONE,  /* not supported */
> +    DAX_CAP_FLAGS, /* stored in flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
> +    DAX_CAP_XATTR, /* stored in xflags (FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR) */
> +};
> +
>  typedef struct xattr_map_entry {
>      char *key;
>      char *prepend;
> @@ -161,6 +170,7 @@ struct lo_data {
>      int readdirplus_clear;
>      int allow_direct_io;
>      int announce_submounts;
> +    int perfile_dax_cap; /* capability of backend fs */
>      bool use_statx;
>      struct lo_inode root;
>      GHashTable *inodes; /* protected by lo->mutex */
> @@ -703,6 +713,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
>          conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
>          lo->killpriv_v2 = 0;
>      }
> +
> +    if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
> +        conn->want |= FUSE_CAP_PERFILE_DAX;
> +    }
>  }
>  
>  static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
> @@ -3800,6 +3814,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
>      int fd, res;
>      struct stat stat;
>      uint64_t mnt_id;
> +    struct statfs statfs;
>  
>      fd = open("/", O_PATH);
>      if (fd == -1) {
> @@ -3826,6 +3841,20 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
>          root->posix_locks = g_hash_table_new_full(
>              g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
>      }
> +
> +    /*
> +     * Currently only ext4/xfs since linux kernel v5.8 support storing
> +     * FS_DAX_FL flag persistently. Ext4 accesses this flag through
> +     * FS_IOC_G[S]ETFLAGS ioctl, while xfs accesses this flag through
> +     * FS_IOC_FSG[S]ETXATTR ioctl.
> +     */
> +    res = fstatfs(fd, &statfs);
> +    if (!res) {
> +	if (statfs.f_type == EXT4_SUPER_MAGIC)
> +	    lo->perfile_dax_cap = DAX_CAP_FLAGS;
> +	else if (statfs.f_type == XFS_SUPER_MAGIC)
> +	    lo->perfile_dax_cap = DAX_CAP_XATTR;
> +    }
>  }
>  
>  static guint lo_key_hash(gconstpointer key)
> -- 
> 2.27.0
> 
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://listman.redhat.com/mailman/listinfo/virtio-fs
>
Jingbo Xu Aug. 18, 2021, 5:28 a.m. UTC | #2
On 8/18/21 1:15 AM, Dr. David Alan Gilbert wrote:
> * Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
>> In FUSE_INIT negotiating phase, server/client should advertise if it
>> supports per-file DAX.
>>
>> Once advertising support for per-file DAX feature, virtiofsd should
>> support storing FS_DAX_FL flag persistently passed by
>> FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR ioctl, and set FUSE_ATTR_DAX in
>> FUSE_LOOKUP accordingly if the file is capable of per-file DAX.
>>
>> Currently only ext4/xfs since linux kernel v5.8 support storing
>> FS_DAX_FL flag persistently, and thus advertise support for per-file
>> DAX feature only when the backend fs type is ext4 and xfs.
> 
> I'm a little worried about the meaning of the flags we're storing and
> the fact we're storing them in the normal host DAX flags.
> 
> Doesn't this mean that we're using a single host flag to mean:
>   a) It can be mapped as DAX on the host if it was a real DAX device
>   b) We can map it as DAX inside the guest with virtiofs?

Yes the side effect is that the host file is also dax enabled if the
backend fs is built upon real nvdimm device.

The rationale here is that, fuse daemon shall be capable of *marking*
the file as dax capable *persistently*, so that it can be informed that
this file is capable of dax later.

I'm not sure if xattr (extent attribute) is a better option for this?


> 
> what happens when we're using usernamespaces for the guest?
> 
> Dave
> 
> 
>> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
>> ---
>>  tools/virtiofsd/fuse_common.h    |  5 +++++
>>  tools/virtiofsd/fuse_lowlevel.c  |  6 ++++++
>>  tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++++++++++
>>  3 files changed, 40 insertions(+)
>>
>> diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
>> index 8a75729be9..ee6fc64c23 100644
>> --- a/tools/virtiofsd/fuse_common.h
>> +++ b/tools/virtiofsd/fuse_common.h
>> @@ -372,6 +372,11 @@ struct fuse_file_info {
>>   */
>>  #define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
>>  
>> +/**
>> + * Indicates support for per-file DAX.
>> + */
>> +#define FUSE_CAP_PERFILE_DAX (1 << 29)
>> +
>>  /**
>>   * Ioctl flags
>>   *
>> diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
>> index 50fc5c8d5a..04a4f17423 100644
>> --- a/tools/virtiofsd/fuse_lowlevel.c
>> +++ b/tools/virtiofsd/fuse_lowlevel.c
>> @@ -2065,6 +2065,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
>>      if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
>>          se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
>>      }
>> +    if (arg->flags & FUSE_PERFILE_DAX) {
>> +        se->conn.capable |= FUSE_CAP_PERFILE_DAX;
>> +    }
>>  #ifdef HAVE_SPLICE
>>  #ifdef HAVE_VMSPLICE
>>      se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
>> @@ -2180,6 +2183,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
>>      if (se->conn.want & FUSE_CAP_POSIX_ACL) {
>>          outarg.flags |= FUSE_POSIX_ACL;
>>      }
>> +    if (se->op.ioctl && (se->conn.want & FUSE_CAP_PERFILE_DAX)) {
>> +        outarg.flags |= FUSE_PERFILE_DAX;
>> +    }
>>      outarg.max_readahead = se->conn.max_readahead;
>>      outarg.max_write = se->conn.max_write;
>>      if (se->conn.max_background >= (1 << 16)) {
>> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
>> index e170b17adb..5b6228210f 100644
>> --- a/tools/virtiofsd/passthrough_ll.c
>> +++ b/tools/virtiofsd/passthrough_ll.c
>> @@ -53,8 +53,10 @@
>>  #include <sys/syscall.h>
>>  #include <sys/wait.h>
>>  #include <sys/xattr.h>
>> +#include <sys/vfs.h>
>>  #include <syslog.h>
>>  #include <linux/fs.h>
>> +#include <linux/magic.h>
>>  
>>  #include "qemu/cutils.h"
>>  #include "passthrough_helpers.h"
>> @@ -136,6 +138,13 @@ enum {
>>      SANDBOX_CHROOT,
>>  };
>>  
>> +/* capability of storing DAX flag persistently */
>> +enum {
>> +    DAX_CAP_NONE,  /* not supported */
>> +    DAX_CAP_FLAGS, /* stored in flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
>> +    DAX_CAP_XATTR, /* stored in xflags (FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR) */
>> +};
>> +
>>  typedef struct xattr_map_entry {
>>      char *key;
>>      char *prepend;
>> @@ -161,6 +170,7 @@ struct lo_data {
>>      int readdirplus_clear;
>>      int allow_direct_io;
>>      int announce_submounts;
>> +    int perfile_dax_cap; /* capability of backend fs */
>>      bool use_statx;
>>      struct lo_inode root;
>>      GHashTable *inodes; /* protected by lo->mutex */
>> @@ -703,6 +713,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
>>          conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
>>          lo->killpriv_v2 = 0;
>>      }
>> +
>> +    if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
>> +        conn->want |= FUSE_CAP_PERFILE_DAX;
>> +    }
>>  }
>>  
>>  static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
>> @@ -3800,6 +3814,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
>>      int fd, res;
>>      struct stat stat;
>>      uint64_t mnt_id;
>> +    struct statfs statfs;
>>  
>>      fd = open("/", O_PATH);
>>      if (fd == -1) {
>> @@ -3826,6 +3841,20 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
>>          root->posix_locks = g_hash_table_new_full(
>>              g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
>>      }
>> +
>> +    /*
>> +     * Currently only ext4/xfs since linux kernel v5.8 support storing
>> +     * FS_DAX_FL flag persistently. Ext4 accesses this flag through
>> +     * FS_IOC_G[S]ETFLAGS ioctl, while xfs accesses this flag through
>> +     * FS_IOC_FSG[S]ETXATTR ioctl.
>> +     */
>> +    res = fstatfs(fd, &statfs);
>> +    if (!res) {
>> +	if (statfs.f_type == EXT4_SUPER_MAGIC)
>> +	    lo->perfile_dax_cap = DAX_CAP_FLAGS;
>> +	else if (statfs.f_type == XFS_SUPER_MAGIC)
>> +	    lo->perfile_dax_cap = DAX_CAP_XATTR;
>> +    }
>>  }
>>  
>>  static guint lo_key_hash(gconstpointer key)
>> -- 
>> 2.27.0
>>
>> _______________________________________________
>> Virtio-fs mailing list
>> Virtio-fs@redhat.com
>> https://listman.redhat.com/mailman/listinfo/virtio-fs
>>
Vivek Goyal Aug. 18, 2021, 5:30 p.m. UTC | #3
On Tue, Aug 17, 2021 at 06:15:58PM +0100, Dr. David Alan Gilbert wrote:
> * Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
> > In FUSE_INIT negotiating phase, server/client should advertise if it
> > supports per-file DAX.
> > 
> > Once advertising support for per-file DAX feature, virtiofsd should
> > support storing FS_DAX_FL flag persistently passed by
> > FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR ioctl, and set FUSE_ATTR_DAX in
> > FUSE_LOOKUP accordingly if the file is capable of per-file DAX.
> > 
> > Currently only ext4/xfs since linux kernel v5.8 support storing
> > FS_DAX_FL flag persistently, and thus advertise support for per-file
> > DAX feature only when the backend fs type is ext4 and xfs.
> 
> I'm a little worried about the meaning of the flags we're storing and
> the fact we're storing them in the normal host DAX flags.
> 
> Doesn't this mean that we're using a single host flag to mean:
>   a) It can be mapped as DAX on the host if it was a real DAX device
>   b) We can map it as DAX inside the guest with virtiofs?

That's how passthrough filesystem is. Every attribute is passthrough.
So if guest sets something, host sees it same way. (file uid/gid, 
file mode bits, xattrs etc.). Only exception now seems to be remapping
of xattrs if users choses to do so.

> 
> what happens when we're using usernamespaces for the guest?

I don't think file attrs are namespaced. So if virtiofsd has permission
do to so, it will be just able to set attrs on file.

Vivek

> 
> Dave
> 
> 
> > Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> > ---
> >  tools/virtiofsd/fuse_common.h    |  5 +++++
> >  tools/virtiofsd/fuse_lowlevel.c  |  6 ++++++
> >  tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++++++++++
> >  3 files changed, 40 insertions(+)
> > 
> > diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
> > index 8a75729be9..ee6fc64c23 100644
> > --- a/tools/virtiofsd/fuse_common.h
> > +++ b/tools/virtiofsd/fuse_common.h
> > @@ -372,6 +372,11 @@ struct fuse_file_info {
> >   */
> >  #define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
> >  
> > +/**
> > + * Indicates support for per-file DAX.
> > + */
> > +#define FUSE_CAP_PERFILE_DAX (1 << 29)
> > +
> >  /**
> >   * Ioctl flags
> >   *
> > diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
> > index 50fc5c8d5a..04a4f17423 100644
> > --- a/tools/virtiofsd/fuse_lowlevel.c
> > +++ b/tools/virtiofsd/fuse_lowlevel.c
> > @@ -2065,6 +2065,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
> >      if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
> >          se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
> >      }
> > +    if (arg->flags & FUSE_PERFILE_DAX) {
> > +        se->conn.capable |= FUSE_CAP_PERFILE_DAX;
> > +    }
> >  #ifdef HAVE_SPLICE
> >  #ifdef HAVE_VMSPLICE
> >      se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
> > @@ -2180,6 +2183,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
> >      if (se->conn.want & FUSE_CAP_POSIX_ACL) {
> >          outarg.flags |= FUSE_POSIX_ACL;
> >      }
> > +    if (se->op.ioctl && (se->conn.want & FUSE_CAP_PERFILE_DAX)) {
> > +        outarg.flags |= FUSE_PERFILE_DAX;
> > +    }
> >      outarg.max_readahead = se->conn.max_readahead;
> >      outarg.max_write = se->conn.max_write;
> >      if (se->conn.max_background >= (1 << 16)) {
> > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> > index e170b17adb..5b6228210f 100644
> > --- a/tools/virtiofsd/passthrough_ll.c
> > +++ b/tools/virtiofsd/passthrough_ll.c
> > @@ -53,8 +53,10 @@
> >  #include <sys/syscall.h>
> >  #include <sys/wait.h>
> >  #include <sys/xattr.h>
> > +#include <sys/vfs.h>
> >  #include <syslog.h>
> >  #include <linux/fs.h>
> > +#include <linux/magic.h>
> >  
> >  #include "qemu/cutils.h"
> >  #include "passthrough_helpers.h"
> > @@ -136,6 +138,13 @@ enum {
> >      SANDBOX_CHROOT,
> >  };
> >  
> > +/* capability of storing DAX flag persistently */
> > +enum {
> > +    DAX_CAP_NONE,  /* not supported */
> > +    DAX_CAP_FLAGS, /* stored in flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
> > +    DAX_CAP_XATTR, /* stored in xflags (FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR) */
> > +};
> > +
> >  typedef struct xattr_map_entry {
> >      char *key;
> >      char *prepend;
> > @@ -161,6 +170,7 @@ struct lo_data {
> >      int readdirplus_clear;
> >      int allow_direct_io;
> >      int announce_submounts;
> > +    int perfile_dax_cap; /* capability of backend fs */
> >      bool use_statx;
> >      struct lo_inode root;
> >      GHashTable *inodes; /* protected by lo->mutex */
> > @@ -703,6 +713,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
> >          conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
> >          lo->killpriv_v2 = 0;
> >      }
> > +
> > +    if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
> > +        conn->want |= FUSE_CAP_PERFILE_DAX;
> > +    }
> >  }
> >  
> >  static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
> > @@ -3800,6 +3814,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
> >      int fd, res;
> >      struct stat stat;
> >      uint64_t mnt_id;
> > +    struct statfs statfs;
> >  
> >      fd = open("/", O_PATH);
> >      if (fd == -1) {
> > @@ -3826,6 +3841,20 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
> >          root->posix_locks = g_hash_table_new_full(
> >              g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
> >      }
> > +
> > +    /*
> > +     * Currently only ext4/xfs since linux kernel v5.8 support storing
> > +     * FS_DAX_FL flag persistently. Ext4 accesses this flag through
> > +     * FS_IOC_G[S]ETFLAGS ioctl, while xfs accesses this flag through
> > +     * FS_IOC_FSG[S]ETXATTR ioctl.
> > +     */
> > +    res = fstatfs(fd, &statfs);
> > +    if (!res) {
> > +	if (statfs.f_type == EXT4_SUPER_MAGIC)
> > +	    lo->perfile_dax_cap = DAX_CAP_FLAGS;
> > +	else if (statfs.f_type == XFS_SUPER_MAGIC)
> > +	    lo->perfile_dax_cap = DAX_CAP_XATTR;
> > +    }
> >  }
> >  
> >  static guint lo_key_hash(gconstpointer key)
> > -- 
> > 2.27.0
> > 
> > _______________________________________________
> > Virtio-fs mailing list
> > Virtio-fs@redhat.com
> > https://listman.redhat.com/mailman/listinfo/virtio-fs
> > 
> -- 
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
Dr. David Alan Gilbert Aug. 19, 2021, 1:57 p.m. UTC | #4
* JeffleXu (jefflexu@linux.alibaba.com) wrote:
> 
> 
> On 8/18/21 1:15 AM, Dr. David Alan Gilbert wrote:
> > * Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
> >> In FUSE_INIT negotiating phase, server/client should advertise if it
> >> supports per-file DAX.
> >>
> >> Once advertising support for per-file DAX feature, virtiofsd should
> >> support storing FS_DAX_FL flag persistently passed by
> >> FS_IOC_SETFLAGS/FS_IOC_FSSETXATTR ioctl, and set FUSE_ATTR_DAX in
> >> FUSE_LOOKUP accordingly if the file is capable of per-file DAX.
> >>
> >> Currently only ext4/xfs since linux kernel v5.8 support storing
> >> FS_DAX_FL flag persistently, and thus advertise support for per-file
> >> DAX feature only when the backend fs type is ext4 and xfs.
> > 
> > I'm a little worried about the meaning of the flags we're storing and
> > the fact we're storing them in the normal host DAX flags.
> > 
> > Doesn't this mean that we're using a single host flag to mean:
> >   a) It can be mapped as DAX on the host if it was a real DAX device
> >   b) We can map it as DAX inside the guest with virtiofs?
> 
> Yes the side effect is that the host file is also dax enabled if the
> backend fs is built upon real nvdimm device.
> 
> The rationale here is that, fuse daemon shall be capable of *marking*
> the file as dax capable *persistently*, so that it can be informed that
> this file is capable of dax later.

Right, so my worry here is that the untrusted guest changes both it's
own behaviour (fine) and also the behaviour of the host (less fine).

> I'm not sure if xattr (extent attribute) is a better option for this?

Well, if you used an xattr for it, it wouldn't clash with whatever the
host did (especially if it used the xattr mapping).

Dave

> 
> > 
> > what happens when we're using usernamespaces for the guest?
> > 
> > Dave
> > 
> > 
> >> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> >> ---
> >>  tools/virtiofsd/fuse_common.h    |  5 +++++
> >>  tools/virtiofsd/fuse_lowlevel.c  |  6 ++++++
> >>  tools/virtiofsd/passthrough_ll.c | 29 +++++++++++++++++++++++++++++
> >>  3 files changed, 40 insertions(+)
> >>
> >> diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
> >> index 8a75729be9..ee6fc64c23 100644
> >> --- a/tools/virtiofsd/fuse_common.h
> >> +++ b/tools/virtiofsd/fuse_common.h
> >> @@ -372,6 +372,11 @@ struct fuse_file_info {
> >>   */
> >>  #define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
> >>  
> >> +/**
> >> + * Indicates support for per-file DAX.
> >> + */
> >> +#define FUSE_CAP_PERFILE_DAX (1 << 29)
> >> +
> >>  /**
> >>   * Ioctl flags
> >>   *
> >> diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
> >> index 50fc5c8d5a..04a4f17423 100644
> >> --- a/tools/virtiofsd/fuse_lowlevel.c
> >> +++ b/tools/virtiofsd/fuse_lowlevel.c
> >> @@ -2065,6 +2065,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
> >>      if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
> >>          se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
> >>      }
> >> +    if (arg->flags & FUSE_PERFILE_DAX) {
> >> +        se->conn.capable |= FUSE_CAP_PERFILE_DAX;
> >> +    }
> >>  #ifdef HAVE_SPLICE
> >>  #ifdef HAVE_VMSPLICE
> >>      se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
> >> @@ -2180,6 +2183,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
> >>      if (se->conn.want & FUSE_CAP_POSIX_ACL) {
> >>          outarg.flags |= FUSE_POSIX_ACL;
> >>      }
> >> +    if (se->op.ioctl && (se->conn.want & FUSE_CAP_PERFILE_DAX)) {
> >> +        outarg.flags |= FUSE_PERFILE_DAX;
> >> +    }
> >>      outarg.max_readahead = se->conn.max_readahead;
> >>      outarg.max_write = se->conn.max_write;
> >>      if (se->conn.max_background >= (1 << 16)) {
> >> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> >> index e170b17adb..5b6228210f 100644
> >> --- a/tools/virtiofsd/passthrough_ll.c
> >> +++ b/tools/virtiofsd/passthrough_ll.c
> >> @@ -53,8 +53,10 @@
> >>  #include <sys/syscall.h>
> >>  #include <sys/wait.h>
> >>  #include <sys/xattr.h>
> >> +#include <sys/vfs.h>
> >>  #include <syslog.h>
> >>  #include <linux/fs.h>
> >> +#include <linux/magic.h>
> >>  
> >>  #include "qemu/cutils.h"
> >>  #include "passthrough_helpers.h"
> >> @@ -136,6 +138,13 @@ enum {
> >>      SANDBOX_CHROOT,
> >>  };
> >>  
> >> +/* capability of storing DAX flag persistently */
> >> +enum {
> >> +    DAX_CAP_NONE,  /* not supported */
> >> +    DAX_CAP_FLAGS, /* stored in flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
> >> +    DAX_CAP_XATTR, /* stored in xflags (FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR) */
> >> +};
> >> +
> >>  typedef struct xattr_map_entry {
> >>      char *key;
> >>      char *prepend;
> >> @@ -161,6 +170,7 @@ struct lo_data {
> >>      int readdirplus_clear;
> >>      int allow_direct_io;
> >>      int announce_submounts;
> >> +    int perfile_dax_cap; /* capability of backend fs */
> >>      bool use_statx;
> >>      struct lo_inode root;
> >>      GHashTable *inodes; /* protected by lo->mutex */
> >> @@ -703,6 +713,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
> >>          conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
> >>          lo->killpriv_v2 = 0;
> >>      }
> >> +
> >> +    if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
> >> +        conn->want |= FUSE_CAP_PERFILE_DAX;
> >> +    }
> >>  }
> >>  
> >>  static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
> >> @@ -3800,6 +3814,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
> >>      int fd, res;
> >>      struct stat stat;
> >>      uint64_t mnt_id;
> >> +    struct statfs statfs;
> >>  
> >>      fd = open("/", O_PATH);
> >>      if (fd == -1) {
> >> @@ -3826,6 +3841,20 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root)
> >>          root->posix_locks = g_hash_table_new_full(
> >>              g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
> >>      }
> >> +
> >> +    /*
> >> +     * Currently only ext4/xfs since linux kernel v5.8 support storing
> >> +     * FS_DAX_FL flag persistently. Ext4 accesses this flag through
> >> +     * FS_IOC_G[S]ETFLAGS ioctl, while xfs accesses this flag through
> >> +     * FS_IOC_FSG[S]ETXATTR ioctl.
> >> +     */
> >> +    res = fstatfs(fd, &statfs);
> >> +    if (!res) {
> >> +	if (statfs.f_type == EXT4_SUPER_MAGIC)
> >> +	    lo->perfile_dax_cap = DAX_CAP_FLAGS;
> >> +	else if (statfs.f_type == XFS_SUPER_MAGIC)
> >> +	    lo->perfile_dax_cap = DAX_CAP_XATTR;
> >> +    }
> >>  }
> >>  
> >>  static guint lo_key_hash(gconstpointer key)
> >> -- 
> >> 2.27.0
> >>
> >> _______________________________________________
> >> Virtio-fs mailing list
> >> Virtio-fs@redhat.com
> >> https://listman.redhat.com/mailman/listinfo/virtio-fs
> >>
> 
> -- 
> Thanks,
> Jeffle
>
diff mbox series

Patch

diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index 8a75729be9..ee6fc64c23 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -372,6 +372,11 @@  struct fuse_file_info {
  */
 #define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
 
+/**
+ * Indicates support for per-file DAX.
+ */
+#define FUSE_CAP_PERFILE_DAX (1 << 29)
+
 /**
  * Ioctl flags
  *
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 50fc5c8d5a..04a4f17423 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2065,6 +2065,9 @@  static void do_init(fuse_req_t req, fuse_ino_t nodeid,
     if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
         se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
     }
+    if (arg->flags & FUSE_PERFILE_DAX) {
+        se->conn.capable |= FUSE_CAP_PERFILE_DAX;
+    }
 #ifdef HAVE_SPLICE
 #ifdef HAVE_VMSPLICE
     se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
@@ -2180,6 +2183,9 @@  static void do_init(fuse_req_t req, fuse_ino_t nodeid,
     if (se->conn.want & FUSE_CAP_POSIX_ACL) {
         outarg.flags |= FUSE_POSIX_ACL;
     }
+    if (se->op.ioctl && (se->conn.want & FUSE_CAP_PERFILE_DAX)) {
+        outarg.flags |= FUSE_PERFILE_DAX;
+    }
     outarg.max_readahead = se->conn.max_readahead;
     outarg.max_write = se->conn.max_write;
     if (se->conn.max_background >= (1 << 16)) {
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index e170b17adb..5b6228210f 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -53,8 +53,10 @@ 
 #include <sys/syscall.h>
 #include <sys/wait.h>
 #include <sys/xattr.h>
+#include <sys/vfs.h>
 #include <syslog.h>
 #include <linux/fs.h>
+#include <linux/magic.h>
 
 #include "qemu/cutils.h"
 #include "passthrough_helpers.h"
@@ -136,6 +138,13 @@  enum {
     SANDBOX_CHROOT,
 };
 
+/* capability of storing DAX flag persistently */
+enum {
+    DAX_CAP_NONE,  /* not supported */
+    DAX_CAP_FLAGS, /* stored in flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
+    DAX_CAP_XATTR, /* stored in xflags (FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR) */
+};
+
 typedef struct xattr_map_entry {
     char *key;
     char *prepend;
@@ -161,6 +170,7 @@  struct lo_data {
     int readdirplus_clear;
     int allow_direct_io;
     int announce_submounts;
+    int perfile_dax_cap; /* capability of backend fs */
     bool use_statx;
     struct lo_inode root;
     GHashTable *inodes; /* protected by lo->mutex */
@@ -703,6 +713,10 @@  static void lo_init(void *userdata, struct fuse_conn_info *conn)
         conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
         lo->killpriv_v2 = 0;
     }
+
+    if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
+        conn->want |= FUSE_CAP_PERFILE_DAX;
+    }
 }
 
 static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
@@ -3800,6 +3814,7 @@  static void setup_root(struct lo_data *lo, struct lo_inode *root)
     int fd, res;
     struct stat stat;
     uint64_t mnt_id;
+    struct statfs statfs;
 
     fd = open("/", O_PATH);
     if (fd == -1) {
@@ -3826,6 +3841,20 @@  static void setup_root(struct lo_data *lo, struct lo_inode *root)
         root->posix_locks = g_hash_table_new_full(
             g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
     }
+
+    /*
+     * Currently only ext4/xfs since linux kernel v5.8 support storing
+     * FS_DAX_FL flag persistently. Ext4 accesses this flag through
+     * FS_IOC_G[S]ETFLAGS ioctl, while xfs accesses this flag through
+     * FS_IOC_FSG[S]ETXATTR ioctl.
+     */
+    res = fstatfs(fd, &statfs);
+    if (!res) {
+	if (statfs.f_type == EXT4_SUPER_MAGIC)
+	    lo->perfile_dax_cap = DAX_CAP_FLAGS;
+	else if (statfs.f_type == XFS_SUPER_MAGIC)
+	    lo->perfile_dax_cap = DAX_CAP_XATTR;
+    }
 }
 
 static guint lo_key_hash(gconstpointer key)