diff mbox series

[virtiofsd,v4,4/4] virtiofsd: support per-file DAX in FUSE_LOOKUP

Message ID 20210817022347.18098-5-jefflexu@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series virtiofsd: support per-file DAX | expand

Commit Message

Jingbo Xu Aug. 17, 2021, 2:23 a.m. UTC
For passthrough, when the corresponding virtiofs in guest is mounted
with '-o dax=inode', advertise that the file is capable of per-file
DAX if the inode in the backend fs is marked with FS_DAX_FL flag.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 tools/virtiofsd/passthrough_ll.c | 43 ++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

Comments

Dr. David Alan Gilbert Aug. 17, 2021, 7 p.m. UTC | #1
* Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
> For passthrough, when the corresponding virtiofs in guest is mounted
> with '-o dax=inode', advertise that the file is capable of per-file
> DAX if the inode in the backend fs is marked with FS_DAX_FL flag.
> 
> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 43 ++++++++++++++++++++++++++++++++
>  1 file changed, 43 insertions(+)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index 5b6228210f..4cbd904248 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -171,6 +171,7 @@ struct lo_data {
>      int allow_direct_io;
>      int announce_submounts;
>      int perfile_dax_cap; /* capability of backend fs */
> +    bool perfile_dax; /* enable per-file DAX or not */
>      bool use_statx;
>      struct lo_inode root;
>      GHashTable *inodes; /* protected by lo->mutex */
> @@ -716,6 +717,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
>  
>      if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
>          conn->want |= FUSE_CAP_PERFILE_DAX;
> +	lo->perfile_dax = 1;
> +    }
> +    else {
> +	lo->perfile_dax = 0;
>      }
>  }
>  
> @@ -983,6 +988,41 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
>      return 0;
>  }
>  
> +/*
> + * If the file is marked with FS_DAX_FL or FS_XFLAG_DAX, then DAX should be
> + * enabled for this file.
> + */
> +static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *dir,
> +				 const char *name)
> +{
> +    int res, fd;
> +    int ret = false;;
> +    unsigned int attr;
> +    struct fsxattr xattr;
> +
> +    if (!lo->perfile_dax)
> +	return false;
> +
> +    /* Open file without O_PATH, so that ioctl can be called. */
> +    fd = openat(dir->fd, name, O_NOFOLLOW);
> +    if (fd == -1)
> +        return false;

Doesn't that defeat the whole benefit of using O_PATH - i.e. that we
might stumble into a /dev node or something else we're not allowed to
open?

> +    if (lo->perfile_dax_cap == DAX_CAP_FLAGS) {
> +        res = ioctl(fd, FS_IOC_GETFLAGS, &attr);
> +        if (!res && (attr & FS_DAX_FL))
> +	    ret = true;
> +    }
> +    else if (lo->perfile_dax_cap == DAX_CAP_XATTR) {
> +	res = ioctl(fd, FS_IOC_FSGETXATTR, &xattr);
> +	if (!res && (xattr.fsx_xflags & FS_XFLAG_DAX))
> +	    ret = true;
> +    }

This all looks pretty expensive for each lookup.

Dave


> +    close(fd);
> +    return ret;
> +}
> +
>  /*
>   * Increments nlookup on the inode on success. unref_inode_lolocked() must be
>   * called eventually to decrement nlookup again. If inodep is non-NULL, the
> @@ -1038,6 +1078,9 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
>          e->attr_flags |= FUSE_ATTR_SUBMOUNT;
>      }
>  
> +    if (lo_should_enable_dax(lo, dir, name))
> +	e->attr_flags |= FUSE_ATTR_DAX;
> +
>      inode = lo_find(lo, &e->attr, mnt_id);
>      if (inode) {
>          close(newfd);
> -- 
> 2.27.0
> 
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://listman.redhat.com/mailman/listinfo/virtio-fs
>
Jingbo Xu Aug. 18, 2021, 5:46 a.m. UTC | #2
On 8/18/21 3:00 AM, Dr. David Alan Gilbert wrote:
> * Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
>> For passthrough, when the corresponding virtiofs in guest is mounted
>> with '-o dax=inode', advertise that the file is capable of per-file
>> DAX if the inode in the backend fs is marked with FS_DAX_FL flag.
>>
>> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
>> ---
>>  tools/virtiofsd/passthrough_ll.c | 43 ++++++++++++++++++++++++++++++++
>>  1 file changed, 43 insertions(+)
>>
>> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
>> index 5b6228210f..4cbd904248 100644
>> --- a/tools/virtiofsd/passthrough_ll.c
>> +++ b/tools/virtiofsd/passthrough_ll.c
>> @@ -171,6 +171,7 @@ struct lo_data {
>>      int allow_direct_io;
>>      int announce_submounts;
>>      int perfile_dax_cap; /* capability of backend fs */
>> +    bool perfile_dax; /* enable per-file DAX or not */
>>      bool use_statx;
>>      struct lo_inode root;
>>      GHashTable *inodes; /* protected by lo->mutex */
>> @@ -716,6 +717,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
>>  
>>      if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
>>          conn->want |= FUSE_CAP_PERFILE_DAX;
>> +	lo->perfile_dax = 1;
>> +    }
>> +    else {
>> +	lo->perfile_dax = 0;
>>      }
>>  }
>>  
>> @@ -983,6 +988,41 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
>>      return 0;
>>  }
>>  
>> +/*
>> + * If the file is marked with FS_DAX_FL or FS_XFLAG_DAX, then DAX should be
>> + * enabled for this file.
>> + */
>> +static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *dir,
>> +				 const char *name)
>> +{
>> +    int res, fd;
>> +    int ret = false;;
>> +    unsigned int attr;
>> +    struct fsxattr xattr;
>> +
>> +    if (!lo->perfile_dax)
>> +	return false;
>> +
>> +    /* Open file without O_PATH, so that ioctl can be called. */
>> +    fd = openat(dir->fd, name, O_NOFOLLOW);
>> +    if (fd == -1)
>> +        return false;
> 
> Doesn't that defeat the whole benefit of using O_PATH - i.e. that we
> might stumble into a /dev node or something else we're not allowed to
> open?

As far as I know, virtiofsd will pivot_root/chroot to the source
directory, and can only access files inside the source directory
specified by "-o source=". Then where do these unexpected files come
from? Besides, fd opened without O_PATH here is temporary and used for
FS_IOC_GETFLAGS/FS_IOC_FSGETXATTR ioctl only. It's closed when the
function returns.

> 
>> +    if (lo->perfile_dax_cap == DAX_CAP_FLAGS) {
>> +        res = ioctl(fd, FS_IOC_GETFLAGS, &attr);
>> +        if (!res && (attr & FS_DAX_FL))
>> +	    ret = true;
>> +    }
>> +    else if (lo->perfile_dax_cap == DAX_CAP_XATTR) {
>> +	res = ioctl(fd, FS_IOC_FSGETXATTR, &xattr);
>> +	if (!res && (xattr.fsx_xflags & FS_XFLAG_DAX))
>> +	    ret = true;
>> +    }
> 
> This all looks pretty expensive for each lookup.

Yes. it can be somehow optimized if we can agree on the way of storing
the dax flag persistently.
Dr. David Alan Gilbert Aug. 19, 2021, 1:08 p.m. UTC | #3
* JeffleXu (jefflexu@linux.alibaba.com) wrote:
> 
> 
> On 8/18/21 3:00 AM, Dr. David Alan Gilbert wrote:
> > * Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
> >> For passthrough, when the corresponding virtiofs in guest is mounted
> >> with '-o dax=inode', advertise that the file is capable of per-file
> >> DAX if the inode in the backend fs is marked with FS_DAX_FL flag.
> >>
> >> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> >> ---
> >>  tools/virtiofsd/passthrough_ll.c | 43 ++++++++++++++++++++++++++++++++
> >>  1 file changed, 43 insertions(+)
> >>
> >> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> >> index 5b6228210f..4cbd904248 100644
> >> --- a/tools/virtiofsd/passthrough_ll.c
> >> +++ b/tools/virtiofsd/passthrough_ll.c
> >> @@ -171,6 +171,7 @@ struct lo_data {
> >>      int allow_direct_io;
> >>      int announce_submounts;
> >>      int perfile_dax_cap; /* capability of backend fs */
> >> +    bool perfile_dax; /* enable per-file DAX or not */
> >>      bool use_statx;
> >>      struct lo_inode root;
> >>      GHashTable *inodes; /* protected by lo->mutex */
> >> @@ -716,6 +717,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
> >>  
> >>      if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
> >>          conn->want |= FUSE_CAP_PERFILE_DAX;
> >> +	lo->perfile_dax = 1;
> >> +    }
> >> +    else {
> >> +	lo->perfile_dax = 0;
> >>      }
> >>  }
> >>  
> >> @@ -983,6 +988,41 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
> >>      return 0;
> >>  }
> >>  
> >> +/*
> >> + * If the file is marked with FS_DAX_FL or FS_XFLAG_DAX, then DAX should be
> >> + * enabled for this file.
> >> + */
> >> +static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *dir,
> >> +				 const char *name)
> >> +{
> >> +    int res, fd;
> >> +    int ret = false;;
> >> +    unsigned int attr;
> >> +    struct fsxattr xattr;
> >> +
> >> +    if (!lo->perfile_dax)
> >> +	return false;
> >> +
> >> +    /* Open file without O_PATH, so that ioctl can be called. */
> >> +    fd = openat(dir->fd, name, O_NOFOLLOW);
> >> +    if (fd == -1)
> >> +        return false;
> > 
> > Doesn't that defeat the whole benefit of using O_PATH - i.e. that we
> > might stumble into a /dev node or something else we're not allowed to
> > open?
> 
> As far as I know, virtiofsd will pivot_root/chroot to the source
> directory, and can only access files inside the source directory
> specified by "-o source=". Then where do these unexpected files come
> from? Besides, fd opened without O_PATH here is temporary and used for
> FS_IOC_GETFLAGS/FS_IOC_FSGETXATTR ioctl only. It's closed when the
> function returns.

The guest is still allowed to mknod.
See:
   https://lists.gnu.org/archive/html/qemu-devel/2021-01/msg05461.html

also it's legal to expose a root filesystem for a guest; the virtiofsd
should *never* open a device other than O_PATH - and it's really tricky
to do a check to see if it is a device in a race-free way.


> > 
> >> +    if (lo->perfile_dax_cap == DAX_CAP_FLAGS) {
> >> +        res = ioctl(fd, FS_IOC_GETFLAGS, &attr);
> >> +        if (!res && (attr & FS_DAX_FL))
> >> +	    ret = true;
> >> +    }
> >> +    else if (lo->perfile_dax_cap == DAX_CAP_XATTR) {
> >> +	res = ioctl(fd, FS_IOC_FSGETXATTR, &xattr);
> >> +	if (!res && (xattr.fsx_xflags & FS_XFLAG_DAX))
> >> +	    ret = true;
> >> +    }
> > 
> > This all looks pretty expensive for each lookup.
> 
> Yes. it can be somehow optimized if we can agree on the way of storing
> the dax flag persistently.

Dave

> -- 
> Thanks,
> Jeffle
>
Jingbo Xu Aug. 20, 2021, 5:03 a.m. UTC | #4
On 8/19/21 9:08 PM, Dr. David Alan Gilbert wrote:
> * JeffleXu (jefflexu@linux.alibaba.com) wrote:
>>
>>
>> On 8/18/21 3:00 AM, Dr. David Alan Gilbert wrote:
>>> * Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
>>>> For passthrough, when the corresponding virtiofs in guest is mounted
>>>> with '-o dax=inode', advertise that the file is capable of per-file
>>>> DAX if the inode in the backend fs is marked with FS_DAX_FL flag.
>>>>
>>>> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
>>>> ---
>>>>  tools/virtiofsd/passthrough_ll.c | 43 ++++++++++++++++++++++++++++++++
>>>>  1 file changed, 43 insertions(+)
>>>>
>>>> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
>>>> index 5b6228210f..4cbd904248 100644
>>>> --- a/tools/virtiofsd/passthrough_ll.c
>>>> +++ b/tools/virtiofsd/passthrough_ll.c
>>>> @@ -171,6 +171,7 @@ struct lo_data {
>>>>      int allow_direct_io;
>>>>      int announce_submounts;
>>>>      int perfile_dax_cap; /* capability of backend fs */
>>>> +    bool perfile_dax; /* enable per-file DAX or not */
>>>>      bool use_statx;
>>>>      struct lo_inode root;
>>>>      GHashTable *inodes; /* protected by lo->mutex */
>>>> @@ -716,6 +717,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
>>>>  
>>>>      if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
>>>>          conn->want |= FUSE_CAP_PERFILE_DAX;
>>>> +	lo->perfile_dax = 1;
>>>> +    }
>>>> +    else {
>>>> +	lo->perfile_dax = 0;
>>>>      }
>>>>  }
>>>>  
>>>> @@ -983,6 +988,41 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
>>>>      return 0;
>>>>  }
>>>>  
>>>> +/*
>>>> + * If the file is marked with FS_DAX_FL or FS_XFLAG_DAX, then DAX should be
>>>> + * enabled for this file.
>>>> + */
>>>> +static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *dir,
>>>> +				 const char *name)
>>>> +{
>>>> +    int res, fd;
>>>> +    int ret = false;;
>>>> +    unsigned int attr;
>>>> +    struct fsxattr xattr;
>>>> +
>>>> +    if (!lo->perfile_dax)
>>>> +	return false;
>>>> +
>>>> +    /* Open file without O_PATH, so that ioctl can be called. */
>>>> +    fd = openat(dir->fd, name, O_NOFOLLOW);
>>>> +    if (fd == -1)
>>>> +        return false;
>>>
>>> Doesn't that defeat the whole benefit of using O_PATH - i.e. that we
>>> might stumble into a /dev node or something else we're not allowed to
>>> open?
>>
>> As far as I know, virtiofsd will pivot_root/chroot to the source
>> directory, and can only access files inside the source directory
>> specified by "-o source=". Then where do these unexpected files come
>> from? Besides, fd opened without O_PATH here is temporary and used for
>> FS_IOC_GETFLAGS/FS_IOC_FSGETXATTR ioctl only. It's closed when the
>> function returns.
> 
> The guest is still allowed to mknod.
> See:
>    https://lists.gnu.org/archive/html/qemu-devel/2021-01/msg05461.html
> 
> also it's legal to expose a root filesystem for a guest; the virtiofsd
> should *never* open a device other than O_PATH - and it's really tricky
> to do a check to see if it is a device in a race-free way.
> 

Fine. Got it. However the returned fd (opened without O_PATH) is only
used for FS_IOC_GETFLAGS/FS_IOC_FSGETXATTR ioctl, while in most cases
for special device files, these two ioctls should return -ENOTTY.

If it's really a security issue, then lo_inode_open() could be used to
get a temporary fd, i.e., check if it's a special file before opening.
After all, FUSE_OPEN also handles in this way. Besides, I can't
understand what "race-free way" means.
Greg Kurz Aug. 24, 2021, 10:15 a.m. UTC | #5
On Fri, 20 Aug 2021 13:03:23 +0800
JeffleXu <jefflexu@linux.alibaba.com> wrote:

> 
> 
> On 8/19/21 9:08 PM, Dr. David Alan Gilbert wrote:
> > * JeffleXu (jefflexu@linux.alibaba.com) wrote:
> >>
> >>
> >> On 8/18/21 3:00 AM, Dr. David Alan Gilbert wrote:
> >>> * Jeffle Xu (jefflexu@linux.alibaba.com) wrote:
> >>>> For passthrough, when the corresponding virtiofs in guest is mounted
> >>>> with '-o dax=inode', advertise that the file is capable of per-file
> >>>> DAX if the inode in the backend fs is marked with FS_DAX_FL flag.
> >>>>
> >>>> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> >>>> ---
> >>>>  tools/virtiofsd/passthrough_ll.c | 43 ++++++++++++++++++++++++++++++++
> >>>>  1 file changed, 43 insertions(+)
> >>>>
> >>>> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> >>>> index 5b6228210f..4cbd904248 100644
> >>>> --- a/tools/virtiofsd/passthrough_ll.c
> >>>> +++ b/tools/virtiofsd/passthrough_ll.c
> >>>> @@ -171,6 +171,7 @@ struct lo_data {
> >>>>      int allow_direct_io;
> >>>>      int announce_submounts;
> >>>>      int perfile_dax_cap; /* capability of backend fs */
> >>>> +    bool perfile_dax; /* enable per-file DAX or not */
> >>>>      bool use_statx;
> >>>>      struct lo_inode root;
> >>>>      GHashTable *inodes; /* protected by lo->mutex */
> >>>> @@ -716,6 +717,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
> >>>>  
> >>>>      if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
> >>>>          conn->want |= FUSE_CAP_PERFILE_DAX;
> >>>> +	lo->perfile_dax = 1;
> >>>> +    }
> >>>> +    else {
> >>>> +	lo->perfile_dax = 0;
> >>>>      }
> >>>>  }
> >>>>  
> >>>> @@ -983,6 +988,41 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
> >>>>      return 0;
> >>>>  }
> >>>>  
> >>>> +/*
> >>>> + * If the file is marked with FS_DAX_FL or FS_XFLAG_DAX, then DAX should be
> >>>> + * enabled for this file.
> >>>> + */
> >>>> +static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *dir,
> >>>> +				 const char *name)
> >>>> +{
> >>>> +    int res, fd;
> >>>> +    int ret = false;;
> >>>> +    unsigned int attr;
> >>>> +    struct fsxattr xattr;
> >>>> +
> >>>> +    if (!lo->perfile_dax)
> >>>> +	return false;
> >>>> +
> >>>> +    /* Open file without O_PATH, so that ioctl can be called. */
> >>>> +    fd = openat(dir->fd, name, O_NOFOLLOW);
> >>>> +    if (fd == -1)
> >>>> +        return false;
> >>>
> >>> Doesn't that defeat the whole benefit of using O_PATH - i.e. that we
> >>> might stumble into a /dev node or something else we're not allowed to
> >>> open?
> >>
> >> As far as I know, virtiofsd will pivot_root/chroot to the source
> >> directory, and can only access files inside the source directory
> >> specified by "-o source=". Then where do these unexpected files come
> >> from? Besides, fd opened without O_PATH here is temporary and used for
> >> FS_IOC_GETFLAGS/FS_IOC_FSGETXATTR ioctl only. It's closed when the
> >> function returns.
> > 
> > The guest is still allowed to mknod.
> > See:
> >    https://lists.gnu.org/archive/html/qemu-devel/2021-01/msg05461.html
> > 
> > also it's legal to expose a root filesystem for a guest; the virtiofsd
> > should *never* open a device other than O_PATH - and it's really tricky
> > to do a check to see if it is a device in a race-free way.
> > 
> 
> Fine. Got it. However the returned fd (opened without O_PATH) is only
> used for FS_IOC_GETFLAGS/FS_IOC_FSGETXATTR ioctl, while in most cases
> for special device files, these two ioctls should return -ENOTTY.
> 

The actual problem is that a FIFO will cause openat() to block until
the other end of the FIFO is open for writing...

> If it's really a security issue, then lo_inode_open() could be used to

... and cause a DoS on virtiofsd. So yes, this is a security issue and
lo_inode_open() was introduced specifically to handle this.

> get a temporary fd, i.e., check if it's a special file before opening.
> After all, FUSE_OPEN also handles in this way. Besides, I can't
> understand what "race-free way" means.
> 

"race-free way" means a way that guarantees that file type
cannot change between the time you check it and the time
you open it (TOCTOU error). For example, doing a plain stat(),
checking st_mode and proceeding to open() is wrong : nothing
prevents the file to be unlinked and replaced by something
else between stat() and open().

We avoid that by keeping O_PATH fds around and using
lo_inode_open() instead of openat().

In your case, it seems that you should do the checking after
you have an actual lo_inode for the target file, and pass
that to lo_should_enable_dax() instead of the parent lo_inode
and target name.

Cheers,

--
Greg
Jingbo Xu Sept. 8, 2021, 10:34 a.m. UTC | #6
On 8/24/21 6:15 PM, Greg Kurz wrote:
> On Fri, 20 Aug 2021 13:03:23 +0800
> JeffleXu <jefflexu@linux.alibaba.com> wrote:
>>
>> Fine. Got it. However the returned fd (opened without O_PATH) is only
>> used for FS_IOC_GETFLAGS/FS_IOC_FSGETXATTR ioctl, while in most cases
>> for special device files, these two ioctls should return -ENOTTY.
>>
> 
> The actual problem is that a FIFO will cause openat() to block until
> the other end of the FIFO is open for writing...

Got it.

> 
>> If it's really a security issue, then lo_inode_open() could be used to
> 
> ... and cause a DoS on virtiofsd. So yes, this is a security issue and
> lo_inode_open() was introduced specifically to handle this.
> 
>> get a temporary fd, i.e., check if it's a special file before opening.
>> After all, FUSE_OPEN also handles in this way. Besides, I can't
>> understand what "race-free way" means.
>>
> 
> "race-free way" means a way that guarantees that file type
> cannot change between the time you check it and the time
> you open it (TOCTOU error). For example, doing a plain stat(),
> checking st_mode and proceeding to open() is wrong : nothing
> prevents the file to be unlinked and replaced by something
> else between stat() and open().
> 
> We avoid that by keeping O_PATH fds around and using
> lo_inode_open() instead of openat().

Thanks for the detailed explanation. Got it.

> 
> In your case, it seems that you should do the checking after
> you have an actual lo_inode for the target file, and pass
> that to lo_should_enable_dax() instead of the parent lo_inode
> and target name.
> 

Yes, that will be more reasonable. Thanks.
diff mbox series

Patch

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 5b6228210f..4cbd904248 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -171,6 +171,7 @@  struct lo_data {
     int allow_direct_io;
     int announce_submounts;
     int perfile_dax_cap; /* capability of backend fs */
+    bool perfile_dax; /* enable per-file DAX or not */
     bool use_statx;
     struct lo_inode root;
     GHashTable *inodes; /* protected by lo->mutex */
@@ -716,6 +717,10 @@  static void lo_init(void *userdata, struct fuse_conn_info *conn)
 
     if (conn->capable & FUSE_CAP_PERFILE_DAX && lo->perfile_dax_cap ) {
         conn->want |= FUSE_CAP_PERFILE_DAX;
+	lo->perfile_dax = 1;
+    }
+    else {
+	lo->perfile_dax = 0;
     }
 }
 
@@ -983,6 +988,41 @@  static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
     return 0;
 }
 
+/*
+ * If the file is marked with FS_DAX_FL or FS_XFLAG_DAX, then DAX should be
+ * enabled for this file.
+ */
+static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *dir,
+				 const char *name)
+{
+    int res, fd;
+    int ret = false;;
+    unsigned int attr;
+    struct fsxattr xattr;
+
+    if (!lo->perfile_dax)
+	return false;
+
+    /* Open file without O_PATH, so that ioctl can be called. */
+    fd = openat(dir->fd, name, O_NOFOLLOW);
+    if (fd == -1)
+        return false;
+
+    if (lo->perfile_dax_cap == DAX_CAP_FLAGS) {
+        res = ioctl(fd, FS_IOC_GETFLAGS, &attr);
+        if (!res && (attr & FS_DAX_FL))
+	    ret = true;
+    }
+    else if (lo->perfile_dax_cap == DAX_CAP_XATTR) {
+	res = ioctl(fd, FS_IOC_FSGETXATTR, &xattr);
+	if (!res && (xattr.fsx_xflags & FS_XFLAG_DAX))
+	    ret = true;
+    }
+
+    close(fd);
+    return ret;
+}
+
 /*
  * Increments nlookup on the inode on success. unref_inode_lolocked() must be
  * called eventually to decrement nlookup again. If inodep is non-NULL, the
@@ -1038,6 +1078,9 @@  static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
         e->attr_flags |= FUSE_ATTR_SUBMOUNT;
     }
 
+    if (lo_should_enable_dax(lo, dir, name))
+	e->attr_flags |= FUSE_ATTR_DAX;
+
     inode = lo_find(lo, &e->attr, mnt_id);
     if (inode) {
         close(newfd);