diff mbox series

[v2,17/25] DAX/unmap: virtiofsd: Add VHOST_USER_SLAVE_FS_IO

Message ID 20210414155137.46522-18-dgilbert@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtiofs dax patches | expand

Commit Message

Dr. David Alan Gilbert April 14, 2021, 3:51 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a
client to ask qemu to perform a read/write from an fd directly
to GPA.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 docs/interop/vhost-user.rst               | 16 ++++
 hw/virtio/trace-events                    |  6 ++
 hw/virtio/vhost-user-fs.c                 | 95 +++++++++++++++++++++++
 hw/virtio/vhost-user.c                    |  4 +
 include/hw/virtio/vhost-user-fs.h         |  2 +
 subprojects/libvhost-user/libvhost-user.h |  1 +
 6 files changed, 124 insertions(+)

Comments

Vivek Goyal April 21, 2021, 8:07 p.m. UTC | #1
On Wed, Apr 14, 2021 at 04:51:29PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a
> client to ask qemu to perform a read/write from an fd directly
> to GPA.

Hi David,

Today I came across process_vm_readv() and process_vm_writev().

https://man7.org/linux/man-pages/man2/process_vm_readv.2.html

I am wondering if we can use these to read from/write to qemu address
space (DAX window, which virtiofsd does not have access to).

So for the case of reading from DAX window into an fd, we probably
will have to first read from qemu DAX window to virtiofsd local memory
and then do a writev(fd).

Don't know if this approach is faster/slower as compared to sending
a vhost-user message to qemu.

Thanks
Vivek


> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  docs/interop/vhost-user.rst               | 16 ++++
>  hw/virtio/trace-events                    |  6 ++
>  hw/virtio/vhost-user-fs.c                 | 95 +++++++++++++++++++++++
>  hw/virtio/vhost-user.c                    |  4 +
>  include/hw/virtio/vhost-user-fs.h         |  2 +
>  subprojects/libvhost-user/libvhost-user.h |  1 +
>  6 files changed, 124 insertions(+)
> 
> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> index 09aee3565d..2fa62ea451 100644
> --- a/docs/interop/vhost-user.rst
> +++ b/docs/interop/vhost-user.rst
> @@ -1453,6 +1453,22 @@ Slave message types
>    multiple chunks can be unmapped in one command.
>    A reply is generated indicating whether unmapping succeeded.
>  
> +``VHOST_USER_SLAVE_FS_IO``
> +  :id: 9
> +  :equivalent ioctl: N/A
> +  :slave payload: ``struct VhostUserFSSlaveMsg``
> +  :master payload: N/A
> +
> +  Requests that IO be performed directly from an fd, passed in ancillary
> +  data, to guest memory on behalf of the daemon; this is normally for a
> +  case where a memory region isn't visible to the daemon. slave payload
> +  has flags which determine the direction of IO operation.
> +
> +  The ``VHOST_USER_FS_FLAG_MAP_R`` flag must be set in the ``flags`` field to
> +  read from the file into RAM.
> +  The ``VHOST_USER_FS_FLAG_MAP_W`` flag must be set in the ``flags`` field to
> +  write to the file from RAM.
> +
>  .. _reply_ack:
>  
>  VHOST_USER_PROTOCOL_F_REPLY_ACK
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index c62727f879..20557a078e 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -53,6 +53,12 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
>  vhost_vdpa_set_owner(void *dev) "dev: %p"
>  vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
>  
> +# vhost-user-fs.c
> +
> +vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd"
> +vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd"
> +vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd"
> +
>  # virtio.c
>  virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
>  virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
> diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
> index 963f694435..5511838f29 100644
> --- a/hw/virtio/vhost-user-fs.c
> +++ b/hw/virtio/vhost-user-fs.c
> @@ -23,6 +23,8 @@
>  #include "hw/virtio/vhost-user-fs.h"
>  #include "monitor/monitor.h"
>  #include "sysemu/sysemu.h"
> +#include "exec/address-spaces.h"
> +#include "trace.h"
>  
>  static const int user_feature_bits[] = {
>      VIRTIO_F_VERSION_1,
> @@ -220,6 +222,99 @@ uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
>      return (uint64_t)res;
>  }
>  
> +uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
> +                                VhostUserFSSlaveMsg *sm, int fd)
> +{
> +    VHostUserFS *fs = (VHostUserFS *)object_dynamic_cast(OBJECT(dev->vdev),
> +                          TYPE_VHOST_USER_FS);
> +    if (!fs) {
> +        error_report("%s: Bad fs ptr", __func__);
> +        return (uint64_t)-1;
> +    }
> +    if (!check_slave_message_entries(sm, message_size)) {
> +        return (uint64_t)-1;
> +    }
> +
> +    unsigned int i;
> +    int res = 0;
> +    size_t done = 0;
> +
> +    if (fd < 0) {
> +        error_report("Bad fd for map");
> +        return (uint64_t)-1;
> +    }
> +
> +    for (i = 0; i < sm->count && !res; i++) {
> +        VhostUserFSSlaveMsgEntry *e = &sm->entries[i];
> +        if (e->len == 0) {
> +            continue;
> +        }
> +
> +        size_t len = e->len;
> +        uint64_t fd_offset = e->fd_offset;
> +        hwaddr gpa = e->c_offset;
> +
> +        while (len && !res) {
> +            hwaddr xlat, xlat_len;
> +            bool is_write = e->flags & VHOST_USER_FS_FLAG_MAP_W;
> +            MemoryRegion *mr = address_space_translate(dev->vdev->dma_as, gpa,
> +                                                       &xlat, &xlat_len,
> +                                                       is_write,
> +                                                       MEMTXATTRS_UNSPECIFIED);
> +            if (!mr || !xlat_len) {
> +                error_report("No guest region found for 0x%" HWADDR_PRIx, gpa);
> +                res = -EFAULT;
> +                break;
> +            }
> +
> +            trace_vhost_user_fs_slave_io_loop(mr->name,
> +                                          (uint64_t)xlat,
> +                                          memory_region_is_ram(mr),
> +                                          memory_region_is_romd(mr),
> +                                          (size_t)xlat_len);
> +
> +            void *hostptr = qemu_map_ram_ptr(mr->ram_block,
> +                                             xlat);
> +            ssize_t transferred;
> +            if (e->flags & VHOST_USER_FS_FLAG_MAP_R) {
> +                /* Read from file into RAM */
> +                if (mr->readonly) {
> +                    res = -EFAULT;
> +                    break;
> +                }
> +                transferred = pread(fd, hostptr, xlat_len, fd_offset);
> +            } else if (e->flags & VHOST_USER_FS_FLAG_MAP_W) {
> +                /* Write into file from RAM */
> +                transferred = pwrite(fd, hostptr, xlat_len, fd_offset);
> +            } else {
> +                transferred = EINVAL;
> +            }
> +
> +            trace_vhost_user_fs_slave_io_loop_res(transferred);
> +            if (transferred < 0) {
> +                res = -errno;
> +                break;
> +            }
> +            if (!transferred) {
> +                /* EOF */
> +                break;
> +            }
> +
> +            done += transferred;
> +            fd_offset += transferred;
> +            gpa += transferred;
> +            len -= transferred;
> +        }
> +    }
> +    close(fd);
> +
> +    trace_vhost_user_fs_slave_io_exit(res, done);
> +    if (res < 0) {
> +        return (uint64_t)res;
> +    }
> +    return (uint64_t)done;
> +}
> +
>  static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
>  {
>      VHostUserFS *fs = VHOST_USER_FS(vdev);
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index ad9170f8dc..b9699586ae 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -138,6 +138,7 @@ typedef enum VhostUserSlaveRequest {
>      VHOST_USER_SLAVE_VRING_ERR = 5,
>      VHOST_USER_SLAVE_FS_MAP = 6,
>      VHOST_USER_SLAVE_FS_UNMAP = 7,
> +    VHOST_USER_SLAVE_FS_IO = 8,
>      VHOST_USER_SLAVE_MAX
>  }  VhostUserSlaveRequest;
>  
> @@ -1562,6 +1563,9 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
>      case VHOST_USER_SLAVE_FS_UNMAP:
>          ret = vhost_user_fs_slave_unmap(dev, hdr.size, &payload.fs);
>          break;
> +    case VHOST_USER_SLAVE_FS_IO:
> +        ret = vhost_user_fs_slave_io(dev, hdr.size, &payload.fs, fd[0]);
> +        break;
>  #endif
>      default:
>          error_report("Received unexpected msg type: %d.", hdr.request);
> diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
> index 0766f17548..2931164e23 100644
> --- a/include/hw/virtio/vhost-user-fs.h
> +++ b/include/hw/virtio/vhost-user-fs.h
> @@ -78,5 +78,7 @@ uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, int message_size,
>                                   VhostUserFSSlaveMsg *sm, int fd);
>  uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
>                                     VhostUserFSSlaveMsg *sm);
> +uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
> +                                VhostUserFSSlaveMsg *sm, int fd);
>  
>  #endif /* _QEMU_VHOST_USER_FS_H */
> diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
> index a98c5f5c11..42b0833c4b 100644
> --- a/subprojects/libvhost-user/libvhost-user.h
> +++ b/subprojects/libvhost-user/libvhost-user.h
> @@ -121,6 +121,7 @@ typedef enum VhostUserSlaveRequest {
>      VHOST_USER_SLAVE_VRING_ERR = 5,
>      VHOST_USER_SLAVE_FS_MAP = 6,
>      VHOST_USER_SLAVE_FS_UNMAP = 7,
> +    VHOST_USER_SLAVE_FS_IO = 8,
>      VHOST_USER_SLAVE_MAX
>  }  VhostUserSlaveRequest;
>  
> -- 
> 2.31.1
> 
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://listman.redhat.com/mailman/listinfo/virtio-fs
Dr. David Alan Gilbert April 22, 2021, 9:29 a.m. UTC | #2
* Vivek Goyal (vgoyal@redhat.com) wrote:
> On Wed, Apr 14, 2021 at 04:51:29PM +0100, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > 
> > Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a
> > client to ask qemu to perform a read/write from an fd directly
> > to GPA.
> 
> Hi David,
> 
> Today I came across process_vm_readv() and process_vm_writev().
> 
> https://man7.org/linux/man-pages/man2/process_vm_readv.2.html

Yes, I just saw these (reading the lwn article on process_vm_exec)

> I am wondering if we can use these to read from/write to qemu address
> space (DAX window, which virtiofsd does not have access to).
> 
> So for the case of reading from DAX window into an fd, we probably
> will have to first read from qemu DAX window to virtiofsd local memory
> and then do a writev(fd).
> 
> Don't know if this approach is faster/slower as compared to sending
> a vhost-user message to qemu.

I think there are some other problems as well:
  a) I'm not sure the permissions currently work out - I think it's
saying you need to either have CAP_SYS_PTRACE or the same uid as the
other process; and I don't think that's normally the relationship
between the daemon and the qemu.

  b) The virtiofsd would have to know something about the addresses in
qemu's address space, rather than currently only knowing guest
addresses.

  c) No one said that mapping is linear/simple, especially for an area
where an fd wasn't passed for shared memory.

Also, this interface does protect qemu from the daemon writing to
arbitrary qemu data structures.

Still, those interfaces do sound attractive for something - just not
quite figured out what.

Dave



> Thanks
> Vivek
> 
> 
> > 
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > ---
> >  docs/interop/vhost-user.rst               | 16 ++++
> >  hw/virtio/trace-events                    |  6 ++
> >  hw/virtio/vhost-user-fs.c                 | 95 +++++++++++++++++++++++
> >  hw/virtio/vhost-user.c                    |  4 +
> >  include/hw/virtio/vhost-user-fs.h         |  2 +
> >  subprojects/libvhost-user/libvhost-user.h |  1 +
> >  6 files changed, 124 insertions(+)
> > 
> > diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> > index 09aee3565d..2fa62ea451 100644
> > --- a/docs/interop/vhost-user.rst
> > +++ b/docs/interop/vhost-user.rst
> > @@ -1453,6 +1453,22 @@ Slave message types
> >    multiple chunks can be unmapped in one command.
> >    A reply is generated indicating whether unmapping succeeded.
> >  
> > +``VHOST_USER_SLAVE_FS_IO``
> > +  :id: 9
> > +  :equivalent ioctl: N/A
> > +  :slave payload: ``struct VhostUserFSSlaveMsg``
> > +  :master payload: N/A
> > +
> > +  Requests that IO be performed directly from an fd, passed in ancillary
> > +  data, to guest memory on behalf of the daemon; this is normally for a
> > +  case where a memory region isn't visible to the daemon. slave payload
> > +  has flags which determine the direction of IO operation.
> > +
> > +  The ``VHOST_USER_FS_FLAG_MAP_R`` flag must be set in the ``flags`` field to
> > +  read from the file into RAM.
> > +  The ``VHOST_USER_FS_FLAG_MAP_W`` flag must be set in the ``flags`` field to
> > +  write to the file from RAM.
> > +
> >  .. _reply_ack:
> >  
> >  VHOST_USER_PROTOCOL_F_REPLY_ACK
> > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > index c62727f879..20557a078e 100644
> > --- a/hw/virtio/trace-events
> > +++ b/hw/virtio/trace-events
> > @@ -53,6 +53,12 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
> >  vhost_vdpa_set_owner(void *dev) "dev: %p"
> >  vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
> >  
> > +# vhost-user-fs.c
> > +
> > +vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd"
> > +vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd"
> > +vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd"
> > +
> >  # virtio.c
> >  virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
> >  virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
> > diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
> > index 963f694435..5511838f29 100644
> > --- a/hw/virtio/vhost-user-fs.c
> > +++ b/hw/virtio/vhost-user-fs.c
> > @@ -23,6 +23,8 @@
> >  #include "hw/virtio/vhost-user-fs.h"
> >  #include "monitor/monitor.h"
> >  #include "sysemu/sysemu.h"
> > +#include "exec/address-spaces.h"
> > +#include "trace.h"
> >  
> >  static const int user_feature_bits[] = {
> >      VIRTIO_F_VERSION_1,
> > @@ -220,6 +222,99 @@ uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
> >      return (uint64_t)res;
> >  }
> >  
> > +uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
> > +                                VhostUserFSSlaveMsg *sm, int fd)
> > +{
> > +    VHostUserFS *fs = (VHostUserFS *)object_dynamic_cast(OBJECT(dev->vdev),
> > +                          TYPE_VHOST_USER_FS);
> > +    if (!fs) {
> > +        error_report("%s: Bad fs ptr", __func__);
> > +        return (uint64_t)-1;
> > +    }
> > +    if (!check_slave_message_entries(sm, message_size)) {
> > +        return (uint64_t)-1;
> > +    }
> > +
> > +    unsigned int i;
> > +    int res = 0;
> > +    size_t done = 0;
> > +
> > +    if (fd < 0) {
> > +        error_report("Bad fd for map");
> > +        return (uint64_t)-1;
> > +    }
> > +
> > +    for (i = 0; i < sm->count && !res; i++) {
> > +        VhostUserFSSlaveMsgEntry *e = &sm->entries[i];
> > +        if (e->len == 0) {
> > +            continue;
> > +        }
> > +
> > +        size_t len = e->len;
> > +        uint64_t fd_offset = e->fd_offset;
> > +        hwaddr gpa = e->c_offset;
> > +
> > +        while (len && !res) {
> > +            hwaddr xlat, xlat_len;
> > +            bool is_write = e->flags & VHOST_USER_FS_FLAG_MAP_W;
> > +            MemoryRegion *mr = address_space_translate(dev->vdev->dma_as, gpa,
> > +                                                       &xlat, &xlat_len,
> > +                                                       is_write,
> > +                                                       MEMTXATTRS_UNSPECIFIED);
> > +            if (!mr || !xlat_len) {
> > +                error_report("No guest region found for 0x%" HWADDR_PRIx, gpa);
> > +                res = -EFAULT;
> > +                break;
> > +            }
> > +
> > +            trace_vhost_user_fs_slave_io_loop(mr->name,
> > +                                          (uint64_t)xlat,
> > +                                          memory_region_is_ram(mr),
> > +                                          memory_region_is_romd(mr),
> > +                                          (size_t)xlat_len);
> > +
> > +            void *hostptr = qemu_map_ram_ptr(mr->ram_block,
> > +                                             xlat);
> > +            ssize_t transferred;
> > +            if (e->flags & VHOST_USER_FS_FLAG_MAP_R) {
> > +                /* Read from file into RAM */
> > +                if (mr->readonly) {
> > +                    res = -EFAULT;
> > +                    break;
> > +                }
> > +                transferred = pread(fd, hostptr, xlat_len, fd_offset);
> > +            } else if (e->flags & VHOST_USER_FS_FLAG_MAP_W) {
> > +                /* Write into file from RAM */
> > +                transferred = pwrite(fd, hostptr, xlat_len, fd_offset);
> > +            } else {
> > +                transferred = EINVAL;
> > +            }
> > +
> > +            trace_vhost_user_fs_slave_io_loop_res(transferred);
> > +            if (transferred < 0) {
> > +                res = -errno;
> > +                break;
> > +            }
> > +            if (!transferred) {
> > +                /* EOF */
> > +                break;
> > +            }
> > +
> > +            done += transferred;
> > +            fd_offset += transferred;
> > +            gpa += transferred;
> > +            len -= transferred;
> > +        }
> > +    }
> > +    close(fd);
> > +
> > +    trace_vhost_user_fs_slave_io_exit(res, done);
> > +    if (res < 0) {
> > +        return (uint64_t)res;
> > +    }
> > +    return (uint64_t)done;
> > +}
> > +
> >  static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
> >  {
> >      VHostUserFS *fs = VHOST_USER_FS(vdev);
> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > index ad9170f8dc..b9699586ae 100644
> > --- a/hw/virtio/vhost-user.c
> > +++ b/hw/virtio/vhost-user.c
> > @@ -138,6 +138,7 @@ typedef enum VhostUserSlaveRequest {
> >      VHOST_USER_SLAVE_VRING_ERR = 5,
> >      VHOST_USER_SLAVE_FS_MAP = 6,
> >      VHOST_USER_SLAVE_FS_UNMAP = 7,
> > +    VHOST_USER_SLAVE_FS_IO = 8,
> >      VHOST_USER_SLAVE_MAX
> >  }  VhostUserSlaveRequest;
> >  
> > @@ -1562,6 +1563,9 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
> >      case VHOST_USER_SLAVE_FS_UNMAP:
> >          ret = vhost_user_fs_slave_unmap(dev, hdr.size, &payload.fs);
> >          break;
> > +    case VHOST_USER_SLAVE_FS_IO:
> > +        ret = vhost_user_fs_slave_io(dev, hdr.size, &payload.fs, fd[0]);
> > +        break;
> >  #endif
> >      default:
> >          error_report("Received unexpected msg type: %d.", hdr.request);
> > diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
> > index 0766f17548..2931164e23 100644
> > --- a/include/hw/virtio/vhost-user-fs.h
> > +++ b/include/hw/virtio/vhost-user-fs.h
> > @@ -78,5 +78,7 @@ uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, int message_size,
> >                                   VhostUserFSSlaveMsg *sm, int fd);
> >  uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
> >                                     VhostUserFSSlaveMsg *sm);
> > +uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
> > +                                VhostUserFSSlaveMsg *sm, int fd);
> >  
> >  #endif /* _QEMU_VHOST_USER_FS_H */
> > diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
> > index a98c5f5c11..42b0833c4b 100644
> > --- a/subprojects/libvhost-user/libvhost-user.h
> > +++ b/subprojects/libvhost-user/libvhost-user.h
> > @@ -121,6 +121,7 @@ typedef enum VhostUserSlaveRequest {
> >      VHOST_USER_SLAVE_VRING_ERR = 5,
> >      VHOST_USER_SLAVE_FS_MAP = 6,
> >      VHOST_USER_SLAVE_FS_UNMAP = 7,
> > +    VHOST_USER_SLAVE_FS_IO = 8,
> >      VHOST_USER_SLAVE_MAX
> >  }  VhostUserSlaveRequest;
> >  
> > -- 
> > 2.31.1
> > 
> > _______________________________________________
> > Virtio-fs mailing list
> > Virtio-fs@redhat.com
> > https://listman.redhat.com/mailman/listinfo/virtio-fs
Vivek Goyal April 22, 2021, 3:40 p.m. UTC | #3
On Thu, Apr 22, 2021 at 10:29:08AM +0100, Dr. David Alan Gilbert wrote:
> * Vivek Goyal (vgoyal@redhat.com) wrote:
> > On Wed, Apr 14, 2021 at 04:51:29PM +0100, Dr. David Alan Gilbert (git) wrote:
> > > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > > 
> > > Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a
> > > client to ask qemu to perform a read/write from an fd directly
> > > to GPA.
> > 
> > Hi David,
> > 
> > Today I came across process_vm_readv() and process_vm_writev().
> > 
> > https://man7.org/linux/man-pages/man2/process_vm_readv.2.html
> 
> Yes, I just saw these (reading the lwn article on process_vm_exec)
> 
> > I am wondering if we can use these to read from/write to qemu address
> > space (DAX window, which virtiofsd does not have access to).
> > 
> > So for the case of reading from DAX window into an fd, we probably
> > will have to first read from qemu DAX window to virtiofsd local memory
> > and then do a writev(fd).
> > 
> > Don't know if this approach is faster/slower as compared to sending
> > a vhost-user message to qemu.
> 
> I think there are some other problems as well:
>   a) I'm not sure the permissions currently work out - I think it's
> saying you need to either have CAP_SYS_PTRACE or the same uid as the
> other process; and I don't think that's normally the relationship
> between the daemon and the qemu.

That's a good point. We don't give CAP_SYS_PTRACE yet. May be if
qemu and virtiofsd run in same user namespace then giving CAP_SYS_PTRACE
might not be a bad idea (hopefully these interfaces work if CAP_SYS_PTRACE
is not given in init_user_ns).

We don't have a plan to run virtiofsd as user "qemu". So that will not
work well.

> 
>   b) The virtiofsd would have to know something about the addresses in
> qemu's address space, rather than currently only knowing guest
> addresses.

Yes. But that could be one time message exchange to know where the
DAX window is in qemu address space?

> 
>   c) No one said that mapping is linear/simple, especially for an area
> where an fd wasn't passed for shared memory.

I don't understand this point. DAX window mapping is linear in
qemu address space, right? And these new interfaces are only doing
two types of I/O:

- Read from fd and write to DAX window
- Read from DAX window and write to fd.

So it is assumed fd is there. 

I am probably not getting the point you referring to. Please elaborate
a bit more.

> 
> Also, this interface does protect qemu from the daemon writing to
> arbitrary qemu data strctures.

But the daemon seems to be more privileged here (running as root), as
compared to qemu. So I can understand that it protects from
accidental writing.

> 
> Still, those interfaces do sound attractive for something - just not
> quite figured out what.

Yes, let's stick to current implementation for now. We can experiment
with these new interfaces down the line.

Vivek

> 
> Dave
> 
> 
> 
> > Thanks
> > Vivek
> > 
> > 
> > > 
> > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > ---
> > >  docs/interop/vhost-user.rst               | 16 ++++
> > >  hw/virtio/trace-events                    |  6 ++
> > >  hw/virtio/vhost-user-fs.c                 | 95 +++++++++++++++++++++++
> > >  hw/virtio/vhost-user.c                    |  4 +
> > >  include/hw/virtio/vhost-user-fs.h         |  2 +
> > >  subprojects/libvhost-user/libvhost-user.h |  1 +
> > >  6 files changed, 124 insertions(+)
> > > 
> > > diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> > > index 09aee3565d..2fa62ea451 100644
> > > --- a/docs/interop/vhost-user.rst
> > > +++ b/docs/interop/vhost-user.rst
> > > @@ -1453,6 +1453,22 @@ Slave message types
> > >    multiple chunks can be unmapped in one command.
> > >    A reply is generated indicating whether unmapping succeeded.
> > >  
> > > +``VHOST_USER_SLAVE_FS_IO``
> > > +  :id: 9
> > > +  :equivalent ioctl: N/A
> > > +  :slave payload: ``struct VhostUserFSSlaveMsg``
> > > +  :master payload: N/A
> > > +
> > > +  Requests that IO be performed directly from an fd, passed in ancillary
> > > +  data, to guest memory on behalf of the daemon; this is normally for a
> > > +  case where a memory region isn't visible to the daemon. slave payload
> > > +  has flags which determine the direction of IO operation.
> > > +
> > > +  The ``VHOST_USER_FS_FLAG_MAP_R`` flag must be set in the ``flags`` field to
> > > +  read from the file into RAM.
> > > +  The ``VHOST_USER_FS_FLAG_MAP_W`` flag must be set in the ``flags`` field to
> > > +  write to the file from RAM.
> > > +
> > >  .. _reply_ack:
> > >  
> > >  VHOST_USER_PROTOCOL_F_REPLY_ACK
> > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > > index c62727f879..20557a078e 100644
> > > --- a/hw/virtio/trace-events
> > > +++ b/hw/virtio/trace-events
> > > @@ -53,6 +53,12 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
> > >  vhost_vdpa_set_owner(void *dev) "dev: %p"
> > >  vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
> > >  
> > > +# vhost-user-fs.c
> > > +
> > > +vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd"
> > > +vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd"
> > > +vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd"
> > > +
> > >  # virtio.c
> > >  virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
> > >  virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
> > > diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
> > > index 963f694435..5511838f29 100644
> > > --- a/hw/virtio/vhost-user-fs.c
> > > +++ b/hw/virtio/vhost-user-fs.c
> > > @@ -23,6 +23,8 @@
> > >  #include "hw/virtio/vhost-user-fs.h"
> > >  #include "monitor/monitor.h"
> > >  #include "sysemu/sysemu.h"
> > > +#include "exec/address-spaces.h"
> > > +#include "trace.h"
> > >  
> > >  static const int user_feature_bits[] = {
> > >      VIRTIO_F_VERSION_1,
> > > @@ -220,6 +222,99 @@ uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
> > >      return (uint64_t)res;
> > >  }
> > >  
> > > +uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
> > > +                                VhostUserFSSlaveMsg *sm, int fd)
> > > +{
> > > +    VHostUserFS *fs = (VHostUserFS *)object_dynamic_cast(OBJECT(dev->vdev),
> > > +                          TYPE_VHOST_USER_FS);
> > > +    if (!fs) {
> > > +        error_report("%s: Bad fs ptr", __func__);
> > > +        return (uint64_t)-1;
> > > +    }
> > > +    if (!check_slave_message_entries(sm, message_size)) {
> > > +        return (uint64_t)-1;
> > > +    }
> > > +
> > > +    unsigned int i;
> > > +    int res = 0;
> > > +    size_t done = 0;
> > > +
> > > +    if (fd < 0) {
> > > +        error_report("Bad fd for map");
> > > +        return (uint64_t)-1;
> > > +    }
> > > +
> > > +    for (i = 0; i < sm->count && !res; i++) {
> > > +        VhostUserFSSlaveMsgEntry *e = &sm->entries[i];
> > > +        if (e->len == 0) {
> > > +            continue;
> > > +        }
> > > +
> > > +        size_t len = e->len;
> > > +        uint64_t fd_offset = e->fd_offset;
> > > +        hwaddr gpa = e->c_offset;
> > > +
> > > +        while (len && !res) {
> > > +            hwaddr xlat, xlat_len;
> > > +            bool is_write = e->flags & VHOST_USER_FS_FLAG_MAP_W;
> > > +            MemoryRegion *mr = address_space_translate(dev->vdev->dma_as, gpa,
> > > +                                                       &xlat, &xlat_len,
> > > +                                                       is_write,
> > > +                                                       MEMTXATTRS_UNSPECIFIED);
> > > +            if (!mr || !xlat_len) {
> > > +                error_report("No guest region found for 0x%" HWADDR_PRIx, gpa);
> > > +                res = -EFAULT;
> > > +                break;
> > > +            }
> > > +
> > > +            trace_vhost_user_fs_slave_io_loop(mr->name,
> > > +                                          (uint64_t)xlat,
> > > +                                          memory_region_is_ram(mr),
> > > +                                          memory_region_is_romd(mr),
> > > +                                          (size_t)xlat_len);
> > > +
> > > +            void *hostptr = qemu_map_ram_ptr(mr->ram_block,
> > > +                                             xlat);
> > > +            ssize_t transferred;
> > > +            if (e->flags & VHOST_USER_FS_FLAG_MAP_R) {
> > > +                /* Read from file into RAM */
> > > +                if (mr->readonly) {
> > > +                    res = -EFAULT;
> > > +                    break;
> > > +                }
> > > +                transferred = pread(fd, hostptr, xlat_len, fd_offset);
> > > +            } else if (e->flags & VHOST_USER_FS_FLAG_MAP_W) {
> > > +                /* Write into file from RAM */
> > > +                transferred = pwrite(fd, hostptr, xlat_len, fd_offset);
> > > +            } else {
> > > +                transferred = EINVAL;
> > > +            }
> > > +
> > > +            trace_vhost_user_fs_slave_io_loop_res(transferred);
> > > +            if (transferred < 0) {
> > > +                res = -errno;
> > > +                break;
> > > +            }
> > > +            if (!transferred) {
> > > +                /* EOF */
> > > +                break;
> > > +            }
> > > +
> > > +            done += transferred;
> > > +            fd_offset += transferred;
> > > +            gpa += transferred;
> > > +            len -= transferred;
> > > +        }
> > > +    }
> > > +    close(fd);
> > > +
> > > +    trace_vhost_user_fs_slave_io_exit(res, done);
> > > +    if (res < 0) {
> > > +        return (uint64_t)res;
> > > +    }
> > > +    return (uint64_t)done;
> > > +}
> > > +
> > >  static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
> > >  {
> > >      VHostUserFS *fs = VHOST_USER_FS(vdev);
> > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > > index ad9170f8dc..b9699586ae 100644
> > > --- a/hw/virtio/vhost-user.c
> > > +++ b/hw/virtio/vhost-user.c
> > > @@ -138,6 +138,7 @@ typedef enum VhostUserSlaveRequest {
> > >      VHOST_USER_SLAVE_VRING_ERR = 5,
> > >      VHOST_USER_SLAVE_FS_MAP = 6,
> > >      VHOST_USER_SLAVE_FS_UNMAP = 7,
> > > +    VHOST_USER_SLAVE_FS_IO = 8,
> > >      VHOST_USER_SLAVE_MAX
> > >  }  VhostUserSlaveRequest;
> > >  
> > > @@ -1562,6 +1563,9 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
> > >      case VHOST_USER_SLAVE_FS_UNMAP:
> > >          ret = vhost_user_fs_slave_unmap(dev, hdr.size, &payload.fs);
> > >          break;
> > > +    case VHOST_USER_SLAVE_FS_IO:
> > > +        ret = vhost_user_fs_slave_io(dev, hdr.size, &payload.fs, fd[0]);
> > > +        break;
> > >  #endif
> > >      default:
> > >          error_report("Received unexpected msg type: %d.", hdr.request);
> > > diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
> > > index 0766f17548..2931164e23 100644
> > > --- a/include/hw/virtio/vhost-user-fs.h
> > > +++ b/include/hw/virtio/vhost-user-fs.h
> > > @@ -78,5 +78,7 @@ uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, int message_size,
> > >                                   VhostUserFSSlaveMsg *sm, int fd);
> > >  uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
> > >                                     VhostUserFSSlaveMsg *sm);
> > > +uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
> > > +                                VhostUserFSSlaveMsg *sm, int fd);
> > >  
> > >  #endif /* _QEMU_VHOST_USER_FS_H */
> > > diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
> > > index a98c5f5c11..42b0833c4b 100644
> > > --- a/subprojects/libvhost-user/libvhost-user.h
> > > +++ b/subprojects/libvhost-user/libvhost-user.h
> > > @@ -121,6 +121,7 @@ typedef enum VhostUserSlaveRequest {
> > >      VHOST_USER_SLAVE_VRING_ERR = 5,
> > >      VHOST_USER_SLAVE_FS_MAP = 6,
> > >      VHOST_USER_SLAVE_FS_UNMAP = 7,
> > > +    VHOST_USER_SLAVE_FS_IO = 8,
> > >      VHOST_USER_SLAVE_MAX
> > >  }  VhostUserSlaveRequest;
> > >  
> > > -- 
> > > 2.31.1
> > > 
> > > _______________________________________________
> > > Virtio-fs mailing list
> > > Virtio-fs@redhat.com
> > > https://listman.redhat.com/mailman/listinfo/virtio-fs
> -- 
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert April 22, 2021, 3:48 p.m. UTC | #4
* Vivek Goyal (vgoyal@redhat.com) wrote:
> On Thu, Apr 22, 2021 at 10:29:08AM +0100, Dr. David Alan Gilbert wrote:
> > * Vivek Goyal (vgoyal@redhat.com) wrote:
> > > On Wed, Apr 14, 2021 at 04:51:29PM +0100, Dr. David Alan Gilbert (git) wrote:
> > > > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > > > 
> > > > Define a new slave command 'VHOST_USER_SLAVE_FS_IO' for a
> > > > client to ask qemu to perform a read/write from an fd directly
> > > > to GPA.
> > > 
> > > Hi David,
> > > 
> > > Today I came across process_vm_readv() and process_vm_writev().
> > > 
> > > https://man7.org/linux/man-pages/man2/process_vm_readv.2.html
> > 
> > Yes, I just saw these (reading the lwn article on process_vm_exec)
> > 
> > > I am wondering if we can use these to read from/write to qemu address
> > > space (DAX window, which virtiofsd does not have access to).
> > > 
> > > So for the case of reading from DAX window into an fd, we probably
> > > will have to first read from qemu DAX window to virtiofsd local memory
> > > and then do a writev(fd).
> > > 
> > > Don't know if this approach is faster/slower as compared to sending
> > > a vhost-user message to qemu.
> > 
> > I think there are some other problems as well:
> >   a) I'm not sure the permissions currently work out - I think it's
> > saying you need to either have CAP_SYS_PTRACE or the same uid as the
> > other process; and I don't think that's normally the relationship
> > between the daemon and the qemu.
> 
> That's a good point. We don't give CAP_SYS_PTRACE yet. May be if
> qemu and virtiofsd run in same user namespace then giving CAP_SYS_PTRACE
> might not be a bad idea (hopefully these interfaces work if CAP_SYS_PTRACE
> is not given in init_user_ns).
> 
> We don't have a plan to run virtiofsd as user "qemu". So that will not
> work well.
> 
> > 
> >   b) The virtiofsd would have to know something about the addresses in
> > qemu's address space, rather than currently only knowing guest
> > addresses.
> 
> Yes. But that could be one time message exchange to know where the
> DAX window is in qemu address space?
> 
> > 
> >   c) No one said that mapping is linear/simple, especially for an area
> > where an fd wasn't passed for shared memory.
> 
> I don't understand this point. DAX window mapping is linear in 
> qemu address space, right? And these new interfaces are only doing
> two types of I/O:
> 
> - Read from fd and write to DAX window
> - Read from DAX window and write to fd.
> 
> So it is assumed fd is there. 
> 
> I am probably not getting the point you referring to. Please elaborate
> a bit more.

This interface is a bit more flexible than DAX; for example, it might
well work for a read into a host-passthrough device - e.g. a read
from a virtiofs filesystem into a physical GPU buffer or real pmem
PCI device - and now we're getting into not being able to assume how
easy it is to translate those addresses.

> > Also, this interface does protect qemu from the daemon writing to
> > arbitrary qemu data strctures.
> 
> But daemon seems to be more priviliged here (running as root), as
> compared to qemu. So I can understand that it protects from 
> accidental writing.

Yes, but in principle it doesn't need to be - e.g. the daemon could
be root in its own namespace separate from qemu, or read-only
less privileged for configuration data.

> > Still, those interfaces do sound attractive for something - just not
> > quite figured out what.
> 
> Yes, let's stick to current implementation for now. We can experiment
> with these new interfaces down the line.

Yep.

Dave

> Vivek
> 
> > 
> > Dave
> > 
> > 
> > 
> > > Thanks
> > > Vivek
> > > 
> > > 
> > > > 
> > > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > ---
> > > >  docs/interop/vhost-user.rst               | 16 ++++
> > > >  hw/virtio/trace-events                    |  6 ++
> > > >  hw/virtio/vhost-user-fs.c                 | 95 +++++++++++++++++++++++
> > > >  hw/virtio/vhost-user.c                    |  4 +
> > > >  include/hw/virtio/vhost-user-fs.h         |  2 +
> > > >  subprojects/libvhost-user/libvhost-user.h |  1 +
> > > >  6 files changed, 124 insertions(+)
> > > > 
> > > > diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> > > > index 09aee3565d..2fa62ea451 100644
> > > > --- a/docs/interop/vhost-user.rst
> > > > +++ b/docs/interop/vhost-user.rst
> > > > @@ -1453,6 +1453,22 @@ Slave message types
> > > >    multiple chunks can be unmapped in one command.
> > > >    A reply is generated indicating whether unmapping succeeded.
> > > >  
> > > > +``VHOST_USER_SLAVE_FS_IO``
> > > > +  :id: 9
> > > > +  :equivalent ioctl: N/A
> > > > +  :slave payload: ``struct VhostUserFSSlaveMsg``
> > > > +  :master payload: N/A
> > > > +
> > > > +  Requests that IO be performed directly from an fd, passed in ancillary
> > > > +  data, to guest memory on behalf of the daemon; this is normally for a
> > > > +  case where a memory region isn't visible to the daemon. slave payload
> > > > +  has flags which determine the direction of IO operation.
> > > > +
> > > > +  The ``VHOST_USER_FS_FLAG_MAP_R`` flag must be set in the ``flags`` field to
> > > > +  read from the file into RAM.
> > > > +  The ``VHOST_USER_FS_FLAG_MAP_W`` flag must be set in the ``flags`` field to
> > > > +  write to the file from RAM.
> > > > +
> > > >  .. _reply_ack:
> > > >  
> > > >  VHOST_USER_PROTOCOL_F_REPLY_ACK
> > > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > > > index c62727f879..20557a078e 100644
> > > > --- a/hw/virtio/trace-events
> > > > +++ b/hw/virtio/trace-events
> > > > @@ -53,6 +53,12 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
> > > >  vhost_vdpa_set_owner(void *dev) "dev: %p"
> > > >  vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
> > > >  
> > > > +# vhost-user-fs.c
> > > > +
> > > > +vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd"
> > > > +vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd"
> > > > +vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd"
> > > > +
> > > >  # virtio.c
> > > >  virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
> > > >  virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
> > > > diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
> > > > index 963f694435..5511838f29 100644
> > > > --- a/hw/virtio/vhost-user-fs.c
> > > > +++ b/hw/virtio/vhost-user-fs.c
> > > > @@ -23,6 +23,8 @@
> > > >  #include "hw/virtio/vhost-user-fs.h"
> > > >  #include "monitor/monitor.h"
> > > >  #include "sysemu/sysemu.h"
> > > > +#include "exec/address-spaces.h"
> > > > +#include "trace.h"
> > > >  
> > > >  static const int user_feature_bits[] = {
> > > >      VIRTIO_F_VERSION_1,
> > > > @@ -220,6 +222,99 @@ uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
> > > >      return (uint64_t)res;
> > > >  }
> > > >  
> > > > +uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
> > > > +                                VhostUserFSSlaveMsg *sm, int fd)
> > > > +{
> > > > +    VHostUserFS *fs = (VHostUserFS *)object_dynamic_cast(OBJECT(dev->vdev),
> > > > +                          TYPE_VHOST_USER_FS);
> > > > +    if (!fs) {
> > > > +        error_report("%s: Bad fs ptr", __func__);
> > > > +        return (uint64_t)-1;
> > > > +    }
> > > > +    if (!check_slave_message_entries(sm, message_size)) {
> > > > +        return (uint64_t)-1;
> > > > +    }
> > > > +
> > > > +    unsigned int i;
> > > > +    int res = 0;
> > > > +    size_t done = 0;
> > > > +
> > > > +    if (fd < 0) {
> > > > +        error_report("Bad fd for map");
> > > > +        return (uint64_t)-1;
> > > > +    }
> > > > +
> > > > +    for (i = 0; i < sm->count && !res; i++) {
> > > > +        VhostUserFSSlaveMsgEntry *e = &sm->entries[i];
> > > > +        if (e->len == 0) {
> > > > +            continue;
> > > > +        }
> > > > +
> > > > +        size_t len = e->len;
> > > > +        uint64_t fd_offset = e->fd_offset;
> > > > +        hwaddr gpa = e->c_offset;
> > > > +
> > > > +        while (len && !res) {
> > > > +            hwaddr xlat, xlat_len;
> > > > +            bool is_write = e->flags & VHOST_USER_FS_FLAG_MAP_W;
> > > > +            MemoryRegion *mr = address_space_translate(dev->vdev->dma_as, gpa,
> > > > +                                                       &xlat, &xlat_len,
> > > > +                                                       is_write,
> > > > +                                                       MEMTXATTRS_UNSPECIFIED);
> > > > +            if (!mr || !xlat_len) {
> > > > +                error_report("No guest region found for 0x%" HWADDR_PRIx, gpa);
> > > > +                res = -EFAULT;
> > > > +                break;
> > > > +            }
> > > > +
> > > > +            trace_vhost_user_fs_slave_io_loop(mr->name,
> > > > +                                          (uint64_t)xlat,
> > > > +                                          memory_region_is_ram(mr),
> > > > +                                          memory_region_is_romd(mr),
> > > > +                                          (size_t)xlat_len);
> > > > +
> > > > +            void *hostptr = qemu_map_ram_ptr(mr->ram_block,
> > > > +                                             xlat);
> > > > +            ssize_t transferred;
> > > > +            if (e->flags & VHOST_USER_FS_FLAG_MAP_R) {
> > > > +                /* Read from file into RAM */
> > > > +                if (mr->readonly) {
> > > > +                    res = -EFAULT;
> > > > +                    break;
> > > > +                }
> > > > +                transferred = pread(fd, hostptr, xlat_len, fd_offset);
> > > > +            } else if (e->flags & VHOST_USER_FS_FLAG_MAP_W) {
> > > > +                /* Write into file from RAM */
> > > > +                transferred = pwrite(fd, hostptr, xlat_len, fd_offset);
> > > > +            } else {
> > > > +                transferred = EINVAL;
> > > > +            }
> > > > +
> > > > +            trace_vhost_user_fs_slave_io_loop_res(transferred);
> > > > +            if (transferred < 0) {
> > > > +                res = -errno;
> > > > +                break;
> > > > +            }
> > > > +            if (!transferred) {
> > > > +                /* EOF */
> > > > +                break;
> > > > +            }
> > > > +
> > > > +            done += transferred;
> > > > +            fd_offset += transferred;
> > > > +            gpa += transferred;
> > > > +            len -= transferred;
> > > > +        }
> > > > +    }
> > > > +    close(fd);
> > > > +
> > > > +    trace_vhost_user_fs_slave_io_exit(res, done);
> > > > +    if (res < 0) {
> > > > +        return (uint64_t)res;
> > > > +    }
> > > > +    return (uint64_t)done;
> > > > +}
> > > > +
> > > >  static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
> > > >  {
> > > >      VHostUserFS *fs = VHOST_USER_FS(vdev);
> > > > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > > > index ad9170f8dc..b9699586ae 100644
> > > > --- a/hw/virtio/vhost-user.c
> > > > +++ b/hw/virtio/vhost-user.c
> > > > @@ -138,6 +138,7 @@ typedef enum VhostUserSlaveRequest {
> > > >      VHOST_USER_SLAVE_VRING_ERR = 5,
> > > >      VHOST_USER_SLAVE_FS_MAP = 6,
> > > >      VHOST_USER_SLAVE_FS_UNMAP = 7,
> > > > +    VHOST_USER_SLAVE_FS_IO = 8,
> > > >      VHOST_USER_SLAVE_MAX
> > > >  }  VhostUserSlaveRequest;
> > > >  
> > > > @@ -1562,6 +1563,9 @@ static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
> > > >      case VHOST_USER_SLAVE_FS_UNMAP:
> > > >          ret = vhost_user_fs_slave_unmap(dev, hdr.size, &payload.fs);
> > > >          break;
> > > > +    case VHOST_USER_SLAVE_FS_IO:
> > > > +        ret = vhost_user_fs_slave_io(dev, hdr.size, &payload.fs, fd[0]);
> > > > +        break;
> > > >  #endif
> > > >      default:
> > > >          error_report("Received unexpected msg type: %d.", hdr.request);
> > > > diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
> > > > index 0766f17548..2931164e23 100644
> > > > --- a/include/hw/virtio/vhost-user-fs.h
> > > > +++ b/include/hw/virtio/vhost-user-fs.h
> > > > @@ -78,5 +78,7 @@ uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, int message_size,
> > > >                                   VhostUserFSSlaveMsg *sm, int fd);
> > > >  uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
> > > >                                     VhostUserFSSlaveMsg *sm);
> > > > +uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
> > > > +                                VhostUserFSSlaveMsg *sm, int fd);
> > > >  
> > > >  #endif /* _QEMU_VHOST_USER_FS_H */
> > > > diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
> > > > index a98c5f5c11..42b0833c4b 100644
> > > > --- a/subprojects/libvhost-user/libvhost-user.h
> > > > +++ b/subprojects/libvhost-user/libvhost-user.h
> > > > @@ -121,6 +121,7 @@ typedef enum VhostUserSlaveRequest {
> > > >      VHOST_USER_SLAVE_VRING_ERR = 5,
> > > >      VHOST_USER_SLAVE_FS_MAP = 6,
> > > >      VHOST_USER_SLAVE_FS_UNMAP = 7,
> > > > +    VHOST_USER_SLAVE_FS_IO = 8,
> > > >      VHOST_USER_SLAVE_MAX
> > > >  }  VhostUserSlaveRequest;
> > > >  
> > > > -- 
> > > > 2.31.1
> > > > 
> > > > _______________________________________________
> > > > Virtio-fs mailing list
> > > > Virtio-fs@redhat.com
> > > > https://listman.redhat.com/mailman/listinfo/virtio-fs
> > -- 
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 09aee3565d..2fa62ea451 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -1453,6 +1453,22 @@  Slave message types
   multiple chunks can be unmapped in one command.
   A reply is generated indicating whether unmapping succeeded.
 
+``VHOST_USER_SLAVE_FS_IO``
+  :id: 9
+  :equivalent ioctl: N/A
+  :slave payload: ``struct VhostUserFSSlaveMsg``
+  :master payload: N/A
+
+  Requests that IO be performed directly from an fd, passed in ancillary
+  data, to guest memory on behalf of the daemon; this is normally for a
+  case where a memory region isn't visible to the daemon. slave payload
+  has flags which determine the direction of IO operation.
+
+  The ``VHOST_USER_FS_FLAG_MAP_R`` flag must be set in the ``flags`` field to
+  read from the file into RAM.
+  The ``VHOST_USER_FS_FLAG_MAP_W`` flag must be set in the ``flags`` field to
+  write to the file from RAM.
+
 .. _reply_ack:
 
 VHOST_USER_PROTOCOL_F_REPLY_ACK
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index c62727f879..20557a078e 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -53,6 +53,12 @@  vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
 vhost_vdpa_set_owner(void *dev) "dev: %p"
 vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
 
+# vhost-user-fs.c
+
+vhost_user_fs_slave_io_loop(const char *name, uint64_t owr, int is_ram, int is_romd, size_t size) "region %s with internal offset 0x%"PRIx64 " ram=%d romd=%d mrs.size=%zd"
+vhost_user_fs_slave_io_loop_res(ssize_t transferred) "%zd"
+vhost_user_fs_slave_io_exit(int res, size_t done) "res: %d done: %zd"
+
 # virtio.c
 virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
 virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index 963f694435..5511838f29 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -23,6 +23,8 @@ 
 #include "hw/virtio/vhost-user-fs.h"
 #include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
+#include "exec/address-spaces.h"
+#include "trace.h"
 
 static const int user_feature_bits[] = {
     VIRTIO_F_VERSION_1,
@@ -220,6 +222,99 @@  uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
     return (uint64_t)res;
 }
 
+uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
+                                VhostUserFSSlaveMsg *sm, int fd)
+{
+    VHostUserFS *fs = (VHostUserFS *)object_dynamic_cast(OBJECT(dev->vdev),
+                          TYPE_VHOST_USER_FS);
+    if (!fs) {
+        error_report("%s: Bad fs ptr", __func__);
+        return (uint64_t)-1;
+    }
+    if (!check_slave_message_entries(sm, message_size)) {
+        return (uint64_t)-1;
+    }
+
+    unsigned int i;
+    int res = 0;
+    size_t done = 0;
+
+    if (fd < 0) {
+        error_report("Bad fd for map");
+        return (uint64_t)-1;
+    }
+
+    for (i = 0; i < sm->count && !res; i++) {
+        VhostUserFSSlaveMsgEntry *e = &sm->entries[i];
+        if (e->len == 0) {
+            continue;
+        }
+
+        size_t len = e->len;
+        uint64_t fd_offset = e->fd_offset;
+        hwaddr gpa = e->c_offset;
+
+        while (len && !res) {
+            hwaddr xlat, xlat_len;
+            bool is_write = e->flags & VHOST_USER_FS_FLAG_MAP_W;
+            MemoryRegion *mr = address_space_translate(dev->vdev->dma_as, gpa,
+                                                       &xlat, &xlat_len,
+                                                       is_write,
+                                                       MEMTXATTRS_UNSPECIFIED);
+            if (!mr || !xlat_len) {
+                error_report("No guest region found for 0x%" HWADDR_PRIx, gpa);
+                res = -EFAULT;
+                break;
+            }
+
+            trace_vhost_user_fs_slave_io_loop(mr->name,
+                                          (uint64_t)xlat,
+                                          memory_region_is_ram(mr),
+                                          memory_region_is_romd(mr),
+                                          (size_t)xlat_len);
+
+            void *hostptr = qemu_map_ram_ptr(mr->ram_block,
+                                             xlat);
+            ssize_t transferred;
+            if (e->flags & VHOST_USER_FS_FLAG_MAP_R) {
+                /* Read from file into RAM */
+                if (mr->readonly) {
+                    res = -EFAULT;
+                    break;
+                }
+                transferred = pread(fd, hostptr, xlat_len, fd_offset);
+            } else if (e->flags & VHOST_USER_FS_FLAG_MAP_W) {
+                /* Write into file from RAM */
+                transferred = pwrite(fd, hostptr, xlat_len, fd_offset);
+            } else {
+                transferred = EINVAL;
+            }
+
+            trace_vhost_user_fs_slave_io_loop_res(transferred);
+            if (transferred < 0) {
+                res = -errno;
+                break;
+            }
+            if (!transferred) {
+                /* EOF */
+                break;
+            }
+
+            done += transferred;
+            fd_offset += transferred;
+            gpa += transferred;
+            len -= transferred;
+        }
+    }
+    close(fd);
+
+    trace_vhost_user_fs_slave_io_exit(res, done);
+    if (res < 0) {
+        return (uint64_t)res;
+    }
+    return (uint64_t)done;
+}
+
 static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
 {
     VHostUserFS *fs = VHOST_USER_FS(vdev);
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index ad9170f8dc..b9699586ae 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -138,6 +138,7 @@  typedef enum VhostUserSlaveRequest {
     VHOST_USER_SLAVE_VRING_ERR = 5,
     VHOST_USER_SLAVE_FS_MAP = 6,
     VHOST_USER_SLAVE_FS_UNMAP = 7,
+    VHOST_USER_SLAVE_FS_IO = 8,
     VHOST_USER_SLAVE_MAX
 }  VhostUserSlaveRequest;
 
@@ -1562,6 +1563,9 @@  static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
     case VHOST_USER_SLAVE_FS_UNMAP:
         ret = vhost_user_fs_slave_unmap(dev, hdr.size, &payload.fs);
         break;
+    case VHOST_USER_SLAVE_FS_IO:
+        ret = vhost_user_fs_slave_io(dev, hdr.size, &payload.fs, fd[0]);
+        break;
 #endif
     default:
         error_report("Received unexpected msg type: %d.", hdr.request);
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
index 0766f17548..2931164e23 100644
--- a/include/hw/virtio/vhost-user-fs.h
+++ b/include/hw/virtio/vhost-user-fs.h
@@ -78,5 +78,7 @@  uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, int message_size,
                                  VhostUserFSSlaveMsg *sm, int fd);
 uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev, int message_size,
                                    VhostUserFSSlaveMsg *sm);
+uint64_t vhost_user_fs_slave_io(struct vhost_dev *dev, int message_size,
+                                VhostUserFSSlaveMsg *sm, int fd);
 
 #endif /* _QEMU_VHOST_USER_FS_H */
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
index a98c5f5c11..42b0833c4b 100644
--- a/subprojects/libvhost-user/libvhost-user.h
+++ b/subprojects/libvhost-user/libvhost-user.h
@@ -121,6 +121,7 @@  typedef enum VhostUserSlaveRequest {
     VHOST_USER_SLAVE_VRING_ERR = 5,
     VHOST_USER_SLAVE_FS_MAP = 6,
     VHOST_USER_SLAVE_FS_UNMAP = 7,
+    VHOST_USER_SLAVE_FS_IO = 8,
     VHOST_USER_SLAVE_MAX
 }  VhostUserSlaveRequest;