diff mbox series

[v1,12/24] vfio-user: region read/write

Message ID e648032dfe45ca29141717dff7c6fb8dfae310e7.1667542066.git.john.g.johnson@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user client | expand

Commit Message

John Johnson Nov. 8, 2022, 11:13 p.m. UTC
Add support for posted writes on remote devices

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/common.c              |  10 +++-
 hw/vfio/pci.c                 |   9 +++-
 hw/vfio/pci.h                 |   1 +
 hw/vfio/user-protocol.h       |  12 +++++
 hw/vfio/user.c                | 109 ++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/user.h                |   1 +
 include/hw/vfio/vfio-common.h |   7 +--
 7 files changed, 143 insertions(+), 6 deletions(-)

Comments

John Levon Dec. 9, 2022, 5:11 p.m. UTC | #1
On Tue, Nov 08, 2022 at 03:13:34PM -0800, John Johnson wrote:

> Add support for posted writes on remote devices

LGTM

Reviewed-by: John Levon <john.levon@nutanix.com>

regards
john
Cédric Le Goater Dec. 13, 2022, 4:13 p.m. UTC | #2
On 11/9/22 00:13, John Johnson wrote:
> Add support for posted writes on remote devices
> 
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>


Reviewed-by: Cédric Le Goater <clg@redhat.com>

Thanks,

C.


> ---
>   hw/vfio/common.c              |  10 +++-
>   hw/vfio/pci.c                 |   9 +++-
>   hw/vfio/pci.h                 |   1 +
>   hw/vfio/user-protocol.h       |  12 +++++
>   hw/vfio/user.c                | 109 ++++++++++++++++++++++++++++++++++++++++++
>   hw/vfio/user.h                |   1 +
>   include/hw/vfio/vfio-common.h |   7 +--
>   7 files changed, 143 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 87400b3..87cd1d1 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -214,6 +214,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
>           uint32_t dword;
>           uint64_t qword;
>       } buf;
> +    bool post = region->post_wr;
>       int ret;
>   
>       switch (size) {
> @@ -234,7 +235,11 @@ void vfio_region_write(void *opaque, hwaddr addr,
>           break;
>       }
>   
> -    ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf);
> +    /* read-after-write hazard if guest can directly access region */
> +    if (region->nr_mmaps) {
> +        post = false;
> +    }
> +    ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf, post);
>       if (ret != size) {
>           const char *err = ret < 0 ? strerror(-ret) : "short write";
>   
> @@ -1587,6 +1592,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
>       region->size = info->size;
>       region->fd_offset = info->offset;
>       region->nr = index;
> +    region->post_wr = false;
>       if (vbasedev->regfds != NULL) {
>           region->fd = vbasedev->regfds[index];
>       } else {
> @@ -2721,7 +2727,7 @@ static int vfio_io_region_read(VFIODevice *vbasedev, uint8_t index, off_t off,
>   }
>   
>   static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t off,
> -                                uint32_t size, void *data)
> +                                uint32_t size, void *data, bool post)
>   {
>       struct vfio_region_info *info = vbasedev->regions[index];
>       int ret;
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 2e0e41d..027f9d5 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -51,7 +51,7 @@
>                        (size), (data))
>   #define VDEV_CONFIG_WRITE(vbasedev, off, size, data) \
>       VDEV_REGION_WRITE((vbasedev), VFIO_PCI_CONFIG_REGION_INDEX, (off), \
> -                      (size), (data))
> +                      (size), (data), false)
>   
>   #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
>   
> @@ -1704,6 +1704,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
>       bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
>                                            ~PCI_BASE_ADDRESS_MEM_MASK);
>       bar->size = bar->region.size;
> +
> +    /* IO regions are sync, memory can be async */
> +    bar->region.post_wr = (bar->ioport == 0);
>   }
>   
>   static void vfio_bars_prepare(VFIOPCIDevice *vdev)
> @@ -3494,6 +3497,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
>       if (udev->send_queued) {
>           proxy->flags |= VFIO_PROXY_FORCE_QUEUED;
>       }
> +    if (udev->no_post) {
> +        proxy->flags |= VFIO_PROXY_NO_POST;
> +    }
>   
>       vfio_user_validate_version(proxy, &err);
>       if (err != NULL) {
> @@ -3540,6 +3546,7 @@ static void vfio_user_instance_finalize(Object *obj)
>   static Property vfio_user_pci_dev_properties[] = {
>       DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
>       DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false),
> +    DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false),
>       DEFINE_PROP_END_OF_LIST(),
>   };
>   
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index c47d2f8..ec17f2e 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -196,6 +196,7 @@ struct VFIOUserPCIDevice {
>       VFIOPCIDevice device;
>       char *sock_name;
>       bool send_queued;   /* all sends are queued */
> +    bool no_post;       /* all regions write are sync */
>   };
>   
>   /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
> index a1b64fe..124340c 100644
> --- a/hw/vfio/user-protocol.h
> +++ b/hw/vfio/user-protocol.h
> @@ -140,4 +140,16 @@ typedef struct {
>       uint64_t offset;
>   } VFIOUserRegionInfo;
>   
> +/*
> + * VFIO_USER_REGION_READ
> + * VFIO_USER_REGION_WRITE
> + */
> +typedef struct {
> +    VFIOUserHdr hdr;
> +    uint64_t offset;
> +    uint32_t region;
> +    uint32_t count;
> +    char data[];
> +} VFIOUserRegionRW;
> +
>   #endif /* VFIO_USER_PROTOCOL_H */
> diff --git a/hw/vfio/user.c b/hw/vfio/user.c
> index 69b0fed..1453bb5 100644
> --- a/hw/vfio/user.c
> +++ b/hw/vfio/user.c
> @@ -57,6 +57,8 @@ static void vfio_user_cb(void *opaque);
>   
>   static void vfio_user_request(void *opaque);
>   static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg);
> +static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr,
> +                                 VFIOUserFDs *fds);
>   static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr,
>                                   VFIOUserFDs *fds, int rsize, bool nobql);
>   static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
> @@ -618,6 +620,33 @@ static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg)
>       return 0;
>   }
>   
> +/*
> + * async send - msg can be queued, but will be freed when sent
> + */
> +static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr,
> +                                 VFIOUserFDs *fds)
> +{
> +    VFIOUserMsg *msg;
> +    int ret;
> +
> +    if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
> +        error_printf("vfio_user_send_async on sync message\n");
> +        return;
> +    }
> +
> +    QEMU_LOCK_GUARD(&proxy->lock);
> +
> +    msg = vfio_user_getmsg(proxy, hdr, fds);
> +    msg->id = hdr->id;
> +    msg->rsize = 0;
> +    msg->type = VFIO_MSG_ASYNC;
> +
> +    ret = vfio_user_send_queued(proxy, msg);
> +    if (ret < 0) {
> +        vfio_user_recycle(proxy, msg);
> +    }
> +}
> +
>   static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr,
>                                   VFIOUserFDs *fds, int rsize, bool nobql)
>   {
> @@ -1135,6 +1164,70 @@ static int vfio_user_get_region_info(VFIOProxy *proxy,
>       return 0;
>   }
>   
> +static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset,
> +                                 uint32_t count, void *data)
> +{
> +    g_autofree VFIOUserRegionRW *msgp = NULL;
> +    int size = sizeof(*msgp) + count;
> +
> +    if (count > proxy->max_xfer_size) {
> +        return -EINVAL;
> +    }
> +
> +    msgp = g_malloc0(size);
> +    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0);
> +    msgp->offset = offset;
> +    msgp->region = index;
> +    msgp->count = count;
> +
> +    vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, false);
> +    if (msgp->hdr.flags & VFIO_USER_ERROR) {
> +        return -msgp->hdr.error_reply;
> +    } else if (msgp->count > count) {
> +        return -E2BIG;
> +    } else {
> +        memcpy(data, &msgp->data, msgp->count);
> +    }
> +
> +    return msgp->count;
> +}
> +
> +static int vfio_user_region_write(VFIOProxy *proxy, uint8_t index, off_t offset,
> +                                  uint32_t count, void *data, bool post)
> +{
> +    VFIOUserRegionRW *msgp = NULL;
> +    int flags = post ? VFIO_USER_NO_REPLY : 0;
> +    int size = sizeof(*msgp) + count;
> +    int ret;
> +
> +    if (count > proxy->max_xfer_size) {
> +        return -EINVAL;
> +    }
> +
> +    msgp = g_malloc0(size);
> +    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
> +    msgp->offset = offset;
> +    msgp->region = index;
> +    msgp->count = count;
> +    memcpy(&msgp->data, data, count);
> +
> +    /* async send will free msg after it's sent */
> +    if (post && !(proxy->flags & VFIO_PROXY_NO_POST)) {
> +        vfio_user_send_async(proxy, &msgp->hdr, NULL);
> +        return count;
> +    }
> +
> +    vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false);
> +    if (msgp->hdr.flags & VFIO_USER_ERROR) {
> +        ret = -msgp->hdr.error_reply;
> +    } else {
> +        ret = count;
> +    }
> +
> +    g_free(msgp);
> +    return ret;
> +}
> +
>   
>   /*
>    * Socket-based io_ops
> @@ -1184,8 +1277,24 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev,
>       return 0;
>   }
>   
> +static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index,
> +                                    off_t off, uint32_t size, void *data)
> +{
> +    return vfio_user_region_read(vbasedev->proxy, index, off, size, data);
> +}
> +
> +static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index,
> +                                     off_t off, unsigned size, void *data,
> +                                     bool post)
> +{
> +    return vfio_user_region_write(vbasedev->proxy, index, off, size, data,
> +                                  post);
> +}
> +
>   VFIODevIO vfio_dev_io_sock = {
>       .get_info = vfio_user_io_get_info,
>       .get_region_info = vfio_user_io_get_region_info,
> +    .region_read = vfio_user_io_region_read,
> +    .region_write = vfio_user_io_region_write,
>   };
>   
> diff --git a/hw/vfio/user.h b/hw/vfio/user.h
> index 2547cf6..359a029 100644
> --- a/hw/vfio/user.h
> +++ b/hw/vfio/user.h
> @@ -84,6 +84,7 @@ typedef struct VFIOProxy {
>   /* VFIOProxy flags */
>   #define VFIO_PROXY_CLIENT        0x1
>   #define VFIO_PROXY_FORCE_QUEUED  0x4
> +#define VFIO_PROXY_NO_POST       0x8
>   
>   VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
>   void vfio_user_disconnect(VFIOProxy *proxy);
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 3406e6a..6324132 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -57,6 +57,7 @@ typedef struct VFIORegion {
>       VFIOMmap *mmaps;
>       uint8_t nr; /* cache the region number for debug */
>       int fd; /* fd to mmap() region */
> +    bool post_wr; /* writes can be posted */
>   } VFIORegion;
>   
>   typedef struct VFIOMigration {
> @@ -180,7 +181,7 @@ struct VFIODevIO {
>       int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
>                          void *data);
>       int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
> -                        void *data);
> +                        void *data, bool post);
>   };
>   
>   #define VDEV_GET_INFO(vdev, info) \
> @@ -193,8 +194,8 @@ struct VFIODevIO {
>       ((vdev)->io_ops->set_irqs((vdev), (irqs)))
>   #define VDEV_REGION_READ(vdev, nr, off, size, data) \
>       ((vdev)->io_ops->region_read((vdev), (nr), (off), (size), (data)))
> -#define VDEV_REGION_WRITE(vdev, nr, off, size, data) \
> -    ((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data)))
> +#define VDEV_REGION_WRITE(vdev, nr, off, size, data, post) \
> +    ((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data), (post)))
>   
>   struct VFIOContIO {
>       int (*dma_map)(VFIOContainer *container,
diff mbox series

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 87400b3..87cd1d1 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -214,6 +214,7 @@  void vfio_region_write(void *opaque, hwaddr addr,
         uint32_t dword;
         uint64_t qword;
     } buf;
+    bool post = region->post_wr;
     int ret;
 
     switch (size) {
@@ -234,7 +235,11 @@  void vfio_region_write(void *opaque, hwaddr addr,
         break;
     }
 
-    ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf);
+    /* read-after-write hazard if guest can directly access region */
+    if (region->nr_mmaps) {
+        post = false;
+    }
+    ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf, post);
     if (ret != size) {
         const char *err = ret < 0 ? strerror(-ret) : "short write";
 
@@ -1587,6 +1592,7 @@  int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
     region->size = info->size;
     region->fd_offset = info->offset;
     region->nr = index;
+    region->post_wr = false;
     if (vbasedev->regfds != NULL) {
         region->fd = vbasedev->regfds[index];
     } else {
@@ -2721,7 +2727,7 @@  static int vfio_io_region_read(VFIODevice *vbasedev, uint8_t index, off_t off,
 }
 
 static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t off,
-                                uint32_t size, void *data)
+                                uint32_t size, void *data, bool post)
 {
     struct vfio_region_info *info = vbasedev->regions[index];
     int ret;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 2e0e41d..027f9d5 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -51,7 +51,7 @@ 
                      (size), (data))
 #define VDEV_CONFIG_WRITE(vbasedev, off, size, data) \
     VDEV_REGION_WRITE((vbasedev), VFIO_PCI_CONFIG_REGION_INDEX, (off), \
-                      (size), (data))
+                      (size), (data), false)
 
 #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
 
@@ -1704,6 +1704,9 @@  static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
     bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
                                          ~PCI_BASE_ADDRESS_MEM_MASK);
     bar->size = bar->region.size;
+
+    /* IO regions are sync, memory can be async */
+    bar->region.post_wr = (bar->ioport == 0);
 }
 
 static void vfio_bars_prepare(VFIOPCIDevice *vdev)
@@ -3494,6 +3497,9 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
     if (udev->send_queued) {
         proxy->flags |= VFIO_PROXY_FORCE_QUEUED;
     }
+    if (udev->no_post) {
+        proxy->flags |= VFIO_PROXY_NO_POST;
+    }
 
     vfio_user_validate_version(proxy, &err);
     if (err != NULL) {
@@ -3540,6 +3546,7 @@  static void vfio_user_instance_finalize(Object *obj)
 static Property vfio_user_pci_dev_properties[] = {
     DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
     DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false),
+    DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index c47d2f8..ec17f2e 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -196,6 +196,7 @@  struct VFIOUserPCIDevice {
     VFIOPCIDevice device;
     char *sock_name;
     bool send_queued;   /* all sends are queued */
+    bool no_post;       /* all region writes are sync */
 };
 
 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
index a1b64fe..124340c 100644
--- a/hw/vfio/user-protocol.h
+++ b/hw/vfio/user-protocol.h
@@ -140,4 +140,16 @@  typedef struct {
     uint64_t offset;
 } VFIOUserRegionInfo;
 
+/*
+ * VFIO_USER_REGION_READ
+ * VFIO_USER_REGION_WRITE
+ *
+ * Message layout shared by the region read and region write commands:
+ * the common header, the fixed access description, then the transferred
+ * bytes in the trailing flexible array.
+ */
+typedef struct {
+    VFIOUserHdr hdr;    /* common message header */
+    uint64_t offset;    /* offset of the access within the region */
+    uint32_t region;    /* index of the region being accessed */
+    uint32_t count;     /* number of bytes to transfer */
+    char data[];        /* bytes to write, or bytes returned by a read */
+} VFIOUserRegionRW;
+
 #endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index 69b0fed..1453bb5 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -57,6 +57,8 @@  static void vfio_user_cb(void *opaque);
 
 static void vfio_user_request(void *opaque);
 static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg);
+static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr,
+                                 VFIOUserFDs *fds);
 static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr,
                                 VFIOUserFDs *fds, int rsize, bool nobql);
 static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
@@ -618,6 +620,33 @@  static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg)
     return 0;
 }
 
+/*
+ * async send - msg can be queued, but will be freed when sent
+ *
+ * hdr must carry VFIO_USER_NO_REPLY or VFIO_USER_REPLY; a header with
+ * neither flag is treated as a sync message: an error is printed and
+ * nothing is sent.  The function returns nothing, so a failed send is
+ * not reported to the caller.
+ *
+ * NOTE(review): on the rejected-message path hdr is left untouched, so a
+ * caller that assumes the send took ownership of hdr would leak it -
+ * confirm the ownership rules against vfio_user_getmsg/vfio_user_recycle.
+ */
+static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr,
+                                 VFIOUserFDs *fds)
+{
+    VFIOUserMsg *msg;
+    int ret;
+
+    /* only messages flagged as no-reply or as replies may go out async */
+    if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
+        error_printf("vfio_user_send_async on sync message\n");
+        return;
+    }
+
+    /* scoped guard: proxy->lock is taken here for the rest of the function */
+    QEMU_LOCK_GUARD(&proxy->lock);
+
+    msg = vfio_user_getmsg(proxy, hdr, fds);
+    msg->id = hdr->id;
+    msg->rsize = 0;     /* no reply size: nothing waits for an answer */
+    msg->type = VFIO_MSG_ASYNC;
+
+    ret = vfio_user_send_queued(proxy, msg);
+    if (ret < 0) {
+        /* the send failed, so hand the message back to the proxy */
+        vfio_user_recycle(proxy, msg);
+    }
+}
+
 static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr,
                                 VFIOUserFDs *fds, int rsize, bool nobql)
 {
@@ -1135,6 +1164,70 @@  static int vfio_user_get_region_info(VFIOProxy *proxy,
     return 0;
 }
 
+/*
+ * Synchronously read up to count bytes from a device region at offset
+ * into data.
+ *
+ * Returns the number of bytes the reply reports, -EINVAL if count exceeds
+ * the proxy's maximum transfer size, -E2BIG if the reply claims more
+ * bytes than were requested, or the negated error code from the reply.
+ */
+static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset,
+                                 uint32_t count, void *data)
+{
+    g_autofree VFIOUserRegionRW *req = NULL;
+    int reply_size;
+
+    if (count > proxy->max_xfer_size) {
+        return -EINVAL;
+    }
+
+    /* the buffer is sized for the reply: fixed part plus returned bytes */
+    reply_size = sizeof(*req) + count;
+    req = g_malloc0(reply_size);
+
+    /* the request itself carries no payload, only the fixed part */
+    vfio_user_request_msg(&req->hdr, VFIO_USER_REGION_READ, sizeof(*req), 0);
+    req->region = index;
+    req->offset = offset;
+    req->count = count;
+
+    vfio_user_send_wait(proxy, &req->hdr, NULL, reply_size, false);
+
+    if (req->hdr.flags & VFIO_USER_ERROR) {
+        return -req->hdr.error_reply;
+    }
+    if (req->count > count) {
+        return -E2BIG;
+    }
+
+    memcpy(data, &req->data, req->count);
+    return req->count;
+}
+
+/*
+ * Write count bytes from data to a device region at offset.
+ *
+ * When post is true and the proxy does not have VFIO_PROXY_NO_POST set,
+ * the request is flagged VFIO_USER_NO_REPLY and sent asynchronously; the
+ * function then returns count without waiting, so a failure of that
+ * write is not reported here.  Otherwise the write is synchronous and
+ * the reply is checked.
+ *
+ * Returns count on success, -EINVAL if count exceeds the proxy's maximum
+ * transfer size, or the negated error code from the reply.
+ */
+static int vfio_user_region_write(VFIOProxy *proxy, uint8_t index, off_t offset,
+                                  uint32_t count, void *data, bool post)
+{
+    VFIOUserRegionRW *msgp = NULL;
+    int flags;
+    int size;
+    int ret;
+
+    if (count > proxy->max_xfer_size) {
+        return -EINVAL;
+    }
+
+    /*
+     * Settle posted vs. sync before building the header.  A request
+     * flagged VFIO_USER_NO_REPLY must never reach the waiting send path
+     * below: it would wait for a reply that is not going to be sent.
+     */
+    if (proxy->flags & VFIO_PROXY_NO_POST) {
+        post = false;
+    }
+    flags = post ? VFIO_USER_NO_REPLY : 0;
+    size = sizeof(*msgp) + count;
+
+    msgp = g_malloc0(size);
+    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
+    msgp->offset = offset;
+    msgp->region = index;
+    msgp->count = count;
+    memcpy(&msgp->data, data, count);
+
+    /* async send will free msg after it's sent */
+    if (post) {
+        vfio_user_send_async(proxy, &msgp->hdr, NULL);
+        return count;
+    }
+
+    vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false);
+    if (msgp->hdr.flags & VFIO_USER_ERROR) {
+        ret = -msgp->hdr.error_reply;
+    } else {
+        ret = count;
+    }
+
+    g_free(msgp);
+    return ret;
+}
+
 
 /*
  * Socket-based io_ops
@@ -1184,8 +1277,24 @@  static int vfio_user_io_get_region_info(VFIODevice *vbasedev,
     return 0;
 }
 
+/* io_ops region_read hook: forward the read to the device's proxy */
+static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index,
+                                    off_t off, uint32_t size, void *data)
+{
+    VFIOProxy *proxy = vbasedev->proxy;
+
+    return vfio_user_region_read(proxy, index, off, size, data);
+}
+
+/*
+ * io_ops region_write hook: forward the write to the device's proxy.
+ * post is passed through unchanged; the proxy decides whether the write
+ * is actually sent without waiting for a reply.
+ */
+static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index,
+                                     off_t off, uint32_t size, void *data,
+                                     bool post)
+{
+    return vfio_user_region_write(vbasedev->proxy, index, off, size, data,
+                                  post);
+}
+
 VFIODevIO vfio_dev_io_sock = {
     .get_info = vfio_user_io_get_info,
     .get_region_info = vfio_user_io_get_region_info,
+    /* region accesses go through the proxy-based read/write wrappers */
+    .region_read = vfio_user_io_region_read,
+    .region_write = vfio_user_io_region_write,
 };
 
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index 2547cf6..359a029 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -84,6 +84,7 @@  typedef struct VFIOProxy {
 /* VFIOProxy flags */
 #define VFIO_PROXY_CLIENT        0x1
 #define VFIO_PROXY_FORCE_QUEUED  0x4
+#define VFIO_PROXY_NO_POST       0x8
 
 VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
 void vfio_user_disconnect(VFIOProxy *proxy);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 3406e6a..6324132 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -57,6 +57,7 @@  typedef struct VFIORegion {
     VFIOMmap *mmaps;
     uint8_t nr; /* cache the region number for debug */
     int fd; /* fd to mmap() region */
+    bool post_wr; /* writes can be posted */
 } VFIORegion;
 
 typedef struct VFIOMigration {
@@ -180,7 +181,7 @@  struct VFIODevIO {
     int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
                        void *data);
     int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
-                        void *data);
+                        void *data, bool post);
 };
 
 #define VDEV_GET_INFO(vdev, info) \
@@ -193,8 +194,8 @@  struct VFIODevIO {
     ((vdev)->io_ops->set_irqs((vdev), (irqs)))
 #define VDEV_REGION_READ(vdev, nr, off, size, data) \
     ((vdev)->io_ops->region_read((vdev), (nr), (off), (size), (data)))
-#define VDEV_REGION_WRITE(vdev, nr, off, size, data) \
-    ((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data)))
+/*
+ * Dispatch a region write through the device's io_ops.  post is handed
+ * to the backend's region_write hook; true requests a posted write.
+ */
+#define VDEV_REGION_WRITE(vdev, nr, off, size, data, post) \
+    ((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data), (post)))
 
 struct VFIOContIO {
     int (*dma_map)(VFIOContainer *container,