[v4,2/2] vhost-user: Attempt to fix a race with set_mem_table.

Message ID 1469613157-8942-3-git-send-email-saxenap.ltc@gmail.com
State New, archived

Commit Message

Prerna Saxena July 27, 2016, 9:52 a.m. UTC
From: Prerna Saxena <prerna.saxena@nutanix.com>

The set_mem_table command currently does not request a reply. Hence, there is
no easy way for a remote application to notify QEMU when it has finished
setting up memory, or whether errors occurred while doing so.

As an example:
(1) QEMU sends a SET_MEM_TABLE to the backend (e.g., a vhost-user net
application). SET_MEM_TABLE does not require a reply according to the spec.
(2) QEMU commits the memory to the guest.
(3) The guest issues an I/O operation over a new memory region that was configured in (1).
(4) The application has not yet remapped the memory, but it sees the I/O request.
(5) The application cannot satisfy the request, because it does not know about those GPAs; a sketch of this lookup failure follows.
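
To make step (5) concrete, below is a minimal, hypothetical sketch of the guest-physical-address lookup a backend performs when servicing an I/O request. The names (backend_region, gpa_to_va) are illustrative only, not taken from this patch or from any particular backend; a request that races ahead of the memory-table update simply finds no matching region:

    /* Hypothetical backend-side GPA lookup, for illustration only. */
    #include <stddef.h>
    #include <stdint.h>

    typedef struct {
        uint64_t guest_phys_addr;   /* start of the region in guest physical space */
        uint64_t memory_size;
        void    *mmap_addr;         /* where the backend mapped the region */
    } backend_region;

    /* Translate a guest physical address to a host pointer.  Returns NULL
     * when the GPA falls in no known region -- exactly the situation in
     * step (5), where the request refers to a region the backend has not
     * yet mapped. */
    static void *gpa_to_va(const backend_region *regions, size_t nregions,
                           uint64_t gpa)
    {
        for (size_t i = 0; i < nregions; i++) {
            const backend_region *r = &regions[i];
            if (gpa >= r->guest_phys_addr &&
                gpa - r->guest_phys_addr < r->memory_size) {
                return (char *)r->mmap_addr + (gpa - r->guest_phys_addr);
            }
        }
        return NULL;    /* unknown GPA: the request cannot be satisfied */
    }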

While a guaranteed fix would require a protocol extension (committed separately),
a best-effort workaround for existing applications is to send a GET_FEATURES
message before completing the vhost_user_set_mem_table() call.
Since GET_FEATURES requires a reply, an application that processes vhost-user
messages synchronously would probably have completed the SET_MEM_TABLE before replying.
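
For illustration, here is a sketch of why that round trip acts as a barrier: a backend that handles vhost-user messages strictly one at a time cannot send the GET_FEATURES reply until the SET_MEM_TABLE handler that arrived before it has run to completion. Everything below except the request codes (which follow the vhost-user spec) is made up for this sketch:

    #include <stdbool.h>
    #include <stdint.h>

    /* Request codes as in the vhost-user spec. */
    enum {
        VHOST_USER_GET_FEATURES  = 1,
        VHOST_USER_SET_MEM_TABLE = 5,
    };

    typedef struct {
        uint32_t request;
        /* payload omitted for brevity */
    } SketchMsg;

    /* Stubs standing in for the real transport and mmap work. */
    static bool read_vhost_user_msg(int sock, SketchMsg *msg) { (void)sock; (void)msg; return false; }
    static void remap_guest_memory(const SketchMsg *msg)      { (void)msg; }
    static void send_features_reply(int sock)                 { (void)sock; }

    static void backend_loop(int sock)
    {
        SketchMsg msg;

        while (read_vhost_user_msg(sock, &msg)) {
            switch (msg.request) {
            case VHOST_USER_SET_MEM_TABLE:
                /* Runs to completion before the next message is read;
                 * no reply is sent for this request. */
                remap_guest_memory(&msg);
                break;
            case VHOST_USER_GET_FEATURES:
                /* Because the loop is strictly sequential, this reply
                 * doubles as an ack that any earlier remap has finished. */
                send_features_reply(sock);
                break;
            default:
                break;
            }
        }
    }

A backend that handles messages on several threads offers no such ordering guarantee, which is why the REPLY_ACK protocol extension in patch 1/2 remains the guaranteed fix.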

Signed-off-by: Prerna Saxena <prerna.saxena@nutanix.com>
---
 hw/virtio/vhost-user.c | 123 ++++++++++++++++++++++++++-----------------------
 1 file changed, 65 insertions(+), 58 deletions(-)

Comments

Michael S. Tsirkin July 27, 2016, 1:30 p.m. UTC | #1
On Wed, Jul 27, 2016 at 02:52:37AM -0700, Prerna Saxena wrote:
> From: Prerna Saxena <prerna.saxena@nutanix.com>
> 
> The set_mem_table command currently does not request a reply. Hence, there is
> no easy way for a remote application to notify QEMU when it has finished
> setting up memory, or whether errors occurred while doing so.
> 
> As an example:
> (1) QEMU sends a SET_MEM_TABLE to the backend (e.g., a vhost-user net
> application). SET_MEM_TABLE does not require a reply according to the spec.
> (2) QEMU commits the memory to the guest.
> (3) The guest issues an I/O operation over a new memory region that was configured in (1).
> (4) The application has not yet remapped the memory, but it sees the I/O request.
> (5) The application cannot satisfy the request, because it does not know about those GPAs.
> 
> While a guaranteed fix would require a protocol extension (committed separately),
> a best-effort workaround for existing applications is to send a GET_FEATURES
> message before completing the vhost_user_set_mem_table() call.
> Since GET_FEATURES requires a reply, an application that processes vhost-user
> messages synchronously would probably have completed the SET_MEM_TABLE before replying.
> 
> Signed-off-by: Prerna Saxena <prerna.saxena@nutanix.com>

Could you pls reorder patchset so this is 1/2?
1/1 is still under review but I'd like to make sure
we have some kind of fix in place for 2.7.

> [...]
Prerna Saxena July 28, 2016, 7:09 a.m. UTC | #2
On 27/07/16 7:00 pm, "Michael S. Tsirkin" <mst@redhat.com> wrote:

>On Wed, Jul 27, 2016 at 02:52:37AM -0700, Prerna Saxena wrote:
>> From: Prerna Saxena <prerna.saxena@nutanix.com>
>> 
>> [...]
>> 
>> Signed-off-by: Prerna Saxena <prerna.saxena@nutanix.com>
>
>Could you pls reorder patchset so this is 1/2?
>1/1 is still under review but I'd like to make sure
>we have some kind of fix in place for 2.7.

Hi Michael,
The review comments for patch 1 were around documentation and the choice of the flag name; there were no comments on the code itself. I have fixed all of that and posted a new patch series (version v5.1).
Hope both patches make it in time for 2.7.

Thanks, once again, for reviewing this.

Regards,
Prerna

Patch

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 0cdb918..f96607e 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -254,64 +254,6 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
     return 0;
 }
 
-static int vhost_user_set_mem_table(struct vhost_dev *dev,
-                                    struct vhost_memory *mem)
-{
-    int fds[VHOST_MEMORY_MAX_NREGIONS];
-    int i, fd;
-    size_t fd_num = 0;
-    bool reply_supported = virtio_has_feature(dev->protocol_features,
-                            VHOST_USER_PROTOCOL_F_REPLY_ACK);
-
-    VhostUserMsg msg = {
-        .request = VHOST_USER_SET_MEM_TABLE,
-        .flags = VHOST_USER_VERSION,
-    };
-
-    if (reply_supported) {
-        msg.flags |= VHOST_USER_NEED_RESPONSE_MASK;
-    }
-
-    for (i = 0; i < dev->mem->nregions; ++i) {
-        struct vhost_memory_region *reg = dev->mem->regions + i;
-        ram_addr_t offset;
-        MemoryRegion *mr;
-
-        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
-        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
-                                     &offset);
-        fd = memory_region_get_fd(mr);
-        if (fd > 0) {
-            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
-            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
-            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
-            msg.payload.memory.regions[fd_num].mmap_offset = offset;
-            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
-            fds[fd_num++] = fd;
-        }
-    }
-
-    msg.payload.memory.nregions = fd_num;
-
-    if (!fd_num) {
-        error_report("Failed initializing vhost-user memory map, "
-                     "consider using -object memory-backend-file share=on");
-        return -1;
-    }
-
-    msg.size = sizeof(msg.payload.memory.nregions);
-    msg.size += sizeof(msg.payload.memory.padding);
-    msg.size += fd_num * sizeof(VhostUserMemoryRegion);
-
-    vhost_user_write(dev, &msg, fds, fd_num);
-
-    if (reply_supported) {
-        return process_message_reply(dev, msg.request);
-    }
-
-    return 0;
-}
-
 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                      struct vhost_vring_addr *addr)
 {
@@ -514,6 +456,71 @@ static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
     return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
 }
 
+static int vhost_user_set_mem_table(struct vhost_dev *dev,
+                                    struct vhost_memory *mem)
+{
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    int i, fd;
+    size_t fd_num = 0;
+    uint64_t features;
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+                            VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+    VhostUserMsg msg = {
+        .request = VHOST_USER_SET_MEM_TABLE,
+        .flags = VHOST_USER_VERSION,
+    };
+
+    if (reply_supported) {
+        msg.flags |= VHOST_USER_NEED_RESPONSE_MASK;
+    }
+
+    for (i = 0; i < dev->mem->nregions; ++i) {
+        struct vhost_memory_region *reg = dev->mem->regions + i;
+        ram_addr_t offset;
+        MemoryRegion *mr;
+
+        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
+        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
+                                     &offset);
+        fd = memory_region_get_fd(mr);
+        if (fd > 0) {
+            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
+            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
+            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
+            msg.payload.memory.regions[fd_num].mmap_offset = offset;
+            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
+            fds[fd_num++] = fd;
+        }
+    }
+
+    msg.payload.memory.nregions = fd_num;
+
+    if (!fd_num) {
+        error_report("Failed initializing vhost-user memory map, "
+                     "consider using -object memory-backend-file share=on");
+        return -1;
+    }
+
+    msg.size = sizeof(msg.payload.memory.nregions);
+    msg.size += sizeof(msg.payload.memory.padding);
+    msg.size += fd_num * sizeof(VhostUserMemoryRegion);
+
+    vhost_user_write(dev, &msg, fds, fd_num);
+
+    if (reply_supported) {
+        return process_message_reply(dev, msg.request);
+    } else {
+        /* Note: It is (yet) unknown when the client application has finished
+         * remapping the GPA.
+         * Attempt to prevent a race by sending a command that requires a reply.
+         */
+        vhost_user_get_features(dev, &features);
+    }
+
+    return 0;
+}
+
 static int vhost_user_set_owner(struct vhost_dev *dev)
 {
     VhostUserMsg msg = {