Message ID | 20240912145335.129447-5-aesteve@redhat.com (mailing list archive)
---|---
State | New, archived
Series | vhost-user: Add SHMEM_MAP/UNMAP requests
On Thu, Sep 12, 2024 at 04:53:34PM +0200, Albert Esteve wrote:
> Add a cache BAR in the vhost-user-device
> into which files can be directly mapped.
>
> The number, shmid, and size of the VIRTIO Shared
> Memory subregions is retrieved through a get_shmem_config
> message sent by the vhost-user-base module
> on the realize step, after virtio_init().
>
> By default, if VHOST_USER_PROTOCOL_F_SHMEM
> feature is not supported by the backend,
> there is no cache.
>
> Signed-off-by: Albert Esteve <aesteve@redhat.com>
> ---

Not all devices derive from vhost-user-base.c so this does not offer
full coverage. I think that's okay since few devices currently use
VIRTIO Shared Memory Regions. A note about this in the commit
description would be useful though. Which vhost-user devices gain
VIRTIO Shared Memory Region support and what should you do if your
device is not included in this list?

>  hw/virtio/vhost-user-base.c       | 37 +++++++++++++++++++++++++++--
>  hw/virtio/vhost-user-device-pci.c | 39 ++++++++++++++++++++++++++++---
>  2 files changed, 71 insertions(+), 5 deletions(-)
>
> diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c
> index 2bc3423326..f2597d021a 100644
> --- a/hw/virtio/vhost-user-base.c
> +++ b/hw/virtio/vhost-user-base.c
> @@ -271,7 +271,9 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VHostUserBase *vub = VHOST_USER_BASE(dev);
> -    int ret;
> +    uint64_t memory_sizes[8];
> +    void *cache_ptr;
> +    int i, ret, nregions;
>
>      if (!vub->chardev.chr) {
>          error_setg(errp, "vhost-user-base: missing chardev");
> @@ -314,7 +316,7 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
>
>      /* Allocate queues */
>      vub->vqs = g_ptr_array_sized_new(vub->num_vqs);
> -    for (int i = 0; i < vub->num_vqs; i++) {
> +    for (i = 0; i < vub->num_vqs; i++) {
>          g_ptr_array_add(vub->vqs,
>                          virtio_add_queue(vdev, vub->vq_size,
>                                           vub_handle_output));
> @@ -331,6 +333,37 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
>          do_vhost_user_cleanup(vdev, vub);

Missing return statement.

>      }
>
> +    ret = vub->vhost_dev.vhost_ops->vhost_get_shmem_config(&vub->vhost_dev,
> +                                                           &nregions,
> +                                                           memory_sizes,

Buffer overflow. vhost_get_shmem_config() copies out up to 256
memory_sizes[] elements. Please introduce a constant in the VIRTIO
header and use it instead of hardcoding uint64_t memory_sizes[8] above.

> +                                                           errp);
> +
> +    if (ret < 0) {
> +        do_vhost_user_cleanup(vdev, vub);

Missing return statement.

> +    }
> +
> +    for (i = 0; i < nregions; i++) {
> +        if (memory_sizes[i]) {
> +            if (memory_sizes[i] % qemu_real_host_page_size() != 0) {
> +                error_setg(errp, "Shared memory %d size must be a power of 2 "
> +                                 "no smaller than the page size", i);
> +                return;

Missing do_vhost_user_cleanup().

> +            }
> +
> +            cache_ptr = mmap(NULL, memory_sizes[i], PROT_NONE,
> +                             MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +            if (cache_ptr == MAP_FAILED) {
> +                error_setg_errno(errp, errno, "Unable to mmap blank cache");
> +                return;

Missing do_vhost_user_cleanup().

> +            }
> +
> +            virtio_new_shmem_region(vdev);
> +            memory_region_init_ram_ptr(vdev->shmem_list[i].mr,
> +                                       OBJECT(vdev), "vub-shm-" + i,
> +                                       memory_sizes[i], cache_ptr);

I think memory_region_init_ram_ptr() is included in live migration, so
the contents of VIRTIO Shared Memory Regions will be transferred to the
destination QEMU and written to the equivalent memory region there. I'm
not sure this works:
1. If there are PROT_NONE memory ranges, then live migration will
   probably crash the source QEMU while trying to send this memory to
   the destination QEMU.

2. If the destination vhost-user device has not yet loaded its state
   and sent MAP messages setting up the VIRTIO Shared Memory Region,
   then receiving migrated data and writing it into this memory will
   fail.

QEMU has a migration blocker API so that devices can refuse live
migration. For the time being a migration blocker is probably needed
here. See migrate_add_blocker()/migrate_del_blocker().

> +        }
> +    }
> +
>      qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, NULL,
>                               dev, NULL, true);
>  }
> diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-device-pci.c
> index efaf55d3dd..abf4e90c21 100644
> --- a/hw/virtio/vhost-user-device-pci.c
> +++ b/hw/virtio/vhost-user-device-pci.c
> @@ -8,14 +8,18 @@
>   */
>
>  #include "qemu/osdep.h"
> +#include "qapi/error.h"
>  #include "hw/qdev-properties.h"
>  #include "hw/virtio/vhost-user-base.h"
>  #include "hw/virtio/virtio-pci.h"
>
> +#define VIRTIO_DEVICE_PCI_CACHE_BAR 2

"Cache" is ambiguous. Call it shmem_bar here and everywhere else?

> +
>  struct VHostUserDevicePCI {
>      VirtIOPCIProxy parent_obj;
>
>      VHostUserBase vub;
> +    MemoryRegion cachebar;
>  };
>
>  #define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base"
> @@ -25,10 +29,39 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserDevicePCI, VHOST_USER_DEVICE_PCI)
>  static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
>  {
>      VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(vpci_dev);
> -    DeviceState *vdev = DEVICE(&dev->vub);
> -
> +    DeviceState *dev_state = DEVICE(&dev->vub);
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev_state);
> +    MemoryRegion *mr;
> +    uint64_t offset = 0, cache_size = 0;
> +    int i;
> +
>      vpci_dev->nvectors = 1;
> -    qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
> +    qdev_realize(dev_state, BUS(&vpci_dev->bus), errp);
> +
> +    for (i = 0; i < vdev->n_shmem_regions; i++) {
> +        mr = vdev->shmem_list[i].mr;
> +        if (mr->size > UINT64_MAX - cache_size) {
> +            error_setg(errp, "Total shared memory required overflow");
> +            return;
> +        }
> +        cache_size = cache_size + mr->size;
> +    }
> +    if (cache_size) {
> +        memory_region_init(&dev->cachebar, OBJECT(vpci_dev),
> +                           "vhost-device-pci-cachebar", cache_size);
> +        for (i = 0; i < vdev->n_shmem_regions; i++) {
> +            mr = vdev->shmem_list[i].mr;
> +            memory_region_add_subregion(&dev->cachebar, offset, mr);
> +            virtio_pci_add_shm_cap(vpci_dev, VIRTIO_DEVICE_PCI_CACHE_BAR,
> +                                   offset, mr->size, i);
> +            offset = offset + mr->size;
> +        }
> +        pci_register_bar(&vpci_dev->pci_dev, VIRTIO_DEVICE_PCI_CACHE_BAR,
> +                         PCI_BASE_ADDRESS_SPACE_MEMORY |
> +                         PCI_BASE_ADDRESS_MEM_PREFETCH |
> +                         PCI_BASE_ADDRESS_MEM_TYPE_64,
> +                         &dev->cachebar);
> +    }
>  }
>
>  static void vhost_user_device_pci_class_init(ObjectClass *klass, void *data)
> --
> 2.45.2
>
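A minimal sketch of how vub_device_realize() could address the
error-path, array-sizing, and migration points above (a sketch only:
VIRTIO_MAX_SHMEM_REGIONS and shmem_mig_blocker are assumed names, not
existing QEMU identifiers, and it assumes the Error ** flavor of
migrate_add_blocker()):

    /* Hypothetical constant for the VIRTIO header; the backend may
     * report up to 256 regions, so size the array to match. */
    #define VIRTIO_MAX_SHMEM_REGIONS 256

    static Error *shmem_mig_blocker;    /* assumed blocker storage */

    uint64_t memory_sizes[VIRTIO_MAX_SHMEM_REGIONS];

    ret = vub->vhost_dev.vhost_ops->vhost_get_shmem_config(&vub->vhost_dev,
                                                           &nregions,
                                                           memory_sizes,
                                                           errp);
    if (ret < 0) {
        /* Bail out instead of falling through with stale state. */
        do_vhost_user_cleanup(vdev, vub);
        return;
    }

    if (nregions > 0) {
        /* Shared memory contents are not migratable yet, so refuse
         * live migration while any region is present. */
        error_setg(&shmem_mig_blocker,
                   "VIRTIO Shared Memory Regions do not support live "
                   "migration yet");
        if (migrate_add_blocker(&shmem_mig_blocker, errp) < 0) {
            do_vhost_user_cleanup(vdev, vub);
            return;
        }
    }

The matching migrate_del_blocker(&shmem_mig_blocker) would then belong
in the unrealize/cleanup path.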
On Thu, Sep 12, 2024 at 04:53:34PM +0200, Albert Esteve wrote:
> @@ -331,6 +333,37 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
>          do_vhost_user_cleanup(vdev, vub);
>      }
>
> +    ret = vub->vhost_dev.vhost_ops->vhost_get_shmem_config(&vub->vhost_dev,
> +                                                           &nregions,
> +                                                           memory_sizes,
> +                                                           errp);
> +
> +    if (ret < 0) {
> +        do_vhost_user_cleanup(vdev, vub);
> +    }
> +
> +    for (i = 0; i < nregions; i++) {
> +        if (memory_sizes[i]) {
> +            if (memory_sizes[i] % qemu_real_host_page_size() != 0) {
> +                error_setg(errp, "Shared memory %d size must be a power of 2 "
> +                                 "no smaller than the page size", i);
> +                return;
> +            }
> +
> +            cache_ptr = mmap(NULL, memory_sizes[i], PROT_NONE,
> +                             MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +            if (cache_ptr == MAP_FAILED) {
> +                error_setg_errno(errp, errno, "Unable to mmap blank cache");
> +                return;
> +            }
> +
> +            virtio_new_shmem_region(vdev);
> +            memory_region_init_ram_ptr(vdev->shmem_list[i].mr,

I don't think this works because virtio_new_shmem_region() leaves .mr =
NULL? Why allocates the MemoryRegion and assigns it to shmem_list[i].mr?
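One hypothetical way to resolve that ownership question is to have
virtio_new_shmem_region() allocate the MemoryRegion itself, so that
shmem_list[i].mr is valid before memory_region_init_ram_ptr() runs. A
sketch against the fields this patch uses (the return type and the
assumption that shmem_list[] is already sized are mine, not the
series'):

    MemoryRegion *virtio_new_shmem_region(VirtIODevice *vdev)
    {
        int i = vdev->n_shmem_regions++;

        /* Allocate here so callers get a valid pointer instead of
         * dereferencing a NULL shmem_list[i].mr. */
        vdev->shmem_list[i].mr = g_new0(MemoryRegion, 1);
        return vdev->shmem_list[i].mr;
    }

The caller in vub_device_realize() could then use the returned pointer
directly rather than reaching into vdev->shmem_list[i].mr.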
On Tue, 17 Sept 2024 at 10:33, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Thu, Sep 12, 2024 at 04:53:34PM +0200, Albert Esteve wrote:
> > @@ -331,6 +333,37 @@ static void vub_device_realize(DeviceState *dev, Error **errp)
> >          do_vhost_user_cleanup(vdev, vub);
> >      }
> >
> > +    ret = vub->vhost_dev.vhost_ops->vhost_get_shmem_config(&vub->vhost_dev,
> > +                                                           &nregions,
> > +                                                           memory_sizes,
> > +                                                           errp);
> > +
> > +    if (ret < 0) {
> > +        do_vhost_user_cleanup(vdev, vub);
> > +    }
> > +
> > +    for (i = 0; i < nregions; i++) {
> > +        if (memory_sizes[i]) {
> > +            if (memory_sizes[i] % qemu_real_host_page_size() != 0) {
> > +                error_setg(errp, "Shared memory %d size must be a power of 2 "
> > +                                 "no smaller than the page size", i);
> > +                return;
> > +            }
> > +
> > +            cache_ptr = mmap(NULL, memory_sizes[i], PROT_NONE,
> > +                             MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> > +            if (cache_ptr == MAP_FAILED) {
> > +                error_setg_errno(errp, errno, "Unable to mmap blank cache");
> > +                return;
> > +            }
> > +
> > +            virtio_new_shmem_region(vdev);
> > +            memory_region_init_ram_ptr(vdev->shmem_list[i].mr,
>
> I don't think this works because virtio_new_shmem_region() leaves .mr =
> NULL? Why allocates the MemoryRegion and assigns it to shmem_list[i].mr?

"Why" -> "Who"