@@ -4139,6 +4139,8 @@ M: John Levon <john.levon@nutanix.com>
M: Thanos Makatos <thanos.makatos@nutanix.com>
S: Supported
F: docs/devel/vfio-user.rst
+F: hw/vfio/user-container.c
+F: hw/vfio/user-pci.c
F: subprojects/libvfio-user
EBPF:
@@ -886,7 +886,7 @@ static bool vfio_get_device(VFIOGroup *group, const char *name,
return true;
}
-static void vfio_put_base_device(VFIODevice *vbasedev)
+void vfio_put_base_device(VFIODevice *vbasedev)
{
if (vbasedev->regions != NULL) {
int i;
@@ -16,6 +16,11 @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'pci-quirks.c',
'pci.c',
))
+
+if get_option('vfio_user_client').enabled()
+ vfio_ss.add(files('user-container.c', 'user-pci.c'))
+endif
+
vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
vfio_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
@@ -109,7 +109,7 @@ static void vfio_intx_interrupt(void *opaque)
}
}
-static void vfio_intx_eoi(VFIODevice *vbasedev)
+void vfio_intx_eoi(VFIODevice *vbasedev)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
@@ -2585,7 +2585,7 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev)
}
}
-static Object *vfio_pci_get_object(VFIODevice *vbasedev)
+Object *vfio_pci_get_object(VFIODevice *vbasedev)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
@@ -2641,7 +2641,7 @@ static const VMStateDescription vmstate_vfio_pci_config = {
}
};
-static int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp)
+int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
@@ -2649,7 +2649,7 @@ static int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp)
errp);
}
-static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
+int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
PCIDevice *pdev = &vdev->pdev;
@@ -2845,7 +2845,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_pci_put_device(VFIOPCIDevice *vdev)
+void vfio_pci_put_device(VFIOPCIDevice *vdev)
{
vfio_detach_device(&vdev->vbasedev);
@@ -3367,7 +3367,7 @@ post_reset:
vfio_pci_post_reset(vdev);
}
-static void vfio_instance_init(Object *obj)
+void vfio_instance_init(Object *obj)
{
PCIDevice *pci_dev = PCI_DEVICE(obj);
VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
@@ -213,6 +213,13 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len);
+void vfio_intx_eoi(VFIODevice *vbasedev);
+Object *vfio_pci_get_object(VFIODevice *vbasedev);
+int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp);
+int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f);
+void vfio_pci_put_device(VFIOPCIDevice *vdev);
+void vfio_instance_init(Object *obj);
+
uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
new file mode 100644
@@ -0,0 +1,222 @@
+/*
+ * Container for vfio-user IOMMU type: rather than communicating with the kernel
+ * vfio driver, we communicate over a socket to a server using the vfio-user
+ * protocol.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+
+#include "hw/vfio/vfio-common.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "exec/ram_addr.h"
+#include "hw/hw.h"
+#include "qemu/error-report.h"
+#include "qemu/range.h"
+#include "trace.h"
+#include "qapi/error.h"
+#include "pci.h"
+
+static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb, int flags)
+{
+ return -ENOTSUP;
+}
+
+static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mrp)
+{
+ return -ENOTSUP;
+}
+
+static int
+vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+ bool start, Error **errp)
+{
+ error_setg_errno(errp, ENOTSUP, "Not supported");
+ return -ENOTSUP;
+}
+
+static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
+{
+ error_setg_errno(errp, ENOTSUP, "Not supported");
+ return -ENOTSUP;
+}
+
+static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
+{
+ error_setg_errno(errp, ENOTSUP, "Not supported");
+ return -ENOTSUP;
+}
+
+static VFIOUserContainer *vfio_create_user_container(Error **errp)
+{
+ VFIOUserContainer *container;
+
+ container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
+ return container;
+}
+
+/*
+ * Try to mirror vfio_connect_container() as much as possible.
+ */
+static VFIOUserContainer *
+vfio_connect_user_container(AddressSpace *as, Error **errp)
+{
+ VFIOContainerBase *bcontainer;
+ VFIOUserContainer *container;
+ VFIOAddressSpace *space;
+ VFIOIOMMUClass *vioc;
+
+ space = vfio_get_address_space(as);
+
+ container = vfio_create_user_container(errp);
+ if (!container) {
+ goto put_space_exit;
+ }
+
+ bcontainer = &container->bcontainer;
+
+ if (!vfio_cpr_register_container(bcontainer, errp)) {
+ goto free_container_exit;
+ }
+
+ vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ assert(vioc->setup);
+
+ if (!vioc->setup(bcontainer, errp)) {
+ goto unregister_container_exit;
+ }
+
+ vfio_address_space_insert(space, bcontainer);
+
+ bcontainer->listener = vfio_memory_listener;
+ memory_listener_register(&bcontainer->listener, bcontainer->space->as);
+
+ if (bcontainer->error) {
+ errno = EINVAL;
+ error_propagate_prepend(errp, bcontainer->error,
+ "memory listener initialization failed: ");
+ goto listener_release_exit;
+ }
+
+ bcontainer->initialized = true;
+
+ return container;
+
+listener_release_exit:
+ memory_listener_unregister(&bcontainer->listener);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+
+unregister_container_exit:
+ vfio_cpr_unregister_container(bcontainer);
+
+free_container_exit:
+ object_unref(container);
+
+put_space_exit:
+ vfio_put_address_space(space);
+
+ return NULL;
+}
+
+static void vfio_disconnect_user_container(VFIOUserContainer *container)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+ memory_listener_unregister(&bcontainer->listener);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+
+ VFIOAddressSpace *space = bcontainer->space;
+
+ vfio_cpr_unregister_container(bcontainer);
+ object_unref(container);
+
+ vfio_put_address_space(space);
+}
+
+static bool vfio_user_get_device(VFIOUserContainer *container,
+ VFIODevice *vbasedev, Error **errp)
+{
+ struct vfio_device_info info = { 0 };
+
+ vbasedev->fd = -1;
+
+ vfio_prepare_device(vbasedev, &container->bcontainer, NULL, &info);
+
+ return true;
+}
+
+/*
+ * vfio_user_attach_device: attach a device to a new container.
+ */
+static bool vfio_user_attach_device(const char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp)
+{
+ VFIOUserContainer *container;
+
+ container = vfio_connect_user_container(as, errp);
+ if (container == NULL) {
+ error_prepend(errp, "failed to connect proxy");
+ return false;
+ }
+
+ return vfio_user_get_device(container, vbasedev, errp);
+}
+
+static void vfio_user_detach_device(VFIODevice *vbasedev)
+{
+ VFIOUserContainer *container = container_of(vbasedev->bcontainer,
+ VFIOUserContainer, bcontainer);
+
+ QLIST_REMOVE(vbasedev, global_next);
+ QLIST_REMOVE(vbasedev, container_next);
+ vbasedev->bcontainer = NULL;
+ vfio_put_base_device(vbasedev);
+ vfio_disconnect_user_container(container);
+}
+
+static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+ /* ->needs_reset is always false for vfio-user. */
+ return 0;
+}
+
+static void vfio_iommu_user_class_init(ObjectClass *klass, void *data)
+{
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+
+ vioc->setup = vfio_user_setup;
+ vioc->dma_map = vfio_user_dma_map;
+ vioc->dma_unmap = vfio_user_dma_unmap;
+ vioc->attach_device = vfio_user_attach_device;
+ vioc->detach_device = vfio_user_detach_device;
+ vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
+ vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
+ vioc->pci_hot_reset = vfio_user_pci_hot_reset;
+};
+
+static const TypeInfo types[] = {
+ {
+ .name = TYPE_VFIO_IOMMU_USER,
+ .parent = TYPE_VFIO_IOMMU,
+ .instance_size = sizeof(VFIOUserContainer),
+ .class_init = vfio_iommu_user_class_init,
+ },
+};
+
+DEFINE_TYPES(types)
new file mode 100644
@@ -0,0 +1,158 @@
+/*
+ * vfio PCI device over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
+#include "hw/hw.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "migration/vmstate.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qemu/range.h"
+#include "qemu/units.h"
+#include "system/kvm.h"
+#include "pci.h"
+#include "trace.h"
+#include "qapi/error.h"
+#include "migration/blocker.h"
+#include "migration/qemu-file.h"
+
+#define TYPE_VFIO_USER_PCI "vfio-user-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
+
+struct VFIOUserPCIDevice {
+ VFIOPCIDevice device;
+ char *sock_name;
+};
+
+/*
+ * Emulated devices don't use host hot reset
+ */
+static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
+{
+ vbasedev->needs_reset = false;
+}
+
+static VFIODeviceOps vfio_user_pci_ops = {
+ .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
+ .vfio_eoi = vfio_intx_eoi,
+ .vfio_get_object = vfio_pci_get_object,
+ .vfio_save_config = vfio_pci_save_config,
+ .vfio_load_config = vfio_pci_load_config,
+};
+
+static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
+{
+ ERRP_GUARD();
+ VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ AddressSpace *as;
+
+ /*
+ * TODO: make option parser understand SocketAddress
+ * and use that instead of having scalar options
+ * for each socket type.
+ */
+ if (!udev->sock_name) {
+ error_setg(errp, "No socket specified");
+ error_append_hint(errp, "Use -device vfio-user-pci,socket=<name>\n");
+ return;
+ }
+
+ vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
+ vbasedev->ops = &vfio_user_pci_ops;
+ vbasedev->type = VFIO_DEVICE_TYPE_PCI;
+ vbasedev->dev = DEVICE(vdev);
+
+ /*
+ * vfio-user devices are effectively mdevs (don't use a host iommu).
+ */
+ vbasedev->mdev = true;
+
+ as = pci_device_iommu_address_space(pdev);
+ if (!vfio_attach_device_by_iommu_type(TYPE_VFIO_IOMMU_USER,
+ vbasedev->name, vbasedev,
+ as, errp)) {
+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
+ return;
+ }
+}
+
+static void vfio_user_instance_init(Object *obj)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ device_add_bootindex_property(obj, &vdev->bootindex,
+ "bootindex", NULL,
+ &pci_dev->qdev);
+ vdev->host.domain = ~0U;
+ vdev->host.bus = ~0U;
+ vdev->host.slot = ~0U;
+ vdev->host.function = ~0U;
+
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
+ &vfio_dev_io_ioctl, DEVICE(vdev), false);
+
+ vdev->nv_gpudirect_clique = 0xFF;
+
+ /*
+ * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
+ * line, therefore, no need to wait to realize like other devices.
+ */
+ pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+}
+
+static void vfio_user_instance_finalize(Object *obj)
+{
+ VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+
+ vfio_pci_put_device(vdev);
+}
+
+static const Property vfio_user_pci_dev_properties[] = {
+ DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
+};
+
+static void vfio_user_pci_dev_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+
+ device_class_set_props(dc, vfio_user_pci_dev_properties);
+ dc->desc = "VFIO over socket PCI device assignment";
+ pdc->realize = vfio_user_pci_realize;
+}
+
+static const TypeInfo vfio_user_pci_dev_info = {
+ .name = TYPE_VFIO_USER_PCI,
+ .parent = TYPE_VFIO_PCI_BASE,
+ .instance_size = sizeof(VFIOUserPCIDevice),
+ .class_init = vfio_user_pci_dev_class_init,
+ .instance_init = vfio_user_instance_init,
+ .instance_finalize = vfio_user_instance_finalize,
+};
+
+static void register_vfio_user_dev_type(void)
+{
+ type_register_static(&vfio_user_pci_dev_info);
+}
+
+type_init(register_vfio_user_dev_type)
@@ -80,6 +80,7 @@ typedef struct VFIOMigration {
struct VFIOGroup;
+/* MMU container sub-class for legacy vfio implementation. */
typedef struct VFIOContainer {
VFIOContainerBase bcontainer;
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
@@ -106,6 +107,7 @@ typedef struct VFIOIOASHwpt {
QLIST_ENTRY(VFIOIOASHwpt) next;
} VFIOIOASHwpt;
+/* MMU container sub-class for vfio iommufd implementation. */
typedef struct VFIOIOMMUFDContainer {
VFIOContainerBase bcontainer;
IOMMUFDBackend *be;
@@ -115,6 +117,13 @@ typedef struct VFIOIOMMUFDContainer {
OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
+/* MMU container sub-class for vfio-user. */
+typedef struct VFIOUserContainer {
+ VFIOContainerBase bcontainer;
+} VFIOUserContainer;
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
+
typedef struct VFIODeviceOps VFIODeviceOps;
typedef struct VFIODeviceIO VFIODeviceIO;
@@ -284,6 +293,7 @@ bool vfio_attach_device_by_iommu_type(const char *iommu_type, char *name,
VFIODevice *vbasedev, AddressSpace *as,
Error **errp);
void vfio_detach_device(VFIODevice *vbasedev);
+void vfio_put_base_device(VFIODevice *vbasedev);
int vfio_kvm_device_add_fd(int fd, Error **errp);
int vfio_kvm_device_del_fd(int fd, Error **errp);
@@ -100,6 +100,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
+#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
@@ -109,6 +109,8 @@ option('multiprocess', type: 'feature', value: 'auto',
description: 'Out of process device emulation support')
option('relocatable', type : 'boolean', value : true,
description: 'toggle relocatable install')
+option('vfio_user_client', type: 'feature', value: 'disabled',
+ description: 'vfio-user client support')
option('vfio_user_server', type: 'feature', value: 'disabled',
description: 'vfio-user server support')
option('dbus_display', type: 'feature', value: 'auto',
@@ -201,6 +201,8 @@ meson_options_help() {
printf "%s\n" ' vdi vdi image format support'
printf "%s\n" ' vduse-blk-export'
printf "%s\n" ' VDUSE block export support'
+ printf "%s\n" ' vfio-user-client'
+ printf "%s\n" ' vfio-user client support'
printf "%s\n" ' vfio-user-server'
printf "%s\n" ' vfio-user server support'
printf "%s\n" ' vhdx vhdx image format support'
@@ -529,6 +531,8 @@ _meson_option_parse() {
--disable-vdi) printf "%s" -Dvdi=disabled ;;
--enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
--disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
+ --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;;
+ --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;;
--enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
--disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
--enable-vhdx) printf "%s" -Dvhdx=enabled ;;