@@ -229,7 +229,7 @@ static void vfio_ap_instance_init(Object *obj)
* handle ram_block_discard_disable().
*/
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops,
- DEVICE(vapdev), true);
+ &vfio_device_io_ops_ioctl, DEVICE(vapdev), true);
/* AP device is mdev type device */
vbasedev->mdev = true;
@@ -676,7 +676,7 @@ static void vfio_ccw_instance_init(Object *obj)
* ram_block_discard_disable().
*/
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops,
- DEVICE(vcdev), true);
+ &vfio_device_io_ops_ioctl, DEVICE(vcdev), true);
}
#ifdef CONFIG_IOMMUFD
@@ -198,11 +198,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
feature->flags = VFIO_DEVICE_FEATURE_GET |
VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
- if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
- return -errno;
- }
-
- return 0;
+ return vbasedev->io_ops->device_feature(vbasedev, feature);
}
static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
@@ -82,7 +82,7 @@ void vfio_device_irq_disable(VFIODevice *vbasedev, int index)
.count = 0,
};
- ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
+ vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
}
void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
@@ -95,7 +95,7 @@ void vfio_device_irq_unmask(VFIODevice *vbasedev, int index)
.count = 1,
};
- ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
+ vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
}
void vfio_device_irq_mask(VFIODevice *vbasedev, int index)
@@ -108,7 +108,7 @@ void vfio_device_irq_mask(VFIODevice *vbasedev, int index)
.count = 1,
};
- ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
+ vbasedev->io_ops->set_irqs(vbasedev, &irq_set);
}
static inline const char *action_to_str(int action)
@@ -155,6 +155,7 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex
int argsz;
const char *name;
int32_t *pfd;
+ int ret;
argsz = sizeof(*irq_set) + sizeof(*pfd);
@@ -167,7 +168,9 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex
pfd = (int32_t *)&irq_set->data;
*pfd = fd;
- if (!ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
+ ret = vbasedev->io_ops->set_irqs(vbasedev, irq_set);
+
+ if (!ret) {
return true;
}
@@ -188,22 +191,19 @@ bool vfio_device_irq_set_signaling(VFIODevice *vbasedev, int index, int subindex
int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,
struct vfio_irq_info *info)
{
- int ret;
-
memset(info, 0, sizeof(*info));
info->argsz = sizeof(*info);
info->index = index;
- ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info);
-
- return ret < 0 ? -errno : ret;
+ return vbasedev->io_ops->get_irq_info(vbasedev, info);
}
int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info)
{
size_t argsz = sizeof(struct vfio_region_info);
+ int ret;
/* create region info cache */
if (vbasedev->reginfo == NULL) {
@@ -222,10 +222,11 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
retry:
(*info)->argsz = argsz;
- if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
+ ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
+ if (ret != 0) {
g_free(*info);
*info = NULL;
- return -errno;
+ return ret;
}
if ((*info)->argsz > argsz) {
@@ -332,10 +333,12 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
}
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
- DeviceState *dev, bool ram_discard)
+ VFIODeviceIOOps *io_ops, DeviceState *dev,
+ bool ram_discard)
{
vbasedev->type = type;
vbasedev->ops = ops;
+ vbasedev->io_ops = io_ops;
vbasedev->dev = dev;
vbasedev->fd = -1;
@@ -463,3 +466,78 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
vfio_device_get_all_region_info(vbasedev);
}
+
+/*
+ * Traditional ioctl()-based I/O
+ */
+
+static int vfio_device_io_device_feature(VFIODevice *vbasedev,
+ struct vfio_device_feature *feature)
+{
+ int ret;
+
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
+
+ return ret < 0 ? -errno : ret;
+}
+
+static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
+ struct vfio_region_info *info)
+{
+ int ret;
+
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
+
+ return ret < 0 ? -errno : ret;
+}
+
+static int vfio_device_io_get_irq_info(VFIODevice *vbasedev,
+ struct vfio_irq_info *info)
+{
+ int ret;
+
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info);
+
+ return ret < 0 ? -errno : ret;
+}
+
+static int vfio_device_io_set_irqs(VFIODevice *vbasedev,
+ struct vfio_irq_set *irqs)
+{
+ int ret;
+
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irqs);
+
+ return ret < 0 ? -errno : ret;
+}
+
+static int vfio_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
+ off_t off, uint32_t size, void *data)
+{
+ struct vfio_region_info *info = vbasedev->reginfo[index];
+ int ret;
+
+ ret = pread(vbasedev->fd, data, size, info->offset + off);
+
+ return ret < 0 ? -errno : ret;
+}
+
+static int vfio_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
+ off_t off, uint32_t size, void *data)
+{
+ struct vfio_region_info *info = vbasedev->reginfo[index];
+ int ret;
+
+ ret = pwrite(vbasedev->fd, data, size, info->offset + off);
+
+ return ret < 0 ? -errno : ret;
+}
+
+VFIODeviceIOOps vfio_device_io_ops_ioctl = {
+ .device_feature = vfio_device_io_device_feature,
+ .get_region_info = vfio_device_io_get_region_info,
+ .get_irq_info = vfio_device_io_get_irq_info,
+ .set_irqs = vfio_device_io_set_irqs,
+ .region_read = vfio_device_io_region_read,
+ .region_write = vfio_device_io_region_write,
+};
@@ -821,13 +821,17 @@ static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP;
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ int ret;
+
if (!vbasedev->dirty_tracking) {
continue;
}
- if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
+ ret = vbasedev->io_ops->device_feature(vbasedev, feature);
+
+ if (ret != 0) {
warn_report("%s: Failed to stop DMA logging, err %d (%s)",
- vbasedev->name, -errno, strerror(errno));
+ vbasedev->name, -ret, strerror(-ret));
}
vbasedev->dirty_tracking = false;
}
@@ -928,10 +932,9 @@ static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
continue;
}
- ret = ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
+ ret = vbasedev->io_ops->device_feature(vbasedev, feature);
if (ret) {
- ret = -errno;
- error_setg_errno(errp, errno, "%s: Failed to start DMA logging",
+ error_setg_errno(errp, -ret, "%s: Failed to start DMA logging",
vbasedev->name);
goto out;
}
@@ -381,7 +381,7 @@ static void vfio_msi_interrupt(void *opaque)
static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev)
{
g_autofree struct vfio_irq_set *irq_set = NULL;
- int ret = 0, argsz;
+ int argsz;
int32_t *fd;
argsz = sizeof(*irq_set) + sizeof(*fd);
@@ -396,9 +396,7 @@ static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev)
fd = (int32_t *)&irq_set->data;
*fd = -1;
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
-
- return ret < 0 ? -errno : ret;
+ return vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set);
}
static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
@@ -455,11 +453,11 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
fds[i] = fd;
}
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ ret = vdev->vbasedev.io_ops->set_irqs(&vdev->vbasedev, irq_set);
g_free(irq_set);
- return ret < 0 ? -errno : ret;
+ return ret;
}
static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
@@ -917,18 +915,22 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
memset(vdev->rom, 0xff, size);
while (size) {
- bytes = pread(vbasedev->fd, vdev->rom + off,
- size, vdev->rom_offset + off);
+ bytes = vbasedev->io_ops->region_read(vbasedev,
+ VFIO_PCI_ROM_REGION_INDEX,
+ off, size, vdev->rom + off);
+
if (bytes == 0) {
break;
} else if (bytes > 0) {
off += bytes;
size -= bytes;
} else {
- if (errno == EINTR || errno == EAGAIN) {
+ if (bytes == -EINTR || bytes == -EAGAIN) {
continue;
}
- error_report("vfio: Error reading device ROM: %m");
+ error_report("vfio: Error reading device ROM: %s",
+ strerror(-bytes));
+
break;
}
}
@@ -968,22 +970,18 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
static int vfio_pci_config_space_read(VFIOPCIDevice *vdev, off_t offset,
uint32_t size, void *data)
{
- ssize_t ret;
-
- ret = pread(vdev->vbasedev.fd, data, size, vdev->config_offset + offset);
-
- return ret < 0 ? -errno : (int)ret;
+ return vdev->vbasedev.io_ops->region_read(&vdev->vbasedev,
+ VFIO_PCI_CONFIG_REGION_INDEX,
+ offset, size, data);
}
/* "Raw" write of underlying config space. */
static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset,
uint32_t size, void *data)
{
- ssize_t ret;
-
- ret = pwrite(vdev->vbasedev.fd, data, size, vdev->config_offset + offset);
-
- return ret < 0 ? -errno : (int)ret;
+ return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev,
+ VFIO_PCI_CONFIG_REGION_INDEX,
+ offset, size, data);
}
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
@@ -3405,7 +3403,7 @@ static void vfio_instance_init(Object *obj)
vdev->host.function = ~0U;
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops,
- DEVICE(vdev), false);
+ &vfio_device_io_ops_ioctl, DEVICE(vdev), false);
vdev->nv_gpudirect_clique = 0xFF;
@@ -650,7 +650,7 @@ static void vfio_platform_instance_init(Object *obj)
VFIODevice *vbasedev = &vdev->vbasedev;
vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops,
- DEVICE(vdev), false);
+ &vfio_device_io_ops_ioctl, DEVICE(vdev), false);
}
#ifdef CONFIG_IOMMUFD
@@ -45,6 +45,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
uint32_t dword;
uint64_t qword;
} buf;
+ int ret;
switch (size) {
case 1:
@@ -64,11 +65,13 @@ void vfio_region_write(void *opaque, hwaddr addr,
break;
}
- if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
+ ret = vbasedev->io_ops->region_write(vbasedev, region->nr,
+ addr, size, &buf);
+ if (ret != size) {
error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
- ",%d) failed: %m",
+ ",%d) failed: %s",
__func__, vbasedev->name, region->nr,
- addr, data, size);
+ addr, data, size, ret < 0 ? strerror(-ret) : "short write");
}
trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);
@@ -96,11 +99,13 @@ uint64_t vfio_region_read(void *opaque,
uint64_t qword;
} buf;
uint64_t data = 0;
+ int ret;
- if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
- error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
+ ret = vbasedev->io_ops->region_read(vbasedev, region->nr, addr, size, &buf);
+ if (ret != size) {
+ error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s",
__func__, vbasedev->name, region->nr,
- addr, size);
+ addr, size, ret < 0 ? strerror(-ret) : "short read");
return (uint64_t)-1;
}
switch (size) {
@@ -41,6 +41,7 @@ enum {
};
typedef struct VFIODeviceOps VFIODeviceOps;
+typedef struct VFIODeviceIOOps VFIODeviceIOOps;
typedef struct VFIOMigration VFIOMigration;
typedef struct IOMMUFDBackend IOMMUFDBackend;
@@ -66,6 +67,7 @@ typedef struct VFIODevice {
OnOffAuto migration_multifd_transfer;
bool migration_events;
VFIODeviceOps *ops;
+ VFIODeviceIOOps *io_ops;
unsigned int num_irqs;
unsigned int num_regions;
unsigned int flags;
@@ -140,6 +142,25 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
extern VFIODeviceList vfio_device_list;
#ifdef CONFIG_LINUX
+/*
+ * How devices communicate with the server. The default option is through
+ * ioctl() to the kernel VFIO driver, but vfio-user can use a socket to a remote
+ * process.
+ */
+struct VFIODeviceIOOps {
+ int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *);
+ int (*get_region_info)(VFIODevice *vdev,
+ struct vfio_region_info *info);
+ int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
+ int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
+ int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
+ void *data);
+ int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
+ void *data);
+};
+
+extern VFIODeviceIOOps vfio_device_io_ops_ioctl;
+
int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info);
int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
@@ -154,6 +175,7 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,
bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
- DeviceState *dev, bool ram_discard);
+ VFIODeviceIOOps *io_ops, DeviceState *dev,
+ bool ram_discard);
int vfio_device_get_aw_bits(VFIODevice *vdev);
#endif /* HW_VFIO_VFIO_COMMON_H */