@@ -1228,10 +1228,100 @@ static int vfio_host_iommu_ctx_pasid_free(HostIOMMUContext *iommu_ctx,
return ret;
}
+/**
+ * vfio_get_iommu_info: fetch the IOMMU info of a VFIO container
+ * @container: container whose fd is used for the VFIO_IOMMU_GET_INFO ioctl
+ * @info: output; on success points to a heap buffer holding the info
+ *        structure returned by the kernel, on failure is set to NULL
+ *
+ * The buffer starts out at sizeof(struct vfio_iommu_type1_info). Whenever
+ * the kernel reports a larger argsz than was supplied, the buffer is grown
+ * to that size and the ioctl is issued again.
+ *
+ * Returns: 0 on success, in which case the caller must g_free() *@info
+ * once it is done with it; the negated errno of the ioctl on failure.
+ */
+static int vfio_get_iommu_info(VFIOContainer *container,
+                               struct vfio_iommu_type1_info **info)
+{
+    size_t argsz = sizeof(struct vfio_iommu_type1_info);
+
+    *info = g_malloc0(argsz);
+
+retry:
+    (*info)->argsz = argsz;
+
+    if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
+        /* Save errno before g_free(): freeing may overwrite it. */
+        int err = errno;
+
+        g_free(*info);
+        *info = NULL;
+        return -err;
+    }
+
+    if ((*info)->argsz > argsz) {
+        /* The kernel needs more room than we offered; grow and ask again. */
+        argsz = (*info)->argsz;
+        *info = g_realloc(*info, argsz);
+        goto retry;
+    }
+
+    return 0;
+}
+
+/*
+ * Look up capability @id in the capability chain of @info.
+ *
+ * Chain links (cap_offset, then each header's next) are byte offsets
+ * measured from the start of @info; an offset of 0 ends the chain.
+ *
+ * Returns the matching capability header, or NULL when @info carries no
+ * capabilities (VFIO_IOMMU_INFO_CAPS not set) or no entry has the
+ * requested id.
+ */
+static struct vfio_info_cap_header *
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+    void *base = info;
+    uint32_t offset;
+
+    if ((info->flags & VFIO_IOMMU_INFO_CAPS) == 0) {
+        return NULL;
+    }
+
+    offset = info->cap_offset;
+    while (offset != 0) {
+        struct vfio_info_cap_header *cur = base + offset;
+
+        if (cur->id == id) {
+            return cur;
+        }
+        offset = cur->next;
+    }
+
+    return NULL;
+}
+
+/**
+ * vfio_get_nesting_iommu_cap: get a private copy of the nesting capability
+ * @container: container to query through vfio_get_iommu_info()
+ * @cap_nesting: output; on success points to a heap copy of the
+ *               VFIO_IOMMU_TYPE1_INFO_CAP_NESTING capability (its fixed
+ *               part followed by the iommu_nesting_info payload), on
+ *               failure is set to NULL
+ *
+ * The size of the payload is taken from the size field reported inside
+ * the capability. It is validated against the amount of data actually
+ * present in the info buffer before anything is copied.
+ *
+ * Returns: 0 on success, in which case the caller must g_free()
+ * *@cap_nesting; the error from vfio_get_iommu_info() if the query
+ * fails; -EINVAL if the capability is absent or its size is invalid.
+ */
+static int vfio_get_nesting_iommu_cap(VFIOContainer *container,
+                   struct vfio_iommu_type1_info_cap_nesting **cap_nesting)
+{
+    struct vfio_iommu_type1_info *info;
+    struct vfio_info_cap_header *hdr;
+    struct vfio_iommu_type1_info_cap_nesting *cap;
+    struct iommu_nesting_info *nest_info;
+    size_t cap_off, avail, fixed_sz, min_nest_sz, total;
+    int ret;
+
+    *cap_nesting = NULL;
+
+    ret = vfio_get_iommu_info(container, &info);
+    if (ret) {
+        return ret;
+    }
+
+    /* Every failure below is a malformed or missing capability. */
+    ret = -EINVAL;
+
+    hdr = vfio_get_iommu_info_cap(info,
+                                  VFIO_IOMMU_TYPE1_INFO_CAP_NESTING);
+    if (!hdr) {
+        goto out;
+    }
+
+    cap = container_of(hdr,
+                       struct vfio_iommu_type1_info_cap_nesting, header);
+
+    fixed_sz = offsetof(struct vfio_iommu_type1_info_cap_nesting, info);
+    min_nest_sz = offsetof(struct iommu_nesting_info, data);
+
+    /* Bytes of the info buffer available from the capability onwards. */
+    cap_off = (char *)cap - (char *)info;
+    avail = info->argsz > cap_off ? info->argsz - cap_off : 0;
+    if (avail < fixed_sz + min_nest_sz) {
+        goto out;
+    }
+
+    nest_info = (struct iommu_nesting_info *) &cap->info;
+    /*
+     * The reported size must cover the fixed nesting-info fields and
+     * must not run past the end of the data held in the info buffer,
+     * otherwise the copy below would read out of bounds.
+     */
+    if (nest_info->size < min_nest_sz ||
+        nest_info->size > avail - fixed_sz) {
+        goto out;
+    }
+
+    total = fixed_sz + nest_info->size;
+    *cap_nesting = g_malloc(total);
+    memcpy(*cap_nesting, cap, total);
+    ret = 0;
+
+out:
+    g_free(info);
+    return ret;
+}
+
static int vfio_init_container(VFIOContainer *container, int group_fd,
bool want_nested, Error **errp)
{
int iommu_type, ret;
+ uint64_t flags = 0;
iommu_type = vfio_get_iommu_type(container, want_nested, errp);
if (iommu_type < 0) {
@@ -1259,6 +1349,27 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
return -errno;
}
+ if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+ struct vfio_iommu_type1_info_cap_nesting *nesting = NULL;
+ struct iommu_nesting_info *nest_info;
+
+ ret = vfio_get_nesting_iommu_cap(container, &nesting);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "Failed to get nesting iommu cap");
+ return ret;
+ }
+
+ nest_info = (struct iommu_nesting_info *) &nesting->info;
+ flags |= (nest_info->features & IOMMU_NESTING_FEAT_SYSWIDE_PASID) ?
+ HOST_IOMMU_PASID_REQUEST : 0;
+ host_iommu_ctx_init(&container->iommu_ctx,
+ sizeof(container->iommu_ctx),
+ TYPE_VFIO_HOST_IOMMU_CONTEXT,
+ flags);
+ g_free(nesting);
+ }
+
container->iommu_type = iommu_type;
return 0;
}
@@ -2710,6 +2710,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
VFIOPCIDevice *vdev = PCI_VFIO(pdev);
VFIODevice *vbasedev_iter;
VFIOGroup *group;
+ VFIOContainer *container;
char *tmp, *subsys, group_path[PATH_MAX], *group_name;
Error *err = NULL;
ssize_t len;
@@ -2787,6 +2788,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
+ container = group->container;
+ if (container->iommu_ctx.initialized &&
+ pci_device_set_iommu_context(pdev, &container->iommu_ctx)) {
+ error_setg(errp, "device attachment is denied by vIOMMU, "
+ "please check host IOMMU nesting capability");
+ vfio_put_group(group);
+ goto error;
+ }
+
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
error_setg(errp, "device is already attached");
@@ -3072,9 +3082,16 @@ static void vfio_instance_finalize(Object *obj)
static void vfio_exitfn(PCIDevice *pdev)
{
VFIOPCIDevice *vdev = PCI_VFIO(pdev);
+ VFIOContainer *container;
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
+
+ container = vdev->vbasedev.group->container;
+ if (container->iommu_ctx.initialized) {
+ pci_device_unset_iommu_context(pdev);
+ }
+
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
if (vdev->irqchip_change_notifier.notify) {
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
In this patch, QEMU first gets the iommu info from the kernel to check the capabilities supported by a VFIO_IOMMU_TYPE1_NESTING iommu, and then initializes a HostIOMMUContext instance. For vfio-pci devices, it can use pci_device_set/unset_iommu_context() to expose the host iommu context to vIOMMU emulators. vIOMMU emulators can then make use of the methods provided by the host iommu context, e.g. to propagate requests to the host iommu. Cc: Kevin Tian <kevin.tian@intel.com> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com> Cc: Peter Xu <peterx@redhat.com> Cc: Eric Auger <eric.auger@redhat.com> Cc: Yi Sun <yi.y.sun@linux.intel.com> Cc: David Gibson <david@gibson.dropbear.id.au> Cc: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Liu Yi L <yi.l.liu@intel.com> --- hw/vfio/common.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ hw/vfio/pci.c | 17 +++++++++ 2 files changed, 128 insertions(+)