Message ID | 20231009091035.433329-8-eric.auger@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Prerequisite changes for IOMMUFD support | expand |
Hi Eric, >-----Original Message----- >From: Eric Auger <eric.auger@redhat.com> >Sent: Monday, October 9, 2023 5:09 PM >Subject: [PATCH v5 07/15] vfio/pci: Introduce vfio_[attach/detach]_device > >We want the VFIO devices to be able to use two different >IOMMU backends, the legacy VFIO one and the new iommufd one. > >Introduce vfio_[attach/detach]_device which aim at hiding the >underlying IOMMU backend (IOCTLs, datatypes, ...). > >Once vfio_attach_device completes, the device is attached >to a security context and its fd can be used. Conversely >When vfio_detach_device completes, the device has been >detached from the security context. > >At the moment only the implementation based on the legacy >container/group exists. Let's use it from the vfio-pci device. >Subsequent patches will handle other devices. > >We also take benefit of this patch to properly free >vbasedev->name on failure. > >Signed-off-by: Eric Auger <eric.auger@redhat.com> >Signed-off-by: Yi Liu <yi.l.liu@intel.com> >Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >Reviewed-by: Cédric Le Goater <clg@redhat.com> > >--- > >v4 -> v5: >- remove vbasedev->name g_free as it is done on instance_finalize > >v2 -> v3: >- added trace_vfio_detach_device >- added a comment explaining why we pass @name to vfio_attach_device > although vbasedev->name is populated >- free vbasedev->name and detach_device if needed >--- > include/hw/vfio/vfio-common.h | 3 ++ > hw/vfio/common.c | 74 +++++++++++++++++++++++++++++++++++ > hw/vfio/pci.c | 66 +++++++------------------------ > hw/vfio/trace-events | 3 +- > 4 files changed, 93 insertions(+), 53 deletions(-) > >diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >index c4e7c3b4a7..12fbfbc37d 100644 >--- a/include/hw/vfio/vfio-common.h >+++ b/include/hw/vfio/vfio-common.h >@@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group); > struct vfio_device_info *vfio_get_device_info(int fd); > int vfio_get_device(VFIOGroup *group, const char *name, > VFIODevice *vbasedev, Error **errp); >+int vfio_attach_device(char *name, VFIODevice *vbasedev, >+ AddressSpace *as, Error **errp); >+void vfio_detach_device(VFIODevice *vbasedev); > > int vfio_kvm_device_add_fd(int fd, Error **errp); > int vfio_kvm_device_del_fd(int fd, Error **errp); >diff --git a/hw/vfio/common.c b/hw/vfio/common.c >index d8ed432cb6..f4c33c9858 100644 >--- a/hw/vfio/common.c >+++ b/hw/vfio/common.c >@@ -2611,3 +2611,77 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op) > } > return vfio_eeh_container_op(container, op); > } >+ >+static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) >+{ >+ char *tmp, group_path[PATH_MAX], *group_name; >+ int ret, groupid; >+ ssize_t len; >+ >+ tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); >+ len = readlink(tmp, group_path, sizeof(group_path)); >+ g_free(tmp); >+ >+ if (len <= 0 || len >= sizeof(group_path)) { >+ ret = len < 0 ? -errno : -ENAMETOOLONG; >+ error_setg_errno(errp, -ret, "no iommu_group found"); >+ return ret; >+ } >+ >+ group_path[len] = 0; >+ >+ group_name = basename(group_path); >+ if (sscanf(group_name, "%d", &groupid) != 1) { >+ error_setg_errno(errp, errno, "failed to read %s", group_path); >+ return -errno; >+ } >+ return groupid; >+} >+ >+/* >+ * vfio_attach_device: attach a device to a security context >+ * @name and @vbasedev->name are likely to be different depending >+ * on the type of the device, hence the need for passing @name >+ */ >+int vfio_attach_device(char *name, VFIODevice *vbasedev, >+ AddressSpace *as, Error **errp) >+{ >+ int groupid = vfio_device_groupid(vbasedev, errp); >+ VFIODevice *vbasedev_iter; >+ VFIOGroup *group; >+ int ret; >+ >+ if (groupid < 0) { >+ return groupid; >+ } >+ >+ trace_vfio_attach_device(vbasedev->name, groupid); >+ >+ group = vfio_get_group(groupid, as, errp); >+ if (!group) { >+ return -ENOENT; >+ } >+ >+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { >+ if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { >+ error_setg(errp, "device is already attached"); >+ vfio_put_group(group); >+ return -EBUSY; >+ } >+ } >+ ret = vfio_get_device(group, name, vbasedev, errp); >+ if (ret) { >+ vfio_put_group(group); >+ } >+ >+ return ret; >+} >+ >+void vfio_detach_device(VFIODevice *vbasedev) >+{ >+ VFIOGroup *group = vbasedev->group; >+ >+ trace_vfio_detach_device(vbasedev->name, group->groupid); >+ vfio_put_base_device(vbasedev); >+ vfio_put_group(group); >+} >diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >index 898296fd54..40ae46266e 100644 >--- a/hw/vfio/pci.c >+++ b/hw/vfio/pci.c >@@ -2895,10 +2895,10 @@ static void vfio_populate_device(VFIOPCIDevice >*vdev, Error **errp) > > static void vfio_pci_put_device(VFIOPCIDevice *vdev) > { >+ vfio_detach_device(&vdev->vbasedev); >+ > g_free(vdev->vbasedev.name); > g_free(vdev->msix); >- >- vfio_put_base_device(&vdev->vbasedev); > } > > static void vfio_err_notifier_handler(void *opaque) >@@ -3045,13 +3045,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > { > VFIOPCIDevice *vdev = VFIO_PCI(pdev); > VFIODevice *vbasedev = &vdev->vbasedev; >- VFIODevice *vbasedev_iter; >- VFIOGroup *group; >- char *tmp, *subsys, group_path[PATH_MAX], *group_name; >+ char *tmp, *subsys; > Error *err = NULL; >- ssize_t len; > struct stat st; >- int groupid; > int i, ret; > bool is_mdev; > char uuid[UUID_FMT_LEN]; >@@ -3082,39 +3078,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > vbasedev->type = VFIO_DEVICE_TYPE_PCI; > vbasedev->dev = DEVICE(vdev); > >- tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); >- len = readlink(tmp, group_path, sizeof(group_path)); >- g_free(tmp); >- >- if (len <= 0 || len >= sizeof(group_path)) { >- error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG, >- "no iommu_group found"); >- goto error; >- } >- >- group_path[len] = 0; >- >- group_name = basename(group_path); >- if (sscanf(group_name, "%d", &groupid) != 1) { >- error_setg_errno(errp, errno, "failed to read %s", group_path); >- goto error; >- } >- >- trace_vfio_realize(vbasedev->name, groupid); >- >- group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), >errp); >- if (!group) { >- goto error; >- } >- >- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { >- if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { >- error_setg(errp, "device is already attached"); >- vfio_put_group(group); >- goto error; >- } >- } >- > /* > * Mediated devices *might* operate compatibly with discarding of RAM, but > * we cannot know for certain, it depends on whether the mdev vendor driver >@@ -3132,7 +3095,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > if (vbasedev->ram_block_discard_allowed && !is_mdev) { > error_setg(errp, "x-balloon-allowed only potentially compatible " > "with mdev devices"); >- vfio_put_group(group); > goto error; > } > >@@ -3143,17 +3105,17 @@ static void vfio_realize(PCIDevice *pdev, Error >**errp) > name = g_strdup(vbasedev->name); > } > >- ret = vfio_get_device(group, name, vbasedev, errp); >+ ret = vfio_attach_device(name, vbasedev, >+ pci_device_iommu_address_space(pdev), errp); > g_free(name); > if (ret) { >- vfio_put_group(group); > goto error; > } > > vfio_populate_device(vdev, &err); > if (err) { > error_propagate(errp, err); >- goto error; >+ goto out_detach; > } > > /* Get a copy of config space */ >@@ -3163,7 +3125,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { > ret = ret < 0 ? -errno : -EFAULT; > error_setg_errno(errp, -ret, "failed to read device config space"); >- goto error; >+ goto out_detach; > } > > /* vfio emulates a lot for us, but some bits need extra love */ >@@ -3182,7 +3144,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > if (vdev->vendor_id != PCI_ANY_ID) { > if (vdev->vendor_id >= 0xffff) { > error_setg(errp, "invalid PCI vendor ID provided"); >- goto error; >+ goto out_detach; > } > vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); > trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id); >@@ -3193,7 +3155,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > if (vdev->device_id != PCI_ANY_ID) { > if (vdev->device_id > 0xffff) { > error_setg(errp, "invalid PCI device ID provided"); >- goto error; >+ goto out_detach; > } > vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); > trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); >@@ -3204,7 +3166,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > if (vdev->sub_vendor_id != PCI_ANY_ID) { > if (vdev->sub_vendor_id > 0xffff) { > error_setg(errp, "invalid PCI subsystem vendor ID provided"); >- goto error; >+ goto out_detach; > } > vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, > vdev->sub_vendor_id, ~0); >@@ -3215,7 +3177,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > if (vdev->sub_device_id != PCI_ANY_ID) { > if (vdev->sub_device_id > 0xffff) { > error_setg(errp, "invalid PCI subsystem device ID provided"); >- goto error; >+ goto out_detach; > } > vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, >~0); > trace_vfio_pci_emulated_sub_device_id(vbasedev->name, >@@ -3248,7 +3210,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > vfio_msix_early_setup(vdev, &err); > if (err) { > error_propagate(errp, err); >- goto error; >+ goto out_detach; > } > > vfio_bars_register(vdev); >@@ -3364,6 +3326,8 @@ out_deregister: > out_teardown: > vfio_teardown_msi(vdev); > vfio_bars_exit(vdev); >+out_detach: >+ vfio_detach_device(vbasedev); I just reproduced a regression here with hot-unplug: (qemu) device_add vfio-pci,host=81:11.0,id=vfio0,bus=root0 vfio: error disconnecting group 447 from container Error: vfio 0000:81:11.0: does not support requested IGD OpRegion feature Reason is there are some resources only released in finalize(), so UNSET_CONTAINER call fails here. IMHO, we can drop this vfio_detach_device call and let the same call in finalize() do the real work as original. After fix: (qemu) device_add vfio-pci,host=81:11.0,id=vfio0,bus=root0 Error: vfio 0000:81:11.0: does not support requested IGD OpRegion feature Thanks Zhenzhong > error: > error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); > } >@@ -3371,7 +3335,6 @@ error: > static void vfio_instance_finalize(Object *obj) > { > VFIOPCIDevice *vdev = VFIO_PCI(obj); >- VFIOGroup *group = vdev->vbasedev.group; > > vfio_display_finalize(vdev); > vfio_bars_finalize(vdev); >@@ -3385,7 +3348,6 @@ static void vfio_instance_finalize(Object *obj) > * g_free(vdev->igd_opregion); > */ > vfio_pci_put_device(vdev); >- vfio_put_group(group); > } > > static void vfio_exitfn(PCIDevice *pdev) >diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >index 0ba3c5a0e2..8ac13eb106 100644 >--- a/hw/vfio/trace-events >+++ b/hw/vfio/trace-events >@@ -37,7 +37,8 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, >int function, int > vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot >reset: %s" > vfio_populate_device_config(const char *name, unsigned long size, unsigned >long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, >flags: 0x%lx" > vfio_populate_device_get_irq_info_failure(const char *errstr) >"VFIO_DEVICE_GET_IRQ_INFO failure: %s" >-vfio_realize(const char *name, int group_id) " (%s) group %d" >+vfio_attach_device(const char *name, int group_id) " (%s) group %d" >+vfio_detach_device(const char *name, int group_id) " (%s) group %d" > vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" > vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s >0x%x@0x%x" > vfio_pci_reset(const char *name) " (%s)" >-- >2.41.0
Hi Zhenzhong, On 10/11/23 03:33, Duan, Zhenzhong wrote: > Hi Eric, > >> -----Original Message----- >> From: Eric Auger <eric.auger@redhat.com> >> Sent: Monday, October 9, 2023 5:09 PM >> Subject: [PATCH v5 07/15] vfio/pci: Introduce vfio_[attach/detach]_device >> >> We want the VFIO devices to be able to use two different >> IOMMU backends, the legacy VFIO one and the new iommufd one. >> >> Introduce vfio_[attach/detach]_device which aim at hiding the >> underlying IOMMU backend (IOCTLs, datatypes, ...). >> >> Once vfio_attach_device completes, the device is attached >> to a security context and its fd can be used. Conversely >> When vfio_detach_device completes, the device has been >> detached from the security context. >> >> At the moment only the implementation based on the legacy >> container/group exists. Let's use it from the vfio-pci device. >> Subsequent patches will handle other devices. >> >> We also take benefit of this patch to properly free >> vbasedev->name on failure. >> >> Signed-off-by: Eric Auger <eric.auger@redhat.com> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> Reviewed-by: Cédric Le Goater <clg@redhat.com> >> >> --- >> >> v4 -> v5: >> - remove vbasedev->name g_free as it is done on instance_finalize >> >> v2 -> v3: >> - added trace_vfio_detach_device >> - added a comment explaining why we pass @name to vfio_attach_device >> although vbasedev->name is populated >> - free vbasedev->name and detach_device if needed >> --- >> include/hw/vfio/vfio-common.h | 3 ++ >> hw/vfio/common.c | 74 +++++++++++++++++++++++++++++++++++ >> hw/vfio/pci.c | 66 +++++++------------------------ >> hw/vfio/trace-events | 3 +- >> 4 files changed, 93 insertions(+), 53 deletions(-) >> >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index c4e7c3b4a7..12fbfbc37d 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group); >> struct vfio_device_info *vfio_get_device_info(int fd); >> int vfio_get_device(VFIOGroup *group, const char *name, >> VFIODevice *vbasedev, Error **errp); >> +int vfio_attach_device(char *name, VFIODevice *vbasedev, >> + AddressSpace *as, Error **errp); >> +void vfio_detach_device(VFIODevice *vbasedev); >> >> int vfio_kvm_device_add_fd(int fd, Error **errp); >> int vfio_kvm_device_del_fd(int fd, Error **errp); >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c >> index d8ed432cb6..f4c33c9858 100644 >> --- a/hw/vfio/common.c >> +++ b/hw/vfio/common.c >> @@ -2611,3 +2611,77 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op) >> } >> return vfio_eeh_container_op(container, op); >> } >> + >> +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) >> +{ >> + char *tmp, group_path[PATH_MAX], *group_name; >> + int ret, groupid; >> + ssize_t len; >> + >> + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); >> + len = readlink(tmp, group_path, sizeof(group_path)); >> + g_free(tmp); >> + >> + if (len <= 0 || len >= sizeof(group_path)) { >> + ret = len < 0 ? -errno : -ENAMETOOLONG; >> + error_setg_errno(errp, -ret, "no iommu_group found"); >> + return ret; >> + } >> + >> + group_path[len] = 0; >> + >> + group_name = basename(group_path); >> + if (sscanf(group_name, "%d", &groupid) != 1) { >> + error_setg_errno(errp, errno, "failed to read %s", group_path); >> + return -errno; >> + } >> + return groupid; >> +} >> + >> +/* >> + * vfio_attach_device: attach a device to a security context >> + * @name and @vbasedev->name are likely to be different depending >> + * on the type of the device, hence the need for passing @name >> + */ >> +int vfio_attach_device(char *name, VFIODevice *vbasedev, >> + AddressSpace *as, Error **errp) >> +{ >> + int groupid = vfio_device_groupid(vbasedev, errp); >> + VFIODevice *vbasedev_iter; >> + VFIOGroup *group; >> + int ret; >> + >> + if (groupid < 0) { >> + return groupid; >> + } >> + >> + trace_vfio_attach_device(vbasedev->name, groupid); >> + >> + group = vfio_get_group(groupid, as, errp); >> + if (!group) { >> + return -ENOENT; >> + } >> + >> + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { >> + if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { >> + error_setg(errp, "device is already attached"); >> + vfio_put_group(group); >> + return -EBUSY; >> + } >> + } >> + ret = vfio_get_device(group, name, vbasedev, errp); >> + if (ret) { >> + vfio_put_group(group); >> + } >> + >> + return ret; >> +} >> + >> +void vfio_detach_device(VFIODevice *vbasedev) >> +{ >> + VFIOGroup *group = vbasedev->group; >> + >> + trace_vfio_detach_device(vbasedev->name, group->groupid); >> + vfio_put_base_device(vbasedev); >> + vfio_put_group(group); >> +} >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index 898296fd54..40ae46266e 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -2895,10 +2895,10 @@ static void vfio_populate_device(VFIOPCIDevice >> *vdev, Error **errp) >> >> static void vfio_pci_put_device(VFIOPCIDevice *vdev) >> { >> + vfio_detach_device(&vdev->vbasedev); >> + >> g_free(vdev->vbasedev.name); >> g_free(vdev->msix); >> - >> - vfio_put_base_device(&vdev->vbasedev); >> } >> >> static void vfio_err_notifier_handler(void *opaque) >> @@ -3045,13 +3045,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> { >> VFIOPCIDevice *vdev = VFIO_PCI(pdev); >> VFIODevice *vbasedev = &vdev->vbasedev; >> - VFIODevice *vbasedev_iter; >> - VFIOGroup *group; >> - char *tmp, *subsys, group_path[PATH_MAX], *group_name; >> + char *tmp, *subsys; >> Error *err = NULL; >> - ssize_t len; >> struct stat st; >> - int groupid; >> int i, ret; >> bool is_mdev; >> char uuid[UUID_FMT_LEN]; >> @@ -3082,39 +3078,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> vbasedev->type = VFIO_DEVICE_TYPE_PCI; >> vbasedev->dev = DEVICE(vdev); >> >> - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); >> - len = readlink(tmp, group_path, sizeof(group_path)); >> - g_free(tmp); >> - >> - if (len <= 0 || len >= sizeof(group_path)) { >> - error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG, >> - "no iommu_group found"); >> - goto error; >> - } >> - >> - group_path[len] = 0; >> - >> - group_name = basename(group_path); >> - if (sscanf(group_name, "%d", &groupid) != 1) { >> - error_setg_errno(errp, errno, "failed to read %s", group_path); >> - goto error; >> - } >> - >> - trace_vfio_realize(vbasedev->name, groupid); >> - >> - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), >> errp); >> - if (!group) { >> - goto error; >> - } >> - >> - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { >> - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { >> - error_setg(errp, "device is already attached"); >> - vfio_put_group(group); >> - goto error; >> - } >> - } >> - >> /* >> * Mediated devices *might* operate compatibly with discarding of RAM, but >> * we cannot know for certain, it depends on whether the mdev vendor driver >> @@ -3132,7 +3095,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> if (vbasedev->ram_block_discard_allowed && !is_mdev) { >> error_setg(errp, "x-balloon-allowed only potentially compatible " >> "with mdev devices"); >> - vfio_put_group(group); >> goto error; >> } >> >> @@ -3143,17 +3105,17 @@ static void vfio_realize(PCIDevice *pdev, Error >> **errp) >> name = g_strdup(vbasedev->name); >> } >> >> - ret = vfio_get_device(group, name, vbasedev, errp); >> + ret = vfio_attach_device(name, vbasedev, >> + pci_device_iommu_address_space(pdev), errp); >> g_free(name); >> if (ret) { >> - vfio_put_group(group); >> goto error; >> } >> >> vfio_populate_device(vdev, &err); >> if (err) { >> error_propagate(errp, err); >> - goto error; >> + goto out_detach; >> } >> >> /* Get a copy of config space */ >> @@ -3163,7 +3125,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { >> ret = ret < 0 ? -errno : -EFAULT; >> error_setg_errno(errp, -ret, "failed to read device config space"); >> - goto error; >> + goto out_detach; >> } >> >> /* vfio emulates a lot for us, but some bits need extra love */ >> @@ -3182,7 +3144,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> if (vdev->vendor_id != PCI_ANY_ID) { >> if (vdev->vendor_id >= 0xffff) { >> error_setg(errp, "invalid PCI vendor ID provided"); >> - goto error; >> + goto out_detach; >> } >> vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); >> trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id); >> @@ -3193,7 +3155,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> if (vdev->device_id != PCI_ANY_ID) { >> if (vdev->device_id > 0xffff) { >> error_setg(errp, "invalid PCI device ID provided"); >> - goto error; >> + goto out_detach; >> } >> vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); >> trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); >> @@ -3204,7 +3166,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> if (vdev->sub_vendor_id != PCI_ANY_ID) { >> if (vdev->sub_vendor_id > 0xffff) { >> error_setg(errp, "invalid PCI subsystem vendor ID provided"); >> - goto error; >> + goto out_detach; >> } >> vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, >> vdev->sub_vendor_id, ~0); >> @@ -3215,7 +3177,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> if (vdev->sub_device_id != PCI_ANY_ID) { >> if (vdev->sub_device_id > 0xffff) { >> error_setg(errp, "invalid PCI subsystem device ID provided"); >> - goto error; >> + goto out_detach; >> } >> vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, >> ~0); >> trace_vfio_pci_emulated_sub_device_id(vbasedev->name, >> @@ -3248,7 +3210,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> vfio_msix_early_setup(vdev, &err); >> if (err) { >> error_propagate(errp, err); >> - goto error; >> + goto out_detach; >> } >> >> vfio_bars_register(vdev); >> @@ -3364,6 +3326,8 @@ out_deregister: >> out_teardown: >> vfio_teardown_msi(vdev); >> vfio_bars_exit(vdev); >> +out_detach: >> + vfio_detach_device(vbasedev); > I just reproduced a regression here with hot-unplug: > > (qemu) device_add vfio-pci,host=81:11.0,id=vfio0,bus=root0 > vfio: error disconnecting group 447 from container > Error: vfio 0000:81:11.0: does not support requested IGD OpRegion feature > > Reason is there are some resources only released in finalize(), > so UNSET_CONTAINER call fails here. > IMHO, we can drop this vfio_detach_device call and let the same call in > finalize() do the real work as original. > > After fix: > (qemu) device_add vfio-pci,host=81:11.0,id=vfio0,bus=root0 > Error: vfio 0000:81:11.0: does not support requested IGD OpRegion feature I just sent [PATCH] vfio/pci: Remove vfio_detach_device from vfio_realize error path Please have a check and confirm this is the fix you expected Thanks! Eric > > Thanks > Zhenzhong >> error: >> error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); >> } >> @@ -3371,7 +3335,6 @@ error: >> static void vfio_instance_finalize(Object *obj) >> { >> VFIOPCIDevice *vdev = VFIO_PCI(obj); >> - VFIOGroup *group = vdev->vbasedev.group; >> >> vfio_display_finalize(vdev); >> vfio_bars_finalize(vdev); >> @@ -3385,7 +3348,6 @@ static void vfio_instance_finalize(Object *obj) >> * g_free(vdev->igd_opregion); >> */ >> vfio_pci_put_device(vdev); >> - vfio_put_group(group); >> } >> >> static void vfio_exitfn(PCIDevice *pdev) >> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events >> index 0ba3c5a0e2..8ac13eb106 100644 >> --- a/hw/vfio/trace-events >> +++ b/hw/vfio/trace-events >> @@ -37,7 +37,8 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, >> int function, int >> vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot >> reset: %s" >> vfio_populate_device_config(const char *name, unsigned long size, unsigned >> long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, >> flags: 0x%lx" >> vfio_populate_device_get_irq_info_failure(const char *errstr) >> "VFIO_DEVICE_GET_IRQ_INFO failure: %s" >> -vfio_realize(const char *name, int group_id) " (%s) group %d" >> +vfio_attach_device(const char *name, int group_id) " (%s) group %d" >> +vfio_detach_device(const char *name, int group_id) " (%s) group %d" >> vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" >> vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s >> 0x%x@0x%x" >> vfio_pci_reset(const char *name) " (%s)" >> -- >> 2.41.0
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index c4e7c3b4a7..12fbfbc37d 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -225,6 +225,9 @@ void vfio_put_group(VFIOGroup *group); struct vfio_device_info *vfio_get_device_info(int fd); int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vbasedev, Error **errp); +int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp); +void vfio_detach_device(VFIODevice *vbasedev); int vfio_kvm_device_add_fd(int fd, Error **errp); int vfio_kvm_device_del_fd(int fd, Error **errp); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index d8ed432cb6..f4c33c9858 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -2611,3 +2611,77 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op) } return vfio_eeh_container_op(container, op); } + +static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) +{ + char *tmp, group_path[PATH_MAX], *group_name; + int ret, groupid; + ssize_t len; + + tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); + len = readlink(tmp, group_path, sizeof(group_path)); + g_free(tmp); + + if (len <= 0 || len >= sizeof(group_path)) { + ret = len < 0 ? -errno : -ENAMETOOLONG; + error_setg_errno(errp, -ret, "no iommu_group found"); + return ret; + } + + group_path[len] = 0; + + group_name = basename(group_path); + if (sscanf(group_name, "%d", &groupid) != 1) { + error_setg_errno(errp, errno, "failed to read %s", group_path); + return -errno; + } + return groupid; +} + +/* + * vfio_attach_device: attach a device to a security context + * @name and @vbasedev->name are likely to be different depending + * on the type of the device, hence the need for passing @name + */ +int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) +{ + int groupid = vfio_device_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; + VFIOGroup *group; + int ret; + + if (groupid < 0) { + return groupid; + } + + trace_vfio_attach_device(vbasedev->name, groupid); + + group = vfio_get_group(groupid, as, errp); + if (!group) { + return -ENOENT; + } + + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { + if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { + error_setg(errp, "device is already attached"); + vfio_put_group(group); + return -EBUSY; + } + } + ret = vfio_get_device(group, name, vbasedev, errp); + if (ret) { + vfio_put_group(group); + } + + return ret; +} + +void vfio_detach_device(VFIODevice *vbasedev) +{ + VFIOGroup *group = vbasedev->group; + + trace_vfio_detach_device(vbasedev->name, group->groupid); + vfio_put_base_device(vbasedev); + vfio_put_group(group); +} diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 898296fd54..40ae46266e 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2895,10 +2895,10 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) static void vfio_pci_put_device(VFIOPCIDevice *vdev) { + vfio_detach_device(&vdev->vbasedev); + g_free(vdev->vbasedev.name); g_free(vdev->msix); - - vfio_put_base_device(&vdev->vbasedev); } static void vfio_err_notifier_handler(void *opaque) @@ -3045,13 +3045,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) { VFIOPCIDevice *vdev = VFIO_PCI(pdev); VFIODevice *vbasedev = &vdev->vbasedev; - VFIODevice *vbasedev_iter; - VFIOGroup *group; - char *tmp, *subsys, group_path[PATH_MAX], *group_name; + char *tmp, *subsys; Error *err = NULL; - ssize_t len; struct stat st; - int groupid; int i, ret; bool is_mdev; char uuid[UUID_FMT_LEN]; @@ -3082,39 +3078,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vbasedev->type = VFIO_DEVICE_TYPE_PCI; vbasedev->dev = DEVICE(vdev); - tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev); - len = readlink(tmp, group_path, sizeof(group_path)); - g_free(tmp); - - if (len <= 0 || len >= sizeof(group_path)) { - error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG, - "no iommu_group found"); - goto error; - } - - group_path[len] = 0; - - group_name = basename(group_path); - if (sscanf(group_name, "%d", &groupid) != 1) { - error_setg_errno(errp, errno, "failed to read %s", group_path); - goto error; - } - - trace_vfio_realize(vbasedev->name, groupid); - - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp); - if (!group) { - goto error; - } - - QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { - if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { - error_setg(errp, "device is already attached"); - vfio_put_group(group); - goto error; - } - } - /* * Mediated devices *might* operate compatibly with discarding of RAM, but * we cannot know for certain, it depends on whether the mdev vendor driver @@ -3132,7 +3095,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vbasedev->ram_block_discard_allowed && !is_mdev) { error_setg(errp, "x-balloon-allowed only potentially compatible " "with mdev devices"); - vfio_put_group(group); goto error; } @@ -3143,17 +3105,17 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) name = g_strdup(vbasedev->name); } - ret = vfio_get_device(group, name, vbasedev, errp); + ret = vfio_attach_device(name, vbasedev, + pci_device_iommu_address_space(pdev), errp); g_free(name); if (ret) { - vfio_put_group(group); goto error; } vfio_populate_device(vdev, &err); if (err) { error_propagate(errp, err); - goto error; + goto out_detach; } /* Get a copy of config space */ @@ -3163,7 +3125,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { ret = ret < 0 ? -errno : -EFAULT; error_setg_errno(errp, -ret, "failed to read device config space"); - goto error; + goto out_detach; } /* vfio emulates a lot for us, but some bits need extra love */ @@ -3182,7 +3144,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vdev->vendor_id != PCI_ANY_ID) { if (vdev->vendor_id >= 0xffff) { error_setg(errp, "invalid PCI vendor ID provided"); - goto error; + goto out_detach; } vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id); @@ -3193,7 +3155,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vdev->device_id != PCI_ANY_ID) { if (vdev->device_id > 0xffff) { error_setg(errp, "invalid PCI device ID provided"); - goto error; + goto out_detach; } vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); @@ -3204,7 +3166,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vdev->sub_vendor_id != PCI_ANY_ID) { if (vdev->sub_vendor_id > 0xffff) { error_setg(errp, "invalid PCI subsystem vendor ID provided"); - goto error; + goto out_detach; } vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, vdev->sub_vendor_id, ~0); @@ -3215,7 +3177,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) if (vdev->sub_device_id != PCI_ANY_ID) { if (vdev->sub_device_id > 0xffff) { error_setg(errp, "invalid PCI subsystem device ID provided"); - goto error; + goto out_detach; } vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); trace_vfio_pci_emulated_sub_device_id(vbasedev->name, @@ -3248,7 +3210,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_msix_early_setup(vdev, &err); if (err) { error_propagate(errp, err); - goto error; + goto out_detach; } vfio_bars_register(vdev); @@ -3364,6 +3326,8 @@ out_deregister: out_teardown: vfio_teardown_msi(vdev); vfio_bars_exit(vdev); +out_detach: + vfio_detach_device(vbasedev); error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); } @@ -3371,7 +3335,6 @@ error: static void vfio_instance_finalize(Object *obj) { VFIOPCIDevice *vdev = VFIO_PCI(obj); - VFIOGroup *group = vdev->vbasedev.group; vfio_display_finalize(vdev); vfio_bars_finalize(vdev); @@ -3385,7 +3348,6 @@ static void vfio_instance_finalize(Object *obj) * g_free(vdev->igd_opregion); */ vfio_pci_put_device(vdev); - vfio_put_group(group); } static void vfio_exitfn(PCIDevice *pdev) diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 0ba3c5a0e2..8ac13eb106 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -37,7 +37,8 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s" -vfio_realize(const char *name, int group_id) " (%s) group %d" +vfio_attach_device(const char *name, int group_id) " (%s) group %d" +vfio_detach_device(const char *name, int group_id) " (%s) group %d" vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" vfio_pci_reset(const char *name) " (%s)"