diff mbox series

[v1,13/24] vfio-user: pci_user_realize PCI setup

Message ID 9b5702602c3bc0c79df893b269276c74b057026b.1667542066.git.john.g.johnson@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user client | expand

Commit Message

John Johnson Nov. 8, 2022, 11:13 p.m. UTC
PCI BARs read from remote device
PCI config reads/writes sent to remote server

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 hw/vfio/pci.c | 277 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 174 insertions(+), 103 deletions(-)

Comments

John Levon Dec. 9, 2022, 5:22 p.m. UTC | #1
On Tue, Nov 08, 2022 at 03:13:35PM -0800, John Johnson wrote:

> PCI BARs read from remote device
> PCI config reads/writes sent to remote server

Reviewed-by: John Levon <john.levon@nutanix.com>

regards
john
Cédric Le Goater Dec. 13, 2022, 4:13 p.m. UTC | #2
On 11/9/22 00:13, John Johnson wrote:
> PCI BARs read from remote device
> PCI config reads/writes sent to remote server
> 
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>


Reviewed-by: Cédric Le Goater <clg@redhat.com>

Thanks,

C.


> ---
>   hw/vfio/pci.c | 277 ++++++++++++++++++++++++++++++++++++----------------------
>   1 file changed, 174 insertions(+), 103 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 027f9d5..7abe44e 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -2874,6 +2874,133 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>       vdev->req_enabled = false;
>   }
>   
> +static void vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    VFIODevice *vbasedev = &vdev->vbasedev;
> +    Error *err = NULL;
> +
> +    /* vfio emulates a lot for us, but some bits need extra love */
> +    vdev->emulated_config_bits = g_malloc0(vdev->config_size);
> +
> +    /* QEMU can choose to expose the ROM or not */
> +    memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4);
> +    /* QEMU can also add or extend BARs */
> +    memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4);
> +
> +    /*
> +     * The PCI spec reserves vendor ID 0xffff as an invalid value.  The
> +     * device ID is managed by the vendor and need only be a 16-bit value.
> +     * Allow any 16-bit value for subsystem so they can be hidden or changed.
> +     */
> +    if (vdev->vendor_id != PCI_ANY_ID) {
> +        if (vdev->vendor_id >= 0xffff) {
> +            error_setg(errp, "invalid PCI vendor ID provided");
> +            return;
> +        }
> +        vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
> +        trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id);
> +    } else {
> +        vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
> +    }
> +
> +    if (vdev->device_id != PCI_ANY_ID) {
> +        if (vdev->device_id > 0xffff) {
> +            error_setg(errp, "invalid PCI device ID provided");
> +            return;
> +        }
> +        vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
> +        trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id);
> +    } else {
> +        vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
> +    }
> +
> +    if (vdev->sub_vendor_id != PCI_ANY_ID) {
> +        if (vdev->sub_vendor_id > 0xffff) {
> +            error_setg(errp, "invalid PCI subsystem vendor ID provided");
> +            return;
> +        }
> +        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
> +                               vdev->sub_vendor_id, ~0);
> +        trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name,
> +                                              vdev->sub_vendor_id);
> +    }
> +
> +    if (vdev->sub_device_id != PCI_ANY_ID) {
> +        if (vdev->sub_device_id > 0xffff) {
> +            error_setg(errp, "invalid PCI subsystem device ID provided");
> +            return;
> +        }
> +        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0);
> +        trace_vfio_pci_emulated_sub_device_id(vbasedev->name,
> +                                              vdev->sub_device_id);
> +    }
> +
> +    /* QEMU can change multi-function devices to single function, or reverse */
> +    vdev->emulated_config_bits[PCI_HEADER_TYPE] =
> +                                              PCI_HEADER_TYPE_MULTI_FUNCTION;
> +
> +    /* Restore or clear multifunction, this is always controlled by QEMU */
> +    if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
> +        vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> +    } else {
> +        vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
> +    }
> +
> +    /*
> +     * Clear host resource mapping info.  If we choose not to register a
> +     * BAR, such as might be the case with the option ROM, we can get
> +     * confusing, unwritable, residual addresses from the host here.
> +     */
> +    memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
> +    memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
> +
> +    vfio_pci_size_rom(vdev);
> +
> +    vfio_bars_prepare(vdev);
> +
> +    vfio_msix_early_setup(vdev, &err);
> +    if (err) {
> +        error_propagate(errp, err);
> +        return;
> +    }
> +
> +    vfio_bars_register(vdev);
> +}
> +
> +static int vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
> +{
> +    PCIDevice *pdev = &vdev->pdev;
> +    int ret;
> +
> +    /* QEMU emulates all of MSI & MSIX */
> +    if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
> +        memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
> +               MSIX_CAP_LENGTH);
> +    }
> +
> +    if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
> +        memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
> +               vdev->msi_cap_size);
> +    }
> +
> +    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
> +        vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
> +                                                  vfio_intx_mmap_enable, vdev);
> +        pci_device_set_intx_routing_notifier(&vdev->pdev,
> +                                             vfio_intx_routing_notifier);
> +        vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
> +        kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
> +        ret = vfio_intx_enable(vdev, errp);
> +        if (ret) {
> +            pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
> +            kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
> +            return ret;
> +        }
> +    }
> +    return 0;
> +}
> +
>   static void vfio_realize(PCIDevice *pdev, Error **errp)
>   {
>       VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> @@ -2990,92 +3117,16 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>           goto error;
>       }
>   
> -    /* vfio emulates a lot for us, but some bits need extra love */
> -    vdev->emulated_config_bits = g_malloc0(vdev->config_size);
> -
> -    /* QEMU can choose to expose the ROM or not */
> -    memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4);
> -    /* QEMU can also add or extend BARs */
> -    memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4);
> -
> -    /*
> -     * The PCI spec reserves vendor ID 0xffff as an invalid value.  The
> -     * device ID is managed by the vendor and need only be a 16-bit value.
> -     * Allow any 16-bit value for subsystem so they can be hidden or changed.
> -     */
> -    if (vdev->vendor_id != PCI_ANY_ID) {
> -        if (vdev->vendor_id >= 0xffff) {
> -            error_setg(errp, "invalid PCI vendor ID provided");
> -            goto error;
> -        }
> -        vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
> -        trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id);
> -    } else {
> -        vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
> -    }
> -
> -    if (vdev->device_id != PCI_ANY_ID) {
> -        if (vdev->device_id > 0xffff) {
> -            error_setg(errp, "invalid PCI device ID provided");
> -            goto error;
> -        }
> -        vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
> -        trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id);
> -    } else {
> -        vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
> -    }
> -
> -    if (vdev->sub_vendor_id != PCI_ANY_ID) {
> -        if (vdev->sub_vendor_id > 0xffff) {
> -            error_setg(errp, "invalid PCI subsystem vendor ID provided");
> -            goto error;
> -        }
> -        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
> -                               vdev->sub_vendor_id, ~0);
> -        trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name,
> -                                              vdev->sub_vendor_id);
> -    }
> -
> -    if (vdev->sub_device_id != PCI_ANY_ID) {
> -        if (vdev->sub_device_id > 0xffff) {
> -            error_setg(errp, "invalid PCI subsystem device ID provided");
> -            goto error;
> -        }
> -        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0);
> -        trace_vfio_pci_emulated_sub_device_id(vbasedev->name,
> -                                              vdev->sub_device_id);
> -    }
> -
> -    /* QEMU can change multi-function devices to single function, or reverse */
> -    vdev->emulated_config_bits[PCI_HEADER_TYPE] =
> -                                              PCI_HEADER_TYPE_MULTI_FUNCTION;
> -
> -    /* Restore or clear multifunction, this is always controlled by QEMU */
> -    if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
> -        vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> -    } else {
> -        vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
> -    }
> -
> -    /*
> -     * Clear host resource mapping info.  If we choose not to register a
> -     * BAR, such as might be the case with the option ROM, we can get
> -     * confusing, unwritable, residual addresses from the host here.
> -     */
> -    memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
> -    memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
> -
> -    vfio_pci_size_rom(vdev);
> -
> -    vfio_bars_prepare(vdev);
> -
> -    vfio_msix_early_setup(vdev, &err);
> +    vfio_pci_config_setup(vdev, &err);
>       if (err) {
> -        error_propagate(errp, err);
>           goto error;
>       }
>   
> -    vfio_bars_register(vdev);
> +    /*
> +     * vfio_pci_config_setup will have registered the device's BARs
> +     * and setup any MSIX BARs, so errors after it succeeds must
> +     * use out_teardown
> +     */
>   
>       ret = vfio_add_capabilities(vdev, errp);
>       if (ret) {
> @@ -3116,29 +3167,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>           }
>       }
>   
> -    /* QEMU emulates all of MSI & MSIX */
> -    if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
> -        memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
> -               MSIX_CAP_LENGTH);
> -    }
> -
> -    if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
> -        memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
> -               vdev->msi_cap_size);
> +    ret = vfio_interrupt_setup(vdev, errp);
> +    if (ret) {
> +        goto out_teardown;
>       }
>   
> -    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
> -        vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
> -                                                  vfio_intx_mmap_enable, vdev);
> -        pci_device_set_intx_routing_notifier(&vdev->pdev,
> -                                             vfio_intx_routing_notifier);
> -        vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
> -        kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
> -        ret = vfio_intx_enable(vdev, errp);
> -        if (ret) {
> -            goto out_deregister;
> -        }
> -    }
> +    /*
> +     * vfio_interrupt_setup will have setup INTx's KVM routing
> +     * so errors after it succeeds must use out_deregister
> +     */
>   
>       if (vdev->display != ON_OFF_AUTO_OFF) {
>           ret = vfio_display_probe(vdev, errp);
> @@ -3525,8 +3562,42 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
>           goto error;
>       }
>   
> +    /* Get a copy of config space */
> +    ret = VDEV_REGION_READ(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, 0,
> +                           MIN(pci_config_size(pdev), vdev->config_size),
> +                           pdev->config);
> +    if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
> +        error_setg_errno(errp, -ret, "failed to read device config space");
> +        goto error;
> +    }
> +
> +    vfio_pci_config_setup(vdev, &err);
> +    if (err) {
> +        error_propagate(errp, err);
> +        goto error;
> +    }
> +
> +    /*
> +     * vfio_pci_config_setup will have registered the device's BARs
> +     * and setup any MSIX BARs, so errors after it succeeds must
> +     * use out_teardown
> +     */
> +
> +    ret = vfio_add_capabilities(vdev, errp);
> +    if (ret) {
> +        goto out_teardown;
> +    }
> +
> +    ret = vfio_interrupt_setup(vdev, errp);
> +    if (ret) {
> +        goto out_teardown;
> +    }
> +
>       return;
>   
> +out_teardown:
> +    vfio_teardown_msi(vdev);
> +    vfio_bars_exit(vdev);
>   error:
>       error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
>   }
diff mbox series

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 027f9d5..7abe44e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2874,6 +2874,133 @@  static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+static void vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    Error *err = NULL;
+
+    /* vfio emulates a lot for us, but some bits need extra love */
+    vdev->emulated_config_bits = g_malloc0(vdev->config_size);
+
+    /* QEMU can choose to expose the ROM or not */
+    memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4);
+    /* QEMU can also add or extend BARs */
+    memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4);
+
+    /*
+     * The PCI spec reserves vendor ID 0xffff as an invalid value.  The
+     * device ID is managed by the vendor and need only be a 16-bit value.
+     * Allow any 16-bit value for subsystem so they can be hidden or changed.
+     */
+    if (vdev->vendor_id != PCI_ANY_ID) {
+        if (vdev->vendor_id >= 0xffff) {
+            error_setg(errp, "invalid PCI vendor ID provided");
+            return;
+        }
+        vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
+        trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id);
+    } else {
+        vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
+    }
+
+    if (vdev->device_id != PCI_ANY_ID) {
+        if (vdev->device_id > 0xffff) {
+            error_setg(errp, "invalid PCI device ID provided");
+            return;
+        }
+        vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
+        trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id);
+    } else {
+        vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
+    }
+
+    if (vdev->sub_vendor_id != PCI_ANY_ID) {
+        if (vdev->sub_vendor_id > 0xffff) {
+            error_setg(errp, "invalid PCI subsystem vendor ID provided");
+            return;
+        }
+        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
+                               vdev->sub_vendor_id, ~0);
+        trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name,
+                                              vdev->sub_vendor_id);
+    }
+
+    if (vdev->sub_device_id != PCI_ANY_ID) {
+        if (vdev->sub_device_id > 0xffff) {
+            error_setg(errp, "invalid PCI subsystem device ID provided");
+            return;
+        }
+        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0);
+        trace_vfio_pci_emulated_sub_device_id(vbasedev->name,
+                                              vdev->sub_device_id);
+    }
+
+    /* QEMU can change multi-function devices to single function, or reverse */
+    vdev->emulated_config_bits[PCI_HEADER_TYPE] =
+                                              PCI_HEADER_TYPE_MULTI_FUNCTION;
+
+    /* Restore or clear multifunction, this is always controlled by QEMU */
+    if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+        vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
+    } else {
+        vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
+    }
+
+    /*
+     * Clear host resource mapping info.  If we choose not to register a
+     * BAR, such as might be the case with the option ROM, we can get
+     * confusing, unwritable, residual addresses from the host here.
+     */
+    memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
+    memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
+
+    vfio_pci_size_rom(vdev);
+
+    vfio_bars_prepare(vdev);
+
+    vfio_msix_early_setup(vdev, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    vfio_bars_register(vdev);
+}
+
+static int vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    int ret;
+
+    /* QEMU emulates all of MSI & MSIX */
+    if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
+        memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
+               MSIX_CAP_LENGTH);
+    }
+
+    if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
+        memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
+               vdev->msi_cap_size);
+    }
+
+    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
+        vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+                                                  vfio_intx_mmap_enable, vdev);
+        pci_device_set_intx_routing_notifier(&vdev->pdev,
+                                             vfio_intx_routing_notifier);
+        vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
+        kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
+        ret = vfio_intx_enable(vdev, errp);
+        if (ret) {
+            pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+            kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
+            return ret;
+        }
+    }
+    return 0;
+}
+
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
     VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
@@ -2990,92 +3117,16 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
-    /* vfio emulates a lot for us, but some bits need extra love */
-    vdev->emulated_config_bits = g_malloc0(vdev->config_size);
-
-    /* QEMU can choose to expose the ROM or not */
-    memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4);
-    /* QEMU can also add or extend BARs */
-    memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4);
-
-    /*
-     * The PCI spec reserves vendor ID 0xffff as an invalid value.  The
-     * device ID is managed by the vendor and need only be a 16-bit value.
-     * Allow any 16-bit value for subsystem so they can be hidden or changed.
-     */
-    if (vdev->vendor_id != PCI_ANY_ID) {
-        if (vdev->vendor_id >= 0xffff) {
-            error_setg(errp, "invalid PCI vendor ID provided");
-            goto error;
-        }
-        vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
-        trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id);
-    } else {
-        vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
-    }
-
-    if (vdev->device_id != PCI_ANY_ID) {
-        if (vdev->device_id > 0xffff) {
-            error_setg(errp, "invalid PCI device ID provided");
-            goto error;
-        }
-        vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
-        trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id);
-    } else {
-        vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);
-    }
-
-    if (vdev->sub_vendor_id != PCI_ANY_ID) {
-        if (vdev->sub_vendor_id > 0xffff) {
-            error_setg(errp, "invalid PCI subsystem vendor ID provided");
-            goto error;
-        }
-        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
-                               vdev->sub_vendor_id, ~0);
-        trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name,
-                                              vdev->sub_vendor_id);
-    }
-
-    if (vdev->sub_device_id != PCI_ANY_ID) {
-        if (vdev->sub_device_id > 0xffff) {
-            error_setg(errp, "invalid PCI subsystem device ID provided");
-            goto error;
-        }
-        vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0);
-        trace_vfio_pci_emulated_sub_device_id(vbasedev->name,
-                                              vdev->sub_device_id);
-    }
-
-    /* QEMU can change multi-function devices to single function, or reverse */
-    vdev->emulated_config_bits[PCI_HEADER_TYPE] =
-                                              PCI_HEADER_TYPE_MULTI_FUNCTION;
-
-    /* Restore or clear multifunction, this is always controlled by QEMU */
-    if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
-        vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
-    } else {
-        vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
-    }
-
-    /*
-     * Clear host resource mapping info.  If we choose not to register a
-     * BAR, such as might be the case with the option ROM, we can get
-     * confusing, unwritable, residual addresses from the host here.
-     */
-    memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
-    memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
-
-    vfio_pci_size_rom(vdev);
-
-    vfio_bars_prepare(vdev);
-
-    vfio_msix_early_setup(vdev, &err);
+    vfio_pci_config_setup(vdev, &err);
     if (err) {
-        error_propagate(errp, err);
         goto error;
     }
 
-    vfio_bars_register(vdev);
+    /*
+     * vfio_pci_config_setup will have registered the device's BARs
+     * and setup any MSIX BARs, so errors after it succeeds must
+     * use out_teardown
+     */
 
     ret = vfio_add_capabilities(vdev, errp);
     if (ret) {
@@ -3116,29 +3167,15 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
         }
     }
 
-    /* QEMU emulates all of MSI & MSIX */
-    if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
-        memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
-               MSIX_CAP_LENGTH);
-    }
-
-    if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
-        memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
-               vdev->msi_cap_size);
+    ret = vfio_interrupt_setup(vdev, errp);
+    if (ret) {
+        goto out_teardown;
     }
 
-    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
-        vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
-                                                  vfio_intx_mmap_enable, vdev);
-        pci_device_set_intx_routing_notifier(&vdev->pdev,
-                                             vfio_intx_routing_notifier);
-        vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
-        kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
-        ret = vfio_intx_enable(vdev, errp);
-        if (ret) {
-            goto out_deregister;
-        }
-    }
+    /*
+     * vfio_interrupt_setup will have setup INTx's KVM routing
+     * so errors after it succeeds must use out_deregister
+     */
 
     if (vdev->display != ON_OFF_AUTO_OFF) {
         ret = vfio_display_probe(vdev, errp);
@@ -3525,8 +3562,42 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    /* Get a copy of config space */
+    ret = VDEV_REGION_READ(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, 0,
+                           MIN(pci_config_size(pdev), vdev->config_size),
+                           pdev->config);
+    if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
+        error_setg_errno(errp, -ret, "failed to read device config space");
+        goto error;
+    }
+
+    vfio_pci_config_setup(vdev, &err);
+    if (err) {
+        error_propagate(errp, err);
+        goto error;
+    }
+
+    /*
+     * vfio_pci_config_setup will have registered the device's BARs
+     * and setup any MSIX BARs, so errors after it succeeds must
+     * use out_teardown
+     */
+
+    ret = vfio_add_capabilities(vdev, errp);
+    if (ret) {
+        goto out_teardown;
+    }
+
+    ret = vfio_interrupt_setup(vdev, errp);
+    if (ret) {
+        goto out_teardown;
+    }
+
     return;
 
+out_teardown:
+    vfio_teardown_msi(vdev);
+    vfio_bars_exit(vdev);
 error:
     error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
 }