diff mbox series

[RFC,v2,4/4] vfio/pci: Enable AtomicOps completers on root ports

Message ID 20230526231558.1660396-5-alex.williamson@redhat.com (mailing list archive)
State New, archived
Headers show
Series vfio/pci: Atomic Ops completer support | expand

Commit Message

Alex Williamson May 26, 2023, 11:15 p.m. UTC
Dynamically enable Atomic Ops completer support around realize/exit of
vfio-pci devices reporting host support for these accesses and adhering
to a minimal configuration standard.  While the Atomic Ops completer
bits in the root port device capabilities2 register are read-only, the
PCIe spec does allow RO bits to change to reflect hardware state.  We
take advantage of that here around the realize and exit functions of
the vfio-pci device.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 hw/vfio/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h |  1 +
 2 files changed, 79 insertions(+)

Comments

Cédric Le Goater May 30, 2023, 1:36 p.m. UTC | #1
On 5/27/23 01:15, Alex Williamson wrote:
> Dynamically enable Atomic Ops completer support around realize/exit of
> vfio-pci devices reporting host support for these accesses and adhering
> to a minimal configuration standard.  While the Atomic Ops completer
> bits in the root port device capabilities2 register are read-only, the
> PCIe spec does allow RO bits to change to reflect hardware state.  We
> take advantage of that here around the realize and exit functions of
> the vfio-pci device.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

LGTM. I am not sure about the single function restriction, may be that's
worth a warning ?

Thanks,

C.


> ---
>   hw/vfio/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
>   hw/vfio/pci.h |  1 +
>   2 files changed, 79 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index bf27a3990564..d8a0fd595560 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1826,6 +1826,81 @@ static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos,
>       vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
>   }
>   
> +static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
> +{
> +    struct vfio_device_info_cap_pci_atomic_comp *cap;
> +    g_autofree struct vfio_device_info *info = NULL;
> +    PCIBus *bus = pci_get_bus(&vdev->pdev);
> +    PCIDevice *parent = bus->parent_dev;
> +    struct vfio_info_cap_header *hdr;
> +    uint32_t mask = 0;
> +    uint8_t *pos;
> +
> +    /*
> +     * PCIe Atomic Ops completer support is only added automatically for single
> +     * function devices downstream of a root port supporting DEVCAP2.  Support
> +     * is added during realize and, if added, removed during device exit.  The
> +     * single function requirement avoids conflicting requirements should a
> +     * slot be composed of multiple devices with differing capabilities.
> +     */
> +    if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
> +        pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
> +        pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
> +        vdev->pdev.devfn ||
> +        vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
> +        return;
> +    }
> +
> +    pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
> +
> +    /* Abort if there'a already an Atomic Ops configuration on the root port */
> +    if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> +                             PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
> +                             PCI_EXP_DEVCAP2_ATOMIC_COMP128)) {
> +        return;
> +    }
> +
> +    info = vfio_get_device_info(vdev->vbasedev.fd);
> +    if (!info) {
> +        return;
> +    }
> +
> +    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP);
> +    if (!hdr) {
> +        return;
> +    }
> +
> +    cap = (void *)hdr;
> +    if (cap->flags & VFIO_PCI_ATOMIC_COMP32) {
> +        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32;
> +    }
> +    if (cap->flags & VFIO_PCI_ATOMIC_COMP64) {
> +        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64;
> +    }
> +    if (cap->flags & VFIO_PCI_ATOMIC_COMP128) {
> +        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128;
> +    }
> +
> +    if (!mask) {
> +        return;
> +    }
> +
> +    pci_long_test_and_set_mask(pos, mask);
> +    vdev->clear_parent_atomics_on_exit = true;
> +}
> +
> +static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
> +{
> +    if (vdev->clear_parent_atomics_on_exit) {
> +        PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
> +        uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
> +
> +        pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> +                                          PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
> +                                          PCI_EXP_DEVCAP2_ATOMIC_COMP128);
> +    }
> +}
> +
>   static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
>                                  Error **errp)
>   {
> @@ -1929,6 +2004,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
>                              QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0);
>               vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
>           }
> +
> +        vfio_pci_enable_rp_atomics(vdev);
>       }
>   
>       /*
> @@ -3265,6 +3342,7 @@ static void vfio_exitfn(PCIDevice *pdev)
>           timer_free(vdev->intx.mmap_timer);
>       }
>       vfio_teardown_msi(vdev);
> +    vfio_pci_disable_rp_atomics(vdev);
>       vfio_bars_exit(vdev);
>       vfio_migration_exit(&vdev->vbasedev);
>   }
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index 2674476d6c77..a2771b9ff3cc 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -174,6 +174,7 @@ struct VFIOPCIDevice {
>       bool no_vfio_ioeventfd;
>       bool enable_ramfb;
>       bool defer_kvm_irq_routing;
> +    bool clear_parent_atomics_on_exit;
>       VFIODisplay *dpy;
>       Notifier irqchip_change_notifier;
>   };
Robin Voetter May 31, 2023, 10:03 p.m. UTC | #2
On 5/27/23 01:15, Alex Williamson wrote:
> Dynamically enable Atomic Ops completer support around realize/exit of
> vfio-pci devices reporting host support for these accesses and adhering
> to a minimal configuration standard.  While the Atomic Ops completer
> bits in the root port device capabilities2 register are read-only, the
> PCIe spec does allow RO bits to change to reflect hardware state.  We
> take advantage of that here around the realize and exit functions of
> the vfio-pci device.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Robin Voetter <robin@streamhpc.com>
Tested-by: Robin Voetter <robin@streamhpc.com>

Kind regards,

Robin Voetter
Philippe Mathieu-Daudé May 31, 2023, 10:28 p.m. UTC | #3
On 27/5/23 01:15, Alex Williamson wrote:
> Dynamically enable Atomic Ops completer support around realize/exit of
> vfio-pci devices reporting host support for these accesses and adhering
> to a minimal configuration standard.  While the Atomic Ops completer
> bits in the root port device capabilities2 register are read-only, the
> PCIe spec does allow RO bits to change to reflect hardware state.  We
> take advantage of that here around the realize and exit functions of
> the vfio-pci device.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>   hw/vfio/pci.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
>   hw/vfio/pci.h |  1 +
>   2 files changed, 79 insertions(+)


> +static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
> +{
> +    struct vfio_device_info_cap_pci_atomic_comp *cap;
> +    g_autofree struct vfio_device_info *info = NULL;
> +    PCIBus *bus = pci_get_bus(&vdev->pdev);
> +    PCIDevice *parent = bus->parent_dev;
> +    struct vfio_info_cap_header *hdr;
> +    uint32_t mask = 0;
> +    uint8_t *pos;
> +
> +    /*
> +     * PCIe Atomic Ops completer support is only added automatically for single
> +     * function devices downstream of a root port supporting DEVCAP2.  Support
> +     * is added during realize and, if added, removed during device exit.  The
> +     * single function requirement avoids conflicting requirements should a
> +     * slot be composed of multiple devices with differing capabilities.
> +     */
> +    if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
> +        pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
> +        pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
> +        vdev->pdev.devfn ||
> +        vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
> +        return;
> +    }
> +
> +    pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
> +
> +    /* Abort if there'a already an Atomic Ops configuration on the root port */

Optional here: trace event logging pci_get_long(pos).

> +    if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> +                             PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
> +                             PCI_EXP_DEVCAP2_ATOMIC_COMP128)) {
> +        return;
> +    }
> +
> +    info = vfio_get_device_info(vdev->vbasedev.fd);
> +    if (!info) {
> +        return;
> +    }
> +
> +    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP);
> +    if (!hdr) {
> +        return;
> +    }
> +
> +    cap = (void *)hdr;
> +    if (cap->flags & VFIO_PCI_ATOMIC_COMP32) {
> +        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32;
> +    }
> +    if (cap->flags & VFIO_PCI_ATOMIC_COMP64) {
> +        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64;
> +    }
> +    if (cap->flags & VFIO_PCI_ATOMIC_COMP128) {
> +        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128;
> +    }
> +
> +    if (!mask) {
> +        return;
> +    }

Similarly optional, trace event logging (cap->flags, mask).

> +
> +    pci_long_test_and_set_mask(pos, mask);
> +    vdev->clear_parent_atomics_on_exit = true;
> +}
> +
> +static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
> +{
> +    if (vdev->clear_parent_atomics_on_exit) {
> +        PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
> +        uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
> +
> +        pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
> +                                          PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
> +                                          PCI_EXP_DEVCAP2_ATOMIC_COMP128);
> +    }
> +}

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
diff mbox series

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index bf27a3990564..d8a0fd595560 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1826,6 +1826,81 @@  static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos,
     vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
 }
 
+static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
+{
+    struct vfio_device_info_cap_pci_atomic_comp *cap;
+    g_autofree struct vfio_device_info *info = NULL;
+    PCIBus *bus = pci_get_bus(&vdev->pdev);
+    PCIDevice *parent = bus->parent_dev;
+    struct vfio_info_cap_header *hdr;
+    uint32_t mask = 0;
+    uint8_t *pos;
+
+    /*
+     * PCIe Atomic Ops completer support is only added automatically for single
+     * function devices downstream of a root port supporting DEVCAP2.  Support
+     * is added during realize and, if added, removed during device exit.  The
+     * single function requirement avoids conflicting requirements should a
+     * slot be composed of multiple devices with differing capabilities.
+     */
+    if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
+        pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
+        pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
+        vdev->pdev.devfn ||
+        vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+        return;
+    }
+
+    pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
+
+    /* Abort if there'a already an Atomic Ops configuration on the root port */
+    if (pci_get_long(pos) & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+                             PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
+                             PCI_EXP_DEVCAP2_ATOMIC_COMP128)) {
+        return;
+    }
+
+    info = vfio_get_device_info(vdev->vbasedev.fd);
+    if (!info) {
+        return;
+    }
+
+    hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP);
+    if (!hdr) {
+        return;
+    }
+
+    cap = (void *)hdr;
+    if (cap->flags & VFIO_PCI_ATOMIC_COMP32) {
+        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP32;
+    }
+    if (cap->flags & VFIO_PCI_ATOMIC_COMP64) {
+        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP64;
+    }
+    if (cap->flags & VFIO_PCI_ATOMIC_COMP128) {
+        mask |= PCI_EXP_DEVCAP2_ATOMIC_COMP128;
+    }
+
+    if (!mask) {
+        return;
+    }
+
+    pci_long_test_and_set_mask(pos, mask);
+    vdev->clear_parent_atomics_on_exit = true;
+}
+
+static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
+{
+    if (vdev->clear_parent_atomics_on_exit) {
+        PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
+        uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
+
+        pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+                                          PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
+                                          PCI_EXP_DEVCAP2_ATOMIC_COMP128);
+    }
+}
+
 static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
                                Error **errp)
 {
@@ -1929,6 +2004,8 @@  static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
                            QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0);
             vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
         }
+
+        vfio_pci_enable_rp_atomics(vdev);
     }
 
     /*
@@ -3265,6 +3342,7 @@  static void vfio_exitfn(PCIDevice *pdev)
         timer_free(vdev->intx.mmap_timer);
     }
     vfio_teardown_msi(vdev);
+    vfio_pci_disable_rp_atomics(vdev);
     vfio_bars_exit(vdev);
     vfio_migration_exit(&vdev->vbasedev);
 }
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 2674476d6c77..a2771b9ff3cc 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -174,6 +174,7 @@  struct VFIOPCIDevice {
     bool no_vfio_ioeventfd;
     bool enable_ramfb;
     bool defer_kvm_irq_routing;
+    bool clear_parent_atomics_on_exit;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
 };