Message ID | 20230605235005.20649-2-ankita@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Expose GPU memory as coherently CPU accessible | expand |
On Mon, 5 Jun 2023 16:50:02 -0700 <ankita@nvidia.com> wrote: > From: Ankit Agrawal <ankita@nvidia.com> > > The GPU memory is exposed as device BAR1 to the VM and is discovered > by QEMU through the VFIO_DEVICE_GET_REGION_INFO ioctl. QEMU performs > the mapping to it. > > The GPU memory can be added in the VM as (upto 8) separate NUMA nodes. > To achieve this, QEMU inserts a series of the PXM domains in the SRAT > and communicate this range of nodes to the VM through DSD properties. > > These PXM start and count are added as object properties and pushed to > the SRAT and DST builder code. > > The code is activated only for a set of NVIDIA devices supporting the > feature. > > Signed-off-by: Ankit Agrawal <ankita@nvidia.com> > --- > hw/vfio/pci-quirks.c | 13 +++++++ > hw/vfio/pci.c | 72 +++++++++++++++++++++++++++++++++++++ > hw/vfio/pci.h | 1 + > include/hw/pci/pci_device.h | 3 ++ > 4 files changed, 89 insertions(+) > > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c > index f0147a050a..b7334ccd1d 100644 > --- a/hw/vfio/pci-quirks.c > +++ b/hw/vfio/pci-quirks.c > @@ -1751,3 +1751,16 @@ int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp) > > return 0; > } > + > +bool vfio_has_cpu_coherent_devmem(VFIOPCIDevice *vdev) > +{ > + switch (vdev->device_id) { > + /* Nvidia */ > + case 0x2342: > + case 0x2343: > + case 0x2345: > + return true; > + } > + > + return false; > +} I'm not sure why all of this isn't in pci-quirks.c, but the above function is misleadingly NVIDIA specific by not testing the vendor ID here. Also, none of this looks compatible with hotplug, so shouldn't any of this be enabled only for the vfio-pci-nohotplug device type? 
Thanks, Alex > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index ec9a854361..403516ffb3 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -42,6 +42,8 @@ > #include "qapi/error.h" > #include "migration/blocker.h" > #include "migration/qemu-file.h" > +#include "qapi/visitor.h" > +#include "include/hw/boards.h" > > #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" > > @@ -2824,6 +2826,22 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) > } > } > > +static void vfio_pci_get_gpu_mem_pxm_start(Object *obj, Visitor *v, > + const char *name, > + void *opaque, Error **errp) > +{ > + uint64_t pxm_start = (uintptr_t) opaque; > + visit_type_uint64(v, name, &pxm_start, errp); > +} > + > +static void vfio_pci_get_gpu_mem_pxm_count(Object *obj, Visitor *v, > + const char *name, > + void *opaque, Error **errp) > +{ > + uint64_t pxm_count = (uintptr_t) opaque; > + visit_type_uint64(v, name, &pxm_count, errp); > +} > + > static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) > { > Error *err = NULL; > @@ -2843,6 +2861,53 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) > vdev->req_enabled = false; > } > > +static int vfio_pci_nvidia_dev_mem_probe(VFIOPCIDevice *vPciDev, > + Error **errp) > +{ > + unsigned int num_nodes; > + MemoryRegion *nv2mr = g_malloc0(sizeof(*nv2mr)); > + Object *obj = NULL; > + VFIODevice *vdev = &vPciDev->vbasedev; > + MachineState *ms = MACHINE(qdev_get_machine()); > + > + if (!vfio_has_cpu_coherent_devmem(vPciDev)) { > + return -ENODEV; > + } > + > + if (vdev->type == VFIO_DEVICE_TYPE_PCI) { > + obj = vfio_pci_get_object(vdev); > + } > + > + if (!obj) { > + return -EINVAL; > + } > + > + /* > + * This device has memory that is coherently accessible from the CPU. > + * The memory can be represented by upto 8 seperate memory-only > + * NUMA nodes. 
> + */ > + vPciDev->pdev.has_coherent_memory = true; > + num_nodes = 8; > + > + /* > + * To have 8 unique nodes in the VM, a series of PXM nodes are > + * required to be added to VM's SRAT. Send the information about > + * the starting PXM ID and the count to the ACPI builder code. > + */ > + object_property_add(OBJECT(vPciDev), "gpu_mem_pxm_start", "uint64", > + vfio_pci_get_gpu_mem_pxm_start, NULL, NULL, > + (void *) (uintptr_t) ms->numa_state->num_nodes); > + > + object_property_add(OBJECT(vPciDev), "gpu_mem_pxm_count", "uint64", > + vfio_pci_get_gpu_mem_pxm_count, NULL, NULL, > + (void *) (uintptr_t) num_nodes); > + > + ms->numa_state->num_nodes += num_nodes; > + > + return 0; > +} > + > static void vfio_realize(PCIDevice *pdev, Error **errp) > { > VFIOPCIDevice *vdev = VFIO_PCI(pdev); > @@ -3151,6 +3216,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > } > } > > + if (vdev->vendor_id == PCI_VENDOR_ID_NVIDIA) { > + ret = vfio_pci_nvidia_dev_mem_probe(vdev, errp); > + if (ret && ret != -ENODEV) { > + error_report("Failed to setup NVIDIA dev_mem with error %d", ret); > + } > + } > + > vfio_register_err_notifier(vdev); > vfio_register_req_notifier(vdev); > vfio_setup_resetfn_quirk(vdev); > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index 177abcc8fb..d8791f8f1f 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -226,4 +226,5 @@ void vfio_display_reset(VFIOPCIDevice *vdev); > int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); > void vfio_display_finalize(VFIOPCIDevice *vdev); > > +bool vfio_has_cpu_coherent_devmem(VFIOPCIDevice *vdev); > #endif /* HW_VFIO_VFIO_PCI_H */ > diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h > index d3dd0f64b2..aacd2279ae 100644 > --- a/include/hw/pci/pci_device.h > +++ b/include/hw/pci/pci_device.h > @@ -157,6 +157,9 @@ struct PCIDevice { > MSIVectorReleaseNotifier msix_vector_release_notifier; > MSIVectorPollNotifier msix_vector_poll_notifier; > > + /* GPU coherent memory */ > + bool 
has_coherent_memory; > + > /* ID of standby device in net_failover pair */ > char *failover_pair_id; > uint32_t acpi_index;
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index f0147a050a..b7334ccd1d 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1751,3 +1751,16 @@ int vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp) return 0; } + +bool vfio_has_cpu_coherent_devmem(VFIOPCIDevice *vdev) +{ + switch (vdev->device_id) { + /* Nvidia */ + case 0x2342: + case 0x2343: + case 0x2345: + return true; + } + + return false; +} diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index ec9a854361..403516ffb3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -42,6 +42,8 @@ #include "qapi/error.h" #include "migration/blocker.h" #include "migration/qemu-file.h" +#include "qapi/visitor.h" +#include "include/hw/boards.h" #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" @@ -2824,6 +2826,22 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) } } +static void vfio_pci_get_gpu_mem_pxm_start(Object *obj, Visitor *v, + const char *name, + void *opaque, Error **errp) +{ + uint64_t pxm_start = (uintptr_t) opaque; + visit_type_uint64(v, name, &pxm_start, errp); +} + +static void vfio_pci_get_gpu_mem_pxm_count(Object *obj, Visitor *v, + const char *name, + void *opaque, Error **errp) +{ + uint64_t pxm_count = (uintptr_t) opaque; + visit_type_uint64(v, name, &pxm_count, errp); +} + static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) { Error *err = NULL; @@ -2843,6 +2861,53 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } +static int vfio_pci_nvidia_dev_mem_probe(VFIOPCIDevice *vPciDev, + Error **errp) +{ + unsigned int num_nodes; + MemoryRegion *nv2mr = g_malloc0(sizeof(*nv2mr)); + Object *obj = NULL; + VFIODevice *vdev = &vPciDev->vbasedev; + MachineState *ms = MACHINE(qdev_get_machine()); + + if (!vfio_has_cpu_coherent_devmem(vPciDev)) { + return -ENODEV; + } + + if (vdev->type == VFIO_DEVICE_TYPE_PCI) { + obj = vfio_pci_get_object(vdev); + } + + if (!obj) { + return -EINVAL; + } + + /* + * This device has memory that is 
coherently accessible from the CPU. + * The memory can be represented by upto 8 seperate memory-only + * NUMA nodes. + */ + vPciDev->pdev.has_coherent_memory = true; + num_nodes = 8; + + /* + * To have 8 unique nodes in the VM, a series of PXM nodes are + * required to be added to VM's SRAT. Send the information about + * the starting PXM ID and the count to the ACPI builder code. + */ + object_property_add(OBJECT(vPciDev), "gpu_mem_pxm_start", "uint64", + vfio_pci_get_gpu_mem_pxm_start, NULL, NULL, + (void *) (uintptr_t) ms->numa_state->num_nodes); + + object_property_add(OBJECT(vPciDev), "gpu_mem_pxm_count", "uint64", + vfio_pci_get_gpu_mem_pxm_count, NULL, NULL, + (void *) (uintptr_t) num_nodes); + + ms->numa_state->num_nodes += num_nodes; + + return 0; +} + static void vfio_realize(PCIDevice *pdev, Error **errp) { VFIOPCIDevice *vdev = VFIO_PCI(pdev); @@ -3151,6 +3216,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } + if (vdev->vendor_id == PCI_VENDOR_ID_NVIDIA) { + ret = vfio_pci_nvidia_dev_mem_probe(vdev, errp); + if (ret && ret != -ENODEV) { + error_report("Failed to setup NVIDIA dev_mem with error %d", ret); + } + } + vfio_register_err_notifier(vdev); vfio_register_req_notifier(vdev); vfio_setup_resetfn_quirk(vdev); diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 177abcc8fb..d8791f8f1f 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -226,4 +226,5 @@ void vfio_display_reset(VFIOPCIDevice *vdev); int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); void vfio_display_finalize(VFIOPCIDevice *vdev); +bool vfio_has_cpu_coherent_devmem(VFIOPCIDevice *vdev); #endif /* HW_VFIO_VFIO_PCI_H */ diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index d3dd0f64b2..aacd2279ae 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -157,6 +157,9 @@ struct PCIDevice { MSIVectorReleaseNotifier msix_vector_release_notifier; MSIVectorPollNotifier msix_vector_poll_notifier; + /* GPU coherent memory */ + bool 
has_coherent_memory; + /* ID of standby device in net_failover pair */ char *failover_pair_id; uint32_t acpi_index;