diff mbox series

[v1,4/4] acpi/gpex: patch guest DSDT for dev mem information

Message ID 20230915024559.6565-5-ankita@nvidia.com (mailing list archive)
State New, archived
Headers show
Series vfio: report NUMA nodes for device memory | expand

Commit Message

Ankit Agrawal Sept. 15, 2023, 2:45 a.m. UTC
From: Ankit Agrawal <ankita@nvidia.com>

To add the memory in the guest as NUMA nodes, the guest needs the PXM node
index and the total count of nodes associated with the memory. The range of
proximity domains is communicated to the VM as part of the guest ACPI
using the nvidia,gpu-mem-pxm-start and nvidia,gpu-mem-pxm-count _DSD
properties. These values respectively represent the starting proximity
domain id and the count. Kernel modules can then fetch this information
and determine the NUMA node id using pxm_to_node().

Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
 hw/pci-host/gpex-acpi.c | 69 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

Comments

Igor Mammedov Sept. 15, 2023, 3:13 p.m. UTC | #1
On Thu, 14 Sep 2023 19:45:59 -0700
<ankita@nvidia.com> wrote:

> From: Ankit Agrawal <ankita@nvidia.com>
> 
> To add the memory in the guest as NUMA nodes, it needs the PXM node index
> and the total count of nodes associated with the memory. The range of
> proximity domains is communicated to the VM as part of the guest ACPI

> using the nvidia,gpu-mem-pxm-start and nvidia,gpu-mem-pxm-count DSD
The above examples should use devices that are (or will be) available in QEMU,
not out-of-tree ones.

> properties. These values respectively represent the starting proximity
> domain id and the count. Kernel modules can then fetch this information
> and determine the numa node id using pxm_to_node().
> 
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  hw/pci-host/gpex-acpi.c | 69 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 69 insertions(+)
> 
> diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
> index 7c7316bc96..0548feace1 100644
> --- a/hw/pci-host/gpex-acpi.c
> +++ b/hw/pci-host/gpex-acpi.c
> @@ -49,6 +49,72 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
>      }
>  }
>  
> +static void acpi_dsdt_add_cohmem_device(Aml *dev, int32_t devfn,
> +                                        uint64_t dev_mem_pxm_start,
> +                                        uint64_t dev_mem_pxm_count)
> +{
> +    Aml *memdev = aml_device("CMD%X", PCI_SLOT(devfn));
> +    Aml *pkg = aml_package(2);
> +    Aml *pkg1 = aml_package(2);
> +    Aml *pkg2 = aml_package(2);
> +    Aml *dev_pkg = aml_package(2);
> +    Aml *UUID;
> +
> +    aml_append(memdev, aml_name_decl("_ADR", aml_int(PCI_SLOT(devfn) << 16)));

PCI devices (especially endpoints) are typically enumerated by
bus-specific means (i.e. not by ACPI).

And whether OSPM will honor the remainder of the AML here is very questionable.

> +
> +    aml_append(pkg1, aml_string("dev-mem-pxm-start"));
> +    aml_append(pkg1, aml_int(dev_mem_pxm_start));
> +
> +    aml_append(pkg2, aml_string("dev-mem-pxm-count"));
> +    aml_append(pkg2, aml_int(dev_mem_pxm_count));
> +
> +    aml_append(pkg, pkg1);
> +    aml_append(pkg, pkg2);
> +
> +    UUID = aml_touuid("DAFFD814-6EBA-4D8C-8A91-BC9BBF4AA301");

I'm not a fan of free-form UUIDs, and the one above seems to be such a case:
https://uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf

Looking at the above doc, this UUID also requires a HID/ACPI ID
to describe the data structure definition, which this patch is missing.
It's also questionable whether _HID and _ADR are allowed to go together.

PS:
Commit message and comments here should have appropriate pointers
to relevant specs.

> +    aml_append(dev_pkg, UUID);
> +    aml_append(dev_pkg, pkg);
> +
> +    aml_append(memdev, aml_name_decl("_DSD", dev_pkg));
> +    aml_append(dev, memdev);
> +}
> +
> +static void find_mem_device(PCIBus *bus, PCIDevice *pdev,
> +                            void *opaque)
> +{
> +    Aml *dev = (Aml *)opaque;
> +
> +    if (bus == NULL) {
> +        return;
> +    }
> +
> +    if (pdev->has_coherent_memory) {
> +        Object *po = OBJECT(pdev);
> +
> +        if (po == NULL) {
> +            return;
> +        }
> +
> +        uint64_t pxm_start
> +           = object_property_get_uint(po, "dev_mem_pxm_start", NULL);
> +        uint64_t pxm_count
> +           = object_property_get_uint(po, "dev_mem_pxm_count", NULL);
> +
> +        acpi_dsdt_add_cohmem_device(dev, pdev->devfn, pxm_start, pxm_count);
> +    }
> +}
> +
> +static void acpi_dsdt_find_and_add_cohmem_device(PCIBus *bus, Aml *dev)
> +{
> +    if (bus == NULL) {
> +        return;
> +    }
> +
> +    pci_for_each_device_reverse(bus, pci_bus_num(bus),
> +                                find_mem_device, dev);
> +
> +}
> +
>  static void acpi_dsdt_add_pci_osc(Aml *dev)
>  {
>      Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf;
> @@ -207,7 +273,10 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
>  
>      acpi_dsdt_add_pci_route_table(dev, cfg->irq);
>  
> +    acpi_dsdt_find_and_add_cohmem_device(cfg->bus, dev);
> +
>      method = aml_method("_CBA", 0, AML_NOTSERIALIZED);
> +
>      aml_append(method, aml_return(aml_int(cfg->ecam.base)));
>      aml_append(dev, method);
>
Jonathan Cameron Sept. 27, 2023, 11:42 a.m. UTC | #2
On Thu, 14 Sep 2023 19:45:59 -0700
<ankita@nvidia.com> wrote:

> From: Ankit Agrawal <ankita@nvidia.com>
> 
> To add the memory in the guest as NUMA nodes, the guest needs the PXM node
> index and the total count of nodes associated with the memory. The range of
> proximity domains is communicated to the VM as part of the guest ACPI
> using the nvidia,gpu-mem-pxm-start and nvidia,gpu-mem-pxm-count _DSD
> properties. These values respectively represent the starting proximity
> domain id and the count. Kernel modules can then fetch this information
> and determine the NUMA node id using pxm_to_node().
> 
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>

Hi Ankit,

I'm not a fan of reading AML blobs, but they are better than reading
code that is generating AML.  Can we have an example of the DSDT blob
this generates?

In particular I'm not sure what the relationship of this new device
is to the related PCI bits and pieces.  Looks like it's not doing what
I'd normally expect which would be to add the _DSD to an entry for the
PCI EP.  Obviously might be too late to do that given need to match
physical system, but I'd like the opportunity to moan about it anyway :)

As a note, we have various _DSD and _DSM methods associated with PCI devices
on our Kunpeng servers (usually for odd power control or reset corner cases or
errata workarounds) and they work very well without needing to put
the stuff in a separate device - hence I'm not really understanding
why you'd do it this way...

Jonathan

> ---
>  hw/pci-host/gpex-acpi.c | 69 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 69 insertions(+)
> 
> diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
> index 7c7316bc96..0548feace1 100644
> --- a/hw/pci-host/gpex-acpi.c
> +++ b/hw/pci-host/gpex-acpi.c
> @@ -49,6 +49,72 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
>      }
>  }
>  
> +static void acpi_dsdt_add_cohmem_device(Aml *dev, int32_t devfn,
> +                                        uint64_t dev_mem_pxm_start,
> +                                        uint64_t dev_mem_pxm_count)
> +{
> +    Aml *memdev = aml_device("CMD%X", PCI_SLOT(devfn));
> +    Aml *pkg = aml_package(2);
> +    Aml *pkg1 = aml_package(2);
> +    Aml *pkg2 = aml_package(2);
> +    Aml *dev_pkg = aml_package(2);
> +    Aml *UUID;
> +
> +    aml_append(memdev, aml_name_decl("_ADR", aml_int(PCI_SLOT(devfn) << 16)));
> +
> +    aml_append(pkg1, aml_string("dev-mem-pxm-start"));
> +    aml_append(pkg1, aml_int(dev_mem_pxm_start));
> +
> +    aml_append(pkg2, aml_string("dev-mem-pxm-count"));
> +    aml_append(pkg2, aml_int(dev_mem_pxm_count));
> +
> +    aml_append(pkg, pkg1);
> +    aml_append(pkg, pkg2);
> +
> +    UUID = aml_touuid("DAFFD814-6EBA-4D8C-8A91-BC9BBF4AA301");
> +    aml_append(dev_pkg, UUID);
> +    aml_append(dev_pkg, pkg);
> +
> +    aml_append(memdev, aml_name_decl("_DSD", dev_pkg));
> +    aml_append(dev, memdev);
> +}
> +
> +static void find_mem_device(PCIBus *bus, PCIDevice *pdev,
> +                            void *opaque)
> +{
> +    Aml *dev = (Aml *)opaque;
> +
> +    if (bus == NULL) {
> +        return;
> +    }
> +
> +    if (pdev->has_coherent_memory) {
> +        Object *po = OBJECT(pdev);
> +
> +        if (po == NULL) {
> +            return;
> +        }
> +
> +        uint64_t pxm_start
> +           = object_property_get_uint(po, "dev_mem_pxm_start", NULL);
> +        uint64_t pxm_count
> +           = object_property_get_uint(po, "dev_mem_pxm_count", NULL);
> +
> +        acpi_dsdt_add_cohmem_device(dev, pdev->devfn, pxm_start, pxm_count);
> +    }
> +}
> +
> +static void acpi_dsdt_find_and_add_cohmem_device(PCIBus *bus, Aml *dev)
> +{
> +    if (bus == NULL) {
> +        return;
> +    }
> +
> +    pci_for_each_device_reverse(bus, pci_bus_num(bus),
> +                                find_mem_device, dev);
> +
> +}
> +
>  static void acpi_dsdt_add_pci_osc(Aml *dev)
>  {
>      Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf;
> @@ -207,7 +273,10 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
>  
>      acpi_dsdt_add_pci_route_table(dev, cfg->irq);
>  
> +    acpi_dsdt_find_and_add_cohmem_device(cfg->bus, dev);
> +
>      method = aml_method("_CBA", 0, AML_NOTSERIALIZED);
> +
>      aml_append(method, aml_return(aml_int(cfg->ecam.base)));
>      aml_append(dev, method);
>
diff mbox series

Patch

diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index 7c7316bc96..0548feace1 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -49,6 +49,72 @@  static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq)
     }
 }
 
+static void acpi_dsdt_add_cohmem_device(Aml *dev, int32_t devfn,
+                                        uint64_t dev_mem_pxm_start,
+                                        uint64_t dev_mem_pxm_count)
+{
+    Aml *memdev = aml_device("CMD%X", PCI_SLOT(devfn));
+    Aml *pkg = aml_package(2);
+    Aml *pkg1 = aml_package(2);
+    Aml *pkg2 = aml_package(2);
+    Aml *dev_pkg = aml_package(2);
+    Aml *UUID;
+
+    aml_append(memdev, aml_name_decl("_ADR", aml_int(PCI_SLOT(devfn) << 16)));
+
+    aml_append(pkg1, aml_string("dev-mem-pxm-start"));
+    aml_append(pkg1, aml_int(dev_mem_pxm_start));
+
+    aml_append(pkg2, aml_string("dev-mem-pxm-count"));
+    aml_append(pkg2, aml_int(dev_mem_pxm_count));
+
+    aml_append(pkg, pkg1);
+    aml_append(pkg, pkg2);
+
+    UUID = aml_touuid("DAFFD814-6EBA-4D8C-8A91-BC9BBF4AA301");
+    aml_append(dev_pkg, UUID);
+    aml_append(dev_pkg, pkg);
+
+    aml_append(memdev, aml_name_decl("_DSD", dev_pkg));
+    aml_append(dev, memdev);
+}
+
+static void find_mem_device(PCIBus *bus, PCIDevice *pdev,
+                            void *opaque)
+{
+    Aml *dev = (Aml *)opaque;
+
+    if (bus == NULL) {
+        return;
+    }
+
+    if (pdev->has_coherent_memory) {
+        Object *po = OBJECT(pdev);
+
+        if (po == NULL) {
+            return;
+        }
+
+        uint64_t pxm_start
+           = object_property_get_uint(po, "dev_mem_pxm_start", NULL);
+        uint64_t pxm_count
+           = object_property_get_uint(po, "dev_mem_pxm_count", NULL);
+
+        acpi_dsdt_add_cohmem_device(dev, pdev->devfn, pxm_start, pxm_count);
+    }
+}
+
+static void acpi_dsdt_find_and_add_cohmem_device(PCIBus *bus, Aml *dev)
+{
+    if (bus == NULL) {
+        return;
+    }
+
+    pci_for_each_device_reverse(bus, pci_bus_num(bus),
+                                find_mem_device, dev);
+
+}
+
 static void acpi_dsdt_add_pci_osc(Aml *dev)
 {
     Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf;
@@ -207,7 +273,10 @@  void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
 
     acpi_dsdt_add_pci_route_table(dev, cfg->irq);
 
+    acpi_dsdt_find_and_add_cohmem_device(cfg->bus, dev);
+
     method = aml_method("_CBA", 0, AML_NOTSERIALIZED);
+
     aml_append(method, aml_return(aml_int(cfg->ecam.base)));
     aml_append(dev, method);