diff mbox series

[v7,1/2] qom: new object to associate device to numa node

Message ID 20240223124223.800078-2-ankita@nvidia.com (mailing list archive)
State New, archived
Headers show
Series acpi: report numa nodes for device memory using GI | expand

Commit Message

Ankit Agrawal Feb. 23, 2024, 12:42 p.m. UTC
From: Ankit Agrawal <ankita@nvidia.com>

NVIDIA GPU's support MIG (Mult-Instance GPUs) feature [1], which allows
partitioning of the GPU device resources (including device memory) into
several (upto 8) isolated instances. Each of the partitioned memory needs
a dedicated NUMA node to operate. The partitions are not fixed and they
can be created/deleted at runtime.

Unfortunately Linux OS does not provide a means to dynamically create/destroy
NUMA nodes and such feature implementation is not expected to be trivial. The
nodes that OS discovers at the boot time while parsing SRAT remains fixed. So
we utilize the Generic Initiator Affinity structures that allows association
between nodes and devices. Multiple GI structures per BDF is possible,
allowing creation of multiple nodes by exposing unique PXM in each of these
structures.

Implement the mechanism to build the GI affinity structures as Qemu currently
does not. Introduce a new acpi-generic-initiator object to allow host admin
link a device with an associated NUMA node. Qemu maintains this association
and use this object to build the requisite GI Affinity Structure.

When multiple numa nodes are associated with a device, it is required to
create those many number of acpi-generic-initiator objects, each representing
a unique device:node association.

Following is one of a decoded GI affinity structure in VM ACPI SRAT.
[0C8h 0200   1]                Subtable Type : 05 [Generic Initiator Affinity]
[0C9h 0201   1]                       Length : 20

[0CAh 0202   1]                    Reserved1 : 00
[0CBh 0203   1]           Device Handle Type : 01
[0CCh 0204   4]             Proximity Domain : 00000007
[0D0h 0208  16]                Device Handle : 00 00 20 00 00 00 00 00 00 00 00
00 00 00 00 00
[0E0h 0224   4]        Flags (decoded below) : 00000001
                                     Enabled : 1
[0E4h 0228   4]                    Reserved2 : 00000000

[0E8h 0232   1]                Subtable Type : 05 [Generic Initiator Affinity]
[0E9h 0233   1]                       Length : 20

An admin can provide a range of acpi-generic-initiator objects, each
associating a device (by providing the id through pci-dev argument)
to the desired numa node (using the node argument). Currently, only PCI
device is supported.

For the grace hopper system, create a range of 8 nodes and associate that
with the device using the acpi-generic-initiator object. While a configuration
of less than 8 nodes per device is allowed, such configuration will prevent
utilization of the feature to the fullest. The following sample creates 8
nodes per PCI device for a VM with 2 PCI devices and link them to the
respecitve PCI device using acpi-generic-initiator objects:

-numa node,nodeid=2 -numa node,nodeid=3 -numa node,nodeid=4 \
-numa node,nodeid=5 -numa node,nodeid=6 -numa node,nodeid=7 \
-numa node,nodeid=8 -numa node,nodeid=9 \
-device vfio-pci-nohotplug,host=0009:01:00.0,bus=pcie.0,addr=04.0,rombar=0,id=dev0 \
-object acpi-generic-initiator,id=gi0,pci-dev=dev0,node=2 \
-object acpi-generic-initiator,id=gi1,pci-dev=dev0,node=3 \
-object acpi-generic-initiator,id=gi2,pci-dev=dev0,node=4 \
-object acpi-generic-initiator,id=gi3,pci-dev=dev0,node=5 \
-object acpi-generic-initiator,id=gi4,pci-dev=dev0,node=6 \
-object acpi-generic-initiator,id=gi5,pci-dev=dev0,node=7 \
-object acpi-generic-initiator,id=gi6,pci-dev=dev0,node=8 \
-object acpi-generic-initiator,id=gi7,pci-dev=dev0,node=9 \

-numa node,nodeid=10 -numa node,nodeid=11 -numa node,nodeid=12 \
-numa node,nodeid=13 -numa node,nodeid=14 -numa node,nodeid=15 \
-numa node,nodeid=16 -numa node,nodeid=17 \
-device vfio-pci-nohotplug,host=0009:01:01.0,bus=pcie.0,addr=05.0,rombar=0,id=dev1 \
-object acpi-generic-initiator,id=gi8,pci-dev=dev1,node=10 \
-object acpi-generic-initiator,id=gi9,pci-dev=dev1,node=11 \
-object acpi-generic-initiator,id=gi10,pci-dev=dev1,node=12 \
-object acpi-generic-initiator,id=gi11,pci-dev=dev1,node=13 \
-object acpi-generic-initiator,id=gi12,pci-dev=dev1,node=14 \
-object acpi-generic-initiator,id=gi13,pci-dev=dev1,node=15 \
-object acpi-generic-initiator,id=gi14,pci-dev=dev1,node=16 \
-object acpi-generic-initiator,id=gi15,pci-dev=dev1,node=17 \

The performance benefits can be realized by providing the NUMA node distances
appropriately (through libvirt tags or Qemu params). The admin can get the
distance among nodes in hardware using `numactl -H`.

[1] https://www.nvidia.com/en-in/technologies/multi-instance-gpu

Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
 hw/acpi/acpi-generic-initiator.c         | 70 ++++++++++++++++++++++++
 hw/acpi/meson.build                      |  1 +
 include/hw/acpi/acpi-generic-initiator.h | 32 +++++++++++
 qapi/qom.json                            | 17 ++++++
 4 files changed, 120 insertions(+)
 create mode 100644 hw/acpi/acpi-generic-initiator.c
 create mode 100644 include/hw/acpi/acpi-generic-initiator.h

Comments

Jonathan Cameron Feb. 27, 2024, 1 p.m. UTC | #1
> diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
> new file mode 100644
> index 0000000000..2f183b029a
> --- /dev/null
> +++ b/include/hw/acpi/acpi-generic-initiator.h

> +typedef struct AcpiGenericInitiatorClass {
> +        ObjectClass parent_class;
Too indented.
> +} AcpiGenericInitiatorClass;
Ankit Agrawal Feb. 28, 2024, 5:35 a.m. UTC | #2
>> diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
>> new file mode 100644
>> index 0000000000..2f183b029a
>> --- /dev/null
>> +++ b/include/hw/acpi/acpi-generic-initiator.h
>
>> +typedef struct AcpiGenericInitiatorClass {
>> +        ObjectClass parent_class;
>Too indented.

Yes, will fix it.

>> +} AcpiGenericInitiatorClass;
Markus Armbruster Feb. 28, 2024, 7:35 a.m. UTC | #3
<ankita@nvidia.com> writes:

> From: Ankit Agrawal <ankita@nvidia.com>
>
> NVIDIA GPU's support MIG (Mult-Instance GPUs) feature [1], which allows
> partitioning of the GPU device resources (including device memory) into
> several (upto 8) isolated instances. Each of the partitioned memory needs
> a dedicated NUMA node to operate. The partitions are not fixed and they
> can be created/deleted at runtime.
>
> Unfortunately Linux OS does not provide a means to dynamically create/destroy
> NUMA nodes and such feature implementation is not expected to be trivial. The
> nodes that OS discovers at the boot time while parsing SRAT remains fixed. So
> we utilize the Generic Initiator Affinity structures that allows association
> between nodes and devices. Multiple GI structures per BDF is possible,
> allowing creation of multiple nodes by exposing unique PXM in each of these
> structures.
>
> Implement the mechanism to build the GI affinity structures as Qemu currently
> does not. Introduce a new acpi-generic-initiator object to allow host admin
> link a device with an associated NUMA node. Qemu maintains this association
> and use this object to build the requisite GI Affinity Structure.
>
> When multiple numa nodes are associated with a device, it is required to
> create those many number of acpi-generic-initiator objects, each representing
> a unique device:node association.
>
> Following is one of a decoded GI affinity structure in VM ACPI SRAT.
> [0C8h 0200   1]                Subtable Type : 05 [Generic Initiator Affinity]
> [0C9h 0201   1]                       Length : 20
>
> [0CAh 0202   1]                    Reserved1 : 00
> [0CBh 0203   1]           Device Handle Type : 01
> [0CCh 0204   4]             Proximity Domain : 00000007
> [0D0h 0208  16]                Device Handle : 00 00 20 00 00 00 00 00 00 00 00
> 00 00 00 00 00
> [0E0h 0224   4]        Flags (decoded below) : 00000001
>                                      Enabled : 1
> [0E4h 0228   4]                    Reserved2 : 00000000
>
> [0E8h 0232   1]                Subtable Type : 05 [Generic Initiator Affinity]
> [0E9h 0233   1]                       Length : 20
>
> An admin can provide a range of acpi-generic-initiator objects, each
> associating a device (by providing the id through pci-dev argument)
> to the desired numa node (using the node argument). Currently, only PCI
> device is supported.
>
> For the grace hopper system, create a range of 8 nodes and associate that
> with the device using the acpi-generic-initiator object. While a configuration
> of less than 8 nodes per device is allowed, such configuration will prevent
> utilization of the feature to the fullest. The following sample creates 8
> nodes per PCI device for a VM with 2 PCI devices and link them to the
> respecitve PCI device using acpi-generic-initiator objects:
>
> -numa node,nodeid=2 -numa node,nodeid=3 -numa node,nodeid=4 \
> -numa node,nodeid=5 -numa node,nodeid=6 -numa node,nodeid=7 \
> -numa node,nodeid=8 -numa node,nodeid=9 \
> -device vfio-pci-nohotplug,host=0009:01:00.0,bus=pcie.0,addr=04.0,rombar=0,id=dev0 \
> -object acpi-generic-initiator,id=gi0,pci-dev=dev0,node=2 \
> -object acpi-generic-initiator,id=gi1,pci-dev=dev0,node=3 \
> -object acpi-generic-initiator,id=gi2,pci-dev=dev0,node=4 \
> -object acpi-generic-initiator,id=gi3,pci-dev=dev0,node=5 \
> -object acpi-generic-initiator,id=gi4,pci-dev=dev0,node=6 \
> -object acpi-generic-initiator,id=gi5,pci-dev=dev0,node=7 \
> -object acpi-generic-initiator,id=gi6,pci-dev=dev0,node=8 \
> -object acpi-generic-initiator,id=gi7,pci-dev=dev0,node=9 \
>
> -numa node,nodeid=10 -numa node,nodeid=11 -numa node,nodeid=12 \
> -numa node,nodeid=13 -numa node,nodeid=14 -numa node,nodeid=15 \
> -numa node,nodeid=16 -numa node,nodeid=17 \
> -device vfio-pci-nohotplug,host=0009:01:01.0,bus=pcie.0,addr=05.0,rombar=0,id=dev1 \
> -object acpi-generic-initiator,id=gi8,pci-dev=dev1,node=10 \
> -object acpi-generic-initiator,id=gi9,pci-dev=dev1,node=11 \
> -object acpi-generic-initiator,id=gi10,pci-dev=dev1,node=12 \
> -object acpi-generic-initiator,id=gi11,pci-dev=dev1,node=13 \
> -object acpi-generic-initiator,id=gi12,pci-dev=dev1,node=14 \
> -object acpi-generic-initiator,id=gi13,pci-dev=dev1,node=15 \
> -object acpi-generic-initiator,id=gi14,pci-dev=dev1,node=16 \
> -object acpi-generic-initiator,id=gi15,pci-dev=dev1,node=17 \
>
> The performance benefits can be realized by providing the NUMA node distances
> appropriately (through libvirt tags or Qemu params). The admin can get the
> distance among nodes in hardware using `numactl -H`.
>
> [1] https://www.nvidia.com/en-in/technologies/multi-instance-gpu
>
> Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
> ---
>  hw/acpi/acpi-generic-initiator.c         | 70 ++++++++++++++++++++++++
>  hw/acpi/meson.build                      |  1 +
>  include/hw/acpi/acpi-generic-initiator.h | 32 +++++++++++
>  qapi/qom.json                            | 17 ++++++
>  4 files changed, 120 insertions(+)
>  create mode 100644 hw/acpi/acpi-generic-initiator.c
>  create mode 100644 include/hw/acpi/acpi-generic-initiator.h
>
> diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
> new file mode 100644
> index 0000000000..1ade2f723f
> --- /dev/null
> +++ b/hw/acpi/acpi-generic-initiator.c
> @@ -0,0 +1,70 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/acpi/acpi-generic-initiator.h"
> +#include "hw/pci/pci_device.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-builtin-visit.h"
> +#include "qapi/visitor.h"
> +#include "qemu/error-report.h"
> +
> +OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, acpi_generic_initiator,
> +                   ACPI_GENERIC_INITIATOR, OBJECT,
> +                   { TYPE_USER_CREATABLE },
> +                   { NULL })
> +
> +OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR)
> +
> +static void acpi_generic_initiator_init(Object *obj)
> +{
> +    AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +
> +    gi->node = MAX_NODES;
> +    gi->pci_dev = NULL;
> +}
> +
> +static void acpi_generic_initiator_finalize(Object *obj)
> +{
> +    AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +
> +    g_free(gi->pci_dev);
> +}
> +
> +static void acpi_generic_initiator_set_pci_device(Object *obj, const char *val,
> +                                                  Error **errp)
> +{
> +    AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +
> +    gi->pci_dev = g_strdup(val);
> +}
> +
> +static void acpi_generic_initiator_set_node(Object *obj, Visitor *v,
> +                                            const char *name, void *opaque,
> +                                            Error **errp)
> +{
> +    AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
> +    uint32_t value;
> +
> +    if (!visit_type_uint32(v, name, &value, errp)) {
> +        return;
> +    }
> +
> +    if (value >= MAX_NODES) {
> +        error_printf("%s: Invalid NUMA node specified\n",
> +                     TYPE_ACPI_GENERIC_INITIATOR);
> +        exit(1);
> +    }
> +
> +    gi->node = value;
> +}
> +
> +static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
> +{
> +    object_class_property_add_str(oc, "pci-dev", NULL,
> +        acpi_generic_initiator_set_pci_device);
> +    object_class_property_add(oc, "node", "int", NULL,
> +        acpi_generic_initiator_set_node, NULL, NULL);
> +}
> diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
> index fc1b952379..2268589519 100644
> --- a/hw/acpi/meson.build
> +++ b/hw/acpi/meson.build
> @@ -1,5 +1,6 @@
>  acpi_ss = ss.source_set()
>  acpi_ss.add(files(
> +  'acpi-generic-initiator.c',
>    'acpi_interface.c',
>    'aml-build.c',
>    'bios-linker-loader.c',
> diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
> new file mode 100644
> index 0000000000..2f183b029a
> --- /dev/null
> +++ b/include/hw/acpi/acpi-generic-initiator.h
> @@ -0,0 +1,32 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
> + */
> +
> +#ifndef ACPI_GENERIC_INITIATOR_H
> +#define ACPI_GENERIC_INITIATOR_H
> +
> +#include "hw/mem/pc-dimm.h"
> +#include "hw/acpi/bios-linker-loader.h"
> +#include "hw/acpi/aml-build.h"
> +#include "sysemu/numa.h"
> +#include "qemu/uuid.h"
> +#include "qom/object.h"
> +#include "qom/object_interfaces.h"
> +
> +#define TYPE_ACPI_GENERIC_INITIATOR "acpi-generic-initiator"
> +
> +typedef struct AcpiGenericInitiator {
> +    /* private */
> +    Object parent;
> +
> +    /* public */
> +    char *pci_dev;
> +    uint16_t node;
> +} AcpiGenericInitiator;
> +
> +typedef struct AcpiGenericInitiatorClass {
> +        ObjectClass parent_class;
> +} AcpiGenericInitiatorClass;
> +
> +#endif
> diff --git a/qapi/qom.json b/qapi/qom.json
> index c53ef978ff..7efa0e14f6 100644
> --- a/qapi/qom.json
> +++ b/qapi/qom.json
> @@ -794,6 +794,21 @@
>  { 'struct': 'VfioUserServerProperties',
>    'data': { 'socket': 'SocketAddress', 'device': 'str' } }
>  
> +##
> +# @AcpiGenericInitiatorProperties:
> +#
> +# Properties for acpi-generic-initiator objects.
> +#
> +# @pci-dev: PCI device ID to be associated with the node
> +#
> +# @node: numa node associated with the PCI device

NUMA

> +#
> +# Since: 9.0
> +##
> +{ 'struct': 'AcpiGenericInitiatorProperties',
> +  'data': { 'pci-dev': 'str',
> +            'node': 'uint32' } }
> +
>  ##
>  # @RngProperties:
>  #
> @@ -911,6 +926,7 @@
>  ##
>  { 'enum': 'ObjectType',
>    'data': [
> +    'acpi-generic-initiator',
>      'authz-list',
>      'authz-listfile',
>      'authz-pam',
> @@ -981,6 +997,7 @@
>              'id': 'str' },
>    'discriminator': 'qom-type',
>    'data': {
> +      'acpi-generic-initiator':     'AcpiGenericInitiatorProperties',
>        'authz-list':                 'AuthZListProperties',
>        'authz-listfile':             'AuthZListFileProperties',
>        'authz-pam':                  'AuthZPAMProperties',

Jonathan, you pointed out interface design issues in your review of v2.
Are you fully satisfied with the interface in v3?
Jonathan Cameron Feb. 28, 2024, 1:55 p.m. UTC | #4
> >  ##
> >  # @RngProperties:
> >  #
> > @@ -911,6 +926,7 @@
> >  ##
> >  { 'enum': 'ObjectType',
> >    'data': [
> > +    'acpi-generic-initiator',
> >      'authz-list',
> >      'authz-listfile',
> >      'authz-pam',
> > @@ -981,6 +997,7 @@
> >              'id': 'str' },
> >    'discriminator': 'qom-type',
> >    'data': {
> > +      'acpi-generic-initiator':     'AcpiGenericInitiatorProperties',
> >        'authz-list':                 'AuthZListProperties',
> >        'authz-listfile':             'AuthZListFileProperties',
> >        'authz-pam':                  'AuthZPAMProperties',  
> 
> Jonathan, you pointed out interface design issues in your review of v2.
> Are you fully satisfied with the interface in v3?

Yes. I'm fine with the interface in this version (though it's v7, so I'm lost
on v2 vs v3!)

Jonathan

>
Markus Armbruster Feb. 28, 2024, 4:08 p.m. UTC | #5
Jonathan Cameron <Jonathan.Cameron@Huawei.com> writes:

>> >  ##
>> >  # @RngProperties:
>> >  #
>> > @@ -911,6 +926,7 @@
>> >  ##
>> >  { 'enum': 'ObjectType',
>> >    'data': [
>> > +    'acpi-generic-initiator',
>> >      'authz-list',
>> >      'authz-listfile',
>> >      'authz-pam',
>> > @@ -981,6 +997,7 @@
>> >              'id': 'str' },
>> >    'discriminator': 'qom-type',
>> >    'data': {
>> > +      'acpi-generic-initiator':     'AcpiGenericInitiatorProperties',
>> >        'authz-list':                 'AuthZListProperties',
>> >        'authz-listfile':             'AuthZListFileProperties',
>> >        'authz-pam':                  'AuthZPAMProperties',  
>> 
>> Jonathan, you pointed out interface design issues in your review of v2.
>> Are you fully satisfied with the interface in v3?
>
> Yes. I'm fine with the interface in this version (though it's v7, so I'm lost
> on v2 vs v3!)

Looks like I can't count to 7!

With NUMA capitalized in the doc comment, QAPI schema
Acked-by: Markus Armbruster <armbru@redhat.com>

Thanks!
Ankit Agrawal Feb. 28, 2024, 4:50 p.m. UTC | #6
>>> Jonathan, you pointed out interface design issues in your review of v2.>
>> Are you fully satisfied with the interface in v3?
>>
>> Yes. I'm fine with the interface in this version (though it's v7, so I'm lost
>> on v2 vs v3!)
>
> Looks like I can't count to 7!
>
> With NUMA capitalized in the doc comment, QAPI schema
> Acked-by: Markus Armbruster <armbru@redhat.com>
>
> Thanks!

Thanks! Will fix that in the next version.
Jonathan Cameron Feb. 29, 2024, 10:22 a.m. UTC | #7
On Wed, 28 Feb 2024 16:50:30 +0000
Ankit Agrawal <ankita@nvidia.com> wrote:

> >>> Jonathan, you pointed out interface design issues in your review of v2.>  
> >> Are you fully satisfied with the interface in v3?
> >>
> >> Yes. I'm fine with the interface in this version (though it's v7, so I'm lost
> >> on v2 vs v3!)  
> >
> > Looks like I can't count to 7!
> >
> > With NUMA capitalized in the doc comment, QAPI schema
> > Acked-by: Markus Armbruster <armbru@redhat.com>
> >
> > Thanks!  
> 
> Thanks! Will fix that in the next version.

The following is really me arguing with myself, so can probably be
ignored, but maybe it will spark an idea from someone else!

One trivial tweak that might make our life easier if anyone adds
support in the future for the other device handle type might be to go
with simply dev rather than pci-dev.

There is a sticky corner though if a device is a PCI device
and in ACPI DSDT so maybe we are better off adding acpi-dev
to take either pci-dev or acpi-dev?

Annoyingly for generic ports, (I'm reusing this infrastructure here)
the kernel code currently only deals with the ACPI form (for CXL host
bridges).  Given I point that at the bus of a PXB_CXL it is both
a PCI device, and the only handle we have for getting to the
Root Bridge ACPI handle.

So I think I've argued myself around to thinking we need to extend
the interface with another optional parameter if we ever do support
the ACPI handle for generic initiators :(

Jonathan
Ankit Agrawal Feb. 29, 2024, 1 p.m. UTC | #8
>> >>> Jonathan, you pointed out interface design issues in your review of v2.>
>> >> Are you fully satisfied with the interface in v3?
>> >>
>> >> Yes. I'm fine with the interface in this version (though it's v7, so I'm lost
>> >> on v2 vs v3!)
>> >
>> > Looks like I can't count to 7!
>> >
>> > With NUMA capitalized in the doc comment, QAPI schema
>> > Acked-by: Markus Armbruster <armbru@redhat.com>
>> >
>> > Thanks!
>>
>> Thanks! Will fix that in the next version.
>
> The following is really me arguing with myself, so can probably be
> ignored, but maybe it will spark an idea from someone else!
>
> One trivial tweak that might make our life easier if anyone adds
> support in the future for the other device handle type might be to go
> with simply dev rather than pci-dev.
>
> There is a sticky corner though if a device is a PCI device
> and in ACPI DSDT so maybe we are better off adding acpi-dev
> to take either pci-dev or acpi-dev?

That use case does complicate the situation. Do you of any such
use case for generic initiator?

As for your suggestion of using acpi-dev as the arg to take both
pci-dev and acpi-dev.. Would that mean sending a pure pci device
(not the corner case you mentioned) through the acpi-dev argument
as well? Not sure if that would appropriate.

> Annoyingly for generic ports, (I'm reusing this infrastructure here)
> the kernel code currently only deals with the ACPI form (for CXL host
> bridges).  Given I point that at the bus of a PXB_CXL it is both
> a PCI device, and the only handle we have for getting to the
> Root Bridge ACPI handle.

So IIUC, you need to pass a PCI device to the generic port object, but use
that to reach the ACPI handle and build the Generic port affinity structure
for an ACPI device?

> So I think I've argued myself around to thinking we need to extend
> the interface with another optional parameter if we ever do support
> the ACPI handle for generic initiators :(
>
> Jonathan
Jonathan Cameron Feb. 29, 2024, 4:32 p.m. UTC | #9
On Thu, 29 Feb 2024 13:00:27 +0000
Ankit Agrawal <ankita@nvidia.com> wrote:

> >> >>> Jonathan, you pointed out interface design issues in your review of v2.>  
> >> >> Are you fully satisfied with the interface in v3?
> >> >>
> >> >> Yes. I'm fine with the interface in this version (though it's v7, so I'm lost
> >> >> on v2 vs v3!)  
> >> >
> >> > Looks like I can't count to 7!
> >> >
> >> > With NUMA capitalized in the doc comment, QAPI schema
> >> > Acked-by: Markus Armbruster <armbru@redhat.com>
> >> >
> >> > Thanks!  
> >>
> >> Thanks! Will fix that in the next version.  
> >
> > The following is really me arguing with myself, so can probably be
> > ignored, but maybe it will spark an idea from someone else!
> >
> > One trivial tweak that might make our life easier if anyone adds
> > support in the future for the other device handle type might be to go
> > with simply dev rather than pci-dev.
> >
> > There is a sticky corner though if a device is a PCI device
> > and in ACPI DSDT so maybe we are better off adding acpi-dev
> > to take either pci-dev or acpi-dev?  
> 
> That use case does complicate the situation. Do you of any such
> use case for generic initiator?

In physical systems yes - in QEMU not yet, though it's a quirk
of the available ids to get to the ACPI devices (which oddly
are PCI devices :()

> 
> As for your suggestion of using acpi-dev as the arg to take both
> pci-dev and acpi-dev.. Would that mean sending a pure pci device
> (not the corner case you mentioned) through the acpi-dev argument
> as well? Not sure if that would appropriate.

Ah, looking up my description is unclear. I meant two optional parameters
pci-dev or acpi-dev - which one was supplied would indicate the type
of handle to be used.

> 
> > Annoyingly for generic ports, (I'm reusing this infrastructure here)
> > the kernel code currently only deals with the ACPI form (for CXL host
> > bridges).  Given I point that at the bus of a PXB_CXL it is both
> > a PCI device, and the only handle we have for getting to the
> > Root Bridge ACPI handle.  
> 
> So IIUC, you need to pass a PCI device to the generic port object, but use
> that to reach the ACPI handle and build the Generic port affinity structure
> for an ACPI device?

Yes.  Slightly shortcut is that the UID is the bus number for all the
relevant devices so I can abuse that.  QEMU doesn't keep track of
the ACPI handles directly so this is the current cleanest solution.

> 
> > So I think I've argued myself around to thinking we need to extend
> > the interface with another optional parameter if we ever do support
> > the ACPI handle for generic initiators :(
> >
> > Jonatha
Ankit Agrawal March 1, 2024, 8:33 a.m. UTC | #10
>> As for your suggestion of using acpi-dev as the arg to take both
>> pci-dev and acpi-dev.. Would that mean sending a pure pci device
>> (not the corner case you mentioned) through the acpi-dev argument
>> as well? Not sure if that would appropriate.
>
> Ah, looking up my description is unclear. I meant two optional parameters
> pci-dev or acpi-dev - which one was supplied would indicate the type
> of handle to be used.

Yes, that makes sense. But for now only have pci-dev until we have any
acpi-dev related code added? IIRC, this is what we discussed earlier.
Alex Williamson March 1, 2024, 4:13 p.m. UTC | #11
On Fri, 1 Mar 2024 08:33:42 +0000
Ankit Agrawal <ankita@nvidia.com> wrote:

> >> As for your suggestion of using acpi-dev as the arg to take both
> >> pci-dev and acpi-dev.. Would that mean sending a pure pci device
> >> (not the corner case you mentioned) through the acpi-dev argument
> >> as well? Not sure if that would appropriate.  
> >
> > Ah, looking up my description is unclear. I meant two optional parameters
> > pci-dev or acpi-dev - which one was supplied would indicate the type
> > of handle to be used.  
> 
> Yes, that makes sense. But for now only have pci-dev until we have any
> acpi-dev related code added? IIRC, this is what we discussed earlier.

Yep, I think we acknowledged that this device supports either PCI or
ACPI devices and we only currently have a use case for PCI devices, so
that's what's implemented and what the interface expects.  A separate
ACPI device interface could be added later or derived from updating the
interface to accept a generic device object.  Thanks,

Alex
diff mbox series

Patch

diff --git a/hw/acpi/acpi-generic-initiator.c b/hw/acpi/acpi-generic-initiator.c
new file mode 100644
index 0000000000..1ade2f723f
--- /dev/null
+++ b/hw/acpi/acpi-generic-initiator.c
@@ -0,0 +1,70 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "qemu/osdep.h"
+#include "hw/acpi/acpi-generic-initiator.h"
+#include "hw/pci/pci_device.h"
+#include "qapi/error.h"
+#include "qapi/qapi-builtin-visit.h"
+#include "qapi/visitor.h"
+#include "qemu/error-report.h"
+
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(AcpiGenericInitiator, acpi_generic_initiator,
+                   ACPI_GENERIC_INITIATOR, OBJECT,
+                   { TYPE_USER_CREATABLE },
+                   { NULL })
+
+OBJECT_DECLARE_SIMPLE_TYPE(AcpiGenericInitiator, ACPI_GENERIC_INITIATOR)
+
+static void acpi_generic_initiator_init(Object *obj)
+{
+    AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+
+    gi->node = MAX_NODES;
+    gi->pci_dev = NULL;
+}
+
+static void acpi_generic_initiator_finalize(Object *obj)
+{
+    AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+
+    g_free(gi->pci_dev);
+}
+
+static void acpi_generic_initiator_set_pci_device(Object *obj, const char *val,
+                                                  Error **errp)
+{
+    AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+
+    gi->pci_dev = g_strdup(val);
+}
+
+static void acpi_generic_initiator_set_node(Object *obj, Visitor *v,
+                                            const char *name, void *opaque,
+                                            Error **errp)
+{
+    AcpiGenericInitiator *gi = ACPI_GENERIC_INITIATOR(obj);
+    uint32_t value;
+
+    if (!visit_type_uint32(v, name, &value, errp)) {
+        return;
+    }
+
+    if (value >= MAX_NODES) {
+        error_printf("%s: Invalid NUMA node specified\n",
+                     TYPE_ACPI_GENERIC_INITIATOR);
+        exit(1);
+    }
+
+    gi->node = value;
+}
+
+static void acpi_generic_initiator_class_init(ObjectClass *oc, void *data)
+{
+    object_class_property_add_str(oc, "pci-dev", NULL,
+        acpi_generic_initiator_set_pci_device);
+    object_class_property_add(oc, "node", "int", NULL,
+        acpi_generic_initiator_set_node, NULL, NULL);
+}
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index fc1b952379..2268589519 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -1,5 +1,6 @@ 
 acpi_ss = ss.source_set()
 acpi_ss.add(files(
+  'acpi-generic-initiator.c',
   'acpi_interface.c',
   'aml-build.c',
   'bios-linker-loader.c',
diff --git a/include/hw/acpi/acpi-generic-initiator.h b/include/hw/acpi/acpi-generic-initiator.h
new file mode 100644
index 0000000000..2f183b029a
--- /dev/null
+++ b/include/hw/acpi/acpi-generic-initiator.h
@@ -0,0 +1,32 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef ACPI_GENERIC_INITIATOR_H
+#define ACPI_GENERIC_INITIATOR_H
+
+#include "hw/mem/pc-dimm.h"
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/acpi/aml-build.h"
+#include "sysemu/numa.h"
+#include "qemu/uuid.h"
+#include "qom/object.h"
+#include "qom/object_interfaces.h"
+
+#define TYPE_ACPI_GENERIC_INITIATOR "acpi-generic-initiator"
+
+typedef struct AcpiGenericInitiator {
+    /* private */
+    Object parent;
+
+    /* public */
+    char *pci_dev;
+    uint16_t node;
+} AcpiGenericInitiator;
+
+typedef struct AcpiGenericInitiatorClass {
+        ObjectClass parent_class;
+} AcpiGenericInitiatorClass;
+
+#endif
diff --git a/qapi/qom.json b/qapi/qom.json
index c53ef978ff..7efa0e14f6 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -794,6 +794,21 @@ 
 { 'struct': 'VfioUserServerProperties',
   'data': { 'socket': 'SocketAddress', 'device': 'str' } }
 
+##
+# @AcpiGenericInitiatorProperties:
+#
+# Properties for acpi-generic-initiator objects.
+#
+# @pci-dev: PCI device ID to be associated with the node
+#
+# @node: numa node associated with the PCI device
+#
+# Since: 9.0
+##
+{ 'struct': 'AcpiGenericInitiatorProperties',
+  'data': { 'pci-dev': 'str',
+            'node': 'uint32' } }
+
 ##
 # @RngProperties:
 #
@@ -911,6 +926,7 @@ 
 ##
 { 'enum': 'ObjectType',
   'data': [
+    'acpi-generic-initiator',
     'authz-list',
     'authz-listfile',
     'authz-pam',
@@ -981,6 +997,7 @@ 
             'id': 'str' },
   'discriminator': 'qom-type',
   'data': {
+      'acpi-generic-initiator':     'AcpiGenericInitiatorProperties',
       'authz-list':                 'AuthZListProperties',
       'authz-listfile':             'AuthZListFileProperties',
       'authz-pam':                  'AuthZPAMProperties',