
[v5,4/4] hw/acpi/aml-build: Use existing CPU topology to build PPTT table

Message ID 20220403145953.10522-5-gshan@redhat.com (mailing list archive)
State New, archived
Series hw/arm/virt: Fix CPU's default NUMA node ID

Commit Message

Gavin Shan April 3, 2022, 2:59 p.m. UTC
When the PPTT table is built, the CPU topology is re-calculated, but
it's unnecessary because the CPU topology has been populated in
virt_possible_cpu_arch_ids() on the arm/virt machine.

This reworks build_pptt() to avoid the recalculation by reusing the
existing topology in ms->possible_cpus. Currently, the only user of
build_pptt() is the arm/virt machine.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
 1 file changed, 38 insertions(+), 62 deletions(-)

Comments

Jonathan Cameron April 12, 2022, 3:40 p.m. UTC | #1
On Sun,  3 Apr 2022 22:59:53 +0800
Gavin Shan <gshan@redhat.com> wrote:

> When the PPTT table is built, the CPU topology is re-calculated, but
> it's unnecessary because the CPU topology has been populated in
> virt_possible_cpu_arch_ids() on the arm/virt machine.
> 
> This reworks build_pptt() to avoid the recalculation by reusing the
> existing topology in ms->possible_cpus. Currently, the only user of
> build_pptt() is the arm/virt machine.
> 
> Signed-off-by: Gavin Shan <gshan@redhat.com>

Hi Gavin,

My compiler isn't being very smart today and gives a bunch of
"maybe used uninitialized" warnings for socket_offset, cluster_offset
and core_offset.

They probably are initialized in all real paths, but I think you need to
set them to something at instantiation to keep the compiler happy.
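
For instance, a minimal sketch of such a change (hypothetical; the actual
v6 fix may differ):

    /*
     * Zero-initialize to silence the warning; 0 can never alias a real
     * node offset, since offset 0 is the PPTT table header itself.
     */
    uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0;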

Thanks,

Jonathan

> ---
>  hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>  1 file changed, 38 insertions(+), 62 deletions(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index 4086879ebf..4b0f9df3e3 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>                  const char *oem_id, const char *oem_table_id)
>  {
>      MachineClass *mc = MACHINE_GET_CLASS(ms);
> -    GQueue *list = g_queue_new();
> -    guint pptt_start = table_data->len;
> -    guint parent_offset;
> -    guint length, i;
> -    int uid = 0;
> -    int socket;
> +    CPUArchIdList *cpus = ms->possible_cpus;
> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
> +    uint32_t socket_offset, cluster_offset, core_offset;
> +    uint32_t pptt_start = table_data->len;
> +    int n;
>      AcpiTable table = { .sig = "PPTT", .rev = 2,
>                          .oem_id = oem_id, .oem_table_id = oem_table_id };
>  
>      acpi_table_begin(&table, table_data);
>  
> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
> -        g_queue_push_tail(list,
> -            GUINT_TO_POINTER(table_data->len - pptt_start));
> -        build_processor_hierarchy_node(
> -            table_data,
> -            /*
> -             * Physical package - represents the boundary
> -             * of a physical package
> -             */
> -            (1 << 0),
> -            0, socket, NULL, 0);
> -    }
> +    for (n = 0; n < cpus->len; n++) {
> +        if (cpus->cpus[n].props.socket_id != socket_id) {
> +            socket_id = cpus->cpus[n].props.socket_id;
> +            cluster_id = -1;
> +            core_id = -1;
> +            socket_offset = table_data->len - pptt_start;
> +            build_processor_hierarchy_node(table_data,
> +                (1 << 0), /* Physical package */
> +                0, socket_id, NULL, 0);
> +        }
>  
> -    if (mc->smp_props.clusters_supported) {
> -        length = g_queue_get_length(list);
> -        for (i = 0; i < length; i++) {
> -            int cluster;
> -
> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (0 << 0), /* not a physical package */
> -                    parent_offset, cluster, NULL, 0);
> +        if (mc->smp_props.clusters_supported) {
> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
> +                cluster_id = cpus->cpus[n].props.cluster_id;
> +                core_id = -1;
> +                cluster_offset = table_data->len - pptt_start;
> +                build_processor_hierarchy_node(table_data,
> +                    (0 << 0), /* Not a physical package */
> +                    socket_offset, cluster_id, NULL, 0);
>              }
> +        } else {
> +            cluster_offset = socket_offset;
>          }
> -    }
>  
> -    length = g_queue_get_length(list);
> -    for (i = 0; i < length; i++) {
> -        int core;
> -
> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (core = 0; core < ms->smp.cores; core++) {
> -            if (ms->smp.threads > 1) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> +        if (ms->smp.threads <= 1) {
> +            build_processor_hierarchy_node(table_data,
> +                (1 << 1) | /* ACPI Processor ID valid */
> +                (1 << 3),  /* Node is a Leaf */
> +                cluster_offset, n, NULL, 0);
> +        } else {
> +            if (cpus->cpus[n].props.core_id != core_id) {
> +                core_id = cpus->cpus[n].props.core_id;
> +                core_offset = table_data->len - pptt_start;
> +                build_processor_hierarchy_node(table_data,
>                      (0 << 0), /* not a physical package */
> -                    parent_offset, core, NULL, 0);
> -            } else {
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (1 << 1) | /* ACPI Processor ID valid */
> -                    (1 << 3),  /* Node is a Leaf */
> -                    parent_offset, uid++, NULL, 0);
> +                    cluster_offset, core_id, NULL, 0);
>              }
> -        }
> -    }
> -
> -    length = g_queue_get_length(list);
> -    for (i = 0; i < length; i++) {
> -        int thread;
>  
> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (thread = 0; thread < ms->smp.threads; thread++) {
> -            build_processor_hierarchy_node(
> -                table_data,
> +            build_processor_hierarchy_node(table_data,
>                  (1 << 1) | /* ACPI Processor ID valid */
>                  (1 << 2) | /* Processor is a Thread */
>                  (1 << 3),  /* Node is a Leaf */
> -                parent_offset, uid++, NULL, 0);
> +                core_offset, n, NULL, 0);
>          }
>      }
>  
> -    g_queue_free(list);
>      acpi_table_end(linker, &table);
>  }
>
Gavin Shan April 13, 2022, 2:15 a.m. UTC | #2
Hi Jonathan,

On 4/12/22 11:40 PM, Jonathan Cameron wrote:
> On Sun,  3 Apr 2022 22:59:53 +0800
> Gavin Shan <gshan@redhat.com> wrote:
>> When the PPTT table is built, the CPU topology is re-calculated, but
>> it's unnecessary because the CPU topology has been populated in
>> virt_possible_cpu_arch_ids() on the arm/virt machine.
>>
>> This reworks build_pptt() to avoid the recalculation by reusing the
>> existing topology in ms->possible_cpus. Currently, the only user of
>> build_pptt() is the arm/virt machine.
>>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
> 
> My compiler isn't being very smart today and gives a bunch of
> "maybe used uninitialized" warnings for socket_offset, cluster_offset
> and core_offset.
> 
> They probably are initialized in all real paths, but I think you need to
> set them to something at instantiation to keep the compiler happy.
> 

Thanks for reporting the warnings raised by the compiler. I think
your compiler may be smarter than mine :)

Yeah, they're initialized in all real paths after checking the code
again. So let's initialize them with zeroes in v6, but I would hold off
for Igor's and Yanan's reviews on this (v5) series.

Thanks,
Gavin

> 
>> ---
>>   hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>   1 file changed, 38 insertions(+), 62 deletions(-)
>>
>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>> index 4086879ebf..4b0f9df3e3 100644
>> --- a/hw/acpi/aml-build.c
>> +++ b/hw/acpi/aml-build.c
>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>                   const char *oem_id, const char *oem_table_id)
>>   {
>>       MachineClass *mc = MACHINE_GET_CLASS(ms);
>> -    GQueue *list = g_queue_new();
>> -    guint pptt_start = table_data->len;
>> -    guint parent_offset;
>> -    guint length, i;
>> -    int uid = 0;
>> -    int socket;
>> +    CPUArchIdList *cpus = ms->possible_cpus;
>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>> +    uint32_t socket_offset, cluster_offset, core_offset;
>> +    uint32_t pptt_start = table_data->len;
>> +    int n;
>>       AcpiTable table = { .sig = "PPTT", .rev = 2,
>>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
>>   
>>       acpi_table_begin(&table, table_data);
>>   
>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>> -        g_queue_push_tail(list,
>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>> -        build_processor_hierarchy_node(
>> -            table_data,
>> -            /*
>> -             * Physical package - represents the boundary
>> -             * of a physical package
>> -             */
>> -            (1 << 0),
>> -            0, socket, NULL, 0);
>> -    }
>> +    for (n = 0; n < cpus->len; n++) {
>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>> +            socket_id = cpus->cpus[n].props.socket_id;
>> +            cluster_id = -1;
>> +            core_id = -1;
>> +            socket_offset = table_data->len - pptt_start;
>> +            build_processor_hierarchy_node(table_data,
>> +                (1 << 0), /* Physical package */
>> +                0, socket_id, NULL, 0);
>> +        }
>>   
>> -    if (mc->smp_props.clusters_supported) {
>> -        length = g_queue_get_length(list);
>> -        for (i = 0; i < length; i++) {
>> -            int cluster;
>> -
>> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
>> -                g_queue_push_tail(list,
>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> -                    (0 << 0), /* not a physical package */
>> -                    parent_offset, cluster, NULL, 0);
>> +        if (mc->smp_props.clusters_supported) {
>> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
>> +                cluster_id = cpus->cpus[n].props.cluster_id;
>> +                core_id = -1;
>> +                cluster_offset = table_data->len - pptt_start;
>> +                build_processor_hierarchy_node(table_data,
>> +                    (0 << 0), /* Not a physical package */
>> +                    socket_offset, cluster_id, NULL, 0);
>>               }
>> +        } else {
>> +            cluster_offset = socket_offset;
>>           }
>> -    }
>>   
>> -    length = g_queue_get_length(list);
>> -    for (i = 0; i < length; i++) {
>> -        int core;
>> -
>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -        for (core = 0; core < ms->smp.cores; core++) {
>> -            if (ms->smp.threads > 1) {
>> -                g_queue_push_tail(list,
>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> +        if (ms->smp.threads <= 1) {
>> +            build_processor_hierarchy_node(table_data,
>> +                (1 << 1) | /* ACPI Processor ID valid */
>> +                (1 << 3),  /* Node is a Leaf */
>> +                cluster_offset, n, NULL, 0);
>> +        } else {
>> +            if (cpus->cpus[n].props.core_id != core_id) {
>> +                core_id = cpus->cpus[n].props.core_id;
>> +                core_offset = table_data->len - pptt_start;
>> +                build_processor_hierarchy_node(table_data,
>>                       (0 << 0), /* not a physical package */
>> -                    parent_offset, core, NULL, 0);
>> -            } else {
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> -                    (1 << 1) | /* ACPI Processor ID valid */
>> -                    (1 << 3),  /* Node is a Leaf */
>> -                    parent_offset, uid++, NULL, 0);
>> +                    cluster_offset, core_id, NULL, 0);
>>               }
>> -        }
>> -    }
>> -
>> -    length = g_queue_get_length(list);
>> -    for (i = 0; i < length; i++) {
>> -        int thread;
>>   
>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -        for (thread = 0; thread < ms->smp.threads; thread++) {
>> -            build_processor_hierarchy_node(
>> -                table_data,
>> +            build_processor_hierarchy_node(table_data,
>>                   (1 << 1) | /* ACPI Processor ID valid */
>>                   (1 << 2) | /* Processor is a Thread */
>>                   (1 << 3),  /* Node is a Leaf */
>> -                parent_offset, uid++, NULL, 0);
>> +                core_offset, n, NULL, 0);
>>           }
>>       }
>>   
>> -    g_queue_free(list);
>>       acpi_table_end(linker, &table);
>>   }
>>   
>
Igor Mammedov April 13, 2022, 1:52 p.m. UTC | #3
On Sun,  3 Apr 2022 22:59:53 +0800
Gavin Shan <gshan@redhat.com> wrote:

> When the PPTT table is built, the CPU topology is re-calculated, but
> it's unnecessary because the CPU topology has been populated in
> virt_possible_cpu_arch_ids() on the arm/virt machine.
> 
> This reworks build_pptt() to avoid the recalculation by reusing the
> existing topology in ms->possible_cpus. Currently, the only user of
> build_pptt() is the arm/virt machine.
> 
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>  hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>  1 file changed, 38 insertions(+), 62 deletions(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index 4086879ebf..4b0f9df3e3 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>                  const char *oem_id, const char *oem_table_id)
>  {
>      MachineClass *mc = MACHINE_GET_CLASS(ms);
> -    GQueue *list = g_queue_new();
> -    guint pptt_start = table_data->len;
> -    guint parent_offset;
> -    guint length, i;
> -    int uid = 0;
> -    int socket;
> +    CPUArchIdList *cpus = ms->possible_cpus;
> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
> +    uint32_t socket_offset, cluster_offset, core_offset;
> +    uint32_t pptt_start = table_data->len;
> +    int n;
>      AcpiTable table = { .sig = "PPTT", .rev = 2,
>                          .oem_id = oem_id, .oem_table_id = oem_table_id };
>  
>      acpi_table_begin(&table, table_data);
>  
> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
> -        g_queue_push_tail(list,
> -            GUINT_TO_POINTER(table_data->len - pptt_start));
> -        build_processor_hierarchy_node(
> -            table_data,
> -            /*
> -             * Physical package - represents the boundary
> -             * of a physical package
> -             */
> -            (1 << 0),
> -            0, socket, NULL, 0);
> -    }
> +    for (n = 0; n < cpus->len; n++) {

> +        if (cpus->cpus[n].props.socket_id != socket_id) {
> +            socket_id = cpus->cpus[n].props.socket_id;

this relies on cpus->cpus[n].props.*_id being sorted from top to down
levels. I'd add here, and for the other container IDs, an assert() that
checks that a specific ID goes in only one direction, to be able to
detect when the rule is broken.

otherwise one may end up with duplicate containers silently.
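
For instance, a sketch of such a check at the socket level (hypothetical,
not part of the posted patch), relying on socket_id starting at -1 and
tracking the last emitted socket:

    if (cpus->cpus[n].props.socket_id != socket_id) {
        /*
         * IDs may only grow: a smaller ID here means possible_cpus is
         * not sorted and a duplicate socket container would be emitted.
         */
        assert(cpus->cpus[n].props.socket_id > socket_id);
        socket_id = cpus->cpus[n].props.socket_id;
        /* ... build the physical package node as before ... */
    }

The same pattern would apply to cluster_id and core_id within their
parent containers.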

> +            cluster_id = -1;
> +            core_id = -1;
> +            socket_offset = table_data->len - pptt_start;
> +            build_processor_hierarchy_node(table_data,
> +                (1 << 0), /* Physical package */
> +                0, socket_id, NULL, 0);
> +        }
>  
> -    if (mc->smp_props.clusters_supported) {
> -        length = g_queue_get_length(list);
> -        for (i = 0; i < length; i++) {
> -            int cluster;
> -
> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (0 << 0), /* not a physical package */
> -                    parent_offset, cluster, NULL, 0);
> +        if (mc->smp_props.clusters_supported) {
> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
> +                cluster_id = cpus->cpus[n].props.cluster_id;
> +                core_id = -1;
> +                cluster_offset = table_data->len - pptt_start;
> +                build_processor_hierarchy_node(table_data,
> +                    (0 << 0), /* Not a physical package */
> +                    socket_offset, cluster_id, NULL, 0);
>              }
> +        } else {
> +            cluster_offset = socket_offset;
>          }
> -    }
>  
> -    length = g_queue_get_length(list);
> -    for (i = 0; i < length; i++) {
> -        int core;
> -
> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (core = 0; core < ms->smp.cores; core++) {
> -            if (ms->smp.threads > 1) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> +        if (ms->smp.threads <= 1) {

why is <= used instead of < here?

> +            build_processor_hierarchy_node(table_data,
> +                (1 << 1) | /* ACPI Processor ID valid */
> +                (1 << 3),  /* Node is a Leaf */
> +                cluster_offset, n, NULL, 0);
> +        } else {
> +            if (cpus->cpus[n].props.core_id != core_id) {
> +                core_id = cpus->cpus[n].props.core_id;
> +                core_offset = table_data->len - pptt_start;
> +                build_processor_hierarchy_node(table_data,
>                      (0 << 0), /* not a physical package */
> -                    parent_offset, core, NULL, 0);
> -            } else {
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (1 << 1) | /* ACPI Processor ID valid */
> -                    (1 << 3),  /* Node is a Leaf */
> -                    parent_offset, uid++, NULL, 0);
> +                    cluster_offset, core_id, NULL, 0);
>              }
> -        }
> -    }
> -
> -    length = g_queue_get_length(list);
> -    for (i = 0; i < length; i++) {
> -        int thread;
>  
> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (thread = 0; thread < ms->smp.threads; thread++) {
> -            build_processor_hierarchy_node(
> -                table_data,
> +            build_processor_hierarchy_node(table_data,
>                  (1 << 1) | /* ACPI Processor ID valid */
>                  (1 << 2) | /* Processor is a Thread */
>                  (1 << 3),  /* Node is a Leaf */
> -                parent_offset, uid++, NULL, 0);
> +                core_offset, n, NULL, 0);
>          }
>      }
>  
> -    g_queue_free(list);
>      acpi_table_end(linker, &table);
>  }
>
Gavin Shan April 14, 2022, 12:33 a.m. UTC | #4
Hi Igor,

On 4/13/22 9:52 PM, Igor Mammedov wrote:
> On Sun,  3 Apr 2022 22:59:53 +0800
> Gavin Shan <gshan@redhat.com> wrote:
> 
>> When the PPTT table is built, the CPU topology is re-calculated, but
>> it's unnecessary because the CPU topology has been populated in
>> virt_possible_cpu_arch_ids() on the arm/virt machine.
>>
>> This reworks build_pptt() to avoid the recalculation by reusing the
>> existing topology in ms->possible_cpus. Currently, the only user of
>> build_pptt() is the arm/virt machine.
>>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>   1 file changed, 38 insertions(+), 62 deletions(-)
>>
>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>> index 4086879ebf..4b0f9df3e3 100644
>> --- a/hw/acpi/aml-build.c
>> +++ b/hw/acpi/aml-build.c
>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>                   const char *oem_id, const char *oem_table_id)
>>   {
>>       MachineClass *mc = MACHINE_GET_CLASS(ms);
>> -    GQueue *list = g_queue_new();
>> -    guint pptt_start = table_data->len;
>> -    guint parent_offset;
>> -    guint length, i;
>> -    int uid = 0;
>> -    int socket;
>> +    CPUArchIdList *cpus = ms->possible_cpus;
>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>> +    uint32_t socket_offset, cluster_offset, core_offset;
>> +    uint32_t pptt_start = table_data->len;
>> +    int n;
>>       AcpiTable table = { .sig = "PPTT", .rev = 2,
>>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
>>   
>>       acpi_table_begin(&table, table_data);
>>   
>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>> -        g_queue_push_tail(list,
>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>> -        build_processor_hierarchy_node(
>> -            table_data,
>> -            /*
>> -             * Physical package - represents the boundary
>> -             * of a physical package
>> -             */
>> -            (1 << 0),
>> -            0, socket, NULL, 0);
>> -    }
>> +    for (n = 0; n < cpus->len; n++) {
> 
>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>> +            socket_id = cpus->cpus[n].props.socket_id;
> 
> this relies on cpus->cpus[n].props.*_id being sorted from top to down
> levels. I'd add here, and for the other container IDs, an assert() that
> checks that a specific ID goes in only one direction, to be able to
> detect when the rule is broken.
> 
> otherwise one may end up with duplicate containers silently.
> 

Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids().
The only user of build_pptt() is arm/virt machine. So it's fine. However, I think I
may need to add a comment for this in v6.

     /*
      * This works with the assumption that cpus[n].props.*_id has been
      * sorted from top to down levels in mc->possible_cpu_arch_ids().
      * Otherwise, unexpected and duplicate containers will be created.
      */

The implementation in v3 looks complicated, but comprehensive. The one
in this revision (v6) looks simple, but we're losing flexibility :)


>> +            cluster_id = -1;
>> +            core_id = -1;
>> +            socket_offset = table_data->len - pptt_start;
>> +            build_processor_hierarchy_node(table_data,
>> +                (1 << 0), /* Physical package */
>> +                0, socket_id, NULL, 0);
>> +        }
>>   
>> -    if (mc->smp_props.clusters_supported) {
>> -        length = g_queue_get_length(list);
>> -        for (i = 0; i < length; i++) {
>> -            int cluster;
>> -
>> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
>> -                g_queue_push_tail(list,
>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> -                    (0 << 0), /* not a physical package */
>> -                    parent_offset, cluster, NULL, 0);
>> +        if (mc->smp_props.clusters_supported) {
>> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
>> +                cluster_id = cpus->cpus[n].props.cluster_id;
>> +                core_id = -1;
>> +                cluster_offset = table_data->len - pptt_start;
>> +                build_processor_hierarchy_node(table_data,
>> +                    (0 << 0), /* Not a physical package */
>> +                    socket_offset, cluster_id, NULL, 0);
>>               }
>> +        } else {
>> +            cluster_offset = socket_offset;
>>           }
>> -    }
>>   
>> -    length = g_queue_get_length(list);
>> -    for (i = 0; i < length; i++) {
>> -        int core;
>> -
>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -        for (core = 0; core < ms->smp.cores; core++) {
>> -            if (ms->smp.threads > 1) {
>> -                g_queue_push_tail(list,
>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> +        if (ms->smp.threads <= 1) {
> 
> why is <= used instead of < here?
> 

It's the counterpart to the one in the original implementation,
which is "if (ms->smp.threads > 1)". The nodes for threads won't
be created until ms->smp.threads is greater than 1. If we want
to use "<" here, then the code would be changed like below in v6.
However, I really don't think it's necessary.

            if (ms->smp.threads < 2) {
                :
            }


>> +            build_processor_hierarchy_node(table_data,
>> +                (1 << 1) | /* ACPI Processor ID valid */
>> +                (1 << 3),  /* Node is a Leaf */
>> +                cluster_offset, n, NULL, 0);
>> +        } else {
>> +            if (cpus->cpus[n].props.core_id != core_id) {
>> +                core_id = cpus->cpus[n].props.core_id;
>> +                core_offset = table_data->len - pptt_start;
>> +                build_processor_hierarchy_node(table_data,
>>                       (0 << 0), /* not a physical package */
>> -                    parent_offset, core, NULL, 0);
>> -            } else {
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> -                    (1 << 1) | /* ACPI Processor ID valid */
>> -                    (1 << 3),  /* Node is a Leaf */
>> -                    parent_offset, uid++, NULL, 0);
>> +                    cluster_offset, core_id, NULL, 0);
>>               }
>> -        }
>> -    }
>> -
>> -    length = g_queue_get_length(list);
>> -    for (i = 0; i < length; i++) {
>> -        int thread;
>>   
>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -        for (thread = 0; thread < ms->smp.threads; thread++) {
>> -            build_processor_hierarchy_node(
>> -                table_data,
>> +            build_processor_hierarchy_node(table_data,
>>                   (1 << 1) | /* ACPI Processor ID valid */
>>                   (1 << 2) | /* Processor is a Thread */
>>                   (1 << 3),  /* Node is a Leaf */
>> -                parent_offset, uid++, NULL, 0);
>> +                core_offset, n, NULL, 0);
>>           }
>>       }
>>   
>> -    g_queue_free(list);
>>       acpi_table_end(linker, &table);
>>   }
>>   

Thanks,
Gavin
wangyanan (Y) April 14, 2022, 2:56 a.m. UTC | #5
On 2022/4/14 8:33, Gavin Shan wrote:
> Hi Igor,
>
> On 4/13/22 9:52 PM, Igor Mammedov wrote:
>> On Sun,  3 Apr 2022 22:59:53 +0800
>> Gavin Shan <gshan@redhat.com> wrote:
>>
>>> When the PPTT table is built, the CPU topology is re-calculated, but
>>> it's unecessary because the CPU topology has been populated in
>>> virt_possible_cpu_arch_ids() on arm/virt machine.
>>>
>>> This reworks build_pptt() to avoid by reusing the existing one in
>>> ms->possible_cpus. Currently, the only user of build_pptt() is
>>> arm/virt machine.
>>>
>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>> ---
>>>   hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>>   1 file changed, 38 insertions(+), 62 deletions(-)
>>>
>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>>> index 4086879ebf..4b0f9df3e3 100644
>>> --- a/hw/acpi/aml-build.c
>>> +++ b/hw/acpi/aml-build.c
>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>>                   const char *oem_id, const char *oem_table_id)
>>>   {
>>>       MachineClass *mc = MACHINE_GET_CLASS(ms);
>>> -    GQueue *list = g_queue_new();
>>> -    guint pptt_start = table_data->len;
>>> -    guint parent_offset;
>>> -    guint length, i;
>>> -    int uid = 0;
>>> -    int socket;
>>> +    CPUArchIdList *cpus = ms->possible_cpus;
>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>>> +    uint32_t socket_offset, cluster_offset, core_offset;
>>> +    uint32_t pptt_start = table_data->len;
>>> +    int n;
>>>       AcpiTable table = { .sig = "PPTT", .rev = 2,
>>>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
>>>         acpi_table_begin(&table, table_data);
>>>   -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>>> -        g_queue_push_tail(list,
>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>>> -        build_processor_hierarchy_node(
>>> -            table_data,
>>> -            /*
>>> -             * Physical package - represents the boundary
>>> -             * of a physical package
>>> -             */
>>> -            (1 << 0),
>>> -            0, socket, NULL, 0);
>>> -    }
>>> +    for (n = 0; n < cpus->len; n++) {
>>
>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>>> +            socket_id = cpus->cpus[n].props.socket_id;
>>
>> this relies on cpus->cpus[n].props.*_id being sorted from top to down
>> levels. I'd add here, and for the other container IDs, an assert() that
>> checks that a specific ID goes in only one direction, to be able to
>> detect when the rule is broken.
>>
>> otherwise one may end up with duplicate containers silently.
>>
>
> Exactly. cpus->cpus[n].props.*_id is sorted as you said in
> virt_possible_cpu_arch_ids(). The only user of build_pptt() is
> arm/virt machine. So it's fine. However, I think I may need to add
> a comment for this in v6.
>
>     /*
>      * This works with the assumption that cpus[n].props.*_id has been
>      * sorted from top to down levels in mc->possible_cpu_arch_ids().
>      * Otherwise, unexpected and duplicate containers will be created.
>      */
>
> The implementation in v3 looks complicated, but comprehensive. The one
> in this revision (v6) looks simple, but we're losing flexibility :)
>
>
>>> +            cluster_id = -1;
>>> +            core_id = -1;
>>> +            socket_offset = table_data->len - pptt_start;
>>> +            build_processor_hierarchy_node(table_data,
>>> +                (1 << 0), /* Physical package */
>>> +                0, socket_id, NULL, 0);
>>> +        }
>>>   -    if (mc->smp_props.clusters_supported) {
>>> -        length = g_queue_get_length(list);
>>> -        for (i = 0; i < length; i++) {
>>> -            int cluster;
>>> -
>>> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
>>> -                g_queue_push_tail(list,
>>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>>> -                build_processor_hierarchy_node(
>>> -                    table_data,
>>> -                    (0 << 0), /* not a physical package */
>>> -                    parent_offset, cluster, NULL, 0);
>>> +        if (mc->smp_props.clusters_supported) {
>>> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
>>> +                cluster_id = cpus->cpus[n].props.cluster_id;
>>> +                core_id = -1;
>>> +                cluster_offset = table_data->len - pptt_start;
>>> +                build_processor_hierarchy_node(table_data,
>>> +                    (0 << 0), /* Not a physical package */
>>> +                    socket_offset, cluster_id, NULL, 0);
>>>               }
>>> +        } else {
>>> +            cluster_offset = socket_offset;
>>>           }
>>> -    }
>>>   -    length = g_queue_get_length(list);
>>> -    for (i = 0; i < length; i++) {
>>> -        int core;
>>> -
>>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>> -        for (core = 0; core < ms->smp.cores; core++) {
>>> -            if (ms->smp.threads > 1) {
>>> -                g_queue_push_tail(list,
>>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>>> -                build_processor_hierarchy_node(
>>> -                    table_data,
>>> +        if (ms->smp.threads <= 1) {
>>
>> why is <= used instead of < here?
>>
>
> It's the counterpart to the one in the original implementation,
> which is "if (ms->smp.threads > 1)". The nodes for threads won't
> be created until ms->smp.threads is greater than 1. If we want
> to use "<" here, then the code would be changed like below in v6.
> However, I really don't think it's necessary.
>
>            if (ms->smp.threads < 2) {
>                :
>            }
>
The smp_parse() guarantees that we either have "one thread" or
"multiple threads" in ms->smp. So I think there are two proper choices:

    if (ms->smp.threads == 1) {
        ...
    } else {
        ...
    }

or

    if (ms->smp.threads > 1) {
        ...
    } else {
        ...
    }

Thanks,
Yanan
>
>>> +            build_processor_hierarchy_node(table_data,
>>> +                (1 << 1) | /* ACPI Processor ID valid */
>>> +                (1 << 3),  /* Node is a Leaf */
>>> +                cluster_offset, n, NULL, 0);
>>> +        } else {
>>> +            if (cpus->cpus[n].props.core_id != core_id) {
>>> +                core_id = cpus->cpus[n].props.core_id;
>>> +                core_offset = table_data->len - pptt_start;
>>> +                build_processor_hierarchy_node(table_data,
>>>                       (0 << 0), /* not a physical package */
>>> -                    parent_offset, core, NULL, 0);
>>> -            } else {
>>> -                build_processor_hierarchy_node(
>>> -                    table_data,
>>> -                    (1 << 1) | /* ACPI Processor ID valid */
>>> -                    (1 << 3),  /* Node is a Leaf */
>>> -                    parent_offset, uid++, NULL, 0);
>>> +                    cluster_offset, core_id, NULL, 0);
>>>               }
>>> -        }
>>> -    }
>>> -
>>> -    length = g_queue_get_length(list);
>>> -    for (i = 0; i < length; i++) {
>>> -        int thread;
>>>   -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>> -        for (thread = 0; thread < ms->smp.threads; thread++) {
>>> -            build_processor_hierarchy_node(
>>> -                table_data,
>>> +            build_processor_hierarchy_node(table_data,
>>>                   (1 << 1) | /* ACPI Processor ID valid */
>>>                   (1 << 2) | /* Processor is a Thread */
>>>                   (1 << 3),  /* Node is a Leaf */
>>> -                parent_offset, uid++, NULL, 0);
>>> +                core_offset, n, NULL, 0);
>>>           }
>>>       }
>>>   -    g_queue_free(list);
>>>       acpi_table_end(linker, &table);
>>>   }
>
> Thanks,
> Gavin
>
> .
wangyanan (Y) April 14, 2022, 3:09 a.m. UTC | #6
Hi Gavin,

On 2022/4/3 22:59, Gavin Shan wrote:
> When the PPTT table is built, the CPU topology is re-calculated, but
> it's unnecessary because the CPU topology has been populated in
> virt_possible_cpu_arch_ids() on the arm/virt machine.
>
> This reworks build_pptt() to avoid the recalculation by reusing the
> existing topology in ms->possible_cpus. Currently, the only user of
> build_pptt() is the arm/virt machine.
>
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>   hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>   1 file changed, 38 insertions(+), 62 deletions(-)
>
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index 4086879ebf..4b0f9df3e3 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>                   const char *oem_id, const char *oem_table_id)
>   {
>       MachineClass *mc = MACHINE_GET_CLASS(ms);
> -    GQueue *list = g_queue_new();
> -    guint pptt_start = table_data->len;
> -    guint parent_offset;
> -    guint length, i;
> -    int uid = 0;
> -    int socket;
> +    CPUArchIdList *cpus = ms->possible_cpus;
> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
nit: why not use "int" for the ID variables? (to be consistent with how
the IDs are stored in CpuInstanceProperties).

Thanks,
Yanan
> +    uint32_t socket_offset, cluster_offset, core_offset;
> +    uint32_t pptt_start = table_data->len;
> +    int n;
>       AcpiTable table = { .sig = "PPTT", .rev = 2,
>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
>   
>       acpi_table_begin(&table, table_data);
>   
> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
> -        g_queue_push_tail(list,
> -            GUINT_TO_POINTER(table_data->len - pptt_start));
> -        build_processor_hierarchy_node(
> -            table_data,
> -            /*
> -             * Physical package - represents the boundary
> -             * of a physical package
> -             */
> -            (1 << 0),
> -            0, socket, NULL, 0);
> -    }
> +    for (n = 0; n < cpus->len; n++) {
> +        if (cpus->cpus[n].props.socket_id != socket_id) {
> +            socket_id = cpus->cpus[n].props.socket_id;
> +            cluster_id = -1;
> +            core_id = -1;
> +            socket_offset = table_data->len - pptt_start;
> +            build_processor_hierarchy_node(table_data,
> +                (1 << 0), /* Physical package */
> +                0, socket_id, NULL, 0);
> +        }
>   
> -    if (mc->smp_props.clusters_supported) {
> -        length = g_queue_get_length(list);
> -        for (i = 0; i < length; i++) {
> -            int cluster;
> -
> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (0 << 0), /* not a physical package */
> -                    parent_offset, cluster, NULL, 0);
> +        if (mc->smp_props.clusters_supported) {
> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
> +                cluster_id = cpus->cpus[n].props.cluster_id;
> +                core_id = -1;
> +                cluster_offset = table_data->len - pptt_start;
> +                build_processor_hierarchy_node(table_data,
> +                    (0 << 0), /* Not a physical package */
> +                    socket_offset, cluster_id, NULL, 0);
>               }
> +        } else {
> +            cluster_offset = socket_offset;
>           }
> -    }
>   
> -    length = g_queue_get_length(list);
> -    for (i = 0; i < length; i++) {
> -        int core;
> -
> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (core = 0; core < ms->smp.cores; core++) {
> -            if (ms->smp.threads > 1) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> +        if (ms->smp.threads <= 1) {
> +            build_processor_hierarchy_node(table_data,
> +                (1 << 1) | /* ACPI Processor ID valid */
> +                (1 << 3),  /* Node is a Leaf */
> +                cluster_offset, n, NULL, 0);
> +        } else {
> +            if (cpus->cpus[n].props.core_id != core_id) {
> +                core_id = cpus->cpus[n].props.core_id;
> +                core_offset = table_data->len - pptt_start;
> +                build_processor_hierarchy_node(table_data,
>                       (0 << 0), /* not a physical package */
> -                    parent_offset, core, NULL, 0);
> -            } else {
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (1 << 1) | /* ACPI Processor ID valid */
> -                    (1 << 3),  /* Node is a Leaf */
> -                    parent_offset, uid++, NULL, 0);
> +                    cluster_offset, core_id, NULL, 0);
>               }
> -        }
> -    }
> -
> -    length = g_queue_get_length(list);
> -    for (i = 0; i < length; i++) {
> -        int thread;
>   
> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (thread = 0; thread < ms->smp.threads; thread++) {
> -            build_processor_hierarchy_node(
> -                table_data,
> +            build_processor_hierarchy_node(table_data,
>                   (1 << 1) | /* ACPI Processor ID valid */
>                   (1 << 2) | /* Processor is a Thread */
>                   (1 << 3),  /* Node is a Leaf */
> -                parent_offset, uid++, NULL, 0);
> +                core_offset, n, NULL, 0);
>           }
>       }
>   
> -    g_queue_free(list);
>       acpi_table_end(linker, &table);
>   }
>
Gavin Shan April 14, 2022, 7:39 a.m. UTC | #7
Hi Yanan,

On 4/14/22 10:56 AM, wangyanan (Y) wrote:
> On 2022/4/14 8:33, Gavin Shan wrote:
>> On 4/13/22 9:52 PM, Igor Mammedov wrote:
>>> On Sun,  3 Apr 2022 22:59:53 +0800
>>> Gavin Shan <gshan@redhat.com> wrote:
>>>
>>>> When the PPTT table is built, the CPU topology is re-calculated, but
>>>> it's unnecessary because the CPU topology has been populated in
>>>> virt_possible_cpu_arch_ids() on the arm/virt machine.
>>>>
>>>> This reworks build_pptt() to avoid the recalculation by reusing the
>>>> existing topology in ms->possible_cpus. Currently, the only user of
>>>> build_pptt() is the arm/virt machine.
>>>>
>>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>>> ---
>>>>   hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>>>   1 file changed, 38 insertions(+), 62 deletions(-)
>>>>
>>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>>>> index 4086879ebf..4b0f9df3e3 100644
>>>> --- a/hw/acpi/aml-build.c
>>>> +++ b/hw/acpi/aml-build.c
>>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>>>                   const char *oem_id, const char *oem_table_id)
>>>>   {
>>>>       MachineClass *mc = MACHINE_GET_CLASS(ms);
>>>> -    GQueue *list = g_queue_new();
>>>> -    guint pptt_start = table_data->len;
>>>> -    guint parent_offset;
>>>> -    guint length, i;
>>>> -    int uid = 0;
>>>> -    int socket;
>>>> +    CPUArchIdList *cpus = ms->possible_cpus;
>>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>>>> +    uint32_t socket_offset, cluster_offset, core_offset;
>>>> +    uint32_t pptt_start = table_data->len;
>>>> +    int n;
>>>>       AcpiTable table = { .sig = "PPTT", .rev = 2,
>>>>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
>>>>         acpi_table_begin(&table, table_data);
>>>>   -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>>>> -        g_queue_push_tail(list,
>>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>>>> -        build_processor_hierarchy_node(
>>>> -            table_data,
>>>> -            /*
>>>> -             * Physical package - represents the boundary
>>>> -             * of a physical package
>>>> -             */
>>>> -            (1 << 0),
>>>> -            0, socket, NULL, 0);
>>>> -    }
>>>> +    for (n = 0; n < cpus->len; n++) {
>>>
>>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>>>> +            socket_id = cpus->cpus[n].props.socket_id;
>>>
>>> this relies on cpus->cpus[n].props.*_id being sorted from top to down
>>> levels. I'd add here, and for the other container IDs, an assert() that
>>> checks that a specific ID goes in only one direction, to be able to
>>> detect when the rule is broken.
>>>
>>> otherwise one may end up with duplicate containers silently.
>>>
>>
>> Exactly. cpus->cpus[n].props.*_id is sorted as you said in
>> virt_possible_cpu_arch_ids(). The only user of build_pptt() is
>> arm/virt machine. So it's fine. However, I think I may need to add
>> a comment for this in v6.
>>
>>     /*
>>      * This works with the assumption that cpus[n].props.*_id has been
>>      * sorted from top to down levels in mc->possible_cpu_arch_ids().
>>      * Otherwise, unexpected and duplicate containers will be created.
>>      */
>>
>> The implementation in v3 looks complicated, but comprehensive. The one
>> in this revision (v6) looks simple, but we're losing flexibility :)
>>
>>
>>>> +            cluster_id = -1;
>>>> +            core_id = -1;
>>>> +            socket_offset = table_data->len - pptt_start;
>>>> +            build_processor_hierarchy_node(table_data,
>>>> +                (1 << 0), /* Physical package */
>>>> +                0, socket_id, NULL, 0);
>>>> +        }
>>>>   -    if (mc->smp_props.clusters_supported) {
>>>> -        length = g_queue_get_length(list);
>>>> -        for (i = 0; i < length; i++) {
>>>> -            int cluster;
>>>> -
>>>> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>>> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
>>>> -                g_queue_push_tail(list,
>>>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>>>> -                build_processor_hierarchy_node(
>>>> -                    table_data,
>>>> -                    (0 << 0), /* not a physical package */
>>>> -                    parent_offset, cluster, NULL, 0);
>>>> +        if (mc->smp_props.clusters_supported) {
>>>> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
>>>> +                cluster_id = cpus->cpus[n].props.cluster_id;
>>>> +                core_id = -1;
>>>> +                cluster_offset = table_data->len - pptt_start;
>>>> +                build_processor_hierarchy_node(table_data,
>>>> +                    (0 << 0), /* Not a physical package */
>>>> +                    socket_offset, cluster_id, NULL, 0);
>>>>               }
>>>> +        } else {
>>>> +            cluster_offset = socket_offset;
>>>>           }
>>>> -    }
>>>>   -    length = g_queue_get_length(list);
>>>> -    for (i = 0; i < length; i++) {
>>>> -        int core;
>>>> -
>>>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>>> -        for (core = 0; core < ms->smp.cores; core++) {
>>>> -            if (ms->smp.threads > 1) {
>>>> -                g_queue_push_tail(list,
>>>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>>>> -                build_processor_hierarchy_node(
>>>> -                    table_data,
>>>> +        if (ms->smp.threads <= 1) {
>>>
>>> why is <= used instead of < here?
>>>
>>
>> It's the counterpart to the one in the original implementation,
>> which is "if (ms->smp.threads > 1)". The nodes for threads won't
>> be created until ms->smp.threads is greater than 1. If we want
>> to use "<" here, then the code would be changed like below in v6.
>> However, I really don't think it's necessary.
>>
>>            if (ms->smp.threads < 2) {
>>                :
>>            }
>>
> The smp_parse() guarantees that we either have "one thread" or
> "multiple threads" in ms->smp. So I think there are two proper choices:
>
>     if (ms->smp.threads == 1) {
>         ...
>     } else {
>         ...
>     }
> 
> or
> 
>     if (ms->smp.threads > 1) {
>         ...
>     } else {
>         ...
>     }
> 
Yes, it's guaranteed that ms->smp.threads is equal to or larger than 1.
I will use the pattern of 'if (ms->smp.threads == 1)' in v6.
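
The leaf branch would then presumably read something like this in v6 (a
sketch based on the v5 hunk quoted above, not the actually posted patch):

    if (ms->smp.threads == 1) {
        /* No thread level: the CPU itself is the leaf node. */
        build_processor_hierarchy_node(table_data,
            (1 << 1) | /* ACPI Processor ID valid */
            (1 << 3),  /* Node is a Leaf */
            cluster_offset, n, NULL, 0);
    } else {
        /* Core container plus one leaf node per thread, as in v5. */
    }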

>>>> +            build_processor_hierarchy_node(table_data,
>>>> +                (1 << 1) | /* ACPI Processor ID valid */
>>>> +                (1 << 3),  /* Node is a Leaf */
>>>> +                cluster_offset, n, NULL, 0);
>>>> +        } else {
>>>> +            if (cpus->cpus[n].props.core_id != core_id) {
>>>> +                core_id = cpus->cpus[n].props.core_id;
>>>> +                core_offset = table_data->len - pptt_start;
>>>> +                build_processor_hierarchy_node(table_data,
>>>>                       (0 << 0), /* not a physical package */
>>>> -                    parent_offset, core, NULL, 0);
>>>> -            } else {
>>>> -                build_processor_hierarchy_node(
>>>> -                    table_data,
>>>> -                    (1 << 1) | /* ACPI Processor ID valid */
>>>> -                    (1 << 3),  /* Node is a Leaf */
>>>> -                    parent_offset, uid++, NULL, 0);
>>>> +                    cluster_offset, core_id, NULL, 0);
>>>>               }
>>>> -        }
>>>> -    }
>>>> -
>>>> -    length = g_queue_get_length(list);
>>>> -    for (i = 0; i < length; i++) {
>>>> -        int thread;
>>>>   -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>>> -        for (thread = 0; thread < ms->smp.threads; thread++) {
>>>> -            build_processor_hierarchy_node(
>>>> -                table_data,
>>>> +            build_processor_hierarchy_node(table_data,
>>>>                   (1 << 1) | /* ACPI Processor ID valid */
>>>>                   (1 << 2) | /* Processor is a Thread */
>>>>                   (1 << 3),  /* Node is a Leaf */
>>>> -                parent_offset, uid++, NULL, 0);
>>>> +                core_offset, n, NULL, 0);
>>>>           }
>>>>       }
>>>>   -    g_queue_free(list);
>>>>       acpi_table_end(linker, &table);
>>>>   }

Thanks,
Gavin
Gavin Shan April 14, 2022, 7:45 a.m. UTC | #8
Hi Yanan,

On 4/14/22 11:09 AM, wangyanan (Y) wrote:
> On 2022/4/3 22:59, Gavin Shan wrote:
>> When the PPTT table is built, the CPU topology is re-calculated, but
>> it's unnecessary because the CPU topology has been populated in
>> virt_possible_cpu_arch_ids() on the arm/virt machine.
>>
>> This reworks build_pptt() to avoid the recalculation by reusing the
>> existing topology in ms->possible_cpus. Currently, the only user of
>> build_pptt() is the arm/virt machine.
>>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>   1 file changed, 38 insertions(+), 62 deletions(-)
>>
>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>> index 4086879ebf..4b0f9df3e3 100644
>> --- a/hw/acpi/aml-build.c
>> +++ b/hw/acpi/aml-build.c
>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>                   const char *oem_id, const char *oem_table_id)
>>   {
>>       MachineClass *mc = MACHINE_GET_CLASS(ms);
>> -    GQueue *list = g_queue_new();
>> -    guint pptt_start = table_data->len;
>> -    guint parent_offset;
>> -    guint length, i;
>> -    int uid = 0;
>> -    int socket;
>> +    CPUArchIdList *cpus = ms->possible_cpus;
>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
> nit: why not use "int" for the ID variables? (to be consistent with how
> the IDs are stored in CpuInstanceProperties).
> 

They're actually "int64_t". CpuInstanceProperties is declared in
qemu/build/qapi/qapi-types-machine.h like below:

struct CpuInstanceProperties {
     bool has_node_id;
     int64_t node_id;
     bool has_socket_id;
     int64_t socket_id;
     bool has_die_id;
     int64_t die_id;
     bool has_cluster_id;
     int64_t cluster_id;
     bool has_core_id;
     int64_t core_id;
     bool has_thread_id;
     int64_t thread_id;
};

I doubt we're using the same configuration to build QEMU. I use
the following one:

configure_qemu() {
    ./configure --target-list=aarch64-softmmu --enable-numa    \
    --enable-debug --disable-werror --enable-fdt --enable-kvm  \
    --disable-mpath --disable-xen --disable-vnc --disable-docs \
    --python=/usr/bin/python3 $@
}

>> +    uint32_t socket_offset, cluster_offset, core_offset;
>> +    uint32_t pptt_start = table_data->len;
>> +    int n;
>>       AcpiTable table = { .sig = "PPTT", .rev = 2,
>>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
>>       acpi_table_begin(&table, table_data);
>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>> -        g_queue_push_tail(list,
>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>> -        build_processor_hierarchy_node(
>> -            table_data,
>> -            /*
>> -             * Physical package - represents the boundary
>> -             * of a physical package
>> -             */
>> -            (1 << 0),
>> -            0, socket, NULL, 0);
>> -    }
>> +    for (n = 0; n < cpus->len; n++) {
>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>> +            socket_id = cpus->cpus[n].props.socket_id;
>> +            cluster_id = -1;
>> +            core_id = -1;
>> +            socket_offset = table_data->len - pptt_start;
>> +            build_processor_hierarchy_node(table_data,
>> +                (1 << 0), /* Physical package */
>> +                0, socket_id, NULL, 0);
>> +        }
>> -    if (mc->smp_props.clusters_supported) {
>> -        length = g_queue_get_length(list);
>> -        for (i = 0; i < length; i++) {
>> -            int cluster;
>> -
>> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
>> -                g_queue_push_tail(list,
>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> -                    (0 << 0), /* not a physical package */
>> -                    parent_offset, cluster, NULL, 0);
>> +        if (mc->smp_props.clusters_supported) {
>> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
>> +                cluster_id = cpus->cpus[n].props.cluster_id;
>> +                core_id = -1;
>> +                cluster_offset = table_data->len - pptt_start;
>> +                build_processor_hierarchy_node(table_data,
>> +                    (0 << 0), /* Not a physical package */
>> +                    socket_offset, cluster_id, NULL, 0);
>>               }
>> +        } else {
>> +            cluster_offset = socket_offset;
>>           }
>> -    }
>> -    length = g_queue_get_length(list);
>> -    for (i = 0; i < length; i++) {
>> -        int core;
>> -
>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -        for (core = 0; core < ms->smp.cores; core++) {
>> -            if (ms->smp.threads > 1) {
>> -                g_queue_push_tail(list,
>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> +        if (ms->smp.threads <= 1) {
>> +            build_processor_hierarchy_node(table_data,
>> +                (1 << 1) | /* ACPI Processor ID valid */
>> +                (1 << 3),  /* Node is a Leaf */
>> +                cluster_offset, n, NULL, 0);
>> +        } else {
>> +            if (cpus->cpus[n].props.core_id != core_id) {
>> +                core_id = cpus->cpus[n].props.core_id;
>> +                core_offset = table_data->len - pptt_start;
>> +                build_processor_hierarchy_node(table_data,
>>                       (0 << 0), /* not a physical package */
>> -                    parent_offset, core, NULL, 0);
>> -            } else {
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> -                    (1 << 1) | /* ACPI Processor ID valid */
>> -                    (1 << 3),  /* Node is a Leaf */
>> -                    parent_offset, uid++, NULL, 0);
>> +                    cluster_offset, core_id, NULL, 0);
>>               }
>> -        }
>> -    }
>> -
>> -    length = g_queue_get_length(list);
>> -    for (i = 0; i < length; i++) {
>> -        int thread;
>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -        for (thread = 0; thread < ms->smp.threads; thread++) {
>> -            build_processor_hierarchy_node(
>> -                table_data,
>> +            build_processor_hierarchy_node(table_data,
>>                   (1 << 1) | /* ACPI Processor ID valid */
>>                   (1 << 2) | /* Processor is a Thread */
>>                   (1 << 3),  /* Node is a Leaf */
>> -                parent_offset, uid++, NULL, 0);
>> +                core_offset, n, NULL, 0);
>>           }
>>       }
>> -    g_queue_free(list);
>>       acpi_table_end(linker, &table);
>>   }

Thanks,
Gavin
Gonglei (Arei)" via April 14, 2022, 9:22 a.m. UTC | #9
On 2022/4/14 15:45, Gavin Shan wrote:
> Hi Yanan,
>
> On 4/14/22 11:09 AM, wangyanan (Y) wrote:
>> On 2022/4/3 22:59, Gavin Shan wrote:
>>> When the PPTT table is built, the CPU topology is re-calculated, but
>>> it's unnecessary because the CPU topology has been populated in
>>> virt_possible_cpu_arch_ids() on arm/virt machine.
>>>
>>> This reworks build_pptt() to avoid this by reusing the existing one in
>>> ms->possible_cpus. Currently, the only user of build_pptt() is
>>> arm/virt machine.
>>>
>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>> ---
>>>   hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>>   1 file changed, 38 insertions(+), 62 deletions(-)
>>>
>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>>> index 4086879ebf..4b0f9df3e3 100644
>>> --- a/hw/acpi/aml-build.c
>>> +++ b/hw/acpi/aml-build.c
>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>>                   const char *oem_id, const char *oem_table_id)
>>>   {
>>>       MachineClass *mc = MACHINE_GET_CLASS(ms);
>>> -    GQueue *list = g_queue_new();
>>> -    guint pptt_start = table_data->len;
>>> -    guint parent_offset;
>>> -    guint length, i;
>>> -    int uid = 0;
>>> -    int socket;
>>> +    CPUArchIdList *cpus = ms->possible_cpus;
>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>> nit: why not use "int" for the ID variables? (to be consistent with how
>> the IDs are stored in CpuInstanceProperties).
>>
>
> They're actually "int64_t". CpuInstanceProperties is declared in
> qemu/build/qapi/qapi-types-machine.h like below:
>
> struct CpuInstanceProperties {
>     bool has_node_id;
>     int64_t node_id;
>     bool has_socket_id;
>     int64_t socket_id;
>     bool has_die_id;
>     int64_t die_id;
>     bool has_cluster_id;
>     int64_t cluster_id;
>     bool has_core_id;
>     int64_t core_id;
>     bool has_thread_id;
>     int64_t thread_id;
> };
>
Yes, you are right.
> I doubt if we're using the same configuration to build QEMU. I use
> the following one:
>
> configure_qemu() {
>    ./configure --target-list=aarch64-softmmu --enable-numa    \
>    --enable-debug --disable-werror --enable-fdt --enable-kvm  \
>    --disable-mpath --disable-xen --disable-vnc --disable-docs \
>    --python=/usr/bin/python3 $@
> }
>
Thanks,
Yanan
>>> +    uint32_t socket_offset, cluster_offset, core_offset;
>>> +    uint32_t pptt_start = table_data->len;
>>> +    int n;
>>>       AcpiTable table = { .sig = "PPTT", .rev = 2,
>>>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
>>>       acpi_table_begin(&table, table_data);
>>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>>> -        g_queue_push_tail(list,
>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>>> -        build_processor_hierarchy_node(
>>> -            table_data,
>>> -            /*
>>> -             * Physical package - represents the boundary
>>> -             * of a physical package
>>> -             */
>>> -            (1 << 0),
>>> -            0, socket, NULL, 0);
>>> -    }
>>> +    for (n = 0; n < cpus->len; n++) {
>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>>> +            socket_id = cpus->cpus[n].props.socket_id;
>>> +            cluster_id = -1;
>>> +            core_id = -1;
>>> +            socket_offset = table_data->len - pptt_start;
>>> +            build_processor_hierarchy_node(table_data,
>>> +                (1 << 0), /* Physical package */
>>> +                0, socket_id, NULL, 0);
>>> +        }
>>> -    if (mc->smp_props.clusters_supported) {
>>> -        length = g_queue_get_length(list);
>>> -        for (i = 0; i < length; i++) {
>>> -            int cluster;
>>> -
>>> -            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
>>> -                g_queue_push_tail(list,
>>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>>> -                build_processor_hierarchy_node(
>>> -                    table_data,
>>> -                    (0 << 0), /* not a physical package */
>>> -                    parent_offset, cluster, NULL, 0);
>>> +        if (mc->smp_props.clusters_supported) {
>>> +            if (cpus->cpus[n].props.cluster_id != cluster_id) {
>>> +                cluster_id = cpus->cpus[n].props.cluster_id;
>>> +                core_id = -1;
>>> +                cluster_offset = table_data->len - pptt_start;
>>> +                build_processor_hierarchy_node(table_data,
>>> +                    (0 << 0), /* Not a physical package */
>>> +                    socket_offset, cluster_id, NULL, 0);
>>>               }
>>> +        } else {
>>> +            cluster_offset = socket_offset;
>>>           }
>>> -    }
>>> -    length = g_queue_get_length(list);
>>> -    for (i = 0; i < length; i++) {
>>> -        int core;
>>> -
>>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>> -        for (core = 0; core < ms->smp.cores; core++) {
>>> -            if (ms->smp.threads > 1) {
>>> -                g_queue_push_tail(list,
>>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>>> -                build_processor_hierarchy_node(
>>> -                    table_data,
>>> +        if (ms->smp.threads <= 1) {
>>> +            build_processor_hierarchy_node(table_data,
>>> +                (1 << 1) | /* ACPI Processor ID valid */
>>> +                (1 << 3),  /* Node is a Leaf */
>>> +                cluster_offset, n, NULL, 0);
>>> +        } else {
>>> +            if (cpus->cpus[n].props.core_id != core_id) {
>>> +                core_id = cpus->cpus[n].props.core_id;
>>> +                core_offset = table_data->len - pptt_start;
>>> +                build_processor_hierarchy_node(table_data,
>>>                       (0 << 0), /* not a physical package */
>>> -                    parent_offset, core, NULL, 0);
>>> -            } else {
>>> -                build_processor_hierarchy_node(
>>> -                    table_data,
>>> -                    (1 << 1) | /* ACPI Processor ID valid */
>>> -                    (1 << 3),  /* Node is a Leaf */
>>> -                    parent_offset, uid++, NULL, 0);
>>> +                    cluster_offset, core_id, NULL, 0);
>>>               }
>>> -        }
>>> -    }
>>> -
>>> -    length = g_queue_get_length(list);
>>> -    for (i = 0; i < length; i++) {
>>> -        int thread;
>>> -        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>>> -        for (thread = 0; thread < ms->smp.threads; thread++) {
>>> -            build_processor_hierarchy_node(
>>> -                table_data,
>>> +            build_processor_hierarchy_node(table_data,
>>>                   (1 << 1) | /* ACPI Processor ID valid */
>>>                   (1 << 2) | /* Processor is a Thread */
>>>                   (1 << 3),  /* Node is a Leaf */
>>> -                parent_offset, uid++, NULL, 0);
>>> +                core_offset, n, NULL, 0);
>>>           }
>>>       }
>>> -    g_queue_free(list);
>>>       acpi_table_end(linker, &table);
>>>   }
>
> Thanks,
> Gavin
>
>
> .
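On the int vs. int64_t question discussed in #9 above: keeping the cached *_id locals at the same int64_t width as CpuInstanceProperties sidesteps any narrowing when the property values are copied. A contrived, self-contained sketch of the pitfall (the oversized ID below is made up purely to force truncation; real topologies never get near it):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical ID above INT_MAX, only to demonstrate narrowing. */
    int64_t prop_socket_id = INT64_C(0x100000000);

    int narrow = (int)prop_socket_id; /* implementation-defined; truncates to 0 here */
    int64_t wide = prop_socket_id;    /* lossless copy */

    /* The int-cached value no longer matches the property it came from. */
    printf("int cache matches:     %d\n", narrow == prop_socket_id);
    printf("int64_t cache matches: %d\n", wide == prop_socket_id);
    return 0;
}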
Igor Mammedov April 19, 2022, 8:54 a.m. UTC | #10
On Thu, 14 Apr 2022 08:33:29 +0800
Gavin Shan <gshan@redhat.com> wrote:

> Hi Igor,
> 
> On 4/13/22 9:52 PM, Igor Mammedov wrote:
> > On Sun,  3 Apr 2022 22:59:53 +0800
> > Gavin Shan <gshan@redhat.com> wrote:
> >   
> >> When the PPTT table is built, the CPU topology is re-calculated, but
> >> it's unnecessary because the CPU topology has been populated in
> >> virt_possible_cpu_arch_ids() on arm/virt machine.
> >>
> >> This reworks build_pptt() to avoid this by reusing the existing one in
> >> ms->possible_cpus. Currently, the only user of build_pptt() is
> >> arm/virt machine.
> >>
> >> Signed-off-by: Gavin Shan <gshan@redhat.com>
> >> ---
> >>   hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
> >>   1 file changed, 38 insertions(+), 62 deletions(-)
> >>
> >> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> >> index 4086879ebf..4b0f9df3e3 100644
> >> --- a/hw/acpi/aml-build.c
> >> +++ b/hw/acpi/aml-build.c
> >> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
> >>                   const char *oem_id, const char *oem_table_id)
> >>   {
> >>       MachineClass *mc = MACHINE_GET_CLASS(ms);
> >> -    GQueue *list = g_queue_new();
> >> -    guint pptt_start = table_data->len;
> >> -    guint parent_offset;
> >> -    guint length, i;
> >> -    int uid = 0;
> >> -    int socket;
> >> +    CPUArchIdList *cpus = ms->possible_cpus;
> >> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
> >> +    uint32_t socket_offset, cluster_offset, core_offset;
> >> +    uint32_t pptt_start = table_data->len;
> >> +    int n;
> >>       AcpiTable table = { .sig = "PPTT", .rev = 2,
> >>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
> >>   
> >>       acpi_table_begin(&table, table_data);
> >>   
> >> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
> >> -        g_queue_push_tail(list,
> >> -            GUINT_TO_POINTER(table_data->len - pptt_start));
> >> -        build_processor_hierarchy_node(
> >> -            table_data,
> >> -            /*
> >> -             * Physical package - represents the boundary
> >> -             * of a physical package
> >> -             */
> >> -            (1 << 0),
> >> -            0, socket, NULL, 0);
> >> -    }
> >> +    for (n = 0; n < cpus->len; n++) {  
> >   
> >> +        if (cpus->cpus[n].props.socket_id != socket_id) {
> >> +            socket_id = cpus->cpus[n].props.socket_id;  
> > 
> > this relies on cpus->cpus[n].props.*_id being sorted from top to bottom levels.
> > I'd add here, and for the other container IDs, an assert() that checks that each
> > specific ID goes in only one direction, to be able to detect when the rule is broken.
> >
> > otherwise one may end up with duplicate containers silently.
> >   
> 
> Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids().
> The only user of build_pptt() is the arm/virt machine. So it's fine. However, I think
> I may need to add comments for this in v6.
> 
>      /*
>       * This works with the assumption that cpus[n].props.*_id has been
>       * sorted from top to bottom levels in mc->possible_cpu_arch_ids().
>       * Otherwise, unexpected and duplicate containers will be created.
>       */
> 
> The implementation in v3 looks complicated, but comprehensive. The one
> in this revision (v6) looks simple, but we're losing flexibility :)


A comment is not enough, as it will break silently; that's why I suggested
sprinkling asserts() here.

[...]
> Thanks,
> Gavin
>
Gavin Shan April 20, 2022, 5:19 a.m. UTC | #11
Hi Igor,

On 4/19/22 4:54 PM, Igor Mammedov wrote:
> On Thu, 14 Apr 2022 08:33:29 +0800
> Gavin Shan <gshan@redhat.com> wrote:
>> On 4/13/22 9:52 PM, Igor Mammedov wrote:
>>> On Sun,  3 Apr 2022 22:59:53 +0800
>>> Gavin Shan <gshan@redhat.com> wrote:
>>>    
>>>> When the PPTT table is built, the CPU topology is re-calculated, but
>>>> it's unnecessary because the CPU topology has been populated in
>>>> virt_possible_cpu_arch_ids() on arm/virt machine.
>>>>
>>>> This reworks build_pptt() to avoid this by reusing the existing one in
>>>> ms->possible_cpus. Currently, the only user of build_pptt() is
>>>> arm/virt machine.
>>>>
>>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>>> ---
>>>>    hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>>>    1 file changed, 38 insertions(+), 62 deletions(-)
>>>>
>>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>>>> index 4086879ebf..4b0f9df3e3 100644
>>>> --- a/hw/acpi/aml-build.c
>>>> +++ b/hw/acpi/aml-build.c
>>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>>>                    const char *oem_id, const char *oem_table_id)
>>>>    {
>>>>        MachineClass *mc = MACHINE_GET_CLASS(ms);
>>>> -    GQueue *list = g_queue_new();
>>>> -    guint pptt_start = table_data->len;
>>>> -    guint parent_offset;
>>>> -    guint length, i;
>>>> -    int uid = 0;
>>>> -    int socket;
>>>> +    CPUArchIdList *cpus = ms->possible_cpus;
>>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>>>> +    uint32_t socket_offset, cluster_offset, core_offset;
>>>> +    uint32_t pptt_start = table_data->len;
>>>> +    int n;
>>>>        AcpiTable table = { .sig = "PPTT", .rev = 2,
>>>>                            .oem_id = oem_id, .oem_table_id = oem_table_id };
>>>>    
>>>>        acpi_table_begin(&table, table_data);
>>>>    
>>>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>>>> -        g_queue_push_tail(list,
>>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>>>> -        build_processor_hierarchy_node(
>>>> -            table_data,
>>>> -            /*
>>>> -             * Physical package - represents the boundary
>>>> -             * of a physical package
>>>> -             */
>>>> -            (1 << 0),
>>>> -            0, socket, NULL, 0);
>>>> -    }
>>>> +    for (n = 0; n < cpus->len; n++) {
>>>    
>>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>>>> +            socket_id = cpus->cpus[n].props.socket_id;
>>>
>>> this relies on cpus->cpus[n].props.*_id being sorted from top to bottom levels.
>>> I'd add here, and for the other container IDs, an assert() that checks that each
>>> specific ID goes in only one direction, to be able to detect when the rule is broken.
>>>
>>> otherwise one may end up with duplicate containers silently.
>>>    
>>
>> Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids().
>> The only user of build_pptt() is the arm/virt machine. So it's fine. However, I think
>> I may need to add comments for this in v6.
>>
>>       /*
>>        * This works with the assumption that cpus[n].props.*_id has been
>>        * sorted from top to bottom levels in mc->possible_cpu_arch_ids().
>>        * Otherwise, unexpected and duplicate containers will be created.
>>        */
>>
>> The implementation in v3 looks complicated, but comprehensive. The one
>> in this revision (v6) looks simple, but we're losing flexibility :)
> 
> 
> A comment is not enough, as it will break silently; that's why I suggested
> sprinkling asserts() here.
> 

I don't think it breaks anything. Duplicated PPTT entries are allowed in
Linux at least. The IDs in the duplicated PPTT entries should be the same.
Otherwise, the exposed CPU topology is really broken.

I don't think it's harmful to add the check and assert, so I will introduce
a helper function like below in v7. Sadly, v6 was posted before I received
your confirmation. Igor, could you please check whether the changes, to be
included in v7, look good to you? The complete patch is also attached :)

+static bool pptt_entry_exists(MachineState *ms, int n, bool check_socket_id,
+                              bool check_cluster_id, bool check_core_id)
+{
+    CPUArchId *cpus = ms->possible_cpus->cpus;
+    CpuInstanceProperties *t = &cpus[n].props;
+    CpuInstanceProperties *s;
+    bool match;
+    int i;
+
+    for (i = 0; i < n; i++) {
+        match = true;
+        s = &cpus[i].props;
+
+        if (check_socket_id && s->socket_id != t->socket_id) {
+            match = false;
+        }
+
+        if (match && check_cluster_id && s->cluster_id != t->cluster_id) {
+            match = false;
+        }
+
+        if (match && check_core_id && s->core_id != t->core_id) {
+            match = false;
+        }
+
+        if (match) {
+            return true;
+        }
+    }
+
+    return false;
+}

The following assert() will be applied in build_pptt():

assert(!pptt_entry_exists(ms, n, true, false, false));           /* socket  */
assert(!pptt_entry_exists(ms, n, true, true, false));            /* cluster */
assert(!pptt_entry_exists(ms, n, true,
            mc->smp_props.clusters_supported, true));             /* core    */

Thanks,
Gavin
Igor Mammedov April 20, 2022, 8:10 a.m. UTC | #12
On Wed, 20 Apr 2022 13:19:34 +0800
Gavin Shan <gshan@redhat.com> wrote:

> Hi Igor,
> 
> On 4/19/22 4:54 PM, Igor Mammedov wrote:
> > On Thu, 14 Apr 2022 08:33:29 +0800
> > Gavin Shan <gshan@redhat.com> wrote:  
> >> On 4/13/22 9:52 PM, Igor Mammedov wrote:  
> >>> On Sun,  3 Apr 2022 22:59:53 +0800
> >>> Gavin Shan <gshan@redhat.com> wrote:
> >>>      
> >>>> When the PPTT table is built, the CPU topology is re-calculated, but
> >>>> it's unnecessary because the CPU topology has been populated in
> >>>> virt_possible_cpu_arch_ids() on arm/virt machine.
> >>>>
> >>>> This reworks build_pptt() to avoid this by reusing the existing one in
> >>>> ms->possible_cpus. Currently, the only user of build_pptt() is
> >>>> arm/virt machine.
> >>>>
> >>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
> >>>> ---
> >>>>    hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
> >>>>    1 file changed, 38 insertions(+), 62 deletions(-)
> >>>>
> >>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> >>>> index 4086879ebf..4b0f9df3e3 100644
> >>>> --- a/hw/acpi/aml-build.c
> >>>> +++ b/hw/acpi/aml-build.c
> >>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
> >>>>                    const char *oem_id, const char *oem_table_id)
> >>>>    {
> >>>>        MachineClass *mc = MACHINE_GET_CLASS(ms);
> >>>> -    GQueue *list = g_queue_new();
> >>>> -    guint pptt_start = table_data->len;
> >>>> -    guint parent_offset;
> >>>> -    guint length, i;
> >>>> -    int uid = 0;
> >>>> -    int socket;
> >>>> +    CPUArchIdList *cpus = ms->possible_cpus;
> >>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
> >>>> +    uint32_t socket_offset, cluster_offset, core_offset;
> >>>> +    uint32_t pptt_start = table_data->len;
> >>>> +    int n;
> >>>>        AcpiTable table = { .sig = "PPTT", .rev = 2,
> >>>>                            .oem_id = oem_id, .oem_table_id = oem_table_id };
> >>>>    
> >>>>        acpi_table_begin(&table, table_data);
> >>>>    
> >>>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
> >>>> -        g_queue_push_tail(list,
> >>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
> >>>> -        build_processor_hierarchy_node(
> >>>> -            table_data,
> >>>> -            /*
> >>>> -             * Physical package - represents the boundary
> >>>> -             * of a physical package
> >>>> -             */
> >>>> -            (1 << 0),
> >>>> -            0, socket, NULL, 0);
> >>>> -    }
> >>>> +    for (n = 0; n < cpus->len; n++) {  
> >>>      
> >>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
> >>>> +            socket_id = cpus->cpus[n].props.socket_id;  
> >>>
> >>> this relies on cpus->cpus[n].props.*_id being sorted from top to bottom levels.
> >>> I'd add here, and for the other container IDs, an assert() that checks that each
> >>> specific ID goes in only one direction, to be able to detect when the rule is broken.
> >>>
> >>> otherwise one may end up with duplicate containers silently.
> >>>      
> >>
> >> Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids().
> >> The only user of build_pptt() is the arm/virt machine. So it's fine. However, I think
> >> I may need to add comments for this in v6.
> >>
> >>       /*
> >>        * This works with the assumption that cpus[n].props.*_id has been
> >>        * sorted from top to bottom levels in mc->possible_cpu_arch_ids().
> >>        * Otherwise, unexpected and duplicate containers will be created.
> >>        */
> >>
> >> The implementation in v3 looks complicated, but comprehensive. The one
> >> in this revision (v6) looks simple, but we're losing flexibility :)
> > 
> > 
> > A comment is not enough, as it will break silently; that's why I suggested
> > sprinkling asserts() here.
> >   
> 
> I don't think it breaks anything. Duplicated PPTT entries are allowed in
> Linux at least. The IDs in the duplicated PPTT entries should be the same.
> Otherwise, the exposed CPU topology is really broken.

The spec doesn't say anything about allowing duplicate entries, so I'd rather
avoid that (if you find such a provision in the spec then put a reference
in this commit message to end the discussion on duplicates).


> 
> I don't think it's harmful to add the check and assert, so I will introduce
> a helper function like below in v7. Sadly, v6 was posted before I received
> your confirmation. Igor, could you please check whether the changes, to be
> included in v7, look good to you? The complete patch is also attached :)
> 
> +static bool pptt_entry_exists(MachineState *ms, int n, bool check_socket_id,
> +                              bool check_cluster_id, bool check_core_id)
> +{
> +    CPUArchId *cpus = ms->possible_cpus->cpus;
> +    CpuInstanceProperties *t = &cpus[n].props;
> +    CpuInstanceProperties *s;
> +    bool match;
> +    int i;
> +
> +    for (i = 0; i < n; i++) {

Wouldn't it make the whole thing O(n^2) in the worst case?

I suggest putting asserts directly into build_pptt() and, considering that
it relies on IDs being sorted, doing something like this:
   assert(foo_id_val > previous_id)
which will ensure that an ID doesn't jump back unexpectedly


> +        match = true;
> +        s = &cpus[i].props;
> +
> +        if (check_socket_id && s->socket_id != t->socket_id) {
> +            match = false;
> +        }
> +
> +        if (match && check_cluster_id && s->cluster_id != t->cluster_id) {
> +            match = false;
> +        }
> +
> +        if (match && check_core_id && s->core_id != t->core_id) {
> +            match = false;
> +        }
> +
> +        if (match) {
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
> 
> The following assert() will be applied in build_pptt():
> 
> assert(!pptt_entry_exists(ms, n, true, false, false));           /* socket  */
> assert(!pptt_entry_exists(ms, n, true, true, false));            /* cluster */
> assert(!pptt_entry_exists(ms, n, true,
>             mc->smp_props.clusters_supported, true));             /* core    */
> 
> Thanks,
> Gavin
>
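For illustration, a minimal, self-contained sketch of the monotonicity asserts suggested above might look like the following. DemoCpuProps and check_sorted_topology are illustrative stand-ins rather than QEMU's actual declarations; in the real patch the asserts would sit inline in build_pptt()'s loop:

#include <assert.h>
#include <stdint.h>

/* Illustrative stand-in for the fields build_pptt() reads from
 * CpuInstanceProperties. */
typedef struct {
    int64_t socket_id;
    int64_t cluster_id;
    int64_t core_id;
} DemoCpuProps;

/* Walk the possible CPUs in order and assert that each level's ID only
 * moves forward; going backwards would mean the list is not sorted from
 * the top level down and duplicate containers would be emitted. */
static void check_sorted_topology(const DemoCpuProps *cpus, int len)
{
    int64_t socket_id = -1, cluster_id = -1, core_id = -1;

    for (int n = 0; n < len; n++) {
        if (cpus[n].socket_id != socket_id) {
            assert(cpus[n].socket_id > socket_id);
            socket_id = cpus[n].socket_id;
            cluster_id = -1;
            core_id = -1;
        }
        if (cpus[n].cluster_id != cluster_id) {
            assert(cpus[n].cluster_id > cluster_id);
            cluster_id = cpus[n].cluster_id;
            core_id = -1;
        }
        if (cpus[n].core_id != core_id) {
            assert(cpus[n].core_id > core_id);
            core_id = cpus[n].core_id;
        }
    }
}

int main(void)
{
    /* 2 sockets x 1 cluster x 2 cores, sorted from the top level down. */
    DemoCpuProps cpus[] = {
        { 0, 0, 0 }, { 0, 0, 1 }, { 1, 0, 0 }, { 1, 0, 1 },
    };
    check_sorted_topology(cpus, 4);
    return 0;
}

Each check compares only against the last ID seen at its level, so the whole pass stays O(n) over the possible-CPU list, unlike the O(n^2) scan in the pptt_entry_exists() helper above.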
Gavin Shan April 20, 2022, 10:22 a.m. UTC | #13
Hi Igor,

On 4/20/22 4:10 PM, Igor Mammedov wrote:
> On Wed, 20 Apr 2022 13:19:34 +0800
> Gavin Shan <gshan@redhat.com> wrote:
>> On 4/19/22 4:54 PM, Igor Mammedov wrote:
>>> On Thu, 14 Apr 2022 08:33:29 +0800
>>> Gavin Shan <gshan@redhat.com> wrote:
>>>> On 4/13/22 9:52 PM, Igor Mammedov wrote:
>>>>> On Sun,  3 Apr 2022 22:59:53 +0800
>>>>> Gavin Shan <gshan@redhat.com> wrote:
>>>>>       
>>>>>> When the PPTT table is built, the CPU topology is re-calculated, but
>>>>>> it's unnecessary because the CPU topology has been populated in
>>>>>> virt_possible_cpu_arch_ids() on arm/virt machine.
>>>>>>
>>>>>> This reworks build_pptt() to avoid this by reusing the existing one in
>>>>>> ms->possible_cpus. Currently, the only user of build_pptt() is
>>>>>> arm/virt machine.
>>>>>>
>>>>>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>>>>>> ---
>>>>>>     hw/acpi/aml-build.c | 100 +++++++++++++++++---------------------------
>>>>>>     1 file changed, 38 insertions(+), 62 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>>>>>> index 4086879ebf..4b0f9df3e3 100644
>>>>>> --- a/hw/acpi/aml-build.c
>>>>>> +++ b/hw/acpi/aml-build.c
>>>>>> @@ -2002,86 +2002,62 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>>>>>                     const char *oem_id, const char *oem_table_id)
>>>>>>     {
>>>>>>         MachineClass *mc = MACHINE_GET_CLASS(ms);
>>>>>> -    GQueue *list = g_queue_new();
>>>>>> -    guint pptt_start = table_data->len;
>>>>>> -    guint parent_offset;
>>>>>> -    guint length, i;
>>>>>> -    int uid = 0;
>>>>>> -    int socket;
>>>>>> +    CPUArchIdList *cpus = ms->possible_cpus;
>>>>>> +    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
>>>>>> +    uint32_t socket_offset, cluster_offset, core_offset;
>>>>>> +    uint32_t pptt_start = table_data->len;
>>>>>> +    int n;
>>>>>>         AcpiTable table = { .sig = "PPTT", .rev = 2,
>>>>>>                             .oem_id = oem_id, .oem_table_id = oem_table_id };
>>>>>>     
>>>>>>         acpi_table_begin(&table, table_data);
>>>>>>     
>>>>>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>>>>>> -        g_queue_push_tail(list,
>>>>>> -            GUINT_TO_POINTER(table_data->len - pptt_start));
>>>>>> -        build_processor_hierarchy_node(
>>>>>> -            table_data,
>>>>>> -            /*
>>>>>> -             * Physical package - represents the boundary
>>>>>> -             * of a physical package
>>>>>> -             */
>>>>>> -            (1 << 0),
>>>>>> -            0, socket, NULL, 0);
>>>>>> -    }
>>>>>> +    for (n = 0; n < cpus->len; n++) {
>>>>>       
>>>>>> +        if (cpus->cpus[n].props.socket_id != socket_id) {
>>>>>> +            socket_id = cpus->cpus[n].props.socket_id;
>>>>>
>>>>> this relies on cpus->cpus[n].props.*_id being sorted from top to bottom levels.
>>>>> I'd add here, and for the other container IDs, an assert() that checks that each
>>>>> specific ID goes in only one direction, to be able to detect when the rule is broken.
>>>>>
>>>>> otherwise one may end up with duplicate containers silently.
>>>>>       
>>>>
>>>> Exactly. cpus->cpus[n].props.*_id is sorted as you said in virt_possible_cpu_arch_ids().
>>>> The only user of build_pptt() is the arm/virt machine. So it's fine. However, I think
>>>> I may need to add comments for this in v6.
>>>>
>>>>        /*
>>>>         * This works with the assumption that cpus[n].props.*_id has been
>>>>         * sorted from top to bottom levels in mc->possible_cpu_arch_ids().
>>>>         * Otherwise, unexpected and duplicate containers will be created.
>>>>         */
>>>>
>>>> The implementation in v3 looks complicated, but comprehensive. The one
>>>> in this revision (v6) looks simple, but we're losing flexibility :)
>>>
>>>
>>> A comment is not enough, as it will break silently; that's why I suggested
>>> sprinkling asserts() here.
>>>    
>>
>> I don't think it breaks anything. Duplicated PPTT entries are allowed in
>> Linux at least. The IDs in the duplicated PPTT entries should be the same.
>> Otherwise, the exposed CPU topology is really broken.
> 
> The spec doesn't say anything about allowing duplicate entries, so I'd rather
> avoid that (if you find such a provision in the spec then put a reference
> in this commit message to end the discussion on duplicates).
> 

Yes, the spec doesn't say it's allowed. I checked the Linux implementation of
how the PPTT table is parsed. Duplicate entries actually won't break anything
in Linux, but I'm not sure about other OSes, so I think the check is still needed.

> 
>>
>> I don't think it's harmful to add the check and assert, so I will introduce
>> a helper function like below in v7. Sadly, v6 was posted before I received
>> your confirmation. Igor, could you please check whether the changes, to be
>> included in v7, look good to you? The complete patch is also attached :)
>>
>> +static bool pptt_entry_exists(MachineState *ms, int n, bool check_socket_id,
>> +                              bool check_cluster_id, bool check_core_id)
>> +{
>> +    CPUArchId *cpus = ms->possible_cpus->cpus;
>> +    CpuInstanceProperties *t = &cpus[n].props;
>> +    CpuInstanceProperties *s;
>> +    bool match;
>> +    int i;
>> +
>> +    for (i = 0; i < n; i++) {
> 
> Wouldn't it make the whole thing O(n^2) in the worst case?
> 
> I suggest putting asserts directly into build_pptt() and, considering that
> it relies on IDs being sorted, doing something like this:
>     assert(foo_id_val > previous_id)
> which will ensure that an ID doesn't jump back unexpectedly
> 

Yes, your suggested method is much simpler and more efficient. I will
include it in v7. By the way, please skip v6 and go ahead and review
v7 directly. v7 should be posted shortly after some tests :)

> 
>> +        match = true;
>> +        s = &cpus[i].props;
>> +
>> +        if (check_socket_id && s->socket_id != t->socket_id) {
>> +            match = false;
>> +        }
>> +
>> +        if (match && check_cluster_id && s->cluster_id != t->cluster_id) {
>> +            match = false;
>> +        }
>> +
>> +        if (match && check_core_id && s->core_id != t->core_id) {
>> +            match = false;
>> +        }
>> +
>> +        if (match) {
>> +            return true;
>> +        }
>> +    }
>> +
>> +    return false;
>> +}
>>
>> The following assert() will be applied in build_pptt():
>>
>> assert(!pptt_entry_exists(ms, n, true, false, false));           /* socket  */
>> assert(!pptt_entry_exists(ms, n, true, true, false));            /* cluster */
>> assert(!pptt_entry_exists(ms, n, true,
>>              mc->smp_props.clusters_supported, true));             /* core    */
>>

Thanks,
Gavin
diff mbox series

Patch

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 4086879ebf..4b0f9df3e3 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2002,86 +2002,62 @@  void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 const char *oem_id, const char *oem_table_id)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
-    GQueue *list = g_queue_new();
-    guint pptt_start = table_data->len;
-    guint parent_offset;
-    guint length, i;
-    int uid = 0;
-    int socket;
+    CPUArchIdList *cpus = ms->possible_cpus;
+    int64_t socket_id = -1, cluster_id = -1, core_id = -1;
+    uint32_t socket_offset, cluster_offset, core_offset;
+    uint32_t pptt_start = table_data->len;
+    int n;
     AcpiTable table = { .sig = "PPTT", .rev = 2,
                         .oem_id = oem_id, .oem_table_id = oem_table_id };
 
     acpi_table_begin(&table, table_data);
 
-    for (socket = 0; socket < ms->smp.sockets; socket++) {
-        g_queue_push_tail(list,
-            GUINT_TO_POINTER(table_data->len - pptt_start));
-        build_processor_hierarchy_node(
-            table_data,
-            /*
-             * Physical package - represents the boundary
-             * of a physical package
-             */
-            (1 << 0),
-            0, socket, NULL, 0);
-    }
+    for (n = 0; n < cpus->len; n++) {
+        if (cpus->cpus[n].props.socket_id != socket_id) {
+            socket_id = cpus->cpus[n].props.socket_id;
+            cluster_id = -1;
+            core_id = -1;
+            socket_offset = table_data->len - pptt_start;
+            build_processor_hierarchy_node(table_data,
+                (1 << 0), /* Physical package */
+                0, socket_id, NULL, 0);
+        }
 
-    if (mc->smp_props.clusters_supported) {
-        length = g_queue_get_length(list);
-        for (i = 0; i < length; i++) {
-            int cluster;
-
-            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
-                g_queue_push_tail(list,
-                    GUINT_TO_POINTER(table_data->len - pptt_start));
-                build_processor_hierarchy_node(
-                    table_data,
-                    (0 << 0), /* not a physical package */
-                    parent_offset, cluster, NULL, 0);
+        if (mc->smp_props.clusters_supported) {
+            if (cpus->cpus[n].props.cluster_id != cluster_id) {
+                cluster_id = cpus->cpus[n].props.cluster_id;
+                core_id = -1;
+                cluster_offset = table_data->len - pptt_start;
+                build_processor_hierarchy_node(table_data,
+                    (0 << 0), /* Not a physical package */
+                    socket_offset, cluster_id, NULL, 0);
             }
+        } else {
+            cluster_offset = socket_offset;
         }
-    }
 
-    length = g_queue_get_length(list);
-    for (i = 0; i < length; i++) {
-        int core;
-
-        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-        for (core = 0; core < ms->smp.cores; core++) {
-            if (ms->smp.threads > 1) {
-                g_queue_push_tail(list,
-                    GUINT_TO_POINTER(table_data->len - pptt_start));
-                build_processor_hierarchy_node(
-                    table_data,
+        if (ms->smp.threads <= 1) {
+            build_processor_hierarchy_node(table_data,
+                (1 << 1) | /* ACPI Processor ID valid */
+                (1 << 3),  /* Node is a Leaf */
+                cluster_offset, n, NULL, 0);
+        } else {
+            if (cpus->cpus[n].props.core_id != core_id) {
+                core_id = cpus->cpus[n].props.core_id;
+                core_offset = table_data->len - pptt_start;
+                build_processor_hierarchy_node(table_data,
                     (0 << 0), /* not a physical package */
-                    parent_offset, core, NULL, 0);
-            } else {
-                build_processor_hierarchy_node(
-                    table_data,
-                    (1 << 1) | /* ACPI Processor ID valid */
-                    (1 << 3),  /* Node is a Leaf */
-                    parent_offset, uid++, NULL, 0);
+                    cluster_offset, core_id, NULL, 0);
             }
-        }
-    }
-
-    length = g_queue_get_length(list);
-    for (i = 0; i < length; i++) {
-        int thread;
 
-        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-        for (thread = 0; thread < ms->smp.threads; thread++) {
-            build_processor_hierarchy_node(
-                table_data,
+            build_processor_hierarchy_node(table_data,
                 (1 << 1) | /* ACPI Processor ID valid */
                 (1 << 2) | /* Processor is a Thread */
                 (1 << 3),  /* Node is a Leaf */
-                parent_offset, uid++, NULL, 0);
+                core_offset, n, NULL, 0);
         }
     }
 
-    g_queue_free(list);
     acpi_table_end(linker, &table);
 }
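
The rework above leans on ms->possible_cpus being pre-sorted from the socket level down, which mc->possible_cpu_arch_ids() provides on the arm/virt machine. A simplified, self-contained sketch of why index order implies sorted IDs (the divisors here are an assumption modelled on the discussion; virt_possible_cpu_arch_ids() may differ in detail, e.g. if dies are involved):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const int sockets = 2, clusters = 2, cores = 2, threads = 2;
    const int len = sockets * clusters * cores * threads;

    for (int n = 0; n < len; n++) {
        /* Each ID is derived from the linear CPU index, so it can only
         * move forward as n grows (resetting when its parent advances). */
        int64_t socket_id  = n / (clusters * cores * threads);
        int64_t cluster_id = (n / (cores * threads)) % clusters;
        int64_t core_id    = (n / threads) % cores;
        int64_t thread_id  = n % threads;

        printf("cpu%d: socket %" PRId64 " cluster %" PRId64
               " core %" PRId64 " thread %" PRId64 "\n",
               n, socket_id, cluster_id, core_id, thread_id);
    }
    return 0;
}

Because each level's ID only moves forward while its parent's ID is unchanged, walking the CPUs in index order visits each socket, cluster, and core exactly once, which is what lets build_pptt() emit a container node only when an ID changes.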