diff mbox series

[v3,3/4] hw/acpi/aml-build: Use existing CPU topology to build PPTT table

Message ID 20220323072438.71815-4-gshan@redhat.com (mailing list archive)
State New, archived
Headers show
Series hw/arm/virt: Fix CPU's default NUMA node ID | expand

Commit Message

Gavin Shan March 23, 2022, 7:24 a.m. UTC
When the PPTT table is built, the CPU topology is re-calculated, but
it's unecessary because the CPU topology has been populated in
virt_possible_cpu_arch_ids() on arm/virt machine.

This avoids to re-calculate the CPU topology by reusing the existing
one in ms->possible_cpus. Currently, the only user of build_pptt() is
arm/virt machine.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 hw/acpi/aml-build.c | 96 +++++++++++++++++++++++++++++++++------------
 1 file changed, 72 insertions(+), 24 deletions(-)

Comments

Igor Mammedov March 30, 2022, 2:10 p.m. UTC | #1
On Wed, 23 Mar 2022 15:24:37 +0800
Gavin Shan <gshan@redhat.com> wrote:

> When the PPTT table is built, the CPU topology is re-calculated, but
> it's unecessary because the CPU topology has been populated in
> virt_possible_cpu_arch_ids() on arm/virt machine.
> 
> This avoids to re-calculate the CPU topology by reusing the existing
> one in ms->possible_cpus. Currently, the only user of build_pptt() is
> arm/virt machine.
> 
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>  hw/acpi/aml-build.c | 96 +++++++++++++++++++++++++++++++++------------
>  1 file changed, 72 insertions(+), 24 deletions(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index 4086879ebf..10a2d63b96 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -2002,18 +2002,27 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>                  const char *oem_id, const char *oem_table_id)
>  {
>      MachineClass *mc = MACHINE_GET_CLASS(ms);
> +    CPUArchIdList *cpus = ms->possible_cpus;
> +    GQueue *socket_list = g_queue_new();
> +    GQueue *cluster_list = g_queue_new();
> +    GQueue *core_list = g_queue_new();
>      GQueue *list = g_queue_new();
>      guint pptt_start = table_data->len;
>      guint parent_offset;
>      guint length, i;
> -    int uid = 0;
> -    int socket;
> +    int n, socket_id, cluster_id, core_id, thread_id;
>      AcpiTable table = { .sig = "PPTT", .rev = 2,
>                          .oem_id = oem_id, .oem_table_id = oem_table_id };
>  
>      acpi_table_begin(&table, table_data);
>  
> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
> +    for (n = 0; n < cpus->len; n++) {
> +        socket_id = cpus->cpus[n].props.socket_id;
> +        if (g_queue_find(socket_list, GUINT_TO_POINTER(socket_id))) {
> +            continue;
> +        }

maybe instead of scanning cpus[n] every time for each topology level
and trying to keep code flattened (which mimics PPTT fattened tree
table for not much of the reason, spec doesn't require entries
from the same level to e described contiguously),
try to rebuild hierarchy tree from flat cpus[n] in 1 pass first
and then use nested loops or recursion to build PPTT table,
something like:

 sockets = cpus_to_topo(possible) 
 build_pptt_level(items = sockets, parent_ref = 0)
  for item in items
     level_ref = table_data->len - pptt_start
     build_processor_hierarchy_node(item {id, flags, ...}, parent_ref)
     if not leaf:
        build_pptt_level(item, level_ref)

which is much more compact and easier to read compared to
unrolled impl. it's now with all push/pop stack emulation.


> +
> +        g_queue_push_tail(socket_list, GUINT_TO_POINTER(socket_id));
>          g_queue_push_tail(list,
>              GUINT_TO_POINTER(table_data->len - pptt_start));
>          build_processor_hierarchy_node(
> @@ -2023,65 +2032,104 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>               * of a physical package
>               */
>              (1 << 0),
> -            0, socket, NULL, 0);
> +            0, socket_id, NULL, 0);
>      }
>  
>      if (mc->smp_props.clusters_supported) {
>          length = g_queue_get_length(list);
>          for (i = 0; i < length; i++) {
> -            int cluster;
> -
>              parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
> +            socket_id = GPOINTER_TO_UINT(g_queue_pop_head(socket_list));
> +
> +            for (n = 0; n < cpus->len; n++) {
> +                if (cpus->cpus[n].props.socket_id != socket_id) {
> +                    continue;
> +                }
> +
> +                cluster_id = cpus->cpus[n].props.cluster_id;
> +                if (g_queue_find(cluster_list, GUINT_TO_POINTER(cluster_id))) {
> +                    continue;
> +                }
> +
> +                g_queue_push_tail(cluster_list, GUINT_TO_POINTER(cluster_id));
>                  g_queue_push_tail(list,
>                      GUINT_TO_POINTER(table_data->len - pptt_start));
>                  build_processor_hierarchy_node(
>                      table_data,
>                      (0 << 0), /* not a physical package */
> -                    parent_offset, cluster, NULL, 0);
> +                    parent_offset, cluster_id, NULL, 0);
>              }
>          }
>      }
>  
>      length = g_queue_get_length(list);
>      for (i = 0; i < length; i++) {
> -        int core;
> -
>          parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (core = 0; core < ms->smp.cores; core++) {
> -            if (ms->smp.threads > 1) {
> -                g_queue_push_tail(list,
> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
> -                build_processor_hierarchy_node(
> -                    table_data,
> -                    (0 << 0), /* not a physical package */
> -                    parent_offset, core, NULL, 0);
> -            } else {
> +        if (!mc->smp_props.clusters_supported) {
> +            socket_id = GPOINTER_TO_UINT(g_queue_pop_head(socket_list));
> +        } else {
> +            cluster_id = GPOINTER_TO_UINT(g_queue_pop_head(cluster_list));
> +        }
> +
> +        for (n = 0; n < cpus->len; n++) {
> +            if (!mc->smp_props.clusters_supported &&
> +                cpus->cpus[n].props.socket_id != socket_id) {
> +                continue;
> +            }
> +
> +            if (mc->smp_props.clusters_supported &&
> +                cpus->cpus[n].props.cluster_id != cluster_id) {
> +                continue;
> +            }
> +
> +            core_id = cpus->cpus[n].props.core_id;
> +            if (ms->smp.threads <= 1) {
>                  build_processor_hierarchy_node(
>                      table_data,
>                      (1 << 1) | /* ACPI Processor ID valid */
>                      (1 << 3),  /* Node is a Leaf */
> -                    parent_offset, uid++, NULL, 0);
> +                    parent_offset, core_id, NULL, 0);
> +                continue;
> +            }
> +
> +            if (g_queue_find(core_list, GUINT_TO_POINTER(core_id))) {
> +                continue;
>              }
> +
> +            g_queue_push_tail(core_list, GUINT_TO_POINTER(core_id));
> +            g_queue_push_tail(list,
> +                GUINT_TO_POINTER(table_data->len - pptt_start));
> +            build_processor_hierarchy_node(
> +                table_data,
> +                (0 << 0), /* not a physical package */
> +                parent_offset, core_id, NULL, 0);
>          }
>      }
>  
>      length = g_queue_get_length(list);
>      for (i = 0; i < length; i++) {
> -        int thread;
> -
>          parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
> -        for (thread = 0; thread < ms->smp.threads; thread++) {
> +        core_id = GPOINTER_TO_UINT(g_queue_pop_head(core_list));
> +
> +        for (n = 0; n < cpus->len; n++) {
> +            if (cpus->cpus[n].props.core_id != core_id) {
> +                continue;
> +            }
> +
> +            thread_id = cpus->cpus[n].props.thread_id;
>              build_processor_hierarchy_node(
>                  table_data,
>                  (1 << 1) | /* ACPI Processor ID valid */
>                  (1 << 2) | /* Processor is a Thread */
>                  (1 << 3),  /* Node is a Leaf */
> -                parent_offset, uid++, NULL, 0);
> +                parent_offset, thread_id, NULL, 0);
>          }
>      }
>  
>      g_queue_free(list);
> +    g_queue_free(core_list);
> +    g_queue_free(cluster_list);
> +    g_queue_free(socket_list);
>      acpi_table_end(linker, &table);
>  }
>
Gavin Shan April 3, 2022, 2:40 p.m. UTC | #2
Hi Igor,

On 3/30/22 10:10 PM, Igor Mammedov wrote:
> On Wed, 23 Mar 2022 15:24:37 +0800
> Gavin Shan <gshan@redhat.com> wrote:
> 
>> When the PPTT table is built, the CPU topology is re-calculated, but
>> it's unecessary because the CPU topology has been populated in
>> virt_possible_cpu_arch_ids() on arm/virt machine.
>>
>> This avoids to re-calculate the CPU topology by reusing the existing
>> one in ms->possible_cpus. Currently, the only user of build_pptt() is
>> arm/virt machine.
>>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   hw/acpi/aml-build.c | 96 +++++++++++++++++++++++++++++++++------------
>>   1 file changed, 72 insertions(+), 24 deletions(-)
>>
>> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
>> index 4086879ebf..10a2d63b96 100644
>> --- a/hw/acpi/aml-build.c
>> +++ b/hw/acpi/aml-build.c
>> @@ -2002,18 +2002,27 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>                   const char *oem_id, const char *oem_table_id)
>>   {
>>       MachineClass *mc = MACHINE_GET_CLASS(ms);
>> +    CPUArchIdList *cpus = ms->possible_cpus;
>> +    GQueue *socket_list = g_queue_new();
>> +    GQueue *cluster_list = g_queue_new();
>> +    GQueue *core_list = g_queue_new();
>>       GQueue *list = g_queue_new();
>>       guint pptt_start = table_data->len;
>>       guint parent_offset;
>>       guint length, i;
>> -    int uid = 0;
>> -    int socket;
>> +    int n, socket_id, cluster_id, core_id, thread_id;
>>       AcpiTable table = { .sig = "PPTT", .rev = 2,
>>                           .oem_id = oem_id, .oem_table_id = oem_table_id };
>>   
>>       acpi_table_begin(&table, table_data);
>>   
>> -    for (socket = 0; socket < ms->smp.sockets; socket++) {
>> +    for (n = 0; n < cpus->len; n++) {
>> +        socket_id = cpus->cpus[n].props.socket_id;
>> +        if (g_queue_find(socket_list, GUINT_TO_POINTER(socket_id))) {
>> +            continue;
>> +        }
> 
> maybe instead of scanning cpus[n] every time for each topology level
> and trying to keep code flattened (which mimics PPTT fattened tree
> table for not much of the reason, spec doesn't require entries
> from the same level to e described contiguously),
> try to rebuild hierarchy tree from flat cpus[n] in 1 pass first
> and then use nested loops or recursion to build PPTT table,
> something like:
> 
>   sockets = cpus_to_topo(possible)
>   build_pptt_level(items = sockets, parent_ref = 0)
>    for item in items
>       level_ref = table_data->len - pptt_start
>       build_processor_hierarchy_node(item {id, flags, ...}, parent_ref)
>       if not leaf:
>          build_pptt_level(item, level_ref)
> 
> which is much more compact and easier to read compared to
> unrolled impl. it's now with all push/pop stack emulation.
> 

I missed your comments when v4 was posted. Sorry about this. I'm using
thunderbird mail client and have some filters running to put incoming
mails into the corresponding folders, but this reply has been put into
wrong folder. It's why I always copy my private email while sending
patches and emails. Please ignore v4 and review v5 directly.

Thanks for the suggestion, but it's going to introduce duplicate entries
for same socket/cluster/core, or I missed something. Otherwise, the
CPUs need to be iterated to check if they're in the corresponding level.

In order to make it simplified and remove the stack emulation stuff,
I will introduce variables to track the socket/cluster/core IDs whose
ACPI table entries have been added. Once the socket, cluster or core ID
changes while iterating 'ms->possible_cpus', the corresponding ACPI table
entry is added and the IDs for child levels are invalidated. With this,
all needed ACPI table entries will be created in one loop on 'ms->possible_cpus'

The changes will be included to v5, which will be posted shortly.

Thanks,
Gavin

> 
>> +
>> +        g_queue_push_tail(socket_list, GUINT_TO_POINTER(socket_id));
>>           g_queue_push_tail(list,
>>               GUINT_TO_POINTER(table_data->len - pptt_start));
>>           build_processor_hierarchy_node(
>> @@ -2023,65 +2032,104 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
>>                * of a physical package
>>                */
>>               (1 << 0),
>> -            0, socket, NULL, 0);
>> +            0, socket_id, NULL, 0);
>>       }
>>   
>>       if (mc->smp_props.clusters_supported) {
>>           length = g_queue_get_length(list);
>>           for (i = 0; i < length; i++) {
>> -            int cluster;
>> -
>>               parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
>> +            socket_id = GPOINTER_TO_UINT(g_queue_pop_head(socket_list));
>> +
>> +            for (n = 0; n < cpus->len; n++) {
>> +                if (cpus->cpus[n].props.socket_id != socket_id) {
>> +                    continue;
>> +                }
>> +
>> +                cluster_id = cpus->cpus[n].props.cluster_id;
>> +                if (g_queue_find(cluster_list, GUINT_TO_POINTER(cluster_id))) {
>> +                    continue;
>> +                }
>> +
>> +                g_queue_push_tail(cluster_list, GUINT_TO_POINTER(cluster_id));
>>                   g_queue_push_tail(list,
>>                       GUINT_TO_POINTER(table_data->len - pptt_start));
>>                   build_processor_hierarchy_node(
>>                       table_data,
>>                       (0 << 0), /* not a physical package */
>> -                    parent_offset, cluster, NULL, 0);
>> +                    parent_offset, cluster_id, NULL, 0);
>>               }
>>           }
>>       }
>>   
>>       length = g_queue_get_length(list);
>>       for (i = 0; i < length; i++) {
>> -        int core;
>> -
>>           parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -        for (core = 0; core < ms->smp.cores; core++) {
>> -            if (ms->smp.threads > 1) {
>> -                g_queue_push_tail(list,
>> -                    GUINT_TO_POINTER(table_data->len - pptt_start));
>> -                build_processor_hierarchy_node(
>> -                    table_data,
>> -                    (0 << 0), /* not a physical package */
>> -                    parent_offset, core, NULL, 0);
>> -            } else {
>> +        if (!mc->smp_props.clusters_supported) {
>> +            socket_id = GPOINTER_TO_UINT(g_queue_pop_head(socket_list));
>> +        } else {
>> +            cluster_id = GPOINTER_TO_UINT(g_queue_pop_head(cluster_list));
>> +        }
>> +
>> +        for (n = 0; n < cpus->len; n++) {
>> +            if (!mc->smp_props.clusters_supported &&
>> +                cpus->cpus[n].props.socket_id != socket_id) {
>> +                continue;
>> +            }
>> +
>> +            if (mc->smp_props.clusters_supported &&
>> +                cpus->cpus[n].props.cluster_id != cluster_id) {
>> +                continue;
>> +            }
>> +
>> +            core_id = cpus->cpus[n].props.core_id;
>> +            if (ms->smp.threads <= 1) {
>>                   build_processor_hierarchy_node(
>>                       table_data,
>>                       (1 << 1) | /* ACPI Processor ID valid */
>>                       (1 << 3),  /* Node is a Leaf */
>> -                    parent_offset, uid++, NULL, 0);
>> +                    parent_offset, core_id, NULL, 0);
>> +                continue;
>> +            }
>> +
>> +            if (g_queue_find(core_list, GUINT_TO_POINTER(core_id))) {
>> +                continue;
>>               }
>> +
>> +            g_queue_push_tail(core_list, GUINT_TO_POINTER(core_id));
>> +            g_queue_push_tail(list,
>> +                GUINT_TO_POINTER(table_data->len - pptt_start));
>> +            build_processor_hierarchy_node(
>> +                table_data,
>> +                (0 << 0), /* not a physical package */
>> +                parent_offset, core_id, NULL, 0);
>>           }
>>       }
>>   
>>       length = g_queue_get_length(list);
>>       for (i = 0; i < length; i++) {
>> -        int thread;
>> -
>>           parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
>> -        for (thread = 0; thread < ms->smp.threads; thread++) {
>> +        core_id = GPOINTER_TO_UINT(g_queue_pop_head(core_list));
>> +
>> +        for (n = 0; n < cpus->len; n++) {
>> +            if (cpus->cpus[n].props.core_id != core_id) {
>> +                continue;
>> +            }
>> +
>> +            thread_id = cpus->cpus[n].props.thread_id;
>>               build_processor_hierarchy_node(
>>                   table_data,
>>                   (1 << 1) | /* ACPI Processor ID valid */
>>                   (1 << 2) | /* Processor is a Thread */
>>                   (1 << 3),  /* Node is a Leaf */
>> -                parent_offset, uid++, NULL, 0);
>> +                parent_offset, thread_id, NULL, 0);
>>           }
>>       }
>>   
>>       g_queue_free(list);
>> +    g_queue_free(core_list);
>> +    g_queue_free(cluster_list);
>> +    g_queue_free(socket_list);
>>       acpi_table_end(linker, &table);
>>   }
>>   
>
diff mbox series

Patch

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 4086879ebf..10a2d63b96 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2002,18 +2002,27 @@  void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 const char *oem_id, const char *oem_table_id)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
+    CPUArchIdList *cpus = ms->possible_cpus;
+    GQueue *socket_list = g_queue_new();
+    GQueue *cluster_list = g_queue_new();
+    GQueue *core_list = g_queue_new();
     GQueue *list = g_queue_new();
     guint pptt_start = table_data->len;
     guint parent_offset;
     guint length, i;
-    int uid = 0;
-    int socket;
+    int n, socket_id, cluster_id, core_id, thread_id;
     AcpiTable table = { .sig = "PPTT", .rev = 2,
                         .oem_id = oem_id, .oem_table_id = oem_table_id };
 
     acpi_table_begin(&table, table_data);
 
-    for (socket = 0; socket < ms->smp.sockets; socket++) {
+    for (n = 0; n < cpus->len; n++) {
+        socket_id = cpus->cpus[n].props.socket_id;
+        if (g_queue_find(socket_list, GUINT_TO_POINTER(socket_id))) {
+            continue;
+        }
+
+        g_queue_push_tail(socket_list, GUINT_TO_POINTER(socket_id));
         g_queue_push_tail(list,
             GUINT_TO_POINTER(table_data->len - pptt_start));
         build_processor_hierarchy_node(
@@ -2023,65 +2032,104 @@  void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
              * of a physical package
              */
             (1 << 0),
-            0, socket, NULL, 0);
+            0, socket_id, NULL, 0);
     }
 
     if (mc->smp_props.clusters_supported) {
         length = g_queue_get_length(list);
         for (i = 0; i < length; i++) {
-            int cluster;
-
             parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
+            socket_id = GPOINTER_TO_UINT(g_queue_pop_head(socket_list));
+
+            for (n = 0; n < cpus->len; n++) {
+                if (cpus->cpus[n].props.socket_id != socket_id) {
+                    continue;
+                }
+
+                cluster_id = cpus->cpus[n].props.cluster_id;
+                if (g_queue_find(cluster_list, GUINT_TO_POINTER(cluster_id))) {
+                    continue;
+                }
+
+                g_queue_push_tail(cluster_list, GUINT_TO_POINTER(cluster_id));
                 g_queue_push_tail(list,
                     GUINT_TO_POINTER(table_data->len - pptt_start));
                 build_processor_hierarchy_node(
                     table_data,
                     (0 << 0), /* not a physical package */
-                    parent_offset, cluster, NULL, 0);
+                    parent_offset, cluster_id, NULL, 0);
             }
         }
     }
 
     length = g_queue_get_length(list);
     for (i = 0; i < length; i++) {
-        int core;
-
         parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-        for (core = 0; core < ms->smp.cores; core++) {
-            if (ms->smp.threads > 1) {
-                g_queue_push_tail(list,
-                    GUINT_TO_POINTER(table_data->len - pptt_start));
-                build_processor_hierarchy_node(
-                    table_data,
-                    (0 << 0), /* not a physical package */
-                    parent_offset, core, NULL, 0);
-            } else {
+        if (!mc->smp_props.clusters_supported) {
+            socket_id = GPOINTER_TO_UINT(g_queue_pop_head(socket_list));
+        } else {
+            cluster_id = GPOINTER_TO_UINT(g_queue_pop_head(cluster_list));
+        }
+
+        for (n = 0; n < cpus->len; n++) {
+            if (!mc->smp_props.clusters_supported &&
+                cpus->cpus[n].props.socket_id != socket_id) {
+                continue;
+            }
+
+            if (mc->smp_props.clusters_supported &&
+                cpus->cpus[n].props.cluster_id != cluster_id) {
+                continue;
+            }
+
+            core_id = cpus->cpus[n].props.core_id;
+            if (ms->smp.threads <= 1) {
                 build_processor_hierarchy_node(
                     table_data,
                     (1 << 1) | /* ACPI Processor ID valid */
                     (1 << 3),  /* Node is a Leaf */
-                    parent_offset, uid++, NULL, 0);
+                    parent_offset, core_id, NULL, 0);
+                continue;
+            }
+
+            if (g_queue_find(core_list, GUINT_TO_POINTER(core_id))) {
+                continue;
             }
+
+            g_queue_push_tail(core_list, GUINT_TO_POINTER(core_id));
+            g_queue_push_tail(list,
+                GUINT_TO_POINTER(table_data->len - pptt_start));
+            build_processor_hierarchy_node(
+                table_data,
+                (0 << 0), /* not a physical package */
+                parent_offset, core_id, NULL, 0);
         }
     }
 
     length = g_queue_get_length(list);
     for (i = 0; i < length; i++) {
-        int thread;
-
         parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
-        for (thread = 0; thread < ms->smp.threads; thread++) {
+        core_id = GPOINTER_TO_UINT(g_queue_pop_head(core_list));
+
+        for (n = 0; n < cpus->len; n++) {
+            if (cpus->cpus[n].props.core_id != core_id) {
+                continue;
+            }
+
+            thread_id = cpus->cpus[n].props.thread_id;
             build_processor_hierarchy_node(
                 table_data,
                 (1 << 1) | /* ACPI Processor ID valid */
                 (1 << 2) | /* Processor is a Thread */
                 (1 << 3),  /* Node is a Leaf */
-                parent_offset, uid++, NULL, 0);
+                parent_offset, thread_id, NULL, 0);
         }
     }
 
     g_queue_free(list);
+    g_queue_free(core_list);
+    g_queue_free(cluster_list);
+    g_queue_free(socket_list);
     acpi_table_end(linker, &table);
 }