diff mbox series

[v6,5/6] spapr: move FORM1 verifications to post CAS

Message ID 20210910195539.797170-6-danielhb413@gmail.com (mailing list archive)
State New, archived
Headers show
Series pSeries FORM2 affinity support | expand

Commit Message

Daniel Henrique Barboza Sept. 10, 2021, 7:55 p.m. UTC
FORM2 NUMA affinity is prepared to deal with empty (memory/cpu less)
NUMA nodes. This is used by the DAX KMEM driver to locate a PAPR SCM
device that has a different latency than the original NUMA node from the
regular memory. FORM2 is also enable to deal with asymmetric NUMA
distances gracefully, something that our FORM1 implementation doesn't
do.

Move these FORM1 verifications to a new function and wait until after
CAS, when we're sure that we're sticking with FORM1, to enforce them.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 hw/ppc/spapr.c              | 35 +----------------------
 hw/ppc/spapr_hcall.c        |  2 +-
 hw/ppc/spapr_numa.c         | 55 ++++++++++++++++++++++++++++++++-----
 include/hw/ppc/spapr_numa.h |  3 +-
 4 files changed, 52 insertions(+), 43 deletions(-)

Comments

Greg Kurz Sept. 14, 2021, 12:26 p.m. UTC | #1
On Fri, 10 Sep 2021 16:55:38 -0300
Daniel Henrique Barboza <danielhb413@gmail.com> wrote:

> FORM2 NUMA affinity is prepared to deal with empty (memory/cpu less)
> NUMA nodes. This is used by the DAX KMEM driver to locate a PAPR SCM
> device that has a different latency than the original NUMA node from the
> regular memory. FORM2 is also enable to deal with asymmetric NUMA

s/enable/able

> distances gracefully, something that our FORM1 implementation doesn't
> do.
> 
> Move these FORM1 verifications to a new function and wait until after
> CAS, when we're sure that we're sticking with FORM1, to enforce them.
> 
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---
>  hw/ppc/spapr.c              | 35 +----------------------
>  hw/ppc/spapr_hcall.c        |  2 +-
>  hw/ppc/spapr_numa.c         | 55 ++++++++++++++++++++++++++++++++-----
>  include/hw/ppc/spapr_numa.h |  3 +-
>  4 files changed, 52 insertions(+), 43 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 5afbb76cab..0703a26093 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1798,7 +1798,7 @@ static int spapr_post_load(void *opaque, int version_id)
>       * through it, but since it's a lightweight operation it's worth being
>       * a little redundant to be safe.
>       */
> -     spapr_numa_associativity_reset(spapr);
> +     spapr_numa_associativity_reset(spapr, false);
>  
>      return err;
>  }
> @@ -2787,39 +2787,6 @@ static void spapr_machine_init(MachineState *machine)
>      /* init CPUs */
>      spapr_init_cpus(spapr);
>  
> -    /*
> -     * check we don't have a memory-less/cpu-less NUMA node
> -     * Firmware relies on the existing memory/cpu topology to provide the
> -     * NUMA topology to the kernel.
> -     * And the linux kernel needs to know the NUMA topology at start
> -     * to be able to hotplug CPUs later.
> -     */
> -    if (machine->numa_state->num_nodes) {
> -        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
> -            /* check for memory-less node */
> -            if (machine->numa_state->nodes[i].node_mem == 0) {
> -                CPUState *cs;
> -                int found = 0;
> -                /* check for cpu-less node */
> -                CPU_FOREACH(cs) {
> -                    PowerPCCPU *cpu = POWERPC_CPU(cs);
> -                    if (cpu->node_id == i) {
> -                        found = 1;
> -                        break;
> -                    }
> -                }
> -                /* memory-less and cpu-less node */
> -                if (!found) {
> -                    error_report(
> -                       "Memory-less/cpu-less nodes are not supported (node %d)",
> -                                 i);
> -                    exit(1);
> -                }
> -            }
> -        }
> -
> -    }
> -
>      spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine);
>  
>      /* Init numa_assoc_array */
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 82ab92ddba..2dc22e2dc7 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1202,7 +1202,7 @@ target_ulong do_client_architecture_support(PowerPCCPU *cpu,
>       * Reset numa_assoc_array now that we know which NUMA affinity
>       * the guest will use.
>       */
> -    spapr_numa_associativity_reset(spapr);
> +    spapr_numa_associativity_reset(spapr, true);
>  
>      /*
>       * Ensure the guest asks for an interrupt mode we support;
> diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
> index 7ad4b6582b..0ade63c2d3 100644
> --- a/hw/ppc/spapr_numa.c
> +++ b/hw/ppc/spapr_numa.c
> @@ -198,6 +198,48 @@ static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
>  
>  }
>  
> +static void spapr_numa_FORM1_affinity_check(MachineState *machine)
> +{
> +    int i;
> +
> +    /*
> +     * Check we don't have a memory-less/cpu-less NUMA node
> +     * Firmware relies on the existing memory/cpu topology to provide the
> +     * NUMA topology to the kernel.
> +     * And the linux kernel needs to know the NUMA topology at start
> +     * to be able to hotplug CPUs later.
> +     */
> +    if (machine->numa_state->num_nodes) {
> +        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
> +            /* check for memory-less node */
> +            if (machine->numa_state->nodes[i].node_mem == 0) {
> +                CPUState *cs;
> +                int found = 0;
> +                /* check for cpu-less node */
> +                CPU_FOREACH(cs) {
> +                    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +                    if (cpu->node_id == i) {
> +                        found = 1;
> +                        break;
> +                    }
> +                }
> +                /* memory-less and cpu-less node */
> +                if (!found) {
> +                    error_report(
> +"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i);
> +                    exit(EXIT_FAILURE);
> +                }
> +            }
> +        }
> +    }
> +
> +    if (!spapr_numa_is_symmetrical(machine)) {
> +        error_report(
> +"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA");
> +        exit(EXIT_FAILURE);
> +    }
> +}
> +
>  /*
>   * Set NUMA machine state data based on FORM1 affinity semantics.
>   */
> @@ -260,12 +302,6 @@ static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
>          return;
>      }
>  
> -    if (!spapr_numa_is_symmetrical(machine)) {
> -        error_report("Asymmetrical NUMA topologies aren't supported "
> -                     "in the pSeries machine");
> -        exit(EXIT_FAILURE);
> -    }
> -
>      spapr_numa_define_FORM1_domains(spapr);
>  }
>  
> @@ -287,10 +323,15 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr,
>      spapr->numa_assoc_array = spapr->FORM1_assoc_array;
>  }
>  
> -void spapr_numa_associativity_reset(SpaprMachineState *spapr)
> +void spapr_numa_associativity_reset(SpaprMachineState *spapr,
> +                                    bool post_CAS_check)

Coding style requires lower case for variable names and that's what
we already with other CAS related variables in the rest of the code.

Rest LGTM so with these remarks addressed :

Reviewed-by: Greg Kurz <groug@kaod.org>

>  {
>      /* No FORM2 affinity implemented yet */
>      spapr->numa_assoc_array = spapr->FORM1_assoc_array;
> +
> +    if (post_CAS_check) {
> +        spapr_numa_FORM1_affinity_check(MACHINE(spapr));
> +    }
>  }
>  
>  void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
> diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h
> index ccf3e4eae8..246767d0a8 100644
> --- a/include/hw/ppc/spapr_numa.h
> +++ b/include/hw/ppc/spapr_numa.h
> @@ -24,7 +24,8 @@
>   */
>  void spapr_numa_associativity_init(SpaprMachineState *spapr,
>                                     MachineState *machine);
> -void spapr_numa_associativity_reset(SpaprMachineState *spapr);
> +void spapr_numa_associativity_reset(SpaprMachineState *spapr,
> +                                    bool post_CAS_check);
>  void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas);
>  void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
>                                         int offset, int nodeid);
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 5afbb76cab..0703a26093 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1798,7 +1798,7 @@  static int spapr_post_load(void *opaque, int version_id)
      * through it, but since it's a lightweight operation it's worth being
      * a little redundant to be safe.
      */
-     spapr_numa_associativity_reset(spapr);
+     spapr_numa_associativity_reset(spapr, false);
 
     return err;
 }
@@ -2787,39 +2787,6 @@  static void spapr_machine_init(MachineState *machine)
     /* init CPUs */
     spapr_init_cpus(spapr);
 
-    /*
-     * check we don't have a memory-less/cpu-less NUMA node
-     * Firmware relies on the existing memory/cpu topology to provide the
-     * NUMA topology to the kernel.
-     * And the linux kernel needs to know the NUMA topology at start
-     * to be able to hotplug CPUs later.
-     */
-    if (machine->numa_state->num_nodes) {
-        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
-            /* check for memory-less node */
-            if (machine->numa_state->nodes[i].node_mem == 0) {
-                CPUState *cs;
-                int found = 0;
-                /* check for cpu-less node */
-                CPU_FOREACH(cs) {
-                    PowerPCCPU *cpu = POWERPC_CPU(cs);
-                    if (cpu->node_id == i) {
-                        found = 1;
-                        break;
-                    }
-                }
-                /* memory-less and cpu-less node */
-                if (!found) {
-                    error_report(
-                       "Memory-less/cpu-less nodes are not supported (node %d)",
-                                 i);
-                    exit(1);
-                }
-            }
-        }
-
-    }
-
     spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine);
 
     /* Init numa_assoc_array */
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 82ab92ddba..2dc22e2dc7 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1202,7 +1202,7 @@  target_ulong do_client_architecture_support(PowerPCCPU *cpu,
      * Reset numa_assoc_array now that we know which NUMA affinity
      * the guest will use.
      */
-    spapr_numa_associativity_reset(spapr);
+    spapr_numa_associativity_reset(spapr, true);
 
     /*
      * Ensure the guest asks for an interrupt mode we support;
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 7ad4b6582b..0ade63c2d3 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -198,6 +198,48 @@  static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
 
 }
 
+static void spapr_numa_FORM1_affinity_check(MachineState *machine)
+{
+    int i;
+
+    /*
+     * Check we don't have a memory-less/cpu-less NUMA node
+     * Firmware relies on the existing memory/cpu topology to provide the
+     * NUMA topology to the kernel.
+     * And the linux kernel needs to know the NUMA topology at start
+     * to be able to hotplug CPUs later.
+     */
+    if (machine->numa_state->num_nodes) {
+        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
+            /* check for memory-less node */
+            if (machine->numa_state->nodes[i].node_mem == 0) {
+                CPUState *cs;
+                int found = 0;
+                /* check for cpu-less node */
+                CPU_FOREACH(cs) {
+                    PowerPCCPU *cpu = POWERPC_CPU(cs);
+                    if (cpu->node_id == i) {
+                        found = 1;
+                        break;
+                    }
+                }
+                /* memory-less and cpu-less node */
+                if (!found) {
+                    error_report(
+"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i);
+                    exit(EXIT_FAILURE);
+                }
+            }
+        }
+    }
+
+    if (!spapr_numa_is_symmetrical(machine)) {
+        error_report(
+"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA");
+        exit(EXIT_FAILURE);
+    }
+}
+
 /*
  * Set NUMA machine state data based on FORM1 affinity semantics.
  */
@@ -260,12 +302,6 @@  static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
         return;
     }
 
-    if (!spapr_numa_is_symmetrical(machine)) {
-        error_report("Asymmetrical NUMA topologies aren't supported "
-                     "in the pSeries machine");
-        exit(EXIT_FAILURE);
-    }
-
     spapr_numa_define_FORM1_domains(spapr);
 }
 
@@ -287,10 +323,15 @@  void spapr_numa_associativity_init(SpaprMachineState *spapr,
     spapr->numa_assoc_array = spapr->FORM1_assoc_array;
 }
 
-void spapr_numa_associativity_reset(SpaprMachineState *spapr)
+void spapr_numa_associativity_reset(SpaprMachineState *spapr,
+                                    bool post_CAS_check)
 {
     /* No FORM2 affinity implemented yet */
     spapr->numa_assoc_array = spapr->FORM1_assoc_array;
+
+    if (post_CAS_check) {
+        spapr_numa_FORM1_affinity_check(MACHINE(spapr));
+    }
 }
 
 void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
diff --git a/include/hw/ppc/spapr_numa.h b/include/hw/ppc/spapr_numa.h
index ccf3e4eae8..246767d0a8 100644
--- a/include/hw/ppc/spapr_numa.h
+++ b/include/hw/ppc/spapr_numa.h
@@ -24,7 +24,8 @@ 
  */
 void spapr_numa_associativity_init(SpaprMachineState *spapr,
                                    MachineState *machine);
-void spapr_numa_associativity_reset(SpaprMachineState *spapr);
+void spapr_numa_associativity_reset(SpaprMachineState *spapr,
+                                    bool post_CAS_check);
 void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas);
 void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
                                        int offset, int nodeid);