diff mbox series

[4/4] spapr: Fold spapr_node0_size() into its only caller

Message ID 20200313040539.819138-5-david@gibson.dropbear.id.au
State New, archived
Headers show
Series spapr: Assorted minor cleanups | expand

Commit Message

David Gibson March 13, 2020, 4:05 a.m. UTC
The Real Mode Area (RMA) needs to fit within the NUMA node owning memory
at address 0.  That's usually node 0, but can be a later one if there are
some nodes which have no memory (only CPUs).

This is currently handled by the spapr_node0_size() helper.  It has only
one caller, so there's not a lot of point splitting it out.  It's also
extremely easy to misread the code as clamping to the size of the smallest
node rather than the first node with any memory.

So, fold it into the caller, and add some commentary to make it a bit
clearer exactly what it's doing.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

Comments

Greg Kurz March 13, 2020, 9:33 a.m. UTC | #1
On Fri, 13 Mar 2020 15:05:39 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> The Real Mode Area (RMA) needs to fit within the NUMA node owning memory
> at address 0.  That's usually node 0, but can be a later one if there are
> some nodes which have no memory (only CPUs).
> 
> This is currently handled by the spapr_node0_size() helper.  It has only
> one caller, so there's not a lot of point splitting it out.  It's also
> extremely easy to misread the code as clamping to the size of the smallest
> node rather than the first node with any memory.
> 
> So, fold it into the caller, and add some commentary to make it a bit
> clearer exactly what it's doing.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr.c | 37 +++++++++++++++++++++----------------
>  1 file changed, 21 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 6c32ec3c0a..6a42c0f1c9 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -295,20 +295,6 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr,
>      _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
>  }
>  
> -static hwaddr spapr_node0_size(MachineState *machine)
> -{
> -    if (machine->numa_state->num_nodes) {
> -        int i;
> -        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
> -            if (machine->numa_state->nodes[i].node_mem) {
> -                return MIN(pow2floor(machine->numa_state->nodes[i].node_mem),
> -                           machine->ram_size);
> -            }
> -        }
> -    }
> -    return machine->ram_size;
> -}
> -
>  static void add_str(GString *s, const gchar *s1)
>  {
>      g_string_append_len(s, s1, strlen(s1) + 1);
> @@ -2631,10 +2617,24 @@ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
>      MachineState *machine = MACHINE(spapr);
>      SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>      hwaddr rma_size = machine->ram_size;
> -    hwaddr node0_size = spapr_node0_size(machine);
>  
>      /* RMA has to fit in the first NUMA node */
> -    rma_size = MIN(rma_size, node0_size);
> +    if (machine->numa_state->num_nodes) {
> +        /*
> +         * It's possible for there to be some zero-memory nodes first
> +         * in the list.  We need the RMA to fit inside the memory of
> +         * the first node which actually has some memory.
> +         */
> +        int i;
> +
> +        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
> +            if (machine->numa_state->nodes[i].node_mem != 0) {
> +                hwaddr node_size = machine->numa_state->nodes[i].node_mem;
> +                rma_size = MIN(rma_size, pow2floor(node_size));
> +                break;
> +            }
> +        }
> +    }
>  
>      /*
>       * VRMA access is via a special 1TiB SLB mapping, so the RMA can
> @@ -2651,6 +2651,11 @@ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
>          rma_size = MIN(rma_size, smc->rma_limit);
>      }
>  
> +    /*
> +     * RMA size must be a power of 2
> +     */
> +    rma_size = pow2floor(rma_size);
> +

The patch is identical to the last spin, for which I had
a comment already:

-----------------------------------------------------------------------
On Wed, 4 Mar 2020 12:25:55 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:
> On Tue, Mar 03, 2020 at 11:32:49AM +0100, Greg Kurz wrote:

[...]

> > In any case, it would probably help to mention somewhere
> > why the rounding is introduced by this patch.
> 
> Drat.  I meant to sort out your comment on the last spin better than
> this, but got part way through and forgot what I was doing.
>
-----------------------------------------------------------------------

I still think that the rounding introduced by this patch deserves
some explanation in the changelog...

>      if (rma_size < MIN_RMA_SLOF) {
>          error_setg(errp,
>  "pSeries SLOF firmware requires >= %ldMiB guest RMA (Real Mode Area memory)",
David Gibson March 16, 2020, 2:55 a.m. UTC | #2
On Fri, Mar 13, 2020 at 10:33:30AM +0100, Greg Kurz wrote:
> On Fri, 13 Mar 2020 15:05:39 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > The Real Mode Area (RMA) needs to fit within the NUMA node owning memory
> > at address 0.  That's usually node 0, but can be a later one if there are
> > some nodes which have no memory (only CPUs).
> > 
> > This is currently handled by the spapr_node0_size() helper.  It has only
> > one caller, so there's not a lot of point splitting it out.  It's also
> > extremely easy to misread the code as clamping to the size of the smallest
> > node rather than the first node with any memory.
> > 
> > So, fold it into the caller, and add some commentary to make it a bit
> > clearer exactly what it's doing.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr.c | 37 +++++++++++++++++++++----------------
> >  1 file changed, 21 insertions(+), 16 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 6c32ec3c0a..6a42c0f1c9 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -295,20 +295,6 @@ static void spapr_dt_pa_features(SpaprMachineState *spapr,
> >      _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
> >  }
> >  
> > -static hwaddr spapr_node0_size(MachineState *machine)
> > -{
> > -    if (machine->numa_state->num_nodes) {
> > -        int i;
> > -        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
> > -            if (machine->numa_state->nodes[i].node_mem) {
> > -                return MIN(pow2floor(machine->numa_state->nodes[i].node_mem),
> > -                           machine->ram_size);
> > -            }
> > -        }
> > -    }
> > -    return machine->ram_size;
> > -}
> > -
> >  static void add_str(GString *s, const gchar *s1)
> >  {
> >      g_string_append_len(s, s1, strlen(s1) + 1);
> > @@ -2631,10 +2617,24 @@ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
> >      MachineState *machine = MACHINE(spapr);
> >      SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> >      hwaddr rma_size = machine->ram_size;
> > -    hwaddr node0_size = spapr_node0_size(machine);
> >  
> >      /* RMA has to fit in the first NUMA node */
> > -    rma_size = MIN(rma_size, node0_size);
> > +    if (machine->numa_state->num_nodes) {
> > +        /*
> > +         * It's possible for there to be some zero-memory nodes first
> > +         * in the list.  We need the RMA to fit inside the memory of
> > +         * the first node which actually has some memory.
> > +         */
> > +        int i;
> > +
> > +        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
> > +            if (machine->numa_state->nodes[i].node_mem != 0) {
> > +                hwaddr node_size = machine->numa_state->nodes[i].node_mem;
> > +                rma_size = MIN(rma_size, pow2floor(node_size));
> > +                break;
> > +            }
> > +        }
> > +    }
> >  
> >      /*
> >       * VRMA access is via a special 1TiB SLB mapping, so the RMA can
> > @@ -2651,6 +2651,11 @@ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
> >          rma_size = MIN(rma_size, smc->rma_limit);
> >      }
> >  
> > +    /*
> > +     * RMA size must be a power of 2
> > +     */
> > +    rma_size = pow2floor(rma_size);
> > +
> 
> The patch is identical to the last spin, for which I had
> a comment already:

Ah, dangit.  It's actually not identical.  I put the pow2floor() back
in the node loop, but forgot to remove it from the bottom here.  I'll
put this one off for now.

> -----------------------------------------------------------------------
> On Wed, 4 Mar 2020 12:25:55 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> > On Tue, Mar 03, 2020 at 11:32:49AM +0100, Greg Kurz wrote:
> 
> [...]
> 
> > > In any case, it would probably help to mention somewhere
> > > why the rounding is introduced by this patch.
> > 
> > Drat.  I meant to sort out your comment on the last spin better than
> > this, but got part way through and forgot what I was doing.
> >
> -----------------------------------------------------------------------
> 
> I still think that the rounding introduced by this patch deserves
> some explanation in the changelog...

Yeah... it turns out this is more complicated than you'd think.  It is
indeed related to that block splitting you noticed, except that,
AFAICT, the block splitting is irrelevant in most cases and broken
for most of the remaining ones.  It seems to have been based on a
misunderstanding of how the kernel's early memory block handling
works.

My intention had been to make this patch do just the cleanup without
the rounding change, then address the rounding change another day.
Except I messed up the first part.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6c32ec3c0a..6a42c0f1c9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -295,20 +295,6 @@  static void spapr_dt_pa_features(SpaprMachineState *spapr,
     _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
 }
 
-static hwaddr spapr_node0_size(MachineState *machine)
-{
-    if (machine->numa_state->num_nodes) {
-        int i;
-        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
-            if (machine->numa_state->nodes[i].node_mem) {
-                return MIN(pow2floor(machine->numa_state->nodes[i].node_mem),
-                           machine->ram_size);
-            }
-        }
-    }
-    return machine->ram_size;
-}
-
 static void add_str(GString *s, const gchar *s1)
 {
     g_string_append_len(s, s1, strlen(s1) + 1);
@@ -2631,10 +2617,24 @@  static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
     MachineState *machine = MACHINE(spapr);
     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
     hwaddr rma_size = machine->ram_size;
-    hwaddr node0_size = spapr_node0_size(machine);
 
     /* RMA has to fit in the first NUMA node */
-    rma_size = MIN(rma_size, node0_size);
+    if (machine->numa_state->num_nodes) {
+        /*
+         * It's possible for there to be some zero-memory nodes first
+         * in the list.  We need the RMA to fit inside the memory of
+         * the first node which actually has some memory.
+         */
+        int i;
+
+        for (i = 0; i < machine->numa_state->num_nodes; ++i) {
+            if (machine->numa_state->nodes[i].node_mem != 0) {
+                hwaddr node_size = machine->numa_state->nodes[i].node_mem;
+                rma_size = MIN(rma_size, pow2floor(node_size));
+                break;
+            }
+        }
+    }
 
     /*
      * VRMA access is via a special 1TiB SLB mapping, so the RMA can
@@ -2651,6 +2651,11 @@  static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
         rma_size = MIN(rma_size, smc->rma_limit);
     }
 
+    /*
+     * RMA size must be a power of 2
+     */
+    rma_size = pow2floor(rma_size);
+
     if (rma_size < MIN_RMA_SLOF) {
         error_setg(errp,
 "pSeries SLOF firmware requires >= %ldMiB guest RMA (Real Mode Area memory)",