
[17/22] x86/setup: vmap heap nodes when they are outside the direct map

Message ID 20221216114853.8227-18-julien@xen.org (mailing list archive)
State New, archived
Series: Remove the directmap

Commit Message

Julien Grall Dec. 16, 2022, 11:48 a.m. UTC
From: Hongyan Xia <hongyxia@amazon.com>

When we do not have a direct map, arch_mfns_in_directmap() will always
return false, so init_node_heap() will allocate xenheap pages from an
existing node for the metadata of a new node. This means that the
metadata of a new node is in a different node, slowing down heap
allocation.

Since we now have early vmap, vmap the metadata locally in the new node.

Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
Signed-off-by: Julien Grall <jgrall@amazon.com>

----

    Changes from Hongyan's version:
        * arch_mfn_in_direct_map() was renamed to
          arch_mfns_in_directmap()
        * Use vmap_contig_pages() rather than __vmap(...).
        * Add missing include (xen/vmap.h) so it compiles on Arm
---
 xen/common/page_alloc.c | 42 +++++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 10 deletions(-)
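For context, the heart of the change is the following fallback pattern (a
simplified sketch built from the identifiers in the diff below;
vmap_contig_pages() is introduced by an earlier patch in this series):

    /* Sketch: how init_node_heap() obtains a virtual address for the
     * node metadata after this patch. */
    if ( arch_mfns_in_directmap(mfn, needed) )
        /* Fast path: the pages are covered by the direct map. */
        _heap[node] = mfn_to_virt(mfn);
    else
    {
        /* No direct map: map the metadata locally via vmap instead. */
        _heap[node] = vmap_contig_pages(_mfn(mfn), needed);
        BUG_ON(!_heap[node]);
    }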

Comments

Jan Beulich Jan. 11, 2023, 2:39 p.m. UTC | #1
On 16.12.2022 12:48, Julien Grall wrote:
> @@ -597,22 +598,43 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
>          needed = 0;
>      }
>      else if ( *use_tail && nr >= needed &&
> -              arch_mfns_in_directmap(mfn + nr - needed, needed) &&
>                (!xenheap_bits ||
>                 !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
>      {
> -        _heap[node] = mfn_to_virt(mfn + nr - needed);
> -        avail[node] = mfn_to_virt(mfn + nr - 1) +
> -                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> -    }
> -    else if ( nr >= needed &&

By replacing these two well-formed lines with ...

> -              arch_mfns_in_directmap(mfn, needed) &&
> +        if ( arch_mfns_in_directmap(mfn + nr - needed, needed) )
> +        {
> +            _heap[node] = mfn_to_virt(mfn + nr - needed);
> +            avail[node] = mfn_to_virt(mfn + nr - 1) +
> +                          PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> +        }
> +        else
> +        {
> +            mfn_t needed_start = _mfn(mfn + nr - needed);
> +
> +            _heap[node] = vmap_contig_pages(needed_start, needed);
> +            BUG_ON(!_heap[node]);
> +            avail[node] = (void *)(_heap[node]) + (needed << PAGE_SHIFT) -
> +                          sizeof(**avail) * NR_ZONES;
> +        }
> +    } else if ( nr >= needed &&

... this, you're not only violating style here, but you also ...

>                (!xenheap_bits ||
>                 !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) )

... break indentation for these two lines.
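A style-conforming shape, which would also keep the two continuation lines
aligned, would presumably be:

    }
    else if ( nr >= needed &&
              (!xenheap_bits ||
               !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) )
    {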

Jan
Stefano Stabellini Jan. 23, 2023, 10:03 p.m. UTC | #2
On Fri, 16 Dec 2022, Julien Grall wrote:
> From: Hongyan Xia <hongyxia@amazon.com>
> 
> When we do not have a direct map, arch_mfns_in_directmap() will always
> return false, so init_node_heap() will allocate xenheap pages from an
> existing node for the metadata of a new node. This means that the
> metadata of a new node is in a different node, slowing down heap
> allocation.
> 
> Since we now have early vmap, vmap the metadata locally in the new node.
> 
> Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> 
> ----
> 
>     Changes from Hongyan's version:
>         * arch_mfn_in_direct_map() was renamed to
>           arch_mfns_in_directmap()
>         * Use vmap_contig_pages() rather than __vmap(...).
>         * Add missing include (xen/vmap.h) so it compiles on Arm
> ---
>  xen/common/page_alloc.c | 42 +++++++++++++++++++++++++++++++----------
>  1 file changed, 32 insertions(+), 10 deletions(-)
> 
> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> index 0c4af5a71407..581c15d74dfb 100644
> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -136,6 +136,7 @@
>  #include <xen/sched.h>
>  #include <xen/softirq.h>
>  #include <xen/spinlock.h>
> +#include <xen/vmap.h>
>  
>  #include <asm/flushtlb.h>
>  #include <asm/numa.h>
> @@ -597,22 +598,43 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
>          needed = 0;
>      }
>      else if ( *use_tail && nr >= needed &&
> -              arch_mfns_in_directmap(mfn + nr - needed, needed) &&
>                (!xenheap_bits ||
>                 !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
>      {
> -        _heap[node] = mfn_to_virt(mfn + nr - needed);
> -        avail[node] = mfn_to_virt(mfn + nr - 1) +
> -                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> -    }
> -    else if ( nr >= needed &&
> -              arch_mfns_in_directmap(mfn, needed) &&
> +        if ( arch_mfns_in_directmap(mfn + nr - needed, needed) )
> +        {
> +            _heap[node] = mfn_to_virt(mfn + nr - needed);
> +            avail[node] = mfn_to_virt(mfn + nr - 1) +
> +                          PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> +        }
> +        else
> +        {
> +            mfn_t needed_start = _mfn(mfn + nr - needed);
> +
> +            _heap[node] = vmap_contig_pages(needed_start, needed);
> +            BUG_ON(!_heap[node]);

I see a BUG_ON here but init_node_heap is not __init. Asking because
BUG_ON is only a good idea during init time. Should init_node_heap be
__init (not necessarily in this patch, but still)?


> +            avail[node] = (void *)(_heap[node]) + (needed << PAGE_SHIFT) -
> +                          sizeof(**avail) * NR_ZONES;
> +        }
> +    } else if ( nr >= needed &&
>                (!xenheap_bits ||
>                 !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) )
>      {
> -        _heap[node] = mfn_to_virt(mfn);
> -        avail[node] = mfn_to_virt(mfn + needed - 1) +
> -                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> +        if ( arch_mfns_in_directmap(mfn, needed) )
> +        {
> +            _heap[node] = mfn_to_virt(mfn);
> +            avail[node] = mfn_to_virt(mfn + needed - 1) +
> +                          PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> +        }
> +        else
> +        {
> +            mfn_t needed_start = _mfn(mfn);
> +
> +            _heap[node] = vmap_contig_pages(needed_start, needed);
> +            BUG_ON(!_heap[node]);
> +            avail[node] = (void *)(_heap[node]) + (needed << PAGE_SHIFT) -
> +                          sizeof(**avail) * NR_ZONES;
> +        }
>          *use_tail = false;
>      }
>      else if ( get_order_from_bytes(sizeof(**_heap)) ==
> -- 
> 2.38.1
>
Julien Grall Jan. 23, 2023, 10:23 p.m. UTC | #3
Hi,

On 23/01/2023 22:03, Stefano Stabellini wrote:
> On Fri, 16 Dec 2022, Julien Grall wrote:
>> From: Hongyan Xia <hongyxia@amazon.com>
>>
>> When we do not have a direct map, arch_mfns_in_directmap() will always
>> return false, so init_node_heap() will allocate xenheap pages from an
>> existing node for the metadata of a new node. This means that the
>> metadata of a new node is in a different node, slowing down heap
>> allocation.
>>
>> Since we now have early vmap, vmap the metadata locally in the new node.
>>
>> Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
>> Signed-off-by: Julien Grall <jgrall@amazon.com>
>>
>> ----
>>
>>      Changes from Hongyan's version:
>>          * arch_mfn_in_direct_map() was renamed to
>>            arch_mfns_in_directmap()
>>          * Use vmap_contig_pages() rather than __vmap(...).
>>          * Add missing include (xen/vmap.h) so it compiles on Arm
>> ---
>>   xen/common/page_alloc.c | 42 +++++++++++++++++++++++++++++++----------
>>   1 file changed, 32 insertions(+), 10 deletions(-)
>>
>> diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
>> index 0c4af5a71407..581c15d74dfb 100644
>> --- a/xen/common/page_alloc.c
>> +++ b/xen/common/page_alloc.c
>> @@ -136,6 +136,7 @@
>>   #include <xen/sched.h>
>>   #include <xen/softirq.h>
>>   #include <xen/spinlock.h>
>> +#include <xen/vmap.h>
>>   
>>   #include <asm/flushtlb.h>
>>   #include <asm/numa.h>
>> @@ -597,22 +598,43 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
>>           needed = 0;
>>       }
>>       else if ( *use_tail && nr >= needed &&
>> -              arch_mfns_in_directmap(mfn + nr - needed, needed) &&
>>                 (!xenheap_bits ||
>>                  !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
>>       {
>> -        _heap[node] = mfn_to_virt(mfn + nr - needed);
>> -        avail[node] = mfn_to_virt(mfn + nr - 1) +
>> -                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
>> -    }
>> -    else if ( nr >= needed &&
>> -              arch_mfns_in_directmap(mfn, needed) &&
>> +        if ( arch_mfns_in_directmap(mfn + nr - needed, needed) )
>> +        {
>> +            _heap[node] = mfn_to_virt(mfn + nr - needed);
>> +            avail[node] = mfn_to_virt(mfn + nr - 1) +
>> +                          PAGE_SIZE - sizeof(**avail) * NR_ZONES;
>> +        }
>> +        else
>> +        {
>> +            mfn_t needed_start = _mfn(mfn + nr - needed);
>> +
>> +            _heap[node] = vmap_contig_pages(needed_start, needed);
>> +            BUG_ON(!_heap[node]);
> 
> I see a BUG_ON here but init_node_heap is not __init.

FWIW, this is not the patch introducing the first BUG_ON() in this function.

> Asking because
> BUG_ON is only a good idea during init time. Should init_node_heap be
> __init (not necessarily in this patch, but still)?
AFAIK, there are two uses outside of __init:
   1) Free the init sections
   2) Memory hotplug

In the first case, we will likely need to panic() in case of an error.
For the second case, I am not entirely sure.

But there would be a fair bit of plumbing and thinking (how do you deal 
with the case where part of the memory was already added?).

Anyway, I don't think I am making the function worse, so I would rather
not open that can of worms (yet).
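(For illustration, a hypothetical shape for the boot-time case, if the
BUG_ON() were ever replaced; this is not part of the patch:)

    _heap[node] = vmap_contig_pages(needed_start, needed);
    if ( !_heap[node] )
        /* Boot-time caller: fail loudly rather than continue. */
        panic("Unable to vmap metadata for heap node %d\n", node);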

Cheers,
Stefano Stabellini Jan. 23, 2023, 10:56 p.m. UTC | #4
On Mon, 23 Jan 2023, Julien Grall wrote:
> On 23/01/2023 22:03, Stefano Stabellini wrote:
> > On Fri, 16 Dec 2022, Julien Grall wrote:
> > > From: Hongyan Xia <hongyxia@amazon.com>
> > > 
> > > When we do not have a direct map, arch_mfns_in_directmap() will always
> > > return false, so init_node_heap() will allocate xenheap pages from an
> > > existing node for the metadata of a new node. This means that the
> > > metadata of a new node is in a different node, slowing down heap
> > > allocation.
> > > 
> > > Since we now have early vmap, vmap the metadata locally in the new node.
> > > 
> > > Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
> > > Signed-off-by: Julien Grall <jgrall@amazon.com>
> > > 
> > > ----
> > > 
> > >      Changes from Hongyan's version:
> > >          * arch_mfn_in_direct_map() was renamed to
> > >            arch_mfns_in_directmap()
> > >          * Use vmap_contig_pages() rather than __vmap(...).
> > >          * Add missing include (xen/vmap.h) so it compiles on Arm
> > > ---
> > >   xen/common/page_alloc.c | 42 +++++++++++++++++++++++++++++++----------
> > >   1 file changed, 32 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
> > > index 0c4af5a71407..581c15d74dfb 100644
> > > --- a/xen/common/page_alloc.c
> > > +++ b/xen/common/page_alloc.c
> > > @@ -136,6 +136,7 @@
> > >   #include <xen/sched.h>
> > >   #include <xen/softirq.h>
> > >   #include <xen/spinlock.h>
> > > +#include <xen/vmap.h>
> > > 
> > >   #include <asm/flushtlb.h>
> > >   #include <asm/numa.h>
> > > @@ -597,22 +598,43 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
> > >           needed = 0;
> > >       }
> > >       else if ( *use_tail && nr >= needed &&
> > > -              arch_mfns_in_directmap(mfn + nr - needed, needed) &&
> > >                 (!xenheap_bits ||
> > >                  !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
> > >       {
> > > -        _heap[node] = mfn_to_virt(mfn + nr - needed);
> > > -        avail[node] = mfn_to_virt(mfn + nr - 1) +
> > > -                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> > > -    }
> > > -    else if ( nr >= needed &&
> > > -              arch_mfns_in_directmap(mfn, needed) &&
> > > +        if ( arch_mfns_in_directmap(mfn + nr - needed, needed) )
> > > +        {
> > > +            _heap[node] = mfn_to_virt(mfn + nr - needed);
> > > +            avail[node] = mfn_to_virt(mfn + nr - 1) +
> > > +                          PAGE_SIZE - sizeof(**avail) * NR_ZONES;
> > > +        }
> > > +        else
> > > +        {
> > > +            mfn_t needed_start = _mfn(mfn + nr - needed);
> > > +
> > > +            _heap[node] = vmap_contig_pages(needed_start, needed);
> > > +            BUG_ON(!_heap[node]);
> > 
> > I see a BUG_ON here but init_node_heap is not __init.
> 
> FWIW, this is not the patch introducing the first BUG_ON() in this function.
> 
> > Asking because
> > BUG_ON is only a good idea during init time. Should init_node_heap be
> > __init (not necessarily in this patch, but still)?
> AFAIK, there are two uses outside of __init:
>   1) Free the init sections
>   2) Memory hotplug
> 
> In the first case, we will likely need to panic() in case of an error. For
> the second case, I am not entirely sure.
> 
> But there would be a fair bit of plumbing and thinking (how do you deal with
> the case where part of the memory was already added?).
> 
> Anyway, I don't think I am making the function worse, so I would rather not
> open that can of worms (yet).

I am only trying to check that we are not introducing any BUG_ONs that
could be triggered at runtime. We don't have a rule that requires a
function with a BUG_ON to be __init, but that is a simple and nice
way to check that the BUG_ON is appropriate.

In this specific case, you are right that there are already 2 BUG_ONs
in this function, so you are not making things worse.

Aside from Jan's code style comment:

Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>

Patch

diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 0c4af5a71407..581c15d74dfb 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -136,6 +136,7 @@ 
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <xen/spinlock.h>
+#include <xen/vmap.h>
 
 #include <asm/flushtlb.h>
 #include <asm/numa.h>
@@ -597,22 +598,43 @@  static unsigned long init_node_heap(int node, unsigned long mfn,
         needed = 0;
     }
     else if ( *use_tail && nr >= needed &&
-              arch_mfns_in_directmap(mfn + nr - needed, needed) &&
               (!xenheap_bits ||
                !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) )
     {
-        _heap[node] = mfn_to_virt(mfn + nr - needed);
-        avail[node] = mfn_to_virt(mfn + nr - 1) +
-                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
-    }
-    else if ( nr >= needed &&
-              arch_mfns_in_directmap(mfn, needed) &&
+        if ( arch_mfns_in_directmap(mfn + nr - needed, needed) )
+        {
+            _heap[node] = mfn_to_virt(mfn + nr - needed);
+            avail[node] = mfn_to_virt(mfn + nr - 1) +
+                          PAGE_SIZE - sizeof(**avail) * NR_ZONES;
+        }
+        else
+        {
+            mfn_t needed_start = _mfn(mfn + nr - needed);
+
+            _heap[node] = vmap_contig_pages(needed_start, needed);
+            BUG_ON(!_heap[node]);
+            avail[node] = (void *)(_heap[node]) + (needed << PAGE_SHIFT) -
+                          sizeof(**avail) * NR_ZONES;
+        }
+    } else if ( nr >= needed &&
               (!xenheap_bits ||
                !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) )
     {
-        _heap[node] = mfn_to_virt(mfn);
-        avail[node] = mfn_to_virt(mfn + needed - 1) +
-                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
+        if ( arch_mfns_in_directmap(mfn, needed) )
+        {
+            _heap[node] = mfn_to_virt(mfn);
+            avail[node] = mfn_to_virt(mfn + needed - 1) +
+                          PAGE_SIZE - sizeof(**avail) * NR_ZONES;
+        }
+        else
+        {
+            mfn_t needed_start = _mfn(mfn);
+
+            _heap[node] = vmap_contig_pages(needed_start, needed);
+            BUG_ON(!_heap[node]);
+            avail[node] = (void *)(_heap[node]) + (needed << PAGE_SHIFT) -
+                          sizeof(**avail) * NR_ZONES;
+        }
         *use_tail = false;
     }
     else if ( get_order_from_bytes(sizeof(**_heap)) ==
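As an aside, the xenheap_bits guard kept in both branches can be read as
follows (an explanatory sketch; the helper name is made up, the expression
is taken from the conditions above):

    /* True when the last frame of the range still lies below the xenheap
     * address-width limit: the shift yields zero exactly when
     * (last_mfn << PAGE_SHIFT) < (1UL << xenheap_bits). */
    static bool mfn_fits_xenheap(unsigned long last_mfn)
    {
        return !xenheap_bits ||
               !(last_mfn >> (xenheap_bits - PAGE_SHIFT));
    }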