diff mbox series

[RFCv2,14/15] xen/arm: mm: Rework setup_xenheap_mappings()

Message ID 20210425201318.15447-15-julien@xen.org (mailing list archive)
State New, archived
Headers show
Series xen/arm: mm: Remove open-coding mappings | expand

Commit Message

Julien Grall April 25, 2021, 8:13 p.m. UTC
From: Julien Grall <julien.grall@arm.com>

A few issues have been reported with setup_xenheap_mappings() over the
last couple of years. The main ones are:
    - It will break on platform supporting more than 512GB of RAM
      because the memory allocated by the boot allocator is not yet
      mapped.
    - Aligning all the regions to 1GB may lead to unexpected result
      because we may alias non-cacheable region (such as device or reserved
      regions).

map_pages_to_xen() was recently reworked to allow superpage mappings and
deal with the use of page-tables before they are mapped.

Most of the code in setup_xenheap_mappings() is now replaced with a
single call to map_pages_to_xen().

This also require to re-order the steps setup_mm() so the regions are
given to the boot allocator first and then we setup the xenheap
mappings.

Note that the 1GB alignment is not yet removed.

Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Julien Grall <jgrall@amazon.com>

---
    Changes in v2:
        - New patch

    TODO:
        - Remove the 1GB alignment
        - Add support for setting the contiguous bit
---
 xen/arch/arm/mm.c    | 60 ++++----------------------------------------
 xen/arch/arm/setup.c | 10 ++++++--
 2 files changed, 13 insertions(+), 57 deletions(-)

Comments

Stefano Stabellini May 14, 2021, 11:51 p.m. UTC | #1
On Sun, 25 Apr 2021, Julien Grall wrote:
> From: Julien Grall <julien.grall@arm.com>
> 
> A few issues have been reported with setup_xenheap_mappings() over the
> last couple of years. The main ones are:
>     - It will break on platform supporting more than 512GB of RAM
>       because the memory allocated by the boot allocator is not yet
>       mapped.
>     - Aligning all the regions to 1GB may lead to unexpected result
>       because we may alias non-cacheable region (such as device or reserved
>       regions).
> 
> map_pages_to_xen() was recently reworked to allow superpage mappings and
> deal with the use of page-tables before they are mapped.
> 
> Most of the code in setup_xenheap_mappings() is now replaced with a
> single call to map_pages_to_xen().
> 
> This also require to re-order the steps setup_mm() so the regions are
> given to the boot allocator first and then we setup the xenheap
> mappings.

I know this is paranoia but doesn't this introduce a requirement on the
size of the first bank in bootinfo.mem.bank[] ?

It should be at least 8KB?

I know it is unlikely but it is theoretically possible to have a first
bank of just 1KB.

I think we should write the requirement down with an in-code comment?
Or better maybe we should write a message about it in the panic below?


> Note that the 1GB alignment is not yet removed.
> 
> Signed-off-by: Julien Grall <julien.grall@arm.com>
> Signed-off-by: Julien Grall <jgrall@amazon.com>
> 
> ---
>     Changes in v2:
>         - New patch
> 
>     TODO:
>         - Remove the 1GB alignment
>         - Add support for setting the contiguous bit
> ---
>  xen/arch/arm/mm.c    | 60 ++++----------------------------------------
>  xen/arch/arm/setup.c | 10 ++++++--

I love it!


>  2 files changed, 13 insertions(+), 57 deletions(-)
> 
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index f5768f2d4a81..c49403b687f5 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -143,17 +143,6 @@ static DEFINE_PAGE_TABLE(cpu0_pgtable);
>  static DEFINE_PAGE_TABLES(cpu0_dommap, DOMHEAP_SECOND_PAGES);
>  #endif
>  
> -#ifdef CONFIG_ARM_64
> -/* The first page of the first level mapping of the xenheap. The
> - * subsequent xenheap first level pages are dynamically allocated, but
> - * we need this one to bootstrap ourselves. */
> -static DEFINE_PAGE_TABLE(xenheap_first_first);
> -/* The zeroeth level slot which uses xenheap_first_first. Used because
> - * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
> - * valid for a non-xenheap mapping. */
> -static __initdata int xenheap_first_first_slot = -1;
> -#endif
> -
>  /* Common pagetable leaves */
>  /* Second level page tables.
>   *
> @@ -818,9 +807,9 @@ void __init setup_xenheap_mappings(unsigned long base_mfn,
>  void __init setup_xenheap_mappings(unsigned long base_mfn,
>                                     unsigned long nr_mfns)
>  {
> -    lpae_t *first, pte;
>      unsigned long mfn, end_mfn;
>      vaddr_t vaddr;
> +    int rc;
>  
>      /* Align to previous 1GB boundary */
>      mfn = base_mfn & ~((FIRST_SIZE>>PAGE_SHIFT)-1);
> @@ -846,49 +835,10 @@ void __init setup_xenheap_mappings(unsigned long base_mfn,
>       */
>      vaddr = (vaddr_t)__mfn_to_virt(base_mfn) & FIRST_MASK;
>  
> -    while ( mfn < end_mfn )
> -    {
> -        int slot = zeroeth_table_offset(vaddr);
> -        lpae_t *p = &xen_pgtable[slot];
> -
> -        if ( p->pt.valid )
> -        {
> -            /* mfn_to_virt is not valid on the 1st 1st mfn, since it
> -             * is not within the xenheap. */
> -            first = slot == xenheap_first_first_slot ?
> -                xenheap_first_first : mfn_to_virt(lpae_get_mfn(*p));
> -        }
> -        else if ( xenheap_first_first_slot == -1)
> -        {
> -            /* Use xenheap_first_first to bootstrap the mappings */
> -            first = xenheap_first_first;
> -
> -            pte = pte_of_xenaddr((vaddr_t)xenheap_first_first);
> -            pte.pt.table = 1;
> -            write_pte(p, pte);
> -
> -            xenheap_first_first_slot = slot;
> -        }
> -        else
> -        {
> -            mfn_t first_mfn = alloc_boot_pages(1, 1);
> -
> -            clear_page(mfn_to_virt(first_mfn));
> -            pte = mfn_to_xen_entry(first_mfn, MT_NORMAL);
> -            pte.pt.table = 1;
> -            write_pte(p, pte);
> -            first = mfn_to_virt(first_mfn);
> -        }
> -
> -        pte = mfn_to_xen_entry(_mfn(mfn), MT_NORMAL);
> -        /* TODO: Set pte.pt.contig when appropriate. */
> -        write_pte(&first[first_table_offset(vaddr)], pte);
> -
> -        mfn += FIRST_SIZE>>PAGE_SHIFT;
> -        vaddr += FIRST_SIZE;
> -    }
> -
> -    flush_xen_tlb_local();
> +    rc = map_pages_to_xen(vaddr, _mfn(mfn), end_mfn - mfn,
> +                          PAGE_HYPERVISOR_RW | _PAGE_BLOCK);
> +    if ( rc )
> +        panic("Unable to setup the xenheap mappings.\n");

This is the panic I was talking about


>  }
>  #endif
>  
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index 00aad1c194b9..0993a4bb52d4 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -761,8 +761,11 @@ static void __init setup_mm(void)
>          ram_start = min(ram_start,bank_start);
>          ram_end = max(ram_end,bank_end);
>  
> -        setup_xenheap_mappings(bank_start>>PAGE_SHIFT, bank_size>>PAGE_SHIFT);
> -
> +        /*
> +         * Add the region to the boot allocator first, so we can use
> +         * some to allocate page-tables for setting up the xenheap
> +         * mappings.
> +         */
>          s = bank_start;
>          while ( s < bank_end )
>          {
> @@ -781,6 +784,9 @@ static void __init setup_mm(void)
>              fw_unreserved_regions(s, e, init_boot_pages, 0);
>              s = n;
>          }
> +
> +        setup_xenheap_mappings(bank_start >> PAGE_SHIFT,
> +                               bank_size >> PAGE_SHIFT);
>      }
>  
>      total_pages += ram_size >> PAGE_SHIFT;
> -- 
> 2.17.1
>
Julien Grall May 15, 2021, 9:21 a.m. UTC | #2
Hi,

On 15/05/2021 00:51, Stefano Stabellini wrote:
> On Sun, 25 Apr 2021, Julien Grall wrote:
>> From: Julien Grall <julien.grall@arm.com>
>>
>> A few issues have been reported with setup_xenheap_mappings() over the
>> last couple of years. The main ones are:
>>      - It will break on platform supporting more than 512GB of RAM
>>        because the memory allocated by the boot allocator is not yet
>>        mapped.
>>      - Aligning all the regions to 1GB may lead to unexpected result
>>        because we may alias non-cacheable region (such as device or reserved
>>        regions).
>>
>> map_pages_to_xen() was recently reworked to allow superpage mappings and
>> deal with the use of page-tables before they are mapped.
>>
>> Most of the code in setup_xenheap_mappings() is now replaced with a
>> single call to map_pages_to_xen().
>>
>> This also require to re-order the steps setup_mm() so the regions are
>> given to the boot allocator first and then we setup the xenheap
>> mappings.
> 
> I know this is paranoia but doesn't this introduce a requirement on the
> size of the first bank in bootinfo.mem.bank[] ?
> 
> It should be at least 8KB?

AFAIK, the current requirement is 4KB because of the 1GB mapping. Long 
term, it would be 8KB.

> 
> I know it is unlikely but it is theoretically possible to have a first
> bank of just 1KB.

All the page allocators are working at the page granularity level. I am 
not entirely sure whether the current Xen would ignore the region or break.

Note that setup_xenheap_mappings() is taking a base MFN and a number of 
pages to map. So this doesn't look to be a new problem here.

For the 8KB requirement, we can look at first all the pages to the boot 
allocator and then do the mapping.

> 
> I think we should write the requirement down with an in-code comment?
> Or better maybe we should write a message about it in the panic below?

I can write an in-code comment but I think writing down in the panic() 
would be wrong because there is no promise this is going to be the only 
reason it fails (imagine there is a bug in the code...).

Cheers,
Stefano Stabellini May 18, 2021, 12:50 a.m. UTC | #3
On Sat, 15 May 2021, Julien Grall wrote:
> Hi,
> 
> On 15/05/2021 00:51, Stefano Stabellini wrote:
> > On Sun, 25 Apr 2021, Julien Grall wrote:
> > > From: Julien Grall <julien.grall@arm.com>
> > > 
> > > A few issues have been reported with setup_xenheap_mappings() over the
> > > last couple of years. The main ones are:
> > >      - It will break on platform supporting more than 512GB of RAM
> > >        because the memory allocated by the boot allocator is not yet
> > >        mapped.
> > >      - Aligning all the regions to 1GB may lead to unexpected result
> > >        because we may alias non-cacheable region (such as device or
> > > reserved
> > >        regions).
> > > 
> > > map_pages_to_xen() was recently reworked to allow superpage mappings and
> > > deal with the use of page-tables before they are mapped.
> > > 
> > > Most of the code in setup_xenheap_mappings() is now replaced with a
> > > single call to map_pages_to_xen().
> > > 
> > > This also require to re-order the steps setup_mm() so the regions are
> > > given to the boot allocator first and then we setup the xenheap
> > > mappings.
> > 
> > I know this is paranoia but doesn't this introduce a requirement on the
> > size of the first bank in bootinfo.mem.bank[] ?
> > 
> > It should be at least 8KB?
> 
> AFAIK, the current requirement is 4KB because of the 1GB mapping. Long term,
> it would be 8KB.
> 
> > 
> > I know it is unlikely but it is theoretically possible to have a first
> > bank of just 1KB.
> 
> All the page allocators are working at the page granularity level. I am not
> entirely sure whether the current Xen would ignore the region or break.
> 
> Note that setup_xenheap_mappings() is taking a base MFN and a number of pages
> to map. So this doesn't look to be a new problem here.

Yeah... the example of the first bank being 1KB is wrong because, like
you wrote, it wouldn't work before your patches either, and probably it
will never work.

Maybe a better example is a first bank of 4KB exactly.


> For the 8KB requirement, we can look at first all the pages to the boot
> allocator and then do the mapping.
> 
> > 
> > I think we should write the requirement down with an in-code comment?
> > Or better maybe we should write a message about it in the panic below?
> 
> I can write an in-code comment but I think writing down in the panic() would
> be wrong because there is no promise this is going to be the only reason it
> fails (imagine there is a bug in the code...).

Maybe it is sufficient to print out the error code (EINVAL / ENOMEM etc)
in the panic. If you see -12 you know what to look for.

Looking into it I realize that we are not returning -ENOMEM in case of
alloc_boot_pages failures in create_xen_table. It looks like we would
hit a BUG() in the implementation of alloc_boot_pages. Maybe that's good
enough as is then.
Julien Grall Feb. 12, 2022, 7:16 p.m. UTC | #4
Hi,

Sorry for the late answering. I finally picked up that series again and 
now preparing a new version.

On 18/05/2021 01:50, Stefano Stabellini wrote:
> On Sat, 15 May 2021, Julien Grall wrote:
>> Hi,
>>
>> On 15/05/2021 00:51, Stefano Stabellini wrote:
>>> On Sun, 25 Apr 2021, Julien Grall wrote:
>>>> From: Julien Grall <julien.grall@arm.com>
>>>>
>>>> A few issues have been reported with setup_xenheap_mappings() over the
>>>> last couple of years. The main ones are:
>>>>       - It will break on platform supporting more than 512GB of RAM
>>>>         because the memory allocated by the boot allocator is not yet
>>>>         mapped.
>>>>       - Aligning all the regions to 1GB may lead to unexpected result
>>>>         because we may alias non-cacheable region (such as device or
>>>> reserved
>>>>         regions).
>>>>
>>>> map_pages_to_xen() was recently reworked to allow superpage mappings and
>>>> deal with the use of page-tables before they are mapped.
>>>>
>>>> Most of the code in setup_xenheap_mappings() is now replaced with a
>>>> single call to map_pages_to_xen().
>>>>
>>>> This also require to re-order the steps setup_mm() so the regions are
>>>> given to the boot allocator first and then we setup the xenheap
>>>> mappings.
>>>
>>> I know this is paranoia but doesn't this introduce a requirement on the
>>> size of the first bank in bootinfo.mem.bank[] ?
>>>
>>> It should be at least 8KB?
>>
>> AFAIK, the current requirement is 4KB because of the 1GB mapping. Long term,
>> it would be 8KB.
>>
>>>
>>> I know it is unlikely but it is theoretically possible to have a first
>>> bank of just 1KB.
>>
>> All the page allocators are working at the page granularity level. I am not
>> entirely sure whether the current Xen would ignore the region or break.
>>
>> Note that setup_xenheap_mappings() is taking a base MFN and a number of pages
>> to map. So this doesn't look to be a new problem here.
> 
> Yeah... the example of the first bank being 1KB is wrong because, like
> you wrote, it wouldn't work before your patches either, and probably it
> will never work.
> 
> Maybe a better example is a first bank of 4KB exactly.

I have done more testing with the 1GB alignment dropped. The 
restrictions are a bit more complicated.

Not all the memory in a bank will go to the boot allocator. This can 
happen if the memory were have already been allocated for other purpose 
(e.g. modules, reserved area...).

So the region needs enough free memory to be able to map the entire 
region. The amount needed will depend on the size of the region.

So I will split the loop in two separate loops. The first loop will add 
all available pages to the boot allocator. The second loop will actually 
do the mapping.

Cheers,
diff mbox series

Patch

diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index f5768f2d4a81..c49403b687f5 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -143,17 +143,6 @@  static DEFINE_PAGE_TABLE(cpu0_pgtable);
 static DEFINE_PAGE_TABLES(cpu0_dommap, DOMHEAP_SECOND_PAGES);
 #endif
 
-#ifdef CONFIG_ARM_64
-/* The first page of the first level mapping of the xenheap. The
- * subsequent xenheap first level pages are dynamically allocated, but
- * we need this one to bootstrap ourselves. */
-static DEFINE_PAGE_TABLE(xenheap_first_first);
-/* The zeroeth level slot which uses xenheap_first_first. Used because
- * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
- * valid for a non-xenheap mapping. */
-static __initdata int xenheap_first_first_slot = -1;
-#endif
-
 /* Common pagetable leaves */
 /* Second level page tables.
  *
@@ -818,9 +807,9 @@  void __init setup_xenheap_mappings(unsigned long base_mfn,
 void __init setup_xenheap_mappings(unsigned long base_mfn,
                                    unsigned long nr_mfns)
 {
-    lpae_t *first, pte;
     unsigned long mfn, end_mfn;
     vaddr_t vaddr;
+    int rc;
 
     /* Align to previous 1GB boundary */
     mfn = base_mfn & ~((FIRST_SIZE>>PAGE_SHIFT)-1);
@@ -846,49 +835,10 @@  void __init setup_xenheap_mappings(unsigned long base_mfn,
      */
     vaddr = (vaddr_t)__mfn_to_virt(base_mfn) & FIRST_MASK;
 
-    while ( mfn < end_mfn )
-    {
-        int slot = zeroeth_table_offset(vaddr);
-        lpae_t *p = &xen_pgtable[slot];
-
-        if ( p->pt.valid )
-        {
-            /* mfn_to_virt is not valid on the 1st 1st mfn, since it
-             * is not within the xenheap. */
-            first = slot == xenheap_first_first_slot ?
-                xenheap_first_first : mfn_to_virt(lpae_get_mfn(*p));
-        }
-        else if ( xenheap_first_first_slot == -1)
-        {
-            /* Use xenheap_first_first to bootstrap the mappings */
-            first = xenheap_first_first;
-
-            pte = pte_of_xenaddr((vaddr_t)xenheap_first_first);
-            pte.pt.table = 1;
-            write_pte(p, pte);
-
-            xenheap_first_first_slot = slot;
-        }
-        else
-        {
-            mfn_t first_mfn = alloc_boot_pages(1, 1);
-
-            clear_page(mfn_to_virt(first_mfn));
-            pte = mfn_to_xen_entry(first_mfn, MT_NORMAL);
-            pte.pt.table = 1;
-            write_pte(p, pte);
-            first = mfn_to_virt(first_mfn);
-        }
-
-        pte = mfn_to_xen_entry(_mfn(mfn), MT_NORMAL);
-        /* TODO: Set pte.pt.contig when appropriate. */
-        write_pte(&first[first_table_offset(vaddr)], pte);
-
-        mfn += FIRST_SIZE>>PAGE_SHIFT;
-        vaddr += FIRST_SIZE;
-    }
-
-    flush_xen_tlb_local();
+    rc = map_pages_to_xen(vaddr, _mfn(mfn), end_mfn - mfn,
+                          PAGE_HYPERVISOR_RW | _PAGE_BLOCK);
+    if ( rc )
+        panic("Unable to setup the xenheap mappings.\n");
 }
 #endif
 
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 00aad1c194b9..0993a4bb52d4 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -761,8 +761,11 @@  static void __init setup_mm(void)
         ram_start = min(ram_start,bank_start);
         ram_end = max(ram_end,bank_end);
 
-        setup_xenheap_mappings(bank_start>>PAGE_SHIFT, bank_size>>PAGE_SHIFT);
-
+        /*
+         * Add the region to the boot allocator first, so we can use
+         * some to allocate page-tables for setting up the xenheap
+         * mappings.
+         */
         s = bank_start;
         while ( s < bank_end )
         {
@@ -781,6 +784,9 @@  static void __init setup_mm(void)
             fw_unreserved_regions(s, e, init_boot_pages, 0);
             s = n;
         }
+
+        setup_xenheap_mappings(bank_start >> PAGE_SHIFT,
+                               bank_size >> PAGE_SHIFT);
     }
 
     total_pages += ram_size >> PAGE_SHIFT;