diff mbox series

[v11,12/12] xen/arm: add cache coloring support for Xen image

Message ID 20241202165921.249585-13-carlo.nonato@minervasys.tech (mailing list archive)
State New
Headers show
Series Arm cache coloring | expand

Commit Message

Carlo Nonato Dec. 2, 2024, 4:59 p.m. UTC
Xen image is relocated to a new colored physical space. Some relocation
functionalities must be brought back:
- the virtual address of the new space is taken from 0c18fb76323b
  ("xen/arm: Remove unused BOOT_RELOC_VIRT_START").
- relocate_xen() and get_xen_paddr() are taken from f60658c6ae47
  ("xen/arm: Stop relocating Xen").

setup_pagetables() must be adapted for coloring and for relocation. Runtime
page tables are used to map the colored space, but they are also linked in
boot tables so that the new space is temporarily available for relocation.
This implies that Xen protection must happen after the copy.

Finally, since the alternative framework needs to remap the Xen text and
inittext sections, this operation must be done in a coloring-aware way.
The function xen_remap_colored() is introduced for that.

Signed-off-by: Carlo Nonato <carlo.nonato@minervasys.tech>
Signed-off-by: Marco Solieri <marco.solieri@minervasys.tech>
Reviewed-by: Jan Beulich <jbeulich@suse.com> # common
---
v11:
- else if -> if in xen_colored_mfn()
v10:
- no changes
v9:
- patch adapted to changes to setup_pagetables()
v8:
- moved xen_colored_map_size() to arm/llc-coloring.c
v7:
- added BUG_ON() checks to arch_llc_coloring_init() and
  create_llc_coloring_mappings()
v6:
- squashed with BOOT_RELOC_VIRT_START patch
- consider_modules() moved in another patch
- removed psci and smpboot code because of new idmap work already handles that
- moved xen_remap_colored() in alternative.c since it's only used there
- removed xen_colored_temp[] in favor of xen_xenmap[] usage for mapping
- use of boot_module_find_by_kind() to remove the need of extra parameter in
  setup_pagetables()
- moved get_xen_paddr() in arm/llc-coloring.c since it's only used there
v5:
- FIXME: consider_modules copy pasted since it got moved
v4:
- removed set_value_for_secondary() because it was wrongly cleaning cache
- relocate_xen() now calls switch_ttbr_id()
---
 xen/arch/arm/alternative.c            | 30 +++++++-
 xen/arch/arm/arm64/mmu/head.S         | 58 +++++++++++++++-
 xen/arch/arm/arm64/mmu/mm.c           | 28 ++++++--
 xen/arch/arm/include/asm/mmu/layout.h |  3 +
 xen/arch/arm/llc-coloring.c           | 63 +++++++++++++++++
 xen/arch/arm/mmu/setup.c              | 99 +++++++++++++++++++++++----
 xen/arch/arm/setup.c                  | 10 ++-
 xen/common/llc-coloring.c             | 18 +++++
 xen/include/xen/llc-coloring.h        | 14 ++++
 9 files changed, 302 insertions(+), 21 deletions(-)

Comments

Julien Grall Dec. 2, 2024, 9:44 p.m. UTC | #1
Hi Carlo,

I appreciate this is v11. So I will try to keep the comments to only 
what I consider important to fix and some coding style issue.

On 02/12/2024 16:59, Carlo Nonato wrote:
> ---
>   xen/arch/arm/alternative.c            | 30 +++++++-
>   xen/arch/arm/arm64/mmu/head.S         | 58 +++++++++++++++-
>   xen/arch/arm/arm64/mmu/mm.c           | 28 ++++++--
>   xen/arch/arm/include/asm/mmu/layout.h |  3 +
>   xen/arch/arm/llc-coloring.c           | 63 +++++++++++++++++
>   xen/arch/arm/mmu/setup.c              | 99 +++++++++++++++++++++++----
>   xen/arch/arm/setup.c                  | 10 ++-
>   xen/common/llc-coloring.c             | 18 +++++
>   xen/include/xen/llc-coloring.h        | 14 ++++
>   9 files changed, 302 insertions(+), 21 deletions(-)
> 
> diff --git a/xen/arch/arm/alternative.c b/xen/arch/arm/alternative.c
> index d99b507093..0fcf4e451d 100644
> --- a/xen/arch/arm/alternative.c
> +++ b/xen/arch/arm/alternative.c
> @@ -9,6 +9,7 @@
>   #include <xen/init.h>
>   #include <xen/types.h>
>   #include <xen/kernel.h>
> +#include <xen/llc-coloring.h>
>   #include <xen/mm.h>
>   #include <xen/vmap.h>
>   #include <xen/smp.h>
> @@ -191,6 +192,27 @@ static int __apply_alternatives_multi_stop(void *xenmap)
>       return 0;
>   }
>   
> +static void __init *xen_remap_colored(mfn_t xen_mfn, paddr_t xen_size)
> +{
> +    unsigned int i;
> +    void *xenmap;
> +    mfn_t *xen_colored_mfns, mfn;
> +
> +    xen_colored_mfns = xmalloc_array(mfn_t, xen_size >> PAGE_SHIFT);
 > +    if ( !xen_colored_mfns )> +        panic("Can't allocate LLC 
colored MFNs\n");
> +
> +    for_each_xen_colored_mfn ( xen_mfn, mfn, i )
> +    {
> +        xen_colored_mfns[i] = mfn;
> +    }

NIT: Parenthesis should not be necessary.

> +
> +    xenmap = vmap(xen_colored_mfns, xen_size >> PAGE_SHIFT);
 > +    xfree(xen_colored_mfns);> +
> +    return xenmap;
> +}
> +
>   /*
>    * This function should only be called during boot and before CPU0 jump
>    * into the idle_loop.
> @@ -209,8 +231,12 @@ void __init apply_alternatives_all(void)
>        * The text and inittext section are read-only. So re-map Xen to
>        * be able to patch the code.
>        */
> -    xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR,
> -                    VMAP_DEFAULT);
> +    if ( llc_coloring_enabled )
> +        xenmap = xen_remap_colored(xen_mfn, xen_size);
> +    else
> +        xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR,
> +                        VMAP_DEFAULT);
> +
>       /* Re-mapping Xen is not expected to fail during boot. */
>       BUG_ON(!xenmap);
>   
> diff --git a/xen/arch/arm/arm64/mmu/head.S b/xen/arch/arm/arm64/mmu/head.S
> index 665a51a337..a1fc9a82f1 100644
> --- a/xen/arch/arm/arm64/mmu/head.S
> +++ b/xen/arch/arm/arm64/mmu/head.S
> @@ -428,6 +428,61 @@ FUNC_LOCAL(fail)
>           b     1b
>   END(fail)
>   
> +/*
> + * Copy Xen to new location and switch TTBR
> + * x0    ttbr
> + * x1    source address
> + * x2    destination address
> + * x3    length
> + *
> + * Source and destination must be word aligned, length is rounded up
> + * to a 16 byte boundary.
> + *
> + * MUST BE VERY CAREFUL when saving things to RAM over the copy
> + */
> +ENTRY(relocate_xen)

We are trying to get rid of ENTRY. Instead, please use FUNC().

> +        /*
> +         * Copy 16 bytes at a time using:
> +         *   x9: counter
> +         *   x10: data
> +         *   x11: data
> +         *   x12: source
> +         *   x13: destination
> +         */
> +        mov     x9, x3
> +        mov     x12, x1
> +        mov     x13, x2
> +
> +1:      ldp     x10, x11, [x12], #16
> +        stp     x10, x11, [x13], #16
> +
> +        subs    x9, x9, #16
> +        bgt     1b
> +
> +        /*
> +         * Flush destination from dcache using:
> +         *   x9: counter
> +         *   x10: step
> +         *   x11: vaddr
> +         *
> +         * This is to ensure data is visible to the instruction cache
 > +         */

The comments implies that the only reason we need to flush the cache is 
to ensure the data and cache is coherent. But...

> +        dsb   sy
> +
> +        mov   x9, x3
> +        ldr   x10, =dcache_line_bytes /* x10 := step */
 > +        ldr   x10, [x10]> +        mov   x11, x2
> +
> +1:      dc    cvac, x11

... here you use Point of Coherency. Point of Unification should be 
sufficient here. This is boot code, so I am not against having stricter 
cache maintenance. But it would be good to clarify.

> +
> +        add   x11, x11, x10
> +        subs  x9, x9, x10
> +        bgt   1b
> +
> +        /* No need for dsb/isb because they are alredy done in switch_ttbr_id */
> +        b switch_ttbr_id
> +
>   /*
>    * Switch TTBR
>    *
> @@ -453,7 +508,8 @@ FUNC(switch_ttbr_id)
>   
>           /*
>            * 5) Flush I-cache
> -         * This should not be necessary but it is kept for safety.
> +         * This should not be necessary in the general case, but it's needed
> +         * for cache coloring because code is relocated in that case.
>            */
>           ic     iallu
>           isb
> diff --git a/xen/arch/arm/arm64/mmu/mm.c b/xen/arch/arm/arm64/mmu/mm.c
> index 671eaadbc1..3732d5897e 100644
> --- a/xen/arch/arm/arm64/mmu/mm.c
> +++ b/xen/arch/arm/arm64/mmu/mm.c
> @@ -1,6 +1,7 @@
>   /* SPDX-License-Identifier: GPL-2.0-only */
>   
>   #include <xen/init.h>
> +#include <xen/llc-coloring.h>
>   #include <xen/mm.h>
>   #include <xen/pfn.h>
>   
> @@ -138,27 +139,46 @@ void update_boot_mapping(bool enable)
>   }
>   
>   extern void switch_ttbr_id(uint64_t ttbr);
> +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
>   
>   typedef void (switch_ttbr_fn)(uint64_t ttbr);
> +typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len);
>   
>   void __init switch_ttbr(uint64_t ttbr)

Given the change below, I think this function needs to be renamed. 
Possibly to relocate_and_jump() with a comment explaning that the 
relocation only happen for cache-coloring.

>   {
> -    vaddr_t id_addr = virt_to_maddr(switch_ttbr_id);
> -    switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
> +    vaddr_t vaddr, id_addr;
>       lpae_t pte;
>   
> +    if ( llc_coloring_enabled )
> +        vaddr = (vaddr_t)relocate_xen;
> +    else
> +        vaddr = (vaddr_t)switch_ttbr_id;
> +
> +    id_addr = virt_to_maddr(vaddr);
> +
>       /* Enable the identity mapping in the boot page tables */
>       update_identity_mapping(true);
>   
>       /* Enable the identity mapping in the runtime page tables */
> -    pte = pte_of_xenaddr((vaddr_t)switch_ttbr_id);
> +    pte = pte_of_xenaddr(vaddr);
>       pte.pt.table = 1;
>       pte.pt.xn = 0;
>       pte.pt.ro = 1;
>       write_pte(&xen_third_id[third_table_offset(id_addr)], pte);
>   
>       /* Switch TTBR */

This comment needs to be updated.

> -    fn(ttbr);
> +    if ( llc_coloring_enabled )
> +    {
> +        relocate_xen_fn *fn = (relocate_xen_fn *)id_addr;
> +
> +        fn(ttbr, _start, (void *)BOOT_RELOC_VIRT_START, _end - _start);
> +    }
> +    else
> +    {
> +        switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
> +
> +        fn(ttbr);
> +    }
>   
>       /*
>        * Disable the identity mapping in the runtime page tables.
> diff --git a/xen/arch/arm/include/asm/mmu/layout.h b/xen/arch/arm/include/asm/mmu/layout.h
> index a3b546465b..19c0ec63a5 100644
> --- a/xen/arch/arm/include/asm/mmu/layout.h
> +++ b/xen/arch/arm/include/asm/mmu/layout.h
> @@ -30,6 +30,7 @@
>    *  10M -  12M   Fixmap: special-purpose 4K mapping slots
>    *  12M -  16M   Early boot mapping of FDT
>    *  16M -  18M   Livepatch vmap (if compiled in)
> + *  16M -  24M   Cache-colored Xen text, data, bss (temporary, if compiled in)
>    *
>    *   1G -   2G   VMAP: ioremap and early_ioremap
>    *
> @@ -74,6 +75,8 @@
>   #define BOOT_FDT_VIRT_START     (FIXMAP_VIRT_START + FIXMAP_VIRT_SIZE)
>   #define BOOT_FDT_VIRT_SIZE      _AT(vaddr_t, MB(4))
>   
> +#define BOOT_RELOC_VIRT_START   (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
 > +>   #ifdef CONFIG_LIVEPATCH
>   #define LIVEPATCH_VMAP_START    (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
>   #define LIVEPATCH_VMAP_SIZE    _AT(vaddr_t, MB(2))
> diff --git a/xen/arch/arm/llc-coloring.c b/xen/arch/arm/llc-coloring.c
> index 6c8fa6b576..8e10a505db 100644
> --- a/xen/arch/arm/llc-coloring.c
> +++ b/xen/arch/arm/llc-coloring.c
> @@ -10,6 +10,7 @@
>   #include <xen/types.h>
>   
>   #include <asm/processor.h>
> +#include <asm/setup.h>
>   #include <asm/sysregs.h>
>   
>   /* Return the LLC way size by probing the hardware */
> @@ -64,8 +65,70 @@ unsigned int __init get_llc_way_size(void)
>       return line_size * num_sets;
>   }
>   
> +/**
> + * get_xen_paddr - get physical address to relocate Xen to
> + *
> + * Xen is relocated to as near to the top of RAM as possible and
> + * aligned to a XEN_PADDR_ALIGN boundary.
> + */
> +static paddr_t __init get_xen_paddr(paddr_t xen_size)
> +{
> +    const struct membanks *mem = bootinfo_get_mem();
> +    paddr_t min_size, paddr = 0;
> +    unsigned int i;
> +
> +    min_size = (xen_size + (XEN_PADDR_ALIGN-1)) & ~(XEN_PADDR_ALIGN-1);

Style: Missing space before *and* after '-' in both cases.

But effectively, this is an open-coded version of ROUNDUP().

> +
> +    /* Find the highest bank with enough space. */
> +    for ( i = 0; i < mem->nr_banks; i++ )
> +    {
> +        const struct membank *bank = &mem->bank[i];
> +        paddr_t s, e;
> +
> +        if ( bank->size >= min_size )
> +        {
> +            e = consider_modules(bank->start, bank->start + bank->size,
> +                                 min_size, XEN_PADDR_ALIGN, 0);
> +            if ( !e )
> +                continue;
> +
> +#ifdef CONFIG_ARM_32
> +            /* Xen must be under 4GB */
> +            if ( e > GB(4) )
> +                e = GB(4);
> +            if ( e < bank->start )
> +                continue;
> +#endif
> +
> +            s = e - min_size;
> +
> +            if ( s > paddr )
> +                paddr = s;
> +        }
> +    }
> +
> +    if ( !paddr )
> +        panic("Not enough memory to relocate Xen\n");
> +
> +    printk("Placing Xen at 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
> +           paddr, paddr + min_size);
> +
> +    return paddr;
> +}
> +

[...]

> +static void __init create_llc_coloring_mappings(void)
> +{
> +    lpae_t pte;
> +    unsigned int i;
> +    struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN);
 > +    mfn_t start_mfn = maddr_to_mfn(xen_bootmodule->start), mfn;> +
> +    for_each_xen_colored_mfn ( start_mfn, mfn, i )
> +    {
> +        pte = mfn_to_xen_entry(mfn, MT_NORMAL);
> +        pte.pt.table = 1; /* level 3 mappings always have this bit set */
> +        xen_xenmap[i] = pte;
> +    }
> +
> +    for ( i = 0; i < XEN_NR_ENTRIES(2); i++ )
> +    {
> +        vaddr_t va = BOOT_RELOC_VIRT_START + (i << XEN_PT_LEVEL_SHIFT(2));
> +
> +        pte = mfn_to_xen_entry(virt_to_mfn(xen_xenmap +
> +                                           i * XEN_PT_LPAE_ENTRIES),
> +                               MT_NORMAL);
> +        pte.pt.table = 1;
> +        write_pte(&boot_second[second_table_offset(va)], pte);
> +    }
> +}
> +
>   /*
> - * Boot-time pagetable setup.
> + * Boot-time pagetable setup with coloring support

I am a bit confused with this change. I agree you added support for 
cache coloring, but the code is still doing the same thing: Preparing 
the page-tables regardless on whether this is cache coloring or not.

So I would say this update is not warrant.

>    * Changes here may need matching changes in head.S
> + *
> + * The cache coloring support consists of:
> + * - Create colored mapping that conforms to Xen color selection in xen_xenmap[]
> + * - Link the mapping in boot page tables using BOOT_RELOC_VIRT_START as vaddr
> + * - pte_of_xenaddr() takes care of translating addresses to the new space
> + *   during runtime page tables creation
> + * - Relocate xen and update TTBR with the new address in the colored space
> + *   (see switch_ttbr())
> + * - Protect the new space

Similarly here. Most of what is written has nothing to do with cache 
coloring. So I think this comment needs to be made a bit more generic.

>    */
>   void __init setup_pagetables(void)
>   {
> @@ -326,6 +369,9 @@ void __init setup_pagetables(void)
>       lpae_t pte, *p;
>       int i;
>   
> +    if ( llc_coloring_enabled )
> +        create_llc_coloring_mappings();
> +
>       arch_setup_page_tables();
>   
>   #ifdef CONFIG_ARM_64
> @@ -353,13 +399,7 @@ void __init setup_pagetables(void)
>               break;
>           pte = pte_of_xenaddr(va);
>           pte.pt.table = 1; /* third level mappings always have this bit set */
> -        if ( is_kernel_text(va) || is_kernel_inittext(va) )
> -        {
> -            pte.pt.xn = 0;
> -            pte.pt.ro = 1;
> -        }
> -        if ( is_kernel_rodata(va) )
> -            pte.pt.ro = 1;
> +        pte.pt.xn = 0; /* Permissions will be enforced later. Allow execution */
>           xen_xenmap[i] = pte;
>       }
>   
> @@ -385,13 +425,48 @@ void __init setup_pagetables(void)
>       ttbr = virt_to_maddr(cpu0_pgtable);
>   #endif
>   
> -    switch_ttbr(ttbr);
> -
> -    xen_pt_enforce_wnx();
> -
>   #ifdef CONFIG_ARM_32
>       per_cpu(xen_pgtable, 0) = cpu0_pgtable;
>   #endif
> +
> +    if ( llc_coloring_enabled )
> +        ttbr = virt_to_maddr(virt_to_reloc_virt(THIS_CPU_PGTABLE));

The logic is a bit difficult to understand. You first update ttbr above:

ttbr = virt_to_maddr(cpu0_pgtable);

But then overwrite it for cache coloring. virt_to_maddr() is also not a 
trivial function.

So I think it would be better to write the following:

#ifdef CONFIG_ARM_32
per_cpu(xen_pgtable, 0) = cpu0_pgtable;
#endif

if ( llc_coloring_enabled )
   ttbr = virt_to_maddr(virt_to_reloc_virt(...));
else
   ttbr = virt_to_maddr(THIS_CPU_PGTABLE);

 > +> +    switch_ttbr(ttbr);
> +
> +    /* Protect Xen */
> +    for ( i = 0; i < XEN_NR_ENTRIES(3); i++ )
> +    {
> +        vaddr_t va = XEN_VIRT_START + (i << PAGE_SHIFT);
> +        lpae_t *entry = xen_xenmap + i;
> +
> +        if ( !is_kernel(va) )
> +            break;
> +
> +        pte = read_atomic(entry);
> +
> +        if ( is_kernel_text(va) || is_kernel_inittext(va) )
> +        {
> +            pte.pt.xn = 0;
> +            pte.pt.ro = 1;
> +        } else if ( is_kernel_rodata(va) ) {

Coding style:

}
else if
{
...
}
else
{
...
}

> +            pte.pt.ro = 1;
> +            pte.pt.xn = 1;
> +        } else {
> +            pte.pt.xn = 1;
> +            pte.pt.ro = 0;
> +        }
> +
> +        write_pte(entry, pte);
> +    }
> +
> +    /*
> +     * We modified live page-tables. Ensure the TLBs are invalidated
> +     * before setting enforcing the WnX permissions.
> +     */
> +    flush_xen_tlb_local();
> +
> +    xen_pt_enforce_wnx();
>   }

Cheers,
Carlo Nonato Dec. 3, 2024, 10:08 a.m. UTC | #2
Hi Julien,

On Mon, Dec 2, 2024 at 10:44 PM Julien Grall <julien@xen.org> wrote:
>
> Hi Carlo,

[...]

> > diff --git a/xen/arch/arm/arm64/mmu/mm.c b/xen/arch/arm/arm64/mmu/mm.c
> > index 671eaadbc1..3732d5897e 100644
> > --- a/xen/arch/arm/arm64/mmu/mm.c
> > +++ b/xen/arch/arm/arm64/mmu/mm.c
> > @@ -1,6 +1,7 @@
> >   /* SPDX-License-Identifier: GPL-2.0-only */
> >
> >   #include <xen/init.h>
> > +#include <xen/llc-coloring.h>
> >   #include <xen/mm.h>
> >   #include <xen/pfn.h>
> >
> > @@ -138,27 +139,46 @@ void update_boot_mapping(bool enable)
> >   }
> >
> >   extern void switch_ttbr_id(uint64_t ttbr);
> > +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
> >
> >   typedef void (switch_ttbr_fn)(uint64_t ttbr);
> > +typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len);
> >
> >   void __init switch_ttbr(uint64_t ttbr)
>
> Given the change below, I think this function needs to be renamed.
> Possibly to relocate_and_jump() with a comment explaning that the
> relocation only happen for cache-coloring.

Changing the name of switch_ttbr() to relocate_and_jump() seems a bit
misleading to me. First I need to change the name also for arm32 where there's
no relocation at all. Second, relocation is something that happens
conditionally so I don't think it's a good name for the function.

[...]

> Cheers,
>
> --
> Julien Grall

Thanks.
- Carlo
Julien Grall Dec. 3, 2024, 10:36 a.m. UTC | #3
On 03/12/2024 10:08, Carlo Nonato wrote:
> Hi Julien,
> 
> On Mon, Dec 2, 2024 at 10:44 PM Julien Grall <julien@xen.org> wrote:
>>
>> Hi Carlo,
> 
> [...]
> 
>>> diff --git a/xen/arch/arm/arm64/mmu/mm.c b/xen/arch/arm/arm64/mmu/mm.c
>>> index 671eaadbc1..3732d5897e 100644
>>> --- a/xen/arch/arm/arm64/mmu/mm.c
>>> +++ b/xen/arch/arm/arm64/mmu/mm.c
>>> @@ -1,6 +1,7 @@
>>>    /* SPDX-License-Identifier: GPL-2.0-only */
>>>
>>>    #include <xen/init.h>
>>> +#include <xen/llc-coloring.h>
>>>    #include <xen/mm.h>
>>>    #include <xen/pfn.h>
>>>
>>> @@ -138,27 +139,46 @@ void update_boot_mapping(bool enable)
>>>    }
>>>
>>>    extern void switch_ttbr_id(uint64_t ttbr);
>>> +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
>>>
>>>    typedef void (switch_ttbr_fn)(uint64_t ttbr);
>>> +typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len);
>>>
>>>    void __init switch_ttbr(uint64_t ttbr)
>>
>> Given the change below, I think this function needs to be renamed.
>> Possibly to relocate_and_jump() with a comment explaning that the
>> relocation only happen for cache-coloring.
> 
> Changing the name of switch_ttbr() to relocate_and_jump() seems a bit
> misleading to me. First I need to change the name also for arm32 where there's
> no relocation at all. Second, relocation is something that happens
> conditionally so I don't think it's a good name for the function.

Feel free to propose a new name. The main thing is the current name 
can't stay "switch_ttbr()" because you are doing more than switching the
TTBR.

The other solution is to have a separate call for relocating xen (which 
will fall-through to switch_ttbr) and another one for those that only to 
switch TTBR.

Cheers,
Carlo Nonato Dec. 3, 2024, 11:37 a.m. UTC | #4
Hi Julien,

On Tue, Dec 3, 2024 at 11:36 AM Julien Grall <julien@xen.org> wrote:
>
> On 03/12/2024 10:08, Carlo Nonato wrote:
> > Hi Julien,
> >
> > On Mon, Dec 2, 2024 at 10:44 PM Julien Grall <julien@xen.org> wrote:
> >>
> >> Hi Carlo,
> >
> > [...]
> >
> >>> diff --git a/xen/arch/arm/arm64/mmu/mm.c b/xen/arch/arm/arm64/mmu/mm.c
> >>> index 671eaadbc1..3732d5897e 100644
> >>> --- a/xen/arch/arm/arm64/mmu/mm.c
> >>> +++ b/xen/arch/arm/arm64/mmu/mm.c
> >>> @@ -1,6 +1,7 @@
> >>>    /* SPDX-License-Identifier: GPL-2.0-only */
> >>>
> >>>    #include <xen/init.h>
> >>> +#include <xen/llc-coloring.h>
> >>>    #include <xen/mm.h>
> >>>    #include <xen/pfn.h>
> >>>
> >>> @@ -138,27 +139,46 @@ void update_boot_mapping(bool enable)
> >>>    }
> >>>
> >>>    extern void switch_ttbr_id(uint64_t ttbr);
> >>> +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
> >>>
> >>>    typedef void (switch_ttbr_fn)(uint64_t ttbr);
> >>> +typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len);
> >>>
> >>>    void __init switch_ttbr(uint64_t ttbr)
> >>
> >> Given the change below, I think this function needs to be renamed.
> >> Possibly to relocate_and_jump() with a comment explaning that the
> >> relocation only happen for cache-coloring.
> >
> > Changing the name of switch_ttbr() to relocate_and_jump() seems a bit
> > misleading to me. First I need to change the name also for arm32 where there's
> > no relocation at all. Second, relocation is something that happens
> > conditionally so I don't think it's a good name for the function.
>
> Feel free to propose a new name. The main thing is the current name
> can't stay "switch_ttbr()" because you are doing more than switching the
> TTBR.
>
> The other solution is to have a separate call for relocating xen (which
> will fall-through to switch_ttbr) and another one for those that only to
> switch TTBR.

What about a function like this one, defined in xen/arch/arm/arm64/mmu/mm.c:

typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len);

void __init relocate_and_switch_ttbr(uint64_t ttbr) {
    vaddr_t id_addr = virt_to_maddr(relocate_xen);
    relocate_xen_fn *fn = (relocate_xen_fn *)id_addr;
    lpae_t pte;

    /* Enable the identity mapping in the boot page tables */
    update_identity_mapping(true);

    /* Enable the identity mapping in the runtime page tables */
    pte = pte_of_xenaddr((vaddr_t)relocate_xen);
    pte.pt.table = 1;
    pte.pt.xn = 0;
    pte.pt.ro = 1;
    write_pte(&xen_third_id[third_table_offset(id_addr)], pte);

    /* Relocate Xen and switch TTBR */
    fn(ttbr, _start, (void *)BOOT_RELOC_VIRT_START, _end - _start);

    /*
     * Disable the identity mapping in the runtime page tables.
     * Note it is not necessary to disable it in the boot page tables
     * because they are not going to be used by this CPU anymore.
     */
    update_identity_mapping(false);
}

which is actually a clone of switch_ttbr() but it does relocation. I would
then call it in case of coloring in setup_pagetables(). This should go in the
direction you suggested, but it would duplicate a bit of code. What do you
think about it?

> Cheers,
>
> --
> Julien Grall
>

Thanks,
- Carlo
Julien Grall Dec. 4, 2024, 11:18 a.m. UTC | #5
Hi Carlo,

On 03/12/2024 11:37, Carlo Nonato wrote:
> On Tue, Dec 3, 2024 at 11:36 AM Julien Grall <julien@xen.org> wrote:
>>
>> On 03/12/2024 10:08, Carlo Nonato wrote:
>>> Hi Julien,
>>>
>>> On Mon, Dec 2, 2024 at 10:44 PM Julien Grall <julien@xen.org> wrote:
>>>>
>>>> Hi Carlo,
>>>
>>> [...]
>>>
>>>>> diff --git a/xen/arch/arm/arm64/mmu/mm.c b/xen/arch/arm/arm64/mmu/mm.c
>>>>> index 671eaadbc1..3732d5897e 100644
>>>>> --- a/xen/arch/arm/arm64/mmu/mm.c
>>>>> +++ b/xen/arch/arm/arm64/mmu/mm.c
>>>>> @@ -1,6 +1,7 @@
>>>>>     /* SPDX-License-Identifier: GPL-2.0-only */
>>>>>
>>>>>     #include <xen/init.h>
>>>>> +#include <xen/llc-coloring.h>
>>>>>     #include <xen/mm.h>
>>>>>     #include <xen/pfn.h>
>>>>>
>>>>> @@ -138,27 +139,46 @@ void update_boot_mapping(bool enable)
>>>>>     }
>>>>>
>>>>>     extern void switch_ttbr_id(uint64_t ttbr);
>>>>> +extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
>>>>>
>>>>>     typedef void (switch_ttbr_fn)(uint64_t ttbr);
>>>>> +typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len);
>>>>>
>>>>>     void __init switch_ttbr(uint64_t ttbr)
>>>>
>>>> Given the change below, I think this function needs to be renamed.
>>>> Possibly to relocate_and_jump() with a comment explaning that the
>>>> relocation only happen for cache-coloring.
>>>
>>> Changing the name of switch_ttbr() to relocate_and_jump() seems a bit
>>> misleading to me. First I need to change the name also for arm32 where there's
>>> no relocation at all. Second, relocation is something that happens
>>> conditionally so I don't think it's a good name for the function.
>>
>> Feel free to propose a new name. The main thing is the current name
>> can't stay "switch_ttbr()" because you are doing more than switching the
>> TTBR.
>>
>> The other solution is to have a separate call for relocating xen (which
>> will fall-through to switch_ttbr) and another one for those that only to
>> switch TTBR.
> 
> What about a function like this one, defined in xen/arch/arm/arm64/mmu/mm.c:
> 
> typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len);
> 
> void __init relocate_and_switch_ttbr(uint64_t ttbr) {
>      vaddr_t id_addr = virt_to_maddr(relocate_xen);
>      relocate_xen_fn *fn = (relocate_xen_fn *)id_addr;
>      lpae_t pte;
> 
>      /* Enable the identity mapping in the boot page tables */
>      update_identity_mapping(true);
> 
>      /* Enable the identity mapping in the runtime page tables */
>      pte = pte_of_xenaddr((vaddr_t)relocate_xen);
>      pte.pt.table = 1;
>      pte.pt.xn = 0;
>      pte.pt.ro = 1;
>      write_pte(&xen_third_id[third_table_offset(id_addr)], pte);
> 
>      /* Relocate Xen and switch TTBR */
>      fn(ttbr, _start, (void *)BOOT_RELOC_VIRT_START, _end - _start);
> 
>      /*
>       * Disable the identity mapping in the runtime page tables.
>       * Note it is not necessary to disable it in the boot page tables
>       * because they are not going to be used by this CPU anymore.
>       */
>      update_identity_mapping(false);
> }
> 
> which is actually a clone of switch_ttbr() but it does relocation. I would
> then call it in case of coloring in setup_pagetables(). This should go in the
> direction you suggested, but it would duplicate a bit of code. What do you
> think about it?

I think the duplication is fine here.

It would be possible to reduce the duplication is we introduce an helper 
for call update_identity_mapping(true) and update the PTE. But I am not 
sure it is worth it.

Cheers,
diff mbox series

Patch

diff --git a/xen/arch/arm/alternative.c b/xen/arch/arm/alternative.c
index d99b507093..0fcf4e451d 100644
--- a/xen/arch/arm/alternative.c
+++ b/xen/arch/arm/alternative.c
@@ -9,6 +9,7 @@ 
 #include <xen/init.h>
 #include <xen/types.h>
 #include <xen/kernel.h>
+#include <xen/llc-coloring.h>
 #include <xen/mm.h>
 #include <xen/vmap.h>
 #include <xen/smp.h>
@@ -191,6 +192,27 @@  static int __apply_alternatives_multi_stop(void *xenmap)
     return 0;
 }
 
+static void __init *xen_remap_colored(mfn_t xen_mfn, paddr_t xen_size)
+{
+    unsigned int i;
+    void *xenmap;
+    mfn_t *xen_colored_mfns, mfn;
+
+    xen_colored_mfns = xmalloc_array(mfn_t, xen_size >> PAGE_SHIFT);
+    if ( !xen_colored_mfns )
+        panic("Can't allocate LLC colored MFNs\n");
+
+    for_each_xen_colored_mfn ( xen_mfn, mfn, i )
+    {
+        xen_colored_mfns[i] = mfn;
+    }
+
+    xenmap = vmap(xen_colored_mfns, xen_size >> PAGE_SHIFT);
+    xfree(xen_colored_mfns);
+
+    return xenmap;
+}
+
 /*
  * This function should only be called during boot and before CPU0 jump
  * into the idle_loop.
@@ -209,8 +231,12 @@  void __init apply_alternatives_all(void)
      * The text and inittext section are read-only. So re-map Xen to
      * be able to patch the code.
      */
-    xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR,
-                    VMAP_DEFAULT);
+    if ( llc_coloring_enabled )
+        xenmap = xen_remap_colored(xen_mfn, xen_size);
+    else
+        xenmap = __vmap(&xen_mfn, 1U << xen_order, 1, 1, PAGE_HYPERVISOR,
+                        VMAP_DEFAULT);
+
     /* Re-mapping Xen is not expected to fail during boot. */
     BUG_ON(!xenmap);
 
diff --git a/xen/arch/arm/arm64/mmu/head.S b/xen/arch/arm/arm64/mmu/head.S
index 665a51a337..a1fc9a82f1 100644
--- a/xen/arch/arm/arm64/mmu/head.S
+++ b/xen/arch/arm/arm64/mmu/head.S
@@ -428,6 +428,61 @@  FUNC_LOCAL(fail)
         b     1b
 END(fail)
 
+/*
+ * Copy Xen to new location and switch TTBR
+ * x0    ttbr
+ * x1    source address
+ * x2    destination address
+ * x3    length
+ *
+ * Source and destination must be word aligned, length is rounded up
+ * to a 16 byte boundary.
+ *
+ * MUST BE VERY CAREFUL when saving things to RAM over the copy
+ */
+ENTRY(relocate_xen)
+        /*
+         * Copy 16 bytes at a time using:
+         *   x9: counter
+         *   x10: data
+         *   x11: data
+         *   x12: source
+         *   x13: destination
+         */
+        mov     x9, x3
+        mov     x12, x1
+        mov     x13, x2
+
+1:      ldp     x10, x11, [x12], #16
+        stp     x10, x11, [x13], #16
+
+        subs    x9, x9, #16
+        bgt     1b
+
+        /*
+         * Flush destination from dcache using:
+         *   x9: counter
+         *   x10: step
+         *   x11: vaddr
+         *
+         * This is to ensure data is visible to the instruction cache
+         */
+        dsb   sy
+
+        mov   x9, x3
+        ldr   x10, =dcache_line_bytes /* x10 := step */
+        ldr   x10, [x10]
+        mov   x11, x2
+
+1:      dc    cvac, x11
+
+        add   x11, x11, x10
+        subs  x9, x9, x10
+        bgt   1b
+
+        /* No need for dsb/isb because they are alredy done in switch_ttbr_id */
+        b switch_ttbr_id
+
 /*
  * Switch TTBR
  *
@@ -453,7 +508,8 @@  FUNC(switch_ttbr_id)
 
         /*
          * 5) Flush I-cache
-         * This should not be necessary but it is kept for safety.
+         * This should not be necessary in the general case, but it's needed
+         * for cache coloring because code is relocated in that case.
          */
         ic     iallu
         isb
diff --git a/xen/arch/arm/arm64/mmu/mm.c b/xen/arch/arm/arm64/mmu/mm.c
index 671eaadbc1..3732d5897e 100644
--- a/xen/arch/arm/arm64/mmu/mm.c
+++ b/xen/arch/arm/arm64/mmu/mm.c
@@ -1,6 +1,7 @@ 
 /* SPDX-License-Identifier: GPL-2.0-only */
 
 #include <xen/init.h>
+#include <xen/llc-coloring.h>
 #include <xen/mm.h>
 #include <xen/pfn.h>
 
@@ -138,27 +139,46 @@  void update_boot_mapping(bool enable)
 }
 
 extern void switch_ttbr_id(uint64_t ttbr);
+extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
 
 typedef void (switch_ttbr_fn)(uint64_t ttbr);
+typedef void (relocate_xen_fn)(uint64_t ttbr, void *src, void *dst, size_t len);
 
 void __init switch_ttbr(uint64_t ttbr)
 {
-    vaddr_t id_addr = virt_to_maddr(switch_ttbr_id);
-    switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
+    vaddr_t vaddr, id_addr;
     lpae_t pte;
 
+    if ( llc_coloring_enabled )
+        vaddr = (vaddr_t)relocate_xen;
+    else
+        vaddr = (vaddr_t)switch_ttbr_id;
+
+    id_addr = virt_to_maddr(vaddr);
+
     /* Enable the identity mapping in the boot page tables */
     update_identity_mapping(true);
 
     /* Enable the identity mapping in the runtime page tables */
-    pte = pte_of_xenaddr((vaddr_t)switch_ttbr_id);
+    pte = pte_of_xenaddr(vaddr);
     pte.pt.table = 1;
     pte.pt.xn = 0;
     pte.pt.ro = 1;
     write_pte(&xen_third_id[third_table_offset(id_addr)], pte);
 
     /* Switch TTBR */
-    fn(ttbr);
+    if ( llc_coloring_enabled )
+    {
+        relocate_xen_fn *fn = (relocate_xen_fn *)id_addr;
+
+        fn(ttbr, _start, (void *)BOOT_RELOC_VIRT_START, _end - _start);
+    }
+    else
+    {
+        switch_ttbr_fn *fn = (switch_ttbr_fn *)id_addr;
+
+        fn(ttbr);
+    }
 
     /*
      * Disable the identity mapping in the runtime page tables.
diff --git a/xen/arch/arm/include/asm/mmu/layout.h b/xen/arch/arm/include/asm/mmu/layout.h
index a3b546465b..19c0ec63a5 100644
--- a/xen/arch/arm/include/asm/mmu/layout.h
+++ b/xen/arch/arm/include/asm/mmu/layout.h
@@ -30,6 +30,7 @@ 
  *  10M -  12M   Fixmap: special-purpose 4K mapping slots
  *  12M -  16M   Early boot mapping of FDT
  *  16M -  18M   Livepatch vmap (if compiled in)
+ *  16M -  24M   Cache-colored Xen text, data, bss (temporary, if compiled in)
  *
  *   1G -   2G   VMAP: ioremap and early_ioremap
  *
@@ -74,6 +75,8 @@ 
 #define BOOT_FDT_VIRT_START     (FIXMAP_VIRT_START + FIXMAP_VIRT_SIZE)
 #define BOOT_FDT_VIRT_SIZE      _AT(vaddr_t, MB(4))
 
+#define BOOT_RELOC_VIRT_START   (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
+
 #ifdef CONFIG_LIVEPATCH
 #define LIVEPATCH_VMAP_START    (BOOT_FDT_VIRT_START + BOOT_FDT_VIRT_SIZE)
 #define LIVEPATCH_VMAP_SIZE    _AT(vaddr_t, MB(2))
diff --git a/xen/arch/arm/llc-coloring.c b/xen/arch/arm/llc-coloring.c
index 6c8fa6b576..8e10a505db 100644
--- a/xen/arch/arm/llc-coloring.c
+++ b/xen/arch/arm/llc-coloring.c
@@ -10,6 +10,7 @@ 
 #include <xen/types.h>
 
 #include <asm/processor.h>
+#include <asm/setup.h>
 #include <asm/sysregs.h>
 
 /* Return the LLC way size by probing the hardware */
@@ -64,8 +65,70 @@  unsigned int __init get_llc_way_size(void)
     return line_size * num_sets;
 }
 
+/**
+ * get_xen_paddr - get physical address to relocate Xen to
+ *
+ * Xen is relocated to as near to the top of RAM as possible and
+ * aligned to a XEN_PADDR_ALIGN boundary.
+ */
+static paddr_t __init get_xen_paddr(paddr_t xen_size)
+{
+    const struct membanks *mem = bootinfo_get_mem();
+    paddr_t min_size, paddr = 0;
+    unsigned int i;
+
+    min_size = (xen_size + (XEN_PADDR_ALIGN-1)) & ~(XEN_PADDR_ALIGN-1);
+
+    /* Find the highest bank with enough space. */
+    for ( i = 0; i < mem->nr_banks; i++ )
+    {
+        const struct membank *bank = &mem->bank[i];
+        paddr_t s, e;
+
+        if ( bank->size >= min_size )
+        {
+            e = consider_modules(bank->start, bank->start + bank->size,
+                                 min_size, XEN_PADDR_ALIGN, 0);
+            if ( !e )
+                continue;
+
+#ifdef CONFIG_ARM_32
+            /* Xen must be under 4GB */
+            if ( e > GB(4) )
+                e = GB(4);
+            if ( e < bank->start )
+                continue;
+#endif
+
+            s = e - min_size;
+
+            if ( s > paddr )
+                paddr = s;
+        }
+    }
+
+    if ( !paddr )
+        panic("Not enough memory to relocate Xen\n");
+
+    printk("Placing Xen at 0x%"PRIpaddr"-0x%"PRIpaddr"\n",
+           paddr, paddr + min_size);
+
+    return paddr;
+}
+
+static paddr_t __init xen_colored_map_size(void)
+{
+    return ROUNDUP((_end - _start) * get_max_nr_llc_colors(), XEN_PADDR_ALIGN);
+}
+
 void __init arch_llc_coloring_init(void)
 {
+    struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN);
+
+    BUG_ON(!xen_bootmodule);
+
+    xen_bootmodule->size = xen_colored_map_size();
+    xen_bootmodule->start = get_xen_paddr(xen_bootmodule->size);
 }
 
 /*
diff --git a/xen/arch/arm/mmu/setup.c b/xen/arch/arm/mmu/setup.c
index 196974f3e2..1d8d7eb70c 100644
--- a/xen/arch/arm/mmu/setup.c
+++ b/xen/arch/arm/mmu/setup.c
@@ -7,6 +7,7 @@ 
 
 #include <xen/init.h>
 #include <xen/libfdt/libfdt.h>
+#include <xen/llc-coloring.h>
 #include <xen/sections.h>
 #include <xen/sizes.h>
 #include <xen/vmap.h>
@@ -20,6 +21,9 @@ 
 #undef virt_to_mfn
 #define virt_to_mfn(va) _mfn(__virt_to_mfn(va))
 
+#define virt_to_reloc_virt(virt) \
+    (((vaddr_t)virt) - XEN_VIRT_START + BOOT_RELOC_VIRT_START)
+
 /* Main runtime page tables */
 
 /*
@@ -69,6 +73,7 @@  static void __init __maybe_unused build_assertions(void)
     /* 2MB aligned regions */
     BUILD_BUG_ON(XEN_VIRT_START & ~SECOND_MASK);
     BUILD_BUG_ON(FIXMAP_ADDR(0) & ~SECOND_MASK);
+    BUILD_BUG_ON(BOOT_RELOC_VIRT_START & ~SECOND_MASK);
     /* 1GB aligned regions */
 #ifdef CONFIG_ARM_32
     BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK);
@@ -138,6 +143,9 @@  static void __init __maybe_unused build_assertions(void)
 
 lpae_t __init pte_of_xenaddr(vaddr_t va)
 {
+    if ( llc_coloring_enabled )
+        va = virt_to_reloc_virt(va);
+
     return mfn_to_xen_entry(virt_to_mfn(va), MT_NORMAL);
 }
 
@@ -316,9 +324,44 @@  paddr_t __init consider_modules(paddr_t s, paddr_t e,
     return e;
 }
 
+static void __init create_llc_coloring_mappings(void)
+{
+    lpae_t pte;
+    unsigned int i;
+    struct bootmodule *xen_bootmodule = boot_module_find_by_kind(BOOTMOD_XEN);
+    mfn_t start_mfn = maddr_to_mfn(xen_bootmodule->start), mfn;
+
+    for_each_xen_colored_mfn ( start_mfn, mfn, i )
+    {
+        pte = mfn_to_xen_entry(mfn, MT_NORMAL);
+        pte.pt.table = 1; /* level 3 mappings always have this bit set */
+        xen_xenmap[i] = pte;
+    }
+
+    for ( i = 0; i < XEN_NR_ENTRIES(2); i++ )
+    {
+        vaddr_t va = BOOT_RELOC_VIRT_START + (i << XEN_PT_LEVEL_SHIFT(2));
+
+        pte = mfn_to_xen_entry(virt_to_mfn(xen_xenmap +
+                                           i * XEN_PT_LPAE_ENTRIES),
+                               MT_NORMAL);
+        pte.pt.table = 1;
+        write_pte(&boot_second[second_table_offset(va)], pte);
+    }
+}
+
 /*
- * Boot-time pagetable setup.
+ * Boot-time pagetable setup with coloring support
  * Changes here may need matching changes in head.S
+ *
+ * The cache coloring support consists of:
+ * - Create colored mapping that conforms to Xen color selection in xen_xenmap[]
+ * - Link the mapping in boot page tables using BOOT_RELOC_VIRT_START as vaddr
+ * - pte_of_xenaddr() takes care of translating addresses to the new space
+ *   during runtime page tables creation
+ * - Relocate xen and update TTBR with the new address in the colored space
+ *   (see switch_ttbr())
+ * - Protect the new space
  */
 void __init setup_pagetables(void)
 {
@@ -326,6 +369,9 @@  void __init setup_pagetables(void)
     lpae_t pte, *p;
     int i;
 
+    if ( llc_coloring_enabled )
+        create_llc_coloring_mappings();
+
     arch_setup_page_tables();
 
 #ifdef CONFIG_ARM_64
@@ -353,13 +399,7 @@  void __init setup_pagetables(void)
             break;
         pte = pte_of_xenaddr(va);
         pte.pt.table = 1; /* third level mappings always have this bit set */
-        if ( is_kernel_text(va) || is_kernel_inittext(va) )
-        {
-            pte.pt.xn = 0;
-            pte.pt.ro = 1;
-        }
-        if ( is_kernel_rodata(va) )
-            pte.pt.ro = 1;
+        pte.pt.xn = 0; /* Permissions will be enforced later. Allow execution */
         xen_xenmap[i] = pte;
     }
 
@@ -385,13 +425,48 @@  void __init setup_pagetables(void)
     ttbr = virt_to_maddr(cpu0_pgtable);
 #endif
 
-    switch_ttbr(ttbr);
-
-    xen_pt_enforce_wnx();
-
 #ifdef CONFIG_ARM_32
     per_cpu(xen_pgtable, 0) = cpu0_pgtable;
 #endif
+
+    if ( llc_coloring_enabled )
+        ttbr = virt_to_maddr(virt_to_reloc_virt(THIS_CPU_PGTABLE));
+
+    switch_ttbr(ttbr);
+
+    /* Protect Xen */
+    for ( i = 0; i < XEN_NR_ENTRIES(3); i++ )
+    {
+        vaddr_t va = XEN_VIRT_START + (i << PAGE_SHIFT);
+        lpae_t *entry = xen_xenmap + i;
+
+        if ( !is_kernel(va) )
+            break;
+
+        pte = read_atomic(entry);
+
+        if ( is_kernel_text(va) || is_kernel_inittext(va) )
+        {
+            pte.pt.xn = 0;
+            pte.pt.ro = 1;
+        } else if ( is_kernel_rodata(va) ) {
+            pte.pt.ro = 1;
+            pte.pt.xn = 1;
+        } else {
+            pte.pt.xn = 1;
+            pte.pt.ro = 0;
+        }
+
+        write_pte(entry, pte);
+    }
+
+    /*
+     * We modified live page-tables. Ensure the TLBs are invalidated
+     * before setting enforcing the WnX permissions.
+     */
+    flush_xen_tlb_local();
+
+    xen_pt_enforce_wnx();
 }
 
 void *__init arch_vmap_virt_end(void)
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 568a49b274..5e2c519ce8 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -304,8 +304,6 @@  void asmlinkage __init start_xen(unsigned long fdt_paddr)
     /* Initialize traps early allow us to get backtrace when an error occurred */
     init_traps();
 
-    setup_pagetables();
-
     smp_clear_cpu_maps();
 
     device_tree_flattened = early_fdt_map(fdt_paddr);
@@ -329,6 +327,14 @@  void asmlinkage __init start_xen(unsigned long fdt_paddr)
 
     llc_coloring_init();
 
+    /*
+     * Page tables must be setup after LLC coloring initialization because
+     * coloring info are required in order to create colored mappings
+     */
+    setup_pagetables();
+    /* Device-tree was mapped in boot page tables, remap it in the new tables */
+    device_tree_flattened = early_fdt_map(fdt_paddr);
+
     setup_mm();
 
     vm_init();
diff --git a/xen/common/llc-coloring.c b/xen/common/llc-coloring.c
index 2e7c0f505d..2d6aed5fb4 100644
--- a/xen/common/llc-coloring.c
+++ b/xen/common/llc-coloring.c
@@ -38,6 +38,8 @@  static unsigned int __ro_after_init xen_num_colors;
 
 #define mfn_color_mask              (max_nr_colors - 1)
 #define mfn_to_color(mfn)           (mfn_x(mfn) & mfn_color_mask)
+#define get_mfn_with_color(mfn, color) \
+    (_mfn((mfn_x(mfn) & ~mfn_color_mask) | (color)))
 
 /*
  * Parse the coloring configuration given in the buf string, following the
@@ -359,6 +361,22 @@  unsigned int get_max_nr_llc_colors(void)
     return max_nr_colors;
 }
 
+mfn_t __init xen_colored_mfn(mfn_t mfn)
+{
+    unsigned int i, color = mfn_to_color(mfn);
+
+    for ( i = 0; i < xen_num_colors; i++ )
+    {
+        if ( color == xen_colors[i] )
+            return mfn;
+        if ( color < xen_colors[i] )
+            return get_mfn_with_color(mfn, xen_colors[i]);
+    }
+
+    /* Jump to next color space (max_nr_colors mfns) and use the first color */
+    return get_mfn_with_color(mfn_add(mfn, max_nr_colors), xen_colors[0]);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/xen/llc-coloring.h b/xen/include/xen/llc-coloring.h
index b3f2fa22bc..4f10a5310f 100644
--- a/xen/include/xen/llc-coloring.h
+++ b/xen/include/xen/llc-coloring.h
@@ -8,6 +8,8 @@ 
 #ifndef __XEN_LLC_COLORING_H__
 #define __XEN_LLC_COLORING_H__
 
+#include <xen/mm-frame.h>
+
 struct domain;
 struct page_info;
 struct xen_domctl_set_llc_colors;
@@ -28,6 +30,17 @@  static inline void domain_dump_llc_colors(const struct domain *d) {}
 static inline void domain_llc_coloring_free(struct domain *d) {}
 #endif
 
+/**
+ * Iterate over each Xen mfn in the colored space.
+ * @start_mfn:  the first mfn that needs to be colored.
+ * @mfn:        the current mfn.
+ * @i:          loop index.
+ */
+#define for_each_xen_colored_mfn(start_mfn, mfn, i) \
+    for ( i = 0, mfn = xen_colored_mfn(start_mfn);  \
+          i < (_end - _start) >> PAGE_SHIFT;        \
+          i++, mfn = xen_colored_mfn(mfn_add(mfn, 1)) )
+
 unsigned int get_llc_way_size(void);
 void arch_llc_coloring_init(void);
 int dom0_set_llc_colors(struct domain *d);
@@ -36,6 +49,7 @@  int domain_set_llc_colors(struct domain *d,
 int domain_set_llc_colors_from_str(struct domain *d, const char *str);
 unsigned int page_to_llc_color(const struct page_info *pg);
 unsigned int get_max_nr_llc_colors(void);
+mfn_t xen_colored_mfn(mfn_t mfn);
 
 #endif /* __XEN_LLC_COLORING_H__ */