
[v2,11/40] xen/mpu: build up start-of-day Xen MPU memory region map

Message ID 20230113052914.3845596-12-Penny.Zheng@arm.com (mailing list archive)
State New, archived
Series xen/arm: Add Armv8-R64 MPU support to Xen - Part#1

Commit Message

Penny Zheng Jan. 13, 2023, 5:28 a.m. UTC
From: Penny Zheng <penny.zheng@arm.com>

The start-of-day Xen MPU memory region layout shall be as follows:

xen_mpumap[0] : Xen text
xen_mpumap[1] : Xen read-only data
xen_mpumap[2] : Xen read-only after init data
xen_mpumap[3] : Xen read-write data
xen_mpumap[4] : Xen BSS
......
xen_mpumap[max_xen_mpumap - 2]: Xen init data
xen_mpumap[max_xen_mpumap - 1]: Xen init text

max_xen_mpumap refers to the number of regions supported by the EL2 MPU.
The layout shall be compliant with what we describe in xen.lds.S, or the
code needs adjustment.

As MMU and MPU systems have different functions to create the boot
MMU/MPU memory management data, we introduce a neutral name,
prepare_early_mappings, for both (replacing create_page_tables on MMU
systems), instead of introducing an extra #ifdef in the main code flow.

Signed-off-by: Penny Zheng <penny.zheng@arm.com>
Signed-off-by: Wei Chen <wei.chen@arm.com>
---
 xen/arch/arm/arm64/Makefile              |   2 +
 xen/arch/arm/arm64/head.S                |  17 +-
 xen/arch/arm/arm64/head_mmu.S            |   4 +-
 xen/arch/arm/arm64/head_mpu.S            | 323 +++++++++++++++++++++++
 xen/arch/arm/include/asm/arm64/mpu.h     |  63 +++++
 xen/arch/arm/include/asm/arm64/sysregs.h |  49 ++++
 xen/arch/arm/mm_mpu.c                    |  48 ++++
 xen/arch/arm/xen.lds.S                   |   4 +
 8 files changed, 502 insertions(+), 8 deletions(-)
 create mode 100644 xen/arch/arm/arm64/head_mpu.S
 create mode 100644 xen/arch/arm/include/asm/arm64/mpu.h
 create mode 100644 xen/arch/arm/mm_mpu.c

Comments

Ayan Kumar Halder Jan. 19, 2023, 10:18 a.m. UTC | #1
On 13/01/2023 05:28, Penny Zheng wrote:
>
> From: Penny Zheng <penny.zheng@arm.com>
>
> The start-of-day Xen MPU memory region layout shall be like as follows:
>
> xen_mpumap[0] : Xen text
> xen_mpumap[1] : Xen read-only data
> xen_mpumap[2] : Xen read-only after init data
> xen_mpumap[3] : Xen read-write data
> xen_mpumap[4] : Xen BSS
> ......
> xen_mpumap[max_xen_mpumap - 2]: Xen init data
> xen_mpumap[max_xen_mpumap - 1]: Xen init text
>
> max_xen_mpumap refers to the number of regions supported by the EL2 MPU.
> The layout shall be compliant with what we describe in xen.lds.S, or the
> codes need adjustment.
>
> As MMU system and MPU system have different functions to create
> the boot MMU/MPU memory management data, instead of introducing
> extra #ifdef in main code flow, we introduce a neutral name
> prepare_early_mappings for both, and also to replace create_page_tables for MMU.
>
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> Signed-off-by: Wei Chen <wei.chen@arm.com>
> ---
>   xen/arch/arm/arm64/Makefile              |   2 +
>   xen/arch/arm/arm64/head.S                |  17 +-
>   xen/arch/arm/arm64/head_mmu.S            |   4 +-
>   xen/arch/arm/arm64/head_mpu.S            | 323 +++++++++++++++++++++++
>   xen/arch/arm/include/asm/arm64/mpu.h     |  63 +++++
>   xen/arch/arm/include/asm/arm64/sysregs.h |  49 ++++
>   xen/arch/arm/mm_mpu.c                    |  48 ++++
>   xen/arch/arm/xen.lds.S                   |   4 +
>   8 files changed, 502 insertions(+), 8 deletions(-)
>   create mode 100644 xen/arch/arm/arm64/head_mpu.S
>   create mode 100644 xen/arch/arm/include/asm/arm64/mpu.h
>   create mode 100644 xen/arch/arm/mm_mpu.c
>
> diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
> index 22da2f54b5..438c9737ad 100644
> --- a/xen/arch/arm/arm64/Makefile
> +++ b/xen/arch/arm/arm64/Makefile
> @@ -10,6 +10,8 @@ obj-y += entry.o
>   obj-y += head.o
>   ifneq ($(CONFIG_HAS_MPU),y)
>   obj-y += head_mmu.o
> +else
> +obj-y += head_mpu.o
>   endif
>   obj-y += insn.o
>   obj-$(CONFIG_LIVEPATCH) += livepatch.o
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 782bd1f94c..145e3d53dc 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -68,9 +68,9 @@
>    *  x24 -
>    *  x25 -
>    *  x26 - skip_zero_bss (boot cpu only)
> - *  x27 -
> - *  x28 -
> - *  x29 -
> + *  x27 - region selector (mpu only)
> + *  x28 - prbar (mpu only)
> + *  x29 - prlar (mpu only)
>    *  x30 - lr
>    */
>
> @@ -82,7 +82,7 @@
>    * ---------------------------
>    *
>    * The requirements are:
> - *   MMU = off, D-cache = off, I-cache = on or off,
> + *   MMU/MPU = off, D-cache = off, I-cache = on or off,
>    *   x0 = physical address to the FDT blob.
>    *
>    * This must be the very first address in the loaded image.
> @@ -252,7 +252,12 @@ real_start_efi:
>
>           bl    check_cpu_mode
>           bl    cpu_init
> -        bl    create_page_tables
> +
> +        /*
> +         * Create boot memory management data, pagetable for MMU systems
> +         * and memory regions for MPU systems.
> +         */
> +        bl    prepare_early_mappings
>           bl    enable_mmu
>
>           /* We are still in the 1:1 mapping. Jump to the runtime Virtual Address. */
> @@ -310,7 +315,7 @@ GLOBAL(init_secondary)
>   #endif
>           bl    check_cpu_mode
>           bl    cpu_init
> -        bl    create_page_tables
> +        bl    prepare_early_mappings
>           bl    enable_mmu
>
>           /* We are still in the 1:1 mapping. Jump to the runtime Virtual Address. */
> diff --git a/xen/arch/arm/arm64/head_mmu.S b/xen/arch/arm/arm64/head_mmu.S
> index 6ff13c751c..2346f755df 100644
> --- a/xen/arch/arm/arm64/head_mmu.S
> +++ b/xen/arch/arm/arm64/head_mmu.S
> @@ -123,7 +123,7 @@
>    *
>    * Clobbers x0 - x4
>    */
> -ENTRY(create_page_tables)
> +ENTRY(prepare_early_mappings)
>           /* Prepare the page-tables for mapping Xen */
>           ldr   x0, =XEN_VIRT_START
>           create_table_entry boot_pgtable, boot_first, x0, 0, x1, x2, x3
> @@ -208,7 +208,7 @@ virtphys_clash:
>           /* Identity map clashes with boot_third, which we cannot handle yet */
>           PRINT("- Unable to build boot page tables - virt and phys addresses clash. -\r\n")
>           b     fail
> -ENDPROC(create_page_tables)
> +ENDPROC(prepare_early_mappings)

NIT:- Can this renaming be done in a separate patch of its own (before
this patch), so that this patch is only about the new functionality
introduced?

>
>   /*
>    * Turn on the Data Cache and the MMU. The function will return on the 1:1
> diff --git a/xen/arch/arm/arm64/head_mpu.S b/xen/arch/arm/arm64/head_mpu.S
> new file mode 100644
> index 0000000000..0b97ce4646
> --- /dev/null
> +++ b/xen/arch/arm/arm64/head_mpu.S
> @@ -0,0 +1,323 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Start-of-day code for an Armv8-R AArch64 MPU system.
> + */
> +
> +#include <asm/arm64/mpu.h>
> +#include <asm/early_printk.h>
> +#include <asm/page.h>
> +
> +/*
> + * One entry in Xen MPU memory region mapping table(xen_mpumap) is a structure
> + * of pr_t, which is 16-bytes size, so the entry offset is the order of 4.
> + */
NIT:- It would be good to quote the Arm ARM section that these
definitions are taken from.
> +#define MPU_ENTRY_SHIFT         0x4
> +
> +#define REGION_SEL_MASK         0xf
> +
> +#define REGION_TEXT_PRBAR       0x38    /* SH=11 AP=10 XN=00 */
> +#define REGION_RO_PRBAR         0x3A    /* SH=11 AP=10 XN=10 */
> +#define REGION_DATA_PRBAR       0x32    /* SH=11 AP=00 XN=10 */
> +
> +#define REGION_NORMAL_PRLAR     0x0f    /* NS=0 ATTR=111 EN=1 */
> +
> +/*
> + * Macro to round up the section address to be PAGE_SIZE aligned
> + * Each section(e.g. .text, .data, etc) in xen.lds.S is page-aligned,
> + * which is usually guarded with ". = ALIGN(PAGE_SIZE)" in the head,
> + * or in the end
> + */
> +.macro roundup_section, xb
> +        add   \xb, \xb, #(PAGE_SIZE-1)
> +        and   \xb, \xb, #PAGE_MASK
> +.endm
> +
> +/*
> + * Macro to create a new MPU memory region entry, which is a structure
> + * of pr_t,  in \prmap.
> + *
> + * Inputs:
> + * prmap:   mpu memory region map table symbol
> + * sel:     region selector
> + * prbar:   preserve value for PRBAR_EL2
> + * prlar    preserve value for PRLAR_EL2
> + *
> + * Clobbers \tmp1, \tmp2
> + *
> + */
> +.macro create_mpu_entry prmap, sel, prbar, prlar, tmp1, tmp2
> +    mov   \tmp2, \sel
> +    lsl   \tmp2, \tmp2, #MPU_ENTRY_SHIFT
> +    adr_l \tmp1, \prmap
> +    /* Write the first 8 bytes(prbar_t) of pr_t */
> +    str   \prbar, [\tmp1, \tmp2]
> +
> +    add   \tmp2, \tmp2, #8
> +    /* Write the last 8 bytes(prlar_t) of pr_t */
> +    str   \prlar, [\tmp1, \tmp2]
> +.endm
> +
> +/*
> + * Macro to store the maximum number of regions supported by the EL2 MPU
> + * in max_xen_mpumap, which is identified by MPUIR_EL2.
> + *
> + * Outputs:
> + * nr_regions: preserve the maximum number of regions supported by the EL2 MPU
> + *
> + * Clobbers \tmp1
> + *
> + */
> +.macro read_max_el2_regions, nr_regions, tmp1
> +    load_paddr \tmp1, max_xen_mpumap
> +    mrs   \nr_regions, MPUIR_EL2
> +    isb
> +    str   \nr_regions, [\tmp1]
> +.endm
> +
> +/*
> + * Macro to prepare and set a MPU memory region
> + *
> + * Inputs:
> + * base:        base address symbol (should be page-aligned)
> + * limit:       limit address symbol
> + * sel:         region selector
> + * prbar:       store computed PRBAR_EL2 value
> + * prlar:       store computed PRLAR_EL2 value
> + * attr_prbar:  PRBAR_EL2-related memory attributes. If not specified it will be REGION_DATA_PRBAR
> + * attr_prlar:  PRLAR_EL2-related memory attributes. If not specified it will be REGION_NORMAL_PRLAR
> + *
> + * Clobber \tmp1
> + *
> + */
> +.macro prepare_xen_region, base, limit, sel, prbar, prlar, tmp1, attr_prbar=REGION_DATA_PRBAR, attr_prlar=REGION_NORMAL_PRLAR
> +    /* Prepare value for PRBAR_EL2 reg and preserve it in \prbar.*/
> +    load_paddr \prbar, \base
> +    and   \prbar, \prbar, #MPU_REGION_MASK
> +    mov   \tmp1, #\attr_prbar
> +    orr   \prbar, \prbar, \tmp1
> +
> +    /* Prepare value for PRLAR_EL2 reg and preserve it in \prlar.*/
> +    load_paddr \prlar, \limit
> +    /* Round up limit address to be PAGE_SIZE aligned */
> +    roundup_section \prlar
> +    /* Limit address should be inclusive */
> +    sub   \prlar, \prlar, #1
> +    and   \prlar, \prlar, #MPU_REGION_MASK
> +    mov   \tmp1, #\attr_prlar
> +    orr   \prlar, \prlar, \tmp1
> +
> +    mov   x27, \sel
> +    mov   x28, \prbar
> +    mov   x29, \prlar

Any reason for using x27, x28, x29 to pass function parameters?

https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
states that x0..x7 should be used (Table 2, General-purpose registers and
AAPCS64 usage).

> +    /*
> +     * x27, x28, x29 are special registers designed as
> +     * inputs for function write_pr
> +     */
> +    bl    write_pr
> +.endm
> +
> +.section .text.idmap, "ax", %progbits
> +
> +/*
> + * ENTRY to configure a EL2 MPU memory region
> + * ARMv8-R AArch64 at most supports 255 MPU protection regions.
> + * See section G1.3.18 of the reference manual for ARMv8-R AArch64,
> + * PRBAR<n>_EL2 and PRLAR<n>_EL2 provides access to the EL2 MPU region
> + * determined by the value of 'n' and PRSELR_EL2.REGION as
> + * PRSELR_EL2.REGION<7:4>:n.(n = 0, 1, 2, ... , 15)
> + * For example to access regions from 16 to 31 (0b10000 to 0b11111):
> + * - Set PRSELR_EL2 to 0b1xxxx
> + * - Region 16 configuration is accessible through PRBAR0_EL2 and PRLAR0_EL2
> + * - Region 17 configuration is accessible through PRBAR1_EL2 and PRLAR1_EL2
> + * - Region 18 configuration is accessible through PRBAR2_EL2 and PRLAR2_EL2
> + * - ...
> + * - Region 31 configuration is accessible through PRBAR15_EL2 and PRLAR15_EL2
> + *
> + * Inputs:
> + * x27: region selector
> + * x28: preserve value for PRBAR_EL2
> + * x29: preserve value for PRLAR_EL2
> + *
> + */
> +ENTRY(write_pr)
> +    msr   PRSELR_EL2, x27
> +    dsb   sy
> +    and   x27, x27, #REGION_SEL_MASK
> +    cmp   x27, #0
> +    bne   1f
> +    msr   PRBAR0_EL2, x28
> +    msr   PRLAR0_EL2, x29
> +    b     out
> +1:
> +    cmp   x27, #1
> +    bne   2f
> +    msr   PRBAR1_EL2, x28
> +    msr   PRLAR1_EL2, x29
> +    b     out
> +2:
> +    cmp   x27, #2
> +    bne   3f
> +    msr   PRBAR2_EL2, x28
> +    msr   PRLAR2_EL2, x29
> +    b     out
> +3:
> +    cmp   x27, #3
> +    bne   4f
> +    msr   PRBAR3_EL2, x28
> +    msr   PRLAR3_EL2, x29
> +    b     out
> +4:
> +    cmp   x27, #4
> +    bne   5f
> +    msr   PRBAR4_EL2, x28
> +    msr   PRLAR4_EL2, x29
> +    b     out
> +5:
> +    cmp   x27, #5
> +    bne   6f
> +    msr   PRBAR5_EL2, x28
> +    msr   PRLAR5_EL2, x29
> +    b     out
> +6:
> +    cmp   x27, #6
> +    bne   7f
> +    msr   PRBAR6_EL2, x28
> +    msr   PRLAR6_EL2, x29
> +    b     out
> +7:
> +    cmp   x27, #7
> +    bne   8f
> +    msr   PRBAR7_EL2, x28
> +    msr   PRLAR7_EL2, x29
> +    b     out
> +8:
> +    cmp   x27, #8
> +    bne   9f
> +    msr   PRBAR8_EL2, x28
> +    msr   PRLAR8_EL2, x29
> +    b     out
> +9:
> +    cmp   x27, #9
> +    bne   10f
> +    msr   PRBAR9_EL2, x28
> +    msr   PRLAR9_EL2, x29
> +    b     out
> +10:
> +    cmp   x27, #10
> +    bne   11f
> +    msr   PRBAR10_EL2, x28
> +    msr   PRLAR10_EL2, x29
> +    b     out
> +11:
> +    cmp   x27, #11
> +    bne   12f
> +    msr   PRBAR11_EL2, x28
> +    msr   PRLAR11_EL2, x29
> +    b     out
> +12:
> +    cmp   x27, #12
> +    bne   13f
> +    msr   PRBAR12_EL2, x28
> +    msr   PRLAR12_EL2, x29
> +    b     out
> +13:
> +    cmp   x27, #13
> +    bne   14f
> +    msr   PRBAR13_EL2, x28
> +    msr   PRLAR13_EL2, x29
> +    b     out
> +14:
> +    cmp   x27, #14
> +    bne   15f
> +    msr   PRBAR14_EL2, x28
> +    msr   PRLAR14_EL2, x29
> +    b     out
> +15:
> +    msr   PRBAR15_EL2, x28
> +    msr   PRLAR15_EL2, x29
> +out:
> +    isb
> +    ret
> +ENDPROC(write_pr)
> +
> +/*
> + * Static start-of-day Xen EL2 MPU memory region layout.
> + *
> + *     xen_mpumap[0] : Xen text
> + *     xen_mpumap[1] : Xen read-only data
> + *     xen_mpumap[2] : Xen read-only after init data
> + *     xen_mpumap[3] : Xen read-write data
> + *     xen_mpumap[4] : Xen BSS
> + *     ......
> + *     xen_mpumap[max_xen_mpumap - 2]: Xen init data
> + *     xen_mpumap[max_xen_mpumap - 1]: Xen init text
> + *
> + * Clobbers x0 - x6
> + *
> + * It shall be compliant with what describes in xen.lds.S, or the below
> + * codes need adjustment.
> + * It shall also follow the rules of putting fixed MPU memory region in
> + * the front, and the others in the rear, which, here, mainly refers to
> + * boot-only region, like Xen init text region.
> + */
> +ENTRY(prepare_early_mappings)
> +    /* stack LR as write_pr will be called later like nested function */
> +    mov   x6, lr
> +
> +    /* x0: region sel */
> +    mov   x0, xzr
> +    /* Xen text section. */
> +    prepare_xen_region _stext, _etext, x0, x1, x2, x3, attr_prbar=REGION_TEXT_PRBAR
> +    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
> +
> +    add   x0, x0, #1
> +    /* Xen read-only data section. */
> +    prepare_xen_region _srodata, _erodata, x0, x1, x2, x3, attr_prbar=REGION_RO_PRBAR
> +    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
> +
> +    add   x0, x0, #1
> +    /* Xen read-only after init data section. */
> +    prepare_xen_region __ro_after_init_start, __ro_after_init_end, x0, x1, x2, x3
> +    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
> +
> +    add   x0, x0, #1
> +    /* Xen read-write data section. */
> +    prepare_xen_region __data_begin, __init_begin, x0, x1, x2, x3
> +    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
> +
> +    read_max_el2_regions x5, x3 /* x5: max_mpumap */
> +    sub   x5, x5, #1
> +    /* Xen init text section. */
> +    prepare_xen_region _sinittext, _einittext, x5, x1, x2, x3, attr_prbar=REGION_TEXT_PRBAR
> +    create_mpu_entry xen_mpumap, x5, x1, x2, x3, x4
> +
> +    sub   x5, x5, #1
> +    /* Xen init data section. */
> +    prepare_xen_region __init_data_begin, __init_end, x5, x1, x2, x3
> +    create_mpu_entry xen_mpumap, x5, x1, x2, x3, x4
> +
> +    add   x0, x0, #1
> +    /* Xen BSS section. */
> +    prepare_xen_region __bss_start, __bss_end, x0, x1, x2, x3
> +    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
> +
> +    /* Update next_fixed_region_idx and next_transient_region_idx */
> +    load_paddr x3, next_fixed_region_idx
> +    add   x0, x0, #1
> +    str   x0, [x3]
> +    load_paddr x4, next_transient_region_idx
> +    sub   x5, x5, #1
> +    str   x5, [x4]
> +
> +    mov   lr, x6
> +    ret
> +ENDPROC(prepare_early_mappings)
> +
> +GLOBAL(_end_boot)
> +
> +/*
> + * Local variables:
> + * mode: ASM
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/include/asm/arm64/mpu.h b/xen/arch/arm/include/asm/arm64/mpu.h
> new file mode 100644
> index 0000000000..c945dd53db
> --- /dev/null
> +++ b/xen/arch/arm/include/asm/arm64/mpu.h
> @@ -0,0 +1,63 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * mpu.h: Arm Memory Protection Region definitions.
> + */
> +
> +#ifndef __ARM64_MPU_H__
> +#define __ARM64_MPU_H__
> +
> +#define MPU_REGION_SHIFT  6
> +#define MPU_REGION_ALIGN  (_AC(1, UL) << MPU_REGION_SHIFT)
> +#define MPU_REGION_MASK   (~(MPU_REGION_ALIGN - 1))
> +
> +/*
> + * MPUIR_EL2.Region identifies the number of regions supported by the EL2 MPU.
> + * It is a 8-bit field, so 255 MPU memory regions at most.
> + */
> +#define ARM_MAX_MPU_MEMORY_REGIONS 255
> +
> +#ifndef __ASSEMBLY__
> +
> +/* Protection Region Base Address Register */
> +typedef union {
> +    struct __packed {
> +        unsigned long xn:2;       /* Execute-Never */
> +        unsigned long ap:2;       /* Acess Permission */
> +        unsigned long sh:2;       /* Sharebility */
> +        unsigned long base:42;    /* Base Address */
> +        unsigned long pad:16;
> +    } reg;
> +    uint64_t bits;
> +} prbar_t;
> +
> +/* Protection Region Limit Address Register */
> +typedef union {
> +    struct __packed {
> +        unsigned long en:1;     /* Region enable */
> +        unsigned long ai:3;     /* Memory Attribute Index */
> +        unsigned long ns:1;     /* Not-Secure */
> +        unsigned long res:1;    /* Reserved 0 by hardware */
> +        unsigned long limit:42; /* Limit Address */
> +        unsigned long pad:16;
> +    } reg;
> +    uint64_t bits;
> +} prlar_t;
> +
> +/* MPU Protection Region */
> +typedef struct {
> +    prbar_t prbar;
> +    prlar_t prlar;
> +} pr_t;
> +
> +#endif /* __ASSEMBLY__ */
> +
> +#endif /* __ARM64_MPU_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/include/asm/arm64/sysregs.h b/xen/arch/arm/include/asm/arm64/sysregs.h
> index 4638999514..aca9bca5b1 100644
> --- a/xen/arch/arm/include/asm/arm64/sysregs.h
> +++ b/xen/arch/arm/include/asm/arm64/sysregs.h
> @@ -458,6 +458,55 @@
>   #define ZCR_ELx_LEN_SIZE             9
>   #define ZCR_ELx_LEN_MASK             0x1ff
>
> +/* System registers for Armv8-R AArch64 */
> +#ifdef CONFIG_HAS_MPU
> +
> +/* EL2 MPU Protection Region Base Address Register encode */
> +#define PRBAR_EL2   S3_4_C6_C8_0
> +#define PRBAR0_EL2  S3_4_C6_C8_0
> +#define PRBAR1_EL2  S3_4_C6_C8_4
> +#define PRBAR2_EL2  S3_4_C6_C9_0
> +#define PRBAR3_EL2  S3_4_C6_C9_4
> +#define PRBAR4_EL2  S3_4_C6_C10_0
> +#define PRBAR5_EL2  S3_4_C6_C10_4
> +#define PRBAR6_EL2  S3_4_C6_C11_0
> +#define PRBAR7_EL2  S3_4_C6_C11_4
> +#define PRBAR8_EL2  S3_4_C6_C12_0
> +#define PRBAR9_EL2  S3_4_C6_C12_4
> +#define PRBAR10_EL2 S3_4_C6_C13_0
> +#define PRBAR11_EL2 S3_4_C6_C13_4
> +#define PRBAR12_EL2 S3_4_C6_C14_0
> +#define PRBAR13_EL2 S3_4_C6_C14_4
> +#define PRBAR14_EL2 S3_4_C6_C15_0
> +#define PRBAR15_EL2 S3_4_C6_C15_4
> +
> +/* EL2 MPU Protection Region Limit Address Register encode */
> +#define PRLAR_EL2   S3_4_C6_C8_1
> +#define PRLAR0_EL2  S3_4_C6_C8_1
> +#define PRLAR1_EL2  S3_4_C6_C8_5
> +#define PRLAR2_EL2  S3_4_C6_C9_1
> +#define PRLAR3_EL2  S3_4_C6_C9_5
> +#define PRLAR4_EL2  S3_4_C6_C10_1
> +#define PRLAR5_EL2  S3_4_C6_C10_5
> +#define PRLAR6_EL2  S3_4_C6_C11_1
> +#define PRLAR7_EL2  S3_4_C6_C11_5
> +#define PRLAR8_EL2  S3_4_C6_C12_1
> +#define PRLAR9_EL2  S3_4_C6_C12_5
> +#define PRLAR10_EL2 S3_4_C6_C13_1
> +#define PRLAR11_EL2 S3_4_C6_C13_5
> +#define PRLAR12_EL2 S3_4_C6_C14_1
> +#define PRLAR13_EL2 S3_4_C6_C14_5
> +#define PRLAR14_EL2 S3_4_C6_C15_1
> +#define PRLAR15_EL2 S3_4_C6_C15_5
> +
> +/* MPU Protection Region Selection Register encode */
> +#define PRSELR_EL2 S3_4_C6_C2_1
> +
> +/* MPU Type registers encode */
> +#define MPUIR_EL2 S3_4_C0_C0_4
> +
> +#endif
> +
>   /* Access to system registers */
>
>   #define WRITE_SYSREG64(v, name) do {                    \
> diff --git a/xen/arch/arm/mm_mpu.c b/xen/arch/arm/mm_mpu.c
> new file mode 100644
> index 0000000000..43e9a1be4d
> --- /dev/null
> +++ b/xen/arch/arm/mm_mpu.c
> @@ -0,0 +1,48 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * xen/arch/arm/mm_mpu.c
> + *
> + * MPU based memory managment code for Armv8-R AArch64.
> + *
> + * Copyright (C) 2022 Arm Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/init.h>
> +#include <xen/page-size.h>
> +#include <asm/arm64/mpu.h>
> +
> +/* Xen MPU memory region mapping table. */
> +pr_t __aligned(PAGE_SIZE) __section(".data.page_aligned")
> +     xen_mpumap[ARM_MAX_MPU_MEMORY_REGIONS];
> +
> +/* Index into MPU memory region map for fixed regions, ascending from zero. */
> +uint64_t __ro_after_init next_fixed_region_idx;
> +/*
> + * Index into MPU memory region map for transient regions, like boot-only
> + * region, which descends from max_xen_mpumap.
> + */
> +uint64_t __ro_after_init next_transient_region_idx;
> +
> +/* Maximum number of supported MPU memory regions by the EL2 MPU. */
> +uint64_t __ro_after_init max_xen_mpumap;
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/arch/arm/xen.lds.S b/xen/arch/arm/xen.lds.S
> index bc45ea2c65..79965a3c17 100644
> --- a/xen/arch/arm/xen.lds.S
> +++ b/xen/arch/arm/xen.lds.S
> @@ -91,6 +91,8 @@ SECTIONS
>         __ro_after_init_end = .;
>     } : text
>
> +  . = ALIGN(PAGE_SIZE);
> +  __data_begin = .;
>     .data.read_mostly : {
>          /* Exception table */
>          __start___ex_table = .;
> @@ -157,7 +159,9 @@ SECTIONS
>          *(.altinstr_replacement)
>     } :text
>     . = ALIGN(PAGE_SIZE);
> +
>     .init.data : {
> +       __init_data_begin = .;            /* Init data */
>          *(.init.rodata)
>          *(.init.rodata.*)
>
> --
> 2.25.1
>
NIT:- Would you consider splitting this patch, something like this :-

1. Renaming of the mmu function

2. Define sysregs, prlar_t, prbar_t and other hardware-specific macros.

3. Define write_pr

4. The rest of the changes (ie prepare_early_mappings(), xen.lds.S, etc)

- Ayan
Julien Grall Jan. 19, 2023, 3:04 p.m. UTC | #2
Hi Penny,

On 13/01/2023 05:28, Penny Zheng wrote:
> From: Penny Zheng <penny.zheng@arm.com>
> 
> The start-of-day Xen MPU memory region layout shall be like as follows:
> 
> xen_mpumap[0] : Xen text
> xen_mpumap[1] : Xen read-only data
> xen_mpumap[2] : Xen read-only after init data
> xen_mpumap[3] : Xen read-write data
> xen_mpumap[4] : Xen BSS
> ......
> xen_mpumap[max_xen_mpumap - 2]: Xen init data
> xen_mpumap[max_xen_mpumap - 1]: Xen init text

Can you explain why the init region should be at the end of the MPU?

> 
> max_xen_mpumap refers to the number of regions supported by the EL2 MPU.
> The layout shall be compliant with what we describe in xen.lds.S, or the
> codes need adjustment.
> 
> As MMU system and MPU system have different functions to create
> the boot MMU/MPU memory management data, instead of introducing
> extra #ifdef in main code flow, we introduce a neutral name
> prepare_early_mappings for both, and also to replace create_page_tables for MMU.
> 
> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> Signed-off-by: Wei Chen <wei.chen@arm.com>
> ---
>   xen/arch/arm/arm64/Makefile              |   2 +
>   xen/arch/arm/arm64/head.S                |  17 +-
>   xen/arch/arm/arm64/head_mmu.S            |   4 +-
>   xen/arch/arm/arm64/head_mpu.S            | 323 +++++++++++++++++++++++
>   xen/arch/arm/include/asm/arm64/mpu.h     |  63 +++++
>   xen/arch/arm/include/asm/arm64/sysregs.h |  49 ++++
>   xen/arch/arm/mm_mpu.c                    |  48 ++++
>   xen/arch/arm/xen.lds.S                   |   4 +
>   8 files changed, 502 insertions(+), 8 deletions(-)
>   create mode 100644 xen/arch/arm/arm64/head_mpu.S
>   create mode 100644 xen/arch/arm/include/asm/arm64/mpu.h
>   create mode 100644 xen/arch/arm/mm_mpu.c
> 
> diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
> index 22da2f54b5..438c9737ad 100644
> --- a/xen/arch/arm/arm64/Makefile
> +++ b/xen/arch/arm/arm64/Makefile
> @@ -10,6 +10,8 @@ obj-y += entry.o
>   obj-y += head.o
>   ifneq ($(CONFIG_HAS_MPU),y)
>   obj-y += head_mmu.o
> +else
> +obj-y += head_mpu.o
>   endif
>   obj-y += insn.o
>   obj-$(CONFIG_LIVEPATCH) += livepatch.o
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 782bd1f94c..145e3d53dc 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -68,9 +68,9 @@
>    *  x24 -
>    *  x25 -
>    *  x26 - skip_zero_bss (boot cpu only)
> - *  x27 -
> - *  x28 -
> - *  x29 -
> + *  x27 - region selector (mpu only)
> + *  x28 - prbar (mpu only)
> + *  x29 - prlar (mpu only)
>    *  x30 - lr
>    */
>   
> @@ -82,7 +82,7 @@
>    * ---------------------------
>    *
>    * The requirements are:
> - *   MMU = off, D-cache = off, I-cache = on or off,
> + *   MMU/MPU = off, D-cache = off, I-cache = on or off,
>    *   x0 = physical address to the FDT blob.
>    *
>    * This must be the very first address in the loaded image.
> @@ -252,7 +252,12 @@ real_start_efi:
>   
>           bl    check_cpu_mode
>           bl    cpu_init
> -        bl    create_page_tables
> +
> +        /*
> +         * Create boot memory management data, pagetable for MMU systems
> +         * and memory regions for MPU systems.
> +         */
> +        bl    prepare_early_mappings
>           bl    enable_mmu
>   
>           /* We are still in the 1:1 mapping. Jump to the runtime Virtual Address. */
> @@ -310,7 +315,7 @@ GLOBAL(init_secondary)
>   #endif
>           bl    check_cpu_mode
>           bl    cpu_init
> -        bl    create_page_tables
> +        bl    prepare_early_mappings
>           bl    enable_mmu
>   
>           /* We are still in the 1:1 mapping. Jump to the runtime Virtual Address. */
> diff --git a/xen/arch/arm/arm64/head_mmu.S b/xen/arch/arm/arm64/head_mmu.S
> index 6ff13c751c..2346f755df 100644
> --- a/xen/arch/arm/arm64/head_mmu.S
> +++ b/xen/arch/arm/arm64/head_mmu.S
> @@ -123,7 +123,7 @@
>    *
>    * Clobbers x0 - x4
>    */
> -ENTRY(create_page_tables)
> +ENTRY(prepare_early_mappings)
>           /* Prepare the page-tables for mapping Xen */
>           ldr   x0, =XEN_VIRT_START
>           create_table_entry boot_pgtable, boot_first, x0, 0, x1, x2, x3
> @@ -208,7 +208,7 @@ virtphys_clash:
>           /* Identity map clashes with boot_third, which we cannot handle yet */
>           PRINT("- Unable to build boot page tables - virt and phys addresses clash. -\r\n")
>           b     fail
> -ENDPROC(create_page_tables)
> +ENDPROC(prepare_early_mappings)
>   
>   /*
>    * Turn on the Data Cache and the MMU. The function will return on the 1:1
> diff --git a/xen/arch/arm/arm64/head_mpu.S b/xen/arch/arm/arm64/head_mpu.S
> new file mode 100644
> index 0000000000..0b97ce4646
> --- /dev/null
> +++ b/xen/arch/arm/arm64/head_mpu.S
> @@ -0,0 +1,323 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Start-of-day code for an Armv8-R AArch64 MPU system.
> + */
> +
> +#include <asm/arm64/mpu.h>
> +#include <asm/early_printk.h>
> +#include <asm/page.h>
> +
> +/*
> + * One entry in Xen MPU memory region mapping table(xen_mpumap) is a structure
> + * of pr_t, which is 16-bytes size, so the entry offset is the order of 4.
> + */
> +#define MPU_ENTRY_SHIFT         0x4
> +
> +#define REGION_SEL_MASK         0xf
> +
> +#define REGION_TEXT_PRBAR       0x38    /* SH=11 AP=10 XN=00 */
> +#define REGION_RO_PRBAR         0x3A    /* SH=11 AP=10 XN=10 */
> +#define REGION_DATA_PRBAR       0x32    /* SH=11 AP=00 XN=10 */
> +
> +#define REGION_NORMAL_PRLAR     0x0f    /* NS=0 ATTR=111 EN=1 */
> +
> +/*
> + * Macro to round up the section address to be PAGE_SIZE aligned
> + * Each section(e.g. .text, .data, etc) in xen.lds.S is page-aligned,
> + * which is usually guarded with ". = ALIGN(PAGE_SIZE)" in the head,
> + * or in the end
> + */
> +.macro roundup_section, xb
> +        add   \xb, \xb, #(PAGE_SIZE-1)
> +        and   \xb, \xb, #PAGE_MASK
> +.endm
> +
> +/*
> + * Macro to create a new MPU memory region entry, which is a structure
> + * of pr_t,  in \prmap.
> + *
> + * Inputs:
> + * prmap:   mpu memory region map table symbol
> + * sel:     region selector
> + * prbar:   preserve value for PRBAR_EL2
> + * prlar    preserve value for PRLAR_EL2
> + *
> + * Clobbers \tmp1, \tmp2
> + *
> + */
> +.macro create_mpu_entry prmap, sel, prbar, prlar, tmp1, tmp2
> +    mov   \tmp2, \sel
> +    lsl   \tmp2, \tmp2, #MPU_ENTRY_SHIFT
> +    adr_l \tmp1, \prmap
> +    /* Write the first 8 bytes(prbar_t) of pr_t */
> +    str   \prbar, [\tmp1, \tmp2]
> +
> +    add   \tmp2, \tmp2, #8
> +    /* Write the last 8 bytes(prlar_t) of pr_t */
> +    str   \prlar, [\tmp1, \tmp2]

Any particular reason to not use 'stp'?

Also, AFAICT, this runs with the data cache disabled. But at least on ARMv8-A,
the cache is never really off, so don't we need some cache maintenance?

FAOD, I know the existing MMU code has the same issue. But I would
rather prefer if the newly introduced code is compliant with the Arm ARM.

> +.endm
> +
> +/*
> + * Macro to store the maximum number of regions supported by the EL2 MPU
> + * in max_xen_mpumap, which is identified by MPUIR_EL2.
> + *
> + * Outputs:
> + * nr_regions: preserve the maximum number of regions supported by the EL2 MPU
> + *
> + * Clobbers \tmp1
> + * > + */

Are you going to have multiple users? If not, then I would prefer if
this is folded into the only caller.

> +.macro read_max_el2_regions, nr_regions, tmp1
> +    load_paddr \tmp1, max_xen_mpumap

I would rather prefer if we restrict the use of globals while the MMU is
off (see why above).

> +    mrs   \nr_regions, MPUIR_EL2
> +    isb

What's that isb for?

> +    str   \nr_regions, [\tmp1]
> +.endm
> +
> +/*
> + * Macro to prepare and set a MPU memory region
> + *
> + * Inputs:
> + * base:        base address symbol (should be page-aligned)
> + * limit:       limit address symbol
> + * sel:         region selector
> + * prbar:       store computed PRBAR_EL2 value
> + * prlar:       store computed PRLAR_EL2 value
> + * attr_prbar:  PRBAR_EL2-related memory attributes. If not specified it will be REGION_DATA_PRBAR
> + * attr_prlar:  PRLAR_EL2-related memory attributes. If not specified it will be REGION_NORMAL_PRLAR
> + *
> + * Clobber \tmp1

This macro will also clobber x27, x28, x29.

> + *
> + */
> +.macro prepare_xen_region, base, limit, sel, prbar, prlar, tmp1, attr_prbar=REGION_DATA_PRBAR, attr_prlar=REGION_NORMAL_PRLAR
> +    /* Prepare value for PRBAR_EL2 reg and preserve it in \prbar.*/
> +    load_paddr \prbar, \base
> +    and   \prbar, \prbar, #MPU_REGION_MASK
> +    mov   \tmp1, #\attr_prbar
> +    orr   \prbar, \prbar, \tmp1
> +
> +    /* Prepare value for PRLAR_EL2 reg and preserve it in \prlar.*/
> +    load_paddr \prlar, \limit
> +    /* Round up limit address to be PAGE_SIZE aligned */
> +    roundup_section \prlar
> +    /* Limit address should be inclusive */
> +    sub   \prlar, \prlar, #1
> +    and   \prlar, \prlar, #MPU_REGION_MASK
> +    mov   \tmp1, #\attr_prlar
> +    orr   \prlar, \prlar, \tmp1
> +
> +    mov   x27, \sel
> +    mov   x28, \prbar
> +    mov   x29, \prlar
> +    /*
> +     * x27, x28, x29 are special registers designed as
> +     * inputs for function write_pr
> +     */
> +    bl    write_pr
> +.endm
> +
> +.section .text.idmap, "ax", %progbits
> +
> +/*
> + * ENTRY to configure a EL2 MPU memory region
> + * ARMv8-R AArch64 at most supports 255 MPU protection regions.
> + * See section G1.3.18 of the reference manual for ARMv8-R AArch64,
> + * PRBAR<n>_EL2 and PRLAR<n>_EL2 provides access to the EL2 MPU region
> + * determined by the value of 'n' and PRSELR_EL2.REGION as
> + * PRSELR_EL2.REGION<7:4>:n.(n = 0, 1, 2, ... , 15)
> + * For example to access regions from 16 to 31 (0b10000 to 0b11111):
> + * - Set PRSELR_EL2 to 0b1xxxx
> + * - Region 16 configuration is accessible through PRBAR0_EL2 and PRLAR0_EL2
> + * - Region 17 configuration is accessible through PRBAR1_EL2 and PRLAR1_EL2
> + * - Region 18 configuration is accessible through PRBAR2_EL2 and PRLAR2_EL2
> + * - ...
> + * - Region 31 configuration is accessible through PRBAR15_EL2 and PRLAR15_EL2
> + *
> + * Inputs:
> + * x27: region selector
> + * x28: preserve value for PRBAR_EL2
> + * x29: preserve value for PRLAR_EL2
> + *
> + */
> +ENTRY(write_pr)

AFAICT, this function would not be necessary if the index for the init 
sections were hardcoded.

So I would like to understand why the index cannot be hardcoded.

> +    msr   PRSELR_EL2, x27
> +    dsb   sy

What is this 'dsb' for? Also why 'sy'?

> +    and   x27, x27, #REGION_SEL_MASK
> +    cmp   x27, #0
> +    bne   1f
> +    msr   PRBAR0_EL2, x28
> +    msr   PRLAR0_EL2, x29
> +    b     out
> +1:
> +    cmp   x27, #1
> +    bne   2f
> +    msr   PRBAR1_EL2, x28
> +    msr   PRLAR1_EL2, x29
> +    b     out
> +2:
> +    cmp   x27, #2
> +    bne   3f
> +    msr   PRBAR2_EL2, x28
> +    msr   PRLAR2_EL2, x29
> +    b     out
> +3:
> +    cmp   x27, #3
> +    bne   4f
> +    msr   PRBAR3_EL2, x28
> +    msr   PRLAR3_EL2, x29
> +    b     out
> +4:
> +    cmp   x27, #4
> +    bne   5f
> +    msr   PRBAR4_EL2, x28
> +    msr   PRLAR4_EL2, x29
> +    b     out
> +5:
> +    cmp   x27, #5
> +    bne   6f
> +    msr   PRBAR5_EL2, x28
> +    msr   PRLAR5_EL2, x29
> +    b     out
> +6:
> +    cmp   x27, #6
> +    bne   7f
> +    msr   PRBAR6_EL2, x28
> +    msr   PRLAR6_EL2, x29
> +    b     out
> +7:
> +    cmp   x27, #7
> +    bne   8f
> +    msr   PRBAR7_EL2, x28
> +    msr   PRLAR7_EL2, x29
> +    b     out
> +8:
> +    cmp   x27, #8
> +    bne   9f
> +    msr   PRBAR8_EL2, x28
> +    msr   PRLAR8_EL2, x29
> +    b     out
> +9:
> +    cmp   x27, #9
> +    bne   10f
> +    msr   PRBAR9_EL2, x28
> +    msr   PRLAR9_EL2, x29
> +    b     out
> +10:
> +    cmp   x27, #10
> +    bne   11f
> +    msr   PRBAR10_EL2, x28
> +    msr   PRLAR10_EL2, x29
> +    b     out
> +11:
> +    cmp   x27, #11
> +    bne   12f
> +    msr   PRBAR11_EL2, x28
> +    msr   PRLAR11_EL2, x29
> +    b     out
> +12:
> +    cmp   x27, #12
> +    bne   13f
> +    msr   PRBAR12_EL2, x28
> +    msr   PRLAR12_EL2, x29
> +    b     out
> +13:
> +    cmp   x27, #13
> +    bne   14f
> +    msr   PRBAR13_EL2, x28
> +    msr   PRLAR13_EL2, x29
> +    b     out
> +14:
> +    cmp   x27, #14
> +    bne   15f
> +    msr   PRBAR14_EL2, x28
> +    msr   PRLAR14_EL2, x29
> +    b     out
> +15:
> +    msr   PRBAR15_EL2, x28
> +    msr   PRLAR15_EL2, x29
> +out:
> +    isb

What is this 'isb' for?

> +    ret
> +ENDPROC(write_pr)
> +
> +/*
> + * Static start-of-day Xen EL2 MPU memory region layout.
> + *
> + *     xen_mpumap[0] : Xen text
> + *     xen_mpumap[1] : Xen read-only data
> + *     xen_mpumap[2] : Xen read-only after init data
> + *     xen_mpumap[3] : Xen read-write data
> + *     xen_mpumap[4] : Xen BSS
> + *     ......
> + *     xen_mpumap[max_xen_mpumap - 2]: Xen init data
> + *     xen_mpumap[max_xen_mpumap - 1]: Xen init text
> + *
> + * Clobbers x0 - x6
> + *
> + * It shall be compliant with what describes in xen.lds.S, or the below
> + * codes need adjustment.
> + * It shall also follow the rules of putting fixed MPU memory region in
> + * the front, and the others in the rear, which, here, mainly refers to
> + * boot-only region, like Xen init text region.
> + */
> +ENTRY(prepare_early_mappings)
> +    /* stack LR as write_pr will be called later like nested function */
> +    mov   x6, lr
> +
> +    /* x0: region sel */
> +    mov   x0, xzr
> +    /* Xen text section. */
> +    prepare_xen_region _stext, _etext, x0, x1, x2, x3, attr_prbar=REGION_TEXT_PRBAR
> +    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4

You always seem to call prepare_xen_region and create_mpu_entry. Can 
they be combined?

Also, will the first parameter of create_mpu_entry always be xen_mpumap?
If so, I would remove it from the parameters.


[...]

> diff --git a/xen/arch/arm/xen.lds.S b/xen/arch/arm/xen.lds.S
> index bc45ea2c65..79965a3c17 100644
> --- a/xen/arch/arm/xen.lds.S
> +++ b/xen/arch/arm/xen.lds.S
> @@ -91,6 +91,8 @@ SECTIONS
>         __ro_after_init_end = .;
>     } : text
>   
> +  . = ALIGN(PAGE_SIZE);

Why do you need this ALIGN?

> +  __data_begin = .;
>     .data.read_mostly : {
>          /* Exception table */
>          __start___ex_table = .;
> @@ -157,7 +159,9 @@ SECTIONS
>          *(.altinstr_replacement)

I know you are not using alternative instructions yet, but you should
make sure they are included. So I think, rather than introducing
__init_data_begin, you want to use "_einitext" for the start of the
"Init data" section.

>     } :text
>     . = ALIGN(PAGE_SIZE);
> +

Spurious?

>     .init.data : {
> +       __init_data_begin = .;            /* Init data */
>          *(.init.rodata)
>          *(.init.rodata.*)
>   

Cheers,
Penny Zheng Jan. 29, 2023, 5:39 a.m. UTC | #3
> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: Thursday, January 19, 2023 11:04 PM
> To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org
> Cc: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini
> <sstabellini@kernel.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
> Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> Subject: Re: [PATCH v2 11/40] xen/mpu: build up start-of-day Xen MPU
> memory region map
> 
> Hi Penny,
>

Hi Julien

Sorry for the late response, I just came back from the Chinese Spring Festival holiday~
 
> On 13/01/2023 05:28, Penny Zheng wrote:
> > From: Penny Zheng <penny.zheng@arm.com>
> >
> > The start-of-day Xen MPU memory region layout shall be like as follows:
> >
> > xen_mpumap[0] : Xen text
> > xen_mpumap[1] : Xen read-only data
> > xen_mpumap[2] : Xen read-only after init data xen_mpumap[3] : Xen
> > read-write data xen_mpumap[4] : Xen BSS ......
> > xen_mpumap[max_xen_mpumap - 2]: Xen init data
> > xen_mpumap[max_xen_mpumap - 1]: Xen init text
> 
> Can you explain why the init region should be at the end of the MPU?
> 

As discussed in the v1 series, I'd like to put all transient MPU regions, like boot-only regions,
at the end of the MPU map.
Since they will get removed at the end of boot, I am trying not to leave holes in the MPU
map by putting all transient MPU regions at the rear.
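To make this concrete, here is a rough C sketch of the two-ended allocation
(illustration only; it reuses the symbol names introduced by this patch, while the
real code keeps these indices in mm_mpu.c and updates them from assembly):

```
/*
 * Sketch of the two-ended index allocation: fixed regions are handed out
 * from index 0 upwards, transient (boot-only) regions from the last index
 * downwards, so dropping the transient ones at the end of boot leaves no
 * holes in the middle of xen_mpumap.
 */
#include <stdint.h>

static uint64_t next_fixed_region_idx;      /* next free slot from the front */
static uint64_t next_transient_region_idx;  /* next free slot from the rear  */

static void init_region_indices(uint64_t max_xen_mpumap)
{
    next_fixed_region_idx = 0;
    next_transient_region_idx = max_xen_mpumap - 1;
}

static uint64_t alloc_fixed_region_idx(void)
{
    return next_fixed_region_idx++;
}

static uint64_t alloc_transient_region_idx(void)
{
    return next_transient_region_idx--;
}
```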

> >
> > max_xen_mpumap refers to the number of regions supported by the EL2
> MPU.
> > The layout shall be compliant with what we describe in xen.lds.S, or
> > the codes need adjustment.
> >
> > As MMU system and MPU system have different functions to create the
> > boot MMU/MPU memory management data, instead of introducing extra
> > #ifdef in main code flow, we introduce a neutral name
> > prepare_early_mappings for both, and also to replace create_page_tables
> for MMU.
> >
> > Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > ---
> >   xen/arch/arm/arm64/Makefile              |   2 +
> >   xen/arch/arm/arm64/head.S                |  17 +-
> >   xen/arch/arm/arm64/head_mmu.S            |   4 +-
> >   xen/arch/arm/arm64/head_mpu.S            | 323
> +++++++++++++++++++++++
> >   xen/arch/arm/include/asm/arm64/mpu.h     |  63 +++++
> >   xen/arch/arm/include/asm/arm64/sysregs.h |  49 ++++
> >   xen/arch/arm/mm_mpu.c                    |  48 ++++
> >   xen/arch/arm/xen.lds.S                   |   4 +
> >   8 files changed, 502 insertions(+), 8 deletions(-)
> >   create mode 100644 xen/arch/arm/arm64/head_mpu.S
> >   create mode 100644 xen/arch/arm/include/asm/arm64/mpu.h
> >   create mode 100644 xen/arch/arm/mm_mpu.c
> >
> > +/*
> > + * Macro to create a new MPU memory region entry, which is a
> > +structure
> > + * of pr_t,  in \prmap.
> > + *
> > + * Inputs:
> > + * prmap:   mpu memory region map table symbol
> > + * sel:     region selector
> > + * prbar:   preserve value for PRBAR_EL2
> > + * prlar    preserve value for PRLAR_EL2
> > + *
> > + * Clobbers \tmp1, \tmp2
> > + *
> > + */
> > +.macro create_mpu_entry prmap, sel, prbar, prlar, tmp1, tmp2
> > +    mov   \tmp2, \sel
> > +    lsl   \tmp2, \tmp2, #MPU_ENTRY_SHIFT
> > +    adr_l \tmp1, \prmap
> > +    /* Write the first 8 bytes(prbar_t) of pr_t */
> > +    str   \prbar, [\tmp1, \tmp2]
> > +
> > +    add   \tmp2, \tmp2, #8
> > +    /* Write the last 8 bytes(prlar_t) of pr_t */
> > +    str   \prlar, [\tmp1, \tmp2]
> 
> Any particular reason to not use 'stp'?
> 
> Also, AFAICT, with data cache disabled. But at least on ARMv8-A, the cache is
> never really off. So don't need some cache maintainance?
> 
> FAOD, I know the existing MMU code has the same issue. But I would rather
> prefer if the new code introduced is compliant to the Arm Arm.
> 

True, `stp` is better and I will clean the data cache to be compliant with the Arm ARM.
I wrote the following example to see if I have caught what you suggested:
```
add \tmp1, \tmp1, \tmp2
stp \prbar, \prlar, [\tmp1]
dc cvau, \tmp1
isb
dsb sy
```

> > +.endm
> > +
> > +/*
> > + * Macro to store the maximum number of regions supported by the EL2
> > +MPU
> > + * in max_xen_mpumap, which is identified by MPUIR_EL2.
> > + *
> > + * Outputs:
> > + * nr_regions: preserve the maximum number of regions supported by
> > +the EL2 MPU
> > + *
> > + * Clobbers \tmp1
> > + *
> > + */
> 
> Are you going to have multiple users? If not, then I would prefer if this is
> folded in the only caller.
> 

Ok. I will fold it in, since I think it is a one-time read.

> > +.macro read_max_el2_regions, nr_regions, tmp1
> > +    load_paddr \tmp1, max_xen_mpumap
> 
> I would rather prefer if we restrict the use of global while the MMU if off (see
> why above).
> 

If we don't use a global here, then after the MPU is enabled we need to re-read MPUIR_EL2
to get the maximum number of EL2 regions.

Or I could put a data cache clean after accessing the global; is that better?
```
str   \nr_regions, [\tmp1]
dc cvau, \tmp1
isb
dsb sy
```

> > +    mrs   \nr_regions, MPUIR_EL2
> > +    isb
> 
> What's that isb for?
> 
> > +    str   \nr_regions, [\tmp1]
> > +.endm
> > +
> > +/*
> > + * ENTRY to configure a EL2 MPU memory region
> > + * ARMv8-R AArch64 at most supports 255 MPU protection regions.
> > + * See section G1.3.18 of the reference manual for ARMv8-R AArch64,
> > + * PRBAR<n>_EL2 and PRLAR<n>_EL2 provides access to the EL2 MPU
> > +region
> > + * determined by the value of 'n' and PRSELR_EL2.REGION as
> > + * PRSELR_EL2.REGION<7:4>:n.(n = 0, 1, 2, ... , 15)
> > + * For example to access regions from 16 to 31 (0b10000 to 0b11111):
> > + * - Set PRSELR_EL2 to 0b1xxxx
> > + * - Region 16 configuration is accessible through PRBAR0_EL2 and
> > +PRLAR0_EL2
> > + * - Region 17 configuration is accessible through PRBAR1_EL2 and
> > +PRLAR1_EL2
> > + * - Region 18 configuration is accessible through PRBAR2_EL2 and
> > +PRLAR2_EL2
> > + * - ...
> > + * - Region 31 configuration is accessible through PRBAR15_EL2 and
> > +PRLAR15_EL2
> > + *
> > + * Inputs:
> > + * x27: region selector
> > + * x28: preserve value for PRBAR_EL2
> > + * x29: preserve value for PRLAR_EL2
> > + *
> > + */
> > +ENTRY(write_pr)
> 
> AFAICT, this function would not be necessary if the index for the init sections
> were hardcoded.
> 
> So I would like to understand why the index cannot be hardcoded.
> 

The reason is that we are putting init sections at the *end* of the MPU map, and
the length of the whole MPU map is platform-specific. We read it from MPUIR_EL2.
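A small C sketch of why these indices cannot be compile-time constants (illustration
only; the real logic lives in prepare_early_mappings and write_pr, and max_xen_mpumap
stands for the value read from MPUIR_EL2 at boot):

```
/*
 * The init regions sit at the end of a map whose size is only known at
 * boot, so their indices are computed at run time. write_pr then splits
 * a region index into the value written to PRSELR_EL2 (hardware uses its
 * bits [7:4]) and the PRBAR<n>_EL2/PRLAR<n>_EL2 alias number (bits [3:0]).
 */
#include <stdint.h>

#define REGION_SEL_MASK 0xfU

static inline uint64_t init_text_region_idx(uint64_t max_xen_mpumap)
{
    return max_xen_mpumap - 1;            /* last entry: Xen init text */
}

static inline uint64_t init_data_region_idx(uint64_t max_xen_mpumap)
{
    return max_xen_mpumap - 2;            /* second to last: Xen init data */
}

static inline unsigned int prbar_alias(uint64_t region_idx)
{
    return (unsigned int)(region_idx & REGION_SEL_MASK); /* which PRBAR<n>_EL2 to program */
}
```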
 
> > +    msr   PRSELR_EL2, x27
> > +    dsb   sy
> 
> [...]
> 
> > diff --git a/xen/arch/arm/xen.lds.S b/xen/arch/arm/xen.lds.S index
> > bc45ea2c65..79965a3c17 100644
> > --- a/xen/arch/arm/xen.lds.S
> > +++ b/xen/arch/arm/xen.lds.S
> > @@ -91,6 +91,8 @@ SECTIONS
> >         __ro_after_init_end = .;
> >     } : text
> >
> > +  . = ALIGN(PAGE_SIZE);
> 
> Why do you need this ALIGN?
> 

I need a symbol as the start of the data section, so I introduce
"__data_begin = .;".
If I used "__ro_after_init_end = .;" instead, I'm afraid that in the future,
if someone introduces a new section after the ro-after-init section, this part
would need modification too.

When we define MPU regions for each section in xen.lds.S, we always treat these sections
as page-aligned.
I checked each section in xen.lds.S, and ". = ALIGN(PAGE_SIZE);" is added either
at the head of the section or at the tail of the previous one, to make sure the
starting address symbol is page-aligned.

And if we don't put this ALIGN and the "__ro_after_init_end" symbol itself is not page-aligned,
the two adjacent MPU regions will overlap.
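For reference, a C rendering of the limit handling in prepare_xen_region (a sketch
only, using the 64-byte MPU granule from mpu.h):

```
/*
 * The section end address is rounded up to PAGE_SIZE and then made
 * inclusive for PRLAR_EL2. If a section end symbol were not page-aligned,
 * the rounded-up limit of one region would reach into the next section's
 * base, i.e. two adjacent MPU regions would overlap.
 */
#include <stdint.h>

#define PAGE_SIZE         4096ULL
#define MPU_REGION_SHIFT  6                            /* from mpu.h */
#define MPU_REGION_MASK   (~((1ULL << MPU_REGION_SHIFT) - 1))

static inline uint64_t region_limit(uint64_t section_end)
{
    uint64_t limit = (section_end + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);

    return (limit - 1) & MPU_REGION_MASK;              /* inclusive limit */
}
```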
 
> > +  __data_begin = .;
> >     .data.read_mostly : {
> >          /* Exception table */
> >          __start___ex_table = .;
> > @@ -157,7 +159,9 @@ SECTIONS
> >          *(.altinstr_replacement)
> 
> I know you are not using alternative instructions yet. But, you should make
> sure they are included. So I think rather than introduce __init_data_begin,
> you want to use "_einitext" for the start of the "Init data" section.
> 
> >     } :text
> >     . = ALIGN(PAGE_SIZE);
> > +
> 
> Spurious?
> 
> >     .init.data : {
> > +       __init_data_begin = .;            /* Init data */
> >          *(.init.rodata)
> >          *(.init.rodata.*)
> >
> 
> Cheers,
> 
> --
> Julien Grall

Cheers,

--
Penny Zheng
Penny Zheng Jan. 29, 2023, 6:47 a.m. UTC | #4
Hi Ayan

> -----Original Message-----
> From: Ayan Kumar Halder <ayankuma@amd.com>
> Sent: Thursday, January 19, 2023 6:19 PM
> To: xen-devel@lists.xenproject.org
> Cc: Wei Chen <Wei.Chen@arm.com>; Penny Zheng
> <Penny.Zheng@arm.com>; Stefano Stabellini <sstabellini@kernel.org>; Julien
> Grall <julien@xen.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
> Volodymyr_Babchuk@epam.com
> Subject: Re: [PATCH v2 11/40] xen/mpu: build up start-of-day Xen MPU
> memory region map
> 
> 
> On 13/01/2023 05:28, Penny Zheng wrote:
> >
> > From: Penny Zheng <penny.zheng@arm.com>
> >
> > The start-of-day Xen MPU memory region layout shall be like as follows:
> >
> > xen_mpumap[0] : Xen text
> > xen_mpumap[1] : Xen read-only data
> > xen_mpumap[2] : Xen read-only after init data xen_mpumap[3] : Xen
> > read-write data xen_mpumap[4] : Xen BSS ......
> > xen_mpumap[max_xen_mpumap - 2]: Xen init data
> > xen_mpumap[max_xen_mpumap - 1]: Xen init text
> >
> > max_xen_mpumap refers to the number of regions supported by the EL2
> MPU.
> > The layout shall be compliant with what we describe in xen.lds.S, or
> > the codes need adjustment.
> >
> > As MMU system and MPU system have different functions to create the
> > boot MMU/MPU memory management data, instead of introducing extra
> > #ifdef in main code flow, we introduce a neutral name
> > prepare_early_mappings for both, and also to replace create_page_tables
> for MMU.
> >
> > Signed-off-by: Penny Zheng <penny.zheng@arm.com>
> > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > ---
> >   xen/arch/arm/arm64/Makefile              |   2 +
> >   xen/arch/arm/arm64/head.S                |  17 +-
> >   xen/arch/arm/arm64/head_mmu.S            |   4 +-
> >   xen/arch/arm/arm64/head_mpu.S            | 323
> +++++++++++++++++++++++
> >   xen/arch/arm/include/asm/arm64/mpu.h     |  63 +++++
> >   xen/arch/arm/include/asm/arm64/sysregs.h |  49 ++++
> >   xen/arch/arm/mm_mpu.c                    |  48 ++++
> >   xen/arch/arm/xen.lds.S                   |   4 +
> >   8 files changed, 502 insertions(+), 8 deletions(-)
> >   create mode 100644 xen/arch/arm/arm64/head_mpu.S
> >   create mode 100644 xen/arch/arm/include/asm/arm64/mpu.h
> >   create mode 100644 xen/arch/arm/mm_mpu.c
> >
> > diff --git a/xen/arch/arm/arm64/Makefile
> b/xen/arch/arm/arm64/Makefile
> > index 22da2f54b5..438c9737ad 100644
> > --- a/xen/arch/arm/arm64/Makefile
> > +++ b/xen/arch/arm/arm64/Makefile
> > @@ -10,6 +10,8 @@ obj-y += entry.o
> >   obj-y += head.o
> >   ifneq ($(CONFIG_HAS_MPU),y)
> >   obj-y += head_mmu.o
> > +else
> > +obj-y += head_mpu.o
> >   endif
> >   obj-y += insn.o
> >   obj-$(CONFIG_LIVEPATCH) += livepatch.o diff --git
> > a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S index
> > 782bd1f94c..145e3d53dc 100644
> > --- a/xen/arch/arm/arm64/head.S
> > +++ b/xen/arch/arm/arm64/head.S
> > @@ -68,9 +68,9 @@
> >    *  x24 -
> >    *  x25 -
> >    *  x26 - skip_zero_bss (boot cpu only)
> > - *  x27 -
> > - *  x28 -
> > - *  x29 -
> > + *  x27 - region selector (mpu only)
> > + *  x28 - prbar (mpu only)
> > + *  x29 - prlar (mpu only)
> >    *  x30 - lr
> >    */
> >
> > @@ -82,7 +82,7 @@
> >    * ---------------------------
> >    *
> >    * The requirements are:
> > - *   MMU = off, D-cache = off, I-cache = on or off,
> > + *   MMU/MPU = off, D-cache = off, I-cache = on or off,
> >    *   x0 = physical address to the FDT blob.
> >    *
> >    * This must be the very first address in the loaded image.
> > @@ -252,7 +252,12 @@ real_start_efi:
> >
> >           bl    check_cpu_mode
> >           bl    cpu_init
> > -        bl    create_page_tables
> > +
> > +        /*
> > +         * Create boot memory management data, pagetable for MMU
> systems
> > +         * and memory regions for MPU systems.
> > +         */
> > +        bl    prepare_early_mappings
> >           bl    enable_mmu
> >
> >           /* We are still in the 1:1 mapping. Jump to the runtime
> > Virtual Address. */ @@ -310,7 +315,7 @@ GLOBAL(init_secondary)
> >   #endif
> >           bl    check_cpu_mode
> >           bl    cpu_init
> > -        bl    create_page_tables
> > +        bl    prepare_early_mappings
> >           bl    enable_mmu
> >
> >           /* We are still in the 1:1 mapping. Jump to the runtime
> > Virtual Address. */ diff --git a/xen/arch/arm/arm64/head_mmu.S
> > b/xen/arch/arm/arm64/head_mmu.S index 6ff13c751c..2346f755df
> 100644
> > --- a/xen/arch/arm/arm64/head_mmu.S
> > +++ b/xen/arch/arm/arm64/head_mmu.S
> > @@ -123,7 +123,7 @@
> >    *
> >    * Clobbers x0 - x4
> >    */
> > -ENTRY(create_page_tables)
> > +ENTRY(prepare_early_mappings)
> >           /* Prepare the page-tables for mapping Xen */
> >           ldr   x0, =XEN_VIRT_START
> >           create_table_entry boot_pgtable, boot_first, x0, 0, x1, x2,
> > x3 @@ -208,7 +208,7 @@ virtphys_clash:
> >           /* Identity map clashes with boot_third, which we cannot handle yet
> */
> >           PRINT("- Unable to build boot page tables - virt and phys addresses
> clash. -\r\n")
> >           b     fail
> > -ENDPROC(create_page_tables)
> > +ENDPROC(prepare_early_mappings)
> 
> NIT:- Can this renaming be done in a separate patch of its own (before this
> patch).
> 

Yay, you're right. I'll put it in a different commit.

> So that this patch can be only about the new functionality introduced.
> 
> >
> >   /*
> >    * Turn on the Data Cache and the MMU. The function will return on
> > the 1:1 diff --git a/xen/arch/arm/arm64/head_mpu.S
> > b/xen/arch/arm/arm64/head_mpu.S new file mode 100644 index
> > 0000000000..0b97ce4646
> > --- /dev/null
> > +++ b/xen/arch/arm/arm64/head_mpu.S
> > @@ -0,0 +1,323 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Start-of-day code for an Armv8-R AArch64 MPU system.
> > + */
> > +
> > +#include <asm/arm64/mpu.h>
> > +#include <asm/early_printk.h>
> > +#include <asm/page.h>
> > +
> > +/*
> > + * One entry in Xen MPU memory region mapping table(xen_mpumap) is
> a
> > +structure
> > + * of pr_t, which is 16-bytes size, so the entry offset is the order of 4.
> > + */
> NIT :- It would be good to quote Arm ARM from which can be referred for
> the definitions.
> > +#define MPU_ENTRY_SHIFT         0x4
> > +
> > +#define REGION_SEL_MASK         0xf
> > +
> > +#define REGION_TEXT_PRBAR       0x38    /* SH=11 AP=10 XN=00 */
> > +#define REGION_RO_PRBAR         0x3A    /* SH=11 AP=10 XN=10 */
> > +#define REGION_DATA_PRBAR       0x32    /* SH=11 AP=00 XN=10 */
> > +
> > +#define REGION_NORMAL_PRLAR     0x0f    /* NS=0 ATTR=111 EN=1 */
> > +
> > +/*
> > + * Macro to round up the section address to be PAGE_SIZE aligned
> > + * Each section(e.g. .text, .data, etc) in xen.lds.S is page-aligned,
> > + * which is usually guarded with ". = ALIGN(PAGE_SIZE)" in the head,
> > + * or in the end
> > + */
> > +.macro roundup_section, xb
> > +        add   \xb, \xb, #(PAGE_SIZE-1)
> > +        and   \xb, \xb, #PAGE_MASK
> > +.endm
> > +
> > +/*
> > + * Macro to create a new MPU memory region entry, which is a
> > +structure
> > + * of pr_t,  in \prmap.
> > + *
> > + * Inputs:
> > + * prmap:   mpu memory region map table symbol
> > + * sel:     region selector
> > + * prbar:   preserve value for PRBAR_EL2
> > + * prlar    preserve value for PRLAR_EL2
> > + *
> > + * Clobbers \tmp1, \tmp2
> > + *
> > + */
> > +.macro create_mpu_entry prmap, sel, prbar, prlar, tmp1, tmp2
> > +    mov   \tmp2, \sel
> > +    lsl   \tmp2, \tmp2, #MPU_ENTRY_SHIFT
> > +    adr_l \tmp1, \prmap
> > +    /* Write the first 8 bytes(prbar_t) of pr_t */
> > +    str   \prbar, [\tmp1, \tmp2]
> > +
> > +    add   \tmp2, \tmp2, #8
> > +    /* Write the last 8 bytes(prlar_t) of pr_t */
> > +    str   \prlar, [\tmp1, \tmp2]
> > +.endm
> > +
> > +/*
> > + * Macro to store the maximum number of regions supported by the EL2
> > +MPU
> > + * in max_xen_mpumap, which is identified by MPUIR_EL2.
> > + *
> > + * Outputs:
> > + * nr_regions: preserve the maximum number of regions supported by
> > +the EL2 MPU
> > + *
> > + * Clobbers \tmp1
> > + *
> > + */
> > +.macro read_max_el2_regions, nr_regions, tmp1
> > +    load_paddr \tmp1, max_xen_mpumap
> > +    mrs   \nr_regions, MPUIR_EL2
> > +    isb
> > +    str   \nr_regions, [\tmp1]
> > +.endm
> > +
> > +/*
> > + * Macro to prepare and set a MPU memory region
> > + *
> > + * Inputs:
> > + * base:        base address symbol (should be page-aligned)
> > + * limit:       limit address symbol
> > + * sel:         region selector
> > + * prbar:       store computed PRBAR_EL2 value
> > + * prlar:       store computed PRLAR_EL2 value
> > + * attr_prbar:  PRBAR_EL2-related memory attributes. If not specified
> > +it will be REGION_DATA_PRBAR
> > + * attr_prlar:  PRLAR_EL2-related memory attributes. If not specified
> > +it will be REGION_NORMAL_PRLAR
> > + *
> > + * Clobber \tmp1
> > + *
> > + */
> > +.macro prepare_xen_region, base, limit, sel, prbar, prlar, tmp1,
> attr_prbar=REGION_DATA_PRBAR, attr_prlar=REGION_NORMAL_PRLAR
> > +    /* Prepare value for PRBAR_EL2 reg and preserve it in \prbar.*/
> > +    load_paddr \prbar, \base
> > +    and   \prbar, \prbar, #MPU_REGION_MASK
> > +    mov   \tmp1, #\attr_prbar
> > +    orr   \prbar, \prbar, \tmp1
> > +
> > +    /* Prepare value for PRLAR_EL2 reg and preserve it in \prlar.*/
> > +    load_paddr \prlar, \limit
> > +    /* Round up limit address to be PAGE_SIZE aligned */
> > +    roundup_section \prlar
> > +    /* Limit address should be inclusive */
> > +    sub   \prlar, \prlar, #1
> > +    and   \prlar, \prlar, #MPU_REGION_MASK
> > +    mov   \tmp1, #\attr_prlar
> > +    orr   \prlar, \prlar, \tmp1
> > +
> > +    mov   x27, \sel
> > +    mov   x28, \prbar
> > +    mov   x29, \prlar
> 
> Any reasons for using x27, x28, x29 to pass function parameters.
> 
> https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
> states x0..x7 should be used (Table 2, General-purpose registers and
> AAPCS64 usage).
> 

These registers are documented and reserved in xen/arch/arm/arm64/head.S, like
how we reserve x26 to pass a function parameter in skip_zero_bss; see:
```
diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index 782bd1f94c..145e3d53dc 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -68,9 +68,9 @@
  *  x24 -
  *  x25 -
  *  x26 - skip_zero_bss (boot cpu only)
- *  x27 -
- *  x28 -
- *  x29 -
+ *  x27 - region selector (mpu only)
+ *  x28 - prbar (mpu only)
+ *  x29 - prlar (mpu only)
  *  x30 - lr
  */
```
x0...x7 are already commonly used in xen/arch/arm/arm64/head.S, so it is difficult for me
to reserve them only for write_pr.

If we used x0...x7 as function parameters, I would need to push/pop them on the
stack in write_pr to avoid corrupting them.

> > +    /*
> > +     * x27, x28, x29 are special registers designed as
> > +     * inputs for function write_pr
> > +     */
> > +    bl    write_pr
> > +.endm
> > +
[...]
> > --
> > 2.25.1
> >
> NIT:- Would you consider splitting this patch, something like this :-
> 
> 1. Renaming of the mmu function
> 
> 2. Define sysregs, prlar_t, prbar_t and other hardware-specific macros.
> 
> 3. Define write_pr
> 
> 4. The rest of the changes (ie prepare_early_mappings(), xen.lds.S, etc)
> 

For 2, 3 and 4, it will break the rule of "Always define and introduce at the
first usage".
However, I know that this commit is very big ;/, so as long as maintainers are also
in favor of your splitting suggestion, I'm happy to do the split too~

> - Ayan
Julien Grall Jan. 29, 2023, 7:37 a.m. UTC | #5
Hi Penny,

On 29/01/2023 05:39, Penny Zheng wrote:
>> Hi Penny,
>>
> 
> Hi Julien
> 
> Sorry for the late response, I just came back from the Chinese Spring Festival holiday~
>   
>> On 13/01/2023 05:28, Penny Zheng wrote:
>>> From: Penny Zheng <penny.zheng@arm.com>
>>>
>>> The start-of-day Xen MPU memory region layout shall be like as follows:
>>>
>>> xen_mpumap[0] : Xen text
>>> xen_mpumap[1] : Xen read-only data
>>> xen_mpumap[2] : Xen read-only after init data
>>> xen_mpumap[3] : Xen read-write data
>>> xen_mpumap[4] : Xen BSS
>>> ......
>>> xen_mpumap[max_xen_mpumap - 2]: Xen init data
>>> xen_mpumap[max_xen_mpumap - 1]: Xen init text
>>
>> Can you explain why the init region should be at the end of the MPU?
>>
> 
> As discussed in the v1 series, I'd like to put all transient MPU regions, like the boot-only ones,
> at the end of the MPU map.

I vaguely recall the discussion but can't seem to find the thread. Do 
you have a link? (A summary in the patch would have been nice)

> Since they will get removed at the end of boot, I am trying not to leave holes in the MPU
> map by putting all transient MPU regions at the rear.

I understand the principle, but I am not convinced this is worth it
because of the increased complexity in the assembly code.

What would be the problem with reshuffling partially the MPU once we booted?

> 
>>>
>>> max_xen_mpumap refers to the number of regions supported by the EL2
>> MPU.
>>> The layout shall be compliant with what we describe in xen.lds.S, or
>>> the codes need adjustment.
>>>
>>> As MMU system and MPU system have different functions to create the
>>> boot MMU/MPU memory management data, instead of introducing extra
>>> #ifdef in main code flow, we introduce a neutral name
>>> prepare_early_mappings for both, and also to replace create_page_tables
>> for MMU.
>>>
>>> Signed-off-by: Penny Zheng <penny.zheng@arm.com>
>>> Signed-off-by: Wei Chen <wei.chen@arm.com>
>>> ---
>>>    xen/arch/arm/arm64/Makefile              |   2 +
>>>    xen/arch/arm/arm64/head.S                |  17 +-
>>>    xen/arch/arm/arm64/head_mmu.S            |   4 +-
>>>    xen/arch/arm/arm64/head_mpu.S            | 323
>> +++++++++++++++++++++++
>>>    xen/arch/arm/include/asm/arm64/mpu.h     |  63 +++++
>>>    xen/arch/arm/include/asm/arm64/sysregs.h |  49 ++++
>>>    xen/arch/arm/mm_mpu.c                    |  48 ++++
>>>    xen/arch/arm/xen.lds.S                   |   4 +
>>>    8 files changed, 502 insertions(+), 8 deletions(-)
>>>    create mode 100644 xen/arch/arm/arm64/head_mpu.S
>>>    create mode 100644 xen/arch/arm/include/asm/arm64/mpu.h
>>>    create mode 100644 xen/arch/arm/mm_mpu.c
>>>
>>> +/*
>>> + * Macro to create a new MPU memory region entry, which is a
>>> +structure
>>> + * of pr_t,  in \prmap.
>>> + *
>>> + * Inputs:
>>> + * prmap:   mpu memory region map table symbol
>>> + * sel:     region selector
>>> + * prbar:   preserve value for PRBAR_EL2
>>> + * prlar    preserve value for PRLAR_EL2
>>> + *
>>> + * Clobbers \tmp1, \tmp2
>>> + *
>>> + */
>>> +.macro create_mpu_entry prmap, sel, prbar, prlar, tmp1, tmp2
>>> +    mov   \tmp2, \sel
>>> +    lsl   \tmp2, \tmp2, #MPU_ENTRY_SHIFT
>>> +    adr_l \tmp1, \prmap
>>> +    /* Write the first 8 bytes(prbar_t) of pr_t */
>>> +    str   \prbar, [\tmp1, \tmp2]
>>> +
>>> +    add   \tmp2, \tmp2, #8
>>> +    /* Write the last 8 bytes(prlar_t) of pr_t */
>>> +    str   \prlar, [\tmp1, \tmp2]
>>
>> Any particular reason to not use 'stp'?
>>
>> Also, AFAICT, this is done with the data cache disabled. But at least on ARMv8-A, the cache is
>> never really off. So don't we need some cache maintenance?
>>
>> FAOD, I know the existing MMU code has the same issue. But I would rather
>> prefer if the new code introduced is compliant to the Arm Arm.
>>
> 
> True, `stp` is better and I will clean the data cache to be compliant with the Arm Arm.
> I wrote the following example to check whether I caught what you suggested:
> ```
> add \tmp1, \tmp1, \tmp2
> stp \prbar, \prlar, [\tmp1]
> dc cvau, \tmp1

I think this wants to be invalidate rather than clean because the cache 
is off.

> isb
> dsb sy
> ```
> 
>>> +.endm
>>> +
>>> +/*
>>> + * Macro to store the maximum number of regions supported by the EL2
>>> +MPU
>>> + * in max_xen_mpumap, which is identified by MPUIR_EL2.
>>> + *
>>> + * Outputs:
>>> + * nr_regions: preserve the maximum number of regions supported by
>>> +the EL2 MPU
>>> + *
>>> + * Clobbers \tmp1
>>> + * > + */
>>
>> Are you going to have multiple users? If not, then I would prefer if this is
>> folded in the only caller.
>>
> 
> Ok. I will fold it in, since I think it is a one-time read.
> 
>>> +.macro read_max_el2_regions, nr_regions, tmp1
>>> +    load_paddr \tmp1, max_xen_mpumap
>>
>> I would rather prefer if we restrict the use of globals while the MMU is off (see
>> why above).
>>
> 
> If we don't use a global here, then after the MPU is enabled, we need to re-read MPUIR_EL2
> to get the maximum number of EL2 regions.

Which, IMHO, is better than having to think about cache.
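
(A minimal C sketch of that alternative, i.e. re-reading MPUIR_EL2 once the MPU and data cache are up, so no explicit cache maintenance is needed for the global. It assumes the MPUIR_EL2 definition this series adds to sysregs.h and Xen's READ_SYSREG(); the function name is illustrative.)

```
/* Hypothetical sketch: read the EL2 MPU region count from C after the MPU
 * (and therefore the data cache) has been enabled. */
#include <xen/init.h>
#include <xen/types.h>
#include <asm/sysregs.h>

uint64_t max_xen_mpumap;

static void __init read_max_el2_regions(void)
{
    /* MPUIR_EL2.Region (bits [7:0]) gives the number of EL2 MPU regions. */
    max_xen_mpumap = READ_SYSREG(MPUIR_EL2) & 0xff;
}
```
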

> 
> Or I could put a data cache clean after accessing the global; is that better?
> ```
> str   \nr_regions, [\tmp1]
> dc cvau, \tmp1
> isb
> dsb sy
> ```
> 
>>> +    mrs   \nr_regions, MPUIR_EL2
>>> +    isb
>>
>> What's that isb for?
>>
>>> +    str   \nr_regions, [\tmp1]
>>> +.endm
>>> +
>>> +/*
>>> + * ENTRY to configure a EL2 MPU memory region
>>> + * ARMv8-R AArch64 at most supports 255 MPU protection regions.
>>> + * See section G1.3.18 of the reference manual for ARMv8-R AArch64,
>>> + * PRBAR<n>_EL2 and PRLAR<n>_EL2 provides access to the EL2 MPU
>>> +region
>>> + * determined by the value of 'n' and PRSELR_EL2.REGION as
>>> + * PRSELR_EL2.REGION<7:4>:n.(n = 0, 1, 2, ... , 15)
>>> + * For example to access regions from 16 to 31 (0b10000 to 0b11111):
>>> + * - Set PRSELR_EL2 to 0b1xxxx
>>> + * - Region 16 configuration is accessible through PRBAR0_EL2 and
>>> +PRLAR0_EL2
>>> + * - Region 17 configuration is accessible through PRBAR1_EL2 and
>>> +PRLAR1_EL2
>>> + * - Region 18 configuration is accessible through PRBAR2_EL2 and
>>> +PRLAR2_EL2
>>> + * - ...
>>> + * - Region 31 configuration is accessible through PRBAR15_EL2 and
>>> +PRLAR15_EL2
>>> + *
>>> + * Inputs:
>>> + * x27: region selector
>>> + * x28: preserve value for PRBAR_EL2
>>> + * x29: preserve value for PRLAR_EL2
>>> + *
>>> + */
>>> +ENTRY(write_pr)
>>
>> AFAICT, this function would not be necessary if the index for the init sections
>> were hardcoded.
>>
>> So I would like to understand why the index cannot be hardcoded.
>>
> 
> The reason is that we are putting init sections at the *end* of the MPU map, and
> the length of the whole MPU map is platform-specific. We read it from MPUIR_EL2.

Right, I got that bit from the code. What I would like to understand is
why all the initial addresses cannot be hardcoded?

From a brief look, this would simplify the assembly code a lot.

>   
>>> +    msr   PRSELR_EL2, x27
>>> +    dsb   sy
>>
>> [...]
>>
>>> diff --git a/xen/arch/arm/xen.lds.S b/xen/arch/arm/xen.lds.S index
>>> bc45ea2c65..79965a3c17 100644
>>> --- a/xen/arch/arm/xen.lds.S
>>> +++ b/xen/arch/arm/xen.lds.S
>>> @@ -91,6 +91,8 @@ SECTIONS
>>>          __ro_after_init_end = .;
>>>      } : text
>>>
>>> +  . = ALIGN(PAGE_SIZE);
>>
>> Why do you need this ALIGN?
>>
> 
> I need a symbol as the start of the data section, so I introduce
> "__data_begin = .;".
> If I use "__ro_after_init_end = .;" instead, I'm afraid that in the future,
> if someone introduces a new section after the ro-after-init section, this part
> would also need modification.

I haven't suggested there is a problem to define a new symbol. I am 
merely asking about the align.

> 
> When we define MPU regions for each section in xen.lds.S, we always treat these sections
> as page-aligned.
> I checked each section in xen.lds.S, and ". = ALIGN(PAGE_SIZE);" is either added
> at the section head, or at the tail of the previous section, to make sure the starting
> address symbol is page-aligned.
> 
> And if we don't put this ALIGN, and the "__ro_after_init_end" symbol itself is not page-aligned,
> the two adjacent sections will overlap in the MPU.

__ro_after_init_end *has* to be page aligned because the permissions are 
different than for __data_begin.

If we were going to add a new section, then either it has the same 
permission as .data.read.mostly and we will bundle them or it doesn't 
and we would need a .align.

But today, the extra .ALIGN seems unnecessary (at least in the context 
of this patch).

Cheers,
Penny Zheng Jan. 30, 2023, 5:45 a.m. UTC | #6
> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: Sunday, January 29, 2023 3:37 PM
> To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org
> Cc: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini
> <sstabellini@kernel.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
> Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> Subject: Re: [PATCH v2 11/40] xen/mpu: build up start-of-day Xen MPU
> memory region map
> 
> Hi Penny,
> 

Hi Julien,

> On 29/01/2023 05:39, Penny Zheng wrote:
> >> Hi Penny,
> >>
> >
> > Hi Julien
> >
> > Sorry for the late response, just come back from Chinese Spring
> > Festival Holiday~
> >
> >> On 13/01/2023 05:28, Penny Zheng wrote:
> >>> From: Penny Zheng <penny.zheng@arm.com>
> >>>
> >>> The start-of-day Xen MPU memory region layout shall be like as follows:
> >>>
> >>> xen_mpumap[0] : Xen text
> >>> xen_mpumap[1] : Xen read-only data
> >>> xen_mpumap[2] : Xen read-only after init data
> >>> xen_mpumap[3] : Xen read-write data
> >>> xen_mpumap[4] : Xen BSS
> >>> ......
> >>> xen_mpumap[max_xen_mpumap - 2]: Xen init data
> >>> xen_mpumap[max_xen_mpumap - 1]: Xen init text
> >>
> >> Can you explain why the init region should be at the end of the MPU?
> >>
> >
> > As discussed in the v1 Serie, I'd like to put all transient MPU
> > regions, like boot-only region, at the end of the MPU.
> 
> I vaguely recall the discussion but can't seem to find the thread. Do you have
> a link? (A summary in the patch would have been nice)
> 
> > Since they will get removed at the end of the boot, I am trying not to
> > leave holes in the MPU map by putting all transient MPU regions at rear.
> 
> I understand the principle, but I am not convinced this is worth it because of
> the increase complexity in the assembly code.
> 
> What would be the problem with reshuffling partially the MPU once we
> booted?

There are three types of MPU regions during boot time:
1. Fixed MPU regions
Regions like the Xen text section, Xen heap section, etc.
2. Boot-only MPU regions
Regions like the Xen init sections, etc. They will be removed at the end of booting.
3. Regions that need switching in/out during vcpu context switch
Regions like the system device memory map.
For example, for FVP_BaseR_AEMv8R, we have [0x80000000, 0xfffff000) as
the whole system device memory map for Xen (idle vcpu) in EL2; when
context switching to a guest vcpu, it shall be replaced with guest-specific
device mappings, like vgic, vpl011, passthrough devices, etc.

We don't have two mappings for the different translation stages in the MPU, like we had in the MMU.
The Xen stage 1 EL2 mapping and the stage 2 mapping share one MPU memory mapping (xen_mpumap).
So, to save the trouble of hunting down each switching region in the time-sensitive context switch, we
must re-order xen_mpumap to keep the fixed regions at the front, and the switching ones right after them.

In patch series v1, I was adding MPU regions in sequence, and I introduced a set of bitmaps to record the location of
same-type regions. At the end of booting, I needed to *disable* the MPU to do the reshuffling, as I can't
move regions like the Xen heap while the MPU is on.

And we discussed that it is too risky to disable the MPU, and you suggested [1]
"
> You should not need any reorg if you map the boot-only section towards in
> the higher slot index (or just after the fixed ones).
"

Maybe in assembly we know exactly how many fixed regions and boot-only regions there are, but in C code we parse the FDT
to get the static configuration, so we don't know, for example, how many fixed regions are enough for the Xen static heap.
Over-provisioning is not a good idea on an MPU system with limited MPU regions; some platforms may only have 16 MPU regions,
and IMHO it is not worth wasting entries on an approximation.

So I took the suggestion of putting regions at the higher slot indexes: fixed regions at the front, and
boot-only and switching ones at the tail. Then, at the end of booting, when we reorganise the MPU mapping, we remove
all boot-only regions, and for the switching ones we disable, relocate (after the fixed ones) and re-enable them. The specific code is in [2].

[1] https://lists.xenproject.org/archives/html/xen-devel/2022-11/msg00457.html
[2] https://lists.xenproject.org/archives/html/xen-devel/2023-01/msg00795.html
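
(For what it's worth, a minimal C sketch of the front/rear bookkeeping described above, reusing the next_fixed_region_idx/next_transient_region_idx counters this patch sets up in head_mpu.S and mm_mpu.c; the helper name, parameter types and error handling are illustrative only.)

```
/* Hypothetical sketch: fixed regions are allocated upwards from slot 0,
 * transient (boot-only/switching) regions downwards from the last slot,
 * so removing the transient ones later leaves no holes at the front. */
#include <xen/errno.h>
#include <xen/init.h>
#include <xen/types.h>

/* Set up by prepare_early_mappings() in head_mpu.S; the actual
 * declarations live in mm_mpu.c in this patch. */
extern uint64_t next_fixed_region_idx, next_transient_region_idx;

static int __init alloc_mpumap_index(bool transient, uint64_t *idx)
{
    if ( next_fixed_region_idx > next_transient_region_idx )
        return -ENOMEM;                       /* xen_mpumap is full */

    if ( transient )
        *idx = next_transient_region_idx--;   /* grow down from the tail */
    else
        *idx = next_fixed_region_idx++;       /* grow up from slot 0 */

    return 0;
}
```
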

> 
[...]
> >>> +/*
> >>> + * ENTRY to configure a EL2 MPU memory region
> >>> + * ARMv8-R AArch64 at most supports 255 MPU protection regions.
> >>> + * See section G1.3.18 of the reference manual for ARMv8-R AArch64,
> >>> + * PRBAR<n>_EL2 and PRLAR<n>_EL2 provides access to the EL2 MPU
> >>> +region
> >>> + * determined by the value of 'n' and PRSELR_EL2.REGION as
> >>> + * PRSELR_EL2.REGION<7:4>:n.(n = 0, 1, 2, ... , 15)
> >>> + * For example to access regions from 16 to 31 (0b10000 to 0b11111):
> >>> + * - Set PRSELR_EL2 to 0b1xxxx
> >>> + * - Region 16 configuration is accessible through PRBAR0_EL2 and
> >>> +PRLAR0_EL2
> >>> + * - Region 17 configuration is accessible through PRBAR1_EL2 and
> >>> +PRLAR1_EL2
> >>> + * - Region 18 configuration is accessible through PRBAR2_EL2 and
> >>> +PRLAR2_EL2
> >>> + * - ...
> >>> + * - Region 31 configuration is accessible through PRBAR15_EL2 and
> >>> +PRLAR15_EL2
> >>> + *
> >>> + * Inputs:
> >>> + * x27: region selector
> >>> + * x28: preserve value for PRBAR_EL2
> >>> + * x29: preserve value for PRLAR_EL2
> >>> + *
> >>> + */
> >>> +ENTRY(write_pr)
> >>
> >> AFAICT, this function would not be necessary if the index for the
> >> init sections were hardcoded.
> >>
> >> So I would like to understand why the index cannot be hardcoded.
> >>
> >
> > The reason is that we are putting init sections at the *end* of the
> > MPU map, and the length of the whole MPU map is platform-specific. We
> read it from MPUIR_EL2.
> 
> Right, I got that bit from the code. What I would like to understand is why all
> the initial address cannot be hardocoded?
> 
>  From a brief look, this would simplify a lot the assembly code.
> 

Like I said before,  "map towards higher slot", if it is not the tail, it is hard to decide another
number to meet different platforms and various FDT static configuration.

If we, in assembly, put fixed regions in front and boot-only regions after, then, when we
enter C world, we immediately do a simple reshuffle, which means that we need to relocate
these init sections to tail, it is workable only when MPU is disabled, unless we're sure that
"reshuffling part" is not using any init codes or data.
  
> >
> >>> +    msr   PRSELR_EL2, x27
> >>> +    dsb   sy
> >>
> >> [...]
> >>
> >>> diff --git a/xen/arch/arm/xen.lds.S b/xen/arch/arm/xen.lds.S index
> >>> bc45ea2c65..79965a3c17 100644
> >>> --- a/xen/arch/arm/xen.lds.S
> >>> +++ b/xen/arch/arm/xen.lds.S
> >>> @@ -91,6 +91,8 @@ SECTIONS
> >>>          __ro_after_init_end = .;
> >>>      } : text
> >>>
> >>> +  . = ALIGN(PAGE_SIZE);
> >>
> >> Why do you need this ALIGN?
> >>
> >
> > I need a symbol as the start of the data section, so I introduce
> > "__data_begin = .;".
> > If I use "__ro_after_init_end = .;" instead, I'm afraid in the future,
> > if someone introduces a new section after ro-after-init section, this
> > part also needs modification too.
> 
> I haven't suggested there is a problem to define a new symbol. I am merely
> asking about the align.
> 
> >
> > When we define MPU regions for each section in xen.lds.S, we always
> > treat these sections page-aligned.
> > I checked each section in xen.lds.S, and ". = ALIGN(PAGE_SIZE);" is
> > either added at section head, or at the tail of the previous section,
> > to make sure starting address symbol page-aligned.
> >
> > And if we don't put this ALIGN, if "__ro_after_init_end " symbol
> > itself is not page-aligned, the two adjacent sections will overlap in MPU.
> 
> __ro_after_init_end *has* to be page aligned because the permissions are
> different than for __data_begin.
> 
> If we were going to add a new section, then either it has the same permission
> as .data.read.mostly and we will bundle them or it doesn't and we would
> need a .align.
> 
> But today, the extra .ALIGN seems unnecessary (at least in the context of this
> patch).
> 

Understood, I'll remove

> Cheers,
> 
> --
> Julien Grall

Cheers,

--
Penny Zheng

--
Penny Zheng
Julien Grall Jan. 30, 2023, 9:39 a.m. UTC | #7
Hi Penny,

On 30/01/2023 05:45, Penny Zheng wrote:
>   There are three types of MPU regions during boot-time:
> 1. Fixed MPU region
> Regions like Xen text section, Xen heap section, etc.
> 2. Boot-only MPU region
> Regions like Xen init sections, etc. It will be removed at the end of booting.
> 3.   Regions need switching in/out during vcpu context switch
> Region like system device memory map.
> For example, for FVP_BaseR_AEMv8R, we have [0x80000000, 0xfffff000) as
> the whole system device memory map for Xen(idle vcpu) in EL2,  when
> context switching to guest vcpu, it shall be replaced with guest-specific
> device mapping, like vgic, vpl011, passthrough device, etc.
> 
> We don't have two mappings for different stage translations in MPU, like we had in MMU.
> Xen stage 1 EL2 mapping and stage 2 mapping are both sharing one MPU memory mapping(xen_mpumap)
> So to save the trouble of hunting down each switching regions in time-sensitive context switch, we
> must re-order xen_mpumap to keep fixed regions in the front, and switching ones in the heels of them.

From my understanding, hunting down each switching region would be a
matter of looping over a bitmap. There will be a slight increase in the
number of instructions executed, but I don't believe it will be noticeable.
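
(Roughly, something like the sketch below; the bitmap and the disable_mpu_region() helper are illustrative, not part of this series.)

```
/* Hypothetical sketch: on context switch, disable every entry marked as
 * "switching" in a bitmap, wherever it sits in xen_mpumap, instead of
 * relying on a fixed-front/switching-tail ordering. */
#include <xen/bitmap.h>
#include <xen/bitops.h>
#include <asm/arm64/mpu.h>    /* ARM_MAX_MPU_MEMORY_REGIONS (this patch) */

static DECLARE_BITMAP(switching_region_mask, ARM_MAX_MPU_MEMORY_REGIONS);

static void disable_switching_regions(void)
{
    unsigned int idx;

    /* max_xen_mpumap is the region count read from MPUIR_EL2 at boot. */
    for_each_set_bit ( idx, switching_region_mask, max_xen_mpumap )
        disable_mpu_region(idx);    /* illustrative helper */
}
```
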

> 
> In Patch Serie v1, I was adding MPU regions in sequence,  and I introduced a set of bitmaps to record the location of
> same type regions. At the end of booting, I need to *disable* MPU to do the reshuffling, as I can't
> move regions like xen heap while MPU on.
> 
> And we discussed that it is too risky to disable MPU, and you suggested [1]
> "
>> You should not need any reorg if you map the boot-only section towards in
>> the higher slot index (or just after the fixed ones).
> "

Right, looking at the new code, I realize that this was probably a bad
idea because we are adding complexity to the assembly code.

> 
> Maybe in assembly, we know exactly how many fixed regions are, boot-only regions are, but in C codes, we parse FDT
> to get static configuration, like we don't know how many fixed regions for xen static heap is enough.
> Approximation is not suggested in MPU system with limited MPU regions, some platform may only have 16 MPU regions,
> IMHO, it is not worthy wasting in approximation.

I haven't suggested to use approximation anywhere here. I will answer 
about the limited number of entries in the other thread.

> 
> So I take the suggestion of putting regions in the higher slot index. Putting fixed regions in the front, and putting
> boot-only and switching ones at tail. Then, at the end of booting, when we reorg the mpu mapping, we remove
> all boot-only regions, and for switching ones, we disable-relocate(after fixed ones)-enable them. Specific codes in [2].

From this discussion, it feels to me that you are trying to make the
code more complicated just to keep the split and save a few cycles (see
above).

I would suggest investigating the cost of "hunting down each section".
Depending on the result, we can discuss what the best approach is.

Cheers,
Penny Zheng Jan. 31, 2023, 4:11 a.m. UTC | #8
Hi Julien

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: Monday, January 30, 2023 5:40 PM
> To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org
> Cc: Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini
> <sstabellini@kernel.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
> Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> Subject: Re: [PATCH v2 11/40] xen/mpu: build up start-of-day Xen MPU
> memory region map
> 
> Hi Penny,
> 
> On 30/01/2023 05:45, Penny Zheng wrote:
> >   There are three types of MPU regions during boot-time:
> > 1. Fixed MPU region
> > Regions like Xen text section, Xen heap section, etc.
> > 2. Boot-only MPU region
> > Regions like Xen init sections, etc. It will be removed at the end of booting.
> > 3.   Regions need switching in/out during vcpu context switch
> > Region like system device memory map.
> > For example, for FVP_BaseR_AEMv8R, we have [0x80000000, 0xfffff000) as
> > the whole system device memory map for Xen(idle vcpu) in EL2,  when
> > context switching to guest vcpu, it shall be replaced with
> > guest-specific device mapping, like vgic, vpl011, passthrough device, etc.
> >
> > We don't have two mappings for different stage translations in MPU, like
> we had in MMU.
> > Xen stage 1 EL2 mapping and stage 2 mapping are both sharing one MPU
> > memory mapping(xen_mpumap) So to save the trouble of hunting down
> each
> > switching regions in time-sensitive context switch, we must re-order
> xen_mpumap to keep fixed regions in the front, and switching ones in the
> heels of them.
> 
>  From my understanding, hunting down each switching regions would be a
> matter to loop over a bitmap. There will be a slight increase in the number
> of instructions executed, but I don't believe it will be noticeable.
> 
> >
> > In Patch Serie v1, I was adding MPU regions in sequence,  and I
> > introduced a set of bitmaps to record the location of same type
> > regions. At the end of booting, I need to *disable* MPU to do the
> reshuffling, as I can't move regions like xen heap while MPU on.
> >
> > And we discussed that it is too risky to disable MPU, and you
> > suggested [1] "
> >> You should not need any reorg if you map the boot-only section
> >> towards in the higher slot index (or just after the fixed ones).
> > "
> 
> Right, looking at the new code. I realize that this was probably a bad idea
> because we are adding complexity in the assembly code.
> 
> >
> > Maybe in assembly, we know exactly how many fixed regions are,
> > boot-only regions are, but in C codes, we parse FDT to get static
> configuration, like we don't know how many fixed regions for xen static
> heap is enough.
> > Approximation is not suggested in MPU system with limited MPU regions,
> > some platform may only have 16 MPU regions, IMHO, it is not worthy
> wasting in approximation.
> 
> I haven't suggested to use approximation anywhere here. I will answer
> about the limited number of entries in the other thread.
> 
> >
> > So I take the suggestion of putting regions in the higher slot index.
> > Putting fixed regions in the front, and putting boot-only and
> > switching ones at tail. Then, at the end of booting, when we reorg the
> mpu mapping, we remove all boot-only regions, and for switching ones, we
> disable-relocate(after fixed ones)-enable them. Specific codes in [2].
> 
>  From this discussion, it feels to me that you are trying to make the code
> more complicated just to keep the split and save a few cycles (see above).
> 
> I would suggest to investigate the cost of "hunting down each section".
> Depending on the result, we can discuss what the best approach.
> 

Correct me if I'm wrong: the complication in the assembly you are worried about
is that we couldn't define the indexes for the init sections, i.e. nothing hardcoded to keep it simple.
And function write_pr, I know, is really a big chunk of code, however the logic there is simple,
just a bunch of "switch-cases".

Regarding adding MPU regions in sequence as you suggested, while using a bitmap at the
same time to record the used entries:
TBH, this is how I designed it internally at the very beginning. We found that if we don't
do a late-boot reorg to keep the fixed regions in front and the switching ones after, then each time we
do a vcpu context switch we not only need to hunt down the switching ones to disable,
but also, when we add the new switch-in regions, using a bitmap to find free entries makes
the process unpredictable. Uncertainty is what we want to avoid on the Armv8-R architecture.

Hmmm, TBH, I really like your suggestion to put the boot-only/switching regions into the
higher slots. It saves a lot of trouble in the late-init reorg and also avoids disabling the MPU
at the same time. The split is also a simple and easy-to-understand construction compared
with a bitmap.

IMHO, the reorg is really worth doing. We put all the complicated things at boot time to
make the runtime context switch simple and fast, even if it only saves a few cycles,
as the Armv8-R architecture profile is designed from the beginning to support use
cases that have a high sensitivity to deterministic execution (e.g. fuel injection,
brake control, drive trains, motor control, etc).
However, when talking about architecture matters, I need more professional opinions,
@Wei Chen @Bertrand Marquis.
Also, will the R52 implementation encounter the same issue? @ayan.kumar.halder@xilinx.com
@Stefano Stabellini

> Cheers,
> 
> --
> Julien Grall

Cheers,

--
Penny Zheng
Julien Grall Jan. 31, 2023, 9:27 a.m. UTC | #9
On 31/01/2023 04:11, Penny Zheng wrote:
> Hi Julien
> 
>> Hi Penny,
>>
>> On 30/01/2023 05:45, Penny Zheng wrote:
>>>    There are three types of MPU regions during boot-time:
>>> 1. Fixed MPU region
>>> Regions like Xen text section, Xen heap section, etc.
>>> 2. Boot-only MPU region
>>> Regions like Xen init sections, etc. It will be removed at the end of booting.
>>> 3.   Regions need switching in/out during vcpu context switch
>>> Region like system device memory map.
>>> For example, for FVP_BaseR_AEMv8R, we have [0x80000000, 0xfffff000) as
>>> the whole system device memory map for Xen(idle vcpu) in EL2,  when
>>> context switching to guest vcpu, it shall be replaced with
>>> guest-specific device mapping, like vgic, vpl011, passthrough device, etc.
>>>
>>> We don't have two mappings for different stage translations in MPU, like
>> we had in MMU.
>>> Xen stage 1 EL2 mapping and stage 2 mapping are both sharing one MPU
>>> memory mapping(xen_mpumap) So to save the trouble of hunting down
>> each
>>> switching regions in time-sensitive context switch, we must re-order
>> xen_mpumap to keep fixed regions in the front, and switching ones in the
>> heels of them.
>>
>>   From my understanding, hunting down each switching regions would be a
>> matter to loop over a bitmap. There will be a slight increase in the number
>> of instructions executed, but I don't believe it will be noticeable.
>>
>>>
>>> In Patch Serie v1, I was adding MPU regions in sequence,  and I
>>> introduced a set of bitmaps to record the location of same type
>>> regions. At the end of booting, I need to *disable* MPU to do the
>> reshuffling, as I can't move regions like xen heap while MPU on.
>>>
>>> And we discussed that it is too risky to disable MPU, and you
>>> suggested [1] "
>>>> You should not need any reorg if you map the boot-only section
>>>> towards in the higher slot index (or just after the fixed ones).
>>> "
>>
>> Right, looking at the new code. I realize that this was probably a bad idea
>> because we are adding complexity in the assembly code.
>>
>>>
>>> Maybe in assembly, we know exactly how many fixed regions are,
>>> boot-only regions are, but in C codes, we parse FDT to get static
>> configuration, like we don't know how many fixed regions for xen static
>> heap is enough.
>>> Approximation is not suggested in MPU system with limited MPU regions,
>>> some platform may only have 16 MPU regions, IMHO, it is not worthy
>> wasting in approximation.
>>
>> I haven't suggested to use approximation anywhere here. I will answer
>> about the limited number of entries in the other thread.
>>
>>>
>>> So I take the suggestion of putting regions in the higher slot index.
>>> Putting fixed regions in the front, and putting boot-only and
>>> switching ones at tail. Then, at the end of booting, when we reorg the
>> mpu mapping, we remove all boot-only regions, and for switching ones, we
>> disable-relocate(after fixed ones)-enable them. Specific codes in [2].
>>
>>   From this discussion, it feels to me that you are trying to make the code
>> more complicated just to keep the split and save a few cycles (see above).
>>
>> I would suggest to investigate the cost of "hunting down each section".
>> Depending on the result, we can discuss what the best approach.
>>
> 
> Correct me if I'm wrong, the complicated things in assembly you are worried about
> is that we couldn't define the index for initial sections, no hardcoded to keep simple.

Correct.

> And function write_pr, ik, is really a big chunk of codes, however the logic is simple there,
> just a bunch of "switch-cases".

I agree that write_pr() is a bunch of switch-cases. But there is a lot
of duplication in it and the interface to use it is, IMHO, not intuitive.
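
(Not part of this series, but to make the duplication point concrete: if write_pr were done from C, a token-pasting helper could generate the 16 near-identical cases. This assumes the PRBAR<n>_EL2/PRLAR<n>_EL2 encodings the series adds to sysregs.h and Xen's WRITE_SYSREG(); treat it as a sketch, not a drop-in replacement for the assembly version.)

```
/* Hypothetical sketch: one switch case per banked PRBAR<n>/PRLAR<n> pair,
 * generated by a macro instead of 16 hand-written msr sequences. */
#include <xen/types.h>
#include <asm/sysregs.h>
#include <asm/system.h>
#include <asm/arm64/mpu.h>    /* prbar_t, prlar_t (this patch) */

#define WRITE_PR_CASE(n)                               \
    case n:                                            \
        WRITE_SYSREG(prbar->bits, PRBAR##n##_EL2);     \
        WRITE_SYSREG(prlar->bits, PRLAR##n##_EL2);     \
        break

static void write_pr(uint64_t sel, const prbar_t *prbar, const prlar_t *prlar)
{
    WRITE_SYSREG(sel, PRSELR_EL2);
    dsb(sy);
    /* Bits <3:0> of the selector pick the banked PRBAR<n>/PRLAR<n> copy;
     * PRSELR_EL2.REGION<7:4> provides the upper bits of the region number. */
    switch ( sel & 0xf )
    {
        WRITE_PR_CASE(0);  WRITE_PR_CASE(1);  WRITE_PR_CASE(2);  WRITE_PR_CASE(3);
        WRITE_PR_CASE(4);  WRITE_PR_CASE(5);  WRITE_PR_CASE(6);  WRITE_PR_CASE(7);
        WRITE_PR_CASE(8);  WRITE_PR_CASE(9);  WRITE_PR_CASE(10); WRITE_PR_CASE(11);
        WRITE_PR_CASE(12); WRITE_PR_CASE(13); WRITE_PR_CASE(14); WRITE_PR_CASE(15);
    }
    isb();
}
```
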

> 
> If we are adding MPU regions in sequence as you suggested, while using bitmap at the
> same time to record used entry.
> TBH, this is how I designed at the very beginning internally. We found that if we don't
> do reorg late-boot to keep fixed in front and switching ones after, each time when we
> do vcpu context switch, not only we need to hunt down switching ones to disable,
> while we add new switch-in regions, using bitmap to find free entry is saying that the
> process is unpredictable. Uncertainty is what we want to avoid in Armv8-R architecture.

I don't understand why it would be unpredictable. For a given 
combination of platform/device-tree, the bitmap will always look the 
same. So the number of cycles/instructions will always be the same.

This is not very different from the case where you split the MPU in two 
because

> 
> Hmmm, TBH, I really really like your suggestion to put boot-only/switching regions into
> higher slot. It really saved a lot trouble in late-init reorg and also avoids disabling MPU
> at the same time. The split is a simple and easy-to-understand construction compared
> with bitmap too.

I would like to propose another split. I will reply to that in the 
thread where you provided the MPU layout.

Cheers,
Penny Zheng Feb. 1, 2023, 5:39 a.m. UTC | #10
Hi Julien,

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: Tuesday, January 31, 2023 5:28 PM
> To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini
> <sstabellini@kernel.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
> ayan.kumar.halder@xilinx.com
> Cc: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> Subject: Re: [PATCH v2 11/40] xen/mpu: build up start-of-day Xen MPU
> memory region map
> 
> 
> 
> On 31/01/2023 04:11, Penny Zheng wrote:
> > Hi Julien
> >
> >> Hi Penny,
> >>
[...]
> >>
> >> I would suggest to investigate the cost of "hunting down each section".
> >> Depending on the result, we can discuss what the best approach.
> >>
> >
> > Correct me if I'm wrong, the complicated things in assembly you are
> > worried about is that we couldn't define the index for initial sections, no
> hardcoded to keep simple.
> 
> Correct.
> 
> > And function write_pr, ik, is really a big chunk of codes, however the
> > logic is simple there, just a bunch of "switch-cases".
> 
> I agree that write_pr() is a bunch of switch-cases. But there are a lot of
> duplication in it and the interface to use it is, IMHO, not intuitive.
> 
> >
> > If we are adding MPU regions in sequence as you suggested, while using
> > bitmap at the same time to record used entry.
> > TBH, this is how I designed at the very beginning internally. We found
> > that if we don't do reorg late-boot to keep fixed in front and
> > switching ones after, each time when we do vcpu context switch, not
> > only we need to hunt down switching ones to disable, while we add new
> > switch-in regions, using bitmap to find free entry is saying that the
> process is unpredictable. Uncertainty is what we want to avoid in Armv8-R
> architecture.
> 
> I don't understand why it would be unpredictable. For a given combination
> of platform/device-tree, the bitmap will always look the same. So the
> number of cycles/instructions will always be the same.
> 

At boot time, it will always be the same. But if we still use a bitmap to find free
entries (for switching MPU regions) at runtime, hmmm, I thought this part would
be unpredictable.

> This is not very different from the case where you split the MPU in two
> because
> 
> >
> > Hmmm, TBH, I really really like your suggestion to put
> > boot-only/switching regions into higher slot. It really saved a lot
> > trouble in late-init reorg and also avoids disabling MPU at the same
> > time. The split is a simple and easy-to-understand construction compared
> with bitmap too.
> 
> I would like to propose another split. I will reply to that in the thread where
> you provided the MPU layout.
> 
> Cheers,
> 
> --
> Julien Grall
Julien Grall Feb. 1, 2023, 6:56 p.m. UTC | #11
Hi Penny,

On 01/02/2023 05:39, Penny Zheng wrote:
>>> If we are adding MPU regions in sequence as you suggested, while using
>>> bitmap at the same time to record used entry.
>>> TBH, this is how I designed at the very beginning internally. We found
>>> that if we don't do reorg late-boot to keep fixed in front and
>>> switching ones after, each time when we do vcpu context switch, not
>>> only we need to hunt down switching ones to disable, while we add new
>>> switch-in regions, using bitmap to find free entry is saying that the
>> process is unpredictable. Uncertainty is what we want to avoid in Armv8-R
>> architecture.
>>
>> I don't understand why it would be unpredictable. For a given combination
>> of platform/device-tree, the bitmap will always look the same. So the
>> number of cycles/instructions will always be the same.
>>
> 
> In boot-time, it will be always the same. But if we still use bitmap to find free
> entry(for switching MPU regions) on runtime, hmmm, I thought this part will
> be unpredictable.

I know this point is now moot as we agreed on not using a bitmap but I 
wanted to answer on the unpredictability part.

It depends on whether you decide to allocate more entries at runtime. My
assumption is you won't, and therefore the time to walk the bitmap
will always be consistent.

Cheers,
Penny Zheng Feb. 2, 2023, 10:53 a.m. UTC | #12
Hi Julien,

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: Thursday, February 2, 2023 2:57 AM
> To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini
> <sstabellini@kernel.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
> ayan.kumar.halder@xilinx.com
> Cc: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> Subject: Re: [PATCH v2 11/40] xen/mpu: build up start-of-day Xen MPU
> memory region map
> 
> Hi Penny,
> 
> On 01/02/2023 05:39, Penny Zheng wrote:
> >>> If we are adding MPU regions in sequence as you suggested, while
> >>> using bitmap at the same time to record used entry.
> >>> TBH, this is how I designed at the very beginning internally. We
> >>> found that if we don't do reorg late-boot to keep fixed in front and
> >>> switching ones after, each time when we do vcpu context switch, not
> >>> only we need to hunt down switching ones to disable, while we add
> >>> new switch-in regions, using bitmap to find free entry is saying
> >>> that the
> >> process is unpredictable. Uncertainty is what we want to avoid in
> >> Armv8-R architecture.
> >>
> >> I don't understand why it would be unpredictable. For a given
> >> combination of platform/device-tree, the bitmap will always look the
> >> same. So the number of cycles/instructions will always be the same.
> >>
> >
> > In boot-time, it will be always the same. But if we still use bitmap
> > to find free entry(for switching MPU regions) on runtime, hmmm, I
> > thought this part will be unpredictable.
> 
> I know this point is now moot as we agreed on not using a bitmap but I
> wanted to answer on the unpredictability part.
> 
> It depends on whether you decide to allocate more entry at runtime. My
> assumption is you won't and therefore the the time to walk the bitmap will
> always be consistent.
> 

In the MPU, we don't have something like VTTBR_EL2 in the MMU to store the stage 2
EL1/EL0 translation table. The Xen stage 1 EL2 mapping and the stage 2 EL1/EL0
mapping share one table.
So when context switching to a different guest, the current design is to disable
DOM1's guest RAM mapping first, then enable DOM2's guest RAM mapping,
to ensure isolation and safety.
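
(Purely to illustrate that flow in code form; none of these fields or helpers exist in this series, the names are made up.)

```
/* Hypothetical sketch: swap the per-domain stage 2 regions (guest RAM,
 * vgic, vpl011, passthrough devices, ...) held in the tail slots of
 * xen_mpumap, while the fixed Xen regions at the front stay untouched. */
static void ctxt_switch_mpu(struct vcpu *prev, struct vcpu *next)
{
    /* Disable the outgoing domain's switching regions first... */
    mpu_disable_regions(prev->domain->arch.mpu_regions,
                        prev->domain->arch.nr_mpu_regions);

    /* ...then load the incoming domain's regions into the same tail slots. */
    mpu_enable_regions(next->domain->arch.mpu_regions,
                       next->domain->arch.nr_mpu_regions);

    isb();
}
```
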

> Cheers,
> 
> --
> Julien Grall
Julien Grall Feb. 2, 2023, 10:58 a.m. UTC | #13
On 02/02/2023 10:53, Penny Zheng wrote:
> Hi Julien,

Hi,

>> Hi Penny,
>>
>> On 01/02/2023 05:39, Penny Zheng wrote:
>>>>> If we are adding MPU regions in sequence as you suggested, while
>>>>> using bitmap at the same time to record used entry.
>>>>> TBH, this is how I designed at the very beginning internally. We
>>>>> found that if we don't do reorg late-boot to keep fixed in front and
>>>>> switching ones after, each time when we do vcpu context switch, not
>>>>> only we need to hunt down switching ones to disable, while we add
>>>>> new switch-in regions, using bitmap to find free entry is saying
>>>>> that the
>>>> process is unpredictable. Uncertainty is what we want to avoid in
>>>> Armv8-R architecture.
>>>>
>>>> I don't understand why it would be unpredictable. For a given
>>>> combination of platform/device-tree, the bitmap will always look the
>>>> same. So the number of cycles/instructions will always be the same.
>>>>
>>>
>>> In boot-time, it will be always the same. But if we still use bitmap
>>> to find free entry(for switching MPU regions) on runtime, hmmm, I
>>> thought this part will be unpredictable.
>>
>> I know this point is now moot as we agreed on not using a bitmap but I
>> wanted to answer on the unpredictability part.
>>
>> It depends on whether you decide to allocate more entry at runtime. My
>> assumption is you won't and therefore the the time to walk the bitmap will
>> always be consistent.
>>
> 
> In MPU, we don't have something like vttbr_el2 in MMU, to store stage 2
> EL1/EL0 translation table. Xen stage 1 EL2 mapping and stage 2 EL1/EL0
> mapping are both sharing one table.
> So when context switching into different guest, the current design is to disable
> DOM1's guest RAM mapping firstly, then enable DOM2's guest RAM mapping,
> to ensure isolation and safety.

I understood that but I don't understand how this is related to my point 
here. The entries you are replacing are always going to be the same 
after boot.

So if you have a bitmap indicating the fixed entries and you don't add
more fixed ones at runtime, then it will always take the same time to
walk it.

Cheers,
Penny Zheng Feb. 2, 2023, 11:30 a.m. UTC | #14
Hi, Julien

> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: Thursday, February 2, 2023 6:58 PM
> To: Penny Zheng <Penny.Zheng@arm.com>; xen-devel@lists.xenproject.org;
> Wei Chen <Wei.Chen@arm.com>; Stefano Stabellini
> <sstabellini@kernel.org>; Bertrand Marquis <Bertrand.Marquis@arm.com>;
> ayan.kumar.halder@xilinx.com
> Cc: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
> Subject: Re: [PATCH v2 11/40] xen/mpu: build up start-of-day Xen MPU
> memory region map
> 
> 
> 
> On 02/02/2023 10:53, Penny Zheng wrote:
> > Hi Julien,
> 
> Hi,
> 
> >> Hi Penny,
> >>
> >> On 01/02/2023 05:39, Penny Zheng wrote:
> >>>>> If we are adding MPU regions in sequence as you suggested, while
> >>>>> using bitmap at the same time to record used entry.
> >>>>> TBH, this is how I designed at the very beginning internally. We
> >>>>> found that if we don't do reorg late-boot to keep fixed in front
> >>>>> and switching ones after, each time when we do vcpu context
> >>>>> switch, not only we need to hunt down switching ones to disable,
> >>>>> while we add new switch-in regions, using bitmap to find free
> >>>>> entry is saying that the
> >>>> process is unpredictable. Uncertainty is what we want to avoid in
> >>>> Armv8-R architecture.
> >>>>
> >>>> I don't understand why it would be unpredictable. For a given
> >>>> combination of platform/device-tree, the bitmap will always look
> >>>> the same. So the number of cycles/instructions will always be the
> same.
> >>>>
> >>>
> >>> In boot-time, it will be always the same. But if we still use bitmap
> >>> to find free entry(for switching MPU regions) on runtime, hmmm, I
> >>> thought this part will be unpredictable.
> >>
> >> I know this point is now moot as we agreed on not using a bitmap but
> >> I wanted to answer on the unpredictability part.
> >>
> >> It depends on whether you decide to allocate more entry at runtime.
> >> My assumption is you won't and therefore the the time to walk the
> >> bitmap will always be consistent.
> >>
> >
> > In MPU, we don't have something like vttbr_el2 in MMU, to store stage
> > 2
> > EL1/EL0 translation table. Xen stage 1 EL2 mapping and stage 2 EL1/EL0
> > mapping are both sharing one table.
> > So when context switching into different guest, the current design is
> > to disable DOM1's guest RAM mapping firstly, then enable DOM2's guest
> > RAM mapping, to ensure isolation and safety.
> 
> I understood that but I don't understand how this is related to my point
> here. The entries you are replacing are always going to be the same after
> boot.
> 
> So if you have a bitmap indicate the fixed entries and you don't add more
> fixed one at runtime, then it will always take the same time to walk it.
> 

Ah, sorry for taking so long to understand ;/. True, the fixed entries will never
change after boot time; each time we switch to a guest vcpu, we always choose
the same entries.

> Cheers,
> 
> --
> Julien Grall

Cheers,

--
Penny Zheng
diff mbox series

Patch

diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
index 22da2f54b5..438c9737ad 100644
--- a/xen/arch/arm/arm64/Makefile
+++ b/xen/arch/arm/arm64/Makefile
@@ -10,6 +10,8 @@  obj-y += entry.o
 obj-y += head.o
 ifneq ($(CONFIG_HAS_MPU),y)
 obj-y += head_mmu.o
+else
+obj-y += head_mpu.o
 endif
 obj-y += insn.o
 obj-$(CONFIG_LIVEPATCH) += livepatch.o
diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index 782bd1f94c..145e3d53dc 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -68,9 +68,9 @@ 
  *  x24 -
  *  x25 -
  *  x26 - skip_zero_bss (boot cpu only)
- *  x27 -
- *  x28 -
- *  x29 -
+ *  x27 - region selector (mpu only)
+ *  x28 - prbar (mpu only)
+ *  x29 - prlar (mpu only)
  *  x30 - lr
  */
 
@@ -82,7 +82,7 @@ 
  * ---------------------------
  *
  * The requirements are:
- *   MMU = off, D-cache = off, I-cache = on or off,
+ *   MMU/MPU = off, D-cache = off, I-cache = on or off,
  *   x0 = physical address to the FDT blob.
  *
  * This must be the very first address in the loaded image.
@@ -252,7 +252,12 @@  real_start_efi:
 
         bl    check_cpu_mode
         bl    cpu_init
-        bl    create_page_tables
+
+        /*
+         * Create boot memory management data, pagetable for MMU systems
+         * and memory regions for MPU systems.
+         */
+        bl    prepare_early_mappings
         bl    enable_mmu
 
         /* We are still in the 1:1 mapping. Jump to the runtime Virtual Address. */
@@ -310,7 +315,7 @@  GLOBAL(init_secondary)
 #endif
         bl    check_cpu_mode
         bl    cpu_init
-        bl    create_page_tables
+        bl    prepare_early_mappings
         bl    enable_mmu
 
         /* We are still in the 1:1 mapping. Jump to the runtime Virtual Address. */
diff --git a/xen/arch/arm/arm64/head_mmu.S b/xen/arch/arm/arm64/head_mmu.S
index 6ff13c751c..2346f755df 100644
--- a/xen/arch/arm/arm64/head_mmu.S
+++ b/xen/arch/arm/arm64/head_mmu.S
@@ -123,7 +123,7 @@ 
  *
  * Clobbers x0 - x4
  */
-ENTRY(create_page_tables)
+ENTRY(prepare_early_mappings)
         /* Prepare the page-tables for mapping Xen */
         ldr   x0, =XEN_VIRT_START
         create_table_entry boot_pgtable, boot_first, x0, 0, x1, x2, x3
@@ -208,7 +208,7 @@  virtphys_clash:
         /* Identity map clashes with boot_third, which we cannot handle yet */
         PRINT("- Unable to build boot page tables - virt and phys addresses clash. -\r\n")
         b     fail
-ENDPROC(create_page_tables)
+ENDPROC(prepare_early_mappings)
 
 /*
  * Turn on the Data Cache and the MMU. The function will return on the 1:1
diff --git a/xen/arch/arm/arm64/head_mpu.S b/xen/arch/arm/arm64/head_mpu.S
new file mode 100644
index 0000000000..0b97ce4646
--- /dev/null
+++ b/xen/arch/arm/arm64/head_mpu.S
@@ -0,0 +1,323 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Start-of-day code for an Armv8-R AArch64 MPU system.
+ */
+
+#include <asm/arm64/mpu.h>
+#include <asm/early_printk.h>
+#include <asm/page.h>
+
+/*
+ * One entry in the Xen MPU memory region mapping table (xen_mpumap) is a structure
+ * of pr_t, which is 16 bytes in size, so the entry offset shift is 4.
+ */
+#define MPU_ENTRY_SHIFT         0x4
+
+#define REGION_SEL_MASK         0xf
+
+#define REGION_TEXT_PRBAR       0x38    /* SH=11 AP=10 XN=00 */
+#define REGION_RO_PRBAR         0x3A    /* SH=11 AP=10 XN=10 */
+#define REGION_DATA_PRBAR       0x32    /* SH=11 AP=00 XN=10 */
+
+#define REGION_NORMAL_PRLAR     0x0f    /* NS=0 ATTR=111 EN=1 */
+
+/*
+ * Macro to round up the section address to be PAGE_SIZE aligned
+ * Each section(e.g. .text, .data, etc) in xen.lds.S is page-aligned,
+ * which is usually guarded with ". = ALIGN(PAGE_SIZE)" in the head,
+ * or in the end
+ */
+.macro roundup_section, xb
+        add   \xb, \xb, #(PAGE_SIZE-1)
+        and   \xb, \xb, #PAGE_MASK
+.endm
+
+/*
+ * Macro to create a new MPU memory region entry, which is a structure
+ * of pr_t,  in \prmap.
+ *
+ * Inputs:
+ * prmap:   mpu memory region map table symbol
+ * sel:     region selector
+ * prbar:   preserve value for PRBAR_EL2
+ * prlar:   preserve value for PRLAR_EL2
+ *
+ * Clobbers \tmp1, \tmp2
+ *
+ */
+.macro create_mpu_entry prmap, sel, prbar, prlar, tmp1, tmp2
+    mov   \tmp2, \sel
+    lsl   \tmp2, \tmp2, #MPU_ENTRY_SHIFT
+    adr_l \tmp1, \prmap
+    /* Write the first 8 bytes(prbar_t) of pr_t */
+    str   \prbar, [\tmp1, \tmp2]
+
+    add   \tmp2, \tmp2, #8
+    /* Write the last 8 bytes(prlar_t) of pr_t */
+    str   \prlar, [\tmp1, \tmp2]
+.endm
+
+/*
+ * Macro to store the maximum number of regions supported by the EL2 MPU
+ * in max_xen_mpumap, which is identified by MPUIR_EL2.
+ *
+ * Outputs:
+ * nr_regions: preserve the maximum number of regions supported by the EL2 MPU
+ *
+ * Clobbers \tmp1
+ *
+ */
+.macro read_max_el2_regions, nr_regions, tmp1
+    load_paddr \tmp1, max_xen_mpumap
+    mrs   \nr_regions, MPUIR_EL2
+    isb
+    str   \nr_regions, [\tmp1]
+.endm
+
+/*
+ * Macro to prepare and set a MPU memory region
+ *
+ * Inputs:
+ * base:        base address symbol (should be page-aligned)
+ * limit:       limit address symbol
+ * sel:         region selector
+ * prbar:       store computed PRBAR_EL2 value
+ * prlar:       store computed PRLAR_EL2 value
+ * attr_prbar:  PRBAR_EL2-related memory attributes. If not specified it will be REGION_DATA_PRBAR
+ * attr_prlar:  PRLAR_EL2-related memory attributes. If not specified it will be REGION_NORMAL_PRLAR
+ *
+ * Clobber \tmp1
+ *
+ */
+.macro prepare_xen_region, base, limit, sel, prbar, prlar, tmp1, attr_prbar=REGION_DATA_PRBAR, attr_prlar=REGION_NORMAL_PRLAR
+    /* Prepare value for PRBAR_EL2 reg and preserve it in \prbar.*/
+    load_paddr \prbar, \base
+    and   \prbar, \prbar, #MPU_REGION_MASK
+    mov   \tmp1, #\attr_prbar
+    orr   \prbar, \prbar, \tmp1
+
+    /* Prepare value for PRLAR_EL2 reg and preserve it in \prlar.*/
+    load_paddr \prlar, \limit
+    /* Round up limit address to be PAGE_SIZE aligned */
+    roundup_section \prlar
+    /* Limit address should be inclusive */
+    sub   \prlar, \prlar, #1
+    and   \prlar, \prlar, #MPU_REGION_MASK
+    mov   \tmp1, #\attr_prlar
+    orr   \prlar, \prlar, \tmp1
+
+    mov   x27, \sel
+    mov   x28, \prbar
+    mov   x29, \prlar
+    /*
+     * x27, x28, x29 are special registers designed as
+     * inputs for function write_pr
+     */
+    bl    write_pr
+.endm
+
+.section .text.idmap, "ax", %progbits
+
+/*
+ * ENTRY to configure an EL2 MPU memory region
+ * ARMv8-R AArch64 at most supports 255 MPU protection regions.
+ * See section G1.3.18 of the reference manual for ARMv8-R AArch64,
+ * PRBAR<n>_EL2 and PRLAR<n>_EL2 provides access to the EL2 MPU region
+ * determined by the value of 'n' and PRSELR_EL2.REGION as
+ * PRSELR_EL2.REGION<7:4>:n.(n = 0, 1, 2, ... , 15)
+ * For example to access regions from 16 to 31 (0b10000 to 0b11111):
+ * - Set PRSELR_EL2 to 0b1xxxx
+ * - Region 16 configuration is accessible through PRBAR0_EL2 and PRLAR0_EL2
+ * - Region 17 configuration is accessible through PRBAR1_EL2 and PRLAR1_EL2
+ * - Region 18 configuration is accessible through PRBAR2_EL2 and PRLAR2_EL2
+ * - ...
+ * - Region 31 configuration is accessible through PRBAR15_EL2 and PRLAR15_EL2
+ *
+ * Inputs:
+ * x27: region selector
+ * x28: preserve value for PRBAR_EL2
+ * x29: preserve value for PRLAR_EL2
+ *
+ */
+ENTRY(write_pr)
+    msr   PRSELR_EL2, x27
+    dsb   sy
+    and   x27, x27, #REGION_SEL_MASK
+    cmp   x27, #0
+    bne   1f
+    msr   PRBAR0_EL2, x28
+    msr   PRLAR0_EL2, x29
+    b     out
+1:
+    cmp   x27, #1
+    bne   2f
+    msr   PRBAR1_EL2, x28
+    msr   PRLAR1_EL2, x29
+    b     out
+2:
+    cmp   x27, #2
+    bne   3f
+    msr   PRBAR2_EL2, x28
+    msr   PRLAR2_EL2, x29
+    b     out
+3:
+    cmp   x27, #3
+    bne   4f
+    msr   PRBAR3_EL2, x28
+    msr   PRLAR3_EL2, x29
+    b     out
+4:
+    cmp   x27, #4
+    bne   5f
+    msr   PRBAR4_EL2, x28
+    msr   PRLAR4_EL2, x29
+    b     out
+5:
+    cmp   x27, #5
+    bne   6f
+    msr   PRBAR5_EL2, x28
+    msr   PRLAR5_EL2, x29
+    b     out
+6:
+    cmp   x27, #6
+    bne   7f
+    msr   PRBAR6_EL2, x28
+    msr   PRLAR6_EL2, x29
+    b     out
+7:
+    cmp   x27, #7
+    bne   8f
+    msr   PRBAR7_EL2, x28
+    msr   PRLAR7_EL2, x29
+    b     out
+8:
+    cmp   x27, #8
+    bne   9f
+    msr   PRBAR8_EL2, x28
+    msr   PRLAR8_EL2, x29
+    b     out
+9:
+    cmp   x27, #9
+    bne   10f
+    msr   PRBAR9_EL2, x28
+    msr   PRLAR9_EL2, x29
+    b     out
+10:
+    cmp   x27, #10
+    bne   11f
+    msr   PRBAR10_EL2, x28
+    msr   PRLAR10_EL2, x29
+    b     out
+11:
+    cmp   x27, #11
+    bne   12f
+    msr   PRBAR11_EL2, x28
+    msr   PRLAR11_EL2, x29
+    b     out
+12:
+    cmp   x27, #12
+    bne   13f
+    msr   PRBAR12_EL2, x28
+    msr   PRLAR12_EL2, x29
+    b     out
+13:
+    cmp   x27, #13
+    bne   14f
+    msr   PRBAR13_EL2, x28
+    msr   PRLAR13_EL2, x29
+    b     out
+14:
+    cmp   x27, #14
+    bne   15f
+    msr   PRBAR14_EL2, x28
+    msr   PRLAR14_EL2, x29
+    b     out
+15:
+    msr   PRBAR15_EL2, x28
+    msr   PRLAR15_EL2, x29
+out:
+    isb
+    ret
+ENDPROC(write_pr)
+
+/*
+ * Static start-of-day Xen EL2 MPU memory region layout.
+ *
+ *     xen_mpumap[0] : Xen text
+ *     xen_mpumap[1] : Xen read-only data
+ *     xen_mpumap[2] : Xen read-only after init data
+ *     xen_mpumap[3] : Xen read-write data
+ *     xen_mpumap[4] : Xen BSS
+ *     ......
+ *     xen_mpumap[max_xen_mpumap - 2]: Xen init data
+ *     xen_mpumap[max_xen_mpumap - 1]: Xen init text
+ *
+ * Clobbers x0 - x6
+ *
+ * It shall be compliant with what is described in xen.lds.S, or the code
+ * below needs adjustment.
+ * It shall also follow the rule of putting fixed MPU memory regions at
+ * the front and the others at the rear, which, here, mainly refers to
+ * boot-only regions, like the Xen init text region.
+ */
+ENTRY(prepare_early_mappings)
+    /* Save LR, as write_pr will be called later like a nested function */
+    mov   x6, lr
+
+    /* x0: region sel */
+    mov   x0, xzr
+    /* Xen text section. */
+    prepare_xen_region _stext, _etext, x0, x1, x2, x3, attr_prbar=REGION_TEXT_PRBAR
+    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
+
+    add   x0, x0, #1
+    /* Xen read-only data section. */
+    prepare_xen_region _srodata, _erodata, x0, x1, x2, x3, attr_prbar=REGION_RO_PRBAR
+    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
+
+    add   x0, x0, #1
+    /* Xen read-only after init data section. */
+    prepare_xen_region __ro_after_init_start, __ro_after_init_end, x0, x1, x2, x3
+    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
+
+    add   x0, x0, #1
+    /* Xen read-write data section. */
+    prepare_xen_region __data_begin, __init_begin, x0, x1, x2, x3
+    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
+
+    read_max_el2_regions x5, x3 /* x5: max_mpumap */
+    sub   x5, x5, #1
+    /* Xen init text section. */
+    prepare_xen_region _sinittext, _einittext, x5, x1, x2, x3, attr_prbar=REGION_TEXT_PRBAR
+    create_mpu_entry xen_mpumap, x5, x1, x2, x3, x4
+
+    sub   x5, x5, #1
+    /* Xen init data section. */
+    prepare_xen_region __init_data_begin, __init_end, x5, x1, x2, x3
+    create_mpu_entry xen_mpumap, x5, x1, x2, x3, x4
+
+    add   x0, x0, #1
+    /* Xen BSS section. */
+    prepare_xen_region __bss_start, __bss_end, x0, x1, x2, x3
+    create_mpu_entry xen_mpumap, x0, x1, x2, x3, x4
+
+    /* Update next_fixed_region_idx and next_transient_region_idx */
+    load_paddr x3, next_fixed_region_idx
+    add   x0, x0, #1
+    str   x0, [x3]
+    load_paddr x4, next_transient_region_idx
+    sub   x5, x5, #1
+    str   x5, [x4]
+
+    mov   lr, x6
+    ret
+ENDPROC(prepare_early_mappings)
+
+GLOBAL(_end_boot)
+
+/*
+ * Local variables:
+ * mode: ASM
+ * indent-tabs-mode: nil
+ * End:
+ */
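
As a hedged C illustration (not Xen code) of how the start-of-day layout
consumes indices from both ends of xen_mpumap, assuming MPUIR_EL2.Region
reports 32 regions:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t max_regions = 32;              /* assumed MPUIR_EL2.Region */
        uint64_t next_fixed = 0;                /* fixed regions grow upwards */
        uint64_t next_transient = max_regions;  /* boot-only regions grow downwards */
        const char *fixed[] = { "text", "rodata", "ro_after_init", "data", "bss" };
        const char *transient[] = { "init text", "init data" };
        unsigned int i;

        for ( i = 0; i < 5; i++ )
            printf("xen_mpumap[%"PRIu64"] = Xen %s\n", next_fixed++, fixed[i]);
        for ( i = 0; i < 2; i++ )
            printf("xen_mpumap[%"PRIu64"] = Xen %s\n", --next_transient, transient[i]);

        /* Slots 0-4 and 30-31 are used; the free range left over is 5..29. */
        return 0;
    }

For this assumed size, prepare_early_mappings would end up recording
next_fixed_region_idx = 5 and next_transient_region_idx = 29, i.e. the next
free slot on each side.
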
diff --git a/xen/arch/arm/include/asm/arm64/mpu.h b/xen/arch/arm/include/asm/arm64/mpu.h
new file mode 100644
index 0000000000..c945dd53db
--- /dev/null
+++ b/xen/arch/arm/include/asm/arm64/mpu.h
@@ -0,0 +1,63 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * mpu.h: Arm Memory Protection Region definitions.
+ */
+
+#ifndef __ARM64_MPU_H__
+#define __ARM64_MPU_H__
+
+#define MPU_REGION_SHIFT  6
+#define MPU_REGION_ALIGN  (_AC(1, UL) << MPU_REGION_SHIFT)
+#define MPU_REGION_MASK   (~(MPU_REGION_ALIGN - 1))
+
+/*
+ * MPUIR_EL2.Region identifies the number of regions supported by the EL2 MPU.
+ * It is an 8-bit field, so at most 255 MPU memory regions.
+ */
+#define ARM_MAX_MPU_MEMORY_REGIONS 255
+
+#ifndef __ASSEMBLY__
+
+/* Protection Region Base Address Register */
+typedef union {
+    struct __packed {
+        unsigned long xn:2;       /* Execute-Never */
+        unsigned long ap:2;       /* Access Permission */
+        unsigned long sh:2;       /* Shareability */
+        unsigned long base:42;    /* Base Address */
+        unsigned long pad:16;
+    } reg;
+    uint64_t bits;
+} prbar_t;
+
+/* Protection Region Limit Address Register */
+typedef union {
+    struct __packed {
+        unsigned long en:1;     /* Region enable */
+        unsigned long ai:3;     /* Memory Attribute Index */
+        unsigned long ns:1;     /* Not-Secure */
+        unsigned long res:1;    /* Reserved 0 by hardware */
+        unsigned long limit:42; /* Limit Address */
+        unsigned long pad:16;
+    } reg;
+    uint64_t bits;
+} prlar_t;
+
+/* MPU Protection Region */
+typedef struct {
+    prbar_t prbar;
+    prlar_t prlar;
+} pr_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ARM64_MPU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
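
A hedged sketch of how the prbar_t/prlar_t layouts above might be filled in
from C; the field values are illustrative encodings rather than definitions
taken from this patch, and the header above is assumed to be included:

    pr_t make_region(uint64_t base, uint64_t limit)
    {
        pr_t region;

        region.prbar.bits = 0;
        region.prlar.bits = 0;

        /* Both fields hold bits [47:6] of the 64-byte-aligned addresses. */
        region.prbar.reg.base = base >> MPU_REGION_SHIFT;
        region.prlar.reg.limit = limit >> MPU_REGION_SHIFT;

        region.prbar.reg.xn = 0;    /* e.g. executable */
        region.prbar.reg.ap = 0;    /* e.g. read/write at EL2 */
        region.prbar.reg.sh = 0;    /* e.g. non-shareable */

        region.prlar.reg.ai = 0;    /* attribute index into MAIR_EL2 */
        region.prlar.reg.ns = 0;    /* e.g. Secure */
        region.prlar.reg.en = 1;    /* enable the region */

        return region;
    }
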
diff --git a/xen/arch/arm/include/asm/arm64/sysregs.h b/xen/arch/arm/include/asm/arm64/sysregs.h
index 4638999514..aca9bca5b1 100644
--- a/xen/arch/arm/include/asm/arm64/sysregs.h
+++ b/xen/arch/arm/include/asm/arm64/sysregs.h
@@ -458,6 +458,55 @@ 
 #define ZCR_ELx_LEN_SIZE             9
 #define ZCR_ELx_LEN_MASK             0x1ff
 
+/* System registers for Armv8-R AArch64 */
+#ifdef CONFIG_HAS_MPU
+
+/* EL2 MPU Protection Region Base Address Register encode */
+#define PRBAR_EL2   S3_4_C6_C8_0
+#define PRBAR0_EL2  S3_4_C6_C8_0
+#define PRBAR1_EL2  S3_4_C6_C8_4
+#define PRBAR2_EL2  S3_4_C6_C9_0
+#define PRBAR3_EL2  S3_4_C6_C9_4
+#define PRBAR4_EL2  S3_4_C6_C10_0
+#define PRBAR5_EL2  S3_4_C6_C10_4
+#define PRBAR6_EL2  S3_4_C6_C11_0
+#define PRBAR7_EL2  S3_4_C6_C11_4
+#define PRBAR8_EL2  S3_4_C6_C12_0
+#define PRBAR9_EL2  S3_4_C6_C12_4
+#define PRBAR10_EL2 S3_4_C6_C13_0
+#define PRBAR11_EL2 S3_4_C6_C13_4
+#define PRBAR12_EL2 S3_4_C6_C14_0
+#define PRBAR13_EL2 S3_4_C6_C14_4
+#define PRBAR14_EL2 S3_4_C6_C15_0
+#define PRBAR15_EL2 S3_4_C6_C15_4
+
+/* EL2 MPU Protection Region Limit Address Register encode */
+#define PRLAR_EL2   S3_4_C6_C8_1
+#define PRLAR0_EL2  S3_4_C6_C8_1
+#define PRLAR1_EL2  S3_4_C6_C8_5
+#define PRLAR2_EL2  S3_4_C6_C9_1
+#define PRLAR3_EL2  S3_4_C6_C9_5
+#define PRLAR4_EL2  S3_4_C6_C10_1
+#define PRLAR5_EL2  S3_4_C6_C10_5
+#define PRLAR6_EL2  S3_4_C6_C11_1
+#define PRLAR7_EL2  S3_4_C6_C11_5
+#define PRLAR8_EL2  S3_4_C6_C12_1
+#define PRLAR9_EL2  S3_4_C6_C12_5
+#define PRLAR10_EL2 S3_4_C6_C13_1
+#define PRLAR11_EL2 S3_4_C6_C13_5
+#define PRLAR12_EL2 S3_4_C6_C14_1
+#define PRLAR13_EL2 S3_4_C6_C14_5
+#define PRLAR14_EL2 S3_4_C6_C15_1
+#define PRLAR15_EL2 S3_4_C6_C15_5
+
+/* MPU Protection Region Selection Register encode */
+#define PRSELR_EL2 S3_4_C6_C2_1
+
+/* MPU Type registers encode */
+#define MPUIR_EL2 S3_4_C0_C0_4
+
+#endif
+
 /* Access to system registers */
 
 #define WRITE_SYSREG64(v, name) do {                    \
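
With these encodings in place, the register pairs can also be driven from C
through the generic accessors at the bottom of this header. A hedged sketch
follows; prbar_val/prlar_val are placeholders, the barrier choice simply
mirrors write_pr, and whether later patches in the series program the MPU
from C this way is not shown in this patch:

    /* Program EL2 MPU region 0; assumes dsb()/isb() and the SYSREG accessors. */
    static void write_region0(uint64_t prbar_val, uint64_t prlar_val)
    {
        WRITE_SYSREG64(0, PRSELR_EL2);          /* select the first bank (regions 0-15) */
        dsb(sy);                                /* barrier, as in write_pr */
        WRITE_SYSREG64(prbar_val, PRBAR0_EL2);  /* base address + access attributes */
        WRITE_SYSREG64(prlar_val, PRLAR0_EL2);  /* limit address + enable bit */
        isb();
    }
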
diff --git a/xen/arch/arm/mm_mpu.c b/xen/arch/arm/mm_mpu.c
new file mode 100644
index 0000000000..43e9a1be4d
--- /dev/null
+++ b/xen/arch/arm/mm_mpu.c
@@ -0,0 +1,48 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * xen/arch/arm/mm_mpu.c
+ *
+ * MPU-based memory management code for Armv8-R AArch64.
+ *
+ * Copyright (C) 2022 Arm Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/init.h>
+#include <xen/page-size.h>
+#include <asm/arm64/mpu.h>
+
+/* Xen MPU memory region mapping table. */
+pr_t __aligned(PAGE_SIZE) __section(".data.page_aligned")
+     xen_mpumap[ARM_MAX_MPU_MEMORY_REGIONS];
+
+/* Index into MPU memory region map for fixed regions, ascending from zero. */
+uint64_t __ro_after_init next_fixed_region_idx;
+/*
+ * Index into MPU memory region map for transient regions, such as boot-only
+ * regions, which descends from max_xen_mpumap.
+ */
+uint64_t __ro_after_init next_transient_region_idx;
+
+/* Maximum number of MPU memory regions supported by the EL2 MPU. */
+uint64_t __ro_after_init max_xen_mpumap;
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
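
A hedged sketch of how later code could consume the two counters above; the
real allocator is not part of this patch, and this assumes the counters stay
writable during boot and that xen/errno.h is available:

    static int alloc_fixed_region_idx(uint64_t *idx)
    {
        /* The two ends meet when the MPU memory region map is exhausted. */
        if ( next_fixed_region_idx > next_transient_region_idx )
            return -ENOMEM;

        *idx = next_fixed_region_idx++;
        return 0;
    }
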
diff --git a/xen/arch/arm/xen.lds.S b/xen/arch/arm/xen.lds.S
index bc45ea2c65..79965a3c17 100644
--- a/xen/arch/arm/xen.lds.S
+++ b/xen/arch/arm/xen.lds.S
@@ -91,6 +91,8 @@  SECTIONS
       __ro_after_init_end = .;
   } : text
 
+  . = ALIGN(PAGE_SIZE);
+  __data_begin = .;
   .data.read_mostly : {
        /* Exception table */
        __start___ex_table = .;
@@ -157,7 +159,9 @@  SECTIONS
        *(.altinstr_replacement)
   } :text
   . = ALIGN(PAGE_SIZE);
+
   .init.data : {
+       __init_data_begin = .;            /* Init data */
        *(.init.rodata)
        *(.init.rodata.*)