diff mbox series

[kvm-unit-tests,v3,4/8] arm/arm64: mmu: Stop mapping an assumed IO region

Message ID 20210429164130.405198-5-drjones@redhat.com (mailing list archive)
State New, archived
Headers show
Series arm/arm64: Prepare for target-efi | expand

Commit Message

Andrew Jones April 29, 2021, 4:41 p.m. UTC
By providing a proper ioremap function, we can just rely on devices
calling it for each region they need (as they already do) instead of
mapping a big assumed I/O range. We don't require the MMU to be
enabled at the time of the ioremap. In that case, we add the mapping
to the identity map anyway. This allows us to call setup_vm after
io_init. Why don't we just call setup_vm before io_init, I hear you
ask? Well, that's because tests like sieve want to start with the MMU
off, later call setup_vm, and all the while have working I/O. Some
unit tests are just really demanding...

While at it, ensure we map the I/O regions with XN (execute never),
as suggested by Alexandru Elisei.

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 lib/arm/asm/io.h            |  6 ++++++
 lib/arm/asm/mmu.h           |  3 +++
 lib/arm/asm/page.h          |  2 ++
 lib/arm/asm/pgtable-hwdef.h |  1 +
 lib/arm/mmu.c               | 37 +++++++++++++++++++++++++++----------
 lib/arm64/asm/io.h          |  6 ++++++
 lib/arm64/asm/mmu.h         |  1 +
 lib/arm64/asm/page.h        |  2 ++
 8 files changed, 48 insertions(+), 10 deletions(-)

Comments

Alexandru Elisei May 10, 2021, 3:45 p.m. UTC | #1
Hi Drew,

On 4/29/21 5:41 PM, Andrew Jones wrote:
> By providing a proper ioremap function, we can just rely on devices
> calling it for each region they need (as they already do) instead of
> mapping a big assumed I/O range. We don't require the MMU to be
> enabled at the time of the ioremap. In that case, we add the mapping
> to the identity map anyway. This allows us to call setup_vm after
> io_init. Why don't we just call setup_vm before io_init, I hear you
> ask? Well, that's because tests like sieve want to start with the MMU
> off, later call setup_vm, and all the while have working I/O. Some
> unit tests are just really demanding...
>
> While at it, ensure we map the I/O regions with XN (execute never),
> as suggested by Alexandru Elisei.

I got to thinking why this wasn't an issue before. Under KVM, device memory is not
usually mapped at stage 2, so any speculated reads wouldn't have reached memory at
all. The only way I imagine that happening if the user was running kvm-unit-tests
with a passthrough PCI device, which I don't think happens too often.

But we cannot rely on devices not being mapped at stage 2 when running under EFI
(we're mapping them ourselves with ioremap), so I believe this is a good fix.

Had another look at the patch, looks good to me:

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>

Thanks,

Alex

>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>  lib/arm/asm/io.h            |  6 ++++++
>  lib/arm/asm/mmu.h           |  3 +++
>  lib/arm/asm/page.h          |  2 ++
>  lib/arm/asm/pgtable-hwdef.h |  1 +
>  lib/arm/mmu.c               | 37 +++++++++++++++++++++++++++----------
>  lib/arm64/asm/io.h          |  6 ++++++
>  lib/arm64/asm/mmu.h         |  1 +
>  lib/arm64/asm/page.h        |  2 ++
>  8 files changed, 48 insertions(+), 10 deletions(-)
>
> diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h
> index ba3b0b2412ad..e4caa6ff5d1e 100644
> --- a/lib/arm/asm/io.h
> +++ b/lib/arm/asm/io.h
> @@ -77,6 +77,12 @@ static inline void __raw_writel(u32 val, volatile void __iomem *addr)
>  		     : "r" (val));
>  }
>  
> +#define ioremap ioremap
> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	return __ioremap(phys_addr, size);
> +}
> +
>  #define virt_to_phys virt_to_phys
>  static inline phys_addr_t virt_to_phys(const volatile void *x)
>  {
> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
> index 122874b8aebe..94e70f0a84bf 100644
> --- a/lib/arm/asm/mmu.h
> +++ b/lib/arm/asm/mmu.h
> @@ -8,10 +8,13 @@
>  #include <asm/barrier.h>
>  
>  #define PTE_USER		L_PTE_USER
> +#define PTE_UXN			L_PTE_XN
> +#define PTE_PXN			L_PTE_PXN
>  #define PTE_RDONLY		PTE_AP2
>  #define PTE_SHARED		L_PTE_SHARED
>  #define PTE_AF			PTE_EXT_AF
>  #define PTE_WBWA		L_PTE_MT_WRITEALLOC
> +#define PTE_UNCACHED		L_PTE_MT_UNCACHED
>  
>  /* See B3.18.7 TLB maintenance operations */
>  
> diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h
> index 1fb5cd26ac66..8eb4a883808e 100644
> --- a/lib/arm/asm/page.h
> +++ b/lib/arm/asm/page.h
> @@ -47,5 +47,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
>  extern phys_addr_t __virt_to_phys(unsigned long addr);
>  extern unsigned long __phys_to_virt(phys_addr_t addr);
>  
> +extern void *__ioremap(phys_addr_t phys_addr, size_t size);
> +
>  #endif /* !__ASSEMBLY__ */
>  #endif /* _ASMARM_PAGE_H_ */
> diff --git a/lib/arm/asm/pgtable-hwdef.h b/lib/arm/asm/pgtable-hwdef.h
> index fe1d8540ea3f..90fd306c7cc0 100644
> --- a/lib/arm/asm/pgtable-hwdef.h
> +++ b/lib/arm/asm/pgtable-hwdef.h
> @@ -34,6 +34,7 @@
>  #define L_PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
>  #define L_PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
>  #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 10)	/* AF */
> +#define L_PTE_PXN		(_AT(pteval_t, 1) << 53)	/* PXN */
>  #define L_PTE_XN		(_AT(pteval_t, 1) << 54)	/* XN */
>  
>  /*
> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> index 15eef007f256..791b1f88f946 100644
> --- a/lib/arm/mmu.c
> +++ b/lib/arm/mmu.c
> @@ -11,6 +11,7 @@
>  #include <asm/mmu.h>
>  #include <asm/setup.h>
>  #include <asm/page.h>
> +#include <asm/io.h>
>  
>  #include "alloc_page.h"
>  #include "vmalloc.h"
> @@ -157,9 +158,8 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>  void *setup_mmu(phys_addr_t phys_end)
>  {
>  	uintptr_t code_end = (uintptr_t)&etext;
> -	struct mem_region *r;
>  
> -	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
> +	/* 3G-4G region is reserved for vmalloc, cap phys_end at 3G */
>  	if (phys_end > (3ul << 30))
>  		phys_end = 3ul << 30;
>  
> @@ -170,14 +170,8 @@ void *setup_mmu(phys_addr_t phys_end)
>  			"Unsupported translation granule %ld\n", PAGE_SIZE);
>  #endif
>  
> -	mmu_idmap = alloc_page();
> -
> -	for (r = mem_regions; r->end; ++r) {
> -		if (!(r->flags & MR_F_IO))
> -			continue;
> -		mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end,
> -				   __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER));
> -	}
> +	if (!mmu_idmap)
> +		mmu_idmap = alloc_page();
>  
>  	/* armv8 requires code shared between EL1 and EL0 to be read-only */
>  	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
> @@ -192,6 +186,29 @@ void *setup_mmu(phys_addr_t phys_end)
>  	return mmu_idmap;
>  }
>  
> +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	phys_addr_t paddr_aligned = phys_addr & PAGE_MASK;
> +	phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size);
> +	pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER | PTE_UXN | PTE_PXN);
> +	pgd_t *pgtable;
> +
> +	assert(sizeof(long) == 8 || !(phys_addr >> 32));
> +
> +	if (mmu_enabled()) {
> +		pgtable = current_thread_info()->pgtable;
> +	} else {
> +		if (!mmu_idmap)
> +			mmu_idmap = alloc_page();
> +		pgtable = mmu_idmap;
> +	}
> +
> +	mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned,
> +			   paddr_end, prot);
> +
> +	return (void __iomem *)(unsigned long)phys_addr;
> +}
> +
>  phys_addr_t __virt_to_phys(unsigned long addr)
>  {
>  	if (mmu_enabled()) {
> diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h
> index e0a03b250d5b..be19f471c0fa 100644
> --- a/lib/arm64/asm/io.h
> +++ b/lib/arm64/asm/io.h
> @@ -71,6 +71,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
>  	return val;
>  }
>  
> +#define ioremap ioremap
> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	return __ioremap(phys_addr, size);
> +}
> +
>  #define virt_to_phys virt_to_phys
>  static inline phys_addr_t virt_to_phys(const volatile void *x)
>  {
> diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h
> index 72d75eafc882..72371b2d9fe3 100644
> --- a/lib/arm64/asm/mmu.h
> +++ b/lib/arm64/asm/mmu.h
> @@ -8,6 +8,7 @@
>  #include <asm/barrier.h>
>  
>  #define PMD_SECT_UNCACHED	PMD_ATTRINDX(MT_DEVICE_nGnRE)
> +#define PTE_UNCACHED		PTE_ATTRINDX(MT_DEVICE_nGnRE)
>  #define PTE_WBWA		PTE_ATTRINDX(MT_NORMAL)
>  
>  static inline void flush_tlb_all(void)
> diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h
> index ae4484b22114..d0fac6ea563d 100644
> --- a/lib/arm64/asm/page.h
> +++ b/lib/arm64/asm/page.h
> @@ -72,5 +72,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
>  extern phys_addr_t __virt_to_phys(unsigned long addr);
>  extern unsigned long __phys_to_virt(phys_addr_t addr);
>  
> +extern void *__ioremap(phys_addr_t phys_addr, size_t size);
> +
>  #endif /* !__ASSEMBLY__ */
>  #endif /* _ASMARM64_PAGE_H_ */
Alexandru Elisei May 13, 2021, 3:48 p.m. UTC | #2
Hi Drew,

On 5/10/21 4:45 PM, Alexandru Elisei wrote:
> Hi Drew,
>
> On 4/29/21 5:41 PM, Andrew Jones wrote:
>> By providing a proper ioremap function, we can just rely on devices
>> calling it for each region they need (as they already do) instead of
>> mapping a big assumed I/O range. We don't require the MMU to be
>> enabled at the time of the ioremap. In that case, we add the mapping
>> to the identity map anyway. This allows us to call setup_vm after
>> io_init. Why don't we just call setup_vm before io_init, I hear you
>> ask? Well, that's because tests like sieve want to start with the MMU
>> off, later call setup_vm, and all the while have working I/O. Some
>> unit tests are just really demanding...
>>
>> While at it, ensure we map the I/O regions with XN (execute never),
>> as suggested by Alexandru Elisei.
> I got to thinking why this wasn't an issue before. Under KVM, device memory is not
> usually mapped at stage 2, so any speculated reads wouldn't have reached memory at
> all. The only way I imagine that happening if the user was running kvm-unit-tests
> with a passthrough PCI device, which I don't think happens too often.
>
> But we cannot rely on devices not being mapped at stage 2 when running under EFI
> (we're mapping them ourselves with ioremap), so I believe this is a good fix.
>
> Had another look at the patch, looks good to me:

While testing the series I discovered that this patch introduces a bug when
running under kvmtool.

Here's the splat:

$ ./configure --vmm=kvmtool --earlycon=uart,mmio,0x1000000 --page-size=4K && make
clean && make -j6 && ./vm run -c2 -m128 -f arm/micro-bench.flat
[..]
  # lkvm run --firmware arm/micro-bench.flat -m 128 -c 2 --name guest-6986
  Info: Placing fdt at 0x80200000 - 0x80210000
chr_testdev_init: chr-testdev: can't find a virtio-console
Timer Frequency 24000000 Hz (Output in microseconds)

name                                    total ns                         avg
ns            
--------------------------------------------------------------------------------------------
hvc                                 168674516.0                        
2573.0             
Load address: 80000000
PC: 80000128 PC offset: 128
Unhandled exception ec=0x25 (DABT_EL1)
Vector: 4 (el1h_sync)
ESR_EL1:         96000006, ec=0x25 (DABT_EL1)
FAR_EL1: 000000000a000008 (valid)
Exception frame registers:
pc : [<0000000080000128>] lr : [<000000008000cac8>] pstate: 800003c5
sp : 000000008003ff90
x29: 0000000000000000 x28: 0000000000000000
x27: 00000011ada4d0c2 x26: 0000000000000000
x25: 0000000080015978 x24: 0000000080015a90
x23: 0000048c27394fff x22: 20c49ba5e353f7cf
x21: 28f5c28f5c28f5c3 x20: 0000000080016af0
x19: 000000e8d4a51000 x18: 0000000080040000
x17: 0000000000000000 x16: 0000000080008210
x15: 000000008003fe5c x14: 0000000000000260
x13: 00000000000002a4 x12: 0000000080040000
x11: 0000000000000001 x10: 0000000000000060
x9 : 0000000000000000 x8 : 0000000000000039
x7 : 0000000000000040 x6 : 0000000080013983
x5 : 000000008003f74e x4 : 000000008003f69c
x3 : 0000000000000000 x2 : 0000000000000000
x1 : 0000000000000000 x0 : 000000000a000008

The issue is caused by the mmio_read_user_exec() function from arm/micro-bench.c.
kvmtool doesn't have a chr-testdev device, and because probing fails, the address
0x0a000008 isn't ioremap'ed. The 0-1G memory region is not automatically mapped
anymore, and the access triggers a data abort at stage 1.

I fixed the splat with this change:

diff --git a/arm/micro-bench.c b/arm/micro-bench.c
index 95c418c10eb4..ad9e44d71d8d 100644
--- a/arm/micro-bench.c
+++ b/arm/micro-bench.c
@@ -281,7 +281,7 @@ static void mmio_read_user_exec(void)
         * updated in the future if any relevant changes in QEMU
         * test-dev are made.
         */
-       void *userspace_emulated_addr = (void*)0x0a000008;
+       void *userspace_emulated_addr = (void*)ioremap(0x0a000008, 8);
 
        readl(userspace_emulated_addr);
 }

kvmtool ignores the MMIO exit reason if no device owns the IPA, that's why it also
works on kvmtool.

The micro-bench test with the diff passes under qemu and kvmtool, tested with 4K,
16K and 64K pages on an odroid-c4.

Thanks,

Alex

>
> Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
>
> Thanks,
>
> Alex
>
>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>> ---
>>  lib/arm/asm/io.h            |  6 ++++++
>>  lib/arm/asm/mmu.h           |  3 +++
>>  lib/arm/asm/page.h          |  2 ++
>>  lib/arm/asm/pgtable-hwdef.h |  1 +
>>  lib/arm/mmu.c               | 37 +++++++++++++++++++++++++++----------
>>  lib/arm64/asm/io.h          |  6 ++++++
>>  lib/arm64/asm/mmu.h         |  1 +
>>  lib/arm64/asm/page.h        |  2 ++
>>  8 files changed, 48 insertions(+), 10 deletions(-)
>>
>> diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h
>> index ba3b0b2412ad..e4caa6ff5d1e 100644
>> --- a/lib/arm/asm/io.h
>> +++ b/lib/arm/asm/io.h
>> @@ -77,6 +77,12 @@ static inline void __raw_writel(u32 val, volatile void __iomem *addr)
>>  		     : "r" (val));
>>  }
>>  
>> +#define ioremap ioremap
>> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
>> +{
>> +	return __ioremap(phys_addr, size);
>> +}
>> +
>>  #define virt_to_phys virt_to_phys
>>  static inline phys_addr_t virt_to_phys(const volatile void *x)
>>  {
>> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
>> index 122874b8aebe..94e70f0a84bf 100644
>> --- a/lib/arm/asm/mmu.h
>> +++ b/lib/arm/asm/mmu.h
>> @@ -8,10 +8,13 @@
>>  #include <asm/barrier.h>
>>  
>>  #define PTE_USER		L_PTE_USER
>> +#define PTE_UXN			L_PTE_XN
>> +#define PTE_PXN			L_PTE_PXN
>>  #define PTE_RDONLY		PTE_AP2
>>  #define PTE_SHARED		L_PTE_SHARED
>>  #define PTE_AF			PTE_EXT_AF
>>  #define PTE_WBWA		L_PTE_MT_WRITEALLOC
>> +#define PTE_UNCACHED		L_PTE_MT_UNCACHED
>>  
>>  /* See B3.18.7 TLB maintenance operations */
>>  
>> diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h
>> index 1fb5cd26ac66..8eb4a883808e 100644
>> --- a/lib/arm/asm/page.h
>> +++ b/lib/arm/asm/page.h
>> @@ -47,5 +47,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
>>  extern phys_addr_t __virt_to_phys(unsigned long addr);
>>  extern unsigned long __phys_to_virt(phys_addr_t addr);
>>  
>> +extern void *__ioremap(phys_addr_t phys_addr, size_t size);
>> +
>>  #endif /* !__ASSEMBLY__ */
>>  #endif /* _ASMARM_PAGE_H_ */
>> diff --git a/lib/arm/asm/pgtable-hwdef.h b/lib/arm/asm/pgtable-hwdef.h
>> index fe1d8540ea3f..90fd306c7cc0 100644
>> --- a/lib/arm/asm/pgtable-hwdef.h
>> +++ b/lib/arm/asm/pgtable-hwdef.h
>> @@ -34,6 +34,7 @@
>>  #define L_PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
>>  #define L_PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
>>  #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 10)	/* AF */
>> +#define L_PTE_PXN		(_AT(pteval_t, 1) << 53)	/* PXN */
>>  #define L_PTE_XN		(_AT(pteval_t, 1) << 54)	/* XN */
>>  
>>  /*
>> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
>> index 15eef007f256..791b1f88f946 100644
>> --- a/lib/arm/mmu.c
>> +++ b/lib/arm/mmu.c
>> @@ -11,6 +11,7 @@
>>  #include <asm/mmu.h>
>>  #include <asm/setup.h>
>>  #include <asm/page.h>
>> +#include <asm/io.h>
>>  
>>  #include "alloc_page.h"
>>  #include "vmalloc.h"
>> @@ -157,9 +158,8 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>>  void *setup_mmu(phys_addr_t phys_end)
>>  {
>>  	uintptr_t code_end = (uintptr_t)&etext;
>> -	struct mem_region *r;
>>  
>> -	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
>> +	/* 3G-4G region is reserved for vmalloc, cap phys_end at 3G */
>>  	if (phys_end > (3ul << 30))
>>  		phys_end = 3ul << 30;
>>  
>> @@ -170,14 +170,8 @@ void *setup_mmu(phys_addr_t phys_end)
>>  			"Unsupported translation granule %ld\n", PAGE_SIZE);
>>  #endif
>>  
>> -	mmu_idmap = alloc_page();
>> -
>> -	for (r = mem_regions; r->end; ++r) {
>> -		if (!(r->flags & MR_F_IO))
>> -			continue;
>> -		mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end,
>> -				   __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER));
>> -	}
>> +	if (!mmu_idmap)
>> +		mmu_idmap = alloc_page();
>>  
>>  	/* armv8 requires code shared between EL1 and EL0 to be read-only */
>>  	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
>> @@ -192,6 +186,29 @@ void *setup_mmu(phys_addr_t phys_end)
>>  	return mmu_idmap;
>>  }
>>  
>> +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size)
>> +{
>> +	phys_addr_t paddr_aligned = phys_addr & PAGE_MASK;
>> +	phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size);
>> +	pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER | PTE_UXN | PTE_PXN);
>> +	pgd_t *pgtable;
>> +
>> +	assert(sizeof(long) == 8 || !(phys_addr >> 32));
>> +
>> +	if (mmu_enabled()) {
>> +		pgtable = current_thread_info()->pgtable;
>> +	} else {
>> +		if (!mmu_idmap)
>> +			mmu_idmap = alloc_page();
>> +		pgtable = mmu_idmap;
>> +	}
>> +
>> +	mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned,
>> +			   paddr_end, prot);
>> +
>> +	return (void __iomem *)(unsigned long)phys_addr;
>> +}
>> +
>>  phys_addr_t __virt_to_phys(unsigned long addr)
>>  {
>>  	if (mmu_enabled()) {
>> diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h
>> index e0a03b250d5b..be19f471c0fa 100644
>> --- a/lib/arm64/asm/io.h
>> +++ b/lib/arm64/asm/io.h
>> @@ -71,6 +71,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
>>  	return val;
>>  }
>>  
>> +#define ioremap ioremap
>> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
>> +{
>> +	return __ioremap(phys_addr, size);
>> +}
>> +
>>  #define virt_to_phys virt_to_phys
>>  static inline phys_addr_t virt_to_phys(const volatile void *x)
>>  {
>> diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h
>> index 72d75eafc882..72371b2d9fe3 100644
>> --- a/lib/arm64/asm/mmu.h
>> +++ b/lib/arm64/asm/mmu.h
>> @@ -8,6 +8,7 @@
>>  #include <asm/barrier.h>
>>  
>>  #define PMD_SECT_UNCACHED	PMD_ATTRINDX(MT_DEVICE_nGnRE)
>> +#define PTE_UNCACHED		PTE_ATTRINDX(MT_DEVICE_nGnRE)
>>  #define PTE_WBWA		PTE_ATTRINDX(MT_NORMAL)
>>  
>>  static inline void flush_tlb_all(void)
>> diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h
>> index ae4484b22114..d0fac6ea563d 100644
>> --- a/lib/arm64/asm/page.h
>> +++ b/lib/arm64/asm/page.h
>> @@ -72,5 +72,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
>>  extern phys_addr_t __virt_to_phys(unsigned long addr);
>>  extern unsigned long __phys_to_virt(phys_addr_t addr);
>>  
>> +extern void *__ioremap(phys_addr_t phys_addr, size_t size);
>> +
>>  #endif /* !__ASSEMBLY__ */
>>  #endif /* _ASMARM64_PAGE_H_ */
Andrew Jones May 13, 2021, 5:18 p.m. UTC | #3
On Thu, May 13, 2021 at 04:48:16PM +0100, Alexandru Elisei wrote:
> Hi Drew,
> 
> On 5/10/21 4:45 PM, Alexandru Elisei wrote:
> > Hi Drew,
> >
> > On 4/29/21 5:41 PM, Andrew Jones wrote:
> >> By providing a proper ioremap function, we can just rely on devices
> >> calling it for each region they need (as they already do) instead of
> >> mapping a big assumed I/O range. We don't require the MMU to be
> >> enabled at the time of the ioremap. In that case, we add the mapping
> >> to the identity map anyway. This allows us to call setup_vm after
> >> io_init. Why don't we just call setup_vm before io_init, I hear you
> >> ask? Well, that's because tests like sieve want to start with the MMU
> >> off, later call setup_vm, and all the while have working I/O. Some
> >> unit tests are just really demanding...
> >>
> >> While at it, ensure we map the I/O regions with XN (execute never),
> >> as suggested by Alexandru Elisei.
> > I got to thinking why this wasn't an issue before. Under KVM, device memory is not
> > usually mapped at stage 2, so any speculated reads wouldn't have reached memory at
> > all. The only way I imagine that happening if the user was running kvm-unit-tests
> > with a passthrough PCI device, which I don't think happens too often.
> >
> > But we cannot rely on devices not being mapped at stage 2 when running under EFI
> > (we're mapping them ourselves with ioremap), so I believe this is a good fix.
> >
> > Had another look at the patch, looks good to me:
> 
> While testing the series I discovered that this patch introduces a bug when
> running under kvmtool.
> 
> Here's the splat:
> 
> $ ./configure --vmm=kvmtool --earlycon=uart,mmio,0x1000000 --page-size=4K && make
> clean && make -j6 && ./vm run -c2 -m128 -f arm/micro-bench.flat
> [..]
>   # lkvm run --firmware arm/micro-bench.flat -m 128 -c 2 --name guest-6986
>   Info: Placing fdt at 0x80200000 - 0x80210000
> chr_testdev_init: chr-testdev: can't find a virtio-console
> Timer Frequency 24000000 Hz (Output in microseconds)
> 
> name                                    total ns                         avg
> ns            
> --------------------------------------------------------------------------------------------
> hvc                                 168674516.0                        
> 2573.0             
> Load address: 80000000
> PC: 80000128 PC offset: 128
> Unhandled exception ec=0x25 (DABT_EL1)
> Vector: 4 (el1h_sync)
> ESR_EL1:         96000006, ec=0x25 (DABT_EL1)
> FAR_EL1: 000000000a000008 (valid)
> Exception frame registers:
> pc : [<0000000080000128>] lr : [<000000008000cac8>] pstate: 800003c5
> sp : 000000008003ff90
> x29: 0000000000000000 x28: 0000000000000000
> x27: 00000011ada4d0c2 x26: 0000000000000000
> x25: 0000000080015978 x24: 0000000080015a90
> x23: 0000048c27394fff x22: 20c49ba5e353f7cf
> x21: 28f5c28f5c28f5c3 x20: 0000000080016af0
> x19: 000000e8d4a51000 x18: 0000000080040000
> x17: 0000000000000000 x16: 0000000080008210
> x15: 000000008003fe5c x14: 0000000000000260
> x13: 00000000000002a4 x12: 0000000080040000
> x11: 0000000000000001 x10: 0000000000000060
> x9 : 0000000000000000 x8 : 0000000000000039
> x7 : 0000000000000040 x6 : 0000000080013983
> x5 : 000000008003f74e x4 : 000000008003f69c
> x3 : 0000000000000000 x2 : 0000000000000000
> x1 : 0000000000000000 x0 : 000000000a000008
> 
> The issue is caused by the mmio_read_user_exec() function from arm/micro-bench.c.
> kvmtool doesn't have a chr-testdev device, and because probing fails, the address
> 0x0a000008 isn't ioremap'ed. The 0-1G memory region is not automatically mapped
> anymore, and the access triggers a data abort at stage 1.
> 
> I fixed the splat with this change:
> 
> diff --git a/arm/micro-bench.c b/arm/micro-bench.c
> index 95c418c10eb4..ad9e44d71d8d 100644
> --- a/arm/micro-bench.c
> +++ b/arm/micro-bench.c
> @@ -281,7 +281,7 @@ static void mmio_read_user_exec(void)
>          * updated in the future if any relevant changes in QEMU
>          * test-dev are made.
>          */
> -       void *userspace_emulated_addr = (void*)0x0a000008;
> +       void *userspace_emulated_addr = (void*)ioremap(0x0a000008, 8);
>  
>         readl(userspace_emulated_addr);
>  }
> 
> kvmtool ignores the MMIO exit reason if no device owns the IPA, that's why it also
> works on kvmtool.
> 
> The micro-bench test with the diff passes under qemu and kvmtool, tested with 4K,
> 16K and 64K pages on an odroid-c4.
>

Thanks Alex,

I think a better fix is this untested one below, though. If you can test
it out and confirm it also resolves the issue, then I'll add this patch
to the series.

Thanks,
drew


diff --git a/arm/micro-bench.c b/arm/micro-bench.c
index 95c418c10eb4..deafd5695c33 100644
--- a/arm/micro-bench.c
+++ b/arm/micro-bench.c
@@ -273,16 +273,22 @@ static void hvc_exec(void)
        asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
 }
 
-static void mmio_read_user_exec(void)
+/*
+ * FIXME: Read device-id in virtio mmio here in order to
+ * force an exit to userspace. This address needs to be
+ * updated in the future if any relevant changes in QEMU
+ * test-dev are made.
+ */
+static void *userspace_emulated_addr;
+
+static bool mmio_read_user_prep(void)
 {
-       /*
-        * FIXME: Read device-id in virtio mmio here in order to
-        * force an exit to userspace. This address needs to be
-        * updated in the future if any relevant changes in QEMU
-        * test-dev are made.
-        */
-       void *userspace_emulated_addr = (void*)0x0a000008;
+       userspace_emulated_addr = (void*)ioremap(0x0a000008, 8);
+       return true;
+}
 
+static void mmio_read_user_exec(void)
+{
        readl(userspace_emulated_addr);
 }
 
@@ -309,14 +315,14 @@ struct exit_test {
 };
 
 static struct exit_test tests[] = {
-       {"hvc",                 NULL,           hvc_exec,               NULL,           65536,          true},
-       {"mmio_read_user",      NULL,           mmio_read_user_exec,    NULL,           65536,          true},
-       {"mmio_read_vgic",      NULL,           mmio_read_vgic_exec,    NULL,           65536,          true},
-       {"eoi",                 NULL,           eoi_exec,               NULL,           65536,          true},
-       {"ipi",                 ipi_prep,       ipi_exec,               NULL,           65536,          true},
-       {"ipi_hw",              ipi_hw_prep,    ipi_exec,               NULL,           65536,          true},
-       {"lpi",                 lpi_prep,       lpi_exec,               NULL,           65536,          true},
-       {"timer_10ms",          timer_prep,     timer_exec,             timer_post,     256,            true},
+       {"hvc",                 NULL,                   hvc_exec,               NULL,           65536,          true},
+       {"mmio_read_user",      mmio_read_user_prep,    mmio_read_user_exec,    NULL,           65536,          true},
+       {"mmio_read_vgic",      NULL,                   mmio_read_vgic_exec,    NULL,           65536,          true},
+       {"eoi",                 NULL,                   eoi_exec,               NULL,           65536,          true},
+       {"ipi",                 ipi_prep,               ipi_exec,               NULL,           65536,          true},
+       {"ipi_hw",              ipi_hw_prep,            ipi_exec,               NULL,           65536,          true},
+       {"lpi",                 lpi_prep,               lpi_exec,               NULL,           65536,          true},
+       {"timer_10ms",          timer_prep,             timer_exec,             timer_post,     256,            true},
 };
 
 struct ns_time {
Andrew Jones May 13, 2021, 5:43 p.m. UTC | #4
On Thu, May 13, 2021 at 07:18:44PM +0200, Andrew Jones wrote:
> On Thu, May 13, 2021 at 04:48:16PM +0100, Alexandru Elisei wrote:
> > Hi Drew,
> > 
> > On 5/10/21 4:45 PM, Alexandru Elisei wrote:
> > > Hi Drew,
> > >
> > > On 4/29/21 5:41 PM, Andrew Jones wrote:
> > >> By providing a proper ioremap function, we can just rely on devices
> > >> calling it for each region they need (as they already do) instead of
> > >> mapping a big assumed I/O range. We don't require the MMU to be
> > >> enabled at the time of the ioremap. In that case, we add the mapping
> > >> to the identity map anyway. This allows us to call setup_vm after
> > >> io_init. Why don't we just call setup_vm before io_init, I hear you
> > >> ask? Well, that's because tests like sieve want to start with the MMU
> > >> off, later call setup_vm, and all the while have working I/O. Some
> > >> unit tests are just really demanding...
> > >>
> > >> While at it, ensure we map the I/O regions with XN (execute never),
> > >> as suggested by Alexandru Elisei.
> > > I got to thinking why this wasn't an issue before. Under KVM, device memory is not
> > > usually mapped at stage 2, so any speculated reads wouldn't have reached memory at
> > > all. The only way I imagine that happening if the user was running kvm-unit-tests
> > > with a passthrough PCI device, which I don't think happens too often.
> > >
> > > But we cannot rely on devices not being mapped at stage 2 when running under EFI
> > > (we're mapping them ourselves with ioremap), so I believe this is a good fix.
> > >
> > > Had another look at the patch, looks good to me:
> > 
> > While testing the series I discovered that this patch introduces a bug when
> > running under kvmtool.
> > 
> > Here's the splat:
> > 
> > $ ./configure --vmm=kvmtool --earlycon=uart,mmio,0x1000000 --page-size=4K && make
> > clean && make -j6 && ./vm run -c2 -m128 -f arm/micro-bench.flat
> > [..]
> >   # lkvm run --firmware arm/micro-bench.flat -m 128 -c 2 --name guest-6986
> >   Info: Placing fdt at 0x80200000 - 0x80210000
> > chr_testdev_init: chr-testdev: can't find a virtio-console
> > Timer Frequency 24000000 Hz (Output in microseconds)
> > 
> > name                                    total ns                         avg
> > ns            
> > --------------------------------------------------------------------------------------------
> > hvc                                 168674516.0                        
> > 2573.0             
> > Load address: 80000000
> > PC: 80000128 PC offset: 128
> > Unhandled exception ec=0x25 (DABT_EL1)
> > Vector: 4 (el1h_sync)
> > ESR_EL1:         96000006, ec=0x25 (DABT_EL1)
> > FAR_EL1: 000000000a000008 (valid)
> > Exception frame registers:
> > pc : [<0000000080000128>] lr : [<000000008000cac8>] pstate: 800003c5
> > sp : 000000008003ff90
> > x29: 0000000000000000 x28: 0000000000000000
> > x27: 00000011ada4d0c2 x26: 0000000000000000
> > x25: 0000000080015978 x24: 0000000080015a90
> > x23: 0000048c27394fff x22: 20c49ba5e353f7cf
> > x21: 28f5c28f5c28f5c3 x20: 0000000080016af0
> > x19: 000000e8d4a51000 x18: 0000000080040000
> > x17: 0000000000000000 x16: 0000000080008210
> > x15: 000000008003fe5c x14: 0000000000000260
> > x13: 00000000000002a4 x12: 0000000080040000
> > x11: 0000000000000001 x10: 0000000000000060
> > x9 : 0000000000000000 x8 : 0000000000000039
> > x7 : 0000000000000040 x6 : 0000000080013983
> > x5 : 000000008003f74e x4 : 000000008003f69c
> > x3 : 0000000000000000 x2 : 0000000000000000
> > x1 : 0000000000000000 x0 : 000000000a000008
> > 
> > The issue is caused by the mmio_read_user_exec() function from arm/micro-bench.c.
> > kvmtool doesn't have a chr-testdev device, and because probing fails, the address
> > 0x0a000008 isn't ioremap'ed. The 0-1G memory region is not automatically mapped
> > anymore, and the access triggers a data abort at stage 1.
> > 
> > I fixed the splat with this change:
> > 
> > diff --git a/arm/micro-bench.c b/arm/micro-bench.c
> > index 95c418c10eb4..ad9e44d71d8d 100644
> > --- a/arm/micro-bench.c
> > +++ b/arm/micro-bench.c
> > @@ -281,7 +281,7 @@ static void mmio_read_user_exec(void)
> >          * updated in the future if any relevant changes in QEMU
> >          * test-dev are made.
> >          */
> > -       void *userspace_emulated_addr = (void*)0x0a000008;
> > +       void *userspace_emulated_addr = (void*)ioremap(0x0a000008, 8);
> >  
> >         readl(userspace_emulated_addr);
> >  }
> > 
> > kvmtool ignores the MMIO exit reason if no device owns the IPA, that's why it also
> > works on kvmtool.
> > 
> > The micro-bench test with the diff passes under qemu and kvmtool, tested with 4K,
> > 16K and 64K pages on an odroid-c4.
> >
> 
> Thanks Alex,
> 
> I think a better fix is this untested one below, though. If you can test
> it out and confirm it also resolves the issue, then I'll add this patch
> to the series.
> 
> Thanks,
> drew
> 
> 
> diff --git a/arm/micro-bench.c b/arm/micro-bench.c
> index 95c418c10eb4..deafd5695c33 100644
> --- a/arm/micro-bench.c
> +++ b/arm/micro-bench.c
> @@ -273,16 +273,22 @@ static void hvc_exec(void)
>         asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
>  }
>  
> -static void mmio_read_user_exec(void)
> +/*
> + * FIXME: Read device-id in virtio mmio here in order to
> + * force an exit to userspace. This address needs to be
> + * updated in the future if any relevant changes in QEMU
> + * test-dev are made.
> + */
> +static void *userspace_emulated_addr;
> +
> +static bool mmio_read_user_prep(void)
>  {
> -       /*
> -        * FIXME: Read device-id in virtio mmio here in order to
> -        * force an exit to userspace. This address needs to be
> -        * updated in the future if any relevant changes in QEMU
> -        * test-dev are made.
> -        */
> -       void *userspace_emulated_addr = (void*)0x0a000008;
> +       userspace_emulated_addr = (void*)ioremap(0x0a000008, 8);
> +       return true;
> +}
>  
> +static void mmio_read_user_exec(void)
> +{
>         readl(userspace_emulated_addr);
>  }
>  
> @@ -309,14 +315,14 @@ struct exit_test {
>  };
>  
>  static struct exit_test tests[] = {
> -       {"hvc",                 NULL,           hvc_exec,               NULL,           65536,          true},
> -       {"mmio_read_user",      NULL,           mmio_read_user_exec,    NULL,           65536,          true},
> -       {"mmio_read_vgic",      NULL,           mmio_read_vgic_exec,    NULL,           65536,          true},
> -       {"eoi",                 NULL,           eoi_exec,               NULL,           65536,          true},
> -       {"ipi",                 ipi_prep,       ipi_exec,               NULL,           65536,          true},
> -       {"ipi_hw",              ipi_hw_prep,    ipi_exec,               NULL,           65536,          true},
> -       {"lpi",                 lpi_prep,       lpi_exec,               NULL,           65536,          true},
> -       {"timer_10ms",          timer_prep,     timer_exec,             timer_post,     256,            true},
> +       {"hvc",                 NULL,                   hvc_exec,               NULL,           65536,          true},
> +       {"mmio_read_user",      mmio_read_user_prep,    mmio_read_user_exec,    NULL,           65536,          true},
> +       {"mmio_read_vgic",      NULL,                   mmio_read_vgic_exec,    NULL,           65536,          true},
> +       {"eoi",                 NULL,                   eoi_exec,               NULL,           65536,          true},
> +       {"ipi",                 ipi_prep,               ipi_exec,               NULL,           65536,          true},
> +       {"ipi_hw",              ipi_hw_prep,            ipi_exec,               NULL,           65536,          true},
> +       {"lpi",                 lpi_prep,               lpi_exec,               NULL,           65536,          true},
> +       {"timer_10ms",          timer_prep,             timer_exec,             timer_post,     256,            true},
>  };
>  
>  struct ns_time {
>

I still haven't tested it (beyond compiling), but I've tweaked this a bit.
You can see it here

https://gitlab.com/rhdrjones/kvm-unit-tests/-/commit/71938030d160e021db3388037d0d407df17e8e5e

The whole v4 of this series is here

https://gitlab.com/rhdrjones/kvm-unit-tests/-/commits/efiprep

Thanks,
drew
Alexandru Elisei May 17, 2021, 10:38 a.m. UTC | #5
Hi Drew,

On 5/13/21 6:43 PM, Andrew Jones wrote:
> On Thu, May 13, 2021 at 07:18:44PM +0200, Andrew Jones wrote:
>> [..]
>> Thanks Alex,
>>
>> I think a better fix is this untested one below, though. If you can test
>> it out and confirm it also resolves the issue, then I'll add this patch
>> to the series.
>>
>> Thanks,
>> drew
>>
>>
>> diff --git a/arm/micro-bench.c b/arm/micro-bench.c
>> index 95c418c10eb4..deafd5695c33 100644
>> --- a/arm/micro-bench.c
>> +++ b/arm/micro-bench.c
>> @@ -273,16 +273,22 @@ static void hvc_exec(void)
>>         asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
>>  }
>>  
>> -static void mmio_read_user_exec(void)
>> +/*
>> + * FIXME: Read device-id in virtio mmio here in order to
>> + * force an exit to userspace. This address needs to be
>> + * updated in the future if any relevant changes in QEMU
>> + * test-dev are made.
>> + */
>> +static void *userspace_emulated_addr;
>> +
>> +static bool mmio_read_user_prep(void)
>>  {
>> -       /*
>> -        * FIXME: Read device-id in virtio mmio here in order to
>> -        * force an exit to userspace. This address needs to be
>> -        * updated in the future if any relevant changes in QEMU
>> -        * test-dev are made.
>> -        */
>> -       void *userspace_emulated_addr = (void*)0x0a000008;
>> +       userspace_emulated_addr = (void*)ioremap(0x0a000008, 8);
>> +       return true;
>> +}
>>  
>> +static void mmio_read_user_exec(void)
>> +{
>>         readl(userspace_emulated_addr);
>>  }
>>  
>> @@ -309,14 +315,14 @@ struct exit_test {
>>  };
>>  
>>  static struct exit_test tests[] = {
>> -       {"hvc",                 NULL,           hvc_exec,               NULL,           65536,          true},
>> -       {"mmio_read_user",      NULL,           mmio_read_user_exec,    NULL,           65536,          true},
>> -       {"mmio_read_vgic",      NULL,           mmio_read_vgic_exec,    NULL,           65536,          true},
>> -       {"eoi",                 NULL,           eoi_exec,               NULL,           65536,          true},
>> -       {"ipi",                 ipi_prep,       ipi_exec,               NULL,           65536,          true},
>> -       {"ipi_hw",              ipi_hw_prep,    ipi_exec,               NULL,           65536,          true},
>> -       {"lpi",                 lpi_prep,       lpi_exec,               NULL,           65536,          true},
>> -       {"timer_10ms",          timer_prep,     timer_exec,             timer_post,     256,            true},
>> +       {"hvc",                 NULL,                   hvc_exec,               NULL,           65536,          true},
>> +       {"mmio_read_user",      mmio_read_user_prep,    mmio_read_user_exec,    NULL,           65536,          true},
>> +       {"mmio_read_vgic",      NULL,                   mmio_read_vgic_exec,    NULL,           65536,          true},
>> +       {"eoi",                 NULL,                   eoi_exec,               NULL,           65536,          true},
>> +       {"ipi",                 ipi_prep,               ipi_exec,               NULL,           65536,          true},
>> +       {"ipi_hw",              ipi_hw_prep,            ipi_exec,               NULL,           65536,          true},
>> +       {"lpi",                 lpi_prep,               lpi_exec,               NULL,           65536,          true},
>> +       {"timer_10ms",          timer_prep,             timer_exec,             timer_post,     256,            true},
>>  };
>>  
>>  struct ns_time {
>>
> I still haven't tested it (beyond compiling), but I've tweaked this a bit.
> You can see it here
>
> https://gitlab.com/rhdrjones/kvm-unit-tests/-/commit/71938030d160e021db3388037d0d407df17e8e5e
>
> The whole v4 of this series is here
>
> https://gitlab.com/rhdrjones/kvm-unit-tests/-/commits/efiprep

Had a look at the patch, looks good; in my suggestion I wrongly thought that readl
reads a long (64 bits), not an uint32_t value:

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>

I also ran some tests on the v4 series from your repo.

Qemu TCG on x86 machine:
    - arm compiled with arm-linux-gnu-gcc and arm-none-eabi-gcc
    - arm64, 4k and 64k pages.

Odroid-c4:
    - arm, both compilers, under kvmtool
    - arm64, 4k, 16k and 64k pages under qemu KVM and kvmtool

Rockpro64:
    - arm, both compilers, under kvmtool
    - arm64, 4k and 64k pages, under qemu KVM and kvmtool.

The ITS migration tests I had to run manually on the rockpro64 (Odroid has a
gicv2) because it looks like the run script wasn't detecting the prompt to start
migration. I'm guessing something on my side, because I had issues with the
migration tests before. Nonetheless, those tests ran just fine manually under qemu
and kvmtool, so everything looks correct to me:

Tested-by: Alexandru Elisei <alexandru.elisei@arm.com>

Thanks,

Alex
Andrew Jones May 17, 2021, 2:40 p.m. UTC | #6
On Mon, May 17, 2021 at 11:38:46AM +0100, Alexandru Elisei wrote:
> Hi Drew,
> 
> On 5/13/21 6:43 PM, Andrew Jones wrote:
> > On Thu, May 13, 2021 at 07:18:44PM +0200, Andrew Jones wrote:
> >> [..]
> >> Thanks Alex,
> >>
> >> I think a better fix is this untested one below, though. If you can test
> >> it out and confirm it also resolves the issue, then I'll add this patch
> >> to the series.
> >>
> >> Thanks,
> >> drew
> >>
> >>
> >> diff --git a/arm/micro-bench.c b/arm/micro-bench.c
> >> index 95c418c10eb4..deafd5695c33 100644
> >> --- a/arm/micro-bench.c
> >> +++ b/arm/micro-bench.c
> >> @@ -273,16 +273,22 @@ static void hvc_exec(void)
> >>         asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0");
> >>  }
> >>  
> >> -static void mmio_read_user_exec(void)
> >> +/*
> >> + * FIXME: Read device-id in virtio mmio here in order to
> >> + * force an exit to userspace. This address needs to be
> >> + * updated in the future if any relevant changes in QEMU
> >> + * test-dev are made.
> >> + */
> >> +static void *userspace_emulated_addr;
> >> +
> >> +static bool mmio_read_user_prep(void)
> >>  {
> >> -       /*
> >> -        * FIXME: Read device-id in virtio mmio here in order to
> >> -        * force an exit to userspace. This address needs to be
> >> -        * updated in the future if any relevant changes in QEMU
> >> -        * test-dev are made.
> >> -        */
> >> -       void *userspace_emulated_addr = (void*)0x0a000008;
> >> +       userspace_emulated_addr = (void*)ioremap(0x0a000008, 8);
> >> +       return true;
> >> +}
> >>  
> >> +static void mmio_read_user_exec(void)
> >> +{
> >>         readl(userspace_emulated_addr);
> >>  }
> >>  
> >> @@ -309,14 +315,14 @@ struct exit_test {
> >>  };
> >>  
> >>  static struct exit_test tests[] = {
> >> -       {"hvc",                 NULL,           hvc_exec,               NULL,           65536,          true},
> >> -       {"mmio_read_user",      NULL,           mmio_read_user_exec,    NULL,           65536,          true},
> >> -       {"mmio_read_vgic",      NULL,           mmio_read_vgic_exec,    NULL,           65536,          true},
> >> -       {"eoi",                 NULL,           eoi_exec,               NULL,           65536,          true},
> >> -       {"ipi",                 ipi_prep,       ipi_exec,               NULL,           65536,          true},
> >> -       {"ipi_hw",              ipi_hw_prep,    ipi_exec,               NULL,           65536,          true},
> >> -       {"lpi",                 lpi_prep,       lpi_exec,               NULL,           65536,          true},
> >> -       {"timer_10ms",          timer_prep,     timer_exec,             timer_post,     256,            true},
> >> +       {"hvc",                 NULL,                   hvc_exec,               NULL,           65536,          true},
> >> +       {"mmio_read_user",      mmio_read_user_prep,    mmio_read_user_exec,    NULL,           65536,          true},
> >> +       {"mmio_read_vgic",      NULL,                   mmio_read_vgic_exec,    NULL,           65536,          true},
> >> +       {"eoi",                 NULL,                   eoi_exec,               NULL,           65536,          true},
> >> +       {"ipi",                 ipi_prep,               ipi_exec,               NULL,           65536,          true},
> >> +       {"ipi_hw",              ipi_hw_prep,            ipi_exec,               NULL,           65536,          true},
> >> +       {"lpi",                 lpi_prep,               lpi_exec,               NULL,           65536,          true},
> >> +       {"timer_10ms",          timer_prep,             timer_exec,             timer_post,     256,            true},
> >>  };
> >>  
> >>  struct ns_time {
> >>
> > I still haven't tested it (beyond compiling), but I've tweaked this a bit.
> > You can see it here
> >
> > https://gitlab.com/rhdrjones/kvm-unit-tests/-/commit/71938030d160e021db3388037d0d407df17e8e5e
> >
> > The whole v4 of this series is here
> >
> > https://gitlab.com/rhdrjones/kvm-unit-tests/-/commits/efiprep
> 
> Had a look at the patch, looks good; in my suggestion I wrongly thought that readl
> reads a long (64 bits), not an uint32_t value:
> 
> Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
> 
> I also ran some tests on the v4 series from your repo.
> 
> Qemu TCG on x86 machine:
>     - arm compiled with arm-linux-gnu-gcc and arm-none-eabi-gcc
>     - arm64, 4k and 64k pages.
> 
> Odroid-c4:
>     - arm, both compilers, under kvmtool
>     - arm64, 4k, 16k and 64k pages under qemu KVM and kvmtool
> 
> Rockpro64:
>     - arm, both compilers, under kvmtool
>     - arm64, 4k and 64k pages, under qemu KVM and kvmtool.
> 
> The ITS migration tests I had to run manually on the rockpro64 (Odroid has a
> gicv2) because it looks like the run script wasn't detecting the prompt to start
> migration. I'm guessing something on my side, because I had issues with the
> migration tests before. Nonetheless, those tests ran just fine manually under qemu
> and kvmtool, so everything looks correct to me:
> 
> Tested-by: Alexandru Elisei <alexandru.elisei@arm.com>
>

Thanks Alex! I've added your tags, applied to arm/queue and sent the pull
request.

Thanks,
drew
diff mbox series

Patch

diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h
index ba3b0b2412ad..e4caa6ff5d1e 100644
--- a/lib/arm/asm/io.h
+++ b/lib/arm/asm/io.h
@@ -77,6 +77,12 @@  static inline void __raw_writel(u32 val, volatile void __iomem *addr)
 		     : "r" (val));
 }
 
+#define ioremap ioremap
+static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
+{
+	return __ioremap(phys_addr, size);
+}
+
 #define virt_to_phys virt_to_phys
 static inline phys_addr_t virt_to_phys(const volatile void *x)
 {
diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
index 122874b8aebe..94e70f0a84bf 100644
--- a/lib/arm/asm/mmu.h
+++ b/lib/arm/asm/mmu.h
@@ -8,10 +8,13 @@ 
 #include <asm/barrier.h>
 
 #define PTE_USER		L_PTE_USER
+#define PTE_UXN			L_PTE_XN
+#define PTE_PXN			L_PTE_PXN
 #define PTE_RDONLY		PTE_AP2
 #define PTE_SHARED		L_PTE_SHARED
 #define PTE_AF			PTE_EXT_AF
 #define PTE_WBWA		L_PTE_MT_WRITEALLOC
+#define PTE_UNCACHED		L_PTE_MT_UNCACHED
 
 /* See B3.18.7 TLB maintenance operations */
 
diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h
index 1fb5cd26ac66..8eb4a883808e 100644
--- a/lib/arm/asm/page.h
+++ b/lib/arm/asm/page.h
@@ -47,5 +47,7 @@  typedef struct { pteval_t pgprot; } pgprot_t;
 extern phys_addr_t __virt_to_phys(unsigned long addr);
 extern unsigned long __phys_to_virt(phys_addr_t addr);
 
+extern void *__ioremap(phys_addr_t phys_addr, size_t size);
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASMARM_PAGE_H_ */
diff --git a/lib/arm/asm/pgtable-hwdef.h b/lib/arm/asm/pgtable-hwdef.h
index fe1d8540ea3f..90fd306c7cc0 100644
--- a/lib/arm/asm/pgtable-hwdef.h
+++ b/lib/arm/asm/pgtable-hwdef.h
@@ -34,6 +34,7 @@ 
 #define L_PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
 #define L_PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
 #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 10)	/* AF */
+#define L_PTE_PXN		(_AT(pteval_t, 1) << 53)	/* PXN */
 #define L_PTE_XN		(_AT(pteval_t, 1) << 54)	/* XN */
 
 /*
diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
index 15eef007f256..791b1f88f946 100644
--- a/lib/arm/mmu.c
+++ b/lib/arm/mmu.c
@@ -11,6 +11,7 @@ 
 #include <asm/mmu.h>
 #include <asm/setup.h>
 #include <asm/page.h>
+#include <asm/io.h>
 
 #include "alloc_page.h"
 #include "vmalloc.h"
@@ -157,9 +158,8 @@  void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
 void *setup_mmu(phys_addr_t phys_end)
 {
 	uintptr_t code_end = (uintptr_t)&etext;
-	struct mem_region *r;
 
-	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
+	/* 3G-4G region is reserved for vmalloc, cap phys_end at 3G */
 	if (phys_end > (3ul << 30))
 		phys_end = 3ul << 30;
 
@@ -170,14 +170,8 @@  void *setup_mmu(phys_addr_t phys_end)
 			"Unsupported translation granule %ld\n", PAGE_SIZE);
 #endif
 
-	mmu_idmap = alloc_page();
-
-	for (r = mem_regions; r->end; ++r) {
-		if (!(r->flags & MR_F_IO))
-			continue;
-		mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end,
-				   __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER));
-	}
+	if (!mmu_idmap)
+		mmu_idmap = alloc_page();
 
 	/* armv8 requires code shared between EL1 and EL0 to be read-only */
 	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
@@ -192,6 +186,29 @@  void *setup_mmu(phys_addr_t phys_end)
 	return mmu_idmap;
 }
 
+void __iomem *__ioremap(phys_addr_t phys_addr, size_t size)
+{
+	phys_addr_t paddr_aligned = phys_addr & PAGE_MASK;
+	phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size);
+	pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER | PTE_UXN | PTE_PXN);
+	pgd_t *pgtable;
+
+	assert(sizeof(long) == 8 || !(phys_addr >> 32));
+
+	if (mmu_enabled()) {
+		pgtable = current_thread_info()->pgtable;
+	} else {
+		if (!mmu_idmap)
+			mmu_idmap = alloc_page();
+		pgtable = mmu_idmap;
+	}
+
+	mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned,
+			   paddr_end, prot);
+
+	return (void __iomem *)(unsigned long)phys_addr;
+}
+
 phys_addr_t __virt_to_phys(unsigned long addr)
 {
 	if (mmu_enabled()) {
diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h
index e0a03b250d5b..be19f471c0fa 100644
--- a/lib/arm64/asm/io.h
+++ b/lib/arm64/asm/io.h
@@ -71,6 +71,12 @@  static inline u64 __raw_readq(const volatile void __iomem *addr)
 	return val;
 }
 
+#define ioremap ioremap
+static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
+{
+	return __ioremap(phys_addr, size);
+}
+
 #define virt_to_phys virt_to_phys
 static inline phys_addr_t virt_to_phys(const volatile void *x)
 {
diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h
index 72d75eafc882..72371b2d9fe3 100644
--- a/lib/arm64/asm/mmu.h
+++ b/lib/arm64/asm/mmu.h
@@ -8,6 +8,7 @@ 
 #include <asm/barrier.h>
 
 #define PMD_SECT_UNCACHED	PMD_ATTRINDX(MT_DEVICE_nGnRE)
+#define PTE_UNCACHED		PTE_ATTRINDX(MT_DEVICE_nGnRE)
 #define PTE_WBWA		PTE_ATTRINDX(MT_NORMAL)
 
 static inline void flush_tlb_all(void)
diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h
index ae4484b22114..d0fac6ea563d 100644
--- a/lib/arm64/asm/page.h
+++ b/lib/arm64/asm/page.h
@@ -72,5 +72,7 @@  typedef struct { pteval_t pgprot; } pgprot_t;
 extern phys_addr_t __virt_to_phys(unsigned long addr);
 extern unsigned long __phys_to_virt(phys_addr_t addr);
 
+extern void *__ioremap(phys_addr_t phys_addr, size_t size);
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASMARM64_PAGE_H_ */