Message ID | 20210429164130.405198-5-drjones@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm/arm64: Prepare for target-efi | expand |
Hi Drew, On 4/29/21 5:41 PM, Andrew Jones wrote: > By providing a proper ioremap function, we can just rely on devices > calling it for each region they need (as they already do) instead of > mapping a big assumed I/O range. We don't require the MMU to be > enabled at the time of the ioremap. In that case, we add the mapping > to the identity map anyway. This allows us to call setup_vm after > io_init. Why don't we just call setup_vm before io_init, I hear you > ask? Well, that's because tests like sieve want to start with the MMU > off, later call setup_vm, and all the while have working I/O. Some > unit tests are just really demanding... > > While at it, ensure we map the I/O regions with XN (execute never), > as suggested by Alexandru Elisei. I got to thinking why this wasn't an issue before. Under KVM, device memory is not usually mapped at stage 2, so any speculated reads wouldn't have reached memory at all. The only way I imagine that happening if the user was running kvm-unit-tests with a passthrough PCI device, which I don't think happens too often. But we cannot rely on devices not being mapped at stage 2 when running under EFI (we're mapping them ourselves with ioremap), so I believe this is a good fix. Had another look at the patch, looks good to me: Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com> Thanks, Alex > > Signed-off-by: Andrew Jones <drjones@redhat.com> > --- > lib/arm/asm/io.h | 6 ++++++ > lib/arm/asm/mmu.h | 3 +++ > lib/arm/asm/page.h | 2 ++ > lib/arm/asm/pgtable-hwdef.h | 1 + > lib/arm/mmu.c | 37 +++++++++++++++++++++++++++---------- > lib/arm64/asm/io.h | 6 ++++++ > lib/arm64/asm/mmu.h | 1 + > lib/arm64/asm/page.h | 2 ++ > 8 files changed, 48 insertions(+), 10 deletions(-) > > diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h > index ba3b0b2412ad..e4caa6ff5d1e 100644 > --- a/lib/arm/asm/io.h > +++ b/lib/arm/asm/io.h > @@ -77,6 +77,12 @@ static inline void __raw_writel(u32 val, volatile void __iomem *addr) > : "r" (val)); > } > > +#define ioremap ioremap > +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size) > +{ > + return __ioremap(phys_addr, size); > +} > + > #define virt_to_phys virt_to_phys > static inline phys_addr_t virt_to_phys(const volatile void *x) > { > diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h > index 122874b8aebe..94e70f0a84bf 100644 > --- a/lib/arm/asm/mmu.h > +++ b/lib/arm/asm/mmu.h > @@ -8,10 +8,13 @@ > #include <asm/barrier.h> > > #define PTE_USER L_PTE_USER > +#define PTE_UXN L_PTE_XN > +#define PTE_PXN L_PTE_PXN > #define PTE_RDONLY PTE_AP2 > #define PTE_SHARED L_PTE_SHARED > #define PTE_AF PTE_EXT_AF > #define PTE_WBWA L_PTE_MT_WRITEALLOC > +#define PTE_UNCACHED L_PTE_MT_UNCACHED > > /* See B3.18.7 TLB maintenance operations */ > > diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h > index 1fb5cd26ac66..8eb4a883808e 100644 > --- a/lib/arm/asm/page.h > +++ b/lib/arm/asm/page.h > @@ -47,5 +47,7 @@ typedef struct { pteval_t pgprot; } pgprot_t; > extern phys_addr_t __virt_to_phys(unsigned long addr); > extern unsigned long __phys_to_virt(phys_addr_t addr); > > +extern void *__ioremap(phys_addr_t phys_addr, size_t size); > + > #endif /* !__ASSEMBLY__ */ > #endif /* _ASMARM_PAGE_H_ */ > diff --git a/lib/arm/asm/pgtable-hwdef.h b/lib/arm/asm/pgtable-hwdef.h > index fe1d8540ea3f..90fd306c7cc0 100644 > --- a/lib/arm/asm/pgtable-hwdef.h > +++ b/lib/arm/asm/pgtable-hwdef.h > @@ -34,6 +34,7 @@ > #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ > #define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ > #define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */ > +#define L_PTE_PXN (_AT(pteval_t, 1) << 53) /* PXN */ > #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ > > /* > diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c > index 15eef007f256..791b1f88f946 100644 > --- a/lib/arm/mmu.c > +++ b/lib/arm/mmu.c > @@ -11,6 +11,7 @@ > #include <asm/mmu.h> > #include <asm/setup.h> > #include <asm/page.h> > +#include <asm/io.h> > > #include "alloc_page.h" > #include "vmalloc.h" > @@ -157,9 +158,8 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset, > void *setup_mmu(phys_addr_t phys_end) > { > uintptr_t code_end = (uintptr_t)&etext; > - struct mem_region *r; > > - /* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */ > + /* 3G-4G region is reserved for vmalloc, cap phys_end at 3G */ > if (phys_end > (3ul << 30)) > phys_end = 3ul << 30; > > @@ -170,14 +170,8 @@ void *setup_mmu(phys_addr_t phys_end) > "Unsupported translation granule %ld\n", PAGE_SIZE); > #endif > > - mmu_idmap = alloc_page(); > - > - for (r = mem_regions; r->end; ++r) { > - if (!(r->flags & MR_F_IO)) > - continue; > - mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end, > - __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER)); > - } > + if (!mmu_idmap) > + mmu_idmap = alloc_page(); > > /* armv8 requires code shared between EL1 and EL0 to be read-only */ > mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET, > @@ -192,6 +186,29 @@ void *setup_mmu(phys_addr_t phys_end) > return mmu_idmap; > } > > +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size) > +{ > + phys_addr_t paddr_aligned = phys_addr & PAGE_MASK; > + phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size); > + pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER | PTE_UXN | PTE_PXN); > + pgd_t *pgtable; > + > + assert(sizeof(long) == 8 || !(phys_addr >> 32)); > + > + if (mmu_enabled()) { > + pgtable = current_thread_info()->pgtable; > + } else { > + if (!mmu_idmap) > + mmu_idmap = alloc_page(); > + pgtable = mmu_idmap; > + } > + > + mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned, > + paddr_end, prot); > + > + return (void __iomem *)(unsigned long)phys_addr; > +} > + > phys_addr_t __virt_to_phys(unsigned long addr) > { > if (mmu_enabled()) { > diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h > index e0a03b250d5b..be19f471c0fa 100644 > --- a/lib/arm64/asm/io.h > +++ b/lib/arm64/asm/io.h > @@ -71,6 +71,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) > return val; > } > > +#define ioremap ioremap > +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size) > +{ > + return __ioremap(phys_addr, size); > +} > + > #define virt_to_phys virt_to_phys > static inline phys_addr_t virt_to_phys(const volatile void *x) > { > diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h > index 72d75eafc882..72371b2d9fe3 100644 > --- a/lib/arm64/asm/mmu.h > +++ b/lib/arm64/asm/mmu.h > @@ -8,6 +8,7 @@ > #include <asm/barrier.h> > > #define PMD_SECT_UNCACHED PMD_ATTRINDX(MT_DEVICE_nGnRE) > +#define PTE_UNCACHED PTE_ATTRINDX(MT_DEVICE_nGnRE) > #define PTE_WBWA PTE_ATTRINDX(MT_NORMAL) > > static inline void flush_tlb_all(void) > diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h > index ae4484b22114..d0fac6ea563d 100644 > --- a/lib/arm64/asm/page.h > +++ b/lib/arm64/asm/page.h > @@ -72,5 +72,7 @@ typedef struct { pteval_t pgprot; } pgprot_t; > extern phys_addr_t __virt_to_phys(unsigned long addr); > extern unsigned long __phys_to_virt(phys_addr_t addr); > > +extern void *__ioremap(phys_addr_t phys_addr, size_t size); > + > #endif /* !__ASSEMBLY__ */ > #endif /* _ASMARM64_PAGE_H_ */
Hi Drew, On 5/10/21 4:45 PM, Alexandru Elisei wrote: > Hi Drew, > > On 4/29/21 5:41 PM, Andrew Jones wrote: >> By providing a proper ioremap function, we can just rely on devices >> calling it for each region they need (as they already do) instead of >> mapping a big assumed I/O range. We don't require the MMU to be >> enabled at the time of the ioremap. In that case, we add the mapping >> to the identity map anyway. This allows us to call setup_vm after >> io_init. Why don't we just call setup_vm before io_init, I hear you >> ask? Well, that's because tests like sieve want to start with the MMU >> off, later call setup_vm, and all the while have working I/O. Some >> unit tests are just really demanding... >> >> While at it, ensure we map the I/O regions with XN (execute never), >> as suggested by Alexandru Elisei. > I got to thinking why this wasn't an issue before. Under KVM, device memory is not > usually mapped at stage 2, so any speculated reads wouldn't have reached memory at > all. The only way I imagine that happening if the user was running kvm-unit-tests > with a passthrough PCI device, which I don't think happens too often. > > But we cannot rely on devices not being mapped at stage 2 when running under EFI > (we're mapping them ourselves with ioremap), so I believe this is a good fix. > > Had another look at the patch, looks good to me: While testing the series I discovered that this patch introduces a bug when running under kvmtool. Here's the splat: $ ./configure --vmm=kvmtool --earlycon=uart,mmio,0x1000000 --page-size=4K && make clean && make -j6 && ./vm run -c2 -m128 -f arm/micro-bench.flat [..] # lkvm run --firmware arm/micro-bench.flat -m 128 -c 2 --name guest-6986 Info: Placing fdt at 0x80200000 - 0x80210000 chr_testdev_init: chr-testdev: can't find a virtio-console Timer Frequency 24000000 Hz (Output in microseconds) name total ns avg ns -------------------------------------------------------------------------------------------- hvc 168674516.0 2573.0 Load address: 80000000 PC: 80000128 PC offset: 128 Unhandled exception ec=0x25 (DABT_EL1) Vector: 4 (el1h_sync) ESR_EL1: 96000006, ec=0x25 (DABT_EL1) FAR_EL1: 000000000a000008 (valid) Exception frame registers: pc : [<0000000080000128>] lr : [<000000008000cac8>] pstate: 800003c5 sp : 000000008003ff90 x29: 0000000000000000 x28: 0000000000000000 x27: 00000011ada4d0c2 x26: 0000000000000000 x25: 0000000080015978 x24: 0000000080015a90 x23: 0000048c27394fff x22: 20c49ba5e353f7cf x21: 28f5c28f5c28f5c3 x20: 0000000080016af0 x19: 000000e8d4a51000 x18: 0000000080040000 x17: 0000000000000000 x16: 0000000080008210 x15: 000000008003fe5c x14: 0000000000000260 x13: 00000000000002a4 x12: 0000000080040000 x11: 0000000000000001 x10: 0000000000000060 x9 : 0000000000000000 x8 : 0000000000000039 x7 : 0000000000000040 x6 : 0000000080013983 x5 : 000000008003f74e x4 : 000000008003f69c x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 000000000a000008 The issue is caused by the mmio_read_user_exec() function from arm/micro-bench.c. kvmtool doesn't have a chr-testdev device, and because probing fails, the address 0x0a000008 isn't ioremap'ed. The 0-1G memory region is not automatically mapped anymore, and the access triggers a data abort at stage 1. I fixed the splat with this change: diff --git a/arm/micro-bench.c b/arm/micro-bench.c index 95c418c10eb4..ad9e44d71d8d 100644 --- a/arm/micro-bench.c +++ b/arm/micro-bench.c @@ -281,7 +281,7 @@ static void mmio_read_user_exec(void) * updated in the future if any relevant changes in QEMU * test-dev are made. */ - void *userspace_emulated_addr = (void*)0x0a000008; + void *userspace_emulated_addr = (void*)ioremap(0x0a000008, 8); readl(userspace_emulated_addr); } kvmtool ignores the MMIO exit reason if no device owns the IPA, that's why it also works on kvmtool. The micro-bench test with the diff passes under qemu and kvmtool, tested with 4K, 16K and 64K pages on an odroid-c4. Thanks, Alex > > Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com> > > Thanks, > > Alex > >> Signed-off-by: Andrew Jones <drjones@redhat.com> >> --- >> lib/arm/asm/io.h | 6 ++++++ >> lib/arm/asm/mmu.h | 3 +++ >> lib/arm/asm/page.h | 2 ++ >> lib/arm/asm/pgtable-hwdef.h | 1 + >> lib/arm/mmu.c | 37 +++++++++++++++++++++++++++---------- >> lib/arm64/asm/io.h | 6 ++++++ >> lib/arm64/asm/mmu.h | 1 + >> lib/arm64/asm/page.h | 2 ++ >> 8 files changed, 48 insertions(+), 10 deletions(-) >> >> diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h >> index ba3b0b2412ad..e4caa6ff5d1e 100644 >> --- a/lib/arm/asm/io.h >> +++ b/lib/arm/asm/io.h >> @@ -77,6 +77,12 @@ static inline void __raw_writel(u32 val, volatile void __iomem *addr) >> : "r" (val)); >> } >> >> +#define ioremap ioremap >> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size) >> +{ >> + return __ioremap(phys_addr, size); >> +} >> + >> #define virt_to_phys virt_to_phys >> static inline phys_addr_t virt_to_phys(const volatile void *x) >> { >> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h >> index 122874b8aebe..94e70f0a84bf 100644 >> --- a/lib/arm/asm/mmu.h >> +++ b/lib/arm/asm/mmu.h >> @@ -8,10 +8,13 @@ >> #include <asm/barrier.h> >> >> #define PTE_USER L_PTE_USER >> +#define PTE_UXN L_PTE_XN >> +#define PTE_PXN L_PTE_PXN >> #define PTE_RDONLY PTE_AP2 >> #define PTE_SHARED L_PTE_SHARED >> #define PTE_AF PTE_EXT_AF >> #define PTE_WBWA L_PTE_MT_WRITEALLOC >> +#define PTE_UNCACHED L_PTE_MT_UNCACHED >> >> /* See B3.18.7 TLB maintenance operations */ >> >> diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h >> index 1fb5cd26ac66..8eb4a883808e 100644 >> --- a/lib/arm/asm/page.h >> +++ b/lib/arm/asm/page.h >> @@ -47,5 +47,7 @@ typedef struct { pteval_t pgprot; } pgprot_t; >> extern phys_addr_t __virt_to_phys(unsigned long addr); >> extern unsigned long __phys_to_virt(phys_addr_t addr); >> >> +extern void *__ioremap(phys_addr_t phys_addr, size_t size); >> + >> #endif /* !__ASSEMBLY__ */ >> #endif /* _ASMARM_PAGE_H_ */ >> diff --git a/lib/arm/asm/pgtable-hwdef.h b/lib/arm/asm/pgtable-hwdef.h >> index fe1d8540ea3f..90fd306c7cc0 100644 >> --- a/lib/arm/asm/pgtable-hwdef.h >> +++ b/lib/arm/asm/pgtable-hwdef.h >> @@ -34,6 +34,7 @@ >> #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ >> #define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ >> #define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */ >> +#define L_PTE_PXN (_AT(pteval_t, 1) << 53) /* PXN */ >> #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ >> >> /* >> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c >> index 15eef007f256..791b1f88f946 100644 >> --- a/lib/arm/mmu.c >> +++ b/lib/arm/mmu.c >> @@ -11,6 +11,7 @@ >> #include <asm/mmu.h> >> #include <asm/setup.h> >> #include <asm/page.h> >> +#include <asm/io.h> >> >> #include "alloc_page.h" >> #include "vmalloc.h" >> @@ -157,9 +158,8 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset, >> void *setup_mmu(phys_addr_t phys_end) >> { >> uintptr_t code_end = (uintptr_t)&etext; >> - struct mem_region *r; >> >> - /* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */ >> + /* 3G-4G region is reserved for vmalloc, cap phys_end at 3G */ >> if (phys_end > (3ul << 30)) >> phys_end = 3ul << 30; >> >> @@ -170,14 +170,8 @@ void *setup_mmu(phys_addr_t phys_end) >> "Unsupported translation granule %ld\n", PAGE_SIZE); >> #endif >> >> - mmu_idmap = alloc_page(); >> - >> - for (r = mem_regions; r->end; ++r) { >> - if (!(r->flags & MR_F_IO)) >> - continue; >> - mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end, >> - __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER)); >> - } >> + if (!mmu_idmap) >> + mmu_idmap = alloc_page(); >> >> /* armv8 requires code shared between EL1 and EL0 to be read-only */ >> mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET, >> @@ -192,6 +186,29 @@ void *setup_mmu(phys_addr_t phys_end) >> return mmu_idmap; >> } >> >> +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size) >> +{ >> + phys_addr_t paddr_aligned = phys_addr & PAGE_MASK; >> + phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size); >> + pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER | PTE_UXN | PTE_PXN); >> + pgd_t *pgtable; >> + >> + assert(sizeof(long) == 8 || !(phys_addr >> 32)); >> + >> + if (mmu_enabled()) { >> + pgtable = current_thread_info()->pgtable; >> + } else { >> + if (!mmu_idmap) >> + mmu_idmap = alloc_page(); >> + pgtable = mmu_idmap; >> + } >> + >> + mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned, >> + paddr_end, prot); >> + >> + return (void __iomem *)(unsigned long)phys_addr; >> +} >> + >> phys_addr_t __virt_to_phys(unsigned long addr) >> { >> if (mmu_enabled()) { >> diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h >> index e0a03b250d5b..be19f471c0fa 100644 >> --- a/lib/arm64/asm/io.h >> +++ b/lib/arm64/asm/io.h >> @@ -71,6 +71,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) >> return val; >> } >> >> +#define ioremap ioremap >> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size) >> +{ >> + return __ioremap(phys_addr, size); >> +} >> + >> #define virt_to_phys virt_to_phys >> static inline phys_addr_t virt_to_phys(const volatile void *x) >> { >> diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h >> index 72d75eafc882..72371b2d9fe3 100644 >> --- a/lib/arm64/asm/mmu.h >> +++ b/lib/arm64/asm/mmu.h >> @@ -8,6 +8,7 @@ >> #include <asm/barrier.h> >> >> #define PMD_SECT_UNCACHED PMD_ATTRINDX(MT_DEVICE_nGnRE) >> +#define PTE_UNCACHED PTE_ATTRINDX(MT_DEVICE_nGnRE) >> #define PTE_WBWA PTE_ATTRINDX(MT_NORMAL) >> >> static inline void flush_tlb_all(void) >> diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h >> index ae4484b22114..d0fac6ea563d 100644 >> --- a/lib/arm64/asm/page.h >> +++ b/lib/arm64/asm/page.h >> @@ -72,5 +72,7 @@ typedef struct { pteval_t pgprot; } pgprot_t; >> extern phys_addr_t __virt_to_phys(unsigned long addr); >> extern unsigned long __phys_to_virt(phys_addr_t addr); >> >> +extern void *__ioremap(phys_addr_t phys_addr, size_t size); >> + >> #endif /* !__ASSEMBLY__ */ >> #endif /* _ASMARM64_PAGE_H_ */
On Thu, May 13, 2021 at 04:48:16PM +0100, Alexandru Elisei wrote: > Hi Drew, > > On 5/10/21 4:45 PM, Alexandru Elisei wrote: > > Hi Drew, > > > > On 4/29/21 5:41 PM, Andrew Jones wrote: > >> By providing a proper ioremap function, we can just rely on devices > >> calling it for each region they need (as they already do) instead of > >> mapping a big assumed I/O range. We don't require the MMU to be > >> enabled at the time of the ioremap. In that case, we add the mapping > >> to the identity map anyway. This allows us to call setup_vm after > >> io_init. Why don't we just call setup_vm before io_init, I hear you > >> ask? Well, that's because tests like sieve want to start with the MMU > >> off, later call setup_vm, and all the while have working I/O. Some > >> unit tests are just really demanding... > >> > >> While at it, ensure we map the I/O regions with XN (execute never), > >> as suggested by Alexandru Elisei. > > I got to thinking why this wasn't an issue before. Under KVM, device memory is not > > usually mapped at stage 2, so any speculated reads wouldn't have reached memory at > > all. The only way I imagine that happening if the user was running kvm-unit-tests > > with a passthrough PCI device, which I don't think happens too often. > > > > But we cannot rely on devices not being mapped at stage 2 when running under EFI > > (we're mapping them ourselves with ioremap), so I believe this is a good fix. > > > > Had another look at the patch, looks good to me: > > While testing the series I discovered that this patch introduces a bug when > running under kvmtool. > > Here's the splat: > > $ ./configure --vmm=kvmtool --earlycon=uart,mmio,0x1000000 --page-size=4K && make > clean && make -j6 && ./vm run -c2 -m128 -f arm/micro-bench.flat > [..] > # lkvm run --firmware arm/micro-bench.flat -m 128 -c 2 --name guest-6986 > Info: Placing fdt at 0x80200000 - 0x80210000 > chr_testdev_init: chr-testdev: can't find a virtio-console > Timer Frequency 24000000 Hz (Output in microseconds) > > name total ns avg > ns > -------------------------------------------------------------------------------------------- > hvc 168674516.0 > 2573.0 > Load address: 80000000 > PC: 80000128 PC offset: 128 > Unhandled exception ec=0x25 (DABT_EL1) > Vector: 4 (el1h_sync) > ESR_EL1: 96000006, ec=0x25 (DABT_EL1) > FAR_EL1: 000000000a000008 (valid) > Exception frame registers: > pc : [<0000000080000128>] lr : [<000000008000cac8>] pstate: 800003c5 > sp : 000000008003ff90 > x29: 0000000000000000 x28: 0000000000000000 > x27: 00000011ada4d0c2 x26: 0000000000000000 > x25: 0000000080015978 x24: 0000000080015a90 > x23: 0000048c27394fff x22: 20c49ba5e353f7cf > x21: 28f5c28f5c28f5c3 x20: 0000000080016af0 > x19: 000000e8d4a51000 x18: 0000000080040000 > x17: 0000000000000000 x16: 0000000080008210 > x15: 000000008003fe5c x14: 0000000000000260 > x13: 00000000000002a4 x12: 0000000080040000 > x11: 0000000000000001 x10: 0000000000000060 > x9 : 0000000000000000 x8 : 0000000000000039 > x7 : 0000000000000040 x6 : 0000000080013983 > x5 : 000000008003f74e x4 : 000000008003f69c > x3 : 0000000000000000 x2 : 0000000000000000 > x1 : 0000000000000000 x0 : 000000000a000008 > > The issue is caused by the mmio_read_user_exec() function from arm/micro-bench.c. > kvmtool doesn't have a chr-testdev device, and because probing fails, the address > 0x0a000008 isn't ioremap'ed. The 0-1G memory region is not automatically mapped > anymore, and the access triggers a data abort at stage 1. > > I fixed the splat with this change: > > diff --git a/arm/micro-bench.c b/arm/micro-bench.c > index 95c418c10eb4..ad9e44d71d8d 100644 > --- a/arm/micro-bench.c > +++ b/arm/micro-bench.c > @@ -281,7 +281,7 @@ static void mmio_read_user_exec(void) > * updated in the future if any relevant changes in QEMU > * test-dev are made. > */ > - void *userspace_emulated_addr = (void*)0x0a000008; > + void *userspace_emulated_addr = (void*)ioremap(0x0a000008, 8); > > readl(userspace_emulated_addr); > } > > kvmtool ignores the MMIO exit reason if no device owns the IPA, that's why it also > works on kvmtool. > > The micro-bench test with the diff passes under qemu and kvmtool, tested with 4K, > 16K and 64K pages on an odroid-c4. > Thanks Alex, I think a better fix is this untested one below, though. If you can test it out and confirm it also resolves the issue, then I'll add this patch to the series. Thanks, drew diff --git a/arm/micro-bench.c b/arm/micro-bench.c index 95c418c10eb4..deafd5695c33 100644 --- a/arm/micro-bench.c +++ b/arm/micro-bench.c @@ -273,16 +273,22 @@ static void hvc_exec(void) asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); } -static void mmio_read_user_exec(void) +/* + * FIXME: Read device-id in virtio mmio here in order to + * force an exit to userspace. This address needs to be + * updated in the future if any relevant changes in QEMU + * test-dev are made. + */ +static void *userspace_emulated_addr; + +static bool mmio_read_user_prep(void) { - /* - * FIXME: Read device-id in virtio mmio here in order to - * force an exit to userspace. This address needs to be - * updated in the future if any relevant changes in QEMU - * test-dev are made. - */ - void *userspace_emulated_addr = (void*)0x0a000008; + userspace_emulated_addr = (void*)ioremap(0x0a000008, 8); + return true; +} +static void mmio_read_user_exec(void) +{ readl(userspace_emulated_addr); } @@ -309,14 +315,14 @@ struct exit_test { }; static struct exit_test tests[] = { - {"hvc", NULL, hvc_exec, NULL, 65536, true}, - {"mmio_read_user", NULL, mmio_read_user_exec, NULL, 65536, true}, - {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, - {"eoi", NULL, eoi_exec, NULL, 65536, true}, - {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, - {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, - {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, - {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, + {"hvc", NULL, hvc_exec, NULL, 65536, true}, + {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true}, + {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, + {"eoi", NULL, eoi_exec, NULL, 65536, true}, + {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, + {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, + {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, + {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, }; struct ns_time {
On Thu, May 13, 2021 at 07:18:44PM +0200, Andrew Jones wrote: > On Thu, May 13, 2021 at 04:48:16PM +0100, Alexandru Elisei wrote: > > Hi Drew, > > > > On 5/10/21 4:45 PM, Alexandru Elisei wrote: > > > Hi Drew, > > > > > > On 4/29/21 5:41 PM, Andrew Jones wrote: > > >> By providing a proper ioremap function, we can just rely on devices > > >> calling it for each region they need (as they already do) instead of > > >> mapping a big assumed I/O range. We don't require the MMU to be > > >> enabled at the time of the ioremap. In that case, we add the mapping > > >> to the identity map anyway. This allows us to call setup_vm after > > >> io_init. Why don't we just call setup_vm before io_init, I hear you > > >> ask? Well, that's because tests like sieve want to start with the MMU > > >> off, later call setup_vm, and all the while have working I/O. Some > > >> unit tests are just really demanding... > > >> > > >> While at it, ensure we map the I/O regions with XN (execute never), > > >> as suggested by Alexandru Elisei. > > > I got to thinking why this wasn't an issue before. Under KVM, device memory is not > > > usually mapped at stage 2, so any speculated reads wouldn't have reached memory at > > > all. The only way I imagine that happening if the user was running kvm-unit-tests > > > with a passthrough PCI device, which I don't think happens too often. > > > > > > But we cannot rely on devices not being mapped at stage 2 when running under EFI > > > (we're mapping them ourselves with ioremap), so I believe this is a good fix. > > > > > > Had another look at the patch, looks good to me: > > > > While testing the series I discovered that this patch introduces a bug when > > running under kvmtool. > > > > Here's the splat: > > > > $ ./configure --vmm=kvmtool --earlycon=uart,mmio,0x1000000 --page-size=4K && make > > clean && make -j6 && ./vm run -c2 -m128 -f arm/micro-bench.flat > > [..] > > # lkvm run --firmware arm/micro-bench.flat -m 128 -c 2 --name guest-6986 > > Info: Placing fdt at 0x80200000 - 0x80210000 > > chr_testdev_init: chr-testdev: can't find a virtio-console > > Timer Frequency 24000000 Hz (Output in microseconds) > > > > name total ns avg > > ns > > -------------------------------------------------------------------------------------------- > > hvc 168674516.0 > > 2573.0 > > Load address: 80000000 > > PC: 80000128 PC offset: 128 > > Unhandled exception ec=0x25 (DABT_EL1) > > Vector: 4 (el1h_sync) > > ESR_EL1: 96000006, ec=0x25 (DABT_EL1) > > FAR_EL1: 000000000a000008 (valid) > > Exception frame registers: > > pc : [<0000000080000128>] lr : [<000000008000cac8>] pstate: 800003c5 > > sp : 000000008003ff90 > > x29: 0000000000000000 x28: 0000000000000000 > > x27: 00000011ada4d0c2 x26: 0000000000000000 > > x25: 0000000080015978 x24: 0000000080015a90 > > x23: 0000048c27394fff x22: 20c49ba5e353f7cf > > x21: 28f5c28f5c28f5c3 x20: 0000000080016af0 > > x19: 000000e8d4a51000 x18: 0000000080040000 > > x17: 0000000000000000 x16: 0000000080008210 > > x15: 000000008003fe5c x14: 0000000000000260 > > x13: 00000000000002a4 x12: 0000000080040000 > > x11: 0000000000000001 x10: 0000000000000060 > > x9 : 0000000000000000 x8 : 0000000000000039 > > x7 : 0000000000000040 x6 : 0000000080013983 > > x5 : 000000008003f74e x4 : 000000008003f69c > > x3 : 0000000000000000 x2 : 0000000000000000 > > x1 : 0000000000000000 x0 : 000000000a000008 > > > > The issue is caused by the mmio_read_user_exec() function from arm/micro-bench.c. > > kvmtool doesn't have a chr-testdev device, and because probing fails, the address > > 0x0a000008 isn't ioremap'ed. The 0-1G memory region is not automatically mapped > > anymore, and the access triggers a data abort at stage 1. > > > > I fixed the splat with this change: > > > > diff --git a/arm/micro-bench.c b/arm/micro-bench.c > > index 95c418c10eb4..ad9e44d71d8d 100644 > > --- a/arm/micro-bench.c > > +++ b/arm/micro-bench.c > > @@ -281,7 +281,7 @@ static void mmio_read_user_exec(void) > > * updated in the future if any relevant changes in QEMU > > * test-dev are made. > > */ > > - void *userspace_emulated_addr = (void*)0x0a000008; > > + void *userspace_emulated_addr = (void*)ioremap(0x0a000008, 8); > > > > readl(userspace_emulated_addr); > > } > > > > kvmtool ignores the MMIO exit reason if no device owns the IPA, that's why it also > > works on kvmtool. > > > > The micro-bench test with the diff passes under qemu and kvmtool, tested with 4K, > > 16K and 64K pages on an odroid-c4. > > > > Thanks Alex, > > I think a better fix is this untested one below, though. If you can test > it out and confirm it also resolves the issue, then I'll add this patch > to the series. > > Thanks, > drew > > > diff --git a/arm/micro-bench.c b/arm/micro-bench.c > index 95c418c10eb4..deafd5695c33 100644 > --- a/arm/micro-bench.c > +++ b/arm/micro-bench.c > @@ -273,16 +273,22 @@ static void hvc_exec(void) > asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); > } > > -static void mmio_read_user_exec(void) > +/* > + * FIXME: Read device-id in virtio mmio here in order to > + * force an exit to userspace. This address needs to be > + * updated in the future if any relevant changes in QEMU > + * test-dev are made. > + */ > +static void *userspace_emulated_addr; > + > +static bool mmio_read_user_prep(void) > { > - /* > - * FIXME: Read device-id in virtio mmio here in order to > - * force an exit to userspace. This address needs to be > - * updated in the future if any relevant changes in QEMU > - * test-dev are made. > - */ > - void *userspace_emulated_addr = (void*)0x0a000008; > + userspace_emulated_addr = (void*)ioremap(0x0a000008, 8); > + return true; > +} > > +static void mmio_read_user_exec(void) > +{ > readl(userspace_emulated_addr); > } > > @@ -309,14 +315,14 @@ struct exit_test { > }; > > static struct exit_test tests[] = { > - {"hvc", NULL, hvc_exec, NULL, 65536, true}, > - {"mmio_read_user", NULL, mmio_read_user_exec, NULL, 65536, true}, > - {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, > - {"eoi", NULL, eoi_exec, NULL, 65536, true}, > - {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, > - {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, > - {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, > - {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, > + {"hvc", NULL, hvc_exec, NULL, 65536, true}, > + {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true}, > + {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, > + {"eoi", NULL, eoi_exec, NULL, 65536, true}, > + {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, > + {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, > + {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, > + {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, > }; > > struct ns_time { > I still haven't tested it (beyond compiling), but I've tweaked this a bit. You can see it here https://gitlab.com/rhdrjones/kvm-unit-tests/-/commit/71938030d160e021db3388037d0d407df17e8e5e The whole v4 of this series is here https://gitlab.com/rhdrjones/kvm-unit-tests/-/commits/efiprep Thanks, drew
Hi Drew, On 5/13/21 6:43 PM, Andrew Jones wrote: > On Thu, May 13, 2021 at 07:18:44PM +0200, Andrew Jones wrote: >> [..] >> Thanks Alex, >> >> I think a better fix is this untested one below, though. If you can test >> it out and confirm it also resolves the issue, then I'll add this patch >> to the series. >> >> Thanks, >> drew >> >> >> diff --git a/arm/micro-bench.c b/arm/micro-bench.c >> index 95c418c10eb4..deafd5695c33 100644 >> --- a/arm/micro-bench.c >> +++ b/arm/micro-bench.c >> @@ -273,16 +273,22 @@ static void hvc_exec(void) >> asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); >> } >> >> -static void mmio_read_user_exec(void) >> +/* >> + * FIXME: Read device-id in virtio mmio here in order to >> + * force an exit to userspace. This address needs to be >> + * updated in the future if any relevant changes in QEMU >> + * test-dev are made. >> + */ >> +static void *userspace_emulated_addr; >> + >> +static bool mmio_read_user_prep(void) >> { >> - /* >> - * FIXME: Read device-id in virtio mmio here in order to >> - * force an exit to userspace. This address needs to be >> - * updated in the future if any relevant changes in QEMU >> - * test-dev are made. >> - */ >> - void *userspace_emulated_addr = (void*)0x0a000008; >> + userspace_emulated_addr = (void*)ioremap(0x0a000008, 8); >> + return true; >> +} >> >> +static void mmio_read_user_exec(void) >> +{ >> readl(userspace_emulated_addr); >> } >> >> @@ -309,14 +315,14 @@ struct exit_test { >> }; >> >> static struct exit_test tests[] = { >> - {"hvc", NULL, hvc_exec, NULL, 65536, true}, >> - {"mmio_read_user", NULL, mmio_read_user_exec, NULL, 65536, true}, >> - {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, >> - {"eoi", NULL, eoi_exec, NULL, 65536, true}, >> - {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, >> - {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, >> - {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, >> - {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, >> + {"hvc", NULL, hvc_exec, NULL, 65536, true}, >> + {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true}, >> + {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, >> + {"eoi", NULL, eoi_exec, NULL, 65536, true}, >> + {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, >> + {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, >> + {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, >> + {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, >> }; >> >> struct ns_time { >> > I still haven't tested it (beyond compiling), but I've tweaked this a bit. > You can see it here > > https://gitlab.com/rhdrjones/kvm-unit-tests/-/commit/71938030d160e021db3388037d0d407df17e8e5e > > The whole v4 of this series is here > > https://gitlab.com/rhdrjones/kvm-unit-tests/-/commits/efiprep Had a look at the patch, looks good; in my suggestion I wrongly thought that readl reads a long (64 bits), not an uint32_t value: Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com> I also ran some tests on the v4 series from your repo. Qemu TCG on x86 machine: - arm compiled with arm-linux-gnu-gcc and arm-none-eabi-gcc - arm64, 4k and 64k pages. Odroid-c4: - arm, both compilers, under kvmtool - arm64, 4k, 16k and 64k pages under qemu KVM and kvmtool Rockpro64: - arm, both compilers, under kvmtool - arm64, 4k and 64k pages, under qemu KVM and kvmtool. The ITS migration tests I had to run manually on the rockpro64 (Odroid has a gicv2) because it looks like the run script wasn't detecting the prompt to start migration. I'm guessing something on my side, because I had issues with the migration tests before. Nonetheless, those tests ran just fine manually under qemu and kvmtool, so everything looks correct to me: Tested-by: Alexandru Elisei <alexandru.elisei@arm.com> Thanks, Alex
On Mon, May 17, 2021 at 11:38:46AM +0100, Alexandru Elisei wrote: > Hi Drew, > > On 5/13/21 6:43 PM, Andrew Jones wrote: > > On Thu, May 13, 2021 at 07:18:44PM +0200, Andrew Jones wrote: > >> [..] > >> Thanks Alex, > >> > >> I think a better fix is this untested one below, though. If you can test > >> it out and confirm it also resolves the issue, then I'll add this patch > >> to the series. > >> > >> Thanks, > >> drew > >> > >> > >> diff --git a/arm/micro-bench.c b/arm/micro-bench.c > >> index 95c418c10eb4..deafd5695c33 100644 > >> --- a/arm/micro-bench.c > >> +++ b/arm/micro-bench.c > >> @@ -273,16 +273,22 @@ static void hvc_exec(void) > >> asm volatile("mov w0, #0x4b000000; hvc #0" ::: "w0"); > >> } > >> > >> -static void mmio_read_user_exec(void) > >> +/* > >> + * FIXME: Read device-id in virtio mmio here in order to > >> + * force an exit to userspace. This address needs to be > >> + * updated in the future if any relevant changes in QEMU > >> + * test-dev are made. > >> + */ > >> +static void *userspace_emulated_addr; > >> + > >> +static bool mmio_read_user_prep(void) > >> { > >> - /* > >> - * FIXME: Read device-id in virtio mmio here in order to > >> - * force an exit to userspace. This address needs to be > >> - * updated in the future if any relevant changes in QEMU > >> - * test-dev are made. > >> - */ > >> - void *userspace_emulated_addr = (void*)0x0a000008; > >> + userspace_emulated_addr = (void*)ioremap(0x0a000008, 8); > >> + return true; > >> +} > >> > >> +static void mmio_read_user_exec(void) > >> +{ > >> readl(userspace_emulated_addr); > >> } > >> > >> @@ -309,14 +315,14 @@ struct exit_test { > >> }; > >> > >> static struct exit_test tests[] = { > >> - {"hvc", NULL, hvc_exec, NULL, 65536, true}, > >> - {"mmio_read_user", NULL, mmio_read_user_exec, NULL, 65536, true}, > >> - {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, > >> - {"eoi", NULL, eoi_exec, NULL, 65536, true}, > >> - {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, > >> - {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, > >> - {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, > >> - {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, > >> + {"hvc", NULL, hvc_exec, NULL, 65536, true}, > >> + {"mmio_read_user", mmio_read_user_prep, mmio_read_user_exec, NULL, 65536, true}, > >> + {"mmio_read_vgic", NULL, mmio_read_vgic_exec, NULL, 65536, true}, > >> + {"eoi", NULL, eoi_exec, NULL, 65536, true}, > >> + {"ipi", ipi_prep, ipi_exec, NULL, 65536, true}, > >> + {"ipi_hw", ipi_hw_prep, ipi_exec, NULL, 65536, true}, > >> + {"lpi", lpi_prep, lpi_exec, NULL, 65536, true}, > >> + {"timer_10ms", timer_prep, timer_exec, timer_post, 256, true}, > >> }; > >> > >> struct ns_time { > >> > > I still haven't tested it (beyond compiling), but I've tweaked this a bit. > > You can see it here > > > > https://gitlab.com/rhdrjones/kvm-unit-tests/-/commit/71938030d160e021db3388037d0d407df17e8e5e > > > > The whole v4 of this series is here > > > > https://gitlab.com/rhdrjones/kvm-unit-tests/-/commits/efiprep > > Had a look at the patch, looks good; in my suggestion I wrongly thought that readl > reads a long (64 bits), not an uint32_t value: > > Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com> > > I also ran some tests on the v4 series from your repo. > > Qemu TCG on x86 machine: > - arm compiled with arm-linux-gnu-gcc and arm-none-eabi-gcc > - arm64, 4k and 64k pages. > > Odroid-c4: > - arm, both compilers, under kvmtool > - arm64, 4k, 16k and 64k pages under qemu KVM and kvmtool > > Rockpro64: > - arm, both compilers, under kvmtool > - arm64, 4k and 64k pages, under qemu KVM and kvmtool. > > The ITS migration tests I had to run manually on the rockpro64 (Odroid has a > gicv2) because it looks like the run script wasn't detecting the prompt to start > migration. I'm guessing something on my side, because I had issues with the > migration tests before. Nonetheless, those tests ran just fine manually under qemu > and kvmtool, so everything looks correct to me: > > Tested-by: Alexandru Elisei <alexandru.elisei@arm.com> > Thanks Alex! I've added your tags, applied to arm/queue and sent the pull request. Thanks, drew
diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h index ba3b0b2412ad..e4caa6ff5d1e 100644 --- a/lib/arm/asm/io.h +++ b/lib/arm/asm/io.h @@ -77,6 +77,12 @@ static inline void __raw_writel(u32 val, volatile void __iomem *addr) : "r" (val)); } +#define ioremap ioremap +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size) +{ + return __ioremap(phys_addr, size); +} + #define virt_to_phys virt_to_phys static inline phys_addr_t virt_to_phys(const volatile void *x) { diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h index 122874b8aebe..94e70f0a84bf 100644 --- a/lib/arm/asm/mmu.h +++ b/lib/arm/asm/mmu.h @@ -8,10 +8,13 @@ #include <asm/barrier.h> #define PTE_USER L_PTE_USER +#define PTE_UXN L_PTE_XN +#define PTE_PXN L_PTE_PXN #define PTE_RDONLY PTE_AP2 #define PTE_SHARED L_PTE_SHARED #define PTE_AF PTE_EXT_AF #define PTE_WBWA L_PTE_MT_WRITEALLOC +#define PTE_UNCACHED L_PTE_MT_UNCACHED /* See B3.18.7 TLB maintenance operations */ diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h index 1fb5cd26ac66..8eb4a883808e 100644 --- a/lib/arm/asm/page.h +++ b/lib/arm/asm/page.h @@ -47,5 +47,7 @@ typedef struct { pteval_t pgprot; } pgprot_t; extern phys_addr_t __virt_to_phys(unsigned long addr); extern unsigned long __phys_to_virt(phys_addr_t addr); +extern void *__ioremap(phys_addr_t phys_addr, size_t size); + #endif /* !__ASSEMBLY__ */ #endif /* _ASMARM_PAGE_H_ */ diff --git a/lib/arm/asm/pgtable-hwdef.h b/lib/arm/asm/pgtable-hwdef.h index fe1d8540ea3f..90fd306c7cc0 100644 --- a/lib/arm/asm/pgtable-hwdef.h +++ b/lib/arm/asm/pgtable-hwdef.h @@ -34,6 +34,7 @@ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */ +#define L_PTE_PXN (_AT(pteval_t, 1) << 53) /* PXN */ #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ /* diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c index 15eef007f256..791b1f88f946 100644 --- a/lib/arm/mmu.c +++ b/lib/arm/mmu.c @@ -11,6 +11,7 @@ #include <asm/mmu.h> #include <asm/setup.h> #include <asm/page.h> +#include <asm/io.h> #include "alloc_page.h" #include "vmalloc.h" @@ -157,9 +158,8 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset, void *setup_mmu(phys_addr_t phys_end) { uintptr_t code_end = (uintptr_t)&etext; - struct mem_region *r; - /* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */ + /* 3G-4G region is reserved for vmalloc, cap phys_end at 3G */ if (phys_end > (3ul << 30)) phys_end = 3ul << 30; @@ -170,14 +170,8 @@ void *setup_mmu(phys_addr_t phys_end) "Unsupported translation granule %ld\n", PAGE_SIZE); #endif - mmu_idmap = alloc_page(); - - for (r = mem_regions; r->end; ++r) { - if (!(r->flags & MR_F_IO)) - continue; - mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end, - __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER)); - } + if (!mmu_idmap) + mmu_idmap = alloc_page(); /* armv8 requires code shared between EL1 and EL0 to be read-only */ mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET, @@ -192,6 +186,29 @@ void *setup_mmu(phys_addr_t phys_end) return mmu_idmap; } +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size) +{ + phys_addr_t paddr_aligned = phys_addr & PAGE_MASK; + phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size); + pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER | PTE_UXN | PTE_PXN); + pgd_t *pgtable; + + assert(sizeof(long) == 8 || !(phys_addr >> 32)); + + if (mmu_enabled()) { + pgtable = current_thread_info()->pgtable; + } else { + if (!mmu_idmap) + mmu_idmap = alloc_page(); + pgtable = mmu_idmap; + } + + mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned, + paddr_end, prot); + + return (void __iomem *)(unsigned long)phys_addr; +} + phys_addr_t __virt_to_phys(unsigned long addr) { if (mmu_enabled()) { diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h index e0a03b250d5b..be19f471c0fa 100644 --- a/lib/arm64/asm/io.h +++ b/lib/arm64/asm/io.h @@ -71,6 +71,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) return val; } +#define ioremap ioremap +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size) +{ + return __ioremap(phys_addr, size); +} + #define virt_to_phys virt_to_phys static inline phys_addr_t virt_to_phys(const volatile void *x) { diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h index 72d75eafc882..72371b2d9fe3 100644 --- a/lib/arm64/asm/mmu.h +++ b/lib/arm64/asm/mmu.h @@ -8,6 +8,7 @@ #include <asm/barrier.h> #define PMD_SECT_UNCACHED PMD_ATTRINDX(MT_DEVICE_nGnRE) +#define PTE_UNCACHED PTE_ATTRINDX(MT_DEVICE_nGnRE) #define PTE_WBWA PTE_ATTRINDX(MT_NORMAL) static inline void flush_tlb_all(void) diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h index ae4484b22114..d0fac6ea563d 100644 --- a/lib/arm64/asm/page.h +++ b/lib/arm64/asm/page.h @@ -72,5 +72,7 @@ typedef struct { pteval_t pgprot; } pgprot_t; extern phys_addr_t __virt_to_phys(unsigned long addr); extern unsigned long __phys_to_virt(phys_addr_t addr); +extern void *__ioremap(phys_addr_t phys_addr, size_t size); + #endif /* !__ASSEMBLY__ */ #endif /* _ASMARM64_PAGE_H_ */
By providing a proper ioremap function, we can just rely on devices calling it for each region they need (as they already do) instead of mapping a big assumed I/O range. We don't require the MMU to be enabled at the time of the ioremap. In that case, we add the mapping to the identity map anyway. This allows us to call setup_vm after io_init. Why don't we just call setup_vm before io_init, I hear you ask? Well, that's because tests like sieve want to start with the MMU off, later call setup_vm, and all the while have working I/O. Some unit tests are just really demanding... While at it, ensure we map the I/O regions with XN (execute never), as suggested by Alexandru Elisei. Signed-off-by: Andrew Jones <drjones@redhat.com> --- lib/arm/asm/io.h | 6 ++++++ lib/arm/asm/mmu.h | 3 +++ lib/arm/asm/page.h | 2 ++ lib/arm/asm/pgtable-hwdef.h | 1 + lib/arm/mmu.c | 37 +++++++++++++++++++++++++++---------- lib/arm64/asm/io.h | 6 ++++++ lib/arm64/asm/mmu.h | 1 + lib/arm64/asm/page.h | 2 ++ 8 files changed, 48 insertions(+), 10 deletions(-)