diff mbox series

[v3,5/6] target/arm: Do memory type alignment check when translation disabled

Message ID 20240301204110.656742-6-richard.henderson@linaro.org (mailing list archive)
State New, archived
Headers show
Series target/arm: Do memory alignment check for device memory | expand

Commit Message

Richard Henderson March 1, 2024, 8:41 p.m. UTC
If translation is disabled, the default memory type is Device, which
requires alignment checking.  This is more optimally done early via
the MemOp given to the TCG memory operation.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reported-by: Idan Horowitz <idan.horowitz@gmail.com>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1204
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/hflags.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

Comments

Jonathan Cameron April 16, 2024, 3:11 p.m. UTC | #1
On Fri,  1 Mar 2024 10:41:09 -1000
Richard Henderson <richard.henderson@linaro.org> wrote:

> If translation is disabled, the default memory type is Device, which
> requires alignment checking.  This is more optimally done early via
> the MemOp given to the TCG memory operation.
> 
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Reported-by: Idan Horowitz <idan.horowitz@gmail.com>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1204
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Hi Richard.

I noticed some tests I was running stopped booting with master.
(it's a fun and complex stack of QEMU + kvm on QEMU for vCPU Hotplug kernel work,
but this is the host booting)

EDK2 build from upstream as of somepoint last week.

Bisects to this patch.

 qemu-system-aarch64 -M virt,gic-version=3,virtualization=true -m 4g,maxmem=8G,slots=8 -cpu cortex-a76 -smp cpus=4,threads=2,clusters=2,sockets=1 \
 -kernel Image \
 -drive if=none,file=full.qcow2,format=qcow2,id=hd \
 -device ioh3420,id=root_port1 -device virtio-blk-pci,drive=hd \
 -netdev user,id=mynet,hostfwd=tcp::5555-:22 -device virtio-net-pci,netdev=mynet,id=bob \
 -nographic -no-reboot -append 'earlycon root=/dev/vda2 fsck.mode=skip tp_printk' \
 -monitor telnet:127.0.0.1:1235,server,nowait -bios QEMU_EFI.fd \
 -object memory-backend-ram,size=4G,id=mem0 \
 -numa node,nodeid=0,cpus=0-3,memdev=mem0

Symptoms: Nothing on console from edk2 which is built in debug mode so is normally very noisy.
          No sign of anything much happening at all :(

Jonathan



> ---
>  target/arm/tcg/hflags.c | 34 ++++++++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
> index 8e5d35d922..5da1b0fc1d 100644
> --- a/target/arm/tcg/hflags.c
> +++ b/target/arm/tcg/hflags.c
> @@ -26,6 +26,35 @@ static inline bool fgt_svc(CPUARMState *env, int el)
>          FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, SVC_EL1);
>  }
>  
> +/* Return true if memory alignment should be enforced. */
> +static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr)
> +{
> +#ifdef CONFIG_USER_ONLY
> +    return false;
> +#else
> +    /* Check the alignment enable bit. */
> +    if (sctlr & SCTLR_A) {
> +        return true;
> +    }
> +
> +    /*
> +     * If translation is disabled, then the default memory type is
> +     * Device(-nGnRnE) instead of Normal, which requires that alignment
> +     * be enforced.  Since this affects all ram, it is most efficient
> +     * to handle this during translation.
> +     */
> +    if (sctlr & SCTLR_M) {
> +        /* Translation enabled: memory type in PTE via MAIR_ELx. */
> +        return false;
> +    }
> +    if (el < 2 && (arm_hcr_el2_eff(env) & (HCR_DC | HCR_VM))) {
> +        /* Stage 2 translation enabled: memory type in PTE. */
> +        return false;
> +    }
> +    return true;
> +#endif
> +}
> +
>  static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el,
>                                             ARMMMUIdx mmu_idx,
>                                             CPUARMTBFlags flags)
> @@ -121,8 +150,9 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
>  {
>      CPUARMTBFlags flags = {};
>      int el = arm_current_el(env);
> +    uint64_t sctlr = arm_sctlr(env, el);
>  
> -    if (arm_sctlr(env, el) & SCTLR_A) {
> +    if (aprofile_require_alignment(env, el, sctlr)) {
>          DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
>      }
>  
> @@ -223,7 +253,7 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
>  
>      sctlr = regime_sctlr(env, stage1);
>  
> -    if (sctlr & SCTLR_A) {
> +    if (aprofile_require_alignment(env, el, sctlr)) {
>          DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
>      }
>
Richard Henderson April 17, 2024, 8:07 p.m. UTC | #2
On 4/16/24 08:11, Jonathan Cameron wrote:
> On Fri,  1 Mar 2024 10:41:09 -1000
> Richard Henderson <richard.henderson@linaro.org> wrote:
> 
>> If translation is disabled, the default memory type is Device, which
>> requires alignment checking.  This is more optimally done early via
>> the MemOp given to the TCG memory operation.
>>
>> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>> Reported-by: Idan Horowitz <idan.horowitz@gmail.com>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1204
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> 
> Hi Richard.
> 
> I noticed some tests I was running stopped booting with master.
> (it's a fun and complex stack of QEMU + kvm on QEMU for vCPU Hotplug kernel work,
> but this is the host booting)
> 
> EDK2 build from upstream as of somepoint last week.
> 
> Bisects to this patch.
> 
>   qemu-system-aarch64 -M virt,gic-version=3,virtualization=true -m 4g,maxmem=8G,slots=8 -cpu cortex-a76 -smp cpus=4,threads=2,clusters=2,sockets=1 \
>   -kernel Image \
>   -drive if=none,file=full.qcow2,format=qcow2,id=hd \
>   -device ioh3420,id=root_port1 -device virtio-blk-pci,drive=hd \
>   -netdev user,id=mynet,hostfwd=tcp::5555-:22 -device virtio-net-pci,netdev=mynet,id=bob \
>   -nographic -no-reboot -append 'earlycon root=/dev/vda2 fsck.mode=skip tp_printk' \
>   -monitor telnet:127.0.0.1:1235,server,nowait -bios QEMU_EFI.fd \
>   -object memory-backend-ram,size=4G,id=mem0 \
>   -numa node,nodeid=0,cpus=0-3,memdev=mem0
> 
> Symptoms: Nothing on console from edk2 which is built in debug mode so is normally very noisy.
>            No sign of anything much happening at all :(

This isn't a fantastic bug report.

(1) If it doesn't boot efi, then none of the -kernel parameters are necessary.
(2) I'd be surprised if the full.qcow2 drive parameters are necessary either.
     But if they are, what contents?  Is a new empty drive sufficient, just
     enough to send the bios through the correct device initialization?
(3) edk2 build from ...
     Well, this is partly edk2's fault, as the build documentation is awful.
     I spent an entire afternoon trying to figure it out and gave up.

I will say that the edk2 shipped with qemu does work, so... are you absolutely
certain that it isn't a bug in edk2 since then?  Firmware bugs are exactly what
that patch is supposed to expose, as requested by issue #1204.

I'd say you should boot with "-d int" and see what kind of interrupts you're getting very 
early on.  I suspect that you'll see data aborts with ESR xx/yy where the last 6 bits of 
yy are 0x21 (alignment fault).


r~
Jonathan Cameron April 18, 2024, 8:15 a.m. UTC | #3
On Wed, 17 Apr 2024 13:07:35 -0700
Richard Henderson <richard.henderson@linaro.org> wrote:

> On 4/16/24 08:11, Jonathan Cameron wrote:
> > On Fri,  1 Mar 2024 10:41:09 -1000
> > Richard Henderson <richard.henderson@linaro.org> wrote:
> >   
> >> If translation is disabled, the default memory type is Device, which
> >> requires alignment checking.  This is more optimally done early via
> >> the MemOp given to the TCG memory operation.
> >>
> >> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> >> Reported-by: Idan Horowitz <idan.horowitz@gmail.com>
> >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1204
> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>  
> > 
> > Hi Richard.
> > 
> > I noticed some tests I was running stopped booting with master.
> > (it's a fun and complex stack of QEMU + kvm on QEMU for vCPU Hotplug kernel work,
> > but this is the host booting)
> > 
> > EDK2 build from upstream as of somepoint last week.
> > 
> > Bisects to this patch.
> > 
> >   qemu-system-aarch64 -M virt,gic-version=3,virtualization=true -m 4g,maxmem=8G,slots=8 -cpu cortex-a76 -smp cpus=4,threads=2,clusters=2,sockets=1 \
> >   -kernel Image \
> >   -drive if=none,file=full.qcow2,format=qcow2,id=hd \
> >   -device ioh3420,id=root_port1 -device virtio-blk-pci,drive=hd \
> >   -netdev user,id=mynet,hostfwd=tcp::5555-:22 -device virtio-net-pci,netdev=mynet,id=bob \
> >   -nographic -no-reboot -append 'earlycon root=/dev/vda2 fsck.mode=skip tp_printk' \
> >   -monitor telnet:127.0.0.1:1235,server,nowait -bios QEMU_EFI.fd \
> >   -object memory-backend-ram,size=4G,id=mem0 \
> >   -numa node,nodeid=0,cpus=0-3,memdev=mem0
> > 
> > Symptoms: Nothing on console from edk2 which is built in debug mode so is normally very noisy.
> >            No sign of anything much happening at all :(  
> 
> This isn't a fantastic bug report.
> 
> (1) If it doesn't boot efi, then none of the -kernel parameters are necessary.
> (2) I'd be surprised if the full.qcow2 drive parameters are necessary either.
>      But if they are, what contents?  Is a new empty drive sufficient, just
>      enough to send the bios through the correct device initialization?
> (3) edk2 build from ...
>      Well, this is partly edk2's fault, as the build documentation is awful.
>      I spent an entire afternoon trying to figure it out and gave up.
> 
> I will say that the edk2 shipped with qemu does work, so... are you absolutely
> certain that it isn't a bug in edk2 since then?  Firmware bugs are exactly what
> that patch is supposed to expose, as requested by issue #1204.
> 
> I'd say you should boot with "-d int" and see what kind of interrupts you're getting very 
> early on.  I suspect that you'll see data aborts with ESR xx/yy where the last 6 bits of 
> yy are 0x21 (alignment fault).

Hi Richard,

Sorry for lack of details, I was aware it wasn't great and should have stated I planned
to come back with more details when I had time to debug.  Snowed under so for now I've
just dropped back to 8.2 and will get back to this perhaps next week.

Jonathan

> 
> 
> r~
Jonathan Cameron April 18, 2024, 5:40 p.m. UTC | #4
On Thu, 18 Apr 2024 09:15:55 +0100
Jonathan Cameron via <qemu-devel@nongnu.org> wrote:

> On Wed, 17 Apr 2024 13:07:35 -0700
> Richard Henderson <richard.henderson@linaro.org> wrote:
> 
> > On 4/16/24 08:11, Jonathan Cameron wrote:  
> > > On Fri,  1 Mar 2024 10:41:09 -1000
> > > Richard Henderson <richard.henderson@linaro.org> wrote:
> > >     
> > >> If translation is disabled, the default memory type is Device, which
> > >> requires alignment checking.  This is more optimally done early via
> > >> the MemOp given to the TCG memory operation.
> > >>
> > >> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> > >> Reported-by: Idan Horowitz <idan.horowitz@gmail.com>
> > >> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1204
> > >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>    
> > > 
> > > Hi Richard.
> > > 
> > > I noticed some tests I was running stopped booting with master.
> > > (it's a fun and complex stack of QEMU + kvm on QEMU for vCPU Hotplug kernel work,
> > > but this is the host booting)
> > > 
> > > EDK2 build from upstream as of somepoint last week.
> > > 
> > > Bisects to this patch.
> > > 
> > >   qemu-system-aarch64 -M virt,gic-version=3,virtualization=true -m 4g,maxmem=8G,slots=8 -cpu cortex-a76 -smp cpus=4,threads=2,clusters=2,sockets=1 \
> > >   -kernel Image \
> > >   -drive if=none,file=full.qcow2,format=qcow2,id=hd \
> > >   -device ioh3420,id=root_port1 -device virtio-blk-pci,drive=hd \
> > >   -netdev user,id=mynet,hostfwd=tcp::5555-:22 -device virtio-net-pci,netdev=mynet,id=bob \
> > >   -nographic -no-reboot -append 'earlycon root=/dev/vda2 fsck.mode=skip tp_printk' \
> > >   -monitor telnet:127.0.0.1:1235,server,nowait -bios QEMU_EFI.fd \
> > >   -object memory-backend-ram,size=4G,id=mem0 \
> > >   -numa node,nodeid=0,cpus=0-3,memdev=mem0
> > > 
> > > Symptoms: Nothing on console from edk2 which is built in debug mode so is normally very noisy.
> > >            No sign of anything much happening at all :(    
> > 
> > This isn't a fantastic bug report.
> > 
> > (1) If it doesn't boot efi, then none of the -kernel parameters are necessary.
> > (2) I'd be surprised if the full.qcow2 drive parameters are necessary either.
> >      But if they are, what contents?  Is a new empty drive sufficient, just
> >      enough to send the bios through the correct device initialization?
> > (3) edk2 build from ...
> >      Well, this is partly edk2's fault, as the build documentation is awful.
> >      I spent an entire afternoon trying to figure it out and gave up.
> > 
> > I will say that the edk2 shipped with qemu does work, so... are you absolutely
> > certain that it isn't a bug in edk2 since then?  Firmware bugs are exactly what
> > that patch is supposed to expose, as requested by issue #1204.
> > 
> > I'd say you should boot with "-d int" and see what kind of interrupts you're getting very 
> > early on.  I suspect that you'll see data aborts with ESR xx/yy where the last 6 bits of 
> > yy are 0x21 (alignment fault).  
> 
> Hi Richard,
> 
> Sorry for lack of details, I was aware it wasn't great and should have stated I planned
> to come back with more details when I had time to debug.  Snowed under so for now I've
> just dropped back to 8.2 and will get back to this perhaps next week.

+CC EDK2 list and Gerd.

Still not a thorough report but some breadcrumbs.

May be something about my local build setup as the shipped EDK2 succeeds,
but the one I'm building via
uefi-tools/edk2-build.sh armvirtqemu64
(some aged instructions here that are more or less working still)
https://people.kernel.org/jic23/

Indeed starts out with some alignment faults.

Gerd, any ideas?  Maybe I needs something subtly different in my
edk2 build?  I've not looked at this bit of the qemu infrastructure
before - is there a document on how that image is built?
As Richard observed, EDK2 isn't the simplest thing to build - I've
been using uefitools for this for a long time, so maybe I missed some
new requirement?

Build machine is x86_64 ubuntu, gcc 12.2.0.

I need to build it because of some necessary tweaks to debug a
PCI enumeration issue in Linux. (these tests were without those
tweaks)

As Richard observed, most of the command line isn't needed.

qemu-system-aarch64 -M virt,virtualization=true, -m 4g -cpu cortex-a76 \
-bios QEMU_EFI.fd -d int

Jonathan

 


> 
> Jonathan
> 
> > 
> > 
> > r~  
> 
>
Gerd Hoffmann April 19, 2024, 11:52 a.m. UTC | #5
Hi,

> Gerd, any ideas?  Maybe I needs something subtly different in my
> edk2 build?  I've not looked at this bit of the qemu infrastructure
> before - is there a document on how that image is built?

There is roms/Makefile for that.

make -C roms help
make -C roms efi

So easiest would be to just update the edk2 submodule to what you
need, then rebuild.

The build is handled by the roms/edk2-build.py script,
with the build configuration being in roms/edk2-build.config.
That is usable outside the qemu source tree too, i.e. like this:

  python3 /path/to/qemu.git/roms/edk2-build.py \
    --config /path/to/qemu.git/roms/edk2-build.config \
    --core /path/to/edk2.git \
    --match armvirt \
    --silent --no-logs

That'll try to place the images build in "../pc-bios", so maybe better
work with a copy of the config file where you adjust this.

HTH,
  Gerd
Jonathan Cameron April 19, 2024, 4:09 p.m. UTC | #6
On Fri, 19 Apr 2024 13:52:07 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

>   Hi,
> 
> > Gerd, any ideas?  Maybe I needs something subtly different in my
> > edk2 build?  I've not looked at this bit of the qemu infrastructure
> > before - is there a document on how that image is built?  
> 
> There is roms/Makefile for that.
> 
> make -C roms help
> make -C roms efi
> 
> So easiest would be to just update the edk2 submodule to what you
> need, then rebuild.
> 
> The build is handled by the roms/edk2-build.py script,
> with the build configuration being in roms/edk2-build.config.
> That is usable outside the qemu source tree too, i.e. like this:
> 
>   python3 /path/to/qemu.git/roms/edk2-build.py \
>     --config /path/to/qemu.git/roms/edk2-build.config \
>     --core /path/to/edk2.git \
>     --match armvirt \
>     --silent --no-logs
> 
> That'll try to place the images build in "../pc-bios", so maybe better
> work with a copy of the config file where you adjust this.
> 
> HTH,
>   Gerd
> 

Thanks Gerd!

So the builds are very similar via the two method...
However - the QEMU build sets -D CAVIUM_ERRATUM_27456=TRUE

And that's the difference - with that set for my other builds the alignment
problems go away...

Any idea why we have that set in roms/edk2-build.config?
Superficially it seems rather unlikely anyone cares about thunderx1
(if they do we need to get them some new hardware with fresh bugs)
bugs now and this config file was only added last year.


However, the last comment in Ard's commit message below seems
highly likely to be relevant!

Chasing through Ard's patch it has the side effect of dropping
an override of a requirement for strict alignment. 
So with out the errata 
DEFINE GCC_AARCH64_CC_XIPFLAGS     = -mstrict-align -mgeneral-regs-only
is replaced with
 [BuildOptions]
+!if $(CAVIUM_ERRATUM_27456) == TRUE^M
+  GCC:*_*_AARCH64_PP_FLAGS = -DCAVIUM_ERRATUM_27456^M
+!else^M
   GCC:*_*_AARCH64_CC_XIPFLAGS ==
+!endif^M

The edk2 commit that added this was the following +CC Ard.

Given I wasn't sure of the syntax of that file I set it
manually to the original value and indeed it works.


commit ec54ce1f1ab41b92782b37ae59e752fff0ef9c41
Author: Ard Biesheuvel <ardb@kernel.org>
Date:   Wed Jan 4 16:51:35 2023 +0100

    ArmVirtPkg/ArmVirtQemu: Avoid early ID map on ThunderX

    The early ID map used by ArmVirtQemu uses ASID scoped non-global
    mappings, as this allows us to switch to the permanent ID map seamlessly
    without the need for explicit TLB maintenance.

    However, this triggers a known erratum on ThunderX, which does not
    tolerate non-global mappings that are executable at EL1, as this appears
    to result in I-cache corruption. (Linux disables the KPTI based Meltdown
    mitigation on ThunderX for the same reason)

    So work around this, by detecting the CPU implementor and part number,
    and proceeding without the early ID map if a ThunderX CPU is detected.

    Note that this requires the C code to be built with strict alignment
    again, as we may end up executing it with the MMU and caches off.

    Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
    Acked-by: Laszlo Ersek <lersek@redhat.com>
    Tested-by: dann frazier <dann.frazier@canonical.com>

Test case is
qemu-system-aarch64 -M virt,virtualization=true, -m 4g -cpu cortex-a76 \
-bios QEMU_EFI.fd -d int

Which gets alignment faults since:
https://lore.kernel.org/all/20240301204110.656742-6-richard.henderson@linaro.org/

So my feeling here is EDK2 should either have yet another config for QEMU as a host
or should always set the alignment without needing to pick the CAVIUM 27456 errata
which I suspect will get dropped soonish anyway if anyone ever cleans up
old errata.

Jonathan
Ard Biesheuvel April 19, 2024, 4:36 p.m. UTC | #7
On Fri, 19 Apr 2024 at 18:09, Jonathan Cameron via groups.io
<jonathan.cameron=huawei.com@groups.io> wrote:
>
> On Fri, 19 Apr 2024 13:52:07 +0200
> Gerd Hoffmann <kraxel@redhat.com> wrote:
>
> >   Hi,
> >
> > > Gerd, any ideas?  Maybe I needs something subtly different in my
> > > edk2 build?  I've not looked at this bit of the qemu infrastructure
> > > before - is there a document on how that image is built?
> >
> > There is roms/Makefile for that.
> >
> > make -C roms help
> > make -C roms efi
> >
> > So easiest would be to just update the edk2 submodule to what you
> > need, then rebuild.
> >
> > The build is handled by the roms/edk2-build.py script,
> > with the build configuration being in roms/edk2-build.config.
> > That is usable outside the qemu source tree too, i.e. like this:
> >
> >   python3 /path/to/qemu.git/roms/edk2-build.py \
> >     --config /path/to/qemu.git/roms/edk2-build.config \
> >     --core /path/to/edk2.git \
> >     --match armvirt \
> >     --silent --no-logs
> >
> > That'll try to place the images build in "../pc-bios", so maybe better
> > work with a copy of the config file where you adjust this.
> >
> > HTH,
> >   Gerd
> >
>
> Thanks Gerd!
>
> So the builds are very similar via the two method...
> However - the QEMU build sets -D CAVIUM_ERRATUM_27456=TRUE
>
> And that's the difference - with that set for my other builds the alignment
> problems go away...
>
> Any idea why we have that set in roms/edk2-build.config?
> Superficially it seems rather unlikely anyone cares about thunderx1
> (if they do we need to get them some new hardware with fresh bugs)
> bugs now and this config file was only added last year.
>
>
> However, the last comment in Ard's commit message below seems
> highly likely to be relevant!
>
> Chasing through Ard's patch it has the side effect of dropping
> an override of a requirement for strict alignment.
> So with out the errata
> DEFINE GCC_AARCH64_CC_XIPFLAGS     = -mstrict-align -mgeneral-regs-only
> is replaced with
>  [BuildOptions]
> +!if $(CAVIUM_ERRATUM_27456) == TRUE^M
> +  GCC:*_*_AARCH64_PP_FLAGS = -DCAVIUM_ERRATUM_27456^M
> +!else^M
>    GCC:*_*_AARCH64_CC_XIPFLAGS ==
> +!endif^M
>
> The edk2 commit that added this was the following +CC Ard.
>
> Given I wasn't sure of the syntax of that file I set it
> manually to the original value and indeed it works.
>
>
> commit ec54ce1f1ab41b92782b37ae59e752fff0ef9c41
> Author: Ard Biesheuvel <ardb@kernel.org>
> Date:   Wed Jan 4 16:51:35 2023 +0100
>
>     ArmVirtPkg/ArmVirtQemu: Avoid early ID map on ThunderX
>
>     The early ID map used by ArmVirtQemu uses ASID scoped non-global
>     mappings, as this allows us to switch to the permanent ID map seamlessly
>     without the need for explicit TLB maintenance.
>
>     However, this triggers a known erratum on ThunderX, which does not
>     tolerate non-global mappings that are executable at EL1, as this appears
>     to result in I-cache corruption. (Linux disables the KPTI based Meltdown
>     mitigation on ThunderX for the same reason)
>
>     So work around this, by detecting the CPU implementor and part number,
>     and proceeding without the early ID map if a ThunderX CPU is detected.
>
>     Note that this requires the C code to be built with strict alignment
>     again, as we may end up executing it with the MMU and caches off.
>
>     Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
>     Acked-by: Laszlo Ersek <lersek@redhat.com>
>     Tested-by: dann frazier <dann.frazier@canonical.com>
>
> Test case is
> qemu-system-aarch64 -M virt,virtualization=true, -m 4g -cpu cortex-a76 \
> -bios QEMU_EFI.fd -d int
>
> Which gets alignment faults since:
> https://lore.kernel.org/all/20240301204110.656742-6-richard.henderson@linaro.org/
>
> So my feeling here is EDK2 should either have yet another config for QEMU as a host
> or should always set the alignment without needing to pick the CAVIUM 27456 errata
> which I suspect will get dropped soonish anyway if anyone ever cleans up
> old errata.
>

This code was never really intended for execution at EL2, but it
happened to work, partially because TCG's lack of strict alignment
checking when the MMU is off.

Those assumptions no longer hold, so yes, let's get this fixed properly.

Given VHE and nested virt (which will likely imply VHE in practice), I
would like to extend this functionality (i.e., the use of preliminary
page tables in NOR flash) to EL2 as well, but with VHE enabled. This
means we can still elide TLB maintenance (and BBM checks) by using
different ASIDs, and otherwise, fall back to entering with the MMU off
if VHE is not available. In that case, we should enforce strict
alignment too, so that needs to be fixed regardless.

I'll try to code something up and send it round. In the mean time,
feel free to propose a minimal patch that reinstates the strict
alignment if you are pressed for time, and I'll merge it right away.
Ard Biesheuvel April 19, 2024, 5:38 p.m. UTC | #8
On Fri, 19 Apr 2024 at 18:36, Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Fri, 19 Apr 2024 at 18:09, Jonathan Cameron via groups.io
> <jonathan.cameron=huawei.com@groups.io> wrote:
> >
> > On Fri, 19 Apr 2024 13:52:07 +0200
> > Gerd Hoffmann <kraxel@redhat.com> wrote:
> >
> > >   Hi,
> > >
> > > > Gerd, any ideas?  Maybe I needs something subtly different in my
> > > > edk2 build?  I've not looked at this bit of the qemu infrastructure
> > > > before - is there a document on how that image is built?
> > >
> > > There is roms/Makefile for that.
> > >
> > > make -C roms help
> > > make -C roms efi
> > >
> > > So easiest would be to just update the edk2 submodule to what you
> > > need, then rebuild.
> > >
> > > The build is handled by the roms/edk2-build.py script,
> > > with the build configuration being in roms/edk2-build.config.
> > > That is usable outside the qemu source tree too, i.e. like this:
> > >
> > >   python3 /path/to/qemu.git/roms/edk2-build.py \
> > >     --config /path/to/qemu.git/roms/edk2-build.config \
> > >     --core /path/to/edk2.git \
> > >     --match armvirt \
> > >     --silent --no-logs
> > >
> > > That'll try to place the images build in "../pc-bios", so maybe better
> > > work with a copy of the config file where you adjust this.
> > >
> > > HTH,
> > >   Gerd
> > >
> >
> > Thanks Gerd!
> >
> > So the builds are very similar via the two method...
> > However - the QEMU build sets -D CAVIUM_ERRATUM_27456=TRUE
> >
> > And that's the difference - with that set for my other builds the alignment
> > problems go away...
> >
> > Any idea why we have that set in roms/edk2-build.config?
> > Superficially it seems rather unlikely anyone cares about thunderx1
> > (if they do we need to get them some new hardware with fresh bugs)
> > bugs now and this config file was only added last year.
> >
> >
> > However, the last comment in Ard's commit message below seems
> > highly likely to be relevant!
> >
> > Chasing through Ard's patch it has the side effect of dropping
> > an override of a requirement for strict alignment.
> > So with out the errata
> > DEFINE GCC_AARCH64_CC_XIPFLAGS     = -mstrict-align -mgeneral-regs-only
> > is replaced with
> >  [BuildOptions]
> > +!if $(CAVIUM_ERRATUM_27456) == TRUE^M
> > +  GCC:*_*_AARCH64_PP_FLAGS = -DCAVIUM_ERRATUM_27456^M
> > +!else^M
> >    GCC:*_*_AARCH64_CC_XIPFLAGS ==
> > +!endif^M
> >
> > The edk2 commit that added this was the following +CC Ard.
> >
> > Given I wasn't sure of the syntax of that file I set it
> > manually to the original value and indeed it works.
> >
> >
> > commit ec54ce1f1ab41b92782b37ae59e752fff0ef9c41
> > Author: Ard Biesheuvel <ardb@kernel.org>
> > Date:   Wed Jan 4 16:51:35 2023 +0100
> >
> >     ArmVirtPkg/ArmVirtQemu: Avoid early ID map on ThunderX
> >
> >     The early ID map used by ArmVirtQemu uses ASID scoped non-global
> >     mappings, as this allows us to switch to the permanent ID map seamlessly
> >     without the need for explicit TLB maintenance.
> >
> >     However, this triggers a known erratum on ThunderX, which does not
> >     tolerate non-global mappings that are executable at EL1, as this appears
> >     to result in I-cache corruption. (Linux disables the KPTI based Meltdown
> >     mitigation on ThunderX for the same reason)
> >
> >     So work around this, by detecting the CPU implementor and part number,
> >     and proceeding without the early ID map if a ThunderX CPU is detected.
> >
> >     Note that this requires the C code to be built with strict alignment
> >     again, as we may end up executing it with the MMU and caches off.
> >
> >     Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> >     Acked-by: Laszlo Ersek <lersek@redhat.com>
> >     Tested-by: dann frazier <dann.frazier@canonical.com>
> >
> > Test case is
> > qemu-system-aarch64 -M virt,virtualization=true, -m 4g -cpu cortex-a76 \
> > -bios QEMU_EFI.fd -d int
> >
> > Which gets alignment faults since:
> > https://lore.kernel.org/all/20240301204110.656742-6-richard.henderson@linaro.org/
> >
> > So my feeling here is EDK2 should either have yet another config for QEMU as a host
> > or should always set the alignment without needing to pick the CAVIUM 27456 errata
> > which I suspect will get dropped soonish anyway if anyone ever cleans up
> > old errata.
> >
>
> This code was never really intended for execution at EL2, but it
> happened to work, partially because TCG's lack of strict alignment
> checking when the MMU is off.
>
> Those assumptions no longer hold, so yes, let's get this fixed properly.
>
> Given VHE and nested virt (which will likely imply VHE in practice), I
> would like to extend this functionality (i.e., the use of preliminary
> page tables in NOR flash) to EL2 as well, but with VHE enabled. This
> means we can still elide TLB maintenance (and BBM checks) by using
> different ASIDs, and otherwise, fall back to entering with the MMU off
> if VHE is not available. In that case, we should enforce strict
> alignment too, so that needs to be fixed regardless.
>
> I'll try to code something up and send it round. In the mean time,
> feel free to propose a minimal patch that reinstates the strict
> alignment if you are pressed for time, and I'll merge it right away.

Actually, let's just so that first - I'll send out a patch.
Clément Chigot April 22, 2024, 3:26 p.m. UTC | #9
Hi Richard,

While testing the future V9, I've some regressions on a custom board
using cortex-R5 CPUs.
Unaligned data accesses are no longer allowed because of that patch.

I've dug into the various documentation and it seems that R-profile
CPUs don't have the same default memory type as A-profile. It depends
on a default memory map provided in the R-Profile RM in C1.3 [1], even
when PMU is disabled.

> Each PMSAv8-32 MPU has an associated default memory map which is used when the MPU is not enabled.
> ...
> Table C1-4 and Table C1-5 describe the default memory map defined for the EL1 MPU.

For our case, Table C1-5 can be simplified as:
 |  0x00000000 – 0x7FFFFFFF Normal
 |  0x80000000 – 0xBFFFFFFF Device-nGnRE
 |  0xC0000000 – 0xFFFFFFFF Device-nGnRnE

Therefore, we can't blindly enable strict alignment checking solely on
SCTLR bits. We should make it depend on the address targeted. But is
it possible to know that address in `aprofile_require_alignment` ?
with `mmu_idx` ?

By the way, are R-Profile CPUs the same as those having the `PMSA`
feature ? That could mean we can use the `ARM_FEATURE_PMSA` to deal
with that, instead of create a new `ARM_FEATURE_R`

Note that the RM I've linked is for ARMv8. But this other link [2]
seems to show a similar behavior for arm-v7.

cc Jonathan and Ard, though not sure this is the same bug you've
reported earlier.

Thanks,
Clément
[1] https://developer.arm.com/documentation/ddi0568/a-c/?lang=en
[2] https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/Protected-Memory-System-Architecture--PMSA-/About-the-PMSA/Enabling-and-disabling-the-MPU?lang=en#BEIJEFCJ

On Fri, Mar 1, 2024 at 9:43 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> If translation is disabled, the default memory type is Device, which
> requires alignment checking.  This is more optimally done early via
> the MemOp given to the TCG memory operation.
>
> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Reported-by: Idan Horowitz <idan.horowitz@gmail.com>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1204
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tcg/hflags.c | 34 ++++++++++++++++++++++++++++++++--
>  1 file changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
> index 8e5d35d922..5da1b0fc1d 100644
> --- a/target/arm/tcg/hflags.c
> +++ b/target/arm/tcg/hflags.c
> @@ -26,6 +26,35 @@ static inline bool fgt_svc(CPUARMState *env, int el)
>          FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, SVC_EL1);
>  }
>
> +/* Return true if memory alignment should be enforced. */
> +static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr)
> +{
> +#ifdef CONFIG_USER_ONLY
> +    return false;
> +#else
> +    /* Check the alignment enable bit. */
> +    if (sctlr & SCTLR_A) {
> +        return true;
> +    }
> +
> +    /*
> +     * If translation is disabled, then the default memory type is
> +     * Device(-nGnRnE) instead of Normal, which requires that alignment
> +     * be enforced.  Since this affects all ram, it is most efficient
> +     * to handle this during translation.
> +     */
> +    if (sctlr & SCTLR_M) {
> +        /* Translation enabled: memory type in PTE via MAIR_ELx. */
> +        return false;
> +    }
> +    if (el < 2 && (arm_hcr_el2_eff(env) & (HCR_DC | HCR_VM))) {
> +        /* Stage 2 translation enabled: memory type in PTE. */
> +        return false;
> +    }
> +    return true;
> +#endif
> +}
> +
>  static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el,
>                                             ARMMMUIdx mmu_idx,
>                                             CPUARMTBFlags flags)
> @@ -121,8 +150,9 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
>  {
>      CPUARMTBFlags flags = {};
>      int el = arm_current_el(env);
> +    uint64_t sctlr = arm_sctlr(env, el);
>
> -    if (arm_sctlr(env, el) & SCTLR_A) {
> +    if (aprofile_require_alignment(env, el, sctlr)) {
>          DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
>      }
>
> @@ -223,7 +253,7 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
>
>      sctlr = regime_sctlr(env, stage1);
>
> -    if (sctlr & SCTLR_A) {
> +    if (aprofile_require_alignment(env, el, sctlr)) {
>          DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
>      }
>
> --
> 2.34.1
>
>
Richard Henderson April 22, 2024, 3:47 p.m. UTC | #10
On 4/22/24 08:26, Clément Chigot wrote:
> Hi Richard,
> 
> While testing the future V9, I've some regressions on a custom board
> using cortex-R5 CPUs.
> Unaligned data accesses are no longer allowed because of that patch.
> 
> I've dug into the various documentation and it seems that R-profile
> CPUs don't have the same default memory type as A-profile. It depends
> on a default memory map provided in the R-Profile RM in C1.3 [1], even
> when PMU is disabled.
> 
>> Each PMSAv8-32 MPU has an associated default memory map which is used when the MPU is not enabled.
>> ...
>> Table C1-4 and Table C1-5 describe the default memory map defined for the EL1 MPU.
> 
> For our case, Table C1-5 can be simplified as:
>   |  0x00000000 – 0x7FFFFFFF Normal
>   |  0x80000000 – 0xBFFFFFFF Device-nGnRE
>   |  0xC0000000 – 0xFFFFFFFF Device-nGnRnE
> 
> Therefore, we can't blindly enable strict alignment checking solely on
> SCTLR bits. We should make it depend on the address targeted. But is
> it possible to know that address in `aprofile_require_alignment` ?
> with `mmu_idx` ?

No, this would need to be handled in get_phys_addr_disabled.

> By the way, are R-Profile CPUs the same as those having the `PMSA`
> feature ? That could mean we can use the `ARM_FEATURE_PMSA` to deal
> with that, instead of create a new `ARM_FEATURE_R`

No, some armv5 have PMSA.

> 
> Note that the RM I've linked is for ARMv8. But this other link [2]
> seems to show a similar behavior for arm-v7.
> 
> cc Jonathan and Ard, though not sure this is the same bug you've
> reported earlier.
> 
> Thanks,
> Clément
> [1] https://developer.arm.com/documentation/ddi0568/a-c/?lang=en
> [2] https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/Protected-Memory-System-Architecture--PMSA-/About-the-PMSA/Enabling-and-disabling-the-MPU?lang=en#BEIJEFCJ

Ouch, thanks for the armv7 link.  At the moment it looks like my blanket mmu-disabled 
change should be restricted to armv8.


r~
Peter Maydell April 22, 2024, 3:59 p.m. UTC | #11
On Mon, 22 Apr 2024 at 16:48, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 4/22/24 08:26, Clément Chigot wrote:
> > Hi Richard,
> >
> > While testing the future V9, I've some regressions on a custom board
> > using cortex-R5 CPUs.
> > Unaligned data accesses are no longer allowed because of that patch.
> >
> > I've dug into the various documentation and it seems that R-profile
> > CPUs don't have the same default memory type as A-profile. It depends
> > on a default memory map provided in the R-Profile RM in C1.3 [1], even
> > when PMU is disabled.
> >
> >> Each PMSAv8-32 MPU has an associated default memory map which is used when the MPU is not enabled.
> >> ...
> >> Table C1-4 and Table C1-5 describe the default memory map defined for the EL1 MPU.
> >
> > For our case, Table C1-5 can be simplified as:
> >   |  0x00000000 – 0x7FFFFFFF Normal
> >   |  0x80000000 – 0xBFFFFFFF Device-nGnRE
> >   |  0xC0000000 – 0xFFFFFFFF Device-nGnRnE
> >
> > Therefore, we can't blindly enable strict alignment checking solely on
> > SCTLR bits. We should make it depend on the address targeted. But is
> > it possible to know that address in `aprofile_require_alignment` ?
> > with `mmu_idx` ?
>
> No, this would need to be handled in get_phys_addr_disabled.
>
> > By the way, are R-Profile CPUs the same as those having the `PMSA`
> > feature ? That could mean we can use the `ARM_FEATURE_PMSA` to deal
> > with that, instead of create a new `ARM_FEATURE_R`
>
> No, some armv5 have PMSA.
>
> >
> > Note that the RM I've linked is for ARMv8. But this other link [2]
> > seems to show a similar behavior for arm-v7.
> >
> > cc Jonathan and Ard, though not sure this is the same bug you've
> > reported earlier.
> >
> > Thanks,
> > Clément
> > [1] https://developer.arm.com/documentation/ddi0568/a-c/?lang=en
> > [2] https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/Protected-Memory-System-Architecture--PMSA-/About-the-PMSA/Enabling-and-disabling-the-MPU?lang=en#BEIJEFCJ
>
> Ouch, thanks for the armv7 link.  At the moment it looks like my blanket mmu-disabled
> change should be restricted to armv8.

Restricted to A-profile, probably. I haven't cross-checked, but IIRC
for v7A this is IMPDEF and we're OK to fault it.

thanks
-- PMM
diff mbox series

Patch

diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index 8e5d35d922..5da1b0fc1d 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -26,6 +26,35 @@  static inline bool fgt_svc(CPUARMState *env, int el)
         FIELD_EX64(env->cp15.fgt_exec[FGTREG_HFGITR], HFGITR_EL2, SVC_EL1);
 }
 
+/* Return true if memory alignment should be enforced. */
+static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr)
+{
+#ifdef CONFIG_USER_ONLY
+    return false;
+#else
+    /* Check the alignment enable bit. */
+    if (sctlr & SCTLR_A) {
+        return true;
+    }
+
+    /*
+     * If translation is disabled, then the default memory type is
+     * Device(-nGnRnE) instead of Normal, which requires that alignment
+     * be enforced.  Since this affects all ram, it is most efficient
+     * to handle this during translation.
+     */
+    if (sctlr & SCTLR_M) {
+        /* Translation enabled: memory type in PTE via MAIR_ELx. */
+        return false;
+    }
+    if (el < 2 && (arm_hcr_el2_eff(env) & (HCR_DC | HCR_VM))) {
+        /* Stage 2 translation enabled: memory type in PTE. */
+        return false;
+    }
+    return true;
+#endif
+}
+
 static CPUARMTBFlags rebuild_hflags_common(CPUARMState *env, int fp_el,
                                            ARMMMUIdx mmu_idx,
                                            CPUARMTBFlags flags)
@@ -121,8 +150,9 @@  static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el,
 {
     CPUARMTBFlags flags = {};
     int el = arm_current_el(env);
+    uint64_t sctlr = arm_sctlr(env, el);
 
-    if (arm_sctlr(env, el) & SCTLR_A) {
+    if (aprofile_require_alignment(env, el, sctlr)) {
         DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
     }
 
@@ -223,7 +253,7 @@  static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
 
     sctlr = regime_sctlr(env, stage1);
 
-    if (sctlr & SCTLR_A) {
+    if (aprofile_require_alignment(env, el, sctlr)) {
         DP_TBFLAG_ANY(flags, ALIGN_MEM, 1);
     }