
[v7,3/3] hw/riscv: clear kernel_entry higher bits in load_elf_ram_sym()

Message ID 20230113171805.470252-4-dbarboza@ventanamicro.com (mailing list archive)
State New, archived
Series hw/riscv: clear kernel_entry high bits with 32bit CPUs

Commit Message

Daniel Henrique Barboza Jan. 13, 2023, 5:18 p.m. UTC
Recent hw/riscv/boot.c changes caused a regression in a use case with
the Xvisor hypervisor: running a 32-bit QEMU guest with '-kernel'
stopped working. The reason seems to be that Xvisor uses 64-bit values
to encode the guest's 32-bit addresses, and load_elf_ram_sym() is
sign-extending the result with '1's [1].

This may very well be an issue with Xvisor, but since it's not hard to
work around it on our side, do so: pass a translate_fn() callback to
load_elf_ram_sym() and clear the higher bits of the result if we're
running a 32-bit CPU.

[1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html

Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Suggested-by: Bin Meng <bmeng.cn@gmail.com>
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
---
 hw/riscv/boot.c            | 23 ++++++++++++++++++++++-
 hw/riscv/microchip_pfsoc.c |  4 ++--
 hw/riscv/opentitan.c       |  3 ++-
 hw/riscv/sifive_e.c        |  3 ++-
 hw/riscv/sifive_u.c        |  4 ++--
 hw/riscv/spike.c           |  2 +-
 hw/riscv/virt.c            |  4 ++--
 include/hw/riscv/boot.h    |  1 +
 8 files changed, 34 insertions(+), 10 deletions(-)

Comments

Bin Meng Jan. 14, 2023, 1:40 p.m. UTC | #1
On Sat, Jan 14, 2023 at 1:18 AM Daniel Henrique Barboza
<dbarboza@ventanamicro.com> wrote:
>
> Recent hw/risc/boot.c changes caused a regression in an use case with
> the Xvisor hypervisor. Running a 32 bit QEMU guest with '-kernel'
> stopped working. The reason seems to be that Xvisor is using 64 bit to
> encode the 32 bit addresses from the guest, and load_elf_ram_sym() is
> sign-extending the result with '1's [1].

I would say this is not a regression in QEMU, but rather something
weird that happened to Alistair's 32-bit Xvisor image.

I just built a 32-bit Xvisor image from the latest Xvisor head,
following the instructions provided in its source tree. With mainline
QEMU only the BIN file boots; the ELF does not. My 32-bit Xvisor image
has a load address of 0x10000000. Apparently this address is not
correct, and the issue I saw is different from Alistair's. Alistair,
could you investigate why your 32-bit Xvisor ELF image ends up with
kernel_load_base set to 0xffffffff80000000?

>
> This can very well be an issue with Xvisor, but since it's not hard to
> amend it in our side we're going for it. Use a translate_fn() callback
> to be called by load_elf_ram_sym() and clear the higher bits of the
> result if we're running a 32 bit CPU.
>
> [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html
>
> Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Suggested-by: Bin Meng <bmeng.cn@gmail.com>
> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
> ---
>  hw/riscv/boot.c            | 23 ++++++++++++++++++++++-
>  hw/riscv/microchip_pfsoc.c |  4 ++--
>  hw/riscv/opentitan.c       |  3 ++-
>  hw/riscv/sifive_e.c        |  3 ++-
>  hw/riscv/sifive_u.c        |  4 ++--
>  hw/riscv/spike.c           |  2 +-
>  hw/riscv/virt.c            |  4 ++--
>  include/hw/riscv/boot.h    |  1 +
>  8 files changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> index e868fb6ade..7f8295bf5e 100644
> --- a/hw/riscv/boot.c
> +++ b/hw/riscv/boot.c
> @@ -213,7 +213,27 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
>      }
>  }
>
> +static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
> +{
> +    RISCVHartArrayState *harts = opaque;
> +
> +    /*
> +     * For 32 bit CPUs, kernel_load_base is sign-extended (i.e.
> +     * it can be padded with '1's) if the hypervisor, for some
> +     * reason, is using 64 bit addresses with 32 bit guests.
> +     *
> +     * Clear the higher bits to avoid the padding if we're
> +     * running a 32 bit CPU.
> +     */
> +    if (riscv_is_32bit(harts)) {
> +        return addr & 0x0fffffff;
> +    }
> +
> +    return addr;
> +}
> +
>  target_ulong riscv_load_kernel(MachineState *machine,
> +                               RISCVHartArrayState *harts,
>                                 target_ulong kernel_start_addr,
>                                 bool load_initrd,
>                                 symbol_fn_t sym_cb)
> @@ -231,7 +251,8 @@ target_ulong riscv_load_kernel(MachineState *machine,
>       * the (expected) load address load address. This allows kernels to have
>       * separate SBI and ELF entry points (used by FreeBSD, for example).
>       */
> -    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
> +    if (load_elf_ram_sym(kernel_filename, NULL,
> +                         translate_kernel_address, NULL,
>                           NULL, &kernel_load_base, NULL, NULL, 0,
>                           EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
>          kernel_entry = kernel_load_base;
> diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
> index c45023a2b1..b7e171b605 100644
> --- a/hw/riscv/microchip_pfsoc.c
> +++ b/hw/riscv/microchip_pfsoc.c
> @@ -629,8 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> -                                         true, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> +                                         kernel_start_addr, true, NULL);
>
>          /* Compute the fdt load address in dram */
>          fdt_load_addr = riscv_load_fdt(memmap[MICROCHIP_PFSOC_DRAM_LO].base,
> diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
> index f6fd9725a5..1404a52da0 100644
> --- a/hw/riscv/opentitan.c
> +++ b/hw/riscv/opentitan.c
> @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine)
>      }
>
>      if (machine->kernel_filename) {
> -        riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, false, NULL);
> +        riscv_load_kernel(machine, &s->soc.cpus,
> +                          memmap[IBEX_DEV_RAM].base, false, NULL);
>      }
>  }
>
> diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
> index 6835d1c807..04939b60c3 100644
> --- a/hw/riscv/sifive_e.c
> +++ b/hw/riscv/sifive_e.c
> @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine)
>                            memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
>
>      if (machine->kernel_filename) {
> -        riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base,
> +        riscv_load_kernel(machine, &s->soc.cpus,
> +                          memmap[SIFIVE_E_DEV_DTIM].base,
>                            false, NULL);
>      }
>  }
> diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
> index 9a75d4aa62..214430d40c 100644
> --- a/hw/riscv/sifive_u.c
> +++ b/hw/riscv/sifive_u.c
> @@ -598,8 +598,8 @@ static void sifive_u_machine_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> -                                         true, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> +                                         kernel_start_addr, true, NULL);
>      } else {
>         /*
>          * If dynamic firmware is used, it doesn't know where is the next mode
> diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
> index c517885e6e..b3aac2178b 100644
> --- a/hw/riscv/spike.c
> +++ b/hw/riscv/spike.c
> @@ -307,7 +307,7 @@ static void spike_board_init(MachineState *machine)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0], kernel_start_addr,
>                                           true, htif_symbol_callback);
>      } else {
>         /*
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index a931ed05ab..60c8729b5f 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -1281,8 +1281,8 @@ static void virt_machine_done(Notifier *notifier, void *data)
>          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>                                                           firmware_end_addr);
>
> -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> -                                         true, NULL);
> +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> +                                         kernel_start_addr, true, NULL);
>      } else {
>         /*
>          * If dynamic firmware is used, it doesn't know where is the next mode
> diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
> index cbd131bad7..bc9faed397 100644
> --- a/include/hw/riscv/boot.h
> +++ b/include/hw/riscv/boot.h
> @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
>                                   hwaddr firmware_load_addr,
>                                   symbol_fn_t sym_cb);
>  target_ulong riscv_load_kernel(MachineState *machine,
> +                               RISCVHartArrayState *harts,
>                                 target_ulong firmware_end_addr,
>                                 bool load_initrd,
>                                 symbol_fn_t sym_cb);

Regards,
Bin
Alistair Francis Jan. 16, 2023, 4:28 a.m. UTC | #2
On Sat, Jan 14, 2023 at 11:41 PM Bin Meng <bmeng.cn@gmail.com> wrote:
>
> On Sat, Jan 14, 2023 at 1:18 AM Daniel Henrique Barboza
> <dbarboza@ventanamicro.com> wrote:
> >
> > Recent hw/risc/boot.c changes caused a regression in an use case with
> > the Xvisor hypervisor. Running a 32 bit QEMU guest with '-kernel'
> > stopped working. The reason seems to be that Xvisor is using 64 bit to
> > encode the 32 bit addresses from the guest, and load_elf_ram_sym() is
> > sign-extending the result with '1's [1].
>
> I would say it's not a regression of QEMU but something weird happened
> to Alistair's 32-bit Xvisor image.

I don't think it's an Xvisor issue.

>
> I just built a 32-bit Xvisor image from the latest Xvisor head
> following the instructions provided in its source tree. With the
> mainline QEMU only BIN file boots, but ELF does not. My 32-bit Xvisor
> image has an address of 0x10000000. Apparently this address is not
> correct, and the issue I saw is different from Alistair's. Alistair,
> could you investigate why your 32-bit Xvisor ELF image has an address
> of 0xffffffff80000000 set to kernel_load_base?

Looking at load_elf() in include/hw/elf_ops.h, at these lines:

    if (lowaddr)
        *lowaddr = (uint64_t)(elf_sword)low;

I can see that `low` is 0x80000000 but *lowaddr is set to
0xffffffff80000000, so the address is being sign-extended with 1s.
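
A minimal standalone sketch of that behavior (not the QEMU code
itself; int32_t stands in for elf_sword in the 32-bit ELF case):

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t low = 0x80000000;                 /* bit 31 set */
        /* same shape as: *lowaddr = (uint64_t)(elf_sword)low; */
        uint64_t lowaddr = (uint64_t)(int32_t)low;

        /* prints 0xffffffff80000000: upper 32 bits filled with 1s */
        printf("0x%016" PRIx64 "\n", lowaddr);
        return 0;
    }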

This patch seems to be the correct fix.

Alistair

>
> >
> > This can very well be an issue with Xvisor, but since it's not hard to
> > amend it in our side we're going for it. Use a translate_fn() callback
> > to be called by load_elf_ram_sym() and clear the higher bits of the
> > result if we're running a 32 bit CPU.
> >
> > [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html
> >
> > Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> > Suggested-by: Bin Meng <bmeng.cn@gmail.com>
> > Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

Thanks for the patch. This should be the first patch of the series
though, so that we never break guest loading.

> > ---
> >  hw/riscv/boot.c            | 23 ++++++++++++++++++++++-
> >  hw/riscv/microchip_pfsoc.c |  4 ++--
> >  hw/riscv/opentitan.c       |  3 ++-
> >  hw/riscv/sifive_e.c        |  3 ++-
> >  hw/riscv/sifive_u.c        |  4 ++--
> >  hw/riscv/spike.c           |  2 +-
> >  hw/riscv/virt.c            |  4 ++--
> >  include/hw/riscv/boot.h    |  1 +
> >  8 files changed, 34 insertions(+), 10 deletions(-)
> >
> > diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> > index e868fb6ade..7f8295bf5e 100644
> > --- a/hw/riscv/boot.c
> > +++ b/hw/riscv/boot.c
> > @@ -213,7 +213,27 @@ static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
> >      }
> >  }
> >
> > +static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
> > +{
> > +    RISCVHartArrayState *harts = opaque;
> > +
> > +    /*
> > +     * For 32 bit CPUs, kernel_load_base is sign-extended (i.e.
> > +     * it can be padded with '1's) if the hypervisor, for some
> > +     * reason, is using 64 bit addresses with 32 bit guests.
> > +     *
> > +     * Clear the higher bits to avoid the padding if we're
> > +     * running a 32 bit CPU.
> > +     */
> > +    if (riscv_is_32bit(harts)) {
> > +        return addr & 0x0fffffff;
> > +    }
> > +
> > +    return addr;
> > +}
> > +
> >  target_ulong riscv_load_kernel(MachineState *machine,
> > +                               RISCVHartArrayState *harts,
> >                                 target_ulong kernel_start_addr,
> >                                 bool load_initrd,
> >                                 symbol_fn_t sym_cb)
> > @@ -231,7 +251,8 @@ target_ulong riscv_load_kernel(MachineState *machine,
> >       * the (expected) load address load address. This allows kernels to have
> >       * separate SBI and ELF entry points (used by FreeBSD, for example).
> >       */
> > -    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
> > +    if (load_elf_ram_sym(kernel_filename, NULL,
> > +                         translate_kernel_address, NULL,
> >                           NULL, &kernel_load_base, NULL, NULL, 0,
> >                           EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
> >          kernel_entry = kernel_load_base;
> > diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
> > index c45023a2b1..b7e171b605 100644
> > --- a/hw/riscv/microchip_pfsoc.c
> > +++ b/hw/riscv/microchip_pfsoc.c
> > @@ -629,8 +629,8 @@ static void microchip_icicle_kit_machine_init(MachineState *machine)
> >          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
> >                                                           firmware_end_addr);
> >
> > -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> > -                                         true, NULL);
> > +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> > +                                         kernel_start_addr, true, NULL);
> >
> >          /* Compute the fdt load address in dram */
> >          fdt_load_addr = riscv_load_fdt(memmap[MICROCHIP_PFSOC_DRAM_LO].base,
> > diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
> > index f6fd9725a5..1404a52da0 100644
> > --- a/hw/riscv/opentitan.c
> > +++ b/hw/riscv/opentitan.c
> > @@ -101,7 +101,8 @@ static void opentitan_board_init(MachineState *machine)
> >      }
> >
> >      if (machine->kernel_filename) {
> > -        riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, false, NULL);
> > +        riscv_load_kernel(machine, &s->soc.cpus,
> > +                          memmap[IBEX_DEV_RAM].base, false, NULL);
> >      }
> >  }
> >
> > diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
> > index 6835d1c807..04939b60c3 100644
> > --- a/hw/riscv/sifive_e.c
> > +++ b/hw/riscv/sifive_e.c
> > @@ -114,7 +114,8 @@ static void sifive_e_machine_init(MachineState *machine)
> >                            memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
> >
> >      if (machine->kernel_filename) {
> > -        riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base,
> > +        riscv_load_kernel(machine, &s->soc.cpus,
> > +                          memmap[SIFIVE_E_DEV_DTIM].base,
> >                            false, NULL);
> >      }
> >  }
> > diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
> > index 9a75d4aa62..214430d40c 100644
> > --- a/hw/riscv/sifive_u.c
> > +++ b/hw/riscv/sifive_u.c
> > @@ -598,8 +598,8 @@ static void sifive_u_machine_init(MachineState *machine)
> >          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
> >                                                           firmware_end_addr);
> >
> > -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> > -                                         true, NULL);
> > +        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
> > +                                         kernel_start_addr, true, NULL);
> >      } else {
> >         /*
> >          * If dynamic firmware is used, it doesn't know where is the next mode
> > diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
> > index c517885e6e..b3aac2178b 100644
> > --- a/hw/riscv/spike.c
> > +++ b/hw/riscv/spike.c
> > @@ -307,7 +307,7 @@ static void spike_board_init(MachineState *machine)
> >          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
> >                                                           firmware_end_addr);
> >
> > -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> > +        kernel_entry = riscv_load_kernel(machine, &s->soc[0], kernel_start_addr,
> >                                           true, htif_symbol_callback);
> >      } else {
> >         /*
> > diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> > index a931ed05ab..60c8729b5f 100644
> > --- a/hw/riscv/virt.c
> > +++ b/hw/riscv/virt.c
> > @@ -1281,8 +1281,8 @@ static void virt_machine_done(Notifier *notifier, void *data)
> >          kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
> >                                                           firmware_end_addr);
> >
> > -        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
> > -                                         true, NULL);
> > +        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
> > +                                         kernel_start_addr, true, NULL);
> >      } else {
> >         /*
> >          * If dynamic firmware is used, it doesn't know where is the next mode
> > diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
> > index cbd131bad7..bc9faed397 100644
> > --- a/include/hw/riscv/boot.h
> > +++ b/include/hw/riscv/boot.h
> > @@ -44,6 +44,7 @@ target_ulong riscv_load_firmware(const char *firmware_filename,
> >                                   hwaddr firmware_load_addr,
> >                                   symbol_fn_t sym_cb);
> >  target_ulong riscv_load_kernel(MachineState *machine,
> > +                               RISCVHartArrayState *harts,
> >                                 target_ulong firmware_end_addr,
> >                                 bool load_initrd,
> >                                 symbol_fn_t sym_cb);
>
> Regards,
> Bin
>
Philippe Mathieu-Daudé Jan. 16, 2023, 9:25 a.m. UTC | #3
On 13/1/23 18:18, Daniel Henrique Barboza wrote:
> Recent hw/risc/boot.c changes caused a regression in an use case with
> the Xvisor hypervisor. Running a 32 bit QEMU guest with '-kernel'
> stopped working. The reason seems to be that Xvisor is using 64 bit to
> encode the 32 bit addresses from the guest, and load_elf_ram_sym() is
> sign-extending the result with '1's [1].
> 
> This can very well be an issue with Xvisor, but since it's not hard to
> amend it in our side we're going for it. Use a translate_fn() callback
> to be called by load_elf_ram_sym() and clear the higher bits of the
> result if we're running a 32 bit CPU.
> 
> [1] https://lists.gnu.org/archive/html/qemu-devel/2023-01/msg02281.html
> 
> Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> Suggested-by: Bin Meng <bmeng.cn@gmail.com>
> Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
> ---
>   hw/riscv/boot.c            | 23 ++++++++++++++++++++++-
>   hw/riscv/microchip_pfsoc.c |  4 ++--
>   hw/riscv/opentitan.c       |  3 ++-
>   hw/riscv/sifive_e.c        |  3 ++-
>   hw/riscv/sifive_u.c        |  4 ++--
>   hw/riscv/spike.c           |  2 +-
>   hw/riscv/virt.c            |  4 ++--
>   include/hw/riscv/boot.h    |  1 +
>   8 files changed, 34 insertions(+), 10 deletions(-)

> +static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
> +{
> +    RISCVHartArrayState *harts = opaque;
> +
> +    /*
> +     * For 32 bit CPUs, kernel_load_base is sign-extended (i.e.
> +     * it can be padded with '1's) if the hypervisor, for some
> +     * reason, is using 64 bit addresses with 32 bit guests.
> +     *
> +     * Clear the higher bits to avoid the padding if we're
> +     * running a 32 bit CPU.
> +     */
> +    if (riscv_is_32bit(harts)) {
> +        return addr & 0x0fffffff;

Instead of this magic mask, can we add an architectural definition
in target/riscv/cpu_bits.h and use it as:

            return extract64(addr, 0, xxx_ADDR_BITS);

to make the code self-descriptive?
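
For instance (a sketch only; the constant name below is hypothetical
and would need a real definition in target/riscv/cpu_bits.h, while
extract64() comes from "qemu/bitops.h"):

    /* hypothetical name; the actual definition belongs in cpu_bits.h */
    #define RV32_ADDR_BITS 32

    if (riscv_is_32bit(harts)) {
        /* keep only the low 32 bits instead of using a magic mask */
        return extract64(addr, 0, RV32_ADDR_BITS);
    }

    return addr;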

Otherwise LGTM, thanks!

> +    }
> +
> +    return addr;
> +}
Bin Meng Jan. 26, 2023, 12:07 p.m. UTC | #4
Hi Alistair,

On Mon, Jan 16, 2023 at 12:28 PM Alistair Francis <alistair23@gmail.com> wrote:
>
> On Sat, Jan 14, 2023 at 11:41 PM Bin Meng <bmeng.cn@gmail.com> wrote:
> >
> > On Sat, Jan 14, 2023 at 1:18 AM Daniel Henrique Barboza
> > <dbarboza@ventanamicro.com> wrote:
> > >
> > > Recent hw/risc/boot.c changes caused a regression in an use case with
> > > the Xvisor hypervisor. Running a 32 bit QEMU guest with '-kernel'
> > > stopped working. The reason seems to be that Xvisor is using 64 bit to
> > > encode the 32 bit addresses from the guest, and load_elf_ram_sym() is
> > > sign-extending the result with '1's [1].
> >
> > I would say it's not a regression of QEMU but something weird happened
> > to Alistair's 32-bit Xvisor image.
>
> I don't think it's a Xvisor issue.
>
> >
> > I just built a 32-bit Xvisor image from the latest Xvisor head
> > following the instructions provided in its source tree. With the
> > mainline QEMU only BIN file boots, but ELF does not. My 32-bit Xvisor
> > image has an address of 0x10000000. Apparently this address is not
> > correct, and the issue I saw is different from Alistair's. Alistair,
> > could you investigate why your 32-bit Xvisor ELF image has an address
> > of 0xffffffff80000000 set to kernel_load_base?
>
> Looking in load_elf() in include/hw/elf_ops.h at this line:
>
>     if (lowaddr)
>         *lowaddr = (uint64_t)(elf_sword)low;
>
> I can see that `low` is 0x80000000 but lowaddr is set to
> 0xffffffff80000000. So the address is being sign extended with 1s.
>

I don't understand the sign extension here. It seems intentional, as
the code does the sign extension and then casts to unsigned 64-bit.

Do you know why?

> This patch seems to be the correct fix.
>

Regards,
Bin
Alistair Francis Jan. 29, 2023, 10:50 p.m. UTC | #5
On Thu, Jan 26, 2023 at 10:07 PM Bin Meng <bmeng.cn@gmail.com> wrote:
>
> Hi Alistair,
>
> On Mon, Jan 16, 2023 at 12:28 PM Alistair Francis <alistair23@gmail.com> wrote:
> >
> > On Sat, Jan 14, 2023 at 11:41 PM Bin Meng <bmeng.cn@gmail.com> wrote:
> > >
> > > On Sat, Jan 14, 2023 at 1:18 AM Daniel Henrique Barboza
> > > <dbarboza@ventanamicro.com> wrote:
> > > >
> > > > Recent hw/risc/boot.c changes caused a regression in an use case with
> > > > the Xvisor hypervisor. Running a 32 bit QEMU guest with '-kernel'
> > > > stopped working. The reason seems to be that Xvisor is using 64 bit to
> > > > encode the 32 bit addresses from the guest, and load_elf_ram_sym() is
> > > > sign-extending the result with '1's [1].
> > >
> > > I would say it's not a regression of QEMU but something weird happened
> > > to Alistair's 32-bit Xvisor image.
> >
> > I don't think it's a Xvisor issue.
> >
> > >
> > > I just built a 32-bit Xvisor image from the latest Xvisor head
> > > following the instructions provided in its source tree. With the
> > > mainline QEMU only BIN file boots, but ELF does not. My 32-bit Xvisor
> > > image has an address of 0x10000000. Apparently this address is not
> > > correct, and the issue I saw is different from Alistair's. Alistair,
> > > could you investigate why your 32-bit Xvisor ELF image has an address
> > > of 0xffffffff80000000 set to kernel_load_base?
> >
> > Looking in load_elf() in include/hw/elf_ops.h at this line:
> >
> >     if (lowaddr)
> >         *lowaddr = (uint64_t)(elf_sword)low;
> >
> > I can see that `low` is 0x80000000 but lowaddr is set to
> > 0xffffffff80000000. So the address is being sign extended with 1s.
> >
>
> I don't understand the sign extension here. This seems intentional as
> the codes does the signed extension then casted to unsigned 64-bit.
>
> Do you know why?

No idea!

Alistair

>
> > This patch seems to be the correct fix.
> >
>
> Regards,
> Bin

Patch

diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index e868fb6ade..7f8295bf5e 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -213,7 +213,27 @@  static void riscv_load_initrd(MachineState *machine, uint64_t kernel_entry)
     }
 }
 
+static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
+{
+    RISCVHartArrayState *harts = opaque;
+
+    /*
+     * For 32 bit CPUs, kernel_load_base is sign-extended (i.e.
+     * it can be padded with '1's) if the hypervisor, for some
+     * reason, is using 64 bit addresses with 32 bit guests.
+     *
+     * Clear the higher bits to avoid the padding if we're
+     * running a 32 bit CPU.
+     */
+    if (riscv_is_32bit(harts)) {
+        return addr & 0x0fffffff;
+    }
+
+    return addr;
+}
+
 target_ulong riscv_load_kernel(MachineState *machine,
+                               RISCVHartArrayState *harts,
                                target_ulong kernel_start_addr,
                                bool load_initrd,
                                symbol_fn_t sym_cb)
@@ -231,7 +251,8 @@  target_ulong riscv_load_kernel(MachineState *machine,
      * the (expected) load address load address. This allows kernels to have
      * separate SBI and ELF entry points (used by FreeBSD, for example).
      */
-    if (load_elf_ram_sym(kernel_filename, NULL, NULL, NULL,
+    if (load_elf_ram_sym(kernel_filename, NULL,
+                         translate_kernel_address, NULL,
                          NULL, &kernel_load_base, NULL, NULL, 0,
                          EM_RISCV, 1, 0, NULL, true, sym_cb) > 0) {
         kernel_entry = kernel_load_base;
diff --git a/hw/riscv/microchip_pfsoc.c b/hw/riscv/microchip_pfsoc.c
index c45023a2b1..b7e171b605 100644
--- a/hw/riscv/microchip_pfsoc.c
+++ b/hw/riscv/microchip_pfsoc.c
@@ -629,8 +629,8 @@  static void microchip_icicle_kit_machine_init(MachineState *machine)
         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
                                                          firmware_end_addr);
 
-        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
-                                         true, NULL);
+        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
+                                         kernel_start_addr, true, NULL);
 
         /* Compute the fdt load address in dram */
         fdt_load_addr = riscv_load_fdt(memmap[MICROCHIP_PFSOC_DRAM_LO].base,
diff --git a/hw/riscv/opentitan.c b/hw/riscv/opentitan.c
index f6fd9725a5..1404a52da0 100644
--- a/hw/riscv/opentitan.c
+++ b/hw/riscv/opentitan.c
@@ -101,7 +101,8 @@  static void opentitan_board_init(MachineState *machine)
     }
 
     if (machine->kernel_filename) {
-        riscv_load_kernel(machine, memmap[IBEX_DEV_RAM].base, false, NULL);
+        riscv_load_kernel(machine, &s->soc.cpus,
+                          memmap[IBEX_DEV_RAM].base, false, NULL);
     }
 }
 
diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
index 6835d1c807..04939b60c3 100644
--- a/hw/riscv/sifive_e.c
+++ b/hw/riscv/sifive_e.c
@@ -114,7 +114,8 @@  static void sifive_e_machine_init(MachineState *machine)
                           memmap[SIFIVE_E_DEV_MROM].base, &address_space_memory);
 
     if (machine->kernel_filename) {
-        riscv_load_kernel(machine, memmap[SIFIVE_E_DEV_DTIM].base,
+        riscv_load_kernel(machine, &s->soc.cpus,
+                          memmap[SIFIVE_E_DEV_DTIM].base,
                           false, NULL);
     }
 }
diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c
index 9a75d4aa62..214430d40c 100644
--- a/hw/riscv/sifive_u.c
+++ b/hw/riscv/sifive_u.c
@@ -598,8 +598,8 @@  static void sifive_u_machine_init(MachineState *machine)
         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc.u_cpus,
                                                          firmware_end_addr);
 
-        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
-                                         true, NULL);
+        kernel_entry = riscv_load_kernel(machine, &s->soc.u_cpus,
+                                         kernel_start_addr, true, NULL);
     } else {
        /*
         * If dynamic firmware is used, it doesn't know where is the next mode
diff --git a/hw/riscv/spike.c b/hw/riscv/spike.c
index c517885e6e..b3aac2178b 100644
--- a/hw/riscv/spike.c
+++ b/hw/riscv/spike.c
@@ -307,7 +307,7 @@  static void spike_board_init(MachineState *machine)
         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
                                                          firmware_end_addr);
 
-        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
+        kernel_entry = riscv_load_kernel(machine, &s->soc[0], kernel_start_addr,
                                          true, htif_symbol_callback);
     } else {
        /*
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index a931ed05ab..60c8729b5f 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1281,8 +1281,8 @@  static void virt_machine_done(Notifier *notifier, void *data)
         kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
                                                          firmware_end_addr);
 
-        kernel_entry = riscv_load_kernel(machine, kernel_start_addr,
-                                         true, NULL);
+        kernel_entry = riscv_load_kernel(machine, &s->soc[0],
+                                         kernel_start_addr, true, NULL);
     } else {
        /*
         * If dynamic firmware is used, it doesn't know where is the next mode
diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
index cbd131bad7..bc9faed397 100644
--- a/include/hw/riscv/boot.h
+++ b/include/hw/riscv/boot.h
@@ -44,6 +44,7 @@  target_ulong riscv_load_firmware(const char *firmware_filename,
                                  hwaddr firmware_load_addr,
                                  symbol_fn_t sym_cb);
 target_ulong riscv_load_kernel(MachineState *machine,
+                               RISCVHartArrayState *harts,
                                target_ulong firmware_end_addr,
                                bool load_initrd,
                                symbol_fn_t sym_cb);