diff mbox

[RFC,3/5] hw/arm: add scattered RAM memory region support

Message ID 1510622154-17224-4-git-send-email-zhuyijun@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zhu Yijun Nov. 14, 2017, 1:15 a.m. UTC
From: Zhu Yijun <zhuyijun@huawei.com>

Dig out reserved memory holes and collect scattered RAM memory
regions by adding mem_list member in arm_boot_info struct.

Signed-off-by: Zhu Yijun <zhuyijun@huawei.com>
---
 hw/arm/boot.c        |   8 ++++
 hw/arm/virt.c        | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/hw/arm/arm.h |   1 +
 3 files changed, 108 insertions(+), 2 deletions(-)

Comments

Andrew Jones Nov. 14, 2017, 2:50 p.m. UTC | #1
On Tue, Nov 14, 2017 at 09:15:52AM +0800, zhuyijun@huawei.com wrote:
> From: Zhu Yijun <zhuyijun@huawei.com>
> 
> Dig out reserved memory holes and collect scattered RAM memory
> regions by adding mem_list member in arm_boot_info struct.
> 
> Signed-off-by: Zhu Yijun <zhuyijun@huawei.com>
> ---
>  hw/arm/boot.c        |   8 ++++
>  hw/arm/virt.c        | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/hw/arm/arm.h |   1 +
>  3 files changed, 108 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> index c2720c8..30438f4 100644
> --- a/hw/arm/boot.c
> +++ b/hw/arm/boot.c
> @@ -836,6 +836,14 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
>       */
>      assert(!(info->secure_board_setup && kvm_enabled()));
>  
> +    /* If machine is not virt, the mem_list will empty. */
> +    if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
> +        RAMRegion *new = g_new(RAMRegion, 1);
> +        new->base = info->loader_start;
> +        new->size = info->ram_size;
> +        QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
> +    }
> +
>      info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb");
>  
>      /* Load the kernel.  */
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index ddde5e1..ff334c1 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -56,6 +56,7 @@
>  #include "hw/smbios/smbios.h"
>  #include "qapi/visitor.h"
>  #include "standard-headers/linux/input.h"
> +#include "hw/vfio/vfio-common.h"
>  
>  #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
>      static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
> @@ -1225,6 +1226,98 @@ void virt_machine_done(Notifier *notifier, void *data)
>      virt_build_smbios(vms);
>  }
>  
> +static void handle_reserved_ram_region_overlap(void)
> +{
> +    hwaddr cur_end, next_end;
> +    RAMRegion *reg, *next_reg, *tmp_reg;
> +
> +    QLIST_FOREACH(reg, &reserved_ram_regions, next) {
> +        next_reg = QLIST_NEXT(reg, next);
> +
> +        while (next_reg && next_reg->base <= (reg->base + reg->size)) {
> +            next_end = next_reg->base + next_reg->size;
> +            cur_end = reg->base + reg->size;
> +            if (next_end > cur_end) {
> +                reg->size += (next_end - cur_end);
> +            }
> +
> +            tmp_reg = QLIST_NEXT(next_reg, next);
> +            QLIST_REMOVE(next_reg, next);
> +            g_free(next_reg);
> +            next_reg = tmp_reg;
> +        }
> +    }
> +}

Why isn't the above integrated into the reserved ram region insertion?

> +
> +static void update_memory_regions(VirtMachineState *vms, hwaddr ram_size)
> +{
> +
> +    RAMRegion *new, *reg, *last = NULL;
> +    hwaddr virt_start, virt_end;

Either need a blank line here, or to initialize virt_start/end on the
declaration lines.

> +    virt_start = vms->memmap[VIRT_MEM].base;
> +    virt_end = virt_start + ram_size - 1;
> +
> +    handle_reserved_ram_region_overlap();
> +
> +    QLIST_FOREACH(reg, &reserved_ram_regions, next) {
> +        if (reg->base >= virt_start && reg->base < virt_end) {

What about the case where reg->base + reg->size > virt_start?

> +            if (reg->base == virt_start) {
> +                virt_start += reg->size;

We can't move virt_start without breaking AAVMF.

> +                virt_end += reg->size;

We can't increase virt_end without checking it against RAMLIMIT.

> +                continue;
> +            } else {
> +                new = g_new(RAMRegion, 1);
> +                new->base = virt_start;
> +                new->size = reg->base - virt_start;
> +                virt_start = reg->base + reg->size;
> +            }
> +
> +            if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
> +                QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
> +            } else {
> +                QLIST_INSERT_AFTER(last, new, next);
> +            }
> +
> +            last = new;
> +            ram_size -= new->size;
> +            virt_end += reg->size;
> +        }
> +    }
> +
> +    if (ram_size > 0) {
> +        new = g_new(RAMRegion, 1);
> +        new->base = virt_start;
> +        new->size = ram_size;
> +
> +        if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
> +            QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
> +        } else {
> +            QLIST_INSERT_AFTER(last, new, next);
> +        }
> +    }

Where's the else? ram_size <= 0 is not likely something we should ignore.

> +}
> +
> +static void create_ram_alias(VirtMachineState *vms,
> +                             MemoryRegion *sysmem,
> +                             MemoryRegion *ram)
> +{
> +    RAMRegion *reg;
> +    MemoryRegion *ram_memory;
> +    char *nodename;
> +    hwaddr sz = 0;
> +
> +    QLIST_FOREACH(reg, &vms->bootinfo.mem_list, next) {
> +        nodename = g_strdup_printf("ram@%" PRIx64, reg->base);
> +        ram_memory = g_new(MemoryRegion, 1);
> +        memory_region_init_alias(ram_memory, NULL, nodename, ram, sz,
> +                                 reg->size);
> +        memory_region_add_subregion(sysmem, reg->base, ram_memory);
> +        sz += reg->size;
> +
> +        g_free(nodename);
> +    }
> +}

Instead of using memory region aliases, it would be best if each RAM
region was modeled with pc-dimms, as that would move us towards supporting
memory hotplug and allow the regions to be explicitly identified
(start/size) on the command line - supporting migration. Actually, how
does this series address migration? What if the host we migrate to doesn't
have the same reserved regions in sysfs?

> +
>  static void virt_ram_memory_region_init(Notifier *notifier, void *data)
>  {
>      MachineState *machine = MACHINE(qdev_get_machine());
> @@ -1232,10 +1325,15 @@ static void virt_ram_memory_region_init(Notifier *notifier, void *data)
>      MemoryRegion *ram = g_new(MemoryRegion, 1);
>      VirtMachineState *vms = container_of(notifier, VirtMachineState,
>                                           ram_memory_region_init);
> +    RAMRegion *first_mem_reg;
>  
>      memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
>                                           machine->ram_size);
> -    memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram);
> +    update_memory_regions(vms, machine->ram_size);
> +    create_ram_alias(vms, sysmem, ram);
> +
> +    first_mem_reg = QLIST_FIRST(&vms->bootinfo.mem_list);
> +    vms->bootinfo.loader_start = first_mem_reg->base;
>  }
>  
>  static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
> @@ -1458,7 +1556,6 @@ static void machvirt_init(MachineState *machine)
>      vms->bootinfo.initrd_filename = machine->initrd_filename;
>      vms->bootinfo.nb_cpus = smp_cpus;
>      vms->bootinfo.board_id = -1;
> -    vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
>      vms->bootinfo.get_dtb = machvirt_dtb;
>      vms->bootinfo.firmware_loaded = firmware_loaded;
>      arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);
> diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
> index ce769bd..d953026 100644
> --- a/include/hw/arm/arm.h
> +++ b/include/hw/arm/arm.h
> @@ -124,6 +124,7 @@ struct arm_boot_info {
>      bool secure_board_setup;
>  
>      arm_endianness endianness;
> +    QLIST_HEAD(, RAMRegion) mem_list;
>  };
>  
>  /**
> -- 
> 1.8.3.1
> 
> 
>

Thanks,
drew
Zhu Yijun Nov. 20, 2017, 9:54 a.m. UTC | #2
On 2017/11/14 22:50, Andrew Jones wrote:
> On Tue, Nov 14, 2017 at 09:15:52AM +0800, zhuyijun@huawei.com wrote:
>> From: Zhu Yijun <zhuyijun@huawei.com>
>>
>> Dig out reserved memory holes and collect scattered RAM memory
>> regions by adding mem_list member in arm_boot_info struct.
>>
>> Signed-off-by: Zhu Yijun <zhuyijun@huawei.com>
>> ---
>>  hw/arm/boot.c        |   8 ++++
>>  hw/arm/virt.c        | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  include/hw/arm/arm.h |   1 +
>>  3 files changed, 108 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
>> index c2720c8..30438f4 100644
>> --- a/hw/arm/boot.c
>> +++ b/hw/arm/boot.c
>> @@ -836,6 +836,14 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
>>       */
>>      assert(!(info->secure_board_setup && kvm_enabled()));
>>  
>> +    /* If machine is not virt, the mem_list will empty. */
>> +    if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
>> +        RAMRegion *new = g_new(RAMRegion, 1);
>> +        new->base = info->loader_start;
>> +        new->size = info->ram_size;
>> +        QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
>> +    }
>> +
>>      info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb");
>>  
>>      /* Load the kernel.  */
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index ddde5e1..ff334c1 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -56,6 +56,7 @@
>>  #include "hw/smbios/smbios.h"
>>  #include "qapi/visitor.h"
>>  #include "standard-headers/linux/input.h"
>> +#include "hw/vfio/vfio-common.h"
>>  
>>  #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
>>      static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
>> @@ -1225,6 +1226,98 @@ void virt_machine_done(Notifier *notifier, void *data)
>>      virt_build_smbios(vms);
>>  }
>>  
>> +static void handle_reserved_ram_region_overlap(void)
>> +{
>> +    hwaddr cur_end, next_end;
>> +    RAMRegion *reg, *next_reg, *tmp_reg;
>> +
>> +    QLIST_FOREACH(reg, &reserved_ram_regions, next) {
>> +        next_reg = QLIST_NEXT(reg, next);
>> +
>> +        while (next_reg && next_reg->base <= (reg->base + reg->size)) {
>> +            next_end = next_reg->base + next_reg->size;
>> +            cur_end = reg->base + reg->size;
>> +            if (next_end > cur_end) {
>> +                reg->size += (next_end - cur_end);
>> +            }
>> +
>> +            tmp_reg = QLIST_NEXT(next_reg, next);
>> +            QLIST_REMOVE(next_reg, next);
>> +            g_free(next_reg);
>> +            next_reg = tmp_reg;
>> +        }
>> +    }
>> +}
> Why isn't the above integrated into the reserved ram region insertion?

We can do it once all the reserved regions are captured and then update the list once for any overlaps.

>> +
>> +static void update_memory_regions(VirtMachineState *vms, hwaddr ram_size)
>> +{
>> +
>> +    RAMRegion *new, *reg, *last = NULL;
>> +    hwaddr virt_start, virt_end;
> Either need a blank line here, or to initialize virt_start/end on the
> declaration lines.

OK

>> +    virt_start = vms->memmap[VIRT_MEM].base;
>> +    virt_end = virt_start + ram_size - 1;
>> +
>> +    handle_reserved_ram_region_overlap();
>> +
>> +    QLIST_FOREACH(reg, &reserved_ram_regions, next) {
>> +        if (reg->base >= virt_start && reg->base < virt_end) {
> What about the case where reg->base + reg->size > virt_start?

I will add the case next version.

>> +            if (reg->base == virt_start) {
>> +                virt_start += reg->size;
> We can't move virt_start without breaking AAVMF.

But it may exist reserved memory region which begins at 0x40000000 in theory.
>
>> +                virt_end += reg->size;
> We can't increase virt_end without checking it against RAMLIMIT.
>
>> +                continue;
>> +            } else {
>> +                new = g_new(RAMRegion, 1);
>> +                new->base = virt_start;
>> +                new->size = reg->base - virt_start;
>> +                virt_start = reg->base + reg->size;
>> +            }
>> +
>> +            if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
>> +                QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
>> +            } else {
>> +                QLIST_INSERT_AFTER(last, new, next);
>> +            }
>> +
>> +            last = new;
>> +            ram_size -= new->size;
>> +            virt_end += reg->size;
>> +        }
>> +    }
>> +
>> +    if (ram_size > 0) {
>> +        new = g_new(RAMRegion, 1);
>> +        new->base = virt_start;
>> +        new->size = ram_size;
>> +
>> +        if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
>> +            QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
>> +        } else {
>> +            QLIST_INSERT_AFTER(last, new, next);
>> +        }
>> +    }
> Where's the else? ram_size <= 0 is not likely something we should ignore.

ok, will add it.

>> +}
>> +
>> +static void create_ram_alias(VirtMachineState *vms,
>> +                             MemoryRegion *sysmem,
>> +                             MemoryRegion *ram)
>> +{
>> +    RAMRegion *reg;
>> +    MemoryRegion *ram_memory;
>> +    char *nodename;
>> +    hwaddr sz = 0;
>> +
>> +    QLIST_FOREACH(reg, &vms->bootinfo.mem_list, next) {
>> +        nodename = g_strdup_printf("ram@%" PRIx64, reg->base);
>> +        ram_memory = g_new(MemoryRegion, 1);
>> +        memory_region_init_alias(ram_memory, NULL, nodename, ram, sz,
>> +                                 reg->size);
>> +        memory_region_add_subregion(sysmem, reg->base, ram_memory);
>> +        sz += reg->size;
>> +
>> +        g_free(nodename);
>> +    }
>> +}
> Instead of using memory region aliases, it would be best if each RAM
> region was modeled with pc-dimms, as that would move us towards supporting
> memory hotplug and allow the regions to be explicitly identified
> (start/size) on the command line - supporting migration. Actually, how
> does this series address migration? What if the host we migrate to doesn't
> have the same reserved regions in sysfs?

I  did not consider memory hotplug and migration before, thinks for pointing it out to me.

>> +
>>  static void virt_ram_memory_region_init(Notifier *notifier, void *data)
>>  {
>>      MachineState *machine = MACHINE(qdev_get_machine());
>> @@ -1232,10 +1325,15 @@ static void virt_ram_memory_region_init(Notifier *notifier, void *data)
>>      MemoryRegion *ram = g_new(MemoryRegion, 1);
>>      VirtMachineState *vms = container_of(notifier, VirtMachineState,
>>                                           ram_memory_region_init);
>> +    RAMRegion *first_mem_reg;
>>  
>>      memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
>>                                           machine->ram_size);
>> -    memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram);
>> +    update_memory_regions(vms, machine->ram_size);
>> +    create_ram_alias(vms, sysmem, ram);
>> +
>> +    first_mem_reg = QLIST_FIRST(&vms->bootinfo.mem_list);
>> +    vms->bootinfo.loader_start = first_mem_reg->base;
>>  }
>>  
>>  static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
>> @@ -1458,7 +1556,6 @@ static void machvirt_init(MachineState *machine)
>>      vms->bootinfo.initrd_filename = machine->initrd_filename;
>>      vms->bootinfo.nb_cpus = smp_cpus;
>>      vms->bootinfo.board_id = -1;
>> -    vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
>>      vms->bootinfo.get_dtb = machvirt_dtb;
>>      vms->bootinfo.firmware_loaded = firmware_loaded;
>>      arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);
>> diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
>> index ce769bd..d953026 100644
>> --- a/include/hw/arm/arm.h
>> +++ b/include/hw/arm/arm.h
>> @@ -124,6 +124,7 @@ struct arm_boot_info {
>>      bool secure_board_setup;
>>  
>>      arm_endianness endianness;
>> +    QLIST_HEAD(, RAMRegion) mem_list;
>>  };
>>  
>>  /**
>> -- 
>> 1.8.3.1
>>
>>
>>
> Thanks,
> drew
>
> .
>
Shameerali Kolothum Thodi April 19, 2018, 9:06 a.m. UTC | #3
Hi Andrew,

> -----Original Message-----
> From: Andrew Jones [mailto:drjones@redhat.com]
> Sent: Tuesday, November 14, 2017 2:50 PM
> To: Zhuyijun <zhuyijun@huawei.com>
> Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org;
> eric.auger@redhat.com; peter.maydell@linaro.org; Shameerali Kolothum
> Thodi <shameerali.kolothum.thodi@huawei.com>; Zhaoshenglong
> <zhaoshenglong@huawei.com>
> Subject: Re: [Qemu-devel] [RFC 3/5] hw/arm: add scattered RAM memory
> region support
> 
> On Tue, Nov 14, 2017 at 09:15:52AM +0800, zhuyijun@huawei.com wrote:
> > From: Zhu Yijun <zhuyijun@huawei.com>
> >
> > Dig out reserved memory holes and collect scattered RAM memory
> > regions by adding mem_list member in arm_boot_info struct.
> >
> > Signed-off-by: Zhu Yijun <zhuyijun@huawei.com>
> > ---
> >  hw/arm/boot.c        |   8 ++++
> >  hw/arm/virt.c        | 101
> ++++++++++++++++++++++++++++++++++++++++++++++++++-
> >  include/hw/arm/arm.h |   1 +
> >  3 files changed, 108 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> > index c2720c8..30438f4 100644
> > --- a/hw/arm/boot.c
> > +++ b/hw/arm/boot.c
> > @@ -836,6 +836,14 @@ static void arm_load_kernel_notify(Notifier
> *notifier, void *data)
> >       */
> >      assert(!(info->secure_board_setup && kvm_enabled()));
> >
> > +    /* If machine is not virt, the mem_list will empty. */
> > +    if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
> > +        RAMRegion *new = g_new(RAMRegion, 1);
> > +        new->base = info->loader_start;
> > +        new->size = info->ram_size;
> > +        QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
> > +    }
> > +
> >      info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb");
> >
> >      /* Load the kernel.  */
> > diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> > index ddde5e1..ff334c1 100644
> > --- a/hw/arm/virt.c
> > +++ b/hw/arm/virt.c
> > @@ -56,6 +56,7 @@
> >  #include "hw/smbios/smbios.h"
> >  #include "qapi/visitor.h"
> >  #include "standard-headers/linux/input.h"
> > +#include "hw/vfio/vfio-common.h"
> >
> >  #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
> >      static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
> > @@ -1225,6 +1226,98 @@ void virt_machine_done(Notifier *notifier, void
> *data)
> >      virt_build_smbios(vms);
> >  }
> >
> > +static void handle_reserved_ram_region_overlap(void)
> > +{
> > +    hwaddr cur_end, next_end;
> > +    RAMRegion *reg, *next_reg, *tmp_reg;
> > +
> > +    QLIST_FOREACH(reg, &reserved_ram_regions, next) {
> > +        next_reg = QLIST_NEXT(reg, next);
> > +
> > +        while (next_reg && next_reg->base <= (reg->base + reg->size)) {
> > +            next_end = next_reg->base + next_reg->size;
> > +            cur_end = reg->base + reg->size;
> > +            if (next_end > cur_end) {
> > +                reg->size += (next_end - cur_end);
> > +            }
> > +
> > +            tmp_reg = QLIST_NEXT(next_reg, next);
> > +            QLIST_REMOVE(next_reg, next);
> > +            g_free(next_reg);
> > +            next_reg = tmp_reg;
> > +        }
> > +    }
> > +}
> 
> Why isn't the above integrated into the reserved ram region insertion?
> 
> > +
> > +static void update_memory_regions(VirtMachineState *vms, hwaddr
> ram_size)
> > +{
> > +
> > +    RAMRegion *new, *reg, *last = NULL;
> > +    hwaddr virt_start, virt_end;
> 
> Either need a blank line here, or to initialize virt_start/end on the
> declaration lines.
> 
> > +    virt_start = vms->memmap[VIRT_MEM].base;
> > +    virt_end = virt_start + ram_size - 1;
> > +
> > +    handle_reserved_ram_region_overlap();
> > +
> > +    QLIST_FOREACH(reg, &reserved_ram_regions, next) {
> > +        if (reg->base >= virt_start && reg->base < virt_end) {
> 
> What about the case where reg->base + reg->size > virt_start?
> 
> > +            if (reg->base == virt_start) {
> > +                virt_start += reg->size;
> 
> We can't move virt_start without breaking AAVMF.
> 
> > +                virt_end += reg->size;
> 
> We can't increase virt_end without checking it against RAMLIMIT.
> 
> > +                continue;
> > +            } else {
> > +                new = g_new(RAMRegion, 1);
> > +                new->base = virt_start;
> > +                new->size = reg->base - virt_start;
> > +                virt_start = reg->base + reg->size;
> > +            }
> > +
> > +            if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
> > +                QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
> > +            } else {
> > +                QLIST_INSERT_AFTER(last, new, next);
> > +            }
> > +
> > +            last = new;
> > +            ram_size -= new->size;
> > +            virt_end += reg->size;
> > +        }
> > +    }
> > +
> > +    if (ram_size > 0) {
> > +        new = g_new(RAMRegion, 1);
> > +        new->base = virt_start;
> > +        new->size = ram_size;
> > +
> > +        if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
> > +            QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
> > +        } else {
> > +            QLIST_INSERT_AFTER(last, new, next);
> > +        }
> > +    }
> 
> Where's the else? ram_size <= 0 is not likely something we should ignore.
> 
> > +}
> > +
> > +static void create_ram_alias(VirtMachineState *vms,
> > +                             MemoryRegion *sysmem,
> > +                             MemoryRegion *ram)
> > +{
> > +    RAMRegion *reg;
> > +    MemoryRegion *ram_memory;
> > +    char *nodename;
> > +    hwaddr sz = 0;
> > +
> > +    QLIST_FOREACH(reg, &vms->bootinfo.mem_list, next) {
> > +        nodename = g_strdup_printf("ram@%" PRIx64, reg->base);
> > +        ram_memory = g_new(MemoryRegion, 1);
> > +        memory_region_init_alias(ram_memory, NULL, nodename, ram, sz,
> > +                                 reg->size);
> > +        memory_region_add_subregion(sysmem, reg->base, ram_memory);
> > +        sz += reg->size;
> > +
> > +        g_free(nodename);
> > +    }
> > +}
> 
> Instead of using memory region aliases, it would be best if each RAM
> region was modeled with pc-dimms, as that would move us towards supporting
> memory hotplug and allow the regions to be explicitly identified
> (start/size) on the command line - supporting migration. Actually, how
> does this series address migration? What if the host we migrate to doesn't
> have the same reserved regions in sysfs?

Thanks for going through this series and comments.

I am looking into reviving this series based on the new proposed vfio iova 
interface[1]. The vfio interface will now provide  a list of valid iova ranges.
That means, instead of working on reserved regions to find out the valid
 memory regions, the code here will have the valid regions list directly.

The above comment of yours mentions about modelling the memory regions
with pc-dimms. If I understand that proposal correctly, in case the iova list
has  multiple entries in it(means there are holes in the memory) the extra
regions has to be added as a pc-dimm slot memory. Having gone through
the Qemu source, I am not sure what is the best way to accomplish that.
(of course I am not that familiar with the Qemu source). Is it ok to invoke
qdev_device_add() from here? Any pointers on this is very appreciated.

One more point is, considering the fact that ARM64 linux kernel still doesn't
support hotplug memory at the moment, not sure how much we gain
from the pc-dimm model.

Please let me know your thoughts on this.

Thanks,
Shameer

[1]. https://lkml.org/lkml/2018/4/18/293


> > +
> >  static void virt_ram_memory_region_init(Notifier *notifier, void *data)
> >  {
> >      MachineState *machine = MACHINE(qdev_get_machine());
> > @@ -1232,10 +1325,15 @@ static void
> virt_ram_memory_region_init(Notifier *notifier, void *data)
> >      MemoryRegion *ram = g_new(MemoryRegion, 1);
> >      VirtMachineState *vms = container_of(notifier, VirtMachineState,
> >                                           ram_memory_region_init);
> > +    RAMRegion *first_mem_reg;
> >
> >      memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
> >                                           machine->ram_size);
> > -    memory_region_add_subregion(sysmem, vms-
> >memmap[VIRT_MEM].base, ram);
> > +    update_memory_regions(vms, machine->ram_size);
> > +    create_ram_alias(vms, sysmem, ram);
> > +
> > +    first_mem_reg = QLIST_FIRST(&vms->bootinfo.mem_list);
> > +    vms->bootinfo.loader_start = first_mem_reg->base;
> >  }
> >
> >  static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
> > @@ -1458,7 +1556,6 @@ static void machvirt_init(MachineState *machine)
> >      vms->bootinfo.initrd_filename = machine->initrd_filename;
> >      vms->bootinfo.nb_cpus = smp_cpus;
> >      vms->bootinfo.board_id = -1;
> > -    vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
> >      vms->bootinfo.get_dtb = machvirt_dtb;
> >      vms->bootinfo.firmware_loaded = firmware_loaded;
> >      arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);
> > diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
> > index ce769bd..d953026 100644
> > --- a/include/hw/arm/arm.h
> > +++ b/include/hw/arm/arm.h
> > @@ -124,6 +124,7 @@ struct arm_boot_info {
> >      bool secure_board_setup;
> >
> >      arm_endianness endianness;
> > +    QLIST_HEAD(, RAMRegion) mem_list;
> >  };
> >
> >  /**
> > --
> > 1.8.3.1
> >
> >
> >
> 
> Thanks,
> drew
Andrew Jones April 24, 2018, 3:29 p.m. UTC | #4
On Thu, Apr 19, 2018 at 09:06:30AM +0000, Shameerali Kolothum Thodi wrote:
> > From: Andrew Jones [mailto:drjones@redhat.com]
> > Instead of using memory region aliases, it would be best if each RAM
> > region was modeled with pc-dimms, as that would move us towards supporting
> > memory hotplug and allow the regions to be explicitly identified
> > (start/size) on the command line - supporting migration. Actually, how
> > does this series address migration? What if the host we migrate to doesn't
> > have the same reserved regions in sysfs?
> 
> Thanks for going through this series and comments.
> 
> I am looking into reviving this series based on the new proposed vfio iova 
> interface[1]. The vfio interface will now provide  a list of valid iova ranges.
> That means, instead of working on reserved regions to find out the valid
>  memory regions, the code here will have the valid regions list directly.
> 
> The above comment of yours mentions about modelling the memory regions
> with pc-dimms. If I understand that proposal correctly, in case the iova list
> has  multiple entries in it(means there are holes in the memory) the extra
> regions has to be added as a pc-dimm slot memory.

Hi Shameer,

iiuc, the concern is that the valid region list may look something like
this made up example:

 0x000e0000000 0x000000011fffffff
 0x00180000000 0x00000002ffffffff
 0x02000000000 0x0000011fffffffff
 0x22000000000 0xffffffffffffffff

You then want to avoid overlapping the holes with memory, because the
holes represent regions that may have platform devices assigned, which
need to keep 1:1 gpa -> hpa mappings.

If that's the case, then I think the biggest problem with the made up
example is the initial hole [0x0 0xdfffffff], as that would overlap the
currently hard coded base of RAM (0x40000000). Indeed we were thinking
that the base of RAM would be modeled as a non-pluggable dimm of size
1G at the 1G boundary. As for the rest of memory (for memory > 1G), in
the made up example we could just allocate a single pc-dimm from a memory
region representing the [0x22000000000 0xffffffffffffffff] range without
any hassle.

> Having gone through
> the Qemu source, I am not sure what is the best way to accomplish that.
> (of course I am not that familiar with the Qemu source). Is it ok to invoke
> qdev_device_add() from here? Any pointers on this is very appreciated.

I'm not an expert on that (nor any of this stuff, really - hopefully what
I've written above isn't completely bonkers). I've CC'ed Igor for help.
I've also CC'ed Eric, because of the magic word 'vfio'.

> 
> One more point is, considering the fact that ARM64 linux kernel still doesn't
> support hotplug memory at the moment, not sure how much we gain
> from the pc-dimm model.

It will eventually. Moving towards the pc-dimm model now, in order to
handle the non-contiguous memory needs, will go a long ways to enabling
hotplug later.

Thanks,
drew

> 
> Please let me know your thoughts on this.
> 
> Thanks,
> Shameer
> 
> [1]. https://lkml.org/lkml/2018/4/18/293
Shameerali Kolothum Thodi April 25, 2018, 1:24 p.m. UTC | #5
Hi Drew,

> -----Original Message-----
> From: Andrew Jones [mailto:drjones@redhat.com]
> Sent: Tuesday, April 24, 2018 4:30 PM
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: peter.maydell@linaro.org; qemu-devel@nongnu.org; Linuxarm
> <linuxarm@huawei.com>; eric.auger@redhat.com; qemu-arm@nongnu.org;
> xuwei (O) <xuwei5@huawei.com>; Zhaoshenglong
> <zhaoshenglong@huawei.com>; Igor Mammedov <imammedo@redhat.com>
> Subject: Re: [Qemu-devel] [RFC 3/5] hw/arm: add scattered RAM memory
> region support
> 
> On Thu, Apr 19, 2018 at 09:06:30AM +0000, Shameerali Kolothum Thodi wrote:
> > > From: Andrew Jones [mailto:drjones@redhat.com] Instead of using
> > > memory region aliases, it would be best if each RAM region was
> > > modeled with pc-dimms, as that would move us towards supporting
> > > memory hotplug and allow the regions to be explicitly identified
> > > (start/size) on the command line - supporting migration. Actually,
> > > how does this series address migration? What if the host we migrate
> > > to doesn't have the same reserved regions in sysfs?
> >
> > Thanks for going through this series and comments.
> >
> > I am looking into reviving this series based on the new proposed vfio
> > iova interface[1]. The vfio interface will now provide  a list of valid iova
> ranges.
> > That means, instead of working on reserved regions to find out the
> > valid  memory regions, the code here will have the valid regions list directly.
> >
> > The above comment of yours mentions about modelling the memory regions
> > with pc-dimms. If I understand that proposal correctly, in case the
> > iova list has  multiple entries in it(means there are holes in the
> > memory) the extra regions has to be added as a pc-dimm slot memory.
> 
> Hi Shameer,
> 
> iiuc, the concern is that the valid region list may look something like this made
> up example:
> 
>  0x000e0000000 0x000000011fffffff
>  0x00180000000 0x00000002ffffffff
>  0x02000000000 0x0000011fffffffff
>  0x22000000000 0xffffffffffffffff
> 
> You then want to avoid overlapping the holes with memory, because the holes
> represent regions that may have platform devices assigned, which need to keep
> 1:1 gpa -> hpa mappings.

Yes, the holes are basically reserved regions not translated by iommu.

> If that's the case, then I think the biggest problem with the made up example is
> the initial hole [0x0 0xdfffffff], as that would overlap the currently hard coded
> base of RAM (0x40000000). Indeed we were thinking that the base of RAM
> would be modeled as a non-pluggable dimm of size 1G at the 1G boundary. As
> for the rest of memory (for memory > 1G), in the made up example we could
> just allocate a single pc-dimm from a memory region representing the
> [0x22000000000 0xffffffffffffffff] range without any hassle.

Right. I was more worried about a highly fragmented case where we have to
model multiple pc-dimms to accommodate all the required memory. But that
may be a highly unlikely scenario.
 
> > Having gone through
> > the Qemu source, I am not sure what is the best way to accomplish that.
> > (of course I am not that familiar with the Qemu source). Is it ok to
> > invoke
> > qdev_device_add() from here? Any pointers on this is very appreciated.
> 
> I'm not an expert on that (nor any of this stuff, really - hopefully what I've
> written above isn't completely bonkers). I've CC'ed Igor for help.
> I've also CC'ed Eric, because of the magic word 'vfio'.

Thanks. I am moving in this direction at the moment.  Please let me know
if there is a better way of doing this, than invoking user_creatable_add_opts()
for mem backend object creation and qdev_device_add() for pc-dimm dev.

> > One more point is, considering the fact that ARM64 linux kernel still
> > doesn't support hotplug memory at the moment, not sure how much we
> > gain from the pc-dimm model.
> 
> It will eventually. Moving towards the pc-dimm model now, in order to handle
> the non-contiguous memory needs, will go a long ways to enabling hotplug
> later.

Ok. I will try to come up with something and will post as an RFC. 

Appreciate your feedback,
Shameer

> 
> Thanks,
> drew
> 
> >
> > Please let me know your thoughts on this.
> >
> > Thanks,
> > Shameer
> >
> > [1]. https://lkml.org/lkml/2018/4/18/293
diff mbox

Patch

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index c2720c8..30438f4 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -836,6 +836,14 @@  static void arm_load_kernel_notify(Notifier *notifier, void *data)
      */
     assert(!(info->secure_board_setup && kvm_enabled()));
 
+    /* If machine is not virt, the mem_list will empty. */
+    if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
+        RAMRegion *new = g_new(RAMRegion, 1);
+        new->base = info->loader_start;
+        new->size = info->ram_size;
+        QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
+    }
+
     info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb");
 
     /* Load the kernel.  */
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ddde5e1..ff334c1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -56,6 +56,7 @@ 
 #include "hw/smbios/smbios.h"
 #include "qapi/visitor.h"
 #include "standard-headers/linux/input.h"
+#include "hw/vfio/vfio-common.h"
 
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
     static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
@@ -1225,6 +1226,98 @@  void virt_machine_done(Notifier *notifier, void *data)
     virt_build_smbios(vms);
 }
 
+static void handle_reserved_ram_region_overlap(void)
+{
+    hwaddr cur_end, next_end;
+    RAMRegion *reg, *next_reg, *tmp_reg;
+
+    QLIST_FOREACH(reg, &reserved_ram_regions, next) {
+        next_reg = QLIST_NEXT(reg, next);
+
+        while (next_reg && next_reg->base <= (reg->base + reg->size)) {
+            next_end = next_reg->base + next_reg->size;
+            cur_end = reg->base + reg->size;
+            if (next_end > cur_end) {
+                reg->size += (next_end - cur_end);
+            }
+
+            tmp_reg = QLIST_NEXT(next_reg, next);
+            QLIST_REMOVE(next_reg, next);
+            g_free(next_reg);
+            next_reg = tmp_reg;
+        }
+    }
+}
+
+static void update_memory_regions(VirtMachineState *vms, hwaddr ram_size)
+{
+
+    RAMRegion *new, *reg, *last = NULL;
+    hwaddr virt_start, virt_end;
+    virt_start = vms->memmap[VIRT_MEM].base;
+    virt_end = virt_start + ram_size - 1;
+
+    handle_reserved_ram_region_overlap();
+
+    QLIST_FOREACH(reg, &reserved_ram_regions, next) {
+        if (reg->base >= virt_start && reg->base < virt_end) {
+            if (reg->base == virt_start) {
+                virt_start += reg->size;
+                virt_end += reg->size;
+                continue;
+            } else {
+                new = g_new(RAMRegion, 1);
+                new->base = virt_start;
+                new->size = reg->base - virt_start;
+                virt_start = reg->base + reg->size;
+            }
+
+            if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
+                QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
+            } else {
+                QLIST_INSERT_AFTER(last, new, next);
+            }
+
+            last = new;
+            ram_size -= new->size;
+            virt_end += reg->size;
+        }
+    }
+
+    if (ram_size > 0) {
+        new = g_new(RAMRegion, 1);
+        new->base = virt_start;
+        new->size = ram_size;
+
+        if (QLIST_EMPTY(&vms->bootinfo.mem_list)) {
+            QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next);
+        } else {
+            QLIST_INSERT_AFTER(last, new, next);
+        }
+    }
+}
+
+static void create_ram_alias(VirtMachineState *vms,
+                             MemoryRegion *sysmem,
+                             MemoryRegion *ram)
+{
+    RAMRegion *reg;
+    MemoryRegion *ram_memory;
+    char *nodename;
+    hwaddr sz = 0;
+
+    QLIST_FOREACH(reg, &vms->bootinfo.mem_list, next) {
+        nodename = g_strdup_printf("ram@%" PRIx64, reg->base);
+        ram_memory = g_new(MemoryRegion, 1);
+        memory_region_init_alias(ram_memory, NULL, nodename, ram, sz,
+                                 reg->size);
+        memory_region_add_subregion(sysmem, reg->base, ram_memory);
+        sz += reg->size;
+
+        g_free(nodename);
+    }
+}
+
 static void virt_ram_memory_region_init(Notifier *notifier, void *data)
 {
     MachineState *machine = MACHINE(qdev_get_machine());
@@ -1232,10 +1325,15 @@  static void virt_ram_memory_region_init(Notifier *notifier, void *data)
     MemoryRegion *ram = g_new(MemoryRegion, 1);
     VirtMachineState *vms = container_of(notifier, VirtMachineState,
                                          ram_memory_region_init);
+    RAMRegion *first_mem_reg;
 
     memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram",
                                          machine->ram_size);
-    memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram);
+    update_memory_regions(vms, machine->ram_size);
+    create_ram_alias(vms, sysmem, ram);
+
+    first_mem_reg = QLIST_FIRST(&vms->bootinfo.mem_list);
+    vms->bootinfo.loader_start = first_mem_reg->base;
 }
 
 static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx)
@@ -1458,7 +1556,6 @@  static void machvirt_init(MachineState *machine)
     vms->bootinfo.initrd_filename = machine->initrd_filename;
     vms->bootinfo.nb_cpus = smp_cpus;
     vms->bootinfo.board_id = -1;
-    vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
     vms->bootinfo.get_dtb = machvirt_dtb;
     vms->bootinfo.firmware_loaded = firmware_loaded;
     arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);
diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index ce769bd..d953026 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -124,6 +124,7 @@  struct arm_boot_info {
     bool secure_board_setup;
 
     arm_endianness endianness;
+    QLIST_HEAD(, RAMRegion) mem_list;
 };
 
 /**