Message ID | 1510622154-17224-4-git-send-email-zhuyijun@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Nov 14, 2017 at 09:15:52AM +0800, zhuyijun@huawei.com wrote: > From: Zhu Yijun <zhuyijun@huawei.com> > > Dig out reserved memory holes and collect scattered RAM memory > regions by adding mem_list member in arm_boot_info struct. > > Signed-off-by: Zhu Yijun <zhuyijun@huawei.com> > --- > hw/arm/boot.c | 8 ++++ > hw/arm/virt.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++- > include/hw/arm/arm.h | 1 + > 3 files changed, 108 insertions(+), 2 deletions(-) > > diff --git a/hw/arm/boot.c b/hw/arm/boot.c > index c2720c8..30438f4 100644 > --- a/hw/arm/boot.c > +++ b/hw/arm/boot.c > @@ -836,6 +836,14 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) > */ > assert(!(info->secure_board_setup && kvm_enabled())); > > + /* If machine is not virt, the mem_list will empty. */ > + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { > + RAMRegion *new = g_new(RAMRegion, 1); > + new->base = info->loader_start; > + new->size = info->ram_size; > + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); > + } > + > info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb"); > > /* Load the kernel. */ > diff --git a/hw/arm/virt.c b/hw/arm/virt.c > index ddde5e1..ff334c1 100644 > --- a/hw/arm/virt.c > +++ b/hw/arm/virt.c > @@ -56,6 +56,7 @@ > #include "hw/smbios/smbios.h" > #include "qapi/visitor.h" > #include "standard-headers/linux/input.h" > +#include "hw/vfio/vfio-common.h" > > #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ > static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ > @@ -1225,6 +1226,98 @@ void virt_machine_done(Notifier *notifier, void *data) > virt_build_smbios(vms); > } > > +static void handle_reserved_ram_region_overlap(void) > +{ > + hwaddr cur_end, next_end; > + RAMRegion *reg, *next_reg, *tmp_reg; > + > + QLIST_FOREACH(reg, &reserved_ram_regions, next) { > + next_reg = QLIST_NEXT(reg, next); > + > + while (next_reg && next_reg->base <= (reg->base + reg->size)) { > + next_end = next_reg->base + next_reg->size; > + cur_end = reg->base + reg->size; > + if (next_end > cur_end) { > + reg->size += (next_end - cur_end); > + } > + > + tmp_reg = QLIST_NEXT(next_reg, next); > + QLIST_REMOVE(next_reg, next); > + g_free(next_reg); > + next_reg = tmp_reg; > + } > + } > +} Why isn't the above integrated into the reserved ram region insertion? > + > +static void update_memory_regions(VirtMachineState *vms, hwaddr ram_size) > +{ > + > + RAMRegion *new, *reg, *last = NULL; > + hwaddr virt_start, virt_end; Either need a blank line here, or to initialize virt_start/end on the declaration lines. > + virt_start = vms->memmap[VIRT_MEM].base; > + virt_end = virt_start + ram_size - 1; > + > + handle_reserved_ram_region_overlap(); > + > + QLIST_FOREACH(reg, &reserved_ram_regions, next) { > + if (reg->base >= virt_start && reg->base < virt_end) { What about the case where reg->base + reg->size > virt_start? > + if (reg->base == virt_start) { > + virt_start += reg->size; We can't move virt_start without breaking AAVMF. > + virt_end += reg->size; We can't increase virt_end without checking it against RAMLIMIT. > + continue; > + } else { > + new = g_new(RAMRegion, 1); > + new->base = virt_start; > + new->size = reg->base - virt_start; > + virt_start = reg->base + reg->size; > + } > + > + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { > + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); > + } else { > + QLIST_INSERT_AFTER(last, new, next); > + } > + > + last = new; > + ram_size -= new->size; > + virt_end += reg->size; > + } > + } > + > + if (ram_size > 0) { > + new = g_new(RAMRegion, 1); > + new->base = virt_start; > + new->size = ram_size; > + > + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { > + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); > + } else { > + QLIST_INSERT_AFTER(last, new, next); > + } > + } Where's the else? ram_size <= 0 is not likely something we should ignore. > +} > + > +static void create_ram_alias(VirtMachineState *vms, > + MemoryRegion *sysmem, > + MemoryRegion *ram) > +{ > + RAMRegion *reg; > + MemoryRegion *ram_memory; > + char *nodename; > + hwaddr sz = 0; > + > + QLIST_FOREACH(reg, &vms->bootinfo.mem_list, next) { > + nodename = g_strdup_printf("ram@%" PRIx64, reg->base); > + ram_memory = g_new(MemoryRegion, 1); > + memory_region_init_alias(ram_memory, NULL, nodename, ram, sz, > + reg->size); > + memory_region_add_subregion(sysmem, reg->base, ram_memory); > + sz += reg->size; > + > + g_free(nodename); > + } > +} Instead of using memory region aliases, it would be best if each RAM region was modeled with pc-dimms, as that would move us towards supporting memory hotplug and allow the regions to be explicitly identified (start/size) on the command line - supporting migration. Actually, how does this series address migration? What if the host we migrate to doesn't have the same reserved regions in sysfs? > + > static void virt_ram_memory_region_init(Notifier *notifier, void *data) > { > MachineState *machine = MACHINE(qdev_get_machine()); > @@ -1232,10 +1325,15 @@ static void virt_ram_memory_region_init(Notifier *notifier, void *data) > MemoryRegion *ram = g_new(MemoryRegion, 1); > VirtMachineState *vms = container_of(notifier, VirtMachineState, > ram_memory_region_init); > + RAMRegion *first_mem_reg; > > memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram", > machine->ram_size); > - memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram); > + update_memory_regions(vms, machine->ram_size); > + create_ram_alias(vms, sysmem, ram); > + > + first_mem_reg = QLIST_FIRST(&vms->bootinfo.mem_list); > + vms->bootinfo.loader_start = first_mem_reg->base; > } > > static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) > @@ -1458,7 +1556,6 @@ static void machvirt_init(MachineState *machine) > vms->bootinfo.initrd_filename = machine->initrd_filename; > vms->bootinfo.nb_cpus = smp_cpus; > vms->bootinfo.board_id = -1; > - vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; > vms->bootinfo.get_dtb = machvirt_dtb; > vms->bootinfo.firmware_loaded = firmware_loaded; > arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo); > diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h > index ce769bd..d953026 100644 > --- a/include/hw/arm/arm.h > +++ b/include/hw/arm/arm.h > @@ -124,6 +124,7 @@ struct arm_boot_info { > bool secure_board_setup; > > arm_endianness endianness; > + QLIST_HEAD(, RAMRegion) mem_list; > }; > > /** > -- > 1.8.3.1 > > > Thanks, drew
On 2017/11/14 22:50, Andrew Jones wrote: > On Tue, Nov 14, 2017 at 09:15:52AM +0800, zhuyijun@huawei.com wrote: >> From: Zhu Yijun <zhuyijun@huawei.com> >> >> Dig out reserved memory holes and collect scattered RAM memory >> regions by adding mem_list member in arm_boot_info struct. >> >> Signed-off-by: Zhu Yijun <zhuyijun@huawei.com> >> --- >> hw/arm/boot.c | 8 ++++ >> hw/arm/virt.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++- >> include/hw/arm/arm.h | 1 + >> 3 files changed, 108 insertions(+), 2 deletions(-) >> >> diff --git a/hw/arm/boot.c b/hw/arm/boot.c >> index c2720c8..30438f4 100644 >> --- a/hw/arm/boot.c >> +++ b/hw/arm/boot.c >> @@ -836,6 +836,14 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) >> */ >> assert(!(info->secure_board_setup && kvm_enabled())); >> >> + /* If machine is not virt, the mem_list will empty. */ >> + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { >> + RAMRegion *new = g_new(RAMRegion, 1); >> + new->base = info->loader_start; >> + new->size = info->ram_size; >> + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); >> + } >> + >> info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb"); >> >> /* Load the kernel. */ >> diff --git a/hw/arm/virt.c b/hw/arm/virt.c >> index ddde5e1..ff334c1 100644 >> --- a/hw/arm/virt.c >> +++ b/hw/arm/virt.c >> @@ -56,6 +56,7 @@ >> #include "hw/smbios/smbios.h" >> #include "qapi/visitor.h" >> #include "standard-headers/linux/input.h" >> +#include "hw/vfio/vfio-common.h" >> >> #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ >> static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ >> @@ -1225,6 +1226,98 @@ void virt_machine_done(Notifier *notifier, void *data) >> virt_build_smbios(vms); >> } >> >> +static void handle_reserved_ram_region_overlap(void) >> +{ >> + hwaddr cur_end, next_end; >> + RAMRegion *reg, *next_reg, *tmp_reg; >> + >> + QLIST_FOREACH(reg, &reserved_ram_regions, next) { >> + next_reg = QLIST_NEXT(reg, next); >> + >> + while (next_reg && next_reg->base <= (reg->base + reg->size)) { >> + next_end = next_reg->base + next_reg->size; >> + cur_end = reg->base + reg->size; >> + if (next_end > cur_end) { >> + reg->size += (next_end - cur_end); >> + } >> + >> + tmp_reg = QLIST_NEXT(next_reg, next); >> + QLIST_REMOVE(next_reg, next); >> + g_free(next_reg); >> + next_reg = tmp_reg; >> + } >> + } >> +} > Why isn't the above integrated into the reserved ram region insertion? We can do it once all the reserved regions are captured and then update the list once for any overlaps. >> + >> +static void update_memory_regions(VirtMachineState *vms, hwaddr ram_size) >> +{ >> + >> + RAMRegion *new, *reg, *last = NULL; >> + hwaddr virt_start, virt_end; > Either need a blank line here, or to initialize virt_start/end on the > declaration lines. OK >> + virt_start = vms->memmap[VIRT_MEM].base; >> + virt_end = virt_start + ram_size - 1; >> + >> + handle_reserved_ram_region_overlap(); >> + >> + QLIST_FOREACH(reg, &reserved_ram_regions, next) { >> + if (reg->base >= virt_start && reg->base < virt_end) { > What about the case where reg->base + reg->size > virt_start? I will add the case next version. >> + if (reg->base == virt_start) { >> + virt_start += reg->size; > We can't move virt_start without breaking AAVMF. But it may exist reserved memory region which begins at 0x40000000 in theory. > >> + virt_end += reg->size; > We can't increase virt_end without checking it against RAMLIMIT. > >> + continue; >> + } else { >> + new = g_new(RAMRegion, 1); >> + new->base = virt_start; >> + new->size = reg->base - virt_start; >> + virt_start = reg->base + reg->size; >> + } >> + >> + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { >> + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); >> + } else { >> + QLIST_INSERT_AFTER(last, new, next); >> + } >> + >> + last = new; >> + ram_size -= new->size; >> + virt_end += reg->size; >> + } >> + } >> + >> + if (ram_size > 0) { >> + new = g_new(RAMRegion, 1); >> + new->base = virt_start; >> + new->size = ram_size; >> + >> + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { >> + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); >> + } else { >> + QLIST_INSERT_AFTER(last, new, next); >> + } >> + } > Where's the else? ram_size <= 0 is not likely something we should ignore. ok, will add it. >> +} >> + >> +static void create_ram_alias(VirtMachineState *vms, >> + MemoryRegion *sysmem, >> + MemoryRegion *ram) >> +{ >> + RAMRegion *reg; >> + MemoryRegion *ram_memory; >> + char *nodename; >> + hwaddr sz = 0; >> + >> + QLIST_FOREACH(reg, &vms->bootinfo.mem_list, next) { >> + nodename = g_strdup_printf("ram@%" PRIx64, reg->base); >> + ram_memory = g_new(MemoryRegion, 1); >> + memory_region_init_alias(ram_memory, NULL, nodename, ram, sz, >> + reg->size); >> + memory_region_add_subregion(sysmem, reg->base, ram_memory); >> + sz += reg->size; >> + >> + g_free(nodename); >> + } >> +} > Instead of using memory region aliases, it would be best if each RAM > region was modeled with pc-dimms, as that would move us towards supporting > memory hotplug and allow the regions to be explicitly identified > (start/size) on the command line - supporting migration. Actually, how > does this series address migration? What if the host we migrate to doesn't > have the same reserved regions in sysfs? I did not consider memory hotplug and migration before, thinks for pointing it out to me. >> + >> static void virt_ram_memory_region_init(Notifier *notifier, void *data) >> { >> MachineState *machine = MACHINE(qdev_get_machine()); >> @@ -1232,10 +1325,15 @@ static void virt_ram_memory_region_init(Notifier *notifier, void *data) >> MemoryRegion *ram = g_new(MemoryRegion, 1); >> VirtMachineState *vms = container_of(notifier, VirtMachineState, >> ram_memory_region_init); >> + RAMRegion *first_mem_reg; >> >> memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram", >> machine->ram_size); >> - memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram); >> + update_memory_regions(vms, machine->ram_size); >> + create_ram_alias(vms, sysmem, ram); >> + >> + first_mem_reg = QLIST_FIRST(&vms->bootinfo.mem_list); >> + vms->bootinfo.loader_start = first_mem_reg->base; >> } >> >> static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) >> @@ -1458,7 +1556,6 @@ static void machvirt_init(MachineState *machine) >> vms->bootinfo.initrd_filename = machine->initrd_filename; >> vms->bootinfo.nb_cpus = smp_cpus; >> vms->bootinfo.board_id = -1; >> - vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; >> vms->bootinfo.get_dtb = machvirt_dtb; >> vms->bootinfo.firmware_loaded = firmware_loaded; >> arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo); >> diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h >> index ce769bd..d953026 100644 >> --- a/include/hw/arm/arm.h >> +++ b/include/hw/arm/arm.h >> @@ -124,6 +124,7 @@ struct arm_boot_info { >> bool secure_board_setup; >> >> arm_endianness endianness; >> + QLIST_HEAD(, RAMRegion) mem_list; >> }; >> >> /** >> -- >> 1.8.3.1 >> >> >> > Thanks, > drew > > . >
Hi Andrew, > -----Original Message----- > From: Andrew Jones [mailto:drjones@redhat.com] > Sent: Tuesday, November 14, 2017 2:50 PM > To: Zhuyijun <zhuyijun@huawei.com> > Cc: qemu-arm@nongnu.org; qemu-devel@nongnu.org; > eric.auger@redhat.com; peter.maydell@linaro.org; Shameerali Kolothum > Thodi <shameerali.kolothum.thodi@huawei.com>; Zhaoshenglong > <zhaoshenglong@huawei.com> > Subject: Re: [Qemu-devel] [RFC 3/5] hw/arm: add scattered RAM memory > region support > > On Tue, Nov 14, 2017 at 09:15:52AM +0800, zhuyijun@huawei.com wrote: > > From: Zhu Yijun <zhuyijun@huawei.com> > > > > Dig out reserved memory holes and collect scattered RAM memory > > regions by adding mem_list member in arm_boot_info struct. > > > > Signed-off-by: Zhu Yijun <zhuyijun@huawei.com> > > --- > > hw/arm/boot.c | 8 ++++ > > hw/arm/virt.c | 101 > ++++++++++++++++++++++++++++++++++++++++++++++++++- > > include/hw/arm/arm.h | 1 + > > 3 files changed, 108 insertions(+), 2 deletions(-) > > > > diff --git a/hw/arm/boot.c b/hw/arm/boot.c > > index c2720c8..30438f4 100644 > > --- a/hw/arm/boot.c > > +++ b/hw/arm/boot.c > > @@ -836,6 +836,14 @@ static void arm_load_kernel_notify(Notifier > *notifier, void *data) > > */ > > assert(!(info->secure_board_setup && kvm_enabled())); > > > > + /* If machine is not virt, the mem_list will empty. */ > > + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { > > + RAMRegion *new = g_new(RAMRegion, 1); > > + new->base = info->loader_start; > > + new->size = info->ram_size; > > + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); > > + } > > + > > info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb"); > > > > /* Load the kernel. */ > > diff --git a/hw/arm/virt.c b/hw/arm/virt.c > > index ddde5e1..ff334c1 100644 > > --- a/hw/arm/virt.c > > +++ b/hw/arm/virt.c > > @@ -56,6 +56,7 @@ > > #include "hw/smbios/smbios.h" > > #include "qapi/visitor.h" > > #include "standard-headers/linux/input.h" > > +#include "hw/vfio/vfio-common.h" > > > > #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ > > static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ > > @@ -1225,6 +1226,98 @@ void virt_machine_done(Notifier *notifier, void > *data) > > virt_build_smbios(vms); > > } > > > > +static void handle_reserved_ram_region_overlap(void) > > +{ > > + hwaddr cur_end, next_end; > > + RAMRegion *reg, *next_reg, *tmp_reg; > > + > > + QLIST_FOREACH(reg, &reserved_ram_regions, next) { > > + next_reg = QLIST_NEXT(reg, next); > > + > > + while (next_reg && next_reg->base <= (reg->base + reg->size)) { > > + next_end = next_reg->base + next_reg->size; > > + cur_end = reg->base + reg->size; > > + if (next_end > cur_end) { > > + reg->size += (next_end - cur_end); > > + } > > + > > + tmp_reg = QLIST_NEXT(next_reg, next); > > + QLIST_REMOVE(next_reg, next); > > + g_free(next_reg); > > + next_reg = tmp_reg; > > + } > > + } > > +} > > Why isn't the above integrated into the reserved ram region insertion? > > > + > > +static void update_memory_regions(VirtMachineState *vms, hwaddr > ram_size) > > +{ > > + > > + RAMRegion *new, *reg, *last = NULL; > > + hwaddr virt_start, virt_end; > > Either need a blank line here, or to initialize virt_start/end on the > declaration lines. > > > + virt_start = vms->memmap[VIRT_MEM].base; > > + virt_end = virt_start + ram_size - 1; > > + > > + handle_reserved_ram_region_overlap(); > > + > > + QLIST_FOREACH(reg, &reserved_ram_regions, next) { > > + if (reg->base >= virt_start && reg->base < virt_end) { > > What about the case where reg->base + reg->size > virt_start? > > > + if (reg->base == virt_start) { > > + virt_start += reg->size; > > We can't move virt_start without breaking AAVMF. > > > + virt_end += reg->size; > > We can't increase virt_end without checking it against RAMLIMIT. > > > + continue; > > + } else { > > + new = g_new(RAMRegion, 1); > > + new->base = virt_start; > > + new->size = reg->base - virt_start; > > + virt_start = reg->base + reg->size; > > + } > > + > > + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { > > + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); > > + } else { > > + QLIST_INSERT_AFTER(last, new, next); > > + } > > + > > + last = new; > > + ram_size -= new->size; > > + virt_end += reg->size; > > + } > > + } > > + > > + if (ram_size > 0) { > > + new = g_new(RAMRegion, 1); > > + new->base = virt_start; > > + new->size = ram_size; > > + > > + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { > > + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); > > + } else { > > + QLIST_INSERT_AFTER(last, new, next); > > + } > > + } > > Where's the else? ram_size <= 0 is not likely something we should ignore. > > > +} > > + > > +static void create_ram_alias(VirtMachineState *vms, > > + MemoryRegion *sysmem, > > + MemoryRegion *ram) > > +{ > > + RAMRegion *reg; > > + MemoryRegion *ram_memory; > > + char *nodename; > > + hwaddr sz = 0; > > + > > + QLIST_FOREACH(reg, &vms->bootinfo.mem_list, next) { > > + nodename = g_strdup_printf("ram@%" PRIx64, reg->base); > > + ram_memory = g_new(MemoryRegion, 1); > > + memory_region_init_alias(ram_memory, NULL, nodename, ram, sz, > > + reg->size); > > + memory_region_add_subregion(sysmem, reg->base, ram_memory); > > + sz += reg->size; > > + > > + g_free(nodename); > > + } > > +} > > Instead of using memory region aliases, it would be best if each RAM > region was modeled with pc-dimms, as that would move us towards supporting > memory hotplug and allow the regions to be explicitly identified > (start/size) on the command line - supporting migration. Actually, how > does this series address migration? What if the host we migrate to doesn't > have the same reserved regions in sysfs? Thanks for going through this series and comments. I am looking into reviving this series based on the new proposed vfio iova interface[1]. The vfio interface will now provide a list of valid iova ranges. That means, instead of working on reserved regions to find out the valid memory regions, the code here will have the valid regions list directly. The above comment of yours mentions about modelling the memory regions with pc-dimms. If I understand that proposal correctly, in case the iova list has multiple entries in it(means there are holes in the memory) the extra regions has to be added as a pc-dimm slot memory. Having gone through the Qemu source, I am not sure what is the best way to accomplish that. (of course I am not that familiar with the Qemu source). Is it ok to invoke qdev_device_add() from here? Any pointers on this is very appreciated. One more point is, considering the fact that ARM64 linux kernel still doesn't support hotplug memory at the moment, not sure how much we gain from the pc-dimm model. Please let me know your thoughts on this. Thanks, Shameer [1]. https://lkml.org/lkml/2018/4/18/293 > > + > > static void virt_ram_memory_region_init(Notifier *notifier, void *data) > > { > > MachineState *machine = MACHINE(qdev_get_machine()); > > @@ -1232,10 +1325,15 @@ static void > virt_ram_memory_region_init(Notifier *notifier, void *data) > > MemoryRegion *ram = g_new(MemoryRegion, 1); > > VirtMachineState *vms = container_of(notifier, VirtMachineState, > > ram_memory_region_init); > > + RAMRegion *first_mem_reg; > > > > memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram", > > machine->ram_size); > > - memory_region_add_subregion(sysmem, vms- > >memmap[VIRT_MEM].base, ram); > > + update_memory_regions(vms, machine->ram_size); > > + create_ram_alias(vms, sysmem, ram); > > + > > + first_mem_reg = QLIST_FIRST(&vms->bootinfo.mem_list); > > + vms->bootinfo.loader_start = first_mem_reg->base; > > } > > > > static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) > > @@ -1458,7 +1556,6 @@ static void machvirt_init(MachineState *machine) > > vms->bootinfo.initrd_filename = machine->initrd_filename; > > vms->bootinfo.nb_cpus = smp_cpus; > > vms->bootinfo.board_id = -1; > > - vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; > > vms->bootinfo.get_dtb = machvirt_dtb; > > vms->bootinfo.firmware_loaded = firmware_loaded; > > arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo); > > diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h > > index ce769bd..d953026 100644 > > --- a/include/hw/arm/arm.h > > +++ b/include/hw/arm/arm.h > > @@ -124,6 +124,7 @@ struct arm_boot_info { > > bool secure_board_setup; > > > > arm_endianness endianness; > > + QLIST_HEAD(, RAMRegion) mem_list; > > }; > > > > /** > > -- > > 1.8.3.1 > > > > > > > > Thanks, > drew
On Thu, Apr 19, 2018 at 09:06:30AM +0000, Shameerali Kolothum Thodi wrote: > > From: Andrew Jones [mailto:drjones@redhat.com] > > Instead of using memory region aliases, it would be best if each RAM > > region was modeled with pc-dimms, as that would move us towards supporting > > memory hotplug and allow the regions to be explicitly identified > > (start/size) on the command line - supporting migration. Actually, how > > does this series address migration? What if the host we migrate to doesn't > > have the same reserved regions in sysfs? > > Thanks for going through this series and comments. > > I am looking into reviving this series based on the new proposed vfio iova > interface[1]. The vfio interface will now provide a list of valid iova ranges. > That means, instead of working on reserved regions to find out the valid > memory regions, the code here will have the valid regions list directly. > > The above comment of yours mentions about modelling the memory regions > with pc-dimms. If I understand that proposal correctly, in case the iova list > has multiple entries in it(means there are holes in the memory) the extra > regions has to be added as a pc-dimm slot memory. Hi Shameer, iiuc, the concern is that the valid region list may look something like this made up example: 0x000e0000000 0x000000011fffffff 0x00180000000 0x00000002ffffffff 0x02000000000 0x0000011fffffffff 0x22000000000 0xffffffffffffffff You then want to avoid overlapping the holes with memory, because the holes represent regions that may have platform devices assigned, which need to keep 1:1 gpa -> hpa mappings. If that's the case, then I think the biggest problem with the made up example is the initial hole [0x0 0xdfffffff], as that would overlap the currently hard coded base of RAM (0x40000000). Indeed we were thinking that the base of RAM would be modeled as a non-pluggable dimm of size 1G at the 1G boundary. As for the rest of memory (for memory > 1G), in the made up example we could just allocate a single pc-dimm from a memory region representing the [0x22000000000 0xffffffffffffffff] range without any hassle. > Having gone through > the Qemu source, I am not sure what is the best way to accomplish that. > (of course I am not that familiar with the Qemu source). Is it ok to invoke > qdev_device_add() from here? Any pointers on this is very appreciated. I'm not an expert on that (nor any of this stuff, really - hopefully what I've written above isn't completely bonkers). I've CC'ed Igor for help. I've also CC'ed Eric, because of the magic word 'vfio'. > > One more point is, considering the fact that ARM64 linux kernel still doesn't > support hotplug memory at the moment, not sure how much we gain > from the pc-dimm model. It will eventually. Moving towards the pc-dimm model now, in order to handle the non-contiguous memory needs, will go a long ways to enabling hotplug later. Thanks, drew > > Please let me know your thoughts on this. > > Thanks, > Shameer > > [1]. https://lkml.org/lkml/2018/4/18/293
Hi Drew, > -----Original Message----- > From: Andrew Jones [mailto:drjones@redhat.com] > Sent: Tuesday, April 24, 2018 4:30 PM > To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com> > Cc: peter.maydell@linaro.org; qemu-devel@nongnu.org; Linuxarm > <linuxarm@huawei.com>; eric.auger@redhat.com; qemu-arm@nongnu.org; > xuwei (O) <xuwei5@huawei.com>; Zhaoshenglong > <zhaoshenglong@huawei.com>; Igor Mammedov <imammedo@redhat.com> > Subject: Re: [Qemu-devel] [RFC 3/5] hw/arm: add scattered RAM memory > region support > > On Thu, Apr 19, 2018 at 09:06:30AM +0000, Shameerali Kolothum Thodi wrote: > > > From: Andrew Jones [mailto:drjones@redhat.com] Instead of using > > > memory region aliases, it would be best if each RAM region was > > > modeled with pc-dimms, as that would move us towards supporting > > > memory hotplug and allow the regions to be explicitly identified > > > (start/size) on the command line - supporting migration. Actually, > > > how does this series address migration? What if the host we migrate > > > to doesn't have the same reserved regions in sysfs? > > > > Thanks for going through this series and comments. > > > > I am looking into reviving this series based on the new proposed vfio > > iova interface[1]. The vfio interface will now provide a list of valid iova > ranges. > > That means, instead of working on reserved regions to find out the > > valid memory regions, the code here will have the valid regions list directly. > > > > The above comment of yours mentions about modelling the memory regions > > with pc-dimms. If I understand that proposal correctly, in case the > > iova list has multiple entries in it(means there are holes in the > > memory) the extra regions has to be added as a pc-dimm slot memory. > > Hi Shameer, > > iiuc, the concern is that the valid region list may look something like this made > up example: > > 0x000e0000000 0x000000011fffffff > 0x00180000000 0x00000002ffffffff > 0x02000000000 0x0000011fffffffff > 0x22000000000 0xffffffffffffffff > > You then want to avoid overlapping the holes with memory, because the holes > represent regions that may have platform devices assigned, which need to keep > 1:1 gpa -> hpa mappings. Yes, the holes are basically reserved regions not translated by iommu. > If that's the case, then I think the biggest problem with the made up example is > the initial hole [0x0 0xdfffffff], as that would overlap the currently hard coded > base of RAM (0x40000000). Indeed we were thinking that the base of RAM > would be modeled as a non-pluggable dimm of size 1G at the 1G boundary. As > for the rest of memory (for memory > 1G), in the made up example we could > just allocate a single pc-dimm from a memory region representing the > [0x22000000000 0xffffffffffffffff] range without any hassle. Right. I was more worried about a highly fragmented case where we have to model multiple pc-dimms to accommodate all the required memory. But that may be a highly unlikely scenario. > > Having gone through > > the Qemu source, I am not sure what is the best way to accomplish that. > > (of course I am not that familiar with the Qemu source). Is it ok to > > invoke > > qdev_device_add() from here? Any pointers on this is very appreciated. > > I'm not an expert on that (nor any of this stuff, really - hopefully what I've > written above isn't completely bonkers). I've CC'ed Igor for help. > I've also CC'ed Eric, because of the magic word 'vfio'. Thanks. I am moving in this direction at the moment. Please let me know if there is a better way of doing this, than invoking user_creatable_add_opts() for mem backend object creation and qdev_device_add() for pc-dimm dev. > > One more point is, considering the fact that ARM64 linux kernel still > > doesn't support hotplug memory at the moment, not sure how much we > > gain from the pc-dimm model. > > It will eventually. Moving towards the pc-dimm model now, in order to handle > the non-contiguous memory needs, will go a long ways to enabling hotplug > later. Ok. I will try to come up with something and will post as an RFC. Appreciate your feedback, Shameer > > Thanks, > drew > > > > > Please let me know your thoughts on this. > > > > Thanks, > > Shameer > > > > [1]. https://lkml.org/lkml/2018/4/18/293
diff --git a/hw/arm/boot.c b/hw/arm/boot.c index c2720c8..30438f4 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -836,6 +836,14 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) */ assert(!(info->secure_board_setup && kvm_enabled())); + /* If machine is not virt, the mem_list will empty. */ + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { + RAMRegion *new = g_new(RAMRegion, 1); + new->base = info->loader_start; + new->size = info->ram_size; + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); + } + info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb"); /* Load the kernel. */ diff --git a/hw/arm/virt.c b/hw/arm/virt.c index ddde5e1..ff334c1 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -56,6 +56,7 @@ #include "hw/smbios/smbios.h" #include "qapi/visitor.h" #include "standard-headers/linux/input.h" +#include "hw/vfio/vfio-common.h" #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ @@ -1225,6 +1226,98 @@ void virt_machine_done(Notifier *notifier, void *data) virt_build_smbios(vms); } +static void handle_reserved_ram_region_overlap(void) +{ + hwaddr cur_end, next_end; + RAMRegion *reg, *next_reg, *tmp_reg; + + QLIST_FOREACH(reg, &reserved_ram_regions, next) { + next_reg = QLIST_NEXT(reg, next); + + while (next_reg && next_reg->base <= (reg->base + reg->size)) { + next_end = next_reg->base + next_reg->size; + cur_end = reg->base + reg->size; + if (next_end > cur_end) { + reg->size += (next_end - cur_end); + } + + tmp_reg = QLIST_NEXT(next_reg, next); + QLIST_REMOVE(next_reg, next); + g_free(next_reg); + next_reg = tmp_reg; + } + } +} + +static void update_memory_regions(VirtMachineState *vms, hwaddr ram_size) +{ + + RAMRegion *new, *reg, *last = NULL; + hwaddr virt_start, virt_end; + virt_start = vms->memmap[VIRT_MEM].base; + virt_end = virt_start + ram_size - 1; + + handle_reserved_ram_region_overlap(); + + QLIST_FOREACH(reg, &reserved_ram_regions, next) { + if (reg->base >= virt_start && reg->base < virt_end) { + if (reg->base == virt_start) { + virt_start += reg->size; + virt_end += reg->size; + continue; + } else { + new = g_new(RAMRegion, 1); + new->base = virt_start; + new->size = reg->base - virt_start; + virt_start = reg->base + reg->size; + } + + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); + } else { + QLIST_INSERT_AFTER(last, new, next); + } + + last = new; + ram_size -= new->size; + virt_end += reg->size; + } + } + + if (ram_size > 0) { + new = g_new(RAMRegion, 1); + new->base = virt_start; + new->size = ram_size; + + if (QLIST_EMPTY(&vms->bootinfo.mem_list)) { + QLIST_INSERT_HEAD(&vms->bootinfo.mem_list, new, next); + } else { + QLIST_INSERT_AFTER(last, new, next); + } + } +} + +static void create_ram_alias(VirtMachineState *vms, + MemoryRegion *sysmem, + MemoryRegion *ram) +{ + RAMRegion *reg; + MemoryRegion *ram_memory; + char *nodename; + hwaddr sz = 0; + + QLIST_FOREACH(reg, &vms->bootinfo.mem_list, next) { + nodename = g_strdup_printf("ram@%" PRIx64, reg->base); + ram_memory = g_new(MemoryRegion, 1); + memory_region_init_alias(ram_memory, NULL, nodename, ram, sz, + reg->size); + memory_region_add_subregion(sysmem, reg->base, ram_memory); + sz += reg->size; + + g_free(nodename); + } +} + static void virt_ram_memory_region_init(Notifier *notifier, void *data) { MachineState *machine = MACHINE(qdev_get_machine()); @@ -1232,10 +1325,15 @@ static void virt_ram_memory_region_init(Notifier *notifier, void *data) MemoryRegion *ram = g_new(MemoryRegion, 1); VirtMachineState *vms = container_of(notifier, VirtMachineState, ram_memory_region_init); + RAMRegion *first_mem_reg; memory_region_allocate_system_memory(ram, NULL, "mach-virt.ram", machine->ram_size); - memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base, ram); + update_memory_regions(vms, machine->ram_size); + create_ram_alias(vms, sysmem, ram); + + first_mem_reg = QLIST_FIRST(&vms->bootinfo.mem_list); + vms->bootinfo.loader_start = first_mem_reg->base; } static uint64_t virt_cpu_mp_affinity(VirtMachineState *vms, int idx) @@ -1458,7 +1556,6 @@ static void machvirt_init(MachineState *machine) vms->bootinfo.initrd_filename = machine->initrd_filename; vms->bootinfo.nb_cpus = smp_cpus; vms->bootinfo.board_id = -1; - vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; vms->bootinfo.get_dtb = machvirt_dtb; vms->bootinfo.firmware_loaded = firmware_loaded; arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo); diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h index ce769bd..d953026 100644 --- a/include/hw/arm/arm.h +++ b/include/hw/arm/arm.h @@ -124,6 +124,7 @@ struct arm_boot_info { bool secure_board_setup; arm_endianness endianness; + QLIST_HEAD(, RAMRegion) mem_list; }; /**