diff mbox series

[RFC,v2,4/4] s390: do not call memory_region_allocate_system_memory() multiple times

Message ID 20190802093854.5343-5-imammedo@redhat.com (mailing list archive)
State New, archived
Headers show
Series s390: stop abusing memory_region_allocate_system_memory() | expand

Commit Message

Igor Mammedov Aug. 2, 2019, 9:38 a.m. UTC
s390 was trying to solve limited KVM memslot size issue by abusing
memory_region_allocate_system_memory(), which breaks API contract
where the function might be called only once.

s390 should have used memory aliases to fragment inital memory into
smaller chunks to satisfy KVM's memslot limitation. But its a bit
late now, since allocated pieces are transfered in migration stream
separately, so it's not possible to just replace broken layout with
correct one. To workaround issue, MemoryRegion alases are made
migratable and this patch switches to use them to split big initial
RAM chunk into smaller pieces (KVM_SLOT_MAX_BYTES max) and registers
aliases for migration. That should keep migration compatible with
previous QEMU versions.

New machine types (since 4.2) will use single memory region, which
will get transimitted in migration stream as a whole RAMBlock.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
I don't have access to a suitable system to test it, so I've simulated
it with smaller chunks on x84 host. Ping-pong migration between old
and new QEMU worked fine. KVM part should be fine as memslots
using mapped MemoryRegions (in this case it would be aliases) as
far as I know but is someone could test it on big enough host it
would be nice.
---
 include/hw/s390x/s390-virtio-ccw.h |  4 +++
 hw/s390x/s390-virtio-ccw.c         | 56 +++++++++++++++++++++---------
 2 files changed, 43 insertions(+), 17 deletions(-)

Comments

David Hildenbrand Aug. 2, 2019, 10:25 a.m. UTC | #1
On 02.08.19 11:38, Igor Mammedov wrote:
> s390 was trying to solve limited KVM memslot size issue by abusing
> memory_region_allocate_system_memory(), which breaks API contract
> where the function might be called only once.
> 
> s390 should have used memory aliases to fragment inital memory into
> smaller chunks to satisfy KVM's memslot limitation. But its a bit
> late now, since allocated pieces are transfered in migration stream
> separately, so it's not possible to just replace broken layout with
> correct one. To workaround issue, MemoryRegion alases are made
> migratable and this patch switches to use them to split big initial
> RAM chunk into smaller pieces (KVM_SLOT_MAX_BYTES max) and registers
> aliases for migration. That should keep migration compatible with
> previous QEMU versions.
> 
> New machine types (since 4.2) will use single memory region, which
> will get transimitted in migration stream as a whole RAMBlock.
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> I don't have access to a suitable system to test it, so I've simulated
> it with smaller chunks on x84 host. Ping-pong migration between old
> and new QEMU worked fine. KVM part should be fine as memslots
> using mapped MemoryRegions (in this case it would be aliases) as
> far as I know but is someone could test it on big enough host it
> would be nice.
> ---
>  include/hw/s390x/s390-virtio-ccw.h |  4 +++
>  hw/s390x/s390-virtio-ccw.c         | 56 +++++++++++++++++++++---------
>  2 files changed, 43 insertions(+), 17 deletions(-)
> 
> diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
> index 00632f94b4..f9ed3737f8 100644
> --- a/include/hw/s390x/s390-virtio-ccw.h
> +++ b/include/hw/s390x/s390-virtio-ccw.h
> @@ -21,6 +21,9 @@
>  #define S390_MACHINE_CLASS(klass) \
>      OBJECT_CLASS_CHECK(S390CcwMachineClass, (klass), TYPE_S390_CCW_MACHINE)
>  
> +#define S390_MACHINE_GET_CLASS(obj) \
> +    OBJECT_GET_CLASS(S390CcwMachineClass, (obj), TYPE_S390_CCW_MACHINE)
> +
>  /*
>   * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
>   * as the dirty bitmap must be managed by bitops that take an int as
> @@ -50,6 +53,7 @@ typedef struct S390CcwMachineClass {
>      bool cpu_model_allowed;
>      bool css_migration_enabled;
>      bool hpage_1m_allowed;
> +    bool split_ram_layout;
>  } S390CcwMachineClass;
>  
>  /* runtime-instrumentation allowed by the machine */
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index 073672f9cb..9160c1ed0a 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -151,28 +151,47 @@ static void virtio_ccw_register_hcalls(void)
>                                     virtio_ccw_hcall_early_printk);
>  }
>  
> -static void s390_memory_init(ram_addr_t mem_size)
> +static void s390_memory_init(MachineState *ms)
>  {
> +    S390CcwMachineClass *s390mc = S390_MACHINE_GET_CLASS(ms);
>      MemoryRegion *sysmem = get_system_memory();
> -    ram_addr_t chunk, offset = 0;
> +    MemoryRegion *ram = g_new(MemoryRegion, 1);
>      unsigned int number = 0;
>      Error *local_err = NULL;
> -    gchar *name;
> +    ram_addr_t mem_size = ms->ram_size;
> +    gchar *name = g_strdup_printf(s390mc->split_ram_layout ?
> +                                  "s390.whole.ram" : "s390.ram");
>  
>      /* allocate RAM for core */
> -    name = g_strdup_printf("s390.ram");
> -    while (mem_size) {
> -        MemoryRegion *ram = g_new(MemoryRegion, 1);
> -        uint64_t size = mem_size;
> -
> -        /* KVM does not allow memslots >= 8 TB */
> -        chunk = MIN(size, KVM_SLOT_MAX_BYTES);
> -        memory_region_allocate_system_memory(ram, NULL, name, chunk);
> -        memory_region_add_subregion(sysmem, offset, ram);
> -        mem_size -= chunk;
> -        offset += chunk;
> -        g_free(name);
> -        name = g_strdup_printf("s390.ram.%u", ++number);
> +    memory_region_allocate_system_memory(ram, NULL, name, mem_size);
> +
> +    /* migration compatible RAM handling for 4.1 and older machines */
> +    if (s390mc->split_ram_layout) {
> +       ram_addr_t chunk, offset = 0;
> +       /*
> +        * memory_region_allocate_system_memory() registers allocated RAM for
> +        * migration, however for compat reasons the RAM should be passed over
> +        * as RAMBlocks of the size upto KVM_SLOT_MAX_BYTES. So unregister just
> +        * allocated RAM so it won't be migrated directly. Aliases will take care
> +        * of segmenting RAM into legacy chunks that migration compatible.
> +        */
> +       vmstate_unregister_ram(ram, NULL);
> +       name = g_strdup_printf("s390.ram");
> +       while (mem_size) {
> +           MemoryRegion *alias = g_new(MemoryRegion, 1);
> +
> +           /* KVM does not allow memslots >= 8 TB */
> +           chunk = MIN(mem_size, KVM_SLOT_MAX_BYTES);
> +           memory_region_init_alias(alias, NULL, name, ram, offset, chunk);
> +           vmstate_register_ram_global(alias);
> +           memory_region_add_subregion(sysmem, offset, alias);
> +           mem_size -= chunk;
> +           offset += chunk;
> +           g_free(name);
> +           name = g_strdup_printf("s390.ram.%u", ++number);
> +       }
> +    } else {
> +       memory_region_add_subregion(sysmem, 0, ram);
>      }
>      g_free(name);
>  
> @@ -257,7 +276,7 @@ static void ccw_init(MachineState *machine)
>  
>      s390_sclp_init();
>      /* init memory + setup max page size. Required for the CPU model */
> -    s390_memory_init(machine->ram_size);
> +    s390_memory_init(machine);
>  
>      /* init CPUs (incl. CPU model) early so s390_has_feature() works */
>      s390_init_cpus(machine);
> @@ -667,8 +686,11 @@ static void ccw_machine_4_1_instance_options(MachineState *machine)
>  
>  static void ccw_machine_4_1_class_options(MachineClass *mc)
>  {
> +    S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
> +
>      ccw_machine_4_2_class_options(mc);
>      compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
> +    s390mc->split_ram_layout = true;
>  }
>  DEFINE_CCW_MACHINE(4_1, "4.1", false);
>  
> 

As discussed, I am not sure if adding that compat code really is worth
it. :)
Christian Borntraeger Aug. 2, 2019, 10:27 a.m. UTC | #2
On 02.08.19 12:25, David Hildenbrand wrote:
> On 02.08.19 11:38, Igor Mammedov wrote:
>> s390 was trying to solve limited KVM memslot size issue by abusing
>> memory_region_allocate_system_memory(), which breaks API contract
>> where the function might be called only once.
>>
>> s390 should have used memory aliases to fragment inital memory into
>> smaller chunks to satisfy KVM's memslot limitation. But its a bit
>> late now, since allocated pieces are transfered in migration stream
>> separately, so it's not possible to just replace broken layout with
>> correct one. To workaround issue, MemoryRegion alases are made
>> migratable and this patch switches to use them to split big initial
>> RAM chunk into smaller pieces (KVM_SLOT_MAX_BYTES max) and registers
>> aliases for migration. That should keep migration compatible with
>> previous QEMU versions.
>>
>> New machine types (since 4.2) will use single memory region, which
>> will get transimitted in migration stream as a whole RAMBlock.
>>
>> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
>> ---
>> I don't have access to a suitable system to test it, so I've simulated
>> it with smaller chunks on x84 host. Ping-pong migration between old
>> and new QEMU worked fine. KVM part should be fine as memslots
>> using mapped MemoryRegions (in this case it would be aliases) as
>> far as I know but is someone could test it on big enough host it
>> would be nice.
>> ---
>>  include/hw/s390x/s390-virtio-ccw.h |  4 +++
>>  hw/s390x/s390-virtio-ccw.c         | 56 +++++++++++++++++++++---------
>>  2 files changed, 43 insertions(+), 17 deletions(-)
>>
>> diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
>> index 00632f94b4..f9ed3737f8 100644
>> --- a/include/hw/s390x/s390-virtio-ccw.h
>> +++ b/include/hw/s390x/s390-virtio-ccw.h
>> @@ -21,6 +21,9 @@
>>  #define S390_MACHINE_CLASS(klass) \
>>      OBJECT_CLASS_CHECK(S390CcwMachineClass, (klass), TYPE_S390_CCW_MACHINE)
>>  
>> +#define S390_MACHINE_GET_CLASS(obj) \
>> +    OBJECT_GET_CLASS(S390CcwMachineClass, (obj), TYPE_S390_CCW_MACHINE)
>> +
>>  /*
>>   * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
>>   * as the dirty bitmap must be managed by bitops that take an int as
>> @@ -50,6 +53,7 @@ typedef struct S390CcwMachineClass {
>>      bool cpu_model_allowed;
>>      bool css_migration_enabled;
>>      bool hpage_1m_allowed;
>> +    bool split_ram_layout;
>>  } S390CcwMachineClass;
>>  
>>  /* runtime-instrumentation allowed by the machine */
>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>> index 073672f9cb..9160c1ed0a 100644
>> --- a/hw/s390x/s390-virtio-ccw.c
>> +++ b/hw/s390x/s390-virtio-ccw.c
>> @@ -151,28 +151,47 @@ static void virtio_ccw_register_hcalls(void)
>>                                     virtio_ccw_hcall_early_printk);
>>  }
>>  
>> -static void s390_memory_init(ram_addr_t mem_size)
>> +static void s390_memory_init(MachineState *ms)
>>  {
>> +    S390CcwMachineClass *s390mc = S390_MACHINE_GET_CLASS(ms);
>>      MemoryRegion *sysmem = get_system_memory();
>> -    ram_addr_t chunk, offset = 0;
>> +    MemoryRegion *ram = g_new(MemoryRegion, 1);
>>      unsigned int number = 0;
>>      Error *local_err = NULL;
>> -    gchar *name;
>> +    ram_addr_t mem_size = ms->ram_size;
>> +    gchar *name = g_strdup_printf(s390mc->split_ram_layout ?
>> +                                  "s390.whole.ram" : "s390.ram");
>>  
>>      /* allocate RAM for core */
>> -    name = g_strdup_printf("s390.ram");
>> -    while (mem_size) {
>> -        MemoryRegion *ram = g_new(MemoryRegion, 1);
>> -        uint64_t size = mem_size;
>> -
>> -        /* KVM does not allow memslots >= 8 TB */
>> -        chunk = MIN(size, KVM_SLOT_MAX_BYTES);
>> -        memory_region_allocate_system_memory(ram, NULL, name, chunk);
>> -        memory_region_add_subregion(sysmem, offset, ram);
>> -        mem_size -= chunk;
>> -        offset += chunk;
>> -        g_free(name);
>> -        name = g_strdup_printf("s390.ram.%u", ++number);
>> +    memory_region_allocate_system_memory(ram, NULL, name, mem_size);
>> +
>> +    /* migration compatible RAM handling for 4.1 and older machines */
>> +    if (s390mc->split_ram_layout) {
>> +       ram_addr_t chunk, offset = 0;
>> +       /*
>> +        * memory_region_allocate_system_memory() registers allocated RAM for
>> +        * migration, however for compat reasons the RAM should be passed over
>> +        * as RAMBlocks of the size upto KVM_SLOT_MAX_BYTES. So unregister just
>> +        * allocated RAM so it won't be migrated directly. Aliases will take care
>> +        * of segmenting RAM into legacy chunks that migration compatible.
>> +        */
>> +       vmstate_unregister_ram(ram, NULL);
>> +       name = g_strdup_printf("s390.ram");
>> +       while (mem_size) {
>> +           MemoryRegion *alias = g_new(MemoryRegion, 1);
>> +
>> +           /* KVM does not allow memslots >= 8 TB */
>> +           chunk = MIN(mem_size, KVM_SLOT_MAX_BYTES);
>> +           memory_region_init_alias(alias, NULL, name, ram, offset, chunk);
>> +           vmstate_register_ram_global(alias);
>> +           memory_region_add_subregion(sysmem, offset, alias);
>> +           mem_size -= chunk;
>> +           offset += chunk;
>> +           g_free(name);
>> +           name = g_strdup_printf("s390.ram.%u", ++number);
>> +       }
>> +    } else {
>> +       memory_region_add_subregion(sysmem, 0, ram);
>>      }
>>      g_free(name);
>>  
>> @@ -257,7 +276,7 @@ static void ccw_init(MachineState *machine)
>>  
>>      s390_sclp_init();
>>      /* init memory + setup max page size. Required for the CPU model */
>> -    s390_memory_init(machine->ram_size);
>> +    s390_memory_init(machine);
>>  
>>      /* init CPUs (incl. CPU model) early so s390_has_feature() works */
>>      s390_init_cpus(machine);
>> @@ -667,8 +686,11 @@ static void ccw_machine_4_1_instance_options(MachineState *machine)
>>  
>>  static void ccw_machine_4_1_class_options(MachineClass *mc)
>>  {
>> +    S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
>> +
>>      ccw_machine_4_2_class_options(mc);
>>      compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
>> +    s390mc->split_ram_layout = true;
>>  }
>>  DEFINE_CCW_MACHINE(4_1, "4.1", false);
>>  
>>
> 
> As discussed, I am not sure if adding that compat code really is worth
> it. :)

Agreed. As long as forward migration (e.g. 4.2->4.3 will work) and we document
that migration will fails for 2.12->.....4.1 for guests >=8TB we  can simplify
things.
Igor Mammedov Aug. 2, 2019, 11:40 a.m. UTC | #3
On Fri, 2 Aug 2019 12:27:50 +0200
Christian Borntraeger <borntraeger@de.ibm.com> wrote:

> On 02.08.19 12:25, David Hildenbrand wrote:
> > On 02.08.19 11:38, Igor Mammedov wrote:  
> >> s390 was trying to solve limited KVM memslot size issue by abusing
> >> memory_region_allocate_system_memory(), which breaks API contract
> >> where the function might be called only once.
> >>
> >> s390 should have used memory aliases to fragment inital memory into
> >> smaller chunks to satisfy KVM's memslot limitation. But its a bit
> >> late now, since allocated pieces are transfered in migration stream
> >> separately, so it's not possible to just replace broken layout with
> >> correct one. To workaround issue, MemoryRegion alases are made
> >> migratable and this patch switches to use them to split big initial
> >> RAM chunk into smaller pieces (KVM_SLOT_MAX_BYTES max) and registers
> >> aliases for migration. That should keep migration compatible with
> >> previous QEMU versions.
> >>
> >> New machine types (since 4.2) will use single memory region, which
> >> will get transimitted in migration stream as a whole RAMBlock.
> >>
> >> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> >> ---
> >> I don't have access to a suitable system to test it, so I've simulated
> >> it with smaller chunks on x84 host. Ping-pong migration between old
> >> and new QEMU worked fine. KVM part should be fine as memslots
> >> using mapped MemoryRegions (in this case it would be aliases) as
> >> far as I know but is someone could test it on big enough host it
> >> would be nice.
> >> ---
> >>  include/hw/s390x/s390-virtio-ccw.h |  4 +++
> >>  hw/s390x/s390-virtio-ccw.c         | 56 +++++++++++++++++++++---------
> >>  2 files changed, 43 insertions(+), 17 deletions(-)
> >>
> >> diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
> >> index 00632f94b4..f9ed3737f8 100644
> >> --- a/include/hw/s390x/s390-virtio-ccw.h
> >> +++ b/include/hw/s390x/s390-virtio-ccw.h
> >> @@ -21,6 +21,9 @@
> >>  #define S390_MACHINE_CLASS(klass) \
> >>      OBJECT_CLASS_CHECK(S390CcwMachineClass, (klass), TYPE_S390_CCW_MACHINE)
> >>  
> >> +#define S390_MACHINE_GET_CLASS(obj) \
> >> +    OBJECT_GET_CLASS(S390CcwMachineClass, (obj), TYPE_S390_CCW_MACHINE)
> >> +
> >>  /*
> >>   * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
> >>   * as the dirty bitmap must be managed by bitops that take an int as
> >> @@ -50,6 +53,7 @@ typedef struct S390CcwMachineClass {
> >>      bool cpu_model_allowed;
> >>      bool css_migration_enabled;
> >>      bool hpage_1m_allowed;
> >> +    bool split_ram_layout;
> >>  } S390CcwMachineClass;
> >>  
> >>  /* runtime-instrumentation allowed by the machine */
> >> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> >> index 073672f9cb..9160c1ed0a 100644
> >> --- a/hw/s390x/s390-virtio-ccw.c
> >> +++ b/hw/s390x/s390-virtio-ccw.c
> >> @@ -151,28 +151,47 @@ static void virtio_ccw_register_hcalls(void)
> >>                                     virtio_ccw_hcall_early_printk);
> >>  }
> >>  
> >> -static void s390_memory_init(ram_addr_t mem_size)
> >> +static void s390_memory_init(MachineState *ms)
> >>  {
> >> +    S390CcwMachineClass *s390mc = S390_MACHINE_GET_CLASS(ms);
> >>      MemoryRegion *sysmem = get_system_memory();
> >> -    ram_addr_t chunk, offset = 0;
> >> +    MemoryRegion *ram = g_new(MemoryRegion, 1);
> >>      unsigned int number = 0;
> >>      Error *local_err = NULL;
> >> -    gchar *name;
> >> +    ram_addr_t mem_size = ms->ram_size;
> >> +    gchar *name = g_strdup_printf(s390mc->split_ram_layout ?
> >> +                                  "s390.whole.ram" : "s390.ram");
> >>  
> >>      /* allocate RAM for core */
> >> -    name = g_strdup_printf("s390.ram");
> >> -    while (mem_size) {
> >> -        MemoryRegion *ram = g_new(MemoryRegion, 1);
> >> -        uint64_t size = mem_size;
> >> -
> >> -        /* KVM does not allow memslots >= 8 TB */
> >> -        chunk = MIN(size, KVM_SLOT_MAX_BYTES);
> >> -        memory_region_allocate_system_memory(ram, NULL, name, chunk);
> >> -        memory_region_add_subregion(sysmem, offset, ram);
> >> -        mem_size -= chunk;
> >> -        offset += chunk;
> >> -        g_free(name);
> >> -        name = g_strdup_printf("s390.ram.%u", ++number);
> >> +    memory_region_allocate_system_memory(ram, NULL, name, mem_size);
> >> +
> >> +    /* migration compatible RAM handling for 4.1 and older machines */
> >> +    if (s390mc->split_ram_layout) {
> >> +       ram_addr_t chunk, offset = 0;
> >> +       /*
> >> +        * memory_region_allocate_system_memory() registers allocated RAM for
> >> +        * migration, however for compat reasons the RAM should be passed over
> >> +        * as RAMBlocks of the size upto KVM_SLOT_MAX_BYTES. So unregister just
> >> +        * allocated RAM so it won't be migrated directly. Aliases will take care
> >> +        * of segmenting RAM into legacy chunks that migration compatible.
> >> +        */
> >> +       vmstate_unregister_ram(ram, NULL);
> >> +       name = g_strdup_printf("s390.ram");
> >> +       while (mem_size) {
> >> +           MemoryRegion *alias = g_new(MemoryRegion, 1);
> >> +
> >> +           /* KVM does not allow memslots >= 8 TB */
> >> +           chunk = MIN(mem_size, KVM_SLOT_MAX_BYTES);
> >> +           memory_region_init_alias(alias, NULL, name, ram, offset, chunk);
> >> +           vmstate_register_ram_global(alias);
> >> +           memory_region_add_subregion(sysmem, offset, alias);
> >> +           mem_size -= chunk;
> >> +           offset += chunk;
> >> +           g_free(name);
> >> +           name = g_strdup_printf("s390.ram.%u", ++number);
> >> +       }
> >> +    } else {
> >> +       memory_region_add_subregion(sysmem, 0, ram);
> >>      }
> >>      g_free(name);
> >>  
> >> @@ -257,7 +276,7 @@ static void ccw_init(MachineState *machine)
> >>  
> >>      s390_sclp_init();
> >>      /* init memory + setup max page size. Required for the CPU model */
> >> -    s390_memory_init(machine->ram_size);
> >> +    s390_memory_init(machine);
> >>  
> >>      /* init CPUs (incl. CPU model) early so s390_has_feature() works */
> >>      s390_init_cpus(machine);
> >> @@ -667,8 +686,11 @@ static void ccw_machine_4_1_instance_options(MachineState *machine)
> >>  
> >>  static void ccw_machine_4_1_class_options(MachineClass *mc)
> >>  {
> >> +    S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
> >> +
> >>      ccw_machine_4_2_class_options(mc);
> >>      compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
> >> +    s390mc->split_ram_layout = true;
> >>  }
> >>  DEFINE_CCW_MACHINE(4_1, "4.1", false);
> >>  
> >>  
> > 
> > As discussed, I am not sure if adding that compat code really is worth
> > it. :)  
> 
> Agreed. As long as forward migration (e.g. 4.2->4.3 will work) and we document
> that migration will fails for 2.12->.....4.1 for guests >=8TB we  can simplify
> things.

I'll drop 3/4 and compat code in this patch.

Christian,

could you test this series on big enough host to ensure that KVM memslot splitting
part works as expected (i.e. it's possible to start >8TB guest)?
Christian Borntraeger Aug. 2, 2019, 12:13 p.m. UTC | #4
On 02.08.19 13:40, Igor Mammedov wrote:
> On Fri, 2 Aug 2019 12:27:50 +0200
> Christian Borntraeger <borntraeger@de.ibm.com> wrote:
> 
>> On 02.08.19 12:25, David Hildenbrand wrote:
>>> On 02.08.19 11:38, Igor Mammedov wrote:  
>>>> s390 was trying to solve limited KVM memslot size issue by abusing
>>>> memory_region_allocate_system_memory(), which breaks API contract
>>>> where the function might be called only once.
>>>>
>>>> s390 should have used memory aliases to fragment inital memory into
>>>> smaller chunks to satisfy KVM's memslot limitation. But its a bit
>>>> late now, since allocated pieces are transfered in migration stream
>>>> separately, so it's not possible to just replace broken layout with
>>>> correct one. To workaround issue, MemoryRegion alases are made
>>>> migratable and this patch switches to use them to split big initial
>>>> RAM chunk into smaller pieces (KVM_SLOT_MAX_BYTES max) and registers
>>>> aliases for migration. That should keep migration compatible with
>>>> previous QEMU versions.
>>>>
>>>> New machine types (since 4.2) will use single memory region, which
>>>> will get transimitted in migration stream as a whole RAMBlock.
>>>>
>>>> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
>>>> ---
>>>> I don't have access to a suitable system to test it, so I've simulated
>>>> it with smaller chunks on x84 host. Ping-pong migration between old
>>>> and new QEMU worked fine. KVM part should be fine as memslots
>>>> using mapped MemoryRegions (in this case it would be aliases) as
>>>> far as I know but is someone could test it on big enough host it
>>>> would be nice.
>>>> ---
>>>>  include/hw/s390x/s390-virtio-ccw.h |  4 +++
>>>>  hw/s390x/s390-virtio-ccw.c         | 56 +++++++++++++++++++++---------
>>>>  2 files changed, 43 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
>>>> index 00632f94b4..f9ed3737f8 100644
>>>> --- a/include/hw/s390x/s390-virtio-ccw.h
>>>> +++ b/include/hw/s390x/s390-virtio-ccw.h
>>>> @@ -21,6 +21,9 @@
>>>>  #define S390_MACHINE_CLASS(klass) \
>>>>      OBJECT_CLASS_CHECK(S390CcwMachineClass, (klass), TYPE_S390_CCW_MACHINE)
>>>>  
>>>> +#define S390_MACHINE_GET_CLASS(obj) \
>>>> +    OBJECT_GET_CLASS(S390CcwMachineClass, (obj), TYPE_S390_CCW_MACHINE)
>>>> +
>>>>  /*
>>>>   * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
>>>>   * as the dirty bitmap must be managed by bitops that take an int as
>>>> @@ -50,6 +53,7 @@ typedef struct S390CcwMachineClass {
>>>>      bool cpu_model_allowed;
>>>>      bool css_migration_enabled;
>>>>      bool hpage_1m_allowed;
>>>> +    bool split_ram_layout;
>>>>  } S390CcwMachineClass;
>>>>  
>>>>  /* runtime-instrumentation allowed by the machine */
>>>> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
>>>> index 073672f9cb..9160c1ed0a 100644
>>>> --- a/hw/s390x/s390-virtio-ccw.c
>>>> +++ b/hw/s390x/s390-virtio-ccw.c
>>>> @@ -151,28 +151,47 @@ static void virtio_ccw_register_hcalls(void)
>>>>                                     virtio_ccw_hcall_early_printk);
>>>>  }
>>>>  
>>>> -static void s390_memory_init(ram_addr_t mem_size)
>>>> +static void s390_memory_init(MachineState *ms)
>>>>  {
>>>> +    S390CcwMachineClass *s390mc = S390_MACHINE_GET_CLASS(ms);
>>>>      MemoryRegion *sysmem = get_system_memory();
>>>> -    ram_addr_t chunk, offset = 0;
>>>> +    MemoryRegion *ram = g_new(MemoryRegion, 1);
>>>>      unsigned int number = 0;
>>>>      Error *local_err = NULL;
>>>> -    gchar *name;
>>>> +    ram_addr_t mem_size = ms->ram_size;
>>>> +    gchar *name = g_strdup_printf(s390mc->split_ram_layout ?
>>>> +                                  "s390.whole.ram" : "s390.ram");
>>>>  
>>>>      /* allocate RAM for core */
>>>> -    name = g_strdup_printf("s390.ram");
>>>> -    while (mem_size) {
>>>> -        MemoryRegion *ram = g_new(MemoryRegion, 1);
>>>> -        uint64_t size = mem_size;
>>>> -
>>>> -        /* KVM does not allow memslots >= 8 TB */
>>>> -        chunk = MIN(size, KVM_SLOT_MAX_BYTES);
>>>> -        memory_region_allocate_system_memory(ram, NULL, name, chunk);
>>>> -        memory_region_add_subregion(sysmem, offset, ram);
>>>> -        mem_size -= chunk;
>>>> -        offset += chunk;
>>>> -        g_free(name);
>>>> -        name = g_strdup_printf("s390.ram.%u", ++number);
>>>> +    memory_region_allocate_system_memory(ram, NULL, name, mem_size);
>>>> +
>>>> +    /* migration compatible RAM handling for 4.1 and older machines */
>>>> +    if (s390mc->split_ram_layout) {
>>>> +       ram_addr_t chunk, offset = 0;
>>>> +       /*
>>>> +        * memory_region_allocate_system_memory() registers allocated RAM for
>>>> +        * migration, however for compat reasons the RAM should be passed over
>>>> +        * as RAMBlocks of the size upto KVM_SLOT_MAX_BYTES. So unregister just
>>>> +        * allocated RAM so it won't be migrated directly. Aliases will take care
>>>> +        * of segmenting RAM into legacy chunks that migration compatible.
>>>> +        */
>>>> +       vmstate_unregister_ram(ram, NULL);
>>>> +       name = g_strdup_printf("s390.ram");
>>>> +       while (mem_size) {
>>>> +           MemoryRegion *alias = g_new(MemoryRegion, 1);
>>>> +
>>>> +           /* KVM does not allow memslots >= 8 TB */
>>>> +           chunk = MIN(mem_size, KVM_SLOT_MAX_BYTES);
>>>> +           memory_region_init_alias(alias, NULL, name, ram, offset, chunk);
>>>> +           vmstate_register_ram_global(alias);
>>>> +           memory_region_add_subregion(sysmem, offset, alias);
>>>> +           mem_size -= chunk;
>>>> +           offset += chunk;
>>>> +           g_free(name);
>>>> +           name = g_strdup_printf("s390.ram.%u", ++number);
>>>> +       }
>>>> +    } else {
>>>> +       memory_region_add_subregion(sysmem, 0, ram);
>>>>      }
>>>>      g_free(name);
>>>>  
>>>> @@ -257,7 +276,7 @@ static void ccw_init(MachineState *machine)
>>>>  
>>>>      s390_sclp_init();
>>>>      /* init memory + setup max page size. Required for the CPU model */
>>>> -    s390_memory_init(machine->ram_size);
>>>> +    s390_memory_init(machine);
>>>>  
>>>>      /* init CPUs (incl. CPU model) early so s390_has_feature() works */
>>>>      s390_init_cpus(machine);
>>>> @@ -667,8 +686,11 @@ static void ccw_machine_4_1_instance_options(MachineState *machine)
>>>>  
>>>>  static void ccw_machine_4_1_class_options(MachineClass *mc)
>>>>  {
>>>> +    S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
>>>> +
>>>>      ccw_machine_4_2_class_options(mc);
>>>>      compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
>>>> +    s390mc->split_ram_layout = true;
>>>>  }
>>>>  DEFINE_CCW_MACHINE(4_1, "4.1", false);
>>>>  
>>>>  
>>>
>>> As discussed, I am not sure if adding that compat code really is worth
>>> it. :)  
>>
>> Agreed. As long as forward migration (e.g. 4.2->4.3 will work) and we document
>> that migration will fails for 2.12->.....4.1 for guests >=8TB we  can simplify
>> things.
> 
> I'll drop 3/4 and compat code in this patch.
> 
> Christian,
> 
> could you test this series on big enough host to ensure that KVM memslot splitting
> part works as expected (i.e. it's possible to start >8TB guest)?

Can you send a v3 with all changes?
diff mbox series

Patch

diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h
index 00632f94b4..f9ed3737f8 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -21,6 +21,9 @@ 
 #define S390_MACHINE_CLASS(klass) \
     OBJECT_CLASS_CHECK(S390CcwMachineClass, (klass), TYPE_S390_CCW_MACHINE)
 
+#define S390_MACHINE_GET_CLASS(obj) \
+    OBJECT_GET_CLASS(S390CcwMachineClass, (obj), TYPE_S390_CCW_MACHINE)
+
 /*
  * KVM does only support memory slots up to KVM_MEM_MAX_NR_PAGES pages
  * as the dirty bitmap must be managed by bitops that take an int as
@@ -50,6 +53,7 @@  typedef struct S390CcwMachineClass {
     bool cpu_model_allowed;
     bool css_migration_enabled;
     bool hpage_1m_allowed;
+    bool split_ram_layout;
 } S390CcwMachineClass;
 
 /* runtime-instrumentation allowed by the machine */
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 073672f9cb..9160c1ed0a 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -151,28 +151,47 @@  static void virtio_ccw_register_hcalls(void)
                                    virtio_ccw_hcall_early_printk);
 }
 
-static void s390_memory_init(ram_addr_t mem_size)
+static void s390_memory_init(MachineState *ms)
 {
+    S390CcwMachineClass *s390mc = S390_MACHINE_GET_CLASS(ms);
     MemoryRegion *sysmem = get_system_memory();
-    ram_addr_t chunk, offset = 0;
+    MemoryRegion *ram = g_new(MemoryRegion, 1);
     unsigned int number = 0;
     Error *local_err = NULL;
-    gchar *name;
+    ram_addr_t mem_size = ms->ram_size;
+    gchar *name = g_strdup_printf(s390mc->split_ram_layout ?
+                                  "s390.whole.ram" : "s390.ram");
 
     /* allocate RAM for core */
-    name = g_strdup_printf("s390.ram");
-    while (mem_size) {
-        MemoryRegion *ram = g_new(MemoryRegion, 1);
-        uint64_t size = mem_size;
-
-        /* KVM does not allow memslots >= 8 TB */
-        chunk = MIN(size, KVM_SLOT_MAX_BYTES);
-        memory_region_allocate_system_memory(ram, NULL, name, chunk);
-        memory_region_add_subregion(sysmem, offset, ram);
-        mem_size -= chunk;
-        offset += chunk;
-        g_free(name);
-        name = g_strdup_printf("s390.ram.%u", ++number);
+    memory_region_allocate_system_memory(ram, NULL, name, mem_size);
+
+    /* migration compatible RAM handling for 4.1 and older machines */
+    if (s390mc->split_ram_layout) {
+       ram_addr_t chunk, offset = 0;
+       /*
+        * memory_region_allocate_system_memory() registers allocated RAM for
+        * migration, however for compat reasons the RAM should be passed over
+        * as RAMBlocks of the size upto KVM_SLOT_MAX_BYTES. So unregister just
+        * allocated RAM so it won't be migrated directly. Aliases will take care
+        * of segmenting RAM into legacy chunks that migration compatible.
+        */
+       vmstate_unregister_ram(ram, NULL);
+       name = g_strdup_printf("s390.ram");
+       while (mem_size) {
+           MemoryRegion *alias = g_new(MemoryRegion, 1);
+
+           /* KVM does not allow memslots >= 8 TB */
+           chunk = MIN(mem_size, KVM_SLOT_MAX_BYTES);
+           memory_region_init_alias(alias, NULL, name, ram, offset, chunk);
+           vmstate_register_ram_global(alias);
+           memory_region_add_subregion(sysmem, offset, alias);
+           mem_size -= chunk;
+           offset += chunk;
+           g_free(name);
+           name = g_strdup_printf("s390.ram.%u", ++number);
+       }
+    } else {
+       memory_region_add_subregion(sysmem, 0, ram);
     }
     g_free(name);
 
@@ -257,7 +276,7 @@  static void ccw_init(MachineState *machine)
 
     s390_sclp_init();
     /* init memory + setup max page size. Required for the CPU model */
-    s390_memory_init(machine->ram_size);
+    s390_memory_init(machine);
 
     /* init CPUs (incl. CPU model) early so s390_has_feature() works */
     s390_init_cpus(machine);
@@ -667,8 +686,11 @@  static void ccw_machine_4_1_instance_options(MachineState *machine)
 
 static void ccw_machine_4_1_class_options(MachineClass *mc)
 {
+    S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
+
     ccw_machine_4_2_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
+    s390mc->split_ram_layout = true;
 }
 DEFINE_CCW_MACHINE(4_1, "4.1", false);