diff mbox

[QEMU,v5,1/6] migration: alternative way to set instance_id in SaveStateEntry

Message ID 1475519097-27611-2-git-send-email-duanj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jianjun Duan Oct. 3, 2016, 6:24 p.m. UTC
In QOM(QEMU Object Model) migrated objects are identified with instance_id
which is calculated automatically using their path in the QOM composition
tree. For some objects, this path could change from source to target in
migration. To migrate such objects, we need to make sure the instance_id does
not change from source to target. We add a hook in DeviceClass to do customized
instance_id calculation in such cases.

As a result, in these cases compat will not be set in the concerned
SaveStateEntry. This will prevent the inconsistent idstr to be sent over in
migration. We could have set alias_id in a similar way. But that will be
overloading the purpose of alias_id.

The first application will be setting instance_id for DRC using its unique
index. Doing this makes the instance_id of DRC to be consistent across migration
and supports flexible management of DRC objects in migration.

Signed-off-by: Jianjun Duan <duanj@linux.vnet.ibm.com>
---
 include/hw/qdev-core.h |  6 ++++++
 migration/savevm.c     | 20 ++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

Comments

Dr. David Alan Gilbert Oct. 5, 2016, 10:12 a.m. UTC | #1
* Jianjun Duan (duanj@linux.vnet.ibm.com) wrote:
> In QOM(QEMU Object Model) migrated objects are identified with instance_id
> which is calculated automatically using their path in the QOM composition
> tree. For some objects, this path could change from source to target in
> migration. To migrate such objects, we need to make sure the instance_id does
> not change from source to target. We add a hook in DeviceClass to do customized
> instance_id calculation in such cases.

Can you explain a bit about why the path changes from source to destination;
the path here should be a feature of the guest state not the host, and so I
don't understand why it changes.

> As a result, in these cases compat will not be set in the concerned
> SaveStateEntry. This will prevent the inconsistent idstr to be sent over in
> migration. We could have set alias_id in a similar way. But that will be
> overloading the purpose of alias_id.
> 
> The first application will be setting instance_id for DRC using its unique
> index. Doing this makes the instance_id of DRC to be consistent across migration
> and supports flexible management of DRC objects in migration.

Is there a reason to use a custom instance_id rather than a custom idstr?

> 
> Signed-off-by: Jianjun Duan <duanj@linux.vnet.ibm.com>
> ---
>  include/hw/qdev-core.h |  6 ++++++
>  migration/savevm.c     | 20 ++++++++++++++++++--
>  2 files changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 2c97347..a012e8e 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -139,6 +139,12 @@ typedef struct DeviceClass {
>      qdev_initfn init; /* TODO remove, once users are converted to realize */
>      qdev_event exit; /* TODO remove, once users are converted to unrealize */
>      const char *bus_type;
> +
> +    /* When this field is set, qemu will use it to get an unique instance_id
> +     * instead of calculating an auto idstr and instanc_id for the relevant
> +     * SaveStateEntry
> +     */
> +    int (*dev_get_instance_id)(DeviceState *dev);
>  } DeviceClass;
>  
>  typedef struct NamedGPIOList NamedGPIOList;
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 33a2911..ef5c3d1 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -495,6 +495,11 @@ int register_savevm_live(DeviceState *dev,
>                           void *opaque)
>  {
>      SaveStateEntry *se;
> +    /* when it is a device and it provides a way to get instance_id,
> +     * we will use it and skip setting idstr and compat.
> +     */
> +    bool flag = (dev != NULL) &&
> +                (DEVICE_GET_CLASS(dev)->dev_get_instance_id != NULL);

We must be able to get a more descriptive name than 'flag'; how about
 'has_custom_id'

>      se = g_new0(SaveStateEntry, 1);
>      se->version_id = version_id;
> @@ -507,7 +512,7 @@ int register_savevm_live(DeviceState *dev,
>          se->is_ram = 1;
>      }
>  
> -    if (dev) {
> +    if (dev && !flag) {
>          char *id = qdev_get_dev_path(dev);
>          if (id) {
>              pstrcpy(se->idstr, sizeof(se->idstr), id);
> @@ -523,6 +528,9 @@ int register_savevm_live(DeviceState *dev,
>      }
>      pstrcat(se->idstr, sizeof(se->idstr), idstr);
>  
> +    if (flag) {
> +        instance_id = DEVICE_GET_CLASS(dev)->dev_get_instance_id(dev);
> +    }
>      if (instance_id == -1) {
>          se->instance_id = calculate_new_instance_id(se->idstr);
>      } else {
> @@ -580,6 +588,11 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>                                     int required_for_version)
>  {
>      SaveStateEntry *se;
> +    /* when it is a device and it provides a way to get instance_id,
> +     * we will use it and skip setting idstr and compat.
> +     */
> +    bool flag = (dev != NULL) &&
> +                (DEVICE_GET_CLASS(dev)->dev_get_instance_id != NULL);
>  
>      /* If this triggers, alias support can be dropped for the vmsd. */
>      assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
> @@ -591,7 +604,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>      se->vmsd = vmsd;
>      se->alias_id = alias_id;
>  
> -    if (dev) {
> +    if (dev && !flag) {
>          char *id = qdev_get_dev_path(dev);
>          if (id) {
>              pstrcpy(se->idstr, sizeof(se->idstr), id);
> @@ -607,6 +620,9 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>      }
>      pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
>  
> +    if (flag) {
> +        instance_id = DEVICE_GET_CLASS(dev)->dev_get_instance_id(dev);
> +    }
>      if (instance_id == -1) {
>          se->instance_id = calculate_new_instance_id(se->idstr);
>      } else {
> -- 
> 1.9.1

Dave
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Jianjun Duan Oct. 5, 2016, 4:44 p.m. UTC | #2
Please see comments below:

On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> * Jianjun Duan (duanj@linux.vnet.ibm.com) wrote:
>> In QOM(QEMU Object Model) migrated objects are identified with instance_id
>> which is calculated automatically using their path in the QOM composition
>> tree. For some objects, this path could change from source to target in
>> migration. To migrate such objects, we need to make sure the instance_id does
>> not change from source to target. We add a hook in DeviceClass to do customized
>> instance_id calculation in such cases.
> 
> Can you explain a bit about why the path changes from source to destination;
> the path here should be a feature of the guest state not the host, and so I
> don't understand why it changes.
Please see the discussion with David in the previous versions:
http://lists.nongnu.org/archive/html/qemu-ppc/2016-06/msg00062.html


>> As a result, in these cases compat will not be set in the concerned
>> SaveStateEntry. This will prevent the inconsistent idstr to be sent over in
>> migration. We could have set alias_id in a similar way. But that will be
>> overloading the purpose of alias_id.
>>
>> The first application will be setting instance_id for DRC using its unique
>> index. Doing this makes the instance_id of DRC to be consistent across migration
>> and supports flexible management of DRC objects in migration.
> 
> Is there a reason to use a custom instance_id rather than a custom idstr

It can be done either way. But it is easier to deal with a integer than
a string.


>>
>> Signed-off-by: Jianjun Duan <duanj@linux.vnet.ibm.com>
>> ---
>>  include/hw/qdev-core.h |  6 ++++++
>>  migration/savevm.c     | 20 ++++++++++++++++++--
>>  2 files changed, 24 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
>> index 2c97347..a012e8e 100644
>> --- a/include/hw/qdev-core.h
>> +++ b/include/hw/qdev-core.h
>> @@ -139,6 +139,12 @@ typedef struct DeviceClass {
>>      qdev_initfn init; /* TODO remove, once users are converted to realize */
>>      qdev_event exit; /* TODO remove, once users are converted to unrealize */
>>      const char *bus_type;
>> +
>> +    /* When this field is set, qemu will use it to get an unique instance_id
>> +     * instead of calculating an auto idstr and instanc_id for the relevant
>> +     * SaveStateEntry
>> +     */
>> +    int (*dev_get_instance_id)(DeviceState *dev);
>>  } DeviceClass;
>>  
>>  typedef struct NamedGPIOList NamedGPIOList;
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index 33a2911..ef5c3d1 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -495,6 +495,11 @@ int register_savevm_live(DeviceState *dev,
>>                           void *opaque)
>>  {
>>      SaveStateEntry *se;
>> +    /* when it is a device and it provides a way to get instance_id,
>> +     * we will use it and skip setting idstr and compat.
>> +     */
>> +    bool flag = (dev != NULL) &&
>> +                (DEVICE_GET_CLASS(dev)->dev_get_instance_id != NULL);
> 
> We must be able to get a more descriptive name than 'flag'; how about
>  'has_custom_id'
> 
>>      se = g_new0(SaveStateEntry, 1);
>>      se->version_id = version_id;
>> @@ -507,7 +512,7 @@ int register_savevm_live(DeviceState *dev,
>>          se->is_ram = 1;
>>      }
>>  
>> -    if (dev) {
>> +    if (dev && !flag) {
>>          char *id = qdev_get_dev_path(dev);
>>          if (id) {
>>              pstrcpy(se->idstr, sizeof(se->idstr), id);
>> @@ -523,6 +528,9 @@ int register_savevm_live(DeviceState *dev,
>>      }
>>      pstrcat(se->idstr, sizeof(se->idstr), idstr);
>>  
>> +    if (flag) {
>> +        instance_id = DEVICE_GET_CLASS(dev)->dev_get_instance_id(dev);
>> +    }
>>      if (instance_id == -1) {
>>          se->instance_id = calculate_new_instance_id(se->idstr);
>>      } else {
>> @@ -580,6 +588,11 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>>                                     int required_for_version)
>>  {
>>      SaveStateEntry *se;
>> +    /* when it is a device and it provides a way to get instance_id,
>> +     * we will use it and skip setting idstr and compat.
>> +     */
>> +    bool flag = (dev != NULL) &&
>> +                (DEVICE_GET_CLASS(dev)->dev_get_instance_id != NULL);
>>  
>>      /* If this triggers, alias support can be dropped for the vmsd. */
>>      assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
>> @@ -591,7 +604,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>>      se->vmsd = vmsd;
>>      se->alias_id = alias_id;
>>  
>> -    if (dev) {
>> +    if (dev && !flag) {
>>          char *id = qdev_get_dev_path(dev);
>>          if (id) {
>>              pstrcpy(se->idstr, sizeof(se->idstr), id);
>> @@ -607,6 +620,9 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>>      }
>>      pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
>>  
>> +    if (flag) {
>> +        instance_id = DEVICE_GET_CLASS(dev)->dev_get_instance_id(dev);
>> +    }
>>      if (instance_id == -1) {
>>          se->instance_id = calculate_new_instance_id(se->idstr);
>>      } else {
>> -- 
>> 1.9.1
> 
> Dave
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> 


Thanks,
Jianjun
Jianjun Duan Oct. 5, 2016, 4:46 p.m. UTC | #3
On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> * Jianjun Duan (duanj@linux.vnet.ibm.com) wrote:
>> In QOM(QEMU Object Model) migrated objects are identified with instance_id
>> which is calculated automatically using their path in the QOM composition
>> tree. For some objects, this path could change from source to target in
>> migration. To migrate such objects, we need to make sure the instance_id does
>> not change from source to target. We add a hook in DeviceClass to do customized
>> instance_id calculation in such cases.
> 
> Can you explain a bit about why the path changes from source to destination;
> the path here should be a feature of the guest state not the host, and so I
> don't understand why it changes.
> 
>> As a result, in these cases compat will not be set in the concerned
>> SaveStateEntry. This will prevent the inconsistent idstr to be sent over in
>> migration. We could have set alias_id in a similar way. But that will be
>> overloading the purpose of alias_id.
>>
>> The first application will be setting instance_id for DRC using its unique
>> index. Doing this makes the instance_id of DRC to be consistent across migration
>> and supports flexible management of DRC objects in migration.
> 
> Is there a reason to use a custom instance_id rather than a custom idstr?
> 
>>
>> Signed-off-by: Jianjun Duan <duanj@linux.vnet.ibm.com>
>> ---
>>  include/hw/qdev-core.h |  6 ++++++
>>  migration/savevm.c     | 20 ++++++++++++++++++--
>>  2 files changed, 24 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
>> index 2c97347..a012e8e 100644
>> --- a/include/hw/qdev-core.h
>> +++ b/include/hw/qdev-core.h
>> @@ -139,6 +139,12 @@ typedef struct DeviceClass {
>>      qdev_initfn init; /* TODO remove, once users are converted to realize */
>>      qdev_event exit; /* TODO remove, once users are converted to unrealize */
>>      const char *bus_type;
>> +
>> +    /* When this field is set, qemu will use it to get an unique instance_id
>> +     * instead of calculating an auto idstr and instanc_id for the relevant
>> +     * SaveStateEntry
>> +     */
>> +    int (*dev_get_instance_id)(DeviceState *dev);
>>  } DeviceClass;
>>  
>>  typedef struct NamedGPIOList NamedGPIOList;
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index 33a2911..ef5c3d1 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -495,6 +495,11 @@ int register_savevm_live(DeviceState *dev,
>>                           void *opaque)
>>  {
>>      SaveStateEntry *se;
>> +    /* when it is a device and it provides a way to get instance_id,
>> +     * we will use it and skip setting idstr and compat.
>> +     */
>> +    bool flag = (dev != NULL) &&
>> +                (DEVICE_GET_CLASS(dev)->dev_get_instance_id != NULL);
> 
> We must be able to get a more descriptive name than 'flag'; how about
>  'has_custom_id'
Sure. Will wait for more comments and address them together.

>>      se = g_new0(SaveStateEntry, 1);
>>      se->version_id = version_id;
>> @@ -507,7 +512,7 @@ int register_savevm_live(DeviceState *dev,
>>          se->is_ram = 1;
>>      }
>>  
>> -    if (dev) {
>> +    if (dev && !flag) {
>>          char *id = qdev_get_dev_path(dev);
>>          if (id) {
>>              pstrcpy(se->idstr, sizeof(se->idstr), id);
>> @@ -523,6 +528,9 @@ int register_savevm_live(DeviceState *dev,
>>      }
>>      pstrcat(se->idstr, sizeof(se->idstr), idstr);
>>  
>> +    if (flag) {
>> +        instance_id = DEVICE_GET_CLASS(dev)->dev_get_instance_id(dev);
>> +    }
>>      if (instance_id == -1) {
>>          se->instance_id = calculate_new_instance_id(se->idstr);
>>      } else {
>> @@ -580,6 +588,11 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>>                                     int required_for_version)
>>  {
>>      SaveStateEntry *se;
>> +    /* when it is a device and it provides a way to get instance_id,
>> +     * we will use it and skip setting idstr and compat.
>> +     */
>> +    bool flag = (dev != NULL) &&
>> +                (DEVICE_GET_CLASS(dev)->dev_get_instance_id != NULL);
>>  
>>      /* If this triggers, alias support can be dropped for the vmsd. */
>>      assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
>> @@ -591,7 +604,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>>      se->vmsd = vmsd;
>>      se->alias_id = alias_id;
>>  
>> -    if (dev) {
>> +    if (dev && !flag) {
>>          char *id = qdev_get_dev_path(dev);
>>          if (id) {
>>              pstrcpy(se->idstr, sizeof(se->idstr), id);
>> @@ -607,6 +620,9 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
>>      }
>>      pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
>>  
>> +    if (flag) {
>> +        instance_id = DEVICE_GET_CLASS(dev)->dev_get_instance_id(dev);
>> +    }
>>      if (instance_id == -1) {
>>          se->instance_id = calculate_new_instance_id(se->idstr);
>>      } else {
>> -- 
>> 1.9.1
> 
> Dave
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> 
Thanks,
Jianjun
David Gibson Oct. 7, 2016, 2:54 a.m. UTC | #4
On Wed, Oct 05, 2016 at 09:44:57AM -0700, Jianjun Duan wrote:
> Please see comments below:
> 
> On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> > * Jianjun Duan (duanj@linux.vnet.ibm.com) wrote:
> >> In QOM(QEMU Object Model) migrated objects are identified with instance_id
> >> which is calculated automatically using their path in the QOM composition
> >> tree. For some objects, this path could change from source to target in
> >> migration. To migrate such objects, we need to make sure the instance_id does
> >> not change from source to target. We add a hook in DeviceClass to do customized
> >> instance_id calculation in such cases.
> > 
> > Can you explain a bit about why the path changes from source to destination;
> > the path here should be a feature of the guest state not the host, and so I
> > don't understand why it changes.
> Please see the discussion with David in the previous versions:
> http://lists.nongnu.org/archive/html/qemu-ppc/2016-06/msg00062.html

Um.. your description above really isn't an accurate summary of that
discussion.

The point is not that the qom path will vary from source to
destination for some arbitrary reason, but rather that we anticipate
future changes in the QOM structure.  Specifically we're considering
eliminating the DRC objects, and folding their (limited) state into an
array in the parent object (either the machine or a PCI host bridge).

That would change the qom paths, and hence the auto-generated instance
ids, which would break migration between qemu versions before and
after the restructure.

I'm not sure that changing the instance ids is enough though, anyway,
since we're talking about eliminating the object entirely, the
class/type information in the migration stream also wouldn't match.

Dave, if you have ideas on how to deal with that, I'd love to hear
them

> 
> >> As a result, in these cases compat will not be set in the concerned
> >> SaveStateEntry. This will prevent the inconsistent idstr to be sent over in
> >> migration. We could have set alias_id in a similar way. But that will be
> >> overloading the purpose of alias_id.
> >>
> >> The first application will be setting instance_id for DRC using its unique
> >> index. Doing this makes the instance_id of DRC to be consistent across migration
> >> and supports flexible management of DRC objects in migration.
> > 
> > Is there a reason to use a custom instance_id rather than a custom idstr
> 
> It can be done either way. But it is easier to deal with a integer than
> a string.

A bit, but I don't think that's a good enough reason to introduce a
second mechanism for overriding instance id allocations.
Dr. David Alan Gilbert Oct. 7, 2016, 8:07 a.m. UTC | #5
* David Gibson (david@gibson.dropbear.id.au) wrote:
> On Wed, Oct 05, 2016 at 09:44:57AM -0700, Jianjun Duan wrote:
> > Please see comments below:
> > 
> > On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> > > * Jianjun Duan (duanj@linux.vnet.ibm.com) wrote:
> > >> In QOM(QEMU Object Model) migrated objects are identified with instance_id
> > >> which is calculated automatically using their path in the QOM composition
> > >> tree. For some objects, this path could change from source to target in
> > >> migration. To migrate such objects, we need to make sure the instance_id does
> > >> not change from source to target. We add a hook in DeviceClass to do customized
> > >> instance_id calculation in such cases.
> > > 
> > > Can you explain a bit about why the path changes from source to destination;
> > > the path here should be a feature of the guest state not the host, and so I
> > > don't understand why it changes.
> > Please see the discussion with David in the previous versions:
> > http://lists.nongnu.org/archive/html/qemu-ppc/2016-06/msg00062.html
> 
> Um.. your description above really isn't an accurate summary of that
> discussion.
> 
> The point is not that the qom path will vary from source to
> destination for some arbitrary reason, but rather that we anticipate
> future changes in the QOM structure.  Specifically we're considering
> eliminating the DRC objects, and folding their (limited) state into an
> array in the parent object (either the machine or a PCI host bridge).
> 
> That would change the qom paths, and hence the auto-generated instance
> ids, which would break migration between qemu versions before and
> after the restructure.
> 
> I'm not sure that changing the instance ids is enough though, anyway,
> since we're talking about eliminating the object entirely, the
> class/type information in the migration stream also wouldn't match.
> 
> Dave, if you have ideas on how to deal with that, I'd love to hear
> them

Eliminating the object entirely would be tricky to deal with;
allowing the structure to change would work if instead of a custom instance_id
you had a custom idstr.

However, the question then becomes why is the structure changing so much;
ideally things in the migration stream should represent some tangible object
that corresponds to something real, but (again ideally) the contents
of the stream should reflect the state of those objects not the current
implementation - so if you want to change the implementation the stream
doesn't change.  Is it your implementation, or the understanding of what
the objects actually represent that's in flux?

Dave

> 
> > 
> > >> As a result, in these cases compat will not be set in the concerned
> > >> SaveStateEntry. This will prevent the inconsistent idstr to be sent over in
> > >> migration. We could have set alias_id in a similar way. But that will be
> > >> overloading the purpose of alias_id.
> > >>
> > >> The first application will be setting instance_id for DRC using its unique
> > >> index. Doing this makes the instance_id of DRC to be consistent across migration
> > >> and supports flexible management of DRC objects in migration.
> > > 
> > > Is there a reason to use a custom instance_id rather than a custom idstr
> > 
> > It can be done either way. But it is easier to deal with a integer than
> > a string.
> 
> A bit, but I don't think that's a good enough reason to introduce a
> second mechanism for overriding instance id allocations.
> 
> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
David Gibson Oct. 10, 2016, 5:31 a.m. UTC | #6
On Fri, Oct 07, 2016 at 09:07:49AM +0100, Dr. David Alan Gilbert wrote:
> * David Gibson (david@gibson.dropbear.id.au) wrote:
> > On Wed, Oct 05, 2016 at 09:44:57AM -0700, Jianjun Duan wrote:
> > > Please see comments below:
> > > 
> > > On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> > > > * Jianjun Duan (duanj@linux.vnet.ibm.com) wrote:
> > > >> In QOM(QEMU Object Model) migrated objects are identified with instance_id
> > > >> which is calculated automatically using their path in the QOM composition
> > > >> tree. For some objects, this path could change from source to target in
> > > >> migration. To migrate such objects, we need to make sure the instance_id does
> > > >> not change from source to target. We add a hook in DeviceClass to do customized
> > > >> instance_id calculation in such cases.
> > > > 
> > > > Can you explain a bit about why the path changes from source to destination;
> > > > the path here should be a feature of the guest state not the host, and so I
> > > > don't understand why it changes.
> > > Please see the discussion with David in the previous versions:
> > > http://lists.nongnu.org/archive/html/qemu-ppc/2016-06/msg00062.html
> > 
> > Um.. your description above really isn't an accurate summary of that
> > discussion.
> > 
> > The point is not that the qom path will vary from source to
> > destination for some arbitrary reason, but rather that we anticipate
> > future changes in the QOM structure.  Specifically we're considering
> > eliminating the DRC objects, and folding their (limited) state into an
> > array in the parent object (either the machine or a PCI host bridge).
> > 
> > That would change the qom paths, and hence the auto-generated instance
> > ids, which would break migration between qemu versions before and
> > after the restructure.
> > 
> > I'm not sure that changing the instance ids is enough though, anyway,
> > since we're talking about eliminating the object entirely, the
> > class/type information in the migration stream also wouldn't match.
> > 
> > Dave, if you have ideas on how to deal with that, I'd love to hear
> > them
> 
> Eliminating the object entirely would be tricky to deal with;
> allowing the structure to change would work if instead of a custom instance_id
> you had a custom idstr.

Sorry, two misunderstandings here.

  * When I said "structure" I meant in the high-level sense of what
    objects exist and are responsible for what things, not in the
    'struct WhateverState' sense.

  * In fact right now eliminating the objects would be ok, since they
    have no migration state (which causes the problems this series is
    trying to address).  But applying this series which adds migration
    state would make it difficult to eliminate the objects in future.
    That's an awkward constraint given that we've already got some
    hints that these objects are not a good idea.

> However, the question then becomes why is the structure changing so much;
> ideally things in the migration stream should represent some tangible object
> that corresponds to something real, but (again ideally) the contents
> of the stream should reflect the state of those objects not the current
> implementation - so if you want to change the implementation the stream
> doesn't change.  Is it your implementation, or the understanding of what
> the objects actually represent that's in flux?

So, the objects in question are DRCs - "Dynamic Re-configuration
Connector"; silly IBM talk for "a port into which something can be
hotplugged", bascially.  These aren't "real" devices, but rather a
firmware/hypervisor abstraction which are used to describe a hotplug
point.  Each DRC allows either one CPU core, one PCI device, or one
LMB (256MiB chunk of RAM) to be hotplugged (or removed).  The PCI DRCs
are "owned" by the PCI host bridge to which the device would be
connected, the CPU and memory DRCs are owned by the machine.

The state variables which Jianjun Duan is adding to migration are
values defined in the PAPR (hypervisor interface) spec, and so are
tangible enough to be sensible to migrate.  At the moment, each LMB is
represented by a discrete QOM object, but I've been thinking for a
while that this may be a mistake.  In particular it's a problem for
the LMB DRCs - because each LMB is only 256MiB of memory, we end up
with thousands, maybe tens of thousands of DRC objects on a guest with
with large maxmem (even if initial memory is small).  The QOM
infrastructure doesn't deal terribly well that many object child
properties.

The construction of those DRCs used to be an N^3 algorithm, which as
you'd imagine caused horrible startup times with large maxmem (minutes
to hours above ~512GiB).  We fixed it to be only N^2, so now the
startup delays are merely bad (at least once you get to ~2TiB+
maxmem).

We could fix that by changing QOM to use a hash or tree for object
children, instead of a plain list, but I'm not sure if it makes sense
to do that with only this use case.

So, for some time, I've been considering rewriting the DRC stuff to be
implemented via QOM interface on the "owner" object, rather than
separate objects for every single connector.

So the difficulty is that if we add the state migration to the DRC
objects as they stand, we prevent ourselves from doing a refactor
that's probably a good idea.  But no-one's yet had time to actually
thrash out such a refactor. :/
Michael Roth Oct. 11, 2016, 4:17 p.m. UTC | #7
Quoting David Gibson (2016-10-10 00:31:20)
> On Fri, Oct 07, 2016 at 09:07:49AM +0100, Dr. David Alan Gilbert wrote:
> > * David Gibson (david@gibson.dropbear.id.au) wrote:
> > > On Wed, Oct 05, 2016 at 09:44:57AM -0700, Jianjun Duan wrote:
> > > > Please see comments below:
> > > > 
> > > > On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> > > > > * Jianjun Duan (duanj@linux.vnet.ibm.com) wrote:
> > > > >> In QOM(QEMU Object Model) migrated objects are identified with instance_id
> > > > >> which is calculated automatically using their path in the QOM composition
> > > > >> tree. For some objects, this path could change from source to target in
> > > > >> migration. To migrate such objects, we need to make sure the instance_id does
> > > > >> not change from source to target. We add a hook in DeviceClass to do customized
> > > > >> instance_id calculation in such cases.
> > > > > 
> > > > > Can you explain a bit about why the path changes from source to destination;
> > > > > the path here should be a feature of the guest state not the host, and so I
> > > > > don't understand why it changes.
> > > > Please see the discussion with David in the previous versions:
> > > > http://lists.nongnu.org/archive/html/qemu-ppc/2016-06/msg00062.html
> > > 
> > > Um.. your description above really isn't an accurate summary of that
> > > discussion.
> > > 
> > > The point is not that the qom path will vary from source to
> > > destination for some arbitrary reason, but rather that we anticipate
> > > future changes in the QOM structure.  Specifically we're considering
> > > eliminating the DRC objects, and folding their (limited) state into an
> > > array in the parent object (either the machine or a PCI host bridge).
> > > 
> > > That would change the qom paths, and hence the auto-generated instance
> > > ids, which would break migration between qemu versions before and
> > > after the restructure.
> > > 
> > > I'm not sure that changing the instance ids is enough though, anyway,
> > > since we're talking about eliminating the object entirely, the
> > > class/type information in the migration stream also wouldn't match.
> > > 
> > > Dave, if you have ideas on how to deal with that, I'd love to hear
> > > them
> > 
> > Eliminating the object entirely would be tricky to deal with;
> > allowing the structure to change would work if instead of a custom instance_id
> > you had a custom idstr.
> 
> Sorry, two misunderstandings here.
> 
>   * When I said "structure" I meant in the high-level sense of what
>     objects exist and are responsible for what things, not in the
>     'struct WhateverState' sense.
> 
>   * In fact right now eliminating the objects would be ok, since they
>     have no migration state (which causes the problems this series is
>     trying to address).  But applying this series which adds migration
>     state would make it difficult to eliminate the objects in future.
>     That's an awkward constraint given that we've already got some
>     hints that these objects are not a good idea.
> 
> > However, the question then becomes why is the structure changing so much;
> > ideally things in the migration stream should represent some tangible object
> > that corresponds to something real, but (again ideally) the contents
> > of the stream should reflect the state of those objects not the current
> > implementation - so if you want to change the implementation the stream
> > doesn't change.  Is it your implementation, or the understanding of what
> > the objects actually represent that's in flux?
> 
> So, the objects in question are DRCs - "Dynamic Re-configuration
> Connector"; silly IBM talk for "a port into which something can be
> hotplugged", bascially.  These aren't "real" devices, but rather a
> firmware/hypervisor abstraction which are used to describe a hotplug
> point.  Each DRC allows either one CPU core, one PCI device, or one
> LMB (256MiB chunk of RAM) to be hotplugged (or removed).  The PCI DRCs
> are "owned" by the PCI host bridge to which the device would be
> connected, the CPU and memory DRCs are owned by the machine.
> 
> The state variables which Jianjun Duan is adding to migration are
> values defined in the PAPR (hypervisor interface) spec, and so are
> tangible enough to be sensible to migrate.  At the moment, each LMB is
> represented by a discrete QOM object, but I've been thinking for a
> while that this may be a mistake.  In particular it's a problem for
> the LMB DRCs - because each LMB is only 256MiB of memory, we end up
> with thousands, maybe tens of thousands of DRC objects on a guest with
> with large maxmem (even if initial memory is small).  The QOM
> infrastructure doesn't deal terribly well that many object child
> properties.
> 
> The construction of those DRCs used to be an N^3 algorithm, which as
> you'd imagine caused horrible startup times with large maxmem (minutes
> to hours above ~512GiB).  We fixed it to be only N^2, so now the
> startup delays are merely bad (at least once you get to ~2TiB+
> maxmem).
> 
> We could fix that by changing QOM to use a hash or tree for object
> children, instead of a plain list, but I'm not sure if it makes sense
> to do that with only this use case.

Is that still as much an issue as of this patch?

commit b604a854e843505007c59d68112c654556102a20
Author: Pavel Fedin <p.fedin@samsung.com>
Date:   Tue Oct 13 13:37:45 2015 +0100

    qom: Replace object property list with GHashTable
    
    ARM GICv3 systems with large number of CPUs create lots of IRQ pins. Since
    every pin is represented as a property, number of these properties becomes
    very large. Every property add first makes sure there's no duplicates.
    Traversing the list becomes very slow, therefore QEMU initialization takes
    significant time (several seconds for e. g. 16 CPUs).
    
    This patch replaces list with GHashTable, making lookup very fast. The only
    drawback is that object_child_foreach() and object_child_foreach_recursive()
    cannot add or remove properties during traversal, since GHashTableIter does
    not have modify-safe version. However, the code seems not to modify objects
    via these functions.

> 
> So, for some time, I've been considering rewriting the DRC stuff to be
> implemented via QOM interface on the "owner" object, rather than
> separate objects for every single connector.

At a high-level I think I'd prefer improving QOM over working around it's
performance issues. It seems like those limits are bound to be pushed
eventually given the wide range of hardware it's meant to support
in theory.

But identifying a DRC state in the migration stream with a stable value based
on DRC index makes sense either way. If I understand this patch it seems
like we have that: if a custom instance_id getter is available, we avoid
using the QOM path for idstr and use the default of vmsd->name, then use
the instance_id from the getter. If we dropped DRCs as objects, we'd need
a mechanism other than DeviceClass->dev_get_instance_id() to provide this
custom instance_id, but otherwise so long as the state was the same, and
the VMSD was the same, it seems like it should be doable in theory.

> 
> So the difficulty is that if we add the state migration to the DRC
> objects as they stand, we prevent ourselves from doing a refactor
> that's probably a good idea.  But no-one's yet had time to actually
> thrash out such a refactor. :/
> 
> -- 
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                 | _way_ _around_!
> http://www.ozlabs.org/~dgibson
David Gibson Oct. 11, 2016, 11:37 p.m. UTC | #8
On Tue, Oct 11, 2016 at 11:17:35AM -0500, Michael Roth wrote:
> Quoting David Gibson (2016-10-10 00:31:20)
> > On Fri, Oct 07, 2016 at 09:07:49AM +0100, Dr. David Alan Gilbert wrote:
> > > * David Gibson (david@gibson.dropbear.id.au) wrote:
> > > > On Wed, Oct 05, 2016 at 09:44:57AM -0700, Jianjun Duan wrote:
> > > > > Please see comments below:
> > > > > 
> > > > > On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> > > > > > * Jianjun Duan (duanj@linux.vnet.ibm.com) wrote:
> > > > > >> In QOM(QEMU Object Model) migrated objects are identified with instance_id
> > > > > >> which is calculated automatically using their path in the QOM composition
> > > > > >> tree. For some objects, this path could change from source to target in
> > > > > >> migration. To migrate such objects, we need to make sure the instance_id does
> > > > > >> not change from source to target. We add a hook in DeviceClass to do customized
> > > > > >> instance_id calculation in such cases.
> > > > > > 
> > > > > > Can you explain a bit about why the path changes from source to destination;
> > > > > > the path here should be a feature of the guest state not the host, and so I
> > > > > > don't understand why it changes.
> > > > > Please see the discussion with David in the previous versions:
> > > > > http://lists.nongnu.org/archive/html/qemu-ppc/2016-06/msg00062.html
> > > > 
> > > > Um.. your description above really isn't an accurate summary of that
> > > > discussion.
> > > > 
> > > > The point is not that the qom path will vary from source to
> > > > destination for some arbitrary reason, but rather that we anticipate
> > > > future changes in the QOM structure.  Specifically we're considering
> > > > eliminating the DRC objects, and folding their (limited) state into an
> > > > array in the parent object (either the machine or a PCI host bridge).
> > > > 
> > > > That would change the qom paths, and hence the auto-generated instance
> > > > ids, which would break migration between qemu versions before and
> > > > after the restructure.
> > > > 
> > > > I'm not sure that changing the instance ids is enough though, anyway,
> > > > since we're talking about eliminating the object entirely, the
> > > > class/type information in the migration stream also wouldn't match.
> > > > 
> > > > Dave, if you have ideas on how to deal with that, I'd love to hear
> > > > them
> > > 
> > > Eliminating the object entirely would be tricky to deal with;
> > > allowing the structure to change would work if instead of a custom instance_id
> > > you had a custom idstr.
> > 
> > Sorry, two misunderstandings here.
> > 
> >   * When I said "structure" I meant in the high-level sense of what
> >     objects exist and are responsible for what things, not in the
> >     'struct WhateverState' sense.
> > 
> >   * In fact right now eliminating the objects would be ok, since they
> >     have no migration state (which causes the problems this series is
> >     trying to address).  But applying this series which adds migration
> >     state would make it difficult to eliminate the objects in future.
> >     That's an awkward constraint given that we've already got some
> >     hints that these objects are not a good idea.
> > 
> > > However, the question then becomes why is the structure changing so much;
> > > ideally things in the migration stream should represent some tangible object
> > > that corresponds to something real, but (again ideally) the contents
> > > of the stream should reflect the state of those objects not the current
> > > implementation - so if you want to change the implementation the stream
> > > doesn't change.  Is it your implementation, or the understanding of what
> > > the objects actually represent that's in flux?
> > 
> > So, the objects in question are DRCs - "Dynamic Re-configuration
> > Connector"; silly IBM talk for "a port into which something can be
> > hotplugged", bascially.  These aren't "real" devices, but rather a
> > firmware/hypervisor abstraction which are used to describe a hotplug
> > point.  Each DRC allows either one CPU core, one PCI device, or one
> > LMB (256MiB chunk of RAM) to be hotplugged (or removed).  The PCI DRCs
> > are "owned" by the PCI host bridge to which the device would be
> > connected, the CPU and memory DRCs are owned by the machine.
> > 
> > The state variables which Jianjun Duan is adding to migration are
> > values defined in the PAPR (hypervisor interface) spec, and so are
> > tangible enough to be sensible to migrate.  At the moment, each LMB is
> > represented by a discrete QOM object, but I've been thinking for a
> > while that this may be a mistake.  In particular it's a problem for
> > the LMB DRCs - because each LMB is only 256MiB of memory, we end up
> > with thousands, maybe tens of thousands of DRC objects on a guest with
> > with large maxmem (even if initial memory is small).  The QOM
> > infrastructure doesn't deal terribly well that many object child
> > properties.
> > 
> > The construction of those DRCs used to be an N^3 algorithm, which as
> > you'd imagine caused horrible startup times with large maxmem (minutes
> > to hours above ~512GiB).  We fixed it to be only N^2, so now the
> > startup delays are merely bad (at least once you get to ~2TiB+
> > maxmem).
> > 
> > We could fix that by changing QOM to use a hash or tree for object
> > children, instead of a plain list, but I'm not sure if it makes sense
> > to do that with only this use case.
> 
> Is that still as much an issue as of this patch?
> 
> commit b604a854e843505007c59d68112c654556102a20
> Author: Pavel Fedin <p.fedin@samsung.com>
> Date:   Tue Oct 13 13:37:45 2015 +0100
> 
>     qom: Replace object property list with GHashTable
>     
>     ARM GICv3 systems with large number of CPUs create lots of IRQ pins. Since
>     every pin is represented as a property, number of these properties becomes
>     very large. Every property add first makes sure there's no duplicates.
>     Traversing the list becomes very slow, therefore QEMU initialization takes
>     significant time (several seconds for e. g. 16 CPUs).
>     
>     This patch replaces list with GHashTable, making lookup very fast. The only
>     drawback is that object_child_foreach() and object_child_foreach_recursive()
>     cannot add or remove properties during traversal, since GHashTableIter does
>     not have modify-safe version. However, the code seems not to modify objects
>     via these functions.

Huh.. good point.  I'd completely forgotten that change had already
happened.

> > So, for some time, I've been considering rewriting the DRC stuff to be
> > implemented via QOM interface on the "owner" object, rather than
> > separate objects for every single connector.
> 
> At a high-level I think I'd prefer improving QOM over working around it's
> performance issues. It seems like those limits are bound to be pushed
> eventually given the wide range of hardware it's meant to support
> in theory.

Yeah, given we already have that hash, it probably makes sense to keep
the DRCs as "real" objects.

> But identifying a DRC state in the migration stream with a stable value based
> on DRC index makes sense either way. If I understand this patch it seems
> like we have that: if a custom instance_id getter is available, we avoid
> using the QOM path for idstr and use the default of vmsd->name, then use
> the instance_id from the getter. If we dropped DRCs as objects, we'd need
> a mechanism other than DeviceClass->dev_get_instance_id() to provide this
> custom instance_id, but otherwise so long as the state was the same, and
> the VMSD was the same, it seems like it should be doable in theory.

So, as noted in other comments, I'm not sure we actually need a new
mechanism for overriding the instance id.  But regardless of the
mechanism once we have a stable id based on the DRC index, yes it
should be ok to migrate the state variables of each DRC this way.

> 
> > 
> > So the difficulty is that if we add the state migration to the DRC
> > objects as they stand, we prevent ourselves from doing a refactor
> > that's probably a good idea.  But no-one's yet had time to actually
> > thrash out such a refactor. :/
> > 
>
diff mbox

Patch

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 2c97347..a012e8e 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -139,6 +139,12 @@  typedef struct DeviceClass {
     qdev_initfn init; /* TODO remove, once users are converted to realize */
     qdev_event exit; /* TODO remove, once users are converted to unrealize */
     const char *bus_type;
+
+    /* When this field is set, qemu will use it to get an unique instance_id
+     * instead of calculating an auto idstr and instanc_id for the relevant
+     * SaveStateEntry
+     */
+    int (*dev_get_instance_id)(DeviceState *dev);
 } DeviceClass;
 
 typedef struct NamedGPIOList NamedGPIOList;
diff --git a/migration/savevm.c b/migration/savevm.c
index 33a2911..ef5c3d1 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -495,6 +495,11 @@  int register_savevm_live(DeviceState *dev,
                          void *opaque)
 {
     SaveStateEntry *se;
+    /* when it is a device and it provides a way to get instance_id,
+     * we will use it and skip setting idstr and compat.
+     */
+    bool flag = (dev != NULL) &&
+                (DEVICE_GET_CLASS(dev)->dev_get_instance_id != NULL);
 
     se = g_new0(SaveStateEntry, 1);
     se->version_id = version_id;
@@ -507,7 +512,7 @@  int register_savevm_live(DeviceState *dev,
         se->is_ram = 1;
     }
 
-    if (dev) {
+    if (dev && !flag) {
         char *id = qdev_get_dev_path(dev);
         if (id) {
             pstrcpy(se->idstr, sizeof(se->idstr), id);
@@ -523,6 +528,9 @@  int register_savevm_live(DeviceState *dev,
     }
     pstrcat(se->idstr, sizeof(se->idstr), idstr);
 
+    if (flag) {
+        instance_id = DEVICE_GET_CLASS(dev)->dev_get_instance_id(dev);
+    }
     if (instance_id == -1) {
         se->instance_id = calculate_new_instance_id(se->idstr);
     } else {
@@ -580,6 +588,11 @@  int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
                                    int required_for_version)
 {
     SaveStateEntry *se;
+    /* when it is a device and it provides a way to get instance_id,
+     * we will use it and skip setting idstr and compat.
+     */
+    bool flag = (dev != NULL) &&
+                (DEVICE_GET_CLASS(dev)->dev_get_instance_id != NULL);
 
     /* If this triggers, alias support can be dropped for the vmsd. */
     assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id);
@@ -591,7 +604,7 @@  int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
     se->vmsd = vmsd;
     se->alias_id = alias_id;
 
-    if (dev) {
+    if (dev && !flag) {
         char *id = qdev_get_dev_path(dev);
         if (id) {
             pstrcpy(se->idstr, sizeof(se->idstr), id);
@@ -607,6 +620,9 @@  int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
     }
     pstrcat(se->idstr, sizeof(se->idstr), vmsd->name);
 
+    if (flag) {
+        instance_id = DEVICE_GET_CLASS(dev)->dev_get_instance_id(dev);
+    }
     if (instance_id == -1) {
         se->instance_id = calculate_new_instance_id(se->idstr);
     } else {