diff mbox

[v2,3/5] hpet 'driftfix': add fields to HPETTimer and VMStateDescription

Message ID 1302276042-3497-4-git-send-email-uobergfe@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ulrich Obergfell April 8, 2011, 3:20 p.m. UTC
Signed-off-by: Ulrich Obergfell <uobergfe@redhat.com>
---
 hw/hpet.c |   14 ++++++++++++--
 1 files changed, 12 insertions(+), 2 deletions(-)

Comments

Anthony Liguori April 8, 2011, 3:47 p.m. UTC | #1
On 04/08/2011 10:20 AM, Ulrich Obergfell wrote:
> Signed-off-by: Ulrich Obergfell<uobergfe@redhat.com>
> ---
>   hw/hpet.c |   14 ++++++++++++--
>   1 files changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/hw/hpet.c b/hw/hpet.c
> index 45847ed..c150da5 100644
> --- a/hw/hpet.c
> +++ b/hw/hpet.c
> @@ -55,6 +55,11 @@ typedef struct HPETTimer {  /* timers */
>       uint8_t wrap_flag;      /* timer pop will indicate wrap for one-shot 32-bit
>                                * mode. Next pop will be actual timer expiration.
>                                */
> +    uint64_t saved_period;
> +    uint64_t ticks_not_accounted;
> +    uint32_t irqs_to_inject;
> +    uint32_t irq_rate;
> +    uint32_t divisor;
>   } HPETTimer;
>
>   typedef struct HPETState {
> @@ -248,7 +253,7 @@ static int hpet_post_load(void *opaque, int version_id)
>
>   static const VMStateDescription vmstate_hpet_timer = {
>       .name = "hpet_timer",
> -    .version_id = 1,
> +    .version_id = 3,

Why jump from 1 to 3?

>       .minimum_version_id = 1,
>       .minimum_version_id_old = 1,
>       .fields      = (VMStateField []) {
> @@ -258,6 +263,11 @@ static const VMStateDescription vmstate_hpet_timer = {
>           VMSTATE_UINT64(fsb, HPETTimer),
>           VMSTATE_UINT64(period, HPETTimer),
>           VMSTATE_UINT8(wrap_flag, HPETTimer),
> +        VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
> +        VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
> +        VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
> +        VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
> +        VMSTATE_UINT32_V(divisor, HPETTimer, 3),

We ought to be able to use a subsection keyed off of whether any ticks 
are currently accumulated, no?

Regards,

Anthony Liguori

>           VMSTATE_TIMER(qemu_timer, HPETTimer),
>           VMSTATE_END_OF_LIST()
>       }
> @@ -265,7 +275,7 @@ static const VMStateDescription vmstate_hpet_timer = {
>
>   static const VMStateDescription vmstate_hpet = {
>       .name = "hpet",
> -    .version_id = 2,
> +    .version_id = 3,
>       .minimum_version_id = 1,
>       .minimum_version_id_old = 1,
>       .pre_save = hpet_pre_save,

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell April 11, 2011, 8:24 a.m. UTC | #2
>>   typedef struct HPETState {
>> @@ -248,7 +253,7 @@ static int hpet_post_load(void *opaque, int
>> version_id)
>>
>>   static const VMStateDescription vmstate_hpet_timer = {
>>       .name = "hpet_timer",
>> - .version_id = 1,
>> + .version_id = 3,
>
> Why jump from 1 to 3?
> 
>>       .minimum_version_id = 1,
>>       .minimum_version_id_old = 1,
>>       .fields = (VMStateField []) {
>> @@ -258,6 +263,11 @@ static const VMStateDescription
>> vmstate_hpet_timer = {
>>           VMSTATE_UINT64(fsb, HPETTimer),
>>           VMSTATE_UINT64(period, HPETTimer),
>>           VMSTATE_UINT8(wrap_flag, HPETTimer),
>> + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
>> + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
>> + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
>> + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
>> + VMSTATE_UINT32_V(divisor, HPETTimer, 3),


Anthony,

I incremented the version ID of 'vmstate_hpet' from 2 to 3 to make sure
that migrations from a QEMU process that is capable of 'driftfix' to a
QEMU process that is _not_ capable of 'driftfix' will fail. I assigned
version ID 3 to 'vmstate_hpet_timer' and to the new fields in there too
to indicate that adding those fields was the reason why the version ID
of 'vmstate_hpet' was incremented to 3.

As far as the flow of execution in vmstate_load_state() is concerned, I
think it does not matter whether the version ID of 'vmstate_hpet_timer'
and the new fields in there is 2 or 3 (as long as they are consistent).
When the 'while(field->name)' loop in vmstate_load_state() gets to the
following field in 'vmstate_hpet' ...

    VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
                                vmstate_hpet_timer, HPETTimer),

... it calls itself recursively ...

    if (field->flags & VMS_STRUCT) {
        ret = vmstate_load_state(f, field->vmsd, addr, field->vmsd->version_id);

'field->vmsd->version_id' is the version ID of 'vmstate_hpet_timer' [1].
Hence 'vmstate_hpet_timer.version_id' is being checked against itself ...

    if (version_id > vmsd->version_id) {
        return -EINVAL;
    }

... and the version IDs of the new fields are also being checked against
'vmstate_hpet_timer.version_id' ...

    if ((field->field_exists &&
         field->field_exists(opaque, version_id)) ||
        (!field->field_exists &&
         field->version_id <= version_id)) {

If you want me to change the version ID of 'vmstate_hpet_timer' and the
new fields in there from 3 to 2, I can do that.


Regards,

Uli


[1] Ref.: commit fa3aad24d94a6cf894db52d83f72a399324a17bb
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell April 11, 2011, 9:06 a.m. UTC | #3
>> vmstate_hpet_timer = {
>>           VMSTATE_UINT64(fsb, HPETTimer),
>>           VMSTATE_UINT64(period, HPETTimer),
>>           VMSTATE_UINT8(wrap_flag, HPETTimer),
>> + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
>> + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
>> + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
>> + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
>> + VMSTATE_UINT32_V(divisor, HPETTimer, 3),
>
> We ought to be able to use a subsection keyed off of whether any ticks
> are currently accumulated, no?


Anthony,

I'm not sure if I understand your question correctly. Are you suggesting
to migrate the driftfix-related state conditionally / only if there are
any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?

The size of the driftfix-related state is 28 bytes per timer and we have
32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
Hence, unconditional migration of the driftfix-related state should not
cause significant additional overhead.

Maybe I missed something. Could you please explain which benefit you see
in using a subsection ?


Regards,

Uli
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity April 11, 2011, 9:08 a.m. UTC | #4
On 04/11/2011 12:06 PM, Ulrich Obergfell wrote:
> >>  vmstate_hpet_timer = {
> >>            VMSTATE_UINT64(fsb, HPETTimer),
> >>            VMSTATE_UINT64(period, HPETTimer),
> >>            VMSTATE_UINT8(wrap_flag, HPETTimer),
> >>  + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
> >>  + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
> >>  + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
> >>  + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
> >>  + VMSTATE_UINT32_V(divisor, HPETTimer, 3),
> >
> >  We ought to be able to use a subsection keyed off of whether any ticks
> >  are currently accumulated, no?
>
>
> Anthony,
>
> I'm not sure if I understand your question correctly. Are you suggesting
> to migrate the driftfix-related state conditionally / only if there are
> any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
>
> The size of the driftfix-related state is 28 bytes per timer and we have
> 32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
> maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
> Hence, unconditional migration of the driftfix-related state should not
> cause significant additional overhead.
>

It's not about overhead.

> Maybe I missed something. Could you please explain which benefit you see
> in using a subsection ?

In the common case of there being no drift, you can migrate from a qemu 
that supports driftfix to a qemu that doesn't.
Anthony Liguori April 11, 2011, 1:10 p.m. UTC | #5
On 04/11/2011 04:08 AM, Avi Kivity wrote:
> On 04/11/2011 12:06 PM, Ulrich Obergfell wrote:
>> >>  vmstate_hpet_timer = {
>> >>            VMSTATE_UINT64(fsb, HPETTimer),
>> >>            VMSTATE_UINT64(period, HPETTimer),
>> >>            VMSTATE_UINT8(wrap_flag, HPETTimer),
>> >>  + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
>> >>  + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
>> >>  + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
>> >>  + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
>> >>  + VMSTATE_UINT32_V(divisor, HPETTimer, 3),
>> >
>> >  We ought to be able to use a subsection keyed off of whether any 
>> ticks
>> >  are currently accumulated, no?
>>
>>
>> Anthony,
>>
>> I'm not sure if I understand your question correctly. Are you suggesting
>> to migrate the driftfix-related state conditionally / only if there are
>> any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
>>
>> The size of the driftfix-related state is 28 bytes per timer and we have
>> 32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
>> maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
>> Hence, unconditional migration of the driftfix-related state should not
>> cause significant additional overhead.
>>
>
> It's not about overhead.
>
>> Maybe I missed something. Could you please explain which benefit you see
>> in using a subsection ?
>
> In the common case of there being no drift, you can migrate from a 
> qemu that supports driftfix to a qemu that doesn't.
>

Right, subsections are a trick.  The idea is that when you introduce new 
state for a device model that is not always going to be set, when you do 
the migration, you detect whether the state is set or not and if it's 
not set, instead of sending empty versions of that state (i.e. 
missed_ticks=0) you just don't send the new state at all.

This means that you can migrate to an older version of QEMU provided the 
migration would work correctly.

Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Anthony Liguori April 11, 2011, 1:10 p.m. UTC | #6
On 04/11/2011 03:24 AM, Ulrich Obergfell wrote:
>>>    typedef struct HPETState {
>>> @@ -248,7 +253,7 @@ static int hpet_post_load(void *opaque, int
>>> version_id)
>>>
>>>    static const VMStateDescription vmstate_hpet_timer = {
>>>        .name = "hpet_timer",
>>> - .version_id = 1,
>>> + .version_id = 3,
>> Why jump from 1 to 3?
>>
>>>        .minimum_version_id = 1,
>>>        .minimum_version_id_old = 1,
>>>        .fields = (VMStateField []) {
>>> @@ -258,6 +263,11 @@ static const VMStateDescription
>>> vmstate_hpet_timer = {
>>>            VMSTATE_UINT64(fsb, HPETTimer),
>>>            VMSTATE_UINT64(period, HPETTimer),
>>>            VMSTATE_UINT8(wrap_flag, HPETTimer),
>>> + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
>>> + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
>>> + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
>>> + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
>>> + VMSTATE_UINT32_V(divisor, HPETTimer, 3),
>
> Anthony,
>
> I incremented the version ID of 'vmstate_hpet' from 2 to 3 to make sure
> that migrations from a QEMU process that is capable of 'driftfix' to a
> QEMU process that is _not_ capable of 'driftfix' will fail. I assigned
> version ID 3 to 'vmstate_hpet_timer' and to the new fields in there too
> to indicate that adding those fields was the reason why the version ID
> of 'vmstate_hpet' was incremented to 3.
>
> As far as the flow of execution in vmstate_load_state() is concerned, I
> think it does not matter whether the version ID of 'vmstate_hpet_timer'
> and the new fields in there is 2 or 3 (as long as they are consistent).
> When the 'while(field->name)' loop in vmstate_load_state() gets to the
> following field in 'vmstate_hpet' ...
>
>      VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
>                                  vmstate_hpet_timer, HPETTimer),
>
> ... it calls itself recursively ...
>
>      if (field->flags&  VMS_STRUCT) {
>          ret = vmstate_load_state(f, field->vmsd, addr, field->vmsd->version_id);
>
> 'field->vmsd->version_id' is the version ID of 'vmstate_hpet_timer' [1].
> Hence 'vmstate_hpet_timer.version_id' is being checked against itself ...
>
>      if (version_id>  vmsd->version_id) {
>          return -EINVAL;
>      }
>
> ... and the version IDs of the new fields are also being checked against
> 'vmstate_hpet_timer.version_id' ...
>
>      if ((field->field_exists&&
>           field->field_exists(opaque, version_id)) ||
>          (!field->field_exists&&
>           field->version_id<= version_id)) {
>
> If you want me to change the version ID of 'vmstate_hpet_timer' and the
> new fields in there from 3 to 2, I can do that.

It avoids surprises so I think it's a reasonable thing to do.  But yes, 
your analysis is correct.

Regards,

Anthony Liguori

>
> Regards,
>
> Uli
>
>
> [1] Ref.: commit fa3aad24d94a6cf894db52d83f72a399324a17bb

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Glauber Costa April 11, 2011, 1:39 p.m. UTC | #7
On Mon, 2011-04-11 at 08:10 -0500, Anthony Liguori wrote:
> On 04/11/2011 04:08 AM, Avi Kivity wrote:
> > On 04/11/2011 12:06 PM, Ulrich Obergfell wrote:
> >> >>  vmstate_hpet_timer = {
> >> >>            VMSTATE_UINT64(fsb, HPETTimer),
> >> >>            VMSTATE_UINT64(period, HPETTimer),
> >> >>            VMSTATE_UINT8(wrap_flag, HPETTimer),
> >> >>  + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
> >> >>  + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
> >> >>  + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
> >> >>  + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
> >> >>  + VMSTATE_UINT32_V(divisor, HPETTimer, 3),
> >> >
> >> >  We ought to be able to use a subsection keyed off of whether any 
> >> ticks
> >> >  are currently accumulated, no?
> >>
> >>
> >> Anthony,
> >>
> >> I'm not sure if I understand your question correctly. Are you suggesting
> >> to migrate the driftfix-related state conditionally / only if there are
> >> any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
> >>
> >> The size of the driftfix-related state is 28 bytes per timer and we have
> >> 32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
> >> maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
> >> Hence, unconditional migration of the driftfix-related state should not
> >> cause significant additional overhead.
> >>
> >
> > It's not about overhead.
> >
> >> Maybe I missed something. Could you please explain which benefit you see
> >> in using a subsection ?
> >
> > In the common case of there being no drift, you can migrate from a 
> > qemu that supports driftfix to a qemu that doesn't.
> >
> 
> Right, subsections are a trick.  The idea is that when you introduce new 
> state for a device model that is not always going to be set, when you do 
> the migration, you detect whether the state is set or not and if it's 
> not set, instead of sending empty versions of that state (i.e. 
> missed_ticks=0) you just don't send the new state at all.
> 
> This means that you can migrate to an older version of QEMU provided the 
> migration would work correctly.

Using subsections and testing for hpet option being disabled vs enabled,
is fine. But checking for the existence of drift, like you suggested (or
at least how I understood you), is very tricky. It is expected to change
many times during guest lifetime, and would make our migration
predictability something Heisenberg would be proud of.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity April 11, 2011, 1:43 p.m. UTC | #8
On 04/11/2011 04:39 PM, Glauber Costa wrote:
> On Mon, 2011-04-11 at 08:10 -0500, Anthony Liguori wrote:
> >  On 04/11/2011 04:08 AM, Avi Kivity wrote:
> >  >  On 04/11/2011 12:06 PM, Ulrich Obergfell wrote:
> >  >>  >>   vmstate_hpet_timer = {
> >  >>  >>             VMSTATE_UINT64(fsb, HPETTimer),
> >  >>  >>             VMSTATE_UINT64(period, HPETTimer),
> >  >>  >>             VMSTATE_UINT8(wrap_flag, HPETTimer),
> >  >>  >>   + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
> >  >>  >>   + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
> >  >>  >>   + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
> >  >>  >>   + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
> >  >>  >>   + VMSTATE_UINT32_V(divisor, HPETTimer, 3),
> >  >>  >
> >  >>  >   We ought to be able to use a subsection keyed off of whether any
> >  >>  ticks
> >  >>  >   are currently accumulated, no?
> >  >>
> >  >>
> >  >>  Anthony,
> >  >>
> >  >>  I'm not sure if I understand your question correctly. Are you suggesting
> >  >>  to migrate the driftfix-related state conditionally / only if there are
> >  >>  any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
> >  >>
> >  >>  The size of the driftfix-related state is 28 bytes per timer and we have
> >  >>  32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
> >  >>  maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
> >  >>  Hence, unconditional migration of the driftfix-related state should not
> >  >>  cause significant additional overhead.
> >  >>
> >  >
> >  >  It's not about overhead.
> >  >
> >  >>  Maybe I missed something. Could you please explain which benefit you see
> >  >>  in using a subsection ?
> >  >
> >  >  In the common case of there being no drift, you can migrate from a
> >  >  qemu that supports driftfix to a qemu that doesn't.
> >  >
> >
> >  Right, subsections are a trick.  The idea is that when you introduce new
> >  state for a device model that is not always going to be set, when you do
> >  the migration, you detect whether the state is set or not and if it's
> >  not set, instead of sending empty versions of that state (i.e.
> >  missed_ticks=0) you just don't send the new state at all.
> >
> >  This means that you can migrate to an older version of QEMU provided the
> >  migration would work correctly.
>
> Using subsections and testing for hpet option being disabled vs enabled,
> is fine. But checking for the existence of drift, like you suggested (or
> at least how I understood you), is very tricky. It is expected to change
> many times during guest lifetime, and would make our migration
> predictability something Heisenberg would be proud of.

First, I'd expect no drift under normal circumstances, at least without 
overcommit.  We may also allow a small amount of drift to pass migration 
(we lost time during the last phase anyway).

Second, the problem only occurs on new->old migrations.
Anthony Liguori April 11, 2011, 1:47 p.m. UTC | #9
On 04/11/2011 08:39 AM, Glauber Costa wrote:
> On Mon, 2011-04-11 at 08:10 -0500, Anthony Liguori wrote:
>> On 04/11/2011 04:08 AM, Avi Kivity wrote:
>>> On 04/11/2011 12:06 PM, Ulrich Obergfell wrote:
>>>>>>   vmstate_hpet_timer = {
>>>>>>             VMSTATE_UINT64(fsb, HPETTimer),
>>>>>>             VMSTATE_UINT64(period, HPETTimer),
>>>>>>             VMSTATE_UINT8(wrap_flag, HPETTimer),
>>>>>>   + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
>>>>>>   + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
>>>>>>   + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
>>>>>>   + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
>>>>>>   + VMSTATE_UINT32_V(divisor, HPETTimer, 3),
>>>>>   We ought to be able to use a subsection keyed off of whether any
>>>> ticks
>>>>>   are currently accumulated, no?
>>>>
>>>> Anthony,
>>>>
>>>> I'm not sure if I understand your question correctly. Are you suggesting
>>>> to migrate the driftfix-related state conditionally / only if there are
>>>> any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
>>>>
>>>> The size of the driftfix-related state is 28 bytes per timer and we have
>>>> 32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
>>>> maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
>>>> Hence, unconditional migration of the driftfix-related state should not
>>>> cause significant additional overhead.
>>>>
>>> It's not about overhead.
>>>
>>>> Maybe I missed something. Could you please explain which benefit you see
>>>> in using a subsection ?
>>> In the common case of there being no drift, you can migrate from a
>>> qemu that supports driftfix to a qemu that doesn't.
>>>
>> Right, subsections are a trick.  The idea is that when you introduce new
>> state for a device model that is not always going to be set, when you do
>> the migration, you detect whether the state is set or not and if it's
>> not set, instead of sending empty versions of that state (i.e.
>> missed_ticks=0) you just don't send the new state at all.
>>
>> This means that you can migrate to an older version of QEMU provided the
>> migration would work correctly.
> Using subsections and testing for hpet option being disabled vs enabled,
> is fine. But checking for the existence of drift, like you suggested (or
> at least how I understood you), is very tricky. It is expected to change
> many times during guest lifetime, and would make our migration
> predictability something Heisenberg would be proud of.

Is this true?  I would expect it to be very tied to workloads.  For idle 
workloads, you should never have accumulated missed ticks whereas with 
heavy workloads, you always will have accumulated ticks.

Is that not correct?

Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Glauber Costa April 11, 2011, 1:57 p.m. UTC | #10
On Mon, 2011-04-11 at 08:47 -0500, Anthony Liguori wrote:
> On 04/11/2011 08:39 AM, Glauber Costa wrote:
> > On Mon, 2011-04-11 at 08:10 -0500, Anthony Liguori wrote:
> >> On 04/11/2011 04:08 AM, Avi Kivity wrote:
> >>> On 04/11/2011 12:06 PM, Ulrich Obergfell wrote:
> >>>>>>   vmstate_hpet_timer = {
> >>>>>>             VMSTATE_UINT64(fsb, HPETTimer),
> >>>>>>             VMSTATE_UINT64(period, HPETTimer),
> >>>>>>             VMSTATE_UINT8(wrap_flag, HPETTimer),
> >>>>>>   + VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
> >>>>>>   + VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
> >>>>>>   + VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
> >>>>>>   + VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
> >>>>>>   + VMSTATE_UINT32_V(divisor, HPETTimer, 3),
> >>>>>   We ought to be able to use a subsection keyed off of whether any
> >>>> ticks
> >>>>>   are currently accumulated, no?
> >>>>
> >>>> Anthony,
> >>>>
> >>>> I'm not sure if I understand your question correctly. Are you suggesting
> >>>> to migrate the driftfix-related state conditionally / only if there are
> >>>> any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
> >>>>
> >>>> The size of the driftfix-related state is 28 bytes per timer and we have
> >>>> 32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
> >>>> maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
> >>>> Hence, unconditional migration of the driftfix-related state should not
> >>>> cause significant additional overhead.
> >>>>
> >>> It's not about overhead.
> >>>
> >>>> Maybe I missed something. Could you please explain which benefit you see
> >>>> in using a subsection ?
> >>> In the common case of there being no drift, you can migrate from a
> >>> qemu that supports driftfix to a qemu that doesn't.
> >>>
> >> Right, subsections are a trick.  The idea is that when you introduce new
> >> state for a device model that is not always going to be set, when you do
> >> the migration, you detect whether the state is set or not and if it's
> >> not set, instead of sending empty versions of that state (i.e.
> >> missed_ticks=0) you just don't send the new state at all.
> >>
> >> This means that you can migrate to an older version of QEMU provided the
> >> migration would work correctly.
> > Using subsections and testing for hpet option being disabled vs enabled,
> > is fine. But checking for the existence of drift, like you suggested (or
> > at least how I understood you), is very tricky. It is expected to change
> > many times during guest lifetime, and would make our migration
> > predictability something Heisenberg would be proud of.
> 
> Is this true?  I would expect it to be very tied to workloads.  For idle 
> workloads, you should never have accumulated missed ticks whereas with 
> heavy workloads, you always will have accumulated ticks.
> 
> Is that not correct?
Yes, it is , but we lose a lot of reliability by tying migration to the workload. Given that
we still have to start qemu the same way both sides, we end up with a
situation in which at time t, migration is possible, and at time t+1
migration is not.

I'd rather have subsections enabled at all times when the option to
allow driftfix is enabled.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/hw/hpet.c b/hw/hpet.c
index 45847ed..c150da5 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -55,6 +55,11 @@  typedef struct HPETTimer {  /* timers */
     uint8_t wrap_flag;      /* timer pop will indicate wrap for one-shot 32-bit
                              * mode. Next pop will be actual timer expiration.
                              */
+    uint64_t saved_period;
+    uint64_t ticks_not_accounted;
+    uint32_t irqs_to_inject;
+    uint32_t irq_rate;
+    uint32_t divisor;
 } HPETTimer;
 
 typedef struct HPETState {
@@ -248,7 +253,7 @@  static int hpet_post_load(void *opaque, int version_id)
 
 static const VMStateDescription vmstate_hpet_timer = {
     .name = "hpet_timer",
-    .version_id = 1,
+    .version_id = 3,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
     .fields      = (VMStateField []) {
@@ -258,6 +263,11 @@  static const VMStateDescription vmstate_hpet_timer = {
         VMSTATE_UINT64(fsb, HPETTimer),
         VMSTATE_UINT64(period, HPETTimer),
         VMSTATE_UINT8(wrap_flag, HPETTimer),
+        VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+        VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+        VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+        VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+        VMSTATE_UINT32_V(divisor, HPETTimer, 3),
         VMSTATE_TIMER(qemu_timer, HPETTimer),
         VMSTATE_END_OF_LIST()
     }
@@ -265,7 +275,7 @@  static const VMStateDescription vmstate_hpet_timer = {
 
 static const VMStateDescription vmstate_hpet = {
     .name = "hpet",
-    .version_id = 2,
+    .version_id = 3,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
     .pre_save = hpet_pre_save,