diff mbox series

[v3,10/16] hw/i386/vmport: Add support for CMD_GETTIME

Message ID 20200312165431.82118-11-liran.alon@oracle.com (mailing list archive)
State New, archived
Headers show
Series : hw/i386/vmport: Bug fixes and improvements | expand

Commit Message

Liran Alon March 12, 2020, 4:54 p.m. UTC
This command is used by guest to gettimeofday() from host.
See usage example in open-vm-tools TimeSyncReadHost() function.

Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
---
 hw/i386/vmport.c         | 21 +++++++++++++++++++++
 include/hw/i386/vmport.h |  1 +
 2 files changed, 22 insertions(+)

Comments

Michael S. Tsirkin March 13, 2020, 12:04 a.m. UTC | #1
On Thu, Mar 12, 2020 at 06:54:25PM +0200, Liran Alon wrote:
> This command is used by guest to gettimeofday() from host.
> See usage example in open-vm-tools TimeSyncReadHost() function.
> 
> Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
> Signed-off-by: Liran Alon <liran.alon@oracle.com>
> ---
>  hw/i386/vmport.c         | 21 +++++++++++++++++++++
>  include/hw/i386/vmport.h |  1 +
>  2 files changed, 22 insertions(+)
> 
> diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c
> index 3fb8a8bd458a..c5b659c59343 100644
> --- a/hw/i386/vmport.c
> +++ b/hw/i386/vmport.c
> @@ -66,6 +66,7 @@ typedef struct VMPortState {
>  
>      uint32_t vmware_vmx_version;
>      uint8_t vmware_vmx_type;
> +    uint32_t max_time_lag_us;
>  
>      uint32_t compat_flags;
>  } VMPortState;
> @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
>      return ram_size;
>  }
>  
> +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
> +{
> +    X86CPU *cpu = X86_CPU(current_cpu);
> +    qemu_timeval tv;
> +
> +    if (qemu_gettimeofday(&tv) < 0) {
> +        return UINT32_MAX;
> +    }
> +
> +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
> +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
> +    return (uint32_t)tv.tv_sec;
> +}
> +
>  /* vmmouse helpers */
>  void vmmouse_get_data(uint32_t *data)
>  {

That's a very weird thing to return to the guest.
For example it's not monotonic across migrations.
And what does max_time_lag_us refer to, anyway?


So please add documentation about what this does.
If there's no document to refer to then pls write
code comments or a document under docs/ - this does not
belong in commit log.



> @@ -214,6 +229,7 @@ static void vmport_realizefn(DeviceState *dev, Error **errp)
>      vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL);
>      if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) {
>          vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL);
> +        vmport_register(VMPORT_CMD_GETTIME, vmport_cmd_time, NULL);
>      }
>  }
>  
> @@ -249,6 +265,11 @@ static Property vmport_properties[] = {
>       * 5 - ACE 1.x (Deprecated)
>       */
>      DEFINE_PROP_UINT8("vmware-vmx-type", VMPortState, vmware_vmx_type, 2),
> +    /*
> +     * Max amount of time lag that can go uncorrected.

What does uncorrected mean?

> +     * Value taken from VMware Workstation 5.5.


How do we know this makes sense for KVM? That has significantly
different runtime characteristics.


Also, the version returns ESX server, why does it make
sense to take some values from workstation?

> +     **/
> +    DEFINE_PROP_UINT32("max-time-lag", VMPortState, max_time_lag_us, 1000000),
>  
>      DEFINE_PROP_END_OF_LIST(),
>  };
> diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h
> index 7f33512ca6f0..50416c8c8f3e 100644
> --- a/include/hw/i386/vmport.h
> +++ b/include/hw/i386/vmport.h
> @@ -8,6 +8,7 @@ typedef enum {
>      VMPORT_CMD_GETVERSION       = 10,
>      VMPORT_CMD_GETBIOSUUID      = 19,
>      VMPORT_CMD_GETRAMSIZE       = 20,
> +    VMPORT_CMD_GETTIME          = 23,
>      VMPORT_CMD_VMMOUSE_DATA     = 39,
>      VMPORT_CMD_VMMOUSE_STATUS   = 40,
>      VMPORT_CMD_VMMOUSE_COMMAND  = 41,
> -- 
> 2.20.1
Liran Alon March 13, 2020, 3:25 p.m. UTC | #2
On 13/03/2020 2:04, Michael S. Tsirkin wrote:
> On Thu, Mar 12, 2020 at 06:54:25PM +0200, Liran Alon wrote:
>> This command is used by guest to gettimeofday() from host.
>> See usage example in open-vm-tools TimeSyncReadHost() function.
>>
>> Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
>> Signed-off-by: Liran Alon <liran.alon@oracle.com>
>> ---
>>   hw/i386/vmport.c         | 21 +++++++++++++++++++++
>>   include/hw/i386/vmport.h |  1 +
>>   2 files changed, 22 insertions(+)
>>
>> diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c
>> index 3fb8a8bd458a..c5b659c59343 100644
>> --- a/hw/i386/vmport.c
>> +++ b/hw/i386/vmport.c
>> @@ -66,6 +66,7 @@ typedef struct VMPortState {
>>   
>>       uint32_t vmware_vmx_version;
>>       uint8_t vmware_vmx_type;
>> +    uint32_t max_time_lag_us;
>>   
>>       uint32_t compat_flags;
>>   } VMPortState;
>> @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
>>       return ram_size;
>>   }
>>   
>> +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
>> +{
>> +    X86CPU *cpu = X86_CPU(current_cpu);
>> +    qemu_timeval tv;
>> +
>> +    if (qemu_gettimeofday(&tv) < 0) {
>> +        return UINT32_MAX;
>> +    }
>> +
>> +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
>> +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
>> +    return (uint32_t)tv.tv_sec;
>> +}
>> +
>>   /* vmmouse helpers */
>>   void vmmouse_get_data(uint32_t *data)
>>   {
> That's a very weird thing to return to the guest.
> For example it's not monotonic across migrations.
That's the VMware PV interface... I didn't design it. :P
Regarding how it handles the fact time is not monotonic across 
migrations, see big comment at the start of 
services/plugins/timeSync/timeSync.c in open-vm-tools regarding the 
time-sync algorithm used by VMware Tools.
Specifically:
"""
During normal operation this plugin only steps the time forward and only 
if the error is greater than one second.
"""
> And what does max_time_lag_us refer to, anyway?
According to the comment in open-vm-tools TimeSyncReadHost():
"""
maximum time lag allowed (config option), which is a threshold that 
keeps the tools from being over eager about resetting the time when it 
is only a little bit off.
"""

Looking at open-vm-tools timeSync.c code, it defines the threshold of 
how far guest time can be from host time before deciding to do a "step 
correction".
A "step correction" is defined as explicitly setting the time in the 
guest to the time in the host.
>
>
> So please add documentation about what this does.
You are right. I agree.
I think it would be best to just point to open-vm-tools 
services/plugins/timeSync/timeSync.c.
Do you agree or should I copy some paragraphs from there?
> If there's no document to refer to then pls write
> code comments or a document under docs/ - this does not
> belong in commit log.
>
>
>
>> @@ -214,6 +229,7 @@ static void vmport_realizefn(DeviceState *dev, Error **errp)
>>       vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL);
>>       if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) {
>>           vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL);
>> +        vmport_register(VMPORT_CMD_GETTIME, vmport_cmd_time, NULL);
>>       }
>>   }
>>   
>> @@ -249,6 +265,11 @@ static Property vmport_properties[] = {
>>        * 5 - ACE 1.x (Deprecated)
>>        */
>>       DEFINE_PROP_UINT8("vmware-vmx-type", VMPortState, vmware_vmx_type, 2),
>> +    /*
>> +     * Max amount of time lag that can go uncorrected.
> What does uncorrected mean?
You are right this is a bad phrasing taken from open-vm-tools.
It should mean "How far we allow guest time to go from host time before 
guest VMware Tools will sync it to host time".
How you prefer to phrase this?
>
>> +     * Value taken from VMware Workstation 5.5.
>
> How do we know this makes sense for KVM? That has significantly
> different runtime characteristics.
This is just a threshold as you can understand from the above reply of 
mine (I should rephrase the comments to make this clearer).
So we just chose a threshold that makes sense for common workloads.
One of the reasons to put this as a property, is to still allow user to 
override it.
>
>
> Also, the version returns ESX server, why does it make
> sense to take some values from workstation?
I believe (don't remember) that ESXi was observed to return similar value.
Most of our workloads that runs with this came from ESXi and we never 
examined an issue regarding this in our production environment.
Which makes sense as this is just a thresthold that specifies when guest 
time should be synced to host time.
You prefer I would just remove this comment?

Thanks,
-Liran

>
>> +     **/
>> +    DEFINE_PROP_UINT32("max-time-lag", VMPortState, max_time_lag_us, 1000000),
>>   
>>       DEFINE_PROP_END_OF_LIST(),
>>   };
>> diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h
>> index 7f33512ca6f0..50416c8c8f3e 100644
>> --- a/include/hw/i386/vmport.h
>> +++ b/include/hw/i386/vmport.h
>> @@ -8,6 +8,7 @@ typedef enum {
>>       VMPORT_CMD_GETVERSION       = 10,
>>       VMPORT_CMD_GETBIOSUUID      = 19,
>>       VMPORT_CMD_GETRAMSIZE       = 20,
>> +    VMPORT_CMD_GETTIME          = 23,
>>       VMPORT_CMD_VMMOUSE_DATA     = 39,
>>       VMPORT_CMD_VMMOUSE_STATUS   = 40,
>>       VMPORT_CMD_VMMOUSE_COMMAND  = 41,
>> -- 
>> 2.20.1
Michael S. Tsirkin March 13, 2020, 3:47 p.m. UTC | #3
On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
> > > @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
> > >       return ram_size;
> > >   }
> > > +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
> > > +{
> > > +    X86CPU *cpu = X86_CPU(current_cpu);
> > > +    qemu_timeval tv;
> > > +
> > > +    if (qemu_gettimeofday(&tv) < 0) {
> > > +        return UINT32_MAX;
> > > +    }
> > > +
> > > +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
> > > +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
> > > +    return (uint32_t)tv.tv_sec;
> > > +}
> > > +
> > >   /* vmmouse helpers */
> > >   void vmmouse_get_data(uint32_t *data)
> > >   {
> > That's a very weird thing to return to the guest.
> > For example it's not monotonic across migrations.
> That's the VMware PV interface... I didn't design it. :P
> Regarding how it handles the fact time is not monotonic across migrations,
> see big comment at the start of services/plugins/timeSync/timeSync.c in
> open-vm-tools regarding the time-sync algorithm used by VMware Tools.
> Specifically:
> """
> During normal operation this plugin only steps the time forward and only if
> the error is greater than one second.

Looks like guest assumes this time only moves forward.
So something needs to be done to avoid it moving
backward across migrations.



> """
> > And what does max_time_lag_us refer to, anyway?
> According to the comment in open-vm-tools TimeSyncReadHost():
> """
> maximum time lag allowed (config option), which is a threshold that keeps
> the tools from being over eager about resetting the time when it is only a
> little bit off.
> """
> 
> Looking at open-vm-tools timeSync.c code, it defines the threshold of how
> far guest time can be from host time before deciding to do a "step
> correction".
> A "step correction" is defined as explicitly setting the time in the guest
> to the time in the host.
> > 
> > 
> > So please add documentation about what this does.
> You are right. I agree.
> I think it would be best to just point to open-vm-tools
> services/plugins/timeSync/timeSync.c.
> Do you agree or should I copy some paragraphs from there?

Neither. Their documentation will be from guest point of view.  Please
look at that code and write documentation from host point of view.
Your documentation for the lag parameter is I think a good
example of how to do it.

> > If there's no document to refer to then pls write
> > code comments or a document under docs/ - this does not
> > belong in commit log.
> > 
> > 
> > 
> > > @@ -214,6 +229,7 @@ static void vmport_realizefn(DeviceState *dev, Error **errp)
> > >       vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL);
> > >       if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) {
> > >           vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL);
> > > +        vmport_register(VMPORT_CMD_GETTIME, vmport_cmd_time, NULL);
> > >       }
> > >   }
> > > @@ -249,6 +265,11 @@ static Property vmport_properties[] = {
> > >        * 5 - ACE 1.x (Deprecated)
> > >        */
> > >       DEFINE_PROP_UINT8("vmware-vmx-type", VMPortState, vmware_vmx_type, 2),
> > > +    /*
> > > +     * Max amount of time lag that can go uncorrected.
> > What does uncorrected mean?
> You are right this is a bad phrasing taken from open-vm-tools.
> It should mean "How far we allow guest time to go from host time before
> guest VMware Tools will sync it to host time".
> How you prefer to phrase this?

Sounds like a good explanation. Maybe we allow -> can
since "we" is hypervisor and it's actually under guest control.


> > 
> > > +     * Value taken from VMware Workstation 5.5.
> > 
> > How do we know this makes sense for KVM? That has significantly
> > different runtime characteristics.
> This is just a threshold as you can understand from the above reply of mine
> (I should rephrase the comments to make this clearer).
> So we just chose a threshold that makes sense for common workloads.
> One of the reasons to put this as a property, is to still allow user to
> override it.

Well close to 100% of users will have no idea what to set it to.


> > 
> > 
> > Also, the version returns ESX server, why does it make
> > sense to take some values from workstation?
> I believe (don't remember) that ESXi was observed to return similar value.
> Most of our workloads that runs with this came from ESXi and we never
> examined an issue regarding this in our production environment.
> Which makes sense as this is just a thresthold that specifies when guest
> time should be synced to host time.
> You prefer I would just remove this comment?

Maybe add " TODO: should this depend on vmare-vmx-type? ".

> 
> Thanks,
> -Liran
> 
> > 
> > > +     **/
> > > +    DEFINE_PROP_UINT32("max-time-lag", VMPortState, max_time_lag_us, 1000000),
> > >       DEFINE_PROP_END_OF_LIST(),
> > >   };
> > > diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h
> > > index 7f33512ca6f0..50416c8c8f3e 100644
> > > --- a/include/hw/i386/vmport.h
> > > +++ b/include/hw/i386/vmport.h
> > > @@ -8,6 +8,7 @@ typedef enum {
> > >       VMPORT_CMD_GETVERSION       = 10,
> > >       VMPORT_CMD_GETBIOSUUID      = 19,
> > >       VMPORT_CMD_GETRAMSIZE       = 20,
> > > +    VMPORT_CMD_GETTIME          = 23,
> > >       VMPORT_CMD_VMMOUSE_DATA     = 39,
> > >       VMPORT_CMD_VMMOUSE_STATUS   = 40,
> > >       VMPORT_CMD_VMMOUSE_COMMAND  = 41,
> > > -- 
> > > 2.20.1
Liran Alon March 13, 2020, 4:26 p.m. UTC | #4
On 13/03/2020 17:47, Michael S. Tsirkin wrote:
> On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
>>>> @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
>>>>        return ram_size;
>>>>    }
>>>> +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
>>>> +{
>>>> +    X86CPU *cpu = X86_CPU(current_cpu);
>>>> +    qemu_timeval tv;
>>>> +
>>>> +    if (qemu_gettimeofday(&tv) < 0) {
>>>> +        return UINT32_MAX;
>>>> +    }
>>>> +
>>>> +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
>>>> +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
>>>> +    return (uint32_t)tv.tv_sec;
>>>> +}
>>>> +
>>>>    /* vmmouse helpers */
>>>>    void vmmouse_get_data(uint32_t *data)
>>>>    {
>>> That's a very weird thing to return to the guest.
>>> For example it's not monotonic across migrations.
>> That's the VMware PV interface... I didn't design it. :P
>> Regarding how it handles the fact time is not monotonic across migrations,
>> see big comment at the start of services/plugins/timeSync/timeSync.c in
>> open-vm-tools regarding the time-sync algorithm used by VMware Tools.
>> Specifically:
>> """
>> During normal operation this plugin only steps the time forward and only if
>> the error is greater than one second.
> Looks like guest assumes this time only moves forward.
> So something needs to be done to avoid it moving
> backward across migrations.
Where do you see this assumption in guest code? I don't think this is true.
Guest code seems to handle this by making sure to only step the time 
forward.
Read carefully services/plugins/timeSync/timeSync.c and point me to what 
I'm missing if you think otherwise (i.e. I missed something).
>> """
>>> And what does max_time_lag_us refer to, anyway?
>> According to the comment in open-vm-tools TimeSyncReadHost():
>> """
>> maximum time lag allowed (config option), which is a threshold that keeps
>> the tools from being over eager about resetting the time when it is only a
>> little bit off.
>> """
>>
>> Looking at open-vm-tools timeSync.c code, it defines the threshold of how
>> far guest time can be from host time before deciding to do a "step
>> correction".
>> A "step correction" is defined as explicitly setting the time in the guest
>> to the time in the host.
>>>
>>> So please add documentation about what this does.
>> You are right. I agree.
>> I think it would be best to just point to open-vm-tools
>> services/plugins/timeSync/timeSync.c.
>> Do you agree or should I copy some paragraphs from there?
> Neither. Their documentation will be from guest point of view.  Please
> look at that code and write documentation from host point of view.
> Your documentation for the lag parameter is I think a good
> example of how to do it.
Ok. Will try to phrase something for v4.
>
>>> If there's no document to refer to then pls write
>>> code comments or a document under docs/ - this does not
>>> belong in commit log.
>>>
>>>
>>>
>>>> @@ -214,6 +229,7 @@ static void vmport_realizefn(DeviceState *dev, Error **errp)
>>>>        vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL);
>>>>        if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) {
>>>>            vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL);
>>>> +        vmport_register(VMPORT_CMD_GETTIME, vmport_cmd_time, NULL);
>>>>        }
>>>>    }
>>>> @@ -249,6 +265,11 @@ static Property vmport_properties[] = {
>>>>         * 5 - ACE 1.x (Deprecated)
>>>>         */
>>>>        DEFINE_PROP_UINT8("vmware-vmx-type", VMPortState, vmware_vmx_type, 2),
>>>> +    /*
>>>> +     * Max amount of time lag that can go uncorrected.
>>> What does uncorrected mean?
>> You are right this is a bad phrasing taken from open-vm-tools.
>> It should mean "How far we allow guest time to go from host time before
>> guest VMware Tools will sync it to host time".
>> How you prefer to phrase this?
> Sounds like a good explanation. Maybe we allow -> can
> since "we" is hypervisor and it's actually under guest control.
Ok. Will add this to v4.
>
>
>>>> +     * Value taken from VMware Workstation 5.5.
>>> How do we know this makes sense for KVM? That has significantly
>>> different runtime characteristics.
>> This is just a threshold as you can understand from the above reply of mine
>> (I should rephrase the comments to make this clearer).
>> So we just chose a threshold that makes sense for common workloads.
>> One of the reasons to put this as a property, is to still allow user to
>> override it.
> Well close to 100% of users will have no idea what to set it to.
I agree. :) That's why there is a default value.
>
>
>>>
>>> Also, the version returns ESX server, why does it make
>>> sense to take some values from workstation?
>> I believe (don't remember) that ESXi was observed to return similar value.
>> Most of our workloads that runs with this came from ESXi and we never
>> examined an issue regarding this in our production environment.
>> Which makes sense as this is just a thresthold that specifies when guest
>> time should be synced to host time.
>> You prefer I would just remove this comment?
> Maybe add " TODO: should this depend on vmare-vmx-type? ".

Ok. Will add to v4.

Thanks,
-Liran
Michael S. Tsirkin March 14, 2020, 6:18 p.m. UTC | #5
On Fri, Mar 13, 2020 at 06:26:54PM +0200, Liran Alon wrote:
> 
> On 13/03/2020 17:47, Michael S. Tsirkin wrote:
> > On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
> > > > > @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
> > > > >        return ram_size;
> > > > >    }
> > > > > +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
> > > > > +{
> > > > > +    X86CPU *cpu = X86_CPU(current_cpu);
> > > > > +    qemu_timeval tv;
> > > > > +
> > > > > +    if (qemu_gettimeofday(&tv) < 0) {
> > > > > +        return UINT32_MAX;
> > > > > +    }
> > > > > +
> > > > > +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
> > > > > +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
> > > > > +    return (uint32_t)tv.tv_sec;
> > > > > +}
> > > > > +
> > > > >    /* vmmouse helpers */
> > > > >    void vmmouse_get_data(uint32_t *data)
> > > > >    {
> > > > That's a very weird thing to return to the guest.
> > > > For example it's not monotonic across migrations.
> > > That's the VMware PV interface... I didn't design it. :P
> > > Regarding how it handles the fact time is not monotonic across migrations,
> > > see big comment at the start of services/plugins/timeSync/timeSync.c in
> > > open-vm-tools regarding the time-sync algorithm used by VMware Tools.
> > > Specifically:
> > > """
> > > During normal operation this plugin only steps the time forward and only if
> > > the error is greater than one second.
> > Looks like guest assumes this time only moves forward.
> > So something needs to be done to avoid it moving
> > backward across migrations.
> Where do you see this assumption in guest code? I don't think this is true.
> Guest code seems to handle this by making sure to only step the time
> forward.

Exactly. So if host time moved backward e.g. by 100s, then for 100s
time is not correcting. Which possibly vmware has a way to mitigate
against e.g. by synchronising host time using their
management app.

> Read carefully services/plugins/timeSync/timeSync.c and point me to what I'm
> missing if you think otherwise (i.e. I missed something).

I'm just going by what you write in a patch.

> > > """
> > > > And what does max_time_lag_us refer to, anyway?
> > > According to the comment in open-vm-tools TimeSyncReadHost():
> > > """
> > > maximum time lag allowed (config option), which is a threshold that keeps
> > > the tools from being over eager about resetting the time when it is only a
> > > little bit off.
> > > """
> > > 
> > > Looking at open-vm-tools timeSync.c code, it defines the threshold of how
> > > far guest time can be from host time before deciding to do a "step
> > > correction".
> > > A "step correction" is defined as explicitly setting the time in the guest
> > > to the time in the host.
> > > > 
> > > > So please add documentation about what this does.
> > > You are right. I agree.
> > > I think it would be best to just point to open-vm-tools
> > > services/plugins/timeSync/timeSync.c.
> > > Do you agree or should I copy some paragraphs from there?
> > Neither. Their documentation will be from guest point of view.  Please
> > look at that code and write documentation from host point of view.
> > Your documentation for the lag parameter is I think a good
> > example of how to do it.
> Ok. Will try to phrase something for v4.
> > 
> > > > If there's no document to refer to then pls write
> > > > code comments or a document under docs/ - this does not
> > > > belong in commit log.
> > > > 
> > > > 
> > > > 
> > > > > @@ -214,6 +229,7 @@ static void vmport_realizefn(DeviceState *dev, Error **errp)
> > > > >        vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL);
> > > > >        if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) {
> > > > >            vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL);
> > > > > +        vmport_register(VMPORT_CMD_GETTIME, vmport_cmd_time, NULL);
> > > > >        }
> > > > >    }
> > > > > @@ -249,6 +265,11 @@ static Property vmport_properties[] = {
> > > > >         * 5 - ACE 1.x (Deprecated)
> > > > >         */
> > > > >        DEFINE_PROP_UINT8("vmware-vmx-type", VMPortState, vmware_vmx_type, 2),
> > > > > +    /*
> > > > > +     * Max amount of time lag that can go uncorrected.
> > > > What does uncorrected mean?
> > > You are right this is a bad phrasing taken from open-vm-tools.
> > > It should mean "How far we allow guest time to go from host time before
> > > guest VMware Tools will sync it to host time".
> > > How you prefer to phrase this?
> > Sounds like a good explanation. Maybe we allow -> can
> > since "we" is hypervisor and it's actually under guest control.
> Ok. Will add this to v4.
> > 
> > 
> > > > > +     * Value taken from VMware Workstation 5.5.
> > > > How do we know this makes sense for KVM? That has significantly
> > > > different runtime characteristics.
> > > This is just a threshold as you can understand from the above reply of mine
> > > (I should rephrase the comments to make this clearer).
> > > So we just chose a threshold that makes sense for common workloads.
> > > One of the reasons to put this as a property, is to still allow user to
> > > override it.
> > Well close to 100% of users will have no idea what to set it to.
> I agree. :) That's why there is a default value.
> > 
> > 
> > > > 
> > > > Also, the version returns ESX server, why does it make
> > > > sense to take some values from workstation?
> > > I believe (don't remember) that ESXi was observed to return similar value.
> > > Most of our workloads that runs with this came from ESXi and we never
> > > examined an issue regarding this in our production environment.
> > > Which makes sense as this is just a thresthold that specifies when guest
> > > time should be synced to host time.
> > > You prefer I would just remove this comment?
> > Maybe add " TODO: should this depend on vmare-vmx-type? ".
> 
> Ok. Will add to v4.
> 
> Thanks,
> -Liran
>
Liran Alon March 14, 2020, 7:04 p.m. UTC | #6
On 14/03/2020 20:18, Michael S. Tsirkin wrote:
> On Fri, Mar 13, 2020 at 06:26:54PM +0200, Liran Alon wrote:
>> On 13/03/2020 17:47, Michael S. Tsirkin wrote:
>>> On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
>>>>>> @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
>>>>>>         return ram_size;
>>>>>>     }
>>>>>> +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
>>>>>> +{
>>>>>> +    X86CPU *cpu = X86_CPU(current_cpu);
>>>>>> +    qemu_timeval tv;
>>>>>> +
>>>>>> +    if (qemu_gettimeofday(&tv) < 0) {
>>>>>> +        return UINT32_MAX;
>>>>>> +    }
>>>>>> +
>>>>>> +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
>>>>>> +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
>>>>>> +    return (uint32_t)tv.tv_sec;
>>>>>> +}
>>>>>> +
>>>>>>     /* vmmouse helpers */
>>>>>>     void vmmouse_get_data(uint32_t *data)
>>>>>>     {
>>>>> That's a very weird thing to return to the guest.
>>>>> For example it's not monotonic across migrations.
>>>> That's the VMware PV interface... I didn't design it. :P
>>>> Regarding how it handles the fact time is not monotonic across migrations,
>>>> see big comment at the start of services/plugins/timeSync/timeSync.c in
>>>> open-vm-tools regarding the time-sync algorithm used by VMware Tools.
>>>> Specifically:
>>>> """
>>>> During normal operation this plugin only steps the time forward and only if
>>>> the error is greater than one second.
>>> Looks like guest assumes this time only moves forward.
>>> So something needs to be done to avoid it moving
>>> backward across migrations.
>> Where do you see this assumption in guest code? I don't think this is true.
>> Guest code seems to handle this by making sure to only step the time
>> forward.
> Exactly. So if host time moved backward e.g. by 100s, then for 100s
> time is not correcting. Which possibly vmware has a way to mitigate
> against e.g. by synchronising host time using their
> management app.
>
>> Read carefully services/plugins/timeSync/timeSync.c and point me to what I'm
>> missing if you think otherwise (i.e. I missed something).
> I'm just going by what you write in a patch.
>
So guest doesn't assume that this time only moves forward...

Can you clarify then which change do you suggest making to this patch in 
this regard? It seems correct to me.
i.e. The CMD_GETTIME implementation seems correct to me and it doesn't 
need to do anything special to handle host time moving backwards.

-Liran
Liran Alon March 14, 2020, 7:04 p.m. UTC | #7
On 14/03/2020 20:18, Michael S. Tsirkin wrote:
> On Fri, Mar 13, 2020 at 06:26:54PM +0200, Liran Alon wrote:
>> On 13/03/2020 17:47, Michael S. Tsirkin wrote:
>>> On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
>>>>>> @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
>>>>>>         return ram_size;
>>>>>>     }
>>>>>> +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
>>>>>> +{
>>>>>> +    X86CPU *cpu = X86_CPU(current_cpu);
>>>>>> +    qemu_timeval tv;
>>>>>> +
>>>>>> +    if (qemu_gettimeofday(&tv) < 0) {
>>>>>> +        return UINT32_MAX;
>>>>>> +    }
>>>>>> +
>>>>>> +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
>>>>>> +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
>>>>>> +    return (uint32_t)tv.tv_sec;
>>>>>> +}
>>>>>> +
>>>>>>     /* vmmouse helpers */
>>>>>>     void vmmouse_get_data(uint32_t *data)
>>>>>>     {
>>>>> That's a very weird thing to return to the guest.
>>>>> For example it's not monotonic across migrations.
>>>> That's the VMware PV interface... I didn't design it. :P
>>>> Regarding how it handles the fact time is not monotonic across migrations,
>>>> see big comment at the start of services/plugins/timeSync/timeSync.c in
>>>> open-vm-tools regarding the time-sync algorithm used by VMware Tools.
>>>> Specifically:
>>>> """
>>>> During normal operation this plugin only steps the time forward and only if
>>>> the error is greater than one second.
>>> Looks like guest assumes this time only moves forward.
>>> So something needs to be done to avoid it moving
>>> backward across migrations.
>> Where do you see this assumption in guest code? I don't think this is true.
>> Guest code seems to handle this by making sure to only step the time
>> forward.
> Exactly. So if host time moved backward e.g. by 100s, then for 100s
> time is not correcting. Which possibly vmware has a way to mitigate
> against e.g. by synchronising host time using their
> management app.
>
>> Read carefully services/plugins/timeSync/timeSync.c and point me to what I'm
>> missing if you think otherwise (i.e. I missed something).
> I'm just going by what you write in a patch.
>
So guest doesn't assume that this time only moves forward...

Can you clarify then which change do you suggest making to this patch in 
this regard? It seems correct to me.
i.e. The CMD_GETTIME implementation seems correct to me and it doesn't 
need to do anything special to handle host time moving backwards.

-Liran
Michael S. Tsirkin March 14, 2020, 7:14 p.m. UTC | #8
On Sat, Mar 14, 2020 at 09:04:30PM +0200, Liran Alon wrote:
> 
> On 14/03/2020 20:18, Michael S. Tsirkin wrote:
> > On Fri, Mar 13, 2020 at 06:26:54PM +0200, Liran Alon wrote:
> > > On 13/03/2020 17:47, Michael S. Tsirkin wrote:
> > > > On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
> > > > > > > @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
> > > > > > >         return ram_size;
> > > > > > >     }
> > > > > > > +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
> > > > > > > +{
> > > > > > > +    X86CPU *cpu = X86_CPU(current_cpu);
> > > > > > > +    qemu_timeval tv;
> > > > > > > +
> > > > > > > +    if (qemu_gettimeofday(&tv) < 0) {
> > > > > > > +        return UINT32_MAX;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
> > > > > > > +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
> > > > > > > +    return (uint32_t)tv.tv_sec;
> > > > > > > +}
> > > > > > > +
> > > > > > >     /* vmmouse helpers */
> > > > > > >     void vmmouse_get_data(uint32_t *data)
> > > > > > >     {
> > > > > > That's a very weird thing to return to the guest.
> > > > > > For example it's not monotonic across migrations.
> > > > > That's the VMware PV interface... I didn't design it. :P
> > > > > Regarding how it handles the fact time is not monotonic across migrations,
> > > > > see big comment at the start of services/plugins/timeSync/timeSync.c in
> > > > > open-vm-tools regarding the time-sync algorithm used by VMware Tools.
> > > > > Specifically:
> > > > > """
> > > > > During normal operation this plugin only steps the time forward and only if
> > > > > the error is greater than one second.
> > > > Looks like guest assumes this time only moves forward.
> > > > So something needs to be done to avoid it moving
> > > > backward across migrations.
> > > Where do you see this assumption in guest code? I don't think this is true.
> > > Guest code seems to handle this by making sure to only step the time
> > > forward.
> > Exactly. So if host time moved backward e.g. by 100s, then for 100s
> > time is not correcting. Which possibly vmware has a way to mitigate
> > against e.g. by synchronising host time using their
> > management app.
> > 
> > > Read carefully services/plugins/timeSync/timeSync.c and point me to what I'm
> > > missing if you think otherwise (i.e. I missed something).
> > I'm just going by what you write in a patch.
> > 
> So guest doesn't assume that this time only moves forward...
> 
> Can you clarify then which change do you suggest making to this patch in
> this regard? It seems correct to me.
> i.e. The CMD_GETTIME implementation seems correct to me and it doesn't need
> to do anything special to handle host time moving backwards.
> 
> -Liran
> 

I think something needs to be done to make sure host time as reported by
this command does not move backwards significantly. Just forwarding
gettimeofday won't cut it IMHO. Look at kvm clock for inspiration of
things to do.
Liran Alon March 14, 2020, 7:17 p.m. UTC | #9
On 14/03/2020 21:14, Michael S. Tsirkin wrote:
> On Sat, Mar 14, 2020 at 09:04:30PM +0200, Liran Alon wrote:
>> On 14/03/2020 20:18, Michael S. Tsirkin wrote:
>>> On Fri, Mar 13, 2020 at 06:26:54PM +0200, Liran Alon wrote:
>>>> On 13/03/2020 17:47, Michael S. Tsirkin wrote:
>>>>> On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
>>>>>>>> @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
>>>>>>>>          return ram_size;
>>>>>>>>      }
>>>>>>>> +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
>>>>>>>> +{
>>>>>>>> +    X86CPU *cpu = X86_CPU(current_cpu);
>>>>>>>> +    qemu_timeval tv;
>>>>>>>> +
>>>>>>>> +    if (qemu_gettimeofday(&tv) < 0) {
>>>>>>>> +        return UINT32_MAX;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
>>>>>>>> +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
>>>>>>>> +    return (uint32_t)tv.tv_sec;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>      /* vmmouse helpers */
>>>>>>>>      void vmmouse_get_data(uint32_t *data)
>>>>>>>>      {
>>>>>>> That's a very weird thing to return to the guest.
>>>>>>> For example it's not monotonic across migrations.
>>>>>> That's the VMware PV interface... I didn't design it. :P
>>>>>> Regarding how it handles the fact time is not monotonic across migrations,
>>>>>> see big comment at the start of services/plugins/timeSync/timeSync.c in
>>>>>> open-vm-tools regarding the time-sync algorithm used by VMware Tools.
>>>>>> Specifically:
>>>>>> """
>>>>>> During normal operation this plugin only steps the time forward and only if
>>>>>> the error is greater than one second.
>>>>> Looks like guest assumes this time only moves forward.
>>>>> So something needs to be done to avoid it moving
>>>>> backward across migrations.
>>>> Where do you see this assumption in guest code? I don't think this is true.
>>>> Guest code seems to handle this by making sure to only step the time
>>>> forward.
>>> Exactly. So if host time moved backward e.g. by 100s, then for 100s
>>> time is not correcting. Which possibly vmware has a way to mitigate
>>> against e.g. by synchronising host time using their
>>> management app.
>>>
>>>> Read carefully services/plugins/timeSync/timeSync.c and point me to what I'm
>>>> missing if you think otherwise (i.e. I missed something).
>>> I'm just going by what you write in a patch.
>>>
>> So guest doesn't assume that this time only moves forward...
>>
>> Can you clarify then which change do you suggest making to this patch in
>> this regard? It seems correct to me.
>> i.e. The CMD_GETTIME implementation seems correct to me and it doesn't need
>> to do anything special to handle host time moving backwards.
>>
>> -Liran
>>
> I think something needs to be done to make sure host time as reported by
> this command does not move backwards significantly. Just forwarding
> gettimeofday won't cut it IMHO. Look at kvm clock for inspiration of
> things to do.
>
It isn't required by the command interface definition.
It's explicitly specified in timeSync.c that guest code handles the case 
host time goes backwards.
We are not inventing the interface, we just implement it according to 
it's definition.

-Liran
Michael S. Tsirkin March 14, 2020, 7:26 p.m. UTC | #10
On Sat, Mar 14, 2020 at 09:17:30PM +0200, Liran Alon wrote:
> 
> On 14/03/2020 21:14, Michael S. Tsirkin wrote:
> > On Sat, Mar 14, 2020 at 09:04:30PM +0200, Liran Alon wrote:
> > > On 14/03/2020 20:18, Michael S. Tsirkin wrote:
> > > > On Fri, Mar 13, 2020 at 06:26:54PM +0200, Liran Alon wrote:
> > > > > On 13/03/2020 17:47, Michael S. Tsirkin wrote:
> > > > > > On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
> > > > > > > > > @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
> > > > > > > > >          return ram_size;
> > > > > > > > >      }
> > > > > > > > > +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
> > > > > > > > > +{
> > > > > > > > > +    X86CPU *cpu = X86_CPU(current_cpu);
> > > > > > > > > +    qemu_timeval tv;
> > > > > > > > > +
> > > > > > > > > +    if (qemu_gettimeofday(&tv) < 0) {
> > > > > > > > > +        return UINT32_MAX;
> > > > > > > > > +    }
> > > > > > > > > +
> > > > > > > > > +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
> > > > > > > > > +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
> > > > > > > > > +    return (uint32_t)tv.tv_sec;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >      /* vmmouse helpers */
> > > > > > > > >      void vmmouse_get_data(uint32_t *data)
> > > > > > > > >      {
> > > > > > > > That's a very weird thing to return to the guest.
> > > > > > > > For example it's not monotonic across migrations.
> > > > > > > That's the VMware PV interface... I didn't design it. :P
> > > > > > > Regarding how it handles the fact time is not monotonic across migrations,
> > > > > > > see big comment at the start of services/plugins/timeSync/timeSync.c in
> > > > > > > open-vm-tools regarding the time-sync algorithm used by VMware Tools.
> > > > > > > Specifically:
> > > > > > > """
> > > > > > > During normal operation this plugin only steps the time forward and only if
> > > > > > > the error is greater than one second.
> > > > > > Looks like guest assumes this time only moves forward.
> > > > > > So something needs to be done to avoid it moving
> > > > > > backward across migrations.
> > > > > Where do you see this assumption in guest code? I don't think this is true.
> > > > > Guest code seems to handle this by making sure to only step the time
> > > > > forward.
> > > > Exactly. So if host time moved backward e.g. by 100s, then for 100s
> > > > time is not correcting. Which possibly vmware has a way to mitigate
> > > > against e.g. by synchronising host time using their
> > > > management app.
> > > > 
> > > > > Read carefully services/plugins/timeSync/timeSync.c and point me to what I'm
> > > > > missing if you think otherwise (i.e. I missed something).
> > > > I'm just going by what you write in a patch.
> > > > 
> > > So guest doesn't assume that this time only moves forward...
> > > 
> > > Can you clarify then which change do you suggest making to this patch in
> > > this regard? It seems correct to me.
> > > i.e. The CMD_GETTIME implementation seems correct to me and it doesn't need
> > > to do anything special to handle host time moving backwards.
> > > 
> > > -Liran
> > > 
> > I think something needs to be done to make sure host time as reported by
> > this command does not move backwards significantly. Just forwarding
> > gettimeofday won't cut it IMHO. Look at kvm clock for inspiration of
> > things to do.
> > 
> It isn't required by the command interface definition.
> It's explicitly specified in timeSync.c that guest code handles the case
> host time goes backwards.

According to what you wrote it does not crash but also does not do
updates. That will work well only if the time difference isn't large.
Then with time as guest gets interrupted by host, the time difference
shrinks and eventually you are resyncing again.  And I'm guessing there
are hypervisor management interfaces in place to make sure that is so.
Linux is not guaranteed to have such interfaces so you have to come
up with QEMU specific ones.


> We are not inventing the interface, we just implement it according to it's
> definition.
> 
> -Liran

Host time is a vague enough terminology. I suspect this implementation
is too simplistic to cover all cases.
Nikita Leshenko March 14, 2020, 7:58 p.m. UTC | #11
> On 14 Mar 2020, at 21:26, Michael S. Tsirkin <mst@redhat.com> wrote:
> 
> On Sat, Mar 14, 2020 at 09:17:30PM +0200, Liran Alon wrote:
>> 
>> On 14/03/2020 21:14, Michael S. Tsirkin wrote:
>>> On Sat, Mar 14, 2020 at 09:04:30PM +0200, Liran Alon wrote:
>>>> On 14/03/2020 20:18, Michael S. Tsirkin wrote:
>>>>> On Fri, Mar 13, 2020 at 06:26:54PM +0200, Liran Alon wrote:
>>>>>> On 13/03/2020 17:47, Michael S. Tsirkin wrote:
>>>>>>> On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
>>>>>>>>>> @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
>>>>>>>>>>         return ram_size;
>>>>>>>>>>     }
>>>>>>>>>> +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
>>>>>>>>>> +{
>>>>>>>>>> +    X86CPU *cpu = X86_CPU(current_cpu);
>>>>>>>>>> +    qemu_timeval tv;
>>>>>>>>>> +
>>>>>>>>>> +    if (qemu_gettimeofday(&tv) < 0) {
>>>>>>>>>> +        return UINT32_MAX;
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
>>>>>>>>>> +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
>>>>>>>>>> +    return (uint32_t)tv.tv_sec;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>     /* vmmouse helpers */
>>>>>>>>>>     void vmmouse_get_data(uint32_t *data)
>>>>>>>>>>     {
>>>>>>>>> That's a very weird thing to return to the guest.
>>>>>>>>> For example it's not monotonic across migrations.
>>>>>>>> That's the VMware PV interface... I didn't design it. :P
>>>>>>>> Regarding how it handles the fact time is not monotonic across migrations,
>>>>>>>> see big comment at the start of services/plugins/timeSync/timeSync.c in
>>>>>>>> open-vm-tools regarding the time-sync algorithm used by VMware Tools.
>>>>>>>> Specifically:
>>>>>>>> """
>>>>>>>> During normal operation this plugin only steps the time forward and only if
>>>>>>>> the error is greater than one second.
>>>>>>> Looks like guest assumes this time only moves forward.
>>>>>>> So something needs to be done to avoid it moving
>>>>>>> backward across migrations.
>>>>>> Where do you see this assumption in guest code? I don't think this is true.
>>>>>> Guest code seems to handle this by making sure to only step the time
>>>>>> forward.
>>>>> Exactly. So if host time moved backward e.g. by 100s, then for 100s
>>>>> time is not correcting. Which possibly vmware has a way to mitigate
>>>>> against e.g. by synchronising host time using their
>>>>> management app.
>>>>> 
>>>>>> Read carefully services/plugins/timeSync/timeSync.c and point me to what I'm
>>>>>> missing if you think otherwise (i.e. I missed something).
>>>>> I'm just going by what you write in a patch.
>>>>> 
>>>> So guest doesn't assume that this time only moves forward...
>>>> 
>>>> Can you clarify then which change do you suggest making to this patch in
>>>> this regard? It seems correct to me.
>>>> i.e. The CMD_GETTIME implementation seems correct to me and it doesn't need
>>>> to do anything special to handle host time moving backwards.
>>>> 
>>>> -Liran
>>>> 
>>> I think something needs to be done to make sure host time as reported by
>>> this command does not move backwards significantly. Just forwarding
>>> gettimeofday won't cut it IMHO. Look at kvm clock for inspiration of
>>> things to do.
>>> 
>> It isn't required by the command interface definition.
>> It's explicitly specified in timeSync.c that guest code handles the case
>> host time goes backwards.
> 
> According to what you wrote it does not crash but also does not do
> updates. That will work well only if the time difference isn't large.
> Then with time as guest gets interrupted by host, the time difference
> shrinks and eventually you are resyncing again.  And I'm guessing there
> are hypervisor management interfaces in place to make sure that is so.
> Linux is not guaranteed to have such interfaces so you have to come
> up with QEMU specific ones.
> 
> 
>> We are not inventing the interface, we just implement it according to it's
>> definition.
>> 
>> -Liran
> 
> Host time is a vague enough terminology. I suspect this implementation
> is too simplistic to cover all cases.
> 
> -- 
> MST

I saw this discussion from the side and I just wanted to point out that as far
as I understand this interface is needed to sync *wallclock time* between the
guest and the host, mostly for user experience. There is no guarantee that it's
monotonic. Unlike TSC, we don't need to take special care with regard to live
migration or drift.

Just as NTP may report inconsistent time, it's OK if this interface is somewhat
inconsistent.

I think that the reason that open-vm-tools doesn't move time backwards is to
help applications that treat wallclock time as if it's monotonic time and break
if the date is moved backwards (which may happen more frequently in virtual
environment so it's handled specifically). But this doesn't change the fact that
this PV interface is for reporting wall time. So I couldn't understand what
source other than gettimeofday() you were suggesting for Liran to use to report
wallclock time.

Nikita
Liran Alon March 14, 2020, 8:05 p.m. UTC | #12
On 14/03/2020 21:58, Nikita Leshenko wrote:
>
>> On 14 Mar 2020, at 21:26, Michael S. Tsirkin <mst@redhat.com> wrote:
>>
>> On Sat, Mar 14, 2020 at 09:17:30PM +0200, Liran Alon wrote:
>>> On 14/03/2020 21:14, Michael S. Tsirkin wrote:
>>>> On Sat, Mar 14, 2020 at 09:04:30PM +0200, Liran Alon wrote:
>>>>> On 14/03/2020 20:18, Michael S. Tsirkin wrote:
>>>>>> On Fri, Mar 13, 2020 at 06:26:54PM +0200, Liran Alon wrote:
>>>>>>> On 13/03/2020 17:47, Michael S. Tsirkin wrote:
>>>>>>>> On Fri, Mar 13, 2020 at 05:25:20PM +0200, Liran Alon wrote:
>>>>>>>>>>> @@ -168,6 +169,20 @@ static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
>>>>>>>>>>>          return ram_size;
>>>>>>>>>>>      }
>>>>>>>>>>> +static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
>>>>>>>>>>> +{
>>>>>>>>>>> +    X86CPU *cpu = X86_CPU(current_cpu);
>>>>>>>>>>> +    qemu_timeval tv;
>>>>>>>>>>> +
>>>>>>>>>>> +    if (qemu_gettimeofday(&tv) < 0) {
>>>>>>>>>>> +        return UINT32_MAX;
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
>>>>>>>>>>> +    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
>>>>>>>>>>> +    return (uint32_t)tv.tv_sec;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>      /* vmmouse helpers */
>>>>>>>>>>>      void vmmouse_get_data(uint32_t *data)
>>>>>>>>>>>      {
>>>>>>>>>> That's a very weird thing to return to the guest.
>>>>>>>>>> For example it's not monotonic across migrations.
>>>>>>>>> That's the VMware PV interface... I didn't design it. :P
>>>>>>>>> Regarding how it handles the fact time is not monotonic across migrations,
>>>>>>>>> see big comment at the start of services/plugins/timeSync/timeSync.c in
>>>>>>>>> open-vm-tools regarding the time-sync algorithm used by VMware Tools.
>>>>>>>>> Specifically:
>>>>>>>>> """
>>>>>>>>> During normal operation this plugin only steps the time forward and only if
>>>>>>>>> the error is greater than one second.
>>>>>>>> Looks like guest assumes this time only moves forward.
>>>>>>>> So something needs to be done to avoid it moving
>>>>>>>> backward across migrations.
>>>>>>> Where do you see this assumption in guest code? I don't think this is true.
>>>>>>> Guest code seems to handle this by making sure to only step the time
>>>>>>> forward.
>>>>>> Exactly. So if host time moved backward e.g. by 100s, then for 100s
>>>>>> time is not correcting. Which possibly vmware has a way to mitigate
>>>>>> against e.g. by synchronising host time using their
>>>>>> management app.
>>>>>>
>>>>>>> Read carefully services/plugins/timeSync/timeSync.c and point me to what I'm
>>>>>>> missing if you think otherwise (i.e. I missed something).
>>>>>> I'm just going by what you write in a patch.
>>>>>>
>>>>> So guest doesn't assume that this time only moves forward...
>>>>>
>>>>> Can you clarify then which change do you suggest making to this patch in
>>>>> this regard? It seems correct to me.
>>>>> i.e. The CMD_GETTIME implementation seems correct to me and it doesn't need
>>>>> to do anything special to handle host time moving backwards.
>>>>>
>>>>> -Liran
>>>>>
>>>> I think something needs to be done to make sure host time as reported by
>>>> this command does not move backwards significantly. Just forwarding
>>>> gettimeofday won't cut it IMHO. Look at kvm clock for inspiration of
>>>> things to do.
>>>>
>>> It isn't required by the command interface definition.
>>> It's explicitly specified in timeSync.c that guest code handles the case
>>> host time goes backwards.
>> According to what you wrote it does not crash but also does not do
>> updates. That will work well only if the time difference isn't large.
>> Then with time as guest gets interrupted by host, the time difference
>> shrinks and eventually you are resyncing again.  And I'm guessing there
>> are hypervisor management interfaces in place to make sure that is so.
>> Linux is not guaranteed to have such interfaces so you have to come
>> up with QEMU specific ones.
>>
>>
>>> We are not inventing the interface, we just implement it according to it's
>>> definition.
>>>
>>> -Liran
>> Host time is a vague enough terminology. I suspect this implementation
>> is too simplistic to cover all cases.
>>
>> -- 
>> MST
> I saw this discussion from the side and I just wanted to point out that as far
> as I understand this interface is needed to sync *wallclock time* between the
> guest and the host, mostly for user experience. There is no guarantee that it's
> monotonic. Unlike TSC, we don't need to take special care with regard to live
> migration or drift.
>
> Just as NTP may report inconsistent time, it's OK if this interface is somewhat
> inconsistent.
>
> I think that the reason that open-vm-tools doesn't move time backwards is to
> help applications that treat wallclock time as if it's monotonic time and break
> if the date is moved backwards (which may happen more frequently in virtual
> environment so it's handled specifically). But this doesn't change the fact that
> this PV interface is for reporting wall time. So I couldn't understand what
> source other than gettimeofday() you were suggesting for Liran to use to report
> wallclock time.
>
> Nikita
>
Michael, you can also refer to this VMware time-keeping whitepaper: 
https://www.vmware.com/pdf/vmware_timekeeping.pdf.
According to section "Initializing and Correcting Wall-Clock Time":
"""
VMware Tools can also optionally be used to correct long‐term drift and 
errors by periodically
resynchronizing the virtual machine’s clock to the host’s clock, but the 
current version at this writing is limited.
In particular, in guest operating systems other than NetWare, it does 
not correct errors in which the guest clock
is ahead of real time, only those in which the guest clock is behind.

"""

If I understand correctly, this seems to validate my assumption that 
current implementation for CMD_GETTIME is sufficient.

-Liran
Michael S. Tsirkin March 14, 2020, 8:48 p.m. UTC | #13
On Sat, Mar 14, 2020 at 09:58:23PM +0200, Nikita Leshenko wrote:
> I think that the reason that open-vm-tools doesn't move time backwards is to
> help applications that treat wallclock time as if it's monotonic time and break
> if the date is moved backwards (which may happen more frequently in virtual
> environment so it's handled specifically). But this doesn't change the fact that
> this PV interface is for reporting wall time.
> So I couldn't understand what
> source other than gettimeofday() you were suggesting for Liran to use to report
> wallclock time.

Some kind of offset to wallclock time I'm guessing. For example,
we could save wall clock on vm save, and if it goes
backwards on vm load, add an offset.
Michael S. Tsirkin March 14, 2020, 8:56 p.m. UTC | #14
On Sat, Mar 14, 2020 at 10:05:20PM +0200, Liran Alon wrote:
> Michael, you can also refer to this VMware time-keeping whitepaper:
> https://www.vmware.com/pdf/vmware_timekeeping.pdf.
> According to section "Initializing and Correcting Wall-Clock Time":
> """
> VMware Tools can also optionally be used to correct long‐term drift and
> errors by periodically
> resynchronizing the virtual machine’s clock to the host’s clock, but the
> current version at this writing is limited.
> In particular, in guest operating systems other than NetWare, it does not
> correct errors in which the guest clock
> is ahead of real time, only those in which the guest clock is behind.
> 
> """

This talks about guest time.
What this does not mention is whether hosts need to employ any mechanisms
to synchronise wall clock between hosts.

> If I understand correctly, this seems to validate my assumption that current
> implementation for CMD_GETTIME is sufficient.

So I am concerned this does not interact well with other time sources
in QEMU. For example, it's very useful to set guest time with -rtc base
flag.

Can you use qemu_get_timedate?
Liran Alon March 15, 2020, 11:56 a.m. UTC | #15
On 14/03/2020 22:56, Michael S. Tsirkin wrote:
> On Sat, Mar 14, 2020 at 10:05:20PM +0200, Liran Alon wrote:
>> Michael, you can also refer to this VMware time-keeping whitepaper:
>> https://urldefense.com/v3/__https://www.vmware.com/pdf/vmware_timekeeping.pdf__;!!GqivPVa7Brio!K8sfnfvVgKwrQ4SMwX-K6-S5yR4ln9_qZ6o4GzIpQkohfWtinlplNhXzFlyUgks$ .
>> According to section "Initializing and Correcting Wall-Clock Time":
>> """
>> VMware Tools can also optionally be used to correct long‐term drift and
>> errors by periodically
>> resynchronizing the virtual machine’s clock to the host’s clock, but the
>> current version at this writing is limited.
>> In particular, in guest operating systems other than NetWare, it does not
>> correct errors in which the guest clock
>> is ahead of real time, only those in which the guest clock is behind.
>>
>> """
> This talks about guest time.
> What this does not mention is whether hosts need to employ any mechanisms
> to synchronise wall clock between hosts.
The above mentioned whitepaper also discuss how VMware maintains the 
wallclock time across migrations (vMotion).
See section "Using VMware Tools Clock Synchronization" in whitepaper.

Specifically, there is an option in .vmx file named 
"time.synchronize.resume.disk" which:
"""
If set to TRUE, the clock syncs after resuming from suspend and after 
migrating to a new host using the VMware VMotion feature.
"""

The matching functionality in open-vm-tools can can be seen in 
services/plugins/timeSync/timeSync.c where ToolsOnLoad()
registers the "Time_Synchronize" RpcCallback, which is 
TimeSyncTcloHandler(), that is possibly allowed to sync time backwards 
(Note the "backwardSync" var).

The current patch-series I have submitted doesn't implement this 
RpcCallback functionality.
That work can be delayed to a future patch-series that will add this 
extra functionality as-well.

>> If I understand correctly, this seems to validate my assumption that current
>> implementation for CMD_GETTIME is sufficient.
> So I am concerned this does not interact well with other time sources
> in QEMU. For example, it's very useful to set guest time with -rtc base
> flag.
>
> Can you use qemu_get_timedate?
>
This is a very good point.
VMware also have the ability that allows user to explicitly set guest 
time with .vmx "rtc.startTime" option.
(The time-zone can also be set by specifying an offset from UTC with 
"rtc.diffFromUTC" option)

However, if you will read section "Using VMware Tools Clock 
Synchronization -> Disabling All Synchronization" in above mentioned 
whitepaper,
you will notice that in VMware's design, VMPort CMD_GETTIME command is 
intentionally not synced with virtual CMOS TOD. i.e. The section explicitly
documents that if a user wants to set guest time to fictitious time, 
user must disable VMware Tools time sync functionality by manipulating
"tools.syncTime" and "time.synchronize.*" configuration options as desired.

Therefore, I think current patch VMPort CMD_GETTIME command 
implementation is correct.
What do you think?

-Liran
Liran Alon March 22, 2020, 11:22 a.m. UTC | #16
On 15/03/2020 13:56, Liran Alon wrote:
>
> On 14/03/2020 22:56, Michael S. Tsirkin wrote:
>> On Sat, Mar 14, 2020 at 10:05:20PM +0200, Liran Alon wrote:
>>> Michael, you can also refer to this VMware time-keeping whitepaper:
>>> https://urldefense.com/v3/__https://www.vmware.com/pdf/vmware_timekeeping.pdf__;!!GqivPVa7Brio!K8sfnfvVgKwrQ4SMwX-K6-S5yR4ln9_qZ6o4GzIpQkohfWtinlplNhXzFlyUgks$ 
>>> .
>>> According to section "Initializing and Correcting Wall-Clock Time":
>>> """
>>> VMware Tools can also optionally be used to correct long‐term drift and
>>> errors by periodically
>>> resynchronizing the virtual machine’s clock to the host’s clock, but 
>>> the
>>> current version at this writing is limited.
>>> In particular, in guest operating systems other than NetWare, it 
>>> does not
>>> correct errors in which the guest clock
>>> is ahead of real time, only those in which the guest clock is behind.
>>>
>>> """
>> This talks about guest time.
>> What this does not mention is whether hosts need to employ any 
>> mechanisms
>> to synchronise wall clock between hosts.
> The above mentioned whitepaper also discuss how VMware maintains the 
> wallclock time across migrations (vMotion).
> See section "Using VMware Tools Clock Synchronization" in whitepaper.
>
> Specifically, there is an option in .vmx file named 
> "time.synchronize.resume.disk" which:
> """
> If set to TRUE, the clock syncs after resuming from suspend and after 
> migrating to a new host using the VMware VMotion feature.
> """
>
> The matching functionality in open-vm-tools can can be seen in 
> services/plugins/timeSync/timeSync.c where ToolsOnLoad()
> registers the "Time_Synchronize" RpcCallback, which is 
> TimeSyncTcloHandler(), that is possibly allowed to sync time backwards 
> (Note the "backwardSync" var).
>
> The current patch-series I have submitted doesn't implement this 
> RpcCallback functionality.
> That work can be delayed to a future patch-series that will add this 
> extra functionality as-well.
>
>>> If I understand correctly, this seems to validate my assumption that 
>>> current
>>> implementation for CMD_GETTIME is sufficient.
>> So I am concerned this does not interact well with other time sources
>> in QEMU. For example, it's very useful to set guest time with -rtc base
>> flag.
>>
>> Can you use qemu_get_timedate?
>>
> This is a very good point.
> VMware also have the ability that allows user to explicitly set guest 
> time with .vmx "rtc.startTime" option.
> (The time-zone can also be set by specifying an offset from UTC with 
> "rtc.diffFromUTC" option)
>
> However, if you will read section "Using VMware Tools Clock 
> Synchronization -> Disabling All Synchronization" in above mentioned 
> whitepaper,
> you will notice that in VMware's design, VMPort CMD_GETTIME command is 
> intentionally not synced with virtual CMOS TOD. i.e. The section 
> explicitly
> documents that if a user wants to set guest time to fictitious time, 
> user must disable VMware Tools time sync functionality by manipulating
> "tools.syncTime" and "time.synchronize.*" configuration options as 
> desired.
>
> Therefore, I think current patch VMPort CMD_GETTIME command 
> implementation is correct.
> What do you think?
>
> -Liran
>
Gentle ping.

I would like to send the next version of the patch-series.
But before I do, I would like to know what we have agreed upon regarding 
this patch.

Thanks,
-Liran
Liran Alon March 31, 2020, 12:35 p.m. UTC | #17
On 22/03/2020 13:22, Liran Alon wrote:
>
> On 15/03/2020 13:56, Liran Alon wrote:
>>
>> On 14/03/2020 22:56, Michael S. Tsirkin wrote:
>>> On Sat, Mar 14, 2020 at 10:05:20PM +0200, Liran Alon wrote:
>>>> Michael, you can also refer to this VMware time-keeping whitepaper:
>>>> https://urldefense.com/v3/__https://www.vmware.com/pdf/vmware_timekeeping.pdf__;!!GqivPVa7Brio!K8sfnfvVgKwrQ4SMwX-K6-S5yR4ln9_qZ6o4GzIpQkohfWtinlplNhXzFlyUgks$ 
>>>> .
>>>> According to section "Initializing and Correcting Wall-Clock Time":
>>>> """
>>>> VMware Tools can also optionally be used to correct long‐term drift 
>>>> and
>>>> errors by periodically
>>>> resynchronizing the virtual machine’s clock to the host’s clock, 
>>>> but the
>>>> current version at this writing is limited.
>>>> In particular, in guest operating systems other than NetWare, it 
>>>> does not
>>>> correct errors in which the guest clock
>>>> is ahead of real time, only those in which the guest clock is behind.
>>>>
>>>> """
>>> This talks about guest time.
>>> What this does not mention is whether hosts need to employ any 
>>> mechanisms
>>> to synchronise wall clock between hosts.
>> The above mentioned whitepaper also discuss how VMware maintains the 
>> wallclock time across migrations (vMotion).
>> See section "Using VMware Tools Clock Synchronization" in whitepaper.
>>
>> Specifically, there is an option in .vmx file named 
>> "time.synchronize.resume.disk" which:
>> """
>> If set to TRUE, the clock syncs after resuming from suspend and after 
>> migrating to a new host using the VMware VMotion feature.
>> """
>>
>> The matching functionality in open-vm-tools can can be seen in 
>> services/plugins/timeSync/timeSync.c where ToolsOnLoad()
>> registers the "Time_Synchronize" RpcCallback, which is 
>> TimeSyncTcloHandler(), that is possibly allowed to sync time 
>> backwards (Note the "backwardSync" var).
>>
>> The current patch-series I have submitted doesn't implement this 
>> RpcCallback functionality.
>> That work can be delayed to a future patch-series that will add this 
>> extra functionality as-well.
>>
>>>> If I understand correctly, this seems to validate my assumption 
>>>> that current
>>>> implementation for CMD_GETTIME is sufficient.
>>> So I am concerned this does not interact well with other time sources
>>> in QEMU. For example, it's very useful to set guest time with -rtc base
>>> flag.
>>>
>>> Can you use qemu_get_timedate?
>>>
>> This is a very good point.
>> VMware also have the ability that allows user to explicitly set guest 
>> time with .vmx "rtc.startTime" option.
>> (The time-zone can also be set by specifying an offset from UTC with 
>> "rtc.diffFromUTC" option)
>>
>> However, if you will read section "Using VMware Tools Clock 
>> Synchronization -> Disabling All Synchronization" in above mentioned 
>> whitepaper,
>> you will notice that in VMware's design, VMPort CMD_GETTIME command 
>> is intentionally not synced with virtual CMOS TOD. i.e. The section 
>> explicitly
>> documents that if a user wants to set guest time to fictitious time, 
>> user must disable VMware Tools time sync functionality by manipulating
>> "tools.syncTime" and "time.synchronize.*" configuration options as 
>> desired.
>>
>> Therefore, I think current patch VMPort CMD_GETTIME command 
>> implementation is correct.
>> What do you think?
>>
>> -Liran
>>
> Gentle ping.
>
> I would like to send the next version of the patch-series.
> But before I do, I would like to know what we have agreed upon 
> regarding this patch.
>
> Thanks,
> -Liran
>
Another gentle ping before I send v4 of patch-series.

Thanks,
-Liran
diff mbox series

Patch

diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c
index 3fb8a8bd458a..c5b659c59343 100644
--- a/hw/i386/vmport.c
+++ b/hw/i386/vmport.c
@@ -66,6 +66,7 @@  typedef struct VMPortState {
 
     uint32_t vmware_vmx_version;
     uint8_t vmware_vmx_type;
+    uint32_t max_time_lag_us;
 
     uint32_t compat_flags;
 } VMPortState;
@@ -168,6 +169,20 @@  static uint32_t vmport_cmd_ram_size(void *opaque, uint32_t addr)
     return ram_size;
 }
 
+static uint32_t vmport_cmd_time(void *opaque, uint32_t addr)
+{
+    X86CPU *cpu = X86_CPU(current_cpu);
+    qemu_timeval tv;
+
+    if (qemu_gettimeofday(&tv) < 0) {
+        return UINT32_MAX;
+    }
+
+    cpu->env.regs[R_EBX] = (uint32_t)tv.tv_usec;
+    cpu->env.regs[R_ECX] = port_state->max_time_lag_us;
+    return (uint32_t)tv.tv_sec;
+}
+
 /* vmmouse helpers */
 void vmmouse_get_data(uint32_t *data)
 {
@@ -214,6 +229,7 @@  static void vmport_realizefn(DeviceState *dev, Error **errp)
     vmport_register(VMPORT_CMD_GETRAMSIZE, vmport_cmd_ram_size, NULL);
     if (s->compat_flags & VMPORT_COMPAT_CMDS_V2) {
         vmport_register(VMPORT_CMD_GETBIOSUUID, vmport_cmd_get_bios_uuid, NULL);
+        vmport_register(VMPORT_CMD_GETTIME, vmport_cmd_time, NULL);
     }
 }
 
@@ -249,6 +265,11 @@  static Property vmport_properties[] = {
      * 5 - ACE 1.x (Deprecated)
      */
     DEFINE_PROP_UINT8("vmware-vmx-type", VMPortState, vmware_vmx_type, 2),
+    /*
+     * Max amount of time lag that can go uncorrected.
+     * Value taken from VMware Workstation 5.5.
+     **/
+    DEFINE_PROP_UINT32("max-time-lag", VMPortState, max_time_lag_us, 1000000),
 
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/include/hw/i386/vmport.h b/include/hw/i386/vmport.h
index 7f33512ca6f0..50416c8c8f3e 100644
--- a/include/hw/i386/vmport.h
+++ b/include/hw/i386/vmport.h
@@ -8,6 +8,7 @@  typedef enum {
     VMPORT_CMD_GETVERSION       = 10,
     VMPORT_CMD_GETBIOSUUID      = 19,
     VMPORT_CMD_GETRAMSIZE       = 20,
+    VMPORT_CMD_GETTIME          = 23,
     VMPORT_CMD_VMMOUSE_DATA     = 39,
     VMPORT_CMD_VMMOUSE_STATUS   = 40,
     VMPORT_CMD_VMMOUSE_COMMAND  = 41,