diff mbox

[1/3] KVM: x86: provide realtime host clock via vsyscall notifiers

Message ID 20170113120317.948215303@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marcelo Tosatti Jan. 13, 2017, 12:01 p.m. UTC
Expose the realtime host clock and save the TSC value
used for the clock calculation.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 arch/x86/kvm/x86.c |   38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Radim Krčmář Jan. 13, 2017, 3:18 p.m. UTC | #1
2017-01-13 10:01-0200, Marcelo Tosatti:
> Expose the realtime host clock and save the TSC value
> used for the clock calculation.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
>  arch/x86/kvm/x86.c |   38 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
> 
> Index: kvm-ptpdriver/arch/x86/kvm/x86.c
> ===================================================================
> --- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 08:59:03.015895353 -0200
> +++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 09:04:46.581415259 -0200
> @@ -1139,6 +1139,8 @@
>  
>  	u64		boot_ns;
>  	u64		nsec_base;
> +	u64		wall_time_sec;
> +	u64		wall_time_snsec;

The leading "s" in "snsec" looks like a copy-paste residue.

>  };
>  
>  static struct pvclock_gtod_data pvclock_gtod_data;
> @@ -1162,6 +1164,9 @@
>  	vdata->boot_ns			= boot_ns;
>  	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
>  
> +	vdata->wall_time_sec            = tk->xtime_sec;
> +	vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;

Using tk->tkr_mono offsets for real time seems wrong -- what happens if
the real time is half a second shifted from monotonic time?

If it's ok, then vdata->nsec_base == vdata->wall_time_snsec, so we don't
need it.

> +
>  	write_seqcount_end(&vdata->seq);
>  }
>  #endif
> @@ -1623,6 +1628,28 @@
>  	return mode;
>  }
>  
> +static int do_realtime(struct timespec *ts, cycle_t *cycle_now)

This is too similar to do_monotonic_boot(), but I don't see a solution
that is both nice and efficient. :(

(It usually means macros or copying pvclock_gtod_data.)

> +{
> +	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
> +	unsigned long seq;
> +	int mode;
> +	u64 ns;
> +
> +	do {
> +		seq = read_seqcount_begin(&gtod->seq);
> +		mode = gtod->clock.vclock_mode;
> +		ts->tv_sec = gtod->wall_time_sec;
> +		ns = gtod->wall_time_snsec;
> +		ns += vgettsc(cycle_now);
> +		ns >>= gtod->clock.shift;
> +	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
> +
> +	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
> +	ts->tv_nsec = ns;
> +
> +	return mode;
> +}
> +
>  /* returns true if host is using tsc clocksource */
>  static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
>  {
> @@ -1632,6 +1659,17 @@
>  
>  	return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
>  }
> +
> +/* returns true if host is using tsc clocksource */
> +static bool kvm_get_walltime_and_clockread(struct timespec *ts,
> +					   cycle_t *cycle_now)
> +{
> +	/* checked again under seqlock below */
> +	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
> +		return false;
> +
> +	return do_realtime(ts, cycle_now) == VCLOCK_TSC;
> +}
>  #endif
>  
>  /*
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 13, 2017, 3:34 p.m. UTC | #2
On Fri, Jan 13, 2017 at 04:18:04PM +0100, Radim Krcmar wrote:
> 2017-01-13 10:01-0200, Marcelo Tosatti:
> > Expose the realtime host clock and save the TSC value
> > used for the clock calculation.
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> > 
> > ---
> >  arch/x86/kvm/x86.c |   38 ++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 38 insertions(+)
> > 
> > Index: kvm-ptpdriver/arch/x86/kvm/x86.c
> > ===================================================================
> > --- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 08:59:03.015895353 -0200
> > +++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 09:04:46.581415259 -0200
> > @@ -1139,6 +1139,8 @@
> >  
> >  	u64		boot_ns;
> >  	u64		nsec_base;
> > +	u64		wall_time_sec;
> > +	u64		wall_time_snsec;
> 
> The leading "s" in "snsec" looks like a copy-paste residue.

Just copying the userspace vsyscall interface.

> >  };
> >  
> >  static struct pvclock_gtod_data pvclock_gtod_data;
> > @@ -1162,6 +1164,9 @@
> >  	vdata->boot_ns			= boot_ns;
> >  	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
> >  
> > +	vdata->wall_time_sec            = tk->xtime_sec;
> > +	vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
> 
> Using tk->tkr_mono offsets for real time seems wrong -- what happens if
> the real time is half a second shifted from monotonic time?

Both the userspace vsyscall interface and getnstimeofday
use it for realtime clock.

Monotonic clock adds the offset:

        vdata->monotonic_time_snsec     = tk->tkr_mono.xtime_nsec
                                        +
((u64)tk->wall_to_monotonic.tv_nsec
                                                << tk->tkr_mono.shift);

> If it's ok, then vdata->nsec_base == vdata->wall_time_snsec, so we don't
> need it.

Just copying the userspace vsyscall interface.

Do you actually want to change the "s" and unify wall_time_snsec with
nsec_base?

> > +
> >  	write_seqcount_end(&vdata->seq);
> >  }
> >  #endif
> > @@ -1623,6 +1628,28 @@
> >  	return mode;
> >  }
> >  
> > +static int do_realtime(struct timespec *ts, cycle_t *cycle_now)
> 
> This is too similar to do_monotonic_boot(), but I don't see a solution
> that is both nice and efficient. :(
> 
> (It usually means macros or copying pvclock_gtod_data.)

Yep.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Konrad Rzeszutek Wilk Jan. 13, 2017, 3:41 p.m. UTC | #3
On Fri, Jan 13, 2017 at 04:18:04PM +0100, Radim Krcmar wrote:
> 2017-01-13 10:01-0200, Marcelo Tosatti:
> > Expose the realtime host clock and save the TSC value
> > used for the clock calculation.
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> > 
> > ---
> >  arch/x86/kvm/x86.c |   38 ++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 38 insertions(+)
> > 
> > Index: kvm-ptpdriver/arch/x86/kvm/x86.c
> > ===================================================================
> > --- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 08:59:03.015895353 -0200
> > +++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 09:04:46.581415259 -0200
> > @@ -1139,6 +1139,8 @@
> >  
> >  	u64		boot_ns;
> >  	u64		nsec_base;
> > +	u64		wall_time_sec;
> > +	u64		wall_time_snsec;
> 
> The leading "s" in "snsec" looks like a copy-paste residue.
> 
> >  };
> >  
> >  static struct pvclock_gtod_data pvclock_gtod_data;
> > @@ -1162,6 +1164,9 @@
> >  	vdata->boot_ns			= boot_ns;
> >  	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
> >  
> > +	vdata->wall_time_sec            = tk->xtime_sec;
> > +	vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
> 
> Using tk->tkr_mono offsets for real time seems wrong -- what happens if
> the real time is half a second shifted from monotonic time?
> 
> If it's ok, then vdata->nsec_base == vdata->wall_time_snsec, so we don't
> need it.
> 
> > +
> >  	write_seqcount_end(&vdata->seq);
> >  }
> >  #endif
> > @@ -1623,6 +1628,28 @@
> >  	return mode;
> >  }
> >  
> > +static int do_realtime(struct timespec *ts, cycle_t *cycle_now)
> 
> This is too similar to do_monotonic_boot(), but I don't see a solution
> that is both nice and efficient. :(
> 
> (It usually means macros or copying pvclock_gtod_data.)


PV Clock is hypervisor agnostic so both KVM and Xen can use it. Is this clock
interface suppose to follow that?

Thanks.
> 
> > +{
> > +	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
> > +	unsigned long seq;
> > +	int mode;
> > +	u64 ns;
> > +
> > +	do {
> > +		seq = read_seqcount_begin(&gtod->seq);
> > +		mode = gtod->clock.vclock_mode;
> > +		ts->tv_sec = gtod->wall_time_sec;
> > +		ns = gtod->wall_time_snsec;
> > +		ns += vgettsc(cycle_now);
> > +		ns >>= gtod->clock.shift;
> > +	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
> > +
> > +	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
> > +	ts->tv_nsec = ns;
> > +
> > +	return mode;
> > +}
> > +
> >  /* returns true if host is using tsc clocksource */
> >  static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
> >  {
> > @@ -1632,6 +1659,17 @@
> >  
> >  	return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
> >  }
> > +
> > +/* returns true if host is using tsc clocksource */
> > +static bool kvm_get_walltime_and_clockread(struct timespec *ts,
> > +					   cycle_t *cycle_now)
> > +{
> > +	/* checked again under seqlock below */
> > +	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
> > +		return false;
> > +
> > +	return do_realtime(ts, cycle_now) == VCLOCK_TSC;
> > +}
> >  #endif
> >  
> >  /*
> > 
> > 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 13, 2017, 3:46 p.m. UTC | #4
On Fri, Jan 13, 2017 at 10:41:10AM -0500, Konrad Rzeszutek Wilk wrote:
> On Fri, Jan 13, 2017 at 04:18:04PM +0100, Radim Krcmar wrote:
> > 2017-01-13 10:01-0200, Marcelo Tosatti:
> > > Expose the realtime host clock and save the TSC value
> > > used for the clock calculation.
> > > 
> > > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> > > 
> > > ---
> > >  arch/x86/kvm/x86.c |   38 ++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 38 insertions(+)
> > > 
> > > Index: kvm-ptpdriver/arch/x86/kvm/x86.c
> > > ===================================================================
> > > --- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 08:59:03.015895353 -0200
> > > +++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 09:04:46.581415259 -0200
> > > @@ -1139,6 +1139,8 @@
> > >  
> > >  	u64		boot_ns;
> > >  	u64		nsec_base;
> > > +	u64		wall_time_sec;
> > > +	u64		wall_time_snsec;
> > 
> > The leading "s" in "snsec" looks like a copy-paste residue.
> > 
> > >  };
> > >  
> > >  static struct pvclock_gtod_data pvclock_gtod_data;
> > > @@ -1162,6 +1164,9 @@
> > >  	vdata->boot_ns			= boot_ns;
> > >  	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
> > >  
> > > +	vdata->wall_time_sec            = tk->xtime_sec;
> > > +	vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
> > 
> > Using tk->tkr_mono offsets for real time seems wrong -- what happens if
> > the real time is half a second shifted from monotonic time?
> > 
> > If it's ok, then vdata->nsec_base == vdata->wall_time_snsec, so we don't
> > need it.
> > 
> > > +
> > >  	write_seqcount_end(&vdata->seq);
> > >  }
> > >  #endif
> > > @@ -1623,6 +1628,28 @@
> > >  	return mode;
> > >  }
> > >  
> > > +static int do_realtime(struct timespec *ts, cycle_t *cycle_now)
> > 
> > This is too similar to do_monotonic_boot(), but I don't see a solution
> > that is both nice and efficient. :(
> > 
> > (It usually means macros or copying pvclock_gtod_data.)
> 
> 
> PV Clock is hypervisor agnostic so both KVM and Xen can use it. Is this clock
> interface suppose to follow that?

If Xen implements the KVM_HC_CLOCK_OFFSET hypercall, Xen guests can use the 
kvm ptp driver, yes.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 13, 2017, 4:28 p.m. UTC | #5
2017-01-13 13:34-0200, Marcelo Tosatti:
> On Fri, Jan 13, 2017 at 04:18:04PM +0100, Radim Krcmar wrote:
>> 2017-01-13 10:01-0200, Marcelo Tosatti:
>> > Expose the realtime host clock and save the TSC value
>> > used for the clock calculation.
>> > 
>> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>> > 
>> > ---
>> >  arch/x86/kvm/x86.c |   38 ++++++++++++++++++++++++++++++++++++++
>> >  1 file changed, 38 insertions(+)
>> > 
>> > Index: kvm-ptpdriver/arch/x86/kvm/x86.c
>> > ===================================================================
>> > --- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 08:59:03.015895353 -0200
>> > +++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 09:04:46.581415259 -0200
>> > @@ -1139,6 +1139,8 @@
>> >  
>> >  	u64		boot_ns;
>> >  	u64		nsec_base;
>> > +	u64		wall_time_sec;
>> > +	u64		wall_time_snsec;
>> 
>> The leading "s" in "snsec" looks like a copy-paste residue.
> 
> Just copying the userspace vsyscall interface.

Oh, so the "s" means "sub-" for sub-nanosecond precision.

>> >  };
>> >  
>> >  static struct pvclock_gtod_data pvclock_gtod_data;
>> > @@ -1162,6 +1164,9 @@
>> >  	vdata->boot_ns			= boot_ns;
>> >  	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
>> >  
>> > +	vdata->wall_time_sec            = tk->xtime_sec;
>> > +	vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
>> 
>> Using tk->tkr_mono offsets for real time seems wrong -- what happens if
>> the real time is half a second shifted from monotonic time?
> 
> Both the userspace vsyscall interface and getnstimeofday
> use it for realtime clock.
> 
> Monotonic clock adds the offset:
> 
>         vdata->monotonic_time_snsec     = tk->tkr_mono.xtime_nsec
>                                         +
> ((u64)tk->wall_to_monotonic.tv_nsec
>                                                 << tk->tkr_mono.shift);

I see, thanks.  Makes me wonder why our monotonic time is correct then,
but that is problably thanks to boot_ns.

>> If it's ok, then vdata->nsec_base == vdata->wall_time_snsec, so we don't
>> need it.
> 
> Just copying the userspace vsyscall interface.
> 
> Do you actually want to change the "s" and unify wall_time_snsec with
> nsec_base?

The "s" isn't important, even though I don't think we do anything that
would justify it, but make use just 8 bytes for both.
Renaming nsec_base is ok, but I'm not sure what tk->tkr_mono.xtime_nsec
is anymore.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 13, 2017, 5:51 p.m. UTC | #6
On Fri, Jan 13, 2017 at 05:28:09PM +0100, Radim Krcmar wrote:
> 2017-01-13 13:34-0200, Marcelo Tosatti:
> > On Fri, Jan 13, 2017 at 04:18:04PM +0100, Radim Krcmar wrote:
> >> 2017-01-13 10:01-0200, Marcelo Tosatti:
> >> > Expose the realtime host clock and save the TSC value
> >> > used for the clock calculation.
> >> > 
> >> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> >> > 
> >> > ---
> >> >  arch/x86/kvm/x86.c |   38 ++++++++++++++++++++++++++++++++++++++
> >> >  1 file changed, 38 insertions(+)
> >> > 
> >> > Index: kvm-ptpdriver/arch/x86/kvm/x86.c
> >> > ===================================================================
> >> > --- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 08:59:03.015895353 -0200
> >> > +++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 09:04:46.581415259 -0200
> >> > @@ -1139,6 +1139,8 @@
> >> >  
> >> >  	u64		boot_ns;
> >> >  	u64		nsec_base;
> >> > +	u64		wall_time_sec;
> >> > +	u64		wall_time_snsec;
> >> 
> >> The leading "s" in "snsec" looks like a copy-paste residue.
> > 
> > Just copying the userspace vsyscall interface.
> 
> Oh, so the "s" means "sub-" for sub-nanosecond precision.

It only counts nanoseconds, how can it be sub nanosecond precise?

> >> >  };
> >> >  
> >> >  static struct pvclock_gtod_data pvclock_gtod_data;
> >> > @@ -1162,6 +1164,9 @@
> >> >  	vdata->boot_ns			= boot_ns;
> >> >  	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
> >> >  
> >> > +	vdata->wall_time_sec            = tk->xtime_sec;
> >> > +	vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
> >> 
> >> Using tk->tkr_mono offsets for real time seems wrong -- what happens if
> >> the real time is half a second shifted from monotonic time?
> > 
> > Both the userspace vsyscall interface and getnstimeofday
> > use it for realtime clock.
> > 
> > Monotonic clock adds the offset:
> > 
> >         vdata->monotonic_time_snsec     = tk->tkr_mono.xtime_nsec
> >                                         +
> > ((u64)tk->wall_to_monotonic.tv_nsec
> >                                                 << tk->tkr_mono.shift);
> 
> I see, thanks.  Makes me wonder why our monotonic time is correct then,
> but that is problably thanks to boot_ns.

The actual starting point of the system_timestamp part of kvmclock
does not matter, all it matters is that it counts in nanoseconds.

> >> If it's ok, then vdata->nsec_base == vdata->wall_time_snsec, so we don't
> >> need it.
> > 
> > Just copying the userspace vsyscall interface.
> > 
> > Do you actually want to change the "s" and unify wall_time_snsec with
> > nsec_base?
> 
> The "s" isn't important, even though I don't think we do anything that
> would justify it, but make use just 8 bytes for both.

Unified.

> Renaming nsec_base is ok, but I'm not sure what tk->tkr_mono.xtime_nsec
> is anymore.

Is the nsec part of tk->xtime_sec. See accumulate_nsecs_to_secs
(which is called from the timer interrupt handler).

/**
 * struct timekeeper - Structure holding internal timekeeping values.
 * @tkr_mono:           The readout base structure for CLOCK_MONOTONIC
 * @tkr_raw:            The readout base structure for
 * CLOCK_MONOTONIC_RAW
 * @xtime_sec:          Current CLOCK_REALTIME time in seconds
 * @ktime_sec:          Current CLOCK_MONOTONIC time in seconds
 * @wall_to_monotonic:  CLOCK_REALTIME to CLOCK_MONOTONIC offset
 * @offs_real:          Offset clock monotonic -> clock realtime
 * @offs_boot:          Offset clock monotonic -> clock boottime
 * @offs_tai:           Offset clock monotonic -> clock tai
 * @tai_offset:         The current UTC to TAI offset in seconds



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 16, 2017, 3:40 p.m. UTC | #7
2017-01-13 15:51-0200, Marcelo Tosatti:
> On Fri, Jan 13, 2017 at 05:28:09PM +0100, Radim Krcmar wrote:
>> 2017-01-13 13:34-0200, Marcelo Tosatti:
>> > On Fri, Jan 13, 2017 at 04:18:04PM +0100, Radim Krcmar wrote:
>> >> 2017-01-13 10:01-0200, Marcelo Tosatti:
>> >> > Expose the realtime host clock and save the TSC value
>> >> > used for the clock calculation.
>> >> > 
>> >> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>> >> > 
>> >> > ---
>> >> >  arch/x86/kvm/x86.c |   38 ++++++++++++++++++++++++++++++++++++++
>> >> >  1 file changed, 38 insertions(+)
>> >> > 
>> >> > Index: kvm-ptpdriver/arch/x86/kvm/x86.c
>> >> > ===================================================================
>> >> > --- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 08:59:03.015895353 -0200
>> >> > +++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 09:04:46.581415259 -0200
>> >> > @@ -1139,6 +1139,8 @@
>> >> >  
>> >> >  	u64		boot_ns;
>> >> >  	u64		nsec_base;
>> >> > +	u64		wall_time_sec;
>> >> > +	u64		wall_time_snsec;
>> >> 
>> >> The leading "s" in "snsec" looks like a copy-paste residue.
>> > 
>> > Just copying the userspace vsyscall interface.
>> 
>> Oh, so the "s" means "sub-" for sub-nanosecond precision.
> 
> It only counts nanoseconds, how can it be sub nanosecond precise?

Because it doesn't count nanoseconds.  In update_vsyscall(), the *_snsec
are shifted by tk->tkr_mono.shift bits to the left and that precision
goes to sub-nanoseconds.

64 bit value makes sense then -- would be nice if we could pass that
extra precision to the guest.

>> >> >  };
>> >> >  
>> >> >  static struct pvclock_gtod_data pvclock_gtod_data;
>> >> > @@ -1162,6 +1164,9 @@
>> >> >  	vdata->boot_ns			= boot_ns;
>> >> >  	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
>> >> >  
>> >> > +	vdata->wall_time_sec            = tk->xtime_sec;
>> >> > +	vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
>> >> 
>> >> Using tk->tkr_mono offsets for real time seems wrong -- what happens if
>> >> the real time is half a second shifted from monotonic time?
>> > 
>> > Both the userspace vsyscall interface and getnstimeofday
>> > use it for realtime clock.
>> > 
>> > Monotonic clock adds the offset:
>> > 
>> >         vdata->monotonic_time_snsec     = tk->tkr_mono.xtime_nsec
>> >                                         +
>> > ((u64)tk->wall_to_monotonic.tv_nsec
>> >                                                 << tk->tkr_mono.shift);
>> 
>> I see, thanks.  Makes me wonder why our monotonic time is correct then,
>> but that is problably thanks to boot_ns.
> 
> The actual starting point of the system_timestamp part of kvmclock
> does not matter, all it matters is that it counts in nanoseconds.

True.

>> >> If it's ok, then vdata->nsec_base == vdata->wall_time_snsec, so we don't
>> >> need it.
>> > 
>> > Just copying the userspace vsyscall interface.
>> > 
>> > Do you actually want to change the "s" and unify wall_time_snsec with
>> > nsec_base?
>> 
>> The "s" isn't important, even though I don't think we do anything that
>> would justify it, but make use just 8 bytes for both.
> 
> Unified.
> 
>> Renaming nsec_base is ok, but I'm not sure what tk->tkr_mono.xtime_nsec
>> is anymore.
> 
> Is the nsec part of tk->xtime_sec. See accumulate_nsecs_to_secs
> (which is called from the timer interrupt handler).

I see, thanks.  They are not nanoseconds as the sub-nanosecond shift is
there as well:

  u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
  while (tk->tkr_mono.xtime_nsec >= nsecps) {
  	tk->tkr_mono.xtime_nsec -= nsecps;
  	tk->xtime_sec++;
  }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: kvm-ptpdriver/arch/x86/kvm/x86.c
===================================================================
--- kvm-ptpdriver.orig/arch/x86/kvm/x86.c	2017-01-13 08:59:03.015895353 -0200
+++ kvm-ptpdriver/arch/x86/kvm/x86.c	2017-01-13 09:04:46.581415259 -0200
@@ -1139,6 +1139,8 @@ 
 
 	u64		boot_ns;
 	u64		nsec_base;
+	u64		wall_time_sec;
+	u64		wall_time_snsec;
 };
 
 static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1162,6 +1164,9 @@ 
 	vdata->boot_ns			= boot_ns;
 	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;
 
+	vdata->wall_time_sec            = tk->xtime_sec;
+	vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
+
 	write_seqcount_end(&vdata->seq);
 }
 #endif
@@ -1623,6 +1628,28 @@ 
 	return mode;
 }
 
+static int do_realtime(struct timespec *ts, cycle_t *cycle_now)
+{
+	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+	unsigned long seq;
+	int mode;
+	u64 ns;
+
+	do {
+		seq = read_seqcount_begin(&gtod->seq);
+		mode = gtod->clock.vclock_mode;
+		ts->tv_sec = gtod->wall_time_sec;
+		ns = gtod->wall_time_snsec;
+		ns += vgettsc(cycle_now);
+		ns >>= gtod->clock.shift;
+	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
+
+	return mode;
+}
+
 /* returns true if host is using tsc clocksource */
 static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
 {
@@ -1632,6 +1659,17 @@ 
 
 	return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
 }
+
+/* returns true if host is using tsc clocksource */
+static bool kvm_get_walltime_and_clockread(struct timespec *ts,
+					   cycle_t *cycle_now)
+{
+	/* checked again under seqlock below */
+	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+		return false;
+
+	return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+}
 #endif
 
 /*