diff mbox

[5/5] PTP: add kvm PTP driver

Message ID 20170120122503.842086637@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marcelo Tosatti Jan. 20, 2017, 12:20 p.m. UTC
Add a driver with a gettime method returning the host's realtime clock.
This allows Chrony to synchronize host and guest clocks with
high precision (see results below).

chronyc> sources
MS Name/IP address         Stratum Poll Reach LastRx Last sample


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Paolo Bonzini Jan. 20, 2017, 12:58 p.m. UTC | #1
On 20/01/2017 13:20, Marcelo Tosatti wrote:
> +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> +				     clock_off_gpa,
> +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> +		if (ret != 0) {
> +			pr_err("clock offset hypercall ret %lu\n", ret);
> +			spin_unlock(&kvm_ptp_lock);
> +			preempt_enable_notrace();
> +			return -EOPNOTSUPP;
> +		}
> +

Is it worth making this hypercall, or even all of ptp_kvm_get_time_fn, a
pv_ops entry?

But this looks good already, apart from my different preference on
emulate_ptp_sys_offset_mean.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 20, 2017, 1:11 p.m. UTC | #2
On Fri, Jan 20, 2017 at 01:58:33PM +0100, Paolo Bonzini wrote:
> 
> 
> On 20/01/2017 13:20, Marcelo Tosatti wrote:
> > +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> > +				     clock_off_gpa,
> > +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> > +		if (ret != 0) {
> > +			pr_err("clock offset hypercall ret %lu\n", ret);
> > +			spin_unlock(&kvm_ptp_lock);
> > +			preempt_enable_notrace();
> > +			return -EOPNOTSUPP;
> > +		}
> > +
> 
> Is it worth making this hypercall, or even all of ptp_kvm_get_time_fn, a
> pv_ops entry?

Well, I don't know how Xen is going to implement this. Maybe you can,
once there are more low-level implementations to generalize from.

> But this looks good already, apart from my different preference on
> emulate_ptp_sys_offset_mean.

Sending v4 with the direct export of kvm clocksource.

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 20, 2017, 2:12 p.m. UTC | #3
2017-01-20 10:20-0200, Marcelo Tosatti:
> Add a driver with gettime method returning hosts realtime clock.
> This allows Chrony to synchronize host and guest clocks with 
> high precision (see results below).
> 
> chronyc> sources
> MS Name/IP address         Stratum Poll Reach LastRx Last sample
> ===============================================================================
> #* PHC0                          0   3   377     4   +162ns[ -683ns] +/-   11ns
> 
> To configure Chronyd to use PHC refclock, add the 
> following line to its configuration file:
> 
> refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
> 
> Where /dev/ptpX is the kvmclock PTP clock.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> ---
>  drivers/ptp/Kconfig   |   12 ++
>  drivers/ptp/Makefile  |    1 
>  drivers/ptp/ptp_kvm.c |  213 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 226 insertions(+)
> 
> v2: check for kvmclock (Radim)
>     initialize global variables before device registration (Radim)
> v3: use cross timestamps callback (Paolo, Miroslav, Radim)
> 
> Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c	2017-01-20 10:19:20.555311672 -0200
> @@ -0,0 +1,213 @@
> +/*
> + * Virtual PTP 1588 clock for use with KVM guests
> + *
> + * Copyright (C) 2017 Red Hat Inc.
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + */
> +#include <linux/device.h>
> +#include <linux/err.h>
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <uapi/linux/kvm_para.h>
> +#include <asm/kvm_para.h>
> +#include <asm/pvclock.h>
> +#include <asm/kvmclock.h>
> +#include <uapi/asm/kvm_para.h>
> +
> +#include <linux/ptp_clock_kernel.h>
> +
> +struct kvm_ptp_clock {
> +	struct ptp_clock *ptp_clock;
> +	struct ptp_clock_info caps;
> +};
> +
> +DEFINE_SPINLOCK(kvm_ptp_lock);
> +
> +static struct pvclock_vsyscall_time_info *hv_clock;
> +
> +static struct kvm_clock_offset clock_off;
> +static phys_addr_t clock_off_gpa;
> +
> +/*
> + * system_counterval.cycles: kvmclock value com TSC do host.
> + * system_counterval.cs: kvmclock clocksource.
> + * device_time: host realtime clock.
> + *
> + */
> +static int ptp_kvm_get_time_fn(ktime_t *device_time,
> +			       struct system_counterval_t *system_counter,
> +			       void *ctx)
> +{
> +	unsigned long ret;
> +	struct timespec64 tspec;
> +	unsigned version;
> +	u8 flags;
> +	int cpu;
> +	struct pvclock_vcpu_time_info *src;
> +
> +	preempt_disable_notrace();
> +	cpu = smp_processor_id();
> +	src = &hv_clock[cpu].pvti;
> +
> +	spin_lock(&kvm_ptp_lock);

What does the lock prevent?

> +
> +	do {
> +		/*
> +		 * We are measuring the delay between
> +		 * kvm_hypercall and rdtsc using TSC,
> +		 * and converting that delta to
> +		 * tsc_to_system_mul and tsc_shift
> +		 * So any changes to tsc_to_system_mul
> +		 * and tsc_shift in this region
> +		 * invalidate the measurement.
> +		 */
> +		version = pvclock_read_begin(src);
> +
> +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> +				     clock_off_gpa,
> +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> +		if (ret != 0) {
> +			pr_err("clock offset hypercall ret %lu\n", ret);
> +			spin_unlock(&kvm_ptp_lock);
> +			preempt_enable_notrace();
> +			return -EOPNOTSUPP;
> +		}
> +
> +		tspec.tv_sec = clock_off.sec;
> +		tspec.tv_nsec = clock_off.nsec;
> +		ret = __pvclock_read_cycles(src, clock_off.tsc);
> +		flags = src->flags;
> +	} while (pvclock_read_retry(src, version));
> +
> +	preempt_enable_notrace();
> +
> +	system_counter->cycles = ret;
> +	system_counter->cs = get_kvmclock_cs();

Can't we use clocksource_tsc and just pass the tsc without kvmclock in
the middle?

> +	tspec.tv_nsec = tspec.tv_nsec;

(This looks extraneous.)

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 20, 2017, 2:20 p.m. UTC | #4
2017-01-20 15:12+0100, Radim Krcmar:
> 2017-01-20 10:20-0200, Marcelo Tosatti:
>> +	spin_lock(&kvm_ptp_lock);
> 
> What does the lock prevent?

clock_off/clock_off_gpa. :)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 20, 2017, 3 p.m. UTC | #5
On Fri, Jan 20, 2017 at 03:12:56PM +0100, Radim Krcmar wrote:
> 2017-01-20 10:20-0200, Marcelo Tosatti:
> > Add a driver with gettime method returning hosts realtime clock.
> > This allows Chrony to synchronize host and guest clocks with 
> > high precision (see results below).
> > 
> > chronyc> sources
> > MS Name/IP address         Stratum Poll Reach LastRx Last sample
> > ===============================================================================
> > #* PHC0                          0   3   377     4   +162ns[ -683ns] +/-   11ns
> > 
> > To configure Chronyd to use PHC refclock, add the 
> > following line to its configuration file:
> > 
> > refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
> > 
> > Where /dev/ptpX is the kvmclock PTP clock.
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> > 
> > ---
> >  drivers/ptp/Kconfig   |   12 ++
> >  drivers/ptp/Makefile  |    1 
> >  drivers/ptp/ptp_kvm.c |  213 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 226 insertions(+)
> > 
> > v2: check for kvmclock (Radim)
> >     initialize global variables before device registration (Radim)
> > v3: use cross timestamps callback (Paolo, Miroslav, Radim)
> > 
> > Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> > ===================================================================
> > --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> > +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c	2017-01-20 10:19:20.555311672 -0200
> > @@ -0,0 +1,213 @@
> > +/*
> > + * Virtual PTP 1588 clock for use with KVM guests
> > + *
> > + * Copyright (C) 2017 Red Hat Inc.
> > + *
> > + *  This program is free software; you can redistribute it and/or modify
> > + *  it under the terms of the GNU General Public License as published by
> > + *  the Free Software Foundation; either version 2 of the License, or
> > + *  (at your option) any later version.
> > + *
> > + *  This program is distributed in the hope that it will be useful,
> > + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + *  GNU General Public License for more details.
> > + *
> > + */
> > +#include <linux/device.h>
> > +#include <linux/err.h>
> > +#include <linux/init.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <uapi/linux/kvm_para.h>
> > +#include <asm/kvm_para.h>
> > +#include <asm/pvclock.h>
> > +#include <asm/kvmclock.h>
> > +#include <uapi/asm/kvm_para.h>
> > +
> > +#include <linux/ptp_clock_kernel.h>
> > +
> > +struct kvm_ptp_clock {
> > +	struct ptp_clock *ptp_clock;
> > +	struct ptp_clock_info caps;
> > +};
> > +
> > +DEFINE_SPINLOCK(kvm_ptp_lock);
> > +
> > +static struct pvclock_vsyscall_time_info *hv_clock;
> > +
> > +static struct kvm_clock_offset clock_off;
> > +static phys_addr_t clock_off_gpa;
> > +
> > +/*
> > + * system_counterval.cycles: kvmclock value com TSC do host.
> > + * system_counterval.cs: kvmclock clocksource.
> > + * device_time: host realtime clock.
> > + *
> > + */
> > +static int ptp_kvm_get_time_fn(ktime_t *device_time,
> > +			       struct system_counterval_t *system_counter,
> > +			       void *ctx)
> > +{
> > +	unsigned long ret;
> > +	struct timespec64 tspec;
> > +	unsigned version;
> > +	u8 flags;
> > +	int cpu;
> > +	struct pvclock_vcpu_time_info *src;
> > +
> > +	preempt_disable_notrace();
> > +	cpu = smp_processor_id();
> > +	src = &hv_clock[cpu].pvti;
> > +
> > +	spin_lock(&kvm_ptp_lock);
> 
> What does the lock prevent?

Protects access to "struct kvm_clock_offset clock_off".

> > +
> > +	do {
> > +		/*
> > +		 * We are measuring the delay between
> > +		 * kvm_hypercall and rdtsc using TSC,
> > +		 * and converting that delta to
> > +		 * tsc_to_system_mul and tsc_shift
> > +		 * So any changes to tsc_to_system_mul
> > +		 * and tsc_shift in this region
> > +		 * invalidate the measurement.
> > +		 */
> > +		version = pvclock_read_begin(src);
> > +
> > +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> > +				     clock_off_gpa,
> > +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> > +		if (ret != 0) {
> > +			pr_err("clock offset hypercall ret %lu\n", ret);
> > +			spin_unlock(&kvm_ptp_lock);
> > +			preempt_enable_notrace();
> > +			return -EOPNOTSUPP;
> > +		}
> > +
> > +		tspec.tv_sec = clock_off.sec;
> > +		tspec.tv_nsec = clock_off.nsec;
> > +		ret = __pvclock_read_cycles(src, clock_off.tsc);
> > +		flags = src->flags;
> > +	} while (pvclock_read_retry(src, version));
> > +
> > +	preempt_enable_notrace();
> > +
> > +	system_counter->cycles = ret;
> > +	system_counter->cs = get_kvmclock_cs();
> 
> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
> the middle?

No, it has to be the kvmclock value.

> > +	tspec.tv_nsec = tspec.tv_nsec;

Oops, yes, will resend only this patch if there are no further comments.

> 
> (This looks extraneous.)
> 
> Thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Jan. 20, 2017, 5:11 p.m. UTC | #6
On 20/01/2017 16:00, Marcelo Tosatti wrote:
>>> +	system_counter->cs = get_kvmclock_cs();
>>
>> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
>> the middle?
> 
> No, it has to be the kvmclock value.

And that would be too tricky anyway. :)

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 20, 2017, 6:08 p.m. UTC | #7
2017-01-20 13:00-0200, Marcelo Tosatti:
> On Fri, Jan 20, 2017 at 03:12:56PM +0100, Radim Krcmar wrote:
>> 2017-01-20 10:20-0200, Marcelo Tosatti:
>> > +	do {
>> > +		/*
>> > +		 * We are measuring the delay between
>> > +		 * kvm_hypercall and rdtsc using TSC,
>> > +		 * and converting that delta to
>> > +		 * tsc_to_system_mul and tsc_shift
>> > +		 * So any changes to tsc_to_system_mul
>> > +		 * and tsc_shift in this region
>> > +		 * invalidate the measurement.
>> > +		 */
>> > +		version = pvclock_read_begin(src);
>> > +
>> > +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
>> > +				     clock_off_gpa,
>> > +				     KVM_CLOCK_PAIRING_WALLCLOCK);
>> > +		if (ret != 0) {
>> > +			pr_err("clock offset hypercall ret %lu\n", ret);
>> > +			spin_unlock(&kvm_ptp_lock);
>> > +			preempt_enable_notrace();
>> > +			return -EOPNOTSUPP;
>> > +		}
>> > +
>> > +		tspec.tv_sec = clock_off.sec;
>> > +		tspec.tv_nsec = clock_off.nsec;
>> > +		ret = __pvclock_read_cycles(src, clock_off.tsc);
>> > +		flags = src->flags;
>> > +	} while (pvclock_read_retry(src, version));
>> > +
>> > +	preempt_enable_notrace();
>> > +
>> > +	system_counter->cycles = ret;
>> > +	system_counter->cs = get_kvmclock_cs();
>> 
>> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
>> the middle?
> 
> No, it has to be the kvmclock value.

What happens if the guest switches from kvmclock to tsc?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marcelo Tosatti Jan. 20, 2017, 7:10 p.m. UTC | #8
On Fri, Jan 20, 2017 at 07:08:49PM +0100, Radim Krcmar wrote:
> 2017-01-20 13:00-0200, Marcelo Tosatti:
> > On Fri, Jan 20, 2017 at 03:12:56PM +0100, Radim Krcmar wrote:
> >> 2017-01-20 10:20-0200, Marcelo Tosatti:
> >> > +	do {
> >> > +		/*
> >> > +		 * We are measuring the delay between
> >> > +		 * kvm_hypercall and rdtsc using TSC,
> >> > +		 * and converting that delta to
> >> > +		 * tsc_to_system_mul and tsc_shift
> >> > +		 * So any changes to tsc_to_system_mul
> >> > +		 * and tsc_shift in this region
> >> > +		 * invalidate the measurement.
> >> > +		 */
> >> > +		version = pvclock_read_begin(src);
> >> > +
> >> > +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> >> > +				     clock_off_gpa,
> >> > +				     KVM_CLOCK_PAIRING_WALLCLOCK);
> >> > +		if (ret != 0) {
> >> > +			pr_err("clock offset hypercall ret %lu\n", ret);
> >> > +			spin_unlock(&kvm_ptp_lock);
> >> > +			preempt_enable_notrace();
> >> > +			return -EOPNOTSUPP;
> >> > +		}
> >> > +
> >> > +		tspec.tv_sec = clock_off.sec;
> >> > +		tspec.tv_nsec = clock_off.nsec;
> >> > +		ret = __pvclock_read_cycles(src, clock_off.tsc);
> >> > +		flags = src->flags;
> >> > +	} while (pvclock_read_retry(src, version));
> >> > +
> >> > +	preempt_enable_notrace();
> >> > +
> >> > +	system_counter->cycles = ret;
> >> > +	system_counter->cs = get_kvmclock_cs();
> >> 
> >> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
> >> the middle?
> > 
> > No, it has to be the kvmclock value.
> 
> What happens if the guest switches from kvmclock to tsc?

The ioctl will return -ENODEV.

From get_device_system_crosststamp function:

                /*
                 * Verify that the clocksource associated with the
                 * captured
                 * system counter value is the same as the currently
                 * installed
                 * timekeeper clocksource
                 */
                if (tk->tkr_mono.clock != system_counterval.cs)
                        return -ENODEV;



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Jan. 21, 2017, 8:02 a.m. UTC | #9
On 20/01/2017 19:08, Radim Krcmar wrote:
> 2017-01-20 13:00-0200, Marcelo Tosatti:
>> On Fri, Jan 20, 2017 at 03:12:56PM +0100, Radim Krcmar wrote:
>>> 2017-01-20 10:20-0200, Marcelo Tosatti:
>>>> +	do {
>>>> +		/*
>>>> +		 * We are measuring the delay between
>>>> +		 * kvm_hypercall and rdtsc using TSC,
>>>> +		 * and converting that delta to
>>>> +		 * tsc_to_system_mul and tsc_shift
>>>> +		 * So any changes to tsc_to_system_mul
>>>> +		 * and tsc_shift in this region
>>>> +		 * invalidate the measurement.
>>>> +		 */
>>>> +		version = pvclock_read_begin(src);
>>>> +
>>>> +		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
>>>> +				     clock_off_gpa,
>>>> +				     KVM_CLOCK_PAIRING_WALLCLOCK);
>>>> +		if (ret != 0) {
>>>> +			pr_err("clock offset hypercall ret %lu\n", ret);
>>>> +			spin_unlock(&kvm_ptp_lock);
>>>> +			preempt_enable_notrace();
>>>> +			return -EOPNOTSUPP;
>>>> +		}
>>>> +
>>>> +		tspec.tv_sec = clock_off.sec;
>>>> +		tspec.tv_nsec = clock_off.nsec;
>>>> +		ret = __pvclock_read_cycles(src, clock_off.tsc);
>>>> +		flags = src->flags;
>>>> +	} while (pvclock_read_retry(src, version));
>>>> +
>>>> +	preempt_enable_notrace();
>>>> +
>>>> +	system_counter->cycles = ret;
>>>> +	system_counter->cs = get_kvmclock_cs();
>>>
>>> Can't we use clocksource_tsc and just pass the tsc without kvmclock in
>>> the middle?
>>
>> No, it has to be the kvmclock value.
> 
> What happens if the guest switches from kvmclock to tsc?

get_device_system_crosststamp handles it, that's why there is a
clocksource field in system_counter.

Paolo

> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
diff mbox

Patch

===============================================================================
#* PHC0                          0   3   377     4   +162ns[ -683ns] +/-   11ns

To configure Chronyd to use PHC refclock, add the 
following line to its configuration file:

refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0

Where /dev/ptpX is the kvmclock PTP clock.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---
 drivers/ptp/Kconfig   |   12 ++
 drivers/ptp/Makefile  |    1 
 drivers/ptp/ptp_kvm.c |  213 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+)

v2: check for kvmclock (Radim)
    initialize global variables before device registration (Radim)
v3: use cross timestamps callback (Paolo, Miroslav, Radim)

Index: kvm-ptpdriver/drivers/ptp/Kconfig
===================================================================
--- kvm-ptpdriver.orig/drivers/ptp/Kconfig	2017-01-20 10:03:44.458489214 -0200
+++ kvm-ptpdriver/drivers/ptp/Kconfig	2017-01-20 10:04:26.912597433 -0200
@@ -90,4 +90,16 @@ 
 	  To compile this driver as a module, choose M here: the module
 	  will be called ptp_pch.
 
+config PTP_1588_CLOCK_KVM
+	tristate "KVM virtual PTP clock"
+	depends on PTP_1588_CLOCK
+	depends on KVM_GUEST
+	default y
+	help
+	  This driver adds support for using kvm infrastructure as a PTP
+	  clock. This clock is only useful if you are using KVM guests.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called ptp_kvm.
+
 endmenu
Index: kvm-ptpdriver/drivers/ptp/Makefile
===================================================================
--- kvm-ptpdriver.orig/drivers/ptp/Makefile	2017-01-20 10:03:44.458489214 -0200
+++ kvm-ptpdriver/drivers/ptp/Makefile	2017-01-20 10:04:26.913597436 -0200
@@ -6,3 +6,4 @@ 
 obj-$(CONFIG_PTP_1588_CLOCK)		+= ptp.o
 obj-$(CONFIG_PTP_1588_CLOCK_IXP46X)	+= ptp_ixp46x.o
 obj-$(CONFIG_PTP_1588_CLOCK_PCH)	+= ptp_pch.o
+obj-$(CONFIG_PTP_1588_CLOCK_KVM)	+= ptp_kvm.o
Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c	2017-01-20 10:19:20.555311672 -0200
@@ -0,0 +1,213 @@ 
+/*
+ * Virtual PTP 1588 clock for use with KVM guests
+ *
+ * Copyright (C) 2017 Red Hat Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ */
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <uapi/linux/kvm_para.h>
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/kvmclock.h>
+#include <uapi/asm/kvm_para.h>
+
+#include <linux/ptp_clock_kernel.h>
+
+/**
+ * struct kvm_ptp_clock - bookkeeping for the KVM virtual PTP clock
+ * @ptp_clock: handle obtained from ptp_clock_register()
+ * @caps:      operation/capability table passed to the PTP core at
+ *             registration time
+ */
+struct kvm_ptp_clock {
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info caps;
+};
+
+/*
+ * Serializes users of clock_off: the buffer is shared by every caller that
+ * issues the KVM_HC_CLOCK_PAIRING hypercall.  Only used inside this file, so
+ * keep the symbol out of the global namespace.
+ */
+static DEFINE_SPINLOCK(kvm_ptp_lock);
+
+/* Base of the pvclock time-info array, indexed by CPU number. */
+static struct pvclock_vsyscall_time_info *hv_clock;
+
+/*
+ * Result buffer for the clock pairing hypercall and its physical address,
+ * which is what is actually handed to the hypercall.
+ */
+static struct kvm_clock_offset clock_off;
+static phys_addr_t clock_off_gpa;
+
+/*
+ * Cross-timestamp callback handed to get_device_system_crosststamp().
+ *
+ * Issues the KVM_HC_CLOCK_PAIRING hypercall to obtain a host realtime
+ * sample together with a host TSC value, then converts that TSC value into
+ * a kvmclock reading using the current CPU's pvclock parameters.
+ *
+ * @device_time:    set to the host realtime clock sample.
+ * @system_counter: ->cycles receives the kvmclock value computed from the
+ *                  host TSC sample, ->cs the kvmclock clocksource.
+ * @ctx:            unused.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the hypercall reports an error.
+ */
+static int ptp_kvm_get_time_fn(ktime_t *device_time,
+			       struct system_counterval_t *system_counter,
+			       void *ctx)
+{
+	unsigned long ret;
+	u64 cycles;
+	struct timespec64 tspec;
+	unsigned version;
+	struct pvclock_vcpu_time_info *src;
+
+	/*
+	 * clock_off is a single shared buffer, so hold the lock for as long
+	 * as it is being filled and read.  Take the lock before disabling
+	 * preemption and release in the reverse order so the two regions
+	 * nest properly.
+	 */
+	spin_lock(&kvm_ptp_lock);
+
+	/* Stay on this CPU while its pvclock area is being sampled. */
+	preempt_disable_notrace();
+	src = &hv_clock[smp_processor_id()].pvti;
+
+	do {
+		/*
+		 * We are measuring the delay between
+		 * kvm_hypercall and rdtsc using TSC,
+		 * and converting that delta to
+		 * tsc_to_system_mul and tsc_shift
+		 * So any changes to tsc_to_system_mul
+		 * and tsc_shift in this region
+		 * invalidate the measurement.
+		 */
+		version = pvclock_read_begin(src);
+
+		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
+				     clock_off_gpa,
+				     KVM_CLOCK_PAIRING_WALLCLOCK);
+		if (ret != 0) {
+			pr_err("clock offset hypercall ret %lu\n", ret);
+			preempt_enable_notrace();
+			spin_unlock(&kvm_ptp_lock);
+			return -EOPNOTSUPP;
+		}
+
+		tspec.tv_sec = clock_off.sec;
+		tspec.tv_nsec = clock_off.nsec;
+		/*
+		 * Keep the cycle value in a 64-bit variable of its own
+		 * rather than reusing 'ret', which is only long-sized.
+		 */
+		cycles = __pvclock_read_cycles(src, clock_off.tsc);
+	} while (pvclock_read_retry(src, version));
+
+	preempt_enable_notrace();
+
+	system_counter->cycles = cycles;
+	system_counter->cs = get_kvmclock_cs();
+
+	/* tspec is a timespec64, so use the matching conversion helper. */
+	*device_time = timespec64_to_ktime(tspec);
+
+	spin_unlock(&kvm_ptp_lock);
+
+	return 0;
+}
+
+/*
+ * .getcrosststamp handler: delegates to get_device_system_crosststamp(),
+ * with ptp_kvm_get_time_fn() supplying the paired device/system sample.
+ * Both optional pointer arguments of that helper are passed as NULL.
+ * @ptp is not used.
+ */
+static int ptp_kvm_getcrosststamp(struct ptp_clock_info *ptp,
+				  struct system_device_crosststamp *xtstamp)
+{
+	int err;
+
+	err = get_device_system_crosststamp(ptp_kvm_get_time_fn,
+					    NULL, NULL, xtstamp);
+	return err;
+}
+
+/*
+ * PTP clock operations
+ */
+
+/* Frequency adjustment is not implemented: always fails with -EOPNOTSUPP. */
+static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	return -EOPNOTSUPP;
+}
+
+/* Time offset adjustment is not implemented: always fails with -EOPNOTSUPP. */
+static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * .gettime64 handler: reads the time via the KVM_HC_CLOCK_PAIRING hypercall.
+ *
+ * @ptp: unused.
+ * @ts:  receives the seconds/nanoseconds pair found in clock_off after the
+ *       hypercall; left untouched on failure.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the hypercall reports an error.
+ */
+static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+	struct timespec64 sample;
+	unsigned long hc_ret;
+	int err = 0;
+
+	/* clock_off is shared, so it is filled and copied out under the lock. */
+	spin_lock(&kvm_ptp_lock);
+
+	hc_ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_off_gpa,
+				KVM_CLOCK_PAIRING_WALLCLOCK);
+	if (hc_ret != 0) {
+		pr_err("clock offset hypercall ret %lu\n", hc_ret);
+		err = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
+	sample.tv_sec = clock_off.sec;
+	sample.tv_nsec = clock_off.nsec;
+
+out_unlock:
+	spin_unlock(&kvm_ptp_lock);
+
+	/* Hand the snapshot to the caller only after dropping the lock. */
+	if (!err)
+		*ts = sample;
+
+	return err;
+}
+
+/* Setting the clock is not implemented: always fails with -EOPNOTSUPP. */
+static int ptp_kvm_settime(struct ptp_clock_info *ptp,
+			   const struct timespec64 *ts)
+{
+	return -EOPNOTSUPP;
+}
+
+/* No ancillary feature can be enabled: always fails with -EOPNOTSUPP. */
+static int ptp_kvm_enable(struct ptp_clock_info *ptp,
+			  struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Capability and operation table for the KVM PTP clock.
+ *
+ * This is only a template: ptp_kvm_init() copies it into kvm_ptp_clock.caps
+ * and registers that copy, so the table itself is never written and can be
+ * const.  No frequency adjustment range, external timestamps, pins or PPS
+ * are advertised.
+ */
+static const struct ptp_clock_info ptp_kvm_caps = {
+	.owner		= THIS_MODULE,
+	.name		= "KVM virtual PTP",
+	.max_adj	= 0,
+	.n_ext_ts	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.adjfreq	= ptp_kvm_adjfreq,
+	.adjtime	= ptp_kvm_adjtime,
+	.gettime64	= ptp_kvm_gettime,
+	.settime64	= ptp_kvm_settime,
+	.enable		= ptp_kvm_enable,
+	.getcrosststamp = ptp_kvm_getcrosststamp,
+	.emulate_ptp_sys_offset_mean = true,
+};
+
+/* module operations */
+
+/* The single clock instance managed by this module. */
+static struct kvm_ptp_clock kvm_ptp_clock;
+
+/* Module exit: unregister the clock stored in kvm_ptp_clock. */
+static void __exit ptp_kvm_exit(void)
+{
+	struct ptp_clock *clk = kvm_ptp_clock.ptp_clock;
+
+	ptp_clock_unregister(clk);
+}
+
+/*
+ * Module init: set up the file-scope state used by the clock callbacks
+ * (clock_off_gpa, hv_clock) before the clock is registered, then register
+ * it with the PTP core.
+ *
+ * Returns 0 on success, -ENODEV if pvclock_pvti_cpu0_va() yields no pvclock
+ * area, or the error encoded in the pointer from ptp_clock_register().
+ */
+static int __init ptp_kvm_init(void)
+{
+	struct ptp_clock *clk;
+
+	clock_off_gpa = slow_virt_to_phys(&clock_off);
+
+	hv_clock = pvclock_pvti_cpu0_va();
+	if (!hv_clock)
+		return -ENODEV;
+
+	/* Register a private copy of the capability template. */
+	kvm_ptp_clock.caps = ptp_kvm_caps;
+	clk = ptp_clock_register(&kvm_ptp_clock.caps, NULL);
+	kvm_ptp_clock.ptp_clock = clk;
+
+	return PTR_ERR_OR_ZERO(clk);
+}
+
+module_init(ptp_kvm_init);
+module_exit(ptp_kvm_exit);
+
+MODULE_AUTHOR("Marcelo Tosatti <mtosatti@redhat.com>");
+MODULE_DESCRIPTION("PTP clock using KVMCLOCK");
+MODULE_LICENSE("GPL");