
kvm PTP: drop TSC correction from ptp_kvm_gettime

Message ID 20170208133138.GA1126@amt.cnet (mailing list archive)
State New, archived

Commit Message

Marcelo Tosatti Feb. 8, 2017, 1:31 p.m. UTC
The TSC correction in ptp_kvm_gettime was an attempt to handle
high load situations. Unfortunately, it is incorrect because
userspace expects the PTP clock read to fall at the midpoint
between the two realtime clock reads:

	|* | | | | | | | | | | | |* | | | | | | | | | | | |* |
	rt-read-1	ptp-clock-read-1	rt-read-2

The TSC correction instead shifts the effective PTP clock read
towards the second realtime read:

	|* | | | | | | | | | | | | | | | | |* | | | | | | |* |
	rt-read-1	ptp-clock-read-1	rt-read-2

So drop the TSC offset correction, which increases the synchronization
precision of an idle guest to <= 10ns.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
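
To illustrate the midpoint assumption: a PHC-to-system offset measurement
in userspace (as done by tools such as phc2sys) brackets the PTP clock read
with two CLOCK_REALTIME reads and treats the PTP read as having happened at
their midpoint. A minimal sketch, assuming the ptp_kvm device shows up as
/dev/ptp0 and using the usual FD_TO_CLOCKID convention for dynamic POSIX
clocks:

/* Minimal sketch: estimate the PHC-to-system offset, assuming the PHC
 * read lands at the midpoint of the two CLOCK_REALTIME reads. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define CLOCKFD 3
#define FD_TO_CLOCKID(fd) ((~(clockid_t)(fd) << 3) | CLOCKFD)

static int64_t ts_to_ns(const struct timespec *ts)
{
	return (int64_t)ts->tv_sec * 1000000000LL + ts->tv_nsec;
}

int main(void)
{
	struct timespec rt1, phc, rt2;
	int fd = open("/dev/ptp0", O_RDONLY);	/* device path assumed */

	if (fd < 0) {
		perror("open /dev/ptp0");
		return 1;
	}

	clock_gettime(CLOCK_REALTIME, &rt1);		/* rt-read-1 */
	clock_gettime(FD_TO_CLOCKID(fd), &phc);		/* ptp-clock-read-1 */
	clock_gettime(CLOCK_REALTIME, &rt2);		/* rt-read-2 */

	/* Offset relative to the assumed midpoint; half the window is the
	 * measurement uncertainty. */
	int64_t mid = (ts_to_ns(&rt1) + ts_to_ns(&rt2)) / 2;
	int64_t offset = ts_to_ns(&phc) - mid;
	int64_t err = (ts_to_ns(&rt2) - ts_to_ns(&rt1)) / 2;

	printf("offset %lld ns (+/- %lld ns)\n",
	       (long long)offset, (long long)err);
	return 0;
}

If the clock driver itself adds time that elapsed after its sample was
taken, the reported PTP time corresponds to a point later than this
midpoint, which is the error the patch removes.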

Comments

Radim Krčmář Feb. 8, 2017, 1:59 p.m. UTC | #1
2017-02-08 11:31-0200, Marcelo Tosatti:
> 
> The TSC correction in ptp_kvm_gettime was an attempt to handle
> high load situations. Unfortunately, it is incorrect because
> userspace expects the PTP clock read to fall at the midpoint
> between the two realtime clock reads:
> 
> 	|* | | | | | | | | | | | |* | | | | | | | | | | | |* |
> 	rt-read-1	ptp-clock-read-1	rt-read-2
> 
> The TSC correction instead shifts the effective PTP clock read
> towards the second realtime read:
> 
> 	|* | | | | | | | | | | | | | | | | |* | | | | | | |* |
> 	rt-read-1	ptp-clock-read-1	rt-read-2
> 
> So drop the TSC offset correction, which increases the synchronization
> precision of an idle guest to <= 10ns.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Can't say no to the diffstat, :)

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>

(The subject tag should be different, though, "ptp_kvm:"?)

> diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c
> index 3ea41b8..994aa04 100644
> --- a/drivers/ptp/ptp_kvm.c
> +++ b/drivers/ptp/ptp_kvm.c
> @@ -123,61 +123,21 @@ static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
>  {
>  	unsigned long ret;
>  	struct timespec64 tspec;
> -	u64 delta, offset;
> -	unsigned version;
> -	int cpu;
> -	struct pvclock_vcpu_time_info *src;
>  
>  	spin_lock(&kvm_ptp_lock);
>  
> -	preempt_disable_notrace();
> -	cpu = smp_processor_id();
> -	src = &hv_clock[cpu].pvti;
> -
> -	do {
> -		/*
> -		 * We are measuring the delay between
> -		 * kvm_hypercall and rdtsc using TSC,
> -		 * and converting that delta through
> -		 * tsc_to_system_mul and tsc_shift
> -		 * So any changes to tsc_to_system_mul
> -		 * and tsc_shift in this region
> -		 * invalidate the measurement.
> -		 */
> -		version = pvclock_read_begin(src);
> -
> -		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> -				     clock_pair_gpa,
> -				     KVM_CLOCK_PAIRING_WALLCLOCK);
> -		if (ret != 0) {
> -			pr_err_ratelimited("clock pairing hypercall ret %lu\n", ret);
> -			spin_unlock(&kvm_ptp_lock);
> -			preempt_enable_notrace();
> -			return -EOPNOTSUPP;
> -		}
> -
> -		tspec.tv_sec = clock_pair.sec;
> -		tspec.tv_nsec = clock_pair.nsec;
> -
> -		delta = rdtsc_ordered() - clock_pair.tsc;
> -		offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
> -					     src->tsc_shift);
> -	} while (pvclock_read_retry(src, version));
> -
> -	preempt_enable_notrace();
> -
> -	spin_unlock(&kvm_ptp_lock);
> -
> -	while (unlikely(offset >= NSEC_PER_SEC)) {
> -		tspec.tv_sec++;
> -		offset -= NSEC_PER_SEC;
> +	ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
> +			     clock_pair_gpa,
> +			     KVM_CLOCK_PAIRING_WALLCLOCK);
> +	if (ret != 0) {
> +		pr_err("clock offset hypercall ret %lu\n", ret);
> +		spin_unlock(&kvm_ptp_lock);
> +		return -EOPNOTSUPP;
>  	}
>  
> -	tspec.tv_nsec = tspec.tv_nsec + offset;
> -	if (tspec.tv_nsec >= NSEC_PER_SEC) {
> -		tspec.tv_sec++;
> -		tspec.tv_nsec -= NSEC_PER_SEC;
> -	}
> +	tspec.tv_sec = clock_pair.sec;
> +	tspec.tv_nsec = clock_pair.nsec;
> +	spin_unlock(&kvm_ptp_lock);
>  
>  	memcpy(ts, &tspec, sizeof(struct timespec64));
>
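
For context on the simplified code path: clock_pair is filled by the host
with a CLOCK_REALTIME sample (sec, nsec) paired with the host TSC value at
which that sample was taken. The guest-side structure is roughly the
following (paraphrased from the x86 KVM UAPI header; treat the exact layout
and padding as an approximation):

#include <linux/types.h>

struct kvm_clock_pairing {
	__s64 sec;	/* host realtime seconds at the pairing */
	__s64 nsec;	/* host realtime nanoseconds at the pairing */
	__u64 tsc;	/* host TSC value when the sample was taken */
	__u32 flags;
	__u32 pad[9];	/* reserved */
};

With the correction dropped, ptp_kvm_gettime() reports (sec, nsec) directly,
i.e. the host realtime at the moment of the hypercall, which is the read
point userspace expects.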
Paolo Bonzini Feb. 8, 2017, 4:14 p.m. UTC | #2
On 08/02/2017 14:31, Marcelo Tosatti wrote:
> The TSC correction in ptp_kvm_gettime was an attempt to handle
> high load situations. Unfortunately, it is incorrect because
> userspace expects the PTP clock read to fall at the midpoint
> between the two realtime clock reads:
> 
> 	|* | | | | | | | | | | | |* | | | | | | | | | | | |* |
> 	rt-read-1	ptp-clock-read-1	rt-read-2
> 
> The TSC correction instead shifts the effective PTP clock read
> towards the second realtime read:
> 
> 	|* | | | | | | | | | | | | | | | | |* | | | | | | |* |
> 	rt-read-1	ptp-clock-read-1	rt-read-2
> 
> So drop the TSC offset correction, which increases the synchronization
> precision of an idle guest to <= 10ns.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Applied, thanks.

Paolo

Patch

diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c
index 3ea41b8..994aa04 100644
--- a/drivers/ptp/ptp_kvm.c
+++ b/drivers/ptp/ptp_kvm.c
@@ -123,61 +123,21 @@  static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
 	unsigned long ret;
 	struct timespec64 tspec;
-	u64 delta, offset;
-	unsigned version;
-	int cpu;
-	struct pvclock_vcpu_time_info *src;
 
 	spin_lock(&kvm_ptp_lock);
 
-	preempt_disable_notrace();
-	cpu = smp_processor_id();
-	src = &hv_clock[cpu].pvti;
-
-	do {
-		/*
-		 * We are measuring the delay between
-		 * kvm_hypercall and rdtsc using TSC,
-		 * and converting that delta through
-		 * tsc_to_system_mul and tsc_shift
-		 * So any changes to tsc_to_system_mul
-		 * and tsc_shift in this region
-		 * invalidate the measurement.
-		 */
-		version = pvclock_read_begin(src);
-
-		ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
-				     clock_pair_gpa,
-				     KVM_CLOCK_PAIRING_WALLCLOCK);
-		if (ret != 0) {
-			pr_err_ratelimited("clock pairing hypercall ret %lu\n", ret);
-			spin_unlock(&kvm_ptp_lock);
-			preempt_enable_notrace();
-			return -EOPNOTSUPP;
-		}
-
-		tspec.tv_sec = clock_pair.sec;
-		tspec.tv_nsec = clock_pair.nsec;
-
-		delta = rdtsc_ordered() - clock_pair.tsc;
-		offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
-					     src->tsc_shift);
-	} while (pvclock_read_retry(src, version));
-
-	preempt_enable_notrace();
-
-	spin_unlock(&kvm_ptp_lock);
-
-	while (unlikely(offset >= NSEC_PER_SEC)) {
-		tspec.tv_sec++;
-		offset -= NSEC_PER_SEC;
+	ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
+			     clock_pair_gpa,
+			     KVM_CLOCK_PAIRING_WALLCLOCK);
+	if (ret != 0) {
+		pr_err("clock offset hypercall ret %lu\n", ret);
+		spin_unlock(&kvm_ptp_lock);
+		return -EOPNOTSUPP;
 	}
 
-	tspec.tv_nsec = tspec.tv_nsec + offset;
-	if (tspec.tv_nsec >= NSEC_PER_SEC) {
-		tspec.tv_sec++;
-		tspec.tv_nsec -= NSEC_PER_SEC;
-	}
+	tspec.tv_sec = clock_pair.sec;
+	tspec.tv_nsec = clock_pair.nsec;
+	spin_unlock(&kvm_ptp_lock);
 
 	memcpy(ts, &tspec, sizeof(struct timespec64));
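
For reference, the correction removed here converted a guest-side TSC delta
into nanoseconds with the pvclock parameters and added the result on top of
the paired realtime sample. A standalone model of that arithmetic, assuming
it mirrors pvclock_scale_delta() (a 128-bit multiply stands in for the
kernel's inline assembly):

#include <stdint.h>

/*
 * Scale a TSC delta into nanoseconds: apply the power-of-two pre-shift
 * (tsc_shift), then multiply by the 32.32 fixed-point factor
 * (tsc_to_system_mul) and keep the high part.  __uint128_t is a
 * GCC/Clang extension, fine on x86_64.
 */
static uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

	return (uint64_t)(((__uint128_t)delta * mul_frac) >> 32);
}

The removed code added scale_delta(rdtsc_ordered() - clock_pair.tsc, ...)
to clock_pair.nsec, i.e. the guest time that elapsed after the host captured
the pairing. That is the shift of the effective read point towards rt-read-2
shown in the commit message, and dropping it restores the expected midpoint
behaviour.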