diff mbox series

[1/3] trace-cmd: Extend host-guest time sync with fraction bits

Message ID 20210923094526.765059-2-tz.stoyanov@gmail.com (mailing list archive)
State Superseded
Headers show
Series trace-cmd: Align guest TSC calculation with the kernel | expand

Commit Message

Tzvetomir Stoyanov (VMware) Sept. 23, 2021, 9:45 a.m. UTC
The formula, used in the Linux kernel for guest TSC clock is:
 guest_tsc = tsc-offset + (host_tsc * tsc-scaling-ratio) >> tsc-scaling-ratio-frac-bits

The KVM hypervisor exports these parameters in the debugfs, if clock
scaling is supported:
 /sys/kernel/debug/kvm:
	tsc-offset
	tsc-scaling-ratio
	tsc-scaling-ratio-frac-bits

Current trace-cmd implementation does not take into account fraction
bits, when calculating guest clock offset in case of a clock scaling.
The TRACECMD_OPTION_TIME_SHIFT option is extended with per-cpu array
with fraction bits. KVM and PTP algorithms are extended to get fraction:
 - in PTP, there are fixed scaling=1 and fraction=0;
 - KVM gets these from KVM debugfs, if available.
The formula from the kernel is implemented in trace-cmd library, when
calculating timestamps of the guest events.

This change is backward compatible with trace-cmd 2.9, where the
TRACECMD_OPTION_TIME_SHIFT option is introduced.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 .../include/private/trace-cmd-private.h       |  4 +-
 lib/trace-cmd/include/trace-tsync-local.h     |  7 ++-
 lib/trace-cmd/trace-timesync-kvm.c            | 59 ++++++++++++++-----
 lib/trace-cmd/trace-timesync-ptp.c            |  4 +-
 lib/trace-cmd/trace-timesync.c                | 49 +++++++++++----
 5 files changed, 93 insertions(+), 30 deletions(-)

Comments

Steven Rostedt Oct. 13, 2021, 3:04 a.m. UTC | #1
On Thu, 23 Sep 2021 12:45:24 +0300
"Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:
> @@ -175,6 +184,12 @@ static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
>  					 dir_str, entry->d_name);
>  				kvm->vcpu_scalings[cpu] = strdup(path);
>  			}
> +			if (!strncmp(entry->d_name, KVM_DEBUG_FRACTION_FILE,
> +				     strlen(KVM_DEBUG_FRACTION_FILE))) {

I'm curious, why not just use strcmp(), or does "d_name" have more
characters than the fraction file?

> +				snprintf(path, sizeof(path), "%s/%s",
> +					 dir_str, entry->d_name);
> +				kvm->vcpu_frac[cpu] = strdup(path);
> +			}
>  		}
>  	}
>  	if (!kvm->vcpu_offsets[cpu])
> @@ -189,6 +204,8 @@ error:
>  	kvm->vcpu_offsets[cpu] = NULL;
>  	free(kvm->vcpu_scalings[cpu]);
>  	kvm->vcpu_scalings[cpu] = NULL;
> +	free(kvm->vcpu_frac[cpu]);
> +	kvm->vcpu_frac[cpu] = NULL;
>  	return -1;
>  }
>  
> @@ -254,7 +271,8 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
>  	kvm->vcpu_count = tsync->vcpu_count;
>  	kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *));
>  	kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *));
> -	if (!kvm->vcpu_offsets || !kvm->vcpu_scalings)
> +	kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *));
> +	if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac)
>  		goto error;
>  	if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0)
>  		goto error;
> @@ -263,6 +281,7 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
>  error:
>  	free(kvm->vcpu_offsets);
>  	free(kvm->vcpu_scalings);
> +	free(kvm->vcpu_frac);
>  	return -1;
>  }
>  
> @@ -353,6 +372,8 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
>  			kvm->vcpu_offsets[i] = NULL;
>  			free(kvm->vcpu_scalings[i]);
>  			kvm->vcpu_scalings[i] = NULL;
> +			free(kvm->vcpu_frac[i]);
> +			kvm->vcpu_frac[i] = NULL;
>  		}
>  		if (kvm->tep)
>  			tep_free(kvm->tep);
> @@ -364,7 +385,7 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
>  }
>  
>  static int kvm_clock_host(struct tracecmd_time_sync *tsync,
> -			  long long *offset, long long *scaling,
> +			  long long *offset, long long *scaling, long long *frac,
>  			  long long *timestamp, unsigned int cpu)
>  {
>  	char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
> @@ -374,6 +395,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
>  	long long kvm_scaling = 1;
                              ^^^
Hmm, this is initialized here.

>  	unsigned int sync_msg;
>  	long long kvm_offset;
> +	long long kvm_frac;
>  	unsigned int size;
>  	char *msg;
>  	int ret;
> @@ -388,12 +410,18 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
>  	ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset);
>  	if (ret < 0)
>  		return -1;
> +
> +	kvm_scaling = 1;

Why are you adding the above?

>  	if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) {
>  		read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling);
>  		if (kvm_scaling == KVM_SCALING_AMD_DEFAULT ||
>  		    kvm_scaling == KVM_SCALING_INTEL_DEFAULT)
>  			kvm_scaling = 1;
>  	}
> +
> +	kvm_frac = 0;

Should probably initialize that at the beginning too.

> +	if (kvm->vcpu_frac && kvm->vcpu_frac[cpu])
> +		ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac);
>  	msg = (char *)&packet;
>  	size = sizeof(packet);
>  	ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
> @@ -405,6 +433,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
>  
>  	packet.offset = -kvm_offset;
>  	packet.scaling = kvm_scaling;
> +	packet.frac = kvm_frac;
>  	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME,
>  					  KVM_SYNC_PKT_RESPONSE, sizeof(packet),
>  					  (char *)&packet);
> @@ -413,6 +442,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
>  
>  	*scaling = packet.scaling;
>  	*offset = packet.offset;
> +	*frac = kvm_frac;
>  	*timestamp = packet.ts;
>  
>  	return 0;
> @@ -444,10 +474,10 @@ static int kvm_marker_find(struct tep_event *event, struct tep_record *record,
>  	return 0;
>  }
>  
> -
>  static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
>  			   long long *offset,
>  			   long long *scaling,
> +			   long long *frac,
>  			   long long *timestamp)
>  {
>  	char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
> @@ -488,12 +518,13 @@ static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
>  
>  	*scaling = packet.scaling;
>  	*offset = packet.offset;
> +	*frac = packet.frac;
>  	*timestamp = packet.ts;
>  	return 0;
>  }
>  
>  static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
> -			       long long *offset, long long *scaling,
> +			       long long *offset, long long *scaling, long long *frac,
>  			       long long *timestamp, unsigned int cpu)
>  {
>  	struct clock_sync_context *clock_context;
> @@ -505,9 +536,9 @@ static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
>  	clock_context = (struct clock_sync_context *)tsync->context;
>  
>  	if (clock_context->is_guest)
> -		ret = kvm_clock_guest(tsync, offset, scaling, timestamp);
> +		ret = kvm_clock_guest(tsync, offset, scaling, frac, timestamp);
>  	else
> -		ret = kvm_clock_host(tsync, offset, scaling, timestamp, cpu);
> +		ret = kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu);
>  	return ret;
>  }
>  
> diff --git a/lib/trace-cmd/trace-timesync-ptp.c b/lib/trace-cmd/trace-timesync-ptp.c
> index b05f1cd0..70242ee3 100644
> --- a/lib/trace-cmd/trace-timesync-ptp.c
> +++ b/lib/trace-cmd/trace-timesync-ptp.c
> @@ -663,7 +663,7 @@ static int ptp_clock_server(struct tracecmd_time_sync *tsync,
>  }
>  
>  static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
> -			       long long *offset, long long *scaling,
> +			       long long *offset, long long *scaling, long long *frac,
>  			       long long *timestamp, unsigned int cpu)
>  {
>  	struct clock_sync_context *clock_context;
> @@ -689,6 +689,8 @@ static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
>  
>  	if (scaling)
>  		*scaling = 1;
> +	if (frac)
> +		*frac = 0;
>  	if (clock_context->is_server)
>  		ret = ptp_clock_server(tsync, offset, timestamp);
>  	else
> diff --git a/lib/trace-cmd/trace-timesync.c b/lib/trace-cmd/trace-timesync.c
> index 19ca19d7..298913a1 100644
> --- a/lib/trace-cmd/trace-timesync.c
> +++ b/lib/trace-cmd/trace-timesync.c
> @@ -36,7 +36,7 @@ struct tsync_proto {
>  	int (*clock_sync_init)(struct tracecmd_time_sync *clock_context);
>  	int (*clock_sync_free)(struct tracecmd_time_sync *clock_context);
>  	int (*clock_sync_calc)(struct tracecmd_time_sync *clock_context,
> -			       long long *offset, long long *scaling,
> +			       long long *offset, long long *scaling, long long *frac,
>  			       long long *timestamp, unsigned int cpu);
>  };
>  
> @@ -76,7 +76,7 @@ int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int role
>  				  int (*init)(struct tracecmd_time_sync *),
>  				  int (*free)(struct tracecmd_time_sync *),
>  				  int (*calc)(struct tracecmd_time_sync *,
> -					      long long *, long long *,
> +					      long long *, long long *, long long *,
>  					      long long *, unsigned int))
>  {
>  	struct tsync_proto *proto = NULL;
> @@ -137,12 +137,13 @@ bool __hidden tsync_proto_is_supported(const char *proto_name)
>   * @ts: Array of size @count containing timestamps of callculated offsets
>   * @offsets: array of size @count, containing offsets for each timestamp
>   * @scalings: array of size @count, containing scaling ratios for each timestamp
> + * @frac: array of size @count, containing fraction bits for each timestamp
>   *
>   * Retuns -1 in case of an error, or 0 otherwise
>   */
>  int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
>  			       int *count, long long **ts,
> -			       long long **offsets, long long **scalings)
> +			       long long **offsets, long long **scalings, long long **frac)
>  {
>  	struct clock_sync_context *tsync_context;
>  
> @@ -159,6 +160,8 @@ int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
>  		*offsets = tsync_context->offsets[cpu].sync_offsets;
>  	if (scalings)
>  		*scalings = tsync_context->offsets[cpu].sync_scalings;
> +	if (frac)
> +		*frac = tsync_context->offsets[cpu].sync_frac;
>  
>  	return 0;
>  }
> @@ -564,9 +567,11 @@ void tracecmd_tsync_free(struct tracecmd_time_sync *tsync)
>  				free(tsync_context->offsets[i].sync_ts);
>  				free(tsync_context->offsets[i].sync_offsets);
>  				free(tsync_context->offsets[i].sync_scalings);
> +				free(tsync_context->offsets[i].sync_frac);
>  				tsync_context->offsets[i].sync_ts = NULL;
>  				tsync_context->offsets[i].sync_offsets = NULL;
>  				tsync_context->offsets[i].sync_scalings = NULL;
> +				tsync_context->offsets[i].sync_frac = NULL;
>  				tsync_context->offsets[i].sync_count = 0;
>  				tsync_context->offsets[i].sync_size = 0;
>  			}
> @@ -643,10 +648,11 @@ static int tsync_send(struct tracecmd_time_sync *tsync,
>  	long long timestamp = 0;
>  	long long scaling = 0;
>  	long long offset = 0;
> +	long long frac = 0;
>  	int ret;
>  
>  	old_set = pin_to_cpu(cpu);
> -	ret = proto->clock_sync_calc(tsync, &offset, &scaling, &timestamp, cpu);
> +	ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, &timestamp, cpu);
>  	if (old_set)
>  		restore_pin_to_cpu(old_set);
>  
> @@ -685,10 +691,11 @@ static void tsync_with_host(struct tracecmd_time_sync *tsync)
>  }
>  
>  static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step,
> -			      long long offset, long long scaling, long long ts)
> +			      long long offset, long long scaling, long long frac, long long ts)
>  {
>  	long long *sync_scalings = NULL;
>  	long long *sync_offsets = NULL;
> +	long long *sync_frac = NULL;
>  	long long *sync_ts = NULL;
>  
>  	if (offsets->sync_count >= offsets->sync_size) {
> @@ -698,22 +705,27 @@ static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step
>  				       (offsets->sync_size + array_step) * sizeof(long long));
>  		sync_scalings = realloc(offsets->sync_scalings,
>  				       (offsets->sync_size + array_step) * sizeof(long long));
> +		sync_frac = realloc(offsets->sync_frac,
> +				    (offsets->sync_size + array_step) * sizeof(long long));
>  
> -		if (!sync_ts || !sync_offsets || !sync_scalings) {
> +		if (!sync_ts || !sync_offsets || !sync_scalings || !sync_frac) {
>  			free(sync_ts);
>  			free(sync_offsets);
>  			free(sync_scalings);
> +			free(sync_frac);
>  			return -1;
>  		}
>  		offsets->sync_size += array_step;
>  		offsets->sync_ts = sync_ts;
>  		offsets->sync_offsets = sync_offsets;
>  		offsets->sync_scalings = sync_scalings;
> +		offsets->sync_frac = sync_frac;
>  	}
>  
>  	offsets->sync_ts[offsets->sync_count] = ts;
>  	offsets->sync_offsets[offsets->sync_count] = offset;
>  	offsets->sync_scalings[offsets->sync_count] = scaling;
> +	offsets->sync_frac[offsets->sync_count] = frac;
>  	offsets->sync_count++;
>  
>  	return 0;
> @@ -726,9 +738,10 @@ static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
>  	long long timestamp = 0;
>  	long long scaling = 0;
>  	long long offset = 0;
> +	long long frac = 0;
>  	int ret;
>  
> -	ret = proto->clock_sync_calc(tsync, &offset, &scaling, &timestamp, cpu);
> +	ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, &timestamp, cpu);
>  	if (ret) {
>  		tracecmd_warning("Failed to synchronize timestamps with guest");
>  		return -1;
> @@ -739,7 +752,7 @@ static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
>  	if (!clock || cpu >= clock->cpu_count || !clock->offsets)
>  		return -1;
>  	return record_sync_sample(&clock->offsets[cpu], array_step,
> -				  offset, scaling, timestamp);
> +				  offset, scaling, frac, timestamp);
>  }
>  
>  #define TIMER_SEC_NANO 1000000000LL
> @@ -928,6 +941,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
>  	unsigned int flags;
>  	long long *scalings = NULL;
>  	long long *offsets = NULL;
> +	long long *frac = NULL;
>  	long long *ts = NULL;
>  	int vcount;
>  	int count;
> @@ -936,7 +950,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
>  
>  	if (!tsync->vcpu_count)
>  		return -1;
> -	vcount = 3 + (4 * tsync->vcpu_count);
> +	vcount = 3 + (5 * tsync->vcpu_count);
>  	vector = calloc(vcount, sizeof(struct iovec));
>  	if (!vector)
>  		return -1;
> @@ -955,7 +969,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
>  		if (j >= vcount)
>  			break;
>  		ret = tracecmd_tsync_get_offsets(tsync, i, &count,
> -						 &ts, &offsets, &scalings);
> +						 &ts, &offsets, &scalings, NULL);
>  		if (ret < 0 || !count || !ts || !offsets || !scalings)
>  			break;
>  		vector[j].iov_len = 4;
> @@ -971,6 +985,21 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
>  		ret = -1;
>  		goto out;
>  	}

Should have a comment here to why this is a separate loop. I'm guessing
that the frac isn't added in the above loop due to backward compatibility?

> +	for (i = 0; i < tsync->vcpu_count; i++) {
> +		if (j >= vcount)
> +			break;
> +		ret = tracecmd_tsync_get_offsets(tsync, i, NULL,
> +						 NULL, NULL, NULL, &frac);
> +		if (ret < 0 || !frac)
> +			break;
> +		vector[j].iov_len = 8 * count;
> +		vector[j++].iov_base = frac;
> +	}
> +	if (i < tsync->vcpu_count) {
> +		ret = -1;
> +		goto out;

Hmm, so if frac doesn't exist we don't use this? Is this used if we
only have a tsc_offset but no scaling?

-- Steve

> +	}
> +
>  	tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, vcount);
>  #ifdef TSYNC_DEBUG
>  	if (count > 1)
Tzvetomir Stoyanov (VMware) Oct. 13, 2021, 9:48 a.m. UTC | #2
On Wed, Oct 13, 2021 at 6:04 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Thu, 23 Sep 2021 12:45:24 +0300
> "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:
> > @@ -175,6 +184,12 @@ static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
> >                                        dir_str, entry->d_name);
> >                               kvm->vcpu_scalings[cpu] = strdup(path);
> >                       }
> > +                     if (!strncmp(entry->d_name, KVM_DEBUG_FRACTION_FILE,
> > +                                  strlen(KVM_DEBUG_FRACTION_FILE))) {
>
> I'm curious, why not just use strcmp(), or does "d_name" have more
> characters than the fraction file?

Just to be on the safe side. Currently the fraction file is with the
longest name, but that may change in the future.

>
> > +                             snprintf(path, sizeof(path), "%s/%s",
> > +                                      dir_str, entry->d_name);
> > +                             kvm->vcpu_frac[cpu] = strdup(path);
> > +                     }
> >               }
> >       }
> >       if (!kvm->vcpu_offsets[cpu])
> > @@ -189,6 +204,8 @@ error:
> >       kvm->vcpu_offsets[cpu] = NULL;
> >       free(kvm->vcpu_scalings[cpu]);
> >       kvm->vcpu_scalings[cpu] = NULL;
> > +     free(kvm->vcpu_frac[cpu]);
> > +     kvm->vcpu_frac[cpu] = NULL;
> >       return -1;
> >  }
> >
> > @@ -254,7 +271,8 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
> >       kvm->vcpu_count = tsync->vcpu_count;
> >       kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *));
> >       kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *));
> > -     if (!kvm->vcpu_offsets || !kvm->vcpu_scalings)
> > +     kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *));
> > +     if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac)
> >               goto error;
> >       if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0)
> >               goto error;
> > @@ -263,6 +281,7 @@ static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
> >  error:
> >       free(kvm->vcpu_offsets);
> >       free(kvm->vcpu_scalings);
> > +     free(kvm->vcpu_frac);
> >       return -1;
> >  }
> >
> > @@ -353,6 +372,8 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
> >                       kvm->vcpu_offsets[i] = NULL;
> >                       free(kvm->vcpu_scalings[i]);
> >                       kvm->vcpu_scalings[i] = NULL;
> > +                     free(kvm->vcpu_frac[i]);
> > +                     kvm->vcpu_frac[i] = NULL;
> >               }
> >               if (kvm->tep)
> >                       tep_free(kvm->tep);
> > @@ -364,7 +385,7 @@ static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
> >  }
> >
> >  static int kvm_clock_host(struct tracecmd_time_sync *tsync,
> > -                       long long *offset, long long *scaling,
> > +                       long long *offset, long long *scaling, long long *frac,
> >                         long long *timestamp, unsigned int cpu)
> >  {
> >       char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
> > @@ -374,6 +395,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
> >       long long kvm_scaling = 1;
>                               ^^^
> Hmm, this is initialized here.
>
> >       unsigned int sync_msg;
> >       long long kvm_offset;
> > +     long long kvm_frac;
> >       unsigned int size;
> >       char *msg;
> >       int ret;
> > @@ -388,12 +410,18 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
> >       ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset);
> >       if (ret < 0)
> >               return -1;
> > +
> > +     kvm_scaling = 1;
>
> Why are you adding the above?

you are right, this one is useless.

>
> >       if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) {
> >               read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling);
> >               if (kvm_scaling == KVM_SCALING_AMD_DEFAULT ||
> >                   kvm_scaling == KVM_SCALING_INTEL_DEFAULT)
> >                       kvm_scaling = 1;
> >       }
> > +
> > +     kvm_frac = 0;
>
> Should probably initialize that at the beginning too.
>
> > +     if (kvm->vcpu_frac && kvm->vcpu_frac[cpu])
> > +             ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac);
> >       msg = (char *)&packet;
> >       size = sizeof(packet);
> >       ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
> > @@ -405,6 +433,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
> >
> >       packet.offset = -kvm_offset;
> >       packet.scaling = kvm_scaling;
> > +     packet.frac = kvm_frac;
> >       ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME,
> >                                         KVM_SYNC_PKT_RESPONSE, sizeof(packet),
> >                                         (char *)&packet);
> > @@ -413,6 +442,7 @@ static int kvm_clock_host(struct tracecmd_time_sync *tsync,
> >
> >       *scaling = packet.scaling;
> >       *offset = packet.offset;
> > +     *frac = kvm_frac;
> >       *timestamp = packet.ts;
> >
> >       return 0;
> > @@ -444,10 +474,10 @@ static int kvm_marker_find(struct tep_event *event, struct tep_record *record,
> >       return 0;
> >  }
> >
> > -
> >  static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
> >                          long long *offset,
> >                          long long *scaling,
> > +                        long long *frac,
> >                          long long *timestamp)
> >  {
> >       char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
> > @@ -488,12 +518,13 @@ static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
> >
> >       *scaling = packet.scaling;
> >       *offset = packet.offset;
> > +     *frac = packet.frac;
> >       *timestamp = packet.ts;
> >       return 0;
> >  }
> >
> >  static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
> > -                            long long *offset, long long *scaling,
> > +                            long long *offset, long long *scaling, long long *frac,
> >                              long long *timestamp, unsigned int cpu)
> >  {
> >       struct clock_sync_context *clock_context;
> > @@ -505,9 +536,9 @@ static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
> >       clock_context = (struct clock_sync_context *)tsync->context;
> >
> >       if (clock_context->is_guest)
> > -             ret = kvm_clock_guest(tsync, offset, scaling, timestamp);
> > +             ret = kvm_clock_guest(tsync, offset, scaling, frac, timestamp);
> >       else
> > -             ret = kvm_clock_host(tsync, offset, scaling, timestamp, cpu);
> > +             ret = kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu);
> >       return ret;
> >  }
> >
> > diff --git a/lib/trace-cmd/trace-timesync-ptp.c b/lib/trace-cmd/trace-timesync-ptp.c
> > index b05f1cd0..70242ee3 100644
> > --- a/lib/trace-cmd/trace-timesync-ptp.c
> > +++ b/lib/trace-cmd/trace-timesync-ptp.c
> > @@ -663,7 +663,7 @@ static int ptp_clock_server(struct tracecmd_time_sync *tsync,
> >  }
> >
> >  static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
> > -                            long long *offset, long long *scaling,
> > +                            long long *offset, long long *scaling, long long *frac,
> >                              long long *timestamp, unsigned int cpu)
> >  {
> >       struct clock_sync_context *clock_context;
> > @@ -689,6 +689,8 @@ static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
> >
> >       if (scaling)
> >               *scaling = 1;
> > +     if (frac)
> > +             *frac = 0;
> >       if (clock_context->is_server)
> >               ret = ptp_clock_server(tsync, offset, timestamp);
> >       else
> > diff --git a/lib/trace-cmd/trace-timesync.c b/lib/trace-cmd/trace-timesync.c
> > index 19ca19d7..298913a1 100644
> > --- a/lib/trace-cmd/trace-timesync.c
> > +++ b/lib/trace-cmd/trace-timesync.c
> > @@ -36,7 +36,7 @@ struct tsync_proto {
> >       int (*clock_sync_init)(struct tracecmd_time_sync *clock_context);
> >       int (*clock_sync_free)(struct tracecmd_time_sync *clock_context);
> >       int (*clock_sync_calc)(struct tracecmd_time_sync *clock_context,
> > -                            long long *offset, long long *scaling,
> > +                            long long *offset, long long *scaling, long long *frac,
> >                              long long *timestamp, unsigned int cpu);
> >  };
> >
> > @@ -76,7 +76,7 @@ int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int role
> >                                 int (*init)(struct tracecmd_time_sync *),
> >                                 int (*free)(struct tracecmd_time_sync *),
> >                                 int (*calc)(struct tracecmd_time_sync *,
> > -                                           long long *, long long *,
> > +                                           long long *, long long *, long long *,
> >                                             long long *, unsigned int))
> >  {
> >       struct tsync_proto *proto = NULL;
> > @@ -137,12 +137,13 @@ bool __hidden tsync_proto_is_supported(const char *proto_name)
> >   * @ts: Array of size @count containing timestamps of callculated offsets
> >   * @offsets: array of size @count, containing offsets for each timestamp
> >   * @scalings: array of size @count, containing scaling ratios for each timestamp
> > + * @frac: array of size @count, containing fraction bits for each timestamp
> >   *
> >   * Retuns -1 in case of an error, or 0 otherwise
> >   */
> >  int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
> >                              int *count, long long **ts,
> > -                            long long **offsets, long long **scalings)
> > +                            long long **offsets, long long **scalings, long long **frac)
> >  {
> >       struct clock_sync_context *tsync_context;
> >
> > @@ -159,6 +160,8 @@ int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
> >               *offsets = tsync_context->offsets[cpu].sync_offsets;
> >       if (scalings)
> >               *scalings = tsync_context->offsets[cpu].sync_scalings;
> > +     if (frac)
> > +             *frac = tsync_context->offsets[cpu].sync_frac;
> >
> >       return 0;
> >  }
> > @@ -564,9 +567,11 @@ void tracecmd_tsync_free(struct tracecmd_time_sync *tsync)
> >                               free(tsync_context->offsets[i].sync_ts);
> >                               free(tsync_context->offsets[i].sync_offsets);
> >                               free(tsync_context->offsets[i].sync_scalings);
> > +                             free(tsync_context->offsets[i].sync_frac);
> >                               tsync_context->offsets[i].sync_ts = NULL;
> >                               tsync_context->offsets[i].sync_offsets = NULL;
> >                               tsync_context->offsets[i].sync_scalings = NULL;
> > +                             tsync_context->offsets[i].sync_frac = NULL;
> >                               tsync_context->offsets[i].sync_count = 0;
> >                               tsync_context->offsets[i].sync_size = 0;
> >                       }
> > @@ -643,10 +648,11 @@ static int tsync_send(struct tracecmd_time_sync *tsync,
> >       long long timestamp = 0;
> >       long long scaling = 0;
> >       long long offset = 0;
> > +     long long frac = 0;
> >       int ret;
> >
> >       old_set = pin_to_cpu(cpu);
> > -     ret = proto->clock_sync_calc(tsync, &offset, &scaling, &timestamp, cpu);
> > +     ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, &timestamp, cpu);
> >       if (old_set)
> >               restore_pin_to_cpu(old_set);
> >
> > @@ -685,10 +691,11 @@ static void tsync_with_host(struct tracecmd_time_sync *tsync)
> >  }
> >
> >  static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step,
> > -                           long long offset, long long scaling, long long ts)
> > +                           long long offset, long long scaling, long long frac, long long ts)
> >  {
> >       long long *sync_scalings = NULL;
> >       long long *sync_offsets = NULL;
> > +     long long *sync_frac = NULL;
> >       long long *sync_ts = NULL;
> >
> >       if (offsets->sync_count >= offsets->sync_size) {
> > @@ -698,22 +705,27 @@ static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step
> >                                      (offsets->sync_size + array_step) * sizeof(long long));
> >               sync_scalings = realloc(offsets->sync_scalings,
> >                                      (offsets->sync_size + array_step) * sizeof(long long));
> > +             sync_frac = realloc(offsets->sync_frac,
> > +                                 (offsets->sync_size + array_step) * sizeof(long long));
> >
> > -             if (!sync_ts || !sync_offsets || !sync_scalings) {
> > +             if (!sync_ts || !sync_offsets || !sync_scalings || !sync_frac) {
> >                       free(sync_ts);
> >                       free(sync_offsets);
> >                       free(sync_scalings);
> > +                     free(sync_frac);
> >                       return -1;
> >               }
> >               offsets->sync_size += array_step;
> >               offsets->sync_ts = sync_ts;
> >               offsets->sync_offsets = sync_offsets;
> >               offsets->sync_scalings = sync_scalings;
> > +             offsets->sync_frac = sync_frac;
> >       }
> >
> >       offsets->sync_ts[offsets->sync_count] = ts;
> >       offsets->sync_offsets[offsets->sync_count] = offset;
> >       offsets->sync_scalings[offsets->sync_count] = scaling;
> > +     offsets->sync_frac[offsets->sync_count] = frac;
> >       offsets->sync_count++;
> >
> >       return 0;
> > @@ -726,9 +738,10 @@ static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
> >       long long timestamp = 0;
> >       long long scaling = 0;
> >       long long offset = 0;
> > +     long long frac = 0;
> >       int ret;
> >
> > -     ret = proto->clock_sync_calc(tsync, &offset, &scaling, &timestamp, cpu);
> > +     ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, &timestamp, cpu);
> >       if (ret) {
> >               tracecmd_warning("Failed to synchronize timestamps with guest");
> >               return -1;
> > @@ -739,7 +752,7 @@ static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
> >       if (!clock || cpu >= clock->cpu_count || !clock->offsets)
> >               return -1;
> >       return record_sync_sample(&clock->offsets[cpu], array_step,
> > -                               offset, scaling, timestamp);
> > +                               offset, scaling, frac, timestamp);
> >  }
> >
> >  #define TIMER_SEC_NANO 1000000000LL
> > @@ -928,6 +941,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
> >       unsigned int flags;
> >       long long *scalings = NULL;
> >       long long *offsets = NULL;
> > +     long long *frac = NULL;
> >       long long *ts = NULL;
> >       int vcount;
> >       int count;
> > @@ -936,7 +950,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
> >
> >       if (!tsync->vcpu_count)
> >               return -1;
> > -     vcount = 3 + (4 * tsync->vcpu_count);
> > +     vcount = 3 + (5 * tsync->vcpu_count);
> >       vector = calloc(vcount, sizeof(struct iovec));
> >       if (!vector)
> >               return -1;
> > @@ -955,7 +969,7 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
> >               if (j >= vcount)
> >                       break;
> >               ret = tracecmd_tsync_get_offsets(tsync, i, &count,
> > -                                              &ts, &offsets, &scalings);
> > +                                              &ts, &offsets, &scalings, NULL);
> >               if (ret < 0 || !count || !ts || !offsets || !scalings)
> >                       break;
> >               vector[j].iov_len = 4;
> > @@ -971,6 +985,21 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
> >               ret = -1;
> >               goto out;
> >       }
>
> Should have a comment here to why this is a separate loop. I'm guessing
> that the frac isn't added in the above loop due to backward compatibility?

Yes, because of backward compatibility. Going to describe it in the
next version.

>
> > +     for (i = 0; i < tsync->vcpu_count; i++) {
> > +             if (j >= vcount)
> > +                     break;
> > +             ret = tracecmd_tsync_get_offsets(tsync, i, NULL,
> > +                                              NULL, NULL, NULL, &frac);
> > +             if (ret < 0 || !frac)
> > +                     break;
> > +             vector[j].iov_len = 8 * count;
> > +             vector[j++].iov_base = frac;
> > +     }
> > +     if (i < tsync->vcpu_count) {
> > +             ret = -1;
> > +             goto out;
>
> Hmm, so if frac doesn't exist we don't use this? Is this used if we
> only have a tsc_offset but no scaling?
>

You are right, actually frac 0 could be a valid value - the check
inside the loop "!frac" must be removed. If there is only tsc_offset,
the logic should work.

> -- Steve
>
> > +     }
> > +
> >       tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, vcount);
> >  #ifdef TSYNC_DEBUG
> >       if (count > 1)
>
Steven Rostedt Oct. 13, 2021, 2:53 p.m. UTC | #3
On Wed, 13 Oct 2021 12:48:55 +0300
Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:

> On Wed, Oct 13, 2021 at 6:04 AM Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > On Thu, 23 Sep 2021 12:45:24 +0300
> > "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:  
> > > @@ -175,6 +184,12 @@ static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
> > >                                        dir_str, entry->d_name);
> > >                               kvm->vcpu_scalings[cpu] = strdup(path);
> > >                       }
> > > +                     if (!strncmp(entry->d_name, KVM_DEBUG_FRACTION_FILE,
> > > +                                  strlen(KVM_DEBUG_FRACTION_FILE))) {  
> >
> > I'm curious, why not just use strcmp(), or does "d_name" have more
> > characters than the fraction file?  
> 
> Just to be on the safe side. Currently the fraction file is with the
> longest name, but that may change in the future.\\


I'm confused, what do you mean on the safe side? What are you protecting
against? In other words, what can possibly go wrong? And I notice you do
this for all of them. I don't get it.

The only thing that this is protecting against, is if the file name becomes
"tsc-scaling-ratio-frac-bits-some-garbage-here" then it it will still
match. But other than that, you are not protecting against anything.

-- Steve


> 
> >  
> > > +                             snprintf(path, sizeof(path), "%s/%s",
> > > +                                      dir_str, entry->d_name);
> > > +                             kvm->vcpu_frac[cpu] = strdup(path);
> > > +                     }
> > >               }
> > >       }
diff mbox series

Patch

diff --git a/lib/trace-cmd/include/private/trace-cmd-private.h b/lib/trace-cmd/include/private/trace-cmd-private.h
index c58b09e9..f40babef 100644
--- a/lib/trace-cmd/include/private/trace-cmd-private.h
+++ b/lib/trace-cmd/include/private/trace-cmd-private.h
@@ -454,8 +454,8 @@  tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval,
 			  int guest_cpus, const char *proto_name, const char *clock);
 int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync);
 int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
-				int *count, long long **ts,
-				long long **offsets, long long **scalings);
+			       int *count, long long **ts,
+			       long long **offsets, long long **scalings, long long **frac);
 int tracecmd_tsync_get_session_params(struct tracecmd_time_sync *tsync,
 				      char **selected_proto,
 				      unsigned int *tsync_port);
diff --git a/lib/trace-cmd/include/trace-tsync-local.h b/lib/trace-cmd/include/trace-tsync-local.h
index 7e56d187..b357662c 100644
--- a/lib/trace-cmd/include/trace-tsync-local.h
+++ b/lib/trace-cmd/include/trace-tsync-local.h
@@ -27,14 +27,15 @@  struct tracecmd_time_sync {
 struct clock_sync_offsets {
 	/* Arrays with calculated time offsets at given time */
 	int				sync_size;	/* Allocated size of sync_ts,
-							 * sync_offsets and sync_scalings
+							 * sync_offsets, sync_scalings and sync_frac
 							 */
 	int				sync_count;	/* Number of elements in sync_ts,
-							 * sync_offsets and sync_scalings
+							 * sync_offsets, sync_scalings and sync_frac
 							 */
 	long long			*sync_ts;
 	long long			*sync_offsets;
 	long long			*sync_scalings;
+	long long			*sync_frac;
 };
 
 struct clock_sync_context {
@@ -60,7 +61,7 @@  int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int role
 				  int (*init)(struct tracecmd_time_sync *),
 				  int (*free)(struct tracecmd_time_sync *),
 				  int (*calc)(struct tracecmd_time_sync *,
-					      long long *, long long *,
+					      long long *, long long *, long long*,
 					      long long *, unsigned int));
 int tracecmd_tsync_proto_unregister(char *proto_name);
 
diff --git a/lib/trace-cmd/trace-timesync-kvm.c b/lib/trace-cmd/trace-timesync-kvm.c
index 58c028ea..acadaca2 100644
--- a/lib/trace-cmd/trace-timesync-kvm.c
+++ b/lib/trace-cmd/trace-timesync-kvm.c
@@ -19,6 +19,7 @@ 
 #define KVM_DEBUG_FS "/sys/kernel/debug/kvm"
 #define KVM_DEBUG_OFFSET_FILE	"tsc-offset"
 #define KVM_DEBUG_SCALING_FILE	"tsc-scaling-ratio"
+#define KVM_DEBUG_FRACTION_FILE	"tsc-scaling-ratio-frac-bits"
 #define KVM_DEBUG_VCPU_DIR	"vcpu"
 
 /* default KVM scaling values, taken from the Linux kernel */
@@ -37,6 +38,7 @@  struct kvm_clock_sync {
 	int vcpu_count;
 	char **vcpu_offsets;
 	char **vcpu_scalings;
+	char **vcpu_frac;
 	int marker_fd;
 	struct tep_handle *tep;
 	int raw_id;
@@ -47,6 +49,7 @@  struct kvm_clock_offset_msg {
 	s64	ts;
 	s64	offset;
 	s64	scaling;
+	s64	frac;
 };
 
 static int read_ll_from_file(char *file, long long *res)
@@ -72,21 +75,27 @@  static int read_ll_from_file(char *file, long long *res)
 
 static bool kvm_scaling_check_vm_cpu(char *vname, char *cpu)
 {
-	long long scaling;
+	long long scaling, frac;
+	bool has_scaling = false;
+	bool has_frac = false;
 	char *path;
 	int ret;
 
 	if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_SCALING_FILE) < 0)
-		return true;
+		return false;
 	ret = read_ll_from_file(path, &scaling);
 	free(path);
+	if (!ret)
+		has_scaling = true;
+
+	if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_FRACTION_FILE) < 0)
+		return false;
+	ret = read_ll_from_file(path, &frac);
+	free(path);
+	if (!ret)
+		has_frac = true;
 
-	/*
-	 * If there is a scaling, different from the default -
-	 * return false, not supported.
-	 */
-	if (!ret &&
-	    scaling != KVM_SCALING_AMD_DEFAULT && scaling != KVM_SCALING_INTEL_DEFAULT)
+	if (has_scaling != has_frac)
 		return false;
 
 	return true;
@@ -175,6 +184,12 @@  static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int cpu, char *dir_str)
 					 dir_str, entry->d_name);
 				kvm->vcpu_scalings[cpu] = strdup(path);
 			}
+			if (!strncmp(entry->d_name, KVM_DEBUG_FRACTION_FILE,
+				     strlen(KVM_DEBUG_FRACTION_FILE))) {
+				snprintf(path, sizeof(path), "%s/%s",
+					 dir_str, entry->d_name);
+				kvm->vcpu_frac[cpu] = strdup(path);
+			}
 		}
 	}
 	if (!kvm->vcpu_offsets[cpu])
@@ -189,6 +204,8 @@  error:
 	kvm->vcpu_offsets[cpu] = NULL;
 	free(kvm->vcpu_scalings[cpu]);
 	kvm->vcpu_scalings[cpu] = NULL;
+	free(kvm->vcpu_frac[cpu]);
+	kvm->vcpu_frac[cpu] = NULL;
 	return -1;
 }
 
@@ -254,7 +271,8 @@  static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
 	kvm->vcpu_count = tsync->vcpu_count;
 	kvm->vcpu_offsets = calloc(kvm->vcpu_count, sizeof(char *));
 	kvm->vcpu_scalings = calloc(kvm->vcpu_count, sizeof(char *));
-	if (!kvm->vcpu_offsets || !kvm->vcpu_scalings)
+	kvm->vcpu_frac = calloc(kvm->vcpu_count, sizeof(char *));
+	if (!kvm->vcpu_offsets || !kvm->vcpu_scalings || !kvm->vcpu_frac)
 		goto error;
 	if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0)
 		goto error;
@@ -263,6 +281,7 @@  static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync,
 error:
 	free(kvm->vcpu_offsets);
 	free(kvm->vcpu_scalings);
+	free(kvm->vcpu_frac);
 	return -1;
 }
 
@@ -353,6 +372,8 @@  static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
 			kvm->vcpu_offsets[i] = NULL;
 			free(kvm->vcpu_scalings[i]);
 			kvm->vcpu_scalings[i] = NULL;
+			free(kvm->vcpu_frac[i]);
+			kvm->vcpu_frac[i] = NULL;
 		}
 		if (kvm->tep)
 			tep_free(kvm->tep);
@@ -364,7 +385,7 @@  static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync)
 }
 
 static int kvm_clock_host(struct tracecmd_time_sync *tsync,
-			  long long *offset, long long *scaling,
+			  long long *offset, long long *scaling, long long *frac,
 			  long long *timestamp, unsigned int cpu)
 {
 	char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
@@ -374,6 +395,7 @@  static int kvm_clock_host(struct tracecmd_time_sync *tsync,
 	long long kvm_scaling = 1;
 	unsigned int sync_msg;
 	long long kvm_offset;
+	long long kvm_frac;
 	unsigned int size;
 	char *msg;
 	int ret;
@@ -388,12 +410,18 @@  static int kvm_clock_host(struct tracecmd_time_sync *tsync,
 	ret = read_ll_from_file(kvm->vcpu_offsets[cpu], &kvm_offset);
 	if (ret < 0)
 		return -1;
+
+	kvm_scaling = 1;
 	if (kvm->vcpu_scalings && kvm->vcpu_scalings[cpu]) {
 		read_ll_from_file(kvm->vcpu_scalings[cpu], &kvm_scaling);
 		if (kvm_scaling == KVM_SCALING_AMD_DEFAULT ||
 		    kvm_scaling == KVM_SCALING_INTEL_DEFAULT)
 			kvm_scaling = 1;
 	}
+
+	kvm_frac = 0;
+	if (kvm->vcpu_frac && kvm->vcpu_frac[cpu])
+		ret = read_ll_from_file(kvm->vcpu_frac[cpu], &kvm_frac);
 	msg = (char *)&packet;
 	size = sizeof(packet);
 	ret = tracecmd_msg_recv_time_sync(tsync->msg_handle,
@@ -405,6 +433,7 @@  static int kvm_clock_host(struct tracecmd_time_sync *tsync,
 
 	packet.offset = -kvm_offset;
 	packet.scaling = kvm_scaling;
+	packet.frac = kvm_frac;
 	ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME,
 					  KVM_SYNC_PKT_RESPONSE, sizeof(packet),
 					  (char *)&packet);
@@ -413,6 +442,7 @@  static int kvm_clock_host(struct tracecmd_time_sync *tsync,
 
 	*scaling = packet.scaling;
 	*offset = packet.offset;
+	*frac = kvm_frac;
 	*timestamp = packet.ts;
 
 	return 0;
@@ -444,10 +474,10 @@  static int kvm_marker_find(struct tep_event *event, struct tep_record *record,
 	return 0;
 }
 
-
 static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
 			   long long *offset,
 			   long long *scaling,
+			   long long *frac,
 			   long long *timestamp)
 {
 	char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH];
@@ -488,12 +518,13 @@  static int kvm_clock_guest(struct tracecmd_time_sync *tsync,
 
 	*scaling = packet.scaling;
 	*offset = packet.offset;
+	*frac = packet.frac;
 	*timestamp = packet.ts;
 	return 0;
 }
 
 static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
-			       long long *offset, long long *scaling,
+			       long long *offset, long long *scaling, long long *frac,
 			       long long *timestamp, unsigned int cpu)
 {
 	struct clock_sync_context *clock_context;
@@ -505,9 +536,9 @@  static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync,
 	clock_context = (struct clock_sync_context *)tsync->context;
 
 	if (clock_context->is_guest)
-		ret = kvm_clock_guest(tsync, offset, scaling, timestamp);
+		ret = kvm_clock_guest(tsync, offset, scaling, frac, timestamp);
 	else
-		ret = kvm_clock_host(tsync, offset, scaling, timestamp, cpu);
+		ret = kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu);
 	return ret;
 }
 
diff --git a/lib/trace-cmd/trace-timesync-ptp.c b/lib/trace-cmd/trace-timesync-ptp.c
index b05f1cd0..70242ee3 100644
--- a/lib/trace-cmd/trace-timesync-ptp.c
+++ b/lib/trace-cmd/trace-timesync-ptp.c
@@ -663,7 +663,7 @@  static int ptp_clock_server(struct tracecmd_time_sync *tsync,
 }
 
 static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
-			       long long *offset, long long *scaling,
+			       long long *offset, long long *scaling, long long *frac,
 			       long long *timestamp, unsigned int cpu)
 {
 	struct clock_sync_context *clock_context;
@@ -689,6 +689,8 @@  static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync,
 
 	if (scaling)
 		*scaling = 1;
+	if (frac)
+		*frac = 0;
 	if (clock_context->is_server)
 		ret = ptp_clock_server(tsync, offset, timestamp);
 	else
diff --git a/lib/trace-cmd/trace-timesync.c b/lib/trace-cmd/trace-timesync.c
index 19ca19d7..298913a1 100644
--- a/lib/trace-cmd/trace-timesync.c
+++ b/lib/trace-cmd/trace-timesync.c
@@ -36,7 +36,7 @@  struct tsync_proto {
 	int (*clock_sync_init)(struct tracecmd_time_sync *clock_context);
 	int (*clock_sync_free)(struct tracecmd_time_sync *clock_context);
 	int (*clock_sync_calc)(struct tracecmd_time_sync *clock_context,
-			       long long *offset, long long *scaling,
+			       long long *offset, long long *scaling, long long *frac,
 			       long long *timestamp, unsigned int cpu);
 };
 
@@ -76,7 +76,7 @@  int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int role
 				  int (*init)(struct tracecmd_time_sync *),
 				  int (*free)(struct tracecmd_time_sync *),
 				  int (*calc)(struct tracecmd_time_sync *,
-					      long long *, long long *,
+					      long long *, long long *, long long *,
 					      long long *, unsigned int))
 {
 	struct tsync_proto *proto = NULL;
@@ -137,12 +137,13 @@  bool __hidden tsync_proto_is_supported(const char *proto_name)
  * @ts: Array of size @count containing timestamps of callculated offsets
  * @offsets: array of size @count, containing offsets for each timestamp
  * @scalings: array of size @count, containing scaling ratios for each timestamp
+ * @frac: array of size @count, containing fraction bits for each timestamp
  *
  * Retuns -1 in case of an error, or 0 otherwise
  */
 int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
 			       int *count, long long **ts,
-			       long long **offsets, long long **scalings)
+			       long long **offsets, long long **scalings, long long **frac)
 {
 	struct clock_sync_context *tsync_context;
 
@@ -159,6 +160,8 @@  int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
 		*offsets = tsync_context->offsets[cpu].sync_offsets;
 	if (scalings)
 		*scalings = tsync_context->offsets[cpu].sync_scalings;
+	if (frac)
+		*frac = tsync_context->offsets[cpu].sync_frac;
 
 	return 0;
 }
@@ -564,9 +567,11 @@  void tracecmd_tsync_free(struct tracecmd_time_sync *tsync)
 				free(tsync_context->offsets[i].sync_ts);
 				free(tsync_context->offsets[i].sync_offsets);
 				free(tsync_context->offsets[i].sync_scalings);
+				free(tsync_context->offsets[i].sync_frac);
 				tsync_context->offsets[i].sync_ts = NULL;
 				tsync_context->offsets[i].sync_offsets = NULL;
 				tsync_context->offsets[i].sync_scalings = NULL;
+				tsync_context->offsets[i].sync_frac = NULL;
 				tsync_context->offsets[i].sync_count = 0;
 				tsync_context->offsets[i].sync_size = 0;
 			}
@@ -643,10 +648,11 @@  static int tsync_send(struct tracecmd_time_sync *tsync,
 	long long timestamp = 0;
 	long long scaling = 0;
 	long long offset = 0;
+	long long frac = 0;
 	int ret;
 
 	old_set = pin_to_cpu(cpu);
-	ret = proto->clock_sync_calc(tsync, &offset, &scaling, &timestamp, cpu);
+	ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, &timestamp, cpu);
 	if (old_set)
 		restore_pin_to_cpu(old_set);
 
@@ -685,10 +691,11 @@  static void tsync_with_host(struct tracecmd_time_sync *tsync)
 }
 
 static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step,
-			      long long offset, long long scaling, long long ts)
+			      long long offset, long long scaling, long long frac, long long ts)
 {
 	long long *sync_scalings = NULL;
 	long long *sync_offsets = NULL;
+	long long *sync_frac = NULL;
 	long long *sync_ts = NULL;
 
 	if (offsets->sync_count >= offsets->sync_size) {
@@ -698,22 +705,27 @@  static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step
 				       (offsets->sync_size + array_step) * sizeof(long long));
 		sync_scalings = realloc(offsets->sync_scalings,
 				       (offsets->sync_size + array_step) * sizeof(long long));
+		sync_frac = realloc(offsets->sync_frac,
+				    (offsets->sync_size + array_step) * sizeof(long long));
 
-		if (!sync_ts || !sync_offsets || !sync_scalings) {
+		if (!sync_ts || !sync_offsets || !sync_scalings || !sync_frac) {
 			free(sync_ts);
 			free(sync_offsets);
 			free(sync_scalings);
+			free(sync_frac);
 			return -1;
 		}
 		offsets->sync_size += array_step;
 		offsets->sync_ts = sync_ts;
 		offsets->sync_offsets = sync_offsets;
 		offsets->sync_scalings = sync_scalings;
+		offsets->sync_frac = sync_frac;
 	}
 
 	offsets->sync_ts[offsets->sync_count] = ts;
 	offsets->sync_offsets[offsets->sync_count] = offset;
 	offsets->sync_scalings[offsets->sync_count] = scaling;
+	offsets->sync_frac[offsets->sync_count] = frac;
 	offsets->sync_count++;
 
 	return 0;
@@ -726,9 +738,10 @@  static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
 	long long timestamp = 0;
 	long long scaling = 0;
 	long long offset = 0;
+	long long frac = 0;
 	int ret;
 
-	ret = proto->clock_sync_calc(tsync, &offset, &scaling, &timestamp, cpu);
+	ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, &timestamp, cpu);
 	if (ret) {
 		tracecmd_warning("Failed to synchronize timestamps with guest");
 		return -1;
@@ -739,7 +752,7 @@  static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu,
 	if (!clock || cpu >= clock->cpu_count || !clock->offsets)
 		return -1;
 	return record_sync_sample(&clock->offsets[cpu], array_step,
-				  offset, scaling, timestamp);
+				  offset, scaling, frac, timestamp);
 }
 
 #define TIMER_SEC_NANO 1000000000LL
@@ -928,6 +941,7 @@  int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
 	unsigned int flags;
 	long long *scalings = NULL;
 	long long *offsets = NULL;
+	long long *frac = NULL;
 	long long *ts = NULL;
 	int vcount;
 	int count;
@@ -936,7 +950,7 @@  int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
 
 	if (!tsync->vcpu_count)
 		return -1;
-	vcount = 3 + (4 * tsync->vcpu_count);
+	vcount = 3 + (5 * tsync->vcpu_count);
 	vector = calloc(vcount, sizeof(struct iovec));
 	if (!vector)
 		return -1;
@@ -955,7 +969,7 @@  int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
 		if (j >= vcount)
 			break;
 		ret = tracecmd_tsync_get_offsets(tsync, i, &count,
-						 &ts, &offsets, &scalings);
+						 &ts, &offsets, &scalings, NULL);
 		if (ret < 0 || !count || !ts || !offsets || !scalings)
 			break;
 		vector[j].iov_len = 4;
@@ -971,6 +985,21 @@  int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
 		ret = -1;
 		goto out;
 	}
+	for (i = 0; i < tsync->vcpu_count; i++) {
+		if (j >= vcount)
+			break;
+		ret = tracecmd_tsync_get_offsets(tsync, i, NULL,
+						 NULL, NULL, NULL, &frac);
+		if (ret < 0 || !frac)
+			break;
+		vector[j].iov_len = 8 * count;
+		vector[j++].iov_base = frac;
+	}
+	if (i < tsync->vcpu_count) {
+		ret = -1;
+		goto out;
+	}
+
 	tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, vcount);
 #ifdef TSYNC_DEBUG
 	if (count > 1)