diff mbox series

[v3,06/23] trace-cmd: Add new trace-cmd clock tsc2nsec

Message ID 20210324130418.436206-7-tz.stoyanov@gmail.com (mailing list archive)
State Superseded
Headers show
Series TSC trace clock to nanosecond conversion | expand

Commit Message

Tzvetomir Stoyanov (VMware) March 24, 2021, 1:04 p.m. UTC
A new clock is added to "trace-cmd record" command:
 trace-cmd record -C tsc2nsec ...
which uses x86-tsc ftrace clock and converts the timestamps from tsc to
nanoseconds, using data from perf kernel interface.
A new options is added also:
 trace-cmd record --tsc2nsec ...
which does not change the current trace clock, just convert whatever it
is to nanoseconds, using the same data from perf kernel interface.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 tracecmd/include/trace-local.h |   7 ++
 tracecmd/trace-record.c        | 146 ++++++++++++++++++++++++++++++++-
 2 files changed, 149 insertions(+), 4 deletions(-)

Comments

Steven Rostedt March 24, 2021, 3:05 p.m. UTC | #1
On Wed, 24 Mar 2021 15:04:01 +0200
"Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:

> @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc,
>  			break;
>  		case 'C':
>  			check_instance_die(ctx->instance, "-C");
> -			ctx->instance->clock = optarg;
> +			if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) {
> +				ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
> +						   &ctx->tsc2nsec.mult);
> +				if (ret || !clock_is_supported(NULL, TSC_CLOCK))
> +					die("Clock %s is not supported", optarg);
> +				ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
> +				ctx->instance->clock = strdup(TSC_CLOCK);
> +			} else
> +				ctx->instance->clock = optarg;
>  			ctx->instance->flags |= BUFFER_FL_HAS_CLOCK;
>  			if (is_top_instance(ctx->instance))


I actually expected in the previous patch to have:

	case 'C':
		if (!clock_is_supported(optarg))
			die("Clock %s is not supported", optarg);

Then in this patch, change it to:

		} else if (!clock_is_supported(optarg)) {
			die("Clock %s is not supported", optarg);
		} else {
			ctx->instance->clock = optarg;
		}

-- Steve
Steven Rostedt March 24, 2021, 3:20 p.m. UTC | #2
On Wed, 24 Mar 2021 15:04:01 +0200
"Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:

> @@ -199,6 +200,12 @@ struct filter_pids {
>  	int exclude;
>  };
>  
> +struct tsc_nsec {
> +	int mult;
> +	int shift;
> +	unsigned long long offset;
> +};
> +
>  struct buffer_instance {
>  	struct buffer_instance	*next;
>  	char			*name;
> diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> index 5f7f5b3d..2a594736 100644
> --- a/tracecmd/trace-record.c
> +++ b/tracecmd/trace-record.c
> @@ -57,6 +57,8 @@
>  #define MAX_LATENCY	"tracing_max_latency"
>  #define STAMP		"stamp"
>  #define FUNC_STACK_TRACE "func_stack_trace"
> +#define TSC_CLOCK	"x86-tsc"
> +#define TSCNSEC_CLOCK	"tsc2nsec"
>  
>  enum trace_type {
>  	TRACE_TYPE_RECORD	= 1,
> @@ -198,6 +200,7 @@ struct common_record_context {
>  	char *date2ts;
>  	char *user;
>  	int data_flags;
> +	struct tsc_nsec tsc2nsec;

struct tsc_nsec is 16 bytes in size, ending with a 8 byte word. It should
be added before the "int data_flags" to prevent a hole in the middle of
this struct.


>  
>  	int record_all;
>  	int total_disable;


> @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc,
>  			break;
>  		case 'C':
>  			check_instance_die(ctx->instance, "-C");
> -			ctx->instance->clock = optarg;
> +			if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) {

Hmm, why the strncmp()? Shouldn't it be a full match, not a partial one?

> +				ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
> +						   &ctx->tsc2nsec.mult);
> +				if (ret || !clock_is_supported(NULL, TSC_CLOCK))
> +					die("Clock %s is not supported", optarg);
> +				ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
> +				ctx->instance->clock = strdup(TSC_CLOCK);

Hmm, why the strdup? below we have clock = optarg, one of them is wrong. If
we free instance->clock, then we can't have it set to optarg. As that was
the way it was before, it looks to be a separate bug that probably needs
its own patch.

> +			} else
> +				ctx->instance->clock = optarg;

Actually, I think we should have the above be:

		case 'C':
			if (strcmp(optarg, TSCNSEC_CLOCK) == 0) {
				ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
						   &ctx->tsc2nsec.mult);
				if (ret)
					die("TSC to nanosecond is not supported");

				ctx->instance->clock = TSC_CLOCK;
			} else {
				ctx->instance->clock = optarg;
			}
			if (!clock_is_supported(ctx->instance->clock))
				die("Clock %s is not supported",
				    ctx->instance->clock);
			ctx->instance->clock = strdup(ctx->instance->clock);
			if (!ctx->instance->clock)
				die("Failed allocation");

The above is more informative about the reason for the error, and also
removes duplicate code in the check of supported clocks.

-- Steve


>  			ctx->instance->flags |= BUFFER_FL_HAS_CLOCK;
>  			if (is_top_instance(ctx->instance))
>  				guest_sync_set = true;
> @@ -6159,6 +6228,13 @@ static void parse_record_options(int argc,
>  				die("--fork option used for 'start' command only");
>  			fork_process = true;
>  			break;
> +		case OPT_tsc2nsec:
> +			ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
> +					   &ctx->tsc2nsec.mult);
> +			if (ret)
> +				die("TSC to nanosecond is not supported");
> +			ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
> +			break;
>  		case OPT_quiet:
>  		case 'q':
>  			quiet = true;
Tzvetomir Stoyanov (VMware) March 24, 2021, 3:38 p.m. UTC | #3
On Wed, Mar 24, 2021 at 5:20 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Wed, 24 Mar 2021 15:04:01 +0200
> "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:
>
> > @@ -199,6 +200,12 @@ struct filter_pids {
> >       int exclude;
> >  };
> >
> > +struct tsc_nsec {
> > +     int mult;
> > +     int shift;
> > +     unsigned long long offset;
> > +};
> > +
> >  struct buffer_instance {
> >       struct buffer_instance  *next;
> >       char                    *name;
> > diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> > index 5f7f5b3d..2a594736 100644
> > --- a/tracecmd/trace-record.c
> > +++ b/tracecmd/trace-record.c
> > @@ -57,6 +57,8 @@
> >  #define MAX_LATENCY  "tracing_max_latency"
> >  #define STAMP                "stamp"
> >  #define FUNC_STACK_TRACE "func_stack_trace"
> > +#define TSC_CLOCK    "x86-tsc"
> > +#define TSCNSEC_CLOCK        "tsc2nsec"
> >
> >  enum trace_type {
> >       TRACE_TYPE_RECORD       = 1,
> > @@ -198,6 +200,7 @@ struct common_record_context {
> >       char *date2ts;
> >       char *user;
> >       int data_flags;
> > +     struct tsc_nsec tsc2nsec;
>
> struct tsc_nsec is 16 bytes in size, ending with a 8 byte word. It should
> be added before the "int data_flags" to prevent a hole in the middle of
> this struct.
>
>
> >
> >       int record_all;
> >       int total_disable;
>
>
> > @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc,
> >                       break;
> >               case 'C':
> >                       check_instance_die(ctx->instance, "-C");
> > -                     ctx->instance->clock = optarg;
> > +                     if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) {
>
> Hmm, why the strncmp()? Shouldn't it be a full match, not a partial one?
>
> > +                             ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
> > +                                                &ctx->tsc2nsec.mult);
> > +                             if (ret || !clock_is_supported(NULL, TSC_CLOCK))
> > +                                     die("Clock %s is not supported", optarg);
> > +                             ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
> > +                             ctx->instance->clock = strdup(TSC_CLOCK);
>
> Hmm, why the strdup? below we have clock = optarg, one of them is wrong. If
> we free instance->clock, then we can't have it set to optarg. As that was
> the way it was before, it looks to be a separate bug that probably needs
> its own patch.

Actually, instance->clock is never freed - because there is no
function to free an instance. There is allocate_instance(), but no API
to free. That's why both are valid. I was wondering if to use strdup
or simple assignment, and decided to allocate a memory. One day we may
implement free_instance(), there are a lot of resources in an instance
that should be freed.

>
> > +                     } else
> > +                             ctx->instance->clock = optarg;
>
> Actually, I think we should have the above be:
>
>                 case 'C':
>                         if (strcmp(optarg, TSCNSEC_CLOCK) == 0) {
>                                 ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
>                                                    &ctx->tsc2nsec.mult);
>                                 if (ret)
>                                         die("TSC to nanosecond is not supported");
>
>                                 ctx->instance->clock = TSC_CLOCK;
>                         } else {
>                                 ctx->instance->clock = optarg;
>                         }
>                         if (!clock_is_supported(ctx->instance->clock))
>                                 die("Clock %s is not supported",
>                                     ctx->instance->clock);
>                         ctx->instance->clock = strdup(ctx->instance->clock);
>                         if (!ctx->instance->clock)
>                                 die("Failed allocation");
>
> The above is more informative about the reason for the error, and also
> removes duplicate code in the check of supported clocks.
>
> -- Steve
>
>
> >                       ctx->instance->flags |= BUFFER_FL_HAS_CLOCK;
> >                       if (is_top_instance(ctx->instance))
> >                               guest_sync_set = true;
> > @@ -6159,6 +6228,13 @@ static void parse_record_options(int argc,
> >                               die("--fork option used for 'start' command only");
> >                       fork_process = true;
> >                       break;
> > +             case OPT_tsc2nsec:
> > +                     ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
> > +                                        &ctx->tsc2nsec.mult);
> > +                     if (ret)
> > +                             die("TSC to nanosecond is not supported");
> > +                     ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
> > +                     break;
> >               case OPT_quiet:
> >               case 'q':
> >                       quiet = true;
Steven Rostedt March 24, 2021, 4:22 p.m. UTC | #4
On Wed, 24 Mar 2021 17:38:37 +0200
Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:

> On Wed, Mar 24, 2021 at 5:20 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> >> > > @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc,
> > >                       break;
> > >               case 'C':
> > >                       check_instance_die(ctx->instance, "-C");
> > > -                     ctx->instance->clock = optarg;
> > > +                     if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) {  
> >
> > Hmm, why the strncmp()? Shouldn't it be a full match, not a partial one?
> >  
> > > +                             ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
> > > +                                                &ctx->tsc2nsec.mult);
> > > +                             if (ret || !clock_is_supported(NULL, TSC_CLOCK))
> > > +                                     die("Clock %s is not supported", optarg);
> > > +                             ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
> > > +                             ctx->instance->clock = strdup(TSC_CLOCK);  
> >
> > Hmm, why the strdup? below we have clock = optarg, one of them is wrong. If
> > we free instance->clock, then we can't have it set to optarg. As that was
> > the way it was before, it looks to be a separate bug that probably needs
> > its own patch.  
> 
> Actually, instance->clock is never freed - because there is no
> function to free an instance. There is allocate_instance(), but no API

Bah, I just noticed that I was confusing this with tracefs_instance :-p


> to free. That's why both are valid. I was wondering if to use strdup
> or simple assignment, and decided to allocate a memory. One day we may
> implement free_instance(), there are a lot of resources in an instance
> that should be freed.

Perhaps that "one day" should be this week ;-)

-- Steve
Tzvetomir Stoyanov (VMware) March 24, 2021, 4:56 p.m. UTC | #5
On Wed, Mar 24, 2021 at 6:22 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Wed, 24 Mar 2021 17:38:37 +0200
> Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:
>
> > On Wed, Mar 24, 2021 at 5:20 PM Steven Rostedt <rostedt@goodmis.org> wrote:
> > >> > > @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc,
> > > >                       break;
> > > >               case 'C':
> > > >                       check_instance_die(ctx->instance, "-C");
> > > > -                     ctx->instance->clock = optarg;
> > > > +                     if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) {
> > >
> > > Hmm, why the strncmp()? Shouldn't it be a full match, not a partial one?
> > >
> > > > +                             ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
> > > > +                                                &ctx->tsc2nsec.mult);
> > > > +                             if (ret || !clock_is_supported(NULL, TSC_CLOCK))
> > > > +                                     die("Clock %s is not supported", optarg);
> > > > +                             ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
> > > > +                             ctx->instance->clock = strdup(TSC_CLOCK);
> > >
> > > Hmm, why the strdup? below we have clock = optarg, one of them is wrong. If
> > > we free instance->clock, then we can't have it set to optarg. As that was
> > > the way it was before, it looks to be a separate bug that probably needs
> > > its own patch.
> >
> > Actually, instance->clock is never freed - because there is no
> > function to free an instance. There is allocate_instance(), but no API
>
> Bah, I just noticed that I was confusing this with tracefs_instance :-p
>
>
> > to free. That's why both are valid. I was wondering if to use strdup
> > or simple assignment, and decided to allocate a memory. One day we may
> > implement free_instance(), there are a lot of resources in an instance
> > that should be freed.
>
> Perhaps that "one day" should be this week ;-)

That means to implement a cleanup after each trace-cmd subcommand ?
Currently, trace-cmd just exits and not freeing that memory is not a
big problem. I was thinking many times to implement free_instance(),
but there is no sense for it if there is no cleanup logic at the end
of each command.

>
> -- Steve
Steven Rostedt March 24, 2021, 6:32 p.m. UTC | #6
On Wed, 24 Mar 2021 15:04:01 +0200
"Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:

> +static void get_tsc_offset(struct common_record_context *ctx)
> +{
> +	struct buffer_instance *instance;
> +
> +	for_all_instances(instance) {
> +		if (is_guest(instance) || !instance->clock)
> +			return;
> +
> +		ctx->tsc2nsec.offset = get_clock_now(instance->clock);
> +		return;
> +	}
> +
> +	ctx->tsc2nsec.offset = get_clock_now(NULL);
> +}

Why is the above "return" and not "continue"?

It seems inconsistent depending on the order. If the first instance found
is a guest or does not have a clock set, then we don't get the tsc offset,
but if the first instance is something else we do, even if a guest or an
instance exists.

-- Steve


> +
>  /*
>   * This function contains common code for the following commands:
>   * record, start, stream, profile.
Steven Rostedt March 24, 2021, 6:33 p.m. UTC | #7
On Wed, 24 Mar 2021 14:32:07 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Wed, 24 Mar 2021 15:04:01 +0200
> "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:
> 
> > +static void get_tsc_offset(struct common_record_context *ctx)
> > +{
> > +	struct buffer_instance *instance;
> > +
> > +	for_all_instances(instance) {
> > +		if (is_guest(instance) || !instance->clock)
> > +			return;

			^^^^^^^

That's the return I was talking about.

-- Steve

> > +
> > +		ctx->tsc2nsec.offset = get_clock_now(instance->clock);
> > +		return;
> > +	}
> > +
> > +	ctx->tsc2nsec.offset = get_clock_now(NULL);
> > +}  
> 
> Why is the above "return" and not "continue"?
> 
> It seems inconsistent depending on the order. If the first instance found
> is a guest or does not have a clock set, then we don't get the tsc offset,
> but if the first instance is something else we do, even if a guest or an
> instance exists.
> 
> -- Steve
> 
> 
> > +
> >  /*
> >   * This function contains common code for the following commands:
> >   * record, start, stream, profile.
Steven Rostedt March 24, 2021, 8:55 p.m. UTC | #8
On Wed, 24 Mar 2021 18:56:39 +0200
Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:

> > > to free. That's why both are valid. I was wondering if to use strdup
> > > or simple assignment, and decided to allocate a memory. One day we may
> > > implement free_instance(), there are a lot of resources in an instance
> > > that should be freed.  
> >
> > Perhaps that "one day" should be this week ;-)  
> 
> That means to implement a cleanup after each trace-cmd subcommand ?
> Currently, trace-cmd just exits and not freeing that memory is not a
> big problem. I was thinking many times to implement free_instance(),
> but there is no sense for it if there is no cleanup logic at the end
> of each command.

It's not top priority. But something we should keep in mind.

-- Steve
diff mbox series

Patch

diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index 6e39d27d..7773e9fc 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -172,6 +172,7 @@  enum buffer_instance_flags {
 	BUFFER_FL_GUEST		= 1 << 2,
 	BUFFER_FL_AGENT		= 1 << 3,
 	BUFFER_FL_HAS_CLOCK	= 1 << 4,
+	BUFFER_FL_TSC2NSEC	= 1 << 5,
 };
 
 struct func_list {
@@ -199,6 +200,12 @@  struct filter_pids {
 	int exclude;
 };
 
+struct tsc_nsec {
+	int mult;
+	int shift;
+	unsigned long long offset;
+};
+
 struct buffer_instance {
 	struct buffer_instance	*next;
 	char			*name;
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 5f7f5b3d..2a594736 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -57,6 +57,8 @@ 
 #define MAX_LATENCY	"tracing_max_latency"
 #define STAMP		"stamp"
 #define FUNC_STACK_TRACE "func_stack_trace"
+#define TSC_CLOCK	"x86-tsc"
+#define TSCNSEC_CLOCK	"tsc2nsec"
 
 enum trace_type {
 	TRACE_TYPE_RECORD	= 1,
@@ -198,6 +200,7 @@  struct common_record_context {
 	char *date2ts;
 	char *user;
 	int data_flags;
+	struct tsc_nsec tsc2nsec;
 
 	int record_all;
 	int total_disable;
@@ -4443,11 +4446,12 @@  static int find_ts(struct tep_event *event, struct tep_record *record,
 	return 0;
 }
 
-static unsigned long long find_time_stamp(struct tep_handle *tep)
+static unsigned long long find_time_stamp(struct tep_handle *tep,
+					  struct tracefs_instance *instance)
 {
 	unsigned long long ts = 0;
 
-	if (!tracefs_iterate_raw_events(tep, NULL, NULL, 0, find_ts, &ts))
+	if (!tracefs_iterate_raw_events(tep, instance, NULL, 0, find_ts, &ts))
 		return ts;
 
 	return 0;
@@ -4527,7 +4531,7 @@  static char *get_date_to_ts(void)
 		clock_gettime(CLOCK_REALTIME, &end);
 
 		tracecmd_disable_tracing();
-		ts = find_time_stamp(tep);
+		ts = find_time_stamp(tep, NULL);
 		if (!ts)
 			continue;
 
@@ -5388,6 +5392,7 @@  void init_top_instance(void)
 }
 
 enum {
+	OPT_tsc2nsec		= 240,
 	OPT_fork		= 241,
 	OPT_tsyncinterval	= 242,
 	OPT_user		= 243,
@@ -5710,6 +5715,61 @@  static bool clock_is_supported(struct tracefs_instance *instance, char *clock)
 	return ret != NULL;
 }
 
+#ifdef PERF
+static int get_tsc_nsec(int *shift, int *mult)
+{
+	static int cpu_shift, cpu_mult;
+	static int supported;
+	int cpus = tracecmd_count_cpus();
+	struct trace_perf perf;
+	int i;
+
+	if (supported)
+		goto out;
+
+	supported = -1;
+	if (trace_perf_init(&perf, 1, 0, getpid()))
+		return -1;
+	if (trace_perf_open(&perf))
+		return -1;
+	cpu_shift = perf.mmap->time_shift;
+	cpu_mult = perf.mmap->time_mult;
+	for (i = 1; i < cpus; i++) {
+		trace_perf_close(&perf);
+		if (trace_perf_init(&perf, 1, i, getpid()))
+			break;
+		if (trace_perf_open(&perf))
+			break;
+		if (perf.mmap->time_shift != cpu_shift ||
+		    perf.mmap->time_mult != cpu_mult) {
+			warning("Found different TSC multiplier and shift for CPU %d: %d;%d instead of %d;%d",
+				i, perf.mmap->time_mult, perf.mmap->time_shift, cpu_mult, cpu_shift);
+			break;
+		}
+	}
+	trace_perf_close(&perf);
+	if (i < cpus)
+		return -1;
+
+	supported = 1;
+out:
+	if (supported < 0)
+		return -1;
+
+	if (shift)
+		*shift = cpu_shift;
+	if (mult)
+		*mult = cpu_mult;
+
+	return 0;
+}
+#else
+static int get_tsc_nsec(int *shift, int *mult)
+{
+	return -1;
+}
+#endif
+
 static void parse_record_options(int argc,
 				 char **argv,
 				 enum trace_cmd curr_cmd,
@@ -5759,6 +5819,7 @@  static void parse_record_options(int argc,
 			{"module", required_argument, NULL, OPT_module},
 			{"tsync-interval", required_argument, NULL, OPT_tsyncinterval},
 			{"fork", no_argument, NULL, OPT_fork},
+			{"tsc2nsec", no_argument, NULL, OPT_tsc2nsec},
 			{NULL, 0, NULL, 0}
 		};
 
@@ -5904,7 +5965,15 @@  static void parse_record_options(int argc,
 			break;
 		case 'C':
 			check_instance_die(ctx->instance, "-C");
-			ctx->instance->clock = optarg;
+			if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) {
+				ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
+						   &ctx->tsc2nsec.mult);
+				if (ret || !clock_is_supported(NULL, TSC_CLOCK))
+					die("Clock %s is not supported", optarg);
+				ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
+				ctx->instance->clock = strdup(TSC_CLOCK);
+			} else
+				ctx->instance->clock = optarg;
 			ctx->instance->flags |= BUFFER_FL_HAS_CLOCK;
 			if (is_top_instance(ctx->instance))
 				guest_sync_set = true;
@@ -6159,6 +6228,13 @@  static void parse_record_options(int argc,
 				die("--fork option used for 'start' command only");
 			fork_process = true;
 			break;
+		case OPT_tsc2nsec:
+			ret = get_tsc_nsec(&ctx->tsc2nsec.shift,
+					   &ctx->tsc2nsec.mult);
+			if (ret)
+				die("TSC to nanosecond is not supported");
+			ctx->instance->flags |= BUFFER_FL_TSC2NSEC;
+			break;
 		case OPT_quiet:
 		case 'q':
 			quiet = true;
@@ -6306,6 +6382,65 @@  static bool has_local_instances(void)
 	return false;
 }
 
+/*
+ * Get the current clock value
+ */
+#define CLOCK_INST_NAME	"_clock_instance_"
+static unsigned long long get_clock_now(const char *clock)
+{
+	struct tracefs_instance *ts_instance = NULL;
+	unsigned long long ts = 0;
+	struct tep_handle *tep;
+	int tfd;
+	int ret;
+
+	/* Set up a tep to read the raw format */
+	tep = get_ftrace_tep();
+	if (!tep)
+		return 0;
+	ts_instance = tracefs_instance_create(CLOCK_INST_NAME);
+	if (!ts_instance)
+		goto out;
+	if (clock) {
+		ret = tracefs_instance_file_write(ts_instance, "trace_clock", clock);
+		if (ret < strlen(clock))
+			goto out;
+	}
+	tfd = tracefs_instance_file_open(ts_instance, "trace_marker", O_WRONLY);
+	if (tfd < 0)
+		goto out;
+	tracefs_trace_on(ts_instance);
+	ret = write(tfd, STAMP, 5);
+	tracefs_trace_off(ts_instance);
+	ts = find_time_stamp(tep, ts_instance);
+	close(tfd);
+
+out:
+	if (ts_instance) {
+		if (tracefs_instance_is_new(ts_instance))
+			tracefs_instance_destroy(ts_instance);
+		tracefs_instance_free(ts_instance);
+	}
+	tep_free(tep);
+
+	return ts;
+}
+
+static void get_tsc_offset(struct common_record_context *ctx)
+{
+	struct buffer_instance *instance;
+
+	for_all_instances(instance) {
+		if (is_guest(instance) || !instance->clock)
+			return;
+
+		ctx->tsc2nsec.offset = get_clock_now(instance->clock);
+		return;
+	}
+
+	ctx->tsc2nsec.offset = get_clock_now(NULL);
+}
+
 /*
  * This function contains common code for the following commands:
  * record, start, stream, profile.
@@ -6364,6 +6499,9 @@  static void record_trace(int argc, char **argv,
 	for_all_instances(instance)
 		set_clock(instance);
 
+	if (ctx->tsc2nsec.mult)
+		get_tsc_offset(ctx);
+
 	/* Record records the date first */
 	if (ctx->date &&
 	    ((IS_RECORD(ctx) && has_local_instances()) || IS_RECORD_AGENT(ctx)))