Message ID | 20210324130418.436206-7-tz.stoyanov@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | TSC trace clock to nanosecond conversion | expand |
On Wed, 24 Mar 2021 15:04:01 +0200 "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote: > @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc, > break; > case 'C': > check_instance_die(ctx->instance, "-C"); > - ctx->instance->clock = optarg; > + if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) { > + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, > + &ctx->tsc2nsec.mult); > + if (ret || !clock_is_supported(NULL, TSC_CLOCK)) > + die("Clock %s is not supported", optarg); > + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; > + ctx->instance->clock = strdup(TSC_CLOCK); > + } else > + ctx->instance->clock = optarg; > ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; > if (is_top_instance(ctx->instance)) I actually expected in the previous patch to have: case 'C': if (!clock_is_supported(optarg)) die("Clock %s is not supported", optarg); Then in this patch, change it to: } else if (!clock_is_supported(optarg)) { die("Clock %s is not supported", optarg); } else { ctx->instance->clock = optarg; } -- Steve
On Wed, 24 Mar 2021 15:04:01 +0200 "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote: > @@ -199,6 +200,12 @@ struct filter_pids { > int exclude; > }; > > +struct tsc_nsec { > + int mult; > + int shift; > + unsigned long long offset; > +}; > + > struct buffer_instance { > struct buffer_instance *next; > char *name; > diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c > index 5f7f5b3d..2a594736 100644 > --- a/tracecmd/trace-record.c > +++ b/tracecmd/trace-record.c > @@ -57,6 +57,8 @@ > #define MAX_LATENCY "tracing_max_latency" > #define STAMP "stamp" > #define FUNC_STACK_TRACE "func_stack_trace" > +#define TSC_CLOCK "x86-tsc" > +#define TSCNSEC_CLOCK "tsc2nsec" > > enum trace_type { > TRACE_TYPE_RECORD = 1, > @@ -198,6 +200,7 @@ struct common_record_context { > char *date2ts; > char *user; > int data_flags; > + struct tsc_nsec tsc2nsec; struct tsc_nsec is 16 bytes in size, ending with a 8 byte word. It should be added before the "int data_flags" to prevent a hole in the middle of this struct. > > int record_all; > int total_disable; > @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc, > break; > case 'C': > check_instance_die(ctx->instance, "-C"); > - ctx->instance->clock = optarg; > + if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) { Hmm, why the strncmp()? Shouldn't it be a full match, not a partial one? > + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, > + &ctx->tsc2nsec.mult); > + if (ret || !clock_is_supported(NULL, TSC_CLOCK)) > + die("Clock %s is not supported", optarg); > + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; > + ctx->instance->clock = strdup(TSC_CLOCK); Hmm, why the strdup? below we have clock = optarg, one of them is wrong. If we free instance->clock, then we can't have it set to optarg. As that was the way it was before, it looks to be a separate bug that probably needs its own patch. > + } else > + ctx->instance->clock = optarg; Actually, I think we should have the above be: case 'C': if (strcmp(optarg, TSCNSEC_CLOCK) == 0) { ret = get_tsc_nsec(&ctx->tsc2nsec.shift, &ctx->tsc2nsec.mult); if (ret) die("TSC to nanosecond is not supported"); ctx->instance->clock = TSC_CLOCK; } else { ctx->instance->clock = optarg; } if (!clock_is_supported(ctx->instance->clock)) die("Clock %s is not supported", ctx->instance->clock); ctx->instance->clock = strdup(ctx->instance->clock); if (!ctx->instance->clock) die("Failed allocation"); The above is more informative about the reason for the error, and also removes duplicate code in the check of supported clocks. -- Steve > ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; > if (is_top_instance(ctx->instance)) > guest_sync_set = true; > @@ -6159,6 +6228,13 @@ static void parse_record_options(int argc, > die("--fork option used for 'start' command only"); > fork_process = true; > break; > + case OPT_tsc2nsec: > + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, > + &ctx->tsc2nsec.mult); > + if (ret) > + die("TSC to nanosecond is not supported"); > + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; > + break; > case OPT_quiet: > case 'q': > quiet = true;
On Wed, Mar 24, 2021 at 5:20 PM Steven Rostedt <rostedt@goodmis.org> wrote: > > On Wed, 24 Mar 2021 15:04:01 +0200 > "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote: > > > @@ -199,6 +200,12 @@ struct filter_pids { > > int exclude; > > }; > > > > +struct tsc_nsec { > > + int mult; > > + int shift; > > + unsigned long long offset; > > +}; > > + > > struct buffer_instance { > > struct buffer_instance *next; > > char *name; > > diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c > > index 5f7f5b3d..2a594736 100644 > > --- a/tracecmd/trace-record.c > > +++ b/tracecmd/trace-record.c > > @@ -57,6 +57,8 @@ > > #define MAX_LATENCY "tracing_max_latency" > > #define STAMP "stamp" > > #define FUNC_STACK_TRACE "func_stack_trace" > > +#define TSC_CLOCK "x86-tsc" > > +#define TSCNSEC_CLOCK "tsc2nsec" > > > > enum trace_type { > > TRACE_TYPE_RECORD = 1, > > @@ -198,6 +200,7 @@ struct common_record_context { > > char *date2ts; > > char *user; > > int data_flags; > > + struct tsc_nsec tsc2nsec; > > struct tsc_nsec is 16 bytes in size, ending with a 8 byte word. It should > be added before the "int data_flags" to prevent a hole in the middle of > this struct. > > > > > > int record_all; > > int total_disable; > > > > @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc, > > break; > > case 'C': > > check_instance_die(ctx->instance, "-C"); > > - ctx->instance->clock = optarg; > > + if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) { > > Hmm, why the strncmp()? Shouldn't it be a full match, not a partial one? > > > + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, > > + &ctx->tsc2nsec.mult); > > + if (ret || !clock_is_supported(NULL, TSC_CLOCK)) > > + die("Clock %s is not supported", optarg); > > + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; > > + ctx->instance->clock = strdup(TSC_CLOCK); > > Hmm, why the strdup? below we have clock = optarg, one of them is wrong. If > we free instance->clock, then we can't have it set to optarg. As that was > the way it was before, it looks to be a separate bug that probably needs > its own patch. Actually, instance->clock is never freed - because there is no function to free an instance. There is allocate_instance(), but no API to free. That's why both are valid. I was wondering if to use strdup or simple assignment, and decided to allocate a memory. One day we may implement free_instance(), there are a lot of resources in an instance that should be freed. > > > + } else > > + ctx->instance->clock = optarg; > > Actually, I think we should have the above be: > > case 'C': > if (strcmp(optarg, TSCNSEC_CLOCK) == 0) { > ret = get_tsc_nsec(&ctx->tsc2nsec.shift, > &ctx->tsc2nsec.mult); > if (ret) > die("TSC to nanosecond is not supported"); > > ctx->instance->clock = TSC_CLOCK; > } else { > ctx->instance->clock = optarg; > } > if (!clock_is_supported(ctx->instance->clock)) > die("Clock %s is not supported", > ctx->instance->clock); > ctx->instance->clock = strdup(ctx->instance->clock); > if (!ctx->instance->clock) > die("Failed allocation"); > > The above is more informative about the reason for the error, and also > removes duplicate code in the check of supported clocks. > > -- Steve > > > > ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; > > if (is_top_instance(ctx->instance)) > > guest_sync_set = true; > > @@ -6159,6 +6228,13 @@ static void parse_record_options(int argc, > > die("--fork option used for 'start' command only"); > > fork_process = true; > > break; > > + case OPT_tsc2nsec: > > + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, > > + &ctx->tsc2nsec.mult); > > + if (ret) > > + die("TSC to nanosecond is not supported"); > > + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; > > + break; > > case OPT_quiet: > > case 'q': > > quiet = true;
On Wed, 24 Mar 2021 17:38:37 +0200 Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote: > On Wed, Mar 24, 2021 at 5:20 PM Steven Rostedt <rostedt@goodmis.org> wrote: > >> > > @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc, > > > break; > > > case 'C': > > > check_instance_die(ctx->instance, "-C"); > > > - ctx->instance->clock = optarg; > > > + if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) { > > > > Hmm, why the strncmp()? Shouldn't it be a full match, not a partial one? > > > > > + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, > > > + &ctx->tsc2nsec.mult); > > > + if (ret || !clock_is_supported(NULL, TSC_CLOCK)) > > > + die("Clock %s is not supported", optarg); > > > + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; > > > + ctx->instance->clock = strdup(TSC_CLOCK); > > > > Hmm, why the strdup? below we have clock = optarg, one of them is wrong. If > > we free instance->clock, then we can't have it set to optarg. As that was > > the way it was before, it looks to be a separate bug that probably needs > > its own patch. > > Actually, instance->clock is never freed - because there is no > function to free an instance. There is allocate_instance(), but no API Bah, I just noticed that I was confusing this with tracefs_instance :-p > to free. That's why both are valid. I was wondering if to use strdup > or simple assignment, and decided to allocate a memory. One day we may > implement free_instance(), there are a lot of resources in an instance > that should be freed. Perhaps that "one day" should be this week ;-) -- Steve
On Wed, Mar 24, 2021 at 6:22 PM Steven Rostedt <rostedt@goodmis.org> wrote: > > On Wed, 24 Mar 2021 17:38:37 +0200 > Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote: > > > On Wed, Mar 24, 2021 at 5:20 PM Steven Rostedt <rostedt@goodmis.org> wrote: > > >> > > @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc, > > > > break; > > > > case 'C': > > > > check_instance_die(ctx->instance, "-C"); > > > > - ctx->instance->clock = optarg; > > > > + if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) { > > > > > > Hmm, why the strncmp()? Shouldn't it be a full match, not a partial one? > > > > > > > + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, > > > > + &ctx->tsc2nsec.mult); > > > > + if (ret || !clock_is_supported(NULL, TSC_CLOCK)) > > > > + die("Clock %s is not supported", optarg); > > > > + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; > > > > + ctx->instance->clock = strdup(TSC_CLOCK); > > > > > > Hmm, why the strdup? below we have clock = optarg, one of them is wrong. If > > > we free instance->clock, then we can't have it set to optarg. As that was > > > the way it was before, it looks to be a separate bug that probably needs > > > its own patch. > > > > Actually, instance->clock is never freed - because there is no > > function to free an instance. There is allocate_instance(), but no API > > Bah, I just noticed that I was confusing this with tracefs_instance :-p > > > > to free. That's why both are valid. I was wondering if to use strdup > > or simple assignment, and decided to allocate a memory. One day we may > > implement free_instance(), there are a lot of resources in an instance > > that should be freed. > > Perhaps that "one day" should be this week ;-) That means to implement a cleanup after each trace-cmd subcommand ? Currently, trace-cmd just exits and not freeing that memory is not a big problem. I was thinking many times to implement free_instance(), but there is no sense for it if there is no cleanup logic at the end of each command. > > -- Steve
On Wed, 24 Mar 2021 15:04:01 +0200 "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote: > +static void get_tsc_offset(struct common_record_context *ctx) > +{ > + struct buffer_instance *instance; > + > + for_all_instances(instance) { > + if (is_guest(instance) || !instance->clock) > + return; > + > + ctx->tsc2nsec.offset = get_clock_now(instance->clock); > + return; > + } > + > + ctx->tsc2nsec.offset = get_clock_now(NULL); > +} Why is the above "return" and not "continue"? It seems inconsistent depending on the order. If the first instance found is a guest or does not have a clock set, then we don't get the tsc offset, but if the first instance is something else we do, even if a guest or an instance exists. -- Steve > + > /* > * This function contains common code for the following commands: > * record, start, stream, profile.
On Wed, 24 Mar 2021 14:32:07 -0400 Steven Rostedt <rostedt@goodmis.org> wrote: > On Wed, 24 Mar 2021 15:04:01 +0200 > "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote: > > > +static void get_tsc_offset(struct common_record_context *ctx) > > +{ > > + struct buffer_instance *instance; > > + > > + for_all_instances(instance) { > > + if (is_guest(instance) || !instance->clock) > > + return; ^^^^^^^ That's the return I was talking about. -- Steve > > + > > + ctx->tsc2nsec.offset = get_clock_now(instance->clock); > > + return; > > + } > > + > > + ctx->tsc2nsec.offset = get_clock_now(NULL); > > +} > > Why is the above "return" and not "continue"? > > It seems inconsistent depending on the order. If the first instance found > is a guest or does not have a clock set, then we don't get the tsc offset, > but if the first instance is something else we do, even if a guest or an > instance exists. > > -- Steve > > > > + > > /* > > * This function contains common code for the following commands: > > * record, start, stream, profile.
On Wed, 24 Mar 2021 18:56:39 +0200 Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote: > > > to free. That's why both are valid. I was wondering if to use strdup > > > or simple assignment, and decided to allocate a memory. One day we may > > > implement free_instance(), there are a lot of resources in an instance > > > that should be freed. > > > > Perhaps that "one day" should be this week ;-) > > That means to implement a cleanup after each trace-cmd subcommand ? > Currently, trace-cmd just exits and not freeing that memory is not a > big problem. I was thinking many times to implement free_instance(), > but there is no sense for it if there is no cleanup logic at the end > of each command. It's not top priority. But something we should keep in mind. -- Steve
diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h index 6e39d27d..7773e9fc 100644 --- a/tracecmd/include/trace-local.h +++ b/tracecmd/include/trace-local.h @@ -172,6 +172,7 @@ enum buffer_instance_flags { BUFFER_FL_GUEST = 1 << 2, BUFFER_FL_AGENT = 1 << 3, BUFFER_FL_HAS_CLOCK = 1 << 4, + BUFFER_FL_TSC2NSEC = 1 << 5, }; struct func_list { @@ -199,6 +200,12 @@ struct filter_pids { int exclude; }; +struct tsc_nsec { + int mult; + int shift; + unsigned long long offset; +}; + struct buffer_instance { struct buffer_instance *next; char *name; diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c index 5f7f5b3d..2a594736 100644 --- a/tracecmd/trace-record.c +++ b/tracecmd/trace-record.c @@ -57,6 +57,8 @@ #define MAX_LATENCY "tracing_max_latency" #define STAMP "stamp" #define FUNC_STACK_TRACE "func_stack_trace" +#define TSC_CLOCK "x86-tsc" +#define TSCNSEC_CLOCK "tsc2nsec" enum trace_type { TRACE_TYPE_RECORD = 1, @@ -198,6 +200,7 @@ struct common_record_context { char *date2ts; char *user; int data_flags; + struct tsc_nsec tsc2nsec; int record_all; int total_disable; @@ -4443,11 +4446,12 @@ static int find_ts(struct tep_event *event, struct tep_record *record, return 0; } -static unsigned long long find_time_stamp(struct tep_handle *tep) +static unsigned long long find_time_stamp(struct tep_handle *tep, + struct tracefs_instance *instance) { unsigned long long ts = 0; - if (!tracefs_iterate_raw_events(tep, NULL, NULL, 0, find_ts, &ts)) + if (!tracefs_iterate_raw_events(tep, instance, NULL, 0, find_ts, &ts)) return ts; return 0; @@ -4527,7 +4531,7 @@ static char *get_date_to_ts(void) clock_gettime(CLOCK_REALTIME, &end); tracecmd_disable_tracing(); - ts = find_time_stamp(tep); + ts = find_time_stamp(tep, NULL); if (!ts) continue; @@ -5388,6 +5392,7 @@ void init_top_instance(void) } enum { + OPT_tsc2nsec = 240, OPT_fork = 241, OPT_tsyncinterval = 242, OPT_user = 243, @@ -5710,6 +5715,61 @@ static bool clock_is_supported(struct tracefs_instance *instance, char *clock) return ret != NULL; } +#ifdef PERF +static int get_tsc_nsec(int *shift, int *mult) +{ + static int cpu_shift, cpu_mult; + static int supported; + int cpus = tracecmd_count_cpus(); + struct trace_perf perf; + int i; + + if (supported) + goto out; + + supported = -1; + if (trace_perf_init(&perf, 1, 0, getpid())) + return -1; + if (trace_perf_open(&perf)) + return -1; + cpu_shift = perf.mmap->time_shift; + cpu_mult = perf.mmap->time_mult; + for (i = 1; i < cpus; i++) { + trace_perf_close(&perf); + if (trace_perf_init(&perf, 1, i, getpid())) + break; + if (trace_perf_open(&perf)) + break; + if (perf.mmap->time_shift != cpu_shift || + perf.mmap->time_mult != cpu_mult) { + warning("Found different TSC multiplier and shift for CPU %d: %d;%d instead of %d;%d", + i, perf.mmap->time_mult, perf.mmap->time_shift, cpu_mult, cpu_shift); + break; + } + } + trace_perf_close(&perf); + if (i < cpus) + return -1; + + supported = 1; +out: + if (supported < 0) + return -1; + + if (shift) + *shift = cpu_shift; + if (mult) + *mult = cpu_mult; + + return 0; +} +#else +static int get_tsc_nsec(int *shift, int *mult) +{ + return -1; +} +#endif + static void parse_record_options(int argc, char **argv, enum trace_cmd curr_cmd, @@ -5759,6 +5819,7 @@ static void parse_record_options(int argc, {"module", required_argument, NULL, OPT_module}, {"tsync-interval", required_argument, NULL, OPT_tsyncinterval}, {"fork", no_argument, NULL, OPT_fork}, + {"tsc2nsec", no_argument, NULL, OPT_tsc2nsec}, {NULL, 0, NULL, 0} }; @@ -5904,7 +5965,15 @@ static void parse_record_options(int argc, break; case 'C': check_instance_die(ctx->instance, "-C"); - ctx->instance->clock = optarg; + if (!strncmp(optarg, TSCNSEC_CLOCK, strlen(TSCNSEC_CLOCK))) { + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, + &ctx->tsc2nsec.mult); + if (ret || !clock_is_supported(NULL, TSC_CLOCK)) + die("Clock %s is not supported", optarg); + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; + ctx->instance->clock = strdup(TSC_CLOCK); + } else + ctx->instance->clock = optarg; ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; if (is_top_instance(ctx->instance)) guest_sync_set = true; @@ -6159,6 +6228,13 @@ static void parse_record_options(int argc, die("--fork option used for 'start' command only"); fork_process = true; break; + case OPT_tsc2nsec: + ret = get_tsc_nsec(&ctx->tsc2nsec.shift, + &ctx->tsc2nsec.mult); + if (ret) + die("TSC to nanosecond is not supported"); + ctx->instance->flags |= BUFFER_FL_TSC2NSEC; + break; case OPT_quiet: case 'q': quiet = true; @@ -6306,6 +6382,65 @@ static bool has_local_instances(void) return false; } +/* + * Get the current clock value + */ +#define CLOCK_INST_NAME "_clock_instance_" +static unsigned long long get_clock_now(const char *clock) +{ + struct tracefs_instance *ts_instance = NULL; + unsigned long long ts = 0; + struct tep_handle *tep; + int tfd; + int ret; + + /* Set up a tep to read the raw format */ + tep = get_ftrace_tep(); + if (!tep) + return 0; + ts_instance = tracefs_instance_create(CLOCK_INST_NAME); + if (!ts_instance) + goto out; + if (clock) { + ret = tracefs_instance_file_write(ts_instance, "trace_clock", clock); + if (ret < strlen(clock)) + goto out; + } + tfd = tracefs_instance_file_open(ts_instance, "trace_marker", O_WRONLY); + if (tfd < 0) + goto out; + tracefs_trace_on(ts_instance); + ret = write(tfd, STAMP, 5); + tracefs_trace_off(ts_instance); + ts = find_time_stamp(tep, ts_instance); + close(tfd); + +out: + if (ts_instance) { + if (tracefs_instance_is_new(ts_instance)) + tracefs_instance_destroy(ts_instance); + tracefs_instance_free(ts_instance); + } + tep_free(tep); + + return ts; +} + +static void get_tsc_offset(struct common_record_context *ctx) +{ + struct buffer_instance *instance; + + for_all_instances(instance) { + if (is_guest(instance) || !instance->clock) + return; + + ctx->tsc2nsec.offset = get_clock_now(instance->clock); + return; + } + + ctx->tsc2nsec.offset = get_clock_now(NULL); +} + /* * This function contains common code for the following commands: * record, start, stream, profile. @@ -6364,6 +6499,9 @@ static void record_trace(int argc, char **argv, for_all_instances(instance) set_clock(instance); + if (ctx->tsc2nsec.mult) + get_tsc_offset(ctx); + /* Record records the date first */ if (ctx->date && ((IS_RECORD(ctx) && has_local_instances()) || IS_RECORD_AGENT(ctx)))
A new clock is added to "trace-cmd record" command: trace-cmd record -C tsc2nsec ... which uses x86-tsc ftrace clock and converts the timestamps from tsc to nanoseconds, using data from perf kernel interface. A new options is added also: trace-cmd record --tsc2nsec ... which does not change the current trace clock, just convert whatever it is to nanoseconds, using the same data from perf kernel interface. Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com> --- tracecmd/include/trace-local.h | 7 ++ tracecmd/trace-record.c | 146 ++++++++++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 4 deletions(-)