diff mbox series

[v7,9/9] trace-cmd [POC]: Implemented timestamps synch algorithm, using vsock events.

Message ID 20190319155536.19381-10-tstoyanov@vmware.com (mailing list archive)
State Superseded
Headers show
Series trace-cmd: Timetamps sync between host and guest machines, relying on vsock events. | expand

Commit Message

Tzvetomir Stoyanov March 19, 2019, 3:55 p.m. UTC
This is a POC patch, implementing an algorithm for syncing timestamps between
host and guest machines, using vsock trace events to catch the host / guest time.

Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
---
 include/trace-cmd/trace-cmd.h  |  21 +-
 tracecmd/Makefile              |   1 +
 tracecmd/include/trace-local.h |  33 +-
 tracecmd/trace-agent.c         |  13 +-
 tracecmd/trace-msg.c           | 208 ++++++++-
 tracecmd/trace-record.c        |  85 +++-
 tracecmd/trace-timesync.c      | 825 +++++++++++++++++++++++++++++++++
 7 files changed, 1150 insertions(+), 36 deletions(-)
 create mode 100644 tracecmd/trace-timesync.c

Comments

Slavomir Kaslev March 19, 2019, 5:14 p.m. UTC | #1
On Tue, Mar 19, 2019 at 5:56 PM Tzvetomir Stoyanov <tstoyanov@vmware.com> wrote:
>
> This is a POC patch, implementing an algorithm for syncing timestamps between
> host and guest machines, using vsock trace events to catch the host / guest time.
>
> Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
> ---
>  include/trace-cmd/trace-cmd.h  |  21 +-
>  tracecmd/Makefile              |   1 +
>  tracecmd/include/trace-local.h |  33 +-
>  tracecmd/trace-agent.c         |  13 +-
>  tracecmd/trace-msg.c           | 208 ++++++++-
>  tracecmd/trace-record.c        |  85 +++-
>  tracecmd/trace-timesync.c      | 825 +++++++++++++++++++++++++++++++++
>  7 files changed, 1150 insertions(+), 36 deletions(-)
>  create mode 100644 tracecmd/trace-timesync.c
>
> diff --git a/include/trace-cmd/trace-cmd.h b/include/trace-cmd/trace-cmd.h
> index 5552396..905f3ec 100644
> --- a/include/trace-cmd/trace-cmd.h
> +++ b/include/trace-cmd/trace-cmd.h
> @@ -341,16 +341,29 @@ bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle);
>  void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle);
>
>  int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle,
> -                               int argc, char **argv, bool use_fifos);
> +                               int argc, char **argv, bool use_fifos,
> +                               bool do_tsync);
>  int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
> -                               int *argc, char ***argv, bool *use_fifos);
> +                               int *argc, char ***argv, bool *use_fifos,
> +                               bool *do_tsync);
>
>  int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
>                                  int nr_cpus, int page_size,
> -                                unsigned int *ports, bool use_fifos);
> +                                unsigned int *ports, bool use_fifos,
> +                                bool do_tsync);
>  int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
>                                  int *nr_cpus, int *page_size,
> -                                unsigned int **ports, bool *use_fifos);
> +                                unsigned int **ports, bool *use_fifos,
> +                                bool *do_tsync);
> +
> +struct tracecmd_clock_sync;
> +
> +int tracecmd_msg_rcv_time_sync(struct tracecmd_msg_handle *msg_handle,
> +                              struct tracecmd_clock_sync *clock_sync,
> +                              long long *offset, long long *timestamp);
> +int tracecmd_msg_snd_time_sync(struct tracecmd_msg_handle *msg_handle,
> +                              struct tracecmd_clock_sync *clock_sync,
> +                              long long *offset, long long *timestamp);
>
>  /* --- Plugin handling --- */
>  extern struct tep_plugin_option trace_ftrace_options[];
> diff --git a/tracecmd/Makefile b/tracecmd/Makefile
> index d3e3080..8a73bf7 100644
> --- a/tracecmd/Makefile
> +++ b/tracecmd/Makefile
> @@ -32,6 +32,7 @@ TRACE_CMD_OBJS += trace-list.o
>  TRACE_CMD_OBJS += trace-output.o
>  TRACE_CMD_OBJS += trace-usage.o
>  TRACE_CMD_OBJS += trace-msg.o
> +TRACE_CMD_OBJS += trace-timesync.o

Shouldn't this be on only if $VSOCK_DEFINED, same as trace-agent.o below?

>
>  ifeq ($(VSOCK_DEFINED), 1)
>  TRACE_CMD_OBJS += trace-agent.o
> diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
> index 62f5e47..7688b02 100644
> --- a/tracecmd/include/trace-local.h
> +++ b/tracecmd/include/trace-local.h
> @@ -26,6 +26,7 @@ extern int quiet;
>  typedef unsigned long long u64;
>
>  struct buffer_instance;
> +struct tracecmd_clock_sync;
>
>  /* for local shared information with trace-cmd executable */
>
> @@ -101,7 +102,7 @@ void trace_usage(int argc, char **argv);
>
>  int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
>                        int cpus, int *fds,
> -                      int argc, char **argv, bool use_fifos);
> +                      int argc, char **argv, bool use_fifos, bool do_tsync);
>
>  struct hook_list;
>
> @@ -214,6 +215,12 @@ struct buffer_instance {
>         int                     port;
>         int                     *fds;
>         bool                    use_fifos;
> +       bool                    do_tsync;
> +
> +       struct tracecmd_clock_sync *clock_sync;
> +       int                     time_sync_count;
> +       long long               *time_sync_ts;
> +       long long               *time_sync_offsets;
>  };
>
>  extern struct buffer_instance top_instance;
> @@ -235,6 +242,30 @@ void update_first_instance(struct buffer_instance *instance, int topt);
>  void show_instance_file(struct buffer_instance *instance, const char *name);
>
>  int count_cpus(void);
> +
> +struct tracecmd_time_sync_event {
> +       int                     id;
> +       int                     cpu;
> +       int                     pid;
> +       unsigned long long      ts;
> +};
> +
> +int tracecmd_clock_get_peer(struct tracecmd_clock_sync *clock_context,
> +                           unsigned int *remote_cid, unsigned int *remote_port);
> +bool tracecmd_time_sync_check(void);
> +void tracecmd_clock_context_free(struct buffer_instance *instance);
> +int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int cpu,
> +                             struct tracecmd_time_sync_event *event);
> +void tracecmd_clock_synch_enable(struct tracecmd_clock_sync *clock_context);
> +void tracecmd_clock_synch_disable(struct tracecmd_clock_sync *clock_context);
> +void tracecmd_clock_synch_calc_reset(struct tracecmd_clock_sync *clock_context);
> +void tracecmd_clock_synch_calc_probe(struct tracecmd_clock_sync *clock_context,
> +                                    long long ts_local, long long ts_remote);
> +int tracecmd_clock_synch_calc(struct tracecmd_clock_sync *clock_context,
> +                              long long *offset_ret, long long *time_ret);
> +void sync_time_with_host_v3(struct buffer_instance *instance);
> +void sync_time_with_guest_v3(struct buffer_instance *instance);
> +
>  void write_tracing_on(struct buffer_instance *instance, int on);
>  char *get_instance_dir(struct buffer_instance *instance);
>  int write_instance_file(struct buffer_instance *instance,
> diff --git a/tracecmd/trace-agent.c b/tracecmd/trace-agent.c
> index 7389f72..2189bc1 100644
> --- a/tracecmd/trace-agent.c
> +++ b/tracecmd/trace-agent.c
> @@ -116,6 +116,7 @@ static void agent_handle(int sd, int nr_cpus, int page_size)
>         char **argv = NULL;
>         int argc = 0;
>         bool use_fifos;
> +       bool do_tsync;
>         int *fds;
>         int ret;
>
> @@ -128,7 +129,8 @@ static void agent_handle(int sd, int nr_cpus, int page_size)
>         if (!msg_handle)
>                 die("Failed to allocate message handle");
>
> -       ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv, &use_fifos);
> +       ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv,
> +                                         &use_fifos, &do_tsync);
>         if (ret < 0)
>                 die("Failed to receive trace request");
>
> @@ -137,13 +139,18 @@ static void agent_handle(int sd, int nr_cpus, int page_size)
>
>         if (!use_fifos)
>                 make_vsocks(nr_cpus, fds, ports);
> +       if (do_tsync) {
> +               do_tsync = tracecmd_time_sync_check();
> +               if (!do_tsync)
> +                       warning("Failed to negotiate timestamps synchronization with the host");
> +       }
>
>         ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size,
> -                                          ports, use_fifos);
> +                                          ports, use_fifos, do_tsync);
>         if (ret < 0)
>                 die("Failed to send trace response");
>
> -       trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos);
> +       trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos, do_tsync);
>
>         free(argv[0]);
>         free(argv);
> diff --git a/tracecmd/trace-msg.c b/tracecmd/trace-msg.c
> index 065a01e..bb3c060 100644
> --- a/tracecmd/trace-msg.c
> +++ b/tracecmd/trace-msg.c
> @@ -26,8 +26,12 @@
>  #include "trace-local.h"
>  #include "trace-msg.h"
>
> +typedef __u16 u16;
> +typedef __s16 s16;
>  typedef __u32 u32;
>  typedef __be32 be32;
> +typedef __u64 u64;
> +typedef __s64 s64;
>
>  static inline void dprint(const char *fmt, ...)
>  {
> @@ -50,6 +54,9 @@ static inline void dprint(const char *fmt, ...)
>
>  unsigned int page_size;
>
> +/* Try a few times to get an accurate time sync */
> +#define TSYNC_TRIES 300
> +
>  struct tracecmd_msg_opt {
>         be32 size;
>         be32 opt_cmd;
> @@ -76,6 +83,20 @@ struct tracecmd_msg_trace_resp {
>         be32 page_size;
>  } __attribute__((packed));
>
> +struct tracecmd_msg_tsync_stop {
> +       long long offset;
> +       long long timestamp;
> +} __attribute__((packed));
> +
> +struct tracecmd_msg_tsync_req {
> +       u16 cpu;
> +} __attribute__((packed));
> +
> +struct tracecmd_msg_tsync_resp {
> +       u64 time;
> +} __attribute__((packed));
> +
> +
>  struct tracecmd_msg_header {
>         be32    size;
>         be32    cmd;
> @@ -83,14 +104,19 @@ struct tracecmd_msg_header {
>  } __attribute__((packed));
>
>  #define MSG_MAP                                                                \
> -       C(CLOSE,        0,      0),                                     \
> -       C(TINIT,        1,      sizeof(struct tracecmd_msg_tinit)),     \
> -       C(RINIT,        2,      sizeof(struct tracecmd_msg_rinit)),     \
> -       C(SEND_DATA,    3,      0),                                     \
> -       C(FIN_DATA,     4,      0),                                     \
> -       C(NOT_SUPP,     5,      0),                                     \
> -       C(TRACE_REQ,    6,      sizeof(struct tracecmd_msg_trace_req)), \
> -       C(TRACE_RESP,   7,      sizeof(struct tracecmd_msg_trace_resp)),
> +       C(CLOSE,          0,    0),                                     \
> +       C(TINIT,          1,    sizeof(struct tracecmd_msg_tinit)),     \
> +       C(RINIT,          2,    sizeof(struct tracecmd_msg_rinit)),     \
> +       C(SEND_DATA,      3,    0),                                     \
> +       C(FIN_DATA,       4,    0),                                     \
> +       C(NOT_SUPP,       5,    0),                                     \
> +       C(TRACE_REQ,      6,    sizeof(struct tracecmd_msg_trace_req)), \
> +       C(TRACE_RESP,     7,    sizeof(struct tracecmd_msg_trace_resp)),\
> +       C(TSYNC_START,    8,    0),                                     \
> +       C(TSYNC_STOP,     9,    sizeof(struct tracecmd_msg_tsync_stop)),\
> +       C(TSYNC_PROBE,    11,   0),                                     \
> +       C(TSYNC_REQ,      11,   sizeof(struct tracecmd_msg_tsync_req)), \
> +       C(TSYNC_RESP,     12,   sizeof(struct tracecmd_msg_tsync_resp)),
>
>  #undef C
>  #define C(a,b,c)       MSG_##a = b
> @@ -120,10 +146,13 @@ static const char *cmd_to_name(int cmd)
>  struct tracecmd_msg {
>         struct tracecmd_msg_header              hdr;
>         union {
> -               struct tracecmd_msg_tinit       tinit;
> -               struct tracecmd_msg_rinit       rinit;
> -               struct tracecmd_msg_trace_req   trace_req;
> -               struct tracecmd_msg_trace_resp  trace_resp;
> +               struct tracecmd_msg_tinit               tinit;
> +               struct tracecmd_msg_rinit               rinit;
> +               struct tracecmd_msg_trace_req           trace_req;
> +               struct tracecmd_msg_trace_resp          trace_resp;
> +               struct tracecmd_msg_tsync_stop          ts_stop;
> +               struct tracecmd_msg_tsync_req           ts_req;
> +               struct tracecmd_msg_tsync_resp          ts_resp;
>         };
>         union {
>                 struct tracecmd_msg_opt         *opt;
> @@ -161,6 +190,7 @@ static int msg_write(int fd, struct tracecmd_msg *msg)
>
>  enum msg_trace_flags {
>         MSG_TRACE_USE_FIFOS = 1 << 0,
> +       MSG_TRACE_DO_TSYNC =  1 << 1,
>  };
>
>  enum msg_opt_command {
> @@ -744,7 +774,8 @@ error:
>         return ret;
>  }
>
> -static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool use_fifos)
> +static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv,
> +                         bool use_fifos, bool do_tsync)
>  {
>         size_t args_size = 0;
>         char *p;
> @@ -754,7 +785,12 @@ static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool
>                 args_size += strlen(argv[i]) + 1;
>
>         msg->hdr.size = htonl(ntohl(msg->hdr.size) + args_size);
> -       msg->trace_req.flags = use_fifos ? htonl(MSG_TRACE_USE_FIFOS) : htonl(0);
> +       msg->trace_req.flags = 0;
> +       if (use_fifos)
> +               msg->trace_req.flags |= MSG_TRACE_USE_FIFOS;
> +       if (do_tsync)
> +               msg->trace_req.flags |= MSG_TRACE_DO_TSYNC;
> +       msg->trace_req.flags = htonl(msg->trace_req.flags);
>         msg->trace_req.argc = htonl(argc);
>         msg->buf = calloc(args_size, 1);
>         if (!msg->buf)
> @@ -768,13 +804,14 @@ static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool
>  }
>
>  int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle,
> -                               int argc, char **argv, bool use_fifos)
> +                               int argc, char **argv, bool use_fifos,
> +                               bool do_tsync)
>  {
>         struct tracecmd_msg msg;
>         int ret;
>
>         tracecmd_msg_init(MSG_TRACE_REQ, &msg);
> -       ret = make_trace_req(&msg, argc, argv, use_fifos);
> +       ret = make_trace_req(&msg, argc, argv, use_fifos, do_tsync);
>         if (ret < 0)
>                 return ret;
>
> @@ -787,7 +824,8 @@ int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle,
>    *     free(argv);
>    */
>  int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
> -                               int *argc, char ***argv, bool *use_fifos)
> +                               int *argc, char ***argv, bool *use_fifos,
> +                               bool *do_tsync)
>  {
>         struct tracecmd_msg msg;
>         char *p, *buf_end, **args;
> @@ -840,6 +878,7 @@ int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
>         *argc = nr_args;
>         *argv = args;
>         *use_fifos = ntohl(msg.trace_req.flags) & MSG_TRACE_USE_FIFOS;
> +       *do_tsync = ntohl(msg.trace_req.flags) & MSG_TRACE_DO_TSYNC;
>
>         /*
>          * On success we're passing msg.buf to the caller through argv[0] so we
> @@ -859,14 +898,136 @@ out:
>         return ret;
>  }
>
> +int tracecmd_msg_rcv_time_sync(struct tracecmd_msg_handle *msg_handle,
> +                              struct tracecmd_clock_sync *clock_context,
> +                              long long *offset, long long *timestamp)
> +{
> +       struct tracecmd_time_sync_event event;
> +       unsigned int remote_cid = 0;
> +       struct tracecmd_msg msg;
> +       int cpu_pid, ret;
> +
> +       if (clock_context == NULL || msg_handle == NULL)
> +               return 0;
> +
> +       if (offset)
> +               *offset = 0;
> +
> +       ret = tracecmd_msg_recv(msg_handle->fd, &msg);
> +       if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP)
> +               return 0;
> +       if (ntohl(msg.hdr.cmd) != MSG_TSYNC_START) {
> +               handle_unexpected_msg(msg_handle, &msg);
> +               return 0;
> +       }
> +
> +       tracecmd_clock_get_peer(clock_context, &remote_cid, NULL);
> +       tracecmd_msg_init(MSG_TSYNC_START, &msg);
> +       tracecmd_msg_send(msg_handle->fd, &msg);
> +       tracecmd_clock_synch_enable(clock_context);
> +
> +       do {
> +               memset(&event, 0, sizeof(event));
> +               ret = tracecmd_msg_recv(msg_handle->fd, &msg);
> +               if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP)
> +                       break;
> +               if (ntohl(msg.hdr.cmd) != MSG_TSYNC_PROBE) {
> +                       handle_unexpected_msg(msg_handle, &msg);
> +                       break;
> +               }
> +               ret = tracecmd_msg_recv(msg_handle->fd, &msg);
> +               if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP)
> +                       break;
> +               if (ntohl(msg.hdr.cmd) != MSG_TSYNC_REQ) {
> +                       handle_unexpected_msg(msg_handle, &msg);
> +                       break;
> +               }
> +               /* Get kvm event related to the corresponding VCPU context */
> +               cpu_pid = get_guest_vcpu_pid(remote_cid, ntohs(msg.ts_req.cpu));
> +               tracecmd_clock_find_event(clock_context, cpu_pid, &event);
> +               tracecmd_msg_init(MSG_TSYNC_RESP, &msg);
> +               msg.ts_resp.time = htonll(event.ts);
> +               tracecmd_msg_send(msg_handle->fd, &msg);
> +       } while (true);
> +
> +       tracecmd_clock_synch_disable(clock_context);
> +
> +       if (ret >= 0 && ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) {
> +               if (offset)
> +                       *offset = ntohll(msg.ts_stop.offset);
> +               if (timestamp)
> +                       *timestamp = ntohll(msg.ts_stop.timestamp);
> +       }
> +
> +       msg_free(&msg);
> +       return 0;
> +}
> +
> +int tracecmd_msg_snd_time_sync(struct tracecmd_msg_handle *msg_handle,
> +                              struct tracecmd_clock_sync *clock_context,
> +                              long long *offset, long long *timestamp)
> +{
> +       struct tracecmd_time_sync_event event;
> +       int sync_loop = TSYNC_TRIES;
> +       struct tracecmd_msg msg;
> +       int ret;
> +
> +       if (clock_context == NULL || msg_handle == NULL)
> +               return 0;
> +
> +       tracecmd_msg_init(MSG_TSYNC_START, &msg);
> +       tracecmd_msg_send(msg_handle->fd, &msg);
> +       ret = tracecmd_msg_recv(msg_handle->fd, &msg);
> +       if (ret < 0 || ntohl(msg.hdr.cmd) != MSG_TSYNC_START)
> +               return 0;
> +       tracecmd_clock_synch_calc_reset(clock_context);
> +       tracecmd_clock_synch_enable(clock_context);
> +
> +       do {
> +               tracecmd_msg_init(MSG_TSYNC_PROBE, &msg);
> +               tracecmd_msg_send(msg_handle->fd, &msg);
> +               /* Get the ts and CPU of the sent event */
> +               ret = tracecmd_clock_find_event(clock_context, -1, &event);
> +               tracecmd_msg_init(MSG_TSYNC_REQ, &msg);
> +               msg.ts_req.cpu = htons(event.cpu);
> +               tracecmd_msg_send(msg_handle->fd, &msg);
> +               memset(&msg, 0, sizeof(msg));
> +               ret = tracecmd_msg_recv(msg_handle->fd, &msg);
> +               if (ret < 0)
> +                       break;
> +               if (ntohl(msg.hdr.cmd) != MSG_TSYNC_RESP) {
> +                       handle_unexpected_msg(msg_handle, &msg);
> +                       break;
> +               }
> +               tracecmd_clock_synch_calc_probe(clock_context,
> +                                               event.ts,
> +                                               htonll(msg.ts_resp.time));
> +       } while (--sync_loop);
> +
> +       tracecmd_clock_synch_disable(clock_context);
> +       tracecmd_clock_synch_calc(clock_context, offset, timestamp);
> +       tracecmd_msg_init(MSG_TSYNC_STOP, &msg);
> +       msg.ts_stop.offset = htonll(*offset);
> +       msg.ts_stop.timestamp = htonll(*timestamp);
> +       tracecmd_msg_send(msg_handle->fd, &msg);
> +
> +       msg_free(&msg);
> +       return 0;
> +}
> +
>  static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus,
> -                          unsigned int *ports, bool use_fifos)
> +                          unsigned int *ports, bool use_fifos, bool do_tsync)
>  {
>         int ports_size = nr_cpus * sizeof(*msg->port_array);
>         int i;
>
>         msg->hdr.size = htonl(ntohl(msg->hdr.size) + ports_size);
> -       msg->trace_resp.flags = use_fifos ? htonl(MSG_TRACE_USE_FIFOS) : htonl(0);
> +       msg->trace_resp.flags = 0;
> +       if (use_fifos)
> +               msg->trace_resp.flags |= MSG_TRACE_USE_FIFOS;
> +       if (do_tsync)
> +               msg->trace_resp.flags |= MSG_TRACE_DO_TSYNC;
> +       msg->trace_resp.flags = htonl(msg->trace_resp.flags);
>         msg->trace_resp.cpus = htonl(nr_cpus);
>         msg->trace_resp.page_size = htonl(page_size);
>
> @@ -882,13 +1043,14 @@ static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus,
>
>  int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
>                                  int nr_cpus, int page_size,
> -                                unsigned int *ports, bool use_fifos)
> +                                unsigned int *ports, bool use_fifos,
> +                                bool do_tsync)
>  {
>         struct tracecmd_msg msg;
>         int ret;
>
>         tracecmd_msg_init(MSG_TRACE_RESP, &msg);
> -       ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos);
> +       ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos, do_tsync);
>         if (ret < 0)
>                 return ret;
>
> @@ -897,7 +1059,8 @@ int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
>
>  int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
>                                  int *nr_cpus, int *page_size,
> -                                unsigned int **ports, bool *use_fifos)
> +                                unsigned int **ports, bool *use_fifos,
> +                                bool *do_tsync)
>  {
>         struct tracecmd_msg msg;
>         ssize_t buf_len;
> @@ -920,6 +1083,7 @@ int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
>         }
>
>         *use_fifos = ntohl(msg.trace_resp.flags) & MSG_TRACE_USE_FIFOS;
> +       *do_tsync = ntohl(msg.trace_resp.flags) & MSG_TRACE_DO_TSYNC;
>         *nr_cpus = ntohl(msg.trace_resp.cpus);
>         *page_size = ntohl(msg.trace_resp.page_size);
>         *ports = calloc(*nr_cpus, sizeof(**ports));
> diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> index 8c8e46f..0ae42b0 100644
> --- a/tracecmd/trace-record.c
> +++ b/tracecmd/trace-record.c
> @@ -210,6 +210,8 @@ struct common_record_context {
>         char *date2ts;
>         char *max_graph_depth;
>         int data_flags;
> +       int time_shift_count;
> +       struct tracecmd_option *time_shift_option;
>
>         int record_all;
>         int total_disable;
> @@ -649,11 +651,20 @@ static void tell_guests_to_stop(void)
>         for_all_instances(instance) {
>                 if (is_guest(instance)) {
>                         tracecmd_msg_send_close_msg(instance->msg_handle);
> -                       tracecmd_msg_handle_close(instance->msg_handle);
>                 }
>         }
>  }
>
> +static void close_guests_handle(void)
> +{
> +       struct buffer_instance *instance;
> +
> +       for_all_instances(instance) {
> +               if (is_guest(instance))
> +                       tracecmd_msg_handle_close(instance->msg_handle);
> +       }
> +}
> +
>  static void stop_threads(enum trace_type type)
>  {
>         struct timeval tv = { 0, 0 };
> @@ -3359,6 +3370,7 @@ static void connect_to_agent(struct buffer_instance *instance)
>         unsigned int *ports;
>         int i, *fds = NULL;
>         bool use_fifos = false;
> +       bool do_tsync, do_tsync_reply;
>
>         if (!no_fifos) {
>                 nr_fifos = open_guest_fifos(instance->name, &fds);
> @@ -3370,20 +3382,24 @@ static void connect_to_agent(struct buffer_instance *instance)
>                 die("Failed to connect to vsocket @%d:%d",
>                     instance->cid, instance->port);
>
> +       do_tsync = tracecmd_time_sync_check();
> +
>         msg_handle = tracecmd_msg_handle_alloc(sd, 0);
>         if (!msg_handle)
>                 die("Failed to allocate message handle");
>
>         ret = tracecmd_msg_send_trace_req(msg_handle, instance->argc,
> -                                         instance->argv, use_fifos);
> +                                         instance->argv, use_fifos, do_tsync);
>         if (ret < 0)
>                 die("Failed to send trace request");
>
>         ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size,
> -                                          &ports, &use_fifos);
> +                                          &ports, &use_fifos, &do_tsync_reply);
>         if (ret < 0)
>                 die("Failed to receive trace response");
> -
> +       if (do_tsync != do_tsync_reply)
> +               warning("Failed to negotiate timestamps synchronization with the guest %s",
> +                       instance->name);
>         if (use_fifos) {
>                 if (nr_cpus != nr_fifos) {
>                         warning("number of FIFOs (%d) for guest %s differs "
> @@ -3401,10 +3417,13 @@ static void connect_to_agent(struct buffer_instance *instance)
>         }
>
>         instance->use_fifos = use_fifos;
> +       instance->do_tsync = do_tsync_reply;
>         instance->cpu_count = nr_cpus;
>
>         /* the msg_handle now points to the guest fd */
>         instance->msg_handle = msg_handle;
> +
> +       sync_time_with_guest_v3(instance);
>  }
>
>  static void setup_guest(struct buffer_instance *instance)
> @@ -3429,10 +3448,13 @@ static void setup_guest(struct buffer_instance *instance)
>         close(fd);
>  }
>
> -static void setup_agent(struct buffer_instance *instance, struct common_record_context *ctx)
> +static void setup_agent(struct buffer_instance *instance,
> +                       struct common_record_context *ctx)
>  {
>         struct tracecmd_output *network_handle;
>
> +       sync_time_with_host_v3(instance);
> +
>         network_handle = tracecmd_create_init_fd_msg(instance->msg_handle,
>                                                      listed_events);
>         add_options(network_handle, ctx);
> @@ -5607,6 +5629,42 @@ static bool has_local_instances(void)
>         return false;
>  }
>
> +//#define TSYNC_DEBUG
> +static void write_guest_time_shift(struct buffer_instance *instance)
> +{
> +       struct tracecmd_output *handle;
> +       struct iovec vector[3];
> +       char *file;
> +       int fd;
> +
> +       if (!instance->time_sync_count)
> +               return;
> +
> +       file = get_guest_file(output_file, instance->name);
> +       fd = open(file, O_RDWR);
> +       if (fd < 0)
> +               die("error opening %s", file);
> +       put_temp_file(file);
> +       handle = tracecmd_get_output_handle_fd(fd);
> +       vector[0].iov_len = sizeof(instance->time_sync_count);
> +       vector[0].iov_base = &instance->time_sync_count;
> +       vector[1].iov_len = sizeof(long long) * instance->time_sync_count;
> +       vector[1].iov_base = instance->time_sync_ts;
> +       vector[2].iov_len = sizeof(long long) * instance->time_sync_count;
> +       vector[2].iov_base = instance->time_sync_offsets;
> +       tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, 3);
> +       tracecmd_append_options(handle);
> +       tracecmd_output_close(handle);
> +#ifdef TSYNC_DEBUG
> +       if (instance->time_sync_count > 1)
> +               printf("\n\rDetected %lld ns ts offset drift in %lld ns for guest %s\n\r",
> +                       instance->time_sync_offsets[instance->time_sync_count-1] -
> +                       instance->time_sync_offsets[0],
> +                       instance->time_sync_ts[instance->time_sync_count-1]-
> +                       instance->time_sync_ts[0], instance->name);
> +#endif
> +}
> +
>  /*
>   * This function contains common code for the following commands:
>   * record, start, stream, profile.
> @@ -5726,6 +5784,20 @@ static void record_trace(int argc, char **argv,
>         if (!latency)
>                 wait_threads();
>
> +       if (ctx->instance && is_agent(ctx->instance)) {
> +               sync_time_with_host_v3(ctx->instance);
> +               tracecmd_clock_context_free(ctx->instance);
> +       } else {
> +               for_all_instances(instance) {
> +                       if (is_guest(instance)) {
> +                               sync_time_with_guest_v3(instance);
> +                               write_guest_time_shift(instance);
> +                               tracecmd_clock_context_free(instance);
> +                       }
> +               }
> +       }
> +       close_guests_handle();
> +
>         if (IS_RECORD(ctx)) {
>                 record_data(ctx);
>                 delete_thread_data();
> @@ -5864,7 +5936,7 @@ void trace_record(int argc, char **argv)
>  int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
>                        int cpus, int *fds,
>                        int argc, char **argv,
> -                      bool use_fifos)
> +                      bool use_fifos, bool do_tsync)
>  {
>         struct common_record_context ctx;
>         char **argv_plus;
> @@ -5891,6 +5963,7 @@ int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
>
>         ctx.instance->fds = fds;
>         ctx.instance->use_fifos = use_fifos;
> +       ctx.instance->do_tsync = do_tsync;
>         ctx.instance->flags |= BUFFER_FL_AGENT;
>         ctx.instance->msg_handle = msg_handle;
>         msg_handle->version = V3_PROTOCOL;
> diff --git a/tracecmd/trace-timesync.c b/tracecmd/trace-timesync.c
> new file mode 100644
> index 0000000..c9b6241
> --- /dev/null
> +++ b/tracecmd/trace-timesync.c
> @@ -0,0 +1,825 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tstoyanov@vmware.com>
> + *
> + */
> +
> +#include <fcntl.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <arpa/inet.h>
> +#include <linux/vm_sockets.h>
> +#include "trace-local.h"
> +
> +//#define TSYNC_DEBUG
> +
> +static int clock_sync_x86_host_init(struct tracecmd_clock_sync *clock_context);
> +static int clock_sync_x86_host_free(struct tracecmd_clock_sync *clock_context);
> +static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock,
> +                                   int cpu, struct tracecmd_time_sync_event *event);
> +static int clock_sync_x86_guest_init(struct tracecmd_clock_sync *clock_context);
> +static int clock_sync_x86_guest_free(struct tracecmd_clock_sync *clock_context);
> +static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock,
> +                                           int pid,
> +                                           struct tracecmd_time_sync_event *event);
> +
> +struct tracecmd_event_descr {
> +       char                    *system;
> +       char                    *name;
> +};
> +
> +struct tracecmd_ftrace_param {
> +       char    *file;
> +       char    *set;
> +       char    *reset;
> +};
> +
> +enum clock_sync_context {
> +       CLOCK_KVM_X86_VSOCK_HOST,
> +       CLOCK_KVM_X86_VSOCK_GUEST,
> +       CLOCK_CONTEXT_MAX,
> +};
> +
> +struct tracecmd_clock_sync {
> +       enum clock_sync_context         clock_context_id;
> +       struct tracecmd_ftrace_param    *ftrace_params;
> +       struct tracecmd_time_sync_event *events;
> +       int                             events_count;
> +       struct tep_handle               *tep;
> +       struct buffer_instance          *vinst;
> +
> +       int                             probes_count;
> +       int                             bad_probes;
> +       int                             probes_size;
> +       long long                       *times;
> +       long long                       *offsets;
> +       long long                       offset_av;
> +       long long                       offset_min;
> +       long long                       offset_max;
> +       int                             debug_fd;
> +
> +       unsigned int                    local_cid;
> +       unsigned int                    local_port;
> +       unsigned int                    remote_cid;
> +       unsigned int                    remote_port;
> +};
> +
> +struct {
> +       const char *systems[3];
> +       struct tracecmd_ftrace_param ftrace_params[5];
> +       struct tracecmd_event_descr events[3];
> +       int (*clock_sync_init)(struct tracecmd_clock_sync *clock_context);
> +       int (*clock_sync_free)(struct tracecmd_clock_sync *clock_context);
> +       int (*clock_sync_find_events)(struct tracecmd_clock_sync *clock_context,
> +                                     int pid,
> +                                     struct tracecmd_time_sync_event *event);
> +       int (*clock_sync_load)(struct tracecmd_clock_sync *clock_context);
> +       int (*clock_sync_unload)(struct tracecmd_clock_sync *clock_context);
> +} static clock_sync[CLOCK_CONTEXT_MAX] = {
> +       { /* CLOCK_KVM_X86_VSOCK_HOST */
> +         .systems = {"vsock", "ftrace", NULL},
> +         .ftrace_params = {
> +         {"set_ftrace_filter", "vmx_read_l1_tsc_offset\nsvm_read_l1_tsc_offset", "\0"},
> +         {"current_tracer", "function", "nop"},
> +         {"events/vsock/virtio_transport_recv_pkt/enable", "1", "0"},
> +         {"events/vsock/virtio_transport_recv_pkt/filter", NULL, "\0"},
> +         {NULL, NULL, NULL} },
> +         .events = {
> +         {.system = "ftrace", .name = "function"},
> +         {.system = "vsock", .name = "virtio_transport_recv_pkt"},
> +         {.system = NULL, .name = NULL} },
> +        clock_sync_x86_host_init,
> +        clock_sync_x86_host_free,
> +        clock_sync_x86_host_find_events,
> +       },
> +
> +       { /* CLOCK_KVM_X86_VSOCK_GUEST */
> +        .systems = { "vsock", "ftrace", NULL},
> +        .ftrace_params = {
> +         {"set_ftrace_filter", "vp_notify", "\0"},
> +         {"current_tracer", "function", "nop"},
> +         {"events/vsock/virtio_transport_alloc_pkt/enable", "1", "0"},
> +         {"events/vsock/virtio_transport_alloc_pkt/filter", NULL, "\0"},
> +         {NULL, NULL, NULL},
> +         },
> +         .events = {
> +         {.system = "vsock", .name = "virtio_transport_alloc_pkt"},
> +         {.system = "ftrace", .name = "function"},
> +         {.system = NULL, .name = NULL}
> +        },
> +         clock_sync_x86_guest_init,
> +         clock_sync_x86_guest_free,
> +         clock_sync_x86_guest_find_events,
> +       }
> +};
> +
> +static int clock_sync_x86_host_init(struct tracecmd_clock_sync *clock_context)
> +{
> +       char vsock_filter[255];
> +
> +       snprintf(vsock_filter, 255,
> +               "src_cid==%u && src_port==%u && dst_cid==%u && dst_port==%u && len!=0",
> +               clock_context->remote_cid, clock_context->remote_port,
> +               clock_context->local_cid, clock_context->local_port);
> +
> +       clock_context->ftrace_params[3].set = strdup(vsock_filter);
> +       return 1;
> +}
> +
> +static int clock_sync_x86_host_free(struct tracecmd_clock_sync *clock_context)
> +{
> +       free(clock_context->ftrace_params[3].set);
> +       clock_context->ftrace_params[3].set = NULL;
> +       return 1;
> +}
> +
> +static int clock_sync_x86_guest_init(struct tracecmd_clock_sync *clock_context)
> +{
> +       char vsock_filter[255];
> +
> +       snprintf(vsock_filter, 255,
> +               "src_cid==%u && src_port==%u && dst_cid==%u && dst_port==%u && len!=0",
> +               clock_context->local_cid, clock_context->local_port,
> +               clock_context->remote_cid, clock_context->remote_port);
> +
> +       clock_context->ftrace_params[3].set = strdup(vsock_filter);
> +       return 1;
> +}
> +
> +static int clock_sync_x86_guest_free(struct tracecmd_clock_sync *clock_context)
> +{
> +       free(clock_context->ftrace_params[3].set);
> +       clock_context->ftrace_params[3].set = NULL;
> +       return 1;
> +}
> +
> +static int
> +get_events_in_page(struct tep_handle *tep, void *page,
> +                   int size, int cpu, struct tracecmd_time_sync_event **events,
> +                   int *events_count, int *events_size)
> +{
> +       struct tracecmd_time_sync_event *events_array = NULL;
> +       struct tep_record *last_record = NULL;
> +       struct tep_event *event = NULL;
> +       struct tep_record *record;
> +       int id, cnt = 0;
> +
> +       if (size <= 0)
> +               return 0;
> +
> +       if (*events == NULL) {
> +               *events = malloc(10*sizeof(struct tracecmd_time_sync_event));
> +               *events_size = 10;
> +               *events_count = 0;
> +       }
> +
> +       while (true) {
> +               event = NULL;
> +               record = tracecmd_read_page_record(tep, page, size,
> +                                                  last_record);
> +               if (!record)
> +                       break;
> +               free_record(last_record);
> +               id = tep_data_type(tep, record);
> +               event = tep_data_event_from_type(tep, id);
> +               if (event) {
> +                       if (*events_count >= *events_size) {
> +                               events_array = realloc(*events,
> +                                       ((3*(*events_size))/2)*
> +                                       sizeof(struct tracecmd_time_sync_event));
> +                               if (events_array) {
> +                                       *events = events_array;
> +                                       (*events_size) = (((*events_size)*3)/2);
> +                               }
> +                       }
> +
> +                       if (*events_count < *events_size) {
> +                               (*events)[*events_count].ts = record->ts;
> +                               (*events)[*events_count].cpu = cpu;
> +                               (*events)[*events_count].id = id;
> +                               (*events)[*events_count].pid = tep_data_pid(tep, record);
> +                               (*events_count)++;
> +                               cnt++;
> +                       }
> +               }
> +               last_record = record;
> +       }
> +       free_record(last_record);
> +
> +       return cnt;
> +}
> +
> +static int sync_events_cmp(const void *a, const void *b)
> +{
> +       const struct tracecmd_time_sync_event *ea = (const struct tracecmd_time_sync_event *)a;
> +       const struct tracecmd_time_sync_event *eb = (const struct tracecmd_time_sync_event *)b;
> +
> +       if (ea->ts > eb->ts)
> +               return 1;
> +       if (ea->ts < eb->ts)
> +               return -1;
> +       return 0;
> +}
> +
> +static int find_sync_events(struct tep_handle *pevent,
> +                           struct tracecmd_time_sync_event *recorded,
> +                           int rsize, struct tracecmd_time_sync_event *events)
> +{
> +       int i = 0, j = 0;
> +
> +       while (i < rsize) {
> +               if (!events[j].ts && events[j].id == recorded[i].id &&
> +                   (events[j].pid < 0 || events[j].pid == recorded[i].pid)) {
> +                       events[j].cpu = recorded[i].cpu;
> +                       events[j].ts = recorded[i].ts;
> +                       j++;
> +               } else if (j > 0 && events[j-1].id == recorded[i].id &&
> +                         (events[j-1].pid < 0 || events[j-1].pid == recorded[i].pid)) {
> +                       events[j-1].cpu = recorded[i].cpu;
> +                       events[j-1].ts = recorded[i].ts;
> +               }
> +               i++;
> +       }
> +       return j;
> +}
> +
> +//#define TSYNC_RBUFFER_DEBUG
> +static int find_raw_events(struct tep_handle *tep,
> +                   struct buffer_instance *instance,
> +                   struct tracecmd_time_sync_event *events)
> +{
> +       struct tracecmd_time_sync_event *events_array = NULL;
> +       int events_count = 0;
> +       int events_size = 0;
> +       unsigned int p_size;
> +       int ts = 0;
> +       void *page;
> +       char *path;
> +       char *file;
> +       DIR *dir;

`dir` is unused besides getting opened and closed below, remove it?
Or is it supposed to check for the existence of such folder? If so,
maybe a stat() will suffice or rely on that opening files inside this
directory will fail too.

> +       int len;
> +       int fd;
> +       int i;
> +       int r;
> +
> +       p_size = getpagesize();
> +#ifdef TSYNC_RBUFFER_DEBUG
> +       file = get_instance_file(instance, "trace");
> +       if (!file)
> +               return ts;
> +       {
> +               char *buf = NULL;
> +               FILE *fp;
> +               size_t n;
> +               int r;
> +
> +               printf("Events:\n\r");
> +               fp = fopen(file, "r");
> +               while ((r = getline(&buf, &n, fp)) >= 0) {
> +
> +                       if (buf[0] != '#')
> +                               printf("%s", buf);
> +                       free(buf);
> +                       buf = NULL;
> +               }
> +               fclose(fp);
> +       }
> +       tracecmd_put_tracing_file(file);
> +#endif /* TSYNC_RBUFFER_DEBUG */
> +       path = get_instance_file(instance, "per_cpu");
> +       if (!path)
> +               return ts;
> +
> +       dir = opendir(path);
> +       if (!dir)
> +               goto out;
> +
> +       len = strlen(path);
> +       file = malloc(len + strlen("trace_pipe_raw") + 32);

How about having file defined as

    char file[PATH_MAX];

and avoid allocation and computing how much space we need for it?

> +       page = malloc(p_size);
> +       if (!file || !page)
> +               die("Failed to allocate time_stamp info");
> +       for (i = 0; i < instance->cpu_count; i++) {
> +               sprintf(file, "%s/cpu%d/trace_pipe_raw", path, i);
> +               fd = open(file, O_RDONLY | O_NONBLOCK);
> +               if (fd < 0)
> +                       continue;
> +               do {
> +                       r = read(fd, page, p_size);
> +                       if (r > 0) {
> +                               get_events_in_page(tep, page, r, i,
> +                                                  &events_array, &events_count,
> +                                                  &events_size);
> +                       }
> +               } while (r > 0);
> +               close(fd);
> +       }
> +       qsort(events_array, events_count, sizeof(*events_array), sync_events_cmp);
> +       r = find_sync_events(tep, events_array, events_count, events);
> +#ifdef TSYNC_RBUFFER_DEBUG
> +       len = 0;
> +       while (events[len].id) {
> +               printf("Found %d @ cpu %d: %lld pid %d\n\r",
> +                       events[len].id,  events[len].cpu,
> +                       events[len].ts, events[len].pid);
> +               len++;
> +       }
> +#endif
> +
> +       free(events_array);
> +       free(file);
> +       free(page);
> +       closedir(dir);
> +
> + out:
> +       tracecmd_put_tracing_file(path);
> +       return r;
> +}
> +
> +static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock,
> +                                          int pid,
> +                                          struct tracecmd_time_sync_event *event)
> +{
> +       int ret;
> +
> +       clock->events[0].pid = pid;
> +       ret = find_raw_events(clock->tep, clock->vinst, clock->events);
> +       event->ts = clock->events[0].ts;
> +       event->cpu = clock->events[0].cpu;
> +       return ret;
> +
> +}
> +
> +static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock,
> +                                           int pid,
> +                                           struct tracecmd_time_sync_event *event)
> +{
> +       int ret;
> +
> +       ret = find_raw_events(clock->tep, clock->vinst, clock->events);
> +       if (ret != clock->events_count)
> +               return 0;
> +       event->ts = clock->events[1].ts;
> +       event->cpu = clock->events[0].cpu;
> +       return 1;
> +
> +}
> +
> +static void tracecmd_clock_sync_reset(struct tracecmd_clock_sync *clock_context)
> +{
> +       int i = 0;
> +
> +       while (clock_context->events[i].id) {
> +               clock_context->events[i].cpu = 0;
> +               clock_context->events[i].ts = 0;
> +               clock_context->events[i].pid = -1;
> +               i++;
> +       }
> +}
> +
> +int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int pid,
> +                             struct tracecmd_time_sync_event *event)
> +{
> +       int ret = 0;
> +       int id;
> +
> +       if (clock == NULL ||
> +           clock->clock_context_id >= CLOCK_CONTEXT_MAX)
> +               return 0;
> +
> +       id = clock->clock_context_id;
> +       if (clock_sync[id].clock_sync_find_events)
> +               ret = clock_sync[id].clock_sync_find_events(clock, pid, event);
> +
> +       tracecmd_clock_sync_reset(clock);
> +       return ret;
> +}
> +
> +static void clock_context_copy(struct tracecmd_clock_sync *clock_context,
> +                              struct tracecmd_ftrace_param *params,
> +                              struct tracecmd_event_descr *events)
> +{
> +       int i;
> +
> +       i = 0;
> +       while (params[i].file)
> +               i++;
> +       clock_context->ftrace_params = calloc(i+1, sizeof(struct tracecmd_ftrace_param));
> +       i = 0;
> +       while (params[i].file) {
> +               clock_context->ftrace_params[i].file = strdup(params[i].file);
> +               if (params[i].set)
> +                       clock_context->ftrace_params[i].set = strdup(params[i].set);
> +               if (params[i].reset)
> +                       clock_context->ftrace_params[i].reset = strdup(params[i].reset);
> +               i++;
> +       }
> +
> +       i = 0;
> +       while (events[i].name)
> +               i++;
> +       clock_context->events = calloc(i+1, sizeof(struct tracecmd_time_sync_event));
> +       clock_context->events_count = i;
> +}
> +
> +void trace_instance_reset(struct buffer_instance *vinst)
> +{
> +       write_instance_file(vinst, "trace", "\0", NULL);
> +}
> +
> +static struct buffer_instance *
> +clock_synch_create_instance(const char *clock, unsigned int cid)
> +{
> +       struct buffer_instance *vinst;
> +       char inst_name[256];
> +
> +       snprintf(inst_name, 256, "clock_synch-%d", cid);
> +
> +       vinst = create_instance(strdup(inst_name));
> +       tracecmd_init_instance(vinst);
> +       vinst->cpu_count = tracecmd_local_cpu_count();
> +       tracecmd_make_instance(vinst);
> +       trace_instance_reset(vinst);
> +       if (clock)
> +               vinst->clock = strdup(clock);
> +       tracecmd_set_clock(vinst);
> +       return vinst;
> +}
> +
> +static struct tep_handle *clock_synch_get_tep(struct buffer_instance *instance,
> +                                             const char * const *systems)
> +{
> +       struct tep_handle *tep = NULL;
> +       char *path;
> +
> +       path = get_instance_dir(instance);
> +       tep = tracecmd_local_events_system(path, systems);
> +       tracecmd_put_tracing_file(path);
> +
> +       tep_set_file_bigendian(tep, tracecmd_host_bigendian());
> +       tep_set_host_bigendian(tep, tracecmd_host_bigendian());
> +
> +       return tep;
> +}
> +
> +static void get_vsocket_params(int fd, unsigned int *lcid, unsigned int *lport,
> +                              unsigned int *rcid, unsigned int *rport)

This static function is called only once in this file and never with
NULL output arguments. I think we can safely drop all NULL pointer
checks in it and simplify it.

> +{
> +       struct sockaddr_vm addr;
> +       socklen_t addr_len = sizeof(addr);
> +
> +       if (lcid || lport) {
> +               memset(&addr, 0, sizeof(addr));
> +               if (getsockname(fd, (struct sockaddr *)&addr, &addr_len))
> +                       return;
> +               if (addr.svm_family != AF_VSOCK)
> +                       return;
> +               if (lport)
> +                       *lport = addr.svm_port;
> +               if (lcid)
> +                       *lcid = addr.svm_cid;
> +       }
> +
> +       if (rcid || rport) {
> +               memset(&addr, 0, sizeof(addr));
> +               addr_len = sizeof(addr);
> +               if (getpeername(fd, (struct sockaddr *)&addr, &addr_len))
> +                       return;
> +               if (addr.svm_family != AF_VSOCK)
> +                       return;
> +
> +               if (rport)
> +                       *rport = addr.svm_port;
> +               if (rcid)
> +                       *rcid = addr.svm_cid;
> +       }
> +}
> +
> +struct tracecmd_clock_sync*
> +tracecmd_clock_context_new(struct tracecmd_msg_handle *msg_handle,
> +                           const char *clock_str,
> +                           enum clock_sync_context id)
> +{
> +       struct tracecmd_clock_sync *clock_context;
> +       struct tep_event *event;
> +       unsigned int i = 0;
> +
> +       switch (id) {
> +#ifdef VSOCK
> +       case CLOCK_KVM_X86_VSOCK_HOST:
> +       case CLOCK_KVM_X86_VSOCK_GUEST:
> +               break;
> +#endif
> +       default: /* not supported clock sync context */
> +               return NULL;
> +       }
> +
> +       if (id >= CLOCK_CONTEXT_MAX || NULL == msg_handle)
> +               return NULL;
> +       clock_context = calloc(1, sizeof(struct tracecmd_clock_sync));
> +       if (!clock_context)
> +               return NULL;
> +       clock_context->clock_context_id = id;
> +       clock_context_copy(clock_context,
> +                          clock_sync[id].ftrace_params, clock_sync[id].events);
> +
> +       get_vsocket_params(msg_handle->fd,
> +                          &(clock_context->local_cid),
> +                          &(clock_context->local_port),
> +                          &(clock_context->remote_cid),
> +                          &(clock_context->remote_port));

Nit: parenthesis are not needed:

      get_vsocket_params(msg_handle->fd,
                         &clock_context->local_cid,
                         &clock_context->local_port,
                         &clock_context->remote_cid,
                         &clock_context->remote_port);

> +
> +       if (clock_sync[id].clock_sync_init)
> +               clock_sync[id].clock_sync_init(clock_context);
> +
> +       clock_context->vinst = clock_synch_create_instance(clock_str, clock_context->remote_cid);
> +       clock_context->tep = clock_synch_get_tep(clock_context->vinst,
> +                                                clock_sync[id].systems);
> +       i = 0;
> +       while (clock_sync[id].events[i].name) {
> +               event = tep_find_event_by_name(clock_context->tep,
> +                                              clock_sync[id].events[i].system,
> +                                              clock_sync[id].events[i].name);
> +               if (!event)
> +                       break;
> +               clock_context->events[i].id = event->id;
> +               i++;
> +       }
> +#ifdef TSYNC_DEBUG
> +       clock_context->debug_fd = -1;
> +#endif
> +
> +       return clock_context;
> +
> +}
> +
> +void tracecmd_clock_context_free(struct buffer_instance *instance)
> +{
> +       int i;
> +
> +       if (instance->clock_sync == NULL ||
> +           instance->clock_sync->clock_context_id >= CLOCK_CONTEXT_MAX)
> +               return;
> +       if (clock_sync[instance->clock_sync->clock_context_id].clock_sync_free)
> +               clock_sync[instance->clock_sync->clock_context_id].clock_sync_free(instance->clock_sync);
> +
> +       i = 0;
> +       while (instance->clock_sync->ftrace_params[i].file) {
> +               free(instance->clock_sync->ftrace_params[i].file);
> +               free(instance->clock_sync->ftrace_params[i].set);
> +               free(instance->clock_sync->ftrace_params[i].reset);
> +               i++;
> +       }
> +       free(instance->clock_sync->ftrace_params);
> +       free(instance->clock_sync->events);
> +       tracecmd_remove_instance(instance->clock_sync->vinst);
> +       /* todo: clean up the instance */
> +       tep_free(instance->clock_sync->tep);
> +
> +       free(instance->clock_sync->offsets);
> +       free(instance->clock_sync->times);
> +#ifdef TSYNC_DEBUG
> +       if (instance->clock_sync->debug_fd >= 0) {
> +               close(instance->clock_sync->debug_fd);
> +               instance->clock_sync->debug_fd = -1;
> +       }
> +#endif
> +       free(instance->clock_sync);
> +       instance->clock_sync = NULL;
> +}
> +
> +bool tracecmd_time_sync_check(void)
> +{
> +#ifdef VSOCK
> +       return true;
> +#endif
> +       return false;
> +}
> +
> +void sync_time_with_host_v3(struct buffer_instance *instance)
> +{
> +       long long timestamp = 0;
> +       long long offset = 0;
> +
> +       if (!instance->do_tsync)
> +               return;
> +
> +       if (instance->clock_sync == NULL)
> +               instance->clock_sync = tracecmd_clock_context_new(instance->msg_handle,
> +                                       instance->clock, CLOCK_KVM_X86_VSOCK_GUEST);
> +
> +       tracecmd_msg_snd_time_sync(instance->msg_handle, instance->clock_sync,
> +                                  &offset, &timestamp);
> +       if (!offset && !timestamp)
> +               warning("Failed to synchronize timestamps with the host");
> +}
> +
> +void sync_time_with_guest_v3(struct buffer_instance *instance)
> +{
> +       long long timestamp = 0;
> +       long long offset = 0;
> +       long long *sync_array_ts;
> +       long long *sync_array_offs;
> +
> +       if (!instance->do_tsync)
> +               return;
> +
> +       if (instance->clock_sync == NULL)
> +               instance->clock_sync = tracecmd_clock_context_new(instance->msg_handle,
> +                                               top_instance.clock, CLOCK_KVM_X86_VSOCK_HOST);
> +
> +       tracecmd_msg_rcv_time_sync(instance->msg_handle,
> +                                  instance->clock_sync, &offset, &timestamp);
> +
> +       if (!offset && !timestamp) {
> +               warning("Failed to synchronize timestamps with guest %s",
> +                       instance->name);
> +               return;
> +       }
> +
> +       sync_array_ts = realloc(instance->time_sync_ts,
> +                           (instance->time_sync_count+1)*sizeof(long long));
> +       sync_array_offs = realloc(instance->time_sync_offsets,
> +                           (instance->time_sync_count+1)*sizeof(long long));
> +
> +       if (sync_array_ts && sync_array_offs) {
> +               sync_array_ts[instance->time_sync_count] = timestamp;
> +               sync_array_offs[instance->time_sync_count] = offset;
> +               instance->time_sync_count++;
> +               instance->time_sync_ts = sync_array_ts;
> +               instance->time_sync_offsets = sync_array_offs;
> +
> +       } else {
> +               free(sync_array_ts);
> +               free(sync_array_offs);
> +       }
> +
> +}
> +
> +static void set_clock_synch_events(struct buffer_instance *instance,
> +                                  struct tracecmd_ftrace_param *params,
> +                                  bool enable)
> +{
> +       int i = 0;
> +
> +       if (!enable)
> +               write_tracing_on(instance, 0);
> +
> +       while (params[i].file) {
> +               if (enable && params[i].set) {
> +                       write_instance_file(instance, params[i].file,
> +                                           params[i].set, NULL);
> +               }
> +               if (!enable && params[i].reset)
> +                       write_instance_file(instance, params[i].file,
> +                                           params[i].reset, NULL);
> +               i++;
> +       }
> +
> +       if (enable)
> +               write_tracing_on(instance, 1);
> +}
> +
> +int tracecmd_clock_get_peer(struct tracecmd_clock_sync *clock_context,
> +                           unsigned int *remote_cid, unsigned int *remote_port)
> +{
> +       if (!clock_context)
> +               return 0;
> +       if (remote_cid)
> +               *remote_cid = clock_context->remote_cid;
> +       if (remote_port)
> +               *remote_cid = clock_context->remote_port;
> +       return 1;
> +}
> +
> +void tracecmd_clock_synch_enable(struct tracecmd_clock_sync *clock_context)
> +{
> +       set_clock_synch_events(clock_context->vinst,
> +                              clock_context->ftrace_params, true);
> +}
> +
> +void tracecmd_clock_synch_disable(struct tracecmd_clock_sync *clock_context)
> +{
> +       set_clock_synch_events(clock_context->vinst,
> +                              clock_context->ftrace_params, false);
> +}
> +
> +int tracecmd_clock_synch_calc(struct tracecmd_clock_sync *clock_context,
> +                              long long *offset_ret, long long *time_ret)
> +{
> +       int i, j = 0;
> +       long long av, tresch, offset = 0, time = 0;
> +
> +       if (!clock_context || !clock_context->probes_count)
> +               return 0;
> +       av = clock_context->offset_av / clock_context->probes_count;
> +       tresch = (long long)((clock_context->offset_max - clock_context->offset_min)/10);
> +
> +       for (i = 0; i < clock_context->probes_count; i++) {
> +               /* filter the offsets with deviation up to 10% */
> +               if (llabs(clock_context->offsets[i] - av) < tresch) {
> +                       offset += clock_context->offsets[i];
> +                       j++;
> +               }
> +       }
> +       if (j)
> +               offset /= (long long)j;
> +
> +       tresch = 0;
> +       for (i = 0; i < clock_context->probes_count; i++) {
> +               if ((!tresch || tresch > llabs(offset-clock_context->offsets[i]))) {
> +                       tresch = llabs(offset-clock_context->offsets[i]);
> +                       time = clock_context->times[i];
> +               }
> +       }
> +       if (offset_ret)
> +               *offset_ret = offset;
> +       if (time_ret)
> +               *time_ret = time;
> +#ifdef TSYNC_DEBUG
> +       printf("\n calculated offset: %lld, %d/%d probes\n\r",
> +               *offset_ret, clock_context->probes_count,
> +               clock_context->probes_count + clock_context->bad_probes);
> +#endif
> +       return 1;
> +}
> +
> +void tracecmd_clock_synch_calc_reset(struct tracecmd_clock_sync *clock_context)
> +{
> +       if (!clock_context)
> +               return;
> +
> +       clock_context->probes_count = 0;
> +       clock_context->bad_probes = 0;
> +       clock_context->offset_av = 0;
> +       clock_context->offset_min = 0;
> +       clock_context->offset_max = 0;
> +#ifdef TSYNC_DEBUG
> +       if (clock_context->debug_fd >= 0) {
> +               close(clock_context->debug_fd);
> +               clock_context->debug_fd = -1;
> +       }
> +#endif
> +
> +}
> +
> +void tracecmd_clock_synch_calc_probe(struct tracecmd_clock_sync *clock_context,
> +                                    long long ts_local, long long ts_remote)
> +{
> +       int count;
> +#ifdef TSYNC_DEBUG
> +       char buff[256];
> +#endif
> +
> +       if (!clock_context || !ts_local || !ts_remote)
> +               return;
> +       if (!ts_local || !ts_remote) {
> +               clock_context->bad_probes++;
> +               return;
> +       }
> +
> +       if (!clock_context->offsets && !clock_context->times) {
> +               clock_context->offsets = calloc(10, sizeof(long long));
> +               clock_context->times = calloc(10, sizeof(long long));
> +               clock_context->probes_size = 10;
> +       }
> +
> +       if (clock_context->probes_size == clock_context->probes_count) {
> +               clock_context->probes_size = (3*clock_context->probes_size)/2;
> +               clock_context->offsets = realloc(clock_context->offsets,
> +                                                clock_context->probes_size *
> +                                                sizeof(long long));
> +               clock_context->times = realloc(clock_context->times,
> +                                              clock_context->probes_size*
> +                                              sizeof(long long));
> +       }
> +
> +       if (!clock_context->offsets || !clock_context->times) {
> +               clock_context->probes_size = 0;
> +               tracecmd_clock_synch_calc_reset(clock_context);
> +               return;
> +       }
> +#ifdef TSYNC_DEBUG
> +       if (clock_context->debug_fd < 0) {
> +               sprintf(buff, "s-cid%d.txt", clock_context->remote_cid);
> +               clock_context->debug_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644);
> +       }
> +#endif
> +       count = clock_context->probes_count;
> +       clock_context->probes_count++;
> +       clock_context->offsets[count] = ts_remote - ts_local;
> +       clock_context->times[count] = ts_local;
> +       clock_context->offset_av += clock_context->offsets[count];
> +
> +       if (!clock_context->offset_min ||
> +           clock_context->offset_min > llabs(clock_context->offsets[count]))
> +               clock_context->offset_min = llabs(clock_context->offsets[count]);
> +       if (!clock_context->offset_max ||
> +           clock_context->offset_max < llabs(clock_context->offsets[count]))
> +               clock_context->offset_max = llabs(clock_context->offsets[count]);
> +#ifdef TSYNC_DEBUG
> +       sprintf(buff, "%lld %lld\n", ts_local, ts_remote);
> +       write(clock_context->debug_fd, buff, strlen(buff));
> +#endif
> +
> +}

> --
> 2.20.1
>
Steven Rostedt March 19, 2019, 5:51 p.m. UTC | #2
On Tue, 19 Mar 2019 17:14:15 +0000
Slavomir Kaslev <kaslevs@vmware.com> wrote:

> > diff --git a/tracecmd/Makefile b/tracecmd/Makefile
> > index d3e3080..8a73bf7 100644
> > --- a/tracecmd/Makefile
> > +++ b/tracecmd/Makefile
> > @@ -32,6 +32,7 @@ TRACE_CMD_OBJS += trace-list.o
> >  TRACE_CMD_OBJS += trace-output.o
> >  TRACE_CMD_OBJS += trace-usage.o
> >  TRACE_CMD_OBJS += trace-msg.o
> > +TRACE_CMD_OBJS += trace-timesync.o  
> 
> Shouldn't this be on only if $VSOCK_DEFINED, same as trace-agent.o below?
> 
> >
> >  ifeq ($(VSOCK_DEFINED), 1)
> >  TRACE_CMD_OBJS += trace-agent.o
> > diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h

> > +static int find_sync_events(struct tep_handle *pevent,
> > +                           struct tracecmd_time_sync_event *recorded,
> > +                           int rsize, struct tracecmd_time_sync_event *events)
> > +{
> > +       int i = 0, j = 0;
> > +
> > +       while (i < rsize) {
> > +               if (!events[j].ts && events[j].id == recorded[i].id &&
> > +                   (events[j].pid < 0 || events[j].pid == recorded[i].pid)) {
> > +                       events[j].cpu = recorded[i].cpu;
> > +                       events[j].ts = recorded[i].ts;
> > +                       j++;
> > +               } else if (j > 0 && events[j-1].id == recorded[i].id &&
> > +                         (events[j-1].pid < 0 || events[j-1].pid == recorded[i].pid)) {
> > +                       events[j-1].cpu = recorded[i].cpu;
> > +                       events[j-1].ts = recorded[i].ts;
> > +               }
> > +               i++;
> > +       }
> > +       return j;
> > +}
> > +
> > +//#define TSYNC_RBUFFER_DEBUG
> > +static int find_raw_events(struct tep_handle *tep,
> > +                   struct buffer_instance *instance,
> > +                   struct tracecmd_time_sync_event *events)
> > +{
> > +       struct tracecmd_time_sync_event *events_array = NULL;
> > +       int events_count = 0;
> > +       int events_size = 0;
> > +       unsigned int p_size;
> > +       int ts = 0;
> > +       void *page;
> > +       char *path;
> > +       char *file;
> > +       DIR *dir;  
> 
> `dir` is unused besides getting opened and closed below, remove it?
> Or is it supposed to check for the existence of such folder? If so,
> maybe a stat() will suffice or rely on that opening files inside this
> directory will fail too.
> 
> > +       int len;
> > +       int fd;
> > +       int i;
> > +       int r;
> > +
> > +       p_size = getpagesize();
> > +#ifdef TSYNC_RBUFFER_DEBUG
> > +       file = get_instance_file(instance, "trace");
> > +       if (!file)
> > +               return ts;
> > +       {
> > +               char *buf = NULL;
> > +               FILE *fp;
> > +               size_t n;
> > +               int r;
> > +
> > +               printf("Events:\n\r");
> > +               fp = fopen(file, "r");
> > +               while ((r = getline(&buf, &n, fp)) >= 0) {
> > +
> > +                       if (buf[0] != '#')
> > +                               printf("%s", buf);
> > +                       free(buf);
> > +                       buf = NULL;
> > +               }
> > +               fclose(fp);
> > +       }
> > +       tracecmd_put_tracing_file(file);
> > +#endif /* TSYNC_RBUFFER_DEBUG */
> > +       path = get_instance_file(instance, "per_cpu");
> > +       if (!path)
> > +               return ts;
> > +
> > +       dir = opendir(path);
> > +       if (!dir)
> > +               goto out;
> > +
> > +       len = strlen(path);
> > +       file = malloc(len + strlen("trace_pipe_raw") + 32);  
> 
> How about having file defined as
> 
>     char file[PATH_MAX];
> 
> and avoid allocation and computing how much space we need for it?
> 
> > +       page = malloc(p_size);
> > +       if (!file || !page)
> > +               die("Failed to allocate time_stamp info");
> > +       for (i = 0; i < instance->cpu_count; i++) {
> > +               sprintf(file, "%s/cpu%d/trace_pipe_raw", path, i);
> > +               fd = open(file, O_RDONLY | O_NONBLOCK);
> > +               if (fd < 0)
> > +                       continue;
> > +               do {
> > +                       r = read(fd, page, p_size);
> > +                       if (r > 0) {
> > +                               get_events_in_page(tep, page, r, i,
> > +                                                  &events_array, &events_count,
> > +                                                  &events_size);
> > +                       }
> > +               } while (r > 0);
> > +               close(fd);
> > +       }
> > +       qsort(events_array, events_count, sizeof(*events_array), sync_events_cmp);
> > +       r = find_sync_events(tep, events_array, events_count, events);
> > +#ifdef TSYNC_RBUFFER_DEBUG
> > +       len = 0;
> > +       while (events[len].id) {
> > +               printf("Found %d @ cpu %d: %lld pid %d\n\r",
> > +                       events[len].id,  events[len].cpu,
> > +                       events[len].ts, events[len].pid);
> > +               len++;
> > +       }
> > +#endif
> > +
> > +       free(events_array);
> > +       free(file);
> > +       free(page);
> > +       closedir(dir);
> > +
> > + out:
> > +       tracecmd_put_tracing_file(path);
> > +       return r;
> > +}
> > +
> > +static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock,
> > +                                          int pid,
> > +                                          struct tracecmd_time_sync_event *event)
> > +{
> > +       int ret;
> > +
> > +       clock->events[0].pid = pid;
> > +       ret = find_raw_events(clock->tep, clock->vinst, clock->events);
> > +       event->ts = clock->events[0].ts;
> > +       event->cpu = clock->events[0].cpu;
> > +       return ret;
> > +
> > +}
> > +
> > +static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock,
> > +                                           int pid,
> > +                                           struct tracecmd_time_sync_event *event)
> > +{
> > +       int ret;
> > +
> > +       ret = find_raw_events(clock->tep, clock->vinst, clock->events);
> > +       if (ret != clock->events_count)
> > +               return 0;
> > +       event->ts = clock->events[1].ts;
> > +       event->cpu = clock->events[0].cpu;
> > +       return 1;
> > +
> > +}
> > +
> > +static void tracecmd_clock_sync_reset(struct tracecmd_clock_sync *clock_context)
> > +{
> > +       int i = 0;
> > +
> > +       while (clock_context->events[i].id) {
> > +               clock_context->events[i].cpu = 0;
> > +               clock_context->events[i].ts = 0;
> > +               clock_context->events[i].pid = -1;
> > +               i++;
> > +       }
> > +}
> > +
> > +int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int pid,
> > +                             struct tracecmd_time_sync_event *event)
> > +{
> > +       int ret = 0;
> > +       int id;
> > +
> > +       if (clock == NULL ||
> > +           clock->clock_context_id >= CLOCK_CONTEXT_MAX)
> > +               return 0;
> > +
> > +       id = clock->clock_context_id;
> > +       if (clock_sync[id].clock_sync_find_events)
> > +               ret = clock_sync[id].clock_sync_find_events(clock, pid, event);
> > +
> > +       tracecmd_clock_sync_reset(clock);
> > +       return ret;
> > +}
> > +
> > +static void clock_context_copy(struct tracecmd_clock_sync *clock_context,
> > +                              struct tracecmd_ftrace_param *params,
> > +                              struct tracecmd_event_descr *events)
> > +{
> > +       int i;
> > +
> > +       i = 0;
> > +       while (params[i].file)
> > +               i++;
> > +       clock_context->ftrace_params = calloc(i+1, sizeof(struct tracecmd_ftrace_param));
> > +       i = 0;
> > +       while (params[i].file) {
> > +               clock_context->ftrace_params[i].file = strdup(params[i].file);
> > +               if (params[i].set)
> > +                       clock_context->ftrace_params[i].set = strdup(params[i].set);
> > +               if (params[i].reset)
> > +                       clock_context->ftrace_params[i].reset = strdup(params[i].reset);
> > +               i++;
> > +       }
> > +
> > +       i = 0;
> > +       while (events[i].name)
> > +               i++;
> > +       clock_context->events = calloc(i+1, sizeof(struct tracecmd_time_sync_event));
> > +       clock_context->events_count = i;
> > +}
> > +
> > +void trace_instance_reset(struct buffer_instance *vinst)
> > +{
> > +       write_instance_file(vinst, "trace", "\0", NULL);
> > +}
> > +
> > +static struct buffer_instance *
> > +clock_synch_create_instance(const char *clock, unsigned int cid)
> > +{
> > +       struct buffer_instance *vinst;
> > +       char inst_name[256];
> > +
> > +       snprintf(inst_name, 256, "clock_synch-%d", cid);
> > +
> > +       vinst = create_instance(strdup(inst_name));
> > +       tracecmd_init_instance(vinst);
> > +       vinst->cpu_count = tracecmd_local_cpu_count();
> > +       tracecmd_make_instance(vinst);
> > +       trace_instance_reset(vinst);
> > +       if (clock)
> > +               vinst->clock = strdup(clock);
> > +       tracecmd_set_clock(vinst);
> > +       return vinst;
> > +}
> > +
> > +static struct tep_handle *clock_synch_get_tep(struct buffer_instance *instance,
> > +                                             const char * const *systems)
> > +{
> > +       struct tep_handle *tep = NULL;
> > +       char *path;
> > +
> > +       path = get_instance_dir(instance);
> > +       tep = tracecmd_local_events_system(path, systems);
> > +       tracecmd_put_tracing_file(path);
> > +
> > +       tep_set_file_bigendian(tep, tracecmd_host_bigendian());
> > +       tep_set_host_bigendian(tep, tracecmd_host_bigendian());
> > +
> > +       return tep;
> > +}
> > +
> > +static void get_vsocket_params(int fd, unsigned int *lcid, unsigned int *lport,
> > +                              unsigned int *rcid, unsigned int *rport)  
> 
> This static function is called only once in this file and never with
> NULL output arguments. I think we can safely drop all NULL pointer
> checks in it and simplify it.
> 
> > +{
> > +       struct sockaddr_vm addr;
> > +       socklen_t addr_len = sizeof(addr);
> > +
> > +       if (lcid || lport) {
> > +               memset(&addr, 0, sizeof(addr));
> > +               if (getsockname(fd, (struct sockaddr *)&addr, &addr_len))
> > +                       return;
> > +               if (addr.svm_family != AF_VSOCK)
> > +                       return;
> > +               if (lport)
> > +                       *lport = addr.svm_port;
> > +               if (lcid)
> > +                       *lcid = addr.svm_cid;
> > +       }
> > +
> > +       if (rcid || rport) {
> > +               memset(&addr, 0, sizeof(addr));
> > +               addr_len = sizeof(addr);
> > +               if (getpeername(fd, (struct sockaddr *)&addr, &addr_len))
> > +                       return;
> > +               if (addr.svm_family != AF_VSOCK)
> > +                       return;
> > +
> > +               if (rport)
> > +                       *rport = addr.svm_port;
> > +               if (rcid)
> > +                       *rcid = addr.svm_cid;
> > +       }
> > +}
> > +
> > +struct tracecmd_clock_sync*
> > +tracecmd_clock_context_new(struct tracecmd_msg_handle *msg_handle,
> > +                           const char *clock_str,
> > +                           enum clock_sync_context id)
> > +{
> > +       struct tracecmd_clock_sync *clock_context;
> > +       struct tep_event *event;
> > +       unsigned int i = 0;
> > +
> > +       switch (id) {
> > +#ifdef VSOCK
> > +       case CLOCK_KVM_X86_VSOCK_HOST:
> > +       case CLOCK_KVM_X86_VSOCK_GUEST:
> > +               break;
> > +#endif
> > +       default: /* not supported clock sync context */
> > +               return NULL;
> > +       }
> > +
> > +       if (id >= CLOCK_CONTEXT_MAX || NULL == msg_handle)
> > +               return NULL;
> > +       clock_context = calloc(1, sizeof(struct tracecmd_clock_sync));
> > +       if (!clock_context)
> > +               return NULL;
> > +       clock_context->clock_context_id = id;
> > +       clock_context_copy(clock_context,
> > +                          clock_sync[id].ftrace_params, clock_sync[id].events);
> > +
> > +       get_vsocket_params(msg_handle->fd,
> > +                          &(clock_context->local_cid),
> > +                          &(clock_context->local_port),
> > +                          &(clock_context->remote_cid),
> > +                          &(clock_context->remote_port));  
> 
> Nit: parenthesis are not needed:
> 
>       get_vsocket_params(msg_handle->fd,
>                          &clock_context->local_cid,
>                          &clock_context->local_port,
>                          &clock_context->remote_cid,
>                          &clock_context->remote_port);
> 
> > +
> > +       if (clock_sync[id].clock_sync_init)
> > +               clock_sync[id].clock_sync_init(clock_context);
> > +
> > +       clock_context->vinst = clock_synch_create_instance(clock_str, clock_context->remote_cid);
> > +       clock_context->tep = clock_synch_get_tep(clock_context->vinst,
> > +                                                clock_sync[id].systems);
> > +       i = 0;
> > +       while (clock_sync[id].events[i].name) {
> > +               event = tep_find_event_by_name(clock_context->tep,
> > +                                              clock_sync[id].events[i].system,
> > +                                              clock_sync[id].events[i].name);
> > +               if (!event)
> > +                       break;
> > +               clock_context->events[i].id = event->id;
> > +               i++;
> > +       }

Just an FYI, when replying to large patches, it's recommended to crop
the message to the specifics, that way people don't need to scroll down
forever, and most likely will miss a response. Some kernel developers I
know would ignore replies if they are not properly cropped.

In some cases, that includes me ;-) (If I scroll more than 3 pages, and
don't see a response, I sometimes stop there).

-- Steve
Slavomir Kaslev March 19, 2019, 6:02 p.m. UTC | #3
On Tue, Mar 19, 2019 at 7:52 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> Just an FYI, when replying to large patches, it's recommended to crop
> the message to the specifics, that way people don't need to scroll down
> forever, and most likely will miss a response. Some kernel developers I
> know would ignore replies if they are not properly cropped.
>
> In some cases, that includes me ;-) (If I scroll more than 3 pages, and
> don't see a response, I sometimes stop there).

Good point, I'll start doing that. Thanks.

-- Slavi
Matt Helsley March 19, 2019, 6:29 p.m. UTC | #4
> On Mar 19, 2019, at 11:02 AM, Slavomir Kaslev <kaslevs@vmware.com> wrote:
> 
> On Tue, Mar 19, 2019 at 7:52 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>> 
>> Just an FYI, when replying to large patches, it's recommended to crop
>> the message to the specifics, that way people don't need to scroll down
>> forever, and most likely will miss a response. Some kernel developers I
>> know would ignore replies if they are not properly cropped.
>> 
>> In some cases, that includes me ;-) (If I scroll more than 3 pages, and
>> don't see a response, I sometimes stop there).
> 
> Good point, I'll start doing that. Thanks.
> 
> -- Slavi

Thank you! I, for one, really appreciate folks who take the time to trim the context. When I do it, I like to mark removed parts with (including newlines)

<snip>

or

<snipped>

just so everyone knows something got removed from the quoted email.

Cheers,
     -Matt
Steven Rostedt March 19, 2019, 6:40 p.m. UTC | #5
On Tue, 19 Mar 2019 18:29:39 +0000
Matt Helsley <mhelsley@vmware.com> wrote:
> 
> Thank you! I, for one, really appreciate folks who take the time to trim the context. When I do it, I like to mark removed parts with (including newlines)
> 
> <snip>
> 
> or
> 
> <snipped>
> 
> just so everyone knows something got removed from the quoted email.


I will say that's fine, but not a requirement. I will at most do "[..]"
but most cases, I just cut it, since it is usually pretty obvious that
something was cut.

At a minimum, I make sure there's a space where a cut is. That is:

> this is some text I keep
> and this will be cut
> and this is more text

I do

> this is some text I keep

> and this is more text

Instead of:

> this is some text I keep
> and this is more text


-- Steve
Tzvetomir Stoyanov March 20, 2019, 3:06 p.m. UTC | #6
On Tue, Mar 19, 2019 at 7:14 PM Slavomir Kaslev <kaslevs@vmware.com> wrote:
>
> On Tue, Mar 19, 2019 at 5:56 PM Tzvetomir Stoyanov <tstoyanov@vmware.com> wrote:
> >
> > This is a POC patch, implementing an algorithm for syncing timestamps between
> > host and guest machines, using vsock trace events to catch the host / guest time.
> >
> > Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
> > ---
> >  include/trace-cmd/trace-cmd.h  |  21 +-
> >  tracecmd/Makefile              |   1 +
> >  tracecmd/include/trace-local.h |  33 +-
> >  tracecmd/trace-agent.c         |  13 +-
> >  tracecmd/trace-msg.c           | 208 ++++++++-
> >  tracecmd/trace-record.c        |  85 +++-
> >  tracecmd/trace-timesync.c      | 825 +++++++++++++++++++++++++++++++++
> >  7 files changed, 1150 insertions(+), 36 deletions(-)
> >  create mode 100644 tracecmd/trace-timesync.c
> >

> > --- a/tracecmd/Makefile
> > +++ b/tracecmd/Makefile
> > @@ -32,6 +32,7 @@ TRACE_CMD_OBJS += trace-list.o
> >  TRACE_CMD_OBJS += trace-output.o
> >  TRACE_CMD_OBJS += trace-usage.o
> >  TRACE_CMD_OBJS += trace-msg.o
> > +TRACE_CMD_OBJS += trace-timesync.o
>
> Shouldn't this be on only if $VSOCK_DEFINED, same as trace-agent.o below?

By design, trace-agent.c should handle all timestamp sync logic, not
only vsock relatated one.
Currently we rely only on vsock events, but we might have timestamp
sync which does not
depend on vsocks in the future.

>
> >
> >  ifeq ($(VSOCK_DEFINED), 1)
> >  TRACE_CMD_OBJS += trace-agent.o


> > +//#define TSYNC_RBUFFER_DEBUG
> > +static int find_raw_events(struct tep_handle *tep,
> > +                   struct buffer_instance *instance,
> > +                   struct tracecmd_time_sync_event *events)
> > +{
> > +       struct tracecmd_time_sync_event *events_array = NULL;
> > +       int events_count = 0;
> > +       int events_size = 0;
> > +       unsigned int p_size;
> > +       int ts = 0;
> > +       void *page;
> > +       char *path;
> > +       char *file;
> > +       DIR *dir;
>
> `dir` is unused besides getting opened and closed below, remove it?
> Or is it supposed to check for the existence of such folder? If so,
> maybe a stat() will suffice or rely on that opening files inside this
> directory will fail too.
>
I'll remove it, I think there is no need to check if it exists.

> > +       path = get_instance_file(instance, "per_cpu");
> > +       if (!path)
> > +               return ts;
> > +
> > +       dir = opendir(path);
> > +       if (!dir)
> > +               goto out;
> > +
> > +       len = strlen(path);
> > +       file = malloc(len + strlen("trace_pipe_raw") + 32);
>
> How about having file defined as
>
>     char file[PATH_MAX];
>
> and avoid allocation and computing how much space we need for it?

Thanks, I'll add it in the next version of the patch.

>
> > +static void get_vsocket_params(int fd, unsigned int *lcid, unsigned int *lport,
> > +                              unsigned int *rcid, unsigned int *rport)
>
> This static function is called only once in this file and never with
> NULL output arguments. I think we can safely drop all NULL pointer
> checks in it and simplify it.
>

That's true, the reason for those checks is that in one of the
previous versions of
the patch it was called with NULL pointers. I'll remove the checks.

> > +tracecmd_clock_context_new(struct tracecmd_msg_handle *msg_handle,
> > +                           const char *clock_str,
> > +                           enum clock_sync_context id)
> > +{
> > +       struct tracecmd_clock_sync *clock_context;
> > +       struct tep_event *event;
> > +       unsigned int i = 0;
> > +
> > +       switch (id) {
> > +#ifdef VSOCK
> > +       case CLOCK_KVM_X86_VSOCK_HOST:
> > +       case CLOCK_KVM_X86_VSOCK_GUEST:
> > +               break;
> > +#endif
> > +       default: /* not supported clock sync context */
> > +               return NULL;
> > +       }
> > +
> > +       if (id >= CLOCK_CONTEXT_MAX || NULL == msg_handle)
> > +               return NULL;
> > +       clock_context = calloc(1, sizeof(struct tracecmd_clock_sync));
> > +       if (!clock_context)
> > +               return NULL;
> > +       clock_context->clock_context_id = id;
> > +       clock_context_copy(clock_context,
> > +                          clock_sync[id].ftrace_params, clock_sync[id].events);
> > +
> > +       get_vsocket_params(msg_handle->fd,
> > +                          &(clock_context->local_cid),
> > +                          &(clock_context->local_port),
> > +                          &(clock_context->remote_cid),
> > +                          &(clock_context->remote_port));
>
> Nit: parenthesis are not needed:
>
>       get_vsocket_params(msg_handle->fd,
>                          &clock_context->local_cid,
>                          &clock_context->local_port,
>                          &clock_context->remote_cid,
>                          &clock_context->remote_port);
>
> > +

Thanks Slavi
diff mbox series

Patch

diff --git a/include/trace-cmd/trace-cmd.h b/include/trace-cmd/trace-cmd.h
index 5552396..905f3ec 100644
--- a/include/trace-cmd/trace-cmd.h
+++ b/include/trace-cmd/trace-cmd.h
@@ -341,16 +341,29 @@  bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle);
 void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle);
 
 int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle,
-				int argc, char **argv, bool use_fifos);
+				int argc, char **argv, bool use_fifos,
+				bool do_tsync);
 int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
-				int *argc, char ***argv, bool *use_fifos);
+				int *argc, char ***argv, bool *use_fifos,
+				bool *do_tsync);
 
 int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
 				 int nr_cpus, int page_size,
-				 unsigned int *ports, bool use_fifos);
+				 unsigned int *ports, bool use_fifos,
+				 bool do_tsync);
 int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
 				 int *nr_cpus, int *page_size,
-				 unsigned int **ports, bool *use_fifos);
+				 unsigned int **ports, bool *use_fifos,
+				 bool *do_tsync);
+
+struct tracecmd_clock_sync;
+
+int tracecmd_msg_rcv_time_sync(struct tracecmd_msg_handle *msg_handle,
+			       struct tracecmd_clock_sync *clock_sync,
+			       long long *offset, long long *timestamp);
+int tracecmd_msg_snd_time_sync(struct tracecmd_msg_handle *msg_handle,
+			       struct tracecmd_clock_sync *clock_sync,
+			       long long *offset, long long *timestamp);
 
 /* --- Plugin handling --- */
 extern struct tep_plugin_option trace_ftrace_options[];
diff --git a/tracecmd/Makefile b/tracecmd/Makefile
index d3e3080..8a73bf7 100644
--- a/tracecmd/Makefile
+++ b/tracecmd/Makefile
@@ -32,6 +32,7 @@  TRACE_CMD_OBJS += trace-list.o
 TRACE_CMD_OBJS += trace-output.o
 TRACE_CMD_OBJS += trace-usage.o
 TRACE_CMD_OBJS += trace-msg.o
+TRACE_CMD_OBJS += trace-timesync.o
 
 ifeq ($(VSOCK_DEFINED), 1)
 TRACE_CMD_OBJS += trace-agent.o
diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index 62f5e47..7688b02 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -26,6 +26,7 @@  extern int quiet;
 typedef unsigned long long u64;
 
 struct buffer_instance;
+struct tracecmd_clock_sync;
 
 /* for local shared information with trace-cmd executable */
 
@@ -101,7 +102,7 @@  void trace_usage(int argc, char **argv);
 
 int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
 		       int cpus, int *fds,
-		       int argc, char **argv, bool use_fifos);
+		       int argc, char **argv, bool use_fifos, bool do_tsync);
 
 struct hook_list;
 
@@ -214,6 +215,12 @@  struct buffer_instance {
 	int			port;
 	int			*fds;
 	bool			use_fifos;
+	bool			do_tsync;
+
+	struct tracecmd_clock_sync *clock_sync;
+	int			time_sync_count;
+	long long		*time_sync_ts;
+	long long		*time_sync_offsets;
 };
 
 extern struct buffer_instance top_instance;
@@ -235,6 +242,30 @@  void update_first_instance(struct buffer_instance *instance, int topt);
 void show_instance_file(struct buffer_instance *instance, const char *name);
 
 int count_cpus(void);
+
+struct tracecmd_time_sync_event {
+	int			id;
+	int			cpu;
+	int			pid;
+	unsigned long long	ts;
+};
+
+int tracecmd_clock_get_peer(struct tracecmd_clock_sync *clock_context,
+			    unsigned int *remote_cid, unsigned int *remote_port);
+bool tracecmd_time_sync_check(void);
+void tracecmd_clock_context_free(struct buffer_instance *instance);
+int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int cpu,
+			      struct tracecmd_time_sync_event *event);
+void tracecmd_clock_synch_enable(struct tracecmd_clock_sync *clock_context);
+void tracecmd_clock_synch_disable(struct tracecmd_clock_sync *clock_context);
+void tracecmd_clock_synch_calc_reset(struct tracecmd_clock_sync *clock_context);
+void tracecmd_clock_synch_calc_probe(struct tracecmd_clock_sync *clock_context,
+				     long long ts_local, long long ts_remote);
+int tracecmd_clock_synch_calc(struct tracecmd_clock_sync *clock_context,
+			       long long *offset_ret, long long *time_ret);
+void sync_time_with_host_v3(struct buffer_instance *instance);
+void sync_time_with_guest_v3(struct buffer_instance *instance);
+
 void write_tracing_on(struct buffer_instance *instance, int on);
 char *get_instance_dir(struct buffer_instance *instance);
 int write_instance_file(struct buffer_instance *instance,
diff --git a/tracecmd/trace-agent.c b/tracecmd/trace-agent.c
index 7389f72..2189bc1 100644
--- a/tracecmd/trace-agent.c
+++ b/tracecmd/trace-agent.c
@@ -116,6 +116,7 @@  static void agent_handle(int sd, int nr_cpus, int page_size)
 	char **argv = NULL;
 	int argc = 0;
 	bool use_fifos;
+	bool do_tsync;
 	int *fds;
 	int ret;
 
@@ -128,7 +129,8 @@  static void agent_handle(int sd, int nr_cpus, int page_size)
 	if (!msg_handle)
 		die("Failed to allocate message handle");
 
-	ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv, &use_fifos);
+	ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv,
+					  &use_fifos, &do_tsync);
 	if (ret < 0)
 		die("Failed to receive trace request");
 
@@ -137,13 +139,18 @@  static void agent_handle(int sd, int nr_cpus, int page_size)
 
 	if (!use_fifos)
 		make_vsocks(nr_cpus, fds, ports);
+	if (do_tsync) {
+		do_tsync = tracecmd_time_sync_check();
+		if (!do_tsync)
+			warning("Failed to negotiate timestamps synchronization with the host");
+	}
 
 	ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size,
-					   ports, use_fifos);
+					   ports, use_fifos, do_tsync);
 	if (ret < 0)
 		die("Failed to send trace response");
 
-	trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos);
+	trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos, do_tsync);
 
 	free(argv[0]);
 	free(argv);
diff --git a/tracecmd/trace-msg.c b/tracecmd/trace-msg.c
index 065a01e..bb3c060 100644
--- a/tracecmd/trace-msg.c
+++ b/tracecmd/trace-msg.c
@@ -26,8 +26,12 @@ 
 #include "trace-local.h"
 #include "trace-msg.h"
 
+typedef __u16 u16;
+typedef __s16 s16;
 typedef __u32 u32;
 typedef __be32 be32;
+typedef __u64 u64;
+typedef __s64 s64;
 
 static inline void dprint(const char *fmt, ...)
 {
@@ -50,6 +54,9 @@  static inline void dprint(const char *fmt, ...)
 
 unsigned int page_size;
 
+/* Try a few times to get an accurate time sync */
+#define TSYNC_TRIES 300
+
 struct tracecmd_msg_opt {
 	be32 size;
 	be32 opt_cmd;
@@ -76,6 +83,20 @@  struct tracecmd_msg_trace_resp {
 	be32 page_size;
 } __attribute__((packed));
 
+struct tracecmd_msg_tsync_stop {
+	long long offset;
+	long long timestamp;
+} __attribute__((packed));
+
+struct tracecmd_msg_tsync_req {
+	u16 cpu;
+} __attribute__((packed));
+
+struct tracecmd_msg_tsync_resp {
+	u64 time;
+} __attribute__((packed));
+
+
 struct tracecmd_msg_header {
 	be32	size;
 	be32	cmd;
@@ -83,14 +104,19 @@  struct tracecmd_msg_header {
 } __attribute__((packed));
 
 #define MSG_MAP								\
-	C(CLOSE,	0,	0),					\
-	C(TINIT,	1,	sizeof(struct tracecmd_msg_tinit)),	\
-	C(RINIT,	2,	sizeof(struct tracecmd_msg_rinit)),	\
-	C(SEND_DATA,	3,	0),					\
-	C(FIN_DATA,	4,	0),					\
-	C(NOT_SUPP,	5,	0),					\
-	C(TRACE_REQ,	6,	sizeof(struct tracecmd_msg_trace_req)),	\
-	C(TRACE_RESP,	7,	sizeof(struct tracecmd_msg_trace_resp)),
+	C(CLOSE,	  0,	0),					\
+	C(TINIT,	  1,	sizeof(struct tracecmd_msg_tinit)),	\
+	C(RINIT,	  2,	sizeof(struct tracecmd_msg_rinit)),	\
+	C(SEND_DATA,	  3,	0),					\
+	C(FIN_DATA,	  4,	0),					\
+	C(NOT_SUPP,	  5,	0),					\
+	C(TRACE_REQ,	  6,	sizeof(struct tracecmd_msg_trace_req)),	\
+	C(TRACE_RESP,	  7,	sizeof(struct tracecmd_msg_trace_resp)),\
+	C(TSYNC_START,	  8,	0),					\
+	C(TSYNC_STOP,	  9,	sizeof(struct tracecmd_msg_tsync_stop)),\
+	C(TSYNC_PROBE,	  11,	0),					\
+	C(TSYNC_REQ,	  11,	sizeof(struct tracecmd_msg_tsync_req)),	\
+	C(TSYNC_RESP,	  12,	sizeof(struct tracecmd_msg_tsync_resp)),
 
 #undef C
 #define C(a,b,c)	MSG_##a = b
@@ -120,10 +146,13 @@  static const char *cmd_to_name(int cmd)
 struct tracecmd_msg {
 	struct tracecmd_msg_header		hdr;
 	union {
-		struct tracecmd_msg_tinit	tinit;
-		struct tracecmd_msg_rinit	rinit;
-		struct tracecmd_msg_trace_req	trace_req;
-		struct tracecmd_msg_trace_resp	trace_resp;
+		struct tracecmd_msg_tinit		tinit;
+		struct tracecmd_msg_rinit		rinit;
+		struct tracecmd_msg_trace_req		trace_req;
+		struct tracecmd_msg_trace_resp		trace_resp;
+		struct tracecmd_msg_tsync_stop		ts_stop;
+		struct tracecmd_msg_tsync_req		ts_req;
+		struct tracecmd_msg_tsync_resp		ts_resp;
 	};
 	union {
 		struct tracecmd_msg_opt		*opt;
@@ -161,6 +190,7 @@  static int msg_write(int fd, struct tracecmd_msg *msg)
 
 enum msg_trace_flags {
 	MSG_TRACE_USE_FIFOS = 1 << 0,
+	MSG_TRACE_DO_TSYNC =  1 << 1,
 };
 
 enum msg_opt_command {
@@ -744,7 +774,8 @@  error:
 	return ret;
 }
 
-static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool use_fifos)
+static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv,
+			  bool use_fifos, bool do_tsync)
 {
 	size_t args_size = 0;
 	char *p;
@@ -754,7 +785,12 @@  static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool
 		args_size += strlen(argv[i]) + 1;
 
 	msg->hdr.size = htonl(ntohl(msg->hdr.size) + args_size);
-	msg->trace_req.flags = use_fifos ? htonl(MSG_TRACE_USE_FIFOS) : htonl(0);
+	msg->trace_req.flags = 0;
+	if (use_fifos)
+		msg->trace_req.flags |= MSG_TRACE_USE_FIFOS;
+	if (do_tsync)
+		msg->trace_req.flags |= MSG_TRACE_DO_TSYNC;
+	msg->trace_req.flags = htonl(msg->trace_req.flags);
 	msg->trace_req.argc = htonl(argc);
 	msg->buf = calloc(args_size, 1);
 	if (!msg->buf)
@@ -768,13 +804,14 @@  static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool
 }
 
 int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle,
-				int argc, char **argv, bool use_fifos)
+				int argc, char **argv, bool use_fifos,
+				bool do_tsync)
 {
 	struct tracecmd_msg msg;
 	int ret;
 
 	tracecmd_msg_init(MSG_TRACE_REQ, &msg);
-	ret = make_trace_req(&msg, argc, argv, use_fifos);
+	ret = make_trace_req(&msg, argc, argv, use_fifos, do_tsync);
 	if (ret < 0)
 		return ret;
 
@@ -787,7 +824,8 @@  int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle,
   *     free(argv);
   */
 int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
-				int *argc, char ***argv, bool *use_fifos)
+				int *argc, char ***argv, bool *use_fifos,
+				bool *do_tsync)
 {
 	struct tracecmd_msg msg;
 	char *p, *buf_end, **args;
@@ -840,6 +878,7 @@  int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle *msg_handle,
 	*argc = nr_args;
 	*argv = args;
 	*use_fifos = ntohl(msg.trace_req.flags) & MSG_TRACE_USE_FIFOS;
+	*do_tsync = ntohl(msg.trace_req.flags) & MSG_TRACE_DO_TSYNC;
 
 	/*
 	 * On success we're passing msg.buf to the caller through argv[0] so we
@@ -859,14 +898,136 @@  out:
 	return ret;
 }
 
+int tracecmd_msg_rcv_time_sync(struct tracecmd_msg_handle *msg_handle,
+			       struct tracecmd_clock_sync *clock_context,
+			       long long *offset, long long *timestamp)
+{
+	struct tracecmd_time_sync_event event;
+	unsigned int remote_cid = 0;
+	struct tracecmd_msg msg;
+	int cpu_pid, ret;
+
+	if (clock_context == NULL || msg_handle == NULL)
+		return 0;
+
+	if (offset)
+		*offset = 0;
+
+	ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+	if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP)
+		return 0;
+	if (ntohl(msg.hdr.cmd) != MSG_TSYNC_START) {
+		handle_unexpected_msg(msg_handle, &msg);
+		return 0;
+	}
+
+	tracecmd_clock_get_peer(clock_context, &remote_cid, NULL);
+	tracecmd_msg_init(MSG_TSYNC_START, &msg);
+	tracecmd_msg_send(msg_handle->fd, &msg);
+	tracecmd_clock_synch_enable(clock_context);
+
+	do {
+		memset(&event, 0, sizeof(event));
+		ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+		if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP)
+			break;
+		if (ntohl(msg.hdr.cmd) != MSG_TSYNC_PROBE) {
+			handle_unexpected_msg(msg_handle, &msg);
+			break;
+		}
+		ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+		if (ret < 0 || ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP)
+			break;
+		if (ntohl(msg.hdr.cmd) != MSG_TSYNC_REQ) {
+			handle_unexpected_msg(msg_handle, &msg);
+			break;
+		}
+		/* Get kvm event related to the corresponding VCPU context */
+		cpu_pid = get_guest_vcpu_pid(remote_cid, ntohs(msg.ts_req.cpu));
+		tracecmd_clock_find_event(clock_context, cpu_pid, &event);
+		tracecmd_msg_init(MSG_TSYNC_RESP, &msg);
+		msg.ts_resp.time = htonll(event.ts);
+		tracecmd_msg_send(msg_handle->fd, &msg);
+	} while (true);
+
+	tracecmd_clock_synch_disable(clock_context);
+
+	if (ret >= 0 && ntohl(msg.hdr.cmd) == MSG_TSYNC_STOP) {
+		if (offset)
+			*offset = ntohll(msg.ts_stop.offset);
+		if (timestamp)
+			*timestamp = ntohll(msg.ts_stop.timestamp);
+	}
+
+	msg_free(&msg);
+	return 0;
+}
+
+int tracecmd_msg_snd_time_sync(struct tracecmd_msg_handle *msg_handle,
+			       struct tracecmd_clock_sync *clock_context,
+			       long long *offset, long long *timestamp)
+{
+	struct tracecmd_time_sync_event event;
+	int sync_loop = TSYNC_TRIES;
+	struct tracecmd_msg msg;
+	int ret;
+
+	if (clock_context == NULL || msg_handle == NULL)
+		return 0;
+
+	tracecmd_msg_init(MSG_TSYNC_START, &msg);
+	tracecmd_msg_send(msg_handle->fd, &msg);
+	ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+	if (ret < 0 || ntohl(msg.hdr.cmd) != MSG_TSYNC_START)
+		return 0;
+	tracecmd_clock_synch_calc_reset(clock_context);
+	tracecmd_clock_synch_enable(clock_context);
+
+	do {
+		tracecmd_msg_init(MSG_TSYNC_PROBE, &msg);
+		tracecmd_msg_send(msg_handle->fd, &msg);
+		/* Get the ts and CPU of the sent event */
+		ret = tracecmd_clock_find_event(clock_context, -1, &event);
+		tracecmd_msg_init(MSG_TSYNC_REQ, &msg);
+		msg.ts_req.cpu = htons(event.cpu);
+		tracecmd_msg_send(msg_handle->fd, &msg);
+		memset(&msg, 0, sizeof(msg));
+		ret = tracecmd_msg_recv(msg_handle->fd, &msg);
+		if (ret < 0)
+			break;
+		if (ntohl(msg.hdr.cmd) != MSG_TSYNC_RESP) {
+			handle_unexpected_msg(msg_handle, &msg);
+			break;
+		}
+		tracecmd_clock_synch_calc_probe(clock_context,
+						event.ts,
+						htonll(msg.ts_resp.time));
+	} while (--sync_loop);
+
+	tracecmd_clock_synch_disable(clock_context);
+	tracecmd_clock_synch_calc(clock_context, offset, timestamp);
+	tracecmd_msg_init(MSG_TSYNC_STOP, &msg);
+	msg.ts_stop.offset = htonll(*offset);
+	msg.ts_stop.timestamp = htonll(*timestamp);
+	tracecmd_msg_send(msg_handle->fd, &msg);
+
+	msg_free(&msg);
+	return 0;
+}
+
 static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus,
-			   unsigned int *ports, bool use_fifos)
+			   unsigned int *ports, bool use_fifos, bool do_tsync)
 {
 	int ports_size = nr_cpus * sizeof(*msg->port_array);
 	int i;
 
 	msg->hdr.size = htonl(ntohl(msg->hdr.size) + ports_size);
-	msg->trace_resp.flags = use_fifos ? htonl(MSG_TRACE_USE_FIFOS) : htonl(0);
+	msg->trace_resp.flags = 0;
+	if (use_fifos)
+		msg->trace_resp.flags |= MSG_TRACE_USE_FIFOS;
+	if (do_tsync)
+		msg->trace_resp.flags |= MSG_TRACE_DO_TSYNC;
+	msg->trace_resp.flags = htonl(msg->trace_resp.flags);
 	msg->trace_resp.cpus = htonl(nr_cpus);
 	msg->trace_resp.page_size = htonl(page_size);
 
@@ -882,13 +1043,14 @@  static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus,
 
 int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
 				 int nr_cpus, int page_size,
-				 unsigned int *ports, bool use_fifos)
+				 unsigned int *ports, bool use_fifos,
+				 bool do_tsync)
 {
 	struct tracecmd_msg msg;
 	int ret;
 
 	tracecmd_msg_init(MSG_TRACE_RESP, &msg);
-	ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos);
+	ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos, do_tsync);
 	if (ret < 0)
 		return ret;
 
@@ -897,7 +1059,8 @@  int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle,
 
 int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
 				 int *nr_cpus, int *page_size,
-				 unsigned int **ports, bool *use_fifos)
+				 unsigned int **ports, bool *use_fifos,
+				 bool *do_tsync)
 {
 	struct tracecmd_msg msg;
 	ssize_t buf_len;
@@ -920,6 +1083,7 @@  int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle,
 	}
 
 	*use_fifos = ntohl(msg.trace_resp.flags) & MSG_TRACE_USE_FIFOS;
+	*do_tsync = ntohl(msg.trace_resp.flags) & MSG_TRACE_DO_TSYNC;
 	*nr_cpus = ntohl(msg.trace_resp.cpus);
 	*page_size = ntohl(msg.trace_resp.page_size);
 	*ports = calloc(*nr_cpus, sizeof(**ports));
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 8c8e46f..0ae42b0 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -210,6 +210,8 @@  struct common_record_context {
 	char *date2ts;
 	char *max_graph_depth;
 	int data_flags;
+	int time_shift_count;
+	struct tracecmd_option *time_shift_option;
 
 	int record_all;
 	int total_disable;
@@ -649,11 +651,20 @@  static void tell_guests_to_stop(void)
 	for_all_instances(instance) {
 		if (is_guest(instance)) {
 			tracecmd_msg_send_close_msg(instance->msg_handle);
-			tracecmd_msg_handle_close(instance->msg_handle);
 		}
 	}
 }
 
+static void close_guests_handle(void)
+{
+	struct buffer_instance *instance;
+
+	for_all_instances(instance) {
+		if (is_guest(instance))
+			tracecmd_msg_handle_close(instance->msg_handle);
+	}
+}
+
 static void stop_threads(enum trace_type type)
 {
 	struct timeval tv = { 0, 0 };
@@ -3359,6 +3370,7 @@  static void connect_to_agent(struct buffer_instance *instance)
 	unsigned int *ports;
 	int i, *fds = NULL;
 	bool use_fifos = false;
+	bool do_tsync, do_tsync_reply;
 
 	if (!no_fifos) {
 		nr_fifos = open_guest_fifos(instance->name, &fds);
@@ -3370,20 +3382,24 @@  static void connect_to_agent(struct buffer_instance *instance)
 		die("Failed to connect to vsocket @%d:%d",
 		    instance->cid, instance->port);
 
+	do_tsync = tracecmd_time_sync_check();
+
 	msg_handle = tracecmd_msg_handle_alloc(sd, 0);
 	if (!msg_handle)
 		die("Failed to allocate message handle");
 
 	ret = tracecmd_msg_send_trace_req(msg_handle, instance->argc,
-					  instance->argv, use_fifos);
+					  instance->argv, use_fifos, do_tsync);
 	if (ret < 0)
 		die("Failed to send trace request");
 
 	ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size,
-					   &ports, &use_fifos);
+					   &ports, &use_fifos, &do_tsync_reply);
 	if (ret < 0)
 		die("Failed to receive trace response");
-
+	if (do_tsync != do_tsync_reply)
+		warning("Failed to negotiate timestamps synchronization with the guest %s",
+			instance->name);
 	if (use_fifos) {
 		if (nr_cpus != nr_fifos) {
 			warning("number of FIFOs (%d) for guest %s differs "
@@ -3401,10 +3417,13 @@  static void connect_to_agent(struct buffer_instance *instance)
 	}
 
 	instance->use_fifos = use_fifos;
+	instance->do_tsync = do_tsync_reply;
 	instance->cpu_count = nr_cpus;
 
 	/* the msg_handle now points to the guest fd */
 	instance->msg_handle = msg_handle;
+
+	sync_time_with_guest_v3(instance);
 }
 
 static void setup_guest(struct buffer_instance *instance)
@@ -3429,10 +3448,13 @@  static void setup_guest(struct buffer_instance *instance)
 	close(fd);
 }
 
-static void setup_agent(struct buffer_instance *instance, struct common_record_context *ctx)
+static void setup_agent(struct buffer_instance *instance,
+			struct common_record_context *ctx)
 {
 	struct tracecmd_output *network_handle;
 
+	sync_time_with_host_v3(instance);
+
 	network_handle = tracecmd_create_init_fd_msg(instance->msg_handle,
 						     listed_events);
 	add_options(network_handle, ctx);
@@ -5607,6 +5629,42 @@  static bool has_local_instances(void)
 	return false;
 }
 
+//#define TSYNC_DEBUG
+static void write_guest_time_shift(struct buffer_instance *instance)
+{
+	struct tracecmd_output *handle;
+	struct iovec vector[3];
+	char *file;
+	int fd;
+
+	if (!instance->time_sync_count)
+		return;
+
+	file = get_guest_file(output_file, instance->name);
+	fd = open(file, O_RDWR);
+	if (fd < 0)
+		die("error opening %s", file);
+	put_temp_file(file);
+	handle = tracecmd_get_output_handle_fd(fd);
+	vector[0].iov_len = sizeof(instance->time_sync_count);
+	vector[0].iov_base = &instance->time_sync_count;
+	vector[1].iov_len = sizeof(long long) * instance->time_sync_count;
+	vector[1].iov_base = instance->time_sync_ts;
+	vector[2].iov_len = sizeof(long long) * instance->time_sync_count;
+	vector[2].iov_base = instance->time_sync_offsets;
+	tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, 3);
+	tracecmd_append_options(handle);
+	tracecmd_output_close(handle);
+#ifdef TSYNC_DEBUG
+	if (instance->time_sync_count > 1)
+		printf("\n\rDetected %lld ns ts offset drift in %lld ns for guest %s\n\r",
+			instance->time_sync_offsets[instance->time_sync_count-1] -
+			instance->time_sync_offsets[0],
+			instance->time_sync_ts[instance->time_sync_count-1]-
+			instance->time_sync_ts[0], instance->name);
+#endif
+}
+
 /*
  * This function contains common code for the following commands:
  * record, start, stream, profile.
@@ -5726,6 +5784,20 @@  static void record_trace(int argc, char **argv,
 	if (!latency)
 		wait_threads();
 
+	if (ctx->instance && is_agent(ctx->instance)) {
+		sync_time_with_host_v3(ctx->instance);
+		tracecmd_clock_context_free(ctx->instance);
+	} else {
+		for_all_instances(instance) {
+			if (is_guest(instance)) {
+				sync_time_with_guest_v3(instance);
+				write_guest_time_shift(instance);
+				tracecmd_clock_context_free(instance);
+			}
+		}
+	}
+	close_guests_handle();
+
 	if (IS_RECORD(ctx)) {
 		record_data(ctx);
 		delete_thread_data();
@@ -5864,7 +5936,7 @@  void trace_record(int argc, char **argv)
 int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
 		       int cpus, int *fds,
 		       int argc, char **argv,
-		       bool use_fifos)
+		       bool use_fifos, bool do_tsync)
 {
 	struct common_record_context ctx;
 	char **argv_plus;
@@ -5891,6 +5963,7 @@  int trace_record_agent(struct tracecmd_msg_handle *msg_handle,
 
 	ctx.instance->fds = fds;
 	ctx.instance->use_fifos = use_fifos;
+	ctx.instance->do_tsync = do_tsync;
 	ctx.instance->flags |= BUFFER_FL_AGENT;
 	ctx.instance->msg_handle = msg_handle;
 	msg_handle->version = V3_PROTOCOL;
diff --git a/tracecmd/trace-timesync.c b/tracecmd/trace-timesync.c
new file mode 100644
index 0000000..c9b6241
--- /dev/null
+++ b/tracecmd/trace-timesync.c
@@ -0,0 +1,825 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019, VMware, Tzvetomir Stoyanov <tstoyanov@vmware.com>
+ *
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <linux/vm_sockets.h>
+#include "trace-local.h"
+
+//#define TSYNC_DEBUG
+
+static int clock_sync_x86_host_init(struct tracecmd_clock_sync *clock_context);
+static int clock_sync_x86_host_free(struct tracecmd_clock_sync *clock_context);
+static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock,
+				    int cpu, struct tracecmd_time_sync_event *event);
+static int clock_sync_x86_guest_init(struct tracecmd_clock_sync *clock_context);
+static int clock_sync_x86_guest_free(struct tracecmd_clock_sync *clock_context);
+static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock,
+					    int pid,
+					    struct tracecmd_time_sync_event *event);
+
+struct tracecmd_event_descr {
+	char			*system;
+	char			*name;
+};
+
+struct tracecmd_ftrace_param {
+	char	*file;
+	char	*set;
+	char	*reset;
+};
+
+enum clock_sync_context {
+	CLOCK_KVM_X86_VSOCK_HOST,
+	CLOCK_KVM_X86_VSOCK_GUEST,
+	CLOCK_CONTEXT_MAX,
+};
+
+struct tracecmd_clock_sync {
+	enum clock_sync_context		clock_context_id;
+	struct tracecmd_ftrace_param	*ftrace_params;
+	struct tracecmd_time_sync_event	*events;
+	int				events_count;
+	struct tep_handle		*tep;
+	struct buffer_instance		*vinst;
+
+	int				probes_count;
+	int				bad_probes;
+	int				probes_size;
+	long long			*times;
+	long long			*offsets;
+	long long			offset_av;
+	long long			offset_min;
+	long long			offset_max;
+	int				debug_fd;
+
+	unsigned int			local_cid;
+	unsigned int			local_port;
+	unsigned int			remote_cid;
+	unsigned int			remote_port;
+};
+
+struct {
+	const char *systems[3];
+	struct tracecmd_ftrace_param ftrace_params[5];
+	struct tracecmd_event_descr events[3];
+	int (*clock_sync_init)(struct tracecmd_clock_sync *clock_context);
+	int (*clock_sync_free)(struct tracecmd_clock_sync *clock_context);
+	int (*clock_sync_find_events)(struct tracecmd_clock_sync *clock_context,
+				      int pid,
+				      struct tracecmd_time_sync_event *event);
+	int (*clock_sync_load)(struct tracecmd_clock_sync *clock_context);
+	int (*clock_sync_unload)(struct tracecmd_clock_sync *clock_context);
+} static clock_sync[CLOCK_CONTEXT_MAX] = {
+	{ /* CLOCK_KVM_X86_VSOCK_HOST */
+	  .systems = {"vsock", "ftrace", NULL},
+	  .ftrace_params = {
+	  {"set_ftrace_filter", "vmx_read_l1_tsc_offset\nsvm_read_l1_tsc_offset", "\0"},
+	  {"current_tracer", "function", "nop"},
+	  {"events/vsock/virtio_transport_recv_pkt/enable", "1", "0"},
+	  {"events/vsock/virtio_transport_recv_pkt/filter", NULL, "\0"},
+	  {NULL, NULL, NULL} },
+	  .events = {
+	  {.system = "ftrace", .name = "function"},
+	  {.system = "vsock", .name = "virtio_transport_recv_pkt"},
+	  {.system = NULL, .name = NULL} },
+	 clock_sync_x86_host_init,
+	 clock_sync_x86_host_free,
+	 clock_sync_x86_host_find_events,
+	},
+
+	{ /* CLOCK_KVM_X86_VSOCK_GUEST */
+	 .systems = { "vsock", "ftrace", NULL},
+	 .ftrace_params = {
+	  {"set_ftrace_filter", "vp_notify", "\0"},
+	  {"current_tracer", "function", "nop"},
+	  {"events/vsock/virtio_transport_alloc_pkt/enable", "1", "0"},
+	  {"events/vsock/virtio_transport_alloc_pkt/filter", NULL, "\0"},
+	  {NULL, NULL, NULL},
+	  },
+	  .events = {
+	  {.system = "vsock", .name = "virtio_transport_alloc_pkt"},
+	  {.system = "ftrace", .name = "function"},
+	  {.system = NULL, .name = NULL}
+	 },
+	  clock_sync_x86_guest_init,
+	  clock_sync_x86_guest_free,
+	  clock_sync_x86_guest_find_events,
+	}
+};
+
+static int clock_sync_x86_host_init(struct tracecmd_clock_sync *clock_context)
+{
+	char vsock_filter[255];
+
+	snprintf(vsock_filter, 255,
+		"src_cid==%u && src_port==%u && dst_cid==%u && dst_port==%u && len!=0",
+		clock_context->remote_cid, clock_context->remote_port,
+		clock_context->local_cid, clock_context->local_port);
+
+	clock_context->ftrace_params[3].set = strdup(vsock_filter);
+	return 1;
+}
+
+static int clock_sync_x86_host_free(struct tracecmd_clock_sync *clock_context)
+{
+	free(clock_context->ftrace_params[3].set);
+	clock_context->ftrace_params[3].set = NULL;
+	return 1;
+}
+
+static int clock_sync_x86_guest_init(struct tracecmd_clock_sync *clock_context)
+{
+	char vsock_filter[255];
+
+	snprintf(vsock_filter, 255,
+		"src_cid==%u && src_port==%u && dst_cid==%u && dst_port==%u && len!=0",
+		clock_context->local_cid, clock_context->local_port,
+		clock_context->remote_cid, clock_context->remote_port);
+
+	clock_context->ftrace_params[3].set = strdup(vsock_filter);
+	return 1;
+}
+
+static int clock_sync_x86_guest_free(struct tracecmd_clock_sync *clock_context)
+{
+	free(clock_context->ftrace_params[3].set);
+	clock_context->ftrace_params[3].set = NULL;
+	return 1;
+}
+
+static int
+get_events_in_page(struct tep_handle *tep, void *page,
+		    int size, int cpu, struct tracecmd_time_sync_event **events,
+		    int *events_count, int *events_size)
+{
+	struct tracecmd_time_sync_event *events_array = NULL;
+	struct tep_record *last_record = NULL;
+	struct tep_event *event = NULL;
+	struct tep_record *record;
+	int id, cnt = 0;
+
+	if (size <= 0)
+		return 0;
+
+	if (*events == NULL) {
+		*events = malloc(10*sizeof(struct tracecmd_time_sync_event));
+		*events_size = 10;
+		*events_count = 0;
+	}
+
+	while (true) {
+		event = NULL;
+		record = tracecmd_read_page_record(tep, page, size,
+						   last_record);
+		if (!record)
+			break;
+		free_record(last_record);
+		id = tep_data_type(tep, record);
+		event = tep_data_event_from_type(tep, id);
+		if (event) {
+			if (*events_count >= *events_size) {
+				events_array = realloc(*events,
+					((3*(*events_size))/2)*
+					sizeof(struct tracecmd_time_sync_event));
+				if (events_array) {
+					*events = events_array;
+					(*events_size) = (((*events_size)*3)/2);
+				}
+			}
+
+			if (*events_count < *events_size) {
+				(*events)[*events_count].ts = record->ts;
+				(*events)[*events_count].cpu = cpu;
+				(*events)[*events_count].id = id;
+				(*events)[*events_count].pid = tep_data_pid(tep, record);
+				(*events_count)++;
+				cnt++;
+			}
+		}
+		last_record = record;
+	}
+	free_record(last_record);
+
+	return cnt;
+}
+
+static int sync_events_cmp(const void *a, const void *b)
+{
+	const struct tracecmd_time_sync_event *ea = (const struct tracecmd_time_sync_event *)a;
+	const struct tracecmd_time_sync_event *eb = (const struct tracecmd_time_sync_event *)b;
+
+	if (ea->ts > eb->ts)
+		return 1;
+	if (ea->ts < eb->ts)
+		return -1;
+	return 0;
+}
+
+static int find_sync_events(struct tep_handle *pevent,
+			    struct tracecmd_time_sync_event *recorded,
+			    int rsize, struct tracecmd_time_sync_event *events)
+{
+	int i = 0, j = 0;
+
+	while (i < rsize) {
+		if (!events[j].ts && events[j].id == recorded[i].id &&
+		    (events[j].pid < 0 || events[j].pid == recorded[i].pid)) {
+			events[j].cpu = recorded[i].cpu;
+			events[j].ts = recorded[i].ts;
+			j++;
+		} else if (j > 0 && events[j-1].id == recorded[i].id &&
+			  (events[j-1].pid < 0 || events[j-1].pid == recorded[i].pid)) {
+			events[j-1].cpu = recorded[i].cpu;
+			events[j-1].ts = recorded[i].ts;
+		}
+		i++;
+	}
+	return j;
+}
+
+//#define TSYNC_RBUFFER_DEBUG
+static int find_raw_events(struct tep_handle *tep,
+		    struct buffer_instance *instance,
+		    struct tracecmd_time_sync_event *events)
+{
+	struct tracecmd_time_sync_event *events_array = NULL;
+	int events_count = 0;
+	int events_size = 0;
+	unsigned int p_size;
+	int ts = 0;
+	void *page;
+	char *path;
+	char *file;
+	DIR *dir;
+	int len;
+	int fd;
+	int i;
+	int r;
+
+	p_size = getpagesize();
+#ifdef TSYNC_RBUFFER_DEBUG
+	file = get_instance_file(instance, "trace");
+	if (!file)
+		return ts;
+	{
+		char *buf = NULL;
+		FILE *fp;
+		size_t n;
+		int r;
+
+		printf("Events:\n\r");
+		fp = fopen(file, "r");
+		while ((r = getline(&buf, &n, fp)) >= 0) {
+
+			if (buf[0] != '#')
+				printf("%s", buf);
+			free(buf);
+			buf = NULL;
+		}
+		fclose(fp);
+	}
+	tracecmd_put_tracing_file(file);
+#endif /* TSYNC_RBUFFER_DEBUG */
+	path = get_instance_file(instance, "per_cpu");
+	if (!path)
+		return ts;
+
+	dir = opendir(path);
+	if (!dir)
+		goto out;
+
+	len = strlen(path);
+	file = malloc(len + strlen("trace_pipe_raw") + 32);
+	page = malloc(p_size);
+	if (!file || !page)
+		die("Failed to allocate time_stamp info");
+	for (i = 0; i < instance->cpu_count; i++) {
+		sprintf(file, "%s/cpu%d/trace_pipe_raw", path, i);
+		fd = open(file, O_RDONLY | O_NONBLOCK);
+		if (fd < 0)
+			continue;
+		do {
+			r = read(fd, page, p_size);
+			if (r > 0) {
+				get_events_in_page(tep, page, r, i,
+						   &events_array, &events_count,
+						   &events_size);
+			}
+		} while (r > 0);
+		close(fd);
+	}
+	qsort(events_array, events_count, sizeof(*events_array), sync_events_cmp);
+	r = find_sync_events(tep, events_array, events_count, events);
+#ifdef TSYNC_RBUFFER_DEBUG
+	len = 0;
+	while (events[len].id) {
+		printf("Found %d @ cpu %d: %lld pid %d\n\r",
+			events[len].id,  events[len].cpu,
+			events[len].ts, events[len].pid);
+		len++;
+	}
+#endif
+
+	free(events_array);
+	free(file);
+	free(page);
+	closedir(dir);
+
+ out:
+	tracecmd_put_tracing_file(path);
+	return r;
+}
+
+static int clock_sync_x86_host_find_events(struct tracecmd_clock_sync *clock,
+					   int pid,
+					   struct tracecmd_time_sync_event *event)
+{
+	int ret;
+
+	clock->events[0].pid = pid;
+	ret = find_raw_events(clock->tep, clock->vinst, clock->events);
+	event->ts = clock->events[0].ts;
+	event->cpu = clock->events[0].cpu;
+	return ret;
+
+}
+
+static int clock_sync_x86_guest_find_events(struct tracecmd_clock_sync *clock,
+					    int pid,
+					    struct tracecmd_time_sync_event *event)
+{
+	int ret;
+
+	ret = find_raw_events(clock->tep, clock->vinst, clock->events);
+	if (ret != clock->events_count)
+		return 0;
+	event->ts = clock->events[1].ts;
+	event->cpu = clock->events[0].cpu;
+	return 1;
+
+}
+
+static void tracecmd_clock_sync_reset(struct tracecmd_clock_sync *clock_context)
+{
+	int i = 0;
+
+	while (clock_context->events[i].id) {
+		clock_context->events[i].cpu = 0;
+		clock_context->events[i].ts = 0;
+		clock_context->events[i].pid = -1;
+		i++;
+	}
+}
+
+int tracecmd_clock_find_event(struct tracecmd_clock_sync *clock, int pid,
+			      struct tracecmd_time_sync_event *event)
+{
+	int ret = 0;
+	int id;
+
+	if (clock == NULL ||
+	    clock->clock_context_id >= CLOCK_CONTEXT_MAX)
+		return 0;
+
+	id = clock->clock_context_id;
+	if (clock_sync[id].clock_sync_find_events)
+		ret = clock_sync[id].clock_sync_find_events(clock, pid, event);
+
+	tracecmd_clock_sync_reset(clock);
+	return ret;
+}
+
+static void clock_context_copy(struct tracecmd_clock_sync *clock_context,
+			       struct tracecmd_ftrace_param *params,
+			       struct tracecmd_event_descr *events)
+{
+	int i;
+
+	i = 0;
+	while (params[i].file)
+		i++;
+	clock_context->ftrace_params = calloc(i+1, sizeof(struct tracecmd_ftrace_param));
+	i = 0;
+	while (params[i].file) {
+		clock_context->ftrace_params[i].file = strdup(params[i].file);
+		if (params[i].set)
+			clock_context->ftrace_params[i].set = strdup(params[i].set);
+		if (params[i].reset)
+			clock_context->ftrace_params[i].reset = strdup(params[i].reset);
+		i++;
+	}
+
+	i = 0;
+	while (events[i].name)
+		i++;
+	clock_context->events = calloc(i+1, sizeof(struct tracecmd_time_sync_event));
+	clock_context->events_count = i;
+}
+
+void trace_instance_reset(struct buffer_instance *vinst)
+{
+	write_instance_file(vinst, "trace", "\0", NULL);
+}
+
+static struct buffer_instance *
+clock_synch_create_instance(const char *clock, unsigned int cid)
+{
+	struct buffer_instance *vinst;
+	char inst_name[256];
+
+	snprintf(inst_name, 256, "clock_synch-%d", cid);
+
+	vinst = create_instance(strdup(inst_name));
+	tracecmd_init_instance(vinst);
+	vinst->cpu_count = tracecmd_local_cpu_count();
+	tracecmd_make_instance(vinst);
+	trace_instance_reset(vinst);
+	if (clock)
+		vinst->clock = strdup(clock);
+	tracecmd_set_clock(vinst);
+	return vinst;
+}
+
+static struct tep_handle *clock_synch_get_tep(struct buffer_instance *instance,
+					      const char * const *systems)
+{
+	struct tep_handle *tep = NULL;
+	char *path;
+
+	path = get_instance_dir(instance);
+	tep = tracecmd_local_events_system(path, systems);
+	tracecmd_put_tracing_file(path);
+
+	tep_set_file_bigendian(tep, tracecmd_host_bigendian());
+	tep_set_host_bigendian(tep, tracecmd_host_bigendian());
+
+	return tep;
+}
+
+static void get_vsocket_params(int fd, unsigned int *lcid, unsigned int *lport,
+			       unsigned int *rcid, unsigned int *rport)
+{
+	struct sockaddr_vm addr;
+	socklen_t addr_len = sizeof(addr);
+
+	if (lcid || lport) {
+		memset(&addr, 0, sizeof(addr));
+		if (getsockname(fd, (struct sockaddr *)&addr, &addr_len))
+			return;
+		if (addr.svm_family != AF_VSOCK)
+			return;
+		if (lport)
+			*lport = addr.svm_port;
+		if (lcid)
+			*lcid = addr.svm_cid;
+	}
+
+	if (rcid || rport) {
+		memset(&addr, 0, sizeof(addr));
+		addr_len = sizeof(addr);
+		if (getpeername(fd, (struct sockaddr *)&addr, &addr_len))
+			return;
+		if (addr.svm_family != AF_VSOCK)
+			return;
+
+		if (rport)
+			*rport = addr.svm_port;
+		if (rcid)
+			*rcid = addr.svm_cid;
+	}
+}
+
+struct tracecmd_clock_sync*
+tracecmd_clock_context_new(struct tracecmd_msg_handle *msg_handle,
+			    const char *clock_str,
+			    enum clock_sync_context id)
+{
+	struct tracecmd_clock_sync *clock_context;
+	struct tep_event *event;
+	unsigned int i = 0;
+
+	switch (id) {
+#ifdef VSOCK
+	case CLOCK_KVM_X86_VSOCK_HOST:
+	case CLOCK_KVM_X86_VSOCK_GUEST:
+		break;
+#endif
+	default: /* not supported clock sync context */
+		return NULL;
+	}
+
+	if (id >= CLOCK_CONTEXT_MAX || NULL == msg_handle)
+		return NULL;
+	clock_context = calloc(1, sizeof(struct tracecmd_clock_sync));
+	if (!clock_context)
+		return NULL;
+	clock_context->clock_context_id = id;
+	clock_context_copy(clock_context,
+			   clock_sync[id].ftrace_params, clock_sync[id].events);
+
+	get_vsocket_params(msg_handle->fd,
+			   &(clock_context->local_cid),
+			   &(clock_context->local_port),
+			   &(clock_context->remote_cid),
+			   &(clock_context->remote_port));
+
+	if (clock_sync[id].clock_sync_init)
+		clock_sync[id].clock_sync_init(clock_context);
+
+	clock_context->vinst = clock_synch_create_instance(clock_str, clock_context->remote_cid);
+	clock_context->tep = clock_synch_get_tep(clock_context->vinst,
+						 clock_sync[id].systems);
+	i = 0;
+	while (clock_sync[id].events[i].name) {
+		event = tep_find_event_by_name(clock_context->tep,
+					       clock_sync[id].events[i].system,
+					       clock_sync[id].events[i].name);
+		if (!event)
+			break;
+		clock_context->events[i].id = event->id;
+		i++;
+	}
+#ifdef TSYNC_DEBUG
+	clock_context->debug_fd = -1;
+#endif
+
+	return clock_context;
+
+}
+
+void tracecmd_clock_context_free(struct buffer_instance *instance)
+{
+	int i;
+
+	if (instance->clock_sync == NULL ||
+	    instance->clock_sync->clock_context_id >= CLOCK_CONTEXT_MAX)
+		return;
+	if (clock_sync[instance->clock_sync->clock_context_id].clock_sync_free)
+		clock_sync[instance->clock_sync->clock_context_id].clock_sync_free(instance->clock_sync);
+
+	i = 0;
+	while (instance->clock_sync->ftrace_params[i].file) {
+		free(instance->clock_sync->ftrace_params[i].file);
+		free(instance->clock_sync->ftrace_params[i].set);
+		free(instance->clock_sync->ftrace_params[i].reset);
+		i++;
+	}
+	free(instance->clock_sync->ftrace_params);
+	free(instance->clock_sync->events);
+	tracecmd_remove_instance(instance->clock_sync->vinst);
+	/* todo: clean up the instance */
+	tep_free(instance->clock_sync->tep);
+
+	free(instance->clock_sync->offsets);
+	free(instance->clock_sync->times);
+#ifdef TSYNC_DEBUG
+	if (instance->clock_sync->debug_fd >= 0) {
+		close(instance->clock_sync->debug_fd);
+		instance->clock_sync->debug_fd = -1;
+	}
+#endif
+	free(instance->clock_sync);
+	instance->clock_sync = NULL;
+}
+
+bool tracecmd_time_sync_check(void)
+{
+#ifdef VSOCK
+	return true;
+#endif
+	return false;
+}
+
+void sync_time_with_host_v3(struct buffer_instance *instance)
+{
+	long long timestamp = 0;
+	long long offset = 0;
+
+	if (!instance->do_tsync)
+		return;
+
+	if (instance->clock_sync == NULL)
+		instance->clock_sync = tracecmd_clock_context_new(instance->msg_handle,
+					instance->clock, CLOCK_KVM_X86_VSOCK_GUEST);
+
+	tracecmd_msg_snd_time_sync(instance->msg_handle, instance->clock_sync,
+				   &offset, &timestamp);
+	if (!offset && !timestamp)
+		warning("Failed to synchronize timestamps with the host");
+}
+
+void sync_time_with_guest_v3(struct buffer_instance *instance)
+{
+	long long timestamp = 0;
+	long long offset = 0;
+	long long *sync_array_ts;
+	long long *sync_array_offs;
+
+	if (!instance->do_tsync)
+		return;
+
+	if (instance->clock_sync == NULL)
+		instance->clock_sync = tracecmd_clock_context_new(instance->msg_handle,
+						top_instance.clock, CLOCK_KVM_X86_VSOCK_HOST);
+
+	tracecmd_msg_rcv_time_sync(instance->msg_handle,
+				   instance->clock_sync, &offset, &timestamp);
+
+	if (!offset && !timestamp) {
+		warning("Failed to synchronize timestamps with guest %s",
+			instance->name);
+		return;
+	}
+
+	sync_array_ts = realloc(instance->time_sync_ts,
+			    (instance->time_sync_count+1)*sizeof(long long));
+	sync_array_offs = realloc(instance->time_sync_offsets,
+			    (instance->time_sync_count+1)*sizeof(long long));
+
+	if (sync_array_ts && sync_array_offs) {
+		sync_array_ts[instance->time_sync_count] = timestamp;
+		sync_array_offs[instance->time_sync_count] = offset;
+		instance->time_sync_count++;
+		instance->time_sync_ts = sync_array_ts;
+		instance->time_sync_offsets = sync_array_offs;
+
+	} else {
+		free(sync_array_ts);
+		free(sync_array_offs);
+	}
+
+}
+
+static void set_clock_synch_events(struct buffer_instance *instance,
+				   struct tracecmd_ftrace_param *params,
+				   bool enable)
+{
+	int i = 0;
+
+	if (!enable)
+		write_tracing_on(instance, 0);
+
+	while (params[i].file) {
+		if (enable && params[i].set) {
+			write_instance_file(instance, params[i].file,
+					    params[i].set, NULL);
+		}
+		if (!enable && params[i].reset)
+			write_instance_file(instance, params[i].file,
+					    params[i].reset, NULL);
+		i++;
+	}
+
+	if (enable)
+		write_tracing_on(instance, 1);
+}
+
+int tracecmd_clock_get_peer(struct tracecmd_clock_sync *clock_context,
+			    unsigned int *remote_cid, unsigned int *remote_port)
+{
+	if (!clock_context)
+		return 0;
+	if (remote_cid)
+		*remote_cid = clock_context->remote_cid;
+	if (remote_port)
+		*remote_cid = clock_context->remote_port;
+	return 1;
+}
+
+void tracecmd_clock_synch_enable(struct tracecmd_clock_sync *clock_context)
+{
+	set_clock_synch_events(clock_context->vinst,
+			       clock_context->ftrace_params, true);
+}
+
+void tracecmd_clock_synch_disable(struct tracecmd_clock_sync *clock_context)
+{
+	set_clock_synch_events(clock_context->vinst,
+			       clock_context->ftrace_params, false);
+}
+
+int tracecmd_clock_synch_calc(struct tracecmd_clock_sync *clock_context,
+			       long long *offset_ret, long long *time_ret)
+{
+	int i, j = 0;
+	long long av, tresch, offset = 0, time = 0;
+
+	if (!clock_context || !clock_context->probes_count)
+		return 0;
+	av = clock_context->offset_av / clock_context->probes_count;
+	tresch = (long long)((clock_context->offset_max - clock_context->offset_min)/10);
+
+	for (i = 0; i < clock_context->probes_count; i++) {
+		/* filter the offsets with deviation up to 10% */
+		if (llabs(clock_context->offsets[i] - av) < tresch) {
+			offset += clock_context->offsets[i];
+			j++;
+		}
+	}
+	if (j)
+		offset /= (long long)j;
+
+	tresch = 0;
+	for (i = 0; i < clock_context->probes_count; i++) {
+		if ((!tresch || tresch > llabs(offset-clock_context->offsets[i]))) {
+			tresch = llabs(offset-clock_context->offsets[i]);
+			time = clock_context->times[i];
+		}
+	}
+	if (offset_ret)
+		*offset_ret = offset;
+	if (time_ret)
+		*time_ret = time;
+#ifdef TSYNC_DEBUG
+	printf("\n calculated offset: %lld, %d/%d probes\n\r",
+		*offset_ret, clock_context->probes_count,
+		clock_context->probes_count + clock_context->bad_probes);
+#endif
+	return 1;
+}
+
+void tracecmd_clock_synch_calc_reset(struct tracecmd_clock_sync *clock_context)
+{
+	if (!clock_context)
+		return;
+
+	clock_context->probes_count = 0;
+	clock_context->bad_probes = 0;
+	clock_context->offset_av = 0;
+	clock_context->offset_min = 0;
+	clock_context->offset_max = 0;
+#ifdef TSYNC_DEBUG
+	if (clock_context->debug_fd >= 0) {
+		close(clock_context->debug_fd);
+		clock_context->debug_fd = -1;
+	}
+#endif
+
+}
+
+void tracecmd_clock_synch_calc_probe(struct tracecmd_clock_sync *clock_context,
+				     long long ts_local, long long ts_remote)
+{
+	int count;
+#ifdef TSYNC_DEBUG
+	char buff[256];
+#endif
+
+	if (!clock_context || !ts_local || !ts_remote)
+		return;
+	if (!ts_local || !ts_remote) {
+		clock_context->bad_probes++;
+		return;
+	}
+
+	if (!clock_context->offsets && !clock_context->times) {
+		clock_context->offsets = calloc(10, sizeof(long long));
+		clock_context->times = calloc(10, sizeof(long long));
+		clock_context->probes_size = 10;
+	}
+
+	if (clock_context->probes_size == clock_context->probes_count) {
+		clock_context->probes_size = (3*clock_context->probes_size)/2;
+		clock_context->offsets = realloc(clock_context->offsets,
+						 clock_context->probes_size *
+						 sizeof(long long));
+		clock_context->times = realloc(clock_context->times,
+					       clock_context->probes_size*
+					       sizeof(long long));
+	}
+
+	if (!clock_context->offsets || !clock_context->times) {
+		clock_context->probes_size = 0;
+		tracecmd_clock_synch_calc_reset(clock_context);
+		return;
+	}
+#ifdef TSYNC_DEBUG
+	if (clock_context->debug_fd < 0) {
+		sprintf(buff, "s-cid%d.txt", clock_context->remote_cid);
+		clock_context->debug_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+	}
+#endif
+	count = clock_context->probes_count;
+	clock_context->probes_count++;
+	clock_context->offsets[count] = ts_remote - ts_local;
+	clock_context->times[count] = ts_local;
+	clock_context->offset_av += clock_context->offsets[count];
+
+	if (!clock_context->offset_min ||
+	    clock_context->offset_min > llabs(clock_context->offsets[count]))
+		clock_context->offset_min = llabs(clock_context->offsets[count]);
+	if (!clock_context->offset_max ||
+	    clock_context->offset_max < llabs(clock_context->offsets[count]))
+		clock_context->offset_max = llabs(clock_context->offsets[count]);
+#ifdef TSYNC_DEBUG
+	sprintf(buff, "%lld %lld\n", ts_local, ts_remote);
+	write(clock_context->debug_fd, buff, strlen(buff));
+#endif
+
+}