diff mbox series

[v3,2/2] trace-cmd: Trace timesync to find pids that map vCPUs

Message ID 20210513204315.1206204-3-rostedt@goodmis.org (mailing list archive)
State Superseded
Headers show
Series trace-cmd: Use vsock tracing to find cids and threads | expand

Commit Message

Steven Rostedt May 13, 2021, 8:43 p.m. UTC
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

If qemu is not found, then there's no mapping between the vCPUs of the
guest and the threads that run them. As the time sync has the agent run on
all the guest's CPUs, trace it, looking for all kvm_entry, which state
which vCPU the host thread is entering on the guest, and then use that to
create the mapping for the data files.

Only parse for qemu if no CID is given on the command line.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
Changes since v1:
 - Skipped v2 (added to v3 of this patch series)
 - Used some of the new APIs of libtracefs 1.2
 - Call read_qemu_guests is CID is not supplied on the command line.

 tracecmd/include/trace-local.h |   2 +
 tracecmd/trace-record.c        | 202 ++++++++++++++++++++++++++++++++-
 2 files changed, 203 insertions(+), 1 deletion(-)

Comments

Tzvetomir Stoyanov (VMware) May 14, 2021, 4:10 a.m. UTC | #1
On Fri, May 14, 2021 at 3:33 AM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
>
> If qemu is not found, then there's no mapping between the vCPUs of the
> guest and the threads that run them. As the time sync has the agent run on
> all the guest's CPUs, trace it, looking for all kvm_entry, which state
> which vCPU the host thread is entering on the guest, and then use that to
> create the mapping for the data files.
>
> Only parse for qemu if no CID is given on the command line.
>
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
> Changes since v1:
>  - Skipped v2 (added to v3 of this patch series)
>  - Used some of the new APIs of libtracefs 1.2
>  - Call read_qemu_guests is CID is not supplied on the command line.
>
>  tracecmd/include/trace-local.h |   2 +
>  tracecmd/trace-record.c        | 202 ++++++++++++++++++++++++++++++++-
>  2 files changed, 203 insertions(+), 1 deletion(-)
>
> diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
> index b3997d00..4c5669c9 100644
> --- a/tracecmd/include/trace-local.h
> +++ b/tracecmd/include/trace-local.h
> @@ -298,11 +298,13 @@ void update_first_instance(struct buffer_instance *instance, int topt);
>  void show_instance_file(struct buffer_instance *instance, const char *name);
>
>  struct trace_guest {
> +       struct tracefs_instance *instance;
>         char *name;
>         int cid;
>         int pid;
>         int cpu_max;
>         int *cpu_pid;
> +       int *task_pids;
>  };
>  struct trace_guest *trace_get_guest(unsigned int cid, const char *name);
>  bool trace_have_guests_pid(void);
> diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> index 5dd8be4a..5219d60b 100644
> --- a/tracecmd/trace-record.c
> +++ b/tracecmd/trace-record.c
> @@ -3230,6 +3230,38 @@ static int do_accept(int sd)
>         return -1;
>  }
>
> +/* Find all the tasks associated with the guest pid */
> +static void find_tasks(struct trace_guest *guest)
> +{
> +       struct dirent *dent;
> +       char *path;
> +       DIR *dir;
> +       int ret;
> +       int tasks = 0;
> +
> +       ret = asprintf(&path, "/proc/%d/task", guest->pid);
> +       if (ret < 0)
> +               return;
> +
> +       dir = opendir(path);
> +       free(path);
> +       if (!dir)
> +               return;
> +
> +       while ((dent = readdir(dir))) {
> +               int *pids;
> +               if (!(dent->d_type == DT_DIR && is_digits(dent->d_name)))
> +                       continue;
> +               pids = realloc(guest->task_pids, sizeof(int) * (tasks + 2));
> +               if (!pids)
> +                       break;
> +               pids[tasks++] = strtol(dent->d_name, NULL, 0);
> +               pids[tasks] = -1;
> +               guest->task_pids = pids;
> +       }
> +       closedir(dir);
> +}
> +
>  static char *parse_guest_name(char *gname, int *cid, int *port)
>  {
>         struct trace_guest *guest;
> @@ -3250,10 +3282,18 @@ static char *parse_guest_name(char *gname, int *cid, int *port)
>         } else if (is_digits(gname))
>                 *cid = atoi(gname);
>
> -       read_qemu_guests();
> +       if (*cid < 0)
> +               read_qemu_guests();
> +
> +       if (*cid < 0)
> +               return NULL;

This check is not needed. If cid is not part of the string, let
read_qemu_guests() to try discover the VMs, instead of returning NULL.

[ ... ]
Steven Rostedt May 14, 2021, 1:12 p.m. UTC | #2
On Fri, 14 May 2021 07:10:51 +0300
Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:

> > @@ -3250,10 +3282,18 @@ static char *parse_guest_name(char *gname, int *cid, int *port)
> >         } else if (is_digits(gname))
> >                 *cid = atoi(gname);
> >
> > -       read_qemu_guests();
> > +       if (*cid < 0)
> > +               read_qemu_guests();
> > +
> > +       if (*cid < 0)
> > +               return NULL;  
> 
> This check is not needed. If cid is not part of the string, let
> read_qemu_guests() to try discover the VMs, instead of returning NULL.

I'm confused. That's basically what the above does.

	*cid = -1;

	if (<cid is in name>) {
		*cid = look_for_cid_via_tracing();
	}

	if (*cid < -1)
		read_qemu_guests();

That is, if the cid isn't part of the name, then it will call
read_qemu_guests().

The second check is in case the read_qemu_guests() fails to find the cid
either.

What am I missing?

-- Steve
Tzvetomir Stoyanov (VMware) May 14, 2021, 1:38 p.m. UTC | #3
On Fri, May 14, 2021, 16:12 Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 14 May 2021 07:10:51 +0300
> Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:
>
> > > @@ -3250,10 +3282,18 @@ static char *parse_guest_name(char *gname, int *cid, int *port)
> > >         } else if (is_digits(gname))
> > >                 *cid = atoi(gname);
> > >
> > > -       read_qemu_guests();
> > > +       if (*cid < 0)
> > > +               read_qemu_guests();
> > > +
> > > +       if (*cid < 0)
> > > +               return NULL;
> >
> > This check is not needed. If cid is not part of the string, let
> > read_qemu_guests() to try discover the VMs, instead of returning NULL.
>
> I'm confused. That's basically what the above does.
>
>         *cid = -1;
>
>         if (<cid is in name>) {
>                 *cid = look_for_cid_via_tracing();
>         }
>
>         if (*cid < -1)
>                 read_qemu_guests();
>
> That is, if the cid isn't part of the name, then it will call
> read_qemu_guests().
>
> The second check is in case the read_qemu_guests() fails to find the cid
> either.
>
> What am I missing?

cid is not updated between both checks, it always fails - even though
read_qemu_guests() may succeed to find the VM with that name.

>
> -- Steve
Steven Rostedt May 14, 2021, 2 p.m. UTC | #4
On Fri, 14 May 2021 16:38:29 +0300
Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:

> > What am I missing?  
> 
> cid is not updated between both checks, it always fails - even though
> read_qemu_guests() may succeed to find the VM with that name.

Ah, that's what I was missing. :-p

For some reason I thought it set the *cid, but I see that's obviously not
the case. You are correct, the second check is not needed because the code
to set *cid is right after it.

Thanks for pointing that out.

-- Steve
diff mbox series

Patch

diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index b3997d00..4c5669c9 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -298,11 +298,13 @@  void update_first_instance(struct buffer_instance *instance, int topt);
 void show_instance_file(struct buffer_instance *instance, const char *name);
 
 struct trace_guest {
+	struct tracefs_instance *instance;
 	char *name;
 	int cid;
 	int pid;
 	int cpu_max;
 	int *cpu_pid;
+	int *task_pids;
 };
 struct trace_guest *trace_get_guest(unsigned int cid, const char *name);
 bool trace_have_guests_pid(void);
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 5dd8be4a..5219d60b 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -3230,6 +3230,38 @@  static int do_accept(int sd)
 	return -1;
 }
 
+/* Find all the tasks associated with the guest pid */
+static void find_tasks(struct trace_guest *guest)
+{
+	struct dirent *dent;
+	char *path;
+	DIR *dir;
+	int ret;
+	int tasks = 0;
+
+	ret = asprintf(&path, "/proc/%d/task", guest->pid);
+	if (ret < 0)
+		return;
+
+	dir = opendir(path);
+	free(path);
+	if (!dir)
+		return;
+
+	while ((dent = readdir(dir))) {
+		int *pids;
+		if (!(dent->d_type == DT_DIR && is_digits(dent->d_name)))
+			continue;
+		pids = realloc(guest->task_pids, sizeof(int) * (tasks + 2));
+		if (!pids)
+			break;
+		pids[tasks++] = strtol(dent->d_name, NULL, 0);
+		pids[tasks] = -1;
+		guest->task_pids = pids;
+	}
+	closedir(dir);
+}
+
 static char *parse_guest_name(char *gname, int *cid, int *port)
 {
 	struct trace_guest *guest;
@@ -3250,10 +3282,18 @@  static char *parse_guest_name(char *gname, int *cid, int *port)
 	} else if (is_digits(gname))
 		*cid = atoi(gname);
 
-	read_qemu_guests();
+	if (*cid < 0)
+		read_qemu_guests();
+
+	if (*cid < 0)
+		return NULL;
+
 	guest = trace_get_guest(*cid, gname);
 	if (guest) {
 		*cid = guest->cid;
+		/* Mapping not found, search for them */
+		if (!guest->cpu_pid)
+			find_tasks(guest);
 		return guest->name;
 	}
 
@@ -3712,6 +3752,162 @@  static int open_guest_fifos(const char *guest, int **fds)
 	return i;
 }
 
+struct trace_mapping {
+	struct tep_event		*kvm_entry;
+	struct tep_format_field		*vcpu_id;
+	struct tep_format_field		*common_pid;
+	int				*pids;
+	int				*map;
+	int				max_cpus;
+};
+
+static void start_mapping_vcpus(struct trace_guest *guest)
+{
+	char *pids = NULL;
+	char *t;
+	int len = 0;
+	int s;
+	int i;
+
+	if (!guest->task_pids)
+		return;
+
+	guest->instance = tracefs_instance_create("map_guest_pids");
+	if (!guest->instance)
+		return;
+
+	for (i = 0; guest->task_pids[i] >= 0; i++) {
+		s = snprintf(NULL, 0, "%d ", guest->task_pids[i]);
+		t = realloc(pids, len + s + 1);
+		if (!t) {
+			free(pids);
+			pids = NULL;
+			break;
+		}
+		pids = t;
+		sprintf(pids + len, "%d ", guest->task_pids[i]);
+		len += s;
+	}
+	if (pids) {
+		tracefs_instance_file_write(guest->instance, "set_event_pid", pids);
+		free(pids);
+	}
+	tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "1");
+}
+
+static int map_vcpus(struct tep_event *event, struct tep_record *record,
+		     int cpu, void *context)
+{
+	struct trace_mapping *tmap = context;
+	unsigned long long val;
+	int type;
+	int pid;
+	int ret;
+	int i;
+
+	/* Do we have junk in the buffer? */
+	type = tep_data_type(event->tep, record);
+	if (type != tmap->kvm_entry->id)
+		return 0;
+
+	ret = tep_read_number_field(tmap->common_pid, record->data, &val);
+	if (ret < 0)
+		return 0;
+	pid = (int)val;
+
+	for (i = 0; tmap->pids[i] >= 0; i++) {
+		if (pid == tmap->pids[i])
+			break;
+	}
+	/* Is this thread one we care about ? */
+	if (tmap->pids[i] < 0)
+		return 0;
+
+	ret = tep_read_number_field(tmap->vcpu_id, record->data, &val);
+	if (ret < 0)
+		return 0;
+
+	cpu = (int)val;
+
+	/* Sanity check, warn? */
+	if (cpu >= tmap->max_cpus)
+		return 0;
+
+	/* Already have this one? Should we check if it is the same? */
+	if (tmap->map[cpu] >= 0)
+		return 0;
+
+	tmap->map[cpu] = pid;
+
+	/* Did we get them all */
+	for (i = 0; i < tmap->max_cpus; i++) {
+		if (tmap->map[i] < 0)
+			break;
+	}
+
+	return i == tmap->max_cpus;
+}
+
+static void stop_mapping_vcpus(struct buffer_instance *instance,
+			       struct trace_guest *guest)
+{
+	struct trace_mapping tmap = { };
+	struct tep_handle *tep;
+	const char *systems[] = { "kvm", NULL };
+	int i;
+
+	if (!guest->instance)
+		return;
+
+	tmap.pids = guest->task_pids;
+	tmap.max_cpus = instance->cpu_count;
+
+	tmap.map = malloc(sizeof(*tmap.map) * tmap.max_cpus);
+	if (!tmap.map)
+		return;
+
+	for (i = 0; i < tmap.max_cpus; i++)
+		tmap.map[i] = -1;
+
+	tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "0");
+
+	tep = tracefs_local_events_system(NULL, systems);
+	if (!tep)
+		goto out;
+
+	tmap.kvm_entry = tep_find_event_by_name(tep, "kvm", "kvm_entry");
+	if (!tmap.kvm_entry)
+		goto out_free;
+
+	tmap.vcpu_id = tep_find_field(tmap.kvm_entry, "vcpu_id");
+	if (!tmap.vcpu_id)
+		goto out_free;
+
+	tmap.common_pid = tep_find_any_field(tmap.kvm_entry, "common_pid");
+	if (!tmap.common_pid)
+		goto out_free;
+
+	tracefs_iterate_raw_events(tep, guest->instance, NULL, 0, map_vcpus, &tmap);
+
+	for (i = 0; i < tmap.max_cpus; i++) {
+		if (tmap.map[i] < 0)
+			break;
+	}
+	/* We found all the mapped CPUs */
+	if (i == tmap.max_cpus) {
+		guest->cpu_pid = tmap.map;
+		guest->cpu_max = tmap.max_cpus;
+		tmap.map = NULL;
+	}
+
+ out_free:
+	tep_free(tep);
+ out:
+	free(tmap.map);
+	tracefs_instance_destroy(guest->instance);
+	tracefs_instance_free(guest->instance);
+}
+
 static int host_tsync(struct common_record_context *ctx,
 		      struct buffer_instance *instance,
 		      unsigned int tsync_port, char *proto)
@@ -3724,11 +3920,15 @@  static int host_tsync(struct common_record_context *ctx,
 	if (guest == NULL)
 		return -1;
 
+	start_mapping_vcpus(guest);
+
 	instance->tsync = tracecmd_tsync_with_guest(top_instance.trace_id,
 						    instance->tsync_loop_interval,
 						    instance->cid, tsync_port,
 						    guest->pid, instance->cpu_count,
 						    proto, ctx->clock);
+	stop_mapping_vcpus(instance, guest);
+
 	if (!instance->tsync)
 		return -1;