diff mbox series

[RFC,3/4] trace-cruncher: High level wrappers for ftrace uprobes

Message ID 20220331095533.75289-4-tz.stoyanov@gmail.com (mailing list archive)
State New, archived
Headers show
Series trace-cruncher: ftrace uprobes support | expand

Commit Message

Tzvetomir Stoyanov (VMware) March 31, 2022, 9:55 a.m. UTC
Using uprobes requires finding the offset of a user function within the
binary file where this function is compiled. This is not a trivial
task, especially in the cases when a bunch of uprobes to user functions
should be added.
A high level trace-cruncher API allows adding multiple user functions as
uprobes or uretprobes. It supports wildcards for function names and
adding uprobes for library functions, used by the applications.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 setup.py             |   4 +-
 src/ftracepy-utils.h |  17 ++
 src/ftracepy.c       |  35 +++
 src/utrace-utils.c   | 509 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 563 insertions(+), 2 deletions(-)
 create mode 100644 src/utrace-utils.c

Comments

Yordan Karadzhov April 1, 2022, 9:41 a.m. UTC | #1
On 31.03.22 г. 12:55 ч., Tzvetomir Stoyanov (VMware) wrote:
> Using uprobes requires finding the offset of a user function within the
> binary file, where this functions is compiled. This is not a trivial
> task, especially in the cases when a bunch of uprobes to user functions
> should be added.
> A high level trace-cruncher API allows adding multiple user functions as
> uprobes or uretprobes. It supports wildcards for function names and
> adding uprobes for library functions, used by the applications.
> 
> Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> ---
>   setup.py             |   4 +-
>   src/ftracepy-utils.h |  17 ++
>   src/ftracepy.c       |  35 +++
>   src/utrace-utils.c   | 509 +++++++++++++++++++++++++++++++++++++++++++
>   4 files changed, 563 insertions(+), 2 deletions(-)
>   create mode 100644 src/utrace-utils.c
> 
> diff --git a/setup.py b/setup.py
> index 21c627f..acfa676 100644
> --- a/setup.py
> +++ b/setup.py
> @@ -71,8 +71,8 @@ def extension(name, sources, libraries):
>   
>   def main():
>       module_ft = extension(name='tracecruncher.ftracepy',
> -                          sources=['src/ftracepy.c', 'src/ftracepy-utils.c'],
> -                          libraries=['traceevent', 'tracefs'])
> +                          sources=['src/ftracepy.c', 'src/ftracepy-utils.c', 'src/utrace-utils.c', 'src/trace-obj-debug.c'],
> +                          libraries=['traceevent', 'tracefs', 'bfd'])
>   
>       cythonize('src/npdatawrapper.pyx', language_level = '3')
>       module_data = extension(name='tracecruncher.npdatawrapper',
> diff --git a/src/ftracepy-utils.h b/src/ftracepy-utils.h
> index e6fab69..60d2743 100644
> --- a/src/ftracepy-utils.h
> +++ b/src/ftracepy-utils.h
> @@ -34,6 +34,21 @@ C_OBJECT_WRAPPER_DECLARE(tracefs_synth, PySynthEvent)
>   
>   PyObject *PyTepRecord_time(PyTepRecord* self);
>   
> +struct py_utrace_context;
> +void py_utrace_free(struct py_utrace_context *utrace);
> +int py_utrace_destroy(struct py_utrace_context *utrace);
> +C_OBJECT_WRAPPER_DECLARE(py_utrace_context, PyUserTrace);
> +
> +PyObject *PyUserTrace_add_function(PyUserTrace *self, PyObject *args,
> +				   PyObject *kwargs);
> +
> +PyObject *PyUserTrace_add_ret_function(PyUserTrace *self, PyObject *args,
> +				       PyObject *kwargs);
> +
> +PyObject *PyUserTrace_start(PyUserTrace *self, PyObject *args, PyObject *kwargs);
> +
> +PyObject *PyUserTrace_stop(PyUserTrace *self, PyObject *args, PyObject *kwargs);
> +
>   PyObject *PyTepRecord_cpu(PyTepRecord* self);
>   
>   PyObject *PyTepEvent_name(PyTepEvent* self);
> @@ -270,6 +285,8 @@ PyObject *PyFtrace_synth(PyObject *self, PyObject *args,
>   PyObject *PyFtrace_set_ftrace_loglevel(PyObject *self, PyObject *args,
>   						       PyObject *kwargs);
>   
> +PyObject *PyFtrace_utrace(PyObject *self, PyObject *args, PyObject *kwargs);
> +
>   PyObject *PyFtrace_trace_process(PyObject *self, PyObject *args,
>   						 PyObject *kwargs);
>   
> diff --git a/src/ftracepy.c b/src/ftracepy.c
> index 681d641..107b78f 100644
> --- a/src/ftracepy.c
> +++ b/src/ftracepy.c
> @@ -315,6 +315,32 @@ C_OBJECT_WRAPPER(tracefs_synth, PySynthEvent,
>   		 tracefs_synth_destroy,
>   		 tracefs_synth_free)
>   
> +static PyMethodDef PyUserTrace_methods[] = {
> +	{"add_function",
> +	 (PyCFunction) PyUserTrace_add_function,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "Add tracepoint on user function."
> +	},
> +	{"add_ret_function",
> +	 (PyCFunction) PyUserTrace_add_ret_function,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "Add tracepoint on user function return."
> +	},
> +	{"start",
> +	 (PyCFunction) PyUserTrace_start,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "Add tracepoint on user function return."
> +	},
> +	{"stop",
> +	 (PyCFunction) PyUserTrace_stop,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "Add tracepoint on user function return."
> +	},
> +	{NULL, NULL, 0, NULL}
> +};

I would prefer to use enable / disable instead of start / stop for the names of those APIs.

> +C_OBJECT_WRAPPER(py_utrace_context, PyUserTrace,
> +		 py_utrace_destroy, py_utrace_free)
> +
>   static PyMethodDef ftracepy_methods[] = {
>   	{"dir",
>   	 (PyCFunction) PyFtrace_dir,
> @@ -501,6 +527,11 @@ static PyMethodDef ftracepy_methods[] = {
>   	 METH_VARARGS | METH_KEYWORDS,
>   	 "Define a synthetic event."
>   	},
> +	{"user_trace",
> +	 (PyCFunction) PyFtrace_utrace,
> +	 METH_VARARGS | METH_KEYWORDS,
> +	 "Create a context for tracing a user process using uprobes"
> +	},
>   	{"set_ftrace_loglevel",
>   	 (PyCFunction) PyFtrace_set_ftrace_loglevel,
>   	 METH_VARARGS | METH_KEYWORDS,
> @@ -575,6 +606,9 @@ PyMODINIT_FUNC PyInit_ftracepy(void)
>   	if (!PySynthEventTypeInit())
>   		return NULL;
>   
> +	if (!PyUserTraceTypeInit())
> +		return NULL;
> +
>   	TFS_ERROR = PyErr_NewException("tracecruncher.ftracepy.tfs_error",
>   				       NULL, NULL);
>   
> @@ -593,6 +627,7 @@ PyMODINIT_FUNC PyInit_ftracepy(void)
>   	PyModule_AddObject(module, "tracefs_dynevent", (PyObject *) &PyDyneventType);
>   	PyModule_AddObject(module, "tracefs_hist", (PyObject *) &PyTraceHistType);
>   	PyModule_AddObject(module, "tracefs_synth", (PyObject *) &PySynthEventType);
> +	PyModule_AddObject(module, "py_utrace_context", (PyObject *) &PyUserTraceType);
>   
>   	PyModule_AddObject(module, "tfs_error", TFS_ERROR);
>   	PyModule_AddObject(module, "tep_error", TEP_ERROR);
> diff --git a/src/utrace-utils.c b/src/utrace-utils.c
> new file mode 100644
> index 0000000..b528407
> --- /dev/null
> +++ b/src/utrace-utils.c

No need to create this new source file. All the code below has to be in ftracepy-utils.c

> @@ -0,0 +1,509 @@
> +// SPDX-License-Identifier: LGPL-2.1
> +
> +/*
> + * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> + */
> +
> +#ifndef _GNU_SOURCE
> +/** Use GNU C Library. */
> +#define _GNU_SOURCE
> +#endif // _GNU_SOURCE
> +
> +// C
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +
> +// trace-cruncher
> +#include "ftracepy-utils.h"
> +#include "trace-obj-debug.h"
> +
> +extern PyObject *TFS_ERROR;
> +extern PyObject *TRACECRUNCHER_ERROR;
> +
> +#define UPROBES_SYSTEM "tc_uprobes"
> +
> +#define FTRACE_UPROBE		0x1
> +#define FTRACE_URETPROBE	0x2
> +
> +struct fprobes_list {
> +	int size;
> +	int count;
> +	void **data;
> +};
> +
> +struct utrace_func {
> +	int type;
> +	char *func_name;
> +	char *func_args;
> +};
> +
> +struct py_utrace_context {
> +	pid_t pid;
> +	char *fname;
> +	char *usystem;
> +	struct fprobes_list fretprobes;
> +	struct fprobes_list ufuncs;
> +	struct fprobes_list uevents;
> +	struct trace_debug_object *dbg;
> +};
> +
> +#define EXPAND_CHUNK	10
> +static int utrace_list_add(struct fprobes_list *list, void *data)
> +{
> +	void **tmp;
> +
> +	if (list->size <= list->count) {
> +		tmp = realloc(list->data, (list->size + EXPAND_CHUNK) * sizeof(void *));
> +		if (!tmp)
> +			return -1;
> +		list->data = tmp;
> +		list->size += EXPAND_CHUNK;

The standard solution for dynamic arrays is to double the size. Is there some special reason to increase by 10?

> +	}
> +
> +	list->data[list->count] = data;
> +	list->count++;
> +	return list->count - 1;
> +}
> +
> +void py_utrace_free(struct py_utrace_context *utrace)
> +{
> +	struct utrace_func *f;
> +	int i;
> +
> +	if (!utrace)
> +		return;
> +	if (utrace->dbg)
> +		trace_debug_obj_destroy(utrace->dbg);
> +
> +	for (i = 0; i < utrace->ufuncs.count; i++) {
> +		f = utrace->ufuncs.data[i];
> +		free(f->func_name);
> +		free(f);
> +	}
> +	free(utrace->ufuncs.data);
> +
> +	for (i = 0; i < utrace->uevents.count; i++)
> +		tracefs_dynevent_free(utrace->uevents.data[i]);
> +	free(utrace->uevents.data);
> +
> +	free(utrace->fname);
> +	free(utrace->usystem);
> +	free(utrace);
> +}
> +
> +/*
> + * All strings, used as ftrace system or event name must contain only
> + * alphabetic characters, digits or underscores.
> + */
> +static void fname_unify(char *fname)
> +{
> +	int i;
> +
> +	for (i = 0; fname[i]; i++)
> +		if (!isalpha(fname[i]) && !isdigit(fname[i]) && fname[i] != '_')

You can use isalnum()


> +			fname[i] = '_';
> +}
> +
> +int py_utrace_destroy(struct py_utrace_context *utrace)
> +{
> +	int i;
> +
> +	for (i = 0; i < utrace->uevents.count; i++)
> +		tracefs_dynevent_destroy(utrace->uevents.data[i], true);
> +
> +	return 0;
> +}
> +
> +static struct py_utrace_context *utrace_new(pid_t pid, char *fname, bool libs)
> +{
> +	struct py_utrace_context *utrace;
> +	char *file;
> +
> +	utrace = calloc(1, sizeof(*utrace));
> +	if (!utrace)
> +		return NULL;
> +
> +	if (fname) {
> +
empty line
> +		utrace->dbg = trace_debug_obj_create_file(fname, libs);
> +		if (!utrace->dbg)
> +			goto error;
> +		utrace->fname = strdup(fname);
> +		if (!utrace->fname)
> +			goto error;
> +		file = strrchr(fname, '/');
> +		if (file)
> +			file++;
> +		if (!file || *file == '\0')
> +			file = fname;
> +		if (asprintf(&utrace->usystem, "%s_%s", UPROBES_SYSTEM, file) <= 0)
> +			goto error;
> +	} else {
> +		utrace->pid = pid;
> +		utrace->dbg = trace_debug_obj_create_pid(pid, libs);
> +		if (!utrace->dbg)
> +			goto error;
> +		if (asprintf(&utrace->usystem, "%s_%d", UPROBES_SYSTEM, pid) <= 0)
> +			goto error;
> +	}
> +
> +	fname_unify(utrace->usystem);
> +	return utrace;
> +
> +error:
> +	py_utrace_free(utrace);
> +	return NULL;
> +}
> +
> +static int py_utrace_add_func(struct py_utrace_context *utrace, char *func, int type)
> +{
> +	struct utrace_func *p;
> +	int ret;
> +	int i;
> +
> +	for (i = 0; i < utrace->ufuncs.count; i++) {
> +		p = utrace->ufuncs.data[i];
> +		if (!strcmp(p->func_name, func))
> +			break;
> +	}
> +
> +	if (i < utrace->ufuncs.count) {
> +		p->type |= type;
> +		return 0;
> +	}

Can we just replace the 'break' inside of the 'for' loop with the code under this 'if'?

> +
> +	p = calloc(1, sizeof(*p));
> +	if (!p)
> +		return -1;
> +	p->func_name = strdup(func);
> +	if (!p->func_name)
> +		goto error;
> +	p->type = type;
> +
> +	ret = utrace_list_add(&utrace->ufuncs, p);
> +	if (ret < 0)
> +		goto error;
> +
> +	if (trace_debug_add_resolve_symbol(utrace->dbg, 0, func, ret))
> +		goto error;
> +
> +	return 0;
> +
> +error:
> +	free(p->func_name);
> +	free(p);
> +	return -1;
> +}
> +
> +PyObject *PyUserTrace_add_function(PyUserTrace *self, PyObject *args,
> +				   PyObject *kwargs)
> +{
> +	struct py_utrace_context *utrace = self->ptrObj;
> +	static char *kwlist[] = {"fname", NULL};
> +	char *fname;
> +
> +	if (!PyArg_ParseTupleAndKeywords(args,
> +					 kwargs,
> +					 "s",
> +					 kwlist,
> +					 &fname)) {
> +		return NULL;
> +	}
> +
> +	if (py_utrace_add_func(utrace, fname, FTRACE_UPROBE) < 0) {
> +		MEM_ERROR
> +		return NULL;
> +	}
> +
> +	Py_RETURN_NONE;
> +}
> +
> +PyObject *PyUserTrace_add_ret_function(PyUserTrace *self, PyObject *args,
> +				       PyObject *kwargs)
> +{
> +	struct py_utrace_context *utrace = self->ptrObj;
> +	static char *kwlist[] = {"fname", NULL};
> +	char *fname;
> +
> +	if (!PyArg_ParseTupleAndKeywords(args,
> +					 kwargs,
> +					 "s",
> +					 kwlist,
> +					 &fname)) {
> +		return NULL;
> +	}
> +
> +	if (py_utrace_add_func(utrace, fname, FTRACE_URETPROBE) < 0) {
> +		MEM_ERROR
> +		return NULL;
> +	}
> +
> +	Py_RETURN_NONE;
> +}
> +
> +/*
> + * max event name is 64 bytes, hard coded in the kernel.
> + * it can consists only of alphabetic characters, digits or underscores
> + */
> +#define FILENAME_TRUNCATE	10
> +#define FUNCAME_TRUNCATE	50
> +static char *uprobe_event_name(char *file, char *func, int type)
> +{
> +	char *event = NULL;
> +	char *fname;
> +
> +	fname = strrchr(file, '/');
> +	if (fname)
> +		fname++;
> +	if (!fname || *fname == '\0')
> +		fname = file;
> +
> +	asprintf(&event, "%s%.*s_%.*s",
> +		 type == FTRACE_URETPROBE ? "r_":"",
> +		 FILENAME_TRUNCATE, fname, FUNCAME_TRUNCATE, func);
> +	if (event)
> +		fname_unify(event);
> +
> +	return event;
> +}
> +
> +/*
> + * Create uprobe based on function name,
> + * file name and function offset within the file
> + */
> +static int utrace_event_create(struct py_utrace_context *utrace,
> +			       struct tracecmd_debug_symbols *sym, char *fecthargs,
> +			       int type)
> +{
> +	struct tracefs_dynevent *uevent = NULL;
> +	char *rname;
> +
> +	/* Generate uprobe event name, according to ftrace name requirements */
> +	rname = uprobe_event_name(sym->fname, sym->name, type);
> +	if (!rname)
> +		return -1;
> +
> +	if (type == FTRACE_URETPROBE)
> +		uevent = tracefs_uretprobe_alloc(utrace->usystem, rname,
> +						 sym->fname, sym->foffset, fecthargs);
> +	else
> +		uevent = tracefs_uprobe_alloc(utrace->usystem, rname,
> +					      sym->fname, sym->foffset, fecthargs);
> +
> +	free(rname);
> +	if (!uevent)
> +		return -1;
> +
> +	if (tracefs_dynevent_create(uevent)) {
> +		tracefs_dynevent_free(uevent);
> +		return -1;
> +	}
> +
> +	utrace_list_add(&utrace->uevents, uevent);
> +	return 0;
> +}
> +
> +/* callback, called on each resolved function */
> +static int symblos_walk(struct tracecmd_debug_symbols *sym, void *context)
> +{
> +	struct py_utrace_context *utrace = context;
> +	struct utrace_func *ufunc;
> +
> +	if (!sym->name || !sym->fname || !sym->foffset ||
> +	    sym->cookie < 0 || sym->cookie >= utrace->ufuncs.count)
> +		return 0;
> +
> +	ufunc = utrace->ufuncs.data[sym->cookie];
> +
> +	if (ufunc->type & FTRACE_UPROBE)
> +		utrace_event_create(utrace, sym, ufunc->func_args, FTRACE_UPROBE);
> +
> +	if (ufunc->type & FTRACE_URETPROBE)
> +		utrace_event_create(utrace, sym, ufunc->func_args, FTRACE_URETPROBE);
> +
> +	return 0;
> +}
> +
> +static void py_utrace_generate_uprobes(struct py_utrace_context *utrace)
> +{
> +	/* Find the exact name and file offset of each user function that should be traced */
> +	trace_debug_resolve_symbols(utrace->dbg);
> +	trace_debug_walk_resolved_symbols(utrace->dbg, symblos_walk, utrace);
> +}
> +
> +static int py_utrace_set_filter(struct py_utrace_context *utrace, struct tracefs_instance *instance)
> +{
> +	char pids[BUFSIZ];
> +	int ret;
> +
> +	snprintf(pids, BUFSIZ, "%d", utrace->pid);
> +	ret = tracefs_instance_file_write(instance, "set_event_pid", pids);
> +	if (ret < 0)
> +		return -1;
> +
> +	/* Trace all forks also */
> +	ret = tracefs_option_enable(instance, TRACEFS_OPTION_EVENT_FORK);
> +	if (ret)
> +		return -1;
> +
> +	return 0;
> +}

A similar helper function already exists. It is called hook2pid(). You may need to modify it slightly in order to fit what 
you need, but I would prefer to avoid code duplication.

> +
> +static int start_trace(struct py_utrace_context *utrace, struct tracefs_instance *instance)
> +{
> +	/* Filter the trace only on desired pid(s) */
> +	if (py_utrace_set_filter(utrace, instance)) {
> +		PyErr_SetString(TRACECRUNCHER_ERROR,
> +				"Failed to set trace filter");
> +		return -1;
> +	}
> +
> +	/* Enable uprobes in the system */
> +	if (tracefs_event_enable(instance, utrace->usystem, NULL)) {
> +		PyErr_SetString(TRACECRUNCHER_ERROR,
> +				"Failed to enable trace events");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int utrace_exec_cmd(struct py_utrace_context *utrace, struct tracefs_instance *instance)
> +{
> +	pid_t pid;
> +
> +	pid = fork();
> +	if (pid < 0) {
> +		PyErr_SetString(TRACECRUNCHER_ERROR, "Failed to fork");
> +		return -1;
> +	}
> +
> +	if (pid == 0) {
> +		char *argv[] = {getenv("SHELL"), "-c", utrace->fname, NULL};
> +		char *envp[] = {NULL};
> +

Note that here you start a new shell process and you execute the user program inside this shell. Is this what you want? 
This can be useful if the user wants to trace a script, but it is unnecessary overhead if you trace an executable.

> +		utrace->pid = getpid();
> +		start_trace(utrace, instance);
> +		if (execvpe(argv[0], argv, envp) < 0)
> +			PyErr_SetString(TRACECRUNCHER_ERROR, "Failed to exec command");
> +	}
> +
> +	return pid;
> +}
> +
> +static int py_utrace_start(struct py_utrace_context *utrace, struct tracefs_instance *instance)
> +{
> +	/* If uprobes on desired user functions are not yet generated, do it now */
> +	if (!utrace->uevents.count)
> +		py_utrace_generate_uprobes(utrace);
> +
> +	/* No functions are found in the given program / pid */
> +	if (!utrace->uevents.count) {
> +		PyErr_SetString(TRACECRUNCHER_ERROR,
> +				"Cannot find requested user functions");
> +		return -1;
> +	}
> +
> +	if (utrace->fname)
> +		utrace_exec_cmd(utrace, instance);
> +	else
> +		start_trace(utrace, instance);
> +
> +	return 0;
> +}
> +
> +static int py_utrace_stop(struct py_utrace_context *utrace, struct tracefs_instance *instance)
> +{
> +	/* Disable uprobes in the system */
> +	if (tracefs_event_disable(instance, utrace->usystem, NULL)) {
> +		PyErr_SetString(TRACECRUNCHER_ERROR,
> +				"Failed to disable trace events");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +

I see no point in calling those 2 APIs "start" and "stop" when what is actually executed is enable/disable.


> +static PyObject *PyUserTrace_trigger(PyUserTrace *self, PyObject *args, PyObject *kwargs, bool start)
> +{
> +	struct py_utrace_context *utrace = self->ptrObj;
> +	static char *kwlist[] = {"instance", NULL};
> +	struct tracefs_instance *instance = NULL;
> +	PyObject *py_inst = NULL;
> +	int ret;
> +
> +	if (!PyArg_ParseTupleAndKeywords(args,
> +					 kwargs,
> +					 "|O",
> +					 kwlist,
> +					 &py_inst)) {
> +		PyErr_SetString(TRACECRUNCHER_ERROR,
> +				"Failed to parse input arguments");
> +		return NULL;
> +	}
> +
> +	if (py_inst) {
> +		if (!PyTfsInstance_Check(py_inst)) {
> +			PyErr_SetString(TRACECRUNCHER_ERROR,
> +					"Input argument \'instance\' is from incompatible type.");
> +			return NULL;
> +		}
> +		instance = ((PyTfsInstance *)py_inst)->ptrObj;
> +	}
> +

We have a helper function to handle the case of a method that takes only one 'instance' argument - 
get_instance_from_arg(). You can use it here.


> +	if (start)
> +		ret = py_utrace_start(utrace, instance);
> +	else
> +		ret = py_utrace_stop(utrace, instance);
> +
> +	if (ret)
> +		return NULL;
> +
> +	Py_RETURN_NONE;
> +}
> +
> +PyObject *PyUserTrace_start(PyUserTrace *self, PyObject *args, PyObject *kwargs)
> +{
> +	return PyUserTrace_trigger(self, args, kwargs, true);
> +}
> +
> +PyObject *PyUserTrace_stop(PyUserTrace *self, PyObject *args, PyObject *kwargs)
> +{
> +	return PyUserTrace_trigger(self, args, kwargs, false);
> +}
> +
> +PyObject *PyFtrace_utrace(PyObject *self, PyObject *args, PyObject *kwargs)
> +{
> +	static char *kwlist[] = {"pid", "name", "follow_libs", NULL};
> +	struct py_utrace_context *utrace;
> +	long long pid = -1;
> +	char *comm = NULL;
> +	int libs = 0;
> +	PyObject *py_utrace;
> +
> +	if (!PyArg_ParseTupleAndKeywords(args,
> +					 kwargs,
> +					 "|Ksp",
> +					 kwlist,
> +					 &pid,
> +					 &comm,
> +					 &libs)) {
> +		return NULL;
> +	}
> +
> +	if (pid == -1 && !comm) {
> +		PyErr_Format(TFS_ERROR,
> +			     "Process ID or program name should be specified");
> +		return NULL;
> +	}
> +
> +	utrace = utrace_new(pid, comm, libs);
> +	if (!utrace) {
> +		MEM_ERROR;
> +		return NULL;
> +	}
> +	py_utrace = PyUserTrace_New(utrace);
> +
> +	return py_utrace;
> +}
diff mbox series

Patch

diff --git a/setup.py b/setup.py
index 21c627f..acfa676 100644
--- a/setup.py
+++ b/setup.py
@@ -71,8 +71,8 @@  def extension(name, sources, libraries):
 
 def main():
     module_ft = extension(name='tracecruncher.ftracepy',
-                          sources=['src/ftracepy.c', 'src/ftracepy-utils.c'],
-                          libraries=['traceevent', 'tracefs'])
+                          sources=['src/ftracepy.c', 'src/ftracepy-utils.c', 'src/utrace-utils.c', 'src/trace-obj-debug.c'],
+                          libraries=['traceevent', 'tracefs', 'bfd'])
 
     cythonize('src/npdatawrapper.pyx', language_level = '3')
     module_data = extension(name='tracecruncher.npdatawrapper',
diff --git a/src/ftracepy-utils.h b/src/ftracepy-utils.h
index e6fab69..60d2743 100644
--- a/src/ftracepy-utils.h
+++ b/src/ftracepy-utils.h
@@ -34,6 +34,21 @@  C_OBJECT_WRAPPER_DECLARE(tracefs_synth, PySynthEvent)
 
 PyObject *PyTepRecord_time(PyTepRecord* self);
 
+struct py_utrace_context;
+void py_utrace_free(struct py_utrace_context *utrace);
+int py_utrace_destroy(struct py_utrace_context *utrace);
+C_OBJECT_WRAPPER_DECLARE(py_utrace_context, PyUserTrace);
+
+PyObject *PyUserTrace_add_function(PyUserTrace *self, PyObject *args,
+				   PyObject *kwargs);
+
+PyObject *PyUserTrace_add_ret_function(PyUserTrace *self, PyObject *args,
+				       PyObject *kwargs);
+
+PyObject *PyUserTrace_start(PyUserTrace *self, PyObject *args, PyObject *kwargs);
+
+PyObject *PyUserTrace_stop(PyUserTrace *self, PyObject *args, PyObject *kwargs);
+
 PyObject *PyTepRecord_cpu(PyTepRecord* self);
 
 PyObject *PyTepEvent_name(PyTepEvent* self);
@@ -270,6 +285,8 @@  PyObject *PyFtrace_synth(PyObject *self, PyObject *args,
 PyObject *PyFtrace_set_ftrace_loglevel(PyObject *self, PyObject *args,
 						       PyObject *kwargs);
 
+PyObject *PyFtrace_utrace(PyObject *self, PyObject *args, PyObject *kwargs);
+
 PyObject *PyFtrace_trace_process(PyObject *self, PyObject *args,
 						 PyObject *kwargs);
 
diff --git a/src/ftracepy.c b/src/ftracepy.c
index 681d641..107b78f 100644
--- a/src/ftracepy.c
+++ b/src/ftracepy.c
@@ -315,6 +315,32 @@  C_OBJECT_WRAPPER(tracefs_synth, PySynthEvent,
 		 tracefs_synth_destroy,
 		 tracefs_synth_free)
 
+/* Methods exposed to Python on the PyUserTrace type. */
+static PyMethodDef PyUserTrace_methods[] = {
+	{"add_function",
+	 (PyCFunction) PyUserTrace_add_function,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "Add tracepoint on user function."
+	},
+	{"add_ret_function",
+	 (PyCFunction) PyUserTrace_add_ret_function,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "Add tracepoint on user function return."
+	},
+	{"start",
+	 (PyCFunction) PyUserTrace_start,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "Start tracing the added user functions."
+	},
+	{"stop",
+	 (PyCFunction) PyUserTrace_stop,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "Stop tracing the added user functions."
+	},
+	{NULL, NULL, 0, NULL}	/* sentinel */
+};
+C_OBJECT_WRAPPER(py_utrace_context, PyUserTrace,
+		 py_utrace_destroy, py_utrace_free)
+
 static PyMethodDef ftracepy_methods[] = {
 	{"dir",
 	 (PyCFunction) PyFtrace_dir,
@@ -501,6 +527,11 @@  static PyMethodDef ftracepy_methods[] = {
 	 METH_VARARGS | METH_KEYWORDS,
 	 "Define a synthetic event."
 	},
+	{"user_trace",
+	 (PyCFunction) PyFtrace_utrace,
+	 METH_VARARGS | METH_KEYWORDS,
+	 "Create a context for tracing a user process using uprobes"
+	},
 	{"set_ftrace_loglevel",
 	 (PyCFunction) PyFtrace_set_ftrace_loglevel,
 	 METH_VARARGS | METH_KEYWORDS,
@@ -575,6 +606,9 @@  PyMODINIT_FUNC PyInit_ftracepy(void)
 	if (!PySynthEventTypeInit())
 		return NULL;
 
+	if (!PyUserTraceTypeInit())
+		return NULL;
+
 	TFS_ERROR = PyErr_NewException("tracecruncher.ftracepy.tfs_error",
 				       NULL, NULL);
 
@@ -593,6 +627,7 @@  PyMODINIT_FUNC PyInit_ftracepy(void)
 	PyModule_AddObject(module, "tracefs_dynevent", (PyObject *) &PyDyneventType);
 	PyModule_AddObject(module, "tracefs_hist", (PyObject *) &PyTraceHistType);
 	PyModule_AddObject(module, "tracefs_synth", (PyObject *) &PySynthEventType);
+	PyModule_AddObject(module, "py_utrace_context", (PyObject *) &PyUserTraceType);
 
 	PyModule_AddObject(module, "tfs_error", TFS_ERROR);
 	PyModule_AddObject(module, "tep_error", TEP_ERROR);
diff --git a/src/utrace-utils.c b/src/utrace-utils.c
new file mode 100644
index 0000000..b528407
--- /dev/null
+++ b/src/utrace-utils.c
@@ -0,0 +1,509 @@ 
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * Copyright 2022 VMware Inc, Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
+ */
+
+#ifndef _GNU_SOURCE
+/** Use GNU C Library. */
+#define _GNU_SOURCE
+#endif // _GNU_SOURCE
+
+// C
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+// trace-cruncher
+#include "ftracepy-utils.h"
+#include "trace-obj-debug.h"
+
+extern PyObject *TFS_ERROR;
+extern PyObject *TRACECRUNCHER_ERROR;
+
+#define UPROBES_SYSTEM "tc_uprobes"
+
+#define FTRACE_UPROBE		0x1
+#define FTRACE_URETPROBE	0x2
+
+/* Growable array of opaque pointers; entries are appended by utrace_list_add(). */
+struct fprobes_list {
+	int size;	/* number of slots allocated in @data */
+	int count;	/* number of slots currently in use */
+	void **data;	/* the stored pointers */
+};
+
+/* A user function requested for tracing. */
+struct utrace_func {
+	int type;		/* bitmask of FTRACE_UPROBE / FTRACE_URETPROBE */
+	char *func_name;	/* heap copy of the requested function name */
+	char *func_args;	/* passed as fetch args when the probe is created */
+};
+
+/* State of one user-space tracing context; wrapped for Python as PyUserTrace. */
+struct py_utrace_context {
+	pid_t pid;		/* process written to "set_event_pid" when tracing starts */
+	char *fname;		/* program to execute and trace; NULL when tracing by pid */
+	char *usystem;		/* ftrace system name the probes are created under */
+	struct fprobes_list fretprobes;	/* NOTE(review): not referenced by the code in this patch */
+	struct fprobes_list ufuncs;	/* requested functions (struct utrace_func *) */
+	struct fprobes_list uevents;	/* created probes (struct tracefs_dynevent *) */
+	struct trace_debug_object *dbg;	/* handle used to resolve function symbols */
+};
+
+#define EXPAND_CHUNK	10
+/*
+ * Append @data to @list, growing the backing array by EXPAND_CHUNK slots
+ * whenever it is full.
+ *
+ * Returns the index at which @data was stored, or -1 if the array could
+ * not be grown (the list is left unchanged in that case).
+ */
+static int utrace_list_add(struct fprobes_list *list, void *data)
+{
+	int idx = list->count;
+
+	if (idx >= list->size) {
+		int new_size = list->size + EXPAND_CHUNK;
+		void **grown = realloc(list->data, new_size * sizeof(void *));
+
+		if (!grown)
+			return -1;
+		list->data = grown;
+		list->size = new_size;
+	}
+
+	list->data[idx] = data;
+	list->count = idx + 1;
+	return idx;
+}
+
+/*
+ * Release all memory owned by @utrace, including the context itself.
+ * Accepts NULL. The probe descriptors are only freed here; removing them
+ * is the job of py_utrace_destroy().
+ */
+void py_utrace_free(struct py_utrace_context *utrace)
+{
+	int n;
+
+	if (!utrace)
+		return;
+
+	if (utrace->dbg)
+		trace_debug_obj_destroy(utrace->dbg);
+
+	/* Requested functions: each entry owns its name string. */
+	for (n = 0; n < utrace->ufuncs.count; n++) {
+		struct utrace_func *func = utrace->ufuncs.data[n];
+
+		free(func->func_name);
+		free(func);
+	}
+	free(utrace->ufuncs.data);
+
+	/* Probe descriptors. */
+	for (n = 0; n < utrace->uevents.count; n++)
+		tracefs_dynevent_free(utrace->uevents.data[n]);
+	free(utrace->uevents.data);
+
+	free(utrace->fname);
+	free(utrace->usystem);
+	free(utrace);
+}
+
+/*
+ * All strings used as an ftrace system or event name must contain only
+ * alphabetic characters, digits or underscores. Rewrite @fname in place,
+ * replacing every other character with an underscore.
+ */
+static void fname_unify(char *fname)
+{
+	char *c;
+
+	/*
+	 * The <ctype.h> classifiers take an int that must be representable as
+	 * unsigned char; passing a plain (possibly negative) char is undefined.
+	 */
+	for (c = fname; *c; c++)
+		if (*c != '_' && !isalnum((unsigned char)*c))
+			*c = '_';
+}
+
+/*
+ * Call tracefs_dynevent_destroy() (second argument true) on every probe
+ * recorded in @utrace. The results of the individual calls are not checked.
+ *
+ * Always returns 0.
+ */
+int py_utrace_destroy(struct py_utrace_context *utrace)
+{
+	struct fprobes_list *events = &utrace->uevents;
+	int n = 0;
+
+	while (n < events->count) {
+		tracefs_dynevent_destroy(events->data[n], true);
+		n++;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate a tracing context for a program file or for a running process.
+ *
+ * @pid:   process to trace; used only when @fname is NULL.
+ * @fname: program file to trace, or NULL to trace @pid.
+ * @libs:  forwarded to trace_debug_obj_create_file() / trace_debug_obj_create_pid().
+ *
+ * The ftrace system name is built as "tc_uprobes_<file name>" or
+ * "tc_uprobes_<pid>" and then passed through fname_unify().
+ *
+ * Returns the new context, or NULL on failure.
+ */
+static struct py_utrace_context *utrace_new(pid_t pid, char *fname, bool libs)
+{
+	struct py_utrace_context *utrace;
+	char *file;
+	int len;
+
+	utrace = calloc(1, sizeof(*utrace));
+	if (!utrace)
+		return NULL;
+
+	if (fname) {
+		utrace->dbg = trace_debug_obj_create_file(fname, libs);
+		if (!utrace->dbg)
+			goto error;
+		utrace->fname = strdup(fname);
+		if (!utrace->fname)
+			goto error;
+		/* Use only the last path component; fall back to the whole string. */
+		file = strrchr(fname, '/');
+		if (file)
+			file++;
+		if (!file || *file == '\0')
+			file = fname;
+		len = asprintf(&utrace->usystem, "%s_%s", UPROBES_SYSTEM, file);
+	} else {
+		utrace->pid = pid;
+		utrace->dbg = trace_debug_obj_create_pid(pid, libs);
+		if (!utrace->dbg)
+			goto error;
+		len = asprintf(&utrace->usystem, "%s_%d", UPROBES_SYSTEM, pid);
+	}
+
+	if (len <= 0) {
+		/*
+		 * When asprintf() fails the contents of the output pointer are
+		 * undefined, so reset it before py_utrace_free() passes it to
+		 * free().
+		 */
+		if (len < 0)
+			utrace->usystem = NULL;
+		goto error;
+	}
+
+	fname_unify(utrace->usystem);
+	return utrace;
+
+error:
+	py_utrace_free(utrace);
+	return NULL;
+}
+
+/*
+ * Register @func to be traced with a probe of the given @type
+ * (FTRACE_UPROBE or FTRACE_URETPROBE).
+ *
+ * If @func is already registered, @type is OR-ed into the existing entry.
+ * Otherwise a new entry is appended to utrace->ufuncs and the name is
+ * handed to trace_debug_add_resolve_symbol() with the entry's list index
+ * as cookie.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int py_utrace_add_func(struct py_utrace_context *utrace, char *func, int type)
+{
+	struct utrace_func *p;
+	int ret;
+	int i;
+
+	for (i = 0; i < utrace->ufuncs.count; i++) {
+		p = utrace->ufuncs.data[i];
+		if (!strcmp(p->func_name, func)) {
+			p->type |= type;
+			return 0;
+		}
+	}
+
+	p = calloc(1, sizeof(*p));
+	if (!p)
+		return -1;
+	p->func_name = strdup(func);
+	if (!p->func_name)
+		goto error;
+	p->type = type;
+
+	ret = utrace_list_add(&utrace->ufuncs, p);
+	if (ret < 0)
+		goto error;
+
+	if (trace_debug_add_resolve_symbol(utrace->dbg, 0, func, ret)) {
+		/*
+		 * The entry was just appended as the last element. Drop it from
+		 * the list before freeing it, otherwise py_utrace_free() would
+		 * free it a second time.
+		 */
+		utrace->ufuncs.count--;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	free(p->func_name);
+	free(p);
+	return -1;
+}
+
+/*
+ * Python method add_function(fname): register the user function @fname
+ * with type FTRACE_UPROBE.
+ *
+ * Returns None on success, NULL on failure.
+ */
+PyObject *PyUserTrace_add_function(PyUserTrace *self, PyObject *args,
+				   PyObject *kwargs)
+{
+	static char *kwlist[] = {"fname", NULL};
+	struct py_utrace_context *utrace = self->ptrObj;
+	char *fname;
+	int parsed;
+
+	parsed = PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &fname);
+	if (!parsed)
+		return NULL;
+
+	if (py_utrace_add_func(utrace, fname, FTRACE_UPROBE) >= 0) {
+		Py_RETURN_NONE;
+	}
+
+	MEM_ERROR
+	return NULL;
+}
+
+/*
+ * Python method add_ret_function(fname): register the user function
+ * @fname with type FTRACE_URETPROBE.
+ *
+ * Returns None on success, NULL on failure.
+ */
+PyObject *PyUserTrace_add_ret_function(PyUserTrace *self, PyObject *args,
+				       PyObject *kwargs)
+{
+	static char *kwlist[] = {"fname", NULL};
+	struct py_utrace_context *utrace = self->ptrObj;
+	char *fname;
+	int parsed;
+
+	parsed = PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &fname);
+	if (!parsed)
+		return NULL;
+
+	if (py_utrace_add_func(utrace, fname, FTRACE_URETPROBE) >= 0) {
+		Py_RETURN_NONE;
+	}
+
+	MEM_ERROR
+	return NULL;
+}
+
+/*
+ * The maximum event name is 64 bytes, hard coded in the kernel.
+ * It can consist only of alphabetic characters, digits or underscores.
+ */
+#define FILENAME_TRUNCATE	10
+#define FUNCAME_TRUNCATE	50
+/*
+ * Build the event name "[r_]<file>_<func>": the last path component of
+ * @file truncated to FILENAME_TRUNCATE characters, @func truncated to
+ * FUNCAME_TRUNCATE characters, and an "r_" prefix when @type is
+ * FTRACE_URETPROBE. The result is passed through fname_unify().
+ *
+ * Returns a heap-allocated string that the caller must free, or NULL on
+ * failure.
+ */
+static char *uprobe_event_name(char *file, char *func, int type)
+{
+	char *event = NULL;
+	char *fname;
+
+	fname = strrchr(file, '/');
+	if (fname)
+		fname++;
+	if (!fname || *fname == '\0')
+		fname = file;
+
+	/*
+	 * On failure asprintf() returns -1 and leaves the contents of @event
+	 * undefined, so rely on the return value rather than on the pointer.
+	 */
+	if (asprintf(&event, "%s%.*s_%.*s",
+		     type == FTRACE_URETPROBE ? "r_" : "",
+		     FILENAME_TRUNCATE, fname, FUNCAME_TRUNCATE, func) < 0)
+		return NULL;
+
+	fname_unify(event);
+	return event;
+}
+
+/*
+ * Create uprobe based on function name,
+ * file name and function offset within the file.
+ *
+ * @utrace:    context; the new probe is appended to utrace->uevents.
+ * @sym:       resolved symbol providing the name, file and file offset.
+ * @fecthargs: fetch arguments passed through to the probe allocation.
+ * @type:      FTRACE_URETPROBE for a return probe, anything else for an
+ *             entry probe.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int utrace_event_create(struct py_utrace_context *utrace,
+			       struct tracecmd_debug_symbols *sym, char *fecthargs,
+			       int type)
+{
+	struct tracefs_dynevent *uevent = NULL;
+	char *rname;
+
+	/* Generate uprobe event name, according to ftrace name requirements */
+	rname = uprobe_event_name(sym->fname, sym->name, type);
+	if (!rname)
+		return -1;
+
+	if (type == FTRACE_URETPROBE)
+		uevent = tracefs_uretprobe_alloc(utrace->usystem, rname,
+						 sym->fname, sym->foffset, fecthargs);
+	else
+		uevent = tracefs_uprobe_alloc(utrace->usystem, rname,
+					      sym->fname, sym->foffset, fecthargs);
+
+	free(rname);
+	if (!uevent)
+		return -1;
+
+	if (tracefs_dynevent_create(uevent))
+		goto free_event;
+
+	if (utrace_list_add(&utrace->uevents, uevent) < 0) {
+		/*
+		 * A probe that is not recorded in the list would never be
+		 * destroyed or freed later, so undo the creation now.
+		 */
+		tracefs_dynevent_destroy(uevent, true);
+		goto free_event;
+	}
+
+	return 0;
+
+free_event:
+	tracefs_dynevent_free(uevent);
+	return -1;
+}
+
+/*
+ * Callback invoked for each resolved function.
+ *
+ * Symbols without a name, file name or file offset, or whose cookie is not
+ * a valid index into utrace->ufuncs, are skipped. For the others a probe is
+ * created for every probe type set in the matching request; the result of
+ * the creation is not checked.
+ *
+ * Always returns 0.
+ */
+static int symblos_walk(struct tracecmd_debug_symbols *sym, void *context)
+{
+	static const int kinds[] = {FTRACE_UPROBE, FTRACE_URETPROBE};
+	struct py_utrace_context *utrace = context;
+	struct utrace_func *ufunc;
+	unsigned int k;
+
+	if (!(sym->name && sym->fname && sym->foffset))
+		return 0;
+	if (sym->cookie < 0 || sym->cookie >= utrace->ufuncs.count)
+		return 0;
+
+	ufunc = utrace->ufuncs.data[sym->cookie];
+
+	for (k = 0; k < sizeof(kinds) / sizeof(kinds[0]); k++) {
+		if (ufunc->type & kinds[k])
+			utrace_event_create(utrace, sym, ufunc->func_args, kinds[k]);
+	}
+
+	return 0;
+}
+
+/*
+ * Resolve the user functions registered in @utrace to their exact names
+ * and file offsets, then walk the resolved symbols with symblos_walk() as
+ * the per-symbol callback.
+ */
+static void py_utrace_generate_uprobes(struct py_utrace_context *utrace)
+{
+	struct trace_debug_object *dbg = utrace->dbg;
+
+	trace_debug_resolve_symbols(dbg);
+	trace_debug_walk_resolved_symbols(dbg, symblos_walk, utrace);
+}
+
+/*
+ * Write utrace->pid to the "set_event_pid" file of @instance and enable
+ * the TRACEFS_OPTION_EVENT_FORK option.
+ *
+ * Returns 0 on success, -1 if either step fails.
+ */
+static int py_utrace_set_filter(struct py_utrace_context *utrace, struct tracefs_instance *instance)
+{
+	char pids[BUFSIZ];
+
+	snprintf(pids, sizeof(pids), "%d", utrace->pid);
+	if (tracefs_instance_file_write(instance, "set_event_pid", pids) < 0)
+		return -1;
+
+	/* Trace all forks also */
+	return tracefs_option_enable(instance, TRACEFS_OPTION_EVENT_FORK) ? -1 : 0;
+}
+
+/*
+ * Apply the pid filter to @instance and enable every event of the
+ * utrace->usystem group in it.
+ *
+ * Returns 0 on success. On failure a TRACECRUNCHER_ERROR exception is set
+ * and -1 is returned; events are not enabled if the filter cannot be set.
+ */
+static int start_trace(struct py_utrace_context *utrace, struct tracefs_instance *instance)
+{
+	const char *failure = NULL;
+
+	/* Filter first, so that enabling the events only affects the desired pid(s) */
+	if (py_utrace_set_filter(utrace, instance))
+		failure = "Failed to set trace filter";
+	else if (tracefs_event_enable(instance, utrace->usystem, NULL))
+		failure = "Failed to enable trace events";
+
+	if (failure) {
+		PyErr_SetString(TRACECRUNCHER_ERROR, failure);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Fork a child that enables tracing on itself and then runs the command
+ * utrace->fname through the user's shell ("$SHELL -c <command>", falling
+ * back to /bin/sh when SHELL is unset or empty). The command is started
+ * with an empty environment.
+ *
+ * Returns the pid of the child in the parent, or -1 with a
+ * TRACECRUNCHER_ERROR exception set if fork() fails. The child never
+ * returns from this function: it either becomes the command or exits.
+ */
+static int utrace_exec_cmd(struct py_utrace_context *utrace, struct tracefs_instance *instance)
+{
+	pid_t pid;
+
+	pid = fork();
+	if (pid < 0) {
+		PyErr_SetString(TRACECRUNCHER_ERROR, "Failed to fork");
+		return -1;
+	}
+
+	if (pid == 0) {
+		char *shell = getenv("SHELL");
+		char *envp[] = {NULL};
+		char *argv[4];
+
+		/* getenv() returns NULL when SHELL is not set; exec needs a real path */
+		if (!shell || !*shell)
+			shell = "/bin/sh";
+
+		argv[0] = shell;
+		argv[1] = "-c";
+		argv[2] = utrace->fname;
+		argv[3] = NULL;
+
+		/* The filter must be set on the pid that is about to exec the command */
+		utrace->pid = getpid();
+		start_trace(utrace, instance);
+		execvpe(argv[0], argv, envp);
+
+		/*
+		 * Only reached if exec failed. A Python exception set here would
+		 * never reach the parent, and returning would leave a second copy
+		 * of the interpreter running, so report on stderr and terminate
+		 * without running the interpreter's exit handlers.
+		 */
+		perror("Failed to exec command");
+		_exit(127);
+	}
+
+	return pid;
+}
+
+/*
+ * Start tracing the requested user functions in @instance.
+ *
+ * Uprobes are generated on first use. When utrace->fname is set, the
+ * command is launched and traced via utrace_exec_cmd(); otherwise tracing
+ * is enabled directly via start_trace().
+ *
+ * Returns 0 on success. Returns -1 with a Python exception set when no
+ * requested function could be resolved, or when launching the command or
+ * enabling the trace fails.
+ */
+static int py_utrace_start(struct py_utrace_context *utrace, struct tracefs_instance *instance)
+{
+	/* If uprobes on desired user functions are not yet generated, do it now */
+	if (!utrace->uevents.count)
+		py_utrace_generate_uprobes(utrace);
+
+	/* No functions are found in the given program / pid */
+	if (!utrace->uevents.count) {
+		PyErr_SetString(TRACECRUNCHER_ERROR,
+				"Cannot find requested user functions");
+		return -1;
+	}
+
+	/*
+	 * Both helpers set a Python exception on failure. The failure has to be
+	 * propagated, otherwise the caller returns a valid object while an
+	 * exception is pending.
+	 */
+	if (utrace->fname) {
+		/* utrace_exec_cmd() returns the child pid, negative only on error */
+		if (utrace_exec_cmd(utrace, instance) < 0)
+			return -1;
+	} else if (start_trace(utrace, instance)) {
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Disable every event of the utrace->usystem group in @instance.
+ *
+ * Returns 0 on success; on failure sets a TRACECRUNCHER_ERROR exception
+ * and returns -1.
+ */
+static int py_utrace_stop(struct py_utrace_context *utrace, struct tracefs_instance *instance)
+{
+	int failed = tracefs_event_disable(instance, utrace->usystem, NULL);
+
+	if (!failed)
+		return 0;
+
+	PyErr_SetString(TRACECRUNCHER_ERROR,
+			"Failed to disable trace events");
+	return -1;
+}
+
+/*
+ * Shared implementation of the Python-level start and stop methods.
+ *
+ * Accepts one optional argument, "instance", which must pass
+ * PyTfsInstance_Check(); when it is omitted a NULL instance pointer is
+ * handed to the worker. @start selects py_utrace_start() (true) or
+ * py_utrace_stop() (false).
+ *
+ * Returns None on success, NULL with an exception set on failure.
+ */
+static PyObject *PyUserTrace_trigger(PyUserTrace *self, PyObject *args, PyObject *kwargs, bool start)
+{
+	static char *kwlist[] = {"instance", NULL};
+	struct py_utrace_context *utrace = self->ptrObj;
+	struct tracefs_instance *instance = NULL;
+	PyObject *py_inst = NULL;
+	int ret;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O", kwlist, &py_inst)) {
+		/* Replaces whatever exception the argument parser has raised */
+		PyErr_SetString(TRACECRUNCHER_ERROR,
+				"Failed to parse input arguments");
+		return NULL;
+	}
+
+	if (py_inst != NULL) {
+		if (!PyTfsInstance_Check(py_inst)) {
+			PyErr_SetString(TRACECRUNCHER_ERROR,
+					"Input argument \'instance\' is from incompatible type.");
+			return NULL;
+		}
+		instance = ((PyTfsInstance *)py_inst)->ptrObj;
+	}
+
+	ret = start ? py_utrace_start(utrace, instance)
+		    : py_utrace_stop(utrace, instance);
+	if (ret)
+		return NULL;
+
+	Py_RETURN_NONE;
+}
+
+/*
+ * Start tracing: forwards @self, @args and @kwargs to PyUserTrace_trigger()
+ * with start == true and returns its result unchanged.
+ */
+PyObject *PyUserTrace_start(PyUserTrace *self, PyObject *args, PyObject *kwargs)
+{
+	return PyUserTrace_trigger(self, args, kwargs, true);
+}
+
+/*
+ * Stop tracing: forwards @self, @args and @kwargs to PyUserTrace_trigger()
+ * with start == false and returns its result unchanged.
+ */
+PyObject *PyUserTrace_stop(PyUserTrace *self, PyObject *args, PyObject *kwargs)
+{
+	return PyUserTrace_trigger(self, args, kwargs, false);
+}
+
+/*
+ * Create a user-trace object.
+ *
+ * Python keyword arguments (all optional, but "pid" or "name" is required):
+ *   pid         - integer process ID; defaults to -1, meaning "not given".
+ *   name        - string, program name / command.
+ *   follow_libs - truth value, defaults to false.
+ *
+ * Returns the object produced by PyUserTrace_New(), or NULL with an
+ * exception set on bad arguments or when the trace context cannot be
+ * allocated.
+ */
+PyObject *PyFtrace_utrace(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = {"pid", "name", "follow_libs", NULL};
+	struct py_utrace_context *utrace;
+	long long pid = -1;
+	char *comm = NULL;
+	int libs = 0;
+	PyObject *py_utrace;
+
+	/*
+	 * "L" is the format unit for a C long long. The former "K" expects an
+	 * unsigned long long and performs no overflow checking, which did not
+	 * match the type of 'pid'.
+	 */
+	if (!PyArg_ParseTupleAndKeywords(args,
+					 kwargs,
+					 "|Lsp",
+					 kwlist,
+					 &pid,
+					 &comm,
+					 &libs)) {
+		return NULL;
+	}
+
+	if (pid == -1 && !comm) {
+		PyErr_Format(TFS_ERROR,
+			     "Process ID or program name should be specified");
+		return NULL;
+	}
+
+	utrace = utrace_new(pid, comm, libs);
+	if (!utrace) {
+		MEM_ERROR;
+		return NULL;
+	}
+	/* NOTE(review): if PyUserTrace_New() fails, 'utrace' is not released here - confirm ownership */
+	py_utrace = PyUserTrace_New(utrace);
+
+	return py_utrace;
+}