diff mbox series

[v2,1/2] trace-cmd: Optimize how pid filters are expressed

Message ID 20190415164739.17223-2-kaslevs@vmware.com (mailing list archive)
State Superseded
Headers show
Series Optimize pid filters and add --no-filter option | expand

Commit Message

Slavomir Kaslev April 15, 2019, 4:47 p.m. UTC
Express pid filters as allowed/disallowed filter ranges

  (pid>=100&&pid<=103)

instead of specifying them per pid

  (pid==100||pid==101||pid==102||pid==103)

This makes the resulting filter smaller (and faster to evaluate) and avoids
overflowing the filter size limit of one page, which we can hit on bigger
machines (say >160 CPUs).

Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com>
Reported-by: Phil Auld <pauld@redhat.com>
Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tracecmd/trace-record.c | 115 +++++++++++++++++++++++++++-------------
 1 file changed, 78 insertions(+), 37 deletions(-)

Comments

Steven Rostedt April 15, 2019, 9:59 p.m. UTC | #1
On Mon, 15 Apr 2019 19:47:38 +0300
Slavomir Kaslev <kaslevs@vmware.com> wrote:

> Express pid filters as allowed/disallowed filter ranges
> 
>   (pid>=100&&pid<=103)  
> 
> instead of specifying them per pid
> 
>   (pid==100||pid==101||pid==102||pid==103)
> 
> This makes the size of the resulting filter smaller (and faster) and avoids
> overflowing the filter size limit of one page which we can hit on bigger
> machines (say >160 CPUs).

Except it breaks if we have a split, i.e. a gap between ranges of
excluded pids.

I ran this:

 hackbench 10 &
 tracecmd/trace-cmd record -e sched_switch cat /sys/kernel/debug/tracing/events/sched/sched_switch/filter


Time: 0.093
(common_pid<6959||common_pid>6969)||(common_pid<6945||common_pid>6957)||(next_pid<6959||next_pid>6969)||(next_pid<6945||next_pid>6957)

This was the output. It shows that we excluded common_pid 6959 - 6969
and 6945 - 6957 (6958 was missing). Because the two ranges are joined
with "||", we now trace all processes: (common_pid < 6959 ||
common_pid > 6957) is always true.

We need an "&&" there somewhere. That should have been:

((common_pid<6959||common_pid>6969)&&(common_pid<6945||common_pid>6957))||((next_pid<6959||next_pid>6969)&&(next_pid<6945||next_pid>6957))

-- Steve



> 
> Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com>
> Reported-by: Phil Auld <pauld@redhat.com>
> Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
>  tracecmd/trace-record.c | 115 +++++++++++++++++++++++++++-------------
>  1 file changed, 78 insertions(+), 37 deletions(-)
> 
> diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> index 76ca92d..eeee5e9 100644
> --- a/tracecmd/trace-record.c
> +++ b/tracecmd/trace-record.c
> @@ -950,10 +950,61 @@ static void update_ftrace_pids(int reset)
>  static void update_event_filters(struct buffer_instance *instance);
>  static void update_pid_event_filters(struct buffer_instance *instance);
>  
> +static void append_filter_pid_range(char **filter, int *curr_len,
> +				    const char *field,
> +				    int start_pid, int end_pid, bool exclude)
> +{
> +	char *op, *op1, *op2, *op3;
> +	int len;
> +
> +	op = *filter && **filter ? "||" : "";
> +
> +	// Handle thus case explicitly so that we get `pid==3` instead of
> +	// `pid>=3&&pid<=3` for singleton ranges
> +	if (start_pid == end_pid) {
> +#define FMT	"%s(%s%s%d)"
> +		len = snprintf(NULL, 0, FMT, op,
> +			       field, exclude ? "!=" : "==", start_pid);
> +		*filter = realloc(*filter, *curr_len + len + 1);
> +		if (!*filter)
> +			die("realloc");
> +
> +		len = snprintf(*filter + *curr_len, len + 1, FMT, op,
> +			       field, exclude ? "!=" : "==", start_pid);
> +		*curr_len += len;
> +
> +		return;
> +#undef FMT
> +	}
> +
> +	if (exclude) {
> +		op1 = "<";
> +		op2 = "||";
> +		op3 = ">";
> +	} else {
> +		op1 = ">=";
> +		op2 = "&&";
> +		op3 = "<=";
> +	}
> +
> +#define FMT	"%s(%s%s%d%s%s%s%d)"
> +	len = snprintf(NULL, 0, FMT, op,
> +		       field, op1, start_pid, op2,
> +		       field, op3, end_pid);
> +	*filter = realloc(*filter, *curr_len + len + 1);
> +	if (!*filter)
> +		die("realloc");
> +
> +	len = snprintf(*filter + *curr_len, len + 1, FMT, op,
> +		       field, op1, start_pid, op2,
> +		       field, op3, end_pid);
> +	*curr_len += len;
> +}
> +
>  /**
>   * make_pid_filter - create a filter string to all pids against @field
>   * @curr_filter: Append to a previous filter (may realloc). Can be NULL
> - * @field: The fild to compare the pids against
> + * @field: The field to compare the pids against
>   *
>   * Creates a new string or appends to an existing one if @curr_filter
>   * is not NULL. The new string will contain a filter with all pids
> @@ -963,54 +1014,44 @@ static void update_pid_event_filters(struct buffer_instance *instance);
>   */
>  static char *make_pid_filter(char *curr_filter, const char *field)
>  {
> +	int curr_len = 0, last_exclude = -1;
> +	int start_pid = -1, last_pid = -1;
> +	char *filter = NULL, *save;
>  	struct filter_pids *p;
> -	char *filter;
> -	char *orit;
> -	char *match;
> -	char *str;
> -	int curr_len = 0;
> -	int len;
>  
>  	/* Use the new method if possible */
>  	if (have_set_event_pid)
>  		return NULL;
>  
> -	len = len_filter_pids + (strlen(field) + strlen("(==)||")) * nr_filter_pids;
> -
> -	if (curr_filter) {
> -		curr_len = strlen(curr_filter);
> -		filter = realloc(curr_filter, curr_len + len + strlen("(&&())"));
> -		if (!filter)
> -			die("realloc");
> -		memmove(filter+1, curr_filter, curr_len);
> -		filter[0] = '(';
> -		strcat(filter, ")&&(");
> -		curr_len = strlen(filter);
> -	} else
> -		filter = malloc(len);
> -	if (!filter)
> -		die("Failed to allocate pid filter");
> -
> -	/* Last '||' that is not used will cover the \0 */
> -	str = filter + curr_len;
> +	if (!filter_pids)
> +		return curr_filter;
>  
>  	for (p = filter_pids; p; p = p->next) {
> -		if (p->exclude) {
> -			match = "!=";
> -			orit = "&&";
> -		} else {
> -			match = "==";
> -			orit = "||";
> +		/* PIDs are inserted in `filter_pids` from the front and that's
> +		 * why we expect them in descending order here.
> +		 */
> +		if (p->pid == last_pid - 1 && p->exclude == last_exclude) {
> +			last_pid = p->pid;
> +			continue;
>  		}
> -		if (p == filter_pids)
> -			orit = "";
>  
> -		len = sprintf(str, "%s(%s%s%d)", orit, field, match, p->pid);
> -		str += len;
> +		if (start_pid != -1)
> +			append_filter_pid_range(&filter, &curr_len, field,
> +						last_pid, start_pid,
> +						last_exclude);
> +
> +		start_pid = last_pid = p->pid;
> +		last_exclude = p->exclude;
> +
>  	}
> +	append_filter_pid_range(&filter, &curr_len, field,
> +				last_pid, start_pid, last_exclude);
>  
> -	if (curr_len)
> -		sprintf(str, ")");
> +	if (curr_filter) {
> +		save = filter;
> +		asprintf(&filter, "(%s)&&(%s)", curr_filter, filter);
> +		free(save);
> +	}
>  
>  	return filter;
>  }
Slavomir Kaslev April 15, 2019, 10:55 p.m. UTC | #2
On Mon, 2019-04-15 at 17:59 -0400, Steven Rostedt wrote:
> On Mon, 15 Apr 2019 19:47:38 +0300
> Slavomir Kaslev <kaslevs@vmware.com> wrote:
> 
> > Express pid filters as allowed/disallowed filter ranges
> > 
> >   (pid>=100&&pid<=103)  
> > 
> > instead of specifying them per pid
> > 
> >   (pid==100||pid==101||pid==102||pid==103)
> > 
> > This makes the size of the resulting filter smaller (and faster)
> > and avoids
> > overflowing the filter size limit of one page which we can hit on
> > bigger
> > machines (say >160 CPUs).
> 
> Except it breaks if we have a split.
> 
> I ran this:
> 
>  hackbench 10 &
>  tracecmd/trace-cmd record -e sched_switch cat
> /sys/kernel/debug/tracing/events/sched/sched_switch/filter
> 
> 
> Time: 0.093
> (common_pid<6959||common_pid>6969)||(common_pid<6945||common_pid>6957
> )||(next_pid<6959||next_pid>6969)||(next_pid<6945||next_pid>6957)
> 
> This was the output. Showing that we had common_pid from 6959 - 6969
> and 6945 - 6957 (a 6958 was missing), and because of this, we now
> trace
> all processes because (common_pid < 6959 || common_pid > 6957) is
> always true.
> 
> We need an "&&" there somewhere. That should have been:
> 
> ((common_pid<6959||common_pid>6969)&&(common_pid<6945||common_pid>695
> 7))||((next_pid<6959||next_pid>6969)&&(next_pid<6945||next_pid>6957))

Good catch. I misread how the original code worked: it joins exclude
filters with && and non-exclude filters with ||. I'll send a fix in v3.

-- Slavi
diff mbox series

Patch

diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 76ca92d..eeee5e9 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -950,10 +950,61 @@  static void update_ftrace_pids(int reset)
 static void update_event_filters(struct buffer_instance *instance);
 static void update_pid_event_filters(struct buffer_instance *instance);
 
+static void append_filter_pid_range(char **filter, int *curr_len,
+				    const char *field,
+				    int start_pid, int end_pid, bool exclude)
+{
+	char *op, *op1, *op2, *op3;
+	int len;
+
+	op = *filter && **filter ? "||" : "";
+
+	// Handle thus case explicitly so that we get `pid==3` instead of
+	// `pid>=3&&pid<=3` for singleton ranges
+	if (start_pid == end_pid) {
+#define FMT	"%s(%s%s%d)"
+		len = snprintf(NULL, 0, FMT, op,
+			       field, exclude ? "!=" : "==", start_pid);
+		*filter = realloc(*filter, *curr_len + len + 1);
+		if (!*filter)
+			die("realloc");
+
+		len = snprintf(*filter + *curr_len, len + 1, FMT, op,
+			       field, exclude ? "!=" : "==", start_pid);
+		*curr_len += len;
+
+		return;
+#undef FMT
+	}
+
+	if (exclude) {
+		op1 = "<";
+		op2 = "||";
+		op3 = ">";
+	} else {
+		op1 = ">=";
+		op2 = "&&";
+		op3 = "<=";
+	}
+
+#define FMT	"%s(%s%s%d%s%s%s%d)"
+	len = snprintf(NULL, 0, FMT, op,
+		       field, op1, start_pid, op2,
+		       field, op3, end_pid);
+	*filter = realloc(*filter, *curr_len + len + 1);
+	if (!*filter)
+		die("realloc");
+
+	len = snprintf(*filter + *curr_len, len + 1, FMT, op,
+		       field, op1, start_pid, op2,
+		       field, op3, end_pid);
+	*curr_len += len;
+}
+
 /**
  * make_pid_filter - create a filter string to all pids against @field
  * @curr_filter: Append to a previous filter (may realloc). Can be NULL
- * @field: The fild to compare the pids against
+ * @field: The field to compare the pids against
  *
  * Creates a new string or appends to an existing one if @curr_filter
  * is not NULL. The new string will contain a filter with all pids
@@ -963,54 +1014,44 @@  static void update_pid_event_filters(struct buffer_instance *instance);
  */
 static char *make_pid_filter(char *curr_filter, const char *field)
 {
+	int curr_len = 0, last_exclude = -1;
+	int start_pid = -1, last_pid = -1;
+	char *filter = NULL, *save;
 	struct filter_pids *p;
-	char *filter;
-	char *orit;
-	char *match;
-	char *str;
-	int curr_len = 0;
-	int len;
 
 	/* Use the new method if possible */
 	if (have_set_event_pid)
 		return NULL;
 
-	len = len_filter_pids + (strlen(field) + strlen("(==)||")) * nr_filter_pids;
-
-	if (curr_filter) {
-		curr_len = strlen(curr_filter);
-		filter = realloc(curr_filter, curr_len + len + strlen("(&&())"));
-		if (!filter)
-			die("realloc");
-		memmove(filter+1, curr_filter, curr_len);
-		filter[0] = '(';
-		strcat(filter, ")&&(");
-		curr_len = strlen(filter);
-	} else
-		filter = malloc(len);
-	if (!filter)
-		die("Failed to allocate pid filter");
-
-	/* Last '||' that is not used will cover the \0 */
-	str = filter + curr_len;
+	if (!filter_pids)
+		return curr_filter;
 
 	for (p = filter_pids; p; p = p->next) {
-		if (p->exclude) {
-			match = "!=";
-			orit = "&&";
-		} else {
-			match = "==";
-			orit = "||";
+		/* PIDs are inserted in `filter_pids` from the front and that's
+		 * why we expect them in descending order here.
+		 */
+		if (p->pid == last_pid - 1 && p->exclude == last_exclude) {
+			last_pid = p->pid;
+			continue;
 		}
-		if (p == filter_pids)
-			orit = "";
 
-		len = sprintf(str, "%s(%s%s%d)", orit, field, match, p->pid);
-		str += len;
+		if (start_pid != -1)
+			append_filter_pid_range(&filter, &curr_len, field,
+						last_pid, start_pid,
+						last_exclude);
+
+		start_pid = last_pid = p->pid;
+		last_exclude = p->exclude;
+
 	}
+	append_filter_pid_range(&filter, &curr_len, field,
+				last_pid, start_pid, last_exclude);
 
-	if (curr_len)
-		sprintf(str, ")");
+	if (curr_filter) {
+		save = filter;
+		asprintf(&filter, "(%s)&&(%s)", curr_filter, filter);
+		free(save);
+	}
 
 	return filter;
 }