kernel-shark-qt: Make helper functions recognize data type to improve performance
diff mbox series

Message ID 20180709123823.695b653a@gandalf.local.home
State New, archived
Headers show
Series
  • kernel-shark-qt: Make helper functions recognize data type to improve performance
Related show

Commit Message

Steven Rostedt July 9, 2018, 4:38 p.m. UTC
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Add a type enumerator and change rec_list into a union such that the helper
functions have more information about the type of data they are processing.
This will allow for the helper functions to be optimized such that they
perform better, and we can remove double allocations.

Some of the functionality of kshark_load_data_records() and
kshark_load_data_entries() have been moved into the helper functions with a
switch statement based on the type variable to know what to do with the
data.

The rec_list structure's entry element has been converted from a pointer,
which needed to be allocated, to a structure itself, such that it can be
allocated to store the entry data without having to allocate extra data.

The next field of the kshark_entry has been moved to the top of the
structure to match the rec_list next entry, so that they both have the
linked-list pointer as the first entry, and it does not matter if the
rec_list is used for kshark_entry's or for records.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel-shark-qt/src/libkshark.c | 170 ++++++++++++++++++++++++----------------
 kernel-shark-qt/src/libkshark.h |   6 +-
 2 files changed, 105 insertions(+), 71 deletions(-)

Comments

Yordan Karadzhov (VMware) July 10, 2018, 3:23 p.m. UTC | #1
On  9.07.2018 19:38, Steven Rostedt wrote:
> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> 
> Add a type enumerator and change rec_list into a union such that the helper
> functions have more information about the type of data they are processing.
> This will allow for the helper functions to be optimized such that they
> perform better, and we can remove double allocations.
> 
> Some of the functionality of kshark_load_data_records() and
> kshark_load_data_entries() have been moved into the helper functions with a
> switch statement based on the type variable to know what to do with the
> data.
> 
> The rec_list structure's entry element has been converted from a pointer,
> that needs to be allocated, to a structure itself, such that it can be
> allocated to store the entry data without having to allocate extra data.
> 
> The next field of the kshark_entry has been moved to the top of the
> structure to match the rec_list next entry, so that they both have the link
> list pointer as the first entry, and it does not matter if the rec_list is
> used for kshark_entry's or for records.
> 
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
>   kernel-shark-qt/src/libkshark.c | 170 ++++++++++++++++++++++++----------------
>   kernel-shark-qt/src/libkshark.h |   6 +-
>   2 files changed, 105 insertions(+), 71 deletions(-)
> 
> diff --git a/kernel-shark-qt/src/libkshark.c b/kernel-shark-qt/src/libkshark.c
> index a79ed982..0c874643 100644
> --- a/kernel-shark-qt/src/libkshark.c
> +++ b/kernel-shark-qt/src/libkshark.c
> @@ -500,15 +500,35 @@ static void kshark_set_entry_values(struct kshark_context *kshark_ctx,
>   	entry->pid = pevent_data_pid(kshark_ctx->pevent, record);
>   }
>   
> -/* Quiet warnings over documenting simple structures */
> -//! @cond Doxygen_Suppress
> +/**
> + * rec_list is used to pass the data to the load functions.
> + * The rec_list will contain the list of entries from the source,
> + * and will be a link list of per CPU entries.
> + */
>   struct rec_list {
> -	struct pevent_record	*rec;
> -	struct rec_list		*next;
> +	union {
> +		/* Used by kshark_load_data_records */
> +		struct {
> +			/** next pointer, matches entry->next */
> +			struct rec_list		*next;
> +			/** pointer to the raw record data */
> +			struct pevent_record	*rec;
> +		};
> +		/** entry - Used for kshark_load_data_entries() */
> +		struct kshark_entry		entry;
> +	};
> +};
> +
> +/**
> + * rec_type defines what type of rec_list is being used.
> + */
> +enum rec_type {
> +	REC_RECORD,
> +	REC_ENTRY,
>   };
> -//! @endcond
>   
> -static void free_rec_list(struct rec_list **rec_list, int n_cpus)
> +static void free_rec_list(struct rec_list **rec_list, int n_cpus,
> +			  enum rec_type type)
>   {
>   	struct rec_list *temp_rec;
>   	int cpu;
> @@ -517,7 +537,8 @@ static void free_rec_list(struct rec_list **rec_list, int n_cpus)
>   		while (rec_list[cpu]) {
>   			temp_rec = rec_list[cpu];
>   			rec_list[cpu] = temp_rec->next;
> -			free_record(temp_rec->rec);
> +			if (type == REC_RECORD)
> +				free_record(temp_rec->rec);
>   			free(temp_rec);
>   		}
>   	}
> @@ -525,14 +546,17 @@ static void free_rec_list(struct rec_list **rec_list, int n_cpus)
>   }
>  


[..]


> @@ -610,16 +685,11 @@ static int pick_next_cpu(struct rec_list **rec_list, int n_cpus)
>   ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
>   				struct kshark_entry ***data_rows)
>   {
> -	struct event_filter *adv_filter = kshark_ctx->advanced_event_filter;
> -	struct kshark_task_list *task;
>   	struct kshark_entry **rows;
> -	struct kshark_entry *entry;
>   	struct rec_list **rec_list;
> -	struct rec_list *temp_rec;
> -	struct pevent_record *rec;
> +	enum rec_type type = REC_ENTRY;
>   	size_t count, total = 0;
>   	int n_cpus;
> -	int ret;
>   
>   	if (*data_rows)
>   		free(*data_rows);
> @@ -631,63 +701,33 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
>   	 *	 code simplier. We should revisit to see if we can
>   	 *	 bring back the performance.
>   	 */
> -	total = get_records(kshark_ctx, &rec_list);
> +	total = get_records(kshark_ctx, &rec_list, type);
>   	if (total < 0)
>   		goto fail;
>   
> +	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> +
>   	rows = calloc(total, sizeof(struct kshark_entry *));
>   	if(!rows)

I know it isn't from this patch, but there must be a space after the "if".

Please push this a.s.a.p
Thanks!

-- Yordan

> -		goto fail;
> -
> -	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> +		goto fail_free;
>   
>   	for (count = 0; count < total; count++) {
>   		int next_cpu;
>   
> -		next_cpu = pick_next_cpu(rec_list, n_cpus);
> +		next_cpu = pick_next_cpu(rec_list, n_cpus, type);
>   
>   		if (next_cpu >= 0) {
> -			entry = malloc(sizeof(struct kshark_entry));
> -			if (!entry)
> -				goto fail_free;
> -
> -			rec = rec_list[next_cpu]->rec;
> -			rows[count] = entry;
> -
> -			kshark_set_entry_values(kshark_ctx, rec, entry);
> -			task = kshark_add_task(kshark_ctx, entry->pid);
> -			if (!task)
> -				goto fail_free;
> -
> -			/* Apply event filtering. */
> -			ret = FILTER_NONE;
> -			if (adv_filter->filters)
> -				ret = pevent_filter_match(adv_filter, rec);
> -
> -			if (!kshark_show_event(kshark_ctx, entry->event_id) ||
> -			    ret != FILTER_MATCH) {
> -				unset_event_filter_flag(kshark_ctx, entry);
> -			}
> -
> -			/* Apply task filtering. */
> -			if (!kshark_show_task(kshark_ctx, entry->pid)) {
> -				entry->visible &= ~kshark_ctx->filter_mask;
> -			}
> -
> -			temp_rec = rec_list[next_cpu];
> +			rows[count] = &rec_list[next_cpu]->entry;
>   			rec_list[next_cpu] = rec_list[next_cpu]->next;
> -			free(temp_rec);
> -			/* The record is no longer be referenced */
> -			free_record(rec);
>   		}
>   	}
>   
> -	free_rec_list(rec_list, n_cpus);
> +	free_rec_list(rec_list, n_cpus, type);
>   	*data_rows = rows;
>   	return total;
>   
>    fail_free:
> -	free_rec_list(rec_list, n_cpus);
> +	free_rec_list(rec_list, n_cpus, type);
>   	for (count = 0; count < total; count++) {
>   		if (!rows[count])
>   			break;
> @@ -712,16 +752,15 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
>   ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
>   				struct pevent_record ***data_rows)
>   {
> -	struct kshark_task_list *task;
>   	struct pevent_record **rows;
>   	struct pevent_record *rec;
>   	struct rec_list **rec_list;
>   	struct rec_list *temp_rec;
> +	enum rec_type type = REC_RECORD;
>   	size_t count, total = 0;
>   	int n_cpus;
> -	int pid;
>   
> -	total = get_records(kshark_ctx, &rec_list);
> +	total = get_records(kshark_ctx, &rec_list, REC_RECORD);
>   	if (total < 0)
>   		goto fail;
>   
> @@ -734,17 +773,12 @@ ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
>   	for (count = 0; count < total; count++) {
>   		int next_cpu;
>   
> -		next_cpu = pick_next_cpu(rec_list, n_cpus);
> +		next_cpu = pick_next_cpu(rec_list, n_cpus, type);
>   
>   		if (next_cpu >= 0) {
>   			rec = rec_list[next_cpu]->rec;
>   			rows[count] = rec;
>   
> -			pid = pevent_data_pid(kshark_ctx->pevent, rec);
> -			task = kshark_add_task(kshark_ctx, pid);
> -			if (!task)
> -				goto fail;
> -
>   			temp_rec = rec_list[next_cpu];
>   			rec_list[next_cpu] = rec_list[next_cpu]->next;
>   			free(temp_rec);
> @@ -753,7 +787,7 @@ ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
>   	}
>   
>   	/* There should be no records left in rec_list */
> -	free_rec_list(rec_list, n_cpus);
> +	free_rec_list(rec_list, n_cpus, type);
>   	*data_rows = rows;
>   	return total;
>   
> diff --git a/kernel-shark-qt/src/libkshark.h b/kernel-shark-qt/src/libkshark.h
> index 6ed2a1ea..2e265522 100644
> --- a/kernel-shark-qt/src/libkshark.h
> +++ b/kernel-shark-qt/src/libkshark.h
> @@ -33,6 +33,9 @@ extern "C" {
>    * info etc.) is available on-demand via the offset into the trace file.
>    */
>   struct kshark_entry {
> +	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
> +	struct kshark_entry *next; /* MUST BE FIRST ENTRY */
> +
>   	/**
>   	 * A bit mask controlling the visibility of the entry. A value of OxFF
>   	 * would mean that the entry is visible everywhere. Use
> @@ -60,9 +63,6 @@ struct kshark_entry {
>   	 * started.
>   	 */
>   	uint64_t	ts;
> -
> -	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
> -	struct kshark_entry *next;
>   };
>   
>   /** Size of the task's hash table. */
>
Steven Rostedt July 10, 2018, 3:33 p.m. UTC | #2
On Tue, 10 Jul 2018 18:23:49 +0300
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

> > +	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> > +
> >   	rows = calloc(total, sizeof(struct kshark_entry *));
> >   	if(!rows)  
> 
> I know it isn't from this patch, but there must be a space after the "if".

Yeah, there's a few places that have that. I'll fix these with a
separate patch.

> 
> Please push this a.s.a.p

Will do. Thanks!

-- Steve
Steven Rostedt July 10, 2018, 4:06 p.m. UTC | #3
On Tue, 10 Jul 2018 11:33:08 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> > 
> > Please push this a.s.a.p  
> 
> Will do. Thanks!

Pushed.

-- Steve

Patch
diff mbox series

diff --git a/kernel-shark-qt/src/libkshark.c b/kernel-shark-qt/src/libkshark.c
index a79ed982..0c874643 100644
--- a/kernel-shark-qt/src/libkshark.c
+++ b/kernel-shark-qt/src/libkshark.c
@@ -500,15 +500,35 @@  static void kshark_set_entry_values(struct kshark_context *kshark_ctx,
 	entry->pid = pevent_data_pid(kshark_ctx->pevent, record);
 }
 
-/* Quiet warnings over documenting simple structures */
-//! @cond Doxygen_Suppress
+/**
+ * rec_list is used to pass the data to the load functions.
+ * The rec_list will contain the list of entries from the source,
+ * and will be a link list of per CPU entries.
+ */
 struct rec_list {
-	struct pevent_record	*rec;
-	struct rec_list		*next;
+	union {
+		/* Used by kshark_load_data_records */
+		struct {
+			/** next pointer, matches entry->next */
+			struct rec_list		*next;
+			/** pointer to the raw record data */
+			struct pevent_record	*rec;
+		};
+		/** entry - Used for kshark_load_data_entries() */
+		struct kshark_entry		entry;
+	};
+};
+
+/**
+ * rec_type defines what type of rec_list is being used.
+ */
+enum rec_type {
+	REC_RECORD,
+	REC_ENTRY,
 };
-//! @endcond
 
-static void free_rec_list(struct rec_list **rec_list, int n_cpus)
+static void free_rec_list(struct rec_list **rec_list, int n_cpus,
+			  enum rec_type type)
 {
 	struct rec_list *temp_rec;
 	int cpu;
@@ -517,7 +537,8 @@  static void free_rec_list(struct rec_list **rec_list, int n_cpus)
 		while (rec_list[cpu]) {
 			temp_rec = rec_list[cpu];
 			rec_list[cpu] = temp_rec->next;
-			free_record(temp_rec->rec);
+			if (type == REC_RECORD)
+				free_record(temp_rec->rec);
 			free(temp_rec);
 		}
 	}
@@ -525,14 +546,17 @@  static void free_rec_list(struct rec_list **rec_list, int n_cpus)
 }
 
 static size_t get_records(struct kshark_context *kshark_ctx,
-			  struct rec_list ***rec_list)
+			  struct rec_list ***rec_list, enum rec_type type)
 {
+	struct event_filter *adv_filter;
+	struct kshark_task_list *task;
 	struct pevent_record *rec;
 	struct rec_list **temp_next;
 	struct rec_list **cpu_list;
 	struct rec_list *temp_rec;
 	size_t count, total = 0;
 	int n_cpus;
+	int pid;
 	int cpu;
 
 	n_cpus = tracecmd_cpus(kshark_ctx->handle);
@@ -540,6 +564,10 @@  static size_t get_records(struct kshark_context *kshark_ctx,
 	if (!cpu_list)
 		return -ENOMEM;
 
+	/* Just to shorten the name */
+	if (type == REC_ENTRY)
+		adv_filter = kshark_ctx->advanced_event_filter;
+
 	for (cpu = 0; cpu < n_cpus; ++cpu) {
 		count = 0;
 		cpu_list[cpu] = NULL;
@@ -547,12 +575,49 @@  static size_t get_records(struct kshark_context *kshark_ctx,
 
 		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
 		while (rec) {
-			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
+			*temp_next = temp_rec = calloc(1, sizeof(*temp_rec));
 			if (!temp_rec)
 				goto fail;
 
-			temp_rec->rec = rec;
 			temp_rec->next = NULL;
+
+			switch (type) {
+			case REC_RECORD:
+				temp_rec->rec = rec;
+				pid = pevent_data_pid(kshark_ctx->pevent, rec);
+				break;
+			case REC_ENTRY: {
+				struct kshark_entry *entry;
+				int ret;
+
+				entry = &temp_rec->entry;
+				kshark_set_entry_values(kshark_ctx, rec, entry);
+				pid = entry->pid;
+				/* Apply event filtering. */
+				ret = FILTER_NONE;
+				if (adv_filter->filters)
+					ret = pevent_filter_match(adv_filter, rec);
+
+				if (!kshark_show_event(kshark_ctx, entry->event_id) ||
+				    ret != FILTER_MATCH) {
+					unset_event_filter_flag(kshark_ctx, entry);
+				}
+
+				/* Apply task filtering. */
+				if (!kshark_show_task(kshark_ctx, entry->pid)) {
+					entry->visible &= ~kshark_ctx->filter_mask;
+				}
+				free_record(rec);
+				break;
+			} /* REC_ENTRY */
+			}
+
+			task = kshark_add_task(kshark_ctx, pid);
+			if (!task) {
+				free_record(rec);
+				goto fail;
+			}
+
 			temp_next = &temp_rec->next;
 
 			++count;
@@ -566,13 +631,15 @@  static size_t get_records(struct kshark_context *kshark_ctx,
 	return total;
 
  fail:
-	free_rec_list(cpu_list, n_cpus);
+	free_rec_list(cpu_list, n_cpus, type);
 	return -ENOMEM;
 }
 
-static int pick_next_cpu(struct rec_list **rec_list, int n_cpus)
+static int pick_next_cpu(struct rec_list **rec_list, int n_cpus,
+			 enum rec_type type)
 {
 	uint64_t ts = 0;
+	uint64_t rec_ts;
 	int next_cpu = -1;
 	int cpu;
 
@@ -580,8 +647,16 @@  static int pick_next_cpu(struct rec_list **rec_list, int n_cpus)
 		if (!rec_list[cpu])
 			continue;
 
-		if (!ts || rec_list[cpu]->rec->ts < ts) {
-			ts = rec_list[cpu]->rec->ts;
+		switch (type) {
+		case REC_RECORD:
+			rec_ts = rec_list[cpu]->rec->ts;
+			break;
+		case REC_ENTRY:
+			rec_ts = rec_list[cpu]->entry.ts;
+			break;
+		}
+		if (!ts || rec_ts < ts) {
+			ts = rec_ts;
 			next_cpu = cpu;
 		}
 	}
@@ -610,16 +685,11 @@  static int pick_next_cpu(struct rec_list **rec_list, int n_cpus)
 ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 				struct kshark_entry ***data_rows)
 {
-	struct event_filter *adv_filter = kshark_ctx->advanced_event_filter;
-	struct kshark_task_list *task;
 	struct kshark_entry **rows;
-	struct kshark_entry *entry;
 	struct rec_list **rec_list;
-	struct rec_list *temp_rec;
-	struct pevent_record *rec;
+	enum rec_type type = REC_ENTRY;
 	size_t count, total = 0;
 	int n_cpus;
-	int ret;
 
 	if (*data_rows)
 		free(*data_rows);
@@ -631,63 +701,33 @@  ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 	 *	 code simplier. We should revisit to see if we can
 	 *	 bring back the performance.
 	 */
-	total = get_records(kshark_ctx, &rec_list);
+	total = get_records(kshark_ctx, &rec_list, type);
 	if (total < 0)
 		goto fail;
 
+	n_cpus = tracecmd_cpus(kshark_ctx->handle);
+
 	rows = calloc(total, sizeof(struct kshark_entry *));
 	if(!rows)
-		goto fail;
-
-	n_cpus = tracecmd_cpus(kshark_ctx->handle);
+		goto fail_free;
 
 	for (count = 0; count < total; count++) {
 		int next_cpu;
 
-		next_cpu = pick_next_cpu(rec_list, n_cpus);
+		next_cpu = pick_next_cpu(rec_list, n_cpus, type);
 
 		if (next_cpu >= 0) {
-			entry = malloc(sizeof(struct kshark_entry));
-			if (!entry)
-				goto fail_free;
-
-			rec = rec_list[next_cpu]->rec;
-			rows[count] = entry;
-
-			kshark_set_entry_values(kshark_ctx, rec, entry);
-			task = kshark_add_task(kshark_ctx, entry->pid);
-			if (!task)
-				goto fail_free;
-
-			/* Apply event filtering. */
-			ret = FILTER_NONE;
-			if (adv_filter->filters)
-				ret = pevent_filter_match(adv_filter, rec);
-
-			if (!kshark_show_event(kshark_ctx, entry->event_id) ||
-			    ret != FILTER_MATCH) {
-				unset_event_filter_flag(kshark_ctx, entry);
-			}
-
-			/* Apply task filtering. */
-			if (!kshark_show_task(kshark_ctx, entry->pid)) {
-				entry->visible &= ~kshark_ctx->filter_mask;
-			}
-
-			temp_rec = rec_list[next_cpu];
+			rows[count] = &rec_list[next_cpu]->entry;
 			rec_list[next_cpu] = rec_list[next_cpu]->next;
-			free(temp_rec);
-			/* The record is no longer be referenced */
-			free_record(rec);
 		}
 	}
 
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	*data_rows = rows;
 	return total;
 
  fail_free:
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	for (count = 0; count < total; count++) {
 		if (!rows[count])
 			break;
@@ -712,16 +752,15 @@  ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
 				struct pevent_record ***data_rows)
 {
-	struct kshark_task_list *task;
 	struct pevent_record **rows;
 	struct pevent_record *rec;
 	struct rec_list **rec_list;
 	struct rec_list *temp_rec;
+	enum rec_type type = REC_RECORD;
 	size_t count, total = 0;
 	int n_cpus;
-	int pid;
 
-	total = get_records(kshark_ctx, &rec_list);
+	total = get_records(kshark_ctx, &rec_list, REC_RECORD);
 	if (total < 0)
 		goto fail;
 
@@ -734,17 +773,12 @@  ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
 	for (count = 0; count < total; count++) {
 		int next_cpu;
 
-		next_cpu = pick_next_cpu(rec_list, n_cpus);
+		next_cpu = pick_next_cpu(rec_list, n_cpus, type);
 
 		if (next_cpu >= 0) {
 			rec = rec_list[next_cpu]->rec;
 			rows[count] = rec;
 
-			pid = pevent_data_pid(kshark_ctx->pevent, rec);
-			task = kshark_add_task(kshark_ctx, pid);
-			if (!task)
-				goto fail;
-
 			temp_rec = rec_list[next_cpu];
 			rec_list[next_cpu] = rec_list[next_cpu]->next;
 			free(temp_rec);
@@ -753,7 +787,7 @@  ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
 	}
 
 	/* There should be no records left in rec_list */
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	*data_rows = rows;
 	return total;
 
diff --git a/kernel-shark-qt/src/libkshark.h b/kernel-shark-qt/src/libkshark.h
index 6ed2a1ea..2e265522 100644
--- a/kernel-shark-qt/src/libkshark.h
+++ b/kernel-shark-qt/src/libkshark.h
@@ -33,6 +33,9 @@  extern "C" {
  * info etc.) is available on-demand via the offset into the trace file.
  */
 struct kshark_entry {
+	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
+	struct kshark_entry *next; /* MUST BE FIRST ENTRY */
+
 	/**
 	 * A bit mask controlling the visibility of the entry. A value of OxFF
 	 * would mean that the entry is visible everywhere. Use
@@ -60,9 +63,6 @@  struct kshark_entry {
 	 * started.
 	 */
 	uint64_t	ts;
-
-	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
-	struct kshark_entry *next;
 };
 
 /** Size of the task's hash table. */