[2/3] kernel-shark-qt: Consolidate load_data_entries and load_data_records

Message ID 20180703162432.316240087@goodmis.org (mailing list archive)
State Accepted, archived
Series kernel-shark-qt: Merge kshark_load_data_records/entries()

Commit Message

Steven Rostedt July 3, 2018, 4:21 p.m. UTC
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

The two functions kshark_load_data_entries() and
kshark_load_data_records() contain a lot of similar code. Adding helper
functions can simplify it, and keep bugs from happening in one and not
the other (bugs will happen in both! but they will be consistent).

Add some helper functions used by the two functions above.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel-shark-qt/src/libkshark.c | 232 +++++++++++++++++++-------------
 1 file changed, 137 insertions(+), 95 deletions(-)

Comments

Yordan Karadzhov July 4, 2018, 11:39 a.m. UTC | #1
On  3.07.2018 19:21, Steven Rostedt wrote:
> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> 
> The two functions kshark_load_data_entries() and
> kshark_load_data_records() contain a lot of similar code. Adding helper
> functions can simplify it, and keep bugs from happening in one and not
> the other (bugs will happen in both! but they will be consistent).
> 
> Add some helper functions used by the two functions above.
> 
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
>   kernel-shark-qt/src/libkshark.c | 232 +++++++++++++++++++-------------
>   1 file changed, 137 insertions(+), 95 deletions(-)
> 
> diff --git a/kernel-shark-qt/src/libkshark.c b/kernel-shark-qt/src/libkshark.c
> index e38ddebbbe41..680949077b7f 100644
> --- a/kernel-shark-qt/src/libkshark.c
> +++ b/kernel-shark-qt/src/libkshark.c
> @@ -500,6 +500,72 @@ static void kshark_set_entry_values(struct kshark_context *kshark_ctx,
>   	entry->pid = pevent_data_pid(kshark_ctx->pevent, record);
>   }
>   
> +struct rec_list {
> +	struct pevent_record	*rec;
> +	struct rec_list		*next;
> +};
> +
> +static void free_rec_list(struct rec_list **rec_list, int n_cpus)
> +{
> +	struct rec_list *temp_rec;
> +	int cpu;
> +
> +	for (cpu = 0; cpu < n_cpus; ++cpu) {
> +		while (rec_list[cpu]) {
> +			temp_rec = rec_list[cpu];
> +			rec_list[cpu] = temp_rec->next;
> +			free(temp_rec);
> +		}
> +	}
> +	free(rec_list);
> +}
> +
> +static size_t get_records(struct kshark_context *kshark_ctx,
> +			  struct rec_list ***rec_list)
> +{
> +	struct pevent_record *rec;
> +	struct rec_list **temp_next;
> +	struct rec_list **cpu_list;
> +	struct rec_list *temp_rec;
> +	size_t count, total = 0;
> +	int n_cpus;
> +	int cpu;
> +
> +	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> +	cpu_list = calloc(n_cpus, sizeof(*cpu_list));
> +	if (!cpu_list)
> +		return -ENOMEM;
> +
> +	for (cpu = 0; cpu < n_cpus; ++cpu) {
> +		count = 0;
> +		cpu_list[cpu] = NULL;
> +		temp_next = &cpu_list[cpu];
> +
> +		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
> +		while (rec) {
> +			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
> +			if (!temp_rec)
> +				goto fail;
> +
> +			temp_rec->rec = rec;
> +			temp_rec->next = NULL;
> +			temp_next = &temp_rec->next;
> +
> +			++count;
> +			rec = tracecmd_read_data(kshark_ctx->handle, cpu);
> +		}
> +
> +		total += count;
> +	}
> +
> +	*rec_list = cpu_list;
> +	return total;
> +
> + fail:
> +	free_rec_list(cpu_list, n_cpus);
> +	return -ENOMEM;
> +}
> +
>   /**
>    * @brief Load the content of the trace data file into an array of
>    *	  kshark_entries. This function provides fast loading, however the

Actually, after applying the patch for trace-input.c and this patch on 
top, kshark_load_data_entries() becomes slower than 
kshark_load_data_records(). Just make this clear in the description of 
the functions.

> @@ -521,9 +587,11 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
>   				struct kshark_entry ***data_rows)
>   {
>   	struct event_filter *adv_filter = kshark_ctx->advanced_event_filter;
> -	struct kshark_entry **cpu_list, **rows;
> -	struct kshark_entry *entry, **next;
>   	struct kshark_task_list *task;
> +	struct kshark_entry **rows;
> +	struct kshark_entry *entry;
> +	struct rec_list **rec_list;
> +	struct rec_list *temp_rec;
>   	struct pevent_record *rec;
>   	int cpu, n_cpus, next_cpu;
>   	size_t count, total = 0;
> @@ -533,24 +601,41 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
>   	if (*data_rows)
>   		free(*data_rows);
>   
> +	total = get_records(kshark_ctx, &rec_list);
> +	if (total < 0)
> +		goto fail;
> +
> +	rows = calloc(total, sizeof(struct kshark_entry *));
> +	if(!rows)
> +		goto fail;
> +
>   	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> -	cpu_list = calloc(n_cpus, sizeof(struct kshark_entry *));
>   
> -	for (cpu = 0; cpu < n_cpus; ++cpu) {
> -		count = 0;
> -		cpu_list[cpu] = NULL;
> -		next = &cpu_list[cpu];
> +	for (count = 0; count < total; count++) {
> +		ts = 0;
> +		next_cpu = -1;
> +		for (cpu = 0; cpu < n_cpus; ++cpu) {
> +			if (!rec_list[cpu])
> +				continue;
>   
> -		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
> -		while (rec) {
> -			*next = entry = malloc(sizeof(struct kshark_entry));
> +			if (!ts || rec_list[cpu]->rec->ts < ts) {
> +				ts = rec_list[cpu]->rec->ts;
> +				next_cpu = cpu;
> +			}
> +		}
> +
> +		if (next_cpu >= 0) {
> +			entry = malloc(sizeof(struct kshark_entry));
>   			if (!entry)
> -				goto fail;
> +				goto fail_free;
> +
> +			rec = rec_list[next_cpu]->rec;
> +			rows[count] = entry;
>   
>   			kshark_set_entry_values(kshark_ctx, rec, entry);
>   			task = kshark_add_task(kshark_ctx, entry->pid);
>   			if (!task)
> -				goto fail;
> +				goto fail_free;
>   
>   			/* Apply event filtering. */
>   			ret = FILTER_NONE;
> @@ -567,47 +652,26 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
>   				entry->visible &= ~kshark_ctx->filter_mask;
>   			}
>   
> -			entry->next = NULL;
> -			next = &entry->next;
> +			temp_rec = rec_list[next_cpu];
> +			rec_list[next_cpu] = rec_list[next_cpu]->next;
> +			free(temp_rec);
>   			free_record(rec);
> -
> -			++count;
> -			rec = tracecmd_read_data(kshark_ctx->handle, cpu);
> -		}
> -
> -		total += count;
> -	}
> -
> -	rows = calloc(total, sizeof(struct kshark_entry *));
> -	if (!rows)
> -		goto fail;
> -
> -	count = 0;
> -	while (count < total) {
> -		ts = 0;
> -		next_cpu = -1;
> -		for (cpu = 0; cpu < n_cpus; ++cpu) {
> -			if (!cpu_list[cpu])
> -				continue;
> -
> -			if (!ts || cpu_list[cpu]->ts < ts) {
> -				ts = cpu_list[cpu]->ts;
> -				next_cpu = cpu;
> -			}
> -		}
> -
> -		if (next_cpu >= 0) {
> -			rows[count] = cpu_list[next_cpu];
> -			cpu_list[next_cpu] = cpu_list[next_cpu]->next;
>   		}
> -		++count;
>   	}
>   
> -	free(cpu_list);
> +	free_rec_list(rec_list, n_cpus);
>   	*data_rows = rows;
>   	return total;
>   
> -fail:
> + fail_free:
> +	free_rec_list(rec_list, n_cpus);
> +	for (count = 0; count < total; count++) {
> +		if (!rows[count])
> +			break;
> +		free(rows[count]);
> +	}
> +	free(rows);
> + fail:
>   	fprintf(stderr, "Failed to allocate memory during data loading.\n");
>   	return -ENOMEM;
>   }
> @@ -625,82 +689,60 @@ fail:
>   ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
>   				struct pevent_record ***data_rows)
>   {
> -	struct temp {
> -		struct pevent_record	*rec;
> -		struct temp		*next;
> -	} **cpu_list, **temp_next, *temp_rec;
> -
>   	struct kshark_task_list *task;
>   	struct pevent_record **rows;
> -	struct pevent_record *data;
> +	struct pevent_record *rec;
> +	struct rec_list **rec_list;
> +	struct rec_list *temp_rec;
>   	int cpu, n_cpus, next_cpu;
>   	size_t count, total = 0;
>   	uint64_t ts;
>   	int pid;
>   
> -	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> -	cpu_list = calloc(n_cpus, sizeof(struct temp *));
> -
> -	for (cpu = 0; cpu < n_cpus; ++cpu) {
> -		count = 0;
> -		cpu_list[cpu] = NULL;
> -		temp_next = &cpu_list[cpu];
> -
> -		data = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
> -		while (data) {
> -			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
> -			if (!temp_rec)
> -				goto fail;
> -
> -			pid = pevent_data_pid(kshark_ctx->pevent, data);
> -			task = kshark_add_task(kshark_ctx, pid);
> -			if (!task)
> -				goto fail;
> -
> -			temp_rec->rec = data;
> -			temp_rec->next = NULL;
> -			temp_next = &(temp_rec->next);
> -
> -			++count;
> -			data = tracecmd_read_data(kshark_ctx->handle, cpu);
> -		}
> -
> -		total += count;
> -	}
> +	total = get_records(kshark_ctx, &rec_list);
> +	if (total < 0)
> +		goto fail;
>   
>   	rows = calloc(total, sizeof(struct pevent_record *));
> -	if (!rows)
> +	if(!rows)
>   		goto fail;
>   
> -	count = 0;
> -	while (count < total) {
> +	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> +
> +	for (count = 0; count < total; count++) {
>   		ts = 0;
>   		next_cpu = -1;
>   		for (cpu = 0; cpu < n_cpus; ++cpu) {
> -			if (!cpu_list[cpu])
> +			if (!rec_list[cpu])
>   				continue;
>   
> -			if (!ts || cpu_list[cpu]->rec->ts < ts) {
> -				ts = cpu_list[cpu]->rec->ts;
> +			if (!ts || rec_list[cpu]->rec->ts < ts) {
> +				ts = rec_list[cpu]->rec->ts;
>   				next_cpu = cpu;
>   			}
>   		}
>   
>   		if (next_cpu >= 0) {
> -			rows[count] = cpu_list[next_cpu]->rec;
> -			temp_rec = cpu_list[next_cpu];
> -			cpu_list[next_cpu] = cpu_list[next_cpu]->next;
> -			free (temp_rec);
> -		}
> +			rec = rec_list[next_cpu]->rec;
> +			rows[count] = rec;
>   
> -		++count;
> +			pid = pevent_data_pid(kshark_ctx->pevent, rec);
> +			task = kshark_add_task(kshark_ctx, pid);
> +			if (!task)
> +				goto fail;
> +
> +			temp_rec = rec_list[next_cpu];
> +			rec_list[next_cpu] = rec_list[next_cpu]->next;
> +			free(temp_rec);
> +			free_record(rec);

You have to free only the element of the list.
The user is responsible for freeing the record.
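
For context, the ownership rule this implies for callers of
kshark_load_data_records() looks roughly like this (a hypothetical
usage sketch, not code from the patch; kshark_ctx assumed already
initialized):

	struct pevent_record **rows = NULL;
	ssize_t i, n;

	n = kshark_load_data_records(kshark_ctx, &rows);
	if (n >= 0) {
		for (i = 0; i < n; i++)
			free_record(rows[i]);	/* caller frees each record */
		free(rows);			/* and the row array itself */
	}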

> +		}
>   	}
>   
> -	free(cpu_list);
> +	free_rec_list(rec_list, n_cpus);
>   	*data_rows = rows;
>   	return total;
>   
> -fail:
> + fail:
>   	fprintf(stderr, "Failed to allocate memory during data loading.\n");
>   	return -ENOMEM;
>   }
>
Steven Rostedt July 5, 2018, 2:47 p.m. UTC | #2
On Wed, 4 Jul 2018 14:39:01 +0300
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

> >   /**
> >    * @brief Load the content of the trace data file into an array of
> >    *	  kshark_entries. This function provides fast loading, however the  
> 
> Actually, after applying the patch for trace-input.c and this patch on 
> top, kshark_load_data_entries() becomes slower than 
> kshark_load_data_records(). Just make this clear in the description of 
> the functions.

So how should the purpose of this function be stated here, then?

"This function provides an abstraction of the entries from the raw data
that is read." ?

-- Steve


> 
> > @@ -521,9 +587,11 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
> >   				struct kshark_entry ***data_rows)
Yordan Karadzhov July 5, 2018, 2:53 p.m. UTC | #3
On  5.07.2018 17:47, Steven Rostedt wrote:
> On Wed, 4 Jul 2018 14:39:01 +0300
> "Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:
> 
>>>    /**
>>>     * @brief Load the content of the trace data file into an array of
>>>     *	  kshark_entries. This function provides fast loading, however the
>>
>> Actually, after applying the patch for trace-input.c and this patch on
>> top, kshark_load_data_entries() becomes slower than
>> kshark_load_data_records(). Just make this clear in the description of
>> the functions.
> 
> So how should the purpose of this function be stated here, then?
> 
> "This function provides an abstraction of the entries from the raw data
> that is read." ?

Yes,

--Yordan


> 
> -- Steve
> 
> 
>>
>>> @@ -521,9 +587,11 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
>>>    				struct kshark_entry ***data_rows)
>
Steven Rostedt July 5, 2018, 3:48 p.m. UTC | #4
On Thu, 5 Jul 2018 17:53:12 +0300
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

> > "This function provides an abstraction of the entries from the raw data
> > that is read." ?  
> 
> Yes,

Here's the new patch:

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Subject: [PATCH] kernel-shark-qt: Consolidate load_data_entries and load_data_records

The two functions kshark_load_data_entries() and
kshark_load_data_records() contain a lot of similar code. Adding helper
functions can simplify it, and keep bugs from happening in one and not
the other (bugs will happen in both! but they will be consistent).

Add some helper functions used by the two functions above.

Note, this does create a performance regression in kshark_load_data_entries().
Up to 20%! But consolidating the code will make maintenance easier and we
can add more functions that use the helper functions. Update the comments
to reflect the slowdown. In the future, we may revisit this to see if
we can bring back the performance.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel-shark-qt/src/libkshark.c | 244 ++++++++++++++++++++++++----------------
 1 file changed, 147 insertions(+), 97 deletions(-)

diff --git a/kernel-shark-qt/src/libkshark.c b/kernel-shark-qt/src/libkshark.c
index e38ddebb..86bd8789 100644
--- a/kernel-shark-qt/src/libkshark.c
+++ b/kernel-shark-qt/src/libkshark.c
@@ -500,10 +500,77 @@ static void kshark_set_entry_values(struct kshark_context *kshark_ctx,
 	entry->pid = pevent_data_pid(kshark_ctx->pevent, record);
 }
 
+struct rec_list {
+	struct pevent_record	*rec;
+	struct rec_list		*next;
+};
+
+static void free_rec_list(struct rec_list **rec_list, int n_cpus)
+{
+	struct rec_list *temp_rec;
+	int cpu;
+
+	for (cpu = 0; cpu < n_cpus; ++cpu) {
+		while (rec_list[cpu]) {
+			temp_rec = rec_list[cpu];
+			rec_list[cpu] = temp_rec->next;
+			free(temp_rec);
+		}
+	}
+	free(rec_list);
+}
+
+static size_t get_records(struct kshark_context *kshark_ctx,
+			  struct rec_list ***rec_list)
+{
+	struct pevent_record *rec;
+	struct rec_list **temp_next;
+	struct rec_list **cpu_list;
+	struct rec_list *temp_rec;
+	size_t count, total = 0;
+	int n_cpus;
+	int cpu;
+
+	n_cpus = tracecmd_cpus(kshark_ctx->handle);
+	cpu_list = calloc(n_cpus, sizeof(*cpu_list));
+	if (!cpu_list)
+		return -ENOMEM;
+
+	for (cpu = 0; cpu < n_cpus; ++cpu) {
+		count = 0;
+		cpu_list[cpu] = NULL;
+		temp_next = &cpu_list[cpu];
+
+		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
+		while (rec) {
+			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
+			if (!temp_rec)
+				goto fail;
+
+			temp_rec->rec = rec;
+			temp_rec->next = NULL;
+			temp_next = &temp_rec->next;
+
+			++count;
+			rec = tracecmd_read_data(kshark_ctx->handle, cpu);
+		}
+
+		total += count;
+	}
+
+	*rec_list = cpu_list;
+	return total;
+
+ fail:
+	free_rec_list(cpu_list, n_cpus);
+	return -ENOMEM;
+}
+
 /**
  * @brief Load the content of the trace data file into an array of
- *	  kshark_entries. This function provides fast loading, however the
- *	  "latency" and the "info" fields can be accessed only via the offset
+ *	  kshark_entries. This function provides an abstraction of the
+ *	  entries from the raw data that is read, however the "latency"
+ *	  and the "info" fields can be accessed only via the offset
  *	  into the file. This makes the access to these two fields much
  *	  slower.
  *	  If one or more filters are set, the "visible" fields of each entry
@@ -521,9 +588,11 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 				struct kshark_entry ***data_rows)
 {
 	struct event_filter *adv_filter = kshark_ctx->advanced_event_filter;
-	struct kshark_entry **cpu_list, **rows;
-	struct kshark_entry *entry, **next;
 	struct kshark_task_list *task;
+	struct kshark_entry **rows;
+	struct kshark_entry *entry;
+	struct rec_list **rec_list;
+	struct rec_list *temp_rec;
 	struct pevent_record *rec;
 	int cpu, n_cpus, next_cpu;
 	size_t count, total = 0;
@@ -533,24 +602,48 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 	if (*data_rows)
 		free(*data_rows);
 
+	/*
+	 * TODO: Getting the records separately slows this function
+	 *       down, instead of just accessing the records when
+	 *	 setting up the kernel entries. But this keeps the
+	 *	 code simplier. We should revisit to see if we can
+	 *	 bring back the performance.
+	 */
+	total = get_records(kshark_ctx, &rec_list);
+	if (total < 0)
+		goto fail;
+
+	rows = calloc(total, sizeof(struct kshark_entry *));
+	if(!rows)
+		goto fail;
+
 	n_cpus = tracecmd_cpus(kshark_ctx->handle);
-	cpu_list = calloc(n_cpus, sizeof(struct kshark_entry *));
 
-	for (cpu = 0; cpu < n_cpus; ++cpu) {
-		count = 0;
-		cpu_list[cpu] = NULL;
-		next = &cpu_list[cpu];
+	for (count = 0; count < total; count++) {
+		ts = 0;
+		next_cpu = -1;
+		for (cpu = 0; cpu < n_cpus; ++cpu) {
+			if (!rec_list[cpu])
+				continue;
 
-		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
-		while (rec) {
-			*next = entry = malloc(sizeof(struct kshark_entry));
+			if (!ts || rec_list[cpu]->rec->ts < ts) {
+				ts = rec_list[cpu]->rec->ts;
+				next_cpu = cpu;
+			}
+		}
+
+		if (next_cpu >= 0) {
+			entry = malloc(sizeof(struct kshark_entry));
 			if (!entry)
-				goto fail;
+				goto fail_free;
+
+			rec = rec_list[next_cpu]->rec;
+			rows[count] = entry;
 
 			kshark_set_entry_values(kshark_ctx, rec, entry);
 			task = kshark_add_task(kshark_ctx, entry->pid);
 			if (!task)
-				goto fail;
+				goto fail_free;
 
 			/* Apply event filtering. */
 			ret = FILTER_NONE;
@@ -567,47 +660,26 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 				entry->visible &= ~kshark_ctx->filter_mask;
 			}
 
-			entry->next = NULL;
-			next = &entry->next;
+			temp_rec = rec_list[next_cpu];
+			rec_list[next_cpu] = rec_list[next_cpu]->next;
+			free(temp_rec);
 			free_record(rec);
-
-			++count;
-			rec = tracecmd_read_data(kshark_ctx->handle, cpu);
 		}
-
-		total += count;
 	}
 
-	rows = calloc(total, sizeof(struct kshark_entry *));
-	if (!rows)
-		goto fail;
-
-	count = 0;
-	while (count < total) {
-		ts = 0;
-		next_cpu = -1;
-		for (cpu = 0; cpu < n_cpus; ++cpu) {
-			if (!cpu_list[cpu])
-				continue;
-
-			if (!ts || cpu_list[cpu]->ts < ts) {
-				ts = cpu_list[cpu]->ts;
-				next_cpu = cpu;
-			}
-		}
-
-		if (next_cpu >= 0) {
-			rows[count] = cpu_list[next_cpu];
-			cpu_list[next_cpu] = cpu_list[next_cpu]->next;
-		}
-		++count;
-	}
-
-	free(cpu_list);
+	free_rec_list(rec_list, n_cpus);
 	*data_rows = rows;
 	return total;
 
-fail:
+ fail_free:
+	free_rec_list(rec_list, n_cpus);
+	for (count = 0; count < total; count++) {
+		if (!rows[count])
+			break;
+		free(rows[count]);
+	}
+	free(rows);
+ fail:
 	fprintf(stderr, "Failed to allocate memory during data loading.\n");
 	return -ENOMEM;
 }
@@ -625,82 +697,60 @@ fail:
 ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
 				struct pevent_record ***data_rows)
 {
-	struct temp {
-		struct pevent_record	*rec;
-		struct temp		*next;
-	} **cpu_list, **temp_next, *temp_rec;
-
 	struct kshark_task_list *task;
 	struct pevent_record **rows;
-	struct pevent_record *data;
+	struct pevent_record *rec;
+	struct rec_list **rec_list;
+	struct rec_list *temp_rec;
 	int cpu, n_cpus, next_cpu;
 	size_t count, total = 0;
 	uint64_t ts;
 	int pid;
 
-	n_cpus = tracecmd_cpus(kshark_ctx->handle);
-	cpu_list = calloc(n_cpus, sizeof(struct temp *));
-
-	for (cpu = 0; cpu < n_cpus; ++cpu) {
-		count = 0;
-		cpu_list[cpu] = NULL;
-		temp_next = &cpu_list[cpu];
-
-		data = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
-		while (data) {
-			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
-			if (!temp_rec)
-				goto fail;
-
-			pid = pevent_data_pid(kshark_ctx->pevent, data);
-			task = kshark_add_task(kshark_ctx, pid);
-			if (!task)
-				goto fail;
-
-			temp_rec->rec = data;
-			temp_rec->next = NULL;
-			temp_next = &(temp_rec->next);
-
-			++count;
-			data = tracecmd_read_data(kshark_ctx->handle, cpu);
-		}
-
-		total += count;
-	}
+	total = get_records(kshark_ctx, &rec_list);
+	if (total < 0)
+		goto fail;
 
 	rows = calloc(total, sizeof(struct pevent_record *));
-	if (!rows)
+	if(!rows)
 		goto fail;
 
-	count = 0;
-	while (count < total) {
+	n_cpus = tracecmd_cpus(kshark_ctx->handle);
+
+	for (count = 0; count < total; count++) {
 		ts = 0;
 		next_cpu = -1;
 		for (cpu = 0; cpu < n_cpus; ++cpu) {
-			if (!cpu_list[cpu])
+			if (!rec_list[cpu])
 				continue;
 
-			if (!ts || cpu_list[cpu]->rec->ts < ts) {
-				ts = cpu_list[cpu]->rec->ts;
+			if (!ts || rec_list[cpu]->rec->ts < ts) {
+				ts = rec_list[cpu]->rec->ts;
 				next_cpu = cpu;
 			}
 		}
 
 		if (next_cpu >= 0) {
-			rows[count] = cpu_list[next_cpu]->rec;
-			temp_rec = cpu_list[next_cpu];
-			cpu_list[next_cpu] = cpu_list[next_cpu]->next;
-			free (temp_rec);
-		}
+			rec = rec_list[next_cpu]->rec;
+			rows[count] = rec;
 
-		++count;
+			pid = pevent_data_pid(kshark_ctx->pevent, rec);
+			task = kshark_add_task(kshark_ctx, pid);
+			if (!task)
+				goto fail;
+
+			temp_rec = rec_list[next_cpu];
+			rec_list[next_cpu] = rec_list[next_cpu]->next;
+			free(temp_rec);
+			free_record(rec);
+		}
 	}
 
-	free(cpu_list);
+	free_rec_list(rec_list, n_cpus);
 	*data_rows = rows;
 	return total;
 
-fail:
+ fail:
 	fprintf(stderr, "Failed to allocate memory during data loading.\n");
 	return -ENOMEM;
 }
Yordan Karadzhov July 5, 2018, 4:08 p.m. UTC | #5
Hi Steve,
I am still learning how to reply to patches, and I guess because of 
this you didn't notice my comment at the bottom. I think you have to 
remove the line

   free_record(rec);

You don't have to send me another patch.
Thanks!
Yordan

On  4.07.2018 14:39, Yordan Karadzhov (VMware) wrote:
>>
>> +            rec = rec_list[next_cpu]->rec;
>> +            rows[count] = rec;
>> -        ++count;
>> +            pid = pevent_data_pid(kshark_ctx->pevent, rec);
>> +            task = kshark_add_task(kshark_ctx, pid);
>> +            if (!task)
>> +                goto fail;
>> +
>> +            temp_rec = rec_list[next_cpu];
>> +            rec_list[next_cpu] = rec_list[next_cpu]->next;
>> +            free(temp_rec);
>> +            free_record(rec);
> 
> You have to free only the element of the list.
> The user is responsible for freeing the record.
> 
>> +        }
>>       }
>> -    free(cpu_list);
>> +    free_rec_list(rec_list, n_cpus);
>>       *data_rows = rows;
Steven Rostedt July 5, 2018, 4:25 p.m. UTC | #6
On Thu, 5 Jul 2018 19:08:58 +0300
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

> Hi Steve,
> I am still learning how to answer to patches, and I guess because of 
> this you didn't notice my comment at the bottom. I think you have to 
> remove the line
> 
>    free_record(rec);

My fault, I should have looked further down. This is why I usually
clean up my replies, by removing functions I'm not commenting on and
replacing them with: [..]

Even with that said, it was my fault for missing it. Thanks for the
review.

> 
> You don't have to send me another patch.

OK, I'll fix it up and push it out.

-- Steve
Steven Rostedt July 5, 2018, 4:35 p.m. UTC | #7
On Wed, 4 Jul 2018 14:39:01 +0300
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

Your comment actually had me re-examine the code, and I found a bug.

> >   
> > +struct rec_list {
> > +	struct pevent_record	*rec;
> > +	struct rec_list		*next;
> > +};
> > +
> > +static void free_rec_list(struct rec_list **rec_list, int n_cpus)
> > +{
> > +	struct rec_list *temp_rec;
> > +	int cpu;
> > +
> > +	for (cpu = 0; cpu < n_cpus; ++cpu) {
> > +		while (rec_list[cpu]) {
> > +			temp_rec = rec_list[cpu];
> > +			rec_list[cpu] = temp_rec->next;

I need a:

			free_record(temp_rec->rec);


> > +			free(temp_rec);
> > +		}
> > +	}
> > +	free(rec_list);
> > +}
> > +


> > @@ -521,9 +587,11 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
> >   				struct kshark_entry ***data_rows)
> >   {
> >   	struct event_filter *adv_filter = kshark_ctx->advanced_event_filter;
> > -	struct kshark_entry **cpu_list, **rows;
> > -	struct kshark_entry *entry, **next;
> >   	struct kshark_task_list *task;
> > +	struct kshark_entry **rows;
> > +	struct kshark_entry *entry;
> > +	struct rec_list **rec_list;
> > +	struct rec_list *temp_rec;
> >   	struct pevent_record *rec;
> >   	int cpu, n_cpus, next_cpu;
> >   	size_t count, total = 0;
> > @@ -533,24 +601,41 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
> >   	if (*data_rows)
> >   		free(*data_rows);
> >   
> > +	total = get_records(kshark_ctx, &rec_list);
> > +	if (total < 0)
> > +		goto fail;
> > +
> > +	rows = calloc(total, sizeof(struct kshark_entry *));
> > +	if(!rows)
> > +		goto fail;
> > +
> >   	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> > -	cpu_list = calloc(n_cpus, sizeof(struct kshark_entry *));
> >   
> > -	for (cpu = 0; cpu < n_cpus; ++cpu) {
> > -		count = 0;
> > -		cpu_list[cpu] = NULL;
> > -		next = &cpu_list[cpu];
> > +	for (count = 0; count < total; count++) {
> > +		ts = 0;
> > +		next_cpu = -1;
> > +		for (cpu = 0; cpu < n_cpus; ++cpu) {
> > +			if (!rec_list[cpu])
> > +				continue;
> >   
> > -		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
> > -		while (rec) {
> > -			*next = entry = malloc(sizeof(struct kshark_entry));
> > +			if (!ts || rec_list[cpu]->rec->ts < ts) {
> > +				ts = rec_list[cpu]->rec->ts;
> > +				next_cpu = cpu;
> > +			}
> > +		}
> > +
> > +		if (next_cpu >= 0) {
> > +			entry = malloc(sizeof(struct kshark_entry));
> >   			if (!entry)
> > -				goto fail;
> > +				goto fail_free;
> > +
> > +			rec = rec_list[next_cpu]->rec;
> > +			rows[count] = entry;
> >   
> >   			kshark_set_entry_values(kshark_ctx, rec, entry);
> >   			task = kshark_add_task(kshark_ctx, entry->pid);
> >   			if (!task)
> > -				goto fail;
> > +				goto fail_free;
> >   
> >   			/* Apply event filtering. */
> >   			ret = FILTER_NONE;
> > @@ -567,47 +652,26 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
> >   				entry->visible &= ~kshark_ctx->filter_mask;
> >   			}
> >   
> > -			entry->next = NULL;
> > -			next = &entry->next;
> > +			temp_rec = rec_list[next_cpu];
> > +			rec_list[next_cpu] = rec_list[next_cpu]->next;
> > +			free(temp_rec);

I should add a comment here:

			/* The record will no longer be referenced */

> >   			free_record(rec);
> > -
> > -			++count;
> > -			rec = tracecmd_read_data(kshark_ctx->handle, cpu);
> > -		}
> > -
> > -		total += count;
> > -	}
> > -
> > -	rows = calloc(total, sizeof(struct kshark_entry *));
> > -	if (!rows)
> > -		goto fail;
> > -
> > -	count = 0;
> > -	while (count < total) {
> > -		ts = 0;
> > -		next_cpu = -1;
> > -		for (cpu = 0; cpu < n_cpus; ++cpu) {
> > -			if (!cpu_list[cpu])
> > -				continue;
> > -
> > -			if (!ts || cpu_list[cpu]->ts < ts) {
> > -				ts = cpu_list[cpu]->ts;
> > -				next_cpu = cpu;
> > -			}
> > -		}
> > -
> > -		if (next_cpu >= 0) {
> > -			rows[count] = cpu_list[next_cpu];
> > -			cpu_list[next_cpu] = cpu_list[next_cpu]->next;
> >   		}
> > -		++count;
> >   	}
> >   
> > -	free(cpu_list);
> > +	free_rec_list(rec_list, n_cpus);
> >   	*data_rows = rows;
> >   	return total;
> >   
> > -fail:
> > + fail_free:
> > +	free_rec_list(rec_list, n_cpus);
> > +	for (count = 0; count < total; count++) {
> > +		if (!rows[count])
> > +			break;
> > +		free(rows[count]);
> > +	}
> > +	free(rows);
> > + fail:
> >   	fprintf(stderr, "Failed to allocate memory during data loading.\n");
> >   	return -ENOMEM;
> >   }
> > @@ -625,82 +689,60 @@ fail:
> >   ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
> >   				struct pevent_record ***data_rows)
> >   {

[..]

> >   	rows = calloc(total, sizeof(struct pevent_record *));
> > -	if (!rows)
> > +	if(!rows)
> >   		goto fail;
> >   
> > -	count = 0;
> > -	while (count < total) {
> > +	n_cpus = tracecmd_cpus(kshark_ctx->handle);
> > +
> > +	for (count = 0; count < total; count++) {
> >   		ts = 0;
> >   		next_cpu = -1;
> >   		for (cpu = 0; cpu < n_cpus; ++cpu) {
> > -			if (!cpu_list[cpu])
> > +			if (!rec_list[cpu])
> >   				continue;
> >   
> > -			if (!ts || cpu_list[cpu]->rec->ts < ts) {
> > -				ts = cpu_list[cpu]->rec->ts;
> > +			if (!ts || rec_list[cpu]->rec->ts < ts) {
> > +				ts = rec_list[cpu]->rec->ts;
> >   				next_cpu = cpu;
> >   			}
> >   		}
> >   
> >   		if (next_cpu >= 0) {
> > -			rows[count] = cpu_list[next_cpu]->rec;
> > -			temp_rec = cpu_list[next_cpu];
> > -			cpu_list[next_cpu] = cpu_list[next_cpu]->next;
> > -			free (temp_rec);
> > -		}
> > +			rec = rec_list[next_cpu]->rec;
> > +			rows[count] = rec;
> >   
> > -		++count;
> > +			pid = pevent_data_pid(kshark_ctx->pevent, rec);
> > +			task = kshark_add_task(kshark_ctx, pid);
> > +			if (!task)
> > +				goto fail;
> > +
> > +			temp_rec = rec_list[next_cpu];
> > +			rec_list[next_cpu] = rec_list[next_cpu]->next;
> > +			free(temp_rec);
> > +			free_record(rec);  
> 
> You have to free only the element of the list.
> The user is responsible for freeing the record.

Right! And I'll replace that with a comment:

			/* The record is still referenced in rows */

Thanks for the review.

-- Steve

> 
> > +		}
> >   	}
> >   
> > -	free(cpu_list);
> > +	free_rec_list(rec_list, n_cpus);
> >   	*data_rows = rows;
> >   	return total;
> >   
> > -fail:
> > + fail:
> >   	fprintf(stderr, "Failed to allocate memory during data loading.\n");
> >   	return -ENOMEM;
> >   }
> >
Steven Rostedt July 5, 2018, 4:38 p.m. UTC | #8
On Thu, 5 Jul 2018 12:35:09 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> Thanks for the review.

Here's a diff of the patch. Not counting the comment change in the
doxygen header, as I already rebased that.

-- Steve

diff --git a/kernel-shark-qt/src/libkshark.c b/kernel-shark-qt/src/libkshark.c
index 86bd8789..f7560404 100644
--- a/kernel-shark-qt/src/libkshark.c
+++ b/kernel-shark-qt/src/libkshark.c
@@ -514,6 +514,7 @@ static void free_rec_list(struct rec_list **rec_list, int n_cpus)
 		while (rec_list[cpu]) {
 			temp_rec = rec_list[cpu];
 			rec_list[cpu] = temp_rec->next;
+			free_record(temp_rec->rec);
 			free(temp_rec);
 		}
 	}
@@ -663,6 +664,7 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 			temp_rec = rec_list[next_cpu];
 			rec_list[next_cpu] = rec_list[next_cpu]->next;
 			free(temp_rec);
+			/* The record is no longer be referenced */
 			free_record(rec);
 		}
 	}
@@ -742,10 +744,11 @@ ssize_t kshark_load_data_records(struct kshark_context *kshark_ctx,
 			temp_rec = rec_list[next_cpu];
 			rec_list[next_cpu] = rec_list[next_cpu]->next;
 			free(temp_rec);
-			free_record(rec);
+			/* The record is still referenced in rows */
 		}
 	}
 
+	/* There should be no records left in rec_list */
 	free_rec_list(rec_list, n_cpus);
 	*data_rows = rows;
 	return total;
Steven Rostedt July 5, 2018, 10:05 p.m. UTC | #9
On Wed, 4 Jul 2018 14:39:01 +0300
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

> >   /**
> >    * @brief Load the content of the trace data file into an array of
> >    *	  kshark_entries. This function provides fast loading, however the  
> 
> Actually, after applying the patch for trace-input.c and this patch on 
> top, kshark_load_data_entries() becomes slower than 
> kshark_load_data_records(). Just make this clear in the description of 
> the functions.
> 
> > @@ -521,9 +587,11 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
> >   				struct kshark_entry ***data_rows)

In my tests, it showed a 30% slowdown. I really didn't like that. So I
looked at why it was so slow, and it appeared to be twofold. One was that I
was doing a double allocation (allocating for both the rec_list and the
entry); the other was that holding on to all records still appeared to
slow things down a bit. Even with the array of storage, it made it
slow.

I decided to make it closer to the original but still with helper
functions. The key was modifying the struct rec_list to be:

struct rec_list {
	union {
		struct kshark_entry		entry;
		struct {
			struct rec_list		*next; /* Matches entry->next */
			struct pevent_record	*rec;
		};
	};
};

And creating a enum:

enum rec_type {
	REC_RECORD,
	REC_ENTRY,
};


That would allow the functions to know what type they are dealing with.
Will this change affect your numpy work much? You can add a REC_NUMPY
if needed.
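
To see why the union works: the libkshark.h hunk below makes "next" the
first member of struct kshark_entry, so the anonymous struct's "next"
lands on the same bytes and the list-walking code stays type-agnostic.
A minimal standalone sketch with trimmed stand-in types (not the real
headers):

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	struct pevent_record;			/* opaque stand-in */

	struct kshark_entry {
		struct kshark_entry	*next;	/* MUST BE FIRST ENTRY */
		uint64_t		ts;
	};

	struct rec_list {
		union {
			struct kshark_entry		entry;
			struct {
				struct rec_list		*next;
				struct pevent_record	*rec;
			};
		};
	};

	int main(void)
	{
		/* The overlay is only safe if both "next" pointers
		 * sit at offset 0. */
		assert(offsetof(struct rec_list, next) == 0);
		assert(offsetof(struct kshark_entry, next) == 0);
		return 0;
	}

The payoff in the REC_ENTRY case is one allocation per event: the node
is eventually handed to the caller as &rec_list[next_cpu]->entry, with
no separate kshark_entry malloc.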

Try it out and see if it improves things on your end:

Oh, and I pushed my patches.

-- Steve

Index: trace-cmd.git/kernel-shark-qt/src/libkshark.c
===================================================================
--- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.c
+++ trace-cmd.git/kernel-shark-qt/src/libkshark.c
@@ -503,12 +503,23 @@ static void kshark_set_entry_values(stru
 /* Quiet warnings over documenting simple structures */
 //! @cond Doxygen_Suppress
 struct rec_list {
-	struct pevent_record	*rec;
-	struct rec_list		*next;
+	union {
+		struct kshark_entry		entry;
+		struct {
+			struct rec_list		*next; /* Matches entry->next */
+			struct pevent_record	*rec;
+		};
+	};
 };
 //! @endcond
 
-static void free_rec_list(struct rec_list **rec_list, int n_cpus)
+enum rec_type {
+	REC_RECORD,
+	REC_ENTRY,
+};
+
+static void free_rec_list(struct rec_list **rec_list, int n_cpus,
+			  enum rec_type type)
 {
 	struct rec_list *temp_rec;
 	int cpu;
@@ -517,7 +528,8 @@ static void free_rec_list(struct rec_lis
 		while (rec_list[cpu]) {
 			temp_rec = rec_list[cpu];
 			rec_list[cpu] = temp_rec->next;
-			free_record(temp_rec->rec);
+			if (type == REC_RECORD)
+				free_record(temp_rec->rec);
 			free(temp_rec);
 		}
 	}
@@ -525,8 +537,9 @@ static void free_rec_list(struct rec_lis
 }
 
 static size_t get_records(struct kshark_context *kshark_ctx,
-			  struct rec_list ***rec_list)
+			  struct rec_list ***rec_list, enum rec_type type)
 {
+	struct event_filter *adv_filter = NULL;
 	struct pevent_record *rec;
 	struct rec_list **temp_next;
 	struct rec_list **cpu_list;
@@ -547,12 +560,50 @@ static size_t get_records(struct kshark_
 
 		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
 		while (rec) {
-			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
+			*temp_next = temp_rec = calloc(1, sizeof(*temp_rec));
 			if (!temp_rec)
 				goto fail;
 
-			temp_rec->rec = rec;
 			temp_rec->next = NULL;
+
+			switch (type) {
+			case REC_RECORD:
+				temp_rec->rec = rec;
+				break;
+			case REC_ENTRY: {
+				struct kshark_task_list *task;
+				struct kshark_entry *entry;
+				int ret;
+
+				if (!adv_filter)
+					adv_filter = kshark_ctx->advanced_event_filter;
+				entry = &temp_rec->entry;
+				kshark_set_entry_values(kshark_ctx, rec, entry);
+				task = kshark_add_task(kshark_ctx, entry->pid);
+				if (!task) {
+					free_record(rec);
+					goto fail;
+				}
+
+				/* Apply event filtering. */
+				ret = FILTER_NONE;
+				if (adv_filter->filters)
+					ret = pevent_filter_match(adv_filter, rec);
+
+				if (!kshark_show_event(kshark_ctx, entry->event_id) ||
+				    ret != FILTER_MATCH) {
+					unset_event_filter_flag(kshark_ctx, entry);
+				}
+
+				/* Apply task filtering. */
+				if (!kshark_show_task(kshark_ctx, entry->pid)) {
+					entry->visible &= ~kshark_ctx->filter_mask;
+				}
+				free_record(rec);
+				break;
+			} /* REC_ENTRY */
+			}
+
 			temp_next = &temp_rec->next;
 
 			++count;
@@ -566,13 +617,15 @@ static size_t get_records(struct kshark_
 	return total;
 
  fail:
-	free_rec_list(cpu_list, n_cpus);
+	free_rec_list(cpu_list, n_cpus, type);
 	return -ENOMEM;
 }
 
-static int pick_next_cpu(struct rec_list **rec_list, int n_cpus)
+static int pick_next_cpu(struct rec_list **rec_list, int n_cpus,
+			 enum rec_type type)
 {
 	uint64_t ts = 0;
+	uint64_t rec_ts;
 	int next_cpu = -1;
 	int cpu;
 
@@ -580,8 +633,16 @@ static int pick_next_cpu(struct rec_list
 		if (!rec_list[cpu])
 			continue;
 
-		if (!ts || rec_list[cpu]->rec->ts < ts) {
-			ts = rec_list[cpu]->rec->ts;
+		switch (type) {
+		case REC_RECORD:
+			rec_ts = rec_list[cpu]->rec->ts;
+			break;
+		case REC_ENTRY:
+			rec_ts = rec_list[cpu]->entry.ts;
+			break;
+		}
+		if (!ts || rec_ts < ts) {
+			ts = rec_ts;
 			next_cpu = cpu;
 		}
 	}
@@ -610,16 +671,11 @@ static int pick_next_cpu(struct rec_list
 ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 				struct kshark_entry ***data_rows)
 {
-	struct event_filter *adv_filter = kshark_ctx->advanced_event_filter;
-	struct kshark_task_list *task;
 	struct kshark_entry **rows;
-	struct kshark_entry *entry;
 	struct rec_list **rec_list;
-	struct rec_list *temp_rec;
-	struct pevent_record *rec;
+	enum rec_type type = REC_ENTRY;
 	size_t count, total = 0;
 	int n_cpus;
-	int ret;
 
 	if (*data_rows)
 		free(*data_rows);
@@ -631,63 +687,33 @@ ssize_t kshark_load_data_entries(struct
 	 *	 code simplier. We should revisit to see if we can
 	 *	 bring back the performance.
 	 */
-	total = get_records(kshark_ctx, &rec_list);
+	total = get_records(kshark_ctx, &rec_list, type);
 	if (total < 0)
 		goto fail;
 
+	n_cpus = tracecmd_cpus(kshark_ctx->handle);
+
 	rows = calloc(total, sizeof(struct kshark_entry *));
 	if(!rows)
-		goto fail;
-
-	n_cpus = tracecmd_cpus(kshark_ctx->handle);
+		goto fail_free;
 
 	for (count = 0; count < total; count++) {
 		int next_cpu;
 
-		next_cpu = pick_next_cpu(rec_list, n_cpus);
+		next_cpu = pick_next_cpu(rec_list, n_cpus, type);
 
 		if (next_cpu >= 0) {
-			entry = malloc(sizeof(struct kshark_entry));
-			if (!entry)
-				goto fail_free;
-
-			rec = rec_list[next_cpu]->rec;
-			rows[count] = entry;
-
-			kshark_set_entry_values(kshark_ctx, rec, entry);
-			task = kshark_add_task(kshark_ctx, entry->pid);
-			if (!task)
-				goto fail_free;
-
-			/* Apply event filtering. */
-			ret = FILTER_NONE;
-			if (adv_filter->filters)
-				ret = pevent_filter_match(adv_filter, rec);
-
-			if (!kshark_show_event(kshark_ctx, entry->event_id) ||
-			    ret != FILTER_MATCH) {
-				unset_event_filter_flag(kshark_ctx, entry);
-			}
-
-			/* Apply task filtering. */
-			if (!kshark_show_task(kshark_ctx, entry->pid)) {
-				entry->visible &= ~kshark_ctx->filter_mask;
-			}
-
-			temp_rec = rec_list[next_cpu];
+			rows[count] = &rec_list[next_cpu]->entry;
 			rec_list[next_cpu] = rec_list[next_cpu]->next;
-			free(temp_rec);
-			/* The record is no longer be referenced */
-			free_record(rec);
 		}
 	}
 
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	*data_rows = rows;
 	return total;
 
  fail_free:
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	for (count = 0; count < total; count++) {
 		if (!rows[count])
 			break;
@@ -717,11 +743,12 @@ ssize_t kshark_load_data_records(struct
 	struct pevent_record *rec;
 	struct rec_list **rec_list;
 	struct rec_list *temp_rec;
+	enum rec_type type = REC_RECORD;
 	size_t count, total = 0;
 	int n_cpus;
 	int pid;
 
-	total = get_records(kshark_ctx, &rec_list);
+	total = get_records(kshark_ctx, &rec_list, REC_RECORD);
 	if (total < 0)
 		goto fail;
 
@@ -734,7 +761,7 @@ ssize_t kshark_load_data_records(struct
 	for (count = 0; count < total; count++) {
 		int next_cpu;
 
-		next_cpu = pick_next_cpu(rec_list, n_cpus);
+		next_cpu = pick_next_cpu(rec_list, n_cpus, type);
 
 		if (next_cpu >= 0) {
 			rec = rec_list[next_cpu]->rec;
@@ -753,7 +780,7 @@ ssize_t kshark_load_data_records(struct
 	}
 
 	/* There should be no records left in rec_list */
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	*data_rows = rows;
 	return total;
 
Index: trace-cmd.git/kernel-shark-qt/src/libkshark.h
===================================================================
--- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.h
+++ trace-cmd.git/kernel-shark-qt/src/libkshark.h
@@ -33,6 +33,9 @@ extern "C" {
  * info etc.) is available on-demand via the offset into the trace file.
  */
 struct kshark_entry {
+	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
+	struct kshark_entry *next; /* MUST BE FIRST ENTRY */
+
 	/**
 	 * A bit mask controlling the visibility of the entry. A value of OxFF
 	 * would mean that the entry is visible everywhere. Use
@@ -60,9 +63,6 @@ struct kshark_entry {
 	 * started.
 	 */
 	uint64_t	ts;
-
-	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
-	struct kshark_entry *next;
 };
 
 /** Size of the task's hash table. */
Yordan Karadzhov July 6, 2018, 10:51 a.m. UTC | #10
Hi Steve,

On  6.07.2018 01:05, Steven Rostedt wrote:
> Try it out and see if it improves things on your end:
> 
> Oh, and I pushed my patches.
> 

Please push the patch for trace-input.c as well.
Thanks!

Yordan
Yordan Karadzhov July 6, 2018, 11:36 a.m. UTC | #11
Ohh, the patch has been pushed already.
I am sorry!
Yordan

On  6.07.2018 13:51, Yordan Karadzhov (VMware) wrote:
> Hi Steve,
> 
> On  6.07.2018 01:05, Steven Rostedt wrote:
>> Try it out and see if it improves things on your end:
>>
>> Oh, and I pushed my patches.
>>
> 
> Please push the patch for trace-input.c as well.
> Thanks!
> 
> Yordan
>
Yordan Karadzhov July 6, 2018, 12:46 p.m. UTC | #12
On  6.07.2018 01:05, Steven Rostedt wrote:
> On Wed, 4 Jul 2018 14:39:01 +0300
> "Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:
> 
>>>    /**
>>>     * @brief Load the content of the trace data file into an array of
>>>     *	  kshark_entries. This function provides fast loading, however the
>>
>> Actually, after applying the patch for trace-input.c and this patch on
>> top, kshark_load_data_entries() becomes slower than
>> kshark_load_data_records(). Just make this clear in the description of
>> the functions.
>>
>>> @@ -521,9 +587,11 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
>>>    				struct kshark_entry ***data_rows)
> 
> In my tests, it showed a 30% slowdown. I really didn't like that. So I
> looked at why it was so slow, and it appeared to be twofold. One was that I
> was doing a double allocation (allocating for both the rec_list and the
> entry); the other was that holding on to all records still appeared to
> slow things down a bit. Even with the array of storage, it made it
> slow.
> 
> I decided to make it closer to the original but still with helper
> functions. The key was modifying the struct rec_list to be:
> 
> struct rec_list {
> 	union {
> 		struct kshark_entry		entry;
> 		struct {
> 			struct rec_list		*next; /* Matches entry->next */
> 			struct pevent_record	*rec;
> 		};
> 	};
> };
> 
> And creating a enum:
> 
> enum rec_type {
> 	REC_RECORD,
> 	REC_ENTRY,
> };
> 
> 
> That would allow the functions to know what type they are dealing with.
> Will this change affect your numpy work much? You can add a REC_NUMPY
> if needed.
> 
> Try it out and see if it improves things on your end:
> 
I really like this solution. And yes, it improves the performance.


> Oh, and I pushed my patches.
> 
> -- Steve
> 
> Index: trace-cmd.git/kernel-shark-qt/src/libkshark.c
> ===================================================================
> --- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.c
> +++ trace-cmd.git/kernel-shark-qt/src/libkshark.c
> @@ -503,12 +503,23 @@ static void kshark_set_entry_values(stru
>   /* Quiet warnings over documenting simple structures */
>   //! @cond Doxygen_Suppress
>   struct rec_list {
> -	struct pevent_record	*rec;
> -	struct rec_list		*next;
> +	union {
> +		struct kshark_entry		entry;
> +		struct {
> +			struct rec_list		*next; /* Matches entry->next */
> +			struct pevent_record	*rec;
> +		};
> +	};
>   };
>   //! @endcond
>   
> -static void free_rec_list(struct rec_list **rec_list, int n_cpus)
> +enum rec_type {
> +	REC_RECORD,
> +	REC_ENTRY,
> +};
> +
> +static void free_rec_list(struct rec_list **rec_list, int n_cpus,
> +			  enum rec_type type)
>   {
>   	struct rec_list *temp_rec;
>   	int cpu;
> @@ -517,7 +528,8 @@ static void free_rec_list(struct rec_lis
>   		while (rec_list[cpu]) {
>   			temp_rec = rec_list[cpu];
>   			rec_list[cpu] = temp_rec->next;
> -			free_record(temp_rec->rec);
> +			if (type == REC_RECORD)
> +				free_record(temp_rec->rec);
>   			free(temp_rec);
>   		}
>   	}
> @@ -525,8 +537,9 @@ static void free_rec_list(struct rec_lis
>   }
>   
>   static size_t get_records(struct kshark_context *kshark_ctx,
> -			  struct rec_list ***rec_list)
> +			  struct rec_list ***rec_list, enum rec_type type)
>   {
> +	struct event_filter *adv_filter = NULL;
>   	struct pevent_record *rec;
>   	struct rec_list **temp_next;
>   	struct rec_list **cpu_list;
> @@ -547,12 +560,50 @@ static size_t get_records(struct kshark_
>   
>   		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
>   		while (rec) {
> -			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
> +			*temp_next = temp_rec = calloc(1, sizeof(*temp_rec));
>   			if (!temp_rec)
>   				goto fail;
>   
> -			temp_rec->rec = rec;
>   			temp_rec->next = NULL;
> +
> +			switch (type) {
> +			case REC_RECORD:
> +				temp_rec->rec = rec;
> +				break;
> +			case REC_ENTRY: {
> +				struct kshark_task_list *task;
> +				struct kshark_entry *entry;
> +				int ret;
> +
> +				if (!adv_filter)
> +					adv_filter = kshark_ctx->advanced_event_filter;

I defined "adv_filter" just as a short version of 
"kshark_ctx->advanced_event_filter", because of the 80-column limit on 
line length. You can move this outside of the while() loop.

> +				entry = &temp_rec->entry;
> +				kshark_set_entry_values(kshark_ctx, rec, entry);

Maybe the call of kshark_add_task() can be moved to 
kshark_load_data_entries(),
just to be consistent with kshark_load_data_records().

> +				task = kshark_add_task(kshark_ctx, entry->pid);
> +				if (!task) {
> +					free_record(rec);
> +					goto fail;
> +				}
> +
> +				/* Apply event filtering. */
> +				ret = FILTER_NONE;

Oops, I made a bug here. It has to be
				ret = FILTER_MATCH;

> +				if (adv_filter->filters)
> +					ret = pevent_filter_match(adv_filter, rec);
> +
> +				if (!kshark_show_event(kshark_ctx, entry->event_id) ||
> +				    ret != FILTER_MATCH) {
> +					unset_event_filter_flag(kshark_ctx, entry);
> +				}
> +
> +				/* Apply task filtering. */
> +				if (!kshark_show_task(kshark_ctx, entry->pid)) {
> +					entry->visible &= ~kshark_ctx->filter_mask;
> +				}
> +				free_record(rec);
> +				break;
> +			} /* REC_ENTRY */
> +			}
> +
>   			temp_next = &temp_rec->next;
>   
>   			++count;
> @@ -566,13 +617,15 @@ static size_t get_records(struct kshark_
>   	return total;
>   
>    fail:
> -	free_rec_list(cpu_list, n_cpus);
> +	free_rec_list(cpu_list, n_cpus, type);
>   	return -ENOMEM;
>   }
>   

[..]

>   
> Index: trace-cmd.git/kernel-shark-qt/src/libkshark.h
> ===================================================================
> --- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.h
> +++ trace-cmd.git/kernel-shark-qt/src/libkshark.h
> @@ -33,6 +33,9 @@ extern "C" {
>    * info etc.) is available on-demand via the offset into the trace file.
>    */
>   struct kshark_entry {
> +	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
> +	struct kshark_entry *next; /* MUST BE FIRST ENTRY */
> +
Correct, thanks!

Yordan

>   	/**
>   	 * A bit mask controlling the visibility of the entry. A value of OxFF
>   	 * would mean that the entry is visible everywhere. Use
> @@ -60,9 +63,6 @@ struct kshark_entry {
>   	 * started.
>   	 */
>   	uint64_t	ts;
> -
> -	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
> -	struct kshark_entry *next;
>   };
>   
>   /** Size of the task's hash table. */
>
Steven Rostedt July 6, 2018, 6:21 p.m. UTC | #13
On Fri, 6 Jul 2018 15:46:54 +0300
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

> >   static size_t get_records(struct kshark_context *kshark_ctx,
> > -			  struct rec_list ***rec_list)
> > +			  struct rec_list ***rec_list, enum rec_type type)
> >   {
> > +	struct event_filter *adv_filter = NULL;
> >   	struct pevent_record *rec;
> >   	struct rec_list **temp_next;
> >   	struct rec_list **cpu_list;
> > @@ -547,12 +560,50 @@ static size_t get_records(struct kshark_
> >   
> >   		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
> >   		while (rec) {
> > -			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
> > +			*temp_next = temp_rec = calloc(1, sizeof(*temp_rec));
> >   			if (!temp_rec)
> >   				goto fail;
> >   
> > -			temp_rec->rec = rec;
> >   			temp_rec->next = NULL;
> > +
> > +			switch (type) {
> > +			case REC_RECORD:
> > +				temp_rec->rec = rec;
> > +				break;
> > +			case REC_ENTRY: {
> > +				struct kshark_task_list *task;
> > +				struct kshark_entry *entry;
> > +				int ret;
> > +
> > +				if (!adv_filter)
> > +					adv_filter = kshark_ctx->advanced_event_filter;  
> 
> I defined "adv_filter" just as a short version of 
> "kshark_ctx->advanced_event_filter", because of the 80 columns limit on 
> the length of lines. You can move this outside of the while() loop;

I didn't want to put it outside the loop, because I didn't want it to
be required for REC_RECORD. But I think outside the loop, I can just do:

	if (type == REC_ENTRY)
		adv_filter = ....

> 
> > +				entry = &temp_rec->entry;
> > +				kshark_set_entry_values(kshark_ctx, rec, entry);  
> 
> Maybe the call of kshark_add_task() can be moved to 
> kshark_load_data_entries(),
> just to be consistent with kshark_load_data_records()  

The problem is that "rec" is freed. That was required as part of the
speedup.

> 
> > +				task = kshark_add_task(kshark_ctx, entry->pid);
> > +				if (!task) {
> > +					free_record(rec);
> > +					goto fail;
> > +				}
> > +
> > +				/* Apply event filtering. */
> > +				ret = FILTER_NONE;  
> 
> Oops, I made a bug here. It has to be
> 				ret = FILTER_MATCH;

Can you send a patch? I don't want this to be a fix; that should be a
separate patch.


> 
> > +				if (adv_filter->filters)
> > +					ret = pevent_filter_match(adv_filter, rec);
> > +
> > +				if (!kshark_show_event(kshark_ctx, entry->event_id) ||
> > +				    ret != FILTER_MATCH) {
> > +					unset_event_filter_flag(kshark_ctx, entry);
> > +				}
> > +
> > +				/* Apply task filtering. */
> > +				if (!kshark_show_task(kshark_ctx, entry->pid)) {
> > +					entry->visible &= ~kshark_ctx->filter_mask;
> > +				}
> > +				free_record(rec);
> > +				break;
> > +			} /* REC_ENTRY */
> > +			}
> > +
> >   			temp_next = &temp_rec->next;
> >   
> >   			++count;
> > @@ -566,13 +617,15 @@ static size_t get_records(struct kshark_
> >   	return total;
> >   
> >    fail:
> > -	free_rec_list(cpu_list, n_cpus);
> > +	free_rec_list(cpu_list, n_cpus, type);
> >   	return -ENOMEM;
> >   }
> >     
> 
> [..]
> 
> >   
> > Index: trace-cmd.git/kernel-shark-qt/src/libkshark.h
> > ===================================================================
> > --- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.h
> > +++ trace-cmd.git/kernel-shark-qt/src/libkshark.h
> > @@ -33,6 +33,9 @@ extern "C" {
> >    * info etc.) is available on-demand via the offset into the trace file.
> >    */
> >   struct kshark_entry {
> > +	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
> > +	struct kshark_entry *next; /* MUST BE FIRST ENTRY */
> > +  
> Correct, thanks!

Yep, this is a common trick I use.

-- Steve

> 
> Yordan
> 
> >   	/**
> >   	 * A bit mask controlling the visibility of the entry. A value of OxFF
> >   	 * would mean that the entry is visible everywhere. Use
> > @@ -60,9 +63,6 @@ struct kshark_entry {
> >   	 * started.
> >   	 */
> >   	uint64_t	ts;
> > -
> > -	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
> > -	struct kshark_entry *next;
> >   };
> >   
> >   /** Size of the task's hash table. */
> >
Steven Rostedt July 6, 2018, 6:24 p.m. UTC | #14
On Fri, 6 Jul 2018 15:46:54 +0300
"Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:

> > --- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.h
> > +++ trace-cmd.git/kernel-shark-qt/src/libkshark.h
> > @@ -33,6 +33,9 @@ extern "C" {
> >    * info etc.) is available on-demand via the offset into the trace file.
> >    */
> >   struct kshark_entry {
> > +	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
> > +	struct kshark_entry *next; /* MUST BE FIRST ENTRY */
> > +  
> Correct, thanks!

BTW, I was going to ask. Do you use entry->next elsewhere (to get per
cpu lists)?

-- Steve
Steven Rostedt July 6, 2018, 6:43 p.m. UTC | #15
On Fri, 6 Jul 2018 14:21:04 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> >   
> > > +				entry = &temp_rec->entry;
> > > +				kshark_set_entry_values(kshark_ctx, rec, entry);    
> > 
> > Maybe the call of kshark_add_task() can be moved to 
> > kshark_load_data_entries(),
> > just to be consistent with kshark_load_data_records()  
> 
> The problem is that "rec" is freed. That was required as part of the
> speedup.

Ah, you said kshark_add_task(); I read it as kshark_set_entry_values().
I'll look at it. But an FYI on replying: when making comments, one
usually references the code above, not below, which is where I got
confused.

Thanks!

-- Steve


> 
> >   
> > > +				task = kshark_add_task(kshark_ctx, entry->pid);
> > > +				if (!task) {
> > > +					free_record(rec);
> > > +					goto fail;
> > > +				}
Yordan Karadzhov July 9, 2018, 3:54 p.m. UTC | #16
On  6.07.2018 21:21, Steven Rostedt wrote:
>>> +
>>> +				/* Apply event filtering. */
>>> +				ret = FILTER_NONE;
>> Oops, I made a bug here. It has to be
>> 				ret = FILTER_MATCH;
> Can you send a patch? I don't want this to be a fix; that should be a
> separate patch.
> 
> 

Yes, I will do this.

Thanks!
Yordan
Yordan Karadzhov July 9, 2018, 3:54 p.m. UTC | #17
On  6.07.2018 21:24, Steven Rostedt wrote:
> On Fri, 6 Jul 2018 15:46:54 +0300
> "Yordan Karadzhov (VMware)" <y.karadz@gmail.com> wrote:
> 
>>> --- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.h
>>> +++ trace-cmd.git/kernel-shark-qt/src/libkshark.h
>>> @@ -33,6 +33,9 @@ extern "C" {
>>>     * info etc.) is available on-demand via the offset into the trace file.
>>>     */
>>>    struct kshark_entry {
>>> +	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
>>> +	struct kshark_entry *next; /* MUST BE FIRST ENTRY */
>>> +
>> Correct, thanks!
> 
> BTW, I was going to ask. Do you use entry->next elsewhere (to get per
> cpu lists)?
> 

Yes, I am using it.

Thanks!
Yordan


> -- Steve
>