@@ -84,6 +84,11 @@ void pdie(const char *fmt, ...)
/* Used for stats */
#define DELTA_NAME "delta"
+/*
+ * Keep track of when a CPU is running tasks and when it is
+ * idle. Use the CPU number to match the timings in the
+ * sched_switch event.
+ */
static struct traceeval_type cpu_delta_keys[] = {
{
.type = TRACEEVAL_TYPE_NUMBER,
@@ -96,6 +101,12 @@ static void assign_cpu_delta_keys(struct traceeval_data keys[1], int cpu)
TRACEEVAL_SET_NUMBER(keys[0], cpu);
}
+/*
+ * When scheduling, record the state the CPU was in.
+ * It only cares about IDLE vs RUNNING. If the idle task is being
+ * scheduled in, mark it the staet as IDLE, otherwise mark it
+ * as RUNNING.
+ */
static struct traceeval_type cpu_delta_vals[] = {
{
.type = TRACEEVAL_TYPE_NUMBER,
@@ -108,6 +119,9 @@ static void assign_cpu_delta_vals(struct traceeval_data vals[1], int state)
TRACEEVAL_SET_NUMBER(vals[0], state);
}
+/*
+ * The output will show all the CPUs and their IDLE vs RUNNING states.
+ */
static struct traceeval_type cpu_keys[] = {
{
.type = TRACEEVAL_TYPE_NUMBER,
@@ -125,6 +139,10 @@ static void assign_cpu_keys(struct traceeval_data keys[2], int cpu, int state)
TRACEEVAL_SET_NUMBER(keys[1], state);
}
+/*
+ * The mapping of CPU and state will track the timings of how long the
+ * CPU was in that state.
+ */
static struct traceeval_type cpu_vals[] = {
{
.type = TRACEEVAL_TYPE_DELTA,
@@ -139,6 +157,10 @@ static void assign_cpu_vals(struct traceeval_data vals[1],
TRACEEVAL_SET_DELTA(vals[0], delta, timestamp);
}
+/*
+ * When tracking tasks and threads, remember the task id (PID)
+ * when scheduling out (for sleep state) or in (for running state).
+ */
static struct traceeval_type task_delta_keys[] = {
{
.type = TRACEEVAL_TYPE_NUMBER,
@@ -151,6 +173,11 @@ static void assign_task_delta_keys(struct traceeval_data keys[1], int pid)
TRACEEVAL_SET_NUMBER(keys[0], pid);
}
+/*
+ * When finishing the timings, will need the name of the task, the
+ * state it was in: (RUNNING, PREEMPTED, BLOCKED, IDLE, or other)
+ * and the priority it had. This will be saved for the output.
+ */
static struct traceeval_type task_delta_vals[] = {
{
.type = TRACEEVAL_TYPE_NUMBER,
@@ -174,6 +201,12 @@ static void assign_task_delta_vals(struct traceeval_data vals[3],
TRACEEVAL_SET_NUMBER(vals[2], prio);
}
+/*
+ * Will output all the processes by their names. This means the two
+ * tasks with the same name will be grouped together (even though they
+ * may not be threads). The tasks will also be broken up by what state
+ * they were in: RUNNING, BLOCKED, PREEMPTED, SLEEPING.
+ */
static struct traceeval_type task_keys[] = {
{
.type = TRACEEVAL_TYPE_STRING,
@@ -192,6 +225,14 @@ static void assign_task_keys(struct traceeval_data keys[2],
TRACEEVAL_SET_NUMBER(keys[1], state);
}
+/*
+ * For each state the process is in, record the time delta for
+ * that state. Also, only for the RUNNING state, this will
+ * daisy chain another traceeval for each thread. That is,
+ * for each unique thread id (PID), there will be a traceeval
+ * histogram of those threads denoted by the teval_thread, and
+ * that will be saved in the "data" field.
+ */
static struct traceeval_type task_vals[] = {
{
.type = TRACEEVAL_TYPE_POINTER,
@@ -211,6 +252,12 @@ static void assign_task_vals(struct traceeval_data vals[2],
TRACEEVAL_SET_DELTA(vals[1], delta, timestamp);
}
+/*
+ * Each recorded process will have a traceeval to save all the
+ * threads within it. The threads will be mapped by their TID (PID)
+ * the state they were in: RUNNING, BLOCKED, PREEMPTED, SLEEPING
+ * and their priority.
+ */
static struct traceeval_type thread_keys[] = {
{
.type = TRACEEVAL_TYPE_NUMBER,
@@ -234,6 +281,9 @@ static void assign_thread_keys(struct traceeval_data keys[3],
TRACEEVAL_SET_NUMBER(keys[2], prio);
}
+/*
+ * Save the timings of the thread/state/prio keys.
+ */
static struct traceeval_type thread_vals[] = {
{
.type = TRACEEVAL_TYPE_DELTA,
@@ -326,6 +376,12 @@ static struct process_data *alloc_pdata(struct task_data *tdata, const char *com
return pdata;
}
+/*
+ * Each process will have a traceeval for all their threads. The
+ * thread traceeval descriptor will be saved in the process/RUNNING
+ * field. If a process is never running, then it will not have any
+ * threads!
+ */
static struct process_data *
get_process_data(struct task_data *tdata, const char *comm)
{
@@ -360,6 +416,10 @@ static void update_cpu_data(struct task_data *tdata, int cpu, int state,
traceeval_insert(tdata->teval_cpus, cpu_keys, vals);
}
+/*
+ * When a CPU is scheduling in idle, record the running state,
+ * and start the idle timings.
+ */
static void update_cpu_to_idle(struct task_data *tdata, struct tep_record *record)
{
@@ -387,6 +447,10 @@ static void update_cpu_to_idle(struct task_data *tdata, struct tep_record *recor
record->ts);
}
+/*
+ * When a CPU is scheduling a task, if idle is scheduling out, stop
+ * the idle timings and start or continue the running timings.
+ */
static void update_cpu_to_running(struct task_data *tdata, struct tep_record *record)
{
struct traceeval_data delta_keys[1];
@@ -465,6 +529,7 @@ static void start_running_thread(struct task_data *tdata,
traceeval_results_release(tdata->teval_tasks, results);
}
+ /* This task is running, so start timing the running portion */
assign_task_delta_vals(vals, RUNNING, comm, prio);
traceeval_delta_start(tdata->teval_tasks, delta_keys, vals, record->ts);
@@ -524,6 +589,7 @@ static void sched_out(struct task_data *tdata, const char *comm,
die("Could not read sched_switch next_pid for record");
state = get_stop_state(val);
+ /* Stop the RUNNING timings of the tasks that is scheduling out */
assign_task_delta_keys(delta_keys, pid);
ret = traceeval_delta_stop(tdata->teval_tasks, delta_keys, &results,
@@ -539,10 +605,12 @@ static void sched_out(struct task_data *tdata, const char *comm,
if (ret <= 0)
return;
+ /* What is scheduling out should be considered "running" */
if (old_state != RUNNING)
die("Not running %d from %lld to %lld",
old_state, val, record->ts);
+ /* Now start the "running" timings of the task scheduling in */
assign_task_keys(task_keys, comm, RUNNING);
pdata = get_process_data(tdata, comm);
@@ -551,6 +619,7 @@ static void sched_out(struct task_data *tdata, const char *comm,
traceeval_insert(tdata->teval_tasks, task_keys, task_vals);
+ /* Update the individual thread as well */
assign_thread_keys(thread_keys, pid, RUNNING, prio);
assign_thread_vals(vals, delta, record->ts);
@@ -964,6 +1033,11 @@ static void free_tdata(struct task_data *tdata)
{
}
+/*
+ * When the trace ended, there was likely tasks still running on
+ * the CPU (or sleeping). Just stop all the timings and put them
+ * into the database as well.
+ */
static void finish_leftovers(struct task_data *data)
{
const struct traceeval_data *results;