diff mbox series

[v4,05/11] trace-cruncher: Refactor the examples

Message ID 20210707132158.68520-6-y.karadz@gmail.com (mailing list archive)
State Accepted
Headers show
Series Build trace-cruncher as Python package | expand

Commit Message

Yordan Karadzhov July 7, 2021, 1:21 p.m. UTC
For the moment we will keep only one example, "sched_wakeup.py".
"gpareto_fit.py" gets removed because it doesn't really demonstrate
anything conceptually different compared to "sched_wakeup.py". The
difference comes from the more advanced statistical analysis of the data;
however, this goes beyond the scope of trace-cruncher. "page_faults.py"
gets removed only temporarily, because it requires some functionality
that is not yet implemented in the ftrace libraries. Once that
functionality is made available, the example will be added to
trace-cruncher again.

Signed-off-by: Yordan Karadzhov (VMware) <y.karadz@gmail.com>
---
 examples/gpareto_fit.py  | 328 ---------------------------------------
 examples/page_faults.py  | 120 --------------
 examples/sched_wakeup.py |  70 ++++-----
 3 files changed, 30 insertions(+), 488 deletions(-)
 delete mode 100755 examples/gpareto_fit.py
 delete mode 100755 examples/page_faults.py
diff mbox series

Patch

diff --git a/examples/gpareto_fit.py b/examples/gpareto_fit.py
deleted file mode 100755
index 4a2bb2a..0000000
--- a/examples/gpareto_fit.py
+++ /dev/null
@@ -1,328 +0,0 @@ 
-#!/usr/bin/env python3
-
-"""
-SPDX-License-Identifier: LGPL-2.1
-
-Copyright 2019 VMware Inc, Yordan Karadzhov <ykaradzhov@vmware.com>
-"""
-
-import sys
-import json
-
-import matplotlib.pyplot as plt
-import scipy.stats as st
-import numpy as np
-
-from scipy.stats import genpareto as gpareto
-from scipy.optimize import curve_fit as cfit
-
-from ksharksetup import setup
-# Always call setup() before importing ksharkpy!!!
-setup()
-
-import ksharkpy as ks
-
-def chi2_test(hist, n_bins, c, loc, scale, norm):
-    """ Simple Chi^2 test for the goodness of the fit.
-    """
-    chi2 = n_empty_bins = 0
-    for i in range(len(hist[0])):
-        if hist[0][i] == 0:
-            # Ignore this empty bin.
-            n_empty_bins += 1
-            continue
-
-        # Get the center of bin i.
-        x = (hist[1][i] + hist[1][i + 1]) / 2
-        fit_val = gpareto.pdf(x, c=c, loc=loc, scale=scale)
-        chi = (fit_val - hist[0][i]) / np.sqrt(hist[0][i])
-        chi2 += chi**2
-
-    return  norm * chi2 / (n_bins - n_empty_bins)
-
-def quantile(p, P, c, loc, scale):
-    """ The quantile function of the Generalized Pareto distribution.
-    """
-    return loc + scale / c * ((P / p)**(c) - 1)
-
-
-def dq_dscale(p, P, c, scale):
-    """ Partial derivative of the quantile function.
-    """
-    return ((P / p)**c - 1) / c
-
-
-def dq_dc(p, P, c, scale):
-    """ Partial derivative of the quantile function.
-    """
-    return (scale * (np.log(P / p) * (P / p)**c ) / c
-          - scale * ((P / p)**c - 1) / (c**2))
-
-
-def dq_dP(p, P, c, scale):
-    """ Partial derivative of the quantile function.
-    """
-    return scale / P * (P / p)**c
-
-
-def error_P(n, N):
-    return np.sqrt(n) / N
-
-
-def error(p, P, c, scale, err_P, err_c, err_scale):
-    return np.sqrt((dq_dP(p, P, c, scale) * err_P)**2
-                 + (dq_dc(p, P, c, scale) * err_c)**2
-                 + (dq_dscale(p, P, c, scale) * err_scale)**2)
-
-
-def quantile_conf_bound(p, P, n, c, loc, scale, err_P, err_c, err_scale):
-    return (quantile(p=p, P=P, c=c, loc=loc, scale=scale)
-          + n * error(p=p, P=P, c=c, scale=scale,
-                      err_P=err_P, err_c=err_c, err_scale=err_scale));
-
-
-def get_latency(t0, t1):
-    """ Get the value of the latency in microseconds
-    """
-    return (t1 - t0) / 1000 - 1000
-
-
-def get_cpu_data(data, task_pid, start_id, stop_id, threshold):
-    """ Loop over the tracing data for a given CPU and find all latencies bigger
-        than the specified threshold.
-    """
-    # Get the size of the data.
-    size = ks.data_size(data)
-    #print("data size:", size)
-
-    time_start = -1
-    dt_ot = []
-    tot = 0
-    i = 0
-    i_start = 0;
-
-    while i < size:
-        if data["pid"][i] == task_pid and data['event'][i] == start_id:
-            time_start = data['time'][i]
-            i_start = i;
-            i = i + 1
-
-            while i < size:
-                if data["pid"][i] == task_pid and data['event'][i] == stop_id:
-                    delta = get_latency(time_start, data['time'][i])
-
-                    if delta > threshold and tot != 0:
-                        print('lat. over threshold: ', delta, i_start, i)
-                        dt_ot.append([delta, i_start, i])               
-
-                    tot = tot + 1
-                    break
-
-                i = i + 1
-        i = i + 1
-
-    print(task_pid, 'tot:', len(dt_ot), '/', tot)
-    return dt_ot, tot
-
-
-def make_ks_session(fname, data, start, stop):
-    """ Save a KernelShark session descriptor file (Json).
-        The sessions is zooming around the maximum observed latency.
-    """
-    sname = 'max_lat.json'
-    ks.new_session(fname, sname)
-    i_start = int(start)
-    i_stop = int(stop)
-
-    with open(sname, 'r+') as s:
-        session = json.load(s)
-        session['TaskPlots'] = [int(data['pid'][i_start])]
-        session['CPUPlots'] = [int(data['cpu'][i_start])]
-
-        delta = data['time'][i_stop] - data['time'][i_start]
-        tmin = int(data['time'][i_start] - delta)
-        tmax = int(data['time'][i_stop] + delta)
-        session['Model']['range'] = [tmin, tmax]
-
-        session['Markers']['markA']['isSet'] = True
-        session['Markers']['markA']['row'] = i_start)
-
-        session['Markers']['markB']['isSet'] = True
-        session['Markers']['markB']['row'] = i_stop)
-
-        session['ViewTop'] = i_start) - 5
-
-        ks.save_session(session, s)
-
-
-fname = str(sys.argv[1])
-status = ks.open_file(fname)
-if not status:
-    print ("Failed to open file ", fname)
-    sys.exit()
-
-ks.register_plugin('reg_pid')
-data = ks.load_data()
-
-# Get the Event Ids of the hrtimer_start and print events.
-start_id = ks.event_id('timer', 'hrtimer_start')
-stop_id = ks.event_id('ftrace', 'print')
-print("start_id", start_id)
-print("stop_id", stop_id)
-
-tasks = ks.get_tasks()
-jdb_pids = tasks['jitterdebugger']
-print('jitterdeburrer pids:', jdb_pids)
-jdb_pids.pop(0)
-
-threshold = 10
-data_ot = []
-tot = 0
-
-for task_pid in jdb_pids:
-    cpu_data, cpu_tot = get_cpu_data(data=data,
-                                     task_pid=task_pid,
-                                     start_id=start_id,
-                                     stop_id=stop_id,
-                                     threshold=threshold)
-
-    data_ot.extend(cpu_data)
-    tot += cpu_tot
-
-ks.close()
-
-dt_ot = np.array(data_ot)
-np.savetxt('peak_over_threshold_loaded.txt', dt_ot)
-
-make_ks_session(fname=fname, data=data, i_start=int(dt_ot[i_max_lat][1]),
-                                        i_stop=int(dt_ot[i_max_lat][2]))
-
-P = len(dt_ot) / tot
-err_P = error_P(n=len(dt_ot), N=tot)
-print('tot:', tot, ' P =', P)
-
-lat = dt_ot[:,0]
-#print(lat)
-i_max_lat = lat.argmax()
-print('imax:', i_max_lat, int(dt_ot[i_max_lat][1]))
-
-print('max', np.amax(dt_ot))
-
-start = threshold
-stop = 31
-n_bins = (stop - start) * 2
-
-bin_size = (stop - start) / n_bins
-
-x = np.linspace(start=start + bin_size / 2,
-                stop=stop - bin_size / 2,
-                num=n_bins)
-
-bins_ot = np.linspace(start=start, stop=stop, num=n_bins + 1)
-#print(bins_ot)
-
-fig, ax = plt.subplots(nrows=2, ncols=2)
-fig.tight_layout()
-ax[-1, -1].axis('off')
-
-hist_ot = ax[0][0].hist(x=lat, bins=bins_ot, histtype='stepfilled', alpha=0.3)
-ax[0][0].set_xlabel('latency [\u03BCs]', fontsize=8)
-ax[0][0].set_yscale('log')
-#print(hist_ot[0])
-
-hist_ot_norm = ax[1][0].hist(x=lat, bins=bins_ot,
-                             density=True, histtype='stepfilled', alpha=0.3)
-
-# Fit using the fitter of the genpareto class (shown in red).
-ret = gpareto.fit(lat, loc=threshold)
-ax[1][0].plot(x, gpareto.pdf(x, c=ret[0],  loc=ret[1],  scale=ret[2]),
-              'r-', lw=1, color='red',  alpha=0.8)
-
-ax[1][0].set_xlabel('latency [\u03BCs]', fontsize=8)
-print(ret)
-print('\ngoodness-of-fit: ' + '{:03.3f}'.format(chi2_test(hist_ot_norm,
-                                                          n_bins=n_bins,
-                                                          c=ret[0],
-                                                          loc=ret[1],
-                                                          scale=ret[2],
-                                                          norm=len(lat))))
-
-print("\n curve_fit:")
-# Fit using the curve_fit fitter. Fix the value of the "loc" parameter.
-popt, pcov = cfit(lambda x, c, scale: gpareto.pdf(x, c=c, loc=threshold, scale=scale),
-                  x, hist_ot_norm[0],
-                  p0=[ret[0], ret[2]])
-
-print(popt)
-print(pcov)
-
-ax[1][0].plot(x, gpareto.pdf(x, c=popt[0], loc=threshold, scale=popt[1]),
-              'r-', lw=1, color='blue', alpha=0.8)
-
-fit_legend = str('\u03BE = ' + '{:05.3f}'.format(popt[0]) +
-                 ' +- ' + '{:05.3f}'.format(pcov[0][0]**0.5) +
-                 ' (' + '{:03.2f}'.format(pcov[0][0]**0.5 / abs(popt[0]) * 100) + '%)')
-
-fit_legend += str('\n\u03C3 = ' + '{:05.3f}'.format(popt[1]) +
-                  ' +- ' + '{:05.3f}'.format(pcov[1][1]**0.5) +
-                  ' (' + '{:03.2f}'.format(pcov[1][1]**0.5 / abs(popt[1]) * 100) + '%)')
-
-fit_legend += '\n\u03BC = ' + str(threshold) + ' (const)'
-
-fit_legend += '\ngoodness-of-fit: ' + '{:03.3f}'.format(chi2_test(hist_ot_norm,
-                                                        n_bins=n_bins,
-                                                        c=popt[0],
-                                                        loc=threshold,
-                                                        scale=popt[1],
-                                                        norm=len(lat)))
-print(fit_legend)
-
-ax[0][1].set_xscale('log')
-##ax[0][1].set_yscale('log')
-ax[0][1].set_xlabel('Return period', fontsize=8)
-ax[0][1].set_ylabel('Return level [\u03BCs]', fontsize=6)
-ax[0][1].grid(True, linestyle=":", which="both")
-
-y = np.linspace(200000, 5000000, 400)
-ax[0][1].plot(y,
-              quantile(1 / y,
-                       P=P,
-                       c=popt[0],
-                       loc=threshold,
-                       scale=popt[1]),
-              'r-', lw=1, color='blue', alpha=0.8)
-
-ax[0][1].plot(y,
-              quantile_conf_bound(1 / y,
-                                  P=P,
-                                  n=+1, 
-                                  c=popt[0],
-                                  loc=threshold,
-                                  scale=popt[1],
-                                  err_P=err_P,
-                                  err_c= pcov[0][0]**0.5,
-                                  err_scale=pcov[1][1]**0.5),
-              'r-', lw=1, color='green', alpha=0.8)
-
-ax[0][1].plot(y,
-              quantile_conf_bound(1 / y,
-                                  P=P,
-                                  n=-1, 
-                                  c=popt[0],
-                                  loc=threshold,
-                                  scale=popt[1],
-                                  err_P=err_P,
-                                  err_c= pcov[0][0]**0.5,
-                                  err_scale=pcov[1][1]**0.5),
-              'r-', lw=1, color='green', alpha=0.8)
-
-props = dict(boxstyle='round', color='black', alpha=0.05)
-
-ax[1][1].text(0.05, 0.85,
-              fit_legend,
-              fontsize=9,
-              verticalalignment='top',
-              bbox=props)
-
-plt.savefig('figfit-all-loaded.png')
-#plt.show()
diff --git a/examples/page_faults.py b/examples/page_faults.py
deleted file mode 100755
index 446b12d..0000000
--- a/examples/page_faults.py
+++ /dev/null
@@ -1,120 +0,0 @@ 
-#!/usr/bin/env python3
-
-"""
-SPDX-License-Identifier: LGPL-2.1
-
-Copyright 2019 VMware Inc, Yordan Karadzhov <ykaradzhov@vmware.com>
-"""
-
-import os
-import sys
-import subprocess as sp
-import json
-
-import pprint as pr
-import matplotlib.pyplot as plt
-import scipy.stats as st
-import numpy as np
-from collections import Counter
-from tabulate import tabulate
-
-from ksharksetup import setup
-# Always call setup() before importing ksharkpy!!!
-setup()
-
-import ksharkpy as ks
-
-def gdb_decode_address(obj_file, obj_address):
-    """ Use gdb to examine the contents of the memory at this
-        address.
-    """
-    result = sp.run(['gdb',
-                     '--batch',
-                     '-ex',
-                     'x/i ' + str(obj_address),
-                     obj_file],
-                    stdout=sp.PIPE)
-
-    symbol = result.stdout.decode("utf-8").splitlines()
-
-    if symbol:
-        func = [symbol[0].split(':')[0], symbol[0].split(':')[1]]
-    else:
-        func = [obj_address]
-
-    func.append(obj_file)
-
-    return func
-
-# Get the name of the tracing data file.
-fname = str(sys.argv[1])
-
-ks.open_file(fname)
-ks.register_plugin('reg_pid')
-
-data = ks.load_data()
-tasks = ks.get_tasks()
-#pr.pprint(tasks)
-
-# Get the Event Ids of the page_fault_user or page_fault_kernel events.
-pf_eid = ks.event_id('exceptions', 'page_fault_user')
-
-# Gey the size of the data.
-d_size = ks.data_size(data)
-
-# Get the name of the user program.
-prog_name = str(sys.argv[2])
-
-table_headers = ['N p.f.', 'function', 'value', 'obj. file']
-table_list = []
-
-# Loop over all tasks associated with the user program.
-for j in range(len(tasks[prog_name])):
-    count = Counter()
-    task_pid = tasks[prog_name][j]
-    for i in range(0, d_size):
-        if data['event'][i] == pf_eid and data['pid'][i] == task_pid:
-            address = ks.read_event_field(offset=data['offset'][i],
-                                          event_id=pf_eid,
-                                          field='address')
-            ip = ks.read_event_field(offset=data['offset'][i],
-                                     event_id=pf_eid,
-                                     field='ip')
-            count[ip] += 1
-
-    pf_list = count.items()
-
-    # Sort the counters of the page fault instruction pointers. The most
-    # frequent will be on top.
-    pf_list = sorted(pf_list, key=lambda cnt: cnt[1], reverse=True)
-
-    i_max = 25
-    if i_max > len(pf_list):
-        i_max = len(pf_list)
-
-    for i in range(0, i_max):
-        func = ks.get_function(pf_list[i][0])
-        func_info = [func]
-        if func.startswith('0x'):
-            # The name of the function cannot be determined. We have an
-            # instruction pointer instead. Most probably this is a user-space
-            # function.
-            address = int(func, 0)
-            instruction = ks.map_instruction_address(task_pid, address)
-
-            if instruction['obj_file'] != 'UNKNOWN':
-                func_info = gdb_decode_address(instruction['obj_file'],
-                                               instruction['address'])
-            else:
-                func_info += ['', instruction['obj_file']]
-
-        else:
-            func_info = [func]
-
-        table_list.append([pf_list[i][1]] + func_info)
-
-ks.close()
-
-print("\n", tabulate(table_list,
-                     headers=table_headers,
-                     tablefmt='simple'))
diff --git a/examples/sched_wakeup.py b/examples/sched_wakeup.py
index 52f2688..acf3682 100755
--- a/examples/sched_wakeup.py
+++ b/examples/sched_wakeup.py
@@ -15,28 +15,20 @@  import matplotlib.pyplot as plt
 import scipy.stats as st
 import numpy as np
 
-from ksharksetup import setup
-# Always call setup() before importing ksharkpy!!!
-setup()
+import tracecruncher.ks_utils as tc
 
-import ksharkpy as ks
 # Get the name of the user program.
 if len(sys.argv) >= 2:
     fname = str(sys.argv[1])
 else:
     fname = input('choose a trace file: ')
 
-status = ks.open_file(fname)
-if not status:
-    print ("Failed to open file ", fname)
-    sys.exit()
-
-ks.register_plugin('reg_pid')
+f = tc.open_file(file_name=fname)
 
 # We do not need the Process Ids of the records.
 # Do not load the "pid" data.
-data = ks.load_data(pid_data=False)
-tasks = ks.get_tasks()
+data = f.load(pid_data=False)
+tasks = f.get_tasks()
 
 # Get the name of the user program.
 if len(sys.argv) >= 3:
@@ -48,11 +40,11 @@  else:
 task_pid = tasks[prog_name][0]
 
 # Get the Event Ids of the sched_switch and sched_waking events.
-ss_eid = ks.event_id('sched', 'sched_switch')
-w_eid = ks.event_id('sched', 'sched_waking')
+ss_eid = f.event_id(name='sched/sched_switch')
+w_eid = f.event_id(name='sched/sched_waking')
 
 # Gey the size of the data.
-i = data['offset'].size
+i = tc.size(data)
 
 dt = []
 delta_max = i_ss_max = i_sw_max = 0
@@ -60,7 +52,7 @@  delta_max = i_ss_max = i_sw_max = 0
 while i > 0:
     i = i - 1
     if data['event'][i] == ss_eid:
-        next_pid = ks.read_event_field(offset=data['offset'][i],
+        next_pid = f.read_event_field(offset=data['offset'][i],
                                        event_id=ss_eid,
                                        field='next_pid')
 
@@ -73,13 +65,13 @@  while i > 0:
                 i = i - 1
 
                 if data['event'][i] < 0 and cpu_ss == data['cpu'][i]:
-			# Ring buffer overflow. Ignore this case and continue.
+                        # Ring buffer overflow. Ignore this case and continue.
                         break
 
                 if data['event'][i] == ss_eid:
-                    next_pid = ks.read_event_field(offset=data['offset'][i],
-                                       event_id=ss_eid,
-                                       field='next_pid')
+                    next_pid = f.read_event_field(offset=data['offset'][i],
+                                                  event_id=ss_eid,
+                                                  field='next_pid')
                     if next_pid == task_pid:
                         # Second sched_switch for the same task. ?
                         time_ss = data['time'][i]
@@ -89,7 +81,7 @@  while i > 0:
                     continue
 
                 if (data['event'][i] == w_eid):
-                    waking_pid = ks.read_event_field(offset=data['offset'][i],
+                    waking_pid = f.read_event_field(offset=data['offset'][i],
                                                      event_id=w_eid,
                                                      field='pid')
 
@@ -107,6 +99,7 @@  while i > 0:
 desc = st.describe(np.array(dt))
 print(desc)
 
+# Plot the latency distribution.
 fig, ax = plt.subplots(nrows=1, ncols=1)
 fig.set_figheight(6)
 fig.set_figwidth(7)
@@ -119,30 +112,27 @@  ax.set_xlabel('latency [$\mu$s]')
 ax.hist(dt, bins=(100), histtype='step')
 plt.show()
 
-sname = 'sched.json'
-ks.new_session(fname, sname)
+# Prepare a session description for KernelShark.
+s = tc.ks_session('sched')
 
-with open(sname, 'r+') as s:
-    session = json.load(s)
-    session['TaskPlots'] = [task_pid]
-    session['CPUPlots'] = [int(data['cpu'][i_sw_max])]
+delta = data['time'][i_ss_max] - data['time'][i_sw_max]
+tmin = data['time'][i_sw_max] - delta
+tmax = data['time'][i_ss_max] + delta
 
-    if data['cpu'][i_ss_max] != data['cpu'][i_sw_max]:
-        session['CPUPlots'].append(int(data['cpu'][i_ss_max]))
+s.set_time_range(tmin=tmin, tmax=tmax)
 
-    delta = data['time'][i_ss_max] - data['time'][i_sw_max]
-    tmin = int(data['time'][i_sw_max] - delta)
-    tmax = int(data['time'][i_ss_max] + delta)
-    session['Model']['range'] = [tmin, tmax]
+cpu_plots = [data['cpu'][i_sw_max]]
+if data['cpu'][i_ss_max] != data['cpu'][i_sw_max]:
+    cpu_plots.append(data['cpu'][i_ss_max])
 
-    session['Markers']['markA']['isSet'] = True
-    session['Markers']['markA']['row'] = int(i_sw_max)
+s.set_cpu_plots(f, cpu_plots)
+s.set_task_plots(f, [task_pid])
 
-    session['Markers']['markB']['isSet'] = True
-    session['Markers']['markB']['row'] = int(i_ss_max)
+s.set_marker_a(i_sw_max)
+s.set_marker_b(i_ss_max)
 
-    session['ViewTop'] = int(i_sw_max) - 5
+s.set_first_visible_row(i_sw_max - 5)
 
-    ks.save_session(session, s)
+s.add_plugin(stream=f, plugin='sched_events')
 
-ks.close()
+s.save()