diff mbox series

[v4,4/4] TEST-ONLY: media: uvcvideo: Add statistics for measuring performance

Message ID 20201130104730.27655-4-ribalda@chromium.org (mailing list archive)
State New, archived
Headers show
Series [v4,1/4] dma-mapping: remove the {alloc,free}_noncoherent methods | expand

Commit Message

Ricardo Ribalda Nov. 30, 2020, 10:47 a.m. UTC
From: Shik Chen <shik@chromium.org>

Majorly based on [1], with the following tweaks:

* Use div_u64 for u64 divisions
* Calculate standard deviation
* Fix an uninitialized |min| field for header
* Apply clang-format

[1] https://git.kernel.org/pub/scm/linux/kernel/git/kbingham/rcar.git/commit/?h=uvc/async-ml&id=cebbd1b629bbe5f856ec5dc7591478c003f5a944

Signed-off-by: Shik Chen <shik@chromium.org>
---
 drivers/media/usb/uvc/uvc_video.c | 163 +++++++++++++++++++++++++++++-
 drivers/media/usb/uvc/uvcvideo.h  |  21 ++++
 2 files changed, 181 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index 59ade244abfb..34a27191ef0d 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -906,12 +906,61 @@  static void uvc_video_stats_update(struct uvc_streaming *stream)
 	memset(&stream->stats.frame, 0, sizeof(stream->stats.frame));
 }
 
+size_t uvc_video_dump_time_stats(char *buf, size_t size,
+				 struct uvc_stats_time *stat, const char *pfx)
+{
+	unsigned int avg = 0;
+	unsigned int std = 0;
+
+	if (stat->qty) {
+		avg = div_u64(stat->duration, stat->qty);
+		std = int_sqrt64(div_u64(stat->duration2, stat->qty) -
+				 avg * avg);
+	}
+
+	/* Stat durations are in nanoseconds, we present in micro-seconds */
+	return scnprintf(
+		buf, size,
+		"%s: %llu/%u uS/qty: %u.%03u avg %u.%03u std %u.%03u min %u.%03u max (uS)\n",
+		pfx, div_u64(stat->duration, 1000), stat->qty, avg / 1000,
+		avg % 1000, std / 1000, std % 1000, stat->min / 1000,
+		stat->min % 1000, stat->max / 1000, stat->max % 1000);
+}
+
+size_t uvc_video_dump_speed(char *buf, size_t size, const char *pfx, u64 bytes,
+			    u64 milliseconds)
+{
+	unsigned int rate = 0;
+	bool gbit = false;
+
+	if (milliseconds)
+		rate = div_u64(bytes * 8, milliseconds);
+
+	if (rate >= 1000000) {
+		gbit = true;
+		rate /= 1000;
+	}
+
+	/*
+	 * bits/milliseconds == kilobits/seconds,
+	 * presented here as Mbits/s (or Gbit/s) with 3 decimal places
+	 */
+	return scnprintf(buf, size, "%s: %d.%03d %sbits/s\n", pfx, rate / 1000,
+			 rate % 1000, gbit ? "G" : "M");
+}
+
 size_t uvc_video_stats_dump(struct uvc_streaming *stream, char *buf,
 			    size_t size)
 {
+	u64 bytes = stream->stats.stream.bytes; /* Single sample */
+	unsigned int empty_ratio = 0;
 	unsigned int scr_sof_freq;
 	unsigned int duration;
+	unsigned int fps = 0;
 	size_t count = 0;
+	u64 cpu = 0;
+	u64 cpu_q = 0;
+	u32 cpu_r = 0;
 
 	/* Compute the SCR.SOF frequency estimate. At the nominal 1kHz SOF
 	 * frequency this will not overflow before more than 1h.
@@ -924,12 +973,19 @@  size_t uvc_video_stats_dump(struct uvc_streaming *stream, char *buf,
 	else
 		scr_sof_freq = 0;
 
+	if (stream->stats.stream.nb_packets)
+		empty_ratio = stream->stats.stream.nb_empty * 100 /
+			      stream->stats.stream.nb_packets;
+
 	count += scnprintf(buf + count, size - count,
-			   "frames:  %u\npackets: %u\nempty:   %u\n"
-			   "errors:  %u\ninvalid: %u\n",
+			   "frames:  %u\n"
+			   "packets: %u\n"
+			   "empty:   %u (%u %%)\n"
+			   "errors:  %u\n"
+			   "invalid: %u\n",
 			   stream->stats.stream.nb_frames,
 			   stream->stats.stream.nb_packets,
-			   stream->stats.stream.nb_empty,
+			   stream->stats.stream.nb_empty, empty_ratio,
 			   stream->stats.stream.nb_errors,
 			   stream->stats.stream.nb_invalid);
 	count += scnprintf(buf + count, size - count,
@@ -946,6 +1002,55 @@  size_t uvc_video_stats_dump(struct uvc_streaming *stream, char *buf,
 			   stream->stats.stream.min_sof,
 			   stream->stats.stream.max_sof,
 			   scr_sof_freq / 1000, scr_sof_freq % 1000);
+	count += scnprintf(buf + count, size - count,
+			   "bytes %lld : duration %d\n", bytes, duration);
+
+	if (duration != 0) {
+		/* Duration is in milliseconds, * 100 to gain 2 dp precision */
+		fps = stream->stats.stream.nb_frames * 1000 * 100 / duration;
+		/* CPU usage as a % with 6 decimal places */
+		cpu = div_u64(stream->stats.urbstat.decode.duration, duration) *
+		      100;
+	}
+
+	count += scnprintf(buf + count, size - count, "FPS: %u.%02u\n",
+			   fps / 100, fps % 100);
+
+	/* Processing Times */
+
+	count += uvc_video_dump_time_stats(buf + count, size - count,
+					   &stream->stats.urbstat.urb, "URB");
+	count += uvc_video_dump_time_stats(buf + count, size - count,
+					   &stream->stats.urbstat.header,
+					   "header");
+	count += uvc_video_dump_time_stats(buf + count, size - count,
+					   &stream->stats.urbstat.latency,
+					   "latency");
+	count += uvc_video_dump_time_stats(buf + count, size - count,
+					   &stream->stats.urbstat.decode,
+					   "decode");
+
+	/* Processing Speeds */
+
+	/* This should be representative of the memory bus / cpu speed */
+	count += uvc_video_dump_speed(
+		buf + count, size - count, "raw decode speed", bytes,
+		div_u64(stream->stats.urbstat.decode.duration, 1000000));
+
+	/* Raw bus speed - scheduling latencies */
+	count += uvc_video_dump_speed(
+		buf + count, size - count, "raw URB handling speed", bytes,
+		div_u64(stream->stats.urbstat.urb.duration, 1000000));
+
+	/* Throughput against wall clock time, stream duration is in millis*/
+	count += uvc_video_dump_speed(buf + count, size - count, "throughput",
+				      bytes, duration);
+
+	cpu_q = div_u64_rem(cpu, 1000000, &cpu_r);
+
+	/* Determine the 'CPU Usage' of our URB processing */
+	count += scnprintf(buf + count, size - count,
+			   "URB decode CPU usage %llu.%06u %%\n", cpu_q, cpu_r);
 
 	return count;
 }
@@ -954,6 +1059,11 @@  static void uvc_video_stats_start(struct uvc_streaming *stream)
 {
 	memset(&stream->stats, 0, sizeof(stream->stats));
 	stream->stats.stream.min_sof = 2048;
+
+	stream->stats.urbstat.header.min = -1;
+	stream->stats.urbstat.latency.min = -1;
+	stream->stats.urbstat.decode.min = -1;
+	stream->stats.urbstat.urb.min = -1;
 }
 
 static void uvc_video_stats_stop(struct uvc_streaming *stream)
@@ -961,6 +1071,28 @@  static void uvc_video_stats_stop(struct uvc_streaming *stream)
 	stream->stats.stream.stop_ts = ktime_get();
 }
 
+static s64 uvc_stats_add(struct uvc_stats_time *s, const ktime_t a,
+			 const ktime_t b)
+{
+	ktime_t delta;
+	u64 duration;
+
+	delta = ktime_sub(b, a);
+	duration = ktime_to_ns(delta);
+
+	s->qty++;
+	s->duration += duration;
+	s->duration2 += duration * duration;
+
+	if (duration < s->min)
+		s->min = duration;
+
+	if (duration > s->max)
+		s->max = duration;
+
+	return duration;
+}
+
 /* ------------------------------------------------------------------------
  * Video codecs
  */
@@ -1024,6 +1156,9 @@  static int uvc_video_decode_start(struct uvc_streaming *stream,
 		stream->sequence++;
 		if (stream->sequence)
 			uvc_video_stats_update(stream);
+
+		/* Update the stream timer each frame */
+		stream->stats.stream.stop_ts = ktime_get();
 	}
 
 	uvc_video_clock_decode(stream, buf, data, len);
@@ -1111,18 +1246,34 @@  static inline struct device *stream_to_dmadev(struct uvc_streaming *stream)
 static void uvc_video_copy_data_work(struct work_struct *work)
 {
 	struct uvc_urb *uvc_urb = container_of(work, struct uvc_urb, work);
+	ktime_t now;
 	unsigned int i;
 	int ret;
 
+	/* Measure decode performance */
+	uvc_urb->decode_start = ktime_get();
+	/* Measure scheduling latency */
+	uvc_stats_add(&uvc_urb->stream->stats.urbstat.latency,
+		      uvc_urb->received, uvc_urb->decode_start);
+
 	for (i = 0; i < uvc_urb->async_operations; i++) {
 		struct uvc_copy_op *op = &uvc_urb->copy_operations[i];
 
 		memcpy(op->dst, op->src, op->len);
+		uvc_urb->stream->stats.stream.bytes += op->len;
 
 		/* Release reference taken on this buffer. */
 		uvc_queue_buffer_release(op->buf);
 	}
 
+	now = ktime_get();
+	/* measure 'memcpy time' */
+	uvc_stats_add(&uvc_urb->stream->stats.urbstat.decode,
+		      uvc_urb->decode_start, now);
+	/* measure 'full urb processing time' */
+	uvc_stats_add(&uvc_urb->stream->stats.urbstat.urb, uvc_urb->received,
+		      now);
+
 	if (uvc_urb->pages)
 		dma_sync_sgtable_for_device(stream_to_dmadev(uvc_urb->stream),
 					    &uvc_urb->sgt, DMA_FROM_DEVICE);
@@ -1510,6 +1661,9 @@  static void uvc_video_complete(struct urb *urb)
 	unsigned long flags;
 	int ret;
 
+	/* Track URB processing performance */
+	uvc_urb->received = ktime_get();
+
 	switch (urb->status) {
 	case 0:
 		break;
@@ -1567,6 +1721,9 @@  static void uvc_video_complete(struct urb *urb)
 	}
 
 	queue_work(stream->async_wq, &uvc_urb->work);
+
+	uvc_stats_add(&uvc_urb->stream->stats.urbstat.header, uvc_urb->received,
+		      ktime_get());
 }
 
 /*
diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h
index 3e6618a2ac82..e77fa2253526 100644
--- a/drivers/media/usb/uvc/uvcvideo.h
+++ b/drivers/media/usb/uvc/uvcvideo.h
@@ -475,6 +475,14 @@  struct uvc_stats_frame {
 	u32 scr_stc;			/* SCR.STC of the last packet */
 };
 
+struct uvc_stats_time {
+	u64 duration;			/* Cumulative total duration between two events */
+	u64 duration2;			/* Cumulative total duration^2 between two events */
+	unsigned int qty;		/* Number of events represented in the total */
+	unsigned int min;		/* Shortest duration */
+	unsigned int max;		/* Longest duration */
+};
+
 struct uvc_stats_stream {
 	ktime_t start_ts;		/* Stream start timestamp */
 	ktime_t stop_ts;		/* Stream stop timestamp */
@@ -496,6 +504,8 @@  struct uvc_stats_stream {
 	unsigned int scr_sof;		/* STC.SOF of the last packet */
 	unsigned int min_sof;		/* Minimum STC.SOF value */
 	unsigned int max_sof;		/* Maximum STC.SOF value */
+
+	unsigned long bytes;		/* Successfully transferred bytes */
 };
 
 #define UVC_METADATA_BUF_SIZE 1024
@@ -525,6 +535,8 @@  struct uvc_copy_op {
  * @async_operations: counter to indicate the number of copy operations
  * @copy_operations: work descriptors for asynchronous copy operations
  * @work: work queue entry for asynchronous decode
+ * @received: URB interrupt time stamp
+ * @decode_start: URB processing start time stamp
  */
 struct uvc_urb {
 	struct urb *urb;
@@ -538,6 +550,9 @@  struct uvc_urb {
 	unsigned int async_operations;
 	struct uvc_copy_op copy_operations[UVC_MAX_PACKETS];
 	struct work_struct work;
+
+	ktime_t received;
+	ktime_t decode_start;
 };
 
 struct uvc_streaming {
@@ -600,6 +615,12 @@  struct uvc_streaming {
 	struct {
 		struct uvc_stats_frame frame;
 		struct uvc_stats_stream stream;
+		struct uvc_stats_urb {
+			struct uvc_stats_time header;
+			struct uvc_stats_time latency;
+			struct uvc_stats_time decode;
+			struct uvc_stats_time urb;
+		} urbstat;
 	} stats;
 
 	/* Timestamps support. */