
[v2,1/7] histogram: add struct histogram

Message ID 20200528235244.74960-1-axelrasmussen@google.com
State New, archived
Series Add histogram measuring mmap_lock contention latency

Commit Message

Axel Rasmussen May 28, 2020, 11:52 p.m. UTC
struct histogram counts u64 samples in arbitrary user-configurable
buckets, and is optimized for efficient concurrent recording.
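
For example, with thresholds {10, 100, ~0}, samples are counted as
follows (a sketch, with error handling elided; see the header comment
in the patch for the full API and interval semantics):

  u64 thresholds[] = {10, 100, ~0};
  struct histogram *hist = histogram_alloc(thresholds, 3);

  histogram_record(hist, 5, 1);   /* bucket 0: [0, 10]   */
  histogram_record(hist, 50, 1);  /* bucket 1: (10, 100] */
  histogram_record(hist, 500, 1); /* bucket 2: (100, ~0] */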

This is a squashed, refactored, and modified version of a
previously internal implementation. Thanks to the following individuals
for portions of the implementation:

  Jamie Liu <jamieliu@google.com> - Original implementation
  Yu Zhao <yuzhao@google.com> - Code cleanups + simplification

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
---
 include/linux/histogram.h | 270 ++++++++++++++++++++++++++++++++++++++
 lib/Kconfig               |   3 +
 lib/Makefile              |   2 +
 lib/histogram.c           | 157 ++++++++++++++++++++++
 4 files changed, 432 insertions(+)
 create mode 100644 include/linux/histogram.h
 create mode 100644 lib/histogram.c

Patch

diff --git a/include/linux/histogram.h b/include/linux/histogram.h
new file mode 100644
index 000000000000..137930ca933f
--- /dev/null
+++ b/include/linux/histogram.h
@@ -0,0 +1,270 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HISTOGRAM_H
+#define _LINUX_HISTOGRAM_H
+
+#include <linux/compiler.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+/*
+ * Histograms for counting events represented by u64s.
+ *
+ * Each histogram may have any number of thresholds. A histogram with
+ * nr_thresholds thresholds has nr_thresholds buckets: bucket[0] counts
+ * samples in the closed interval [0, thresholds[0]], and bucket[i] for i >= 1
+ * counts samples in the half-open interval (thresholds[i-1], thresholds[i]].
+ * thresholds[nr_thresholds-1] must be 2**64-1, so every sample falls into
+ * some bucket. Each histogram's thresholds are immutable after creation.
+ * Each bucket counts up to a u64 worth of events.
+ *
+ * Example usage:
+ *
+ * u64 thresholds[] = {1, 2, 5, 10, 20, 50, 100, ~0};
+ * u64 buckets[ARRAY_SIZE(thresholds)];
+ * struct histogram *hist = histogram_alloc(thresholds, ARRAY_SIZE(thresholds));
+ * if (IS_ERR(hist)) {...}
+ * histogram_record(hist, 0, 1);
+ * histogram_record(hist, 8, 2);
+ * histogram_record(hist, 20, 3);
+ * histogram_record(hist, 110, 4);
+ * histogram_read_buckets(hist, buckets);
+ * // buckets == {1, 0, 0, 2, 3, 0, 0, 4}
+ * histogram_free(hist);
+ *
+ * For convenience, a struct histogram_rcu type is also provided which wraps an
+ * RCU-sched protected pointer to a struct histogram, allowing thresholds to be
+ * modified even while other threads may be recording to the histogram.
+ * Functions that operate on a histogram_rcu are distinguished by an _rcu
+ * suffix.
+ *
+ * Example usage:
+ *
+ * u64 thresholds_1[] = {1, 2, 5, 10, 20, 50, 100, ~0};
+ * u64 thresholds_2[] = {100, 200, 500, 1000, ~0};
+ * u64 buckets[ARRAY_SIZE(thresholds_1)];
+ * struct histogram_rcu hrcu;
+ * if (histogram_init_rcu(&hrcu, thresholds_1, ARRAY_SIZE(thresholds_1))) {...}
+ * histogram_record_rcu(&hrcu, 0, 1);
+ * histogram_record_rcu(&hrcu, 8, 2);
+ * histogram_record_rcu(&hrcu, 20, 3);
+ * histogram_record_rcu(&hrcu, 110, 4);
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ *			  ARRAY_SIZE(thresholds_1)) < 0) {...}
+ * // buckets == {1, 0, 0, 2, 3, 0, 0, 4}
+ * if (histogram_set_thresholds_rcu(&hrcu, thresholds_2,
+ *				    ARRAY_SIZE(thresholds_2))) {...}
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ *			  ARRAY_SIZE(thresholds_2)) < 0) {...}
+ * // buckets == {0, 0, 0, 0, 0}
+ * histogram_record_rcu(&hrcu, 50, 1);
+ * histogram_record_rcu(&hrcu, 150, 2);
+ * histogram_record_rcu(&hrcu, 5000, 3);
+ * if (histogram_read_rcu(&hrcu, buckets, NULL,
+ *			  ARRAY_SIZE(thresholds_2)) < 0) {...}
+ * // buckets == {1, 2, 0, 0, 3}
+ * histogram_destroy_rcu(&hrcu);
+ */
+
+struct histogram {
+	struct rcu_head rcu;	/* defers freeing via histogram_destroy_rcu_cb */
+	u64 __percpu *buckets;	/* per-CPU counts, one per threshold */
+	size_t nr_thresholds;
+	u64 thresholds[];	/* flexible array member */
+};
+
+struct histogram_rcu {
+	struct histogram __rcu *hist;
+};
+
+/**
+ * histogram_record() - record samples in histogram
+ * @hist: histogram
+ * @val: sample value
+ * @count: number of samples
+ *
+ * histogram_record() does not require synchronization with concurrent
+ * histogram_record() or histogram_read_buckets().
+ *
+ * histogram_record() may be called from tracing code.
+ */
+void histogram_record(struct histogram *hist, u64 val, u64 count);
+
+/**
+ * histogram_read_buckets() - read histogram buckets
+ * @hist: histogram
+ * @buckets: array with space for at least hist->nr_thresholds elements
+ *
+ * Sets each element of @buckets to the value of the corresponding histogram
+ * bucket.
+ *
+ * histogram_read_buckets() does not require synchronization with concurrent
+ * histogram_record() or histogram_read_buckets().
+ *
+ * histogram_read_buckets() does not block recording, so values are not read
+ * from all CPUs atomically. If this is a problem, the caller should stop
+ * recording first.
+ */
+void histogram_read_buckets(const struct histogram *hist, u64 *buckets);
+
+/**
+ * histogram_alloc() - create a histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds
+ *
+ * Histogram buckets are initialized to zero. @thresholds must be sorted in
+ * ascending order and must not contain any duplicates. @nr_thresholds must be
+ * >= 1. thresholds[nr_thresholds-1] must be ~(u64)0.
+ *
+ * histogram_alloc() makes a copy of @thresholds if successful, so ownership of
+ * @thresholds is unaffected by histogram_alloc().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Pointer to new histogram, or a PTR_ERR on failure.
+ */
+struct histogram *histogram_alloc(const u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_free() - delete a histogram
+ * @hist: histogram
+ */
+void histogram_free(struct histogram *hist);
+
+/**
+ * histogram_record_rcu() - record samples in RCU-protected histogram
+ * @hrcu: histogram
+ * @val: sample value
+ * @count: number of samples
+ *
+ * histogram_record_rcu() does not require external synchronization, even with
+ * histogram_destroy_rcu(). Calling histogram_record_rcu() on a @hrcu that
+ * histogram_destroy_rcu() has been called on is a no-op.
+ *
+ * histogram_record_rcu() may be called from tracing code.
+ */
+static inline void histogram_record_rcu(struct histogram_rcu *hrcu, u64 val,
+					u64 count)
+{
+	struct histogram *hist;
+
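+	/* _notrace because histogram_record_rcu may be called from tracing */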
+	rcu_read_lock_sched_notrace();
+	hist = rcu_dereference_sched(hrcu->hist);
+	if (likely(hist))
+		histogram_record(hist, val, count);
+	rcu_read_unlock_sched_notrace();
+}
+
+/**
+ * histogram_read_rcu() - read RCU-protected histogram
+ * @hrcu: histogram
+ * @buckets: array with space for at least nr_thresholds elements
+ * @thresholds: array with space for at least nr_thresholds elements
+ * @nr_thresholds: array size (see above)
+ *
+ * If @buckets is not NULL, sets each element of @buckets to the value of the
+ * corresponding histogram bucket.
+ *
+ * If @thresholds is not NULL, sets each element of @thresholds to the
+ * corresponding histogram bucket threshold.
+ *
+ * On failure (for example, if nr_thresholds < hrcu->hist->nr_thresholds),
+ * neither @buckets nor @thresholds will be modified.
+ *
+ * histogram_read_rcu() does not require external synchronization, even with
+ * histogram_destroy_rcu(). Calling histogram_read_rcu() on a @hrcu that
+ * histogram_destroy_rcu() has been called on returns 0.
+ *
+ * histogram_read_rcu() does not block recording, so bucket values are not read
+ * from all CPUs atomically. If this is a problem, the caller should stop
+ * recording first.
+ *
+ * Returns: If successful, returns the histogram's number of thresholds (the
+ * number of elements stored in @buckets and/or @thresholds when they are
+ * non-NULL); if nr_thresholds is too small, returns the negative of the
+ * minimum nr_thresholds required to succeed; if the histogram has been
+ * destroyed by histogram_destroy_rcu(), returns 0. (Note that if
+ * nr_thresholds is too small, retrying with the magnitude of the returned
+ * value is not guaranteed to succeed, because another thread could raise the
+ * number of thresholds again in the interim.)
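+ *
+ * A caller that does not know the current size can retry, for example (a
+ * sketch, with allocation-failure handling elided):
+ *
+ *	size_t n = 8;
+ *	u64 *bkts = kcalloc(n, sizeof(*bkts), GFP_KERNEL);
+ *	ssize_t ret;
+ *
+ *	while ((ret = histogram_read_rcu(&hrcu, bkts, NULL, n)) < 0) {
+ *		kfree(bkts);
+ *		n = -ret;
+ *		bkts = kcalloc(n, sizeof(*bkts), GFP_KERNEL);
+ *	}
+ *	// the first ret entries of bkts are valid (ret == 0 if destroyed)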
+ */
+ssize_t histogram_read_rcu(const struct histogram_rcu *hrcu, u64 *buckets,
+			   u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_set_thresholds_rcu() - set RCU-protected histogram thresholds
+ * @hrcu: histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds
+ *
+ * The semantics that apply to @thresholds are the same as for
+ * histogram_alloc().
+ *
+ * If successful, all buckets are atomically reset to zero.
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Zero on success, or a negative error code on failure.
+ */
+int histogram_set_thresholds_rcu(struct histogram_rcu *hrcu,
+				 const u64 *thresholds, size_t nr_thresholds);
+
+/**
+ * histogram_init_rcu() - initialize RCU-protected histogram
+ * @hrcu: histogram
+ * @thresholds: thresholds array
+ * @nr_thresholds: number of elements in @thresholds
+ *
+ * Each struct histogram_rcu must be initialized by histogram_init_rcu() at
+ * least once before it is valid to call any other functions on it. A struct
+ * histogram_rcu that has been previously initialized cannot be initialized
+ * again, unless it has been subsequently destroyed by histogram_destroy_rcu().
+ *
+ * The semantics that apply to @thresholds are the same as for
+ * histogram_alloc(), with one exception: @thresholds may be NULL iff
+ * @nr_thresholds is 0. In this case, @hrcu will behave as if it has already
+ * been destroyed (histogram_record_rcu() will no-op and histogram_read_rcu()
+ * will return 0).
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ *
+ * Context: Performs allocation with GFP_KERNEL.
+ *
+ * Returns: Zero on success, or a negative error code on failure.
+ */
+static inline int histogram_init_rcu(struct histogram_rcu *hrcu,
+				     const u64 *thresholds,
+				     size_t nr_thresholds)
+{
+	RCU_INIT_POINTER(hrcu->hist, NULL);
+	if (!thresholds && !nr_thresholds)
+		return 0;
+	return histogram_set_thresholds_rcu(hrcu, thresholds, nr_thresholds);
+}
+
+void histogram_destroy_rcu_cb(struct rcu_head *rcu);
+
+/**
+ * histogram_destroy_rcu() - destroy RCU-protected histogram
+ * @hrcu: histogram
+ *
+ * After histogram_destroy_rcu() has been called on a @hrcu, it is valid to call
+ * histogram_init_rcu() on it again.
+ *
+ * The caller must synchronize between concurrent calls to
+ * histogram_set_thresholds_rcu(), histogram_init_rcu(), and
+ * histogram_destroy_rcu().
+ */
+static inline void histogram_destroy_rcu(struct histogram_rcu *hrcu)
+{
+	struct histogram *hist = rcu_dereference_raw(hrcu->hist);
+
+	RCU_INIT_POINTER(hrcu->hist, NULL);
+	if (likely(hist))
+		call_rcu(&hist->rcu, histogram_destroy_rcu_cb);
+}
+
+#endif /* _LINUX_HISTOGRAM_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 5d53f9609c25..4714bdfa343b 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -648,6 +648,9 @@  config OBJAGG
 config STRING_SELFTEST
 	tristate "Test string functions"
 
+config HISTOGRAM
+	bool
+
 endmenu
 
 config GENERIC_IOREMAP
diff --git a/lib/Makefile b/lib/Makefile
index 685aee60de1d..f61b1c15d656 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -248,6 +248,8 @@  obj-$(CONFIG_ASN1) += asn1_decoder.o
 
 obj-$(CONFIG_FONT_SUPPORT) += fonts/
 
+obj-$(CONFIG_HISTOGRAM) += histogram.o
+
 hostprogs	:= gen_crc32table
 hostprogs	+= gen_crc64table
 clean-files	:= crc32table.h
diff --git a/lib/histogram.c b/lib/histogram.c
new file mode 100644
index 000000000000..b68334275a46
--- /dev/null
+++ b/lib/histogram.c
@@ -0,0 +1,157 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cache.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/histogram.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+
+void histogram_record(struct histogram *hist, u64 val, u64 count)
+{
+	size_t i, lower, upper;
+
+	/* Binary search invariant: correct bucket in [lower, upper] */
+	lower = 0;
+	upper = hist->nr_thresholds - 1;
+	while (lower < upper) {
+		/* can't realistically overflow, nr_thresholds < 2**63 */
+		i = (lower + upper) / 2;
+		if (val <= hist->thresholds[i])
+			upper = i;
+		else
+			lower = i + 1;
+	}
+	i = upper;
+	/* _notrace because histogram_record may be called from tracing */
+	preempt_disable_notrace();
+	__this_cpu_add(hist->buckets[i], count);
+	preempt_enable_notrace();
+}
+EXPORT_SYMBOL_GPL(histogram_record);
+
+void histogram_read_buckets(const struct histogram *hist, u64 *buckets)
+{
+	int cpu;
+	size_t nr_buckets;
+	size_t i;
+
+	nr_buckets = hist->nr_thresholds;
+	memset(buckets, 0, nr_buckets * sizeof(u64));
+	/*
+	 * This must use for_each_possible_cpu rather than for_each_online_cpu
+	 * to ensure we count records from any CPUs that have since been
+	 * removed.
+	 */
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < nr_buckets; i++)
+			buckets[i] += per_cpu(hist->buckets[i], cpu);
+	}
+}
+EXPORT_SYMBOL_GPL(histogram_read_buckets);
+
+static int histogram_check_thresholds(const u64 *thresholds,
+				      size_t nr_thresholds)
+{
+	unsigned int i;
+
+	if (!nr_thresholds)
+		return -EINVAL;
+	if (!thresholds)
+		return -EFAULT;
+	for (i = 1; i < nr_thresholds; i++)
+		if (thresholds[i - 1] >= thresholds[i])
+			return -EINVAL;
+	if (thresholds[nr_thresholds - 1] != ~0ULL)
+		return -EINVAL;
+	return 0;
+}
+
+struct histogram *histogram_alloc(const u64 *thresholds, size_t nr_thresholds)
+{
+	struct histogram *hist;
+	size_t hist_size;
+	int ret;
+
+	ret = histogram_check_thresholds(thresholds, nr_thresholds);
+	if (ret)
+		return ERR_PTR(ret);
+	hist_size = sizeof(struct histogram) + nr_thresholds * sizeof(u64);
+	hist = kmalloc(ALIGN(hist_size, cache_line_size()), GFP_KERNEL);
+	if (!hist)
+		return ERR_PTR(-ENOMEM);
+	hist->buckets = __alloc_percpu(nr_thresholds * sizeof(*hist->buckets),
+				       __alignof__(*hist->buckets));
+	if (!hist->buckets) {
+		kfree(hist);
+		return ERR_PTR(-ENOMEM);
+	}
+	hist->nr_thresholds = nr_thresholds;
+	memcpy(hist->thresholds, thresholds, nr_thresholds * sizeof(u64));
+	return hist;
+}
+EXPORT_SYMBOL_GPL(histogram_alloc);
+
+void histogram_free(struct histogram *hist)
+{
+	if (!hist)
+		return;
+	free_percpu(hist->buckets);
+	kfree(hist);
+}
+EXPORT_SYMBOL_GPL(histogram_free);
+
+ssize_t histogram_read_rcu(const struct histogram_rcu *hrcu, u64 *buckets,
+			   u64 *thresholds, size_t nr_thresholds)
+{
+	const struct histogram *hist;
+	ssize_t ret = 0;
+
+	rcu_read_lock_sched();
+	hist = rcu_dereference_sched(hrcu->hist);
+	if (!hist) {
+		ret = 0;
+		goto out;
+	}
+	if (nr_thresholds < hist->nr_thresholds) {
+		ret = -hist->nr_thresholds;
+		goto out;
+	}
+	if (buckets)
+		histogram_read_buckets(hist, buckets);
+	if (thresholds)
+		memcpy(thresholds, hist->thresholds,
+		       hist->nr_thresholds * sizeof(u64));
+	ret = hist->nr_thresholds;
+out:
+	rcu_read_unlock_sched();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(histogram_read_rcu);
+
+int histogram_set_thresholds_rcu(struct histogram_rcu *hrcu,
+				 const u64 *thresholds, size_t nr_thresholds)
+{
+	struct histogram *old_hist = rcu_dereference_raw(hrcu->hist);
+	struct histogram *new_hist = histogram_alloc(thresholds, nr_thresholds);
+
+	if (IS_ERR(new_hist))
+		return PTR_ERR(new_hist);
+	rcu_assign_pointer(hrcu->hist, new_hist);
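+	/*
+	 * Readers may still hold references to the old histogram from before
+	 * the pointer swap; wait for them to finish before freeing it.
+	 */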
+	if (old_hist) {
+		synchronize_rcu();
+		histogram_free(old_hist);
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(histogram_set_thresholds_rcu);
+
+void histogram_destroy_rcu_cb(struct rcu_head *rcu)
+{
+	histogram_free(container_of(rcu, struct histogram, rcu));
+}
+EXPORT_SYMBOL_GPL(histogram_destroy_rcu_cb);
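
Since HISTOGRAM is a hidden Kconfig symbol, a consumer is expected to
select it from its own Kconfig entry and then drive the API. For the
RCU variant, that looks roughly like this (a sketch of a hypothetical
consumer, not part of this patch; presumably the mmap_lock user appears
later in the series):

  /* hypothetical consumer; all "my_*" names are illustrative */
  static struct histogram_rcu my_hrcu;

  static int __init my_stats_init(void)
  {
  	static const u64 thresholds[] = {100, 1000, 10000, ~0ULL};

  	return histogram_init_rcu(&my_hrcu, thresholds,
  				  ARRAY_SIZE(thresholds));
  }

  /* safe to call concurrently, even against histogram_destroy_rcu() */
  static void my_stats_record(u64 latency_ns)
  {
  	histogram_record_rcu(&my_hrcu, latency_ns, 1);
  }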