diff mbox series

[07/40] Lazy percpu counters

Message ID 20230501165450.15352-8-surenb@google.com (mailing list archive)
State New
Headers show
Series Memory allocation profiling | expand

Commit Message

Suren Baghdasaryan May 1, 2023, 4:54 p.m. UTC
From: Kent Overstreet <kent.overstreet@linux.dev>

This patch adds lib/lazy-percpu-counter.c, which implements counters
that start out as atomics, but lazily switch to percpu mode if the
update rate crosses some threshold (arbitrarily set at 256 per second).

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/lazy-percpu-counter.h | 102 ++++++++++++++++++++++
 lib/Kconfig                         |   3 +
 lib/Makefile                        |   2 +
 lib/lazy-percpu-counter.c           | 127 ++++++++++++++++++++++++++++
 4 files changed, 234 insertions(+)
 create mode 100644 include/linux/lazy-percpu-counter.h
 create mode 100644 lib/lazy-percpu-counter.c

Comments

Randy Dunlap May 1, 2023, 7:17 p.m. UTC | #1
Hi--

On 5/1/23 09:54, Suren Baghdasaryan wrote:
> From: Kent Overstreet <kent.overstreet@linux.dev>
> 
> This patch adds lib/lazy-percpu-counter.c, which implements counters
> that start out as atomics, but lazily switch to percpu mode if the
> update rate crosses some threshold (arbitrarily set at 256 per second).
> 

from submitting-patches.rst:

Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
to do frotz", as if you are giving orders to the codebase to change
its behaviour.

> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
>  include/linux/lazy-percpu-counter.h | 102 ++++++++++++++++++++++
>  lib/Kconfig                         |   3 +
>  lib/Makefile                        |   2 +
>  lib/lazy-percpu-counter.c           | 127 ++++++++++++++++++++++++++++
>  4 files changed, 234 insertions(+)
>  create mode 100644 include/linux/lazy-percpu-counter.h
>  create mode 100644 lib/lazy-percpu-counter.c
> 
> diff --git a/include/linux/lazy-percpu-counter.h b/include/linux/lazy-percpu-counter.h
> new file mode 100644
> index 000000000000..45ca9e2ce58b
> --- /dev/null
> +++ b/include/linux/lazy-percpu-counter.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Lazy percpu counters:
> + * (C) 2022 Kent Overstreet
> + *
> + * Lazy percpu counters start out in atomic mode, then switch to percpu mode if
> + * the update rate crosses some threshold.
> + *
> + * This means we don't have to decide between low memory overhead atomic
> + * counters and higher performance percpu counters - we can have our cake and
> + * eat it, too!
> + *
> + * Internally we use an atomic64_t, where the low bit indicates whether we're in
> + * percpu mode, and the high 8 bits are a secondary counter that's incremented
> + * when the counter is modified - meaning 55 bits of precision are available for
> + * the counter itself.
> + */
> +
> +#ifndef _LINUX_LAZY_PERCPU_COUNTER_H
> +#define _LINUX_LAZY_PERCPU_COUNTER_H
> +
> +#include <linux/atomic.h>
> +#include <asm/percpu.h>
> +
> +struct lazy_percpu_counter {
> +	atomic64_t			v;
> +	unsigned long			last_wrap;
> +};
> +
> +void lazy_percpu_counter_exit(struct lazy_percpu_counter *c);
> +void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i);
> +void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i);
> +s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c);
> +
> +/*
> + * We use the high bits of the atomic counter for a secondary counter, which is
> + * incremented every time the counter is touched. When the secondary counter
> + * wraps, we check the time the counter last wrapped, and if it was recent
> + * enough that means the update frequency has crossed our threshold and we
> + * switch to percpu mode:
> + */
> +#define COUNTER_MOD_BITS		8
> +#define COUNTER_MOD_MASK		~(~0ULL >> COUNTER_MOD_BITS)
> +#define COUNTER_MOD_BITS_START		(64 - COUNTER_MOD_BITS)
> +
> +/*
> + * We use the low bit of the counter to indicate whether we're in atomic mode
> + * (low bit clear), or percpu mode (low bit set, counter is a pointer to actual
> + * percpu counters:
> + */
> +#define COUNTER_IS_PCPU_BIT		1
> +
> +static inline u64 __percpu *lazy_percpu_counter_is_pcpu(u64 v)
> +{
> +	if (!(v & COUNTER_IS_PCPU_BIT))
> +		return NULL;
> +
> +	v ^= COUNTER_IS_PCPU_BIT;
> +	return (u64 __percpu *)(unsigned long)v;
> +}
> +
> +/**
> + * lazy_percpu_counter_add: Add a value to a lazy_percpu_counter

For kernel-doc, the function name should be followed by '-', not ':'.
(many places)

> + *
> + * @c: counter to modify
> + * @i: value to add
> + */
> +static inline void lazy_percpu_counter_add(struct lazy_percpu_counter *c, s64 i)
> +{
> +	u64 v = atomic64_read(&c->v);
> +	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
> +
> +	if (likely(pcpu_v))
> +		this_cpu_add(*pcpu_v, i);
> +	else
> +		lazy_percpu_counter_add_slowpath(c, i);
> +}
> +
> +/**
> + * lazy_percpu_counter_add_noupgrade: Add a value to a lazy_percpu_counter,
> + * without upgrading to percpu mode
> + *
> + * @c: counter to modify
> + * @i: value to add
> + */
> +static inline void lazy_percpu_counter_add_noupgrade(struct lazy_percpu_counter *c, s64 i)
> +{
> +	u64 v = atomic64_read(&c->v);
> +	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
> +
> +	if (likely(pcpu_v))
> +		this_cpu_add(*pcpu_v, i);
> +	else
> +		lazy_percpu_counter_add_slowpath_noupgrade(c, i);
> +}
> +
> +static inline void lazy_percpu_counter_sub(struct lazy_percpu_counter *c, s64 i)
> +{
> +	lazy_percpu_counter_add(c, -i);
> +}
> +
> +#endif /* _LINUX_LAZY_PERCPU_COUNTER_H */

> diff --git a/lib/lazy-percpu-counter.c b/lib/lazy-percpu-counter.c
> new file mode 100644
> index 000000000000..4f4e32c2dc09
> --- /dev/null
> +++ b/lib/lazy-percpu-counter.c
> @@ -0,0 +1,127 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/atomic.h>
> +#include <linux/gfp.h>
> +#include <linux/jiffies.h>
> +#include <linux/lazy-percpu-counter.h>
> +#include <linux/percpu.h>
> +
> +static inline s64 lazy_percpu_counter_atomic_val(s64 v)
> +{
> +	/* Ensure output is sign extended properly: */
> +	return (v << COUNTER_MOD_BITS) >>
> +		(COUNTER_MOD_BITS + COUNTER_IS_PCPU_BIT);
> +}
> +
...
> +
> +/**
> + * lazy_percpu_counter_exit: Free resources associated with a
> + * lazy_percpu_counter

Same kernel-doc comment.

> + *
> + * @c: counter to exit
> + */
> +void lazy_percpu_counter_exit(struct lazy_percpu_counter *c)
> +{
> +	free_percpu(lazy_percpu_counter_is_pcpu(atomic64_read(&c->v)));
> +}
> +EXPORT_SYMBOL_GPL(lazy_percpu_counter_exit);
> +
> +/**
> + * lazy_percpu_counter_read: Read current value of a lazy_percpu_counter
> + *
> + * @c: counter to read
> + */
> +s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c)
> +{
> +	s64 v = atomic64_read(&c->v);
> +	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
> +
> +	if (pcpu_v) {
> +		int cpu;
> +
> +		v = 0;
> +		for_each_possible_cpu(cpu)
> +			v += *per_cpu_ptr(pcpu_v, cpu);
> +	} else {
> +		v = lazy_percpu_counter_atomic_val(v);
> +	}
> +
> +	return v;
> +}
> +EXPORT_SYMBOL_GPL(lazy_percpu_counter_read);
> +
> +void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i)
> +{
> +	u64 atomic_i;
> +	u64 old, v = atomic64_read(&c->v);
> +	u64 __percpu *pcpu_v;
> +
> +	atomic_i  = i << COUNTER_IS_PCPU_BIT;
> +	atomic_i &= ~COUNTER_MOD_MASK;
> +	atomic_i |= 1ULL << COUNTER_MOD_BITS_START;
> +
> +	do {
> +		pcpu_v = lazy_percpu_counter_is_pcpu(v);
> +		if (pcpu_v) {
> +			this_cpu_add(*pcpu_v, i);
> +			return;
> +		}
> +
> +		old = v;
> +	} while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old);
> +
> +	if (unlikely(!(v & COUNTER_MOD_MASK))) {
> +		unsigned long now = jiffies;
> +
> +		if (c->last_wrap &&
> +		    unlikely(time_after(c->last_wrap + HZ, now)))
> +			lazy_percpu_counter_switch_to_pcpu(c);
> +		else
> +			c->last_wrap = now;
> +	}
> +}
> +EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath);
> +
> +void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i)
> +{
> +	u64 atomic_i;
> +	u64 old, v = atomic64_read(&c->v);
> +	u64 __percpu *pcpu_v;
> +
> +	atomic_i  = i << COUNTER_IS_PCPU_BIT;
> +	atomic_i &= ~COUNTER_MOD_MASK;
> +
> +	do {
> +		pcpu_v = lazy_percpu_counter_is_pcpu(v);
> +		if (pcpu_v) {
> +			this_cpu_add(*pcpu_v, i);
> +			return;
> +		}
> +
> +		old = v;
> +	} while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old);
> +}
> +EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath_noupgrade);

These last 2 exported functions could use some comments, preferably in
kernel-doc format.

Thanks.
diff mbox series

Patch

diff --git a/include/linux/lazy-percpu-counter.h b/include/linux/lazy-percpu-counter.h
new file mode 100644
index 000000000000..45ca9e2ce58b
--- /dev/null
+++ b/include/linux/lazy-percpu-counter.h
@@ -0,0 +1,102 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lazy percpu counters:
+ * (C) 2022 Kent Overstreet
+ *
+ * Lazy percpu counters start out in atomic mode, then switch to percpu mode if
+ * the update rate crosses some threshold.
+ *
+ * This means we don't have to decide between low memory overhead atomic
+ * counters and higher performance percpu counters - we can have our cake and
+ * eat it, too!
+ *
+ * Internally we use an atomic64_t, where the low bit indicates whether we're in
+ * percpu mode, and the high 8 bits are a secondary counter that's incremented
+ * when the counter is modified - meaning 55 bits of precision are available for
+ * the counter itself.
+ */
+
+#ifndef _LINUX_LAZY_PERCPU_COUNTER_H
+#define _LINUX_LAZY_PERCPU_COUNTER_H
+
+#include <linux/atomic.h>
+#include <asm/percpu.h>
+
+struct lazy_percpu_counter {
+	atomic64_t			v;
+	unsigned long			last_wrap;
+};
+
+void lazy_percpu_counter_exit(struct lazy_percpu_counter *c);
+void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i);
+void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i);
+s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c);
+
+/*
+ * We use the high bits of the atomic counter for a secondary counter, which is
+ * incremented every time the counter is touched. When the secondary counter
+ * wraps, we check the time the counter last wrapped, and if it was recent
+ * enough that means the update frequency has crossed our threshold and we
+ * switch to percpu mode:
+ */
+#define COUNTER_MOD_BITS		8
+#define COUNTER_MOD_MASK		~(~0ULL >> COUNTER_MOD_BITS)
+#define COUNTER_MOD_BITS_START		(64 - COUNTER_MOD_BITS)
+
+/*
+ * We use the low bit of the counter to indicate whether we're in atomic mode
+ * (low bit clear), or percpu mode (low bit set, counter is a pointer to actual
+ * percpu counters:
+ */
+#define COUNTER_IS_PCPU_BIT		1
+
+static inline u64 __percpu *lazy_percpu_counter_is_pcpu(u64 v)
+{
+	if (!(v & COUNTER_IS_PCPU_BIT))
+		return NULL;
+
+	v ^= COUNTER_IS_PCPU_BIT;
+	return (u64 __percpu *)(unsigned long)v;
+}
+
+/**
+ * lazy_percpu_counter_add: Add a value to a lazy_percpu_counter
+ *
+ * @c: counter to modify
+ * @i: value to add
+ */
+static inline void lazy_percpu_counter_add(struct lazy_percpu_counter *c, s64 i)
+{
+	u64 v = atomic64_read(&c->v);
+	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
+
+	if (likely(pcpu_v))
+		this_cpu_add(*pcpu_v, i);
+	else
+		lazy_percpu_counter_add_slowpath(c, i);
+}
+
+/**
+ * lazy_percpu_counter_add_noupgrade: Add a value to a lazy_percpu_counter,
+ * without upgrading to percpu mode
+ *
+ * @c: counter to modify
+ * @i: value to add
+ */
+static inline void lazy_percpu_counter_add_noupgrade(struct lazy_percpu_counter *c, s64 i)
+{
+	u64 v = atomic64_read(&c->v);
+	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
+
+	if (likely(pcpu_v))
+		this_cpu_add(*pcpu_v, i);
+	else
+		lazy_percpu_counter_add_slowpath_noupgrade(c, i);
+}
+
+static inline void lazy_percpu_counter_sub(struct lazy_percpu_counter *c, s64 i)
+{
+	lazy_percpu_counter_add(c, -i);
+}
+
+#endif /* _LINUX_LAZY_PERCPU_COUNTER_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 5c2da561c516..7380292a8fcd 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -505,6 +505,9 @@  config ASSOCIATIVE_ARRAY
 
 	  for more information.
 
+config LAZY_PERCPU_COUNTER
+	bool
+
 config HAS_IOMEM
 	bool
 	depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index 876fcdeae34e..293a0858a3f8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -164,6 +164,8 @@  obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
+obj-$(CONFIG_LAZY_PERCPU_COUNTER) += lazy-percpu-counter.o
+
 obj-$(CONFIG_BITREVERSE) += bitrev.o
 obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o
 obj-$(CONFIG_PACKING)	+= packing.o
diff --git a/lib/lazy-percpu-counter.c b/lib/lazy-percpu-counter.c
new file mode 100644
index 000000000000..4f4e32c2dc09
--- /dev/null
+++ b/lib/lazy-percpu-counter.c
@@ -0,0 +1,127 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/atomic.h>
+#include <linux/gfp.h>
+#include <linux/jiffies.h>
+#include <linux/lazy-percpu-counter.h>
+#include <linux/percpu.h>
+
+static inline s64 lazy_percpu_counter_atomic_val(s64 v)
+{
+	/* Ensure output is sign extended properly: */
+	return (v << COUNTER_MOD_BITS) >>
+		(COUNTER_MOD_BITS + COUNTER_IS_PCPU_BIT);
+}
+
+static void lazy_percpu_counter_switch_to_pcpu(struct lazy_percpu_counter *c)
+{
+	u64 __percpu *pcpu_v = alloc_percpu_gfp(u64, GFP_ATOMIC|__GFP_NOWARN);
+	u64 old, new, v;
+
+	if (!pcpu_v)
+		return;
+
+	preempt_disable();
+	v = atomic64_read(&c->v);
+	do {
+		if (lazy_percpu_counter_is_pcpu(v)) {
+			free_percpu(pcpu_v);
+			return;
+		}
+
+		old = v;
+		new = (unsigned long)pcpu_v | 1;
+
+		*this_cpu_ptr(pcpu_v) = lazy_percpu_counter_atomic_val(v);
+	} while ((v = atomic64_cmpxchg(&c->v, old, new)) != old);
+	preempt_enable();
+}
+
+/**
+ * lazy_percpu_counter_exit: Free resources associated with a
+ * lazy_percpu_counter
+ *
+ * @c: counter to exit
+ */
+void lazy_percpu_counter_exit(struct lazy_percpu_counter *c)
+{
+	free_percpu(lazy_percpu_counter_is_pcpu(atomic64_read(&c->v)));
+}
+EXPORT_SYMBOL_GPL(lazy_percpu_counter_exit);
+
+/**
+ * lazy_percpu_counter_read: Read current value of a lazy_percpu_counter
+ *
+ * @c: counter to read
+ */
+s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c)
+{
+	s64 v = atomic64_read(&c->v);
+	u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
+
+	if (pcpu_v) {
+		int cpu;
+
+		v = 0;
+		for_each_possible_cpu(cpu)
+			v += *per_cpu_ptr(pcpu_v, cpu);
+	} else {
+		v = lazy_percpu_counter_atomic_val(v);
+	}
+
+	return v;
+}
+EXPORT_SYMBOL_GPL(lazy_percpu_counter_read);
+
+void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i)
+{
+	u64 atomic_i;
+	u64 old, v = atomic64_read(&c->v);
+	u64 __percpu *pcpu_v;
+
+	atomic_i  = i << COUNTER_IS_PCPU_BIT;
+	atomic_i &= ~COUNTER_MOD_MASK;
+	atomic_i |= 1ULL << COUNTER_MOD_BITS_START;
+
+	do {
+		pcpu_v = lazy_percpu_counter_is_pcpu(v);
+		if (pcpu_v) {
+			this_cpu_add(*pcpu_v, i);
+			return;
+		}
+
+		old = v;
+	} while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old);
+
+	if (unlikely(!(v & COUNTER_MOD_MASK))) {
+		unsigned long now = jiffies;
+
+		if (c->last_wrap &&
+		    unlikely(time_after(c->last_wrap + HZ, now)))
+			lazy_percpu_counter_switch_to_pcpu(c);
+		else
+			c->last_wrap = now;
+	}
+}
+EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath);
+
+void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i)
+{
+	u64 atomic_i;
+	u64 old, v = atomic64_read(&c->v);
+	u64 __percpu *pcpu_v;
+
+	atomic_i  = i << COUNTER_IS_PCPU_BIT;
+	atomic_i &= ~COUNTER_MOD_MASK;
+
+	do {
+		pcpu_v = lazy_percpu_counter_is_pcpu(v);
+		if (pcpu_v) {
+			this_cpu_add(*pcpu_v, i);
+			return;
+		}
+
+		old = v;
+	} while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old);
+}
+EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath_noupgrade);