diff mbox series

[4/5] refcount: Introduce refcount_long_t and APIs

Message ID 20240502223341.1835070-4-keescook@chromium.org (mailing list archive)
State New, archived
Headers show
Series fs: Do not allow get_file() to resurrect 0 f_count | expand

Commit Message

Kees Cook May 2, 2024, 10:33 p.m. UTC
Duplicate the refcount_t types and APIs gain refcount_long_t. This is
needed for larger counters that while currently very unlikely to overflow,
still want to detect and mitigate underflow.

Generate refcount-long.h via direct string replacements. Doing macros
like compat_binfmt_elf doesn't appear to work well.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: linux-kbuild@vger.kernel.org
---
 MAINTAINERS                    |   2 +-
 Makefile                       |  11 +-
 include/linux/refcount-impl.h  | 344 +++++++++++++++++++++++++++++++++
 include/linux/refcount.h       | 341 +-------------------------------
 include/linux/refcount_types.h |  12 ++
 lib/refcount.c                 |  17 +-
 6 files changed, 385 insertions(+), 342 deletions(-)
 create mode 100644 include/linux/refcount-impl.h

Comments

kernel test robot May 6, 2024, 8:04 a.m. UTC | #1
hi, Kees Cook,

we noticed a WARNING is our boot tests. 
as we understand, the WARNING is not introduced by this patch, instead, just
changing stats as below.
however, we failed to bisect
"dmesg.WARNING:at_lib/refcount.c:#refcount_warn_saturate"
which shows in parent commit to capture real commit which introduced the WARNING

just made this report FYI what we observed in our boot tests.


8090de6a0536f462 93b9cd30de232c9b4e27221dff6
---------------- ---------------------------
       fail:runs  %reproduction    fail:runs
           |             |             |
           :6          100%           6:6     dmesg.EIP:refcount_report_saturation
          6:6         -100%            :6     dmesg.EIP:refcount_warn_saturate
           :6          100%           6:6     dmesg.WARNING:at_lib/refcount.c:#refcount_report_saturation
          6:6         -100%            :6     dmesg.WARNING:at_lib/refcount.c:#refcount_warn_saturate


Hello,

kernel test robot noticed "WARNING:at_lib/refcount.c:#refcount_report_saturation" on:

commit: 93b9cd30de232c9b4e27221dff6d02ac557b86eb ("[PATCH 4/5] refcount: Introduce refcount_long_t and APIs")
url: https://github.com/intel-lab-lkp/linux/commits/Kees-Cook/fs-Do-not-allow-get_file-to-resurrect-0-f_count/20240503-063542
base: https://git.kernel.org/cgit/linux/kernel/git/vfs/vfs.git vfs.all
patch link: https://lore.kernel.org/all/20240502223341.1835070-4-keescook@chromium.org/
patch subject: [PATCH 4/5] refcount: Introduce refcount_long_t and APIs

in testcase: boot

compiler: gcc-13
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202405061514.23fedba1-oliver.sang@intel.com


[    2.683270][    T1] ------------[ cut here ]------------
[    2.684014][    T1] refcount_t: decrement hit 0; leaking memory.
[ 2.684829][ T1] WARNING: CPU: 0 PID: 1 at lib/refcount.c:29 refcount_report_saturation (lib/refcount.c:29 (discriminator 1)) 
[    2.686080][    T1] Modules linked in:
[    2.686633][    T1] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.9.0-rc3-00078-g93b9cd30de23 #1 ade0d32fff89aed56247bad9c6991c6e60a975d2
[    2.688188][    T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 2.689523][ T1] EIP: refcount_report_saturation (lib/refcount.c:29 (discriminator 1)) 
[ 2.690327][ T1] Code: c2 01 e8 92 8a b1 ff 0f 0b 58 c9 31 c0 31 d2 31 c9 c3 8d b4 26 00 00 00 00 68 ec bc f4 c1 c6 05 3e 42 4a c2 01 e8 6f 8a b1 ff <0f> 0b 5a c9 31 c0 31 d2 31 c9 c3 55 89 c1 89 d0 c7 01 00 00 00 c0
All code
========
   0:	c2 01 e8             	ret    $0xe801
   3:	92                   	xchg   %eax,%edx
   4:	8a b1 ff 0f 0b 58    	mov    0x580b0fff(%rcx),%dh
   a:	c9                   	leave
   b:	31 c0                	xor    %eax,%eax
   d:	31 d2                	xor    %edx,%edx
   f:	31 c9                	xor    %ecx,%ecx
  11:	c3                   	ret
  12:	8d b4 26 00 00 00 00 	lea    0x0(%rsi,%riz,1),%esi
  19:	68 ec bc f4 c1       	push   $0xffffffffc1f4bcec
  1e:	c6 05 3e 42 4a c2 01 	movb   $0x1,-0x3db5bdc2(%rip)        # 0xffffffffc24a4263
  25:	e8 6f 8a b1 ff       	call   0xffffffffffb18a99
  2a:*	0f 0b                	ud2		<-- trapping instruction
  2c:	5a                   	pop    %rdx
  2d:	c9                   	leave
  2e:	31 c0                	xor    %eax,%eax
  30:	31 d2                	xor    %edx,%edx
  32:	31 c9                	xor    %ecx,%ecx
  34:	c3                   	ret
  35:	55                   	push   %rbp
  36:	89 c1                	mov    %eax,%ecx
  38:	89 d0                	mov    %edx,%eax
  3a:	c7 01 00 00 00 c0    	movl   $0xc0000000,(%rcx)

Code starting with the faulting instruction
===========================================
   0:	0f 0b                	ud2
   2:	5a                   	pop    %rdx
   3:	c9                   	leave
   4:	31 c0                	xor    %eax,%eax
   6:	31 d2                	xor    %edx,%edx
   8:	31 c9                	xor    %ecx,%ecx
   a:	c3                   	ret
   b:	55                   	push   %rbp
   c:	89 c1                	mov    %eax,%ecx
   e:	89 d0                	mov    %edx,%eax
  10:	c7 01 00 00 00 c0    	movl   $0xc0000000,(%rcx)
[    2.692770][    T1] EAX: 00000000 EBX: e19ee0c4 ECX: 00000000 EDX: 00000000
[    2.693685][    T1] ESI: e19ee0c0 EDI: c37ad880 EBP: c3767dd0 ESP: c3767dcc
[    2.694597][    T1] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00010246
[    2.695638][    T1] CR0: 80050033 CR2: ffcb2000 CR3: 02723000 CR4: 00040690
[    2.696557][    T1] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[    2.697464][    T1] DR6: fffe0ff0 DR7: 00000400
[    2.698106][    T1] Call Trace:
[ 2.698586][ T1] ? show_regs (arch/x86/kernel/dumpstack.c:479) 
[ 2.699197][ T1] ? refcount_report_saturation (lib/refcount.c:29 (discriminator 1)) 
[ 2.699975][ T1] ? __warn (kernel/panic.c:694) 
[ 2.700547][ T1] ? refcount_report_saturation (lib/refcount.c:29 (discriminator 1)) 
[ 2.701322][ T1] ? report_bug (lib/bug.c:201 lib/bug.c:219) 
[ 2.701960][ T1] ? exc_overflow (arch/x86/kernel/traps.c:252) 
[ 2.702584][ T1] ? handle_bug (arch/x86/kernel/traps.c:218) 
[ 2.703208][ T1] ? exc_invalid_op (arch/x86/kernel/traps.c:260 (discriminator 1)) 
[ 2.703850][ T1] ? handle_exception (arch/x86/entry/entry_32.S:1047) 
[ 2.704538][ T1] ? devkmsg_open (kernel/printk/printk.c:596 kernel/printk/printk.c:615 kernel/printk/printk.c:919) 
[ 2.705173][ T1] ? exc_overflow (arch/x86/kernel/traps.c:252) 
[ 2.705798][ T1] ? refcount_report_saturation (lib/refcount.c:29 (discriminator 1)) 
[ 2.706578][ T1] ? exc_overflow (arch/x86/kernel/traps.c:252) 
[ 2.707215][ T1] ? refcount_report_saturation (lib/refcount.c:29 (discriminator 1)) 
[ 2.707994][ T1] refcount_warn_saturate (lib/refcount.c:40) 
[ 2.708693][ T1] __reset_page_owner (include/linux/refcount-impl.h:318 include/linux/refcount-impl.h:333 mm/page_owner.c:228 mm/page_owner.c:266) 
[ 2.709377][ T1] __free_pages_ok (include/linux/mmzone.h:1100 mm/page_alloc.c:1144 mm/page_alloc.c:1270) 
[ 2.710036][ T1] make_alloc_exact (mm/page_alloc.c:4828 (discriminator 1)) 
[ 2.710687][ T1] alloc_pages_exact (mm/page_alloc.c:4859) 
[ 2.711357][ T1] alloc_large_system_hash (mm/mm_init.c:2531) 
[ 2.712095][ T1] inet_hashinfo2_init (net/ipv4/inet_hashtables.c:1193 (discriminator 1)) 
[ 2.712770][ T1] tcp_init (net/ipv4/tcp.c:4714) 
[ 2.713348][ T1] inet_init (net/ipv4/af_inet.c:2032) 
[ 2.713949][ T1] ? ipv4_offload_init (net/ipv4/af_inet.c:1942) 
[ 2.714640][ T1] do_one_initcall (init/main.c:1238) 
[ 2.715294][ T1] ? rdinit_setup (init/main.c:1286) 
[ 2.715927][ T1] do_initcalls (init/main.c:1299 (discriminator 1) init/main.c:1316 (discriminator 1)) 
[ 2.716542][ T1] ? rest_init (init/main.c:1429) 
[ 2.717157][ T1] kernel_init_freeable (init/main.c:1550) 
[ 2.717852][ T1] kernel_init (init/main.c:1439) 
[ 2.718455][ T1] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 2.719085][ T1] ? rest_init (init/main.c:1429) 
[ 2.719700][ T1] ret_from_fork_asm (arch/x86/entry/entry_32.S:737) 
[ 2.720348][ T1] entry_INT80_32 (arch/x86/entry/entry_32.S:944) 
[    2.720999][    T1] irq event stamp: 263893
[ 2.721599][ T1] hardirqs last enabled at (263903): console_unlock (arch/x86/include/asm/irqflags.h:26 arch/x86/include/asm/irqflags.h:67 arch/x86/include/asm/irqflags.h:127 kernel/printk/printk.c:341 kernel/printk/printk.c:2731 kernel/printk/printk.c:3050) 
[ 2.722782][ T1] hardirqs last disabled at (263912): console_unlock (kernel/printk/printk.c:339 (discriminator 3) kernel/printk/printk.c:2731 (discriminator 3) kernel/printk/printk.c:3050 (discriminator 3)) 
[ 2.723947][ T1] softirqs last enabled at (263164): release_sock (net/core/sock.c:3560) 
[ 2.725080][ T1] softirqs last disabled at (263162): release_sock (net/core/sock.c:3549) 
[    2.726210][    T1] ---[ end trace 0000000000000000 ]---



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240506/202405061514.23fedba1-oliver.sang@intel.com
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 7c121493f43d..2e6c8eaab194 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3360,7 +3360,7 @@  S:	Maintained
 F:	Documentation/atomic_*.txt
 F:	arch/*/include/asm/atomic*.h
 F:	include/*/atomic*.h
-F:	include/linux/refcount.h
+F:	include/linux/refcount*.h
 F:	scripts/atomic/
 
 ATTO EXPRESSSAS SAS/SATA RAID SCSI DRIVER
diff --git a/Makefile b/Makefile
index 4bef6323c47d..a4bdcd34f323 100644
--- a/Makefile
+++ b/Makefile
@@ -1190,7 +1190,9 @@  PHONY += prepare archprepare
 
 archprepare: outputmakefile archheaders archscripts scripts include/config/kernel.release \
 	asm-generic $(version_h) include/generated/utsrelease.h \
-	include/generated/compile.h include/generated/autoconf.h remove-stale-files
+	include/generated/compile.h include/generated/autoconf.h \
+	include/generated/refcount-long.h \
+	remove-stale-files
 
 prepare0: archprepare
 	$(Q)$(MAKE) $(build)=scripts/mod
@@ -1262,6 +1264,13 @@  filechk_compile.h = $(srctree)/scripts/mkcompile_h \
 include/generated/compile.h: FORCE
 	$(call filechk,compile.h)
 
+include/generated/refcount-long.h: $(srctree)/include/linux/refcount-impl.h
+	$(Q)$(PERL) -pe 's/\b(atomic|(__)?refcount)_/\1_long_/g;	\
+			 s/ATOMIC_/ATOMIC_LONG_/g;			\
+			 s/(REFCOUNT)_(IMPL|INIT|MAX|SAT)/\1_LONG_\2/g;	\
+			 s/\b(U?)INT_/\1LONG_/g;			\
+			 s/\bint\b/long/g;' $< >$@
+
 PHONY += headerdep
 headerdep:
 	$(Q)find $(srctree)/include/ -name '*.h' | xargs --max-args 1 \
diff --git a/include/linux/refcount-impl.h b/include/linux/refcount-impl.h
new file mode 100644
index 000000000000..f5c73a0f46a4
--- /dev/null
+++ b/include/linux/refcount-impl.h
@@ -0,0 +1,344 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * Saturation semantics
+ * ====================
+ *
+ * refcount_t differs from atomic_t in that the counter saturates at
+ * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the
+ * counter and causing 'spurious' use-after-free issues. In order to avoid the
+ * cost associated with introducing cmpxchg() loops into all of the saturating
+ * operations, we temporarily allow the counter to take on an unchecked value
+ * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow
+ * or overflow has occurred. Although this is racy when multiple threads
+ * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly
+ * equidistant from 0 and INT_MAX we minimise the scope for error:
+ *
+ *	                           INT_MAX     REFCOUNT_SATURATED   UINT_MAX
+ *   0                          (0x7fff_ffff)    (0xc000_0000)    (0xffff_ffff)
+ *   +--------------------------------+----------------+----------------+
+ *                                     <---------- bad value! ---------->
+ *
+ * (in a signed view of the world, the "bad value" range corresponds to
+ * a negative counter value).
+ *
+ * As an example, consider a refcount_inc() operation that causes the counter
+ * to overflow:
+ *
+ *	int old = atomic_fetch_add_relaxed(r);
+ *	// old is INT_MAX, refcount now INT_MIN (0x8000_0000)
+ *	if (old < 0)
+ *		atomic_set(r, REFCOUNT_SATURATED);
+ *
+ * If another thread also performs a refcount_inc() operation between the two
+ * atomic operations, then the count will continue to edge closer to 0. If it
+ * reaches a value of 1 before /any/ of the threads reset it to the saturated
+ * value, then a concurrent refcount_dec_and_test() may erroneously free the
+ * underlying object.
+ * Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently
+ * 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK).
+ * With the current PID limit, if no batched refcounting operations are used and
+ * the attacker can't repeatedly trigger kernel oopses in the middle of refcount
+ * operations, this makes it impossible for a saturated refcount to leave the
+ * saturation range, even if it is possible for multiple uses of the same
+ * refcount to nest in the context of a single task:
+ *
+ *     (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT =
+ *     0x40000000 / 0x400000 = 0x100 = 256
+ *
+ * If hundreds of references are added/removed with a single refcounting
+ * operation, it may potentially be possible to leave the saturation range; but
+ * given the precise timing details involved with the round-robin scheduling of
+ * each thread manipulating the refcount and the need to hit the race multiple
+ * times in succession, there doesn't appear to be a practical avenue of attack
+ * even if using refcount_add() operations with larger increments.
+ *
+ * Memory ordering
+ * ===============
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * its the lock acquire, for RCU/lockless data structures its the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc, this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before, it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ * The decrements dec_and_test() and sub_and_test() also provide acquire
+ * ordering on success.
+ *
+ */
+#ifndef _LINUX_REFCOUNT_IMPL_H
+#define _LINUX_REFCOUNT_IMPL_H
+
+#define REFCOUNT_INIT(n)	{ .refs = ATOMIC_INIT(n), }
+#define REFCOUNT_MAX		INT_MAX
+#define REFCOUNT_SATURATED	(INT_MIN / 2)
+
+void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t);
+
+/**
+ * refcount_set - set a refcount's value
+ * @r: the refcount
+ * @n: value to which the refcount will be set
+ */
+static inline void refcount_set(refcount_t *r, int n)
+{
+	atomic_set(&r->refs, n);
+}
+
+/**
+ * refcount_read - get a refcount's value
+ * @r: the refcount
+ *
+ * Return: the refcount's value
+ */
+static inline unsigned int refcount_read(const refcount_t *r)
+{
+	return atomic_read(&r->refs);
+}
+
+static inline __must_check __signed_wrap
+bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
+{
+	int old = refcount_read(r);
+
+	do {
+		if (!old)
+			break;
+	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i));
+
+	if (oldp)
+		*oldp = old;
+
+	if (unlikely(old < 0 || old + i < 0))
+		refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);
+
+	return old;
+}
+
+/**
+ * refcount_add_not_zero - add a value to a refcount unless it is 0
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time.  In these
+ * cases, refcount_inc(), or one of its variants, should instead be used to
+ * increment a reference count.
+ *
+ * Return: false if the passed refcount is 0, true otherwise
+ */
+static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
+{
+	return __refcount_add_not_zero(i, r, NULL);
+}
+
+static inline __signed_wrap
+void __refcount_add(int i, refcount_t *r, int *oldp)
+{
+	int old = atomic_fetch_add_relaxed(i, &r->refs);
+
+	if (oldp)
+		*oldp = old;
+
+	if (unlikely(!old))
+		refcount_warn_saturate(r, REFCOUNT_ADD_UAF);
+	else if (unlikely(old < 0 || old + i < 0))
+		refcount_warn_saturate(r, REFCOUNT_ADD_OVF);
+}
+
+/**
+ * refcount_add - add a value to a refcount
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time.  In these
+ * cases, refcount_inc(), or one of its variants, should instead be used to
+ * increment a reference count.
+ */
+static inline void refcount_add(int i, refcount_t *r)
+{
+	__refcount_add(i, r, NULL);
+}
+
+static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp)
+{
+	return __refcount_add_not_zero(1, r, oldp);
+}
+
+/**
+ * refcount_inc_not_zero - increment a refcount unless it is 0
+ * @r: the refcount to increment
+ *
+ * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED
+ * and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Return: true if the increment was successful, false otherwise
+ */
+static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+	return __refcount_inc_not_zero(r, NULL);
+}
+
+static inline void __refcount_inc(refcount_t *r, int *oldp)
+{
+	__refcount_add(1, r, oldp);
+}
+
+/**
+ * refcount_inc - increment a refcount
+ * @r: the refcount to increment
+ *
+ * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller already has a
+ * reference on the object.
+ *
+ * Will WARN if the refcount is 0, as this represents a possible use-after-free
+ * condition.
+ */
+static inline void refcount_inc(refcount_t *r)
+{
+	__refcount_inc(r, NULL);
+}
+
+static inline __must_check __signed_wrap
+bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
+{
+	int old = atomic_fetch_sub_release(i, &r->refs);
+
+	if (oldp)
+		*oldp = old;
+
+	if (old == i) {
+		smp_acquire__after_ctrl_dep();
+		return true;
+	}
+
+	if (unlikely(old < 0 || old - i < 0))
+		refcount_warn_saturate(r, REFCOUNT_SUB_UAF);
+
+	return false;
+}
+
+/**
+ * refcount_sub_and_test - subtract from a refcount and test if it is 0
+ * @i: amount to subtract from the refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_dec_and_test(), but it will WARN, return false and
+ * ultimately leak on underflow and will fail to decrement when saturated
+ * at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time.  In these
+ * cases, refcount_dec(), or one of its variants, should instead be used to
+ * decrement a reference count.
+ *
+ * Return: true if the resulting refcount is 0, false otherwise
+ */
+static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
+{
+	return __refcount_sub_and_test(i, r, NULL);
+}
+
+static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp)
+{
+	return __refcount_sub_and_test(1, r, oldp);
+}
+
+/**
+ * refcount_dec_and_test - decrement a refcount and test if it is 0
+ * @r: the refcount
+ *
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before, and provides an acquire ordering on success such that free()
+ * must come after.
+ *
+ * Return: true if the resulting refcount is 0, false otherwise
+ */
+static inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+	return __refcount_dec_and_test(r, NULL);
+}
+
+static inline void __refcount_dec(refcount_t *r, int *oldp)
+{
+	int old = atomic_fetch_sub_release(1, &r->refs);
+
+	if (oldp)
+		*oldp = old;
+
+	if (unlikely(old <= 1))
+		refcount_warn_saturate(r, REFCOUNT_DEC_LEAK);
+}
+
+/**
+ * refcount_dec - decrement a refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
+ * when saturated at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before.
+ */
+static inline void refcount_dec(refcount_t *r)
+{
+	__refcount_dec(r, NULL);
+}
+
+extern __must_check bool refcount_dec_if_one(refcount_t *r);
+extern __must_check bool refcount_dec_not_one(refcount_t *r);
+extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock);
+extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock);
+extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
+						       spinlock_t *lock,
+						       unsigned long *flags) __cond_acquires(lock);
+
+#endif /* _LINUX_REFCOUNT_IMPL_H */
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 59b3b752394d..a744f814374f 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -1,94 +1,4 @@ 
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Variant of atomic_t specialized for reference counts.
- *
- * The interface matches the atomic_t interface (to aid in porting) but only
- * provides the few functions one should use for reference counting.
- *
- * Saturation semantics
- * ====================
- *
- * refcount_t differs from atomic_t in that the counter saturates at
- * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the
- * counter and causing 'spurious' use-after-free issues. In order to avoid the
- * cost associated with introducing cmpxchg() loops into all of the saturating
- * operations, we temporarily allow the counter to take on an unchecked value
- * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow
- * or overflow has occurred. Although this is racy when multiple threads
- * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly
- * equidistant from 0 and INT_MAX we minimise the scope for error:
- *
- * 	                           INT_MAX     REFCOUNT_SATURATED   UINT_MAX
- *   0                          (0x7fff_ffff)    (0xc000_0000)    (0xffff_ffff)
- *   +--------------------------------+----------------+----------------+
- *                                     <---------- bad value! ---------->
- *
- * (in a signed view of the world, the "bad value" range corresponds to
- * a negative counter value).
- *
- * As an example, consider a refcount_inc() operation that causes the counter
- * to overflow:
- *
- * 	int old = atomic_fetch_add_relaxed(r);
- *	// old is INT_MAX, refcount now INT_MIN (0x8000_0000)
- *	if (old < 0)
- *		atomic_set(r, REFCOUNT_SATURATED);
- *
- * If another thread also performs a refcount_inc() operation between the two
- * atomic operations, then the count will continue to edge closer to 0. If it
- * reaches a value of 1 before /any/ of the threads reset it to the saturated
- * value, then a concurrent refcount_dec_and_test() may erroneously free the
- * underlying object.
- * Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently
- * 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK).
- * With the current PID limit, if no batched refcounting operations are used and
- * the attacker can't repeatedly trigger kernel oopses in the middle of refcount
- * operations, this makes it impossible for a saturated refcount to leave the
- * saturation range, even if it is possible for multiple uses of the same
- * refcount to nest in the context of a single task:
- *
- *     (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT =
- *     0x40000000 / 0x400000 = 0x100 = 256
- *
- * If hundreds of references are added/removed with a single refcounting
- * operation, it may potentially be possible to leave the saturation range; but
- * given the precise timing details involved with the round-robin scheduling of
- * each thread manipulating the refcount and the need to hit the race multiple
- * times in succession, there doesn't appear to be a practical avenue of attack
- * even if using refcount_add() operations with larger increments.
- *
- * Memory ordering
- * ===============
- *
- * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
- * and provide only what is strictly required for refcounts.
- *
- * The increments are fully relaxed; these will not provide ordering. The
- * rationale is that whatever is used to obtain the object we're increasing the
- * reference count on will provide the ordering. For locked data structures,
- * its the lock acquire, for RCU/lockless data structures its the dependent
- * load.
- *
- * Do note that inc_not_zero() provides a control dependency which will order
- * future stores against the inc, this ensures we'll never modify the object
- * if we did not in fact acquire a reference.
- *
- * The decrements will provide release order, such that all the prior loads and
- * stores will be issued before, it also provides a control dependency, which
- * will order us against the subsequent free().
- *
- * The control dependency is against the load of the cmpxchg (ll/sc) that
- * succeeded. This means the stores aren't fully ordered, but this is fine
- * because the 1->0 transition indicates no concurrency.
- *
- * Note that the allocator is responsible for ordering things between free()
- * and alloc().
- *
- * The decrements dec_and_test() and sub_and_test() also provide acquire
- * ordering on success.
- *
- */
-
 #ifndef _LINUX_REFCOUNT_H
 #define _LINUX_REFCOUNT_H
 
@@ -101,10 +11,6 @@ 
 
 struct mutex;
 
-#define REFCOUNT_INIT(n)	{ .refs = ATOMIC_INIT(n), }
-#define REFCOUNT_MAX		INT_MAX
-#define REFCOUNT_SATURATED	(INT_MIN / 2)
-
 enum refcount_saturation_type {
 	REFCOUNT_ADD_NOT_ZERO_OVF,
 	REFCOUNT_ADD_OVF,
@@ -113,249 +19,10 @@  enum refcount_saturation_type {
 	REFCOUNT_DEC_LEAK,
 };
 
-void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t);
-
-/**
- * refcount_set - set a refcount's value
- * @r: the refcount
- * @n: value to which the refcount will be set
- */
-static inline void refcount_set(refcount_t *r, int n)
-{
-	atomic_set(&r->refs, n);
-}
-
-/**
- * refcount_read - get a refcount's value
- * @r: the refcount
- *
- * Return: the refcount's value
- */
-static inline unsigned int refcount_read(const refcount_t *r)
-{
-	return atomic_read(&r->refs);
-}
-
-static inline __must_check __signed_wrap
-bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
-{
-	int old = refcount_read(r);
-
-	do {
-		if (!old)
-			break;
-	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i));
-
-	if (oldp)
-		*oldp = old;
-
-	if (unlikely(old < 0 || old + i < 0))
-		refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);
-
-	return old;
-}
-
-/**
- * refcount_add_not_zero - add a value to a refcount unless it is 0
- * @i: the value to add to the refcount
- * @r: the refcount
- *
- * Will saturate at REFCOUNT_SATURATED and WARN.
- *
- * Provides no memory ordering, it is assumed the caller has guaranteed the
- * object memory to be stable (RCU, etc.). It does provide a control dependency
- * and thereby orders future stores. See the comment on top.
- *
- * Use of this function is not recommended for the normal reference counting
- * use case in which references are taken and released one at a time.  In these
- * cases, refcount_inc(), or one of its variants, should instead be used to
- * increment a reference count.
- *
- * Return: false if the passed refcount is 0, true otherwise
- */
-static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
-{
-	return __refcount_add_not_zero(i, r, NULL);
-}
-
-static inline __signed_wrap
-void __refcount_add(int i, refcount_t *r, int *oldp)
-{
-	int old = atomic_fetch_add_relaxed(i, &r->refs);
-
-	if (oldp)
-		*oldp = old;
-
-	if (unlikely(!old))
-		refcount_warn_saturate(r, REFCOUNT_ADD_UAF);
-	else if (unlikely(old < 0 || old + i < 0))
-		refcount_warn_saturate(r, REFCOUNT_ADD_OVF);
-}
-
-/**
- * refcount_add - add a value to a refcount
- * @i: the value to add to the refcount
- * @r: the refcount
- *
- * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN.
- *
- * Provides no memory ordering, it is assumed the caller has guaranteed the
- * object memory to be stable (RCU, etc.). It does provide a control dependency
- * and thereby orders future stores. See the comment on top.
- *
- * Use of this function is not recommended for the normal reference counting
- * use case in which references are taken and released one at a time.  In these
- * cases, refcount_inc(), or one of its variants, should instead be used to
- * increment a reference count.
- */
-static inline void refcount_add(int i, refcount_t *r)
-{
-	__refcount_add(i, r, NULL);
-}
-
-static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp)
-{
-	return __refcount_add_not_zero(1, r, oldp);
-}
-
-/**
- * refcount_inc_not_zero - increment a refcount unless it is 0
- * @r: the refcount to increment
- *
- * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED
- * and WARN.
- *
- * Provides no memory ordering, it is assumed the caller has guaranteed the
- * object memory to be stable (RCU, etc.). It does provide a control dependency
- * and thereby orders future stores. See the comment on top.
- *
- * Return: true if the increment was successful, false otherwise
- */
-static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
-{
-	return __refcount_inc_not_zero(r, NULL);
-}
-
-static inline void __refcount_inc(refcount_t *r, int *oldp)
-{
-	__refcount_add(1, r, oldp);
-}
-
-/**
- * refcount_inc - increment a refcount
- * @r: the refcount to increment
- *
- * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN.
- *
- * Provides no memory ordering, it is assumed the caller already has a
- * reference on the object.
- *
- * Will WARN if the refcount is 0, as this represents a possible use-after-free
- * condition.
- */
-static inline void refcount_inc(refcount_t *r)
-{
-	__refcount_inc(r, NULL);
-}
-
-static inline __must_check __signed_wrap
-bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
-{
-	int old = atomic_fetch_sub_release(i, &r->refs);
-
-	if (oldp)
-		*oldp = old;
-
-	if (old == i) {
-		smp_acquire__after_ctrl_dep();
-		return true;
-	}
-
-	if (unlikely(old < 0 || old - i < 0))
-		refcount_warn_saturate(r, REFCOUNT_SUB_UAF);
-
-	return false;
-}
-
-/**
- * refcount_sub_and_test - subtract from a refcount and test if it is 0
- * @i: amount to subtract from the refcount
- * @r: the refcount
- *
- * Similar to atomic_dec_and_test(), but it will WARN, return false and
- * ultimately leak on underflow and will fail to decrement when saturated
- * at REFCOUNT_SATURATED.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides an acquire ordering on success such that free()
- * must come after.
- *
- * Use of this function is not recommended for the normal reference counting
- * use case in which references are taken and released one at a time.  In these
- * cases, refcount_dec(), or one of its variants, should instead be used to
- * decrement a reference count.
- *
- * Return: true if the resulting refcount is 0, false otherwise
- */
-static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
-{
-	return __refcount_sub_and_test(i, r, NULL);
-}
-
-static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp)
-{
-	return __refcount_sub_and_test(1, r, oldp);
-}
-
-/**
- * refcount_dec_and_test - decrement a refcount and test if it is 0
- * @r: the refcount
- *
- * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
- * decrement when saturated at REFCOUNT_SATURATED.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before, and provides an acquire ordering on success such that free()
- * must come after.
- *
- * Return: true if the resulting refcount is 0, false otherwise
- */
-static inline __must_check bool refcount_dec_and_test(refcount_t *r)
-{
-	return __refcount_dec_and_test(r, NULL);
-}
-
-static inline void __refcount_dec(refcount_t *r, int *oldp)
-{
-	int old = atomic_fetch_sub_release(1, &r->refs);
-
-	if (oldp)
-		*oldp = old;
-
-	if (unlikely(old <= 1))
-		refcount_warn_saturate(r, REFCOUNT_DEC_LEAK);
-}
+/* Make the generation of refcount_long_t easier. */
+#define refcount_long_saturation_type refcount_saturation_type
 
-/**
- * refcount_dec - decrement a refcount
- * @r: the refcount
- *
- * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
- * when saturated at REFCOUNT_SATURATED.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before.
- */
-static inline void refcount_dec(refcount_t *r)
-{
-	__refcount_dec(r, NULL);
-}
+#include <linux/refcount-impl.h>
+#include <generated/refcount-long.h>
 
-extern __must_check bool refcount_dec_if_one(refcount_t *r);
-extern __must_check bool refcount_dec_not_one(refcount_t *r);
-extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock);
-extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock);
-extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
-						       spinlock_t *lock,
-						       unsigned long *flags) __cond_acquires(lock);
 #endif /* _LINUX_REFCOUNT_H */
diff --git a/include/linux/refcount_types.h b/include/linux/refcount_types.h
index 162004f06edf..6ea02d6a9623 100644
--- a/include/linux/refcount_types.h
+++ b/include/linux/refcount_types.h
@@ -16,4 +16,16 @@  typedef struct refcount_struct {
 	atomic_t refs;
 } refcount_t;
 
+/**
+ * typedef refcount_long_t - variant of atomic64_t specialized for reference counts
+ * @refs: atomic_long_t counter field
+ *
+ * The counter saturates at REFCOUNT_LONG_SATURATED and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free bugs.
+ */
+typedef struct refcount_long_struct {
+	atomic_long_t refs;
+} refcount_long_t;
+
 #endif /* _LINUX_REFCOUNT_TYPES_H */
diff --git a/lib/refcount.c b/lib/refcount.c
index a207a8f22b3c..201304b7d7a5 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -10,10 +10,8 @@ 
 
 #define REFCOUNT_WARN(str)	WARN_ONCE(1, "refcount_t: " str ".\n")
 
-void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t)
+static void refcount_report_saturation(enum refcount_saturation_type t)
 {
-	refcount_set(r, REFCOUNT_SATURATED);
-
 	switch (t) {
 	case REFCOUNT_ADD_NOT_ZERO_OVF:
 		REFCOUNT_WARN("saturated; leaking memory");
@@ -34,8 +32,21 @@  void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t)
 		REFCOUNT_WARN("unknown saturation event!?");
 	}
 }
+
+void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t)
+{
+	refcount_set(r, REFCOUNT_SATURATED);
+	refcount_report_saturation(t);
+}
 EXPORT_SYMBOL(refcount_warn_saturate);
 
+void refcount_long_warn_saturate(refcount_long_t *r, enum refcount_saturation_type t)
+{
+	refcount_long_set(r, REFCOUNT_LONG_SATURATED);
+	refcount_report_saturation(t);
+}
+EXPORT_SYMBOL(refcount_long_warn_saturate);
+
 /**
  * refcount_dec_if_one - decrement a refcount if it is 1
  * @r: the refcount