
[bpf-next,v8,3/6] locking/local_lock: Introduce localtry_lock_t

Message ID 20250213033556.9534-4-alexei.starovoitov@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Series bpf, mm: Introduce try_alloc_pages()

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 553 this patch: 553
netdev/build_tools success Errors and warnings before: 26 (+1) this patch: 26 (+1)
netdev/cc_maintainers warning 3 maintainers not CCed: clrkwllms@kernel.org linux-rt-devel@lists.linux.dev kuba@kernel.org
netdev/build_clang success Errors and warnings before: 24112 this patch: 24112
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 15239 this patch: 15239
netdev/checkpatch warning WARNING: do not add new typedefs WARNING: line length of 83 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc fail Errors and warnings before: 1 this patch: 4
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-48 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18

Commit Message

Alexei Starovoitov Feb. 13, 2025, 3:35 a.m. UTC
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

In !PREEMPT_RT local_lock_irqsave() disables interrupts to protect the
critical section, but it doesn't prevent NMI, so the fully reentrant
code cannot use local_lock_irqsave() for exclusive access.

Introduce localtry_lock_t and localtry_lock_irqsave(), which
disables interrupts and sets acquired=1, so localtry_trylock_irqsave()
from NMI attempting to acquire the same lock will return false.

In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock().
Map localtry_trylock_irqsave() to preemptible spin_trylock().
When in hard IRQ or NMI, return false right away, since
spin_trylock() is not safe due to PI issues.

Note there is no need to use local_inc for the acquired variable,
since it's a percpu variable with strict nesting scopes.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/local_lock.h          |  59 +++++++++++++
 include/linux/local_lock_internal.h | 123 ++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+)
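
For illustration, a minimal usage sketch of the API introduced below. All
struct and function names here are hypothetical, not part of the patch: a
per-CPU structure updated from both task context and NMI, where only the
NMI side has to handle trylock failure.

#include <linux/local_lock.h>
#include <linux/percpu.h>

/* Illustrative only: struct, variable and function names are made up. */
struct my_pcpu_state {
	localtry_lock_t	lock;
	unsigned long	count;
};

static DEFINE_PER_CPU(struct my_pcpu_state, my_state) = {
	.lock = INIT_LOCALTRY_LOCK(lock),
};

/* Task/softirq context: acquisition cannot fail. */
static void my_state_add(unsigned long v)
{
	unsigned long flags;

	localtry_lock_irqsave(&my_state.lock, flags);
	this_cpu_add(my_state.count, v);
	localtry_unlock_irqrestore(&my_state.lock, flags);
}

/* NMI (or other reentrant) context: must be prepared to back off. */
static bool my_state_add_nmi(unsigned long v)
{
	unsigned long flags;

	if (!localtry_trylock_irqsave(&my_state.lock, flags))
		return false;	/* lock already owned on this CPU */
	this_cpu_add(my_state.count, v);
	localtry_unlock_irqrestore(&my_state.lock, flags);
	return true;
}

On !PREEMPT_RT the NMI-side trylock fails only if it interrupted the critical
section on the same CPU; on PREEMPT_RT it fails for any NMI/hard-IRQ caller,
as discussed in the thread below.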

Comments

Vlastimil Babka Feb. 13, 2025, 3:03 p.m. UTC | #1
On 2/13/25 04:35, Alexei Starovoitov wrote:
> From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> 
> In !PREEMPT_RT local_lock_irqsave() disables interrupts to protect
> critical section, but it doesn't prevent NMI, so the fully reentrant
> code cannot use local_lock_irqsave() for exclusive access.
> 
> Introduce localtry_lock_t and localtry_lock_irqsave() that
> disables interrupts and sets acquired=1, so localtry_lock_irqsave()
> from NMI attempting to acquire the same lock will return false.
> 
> In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock().
> Map localtry_lock_irqsave() to preemptible spin_trylock().
> When in hard IRQ or NMI return false right away, since
> spin_trylock() is not safe due to PI issues.
> 
> Note there is no need to use local_inc for acquired variable,
> since it's a percpu variable with strict nesting scopes.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  include/linux/local_lock.h          |  59 +++++++++++++
>  include/linux/local_lock_internal.h | 123 ++++++++++++++++++++++++++++
>  2 files changed, 182 insertions(+)
> 
> diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> index 091dc0b6bdfb..05c254a5d7d3 100644
> --- a/include/linux/local_lock.h
> +++ b/include/linux/local_lock.h
> @@ -51,6 +51,65 @@
>  #define local_unlock_irqrestore(lock, flags)			\
>  	__local_unlock_irqrestore(lock, flags)
>  
> +/**
> + * localtry_lock_init - Runtime initialize a lock instance
> + */
> +#define localtry_lock_init(lock)		__localtry_lock_init(lock)
> +
> +/**
> + * localtry_lock - Acquire a per CPU local lock
> + * @lock:	The lock variable
> + */
> +#define localtry_lock(lock)		__localtry_lock(lock)
> +
> +/**
> + * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts
> + * @lock:	The lock variable
> + */
> +#define localtry_lock_irq(lock)		__localtry_lock_irq(lock)
> +
> +/**
> + * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable
> + *			 interrupts
> + * @lock:	The lock variable
> + * @flags:	Storage for interrupt flags
> + */
> +#define localtry_lock_irqsave(lock, flags)				\
> +	__localtry_lock_irqsave(lock, flags)
> +
> +/**
> + * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
> + *			      interrupts if acquired
> + * @lock:	The lock variable
> + * @flags:	Storage for interrupt flags
> + *
> + * The function can be used in any context such as NMI or HARDIRQ. Due to
> + * locking constrains it will _always_ fail to acquire the lock on PREEMPT_RT.

The "always fail" applies only to the NMI and HARDIRQ contexts, right? It's
not entirely obvious so it sounds worse than it is.

> +
> +#define __localtry_trylock_irqsave(lock, flags)			\
> +	({							\
> +		int __locked;					\
> +								\
> +		typecheck(unsigned long, flags);		\
> +		flags = 0;					\
> +		if (in_nmi() | in_hardirq()) {			\
> +			__locked = 0;				\

Because of this, IIUC?

> +		} else {					\
> +			migrate_disable();			\
> +			__locked = spin_trylock(this_cpu_ptr((lock)));	\
> +			if (!__locked)				\
> +				migrate_enable();		\
> +		}						\
> +		__locked;					\
> +	})
> +
>  #endif /* CONFIG_PREEMPT_RT */
Alexei Starovoitov Feb. 13, 2025, 3:23 p.m. UTC | #2
On Thu, Feb 13, 2025 at 7:04 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 2/13/25 04:35, Alexei Starovoitov wrote:
> > From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> >
> > In !PREEMPT_RT local_lock_irqsave() disables interrupts to protect
> > critical section, but it doesn't prevent NMI, so the fully reentrant
> > code cannot use local_lock_irqsave() for exclusive access.
> >
> > Introduce localtry_lock_t and localtry_lock_irqsave() that
> > disables interrupts and sets acquired=1, so localtry_lock_irqsave()
> > from NMI attempting to acquire the same lock will return false.
> >
> > In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock().
> > Map localtry_lock_irqsave() to preemptible spin_trylock().
> > When in hard IRQ or NMI return false right away, since
> > spin_trylock() is not safe due to PI issues.
> >
> > Note there is no need to use local_inc for acquired variable,
> > since it's a percpu variable with strict nesting scopes.
> >
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> > ---
> >  include/linux/local_lock.h          |  59 +++++++++++++
> >  include/linux/local_lock_internal.h | 123 ++++++++++++++++++++++++++++
> >  2 files changed, 182 insertions(+)
> >
> > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> > index 091dc0b6bdfb..05c254a5d7d3 100644
> > --- a/include/linux/local_lock.h
> > +++ b/include/linux/local_lock.h
> > @@ -51,6 +51,65 @@
> >  #define local_unlock_irqrestore(lock, flags)                 \
> >       __local_unlock_irqrestore(lock, flags)
> >
> > +/**
> > + * localtry_lock_init - Runtime initialize a lock instance
> > + */
> > +#define localtry_lock_init(lock)             __localtry_lock_init(lock)
> > +
> > +/**
> > + * localtry_lock - Acquire a per CPU local lock
> > + * @lock:    The lock variable
> > + */
> > +#define localtry_lock(lock)          __localtry_lock(lock)
> > +
> > +/**
> > + * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts
> > + * @lock:    The lock variable
> > + */
> > +#define localtry_lock_irq(lock)              __localtry_lock_irq(lock)
> > +
> > +/**
> > + * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable
> > + *                    interrupts
> > + * @lock:    The lock variable
> > + * @flags:   Storage for interrupt flags
> > + */
> > +#define localtry_lock_irqsave(lock, flags)                           \
> > +     __localtry_lock_irqsave(lock, flags)
> > +
> > +/**
> > + * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
> > + *                         interrupts if acquired
> > + * @lock:    The lock variable
> > + * @flags:   Storage for interrupt flags
> > + *
> > + * The function can be used in any context such as NMI or HARDIRQ. Due to
> > + * locking constrains it will _always_ fail to acquire the lock on PREEMPT_RT.
>
> The "always fail" applies only to the NMI and HARDIRQ contexts, right? It's
> not entirely obvious so it sounds worse than it is.
>
> > +
> > +#define __localtry_trylock_irqsave(lock, flags)                      \
> > +     ({                                                      \
> > +             int __locked;                                   \
> > +                                                             \
> > +             typecheck(unsigned long, flags);                \
> > +             flags = 0;                                      \
> > +             if (in_nmi() | in_hardirq()) {                  \
> > +                     __locked = 0;                           \
>
> Because of this, IIUC?

Right.
It's part of commit log:
+ In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock().
+ Map localtry_lock_irqsave() to preemptible spin_trylock().
+ When in hard IRQ or NMI return false right away, since
+ spin_trylock() is not safe due to PI issues.

Steven explained it in detail in some earlier thread.

realtime is hard. bpf and realtime together are even harder.
Things got much better over the years, but plenty of work ahead.
I can go in detail, but offtopic for this thread.
Steven Rostedt Feb. 13, 2025, 3:28 p.m. UTC | #3
On Thu, 13 Feb 2025 07:23:01 -0800
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> realtime is hard. bpf and realtime together are even harder.

Going for an LWN Quote-of-the-week ? ;-)

-- Steve
Vlastimil Babka Feb. 14, 2025, 12:11 p.m. UTC | #4
On 2/13/25 04:35, Alexei Starovoitov wrote:
> From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> 
> In !PREEMPT_RT local_lock_irqsave() disables interrupts to protect
> critical section, but it doesn't prevent NMI, so the fully reentrant
> code cannot use local_lock_irqsave() for exclusive access.
> 
> Introduce localtry_lock_t and localtry_lock_irqsave() that
> disables interrupts and sets acquired=1, so localtry_lock_irqsave()
> from NMI attempting to acquire the same lock will return false.
> 
> In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock().
> Map localtry_lock_irqsave() to preemptible spin_trylock().
> When in hard IRQ or NMI return false right away, since
> spin_trylock() is not safe due to PI issues.
> 
> Note there is no need to use local_inc for acquired variable,
> since it's a percpu variable with strict nesting scopes.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

...

>  
> +/* localtry_lock_t variants */
> +
> +#define __localtry_lock_init(lock)				\
> +do {								\
> +	__local_lock_init(&(lock)->llock);			\
> +	WRITE_ONCE(&(lock)->acquired, 0);			\

This needs to be WRITE_ONCE((lock)->acquired, 0);

I'm adopting this implementation for my next slab sheaves RFC. But I'll want
localtry_trylock() without _irqsave too, so I've added it locally. Posting
below with the init fix and making the PREEMPT_RT comment clear. Feel free
to fold everything, it would make it easier for me. Or just the fixes, if
you don't want code you don't use yourself.

----8<----
From c4f47afa3d06367d8d54662d6c3a76d3ab6e349d Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Thu, 13 Feb 2025 19:38:31 +0100
Subject: [PATCH] locking/local_lock: add localtry_trylock()

Add a localtry_trylock() variant without _irqsave that will be used in
slab sheaves implementation. Thanks to only disabling preemption and not
irqs, it has a lower overhead. It's not necessary to disable irqs to
avoid a deadlock if the irq context uses trylock and can handle
failures.

Also make the comment of localtry_trylock_irqsave() more clear, and fix a
compilation failure in localtry_lock_init().

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/local_lock.h          | 13 +++++++++++-
 include/linux/local_lock_internal.h | 31 +++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
index 05c254a5d7d3..1a0bc35839e3 100644
--- a/include/linux/local_lock.h
+++ b/include/linux/local_lock.h
@@ -77,6 +77,16 @@
 #define localtry_lock_irqsave(lock, flags)				\
 	__localtry_lock_irqsave(lock, flags)
 
+/**
+ * localtry_trylock - Try to acquire a per CPU local lock.
+ * @lock:	The lock variable
+ *
+ * The function can be used in any context such as NMI or HARDIRQ. Due to
+ * locking constrains it will _always_ fail to acquire the lock in NMI or
+ * HARDIRQ context on PREEMPT_RT.
+ */
+#define localtry_trylock(lock)		__localtry_trylock(lock)
+
 /**
  * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
  *			      interrupts if acquired
@@ -84,7 +94,8 @@
  * @flags:	Storage for interrupt flags
  *
  * The function can be used in any context such as NMI or HARDIRQ. Due to
- * locking constrains it will _always_ fail to acquire the lock on PREEMPT_RT.
+ * locking constrains it will _always_ fail to acquire the lock in NMI or
+ * HARDIRQ context on PREEMPT_RT.
  */
 #define localtry_trylock_irqsave(lock, flags)				\
 	__localtry_trylock_irqsave(lock, flags)
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
index c1369b300777..67bd13d142fa 100644
--- a/include/linux/local_lock_internal.h
+++ b/include/linux/local_lock_internal.h
@@ -137,7 +137,7 @@ do {								\
 #define __localtry_lock_init(lock)				\
 do {								\
 	__local_lock_init(&(lock)->llock);			\
-	WRITE_ONCE(&(lock)->acquired, 0);			\
+	WRITE_ONCE((lock)->acquired, 0);			\
 } while (0)
 
 #define __localtry_lock(lock)					\
@@ -167,6 +167,24 @@ do {								\
 		WRITE_ONCE(lt->acquired, 1);			\
 	} while (0)
 
+#define __localtry_trylock(lock)				\
+	({							\
+		localtry_lock_t *lt;				\
+		bool _ret;					\
+								\
+		preempt_disable();				\
+		lt = this_cpu_ptr(lock);			\
+		if (!READ_ONCE(lt->acquired)) {			\
+			WRITE_ONCE(lt->acquired, 1);		\
+			local_trylock_acquire(&lt->llock);	\
+			_ret = true;				\
+		} else {					\
+			_ret = false;				\
+			preempt_enable();			\
+		}						\
+		_ret;						\
+	})
+
 #define __localtry_trylock_irqsave(lock, flags)			\
 	({							\
 		localtry_lock_t *lt;				\
@@ -275,12 +293,10 @@ do {								\
 #define __localtry_unlock_irq(lock)			__local_unlock(lock)
 #define __localtry_unlock_irqrestore(lock, flags)	__local_unlock_irqrestore(lock, flags)
 
-#define __localtry_trylock_irqsave(lock, flags)			\
+#define __localtry_trylock(lock)				\
 	({							\
 		int __locked;					\
 								\
-		typecheck(unsigned long, flags);		\
-		flags = 0;					\
 		if (in_nmi() | in_hardirq()) {			\
 			__locked = 0;				\
 		} else {					\
@@ -292,4 +308,11 @@ do {								\
 		__locked;					\
 	})
 
+#define __localtry_trylock_irqsave(lock, flags)			\
+	({							\
+		typecheck(unsigned long, flags);		\
+		flags = 0;					\
+		__localtry_trylock(lock);			\
+	})
+
 #endif /* CONFIG_PREEMPT_RT */
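
For illustration, a minimal sketch of the pattern this variant enables
(hypothetical names, not from the patch): the task-context side takes the
lock with only preemption disabled, while a hard-IRQ user on the same CPU
uses localtry_trylock() and falls back when it interrupted the holder.

struct my_pcpu_cache {
	localtry_lock_t	lock;
	void		*slot;
};

static DEFINE_PER_CPU(struct my_pcpu_cache, my_cache) = {
	.lock = INIT_LOCALTRY_LOCK(lock),
};

/* Task context: irqs stay enabled, only preemption is disabled. */
static void my_cache_put(void *p)
{
	localtry_lock(&my_cache.lock);
	this_cpu_write(my_cache.slot, p);
	localtry_unlock(&my_cache.lock);
}

/* Hard-IRQ context: may have interrupted my_cache_put(), so back off. */
static void *my_cache_try_get(void)
{
	void *p;

	if (!localtry_trylock(&my_cache.lock))
		return NULL;	/* holder interrupted on this CPU, or hardirq on PREEMPT_RT */
	p = this_cpu_read(my_cache.slot);
	this_cpu_write(my_cache.slot, NULL);
	localtry_unlock(&my_cache.lock);
	return p;
}

Because the IRQ side tolerates failure, my_cache_put() does not need to
disable irqs, which is the lower-overhead case the commit message refers to.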
Vlastimil Babka Feb. 14, 2025, 12:15 p.m. UTC | #5
On 2/13/25 16:23, Alexei Starovoitov wrote:
> On Thu, Feb 13, 2025 at 7:04 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>>
>>
>> > +
>> > +#define __localtry_trylock_irqsave(lock, flags)                      \
>> > +     ({                                                      \
>> > +             int __locked;                                   \
>> > +                                                             \
>> > +             typecheck(unsigned long, flags);                \
>> > +             flags = 0;                                      \
>> > +             if (in_nmi() | in_hardirq()) {                  \
>> > +                     __locked = 0;                           \
>>
>> Because of this, IIUC?
> 
> Right.
> It's part of commit log:
> + In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock().
> + Map localtry_lock_irqsave() to preemptible spin_trylock().
> + When in hard IRQ or NMI return false right away, since
> + spin_trylock() is not safe due to PI issues.
> 
> Steven explained it in detail in some earlier thread.
> 
> realtime is hard. bpf and realtime together are even harder.
> Things got much better over the years, but plenty of work ahead.
> I can go in detail, but offtopic for this thread.

Thanks, it's fine. Just that the comment of the function could be more clear
then, so people don't have to check implementation/commit log/lore
discussions :)
Alexei Starovoitov Feb. 14, 2025, 6:32 p.m. UTC | #6
On Fri, Feb 14, 2025 at 4:11 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 2/13/25 04:35, Alexei Starovoitov wrote:
> > From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> >
> > In !PREEMPT_RT local_lock_irqsave() disables interrupts to protect
> > critical section, but it doesn't prevent NMI, so the fully reentrant
> > code cannot use local_lock_irqsave() for exclusive access.
> >
> > Introduce localtry_lock_t and localtry_lock_irqsave() that
> > disables interrupts and sets acquired=1, so localtry_lock_irqsave()
> > from NMI attempting to acquire the same lock will return false.
> >
> > In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock().
> > Map localtry_lock_irqsave() to preemptible spin_trylock().
> > When in hard IRQ or NMI return false right away, since
> > spin_trylock() is not safe due to PI issues.
> >
> > Note there is no need to use local_inc for acquired variable,
> > since it's a percpu variable with strict nesting scopes.
> >
> > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > Signed-off-by: Alexei Starovoitov <ast@kernel.org>
>
> ...
>
> >
> > +/* localtry_lock_t variants */
> > +
> > +#define __localtry_lock_init(lock)                           \
> > +do {                                                         \
> > +     __local_lock_init(&(lock)->llock);                      \
> > +     WRITE_ONCE(&(lock)->acquired, 0);                       \
>
> This needs to be WRITE_ONCE((lock)->acquired, 0);

Thanks. Good catch.

> I'm adopting this implementation for my next slab sheaves RFC. But I'll want
> localtry_trylock() without _irqsave too, so I've added it locally. Posting
> below with the init fix and making the PREEMPT_RT comment clear. Feel free
> to fold everything, it would make it easier for me. Or just the fixes, if
> you don't want code you don't use yourself.

+1

> ----8<----
> From c4f47afa3d06367d8d54662d6c3a76d3ab6e349d Mon Sep 17 00:00:00 2001
> From: Vlastimil Babka <vbabka@suse.cz>
> Date: Thu, 13 Feb 2025 19:38:31 +0100
> Subject: [PATCH] locking/local_lock: add localtry_trylock()
>
> Add a localtry_trylock() variant without _irqsave that will be used in
> slab sheaves implementation. Thanks to only disabling preemption and not
> irqs, it has a lower overhead. It's not necessary to disable irqs to
> avoid a deadlock if the irq context uses trylock and can handle
> failures.
>
> Also make the comment of localtry_trylock_irqsave() more clear, and fix a
> compilation failure in localtry_lock_init().
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  include/linux/local_lock.h          | 13 +++++++++++-
>  include/linux/local_lock_internal.h | 31 +++++++++++++++++++++++++----
>  2 files changed, 39 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
> index 05c254a5d7d3..1a0bc35839e3 100644
> --- a/include/linux/local_lock.h
> +++ b/include/linux/local_lock.h
> @@ -77,6 +77,16 @@
>  #define localtry_lock_irqsave(lock, flags)                             \
>         __localtry_lock_irqsave(lock, flags)
>
> +/**
> + * localtry_trylock - Try to acquire a per CPU local lock.
> + * @lock:      The lock variable
> + *
> + * The function can be used in any context such as NMI or HARDIRQ. Due to
> + * locking constrains it will _always_ fail to acquire the lock in NMI or
> + * HARDIRQ context on PREEMPT_RT.
> + */
> +#define localtry_trylock(lock)         __localtry_trylock(lock)
> +
>  /**
>   * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
>   *                           interrupts if acquired
> @@ -84,7 +94,8 @@
>   * @flags:     Storage for interrupt flags
>   *
>   * The function can be used in any context such as NMI or HARDIRQ. Due to
> - * locking constrains it will _always_ fail to acquire the lock on PREEMPT_RT.
> + * locking constrains it will _always_ fail to acquire the lock in NMI or
> + * HARDIRQ context on PREEMPT_RT.

+1 as well.

>   */
>  #define localtry_trylock_irqsave(lock, flags)                          \
>         __localtry_trylock_irqsave(lock, flags)
> diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
> index c1369b300777..67bd13d142fa 100644
> --- a/include/linux/local_lock_internal.h
> +++ b/include/linux/local_lock_internal.h
> @@ -137,7 +137,7 @@ do {                                                                \
>  #define __localtry_lock_init(lock)                             \
>  do {                                                           \
>         __local_lock_init(&(lock)->llock);                      \
> -       WRITE_ONCE(&(lock)->acquired, 0);                       \
> +       WRITE_ONCE((lock)->acquired, 0);                        \
>  } while (0)
>
>  #define __localtry_lock(lock)                                  \
> @@ -167,6 +167,24 @@ do {                                                               \
>                 WRITE_ONCE(lt->acquired, 1);                    \
>         } while (0)
>
> +#define __localtry_trylock(lock)                               \
> +       ({                                                      \
> +               localtry_lock_t *lt;                            \
> +               bool _ret;                                      \
> +                                                               \
> +               preempt_disable();                              \
> +               lt = this_cpu_ptr(lock);                        \
> +               if (!READ_ONCE(lt->acquired)) {                 \
> +                       WRITE_ONCE(lt->acquired, 1);            \
> +                       local_trylock_acquire(&lt->llock);      \
> +                       _ret = true;                            \
> +               } else {                                        \
> +                       _ret = false;                           \
> +                       preempt_enable();                       \
> +               }                                               \
> +               _ret;                                           \
> +       })
> +
>  #define __localtry_trylock_irqsave(lock, flags)                        \
>         ({                                                      \
>                 localtry_lock_t *lt;                            \
> @@ -275,12 +293,10 @@ do {                                                              \
>  #define __localtry_unlock_irq(lock)                    __local_unlock(lock)
>  #define __localtry_unlock_irqrestore(lock, flags)      __local_unlock_irqrestore(lock, flags)
>
> -#define __localtry_trylock_irqsave(lock, flags)                        \
> +#define __localtry_trylock(lock)                               \
>         ({                                                      \
>                 int __locked;                                   \
>                                                                 \
> -               typecheck(unsigned long, flags);                \
> -               flags = 0;                                      \
>                 if (in_nmi() | in_hardirq()) {                  \
>                         __locked = 0;                           \
>                 } else {                                        \
> @@ -292,4 +308,11 @@ do {                                                               \
>                 __locked;                                       \
>         })
>
> +#define __localtry_trylock_irqsave(lock, flags)                        \
> +       ({                                                      \
> +               typecheck(unsigned long, flags);                \
> +               flags = 0;                                      \
> +               __localtry_trylock(lock);                       \
> +       })
> +

All makes sense to me.

Since respin is needed, I can fold the above fix/feature and
push it into a branch with stable sha-s that we both can
use as a base ?

Or you can push just this one patch into a stable branch and I can pull it
and apply the rest on top.
Vlastimil Babka Feb. 14, 2025, 6:48 p.m. UTC | #7
On 2/14/25 19:32, Alexei Starovoitov wrote:
>>         ({                                                      \
>>                 localtry_lock_t *lt;                            \
>> @@ -275,12 +293,10 @@ do {                                                              \
>>  #define __localtry_unlock_irq(lock)                    __local_unlock(lock)
>>  #define __localtry_unlock_irqrestore(lock, flags)      __local_unlock_irqrestore(lock, flags)
>>
>> -#define __localtry_trylock_irqsave(lock, flags)                        \
>> +#define __localtry_trylock(lock)                               \
>>         ({                                                      \
>>                 int __locked;                                   \
>>                                                                 \
>> -               typecheck(unsigned long, flags);                \
>> -               flags = 0;                                      \
>>                 if (in_nmi() | in_hardirq()) {                  \
>>                         __locked = 0;                           \
>>                 } else {                                        \
>> @@ -292,4 +308,11 @@ do {                                                               \
>>                 __locked;                                       \
>>         })
>>
>> +#define __localtry_trylock_irqsave(lock, flags)                        \
>> +       ({                                                      \
>> +               typecheck(unsigned long, flags);                \
>> +               flags = 0;                                      \
>> +               __localtry_trylock(lock);                       \
>> +       })
>> +
> 
> All makes sense to me.
> 
> Since respin is needed, I can fold the above fix/feature and
> push it into a branch with stable sha-s that we both can
> use as a base ?

I doubt sheaves will be included in 6.15 so it's fine enough for me if you
fold this and perhaps order the result as patch 1?

> Or you can push just this one patch into a stable branch and I can pull it
> and apply the rest on top.

Ideally we'd have PeterZ blessing before we get to stable commit id's...

Thanks.
Sebastian Andrzej Siewior Feb. 17, 2025, 3:17 p.m. UTC | #8
On 2025-02-14 19:48:57 [+0100], Vlastimil Babka wrote:
> > Since respin is needed, I can fold the above fix/feature and
> > push it into a branch with stable sha-s that we both can
> > use as a base ?
> 
> I doubt sheaves will be included in 6.15 so it's fine enough for me if you
> fold this and perhaps order the result as patch 1?
> 
> > Or you can push just this one patch into a stable branch and I can pull it
> > and apply the rest on top.
> 
> Ideally we'd have PeterZ blessing before we get to stable commit id's...

Yes. As noted in the other thread, I'm all fine with the proposed
changes.

> Thanks.

Sebastian
Vlastimil Babka Feb. 18, 2025, 3:17 p.m. UTC | #9
On 2/13/25 04:35, Alexei Starovoitov wrote:
> From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> 
> In !PREEMPT_RT local_lock_irqsave() disables interrupts to protect
> critical section, but it doesn't prevent NMI, so the fully reentrant
> code cannot use local_lock_irqsave() for exclusive access.
> 
> Introduce localtry_lock_t and localtry_lock_irqsave() that
> disables interrupts and sets acquired=1, so localtry_lock_irqsave()
> from NMI attempting to acquire the same lock will return false.
> 
> In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock().
> Map localtry_lock_irqsave() to preemptible spin_trylock().
> When in hard IRQ or NMI return false right away, since
> spin_trylock() is not safe due to PI issues.
> 
> Note there is no need to use local_inc for acquired variable,
> since it's a percpu variable with strict nesting scopes.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>

I'm not the maintainer of this area, but with the fixes/addition I proposed,
and having use for this lock variant myself, I think it's fair to add, fwiw:

Acked-by: Vlastimil Babka <vbabka@suse.cz>

Thanks.

Patch

diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
index 091dc0b6bdfb..05c254a5d7d3 100644
--- a/include/linux/local_lock.h
+++ b/include/linux/local_lock.h
@@ -51,6 +51,65 @@ 
 #define local_unlock_irqrestore(lock, flags)			\
 	__local_unlock_irqrestore(lock, flags)
 
+/**
+ * localtry_lock_init - Runtime initialize a lock instance
+ */
+#define localtry_lock_init(lock)		__localtry_lock_init(lock)
+
+/**
+ * localtry_lock - Acquire a per CPU local lock
+ * @lock:	The lock variable
+ */
+#define localtry_lock(lock)		__localtry_lock(lock)
+
+/**
+ * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts
+ * @lock:	The lock variable
+ */
+#define localtry_lock_irq(lock)		__localtry_lock_irq(lock)
+
+/**
+ * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable
+ *			 interrupts
+ * @lock:	The lock variable
+ * @flags:	Storage for interrupt flags
+ */
+#define localtry_lock_irqsave(lock, flags)				\
+	__localtry_lock_irqsave(lock, flags)
+
+/**
+ * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
+ *			      interrupts if acquired
+ * @lock:	The lock variable
+ * @flags:	Storage for interrupt flags
+ *
+ * The function can be used in any context such as NMI or HARDIRQ. Due to
+ * locking constrains it will _always_ fail to acquire the lock on PREEMPT_RT.
+ */
+#define localtry_trylock_irqsave(lock, flags)				\
+	__localtry_trylock_irqsave(lock, flags)
+
+/**
+ * local_unlock - Release a per CPU local lock
+ * @lock:	The lock variable
+ */
+#define localtry_unlock(lock)		__localtry_unlock(lock)
+
+/**
+ * local_unlock_irq - Release a per CPU local lock and enable interrupts
+ * @lock:	The lock variable
+ */
+#define localtry_unlock_irq(lock)		__localtry_unlock_irq(lock)
+
+/**
+ * localtry_unlock_irqrestore - Release a per CPU local lock and restore
+ *			      interrupt flags
+ * @lock:	The lock variable
+ * @flags:      Interrupt flags to restore
+ */
+#define localtry_unlock_irqrestore(lock, flags)			\
+	__localtry_unlock_irqrestore(lock, flags)
+
 DEFINE_GUARD(local_lock, local_lock_t __percpu*,
 	     local_lock(_T),
 	     local_unlock(_T))
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
index 8dd71fbbb6d2..c1369b300777 100644
--- a/include/linux/local_lock_internal.h
+++ b/include/linux/local_lock_internal.h
@@ -15,6 +15,11 @@  typedef struct {
 #endif
 } local_lock_t;
 
+typedef struct {
+	local_lock_t	llock;
+	unsigned int	acquired;
+} localtry_lock_t;
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define LOCAL_LOCK_DEBUG_INIT(lockname)		\
 	.dep_map = {					\
@@ -31,6 +36,13 @@  static inline void local_lock_acquire(local_lock_t *l)
 	l->owner = current;
 }
 
+static inline void local_trylock_acquire(local_lock_t *l)
+{
+	lock_map_acquire_try(&l->dep_map);
+	DEBUG_LOCKS_WARN_ON(l->owner);
+	l->owner = current;
+}
+
 static inline void local_lock_release(local_lock_t *l)
 {
 	DEBUG_LOCKS_WARN_ON(l->owner != current);
@@ -45,11 +57,13 @@  static inline void local_lock_debug_init(local_lock_t *l)
 #else /* CONFIG_DEBUG_LOCK_ALLOC */
 # define LOCAL_LOCK_DEBUG_INIT(lockname)
 static inline void local_lock_acquire(local_lock_t *l) { }
+static inline void local_trylock_acquire(local_lock_t *l) { }
 static inline void local_lock_release(local_lock_t *l) { }
 static inline void local_lock_debug_init(local_lock_t *l) { }
 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 #define INIT_LOCAL_LOCK(lockname)	{ LOCAL_LOCK_DEBUG_INIT(lockname) }
+#define INIT_LOCALTRY_LOCK(lockname)	{ .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }}
 
 #define __local_lock_init(lock)					\
 do {								\
@@ -118,6 +132,86 @@  do {								\
 #define __local_unlock_nested_bh(lock)				\
 	local_lock_release(this_cpu_ptr(lock))
 
+/* localtry_lock_t variants */
+
+#define __localtry_lock_init(lock)				\
+do {								\
+	__local_lock_init(&(lock)->llock);			\
+	WRITE_ONCE(&(lock)->acquired, 0);			\
+} while (0)
+
+#define __localtry_lock(lock)					\
+	do {							\
+		localtry_lock_t *lt;				\
+		preempt_disable();				\
+		lt = this_cpu_ptr(lock);			\
+		local_lock_acquire(&lt->llock);			\
+		WRITE_ONCE(lt->acquired, 1);			\
+	} while (0)
+
+#define __localtry_lock_irq(lock)				\
+	do {							\
+		localtry_lock_t *lt;				\
+		local_irq_disable();				\
+		lt = this_cpu_ptr(lock);			\
+		local_lock_acquire(&lt->llock);			\
+		WRITE_ONCE(lt->acquired, 1);			\
+	} while (0)
+
+#define __localtry_lock_irqsave(lock, flags)			\
+	do {							\
+		localtry_lock_t *lt;				\
+		local_irq_save(flags);				\
+		lt = this_cpu_ptr(lock);			\
+		local_lock_acquire(&lt->llock);			\
+		WRITE_ONCE(lt->acquired, 1);			\
+	} while (0)
+
+#define __localtry_trylock_irqsave(lock, flags)			\
+	({							\
+		localtry_lock_t *lt;				\
+		bool _ret;					\
+								\
+		local_irq_save(flags);				\
+		lt = this_cpu_ptr(lock);			\
+		if (!READ_ONCE(lt->acquired)) {			\
+			WRITE_ONCE(lt->acquired, 1);		\
+			local_trylock_acquire(&lt->llock);	\
+			_ret = true;				\
+		} else {					\
+			_ret = false;				\
+			local_irq_restore(flags);		\
+		}						\
+		_ret;						\
+	})
+
+#define __localtry_unlock(lock)					\
+	do {							\
+		localtry_lock_t *lt;				\
+		lt = this_cpu_ptr(lock);			\
+		WRITE_ONCE(lt->acquired, 0);			\
+		local_lock_release(&lt->llock);			\
+		preempt_enable();				\
+	} while (0)
+
+#define __localtry_unlock_irq(lock)				\
+	do {							\
+		localtry_lock_t *lt;				\
+		lt = this_cpu_ptr(lock);			\
+		WRITE_ONCE(lt->acquired, 0);			\
+		local_lock_release(&lt->llock);			\
+		local_irq_enable();				\
+	} while (0)
+
+#define __localtry_unlock_irqrestore(lock, flags)		\
+	do {							\
+		localtry_lock_t *lt;				\
+		lt = this_cpu_ptr(lock);			\
+		WRITE_ONCE(lt->acquired, 0);			\
+		local_lock_release(&lt->llock);			\
+		local_irq_restore(flags);			\
+	} while (0)
+
 #else /* !CONFIG_PREEMPT_RT */
 
 /*
@@ -125,8 +219,10 @@  do {								\
  * critical section while staying preemptible.
  */
 typedef spinlock_t local_lock_t;
+typedef spinlock_t localtry_lock_t;
 
 #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname))
+#define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname)
 
 #define __local_lock_init(l)					\
 	do {							\
@@ -169,4 +265,31 @@  do {								\
 	spin_unlock(this_cpu_ptr((lock)));			\
 } while (0)
 
+/* localtry_lock_t variants */
+
+#define __localtry_lock_init(lock)			__local_lock_init(lock)
+#define __localtry_lock(lock)				__local_lock(lock)
+#define __localtry_lock_irq(lock)			__local_lock(lock)
+#define __localtry_lock_irqsave(lock, flags)		__local_lock_irqsave(lock, flags)
+#define __localtry_unlock(lock)				__local_unlock(lock)
+#define __localtry_unlock_irq(lock)			__local_unlock(lock)
+#define __localtry_unlock_irqrestore(lock, flags)	__local_unlock_irqrestore(lock, flags)
+
+#define __localtry_trylock_irqsave(lock, flags)			\
+	({							\
+		int __locked;					\
+								\
+		typecheck(unsigned long, flags);		\
+		flags = 0;					\
+		if (in_nmi() | in_hardirq()) {			\
+			__locked = 0;				\
+		} else {					\
+			migrate_disable();			\
+			__locked = spin_trylock(this_cpu_ptr((lock)));	\
+			if (!__locked)				\
+				migrate_enable();		\
+		}						\
+		__locked;					\
+	})
+
 #endif /* CONFIG_PREEMPT_RT */