Message ID | 20241210023936.46871-4-alexei.starovoitov@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | bpf, mm: Introduce __GFP_TRYLOCK | expand |
On 12/10/24 03:39, Alexei Starovoitov wrote: > From: Alexei Starovoitov <ast@kernel.org> > > Similar to local_lock_irqsave() introduce local_trylock_irqsave(). > It uses spin_trylock in PREEMPT_RT and always succeeds when !RT. Hmm but is that correct to always succeed? If we're in an nmi, we might be preempting an existing local_(try)lock_irqsave() critical section because disabling irqs doesn't stop NMI's, right? > Signed-off-by: Alexei Starovoitov <ast@kernel.org> > --- > include/linux/local_lock.h | 9 +++++++++ > include/linux/local_lock_internal.h | 23 +++++++++++++++++++++++ > 2 files changed, 32 insertions(+) > > diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h > index 091dc0b6bdfb..6880c29b8b98 100644 > --- a/include/linux/local_lock.h > +++ b/include/linux/local_lock.h > @@ -30,6 +30,15 @@ > #define local_lock_irqsave(lock, flags) \ > __local_lock_irqsave(lock, flags) > > +/** > + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable > + * interrupts. Always succeeds in !PREEMPT_RT. > + * @lock: The lock variable > + * @flags: Storage for interrupt flags > + */ > +#define local_trylock_irqsave(lock, flags) \ > + __local_trylock_irqsave(lock, flags) > + > /** > * local_unlock - Release a per CPU local lock > * @lock: The lock variable > diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h > index 8dd71fbbb6d2..2c0f8a49c2d0 100644 > --- a/include/linux/local_lock_internal.h > +++ b/include/linux/local_lock_internal.h > @@ -31,6 +31,13 @@ static inline void local_lock_acquire(local_lock_t *l) > l->owner = current; > } > > +static inline void local_trylock_acquire(local_lock_t *l) > +{ > + lock_map_acquire_try(&l->dep_map); > + DEBUG_LOCKS_WARN_ON(l->owner); > + l->owner = current; > +} > + > static inline void local_lock_release(local_lock_t *l) > { > DEBUG_LOCKS_WARN_ON(l->owner != current); > @@ -45,6 +52,7 @@ static inline void local_lock_debug_init(local_lock_t *l) > #else /* CONFIG_DEBUG_LOCK_ALLOC */ > # define LOCAL_LOCK_DEBUG_INIT(lockname) > static inline void local_lock_acquire(local_lock_t *l) { } > +static inline void local_trylock_acquire(local_lock_t *l) { } > static inline void local_lock_release(local_lock_t *l) { } > static inline void local_lock_debug_init(local_lock_t *l) { } > #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ > @@ -91,6 +99,13 @@ do { \ > local_lock_acquire(this_cpu_ptr(lock)); \ > } while (0) > > +#define __local_trylock_irqsave(lock, flags) \ > + ({ \ > + local_irq_save(flags); \ > + local_trylock_acquire(this_cpu_ptr(lock)); \ > + 1; \ > + }) > + > #define __local_unlock(lock) \ > do { \ > local_lock_release(this_cpu_ptr(lock)); \ > @@ -148,6 +163,14 @@ typedef spinlock_t local_lock_t; > __local_lock(lock); \ > } while (0) > > +#define __local_trylock_irqsave(lock, flags) \ > + ({ \ > + typecheck(unsigned long, flags); \ > + flags = 0; \ > + migrate_disable(); \ > + spin_trylock(this_cpu_ptr((__lock))); \ > + }) > + > #define __local_unlock(__lock) \ > do { \ > spin_unlock(this_cpu_ptr((__lock))); \
On 12/11/24 11:53, Vlastimil Babka wrote: > On 12/10/24 03:39, Alexei Starovoitov wrote: >> From: Alexei Starovoitov <ast@kernel.org> >> >> Similar to local_lock_irqsave() introduce local_trylock_irqsave(). >> It uses spin_trylock in PREEMPT_RT and always succeeds when !RT. > > Hmm but is that correct to always succeed? If we're in an nmi, we might be > preempting an existing local_(try)lock_irqsave() critical section because > disabling irqs doesn't stop NMI's, right? So unless I'm missing something, it would need to be a new kind of local lock to support this trylock operation on !RT? Perhaps based on the same principle of a simple active/locked flag that I tried in my sheaves RFC? [1] There could be also the advantage that if all (potentially) irq contexts (not just nmi) used trylock, it would be sufficient to disable preeemption and not interrupts, which is cheaper. The RT variant could work as you proposed here, that was wrong in my RFC as you already pointed out when we discussed v1 of this series. [1] https://lore.kernel.org/all/20241112-slub-percpu-caches-v1-5-ddc0bdc27e05@suse.cz/ >> Signed-off-by: Alexei Starovoitov <ast@kernel.org> >> --- >> include/linux/local_lock.h | 9 +++++++++ >> include/linux/local_lock_internal.h | 23 +++++++++++++++++++++++ >> 2 files changed, 32 insertions(+) >> >> diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h >> index 091dc0b6bdfb..6880c29b8b98 100644 >> --- a/include/linux/local_lock.h >> +++ b/include/linux/local_lock.h >> @@ -30,6 +30,15 @@ >> #define local_lock_irqsave(lock, flags) \ >> __local_lock_irqsave(lock, flags) >> >> +/** >> + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable >> + * interrupts. Always succeeds in !PREEMPT_RT. >> + * @lock: The lock variable >> + * @flags: Storage for interrupt flags >> + */ >> +#define local_trylock_irqsave(lock, flags) \ >> + __local_trylock_irqsave(lock, flags) >> + >> /** >> * local_unlock - Release a per CPU local lock >> * @lock: The lock variable >> diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h >> index 8dd71fbbb6d2..2c0f8a49c2d0 100644 >> --- a/include/linux/local_lock_internal.h >> +++ b/include/linux/local_lock_internal.h >> @@ -31,6 +31,13 @@ static inline void local_lock_acquire(local_lock_t *l) >> l->owner = current; >> } >> >> +static inline void local_trylock_acquire(local_lock_t *l) >> +{ >> + lock_map_acquire_try(&l->dep_map); >> + DEBUG_LOCKS_WARN_ON(l->owner); >> + l->owner = current; >> +} >> + >> static inline void local_lock_release(local_lock_t *l) >> { >> DEBUG_LOCKS_WARN_ON(l->owner != current); >> @@ -45,6 +52,7 @@ static inline void local_lock_debug_init(local_lock_t *l) >> #else /* CONFIG_DEBUG_LOCK_ALLOC */ >> # define LOCAL_LOCK_DEBUG_INIT(lockname) >> static inline void local_lock_acquire(local_lock_t *l) { } >> +static inline void local_trylock_acquire(local_lock_t *l) { } >> static inline void local_lock_release(local_lock_t *l) { } >> static inline void local_lock_debug_init(local_lock_t *l) { } >> #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ >> @@ -91,6 +99,13 @@ do { \ >> local_lock_acquire(this_cpu_ptr(lock)); \ >> } while (0) >> >> +#define __local_trylock_irqsave(lock, flags) \ >> + ({ \ >> + local_irq_save(flags); \ >> + local_trylock_acquire(this_cpu_ptr(lock)); \ >> + 1; \ >> + }) >> + >> #define __local_unlock(lock) \ >> do { \ >> local_lock_release(this_cpu_ptr(lock)); \ >> @@ -148,6 +163,14 @@ typedef spinlock_t local_lock_t; >> __local_lock(lock); \ >> } while (0) >> >> +#define __local_trylock_irqsave(lock, flags) \ >> + ({ \ >> + typecheck(unsigned long, flags); \ >> + flags = 0; \ >> + migrate_disable(); \ >> + spin_trylock(this_cpu_ptr((__lock))); \ >> + }) >> + >> #define __local_unlock(__lock) \ >> do { \ >> spin_unlock(this_cpu_ptr((__lock))); \ >
On Wed, Dec 11, 2024 at 3:55 AM Vlastimil Babka <vbabka@suse.cz> wrote: > > On 12/11/24 11:53, Vlastimil Babka wrote: > > On 12/10/24 03:39, Alexei Starovoitov wrote: > >> From: Alexei Starovoitov <ast@kernel.org> > >> > >> Similar to local_lock_irqsave() introduce local_trylock_irqsave(). > >> It uses spin_trylock in PREEMPT_RT and always succeeds when !RT. > > > > Hmm but is that correct to always succeed? If we're in an nmi, we might be > > preempting an existing local_(try)lock_irqsave() critical section because > > disabling irqs doesn't stop NMI's, right? > > So unless I'm missing something, it would need to be a new kind of local > lock to support this trylock operation on !RT? Ohh. Correct. Forgot about nmi interrupting local_lock_irqsave region in !RT. > Perhaps based on the same > principle of a simple active/locked flag that I tried in my sheaves RFC? [1] > There could be also the advantage that if all (potentially) irq contexts > (not just nmi) used trylock, it would be sufficient to disable preeemption > and not interrupts, which is cheaper. I don't think it's the case. pushf was slow on old x86. According to https://www.agner.org/optimize/instruction_tables.pdf it's 3 uops on skylake. That could be faster than preempt_disable (incl %gs:addr) which is 3-4 uops assuming cache hit. > The RT variant could work as you proposed here, that was wrong in my RFC as > you already pointed out when we discussed v1 of this series. > > [1] > https://lore.kernel.org/all/20241112-slub-percpu-caches-v1-5-ddc0bdc27e05@suse.cz/ I like your +struct local_tryirq_lock approach, but let's put it in local_lock.h ? and it probably needs local_inc_return() instead of READ/WRITE_ONCE. With irq and nmis it's racy. In the meantime I think I will fix below: > >> +#define __local_trylock_irqsave(lock, flags) \ > >> + ({ \ > >> + local_irq_save(flags); \ > >> + local_trylock_acquire(this_cpu_ptr(lock)); \ > >> + 1; \ > >> + }) as #define __local_trylock_irqsave(lock, flags) \ ({ \ local_irq_save(flags); \ local_trylock_acquire(this_cpu_ptr(lock)); \ !in_nmi(); \ }) I think that's good enough for memcg patch 4 and doesn't grow local_lock_t on !RT. We can introduce typedef struct { int count; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; struct task_struct *owner; #endif } local_trylock_t; and the whole set of local_trylock_lock, local_trylock_unlock,... But that's quite a bit of code. Feels a bit overkill for memcg patch 4. At this point it feels that adding 'int count' to existing local_lock_t is cleaner.
On 12/12/24 03:49, Alexei Starovoitov wrote: > On Wed, Dec 11, 2024 at 3:55 AM Vlastimil Babka <vbabka@suse.cz> wrote: >> >> On 12/11/24 11:53, Vlastimil Babka wrote: >> > On 12/10/24 03:39, Alexei Starovoitov wrote: >> >> From: Alexei Starovoitov <ast@kernel.org> >> >> >> >> Similar to local_lock_irqsave() introduce local_trylock_irqsave(). >> >> It uses spin_trylock in PREEMPT_RT and always succeeds when !RT. >> > >> > Hmm but is that correct to always succeed? If we're in an nmi, we might be >> > preempting an existing local_(try)lock_irqsave() critical section because >> > disabling irqs doesn't stop NMI's, right? >> >> So unless I'm missing something, it would need to be a new kind of local >> lock to support this trylock operation on !RT? > > Ohh. Correct. Forgot about nmi interrupting local_lock_irqsave region in !RT. > >> Perhaps based on the same >> principle of a simple active/locked flag that I tried in my sheaves RFC? [1] >> There could be also the advantage that if all (potentially) irq contexts >> (not just nmi) used trylock, it would be sufficient to disable preeemption >> and not interrupts, which is cheaper. > > I don't think it's the case. > pushf was slow on old x86. > According to https://www.agner.org/optimize/instruction_tables.pdf > it's 3 uops on skylake. > That could be faster than preempt_disable (incl %gs:addr) > which is 3-4 uops assuming cache hit. I think the costly ones are not pushf, but cli and popf. I did some microbenchmark in the kernel (Ryzen 2700, not really latest thing but anyway) and IIRC it was twice slower to do the irqsave/restore than preempt only. >> The RT variant could work as you proposed here, that was wrong in my RFC as >> you already pointed out when we discussed v1 of this series. >> >> [1] >> https://lore.kernel.org/all/20241112-slub-percpu-caches-v1-5-ddc0bdc27e05@suse.cz/ > > I like your > +struct local_tryirq_lock > approach, but let's put it in local_lock.h ? Sure, that was a proof of concept so kept it local. > and it probably needs local_inc_return() instead of READ/WRITE_ONCE. > With irq and nmis it's racy. Hm guess you are right, thanks! > In the meantime I think I will fix below: > >> >> +#define __local_trylock_irqsave(lock, flags) \ >> >> + ({ \ >> >> + local_irq_save(flags); \ >> >> + local_trylock_acquire(this_cpu_ptr(lock)); \ >> >> + 1; \ >> >> + }) > > as > #define __local_trylock_irqsave(lock, flags) \ > ({ \ > local_irq_save(flags); \ > local_trylock_acquire(this_cpu_ptr(lock)); \ > !in_nmi(); \ > }) > > I think that's good enough for memcg patch 4 and > doesn't grow local_lock_t on !RT. But that means you'll never succeed in nmi, doesn't that limit the bpf use case? > We can introduce > > typedef struct { > int count; > #ifdef CONFIG_DEBUG_LOCK_ALLOC > struct lockdep_map dep_map; > struct task_struct *owner; > #endif > } local_trylock_t; > > and the whole set of local_trylock_lock, local_trylock_unlock,... > But that's quite a bit of code. Feels a bit overkill for memcg patch 4. SLUB also uses local_locks so it would be needed there later too. > At this point it feels that adding 'int count' to existing local_lock_t > is cleaner. We have Peter and Thomas in Cc let's see what they think :)
> diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h > index 8dd71fbbb6d2..2c0f8a49c2d0 100644 > --- a/include/linux/local_lock_internal.h > +++ b/include/linux/local_lock_internal.h > @@ -148,6 +163,14 @@ typedef spinlock_t local_lock_t; > __local_lock(lock); \ > } while (0) > > +#define __local_trylock_irqsave(lock, flags) \ > + ({ \ > + typecheck(unsigned long, flags); \ > + flags = 0; \ > + migrate_disable(); \ > + spin_trylock(this_cpu_ptr((__lock))); \ You should probably do a migrate_enable() here if the trylock fails. > + }) > + > #define __local_unlock(__lock) \ > do { \ > spin_unlock(this_cpu_ptr((__lock))); \ Sebastian
On Thu, Dec 12, 2024 at 7:15 AM Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote: > > > diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h > > index 8dd71fbbb6d2..2c0f8a49c2d0 100644 > > --- a/include/linux/local_lock_internal.h > > +++ b/include/linux/local_lock_internal.h > > @@ -148,6 +163,14 @@ typedef spinlock_t local_lock_t; > > __local_lock(lock); \ > > } while (0) > > > > +#define __local_trylock_irqsave(lock, flags) \ > > + ({ \ > > + typecheck(unsigned long, flags); \ > > + flags = 0; \ > > + migrate_disable(); \ > > + spin_trylock(this_cpu_ptr((__lock))); \ > > You should probably do a migrate_enable() here if the trylock fails. Great catch. Thanks!
On 12/12/24 10:15, Vlastimil Babka wrote: > On 12/12/24 03:49, Alexei Starovoitov wrote: >> I like your >> +struct local_tryirq_lock >> approach, but let's put it in local_lock.h ? > > Sure, that was a proof of concept so kept it local. > >> and it probably needs local_inc_return() instead of READ/WRITE_ONCE. >> With irq and nmis it's racy. > > Hm guess you are right, thanks! > Ah I remember now, the idea was that if an irq or nmi interrupts someone else between checking that active is 0 and setting it to 1, it will finish the critical section and unlock before the interrupted context can continue, so the race is harmless.
diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 091dc0b6bdfb..6880c29b8b98 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -30,6 +30,15 @@ #define local_lock_irqsave(lock, flags) \ __local_lock_irqsave(lock, flags) +/** + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable + * interrupts. Always succeeds in !PREEMPT_RT. + * @lock: The lock variable + * @flags: Storage for interrupt flags + */ +#define local_trylock_irqsave(lock, flags) \ + __local_trylock_irqsave(lock, flags) + /** * local_unlock - Release a per CPU local lock * @lock: The lock variable diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 8dd71fbbb6d2..2c0f8a49c2d0 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -31,6 +31,13 @@ static inline void local_lock_acquire(local_lock_t *l) l->owner = current; } +static inline void local_trylock_acquire(local_lock_t *l) +{ + lock_map_acquire_try(&l->dep_map); + DEBUG_LOCKS_WARN_ON(l->owner); + l->owner = current; +} + static inline void local_lock_release(local_lock_t *l) { DEBUG_LOCKS_WARN_ON(l->owner != current); @@ -45,6 +52,7 @@ static inline void local_lock_debug_init(local_lock_t *l) #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } +static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ @@ -91,6 +99,13 @@ do { \ local_lock_acquire(this_cpu_ptr(lock)); \ } while (0) +#define __local_trylock_irqsave(lock, flags) \ + ({ \ + local_irq_save(flags); \ + local_trylock_acquire(this_cpu_ptr(lock)); \ + 1; \ + }) + #define __local_unlock(lock) \ do { \ local_lock_release(this_cpu_ptr(lock)); \ @@ -148,6 +163,14 @@ typedef spinlock_t local_lock_t; __local_lock(lock); \ } while (0) +#define __local_trylock_irqsave(lock, flags) \ + ({ \ + typecheck(unsigned long, flags); \ + flags = 0; \ + migrate_disable(); \ + spin_trylock(this_cpu_ptr((__lock))); \ + }) + #define __local_unlock(__lock) \ do { \ spin_unlock(this_cpu_ptr((__lock))); \