| Message ID | 20220929180731.2875722-2-paulmck@kernel.org |
|---|---|
| State | Accepted |
| Commit | b5daaf1269acc714e310dd33e622412e4f178946 |
| Series | NMI-safe SRCU reader API |
On Thu, Sep 29, 2022 at 11:07:25AM -0700, Paul E. McKenney wrote:
> @@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> 	int ss_state;
>
> 	check_init_srcu_struct(ssp);
> -	idx = srcu_read_lock(ssp);
> +	idx = __srcu_read_lock_nmisafe(ssp);

Why do we need to force the atomic based version here (even if
CONFIG_NEED_SRCU_NMI_SAFE=y)?

> 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
> 	if (ss_state < SRCU_SIZE_WAIT_CALL)
> 		sdp = per_cpu_ptr(ssp->sda, 0);
> @@ -1123,7 +1154,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
> 	else if (needexp)
> 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
> -	srcu_read_unlock(ssp, idx);
> +	__srcu_read_unlock_nmisafe(ssp, idx);
> 	return s;
> }
>
> @@ -1427,13 +1458,13 @@ void srcu_barrier(struct srcu_struct *ssp)
> 	/* Initial count prevents reaching zero until all CBs are posted. */
> 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
>
> -	idx = srcu_read_lock(ssp);
> +	idx = __srcu_read_lock_nmisafe(ssp);

And same here?

Thanks.

> 	if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
> 		srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
> 	else
> 		for_each_possible_cpu(cpu)
> 			srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
> -	srcu_read_unlock(ssp, idx);
> +	__srcu_read_unlock_nmisafe(ssp, idx);
>
> 	/* Remove the initial count, at which point reaching zero can happen. */
> 	if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
> --
> 2.31.1.189.g2e36527f23
>
On Sun, Oct 02, 2022 at 05:55:16PM +0200, Frederic Weisbecker wrote:
> On Thu, Sep 29, 2022 at 11:07:25AM -0700, Paul E. McKenney wrote:
> > @@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > 	int ss_state;
> >
> > 	check_init_srcu_struct(ssp);
> > -	idx = srcu_read_lock(ssp);
> > +	idx = __srcu_read_lock_nmisafe(ssp);
>
> Why do we need to force the atomic based version here (even if
> CONFIG_NEED_SRCU_NMI_SAFE=y)?

...even if CONFIG_NEED_SRCU_NMI_SAFE=n that is...
On Sun, Oct 02, 2022 at 05:55:16PM +0200, Frederic Weisbecker wrote:
> On Thu, Sep 29, 2022 at 11:07:25AM -0700, Paul E. McKenney wrote:
> > @@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > 	int ss_state;
> >
> > 	check_init_srcu_struct(ssp);
> > -	idx = srcu_read_lock(ssp);
> > +	idx = __srcu_read_lock_nmisafe(ssp);
>
> Why do we need to force the atomic based version here (even if CONFIG_NEED_SRCU_NMI_SAFE=y)?

In kernels built with CONFIG_NEED_SRCU_NMI_SAFE=n, we of course need it.
As you say, in kernels built with CONFIG_NEED_SRCU_NMI_SAFE=y, we don't.
But it doesn't hurt to always use __srcu_read_lock_nmisafe() here, and
this is nowhere near a fastpath, so there is little benefit to using
__srcu_read_lock() when it is safe to do so.

In addition, note that it is possible that a given srcu_struct structure's
first grace period is executed before its first reader. In that
case, we have no way of knowing which of __srcu_read_lock_nmisafe()
or __srcu_read_lock() to choose.

So this code always does it the slow(ish) safe way.

> > 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
> > 	if (ss_state < SRCU_SIZE_WAIT_CALL)
> > 		sdp = per_cpu_ptr(ssp->sda, 0);
> > @@ -1123,7 +1154,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
> > 	else if (needexp)
> > 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
> > -	srcu_read_unlock(ssp, idx);
> > +	__srcu_read_unlock_nmisafe(ssp, idx);
> > 	return s;
> > }
> >
> > @@ -1427,13 +1458,13 @@ void srcu_barrier(struct srcu_struct *ssp)
> > 	/* Initial count prevents reaching zero until all CBs are posted. */
> > 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
> >
> > -	idx = srcu_read_lock(ssp);
> > +	idx = __srcu_read_lock_nmisafe(ssp);
>
> And same here?

Yes, same here. ;-)

							Thanx, Paul

> Thanks.
>
> > 	if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
> > 		srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
> > 	else
> > 		for_each_possible_cpu(cpu)
> > 			srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
> > -	srcu_read_unlock(ssp, idx);
> > +	__srcu_read_unlock_nmisafe(ssp, idx);
> >
> > 	/* Remove the initial count, at which point reaching zero can happen. */
> > 	if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
> > --
> > 2.31.1.189.g2e36527f23
> >
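For context on the "slow(ish)" remark above: the regular SRCU reader fast path uses a plain per-CPU increment, whereas the NMI-safe variant added by this patch pays for a full atomic read-modify-write. The following is a simplified sketch of the existing fast path, not verbatim mainline code (the exact type of ->srcu_lock_count is changed earlier in this series):

```c
/* Simplified sketch of the existing, non-NMI-safe SRCU reader fast path. */
int __srcu_read_lock(struct srcu_struct *ssp)
{
	int idx;

	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
	this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);	/* Cheap, but not NMI-safe everywhere. */
	smp_mb(); /* B */  /* Avoid leaking the critical section. */
	return idx;
}
```

On architectures whose this_cpu operations are not NMI-atomic, an NMI arriving in the middle of that increment could corrupt the count, which is what the atomic_long_inc()-based __srcu_read_lock_nmisafe() avoids.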
On Sun, Oct 02, 2022 at 05:57:25PM +0200, Frederic Weisbecker wrote:
> On Sun, Oct 02, 2022 at 05:55:16PM +0200, Frederic Weisbecker wrote:
> > On Thu, Sep 29, 2022 at 11:07:25AM -0700, Paul E. McKenney wrote:
> > > @@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > 	int ss_state;
> > >
> > > 	check_init_srcu_struct(ssp);
> > > -	idx = srcu_read_lock(ssp);
> > > +	idx = __srcu_read_lock_nmisafe(ssp);
> >
> > Why do we need to force the atomic based version here (even if
> > CONFIG_NEED_SRCU_NMI_SAFE=y)?
>
> ...even if CONFIG_NEED_SRCU_NMI_SAFE=n that is...

Heh! I also got it consistently backwards in my reply. I will trust
your ability to translate. ;-)

							Thanx, Paul
On Sun, Oct 02, 2022 at 09:09:57AM -0700, Paul E. McKenney wrote:
> On Sun, Oct 02, 2022 at 05:55:16PM +0200, Frederic Weisbecker wrote:
> > On Thu, Sep 29, 2022 at 11:07:25AM -0700, Paul E. McKenney wrote:
> > > @@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > 	int ss_state;
> > >
> > > 	check_init_srcu_struct(ssp);
> > > -	idx = srcu_read_lock(ssp);
> > > +	idx = __srcu_read_lock_nmisafe(ssp);
> >
> > Why do we need to force the atomic based version here (even if CONFIG_NEED_SRCU_NMI_SAFE=y)?
>
> In kernels built with CONFIG_NEED_SRCU_NMI_SAFE=n, we of course need it.
> As you say, in kernels built with CONFIG_NEED_SRCU_NMI_SAFE=y, we don't.
> But it doesn't hurt to always use __srcu_read_lock_nmisafe() here, and
> this is nowhere near a fastpath, so there is little benefit to using
> __srcu_read_lock() when it is safe to do so.
>
> In addition, note that it is possible that a given srcu_struct structure's
> first grace period is executed before its first reader. In that
> case, we have no way of knowing which of __srcu_read_lock_nmisafe()
> or __srcu_read_lock() to choose.
>
> So this code always does it the slow(ish) safe way.

But then srcu_read_lock_nmisafe() would work as well, right?

> > > 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
> > > 	if (ss_state < SRCU_SIZE_WAIT_CALL)
> > > 		sdp = per_cpu_ptr(ssp->sda, 0);
> > > @@ -1123,7 +1154,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
> > > 	else if (needexp)
> > > 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
> > > -	srcu_read_unlock(ssp, idx);
> > > +	__srcu_read_unlock_nmisafe(ssp, idx);
> > > 	return s;
> > > }
> > >
> > > @@ -1427,13 +1458,13 @@ void srcu_barrier(struct srcu_struct *ssp)
> > > 	/* Initial count prevents reaching zero until all CBs are posted. */
> > > 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
> > >
> > > -	idx = srcu_read_lock(ssp);
> > > +	idx = __srcu_read_lock_nmisafe(ssp);
> >
> > And same here?
>
> Yes, same here. ;-)

Now bonus question: why do SRCU grace period starting/tracking
need to be in an SRCU read side critical section? :o)

Thanks.
On Sun, Oct 02, 2022 at 11:47:10PM +0200, Frederic Weisbecker wrote:
> On Sun, Oct 02, 2022 at 09:09:57AM -0700, Paul E. McKenney wrote:
> > On Sun, Oct 02, 2022 at 05:55:16PM +0200, Frederic Weisbecker wrote:
> > > On Thu, Sep 29, 2022 at 11:07:25AM -0700, Paul E. McKenney wrote:
> > > > @@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > > 	int ss_state;
> > > >
> > > > 	check_init_srcu_struct(ssp);
> > > > -	idx = srcu_read_lock(ssp);
> > > > +	idx = __srcu_read_lock_nmisafe(ssp);
> > >
> > > Why do we need to force the atomic based version here (even if CONFIG_NEED_SRCU_NMI_SAFE=y)?
> >
> > In kernels built with CONFIG_NEED_SRCU_NMI_SAFE=n, we of course need it.
> > As you say, in kernels built with CONFIG_NEED_SRCU_NMI_SAFE=y, we don't.
> > But it doesn't hurt to always use __srcu_read_lock_nmisafe() here, and
> > this is nowhere near a fastpath, so there is little benefit to using
> > __srcu_read_lock() when it is safe to do so.
> >
> > In addition, note that it is possible that a given srcu_struct structure's
> > first grace period is executed before its first reader. In that
> > case, we have no way of knowing which of __srcu_read_lock_nmisafe()
> > or __srcu_read_lock() to choose.
> >
> > So this code always does it the slow(ish) safe way.
>
> But then srcu_read_lock_nmisafe() would work as well, right?

Almost.

The problem is that without the leading "__", this would convince SRCU
that this is an NMI-safe srcu_struct. Which it might not be. Worse yet,
if this srcu_struct had already done an srcu_read_lock(), it would splat.

> > > > 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
> > > > 	if (ss_state < SRCU_SIZE_WAIT_CALL)
> > > > 		sdp = per_cpu_ptr(ssp->sda, 0);
> > > > @@ -1123,7 +1154,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > > 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
> > > > 	else if (needexp)
> > > > 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
> > > > -	srcu_read_unlock(ssp, idx);
> > > > +	__srcu_read_unlock_nmisafe(ssp, idx);
> > > > 	return s;
> > > > }
> > > >
> > > > @@ -1427,13 +1458,13 @@ void srcu_barrier(struct srcu_struct *ssp)
> > > > 	/* Initial count prevents reaching zero until all CBs are posted. */
> > > > 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
> > > >
> > > > -	idx = srcu_read_lock(ssp);
> > > > +	idx = __srcu_read_lock_nmisafe(ssp);
> > >
> > > And same here?
> >
> > Yes, same here. ;-)
>
> Now bonus question: why do SRCU grace period starting/tracking
> need to be in an SRCU read side critical section? :o)

Because I am lazy and like to keep things simple? ;-)

More seriously, take a look at srcu_gp_start_if_needed() and the functions
it calls and ask yourself what bad things could happen if they were
preempted for an arbitrarily long period of time.

							Thanx, Paul
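A simplified sketch of the flavor check Paul is describing follows; the srcu_check_nmi_safety() name and the ->srcu_nmi_safety field follow later revisions of this series and should be treated as assumptions relative to this excerpt. The idea is that the first reader records which flavor it used, and any later reader of the other flavor triggers a warning:

```c
/*
 * Sketch only: record the reader flavor on first use and splat on mixing.
 * Names and the exact location of the state (per-srcu_struct here,
 * per-CPU in later versions of this series) are assumptions.
 */
#define SRCU_NMI_UNKNOWN	0x0
#define SRCU_NMI_UNSAFE		0x1
#define SRCU_NMI_SAFE		0x2

static void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe)
{
	int new_mask = nmi_safe ? SRCU_NMI_SAFE : SRCU_NMI_UNSAFE;
	int old_mask = READ_ONCE(ssp->srcu_nmi_safety);

	if (old_mask == SRCU_NMI_UNKNOWN) {
		/* First reader decides the flavor for this srcu_struct. */
		WRITE_ONCE(ssp->srcu_nmi_safety, new_mask);
		return;
	}
	WARN_ONCE(old_mask != new_mask,
		  "Mixed NMI-safe and NMI-unsafe readers on the same srcu_struct\n");
}
```

Because srcu_gp_start_if_needed() and srcu_barrier() run against arbitrary srcu_struct instances of either flavor, they bypass this check by calling the double-underscore primitives directly.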
On Sun, Oct 02, 2022 at 04:46:55PM -0700, Paul E. McKenney wrote:
> On Sun, Oct 02, 2022 at 11:47:10PM +0200, Frederic Weisbecker wrote:
> > On Sun, Oct 02, 2022 at 09:09:57AM -0700, Paul E. McKenney wrote:
> > > On Sun, Oct 02, 2022 at 05:55:16PM +0200, Frederic Weisbecker wrote:
> > > > On Thu, Sep 29, 2022 at 11:07:25AM -0700, Paul E. McKenney wrote:
> > > > > @@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > > > 	int ss_state;
> > > > >
> > > > > 	check_init_srcu_struct(ssp);
> > > > > -	idx = srcu_read_lock(ssp);
> > > > > +	idx = __srcu_read_lock_nmisafe(ssp);
> > > >
> > > > Why do we need to force the atomic based version here (even if CONFIG_NEED_SRCU_NMI_SAFE=y)?
> > >
> > > In kernels built with CONFIG_NEED_SRCU_NMI_SAFE=n, we of course need it.
> > > As you say, in kernels built with CONFIG_NEED_SRCU_NMI_SAFE=y, we don't.
> > > But it doesn't hurt to always use __srcu_read_lock_nmisafe() here, and
> > > this is nowhere near a fastpath, so there is little benefit to using
> > > __srcu_read_lock() when it is safe to do so.
> > >
> > > In addition, note that it is possible that a given srcu_struct structure's
> > > first grace period is executed before its first reader. In that
> > > case, we have no way of knowing which of __srcu_read_lock_nmisafe()
> > > or __srcu_read_lock() to choose.
> > >
> > > So this code always does it the slow(ish) safe way.
> >
> > But then srcu_read_lock_nmisafe() would work as well, right?
>
> Almost.
>
> The problem is that without the leading "__", this would convince SRCU
> that this is an NMI-safe srcu_struct. Which it might not be. Worse yet,
> if this srcu_struct had already done an srcu_read_lock(), it would splat.

Ah ok, now that makes sense.

> > > > > 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
> > > > > 	if (ss_state < SRCU_SIZE_WAIT_CALL)
> > > > > 		sdp = per_cpu_ptr(ssp->sda, 0);
> > > > > @@ -1123,7 +1154,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > > > 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
> > > > > 	else if (needexp)
> > > > > 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
> > > > > -	srcu_read_unlock(ssp, idx);
> > > > > +	__srcu_read_unlock_nmisafe(ssp, idx);
> > > > > 	return s;
> > > > > }
> > > > >
> > > > > @@ -1427,13 +1458,13 @@ void srcu_barrier(struct srcu_struct *ssp)
> > > > > 	/* Initial count prevents reaching zero until all CBs are posted. */
> > > > > 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
> > > > >
> > > > > -	idx = srcu_read_lock(ssp);
> > > > > +	idx = __srcu_read_lock_nmisafe(ssp);
> > > >
> > > > And same here?
> > >
> > > Yes, same here. ;-)
> >
> > Now bonus question: why do SRCU grace period starting/tracking
> > need to be in an SRCU read side critical section? :o)
>
> Because I am lazy and like to keep things simple? ;-)
>
> More seriously, take a look at srcu_gp_start_if_needed() and the functions
> it calls and ask yourself what bad things could happen if they were
> preempted for an arbitrarily long period of time.

I can see a risk for ssp->srcu_gp_seq to overflow. Can't say that was obvious
though, at least for me. Am I missing something else?

Thanks.
On Mon, Oct 03, 2022 at 11:55:35AM +0200, Frederic Weisbecker wrote:
> On Sun, Oct 02, 2022 at 04:46:55PM -0700, Paul E. McKenney wrote:
> > On Sun, Oct 02, 2022 at 11:47:10PM +0200, Frederic Weisbecker wrote:
> > > On Sun, Oct 02, 2022 at 09:09:57AM -0700, Paul E. McKenney wrote:
> > > > On Sun, Oct 02, 2022 at 05:55:16PM +0200, Frederic Weisbecker wrote:
> > > > > On Thu, Sep 29, 2022 at 11:07:25AM -0700, Paul E. McKenney wrote:
> > > > > > @@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > > > > 	int ss_state;
> > > > > >
> > > > > > 	check_init_srcu_struct(ssp);
> > > > > > -	idx = srcu_read_lock(ssp);
> > > > > > +	idx = __srcu_read_lock_nmisafe(ssp);
> > > > >
> > > > > Why do we need to force the atomic based version here (even if CONFIG_NEED_SRCU_NMI_SAFE=y)?
> > > >
> > > > In kernels built with CONFIG_NEED_SRCU_NMI_SAFE=n, we of course need it.
> > > > As you say, in kernels built with CONFIG_NEED_SRCU_NMI_SAFE=y, we don't.
> > > > But it doesn't hurt to always use __srcu_read_lock_nmisafe() here, and
> > > > this is nowhere near a fastpath, so there is little benefit to using
> > > > __srcu_read_lock() when it is safe to do so.
> > > >
> > > > In addition, note that it is possible that a given srcu_struct structure's
> > > > first grace period is executed before its first reader. In that
> > > > case, we have no way of knowing which of __srcu_read_lock_nmisafe()
> > > > or __srcu_read_lock() to choose.
> > > >
> > > > So this code always does it the slow(ish) safe way.
> > >
> > > But then srcu_read_lock_nmisafe() would work as well, right?
> >
> > Almost.
> >
> > The problem is that without the leading "__", this would convince SRCU
> > that this is an NMI-safe srcu_struct. Which it might not be. Worse yet,
> > if this srcu_struct had already done an srcu_read_lock(), it would splat.
>
> Ah ok, now that makes sense.
>
> > > > > > 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
> > > > > > 	if (ss_state < SRCU_SIZE_WAIT_CALL)
> > > > > > 		sdp = per_cpu_ptr(ssp->sda, 0);
> > > > > > @@ -1123,7 +1154,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
> > > > > > 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
> > > > > > 	else if (needexp)
> > > > > > 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
> > > > > > -	srcu_read_unlock(ssp, idx);
> > > > > > +	__srcu_read_unlock_nmisafe(ssp, idx);
> > > > > > 	return s;
> > > > > > }
> > > > > >
> > > > > > @@ -1427,13 +1458,13 @@ void srcu_barrier(struct srcu_struct *ssp)
> > > > > > 	/* Initial count prevents reaching zero until all CBs are posted. */
> > > > > > 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
> > > > > >
> > > > > > -	idx = srcu_read_lock(ssp);
> > > > > > +	idx = __srcu_read_lock_nmisafe(ssp);
> > > > >
> > > > > And same here?
> > > >
> > > > Yes, same here. ;-)
> > >
> > > Now bonus question: why do SRCU grace period starting/tracking
> > > need to be in an SRCU read side critical section? :o)
> >
> > Because I am lazy and like to keep things simple? ;-)
> >
> > More seriously, take a look at srcu_gp_start_if_needed() and the functions
> > it calls and ask yourself what bad things could happen if they were
> > preempted for an arbitrarily long period of time.
>
> I can see a risk for ssp->srcu_gp_seq to overflow. Can't say that was obvious
> though, at least for me. Am I missing something else?

That is what I recall. There might also be something else, of course. ;-)

							Thanx, Paul
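For reference on the wrap concern: grace-period sequence numbers are compared with wrap-tolerant helpers along the lines of the sketch below (modeled on the helpers in kernel/rcu/rcu.h). A cookie taken from ->srcu_gp_seq only stays meaningful while the counter has advanced less than ULONG_MAX / 2 beyond it, which is exactly what an arbitrarily long preemption inside srcu_gp_start_if_needed() could violate:

```c
/* Sketch of the wrap-tolerant sequence comparison (after kernel/rcu/rcu.h). */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

/* Has the grace period identified by cookie "s" completed? */
static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
{
	return ULONG_CMP_GE(READ_ONCE(*sp), s);
}
```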
Hi Paul,

Sorry for the late response here. I am now trying to actually use this
series.

On 2022-09-29, "Paul E. McKenney" <paulmck@kernel.org> wrote:
> diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
> index d471d22a5e21..f53ad63b2bc6 100644
> --- a/kernel/rcu/Kconfig
> +++ b/kernel/rcu/Kconfig
> @@ -72,6 +72,9 @@ config TREE_SRCU
>  	help
>  	  This option selects the full-fledged version of SRCU.
>

You are missing a:

+config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+	bool
+

Otherwise there is no CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS, so for
example CONFIG_NEED_SRCU_NMI_SAFE always ends up with y on X86.

> +config NEED_SRCU_NMI_SAFE
> +	def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
> +

John
On Tue, Oct 18, 2022 at 04:37:01PM +0206, John Ogness wrote:
> Hi Paul,
>
> Sorry for the late response here. I am now trying to actually use this
> series.
>
> On 2022-09-29, "Paul E. McKenney" <paulmck@kernel.org> wrote:
> > diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
> > index d471d22a5e21..f53ad63b2bc6 100644
> > --- a/kernel/rcu/Kconfig
> > +++ b/kernel/rcu/Kconfig
> > @@ -72,6 +72,9 @@ config TREE_SRCU
> >  	help
> >  	  This option selects the full-fledged version of SRCU.
> >
>
> You are missing a:
>
> +config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> +	bool
> +
>
> Otherwise there is no CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS, so for
> example CONFIG_NEED_SRCU_NMI_SAFE always ends up with y on X86.

Good catch, thank you! Pulling this in with attribution.

							Thanx, Paul

> > +config NEED_SRCU_NMI_SAFE
> > +	def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
> > +
>
> John
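With John's fix folded in, the Kconfig arrangement looks roughly like the sketch below; the commented `select` line is an illustration of how an architecture whose this_cpu operations are already NMI-safe would opt out of the atomic slow path (the exact list of selecting architectures is outside this excerpt):

```
# kernel/rcu/Kconfig, with the missing symbol added:
config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
	bool

config NEED_SRCU_NMI_SAFE
	def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU

# An architecture then opts in from its own Kconfig (illustration):
#	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
```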
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 01226e4d960a..2cc8321c0c86 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -52,6 +52,8 @@ int init_srcu_struct(struct srcu_struct *ssp);
 #else
 /* Dummy definition for things like notifiers. Actual use gets link error. */
 struct srcu_struct { };
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) __releases(ssp);
 #endif
 
 void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
@@ -166,6 +168,25 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
 	return retval;
 }
 
+/**
+ * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but in an NMI-safe manner.
+ * See srcu_read_lock() for more information.
+ */
+static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
+{
+	int retval;
+
+	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
+		retval = __srcu_read_lock_nmisafe(ssp);
+	else
+		retval = __srcu_read_lock(ssp);
+	rcu_lock_acquire(&(ssp)->dep_map);
+	return retval;
+}
+
 /* Used by tracing, cannot be traced and cannot invoke lockdep. */
 static inline notrace int
 srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
@@ -191,6 +212,24 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
 	__srcu_read_unlock(ssp, idx);
 }
 
+/**
+ * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit an SRCU read-side critical section, but in an NMI-safe manner.
+ */
+static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+	__releases(ssp)
+{
+	WARN_ON_ONCE(idx & ~0x1);
+	rcu_lock_release(&(ssp)->dep_map);
+	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
+		__srcu_read_unlock_nmisafe(ssp, idx);
+	else
+		__srcu_read_unlock(ssp, idx);
+}
+
 /* Used by tracing, cannot be traced and cannot call lockdep. */
 static inline notrace void
 srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 6cfaa0a9a9b9..bccfa18b1b15 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -88,4 +88,15 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
 		 data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])));
 }
 
+static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+	BUG();
+	return 0;
+}
+
+static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+	BUG();
+}
+
 #endif
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 0c4eca07d78d..d45dd507f4a5 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -154,4 +154,7 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp);
 void srcu_barrier(struct srcu_struct *ssp);
 void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
 
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
+
 #endif
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index d471d22a5e21..f53ad63b2bc6 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -72,6 +72,9 @@ config TREE_SRCU
 	help
 	  This option selects the full-fledged version of SRCU.
 
+config NEED_SRCU_NMI_SAFE
+	def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU
+
 config TASKS_RCU_GENERIC
 	def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
 	select SRCU
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d8e1b270a065..7f9703b88cae 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -574,10 +574,14 @@ static struct rcu_torture_ops rcu_busted_ops = {
 DEFINE_STATIC_SRCU(srcu_ctl);
 static struct srcu_struct srcu_ctld;
 static struct srcu_struct *srcu_ctlp = &srcu_ctl;
+static struct rcu_torture_ops srcud_ops;
 
 static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
 {
-	return srcu_read_lock(srcu_ctlp);
+	if (cur_ops == &srcud_ops)
+		return srcu_read_lock_nmisafe(srcu_ctlp);
+	else
+		return srcu_read_lock(srcu_ctlp);
 }
 
 static void
@@ -601,7 +605,10 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
 
 static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
 {
-	srcu_read_unlock(srcu_ctlp, idx);
+	if (cur_ops == &srcud_ops)
+		srcu_read_unlock_nmisafe(srcu_ctlp, idx);
+	else
+		srcu_read_unlock(srcu_ctlp, idx);
 }
 
 static int torture_srcu_read_lock_held(void)
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 6fd0665f4d1f..f5b1485e79aa 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -654,6 +654,37 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct, but in an NMI-safe manner using RMW atomics.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+{
+	int idx;
+	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+	atomic_long_inc(&sdp->srcu_lock_count[idx]);
+	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
+	return idx;
+}
+EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
+
+/*
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct. Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ */
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+{
+	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+	smp_mb__before_atomic(); /* C */  /* Avoid leaking the critical section. */
+	atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+}
+EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
+
 /*
  * Start an SRCU grace period.
  */
@@ -1090,7 +1121,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 	int ss_state;
 
 	check_init_srcu_struct(ssp);
-	idx = srcu_read_lock(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp);
 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
 	if (ss_state < SRCU_SIZE_WAIT_CALL)
 		sdp = per_cpu_ptr(ssp->sda, 0);
@@ -1123,7 +1154,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
 	else if (needexp)
 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
-	srcu_read_unlock(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx);
 	return s;
 }
 
@@ -1427,13 +1458,13 @@ void srcu_barrier(struct srcu_struct *ssp)
 	/* Initial count prevents reaching zero until all CBs are posted. */
 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
 
-	idx = srcu_read_lock(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp);
 	if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
 		srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
 	else
 		for_each_possible_cpu(cpu)
 			srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
-	srcu_read_unlock(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx);
 
 	/* Remove the initial count, at which point reaching zero can happen. */
 	if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))
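To make the intended use of the new wrappers concrete, here is a hypothetical reader/updater pair; it is editor-supplied rather than part of the patch, and my_nmi_srcu, my_state, my_nmi_handler, my_update, and last_seen_value are made-up names. Per the discussion above, every reader of a given srcu_struct must consistently use the _nmisafe flavor:

```c
#include <linux/srcu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

DEFINE_SRCU(my_nmi_srcu);

struct my_state {
	int value;
};

static struct my_state __rcu *cur_state;
static int last_seen_value;

/* Reader, called from NMI context (e.g. a perf or watchdog NMI handler). */
static void my_nmi_handler(void)
{
	int idx;
	struct my_state *st;

	idx = srcu_read_lock_nmisafe(&my_nmi_srcu);
	st = srcu_dereference(cur_state, &my_nmi_srcu);
	if (st)
		WRITE_ONCE(last_seen_value, st->value);
	srcu_read_unlock_nmisafe(&my_nmi_srcu, idx);
}

/* Updater, in normal process context. */
static void my_update(struct my_state *new_st)
{
	struct my_state *old;

	old = rcu_dereference_protected(cur_state, 1);
	rcu_assign_pointer(cur_state, new_st);
	synchronize_srcu(&my_nmi_srcu);	/* Wait for all readers, including NMI-context ones. */
	kfree(old);
}
```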