diff mbox series

[RFC,v2,3/8] srcu: Check for consistent per-CPU per-srcu_struct NMI safety

Message ID 20220929180731.2875722-3-paulmck@kernel.org (mailing list archive)
State Accepted
Commit 6956f0809c3586efa7499f59b408f2ce76d03899
Headers show
Series NMI-safe SRCU reader API | expand

Commit Message

Paul E. McKenney Sept. 29, 2022, 6:07 p.m. UTC
This commit adds runtime checks to verify that a given srcu_struct uses
consistent NMI-safe (or not) read-side primitives on a per-CPU basis.

Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Petr Mladek <pmladek@suse.com>
---
 include/linux/srcu.h     |  4 ++--
 include/linux/srcutiny.h |  4 ++--
 include/linux/srcutree.h |  9 +++++++--
 kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
 4 files changed, 43 insertions(+), 12 deletions(-)

Comments

Frederic Weisbecker Oct. 2, 2022, 10:06 p.m. UTC | #1
On Thu, Sep 29, 2022 at 11:07:26AM -0700, Paul E. McKenney wrote:
> This commit adds runtime checks to verify that a given srcu_struct uses
> consistent NMI-safe (or not) read-side primitives on a per-CPU basis.
> 
> Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/
> 
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: John Ogness <john.ogness@linutronix.de>
> Cc: Petr Mladek <pmladek@suse.com>
> ---
>  include/linux/srcu.h     |  4 ++--
>  include/linux/srcutiny.h |  4 ++--
>  include/linux/srcutree.h |  9 +++++++--
>  kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
>  4 files changed, 43 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> index 2cc8321c0c86..565f60d57484 100644
> --- a/include/linux/srcu.h
> +++ b/include/linux/srcu.h
> @@ -180,7 +180,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
>  	int retval;
>  
>  	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
> -		retval = __srcu_read_lock_nmisafe(ssp);
> +		retval = __srcu_read_lock_nmisafe(ssp, true);
>  	else
>  		retval = __srcu_read_lock(ssp);

Shouldn't it be checked also when CONFIG_NEED_SRCU_NMI_SAFE=n ?

Thanks.
Paul E. McKenney Oct. 2, 2022, 11:51 p.m. UTC | #2
On Mon, Oct 03, 2022 at 12:06:19AM +0200, Frederic Weisbecker wrote:
> On Thu, Sep 29, 2022 at 11:07:26AM -0700, Paul E. McKenney wrote:
> > This commit adds runtime checks to verify that a given srcu_struct uses
> > consistent NMI-safe (or not) read-side primitives on a per-CPU basis.
> > 
> > Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/
> > 
> > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: John Ogness <john.ogness@linutronix.de>
> > Cc: Petr Mladek <pmladek@suse.com>
> > ---
> >  include/linux/srcu.h     |  4 ++--
> >  include/linux/srcutiny.h |  4 ++--
> >  include/linux/srcutree.h |  9 +++++++--
> >  kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
> >  4 files changed, 43 insertions(+), 12 deletions(-)
> > 
> > diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> > index 2cc8321c0c86..565f60d57484 100644
> > --- a/include/linux/srcu.h
> > +++ b/include/linux/srcu.h
> > @@ -180,7 +180,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
> >  	int retval;
> >  
> >  	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
> > -		retval = __srcu_read_lock_nmisafe(ssp);
> > +		retval = __srcu_read_lock_nmisafe(ssp, true);
> >  	else
> >  		retval = __srcu_read_lock(ssp);
> 
> Shouldn't it be checked also when CONFIG_NEED_SRCU_NMI_SAFE=n ?

You are asking why there is no "true" argument to __srcu_read_lock()?
That is because it checks unconditionally.  OK, so why the
"true" argument to __srcu_read_lock_nmisafe(), you ask?  Because
srcu_gp_start_if_needed() needs to call __srcu_read_lock_nmisafe()
while suppressing the checking, which it does by passing in "false".
In turn because srcu_gp_start_if_needed() cannot always tell whether
its srcu_struct is or is not NMI-safe.

							Thanx, Paul
Frederic Weisbecker Oct. 3, 2022, 10:13 a.m. UTC | #3
On Sun, Oct 02, 2022 at 04:51:03PM -0700, Paul E. McKenney wrote:
> On Mon, Oct 03, 2022 at 12:06:19AM +0200, Frederic Weisbecker wrote:
> > On Thu, Sep 29, 2022 at 11:07:26AM -0700, Paul E. McKenney wrote:
> > > This commit adds runtime checks to verify that a given srcu_struct uses
> > > consistent NMI-safe (or not) read-side primitives on a per-CPU basis.
> > > 
> > > Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/
> > > 
> > > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > Cc: Thomas Gleixner <tglx@linutronix.de>
> > > Cc: John Ogness <john.ogness@linutronix.de>
> > > Cc: Petr Mladek <pmladek@suse.com>
> > > ---
> > >  include/linux/srcu.h     |  4 ++--
> > >  include/linux/srcutiny.h |  4 ++--
> > >  include/linux/srcutree.h |  9 +++++++--
> > >  kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
> > >  4 files changed, 43 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> > > index 2cc8321c0c86..565f60d57484 100644
> > > --- a/include/linux/srcu.h
> > > +++ b/include/linux/srcu.h
> > > @@ -180,7 +180,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
> > >  	int retval;
> > >  
> > >  	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
> > > -		retval = __srcu_read_lock_nmisafe(ssp);
> > > +		retval = __srcu_read_lock_nmisafe(ssp, true);
> > >  	else
> > >  		retval = __srcu_read_lock(ssp);
> > 
> > Shouldn't it be checked also when CONFIG_NEED_SRCU_NMI_SAFE=n ?
> 
> You are asking why there is no "true" argument to __srcu_read_lock()?
> That is because it checks unconditionally.

It checks unconditionally but it always assumes not to be called as nmisafe.

For example on x86/arm64/loongarch, the same ssp used with both srcu_read_lock() and
srcu_read_lock_nmisafe() won't report an issue. But on powerpc it will.

My point is that strong archs should warn as well on behalf of others, to detect
mistakes early.

Thanks.
Paul E. McKenney Oct. 3, 2022, 11:57 a.m. UTC | #4
On Mon, Oct 03, 2022 at 12:13:31PM +0200, Frederic Weisbecker wrote:
> On Sun, Oct 02, 2022 at 04:51:03PM -0700, Paul E. McKenney wrote:
> > On Mon, Oct 03, 2022 at 12:06:19AM +0200, Frederic Weisbecker wrote:
> > > On Thu, Sep 29, 2022 at 11:07:26AM -0700, Paul E. McKenney wrote:
> > > > This commit adds runtime checks to verify that a given srcu_struct uses
> > > > consistent NMI-safe (or not) read-side primitives on a per-CPU basis.
> > > > 
> > > > Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/
> > > > 
> > > > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > > Cc: Thomas Gleixner <tglx@linutronix.de>
> > > > Cc: John Ogness <john.ogness@linutronix.de>
> > > > Cc: Petr Mladek <pmladek@suse.com>
> > > > ---
> > > >  include/linux/srcu.h     |  4 ++--
> > > >  include/linux/srcutiny.h |  4 ++--
> > > >  include/linux/srcutree.h |  9 +++++++--
> > > >  kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
> > > >  4 files changed, 43 insertions(+), 12 deletions(-)
> > > > 
> > > > diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> > > > index 2cc8321c0c86..565f60d57484 100644
> > > > --- a/include/linux/srcu.h
> > > > +++ b/include/linux/srcu.h
> > > > @@ -180,7 +180,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
> > > >  	int retval;
> > > >  
> > > >  	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
> > > > -		retval = __srcu_read_lock_nmisafe(ssp);
> > > > +		retval = __srcu_read_lock_nmisafe(ssp, true);
> > > >  	else
> > > >  		retval = __srcu_read_lock(ssp);
> > > 
> > > Shouldn't it be checked also when CONFIG_NEED_SRCU_NMI_SAFE=n ?
> > 
> > You are asking why there is no "true" argument to __srcu_read_lock()?
> > That is because it checks unconditionally.
> 
> It checks unconditionally but it always assumes not to be called as nmisafe.
> 
> For example on x86/arm64/loongarch, the same ssp used with both srcu_read_lock() and
> srcu_read_lock_nmisafe() won't report an issue. But on powerpc it will.
> 
> My point is that strong archs should warn as well on behalf of others, to detect
> mistakes early.

Good point, especially given that x86_64 and arm64 are a rather large
fraction of the uses.  Not critically urgent, but definitely nice to have.

Did you by chance have a suggestion for a nice way to accomplish this?

								Thanx, Paul
Frederic Weisbecker Oct. 3, 2022, 12:37 p.m. UTC | #5
On Mon, Oct 03, 2022 at 04:57:18AM -0700, Paul E. McKenney wrote:
> On Mon, Oct 03, 2022 at 12:13:31PM +0200, Frederic Weisbecker wrote:
> > On Sun, Oct 02, 2022 at 04:51:03PM -0700, Paul E. McKenney wrote:
> > > On Mon, Oct 03, 2022 at 12:06:19AM +0200, Frederic Weisbecker wrote:
> > > > On Thu, Sep 29, 2022 at 11:07:26AM -0700, Paul E. McKenney wrote:
> > > > > This commit adds runtime checks to verify that a given srcu_struct uses
> > > > > consistent NMI-safe (or not) read-side primitives on a per-CPU basis.
> > > > > 
> > > > > Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/
> > > > > 
> > > > > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > > > Cc: Thomas Gleixner <tglx@linutronix.de>
> > > > > Cc: John Ogness <john.ogness@linutronix.de>
> > > > > Cc: Petr Mladek <pmladek@suse.com>
> > > > > ---
> > > > >  include/linux/srcu.h     |  4 ++--
> > > > >  include/linux/srcutiny.h |  4 ++--
> > > > >  include/linux/srcutree.h |  9 +++++++--
> > > > >  kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
> > > > >  4 files changed, 43 insertions(+), 12 deletions(-)
> > > > > 
> > > > > diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> > > > > index 2cc8321c0c86..565f60d57484 100644
> > > > > --- a/include/linux/srcu.h
> > > > > +++ b/include/linux/srcu.h
> > > > > @@ -180,7 +180,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
> > > > >  	int retval;
> > > > >  
> > > > >  	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
> > > > > -		retval = __srcu_read_lock_nmisafe(ssp);
> > > > > +		retval = __srcu_read_lock_nmisafe(ssp, true);
> > > > >  	else
> > > > >  		retval = __srcu_read_lock(ssp);
> > > > 
> > > > Shouldn't it be checked also when CONFIG_NEED_SRCU_NMI_SAFE=n ?
> > > 
> > > You are asking why there is no "true" argument to __srcu_read_lock()?
> > > That is because it checks unconditionally.
> > 
> > It checks unconditionally but it always assumes not to be called as nmisafe.
> > 
> > For example on x86/arm64/loongarch, the same ssp used with both srcu_read_lock() and
> > srcu_read_lock_nmisafe() won't report an issue. But on powerpc it will.
> > 
> > My point is that strong archs should warn as well on behalf of others, to detect
> > mistakes early.
> 
> Good point, especially given that x86_64 and arm64 are a rather large
> fraction of the uses.  Not critically urgent, but definitely nice to have.

No indeed.

> 
> Did you by chance have a suggestion for a nice way to accomplish this?

This could be like this:

enum srcu_nmi_flags {
     SRCU_NMI_UNKNOWN = 0x0,
     SRCU_NMI_UNSAFE  = 0x1,
     SRCU_NMI_SAFE    = 0x2
};

#ifdef CONFIG_NEED_SRCU_NMI_SAFE
static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, enum srcu_nmi_flags flags)
{
	int idx;
	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);

	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
	atomic_long_inc(&sdp->srcu_lock_count[idx]);
	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */

	srcu_check_nmi_safety(ssp, flags);

	return idx;
}
#else
static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, enum srcu_nmi_flags flags)
{
	srcu_check_nmi_safety(ssp, flags);
	return __srcu_read_lock(ssp);
}
#endif

static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp)
{
	return  __srcu_read_lock_nmisafe(ssp, SRCU_NMI_SAFE);
}

// An __srcu_read_lock() caller in kernel/rcu/tasks.h must be
// taken care of as well
static inline int srcu_read_lock(struct srcu_struct *ssp)
{
	srcu_check_nmi_safety(ssp, SRCU_NMI_UNSAFE);
	return  __srcu_read_lock(ssp);
}

And then you can call __srcu_read_lock_nmisafe(ssp, SRCU_NMI_UNKNOWN) from
initializers of gp.
Paul E. McKenney Oct. 3, 2022, 1:32 p.m. UTC | #6
On Mon, Oct 03, 2022 at 02:37:21PM +0200, Frederic Weisbecker wrote:
> On Mon, Oct 03, 2022 at 04:57:18AM -0700, Paul E. McKenney wrote:
> > On Mon, Oct 03, 2022 at 12:13:31PM +0200, Frederic Weisbecker wrote:
> > > On Sun, Oct 02, 2022 at 04:51:03PM -0700, Paul E. McKenney wrote:
> > > > On Mon, Oct 03, 2022 at 12:06:19AM +0200, Frederic Weisbecker wrote:
> > > > > On Thu, Sep 29, 2022 at 11:07:26AM -0700, Paul E. McKenney wrote:
> > > > > > This commit adds runtime checks to verify that a given srcu_struct uses
> > > > > > consistent NMI-safe (or not) read-side primitives on a per-CPU basis.
> > > > > > 
> > > > > > Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/
> > > > > > 
> > > > > > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > > > > Cc: Thomas Gleixner <tglx@linutronix.de>
> > > > > > Cc: John Ogness <john.ogness@linutronix.de>
> > > > > > Cc: Petr Mladek <pmladek@suse.com>
> > > > > > ---
> > > > > >  include/linux/srcu.h     |  4 ++--
> > > > > >  include/linux/srcutiny.h |  4 ++--
> > > > > >  include/linux/srcutree.h |  9 +++++++--
> > > > > >  kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
> > > > > >  4 files changed, 43 insertions(+), 12 deletions(-)
> > > > > > 
> > > > > > diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> > > > > > index 2cc8321c0c86..565f60d57484 100644
> > > > > > --- a/include/linux/srcu.h
> > > > > > +++ b/include/linux/srcu.h
> > > > > > @@ -180,7 +180,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
> > > > > >  	int retval;
> > > > > >  
> > > > > >  	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
> > > > > > -		retval = __srcu_read_lock_nmisafe(ssp);
> > > > > > +		retval = __srcu_read_lock_nmisafe(ssp, true);
> > > > > >  	else
> > > > > >  		retval = __srcu_read_lock(ssp);
> > > > > 
> > > > > Shouldn't it be checked also when CONFIG_NEED_SRCU_NMI_SAFE=n ?
> > > > 
> > > > You are asking why there is no "true" argument to __srcu_read_lock()?
> > > > That is because it checks unconditionally.
> > > 
> > > It checks unconditionally but it always assumes not to be called as nmisafe.
> > > 
> > > For example on x86/arm64/loongarch, the same ssp used with both srcu_read_lock() and
> > > srcu_read_lock_nmisafe() won't report an issue. But on powerpc it will.
> > > 
> > > My point is that strong archs should warn as well on behalf of others, to detect
> > > mistakes early.
> > 
> > Good point, especially given that x86_64 and arm64 are a rather large
> > fraction of the uses.  Not critically urgent, but definitely nice to have.
> 
> No indeed.
> 
> > 
> > Did you by chance have a suggestion for a nice way to accomplish this?
> 
> This could be like this:
> 
> enum srcu_nmi_flags {
>      SRCU_NMI_UNKNOWN = 0x0,
>      SRCU_NMI_UNSAFE  = 0x1,
>      SRCU_NMI_SAFE    = 0x2
> };
> 
> #ifdef CONFIG_NEED_SRCU_NMI_SAFE
> static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, enum srcu_nmi_flags flags)
> {
> 	int idx;
> 	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> 
> 	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> 	atomic_long_inc(&sdp->srcu_lock_count[idx]);
> 	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
> 
> 	srcu_check_nmi_safety(ssp, flags);
> 
> 	return idx;
> }
> #else
> static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, enum srcu_nmi_flags flags)
> {
> 	srcu_check_nmi_safety(ssp, flags);
> 	return __srcu_read_lock(ssp);
> }
> #endif
> 
> static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp)
> {
> 	return  __srcu_read_lock_nmisafe(ssp, SRCU_NMI_SAFE);
> }
> 
> // An __srcu_read_lock() caller in kernel/rcu/tasks.h must be
> // taken care of as well
> static inline int srcu_read_lock(struct srcu_struct *ssp)
> {
> 	srcu_check_nmi_safety(ssp, SRCU_NMI_UNSAFE);
> 	return  __srcu_read_lock(ssp);
> }
> 
> And then you can call __srcu_read_lock_nmisafe(ssp, SRCU_NMI_UNKNOWN) from
> initializers of gp.

Not bad at all!

Would you like to send a patch?

I do not consider this to be something for the current merge window even
if the rest goes in because printk() is used heavily and because it is
easy to get access to powerpc and presumably also riscv systems.

But as you say, it would be very good to have longer term for the case
where srcu_read_lock_nmisafe() is used for some more obscure purpose.

							Thanx, Paul
Frederic Weisbecker Oct. 3, 2022, 1:36 p.m. UTC | #7
On Mon, Oct 03, 2022 at 06:32:10AM -0700, Paul E. McKenney wrote:
> On Mon, Oct 03, 2022 at 02:37:21PM +0200, Frederic Weisbecker wrote:
> > On Mon, Oct 03, 2022 at 04:57:18AM -0700, Paul E. McKenney wrote:
> > > On Mon, Oct 03, 2022 at 12:13:31PM +0200, Frederic Weisbecker wrote:
> > > > On Sun, Oct 02, 2022 at 04:51:03PM -0700, Paul E. McKenney wrote:
> > > > > On Mon, Oct 03, 2022 at 12:06:19AM +0200, Frederic Weisbecker wrote:
> > > > > > On Thu, Sep 29, 2022 at 11:07:26AM -0700, Paul E. McKenney wrote:
> > > > > > > This commit adds runtime checks to verify that a given srcu_struct uses
> > > > > > > consistent NMI-safe (or not) read-side primitives on a per-CPU basis.
> > > > > > > 
> > > > > > > Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/
> > > > > > > 
> > > > > > > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > > > > > Cc: Thomas Gleixner <tglx@linutronix.de>
> > > > > > > Cc: John Ogness <john.ogness@linutronix.de>
> > > > > > > Cc: Petr Mladek <pmladek@suse.com>
> > > > > > > ---
> > > > > > >  include/linux/srcu.h     |  4 ++--
> > > > > > >  include/linux/srcutiny.h |  4 ++--
> > > > > > >  include/linux/srcutree.h |  9 +++++++--
> > > > > > >  kernel/rcu/srcutree.c    | 38 ++++++++++++++++++++++++++++++++------
> > > > > > >  4 files changed, 43 insertions(+), 12 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> > > > > > > index 2cc8321c0c86..565f60d57484 100644
> > > > > > > --- a/include/linux/srcu.h
> > > > > > > +++ b/include/linux/srcu.h
> > > > > > > @@ -180,7 +180,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
> > > > > > >  	int retval;
> > > > > > >  
> > > > > > >  	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
> > > > > > > -		retval = __srcu_read_lock_nmisafe(ssp);
> > > > > > > +		retval = __srcu_read_lock_nmisafe(ssp, true);
> > > > > > >  	else
> > > > > > >  		retval = __srcu_read_lock(ssp);
> > > > > > 
> > > > > > Shouldn't it be checked also when CONFIG_NEED_SRCU_NMI_SAFE=n ?
> > > > > 
> > > > > You are asking why there is no "true" argument to __srcu_read_lock()?
> > > > > That is because it checks unconditionally.
> > > > 
> > > > It checks unconditionally but it always assumes not to be called as nmisafe.
> > > > 
> > > > For example on x86/arm64/loongarch, the same ssp used with both srcu_read_lock() and
> > > > srcu_read_lock_nmisafe() won't report an issue. But on powerpc it will.
> > > > 
> > > > My point is that strong archs should warn as well on behalf of others, to detect
> > > > mistakes early.
> > > 
> > > Good point, especially given that x86_64 and arm64 are a rather large
> > > fraction of the uses.  Not critically urgent, but definitely nice to have.
> > 
> > No indeed.
> > 
> > > 
> > > Did you by chance have a suggestion for a nice way to accomplish this?
> > 
> > This could be like this:
> > 
> > enum srcu_nmi_flags {
> >      SRCU_NMI_UNKNOWN = 0x0,
> >      SRCU_NMI_UNSAFE  = 0x1,
> >      SRCU_NMI_SAFE    = 0x2
> > };
> > 
> > #ifdef CONFIG_NEED_SRCU_NMI_SAFE
> > static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, enum srcu_nmi_flags flags)
> > {
> > 	int idx;
> > 	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
> > 
> > 	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
> > 	atomic_long_inc(&sdp->srcu_lock_count[idx]);
> > 	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
> > 
> > 	srcu_check_nmi_safety(ssp, flags);
> > 
> > 	return idx;
> > }
> > #else
> > static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, enum srcu_nmi_flags flags)
> > {
> > 	srcu_check_nmi_safety(ssp, flags);
> > 	return __srcu_read_lock(ssp);
> > }
> > #endif
> > 
> > static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp)
> > {
> > 	return  __srcu_read_lock_nmisafe(ssp, SRCU_NMI_SAFE);
> > }
> > 
> > // An __srcu_read_lock() caller in kernel/rcu/tasks.h must be
> > // taken care of as well
> > static inline int srcu_read_lock(struct srcu_struct *ssp)
> > {
> > 	srcu_check_nmi_safety(ssp, SRCU_NMI_UNSAFE);
> > 	return  __srcu_read_lock(ssp);
> > }
> > 
> > And then you can call __srcu_read_lock_nmisafe(ssp, SRCU_NMI_UNKNOWN) from
> > initializers of gp.
> 
> Not bad at all!
> 
> Would you like to send a patch?
> 
> I do not consider this to be something for the current merge window even
> if the rest goes in because printk() is used heavily and because it is
> easy to get access to powerpc and presumably also riscv systems.
> 
> But as you say, it would be very good to have longer term for the case
> where srcu_read_lock_nmisafe() is used for some more obscure purpose.

Sure thing!

Thanks.
diff mbox series

Patch

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 2cc8321c0c86..565f60d57484 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -180,7 +180,7 @@  static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
 	int retval;
 
 	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
-		retval = __srcu_read_lock_nmisafe(ssp);
+		retval = __srcu_read_lock_nmisafe(ssp, true);
 	else
 		retval = __srcu_read_lock(ssp);
 	rcu_lock_acquire(&(ssp)->dep_map);
@@ -225,7 +225,7 @@  static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
 	WARN_ON_ONCE(idx & ~0x1);
 	rcu_lock_release(&(ssp)->dep_map);
 	if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
-		__srcu_read_unlock_nmisafe(ssp, idx);
+		__srcu_read_unlock_nmisafe(ssp, idx, true);
 	else
 		__srcu_read_unlock(ssp, idx);
 }
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index bccfa18b1b15..efd808a23f8d 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -88,13 +88,13 @@  static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
 		 data_race(READ_ONCE(ssp->srcu_lock_nesting[idx])));
 }
 
-static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe)
 {
 	BUG();
 	return 0;
 }
 
-static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe)
 {
 	BUG();
 }
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index d45dd507f4a5..35ffdedf86cc 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -25,6 +25,7 @@  struct srcu_data {
 	/* Read-side state. */
 	atomic_long_t srcu_lock_count[2];	/* Locks per CPU. */
 	atomic_long_t srcu_unlock_count[2];	/* Unlocks per CPU. */
+	int srcu_nmi_safety;			/* NMI-safe srcu_struct structure? */
 
 	/* Update-side state. */
 	spinlock_t __private lock ____cacheline_internodealigned_in_smp;
@@ -42,6 +43,10 @@  struct srcu_data {
 	struct srcu_struct *ssp;
 };
 
+#define SRCU_NMI_UNKNOWN	0x0
+#define SRCU_NMI_NMI_UNSAFE	0x1
+#define SRCU_NMI_NMI_SAFE	0x2
+
 /*
  * Node in SRCU combining tree, similar in function to rcu_data.
  */
@@ -154,7 +159,7 @@  void synchronize_srcu_expedited(struct srcu_struct *ssp);
 void srcu_barrier(struct srcu_struct *ssp);
 void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
 
-int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
-void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) __acquires(ssp);
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) __releases(ssp);
 
 #endif
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index f5b1485e79aa..09a11f2c2042 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -626,6 +626,26 @@  void cleanup_srcu_struct(struct srcu_struct *ssp)
 }
 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
 
+/*
+ * Check for consistent NMI safety.
+ */
+static void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe)
+{
+	int nmi_safe_mask = 1 << nmi_safe;
+	int old_nmi_safe_mask;
+	struct srcu_data *sdp;
+
+	if (!IS_ENABLED(CONFIG_PROVE_RCU))
+		return;
+	sdp = raw_cpu_ptr(ssp->sda);
+	old_nmi_safe_mask = READ_ONCE(sdp->srcu_nmi_safety);
+	if (!old_nmi_safe_mask) {
+		WRITE_ONCE(sdp->srcu_nmi_safety, nmi_safe_mask);
+		return;
+	}
+	WARN_ONCE(old_nmi_safe_mask != nmi_safe_mask, "CPU %d old state %d new state %d\n", sdp->cpu, old_nmi_safe_mask, nmi_safe_mask);
+}
+
 /*
  * Counts the new reader in the appropriate per-CPU element of the
  * srcu_struct.
@@ -638,6 +658,7 @@  int __srcu_read_lock(struct srcu_struct *ssp)
 	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
 	this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter);
 	smp_mb(); /* B */  /* Avoid leaking the critical section. */
+	srcu_check_nmi_safety(ssp, false);
 	return idx;
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock);
@@ -651,6 +672,7 @@  void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
 {
 	smp_mb(); /* C */  /* Avoid leaking the critical section. */
 	this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter);
+	srcu_check_nmi_safety(ssp, false);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
@@ -659,7 +681,7 @@  EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  * srcu_struct, but in an NMI-safe manner using RMW atomics.
  * Returns an index that must be passed to the matching srcu_read_unlock().
  */
-int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
+int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe)
 {
 	int idx;
 	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
@@ -667,6 +689,8 @@  int __srcu_read_lock_nmisafe(struct srcu_struct *ssp)
 	idx = READ_ONCE(ssp->srcu_idx) & 0x1;
 	atomic_long_inc(&sdp->srcu_lock_count[idx]);
 	smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */
+	if (chknmisafe)
+		srcu_check_nmi_safety(ssp, true);
 	return idx;
 }
 EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
@@ -676,12 +700,14 @@  EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe);
  * element of the srcu_struct.  Note that this may well be a different
  * CPU than that which was incremented by the corresponding srcu_read_lock().
  */
-void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
+void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe)
 {
 	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
 
 	smp_mb__before_atomic(); /* C */  /* Avoid leaking the critical section. */
 	atomic_long_inc(&sdp->srcu_unlock_count[idx]);
+	if (chknmisafe)
+		srcu_check_nmi_safety(ssp, true);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
 
@@ -1121,7 +1147,7 @@  static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 	int ss_state;
 
 	check_init_srcu_struct(ssp);
-	idx = __srcu_read_lock_nmisafe(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp, false);
 	ss_state = smp_load_acquire(&ssp->srcu_size_state);
 	if (ss_state < SRCU_SIZE_WAIT_CALL)
 		sdp = per_cpu_ptr(ssp->sda, 0);
@@ -1154,7 +1180,7 @@  static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 		srcu_funnel_gp_start(ssp, sdp, s, do_norm);
 	else if (needexp)
 		srcu_funnel_exp_start(ssp, sdp_mynode, s);
-	__srcu_read_unlock_nmisafe(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx, false);
 	return s;
 }
 
@@ -1458,13 +1484,13 @@  void srcu_barrier(struct srcu_struct *ssp)
 	/* Initial count prevents reaching zero until all CBs are posted. */
 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 1);
 
-	idx = __srcu_read_lock_nmisafe(ssp);
+	idx = __srcu_read_lock_nmisafe(ssp, false);
 	if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
 		srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0));
 	else
 		for_each_possible_cpu(cpu)
 			srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu));
-	__srcu_read_unlock_nmisafe(ssp, idx);
+	__srcu_read_unlock_nmisafe(ssp, idx, false);
 
 	/* Remove the initial count, at which point reaching zero can happen. */
 	if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt))