diff mbox series

[11/20] srcu: Move grace-period fields from srcu_struct to srcu_usage

Message ID 20230330224726.662344-11-paulmck@kernel.org (mailing list archive)
State Accepted
Commit 03200b5ca3b4d4edf634dc052bf3b8eb8dc8bbbc
Headers show
Series Further shrink srcu_struct to promote cache locality | expand

Commit Message

Paul E. McKenney March 30, 2023, 10:47 p.m. UTC
This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
from the srcu_struct structure to the srcu_usage structure to reduce
the size of the former in order to improve cache locality.

Suggested-by: Christoph Hellwig <hch@lst.de>
Tested-by: Sachin Sant <sachinp@linux.ibm.com>
Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/srcutree.h |  25 ++++----
 kernel/rcu/srcutree.c    | 128 +++++++++++++++++++--------------------
 2 files changed, 77 insertions(+), 76 deletions(-)

Comments

Jon Hunter June 1, 2023, 11:14 a.m. UTC | #1
Hi Paul,

On 30/03/2023 23:47, Paul E. McKenney wrote:
> This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
> ->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
> from the srcu_struct structure to the srcu_usage structure to reduce
> the size of the former in order to improve cache locality.
> 
> Suggested-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Sachin Sant <sachinp@linux.ibm.com>
> Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>


I have noticed a suspend regression on some of our Tegra boards recently with v6.4-rc and interestingly bisect is pointing to this commit. I was unable revert this on top of the latest mainline but if I checkout this commit suspend fails and if I checkout the previous commit is passes.

Enabling more debug I was able to capture the following crash log I see on one of the boards ...

[   57.327645] PM: suspend entry (deep)
[   57.331660] Filesystems sync: 0.000 seconds
[   57.340147] Freezing user space processes
[   57.347470] Freezing user space processes completed (elapsed 0.007 seconds)
[   57.347501] OOM killer disabled.
[   57.347508] Freezing remaining freezable tasks
[   57.348834] Freezing remaining freezable tasks completed (elapsed 0.001 seconds)
[   57.349932] 8<--- cut here ---
[   57.349943] Unable to handle kernel NULL pointer dereference at virtual address 00000000 when write
[   57.349960] [00000000] *pgd=00000000
[   57.349986] Internal error: Oops: 805 [#1] PREEMPT SMP ARM
[   57.350007] Modules linked in: tegra30_tsensor
[   57.350033] CPU: 0 PID: 589 Comm: rtcwake Not tainted 6.3.0-rc1-00011-g03200b5ca3b4-dirty #3
[   57.350057] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree)
[   57.350067] PC is at rcu_segcblist_enqueue+0x2c/0x38
[   57.350120] LR is at srcu_gp_start_if_needed+0xe4/0x544
[   57.350169] pc : [<c01a5120>]    lr : [<c0198b5c>]    psr: a0070093
[   57.350183] sp : f0b2dd20  ip : 3b5870ef  fp : 00000000
[   57.350194] r10: ef787d84  r9 : 00000000  r8 : ef787d80
[   57.350205] r7 : 80070013  r6 : c131ec30  r5 : ef787d40  r4 : f0b2dd64
[   57.350217] r3 : 00000000  r2 : 00000000  r1 : f0b2dd64  r0 : ef787d84
[   57.350230] Flags: NzCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment none
[   57.350251] Control: 10c5387d  Table: 81d8004a  DAC: 00000051
[   57.350261] Register r0 information: non-slab/vmalloc memory
[   57.350283] Register r1 information: 2-page vmalloc region starting at 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
[   57.350322] Register r2 information: NULL pointer
[   57.350337] Register r3 information: NULL pointer
[   57.350350] Register r4 information: 2-page vmalloc region starting at 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
[   57.350379] Register r5 information: non-slab/vmalloc memory
[   57.350394] Register r6 information: non-slab/vmalloc memory
[   57.350408] Register r7 information: non-paged memory
[   57.350422] Register r8 information: non-slab/vmalloc memory
[   57.350436] Register r9 information: NULL pointer
[   57.350449] Register r10 information: non-slab/vmalloc memory
[   57.350463] Register r11 information: NULL pointer
[   57.350477] Register r12 information: non-paged memory
[   57.350491] Process rtcwake (pid: 589, stack limit = 0x410bb531)
[   57.350510] Stack: (0xf0b2dd20 to 0xf0b2e000)
[   57.350534] dd20: 00000000 c1ee4a40 f0b2dd7c c0184f24 ef781495 3b5870ef c1ee4a40 c1ee4a40
[   57.350555] dd40: c131ec30 00000000 00000002 c0f3d1fc c3542ac0 c2abbb10 c1ee4a40 c0199044
[   57.350574] dd60: 60070013 00000000 c0195924 00000000 00000000 f0b2dd74 f0b2dd74 3b5870ef
[   57.350592] dd80: 00000000 c131ebc0 c120ab28 c0146d9c c2785b94 c2785b40 c0fee9f4 c0872590
[   57.350611] dda0: c2785b40 c08c39cc c2785b40 c08c3a3c c2788c00 c08c40b0 c0f3d1fc c066f028
[   57.350630] ddc0: f0b2de14 c2788c00 c1325ef4 c08c1d40 c2788c00 c1325ef4 c0fee9f4 c08c31cc
[   57.350648] dde0: c13708a0 0000000d 00000000 c0681c10 c16afe84 00000002 56508788 0000000d
[   57.350665] de00: 00000002 c13708a0 10624dd3 56409580 0000000d 00000000 00000002 c0f3d1fc
[   57.350685] de20: c3542ac0 c2abbb10 c1ee4a40 c06824e4 00000000 ffffa900 00000000 c1386510
[   57.350703] de40: 00000003 00000003 c1204f75 c017e8a8 c3542ac0 c2abbb10 00428228 c0171574
[   57.350721] de60: 00000000 00000000 00000003 3b5870ef c1204f75 00000000 00000003 c137aeb4
[   57.350739] de80: c1204f75 c0f3d1fc c3542ac0 c2abbb10 00428228 c017f380 00000003 c0f38a54
[   57.350757] dea0: 00000003 c1386524 00000004 c017d708 00000004 c2abbb00 00000000 00000000
[   57.350775] dec0: c3542ac0 f0b2df28 c2abbb10 c03305b4 00000000 00000000 c2953c00 c1ee4a40
[   57.350794] dee0: 00429438 00000004 c0d18488 00004004 00000000 c02b1094 00000a55 c1d80010
[   57.350812] df00: c1d80010 00000000 00000000 f0b2df78 01010006 00000004 00000000 00429438
[   57.350830] df20: 00000000 00000000 c2953c00 00000000 00000000 00000000 00000000 00000000
[   57.350848] df40: 00000000 00004004 00000000 00000000 0000006c 3b5870ef c2953c00 c2953c00
[   57.350866] df60: 00000000 00000000 c1ee4a40 00429438 00000004 c02b12c8 00000000 00000000
[   57.350885] df80: 00001008 3b5870ef 0000006c 00429438 00428228 00000004 c0100324 c1ee4a40
[   57.350902] dfa0: 00000004 c01000c0 0000006c 00429438 00000004 00429438 00000004 00000000
[   57.350920] dfc0: 0000006c 00429438 00428228 00000004 00000004 00000004 0041578c 00428228
[   57.350938] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206 600f0030 00000004 00000000 00000000
[   57.350960]  rcu_segcblist_enqueue from srcu_gp_start_if_needed+0xe4/0x544
[   57.351023]  srcu_gp_start_if_needed from __synchronize_srcu.part.6+0x70/0x98
[   57.351084]  __synchronize_srcu.part.6 from srcu_notifier_chain_unregister+0x6c/0xdc
[   57.351155]  srcu_notifier_chain_unregister from cpufreq_unregister_notifier+0x60/0xbc
[   57.351215]  cpufreq_unregister_notifier from tegra_actmon_pause.part.0+0x1c/0x54
[   57.351277]  tegra_actmon_pause.part.0 from tegra_actmon_stop+0x38/0x3c
[   57.351324]  tegra_actmon_stop from tegra_governor_event_handler+0x100/0x11c
[   57.351373]  tegra_governor_event_handler from devfreq_suspend_device+0x64/0xac
[   57.351423]  devfreq_suspend_device from devfreq_suspend+0x30/0x64
[   57.351467]  devfreq_suspend from dpm_suspend+0x34/0x33c
[   57.351506]  dpm_suspend from dpm_suspend_start+0x90/0x98
[   57.351528]  dpm_suspend_start from suspend_devices_and_enter+0xe4/0x93c
[   57.351573]  suspend_devices_and_enter from pm_suspend+0x280/0x3ac
[   57.351614]  pm_suspend from state_store+0x6c/0xc8
[   57.351654]  state_store from kernfs_fop_write_iter+0x118/0x1b4
[   57.351696]  kernfs_fop_write_iter from vfs_write+0x314/0x3d4
[   57.351733]  vfs_write from ksys_write+0xa0/0xd0
[   57.351760]  ksys_write from ret_fast_syscall+0x0/0x54
[   57.351788] Exception stack(0xf0b2dfa8 to 0xf0b2dff0)
[   57.351809] dfa0:                   0000006c 00429438 00000004 00429438 00000004 00000000
[   57.351828] dfc0: 0000006c 00429438 00428228 00000004 00000004 00000004 0041578c 00428228
[   57.351843] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206
[   57.351863] Code: e2833001 e5803034 e5812000 e5903010 (e5831000)
[   57.351875] ---[ end trace 0000000000000000 ]---


I have not dug into this yet and so wanted to see if you have any thoughts on this?

Thanks!
Jon
Paul E. McKenney June 1, 2023, 1:46 p.m. UTC | #2
On Thu, Jun 01, 2023 at 08:33:10PM +0800, Z qiang wrote:
> >
> > Hi Paul,
> >
> > On 30/03/2023 23:47, Paul E. McKenney wrote:
> > > This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
> > > ->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
> > > from the srcu_struct structure to the srcu_usage structure to reduce
> > > the size of the former in order to improve cache locality.
> > >
> > > Suggested-by: Christoph Hellwig <hch@lst.de>
> > > Tested-by: Sachin Sant <sachinp@linux.ibm.com>
> > > Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> > > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> >
> >
> > I have noticed a suspend regression on some of our Tegra boards recently
> with v6.4-rc and interestingly bisect is pointing to this commit. I was
> unable revert this on top of the latest mainline but if I checkout this
> commit suspend fails and if I checkout the previous commit is passes.
> >
> > Enabling more debug I was able to capture the following crash log I see
> on one of the boards ...
> >
> > [   57.327645] PM: suspend entry (deep)
> > [   57.331660] Filesystems sync: 0.000 seconds
> > [   57.340147] Freezing user space processes
> > [   57.347470] Freezing user space processes completed (elapsed 0.007
> seconds)
> > [   57.347501] OOM killer disabled.
> > [   57.347508] Freezing remaining freezable tasks
> > [   57.348834] Freezing remaining freezable tasks completed (elapsed
> 0.001 seconds)
> > [   57.349932] 8<--- cut here ---
> > [   57.349943] Unable to handle kernel NULL pointer dereference at
> virtual address 00000000 when write
> > [   57.349960] [00000000] *pgd=00000000
> > [   57.349986] Internal error: Oops: 805 [#1] PREEMPT SMP ARM
> > [   57.350007] Modules linked in: tegra30_tsensor
> > [   57.350033] CPU: 0 PID: 589 Comm: rtcwake Not tainted
> 6.3.0-rc1-00011-g03200b5ca3b4-dirty #3
> > [   57.350057] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree)
> > [   57.350067] PC is at rcu_segcblist_enqueue+0x2c/0x38
> > [   57.350120] LR is at srcu_gp_start_if_needed+0xe4/0x544
> > [   57.350169] pc : [<c01a5120>]    lr : [<c0198b5c>]    psr: a0070093
> > [   57.350183] sp : f0b2dd20  ip : 3b5870ef  fp : 00000000
> > [   57.350194] r10: ef787d84  r9 : 00000000  r8 : ef787d80
> > [   57.350205] r7 : 80070013  r6 : c131ec30  r5 : ef787d40  r4 : f0b2dd64
> > [   57.350217] r3 : 00000000  r2 : 00000000  r1 : f0b2dd64  r0 : ef787d84
> > [   57.350230] Flags: NzCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM
>  Segment none
> > [   57.350251] Control: 10c5387d  Table: 81d8004a  DAC: 00000051
> > [   57.350261] Register r0 information: non-slab/vmalloc memory
> > [   57.350283] Register r1 information: 2-page vmalloc region starting at
> 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
> > [   57.350322] Register r2 information: NULL pointer
> > [   57.350337] Register r3 information: NULL pointer
> > [   57.350350] Register r4 information: 2-page vmalloc region starting at
> 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
> > [   57.350379] Register r5 information: non-slab/vmalloc memory
> > [   57.350394] Register r6 information: non-slab/vmalloc memory
> > [   57.350408] Register r7 information: non-paged memory
> > [   57.350422] Register r8 information: non-slab/vmalloc memory
> > [   57.350436] Register r9 information: NULL pointer
> > [   57.350449] Register r10 information: non-slab/vmalloc memory
> > [   57.350463] Register r11 information: NULL pointer
> > [   57.350477] Register r12 information: non-paged memory
> > [   57.350491] Process rtcwake (pid: 589, stack limit = 0x410bb531)
> > [   57.350510] Stack: (0xf0b2dd20 to 0xf0b2e000)
> > [   57.350534] dd20: 00000000 c1ee4a40 f0b2dd7c c0184f24 ef781495
> 3b5870ef c1ee4a40 c1ee4a40
> > [   57.350555] dd40: c131ec30 00000000 00000002 c0f3d1fc c3542ac0
> c2abbb10 c1ee4a40 c0199044
> > [   57.350574] dd60: 60070013 00000000 c0195924 00000000 00000000
> f0b2dd74 f0b2dd74 3b5870ef
> > [   57.350592] dd80: 00000000 c131ebc0 c120ab28 c0146d9c c2785b94
> c2785b40 c0fee9f4 c0872590
> > [   57.350611] dda0: c2785b40 c08c39cc c2785b40 c08c3a3c c2788c00
> c08c40b0 c0f3d1fc c066f028
> > [   57.350630] ddc0: f0b2de14 c2788c00 c1325ef4 c08c1d40 c2788c00
> c1325ef4 c0fee9f4 c08c31cc
> > [   57.350648] dde0: c13708a0 0000000d 00000000 c0681c10 c16afe84
> 00000002 56508788 0000000d
> > [   57.350665] de00: 00000002 c13708a0 10624dd3 56409580 0000000d
> 00000000 00000002 c0f3d1fc
> > [   57.350685] de20: c3542ac0 c2abbb10 c1ee4a40 c06824e4 00000000
> ffffa900 00000000 c1386510
> > [   57.350703] de40: 00000003 00000003 c1204f75 c017e8a8 c3542ac0
> c2abbb10 00428228 c0171574
> > [   57.350721] de60: 00000000 00000000 00000003 3b5870ef c1204f75
> 00000000 00000003 c137aeb4
> > [   57.350739] de80: c1204f75 c0f3d1fc c3542ac0 c2abbb10 00428228
> c017f380 00000003 c0f38a54
> > [   57.350757] dea0: 00000003 c1386524 00000004 c017d708 00000004
> c2abbb00 00000000 00000000
> > [   57.350775] dec0: c3542ac0 f0b2df28 c2abbb10 c03305b4 00000000
> 00000000 c2953c00 c1ee4a40
> > [   57.350794] dee0: 00429438 00000004 c0d18488 00004004 00000000
> c02b1094 00000a55 c1d80010
> > [   57.350812] df00: c1d80010 00000000 00000000 f0b2df78 01010006
> 00000004 00000000 00429438
> > [   57.350830] df20: 00000000 00000000 c2953c00 00000000 00000000
> 00000000 00000000 00000000
> > [   57.350848] df40: 00000000 00004004 00000000 00000000 0000006c
> 3b5870ef c2953c00 c2953c00
> > [   57.350866] df60: 00000000 00000000 c1ee4a40 00429438 00000004
> c02b12c8 00000000 00000000
> > [   57.350885] df80: 00001008 3b5870ef 0000006c 00429438 00428228
> 00000004 c0100324 c1ee4a40
> > [   57.350902] dfa0: 00000004 c01000c0 0000006c 00429438 00000004
> 00429438 00000004 00000000
> > [   57.350920] dfc0: 0000006c 00429438 00428228 00000004 00000004
> 00000004 0041578c 00428228
> > [   57.350938] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206 600f0030
> 00000004 00000000 00000000
> > [   57.350960]  rcu_segcblist_enqueue from
> srcu_gp_start_if_needed+0xe4/0x544
> > [   57.351023]  srcu_gp_start_if_needed from
> __synchronize_srcu.part.6+0x70/0x98
> > [   57.351084]  __synchronize_srcu.part.6 from
> srcu_notifier_chain_unregister+0x6c/0xdc
> > [   57.351155]  srcu_notifier_chain_unregister from
> cpufreq_unregister_notifier+0x60/0xbc
> > [   57.351215]  cpufreq_unregister_notifier from
> tegra_actmon_pause.part.0+0x1c/0x54
> > [   57.351277]  tegra_actmon_pause.part.0 from tegra_actmon_stop+0x38/0x3c
> > [   57.351324]  tegra_actmon_stop from
> tegra_governor_event_handler+0x100/0x11c
> > [   57.351373]  tegra_governor_event_handler from
> devfreq_suspend_device+0x64/0xac
> > [   57.351423]  devfreq_suspend_device from devfreq_suspend+0x30/0x64
> > [   57.351467]  devfreq_suspend from dpm_suspend+0x34/0x33c
> > [   57.351506]  dpm_suspend from dpm_suspend_start+0x90/0x98
> > [   57.351528]  dpm_suspend_start from
> suspend_devices_and_enter+0xe4/0x93c
> > [   57.351573]  suspend_devices_and_enter from pm_suspend+0x280/0x3ac
> > [   57.351614]  pm_suspend from state_store+0x6c/0xc8
> > [   57.351654]  state_store from kernfs_fop_write_iter+0x118/0x1b4
> > [   57.351696]  kernfs_fop_write_iter from vfs_write+0x314/0x3d4
> > [   57.351733]  vfs_write from ksys_write+0xa0/0xd0
> > [   57.351760]  ksys_write from ret_fast_syscall+0x0/0x54
> > [   57.351788] Exception stack(0xf0b2dfa8 to 0xf0b2dff0)
> > [   57.351809] dfa0:                   0000006c 00429438 00000004
> 00429438 00000004 00000000
> > [   57.351828] dfc0: 0000006c 00429438 00428228 00000004 00000004
> 00000004 0041578c 00428228
> > [   57.351843] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206
> > [   57.351863] Code: e2833001 e5803034 e5812000 e5903010 (e5831000)
> > [   57.351875] ---[ end trace 0000000000000000 ]---
> >
> >
> > I have not dug into this yet and so wanted to see if you have any
> thoughts on this?
> >
> 
> Hi, Jon
> 
> Please try it:
> 
> diff --git a/include/linux/notifier.h b/include/linux/notifier.h
> index 2aba75145144..3ce6b59e02e5 100644
> --- a/include/linux/notifier.h
> +++ b/include/linux/notifier.h
> @@ -110,6 +110,7 @@ extern void srcu_init_notifier_head(struct
> srcu_notifier_head *nh);
>         {                                                       \
>                 .mutex = __MUTEX_INITIALIZER(name.mutex),       \
>                 .head = NULL,                                   \
> +               .srcuu = __SRCU_USAGE_INIT(name.srcuu),         \
>                 .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
>         }

Thank you both!

Huh.  It looks like Chen-Yu Tsai sent a patch to this effect and
AngeloGioacchino Del Regno tested it.  No one has picked it up yet.

https://lore.kernel.org/all/20230526073539.339203-1-wenst@chromium.org/

This is clearly a regression, and I don't see it in -next.  I will pick
it up and send it along in a few days if Matthias or Rafael don't beat
me to it.

In the meantime, I would be happy to add Jon's Reported-by and Tested-by,
along with Qiang's Acked-by or Reviewed-by.

							Thanx, Paul

------------------------------------------------------------------------

commit bf7da55fcdd1478839b21697cf0534be1f149a49
Author: Chen-Yu Tsai <wenst@chromium.org>
Date:   Fri May 26 15:35:37 2023 +0800

    notifier: Initialize new struct srcu_usage field
    
    In commit 95433f726301 ("srcu: Begin offloading srcu_struct fields to
    srcu_update"), a new struct srcu_usage field was added, but was not
    properly initialized. This led to a "spinlock bad magic" BUG when the
    SRCU notifier was ever used. This was observed in the MediaTek CCI
    devfreq driver on next-20230525. The trimmed stack trace is as follows:
    
        BUG: spinlock bad magic on CPU#4, swapper/0/1
         lock: 0xffffff80ff529ac0, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0
        Call trace:
         spin_bug+0xa4/0xe8
         do_raw_spin_lock+0xec/0x120
         _raw_spin_lock_irqsave+0x78/0xb8
         synchronize_srcu+0x3c/0x168
         srcu_notifier_chain_unregister+0x5c/0xa0
         cpufreq_unregister_notifier+0x94/0xe0
         devfreq_passive_event_handler+0x7c/0x3e0
         devfreq_remove_device+0x48/0xe8
    
    Add __SRCU_USAGE_INIT() to SRCU_NOTIFIER_INIT() so that srcu_usage gets
    initialized properly.
    
    Fixes: 95433f726301 ("srcu: Begin offloading srcu_struct fields to srcu_update")
    Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
    Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
    Cc: Matthias Brugger <matthias.bgg@gmail.com>
    Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
    Cc: "Michał Mirosław" <mirq-linux@rere.qmqm.pl>
    Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
    Cc: Sachin Sant <sachinp@linux.ibm.com>
    Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
    Cc: Joel Fernandes (Google) <joel@joelfernandes.org
    Cc: Jon Hunter <jonathanh@nvidia.com>
    Signed-off-by: Paul E. McKenney <paulmck@kernel.org>

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 2aba75145144..86544707236a 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -106,12 +106,22 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 #define RAW_NOTIFIER_INIT(name)	{				\
 		.head = NULL }
 
+#ifdef CONFIG_TREE_SRCU
 #define SRCU_NOTIFIER_INIT(name, pcpu)				\
 	{							\
 		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
 		.head = NULL,					\
+		.srcuu = __SRCU_USAGE_INIT(name.srcuu),		\
 		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
 	}
+#else
+#define SRCU_NOTIFIER_INIT(name, pcpu)				\
+	{							\
+		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
+		.head = NULL,					\
+		.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
+	}
+#endif
 
 #define ATOMIC_NOTIFIER_HEAD(name)				\
 	struct atomic_notifier_head name =			\
Jon Hunter June 1, 2023, 5:14 p.m. UTC | #3
Hi Paul, Zqiang,

On 01/06/2023 14:46, Paul E. McKenney wrote:

...

> Thank you both!
> 
> Huh.  It looks like Chen-Yu Tsai sent a patch to this effect and
> AngeloGioacchino Del Regno tested it.  No one has picked it up yet.
> 
> https://lore.kernel.org/all/20230526073539.339203-1-wenst@chromium.org/
> 
> This is clearly a regression, and I don't see it in -next.  I will pick
> it up and send it along in a few days if Matthias or Rafael don't beat
> me to it.
> 
> In the meantime, I would be happy to add Jon's Reported-by and Tested-by,
> along with Qiang's Acked-by or Reviewed-by.


Thanks for the rapid response. Yes that does fix the problem for me so ...

Tested-by: Jon Hunter <jonathanh@nvidia.com>

Cheers!
Jon
Paul E. McKenney June 1, 2023, 7:21 p.m. UTC | #4
On Thu, Jun 01, 2023 at 06:14:21PM +0100, Jon Hunter wrote:
> Hi Paul, Zqiang,
> 
> On 01/06/2023 14:46, Paul E. McKenney wrote:
> 
> ...
> 
> > Thank you both!
> > 
> > Huh.  It looks like Chen-Yu Tsai sent a patch to this effect and
> > AngeloGioacchino Del Regno tested it.  No one has picked it up yet.
> > 
> > https://lore.kernel.org/all/20230526073539.339203-1-wenst@chromium.org/
> > 
> > This is clearly a regression, and I don't see it in -next.  I will pick
> > it up and send it along in a few days if Matthias or Rafael don't beat
> > me to it.
> > 
> > In the meantime, I would be happy to add Jon's Reported-by and Tested-by,
> > along with Qiang's Acked-by or Reviewed-by.
> 
> 
> Thanks for the rapid response. Yes that does fix the problem for me so ...
> 
> Tested-by: Jon Hunter <jonathanh@nvidia.com>

Added, and again, thank you!

							Thanx, Paul
Zqiang June 2, 2023, 2:52 a.m. UTC | #5
>
> On Thu, Jun 01, 2023 at 08:33:10PM +0800, Z qiang wrote:
> > >
> > > Hi Paul,
> > >
> > > On 30/03/2023 23:47, Paul E. McKenney wrote:
> > > > This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
> > > > ->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
> > > > from the srcu_struct structure to the srcu_usage structure to reduce
> > > > the size of the former in order to improve cache locality.
> > > >
> > > > Suggested-by: Christoph Hellwig <hch@lst.de>
> > > > Tested-by: Sachin Sant <sachinp@linux.ibm.com>
> > > > Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> > > > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > >
> > >
> > > I have noticed a suspend regression on some of our Tegra boards recently
> > with v6.4-rc and interestingly bisect is pointing to this commit. I was
> > unable revert this on top of the latest mainline but if I checkout this
> > commit suspend fails and if I checkout the previous commit is passes.
> > >
> > > Enabling more debug I was able to capture the following crash log I see
> > on one of the boards ...
> > >
> > > [   57.327645] PM: suspend entry (deep)
> > > [   57.331660] Filesystems sync: 0.000 seconds
> > > [   57.340147] Freezing user space processes
> > > [   57.347470] Freezing user space processes completed (elapsed 0.007
> > seconds)
> > > [   57.347501] OOM killer disabled.
> > > [   57.347508] Freezing remaining freezable tasks
> > > [   57.348834] Freezing remaining freezable tasks completed (elapsed
> > 0.001 seconds)
> > > [   57.349932] 8<--- cut here ---
> > > [   57.349943] Unable to handle kernel NULL pointer dereference at
> > virtual address 00000000 when write
> > > [   57.349960] [00000000] *pgd=00000000
> > > [   57.349986] Internal error: Oops: 805 [#1] PREEMPT SMP ARM
> > > [   57.350007] Modules linked in: tegra30_tsensor
> > > [   57.350033] CPU: 0 PID: 589 Comm: rtcwake Not tainted
> > 6.3.0-rc1-00011-g03200b5ca3b4-dirty #3
> > > [   57.350057] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree)
> > > [   57.350067] PC is at rcu_segcblist_enqueue+0x2c/0x38
> > > [   57.350120] LR is at srcu_gp_start_if_needed+0xe4/0x544
> > > [   57.350169] pc : [<c01a5120>]    lr : [<c0198b5c>]    psr: a0070093
> > > [   57.350183] sp : f0b2dd20  ip : 3b5870ef  fp : 00000000
> > > [   57.350194] r10: ef787d84  r9 : 00000000  r8 : ef787d80
> > > [   57.350205] r7 : 80070013  r6 : c131ec30  r5 : ef787d40  r4 : f0b2dd64
> > > [   57.350217] r3 : 00000000  r2 : 00000000  r1 : f0b2dd64  r0 : ef787d84
> > > [   57.350230] Flags: NzCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM
> >  Segment none
> > > [   57.350251] Control: 10c5387d  Table: 81d8004a  DAC: 00000051
> > > [   57.350261] Register r0 information: non-slab/vmalloc memory
> > > [   57.350283] Register r1 information: 2-page vmalloc region starting at
> > 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
> > > [   57.350322] Register r2 information: NULL pointer
> > > [   57.350337] Register r3 information: NULL pointer
> > > [   57.350350] Register r4 information: 2-page vmalloc region starting at
> > 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
> > > [   57.350379] Register r5 information: non-slab/vmalloc memory
> > > [   57.350394] Register r6 information: non-slab/vmalloc memory
> > > [   57.350408] Register r7 information: non-paged memory
> > > [   57.350422] Register r8 information: non-slab/vmalloc memory
> > > [   57.350436] Register r9 information: NULL pointer
> > > [   57.350449] Register r10 information: non-slab/vmalloc memory
> > > [   57.350463] Register r11 information: NULL pointer
> > > [   57.350477] Register r12 information: non-paged memory
> > > [   57.350491] Process rtcwake (pid: 589, stack limit = 0x410bb531)
> > > [   57.350510] Stack: (0xf0b2dd20 to 0xf0b2e000)
> > > [   57.350534] dd20: 00000000 c1ee4a40 f0b2dd7c c0184f24 ef781495
> > 3b5870ef c1ee4a40 c1ee4a40
> > > [   57.350555] dd40: c131ec30 00000000 00000002 c0f3d1fc c3542ac0
> > c2abbb10 c1ee4a40 c0199044
> > > [   57.350574] dd60: 60070013 00000000 c0195924 00000000 00000000
> > f0b2dd74 f0b2dd74 3b5870ef
> > > [   57.350592] dd80: 00000000 c131ebc0 c120ab28 c0146d9c c2785b94
> > c2785b40 c0fee9f4 c0872590
> > > [   57.350611] dda0: c2785b40 c08c39cc c2785b40 c08c3a3c c2788c00
> > c08c40b0 c0f3d1fc c066f028
> > > [   57.350630] ddc0: f0b2de14 c2788c00 c1325ef4 c08c1d40 c2788c00
> > c1325ef4 c0fee9f4 c08c31cc
> > > [   57.350648] dde0: c13708a0 0000000d 00000000 c0681c10 c16afe84
> > 00000002 56508788 0000000d
> > > [   57.350665] de00: 00000002 c13708a0 10624dd3 56409580 0000000d
> > 00000000 00000002 c0f3d1fc
> > > [   57.350685] de20: c3542ac0 c2abbb10 c1ee4a40 c06824e4 00000000
> > ffffa900 00000000 c1386510
> > > [   57.350703] de40: 00000003 00000003 c1204f75 c017e8a8 c3542ac0
> > c2abbb10 00428228 c0171574
> > > [   57.350721] de60: 00000000 00000000 00000003 3b5870ef c1204f75
> > 00000000 00000003 c137aeb4
> > > [   57.350739] de80: c1204f75 c0f3d1fc c3542ac0 c2abbb10 00428228
> > c017f380 00000003 c0f38a54
> > > [   57.350757] dea0: 00000003 c1386524 00000004 c017d708 00000004
> > c2abbb00 00000000 00000000
> > > [   57.350775] dec0: c3542ac0 f0b2df28 c2abbb10 c03305b4 00000000
> > 00000000 c2953c00 c1ee4a40
> > > [   57.350794] dee0: 00429438 00000004 c0d18488 00004004 00000000
> > c02b1094 00000a55 c1d80010
> > > [   57.350812] df00: c1d80010 00000000 00000000 f0b2df78 01010006
> > 00000004 00000000 00429438
> > > [   57.350830] df20: 00000000 00000000 c2953c00 00000000 00000000
> > 00000000 00000000 00000000
> > > [   57.350848] df40: 00000000 00004004 00000000 00000000 0000006c
> > 3b5870ef c2953c00 c2953c00
> > > [   57.350866] df60: 00000000 00000000 c1ee4a40 00429438 00000004
> > c02b12c8 00000000 00000000
> > > [   57.350885] df80: 00001008 3b5870ef 0000006c 00429438 00428228
> > 00000004 c0100324 c1ee4a40
> > > [   57.350902] dfa0: 00000004 c01000c0 0000006c 00429438 00000004
> > 00429438 00000004 00000000
> > > [   57.350920] dfc0: 0000006c 00429438 00428228 00000004 00000004
> > 00000004 0041578c 00428228
> > > [   57.350938] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206 600f0030
> > 00000004 00000000 00000000
> > > [   57.350960]  rcu_segcblist_enqueue from
> > srcu_gp_start_if_needed+0xe4/0x544
> > > [   57.351023]  srcu_gp_start_if_needed from
> > __synchronize_srcu.part.6+0x70/0x98
> > > [   57.351084]  __synchronize_srcu.part.6 from
> > srcu_notifier_chain_unregister+0x6c/0xdc
> > > [   57.351155]  srcu_notifier_chain_unregister from
> > cpufreq_unregister_notifier+0x60/0xbc
> > > [   57.351215]  cpufreq_unregister_notifier from
> > tegra_actmon_pause.part.0+0x1c/0x54
> > > [   57.351277]  tegra_actmon_pause.part.0 from tegra_actmon_stop+0x38/0x3c
> > > [   57.351324]  tegra_actmon_stop from
> > tegra_governor_event_handler+0x100/0x11c
> > > [   57.351373]  tegra_governor_event_handler from
> > devfreq_suspend_device+0x64/0xac
> > > [   57.351423]  devfreq_suspend_device from devfreq_suspend+0x30/0x64
> > > [   57.351467]  devfreq_suspend from dpm_suspend+0x34/0x33c
> > > [   57.351506]  dpm_suspend from dpm_suspend_start+0x90/0x98
> > > [   57.351528]  dpm_suspend_start from
> > suspend_devices_and_enter+0xe4/0x93c
> > > [   57.351573]  suspend_devices_and_enter from pm_suspend+0x280/0x3ac
> > > [   57.351614]  pm_suspend from state_store+0x6c/0xc8
> > > [   57.351654]  state_store from kernfs_fop_write_iter+0x118/0x1b4
> > > [   57.351696]  kernfs_fop_write_iter from vfs_write+0x314/0x3d4
> > > [   57.351733]  vfs_write from ksys_write+0xa0/0xd0
> > > [   57.351760]  ksys_write from ret_fast_syscall+0x0/0x54
> > > [   57.351788] Exception stack(0xf0b2dfa8 to 0xf0b2dff0)
> > > [   57.351809] dfa0:                   0000006c 00429438 00000004
> > 00429438 00000004 00000000
> > > [   57.351828] dfc0: 0000006c 00429438 00428228 00000004 00000004
> > 00000004 0041578c 00428228
> > > [   57.351843] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206
> > > [   57.351863] Code: e2833001 e5803034 e5812000 e5903010 (e5831000)
> > > [   57.351875] ---[ end trace 0000000000000000 ]---
> > >
> > >
> > > I have not dug into this yet and so wanted to see if you have any
> > thoughts on this?
> > >
> >
> > Hi, Jon
> >
> > Please try it:
> >
> > diff --git a/include/linux/notifier.h b/include/linux/notifier.h
> > index 2aba75145144..3ce6b59e02e5 100644
> > --- a/include/linux/notifier.h
> > +++ b/include/linux/notifier.h
> > @@ -110,6 +110,7 @@ extern void srcu_init_notifier_head(struct
> > srcu_notifier_head *nh);
> >         {                                                       \
> >                 .mutex = __MUTEX_INITIALIZER(name.mutex),       \
> >                 .head = NULL,                                   \
> > +               .srcuu = __SRCU_USAGE_INIT(name.srcuu),         \
> >                 .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
> >         }
>
> Thank you both!
>
> Huh.  It looks like Chen-Yu Tsai sent a patch to this effect and
> AngeloGioacchino Del Regno tested it.  No one has picked it up yet.
>
> https://lore.kernel.org/all/20230526073539.339203-1-wenst@chromium.org/
>
> This is clearly a regression, and I don't see it in -next.  I will pick
> it up and send it along in a few days if Matthias or Rafael don't beat
> me to it.
>
> In the meantime, I would be happy to add Jon's Reported-by and Tested-by,
> along with Qiang's Acked-by or Reviewed-by.
>

Acked-by: Zqiang <qiang.zhang1211@gmail.com>

Thanks
Zqiang

>
>
>                                                         Thanx, Paul
>
> ------------------------------------------------------------------------
>
> commit bf7da55fcdd1478839b21697cf0534be1f149a49
> Author: Chen-Yu Tsai <wenst@chromium.org>
> Date:   Fri May 26 15:35:37 2023 +0800
>
>     notifier: Initialize new struct srcu_usage field
>
>     In commit 95433f726301 ("srcu: Begin offloading srcu_struct fields to
>     srcu_update"), a new struct srcu_usage field was added, but was not
>     properly initialized. This led to a "spinlock bad magic" BUG when the
>     SRCU notifier was ever used. This was observed in the MediaTek CCI
>     devfreq driver on next-20230525. The trimmed stack trace is as follows:
>
>         BUG: spinlock bad magic on CPU#4, swapper/0/1
>          lock: 0xffffff80ff529ac0, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0
>         Call trace:
>          spin_bug+0xa4/0xe8
>          do_raw_spin_lock+0xec/0x120
>          _raw_spin_lock_irqsave+0x78/0xb8
>          synchronize_srcu+0x3c/0x168
>          srcu_notifier_chain_unregister+0x5c/0xa0
>          cpufreq_unregister_notifier+0x94/0xe0
>          devfreq_passive_event_handler+0x7c/0x3e0
>          devfreq_remove_device+0x48/0xe8
>
>     Add __SRCU_USAGE_INIT() to SRCU_NOTIFIER_INIT() so that srcu_usage gets
>     initialized properly.
>
>     Fixes: 95433f726301 ("srcu: Begin offloading srcu_struct fields to srcu_update")
>     Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
>     Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>     Cc: Matthias Brugger <matthias.bgg@gmail.com>
>     Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
>     Cc: "Michał Mirosław" <mirq-linux@rere.qmqm.pl>
>     Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>     Cc: Sachin Sant <sachinp@linux.ibm.com>
>     Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
>     Cc: Joel Fernandes (Google) <joel@joelfernandes.org
>     Cc: Jon Hunter <jonathanh@nvidia.com>
>     Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
>
> diff --git a/include/linux/notifier.h b/include/linux/notifier.h
> index 2aba75145144..86544707236a 100644
> --- a/include/linux/notifier.h
> +++ b/include/linux/notifier.h
> @@ -106,12 +106,22 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
>  #define RAW_NOTIFIER_INIT(name)        {                               \
>                 .head = NULL }
>
> +#ifdef CONFIG_TREE_SRCU
>  #define SRCU_NOTIFIER_INIT(name, pcpu)                         \
>         {                                                       \
>                 .mutex = __MUTEX_INITIALIZER(name.mutex),       \
>                 .head = NULL,                                   \
> +               .srcuu = __SRCU_USAGE_INIT(name.srcuu),         \
>                 .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
>         }
> +#else
> +#define SRCU_NOTIFIER_INIT(name, pcpu)                         \
> +       {                                                       \
> +               .mutex = __MUTEX_INITIALIZER(name.mutex),       \
> +               .head = NULL,                                   \
> +               .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
> +       }
> +#endif
>
>  #define ATOMIC_NOTIFIER_HEAD(name)                             \
>         struct atomic_notifier_head name =                      \
Paul E. McKenney June 2, 2023, 3:09 a.m. UTC | #6
On Fri, Jun 02, 2023 at 10:52:44AM +0800, Z qiang wrote:
> >
> > On Thu, Jun 01, 2023 at 08:33:10PM +0800, Z qiang wrote:
> > > >
> > > > Hi Paul,
> > > >
> > > > On 30/03/2023 23:47, Paul E. McKenney wrote:
> > > > > This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
> > > > > ->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
> > > > > from the srcu_struct structure to the srcu_usage structure to reduce
> > > > > the size of the former in order to improve cache locality.
> > > > >
> > > > > Suggested-by: Christoph Hellwig <hch@lst.de>
> > > > > Tested-by: Sachin Sant <sachinp@linux.ibm.com>
> > > > > Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> > > > > Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> > > >
> > > >
> > > > I have noticed a suspend regression on some of our Tegra boards recently
> > > with v6.4-rc and interestingly bisect is pointing to this commit. I was
> > > unable revert this on top of the latest mainline but if I checkout this
> > > commit suspend fails and if I checkout the previous commit is passes.
> > > >
> > > > Enabling more debug I was able to capture the following crash log I see
> > > on one of the boards ...
> > > >
> > > > [   57.327645] PM: suspend entry (deep)
> > > > [   57.331660] Filesystems sync: 0.000 seconds
> > > > [   57.340147] Freezing user space processes
> > > > [   57.347470] Freezing user space processes completed (elapsed 0.007
> > > seconds)
> > > > [   57.347501] OOM killer disabled.
> > > > [   57.347508] Freezing remaining freezable tasks
> > > > [   57.348834] Freezing remaining freezable tasks completed (elapsed
> > > 0.001 seconds)
> > > > [   57.349932] 8<--- cut here ---
> > > > [   57.349943] Unable to handle kernel NULL pointer dereference at
> > > virtual address 00000000 when write
> > > > [   57.349960] [00000000] *pgd=00000000
> > > > [   57.349986] Internal error: Oops: 805 [#1] PREEMPT SMP ARM
> > > > [   57.350007] Modules linked in: tegra30_tsensor
> > > > [   57.350033] CPU: 0 PID: 589 Comm: rtcwake Not tainted
> > > 6.3.0-rc1-00011-g03200b5ca3b4-dirty #3
> > > > [   57.350057] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree)
> > > > [   57.350067] PC is at rcu_segcblist_enqueue+0x2c/0x38
> > > > [   57.350120] LR is at srcu_gp_start_if_needed+0xe4/0x544
> > > > [   57.350169] pc : [<c01a5120>]    lr : [<c0198b5c>]    psr: a0070093
> > > > [   57.350183] sp : f0b2dd20  ip : 3b5870ef  fp : 00000000
> > > > [   57.350194] r10: ef787d84  r9 : 00000000  r8 : ef787d80
> > > > [   57.350205] r7 : 80070013  r6 : c131ec30  r5 : ef787d40  r4 : f0b2dd64
> > > > [   57.350217] r3 : 00000000  r2 : 00000000  r1 : f0b2dd64  r0 : ef787d84
> > > > [   57.350230] Flags: NzCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM
> > >  Segment none
> > > > [   57.350251] Control: 10c5387d  Table: 81d8004a  DAC: 00000051
> > > > [   57.350261] Register r0 information: non-slab/vmalloc memory
> > > > [   57.350283] Register r1 information: 2-page vmalloc region starting at
> > > 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
> > > > [   57.350322] Register r2 information: NULL pointer
> > > > [   57.350337] Register r3 information: NULL pointer
> > > > [   57.350350] Register r4 information: 2-page vmalloc region starting at
> > > 0xf0b2c000 allocated at kernel_clone+0xb4/0x3e4
> > > > [   57.350379] Register r5 information: non-slab/vmalloc memory
> > > > [   57.350394] Register r6 information: non-slab/vmalloc memory
> > > > [   57.350408] Register r7 information: non-paged memory
> > > > [   57.350422] Register r8 information: non-slab/vmalloc memory
> > > > [   57.350436] Register r9 information: NULL pointer
> > > > [   57.350449] Register r10 information: non-slab/vmalloc memory
> > > > [   57.350463] Register r11 information: NULL pointer
> > > > [   57.350477] Register r12 information: non-paged memory
> > > > [   57.350491] Process rtcwake (pid: 589, stack limit = 0x410bb531)
> > > > [   57.350510] Stack: (0xf0b2dd20 to 0xf0b2e000)
> > > > [   57.350534] dd20: 00000000 c1ee4a40 f0b2dd7c c0184f24 ef781495
> > > 3b5870ef c1ee4a40 c1ee4a40
> > > > [   57.350555] dd40: c131ec30 00000000 00000002 c0f3d1fc c3542ac0
> > > c2abbb10 c1ee4a40 c0199044
> > > > [   57.350574] dd60: 60070013 00000000 c0195924 00000000 00000000
> > > f0b2dd74 f0b2dd74 3b5870ef
> > > > [   57.350592] dd80: 00000000 c131ebc0 c120ab28 c0146d9c c2785b94
> > > c2785b40 c0fee9f4 c0872590
> > > > [   57.350611] dda0: c2785b40 c08c39cc c2785b40 c08c3a3c c2788c00
> > > c08c40b0 c0f3d1fc c066f028
> > > > [   57.350630] ddc0: f0b2de14 c2788c00 c1325ef4 c08c1d40 c2788c00
> > > c1325ef4 c0fee9f4 c08c31cc
> > > > [   57.350648] dde0: c13708a0 0000000d 00000000 c0681c10 c16afe84
> > > 00000002 56508788 0000000d
> > > > [   57.350665] de00: 00000002 c13708a0 10624dd3 56409580 0000000d
> > > 00000000 00000002 c0f3d1fc
> > > > [   57.350685] de20: c3542ac0 c2abbb10 c1ee4a40 c06824e4 00000000
> > > ffffa900 00000000 c1386510
> > > > [   57.350703] de40: 00000003 00000003 c1204f75 c017e8a8 c3542ac0
> > > c2abbb10 00428228 c0171574
> > > > [   57.350721] de60: 00000000 00000000 00000003 3b5870ef c1204f75
> > > 00000000 00000003 c137aeb4
> > > > [   57.350739] de80: c1204f75 c0f3d1fc c3542ac0 c2abbb10 00428228
> > > c017f380 00000003 c0f38a54
> > > > [   57.350757] dea0: 00000003 c1386524 00000004 c017d708 00000004
> > > c2abbb00 00000000 00000000
> > > > [   57.350775] dec0: c3542ac0 f0b2df28 c2abbb10 c03305b4 00000000
> > > 00000000 c2953c00 c1ee4a40
> > > > [   57.350794] dee0: 00429438 00000004 c0d18488 00004004 00000000
> > > c02b1094 00000a55 c1d80010
> > > > [   57.350812] df00: c1d80010 00000000 00000000 f0b2df78 01010006
> > > 00000004 00000000 00429438
> > > > [   57.350830] df20: 00000000 00000000 c2953c00 00000000 00000000
> > > 00000000 00000000 00000000
> > > > [   57.350848] df40: 00000000 00004004 00000000 00000000 0000006c
> > > 3b5870ef c2953c00 c2953c00
> > > > [   57.350866] df60: 00000000 00000000 c1ee4a40 00429438 00000004
> > > c02b12c8 00000000 00000000
> > > > [   57.350885] df80: 00001008 3b5870ef 0000006c 00429438 00428228
> > > 00000004 c0100324 c1ee4a40
> > > > [   57.350902] dfa0: 00000004 c01000c0 0000006c 00429438 00000004
> > > 00429438 00000004 00000000
> > > > [   57.350920] dfc0: 0000006c 00429438 00428228 00000004 00000004
> > > 00000004 0041578c 00428228
> > > > [   57.350938] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206 600f0030
> > > 00000004 00000000 00000000
> > > > [   57.350960]  rcu_segcblist_enqueue from
> > > srcu_gp_start_if_needed+0xe4/0x544
> > > > [   57.351023]  srcu_gp_start_if_needed from
> > > __synchronize_srcu.part.6+0x70/0x98
> > > > [   57.351084]  __synchronize_srcu.part.6 from
> > > srcu_notifier_chain_unregister+0x6c/0xdc
> > > > [   57.351155]  srcu_notifier_chain_unregister from
> > > cpufreq_unregister_notifier+0x60/0xbc
> > > > [   57.351215]  cpufreq_unregister_notifier from
> > > tegra_actmon_pause.part.0+0x1c/0x54
> > > > [   57.351277]  tegra_actmon_pause.part.0 from tegra_actmon_stop+0x38/0x3c
> > > > [   57.351324]  tegra_actmon_stop from
> > > tegra_governor_event_handler+0x100/0x11c
> > > > [   57.351373]  tegra_governor_event_handler from
> > > devfreq_suspend_device+0x64/0xac
> > > > [   57.351423]  devfreq_suspend_device from devfreq_suspend+0x30/0x64
> > > > [   57.351467]  devfreq_suspend from dpm_suspend+0x34/0x33c
> > > > [   57.351506]  dpm_suspend from dpm_suspend_start+0x90/0x98
> > > > [   57.351528]  dpm_suspend_start from
> > > suspend_devices_and_enter+0xe4/0x93c
> > > > [   57.351573]  suspend_devices_and_enter from pm_suspend+0x280/0x3ac
> > > > [   57.351614]  pm_suspend from state_store+0x6c/0xc8
> > > > [   57.351654]  state_store from kernfs_fop_write_iter+0x118/0x1b4
> > > > [   57.351696]  kernfs_fop_write_iter from vfs_write+0x314/0x3d4
> > > > [   57.351733]  vfs_write from ksys_write+0xa0/0xd0
> > > > [   57.351760]  ksys_write from ret_fast_syscall+0x0/0x54
> > > > [   57.351788] Exception stack(0xf0b2dfa8 to 0xf0b2dff0)
> > > > [   57.351809] dfa0:                   0000006c 00429438 00000004
> > > 00429438 00000004 00000000
> > > > [   57.351828] dfc0: 0000006c 00429438 00428228 00000004 00000004
> > > 00000004 0041578c 00428228
> > > > [   57.351843] dfe0: 00000004 becda9a8 b6e9bc0b b6e26206
> > > > [   57.351863] Code: e2833001 e5803034 e5812000 e5903010 (e5831000)
> > > > [   57.351875] ---[ end trace 0000000000000000 ]---
> > > >
> > > >
> > > > I have not dug into this yet and so wanted to see if you have any
> > > thoughts on this?
> > > >
> > >
> > > Hi, Jon
> > >
> > > Please try it:
> > >
> > > diff --git a/include/linux/notifier.h b/include/linux/notifier.h
> > > index 2aba75145144..3ce6b59e02e5 100644
> > > --- a/include/linux/notifier.h
> > > +++ b/include/linux/notifier.h
> > > @@ -110,6 +110,7 @@ extern void srcu_init_notifier_head(struct
> > > srcu_notifier_head *nh);
> > >         {                                                       \
> > >                 .mutex = __MUTEX_INITIALIZER(name.mutex),       \
> > >                 .head = NULL,                                   \
> > > +               .srcuu = __SRCU_USAGE_INIT(name.srcuu),         \
> > >                 .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
> > >         }
> >
> > Thank you both!
> >
> > Huh.  It looks like Chen-Yu Tsai sent a patch to this effect and
> > AngeloGioacchino Del Regno tested it.  No one has picked it up yet.
> >
> > https://lore.kernel.org/all/20230526073539.339203-1-wenst@chromium.org/
> >
> > This is clearly a regression, and I don't see it in -next.  I will pick
> > it up and send it along in a few days if Matthias or Rafael don't beat
> > me to it.
> >
> > In the meantime, I would be happy to add Jon's Reported-by and Tested-by,
> > along with Qiang's Acked-by or Reviewed-by.
> 
> Acked-by: Zqiang <qiang.zhang1211@gmail.com>

Thank you!  I will apply this on my next rebase.

							Thanx, Paul
Thorsten Leemhuis June 4, 2023, 9:53 a.m. UTC | #7
[CCing the regression list, as it should be in the loop for regressions:
https://docs.kernel.org/admin-guide/reporting-regressions.html]

[TLDR: I'm adding this report to the list of tracked Linux kernel
regressions; the text you find below is based on a few templates
paragraphs you might have encountered already in similar form.
See link in footer if these mails annoy you.]

On 01.06.23 13:14, Jon Hunter wrote:
> 
> On 30/03/2023 23:47, Paul E. McKenney wrote:
>> This commit moves the ->srcu_gp_seq, ->srcu_gp_seq_needed,
>> ->srcu_gp_seq_needed_exp, ->srcu_gp_start, and ->srcu_last_gp_end fields
>> from the srcu_struct structure to the srcu_usage structure to reduce
>> the size of the former in order to improve cache locality.
>>
>> Suggested-by: Christoph Hellwig <hch@lst.de>
>> Tested-by: Sachin Sant <sachinp@linux.ibm.com>
>> Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
>> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
> 
> 
> I have noticed a suspend regression on some of our Tegra boards recently
> with v6.4-rc and interestingly bisect is pointing to this commit. I was
> unable revert this on top of the latest mainline but if I checkout this
> commit suspend fails and if I checkout the previous commit is passes.

Thanks for the report. To be sure the issue doesn't fall through the
cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
tracking bot:

#regzbot ^introduced 95433f726301
#regzbot title rcu: "spinlock bad magic" BUG when the SRCU notifier was
ever used
#regzbot monitor:
https://lore.kernel.org/all/20230526073539.339203-1-wenst@chromium.org/
#regzbot fix: notifier: Initialize new struct srcu_usage field
#regzbot ignore-activity

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.
diff mbox series

Patch

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index d04e3da6181c..372e35b0e8b6 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -68,6 +68,11 @@  struct srcu_usage {
 	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
 	spinlock_t __private lock;		/* Protect counters and size state. */
 	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
+	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
+	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
+	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
+	unsigned long srcu_gp_start;		/* Last GP start timestamp (jiffies) */
+	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
 };
 
 /*
@@ -75,11 +80,6 @@  struct srcu_usage {
  */
 struct srcu_struct {
 	unsigned int srcu_idx;			/* Current rdr array element. */
-	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
-	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
-	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
-	unsigned long srcu_gp_start;		/* Last GP start timestamp (jiffies) */
-	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
 	unsigned long srcu_size_jiffies;	/* Current contention-measurement interval. */
 	unsigned long srcu_n_lock_retries;	/* Contention events in current interval. */
 	unsigned long srcu_n_exp_nodelay;	/* # expedited no-delays in current GP phase. */
@@ -115,8 +115,13 @@  struct srcu_struct {
 #define SRCU_STATE_SCAN1	1
 #define SRCU_STATE_SCAN2	2
 
-#define __SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
+#define __SRCU_USAGE_INIT(name)									\
+{												\
+	.lock = __SPIN_LOCK_UNLOCKED(name.lock),						\
 	.srcu_gp_seq_needed = -1UL,								\
+}
+
+#define __SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
 	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),					\
 	.srcu_sup = &usage_name,								\
 	__SRCU_DEP_MAP_INIT(name)
@@ -153,9 +158,7 @@  struct srcu_struct {
  */
 #ifdef MODULE
 # define __DEFINE_SRCU(name, is_static)								\
-	static struct srcu_usage name##_srcu_usage = {						\
-		.lock = __SPIN_LOCK_UNLOCKED(name.lock),					\
-	};											\
+	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
 	is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage);	\
 	extern struct srcu_struct * const __srcu_struct_##name;					\
 	struct srcu_struct * const __srcu_struct_##name						\
@@ -163,9 +166,7 @@  struct srcu_struct {
 #else
 # define __DEFINE_SRCU(name, is_static)								\
 	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);				\
-	static struct srcu_usage name##_srcu_usage = {						\
-		.lock = __SPIN_LOCK_UNLOCKED(name.lock),					\
-	};											\
+	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
 	is_static struct srcu_struct name =							\
 		__SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data)
 #endif
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index a36066798de7..340eb685cf64 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -135,8 +135,8 @@  static void init_srcu_struct_data(struct srcu_struct *ssp)
 		spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
 		rcu_segcblist_init(&sdp->srcu_cblist);
 		sdp->srcu_cblist_invoking = false;
-		sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq;
-		sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq;
+		sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq;
+		sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq;
 		sdp->mynode = NULL;
 		sdp->cpu = cpu;
 		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
@@ -247,7 +247,7 @@  static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 	mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
 	mutex_init(&ssp->srcu_sup->srcu_gp_mutex);
 	ssp->srcu_idx = 0;
-	ssp->srcu_gp_seq = 0;
+	ssp->srcu_sup->srcu_gp_seq = 0;
 	ssp->srcu_barrier_seq = 0;
 	mutex_init(&ssp->srcu_barrier_mutex);
 	atomic_set(&ssp->srcu_barrier_cpu_cnt, 0);
@@ -261,8 +261,8 @@  static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 		return -ENOMEM;
 	}
 	init_srcu_struct_data(ssp);
-	ssp->srcu_gp_seq_needed_exp = 0;
-	ssp->srcu_last_gp_end = ktime_get_mono_fast_ns();
+	ssp->srcu_sup->srcu_gp_seq_needed_exp = 0;
+	ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
 	if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
 		if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) {
 			if (!ssp->sda_is_static) {
@@ -275,7 +275,7 @@  static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
 			WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
 		}
 	}
-	smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */
+	smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, 0); /* Init done. */
 	return 0;
 }
 
@@ -402,10 +402,10 @@  static void check_init_srcu_struct(struct srcu_struct *ssp)
 	unsigned long flags;
 
 	/* The smp_load_acquire() pairs with the smp_store_release(). */
-	if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/
+	if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/
 		return; /* Already initialized. */
 	spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
-	if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) {
+	if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
 		spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
 		return;
 	}
@@ -616,11 +616,11 @@  static unsigned long srcu_get_delay(struct srcu_struct *ssp)
 	unsigned long j;
 	unsigned long jbase = SRCU_INTERVAL;
 
-	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_sup->srcu_gp_seq), READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp)))
 		jbase = 0;
-	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
+	if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq))) {
 		j = jiffies - 1;
-		gpstart = READ_ONCE(ssp->srcu_gp_start);
+		gpstart = READ_ONCE(ssp->srcu_sup->srcu_gp_start);
 		if (time_after(j, gpstart))
 			jbase += j - gpstart;
 		if (!jbase) {
@@ -656,12 +656,12 @@  void cleanup_srcu_struct(struct srcu_struct *ssp)
 		if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
 			return; /* Forgot srcu_barrier(), so just leak it! */
 	}
-	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
-	    WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) ||
+	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
+	    WARN_ON(rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq) != ssp->srcu_sup->srcu_gp_seq_needed) ||
 	    WARN_ON(srcu_readers_active(ssp))) {
 		pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
-			__func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)),
-			rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed);
+			__func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)),
+			rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ssp->srcu_sup->srcu_gp_seq_needed);
 		return; /* Caller forgot to stop doing call_srcu()? */
 	}
 	kfree(ssp->srcu_sup->node);
@@ -775,18 +775,18 @@  static void srcu_gp_start(struct srcu_struct *ssp)
 	else
 		sdp = this_cpu_ptr(ssp->sda);
 	lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
-	WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
+	WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed));
 	spin_lock_rcu_node(sdp);  /* Interrupts already disabled. */
 	rcu_segcblist_advance(&sdp->srcu_cblist,
-			      rcu_seq_current(&ssp->srcu_gp_seq));
+			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
-				       rcu_seq_snap(&ssp->srcu_gp_seq));
+				       rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq));
 	spin_unlock_rcu_node(sdp);  /* Interrupts remain disabled. */
-	WRITE_ONCE(ssp->srcu_gp_start, jiffies);
+	WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies);
 	WRITE_ONCE(ssp->srcu_n_exp_nodelay, 0);
 	smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
-	rcu_seq_start(&ssp->srcu_gp_seq);
-	state = rcu_seq_state(ssp->srcu_gp_seq);
+	rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq);
+	state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq);
 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
 }
 
@@ -865,16 +865,16 @@  static void srcu_gp_end(struct srcu_struct *ssp)
 
 	/* End the current grace period. */
 	spin_lock_irq_rcu_node(ssp->srcu_sup);
-	idx = rcu_seq_state(ssp->srcu_gp_seq);
+	idx = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq);
 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
-	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+	if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_sup->srcu_gp_seq), READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp)))
 		cbdelay = 0;
 
-	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
-	rcu_seq_end(&ssp->srcu_gp_seq);
-	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
-	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
-		WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, gpseq);
+	WRITE_ONCE(ssp->srcu_sup->srcu_last_gp_end, ktime_get_mono_fast_ns());
+	rcu_seq_end(&ssp->srcu_sup->srcu_gp_seq);
+	gpseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
+	if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, gpseq))
+		WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, gpseq);
 	spin_unlock_irq_rcu_node(ssp->srcu_sup);
 	mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
 	/* A new grace period can start at this point.  But only one. */
@@ -925,9 +925,9 @@  static void srcu_gp_end(struct srcu_struct *ssp)
 
 	/* Start a new grace period if needed. */
 	spin_lock_irq_rcu_node(ssp->srcu_sup);
-	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
+	gpseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
 	if (!rcu_seq_state(gpseq) &&
-	    ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) {
+	    ULONG_CMP_LT(gpseq, ssp->srcu_sup->srcu_gp_seq_needed)) {
 		srcu_gp_start(ssp);
 		spin_unlock_irq_rcu_node(ssp->srcu_sup);
 		srcu_reschedule(ssp, 0);
@@ -960,7 +960,7 @@  static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
 	if (snp)
 		for (; snp != NULL; snp = snp->srcu_parent) {
 			sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp);
-			if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) ||
+			if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) ||
 			    (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
 				return;
 			spin_lock_irqsave_rcu_node(snp, flags);
@@ -973,8 +973,8 @@  static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp
 			spin_unlock_irqrestore_rcu_node(snp, flags);
 		}
 	spin_lock_irqsave_ssp_contention(ssp, &flags);
-	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
-		WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
+	if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
+		WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
 	spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
 }
 
@@ -1010,7 +1010,7 @@  static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
 	if (snp_leaf)
 		/* Each pass through the loop does one level of the srcu_node tree. */
 		for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
-			if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) && snp != snp_leaf)
+			if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) && snp != snp_leaf)
 				return; /* GP already done and CBs recorded. */
 			spin_lock_irqsave_rcu_node(snp, flags);
 			snp_seq = snp->srcu_have_cbs[idx];
@@ -1037,20 +1037,20 @@  static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp,
 
 	/* Top of tree, must ensure the grace period will be started. */
 	spin_lock_irqsave_ssp_contention(ssp, &flags);
-	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) {
+	if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed, s)) {
 		/*
 		 * Record need for grace period s.  Pair with load
 		 * acquire setting up for initialization.
 		 */
-		smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/
+		smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, s); /*^^^*/
 	}
-	if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s))
-		WRITE_ONCE(ssp->srcu_gp_seq_needed_exp, s);
+	if (!do_norm && ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
+		WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
 
 	/* If grace period not already in progress, start it. */
-	if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_gp_seq, s)) &&
-	    rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) {
-		WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed));
+	if (!WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) &&
+	    rcu_seq_state(ssp->srcu_sup->srcu_gp_seq) == SRCU_STATE_IDLE) {
+		WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed));
 		srcu_gp_start(ssp);
 
 		// And how can that list_add() in the "else" clause
@@ -1164,18 +1164,18 @@  static bool srcu_might_be_idle(struct srcu_struct *ssp)
 
 	/* First, see if enough time has passed since the last GP. */
 	t = ktime_get_mono_fast_ns();
-	tlast = READ_ONCE(ssp->srcu_last_gp_end);
+	tlast = READ_ONCE(ssp->srcu_sup->srcu_last_gp_end);
 	if (exp_holdoff == 0 ||
 	    time_in_range_open(t, tlast, tlast + exp_holdoff))
 		return false; /* Too soon after last GP. */
 
 	/* Next, check for probable idleness. */
-	curseq = rcu_seq_current(&ssp->srcu_gp_seq);
+	curseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
 	smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
-	if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed)))
+	if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed)))
 		return false; /* Grace period in progress, so not idle. */
 	smp_mb(); /* Order ->srcu_gp_seq with prior access. */
-	if (curseq != rcu_seq_current(&ssp->srcu_gp_seq))
+	if (curseq != rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq))
 		return false; /* GP # changed, so not idle. */
 	return true; /* With reasonable probability, idle! */
 }
@@ -1218,8 +1218,8 @@  static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
 	if (rhp)
 		rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
-			      rcu_seq_current(&ssp->srcu_gp_seq));
-	s = rcu_seq_snap(&ssp->srcu_gp_seq);
+			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
+	s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
 	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
 		sdp->srcu_gp_seq_needed = s;
@@ -1430,7 +1430,7 @@  unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
 	// Any prior manipulation of SRCU-protected data must happen
 	// before the load from ->srcu_gp_seq.
 	smp_mb();
-	return rcu_seq_snap(&ssp->srcu_gp_seq);
+	return rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
 }
 EXPORT_SYMBOL_GPL(get_state_synchronize_srcu);
 
@@ -1477,7 +1477,7 @@  EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
  */
 bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
 {
-	if (!rcu_seq_done(&ssp->srcu_gp_seq, cookie))
+	if (!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
 		return false;
 	// Ensure that the end of the SRCU grace period happens before
 	// any subsequent code that the caller might execute.
@@ -1597,16 +1597,16 @@  static void srcu_advance_state(struct srcu_struct *ssp)
 	 * The load-acquire ensures that we see the accesses performed
 	 * by the prior grace period.
 	 */
-	idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */
+	idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */
 	if (idx == SRCU_STATE_IDLE) {
 		spin_lock_irq_rcu_node(ssp->srcu_sup);
-		if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
-			WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq));
+		if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
+			WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq));
 			spin_unlock_irq_rcu_node(ssp->srcu_sup);
 			mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
 			return;
 		}
-		idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq));
+		idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq));
 		if (idx == SRCU_STATE_IDLE)
 			srcu_gp_start(ssp);
 		spin_unlock_irq_rcu_node(ssp->srcu_sup);
@@ -1616,7 +1616,7 @@  static void srcu_advance_state(struct srcu_struct *ssp)
 		}
 	}
 
-	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
+	if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
 		idx = 1 ^ (ssp->srcu_idx & 1);
 		if (!try_check_zero(ssp, idx, 1)) {
 			mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
@@ -1624,12 +1624,12 @@  static void srcu_advance_state(struct srcu_struct *ssp)
 		}
 		srcu_flip(ssp);
 		spin_lock_irq_rcu_node(ssp->srcu_sup);
-		rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2);
+		rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
 		ssp->srcu_n_exp_nodelay = 0;
 		spin_unlock_irq_rcu_node(ssp->srcu_sup);
 	}
 
-	if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
+	if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
 
 		/*
 		 * SRCU read-side critical sections are normally short,
@@ -1666,7 +1666,7 @@  static void srcu_invoke_callbacks(struct work_struct *work)
 	rcu_cblist_init(&ready_cbs);
 	spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_advance(&sdp->srcu_cblist,
-			      rcu_seq_current(&ssp->srcu_gp_seq));
+			      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
 	if (sdp->srcu_cblist_invoking ||
 	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
 		spin_unlock_irq_rcu_node(sdp);
@@ -1694,7 +1694,7 @@  static void srcu_invoke_callbacks(struct work_struct *work)
 	spin_lock_irq_rcu_node(sdp);
 	rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
-				       rcu_seq_snap(&ssp->srcu_gp_seq));
+				       rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq));
 	sdp->srcu_cblist_invoking = false;
 	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
 	spin_unlock_irq_rcu_node(sdp);
@@ -1711,12 +1711,12 @@  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay)
 	bool pushgp = true;
 
 	spin_lock_irq_rcu_node(ssp->srcu_sup);
-	if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) {
-		if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) {
+	if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
+		if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) {
 			/* All requests fulfilled, time to go idle. */
 			pushgp = false;
 		}
-	} else if (!rcu_seq_state(ssp->srcu_gp_seq)) {
+	} else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) {
 		/* Outstanding request and no GP.  Start one. */
 		srcu_gp_start(ssp);
 	}
@@ -1762,7 +1762,7 @@  void srcutorture_get_gp_data(enum rcutorture_type test_type,
 	if (test_type != SRCU_FLAVOR)
 		return;
 	*flags = 0;
-	*gp_seq = rcu_seq_current(&ssp->srcu_gp_seq);
+	*gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq);
 }
 EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
 
@@ -1791,7 +1791,7 @@  void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf)
 	if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name))
 		ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1;
 	pr_alert("%s%s Tree SRCU g%ld state %d (%s)",
-		 tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), ss_state,
+		 tt, tf, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ss_state,
 		 srcu_size_state_name[ss_state_idx]);
 	if (!ssp->sda) {
 		// Called after cleanup_srcu_struct(), perhaps.
@@ -1905,7 +1905,7 @@  static void srcu_module_going(struct module *mod)
 
 	for (i = 0; i < mod->num_srcu_structs; i++) {
 		ssp = *(sspp++);
-		if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed)) &&
+		if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) &&
 		    !WARN_ON_ONCE(!ssp->sda_is_static))
 			cleanup_srcu_struct(ssp);
 		free_percpu(ssp->sda);