diff mbox series

parisc: Fix spinlock barriers

Message ID 9b28e07e-3d11-d24b-410b-4732a828e588@bell.net (mailing list archive)
State Superseded
Headers show
Series parisc: Fix spinlock barriers | expand

Commit Message

John David Anglin July 3, 2020, 8:32 p.m. UTC
Stalls are quite frequent with recent kernels.  When the stall is detected by rcu_sched, we
get a backtrace similar to the following:

rcu: INFO: rcu_sched self-detected stall on CPU
rcu:    0-...!: (5998 ticks this GP) idle=3a6/1/0x4000000000000002 softirq=8356938/8356939 fqs=2
        (t=6000 jiffies g=8985785 q=391)
rcu: rcu_sched kthread starved for 5992 jiffies! g8985785 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
rcu: RCU grace-period kthread stack dump:
rcu_sched       R  running task        0    10      2 0x00000000
Backtrace:

Task dump for CPU 0:
collect2        R  running task        0 16562  16561 0x00000014
Backtrace:
 [<000000004017913c>] show_stack+0x44/0x60
 [<00000000401df694>] sched_show_task.part.77+0xf4/0x180
 [<00000000401e70e8>] dump_cpu_task+0x68/0x80
 [<0000000040230a58>] rcu_sched_clock_irq+0x708/0xae0
 [<0000000040237670>] update_process_times+0x58/0xb8
 [<00000000407dc39c>] timer_interrupt+0xa4/0x110
 [<000000004021af30>] __handle_irq_event_percpu+0xb8/0x228
 [<000000004021b0d4>] handle_irq_event_percpu+0x34/0x98
 [<00000000402225b8>] handle_percpu_irq+0xa8/0xe8
 [<000000004021a05c>] generic_handle_irq+0x54/0x70
 [<0000000040180340>] call_on_stack+0x18/0x24
 [<000000004017a63c>] execute_on_irq_stack+0x5c/0xa8
 [<000000004017b76c>] do_cpu_irq_mask+0x2dc/0x410
 [<000000004017f074>] intr_return+0x0/0xc

However, this doesn't provide any information as to the cause.  I enabled CONFIG_SOFTLOCKUP_DETECTOR
and I caught the following stall:

watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [cc1:22803]
Modules linked in: dm_mod dax binfmt_misc ext4 crc16 jbd2 ext2 mbcache sg ipmi_watchdog ipmi_si ipmi_poweroff ipmi_devintf ipmi_msghandler nfsd
ip_tables x_tables ipv6 autofs4 xfs libcrc32c crc32c_generic raid10 raid1 raid0 multipath linear md_mod ses enclosure sd_mod scsi_transport_sas
t10_pi sr_mod cdrom ata_generic uas usb_storage pata_cmd64x libata ohci_pci ehci_pci ohci_hcd sym53c8xx ehci_hcd scsi_transport_spi tg3 usbcore
scsi_mod usb_common
CPU: 0 PID: 22803 Comm: cc1 Not tainted 5.6.17+ #3
Hardware name: 9000/800/rp3440

     YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI
PSW: 00001000000001001111111100001111 Not tainted
r00-03  000000ff0804ff0f 0000000040891dc0 000000004037d1c4 000000006d5e8890
r04-07  000000004086fdc0 0000000040ab31ac 000000000004e99a 0000000000001f20
r08-11  0000000040b24710 000000006d5e8488 0000000040a1d280 000000006d5e89b0
r12-15  000000006d5e88c4 00000001802c2cb8 000000003c812825 0000004122eb4d18
r16-19  0000000040b26630 000000006d5e8898 000000000001d330 000000006d5e88c0
r20-23  000000000800000f 0000000a0ad24270 b6683633143fce3c 0000004122eb4d54
r24-27  000000006d5e88c4 000000006d5e8488 00000001802c2cb8 000000004086fdc0
r28-31  0000004122d57b69 000000006d5e89b0 000000006d5e89e0 000000006d5e8000
sr00-03  000000000c749000 0000000000000000 0000000000000000 000000000c749000
sr04-07  0000000000000000 0000000000000000 0000000000000000 0000000000000000

IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004037d414 000000004037d418
 IIR: 0e0010dc    ISR: 00000041042d63f0  IOR: 000000004086fdc0
 CPU:        0   CR30: 000000006d5e8000 CR31: ffffffffffffefff
 ORIG_R28: 000000004086fdc0
 IAOQ[0]: d_alloc_parallel+0x384/0x688
 IAOQ[1]: d_alloc_parallel+0x388/0x688
 RP(r2): d_alloc_parallel+0x134/0x688
Backtrace:
 [<000000004036974c>] __lookup_slow+0xa4/0x200
 [<0000000040369fc8>] walk_component+0x288/0x458
 [<000000004036a9a0>] path_lookupat+0x88/0x198
 [<000000004036e748>] filename_lookup+0xa0/0x168
 [<000000004036e95c>] user_path_at_empty+0x64/0x80
 [<000000004035d93c>] vfs_statx+0x104/0x158
 [<000000004035dfcc>] __do_sys_lstat64+0x44/0x80
 [<000000004035e5a0>] sys_lstat64+0x20/0x38
 [<0000000040180054>] syscall_exit+0x0/0x14

The code was stuck in this loop in d_alloc_parallel:

    4037d414:   0e 00 10 dc     ldd 0(r16),ret0
    4037d418:   c7 fc 5f ed     bb,< ret0,1f,4037d414 <d_alloc_parallel+0x384>
    4037d41c:   08 00 02 40     nop

This is the inner loop of bit_spin_lock which is called by hlist_bl_unlock in d_alloc_parallel:

static inline void bit_spin_lock(int bitnum, unsigned long *addr)
{
        /*
         * Assuming the lock is uncontended, this never enters
         * the body of the outer loop. If it is contended, then
         * within the inner loop a non-atomic test is used to
         * busywait with less bus contention for a good time to
         * attempt to acquire the lock bit.
         */
        preempt_disable();
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
        while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
                preempt_enable();
                do {
                        cpu_relax();
                } while (test_bit(bitnum, addr));
                preempt_disable();
        }
#endif
        __acquire(bitlock);
}

test_and_set_bit_lock() looks like this:

static inline int test_and_set_bit_lock(unsigned int nr,
                                        volatile unsigned long *p)
{
        long old;
        unsigned long mask = BIT_MASK(nr);

        p += BIT_WORD(nr);
        if (READ_ONCE(*p) & mask)
                return 1;

        old = atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p);
        return !!(old & mask);
}

After much trial and error, I came to the conclusion that barriers in our spinlock code are broken on SMP
machines.  The ldcw,co is supposed to be strongly ordered.  However, it doesn't work properly as a barrier
both on acquisition and release of the spin lock.  Only the sync instreuction works properly.

We gain back a little bit of the extra barrier overhead by removing the barriers around TLB flushes.  I
believe these operations are strongly ordered.

I've tested the attached change with builds of the Debian libprelude and libpreludedb packages.  The latter
package triggered stalls nearly every time near the end of the build.  However, since the stalls are random,
it's hard to be certain that the change completely fixes the problem.

Signed-off-by: Dave Anglin <dave.anglin@bell.net>
---

Comments

Rolf Eike Beer July 9, 2020, 1:26 p.m. UTC | #1
Am Freitag, 3. Juli 2020, 22:32:35 CEST schrieb John David Anglin:
> Stalls are quite frequent with recent kernels.  When the stall is detected
> by rcu_sched, we get a backtrace similar to the following:

With this patch on top of 5.7.7 I still get:

pioneer login: [14567.086716] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[14567.086716] rcu:     1-...!: (1 GPs behind) idle=2be/1/0x4000000000000000 softirq=1301330/1301331 fqs=6
[14567.086716] rcu:     2-...!: (1 GPs behind) idle=de2/1/0x4000000000000000 softirq=1324750/1324751 fqs=6
[14567.086716]  (detected by 0, t=15002 jiffies, g=2903305, q=43)
[14567.086716] Task dump for CPU 1:
[14567.086716] sh              R  running task        0 28260  28257 0x00000012
[14567.086716] Backtrace:
[14567.086716]
[14567.086716] Task dump for CPU 2:
[14567.086716] sh              t    0 28253  28247 0x00000011
[14567.086716] Backtrace:
[14567.086716]
[14567.086716] rcu: rcu_sched kthread starved for 14990 jiffies! g2903305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=3
[14567.086716] rcu: RCU grace-period kthread stack dump:
[14567.086716] rcu_sched       R  running task        0    10      2 0x00000000
[14567.086716] Backtrace:
[14567.086716]
[14747.106716] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[14747.106716] rcu:     1-...!: (1 GPs behind) idle=2be/1/0x4000000000000000 softirq=1301330/1301331 fqs=6
[14747.106716] rcu:     2-...!: (1 GPs behind) idle=de2/1/0x4000000000000000 softirq=1324750/1324751 fqs=6
[14747.106716]  (detected by 0, t=60007 jiffies, g=2903305, q=43)
[14747.106716] Task dump for CPU 1:
[14747.106716] sh              R  running task        0 28260  28257 0x00000012
[14747.106716] Backtrace:
[14747.106716]
[14747.106716] Task dump for CPU 2:
[14747.106716] sh              t    0 28253  28247 0x00000011
[14747.106716] Backtrace:
[14747.106716]
[14747.106716] rcu: rcu_sched kthread starved for 59995 jiffies! g2903305 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=3
[14747.106716] rcu: RCU grace-period kthread stack dump:
[14747.106716] rcu_sched       R  running task        0    10      2 0x00000000
[14747.106716] Backtrace:
[14747.106716]
[14927.126716] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[14927.126716] rcu:     1-...!: (1 GPs behind) idle=2be/1/0x4000000000000000 softirq=1301330/1301331 fqs=6
[14927.126716] rcu:     2-...!: (1 GPs behind) idle=de2/1/0x4000000000000000 softirq=1324750/1324751 fqs=6
[14927.126716]  (detected by 0, t=105012 jiffies, g=2903305, q=43)
[14927.126716] Task dump for CPU 1:
[14927.126716] sh              R  running task        0 28260  28257 0x00000012
[14927.126716] Backtrace:
[14927.126716]
[14927.126716] Task dump for CPU 2:
[14927.126716] sh              t    0 28253  28247 0x00000011
[14927.126716] Backtrace:
[14927.126716]
[14927.126716] rcu: rcu_sched kthread starved for 105000 jiffies! g2903305 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=3
[14927.126716] rcu: RCU grace-period kthread stack dump:
[14927.126716] rcu_sched       R  running task        0    10      2 0x00000000
[14927.126716] Backtrace:
[14927.126716]
[15107.146716] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[15107.146716] rcu:     1-...!: (1 GPs behind) idle=2be/1/0x4000000000000000 softirq=1301330/1301331 fqs=6
[15107.146716] rcu:     2-...!: (1 GPs behind) idle=de2/1/0x4000000000000000 softirq=1324750/1324751 fqs=6
[15107.146716]  (detected by 0, t=150017 jiffies, g=2903305, q=43)
[15107.146716] Task dump for CPU 1:
[15107.146716] sh              R  running task        0 28260  28257 0x00000012
[15107.146716] Backtrace:
[15107.146716]
[15107.146716] Task dump for CPU 2:
[15107.146716] sh              t    0 28253  28247 0x00000011
[15107.146716] Backtrace:
[15107.146716]
[15107.146716] rcu: rcu_sched kthread starved for 150005 jiffies! g2903305 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=3
[15107.146716] rcu: RCU grace-period kthread stack dump:
[15107.146716] rcu_sched       R  running task        0    10      2 0x00000000
[15107.146716] Backtrace:
[15107.146716]
[15287.166716] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[15287.166716] rcu:     1-...!: (1 GPs behind) idle=2be/1/0x4000000000000000 softirq=1301330/1301331 fqs=6
[15287.166716] rcu:     2-...!: (1 GPs behind) idle=de2/1/0x4000000000000000 softirq=1324750/1324751 fqs=6
[15287.166716]  (detected by 0, t=195022 jiffies, g=2903305, q=43)
[15287.166716] Task dump for CPU 1:
[15287.166716] sh              R  running task        0 28260  28257 0x00000012
[15287.166716] Backtrace:
[15287.166716]
[15287.166716] Task dump for CPU 2:
[15287.166716] sh              t    0 28253  28247 0x00000011
[15287.166716] Backtrace:
[15287.166716]
[15287.166716] rcu: rcu_sched kthread starved for 195010 jiffies! g2903305 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=3
[15287.166716] rcu: RCU grace-period kthread stack dump:
[15287.166716] rcu_sched       R  running task        0    10      2 0x00000000
[15287.166716] Backtrace:
[15287.166716]
John David Anglin July 9, 2020, 1:39 p.m. UTC | #2
On 2020-07-09 9:26 a.m., Rolf Eike Beer wrote:
> Am Freitag, 3. Juli 2020, 22:32:35 CEST schrieb John David Anglin:
>> Stalls are quite frequent with recent kernels.  When the stall is detected
>> by rcu_sched, we get a backtrace similar to the following:
> With this patch on top of 5.7.7 I still get:
Suggest enabling CONFIG_LOCKUP_DETECTOR=y and CONFIG_SOFTLOCKUP_DETECTOR=y so
we can see where the stall occurs.

Dave
Jeroen Roovers July 10, 2020, 10:50 p.m. UTC | #3
On Thu, 9 Jul 2020 09:39:33 -0400
John David Anglin <dave.anglin@bell.net> wrote:

> On 2020-07-09 9:26 a.m., Rolf Eike Beer wrote:
> > Am Freitag, 3. Juli 2020, 22:32:35 CEST schrieb John David Anglin:  
> >> Stalls are quite frequent with recent kernels.  When the stall is
> >> detected by rcu_sched, we get a backtrace similar to the
> >> following:  
> > With this patch on top of 5.7.7 I still get:  
> Suggest enabling CONFIG_LOCKUP_DETECTOR=y and
> CONFIG_SOFTLOCKUP_DETECTOR=y so we can see where the stall occurs.
> 
> Dave
> 

Attached is kernel output while running the futex_requeue_pi test from
the kernel selftests. It failed this way on the second try while it
passed on the first try. The output it gave is with the kernel
configuration options as set out above.


Kind regards,
      jer
[32627.869720][    C0] sched: RT throttling activated
[32627.869720][    C0] sched: RT throttling activated
[32758.890722][  T184] INFO: task futex_requeue_p:11776 blocked for more than
120 seconds.
[32758.890722][  T184] INFO: task futex_requeue_p:11776 blocked for more than
120 seconds.
[32759.092374][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[32759.092374][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[32759.283055][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[32759.283055][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[32759.497698][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[32759.497698][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[32759.653327][  T184] Backtrace:
[32759.653327][  T184] Backtrace:
[32759.730236][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[32759.730236][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[32759.894604][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[32759.894604][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[32760.041479][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[32760.041479][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[32760.197084][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[32760.197084][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[32760.346148][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[32760.346148][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[32760.499589][  T184]
[32760.499589][  T184]
[32783.829356][    C0] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[32783.829356][    C0] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[32783.942594][    C0] rcu:     0-...!: (1 GPs behind)
idle=00e/1/0x4000000000000002 softirq=479791/479792 fqs=0
[32783.942594][    C0] rcu:     0-...!: (1 GPs behind)
idle=00e/1/0x4000000000000002 softirq=479791/479792 fqs=0
[32783.942594][    C0]  (detected by 0, t=5252 jiffies, g=628645, q=403)
[32783.942594][    C0]  (detected by 0, t=5252 jiffies, g=628645, q=403)
[32783.942594][    C0] Task dump for CPU 0:
[32783.942594][    C0] Task dump for CPU 0:
[32783.942594][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000010
[32783.942594][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000010
[32783.942594][    C0] Backtrace:
[32783.942594][    C0] Backtrace:
[32783.942594][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[32783.942594][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[32783.942594][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[32783.942594][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[32783.942594][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[32783.942594][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[32783.942594][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[32783.942594][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[32783.942594][    C0]  [<00000000403052b4>] rcu_sched_clock_irq+0xa14/0xb58
[32783.942594][    C0]  [<00000000403052b4>] rcu_sched_clock_irq+0xa14/0xb58
[32783.942594][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[32783.942594][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[32783.942594][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[32783.942594][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[32783.942594][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[32783.942594][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[32783.942594][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[32783.942594][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[32783.942594][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[32783.942594][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[32783.942594][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[32783.942594][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[32783.942594][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[32783.942594][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[32783.942594][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[32783.942594][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[32783.942594][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[32783.942594][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[32783.942594][    C0]
[32783.942594][    C0]
[32885.902861][  T184] INFO: task futex_requeue_p:11776 blocked for more than
248 seconds.
[32885.902861][  T184] INFO: task futex_requeue_p:11776 blocked for more than
248 seconds.
[32886.104469][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[32886.104469][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[32886.295068][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[32886.295068][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[32886.509754][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[32886.509754][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[32886.665385][  T184] Backtrace:
[32886.665385][  T184] Backtrace:
[32886.742274][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[32886.742274][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[32886.906682][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[32886.906682][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[32887.053516][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[32887.053516][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[32887.209140][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[32887.209140][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[32887.358204][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[32887.358204][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[32887.511637][  T184]
[32887.511637][  T184]
[33020.862709][  T184] INFO: task futex_requeue_p:11776 blocked for more than
382 seconds.
[33020.862709][  T184] INFO: task futex_requeue_p:11776 blocked for more than
382 seconds.
[33021.064414][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33021.064414][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33021.255143][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33021.255143][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33021.469860][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33021.469860][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33021.625579][  T184] Backtrace:
[33021.625579][  T184] Backtrace:
[33021.702420][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33021.702420][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33021.866875][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33021.866875][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33022.013832][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33022.013832][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33022.169549][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33022.169549][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33022.318656][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33022.318656][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33022.472089][  T184]
[33022.472089][  T184]
[33031.888801][    C0] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[33031.888801][    C0] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[33031.898353][    C0]  (detected by 0, t=5252 jiffies, g=628793, q=10)
[33031.898353][    C0]  (detected by 0, t=5252 jiffies, g=628793, q=10)
[33031.898353][    C0] rcu: All QSes seen, last rcu_sched kthread activity
5252 (4303150277-4303145025), jiffies_till_next_fqs=1, root ->qsmask 0x0
[33031.898353][    C0] rcu: All QSes seen, last rcu_sched kthread activity
5252 (4303150277-4303145025), jiffies_till_next_fqs=1, root ->qsmask 0x0
[33031.898353][    C0] rcu: rcu_sched kthread starved for 5252 jiffies!
g628793 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
[33031.898353][    C0] rcu: rcu_sched kthread starved for 5252 jiffies!
g628793 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
[33031.898353][    C0] rcu:     Unless rcu_sched kthread gets sufficient CPU
time, OOM is now expected behavior.
[33031.898353][    C0] rcu:     Unless rcu_sched kthread gets sufficient CPU
time, OOM is now expected behavior.
[33031.898353][    C0] rcu: RCU grace-period kthread stack dump:
[33031.898353][    C0] rcu: RCU grace-period kthread stack dump:
[33031.898353][    C0] rcu_sched       R  running task        0    10      2
0x00000000
[33031.898353][    C0] rcu_sched       R  running task        0    10      2
0x00000000
[33031.898353][    C0] Backtrace:
[33031.898353][    C0] Backtrace:
[33031.898353][    C0]
[33031.898353][    C0]
[33067.872720][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33067.872720][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33067.872720][    C0] rcu:     0-....: (1 GPs behind)
idle=00e/1/0x4000000000000002 softirq=479896/479897 fqs=13
[33067.872720][    C0] rcu:     0-....: (1 GPs behind)
idle=00e/1/0x4000000000000002 softirq=479896/479897 fqs=13
[33067.872720][    C0]  (t=5250 jiffies g=628797 q=7)
[33067.872720][    C0]  (t=5250 jiffies g=628797 q=7)
[33067.872720][    C0] Task dump for CPU 0:
[33067.872720][    C0] Task dump for CPU 0:
[33067.872720][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33067.872720][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33067.872720][    C0] Backtrace:
[33067.872720][    C0] Backtrace:
[33067.872720][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33067.872720][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33067.872720][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33067.872720][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33067.872720][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33067.872720][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33067.872720][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33067.872720][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33067.872720][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33067.872720][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33067.872720][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33067.872720][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33067.872720][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33067.872720][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33067.872720][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33067.872720][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33067.872720][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33067.872720][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33067.872720][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33067.872720][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33067.872720][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33067.872720][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33067.872720][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33067.872720][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33067.872720][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33067.872720][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33067.872720][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33067.872720][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33067.872720][    C0]
[33067.872720][    C0]
[33130.884579][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33130.884579][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33130.884579][    C0] rcu:     0-....: (1 GPs behind)
idle=00e/1/0x4000000000000002 softirq=479896/479897 fqs=39
[33130.884579][    C0] rcu:     0-....: (1 GPs behind)
idle=00e/1/0x4000000000000002 softirq=479896/479897 fqs=39
[33130.884579][    C0]  (t=21003 jiffies g=628797 q=13)
[33130.884579][    C0]  (t=21003 jiffies g=628797 q=13)
[33130.884579][    C0] Task dump for CPU 0:
[33130.884579][    C0] Task dump for CPU 0:
[33130.884579][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000010
[33130.884579][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000010
[33130.884579][    C0] Backtrace:
[33130.884579][    C0] Backtrace:
[33130.884579][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33130.884579][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33130.884579][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33130.884579][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33130.884579][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33130.884579][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33130.884579][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33130.884579][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33130.884579][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33130.884579][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33130.884579][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33130.884579][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33130.884579][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33130.884579][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33130.884579][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33130.884579][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33130.884579][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33130.884579][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33130.884579][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33130.884579][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33130.884579][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33130.884579][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33130.884579][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33130.884579][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33130.884579][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33130.884579][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33130.884579][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33130.884579][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33130.884579][    C0]
[33130.884579][    C0]
[33153.913692][  T184] INFO: task futex_requeue_p:11776 blocked for more than
516 seconds.
[33153.913692][  T184] INFO: task futex_requeue_p:11776 blocked for more than
516 seconds.
[33154.115244][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33154.115244][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33154.305853][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33154.305853][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33154.520542][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33154.520542][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33154.676172][  T184] Backtrace:
[33154.676172][  T184] Backtrace:
[33154.753060][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33154.753060][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33154.917428][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33154.917428][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33155.064301][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33155.064301][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33155.219926][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33155.219926][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33155.368990][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33155.368990][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33155.522424][  T184]
[33155.522424][  T184]
[33193.896438][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33193.896438][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33193.896438][    C0] rcu:     0-....: (1 GPs behind)
idle=00e/1/0x4000000000000002 softirq=479896/479897 fqs=65
[33193.896438][    C0] rcu:     0-....: (1 GPs behind)
idle=00e/1/0x4000000000000002 softirq=479896/479897 fqs=65
[33193.896438][    C0]  (t=36756 jiffies g=628797 q=18)
[33193.896438][    C0]  (t=36756 jiffies g=628797 q=18)
[33193.896438][    C0] Task dump for CPU 0:
[33193.896438][    C0] Task dump for CPU 0:
[33193.896438][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33193.896438][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33193.896438][    C0] Backtrace:
[33193.896438][    C0] Backtrace:
[33193.896438][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33193.896438][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33193.896438][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33193.896438][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33193.896438][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33193.896438][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33193.896438][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33193.896438][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33193.896438][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33193.896438][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33193.896438][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33193.896438][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33193.896438][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33193.896438][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33193.896438][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33193.896438][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33193.896438][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33193.896438][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33193.896438][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33193.896438][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33193.896438][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33193.896438][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33193.896438][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33193.896438][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33193.896438][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33193.896438][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33193.896438][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33193.896438][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33193.896438][    C0]
[33193.896438][    C0]
[33251.868309][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33251.868309][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33251.868309][    C0] rcu:     0-....: (2 GPs behind)
idle=012/1/0x4000000000000002 softirq=479896/479897 fqs=15
[33251.868309][    C0] rcu:     0-....: (2 GPs behind)
idle=012/1/0x4000000000000002 softirq=479896/479897 fqs=15
[33251.868309][    C0]  (t=5250 jiffies g=628801 q=38)
[33251.868309][    C0]  (t=5250 jiffies g=628801 q=38)
[33251.868309][    C0] Task dump for CPU 0:
[33251.868309][    C0] Task dump for CPU 0:
[33251.868309][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33251.868309][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33251.868309][    C0] Backtrace:
[33251.868309][    C0] Backtrace:
[33251.868309][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33251.868309][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33251.868309][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33251.868309][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33251.868309][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33251.868309][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33251.868309][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33251.868309][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33251.868309][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33251.868309][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33251.868309][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33251.868309][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33251.868309][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33251.868309][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33251.868309][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33251.868309][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33251.868309][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33251.868309][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33251.868309][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33251.868309][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33251.868309][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33251.868309][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33251.868309][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33251.868309][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33251.868309][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33251.868309][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33251.868309][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33251.868309][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33251.868309][    C0]
[33251.868309][    C0]
[33314.880168][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33314.880168][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33314.880168][    C0] rcu:     0-....: (2 GPs behind)
idle=012/1/0x4000000000000002 softirq=479896/479897 fqs=39
[33314.880168][    C0] rcu:     0-....: (2 GPs behind)
idle=012/1/0x4000000000000002 softirq=479896/479897 fqs=39
[33314.880168][    C0]  (t=21003 jiffies g=628801 q=43)
[33314.880168][    C0]  (t=21003 jiffies g=628801 q=43)
[33315.285850][    C0] Task dump for CPU 0:
[33315.285850][    C0] Task dump for CPU 0:
[33315.285850][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33315.285850][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33315.285850][    C0] Backtrace:
[33315.285850][    C0] Backtrace:
[33315.285850][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33315.285850][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33315.285850][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33315.285850][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33315.285850][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33315.285850][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33315.285850][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33315.285850][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33315.285850][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33315.285850][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33315.285850][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33315.285850][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33315.285850][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33315.285850][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33315.285850][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33315.285850][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33315.285850][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33315.285850][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33315.285850][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33315.285850][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33315.285850][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33315.285850][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33315.285850][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33315.285850][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33315.285850][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33315.285850][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33315.285850][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33315.285850][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33315.285850][    C0]
[33315.285850][    C0]
[33373.917674][  T184] INFO: task futex_requeue_p:11776 blocked for more than
736 seconds.
[33373.917674][  T184] INFO: task futex_requeue_p:11776 blocked for more than
736 seconds.
[33374.119359][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33374.119359][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33374.310076][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33374.310076][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33374.524852][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33374.524852][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33374.680482][  T184] Backtrace:
[33374.680482][  T184] Backtrace:
[33374.757334][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33374.757334][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33374.921784][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33374.921784][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33375.068731][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33375.068731][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33375.224462][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33375.224462][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33375.373621][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33375.373621][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33375.527000][  T184]
[33375.527000][  T184]
[33377.892027][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33377.892027][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33377.892027][    C0] rcu:     0-....: (2 GPs behind)
idle=012/1/0x4000000000000002 softirq=479896/479897 fqs=60
[33377.892027][    C0] rcu:     0-....: (2 GPs behind)
idle=012/1/0x4000000000000002 softirq=479896/479897 fqs=60
[33377.892027][    C0]  (t=36756 jiffies g=628801 q=49)
[33377.892027][    C0]  (t=36756 jiffies g=628801 q=49)
[33377.892027][    C0] Task dump for CPU 0:
[33377.892027][    C0] Task dump for CPU 0:
[33377.892027][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33377.892027][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33377.892027][    C0] Backtrace:
[33377.892027][    C0] Backtrace:
[33377.892027][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33377.892027][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33377.892027][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33377.892027][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33377.892027][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33377.892027][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33377.892027][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33377.892027][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33377.892027][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33377.892027][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33377.892027][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33377.892027][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33377.892027][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33377.892027][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33377.892027][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33377.892027][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33377.892027][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33377.892027][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33377.892027][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33377.892027][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33377.892027][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33377.892027][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33377.892027][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33377.892027][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33377.892027][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33377.892027][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33377.892027][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33377.892027][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33377.892027][    C0]
[33377.892027][    C0]
[33440.871886][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33440.871886][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33440.871886][    C0] rcu:     0-....: (3 GPs behind)
idle=016/1/0x4000000000000002 softirq=479896/479897 fqs=11
[33440.871886][    C0] rcu:     0-....: (3 GPs behind)
idle=016/1/0x4000000000000002 softirq=479896/479897 fqs=11
[33440.871886][    C0]  (t=5250 jiffies g=628805 q=23)
[33440.871886][    C0]  (t=5250 jiffies g=628805 q=23)
[33440.871886][    C0] Task dump for CPU 0:
[33440.871886][    C0] Task dump for CPU 0:
[33440.871886][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33440.871886][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33440.871886][    C0] Backtrace:
[33440.871886][    C0] Backtrace:
[33440.871886][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33440.871886][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33440.871886][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33440.871886][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33440.871886][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33440.871886][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33440.871886][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33440.871886][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33440.871886][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33440.871886][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33440.871886][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33440.871886][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33440.871886][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33440.871886][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33440.871886][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33440.871886][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33440.871886][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33440.871886][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33440.871886][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33440.871886][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33440.871886][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33440.871886][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33440.871886][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33440.871886][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33440.871886][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33440.871886][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33440.871886][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33440.871886][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33440.871886][    C0]
[33440.871886][    C0]
[33477.879803][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33477.879803][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33477.879803][    C0] rcu:     0-....: (4 GPs behind)
idle=01a/1/0x4000000000000002 softirq=479896/479897 fqs=13
[33477.879803][    C0] rcu:     0-....: (4 GPs behind)
idle=01a/1/0x4000000000000002 softirq=479896/479897 fqs=13
[33477.879803][    C0]  (t=5250 jiffies g=628809 q=10)
[33477.879803][    C0]  (t=5250 jiffies g=628809 q=10)
[33477.879803][    C0] Task dump for CPU 0:
[33477.879803][    C0] Task dump for CPU 0:
[33477.879803][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33477.879803][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33477.879803][    C0] Backtrace:
[33477.879803][    C0] Backtrace:
[33477.879803][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33477.879803][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33477.879803][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33477.879803][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33477.879803][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33477.879803][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33477.879803][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33477.879803][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33477.879803][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33477.879803][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33477.879803][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33477.879803][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33477.879803][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33477.879803][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33477.879803][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33477.879803][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33477.879803][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33477.879803][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33477.879803][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33477.879803][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33477.879803][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33477.879803][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33477.879803][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33477.879803][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33477.879803][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33477.879803][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33477.879803][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33477.879803][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33477.879803][    C0]
[33477.879803][    C0]
[33524.859698][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33524.859698][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33524.859698][    C0] rcu:     0-....: (5 GPs behind)
idle=01e/1/0x4000000000000002 softirq=479896/479897 fqs=10
[33524.859698][    C0] rcu:     0-....: (5 GPs behind)
idle=01e/1/0x4000000000000002 softirq=479896/479897 fqs=10
[33524.859698][    C0]  (t=5250 jiffies g=628813 q=10)
[33524.859698][    C0]  (t=5250 jiffies g=628813 q=10)
[33524.859698][    C0] Task dump for CPU 0:
[33524.859698][    C0] Task dump for CPU 0:
[33524.859698][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33524.859698][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33524.859698][    C0] Backtrace:
[33524.859698][    C0] Backtrace:
[33524.859698][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33524.859698][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33524.859698][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33524.859698][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33524.859698][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33524.859698][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33524.859698][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33524.859698][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33524.859698][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33524.859698][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33524.859698][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33524.859698][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33524.859698][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33524.859698][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33524.859698][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33524.859698][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33524.859698][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33524.859698][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33524.859698][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33524.859698][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33524.859698][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33524.859698][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33524.859698][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33524.859698][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33524.859698][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33524.859698][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33524.859698][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33524.859698][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33524.859698][    C0]
[33524.859698][    C0]
[33578.896996][  T184] INFO: task futex_requeue_p:11776 blocked for more than
941 seconds.
[33578.896996][  T184] INFO: task futex_requeue_p:11776 blocked for more than
941 seconds.
[33579.098653][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33579.098653][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33579.289329][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33579.289329][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33579.504112][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33579.504112][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33579.659740][  T184] Backtrace:
[33579.659740][  T184] Backtrace:
[33579.736581][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33579.736581][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33579.901052][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33579.901052][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33580.047996][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33580.047996][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33580.203655][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33580.203655][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33580.352718][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33580.352718][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33580.506158][  T184]
[33580.506158][  T184]
[33587.871557][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33587.871557][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33587.871557][    C0] rcu:     0-....: (5 GPs behind)
idle=01e/1/0x4000000000000002 softirq=479896/479897 fqs=36
[33587.871557][    C0] rcu:     0-....: (5 GPs behind)
idle=01e/1/0x4000000000000002 softirq=479896/479897 fqs=36
[33587.871557][    C0]  (t=21003 jiffies g=628813 q=15)
[33587.871557][    C0]  (t=21003 jiffies g=628813 q=15)
[33587.871557][    C0] Task dump for CPU 0:
[33587.871557][    C0] Task dump for CPU 0:
[33587.871557][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33587.871557][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33587.871557][    C0] Backtrace:
[33587.871557][    C0] Backtrace:
[33587.871557][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33587.871557][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33587.871557][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33587.871557][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33587.871557][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33587.871557][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33587.871557][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33587.871557][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33587.871557][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33587.871557][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33587.871557][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33587.871557][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33587.871557][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33587.871557][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33587.871557][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33587.871557][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33587.871557][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33587.871557][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33587.871557][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33587.871557][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33587.871557][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33587.871557][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33587.871557][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33587.871557][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33587.871557][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33587.871557][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33587.871557][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33587.871557][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33587.871557][    C0]
[33587.871557][    C0]
[33623.895477][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33623.895477][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33623.895477][    C0] rcu:     0-....: (6 GPs behind)
idle=022/1/0x4000000000000002 softirq=479896/479897 fqs=13
[33623.895477][    C0] rcu:     0-....: (6 GPs behind)
idle=022/1/0x4000000000000002 softirq=479896/479897 fqs=13
[33623.895477][    C0]  (t=5250 jiffies g=628817 q=11)
[33623.895477][    C0]  (t=5250 jiffies g=628817 q=11)
[33623.895477][    C0] Task dump for CPU 0:
[33623.895477][    C0] Task dump for CPU 0:
[33623.895477][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33623.895477][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33623.895477][    C0] Backtrace:
[33623.895477][    C0] Backtrace:
[33623.895477][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33623.895477][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33623.895477][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33623.895477][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33623.895477][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33623.895477][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33623.895477][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33623.895477][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33623.895477][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33623.895477][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33623.895477][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33623.895477][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33623.895477][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33623.895477][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33623.895477][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33623.895477][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33623.895477][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33623.895477][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33623.895477][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33623.895477][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33623.895477][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33623.895477][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33623.895477][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33623.895477][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33623.895477][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33623.895477][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33623.895477][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33623.895477][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33623.895477][    C0]
[33623.895477][    C0]
[33686.907335][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33686.907335][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33686.908471][    C0] rcu:     0-....: (6 GPs behind)
idle=022/1/0x4000000000000002 softirq=479896/479897 fqs=38
[33686.908471][    C0] rcu:     0-....: (6 GPs behind)
idle=022/1/0x4000000000000002 softirq=479896/479897 fqs=38
[33686.908471][    C0]  (t=21003 jiffies g=628817 q=29)
[33686.908471][    C0]  (t=21003 jiffies g=628817 q=29)
[33686.908471][    C0] Task dump for CPU 0:
[33686.908471][    C0] Task dump for CPU 0:
[33686.908471][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33686.908471][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33686.908471][    C0] Backtrace:
[33686.908471][    C0] Backtrace:
[33686.908471][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33686.908471][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33686.908471][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33686.908471][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33686.908471][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33686.908471][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33686.908471][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33686.908471][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33686.908471][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33686.908471][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33686.908471][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33686.908471][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33686.908471][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33686.908471][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33686.908471][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33686.908471][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33686.908471][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33686.908471][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33686.908471][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33686.908471][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33686.908471][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33686.908471][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33686.908471][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33686.908471][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33686.908471][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33686.908471][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33686.908471][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33686.908471][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33686.908471][    C0]
[33686.908471][    C0]
[33749.919195][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33749.919195][    C0] rcu: INFO: rcu_sched self-detected stall on CPU
[33749.919195][    C0] rcu:     0-....: (6 GPs behind)
idle=022/1/0x4000000000000002 softirq=479896/479897 fqs=68
[33749.919195][    C0] rcu:     0-....: (6 GPs behind)
idle=022/1/0x4000000000000002 softirq=479896/479897 fqs=68
[33749.919195][    C0]  (t=36756 jiffies g=628817 q=35)
[33749.919195][    C0]  (t=36756 jiffies g=628817 q=35)
[33749.919195][    C0] Task dump for CPU 0:
[33749.919195][    C0] Task dump for CPU 0:
[33749.919195][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33749.919195][    C0] futex_requeue_p R  running task        0 11780  11689
0x00000014
[33749.919195][    C0] Backtrace:
[33749.919195][    C0] Backtrace:
[33749.919195][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33749.919195][    C0]  [<0000000040247130>] show_stack+0x50/0x70
[33749.919195][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33749.919195][    C0]  [<00000000402b8ed8>]
sched_show_task.part.0+0x150/0x170
[33749.919195][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33749.919195][    C0]  [<00000000402b90f8>] dump_cpu_task+0x68/0x80
[33749.919195][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33749.919195][    C0]  [<0000000040306568>] rcu_dump_cpu_stacks+0x110/0x178
[33749.919195][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33749.919195][    C0]  [<0000000040305088>] rcu_sched_clock_irq+0x7e8/0xb58
[33749.919195][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33749.919195][    C0]  [<000000004030c8b8>] update_process_times+0x58/0xb8
[33749.919195][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33749.919195][    C0]  [<00000000409e8160>] timer_interrupt+0xb0/0x120
[33749.919195][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33749.919195][    C0]  [<00000000402ec738>]
__handle_irq_event_percpu+0xb8/0x238
[33749.919195][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33749.919195][    C0]  [<00000000402ec8ec>] handle_irq_event_percpu+0x34/0xb8
[33749.919195][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33749.919195][    C0]  [<00000000402f4630>] handle_percpu_irq+0xa8/0xe8
[33749.919195][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33749.919195][    C0]  [<00000000402eb7d0>] generic_handle_irq+0x60/0x78
[33749.919195][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33749.919195][    C0]  [<000000004024e340>] call_on_stack+0x18/0x24
[33749.919195][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33749.919195][    C0]  [<0000000040249d60>] do_cpu_irq_mask+0x320/0x450
[33749.919195][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33749.919195][    C0]  [<000000004024d074>] intr_return+0x0/0xc
[33749.919195][    C0]
[33749.919195][    C0]
[33801.861080][  T184] INFO: task futex_requeue_p:11776 blocked for more than
1163 seconds.
[33801.861080][  T184] INFO: task futex_requeue_p:11776 blocked for more than
1163 seconds.
[33802.064917][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33802.064917][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[33802.255538][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33802.255538][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[33802.470219][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33802.470219][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[33802.625849][  T184] Backtrace:
[33802.625849][  T184] Backtrace:
[33802.702739][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33802.702739][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[33802.867146][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33802.867146][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[33803.013980][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33803.013980][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[33803.169603][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33803.169603][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[33803.318668][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33803.318668][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[33803.472107][  T184]
[33803.472107][  T184]
[33812.939054][    C0] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[33812.939054][    C0] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[33812.939054][    C0]  (detected by 0, t=52511 jiffies, g=628817, q=48)
[33812.939054][    C0]  (detected by 0, t=52511 jiffies, g=628817, q=48)
[33812.939054][    C0] rcu: All QSes seen, last rcu_sched kthread activity 19
(4303345540-4303345521), jiffies_till_next_fqs=1, root ->qsmask 0x0
[33812.939054][    C0] rcu: All QSes seen, last rcu_sched kthread activity 19
(4303345540-4303345521), jiffies_till_next_fqs=1, root ->qsmask 0x0
[34034.888097][  T184] INFO: task futex_requeue_p:11776 blocked for more than
1396 seconds.
[34034.888097][  T184] INFO: task futex_requeue_p:11776 blocked for more than
1396 seconds.
[34035.091906][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[34035.091906][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[34035.282516][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[34035.282516][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[34035.497205][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[34035.497205][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[34035.652834][  T184] Backtrace:
[34035.652834][  T184] Backtrace:
[34035.729724][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[34035.729724][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[34035.894141][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[34035.894141][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[34036.040965][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[34036.040965][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[34036.196589][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[34036.196589][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[34036.345655][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[34036.345655][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[34036.499095][  T184]
[34036.499095][  T184]
[34384.869774][    C0] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[34384.869774][    C0] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[34384.963346][  T184] INFO: task futex_requeue_p:11776 blocked for more than
1746 seconds.
[34384.963346][  T184] INFO: task futex_requeue_p:11776 blocked for more than
1746 seconds.
[34384.963054][    C0]  (detected by 0, t=5252 jiffies, g=628973, q=3)
[34384.963054][    C0]  (detected by 0, t=5252 jiffies, g=628973, q=3)
[34384.963054][    C0] rcu: All QSes seen, last rcu_sched kthread activity
5252 (4303488523-4303483271), jiffies_till_next_fqs=1, root ->qsmask 0x0
[34384.963054][    C0] rcu: All QSes seen, last rcu_sched kthread activity
5252 (4303488523-4303483271), jiffies_till_next_fqs=1, root ->qsmask 0x0
[34384.963054][    C0] rcu: rcu_sched kthread starved for 5252 jiffies!
g628973 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=1
[34384.963054][    C0] rcu: rcu_sched kthread starved for 5252 jiffies!
g628973 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=1
[34384.963054][    C0] rcu:     Unless rcu_sched kthread gets sufficient CPU
time, OOM is now expected behavior.
[34384.963054][    C0] rcu:     Unless rcu_sched kthread gets sufficient CPU
time, OOM is now expected behavior.
[34385.240135][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[34385.240135][  T184]       Not tainted
5.8.0-rc4-JeR-00128-g42f82040ee66-dirty #195
[34384.963054][    C0] rcu: RCU grace-period kthread stack dump:
[34384.963054][    C0] rcu: RCU grace-period kthread stack dump:
[34384.963054][    C0] rcu_sched       R  running task        0    10      2
0x00000000
[34384.963054][    C0] rcu_sched       R  running task        0    10      2
0x00000000
[34384.963054][    C0] Backtrace:
[34384.963054][    C0] Backtrace:
[34385.724199][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[34385.724199][  T184] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
disables this message.
[34384.963054][    C0]
[34384.963054][    C0]
[34387.118373][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[34387.118373][  T184] futex_requeue_p D    0 11776  11689 0x00000010
[34387.274009][  T184] Backtrace:
[34387.274009][  T184] Backtrace:
[34387.350896][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[34387.350896][  T184]  [<00000000402b2b50>] check_preempt_curr+0xb8/0xd8
[34387.515262][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[34387.515262][  T184]  [<00000000402e1f9c>] up_write+0x17c/0x188
[34387.662135][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[34387.662135][  T184]  [<000000004040f914>] sys_mprotect+0x1f4/0x278
[34387.817760][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[34387.817760][  T184]  [<000000004024e054>] syscall_exit+0x0/0x14
[34387.966825][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[34387.966825][  T184]  [<000000004024d080>] intr_check_sig+0x0/0x3c
[34388.120258][  T184]
[34388.120258][  T184]
John David Anglin July 11, 2020, 1:46 a.m. UTC | #4
Hi Jeroen,

On 2020-07-10 6:50 p.m., Jeroen Roovers wrote:
> On Thu, 9 Jul 2020 09:39:33 -0400
> John David Anglin <dave.anglin@bell.net> wrote:
>
>> On 2020-07-09 9:26 a.m., Rolf Eike Beer wrote:
>>> Am Freitag, 3. Juli 2020, 22:32:35 CEST schrieb John David Anglin:  
>>>> Stalls are quite frequent with recent kernels.  When the stall is
>>>> detected by rcu_sched, we get a backtrace similar to the
>>>> following:  
>>> With this patch on top of 5.7.7 I still get:  
>> Suggest enabling CONFIG_LOCKUP_DETECTOR=y and
>> CONFIG_SOFTLOCKUP_DETECTOR=y so we can see where the stall occurs.
>>
>> Dave
>>
> Attached is kernel output while running the futex_requeue_pi test from
> the kernel selftests. It failed this way on the second try while it
> passed on the first try. The output it gave is with the kernel
> configuration options as set out above.
Unfortunately, the soft lockup detector didn't trigger in the output you attached.  So, it's not
clear where futex_requeue_p is stuck.  There are no spinlocks  in check_preempt_curr() that
I can see.

void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
{
        const struct sched_class *class;

        if (p->sched_class == rq->curr->sched_class) {
                rq->curr->sched_class->check_preempt_curr(rq, p, flags);
        } else {
                for_each_class(class) {
                        if (class == rq->curr->sched_class)
                                break;
                        if (class == p->sched_class) {
                                resched_curr(rq);
                                break;
                        }
                }
        }

        /*
         * A queue event has occurred, and we're going to schedule.  In
         * this case, we can save a useless back to back clock update.
         */
        if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
                rq_clock_skip_update(rq, true);
}

There's one loop in the above code.

I have CONFIG_PREEMPT_NONE=y in my kernel builds.

Regards,
Dave
John David Anglin July 17, 2020, 9:15 p.m. UTC | #5
Stalls are quite frequent with recent kernels.  When the stall is detected by rcu_sched, we
get a backtrace similar to the following:

rcu: INFO: rcu_sched self-detected stall on CPU
rcu:    0-...!: (5998 ticks this GP) idle=3a6/1/0x4000000000000002 softirq=8356938/8356939 fqs=2
        (t=6000 jiffies g=8985785 q=391)
rcu: rcu_sched kthread starved for 5992 jiffies! g8985785 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
rcu: RCU grace-period kthread stack dump:
rcu_sched       R  running task        0    10      2 0x00000000
Backtrace:

Task dump for CPU 0:
collect2        R  running task        0 16562  16561 0x00000014
Backtrace:
 [<000000004017913c>] show_stack+0x44/0x60
 [<00000000401df694>] sched_show_task.part.77+0xf4/0x180
 [<00000000401e70e8>] dump_cpu_task+0x68/0x80
 [<0000000040230a58>] rcu_sched_clock_irq+0x708/0xae0
 [<0000000040237670>] update_process_times+0x58/0xb8
 [<00000000407dc39c>] timer_interrupt+0xa4/0x110
 [<000000004021af30>] __handle_irq_event_percpu+0xb8/0x228
 [<000000004021b0d4>] handle_irq_event_percpu+0x34/0x98
 [<00000000402225b8>] handle_percpu_irq+0xa8/0xe8
 [<000000004021a05c>] generic_handle_irq+0x54/0x70
 [<0000000040180340>] call_on_stack+0x18/0x24
 [<000000004017a63c>] execute_on_irq_stack+0x5c/0xa8
 [<000000004017b76c>] do_cpu_irq_mask+0x2dc/0x410
 [<000000004017f074>] intr_return+0x0/0xc

However, this doesn't provide any information as to the cause.  I enabled CONFIG_SOFTLOCKUP_DETECTOR
and I caught the following stall:

watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [cc1:22803]
Modules linked in: dm_mod dax binfmt_misc ext4 crc16 jbd2 ext2 mbcache sg ipmi_watchdog ipmi_si ipmi_poweroff ipmi_devintf ipmi_msghandler nfsd
ip_tables x_tables ipv6 autofs4 xfs libcrc32c crc32c_generic raid10 raid1 raid0 multipath linear md_mod ses enclosure sd_mod scsi_transport_sas
t10_pi sr_mod cdrom ata_generic uas usb_storage pata_cmd64x libata ohci_pci ehci_pci ohci_hcd sym53c8xx ehci_hcd scsi_transport_spi tg3 usbcore
scsi_mod usb_common
CPU: 0 PID: 22803 Comm: cc1 Not tainted 5.6.17+ #3
Hardware name: 9000/800/rp3440

     YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI
PSW: 00001000000001001111111100001111 Not tainted
r00-03  000000ff0804ff0f 0000000040891dc0 000000004037d1c4 000000006d5e8890
r04-07  000000004086fdc0 0000000040ab31ac 000000000004e99a 0000000000001f20
r08-11  0000000040b24710 000000006d5e8488 0000000040a1d280 000000006d5e89b0
r12-15  000000006d5e88c4 00000001802c2cb8 000000003c812825 0000004122eb4d18
r16-19  0000000040b26630 000000006d5e8898 000000000001d330 000000006d5e88c0
r20-23  000000000800000f 0000000a0ad24270 b6683633143fce3c 0000004122eb4d54
r24-27  000000006d5e88c4 000000006d5e8488 00000001802c2cb8 000000004086fdc0
r28-31  0000004122d57b69 000000006d5e89b0 000000006d5e89e0 000000006d5e8000
sr00-03  000000000c749000 0000000000000000 0000000000000000 000000000c749000
sr04-07  0000000000000000 0000000000000000 0000000000000000 0000000000000000

IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004037d414 000000004037d418
 IIR: 0e0010dc    ISR: 00000041042d63f0  IOR: 000000004086fdc0
 CPU:        0   CR30: 000000006d5e8000 CR31: ffffffffffffefff
 ORIG_R28: 000000004086fdc0
 IAOQ[0]: d_alloc_parallel+0x384/0x688
 IAOQ[1]: d_alloc_parallel+0x388/0x688
 RP(r2): d_alloc_parallel+0x134/0x688
Backtrace:
 [<000000004036974c>] __lookup_slow+0xa4/0x200
 [<0000000040369fc8>] walk_component+0x288/0x458
 [<000000004036a9a0>] path_lookupat+0x88/0x198
 [<000000004036e748>] filename_lookup+0xa0/0x168
 [<000000004036e95c>] user_path_at_empty+0x64/0x80
 [<000000004035d93c>] vfs_statx+0x104/0x158
 [<000000004035dfcc>] __do_sys_lstat64+0x44/0x80
 [<000000004035e5a0>] sys_lstat64+0x20/0x38
 [<0000000040180054>] syscall_exit+0x0/0x14

The code was stuck in this loop in d_alloc_parallel:

    4037d414:   0e 00 10 dc     ldd 0(r16),ret0
    4037d418:   c7 fc 5f ed     bb,< ret0,1f,4037d414 <d_alloc_parallel+0x384>
    4037d41c:   08 00 02 40     nop

This is the inner loop of bit_spin_lock which is called by hlist_bl_unlock in d_alloc_parallel:

static inline void bit_spin_lock(int bitnum, unsigned long *addr)
{
        /*
         * Assuming the lock is uncontended, this never enters
         * the body of the outer loop. If it is contended, then
         * within the inner loop a non-atomic test is used to
         * busywait with less bus contention for a good time to
         * attempt to acquire the lock bit.
         */
        preempt_disable();
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
        while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
                preempt_enable();
                do {
                        cpu_relax();
                } while (test_bit(bitnum, addr));
                preempt_disable();
        }
#endif
        __acquire(bitlock);
}

test_and_set_bit_lock() looks like this:

static inline int test_and_set_bit_lock(unsigned int nr,
                                        volatile unsigned long *p)
{
        long old;
        unsigned long mask = BIT_MASK(nr);

        p += BIT_WORD(nr);
        if (READ_ONCE(*p) & mask)
                return 1;

        old = atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p);
        return !!(old & mask);
}

After much trial and error, I came to the conclusion that barriers in our spinlock code are broken on SMP
machines.  The ldcw,co is supposed to be strongly ordered.  However, it doesn't work properly as a barrier
both on acquisition and release of the spin lock.  Only the sync instruction works properly.

After significantly more testing, I realized that instruction prefetching was corrupting the behavior of
instructions in the lock region.  We need a prefetch barrier to suppress speculation of these instructions.

Similarly, the release barrier needs to be followed by seven nops to ensure all accesses in the lock region
are completed before we release the lock.  This is described in the PA 2.0 architecture manual although for
different circumstances.

This got a bit ugly to implement.  I wasn't able to successfully inline the spinlock routines with the
lenthened barriers.  So, I ended up with implementing the lock routines in non inlined static functions.

I've adjusted the spinlock routines in spinlock.h and LWS atomic routines in syscall.S.  I removed the
release barriers from the TLB lock routines as I don't believe their needed.  However, there may be more
to look at there.

This fixes the buildd of libpreludedb and c8000 seems stable with the change.

Signed-off-by: Dave Anglin <dave.anglin@bell.net>
---

diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h
index dbaaca84f27f..35cf1b0d0f46 100644
--- a/arch/parisc/include/asm/barrier.h
+++ b/arch/parisc/include/asm/barrier.h
@@ -5,8 +5,10 @@
 #ifndef __ASSEMBLY__

 /* The synchronize caches instruction executes as a nop on systems in
-   which all memory references are performed in order. */
-#define synchronize_caches() __asm__ __volatile__ ("sync" : : : "memory")
+   which all memory references are performed in order. Seven nops are
+   required after the sync to ensure that all previous accesses have
+   been performed. */
+#define synchronize_caches() __asm__ __volatile__ ("sync\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop" : : : "memory")

 #if defined(CONFIG_SMP)
 #define mb()		do { synchronize_caches(); } while (0)
@@ -26,6 +28,15 @@
 #define __smp_rmb()	mb()
 #define __smp_wmb()	mb()

+/* The "ssm 0,%%r0" instruction acts as an ifetch speculation barrier. */
+#define ssm_nop() __asm__ __volatile__ ("ssm 0,%%r0\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop\n\tnop" : : : "memory")
+
+#if defined(CONFIG_SMP)
+#define speculation_mb() do { ssm_nop(); } while (0)
+#else
+#define speculation_mb() mb()
+#endif
+
 #include <asm-generic/barrier.h>

 #endif /* !__ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 70fecb8dc4e2..a23f7bc5d189 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -10,34 +10,54 @@
 static inline int arch_spin_is_locked(arch_spinlock_t *x)
 {
 	volatile unsigned int *a = __ldcw_align(x);
-	smp_mb();
-	return *a == 0;
+	return READ_ONCE(*a) == 0;
 }

-static inline void arch_spin_lock(arch_spinlock_t *x)
+static inline int __pa_ldcw (volatile unsigned int *a)
+{
+#if __PA_LDCW_ALIGNMENT==16
+	*(volatile char *)a = 0;
+#endif
+	return __ldcw(a);
+}
+
+static noinline void __pa_spin_lock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;

 	a = __ldcw_align(x);
-	while (__ldcw(a) == 0)
+	while (__pa_ldcw(a) == 0) {
 		while (*a == 0)
-			cpu_relax();
+			continue;
+	}
+	speculation_mb();
 }

-static inline void arch_spin_lock_flags(arch_spinlock_t *x,
-					 unsigned long flags)
+static inline void arch_spin_lock(arch_spinlock_t *x)
+{
+	__pa_spin_lock(x);
+}
+
+static noinline void __pa_spin_lock_flags(arch_spinlock_t *x,
+					  unsigned long flags)
 {
 	volatile unsigned int *a;
-	unsigned long flags_dis;

 	a = __ldcw_align(x);
-	while (__ldcw(a) == 0) {
-		local_save_flags(flags_dis);
-		local_irq_restore(flags);
+	while (__pa_ldcw(a) == 0) {
 		while (*a == 0)
-			cpu_relax();
-		local_irq_restore(flags_dis);
+			if (flags & PSW_SM_I) {
+				local_irq_enable();
+				local_irq_disable();
+			}
 	}
+	speculation_mb();
+}
+
+static inline void arch_spin_lock_flags(arch_spinlock_t *x,
+					unsigned long flags)
+{
+	__pa_spin_lock_flags(x, flags);
 }
 #define arch_spin_lock_flags arch_spin_lock_flags

@@ -46,25 +66,26 @@ static inline void arch_spin_unlock(arch_spinlock_t *x)
 	volatile unsigned int *a;

 	a = __ldcw_align(x);
-#ifdef CONFIG_SMP
-	(void) __ldcw(a);
-#else
-	mb();
-#endif
+	smp_mb();
 	*a = 1;
 }

-static inline int arch_spin_trylock(arch_spinlock_t *x)
+static noinline int __pa_spin_trylock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;
 	int ret;

 	a = __ldcw_align(x);
-        ret = __ldcw(a) != 0;
-
+	ret = __ldcw(a) != 0;
+	speculation_mb();
 	return ret;
 }

+static inline int arch_spin_trylock(arch_spinlock_t *x)
+{
+	return __pa_spin_trylock(x);
+}
+
 /*
  * Read-write spinlocks, allowing multiple readers but only one writer.
  * Unfair locking as Writers could be starved indefinitely by Reader(s)
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 4b484ec7c7da..cd7865b54393 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -454,7 +454,6 @@
 	nop
 	LDREG		0(\ptp),\pte
 	bb,<,n		\pte,_PAGE_PRESENT_BIT,3f
-	LDCW		0(\tmp),\tmp1
 	b		\fault
 	stw		\spc,0(\tmp)
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
@@ -468,8 +467,6 @@
 	.macro		tlb_unlock0	spc,tmp,tmp1
 #ifdef CONFIG_SMP
 98:	or,COND(=)	%r0,\spc,%r0
-	LDCW		0(\tmp),\tmp1
-	or,COND(=)	%r0,\spc,%r0
 	stw		\spc,0(\tmp)
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index f05c9d5b6b9e..4ba43a3c7c67 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -635,14 +635,33 @@ cas_action:
 	mfctl	%cr27, %r1
 	stw	%r1, 4(%sr2,%r20)
 #endif
+
+#ifdef CONFIG_SMP
+	ssm	0,%r0	/* Speculation barrier */
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+#endif
+
 	/* The load and store could fail */
 1:	ldw	0(%r26), %r28
 	sub,<>	%r28, %r25, %r0
 2:	stw	%r24, 0(%r26)
 	/* Free lock */
 #ifdef CONFIG_SMP
-98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
+98:	sync			/* Barrier */
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
 #endif
 	stw	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
@@ -659,8 +678,15 @@ cas_action:
 	/* Error occurred on load or store */
 	/* Free lock */
 #ifdef CONFIG_SMP
-98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
+98:	sync			/* Barrier */
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
 #endif
 	stw	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
@@ -805,6 +831,18 @@ cas2_wouldblock:
 		perspective
 	*/
 cas2_action:
+
+#ifdef CONFIG_SMP
+	ssm	0,%r0	/* Speculation barrier */
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+#endif
+
 	/* Jump to the correct function */
 	blr	%r29, %r0
 	/* Set %r28 as non-zero for now */
@@ -864,8 +902,15 @@ cas2_action:
 cas2_end:
 	/* Free lock */
 #ifdef CONFIG_SMP
-98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
+98:	sync			/* Barrier */
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
 #endif
 	stw	%r20, 0(%sr2,%r20)
 	/* Enable interrupts */
@@ -878,8 +923,15 @@ cas2_end:
 	/* Error occurred on load or store */
 	/* Free lock */
 #ifdef CONFIG_SMP
-98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
+98:	sync			/* Barrier */
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
 #endif
 	stw	%r20, 0(%sr2,%r20)
 	ssm	PSW_SM_I, %r0
Helge Deller July 18, 2020, 12:04 p.m. UTC | #6
Hi Dave,

just a few questions and suggestions below....

On 17.07.20 23:15, John David Anglin wrote:
> diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
> index 70fecb8dc4e2..a23f7bc5d189 100644
> --- a/arch/parisc/include/asm/spinlock.h
> +++ b/arch/parisc/include/asm/spinlock.h
> @@ -10,34 +10,54 @@
>  static inline int arch_spin_is_locked(arch_spinlock_t *x)
>  {
>  	volatile unsigned int *a = __ldcw_align(x);
> -	smp_mb();
> -	return *a == 0;
> +	return READ_ONCE(*a) == 0;
>  }
>
> -static inline void arch_spin_lock(arch_spinlock_t *x)
> +static inline int __pa_ldcw (volatile unsigned int *a)
> +{
> +#if __PA_LDCW_ALIGNMENT==16
> +	*(volatile char *)a = 0;
> +#endif

I assume this is planned as a kind of prefetching into cache here?
But doesn't it maybe introduce a bug when the first byte
(in which you write zero) wasn't zero at the beginning?
In that case the following ldcw():

> +	return __ldcw(a);

returns zero, althought it wasn't zero initially?


> +}
> +


> diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
> index f05c9d5b6b9e..4ba43a3c7c67 100644
> --- a/arch/parisc/kernel/syscall.S
> +++ b/arch/parisc/kernel/syscall.S
> @@ -659,8 +678,15 @@ cas_action:
>  	/* Error occurred on load or store */
>  	/* Free lock */
>  #ifdef CONFIG_SMP
> -98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
> +98:	sync			/* Barrier */
>  99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop

I think you should put those nop's before the 99:ALTERNATIVE() instruction above.
That way they all get replaced by one jump instead of multiple nops (when
there is only one CPU in the SMP system).

Helge
John David Anglin July 18, 2020, 1:08 p.m. UTC | #7
On 2020-07-18 8:04 a.m., Helge Deller wrote:
> Hi Dave,
>
> just a few questions and suggestions below....
>
> On 17.07.20 23:15, John David Anglin wrote:
[...]
>> -static inline void arch_spin_lock(arch_spinlock_t *x)
>> +static inline int __pa_ldcw (volatile unsigned int *a)
>> +{
>> +#if __PA_LDCW_ALIGNMENT==16
>> +	*(volatile char *)a = 0;
>> +#endif
>>
>> I assume this is planned as a kind of prefetching into cache here?
>> But doesn't it maybe introduce a bug when the first byte
>> (in which you write zero) wasn't zero at the beginning?
>> In that case the following ldcw():
The intention is to dirty the cache line.  Note the above generates a stb instruction that operates
on the most significant byte of the lock word.  The release uses a stw and sets bit 31 in the least
significant byte of the spin lock word.  So, the stb doesn't affect the state of the lock.

When the cache line is dirty, the ldcw instruction may be optimized to operate in cache.  This speeds
up the operation.

Another alternative is to use the stby instruction.  See the programming note on page 7-135 of the
architecture manual.  It doesn't write anything when the address is the left most byte of a word but
it still can be used to dirty the cache line.

Note this is only added when using ldcw and not when using ldcw,co.
>> +	return __ldcw(a);
> returns zero, althought it wasn't zero initially?
>
>
>> +}
>> +
>
>> diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
>> index f05c9d5b6b9e..4ba43a3c7c67 100644
>> --- a/arch/parisc/kernel/syscall.S
>> +++ b/arch/parisc/kernel/syscall.S
>> @@ -659,8 +678,15 @@ cas_action:
>>  	/* Error occurred on load or store */
>>  	/* Free lock */
>>  #ifdef CONFIG_SMP
>> -98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
>> +98:	sync			/* Barrier */
>>  99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
>> +	nop
>> +	nop
>> +	nop
>> +	nop
>> +	nop
>> +	nop
>> +	nop
> I think you should put those nop's before the 99:ALTERNATIVE() instruction above.
> That way they all get replaced by one jump instead of multiple nops (when
> there is only one CPU in the SMP system).
Okay.  I'll test.  Thought the replacement was just nops.

Dave
John David Anglin July 18, 2020, 1:48 p.m. UTC | #8
On 2020-07-18 9:08 a.m., John David Anglin wrote:
>>> diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
>>> index f05c9d5b6b9e..4ba43a3c7c67 100644
>>> --- a/arch/parisc/kernel/syscall.S
>>> +++ b/arch/parisc/kernel/syscall.S
>>> @@ -659,8 +678,15 @@ cas_action:
>>>  	/* Error occurred on load or store */
>>>  	/* Free lock */
>>>  #ifdef CONFIG_SMP
>>> -98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
>>> +98:	sync			/* Barrier */
>>>  99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
>>> +	nop
>>> +	nop
>>> +	nop
>>> +	nop
>>> +	nop
>>> +	nop
>>> +	nop
>> I think you should put those nop's before the 99:ALTERNATIVE() instruction above.
>> That way they all get replaced by one jump instead of multiple nops (when
>> there is only one CPU in the SMP system).
> Okay.  I'll test.  Thought the replacement was just nops.
Do we know that a jump is faster than 8 nops?

We need ALTERNATIVE conditions for the barrier defines.

Dave
Helge Deller July 18, 2020, 2:51 p.m. UTC | #9
On 18.07.20 15:48, John David Anglin wrote:
> On 2020-07-18 9:08 a.m., John David Anglin wrote:
>>>> diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
>>>> index f05c9d5b6b9e..4ba43a3c7c67 100644
>>>> --- a/arch/parisc/kernel/syscall.S
>>>> +++ b/arch/parisc/kernel/syscall.S
>>>> @@ -659,8 +678,15 @@ cas_action:
>>>>  	/* Error occurred on load or store */
>>>>  	/* Free lock */
>>>>  #ifdef CONFIG_SMP
>>>> -98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
>>>> +98:	sync			/* Barrier */
>>>>  99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
>>>> +	nop
>>>> +	nop
>>>> +	nop
>>>> +	nop
>>>> +	nop
>>>> +	nop
>>>> +	nop
>>> I think you should put those nop's before the 99:ALTERNATIVE() instruction above.
>>> That way they all get replaced by one jump instead of multiple nops (when
>>> there is only one CPU in the SMP system).
>> Okay.  I'll test.  Thought the replacement was just nops.
> Do we know that a jump is faster than 8 nops?

I don't know if it's faster.

> We need ALTERNATIVE conditions for the barrier defines.

Yes, that's Ok.

Helge
John David Anglin July 18, 2020, 6:23 p.m. UTC | #10
On 2020-07-18 9:08 a.m., John David Anglin wrote:
>>> -static inline void arch_spin_lock(arch_spinlock_t *x)
>>> +static inline int __pa_ldcw (volatile unsigned int *a)
>>> +{
>>> +#if __PA_LDCW_ALIGNMENT==16
>>> +	*(volatile char *)a = 0;
>>> +#endif
>>>
>>> I assume this is planned as a kind of prefetching into cache here?
>>> But doesn't it maybe introduce a bug when the first byte
>>> (in which you write zero) wasn't zero at the beginning?
>>> In that case the following ldcw():
> The intention is to dirty the cache line.  Note the above generates a stb instruction that operates
> on the most significant byte of the lock word.  The release uses a stw and sets bit 31 in the least
> significant byte of the spin lock word.  So, the stb doesn't affect the state of the lock.
>
> When the cache line is dirty, the ldcw instruction may be optimized to operate in cache.  This speeds
> up the operation.
>
> Another alternative is to use the stby instruction.  See the programming note on page 7-135 of the
> architecture manual.  It doesn't write anything when the address is the left most byte of a word but
> it still can be used to dirty the cache line.
Wait, you are correct.  We use other values to free the lock in entry.S and syscall.S.  Using the space register
value in entry.S  might be problematic as it's a long value.  Could we end up with the least significant 32 bits
all zero?

Dave
Rolf Eike Beer July 19, 2020, 7:03 a.m. UTC | #11
Am Samstag, 11. Juli 2020, 03:46:05 CEST schrieb John David Anglin:
> Hi Jeroen,
> 
> On 2020-07-10 6:50 p.m., Jeroen Roovers wrote:
> > On Thu, 9 Jul 2020 09:39:33 -0400
> > 
> > John David Anglin <dave.anglin@bell.net> wrote:
> >> On 2020-07-09 9:26 a.m., Rolf Eike Beer wrote:
> >>> Am Freitag, 3. Juli 2020, 22:32:35 CEST schrieb John David Anglin:
> >>>> Stalls are quite frequent with recent kernels.  When the stall is
> >>>> detected by rcu_sched, we get a backtrace similar to the
> >>> 
> >>>> following:
> >>> With this patch on top of 5.7.7 I still get:
> >> Suggest enabling CONFIG_LOCKUP_DETECTOR=y and
> >> CONFIG_SOFTLOCKUP_DETECTOR=y so we can see where the stall occurs.
> >> 
> >> Dave
> > 
> > Attached is kernel output while running the futex_requeue_pi test from
> > the kernel selftests. It failed this way on the second try while it
> > passed on the first try. The output it gave is with the kernel
> > configuration options as set out above.
> 
> Unfortunately, the soft lockup detector didn't trigger in the output you
> attached.  So, it's not clear where futex_requeue_p is stuck.  There are no
> spinlocks  in check_preempt_curr() that I can see.

Here is some info. Will use v2 patch soon.

[835212.602791] watchdog: BUG: soft lockup - CPU#0 stuck for 21s! [cc1:4062]
[835212.602791] Modules linked in: 8021q ipmi_poweroff ipmi_si ipmi_devintf 
sata_via ipmi_msghandler cbc dm_zero dm_snapshot dm_mirror dm_region_hash 
dm_log dm_crypt dm_bufio pata_sil680 libata
[835212.602791] CPU: 0 PID: 4062 Comm: cc1 Tainted: G             L    5.7.7-
gentoo-parisc64 #2
[835212.602791] Hardware name: 9000/785/C8000
[835212.602791]
[835212.602791]      YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI
[835212.602791] PSW: 00001000000001101111111100001111 Tainted: G             L
[835212.602791] r00-03  000000ff0806ff0f 0000000040a6a440 000000004035fcac 
000000000096cf9a
[835212.602791] r04-07  0000000040a36440 000000019ad27180 0000000230bc4488 
0000000040c61250
[835212.602791] r08-11  0000000040dd29b0 0000000000000001 0000000000000030 
0000000230bc48b0
[835212.602791] r12-15  0000000040dd3508 0000000000000000 0000000040a69c40 
000000002ee74a64
[835212.602791] r16-19  00000001a64ec240 0000000040d42810 0000000002ae2b20 
00000001a2feda71
[835212.602791] r20-23  00000002332bb090 0000000041fd1388 0000000001915c5f 
000000019ad271c0
[835212.602791] r24-27  0000000230bc48b0 0000000230bc4488 00000001a64ec240 
0000000040a36440
[835212.602791] r28-31  00000001a2feda71 0000000230bc49c0 0000000230bc49f0 
000000000800000f
[835212.602791] sr00-03  0000000003c7b800 0000000000000000 0000000000000000 
0000000003c7b800
[835212.602791] sr04-07  0000000000000000 0000000000000000 0000000000000000 
0000000000000000
[835212.602791]
[835212.602791] IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004035ff64 
000000004035ff68
[835212.602791]  IIR: 0d8010dc    ISR: fffffffffffff000  IOR: 0000000040c619c8
[835212.602791]  CPU:        0   CR30: 0000000230bc4000 CR31: ffffffffffffffff
[835212.602791]  ORIG_R28: 0000000040a69c40
[835212.602791]  IAOQ[0]: d_alloc_parallel+0x424/0x668
[835212.602791]  IAOQ[1]: d_alloc_parallel+0x428/0x668
[835212.602791]  RP(r2): d_alloc_parallel+0x16c/0x668
[835212.602791] Backtrace:
[835212.602791]  [<000000004034a5f0>] __lookup_slow+0xb0/0x280
[835212.602791]  [<000000004034c6a0>] walk_component+0x1d8/0x300
[835212.602791]  [<000000004034cd88>] path_lookupat+0xd0/0x228
[835212.602791]  [<000000004034fa64>] filename_lookup+0xac/0x1a8
[835212.602791]  [<000000004034fccc>] user_path_at_empty+0x74/0x90
[835212.602791]  [<000000004033c774>] vfs_statx+0xd4/0x1a0
[835212.602791]  [<000000004033cdb8>] __do_sys_stat64+0x48/0xa0
[835212.602791]  [<000000004033d5f0>] sys_stat64+0x20/0x30
[835212.602791]  [<0000000040191018>] syscall_exit+0x0/0x14
[835212.602791]
[835370.370791] rcu: INFO: rcu_sched self-detected stall on CPU
[835370.370791] rcu:    0-....: (4817472 ticks this GP) 
idle=0c2/1/0x4000000000000002 softirq=71611784/71611784 fqs=56671
[835370.370791]         (t=4965330 jiffies g=142820329 q=40783)
[835370.370791] rcu: rcu_sched kthread starved for 4842124 jiffies! g142820329 
f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=2
[835370.370791] rcu: RCU grace-period kthread stack dump:
[835370.370791] rcu_sched       R  running task        0    10      2 
0x00000000
[835370.370791] Backtrace:
[835370.370791]
[835370.370791] Task dump for CPU 0:
[835370.370791] cc1             R  running task        0  4062   4058 
0x00000014
[835370.370791] Backtrace:
[835370.370791]  [<0000000040189a1c>] show_stack+0x5c/0x70
[835370.370791]  [<0000000040201254>] sched_show_task.part.0+0x1a4/0x1c8
[835370.370791]  [<00000000402014ac>] dump_cpu_task+0x8c/0xa8
[835370.370791]  [<000000004024c858>] rcu_dump_cpu_stacks+0x168/0x210
[835370.370791]  [<000000004024ad2c>] rcu_sched_clock_irq+0x984/0xd70
[835370.370791]  [<0000000040251eb0>] update_process_times+0x80/0x110
[835370.370791]  [<00000000409abdbc>] timer_interrupt+0xb4/0x130
[835370.370791]  [<00000000402320c4>] __handle_irq_event_percpu+0xc4/0x270
[835370.370791]  [<00000000402322a8>] handle_irq_event_percpu+0x38/0xd8
[835370.370791]  [<000000004023a778>] handle_percpu_irq+0xb0/0xe8
[835370.370791]  [<0000000040231140>] generic_handle_irq+0x50/0x60
[835370.370791]  [<0000000040191304>] call_on_stack+0x18/0x24
[835370.370791]  [<000000004018b728>] execute_on_irq_stack+0x80/0x98
[835370.370791]  [<000000004018c928>] do_cpu_irq_mask+0x2a0/0x340
[835370.370791]  [<0000000040190074>] intr_return+0x0/0xc
[835370.370791]
[835370.370791] Task dump for CPU 1:
[835370.370791] cc1             R  running task        0  4059   4055 
0x00000014
[835370.370791] Backtrace:
[835370.370791]
[835370.370791] Task dump for CPU 3:
[835370.370791] cc1             R  running task        0  4061   4056 
0x00000014
[835370.370791] Backtrace:
John David Anglin July 19, 2020, 1:43 p.m. UTC | #12
Hi Eike,

Excellent, this is the backtrace which we needed to see.  This is same stall as the one I
see building libpreludedb.

I'm going to send a new patch with the real fix soon.  The previous patches just messed
with the timing of things.

On 2020-07-19 3:03 a.m., Rolf Eike Beer wrote:
> [835212.602791]  IAOQ[0]: d_alloc_parallel+0x424/0x668
> [835212.602791]  IAOQ[1]: d_alloc_parallel+0x428/0x668
> [835212.602791]  RP(r2): d_alloc_parallel+0x16c/0x668
> [835212.602791] Backtrace:
> [835212.602791]  [<000000004034a5f0>] __lookup_slow+0xb0/0x280
> [835212.602791]  [<000000004034c6a0>] walk_component+0x1d8/0x300
> [835212.602791]  [<000000004034cd88>] path_lookupat+0xd0/0x228
> [835212.602791]  [<000000004034fa64>] filename_lookup+0xac/0x1a8
> [835212.602791]  [<000000004034fccc>] user_path_at_empty+0x74/0x90
> [835212.602791]  [<000000004033c774>] vfs_statx+0xd4/0x1a0
> [835212.602791]  [<000000004033cdb8>] __do_sys_stat64+0x48/0xa0
> [835212.602791]  [<000000004033d5f0>] sys_stat64+0x20/0x30
> [835212.602791]  [<0000000040191018>] syscall_exit+0x0/0x14

Regards,
Dave
diff mbox series

Patch

diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 75cf84070fc9..996b1650c151 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -55,16 +55,14 @@  extern int pa_serialize_tlb_flushes;
 #endif

 #define purge_tlb_start(flags)  do { \
+	local_irq_save(flags);	\
 	if (pa_serialize_tlb_flushes)	\
-		spin_lock_irqsave(&pa_tlb_flush_lock, flags); \
-	else \
-		local_irq_save(flags);	\
+		arch_spin_lock_nb((arch_spinlock_t *)&pa_tlb_flush_lock); \
 	} while (0)
 #define purge_tlb_end(flags)	do { \
 	if (pa_serialize_tlb_flushes)	\
-		spin_unlock_irqrestore(&pa_tlb_flush_lock, flags); \
-	else \
-		local_irq_restore(flags); \
+		arch_spin_unlock_nb((arch_spinlock_t *)&pa_tlb_flush_lock); \
+	local_irq_restore(flags); \
 	} while (0)

 /* Purge data and instruction TLB entries. The TLB purge instructions
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 70fecb8dc4e2..890d899a8eae 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -18,6 +18,17 @@  static inline void arch_spin_lock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;

+	a = __ldcw_align(x);
+	while (__ldcw(a) == 0)
+		while (*a == 0)
+			cpu_relax();
+	smp_mb();
+}
+
+static inline void arch_spin_lock_nb(arch_spinlock_t *x)
+{
+	volatile unsigned int *a;
+
 	a = __ldcw_align(x);
 	while (__ldcw(a) == 0)
 		while (*a == 0)
@@ -38,6 +49,7 @@  static inline void arch_spin_lock_flags(arch_spinlock_t *x,
 			cpu_relax();
 		local_irq_restore(flags_dis);
 	}
+	smp_mb();
 }
 #define arch_spin_lock_flags arch_spin_lock_flags

@@ -46,11 +58,15 @@  static inline void arch_spin_unlock(arch_spinlock_t *x)
 	volatile unsigned int *a;

 	a = __ldcw_align(x);
-#ifdef CONFIG_SMP
-	(void) __ldcw(a);
-#else
-	mb();
-#endif
+	smp_mb();
+	*a = 1;
+}
+
+static inline void arch_spin_unlock_nb(arch_spinlock_t *x)
+{
+	volatile unsigned int *a;
+
+	a = __ldcw_align(x);
 	*a = 1;
 }

@@ -61,6 +77,7 @@  static inline int arch_spin_trylock(arch_spinlock_t *x)

 	a = __ldcw_align(x);
         ret = __ldcw(a) != 0;
+	smp_mb();

 	return ret;
 }
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 4b484ec7c7da..cd7865b54393 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -454,7 +454,6 @@ 
 	nop
 	LDREG		0(\ptp),\pte
 	bb,<,n		\pte,_PAGE_PRESENT_BIT,3f
-	LDCW		0(\tmp),\tmp1
 	b		\fault
 	stw		\spc,0(\tmp)
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
@@ -468,8 +467,6 @@ 
 	.macro		tlb_unlock0	spc,tmp,tmp1
 #ifdef CONFIG_SMP
 98:	or,COND(=)	%r0,\spc,%r0
-	LDCW		0(\tmp),\tmp1
-	or,COND(=)	%r0,\spc,%r0
 	stw		\spc,0(\tmp)
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index f05c9d5b6b9e..b8b2a3364991 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -634,6 +634,10 @@  cas_action:
 	/* DEBUG */
 	mfctl	%cr27, %r1
 	stw	%r1, 4(%sr2,%r20)
+#endif
+#ifdef CONFIG_SMP
+98:	sync			/* Barrier */
+99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
 	/* The load and store could fail */
 1:	ldw	0(%r26), %r28
@@ -641,7 +645,7 @@  cas_action:
 2:	stw	%r24, 0(%r26)
 	/* Free lock */
 #ifdef CONFIG_SMP
-98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
+98:	sync			/* Barrier */
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
 	stw	%r20, 0(%sr2,%r20)
@@ -659,7 +663,7 @@  cas_action:
 	/* Error occurred on load or store */
 	/* Free lock */
 #ifdef CONFIG_SMP
-98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
+98:	sync			/* Barrier */
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
 	stw	%r20, 0(%sr2,%r20)
@@ -805,6 +809,10 @@  cas2_wouldblock:
 		perspective
 	*/
 cas2_action:
+#ifdef CONFIG_SMP
+98:	sync			/* Barrier */
+99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
+#endif
 	/* Jump to the correct function */
 	blr	%r29, %r0
 	/* Set %r28 as non-zero for now */
@@ -864,7 +872,7 @@  cas2_action:
 cas2_end:
 	/* Free lock */
 #ifdef CONFIG_SMP
-98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
+98:	sync			/* Barrier */
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
 	stw	%r20, 0(%sr2,%r20)
@@ -878,7 +886,7 @@  cas2_end:
 	/* Error occurred on load or store */
 	/* Free lock */
 #ifdef CONFIG_SMP
-98:	LDCW	0(%sr2,%r20), %r1			/* Barrier */
+98:	sync			/* Barrier */
 99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
 	stw	%r20, 0(%sr2,%r20)