parisc: Prevent optimization of loads and stores in atomic operations

Message ID 55841e49-cf36-e49a-5b33-7b60d3275e56@bell.net (mailing list archive)
State Superseded
Series parisc: Prevent optimization of loads and stores in atomic operations

Commit Message

John David Anglin June 13, 2020, 3:20 p.m. UTC
Stalls are quite frequent with recent kernels.  When the stall is detected by rcu_sched, we
get a backtrace similar to the following:

rcu: INFO: rcu_sched self-detected stall on CPU
rcu:    0-...!: (5998 ticks this GP) idle=3a6/1/0x4000000000000002 softirq=8356938/8356939 fqs=2
        (t=6000 jiffies g=8985785 q=391)
rcu: rcu_sched kthread starved for 5992 jiffies! g8985785 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=0
rcu: RCU grace-period kthread stack dump:
rcu_sched       R  running task        0    10      2 0x00000000
Backtrace:

Task dump for CPU 0:
collect2        R  running task        0 16562  16561 0x00000014
Backtrace:
 [<000000004017913c>] show_stack+0x44/0x60
 [<00000000401df694>] sched_show_task.part.77+0xf4/0x180
 [<00000000401e70e8>] dump_cpu_task+0x68/0x80
 [<0000000040230a58>] rcu_sched_clock_irq+0x708/0xae0
 [<0000000040237670>] update_process_times+0x58/0xb8
 [<00000000407dc39c>] timer_interrupt+0xa4/0x110
 [<000000004021af30>] __handle_irq_event_percpu+0xb8/0x228
 [<000000004021b0d4>] handle_irq_event_percpu+0x34/0x98
 [<00000000402225b8>] handle_percpu_irq+0xa8/0xe8
 [<000000004021a05c>] generic_handle_irq+0x54/0x70
 [<0000000040180340>] call_on_stack+0x18/0x24
 [<000000004017a63c>] execute_on_irq_stack+0x5c/0xa8
 [<000000004017b76c>] do_cpu_irq_mask+0x2dc/0x410
 [<000000004017f074>] intr_return+0x0/0xc

However, this doesn't provide any information as to the cause.  I enabled CONFIG_SOFTLOCKUP_DETECTOR
and caught the following stall:

watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [cc1:22803]
Modules linked in: dm_mod dax binfmt_misc ext4 crc16 jbd2 ext2 mbcache sg ipmi_watchdog ipmi_si ipmi_poweroff ipmi_devintf ipmi_msghandler nfsd
ip_tables x_tables ipv6 autofs4 xfs libcrc32c crc32c_generic raid10 raid1 raid0 multipath linear md_mod ses enclosure sd_mod scsi_transport_sas
t10_pi sr_mod cdrom ata_generic uas usb_storage pata_cmd64x libata ohci_pci ehci_pci ohci_hcd sym53c8xx ehci_hcd scsi_transport_spi tg3 usbcore
scsi_mod usb_common
CPU: 0 PID: 22803 Comm: cc1 Not tainted 5.6.17+ #3
Hardware name: 9000/800/rp3440

     YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI
PSW: 00001000000001001111111100001111 Not tainted
r00-03  000000ff0804ff0f 0000000040891dc0 000000004037d1c4 000000006d5e8890
r04-07  000000004086fdc0 0000000040ab31ac 000000000004e99a 0000000000001f20
r08-11  0000000040b24710 000000006d5e8488 0000000040a1d280 000000006d5e89b0
r12-15  000000006d5e88c4 00000001802c2cb8 000000003c812825 0000004122eb4d18
r16-19  0000000040b26630 000000006d5e8898 000000000001d330 000000006d5e88c0
r20-23  000000000800000f 0000000a0ad24270 b6683633143fce3c 0000004122eb4d54
r24-27  000000006d5e88c4 000000006d5e8488 00000001802c2cb8 000000004086fdc0
r28-31  0000004122d57b69 000000006d5e89b0 000000006d5e89e0 000000006d5e8000
sr00-03  000000000c749000 0000000000000000 0000000000000000 000000000c749000
sr04-07  0000000000000000 0000000000000000 0000000000000000 0000000000000000

IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004037d414 000000004037d418
 IIR: 0e0010dc    ISR: 00000041042d63f0  IOR: 000000004086fdc0
 CPU:        0   CR30: 000000006d5e8000 CR31: ffffffffffffefff
 ORIG_R28: 000000004086fdc0
 IAOQ[0]: d_alloc_parallel+0x384/0x688
 IAOQ[1]: d_alloc_parallel+0x388/0x688
 RP(r2): d_alloc_parallel+0x134/0x688
Backtrace:
 [<000000004036974c>] __lookup_slow+0xa4/0x200
 [<0000000040369fc8>] walk_component+0x288/0x458
 [<000000004036a9a0>] path_lookupat+0x88/0x198
 [<000000004036e748>] filename_lookup+0xa0/0x168
 [<000000004036e95c>] user_path_at_empty+0x64/0x80
 [<000000004035d93c>] vfs_statx+0x104/0x158
 [<000000004035dfcc>] __do_sys_lstat64+0x44/0x80
 [<000000004035e5a0>] sys_lstat64+0x20/0x38
 [<0000000040180054>] syscall_exit+0x0/0x14

The code was stuck in this loop in d_alloc_parallel:

    4037d414:   0e 00 10 dc     ldd 0(r16),ret0
    4037d418:   c7 fc 5f ed     bb,< ret0,1f,4037d414 <d_alloc_parallel+0x384>
    4037d41c:   08 00 02 40     nop

This is the inner loop of bit_spin_lock, which is called by hlist_bl_lock in d_alloc_parallel:

static inline void bit_spin_lock(int bitnum, unsigned long *addr)
{
        /*
         * Assuming the lock is uncontended, this never enters
         * the body of the outer loop. If it is contended, then
         * within the inner loop a non-atomic test is used to
         * busywait with less bus contention for a good time to
         * attempt to acquire the lock bit.
         */
        preempt_disable();
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
        while (unlikely(test_and_set_bit_lock(bitnum, addr))) {
                preempt_enable();
                do {
                        cpu_relax();
                } while (test_bit(bitnum, addr));
                preempt_disable();
        }
#endif
        __acquire(bitlock);
}

test_and_set_bit_lock() looks like this:

static inline int test_and_set_bit_lock(unsigned int nr,
                                        volatile unsigned long *p)
{
        long old;
        unsigned long mask = BIT_MASK(nr);

        p += BIT_WORD(nr);
        if (READ_ONCE(*p) & mask)
                return 1;

        old = atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p);
        return !!(old & mask);
}

Note that the volatile qualifier is dropped when p is cast to atomic_long_t *.
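
The dropped qualifier can be illustrated with a small user-space sketch (this atomic_long_t is
just a stand-in for the kernel type):

typedef struct { long counter; } atomic_long_t;

static long read_counter(volatile unsigned long *p)
{
        /* The cast compiles cleanly but silently discards the volatile
         * qualifier (gcc warns only with -Wcast-qual), so this load is
         * an ordinary memory access that gcc may merge with other loads
         * of the same location. */
        atomic_long_t *ap = (atomic_long_t *)p;

        return ap->counter;
}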

Our current implementation of atomic_fetch_##op is:

static __inline__ int atomic_fetch_##op(int i, atomic_t *v)             \
{                                                                       \
        unsigned long flags;                                            \
        int ret;                                                        \
                                                                        \
        _atomic_spin_lock_irqsave(v, flags);                            \
        ret = v->counter;                                               \
        v->counter c_op i;                                              \
        _atomic_spin_unlock_irqrestore(v, flags);                       \
                                                                        \
        return ret;                                                     \
}

Note that the pointer v is not volatile.  Although _atomic_spin_lock_irqsave clobbers memory,
I realized that gcc had optimized the accesses guarded by the spinlock.  Essentially, this
occurs in the following situation:

t1 = *p;
if (t1)
  do_something1;
_atomic_spin_lock_irqsave();
t2 = *p;
if (t2)
  do_something2;

Because neither load is volatile, gcc can reuse the value of t1 for t2, and the second test then
acts on a stale value.  The fix is to use a volatile pointer for the accesses guarded by the
spinlocks.  This prevents gcc from optimizing the accesses.
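
The load merging can be reproduced in user space with a simplified sketch (no spinlock or
memory clobber, just two loads of the same location): with gcc -O2, read_twice_plain typically
compiles to a single load, while read_twice_volatile must issue both.

#include <stdio.h>

static int read_twice_plain(int *p)
{
        int t1 = *p;    /* first load */
        int t2 = *p;    /* gcc may merge this load with the first */
        return t1 + t2;
}

static int read_twice_volatile(int *p)
{
        volatile int *vp = p;
        int t1 = *vp;   /* accesses through vp must both be issued; */
        int t2 = *vp;   /* gcc cannot merge or elide them           */
        return t1 + t2;
}

int main(void)
{
        int x = 1;
        printf("%d %d\n", read_twice_plain(&x), read_twice_volatile(&x));
        return 0;
}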

I have now run 5.6.17 and 5.6.18 kernels for about 4 days without a stall or any other obvious kernel issue.

Signed-off-by: Dave Anglin <dave.anglin@bell.net>

Comments

Helge Deller June 13, 2020, 11:08 p.m. UTC | #1
On 13.06.20 17:20, John David Anglin wrote:
> Stalls are quite frequent with recent kernels.  When the stall is detected by rcu_sched, we
> get a backtrace similar to the following:
...
> The fix is to use a volatile pointer for the accesses in spinlocks.  This prevents gcc from
> optimizing the accesses.

Very nice finding!
If this really fixes the stalls we have seen in the past, then that's a BIG step forward!

...
> diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c
> index 70ffbcf889b8..5904966ca957 100644
> --- a/arch/parisc/lib/bitops.c
> +++ b/arch/parisc/lib/bitops.c
> @@ -21,11 +21,12 @@ arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
>  unsigned long __xchg64(unsigned long x, unsigned long *ptr)
>  {
>  	unsigned long temp, flags;
> +	volatile unsigned long *p = ptr;

Regarding your changes to bitops.c, can't we instead
simply tag the parameter ptr with "volatile", e.g.
 unsigned long __xchg64(unsigned long x, volatile unsigned long *ptr)
 unsigned long __xchg32(int x, volatile int *ptr)
 unsigned long __xchg8(char x, volatile char *ptr)
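
Applied to __xchg64, that would keep the body unchanged and just move the qualifier into
the parameter:

unsigned long __xchg64(unsigned long x, volatile unsigned long *ptr)
{
        unsigned long temp, flags;

        /* every access through ptr is now volatile-qualified */
        _atomic_spin_lock_irqsave(ptr, flags);
        temp = *ptr;
        *ptr = x;
        _atomic_spin_unlock_irqrestore(ptr, flags);
        return temp;
}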

Helge


>
> -	_atomic_spin_lock_irqsave(ptr, flags);
> -	temp = *ptr;
> -	*ptr = x;
> -	_atomic_spin_unlock_irqrestore(ptr, flags);
> +	_atomic_spin_lock_irqsave(p, flags);
> +	temp = *p;
> +	*p = x;
> +	_atomic_spin_unlock_irqrestore(p, flags);
>  	return temp;
>  }
>  #endif
> @@ -33,12 +34,13 @@ unsigned long __xchg64(unsigned long x, unsigned long *ptr)
>  unsigned long __xchg32(int x, int *ptr)
>  {
>  	unsigned long flags;
> +	volatile int *p = ptr;
>  	long temp;
>
> -	_atomic_spin_lock_irqsave(ptr, flags);
> -	temp = (long) *ptr;	/* XXX - sign extension wanted? */
> -	*ptr = x;
> -	_atomic_spin_unlock_irqrestore(ptr, flags);
> +	_atomic_spin_lock_irqsave(p, flags);
> +	temp = (long) *p;	/* XXX - sign extension wanted? */
> +	*p = x;
> +	_atomic_spin_unlock_irqrestore(p, flags);
>  	return (unsigned long)temp;
>  }
>
> @@ -46,12 +48,13 @@ unsigned long __xchg32(int x, int *ptr)
>  unsigned long __xchg8(char x, char *ptr)
>  {
>  	unsigned long flags;
> +	volatile char *p = ptr;
>  	long temp;
>
> -	_atomic_spin_lock_irqsave(ptr, flags);
> -	temp = (long) *ptr;	/* XXX - sign extension wanted? */
> -	*ptr = x;
> -	_atomic_spin_unlock_irqrestore(ptr, flags);
> +	_atomic_spin_lock_irqsave(p, flags);
> +	temp = (long) *p;	/* XXX - sign extension wanted? */
> +	*p = x;
> +	_atomic_spin_unlock_irqrestore(p, flags);
>  	return (unsigned long)temp;
>  }
>
John David Anglin June 14, 2020, 2:04 p.m. UTC | #2
On 2020-06-13 7:08 p.m., Helge Deller wrote:
> Regarding your changes to bitops.c, can't we instead
> simply tag the parameter ptr with "volatile", e.g.
>  unsigned long __xchg64(unsigned long x, volatile unsigned long *ptr)
>  unsigned long __xchg32(int x, volatile int *ptr)
>  unsigned long __xchg8(char x, volatile char *ptr)
In developing this change, I intentionally avoided changing any interfaces.  This would certainly
have the same effect and is similar to what is done in the __cmpxchg_u64 and __cmpxchg_u32 routines.

Dave
Rolf Eike Beer June 16, 2020, 2:46 p.m. UTC | #3
> The fix is to use a volatile pointer for the accesses in spinlocks.  This
> prevents gcc from optimizing the accesses.
> 
> I have now run 5.6.1[78] kernels for about 4 days without a stall or any
> other obvious kernel issue.

It does not cleanly apply to 5.7.2, but the parts that do apply have made my 
C8000 run longer in the gcc testsuite than before. I will run it for a few 
days more to be sure, but it looks good so far.

My testcase is: run several CMake nightlies every day by cron, and run emerge jobs
for Gentoo stabilization in a chroot.  For the last few attempts these always
tried to run the gcc 8.4.0 testsuite at the end, which eventually killed the machine.

Eike
John David Anglin June 16, 2020, 2:57 p.m. UTC | #4
On 2020-06-16 10:46 a.m., Rolf Eike Beer wrote:
> It does not cleanly apply to 5.7.2, but the parts that do apply have made my
> C8000 run longer in the gcc testsuite than before. I will run it for a few
> days more to be sure, but it looks good so far.
The important parts are the changes to atomic.h and possibly futex.h.  The routines are inlined and subject
to the optimization problem that I mentioned in the original post.

I will post an updated patch against trunk in a day or two.  The Debian gcc-snapshot package is
building on mx3210 and phantom.  This package almost always failed to build on phantom.

Dave

John David Anglin June 17, 2020, 2:41 p.m. UTC | #5
On 2020-06-16 10:57 a.m., John David Anglin wrote:
> I will post an updated patch against trunk in a day or two.  The Debian gcc-snapshot package is
> building on mx3210 and phantom.  This package almost always failed to build on phantom.

Sadly, the patch doesn't fix the problem:

watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [pr58417.exe:23900]
Modules linked in: dm_mod dax binfmt_misc ext4 crc16 jbd2 ext2 mbcache sg ipmi_watchdog ipmi_si ipmi_poweroff ipmi_devintf ipmi_msghandler nfsd
ip_tables x_tables ipv6 autofs4 xfs libcrc32c crc32c_generic raid10 raid1 raid0 multipath linear md_mod ses enclosure sd_mod scsi_transport_sas
t10_pi uas usb_storage sr_mod cdrom ata_generic ohci_pci ehci_pci ohci_hcd ehci_hcd pata_cmd64x sym53c8xx scsi_transport_spi usbcore libata
usb_common tg3 scsi_mod
CPU: 1 PID: 23900 Comm: pr58417.exe Tainted: G             L    5.6.18+ #2
Hardware name: 9000/800/rp3440

     YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI
PSW: 00001000000001101111111100001111 Tainted: G             L
r00-03  000000ff0806ff0f 0000000040891dc0 000000004037d234 0000004123e50960
r04-07  000000004086fdc0 0000000040ab31ac 00000000001354d6 0000000000000ca8
r08-11  0000000040b24710 0000004123e50348 0000000040a1d280 0000004123e50a80
r12-15  0000004123e50994 0000000141e69ec8 00000000b8883cd5 0000004119979948
r16-19  0000000040b253b8 0000004123e50968 0000000000ce2e75 0000004123e50990
r20-23  000000000800000f 0000000e868fecc9 0de501fab0949db0 0000004119979983
r24-27  0000004123e50994 0000004123e50348 0000000141e69ec8 000000004086fdc0
r28-31  0000000000000001 0000004123e50a80 0000004123e50ab0 0000004123e50000
sr00-03  0000000008b81800 0000000000000000 0000000000000000 0000000008b81800
sr04-07  0000000000000000 0000000000000000 0000000000000000 0000000000000000

IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004037d200 000000004037d204
 IIR: 51bc0060    ISR: 0000000000000000  IOR: 0000002c0000000b
 CPU:        1   CR30: 0000004123e50000 CR31: ffffffffffffffff
 ORIG_R28: 0000000000000000
 IAOQ[0]: d_alloc_parallel+0x100/0x688
 IAOQ[1]: d_alloc_parallel+0x104/0x688
 RP(r2): d_alloc_parallel+0x134/0x688
Backtrace:
 [<00000000403697bc>] __lookup_slow+0xa4/0x200
 [<000000004036a038>] walk_component+0x288/0x458
 [<000000004036a3e0>] link_path_walk+0x1d8/0x630
 [<000000004036ace4>] path_openat+0x10c/0x1370
 [<000000004036ed78>] do_filp_open+0x88/0x120
 [<000000004034f758>] do_sys_openat2+0x1e8/0x310
 [<00000000403513dc>] do_sys_open+0x64/0x88
 [<00000000403515e8>] compat_sys_openat+0x20/0x38
 [<0000000040180054>] syscall_exit+0x0/0x14

watchdog: BUG: soft lockup - CPU#3 stuck for 22s! [sh:23899]
Modules linked in: dm_mod dax binfmt_misc ext4 crc16 jbd2 ext2 mbcache sg ipmi_watchdog ipmi_si ipmi_poweroff ipmi_devintf ipmi_msghandler nfsd
ip_tables x_tables ipv6 autofs4 xfs libcrc32c crc32c_generic raid10 raid1 raid0 multipath linear md_mod ses enclosure sd_mod scsi_transport_sas
t10_pi uas usb_storage sr_mod cdrom ata_generic ohci_pci ehci_pci ohci_hcd ehci_hcd pata_cmd64x sym53c8xx scsi_transport_spi usbcore libata
usb_common tg3 scsi_mod
CPU: 3 PID: 23899 Comm: sh Tainted: G             L    5.6.18+ #2
Hardware name: 9000/800/rp3440

     YZrvWESTHLNXBCVMcbcbcbcbOGFRQPDI
PSW: 00001000000001101100000000001111 Tainted: G             L
r00-03  000000000806c00f 00000000408a6dc0 000000004037af30 000000409ef1cab0
r04-07  000000004086fdc0 0000000000000ca8 0000000040b24710 00000040b7ba73a0
r08-11  0000000040b253b8 0000004127c7eab8 00000040b7ba73f8 000000409ef1ca80
r12-15  0000000000000019 00000040ec69907e 000000409ef1c338 000000000000002f
r16-19  000000409ef1c4e0 0000000000000001 0000000000000000 0000000000000001
r20-23  000000409ef1c000 0000000000000000 0000000000000000 00000040b7ba73db
r24-27  0000000000ce2e75 0000000000ce2e74 00000040b7ba73a0 000000004086fdc0
r28-31  00000040b7ba7469 000000409ef1ca80 000000409ef1cb60 0000000040ab31ac
sr00-03  0000000008b80800 0000000000000000 0000000000000000 0000000008b80800
sr04-07  0000000000000000 0000000000000000 0000000000000000 0000000000000000

IASQ: 0000000000000000 0000000000000000 IAOQ: 000000004037a5e0 000000004037a5e4
 IIR: 0d0010dc    ISR: 0000000000000000  IOR: 000000000000001f
 CPU:        3   CR30: 000000409ef1c000 CR31: ffffffffffffffff
 ORIG_R28: 00000000401dfa04
 IAOQ[0]: __d_lookup_done+0x1a8/0x1d0
 IAOQ[1]: __d_lookup_done+0x1ac/0x1d0
 RP(r2): d_add+0x228/0x270
Backtrace:
 [<000000004037af30>] d_add+0x228/0x270
 [<0000000040395fe4>] simple_lookup+0x64/0xa0
 [<00000000403697f8>] __lookup_slow+0xe0/0x200
 [<000000004036a038>] walk_component+0x288/0x458
 [<000000004036a3e0>] link_path_walk+0x1d8/0x630
 [<000000004036ace4>] path_openat+0x10c/0x1370
 [<000000004036ed78>] do_filp_open+0x88/0x120
 [<000000004034f758>] do_sys_openat2+0x1e8/0x310
 [<00000000403513dc>] do_sys_open+0x64/0x88
 [<00000000403515e8>] compat_sys_openat+0x20/0x38
 [<0000000040180054>] syscall_exit+0x0/0x14
Rolf Eike Beer June 17, 2020, 7:31 p.m. UTC | #6
On Wednesday, June 17, 2020, 16:41:44 CEST, John David Anglin wrote:
> On 2020-06-16 10:57 a.m., John David Anglin wrote:
> > I will post an updated patch against trunk in a day or two.  The Debian
> > gcc-snapshot package is building on mx3210 and phantom.  This package
> > almost always failed to build on phantom.
> Sadly, the patch doesn't fix the problem:

Whatever the remaining issue is, for me it makes things much better.  So you may
add:

Tested-by: Rolf Eike Beer <eike-kernel@sf-tec.de>

Greetings

Eike
Rolf Eike Beer June 18, 2020, 5:29 a.m. UTC | #7
On Wednesday, June 17, 2020, 21:31:40 CEST, Rolf Eike Beer wrote:
> On Wednesday, June 17, 2020, 16:41:44 CEST, John David Anglin wrote:
> > On 2020-06-16 10:57 a.m., John David Anglin wrote:
> > > I will post an updated patch against trunk in a day or two.  The Debian
> > > gcc-snapshot package is building on mx3210 and phantom.  This package
> > > almost always failed to build on phantom.
> > Sadly, the patch doesn't fix the problem:
> Whatever the remaining issue is, for me it makes things much better.  So you
> may add:
> 
> Tested-by: Rolf Eike Beer <eike-kernel@sf-tec.de>

And now that we have talked about it, the machine is dead:

[209974.638738] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[209974.638738] rcu:    3-...!: (0 ticks this GP) idle=64a/1/0x4000000000000000 softirq=20658539/20658539 fqs=0
[209974.638738]         (detected by 0, t=5505612 jiffies, g=39395637, q=10)
[209974.638738] Task dump for CPU 3:
[209974.638738] sh              R  running task        0  9458   9177 0x00000012
[209974.638738] Backtrace:
[209974.638738]
[209974.638738] rcu: rcu_sched kthread starved for 5505612 jiffies! g39395637 f0x2 RCU_GP_WAIT_FQS(5) ->state=0x200 ->cpu=1
[209974.638738] rcu: RCU grace-period kthread stack dump:
[209974.638738] rcu_sched       R    0    10      2 0x00000000
[209974.638738] Backtrace:

I still think that patch is a step in the right direction.

Eike

Patch

diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 118953d41763..0090c9606313 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -59,11 +59,11 @@  extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
 static __inline__ void atomic_set(atomic_t *v, int i)
 {
 	unsigned long flags;
-	_atomic_spin_lock_irqsave(v, flags);
+	volatile int *p = &v->counter;

-	v->counter = i;
-
-	_atomic_spin_unlock_irqrestore(v, flags);
+	_atomic_spin_lock_irqsave(p, flags);
+	*p = i;
+	_atomic_spin_unlock_irqrestore(p, flags);
 }

 #define atomic_set_release(v, i)	atomic_set((v), (i))
@@ -81,21 +81,23 @@  static __inline__ int atomic_read(const atomic_t *v)
 static __inline__ void atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned long flags;						\
+	volatile int *p = &v->counter;					\
 									\
-	_atomic_spin_lock_irqsave(v, flags);				\
-	v->counter c_op i;						\
-	_atomic_spin_unlock_irqrestore(v, flags);			\
+	_atomic_spin_lock_irqsave(p, flags);				\
+	*p c_op i;							\
+	_atomic_spin_unlock_irqrestore(p, flags);			\
 }									\

 #define ATOMIC_OP_RETURN(op, c_op)					\
 static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
+	volatile int *p = &v->counter;					\
 	int ret;							\
 									\
-	_atomic_spin_lock_irqsave(v, flags);				\
-	ret = (v->counter c_op i);					\
-	_atomic_spin_unlock_irqrestore(v, flags);			\
+	_atomic_spin_lock_irqsave(p, flags);				\
+	ret = (*p c_op i);						\
+	_atomic_spin_unlock_irqrestore(p, flags);			\
 									\
 	return ret;							\
 }
@@ -104,12 +106,14 @@  static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 static __inline__ int atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
-	int ret;							\
+	volatile int *p = &v->counter;					\
+	int ret, tmp;							\
 									\
-	_atomic_spin_lock_irqsave(v, flags);				\
-	ret = v->counter;						\
-	v->counter c_op i;						\
-	_atomic_spin_unlock_irqrestore(v, flags);			\
+	_atomic_spin_lock_irqsave(p, flags);				\
+	ret = *p;							\
+	tmp = ret;							\
+	*p = (tmp c_op i);						\
+	_atomic_spin_unlock_irqrestore(p, flags);			\
 									\
 	return ret;							\
 }
@@ -146,21 +150,23 @@  ATOMIC_OPS(xor, ^=)
 static __inline__ void atomic64_##op(s64 i, atomic64_t *v)		\
 {									\
 	unsigned long flags;						\
+	volatile s64 *p = &v->counter;					\
 									\
-	_atomic_spin_lock_irqsave(v, flags);				\
-	v->counter c_op i;						\
-	_atomic_spin_unlock_irqrestore(v, flags);			\
+	_atomic_spin_lock_irqsave(p, flags);				\
+	*p c_op i;							\
+	_atomic_spin_unlock_irqrestore(p, flags);			\
 }									\

 #define ATOMIC64_OP_RETURN(op, c_op)					\
 static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v)	\
 {									\
 	unsigned long flags;						\
+	volatile s64 *p = &v->counter;					\
 	s64 ret;							\
 									\
-	_atomic_spin_lock_irqsave(v, flags);				\
-	ret = (v->counter c_op i);					\
-	_atomic_spin_unlock_irqrestore(v, flags);			\
+	_atomic_spin_lock_irqsave(p, flags);				\
+	ret = (*p c_op i);						\
+	_atomic_spin_unlock_irqrestore(p, flags);			\
 									\
 	return ret;							\
 }
@@ -169,12 +175,14 @@  static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v)	\
 static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v)		\
 {									\
 	unsigned long flags;						\
-	s64 ret;							\
+	volatile s64 *p = &v->counter;					\
+	s64 ret, tmp;							\
 									\
-	_atomic_spin_lock_irqsave(v, flags);				\
-	ret = v->counter;						\
-	v->counter c_op i;						\
-	_atomic_spin_unlock_irqrestore(v, flags);			\
+	_atomic_spin_lock_irqsave(p, flags);				\
+	ret = *p;							\
+	tmp = ret;							\
+	*p = (tmp c_op i);						\
+	_atomic_spin_unlock_irqrestore(p, flags);			\
 									\
 	return ret;							\
 }
@@ -205,11 +213,11 @@  static __inline__ void
 atomic64_set(atomic64_t *v, s64 i)
 {
 	unsigned long flags;
-	_atomic_spin_lock_irqsave(v, flags);
-
-	v->counter = i;
+	volatile s64 *p = &v->counter;

-	_atomic_spin_unlock_irqrestore(v, flags);
+	_atomic_spin_lock_irqsave(p, flags);
+	*p = i;
+	_atomic_spin_unlock_irqrestore(p, flags);
 }

 static __inline__ s64
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index d2c3e4106851..a9e5366b5511 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -36,14 +36,15 @@  static inline int
 arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 {
 	unsigned long int flags;
+	volatile u32 __user *u = uaddr;
 	int oldval, ret;
 	u32 tmp;

-	_futex_spin_lock_irqsave(uaddr, &flags);
+	_futex_spin_lock_irqsave(u, &flags);
 	pagefault_disable();

 	ret = -EFAULT;
-	if (unlikely(get_user(oldval, uaddr) != 0))
+	if (unlikely(get_user(oldval, u) != 0))
 		goto out_pagefault_enable;

 	ret = 0;
@@ -69,12 +70,12 @@  arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
 		ret = -ENOSYS;
 	}

-	if (ret == 0 && unlikely(put_user(tmp, uaddr) != 0))
+	if (ret == 0 && unlikely(put_user(tmp, u) != 0))
 		ret = -EFAULT;

 out_pagefault_enable:
 	pagefault_enable();
-	_futex_spin_unlock_irqrestore(uaddr, &flags);
+	_futex_spin_unlock_irqrestore(u, &flags);

 	if (!ret)
 		*oval = oldval;
@@ -88,14 +89,15 @@  futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 {
 	u32 val;
 	unsigned long flags;
+	volatile u32 __user *u = uaddr;

 	/* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
 	 * our gateway page, and causes no end of trouble...
 	 */
-	if (uaccess_kernel() && !uaddr)
+	if (uaccess_kernel() && !u)
 		return -EFAULT;

-	if (!access_ok(uaddr, sizeof(u32)))
+	if (!access_ok(u, sizeof(u32)))
 		return -EFAULT;

 	/* HPPA has no cmpxchg in hardware and therefore the
@@ -104,19 +106,19 @@  futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	 * address. This should scale to a couple of CPUs.
 	 */

-	_futex_spin_lock_irqsave(uaddr, &flags);
-	if (unlikely(get_user(val, uaddr) != 0)) {
-		_futex_spin_unlock_irqrestore(uaddr, &flags);
+	_futex_spin_lock_irqsave(u, &flags);
+	if (unlikely(get_user(val, u) != 0)) {
+		_futex_spin_unlock_irqrestore(u, &flags);
 		return -EFAULT;
 	}

-	if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) {
-		_futex_spin_unlock_irqrestore(uaddr, &flags);
+	if (val == oldval && unlikely(put_user(newval, u) != 0)) {
+		_futex_spin_unlock_irqrestore(u, &flags);
 		return -EFAULT;
 	}

 	*uval = val;
-	_futex_spin_unlock_irqrestore(uaddr, &flags);
+	_futex_spin_unlock_irqrestore(u, &flags);

 	return 0;
 }
diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c
index 70ffbcf889b8..5904966ca957 100644
--- a/arch/parisc/lib/bitops.c
+++ b/arch/parisc/lib/bitops.c
@@ -21,11 +21,12 @@  arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
 unsigned long __xchg64(unsigned long x, unsigned long *ptr)
 {
 	unsigned long temp, flags;
+	volatile unsigned long *p = ptr;

-	_atomic_spin_lock_irqsave(ptr, flags);
-	temp = *ptr;
-	*ptr = x;
-	_atomic_spin_unlock_irqrestore(ptr, flags);
+	_atomic_spin_lock_irqsave(p, flags);
+	temp = *p;
+	*p = x;
+	_atomic_spin_unlock_irqrestore(p, flags);
 	return temp;
 }
 #endif
@@ -33,12 +34,13 @@  unsigned long __xchg64(unsigned long x, unsigned long *ptr)
 unsigned long __xchg32(int x, int *ptr)
 {
 	unsigned long flags;
+	volatile int *p = ptr;
 	long temp;

-	_atomic_spin_lock_irqsave(ptr, flags);
-	temp = (long) *ptr;	/* XXX - sign extension wanted? */
-	*ptr = x;
-	_atomic_spin_unlock_irqrestore(ptr, flags);
+	_atomic_spin_lock_irqsave(p, flags);
+	temp = (long) *p;	/* XXX - sign extension wanted? */
+	*p = x;
+	_atomic_spin_unlock_irqrestore(p, flags);
 	return (unsigned long)temp;
 }

@@ -46,12 +48,13 @@  unsigned long __xchg32(int x, int *ptr)
 unsigned long __xchg8(char x, char *ptr)
 {
 	unsigned long flags;
+	volatile char *p = ptr;
 	long temp;

-	_atomic_spin_lock_irqsave(ptr, flags);
-	temp = (long) *ptr;	/* XXX - sign extension wanted? */
-	*ptr = x;
-	_atomic_spin_unlock_irqrestore(ptr, flags);
+	_atomic_spin_lock_irqsave(p, flags);
+	temp = (long) *p;	/* XXX - sign extension wanted? */
+	*p = x;
+	_atomic_spin_unlock_irqrestore(p, flags);
 	return (unsigned long)temp;
 }