| Message ID | 1606225437-22948-2-git-send-email-guoren@kernel.org |
|---|---|
| State | New, archived |
| Series | [1/5] riscv: Coding convention for xchg |
On Tue, Nov 24, 2020 at 01:43:54PM +0000, guoren@kernel.org wrote: > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild > index 59dd7be..6f5f438 100644 > --- a/arch/riscv/include/asm/Kbuild > +++ b/arch/riscv/include/asm/Kbuild > @@ -6,3 +6,6 @@ generic-y += kvm_para.h > generic-y += local64.h > generic-y += user.h > generic-y += vmlinux.lds.h > +generic-y += mcs_spinlock.h > +generic-y += qrwlock.h > +generic-y += qspinlock.h > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > index 5609185..e178700 100644 > --- a/arch/riscv/include/asm/cmpxchg.h > +++ b/arch/riscv/include/asm/cmpxchg.h > @@ -16,7 +16,43 @@ > __typeof__(ptr) __ptr = (ptr); \ > __typeof__(new) __new = (new); \ > __typeof__(*(ptr)) __ret; \ > + register unsigned long __rc, tmp, align, addr; \ > switch (size) { \ > + case 2: \ > + align = ((unsigned long) __ptr & 0x3); \ > + addr = ((unsigned long) __ptr & ~0x3); \ > + if (align) { \ > + __asm__ __volatile__ ( \ > + "0: lr.w %0, 0(%z4)\n" \ > + " move %1, %0\n" \ > + " slli %1, %1, 16\n" \ > + " srli %1, %1, 16\n" \ > + " move %2, %z3\n" \ > + " slli %2, %2, 16\n" \ > + " or %1, %2, %1\n" \ > + " sc.w %2, %1, 0(%z4)\n" \ > + " bnez %2, 0b\n" \ > + " srli %0, %0, 16\n" \ > + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ > + : "rJ" (__new), "rJ"(addr) \ > + : "memory"); \ > + } else { \ > + __asm__ __volatile__ ( \ > + "0: lr.w %0, (%z4)\n" \ > + " move %1, %0\n" \ > + " srli %1, %1, 16\n" \ > + " slli %1, %1, 16\n" \ > + " move %2, %z3\n" \ > + " or %1, %2, %1\n" \ > + " sc.w %2, %1, 0(%z4)\n" \ > + " bnez %2, 0b\n" \ > + " slli %0, %0, 16\n" \ > + " srli %0, %0, 16\n" \ > + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ > + : "rJ" (__new), "rJ"(addr) \ > + : "memory"); \ > + } \ > + break; \ > case 4: \ > __asm__ __volatile__ ( \ > " amoswap.w %0, %2, %1\n" \ I'm pretty sure there's a handfull of implementations like this out there... if only we could share. Anyway, this too should be an independent patch.
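For readers following along, the trick the quoted assembly implements (exchange 16 bits by operating on the naturally aligned 32-bit word that contains them, carrying the neighbouring halfword over unchanged) can be written as a rough, compilable C11 sketch. This is not the patch's code, which uses raw LR.W/SC.W; the function name is invented for illustration and a little-endian layout is assumed, as on RISC-V.

```c
#include <stdint.h>
#include <stdatomic.h>

/*
 * Illustrative sketch only: emulate a 16-bit atomic exchange with a
 * 32-bit compare-and-swap on the containing aligned word.  The other
 * halfword is preserved on every retry, which is what the slli/srli/or
 * sequence in the assembly above achieves.  Little-endian assumed.
 */
static uint16_t xchg16_emulated(uint16_t *p, uint16_t newval)
{
	uintptr_t addr = (uintptr_t)p;
	_Atomic uint32_t *word = (_Atomic uint32_t *)(addr & ~(uintptr_t)0x3);
	unsigned int shift = (addr & 0x2) * 8;	/* 0 or 16 */
	uint32_t mask = 0xffffu << shift;
	uint32_t old = atomic_load_explicit(word, memory_order_relaxed);
	uint32_t val;

	do {
		/* Keep the neighbouring halfword, splice in the new value. */
		val = (old & ~mask) | ((uint32_t)newval << shift);
	} while (!atomic_compare_exchange_weak_explicit(word, &old, val,
				memory_order_seq_cst, memory_order_relaxed));

	return (uint16_t)((old & mask) >> shift);
}
```

The LR/SC version in the patch folds the retry into a single reservation loop instead of looping around a compare-and-swap primitive, which is the cost difference Peter and Arnd discuss further down the thread.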
On Tue, Nov 24, 2020 at 3:39 PM Peter Zijlstra <peterz@infradead.org> wrote: > On Tue, Nov 24, 2020 at 01:43:54PM +0000, guoren@kernel.org wrote: > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild > > + if (align) { \ > > + __asm__ __volatile__ ( \ > > + "0: lr.w %0, 0(%z4)\n" \ > > + " move %1, %0\n" \ > > + " slli %1, %1, 16\n" \ > > + " srli %1, %1, 16\n" \ > > + " move %2, %z3\n" \ > > + " slli %2, %2, 16\n" \ > > + " or %1, %2, %1\n" \ > > + " sc.w %2, %1, 0(%z4)\n" \ > > + " bnez %2, 0b\n" \ > > + " srli %0, %0, 16\n" \ > > + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ > > + : "rJ" (__new), "rJ"(addr) \ > > + : "memory"); \ > > I'm pretty sure there's a handfull of implementations like this out > there... if only we could share. Isn't this effectively the same as the "_Q_PENDING_BITS != 8" version of xchg_tail()? If nothing else needs xchg() on a 16-bit value, maybe changing the #ifdef in the qspinlock code is enough. Only around half the architectures actually implement 8-bit and 16-bit cmpxchg() and xchg(), it might even be worth trying to eventually change the interface to not do it at all, but instead have explicit cmpxchg8() and cmpxchg16() helpers for the few files that do use them. Arnd
Thx Peter, On Tue, Nov 24, 2020 at 10:39 PM Peter Zijlstra <peterz@infradead.org> wrote: > > On Tue, Nov 24, 2020 at 01:43:54PM +0000, guoren@kernel.org wrote: > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild > > index 59dd7be..6f5f438 100644 > > --- a/arch/riscv/include/asm/Kbuild > > +++ b/arch/riscv/include/asm/Kbuild > > @@ -6,3 +6,6 @@ generic-y += kvm_para.h > > generic-y += local64.h > > generic-y += user.h > > generic-y += vmlinux.lds.h > > +generic-y += mcs_spinlock.h > > +generic-y += qrwlock.h > > +generic-y += qspinlock.h > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h > > index 5609185..e178700 100644 > > --- a/arch/riscv/include/asm/cmpxchg.h > > +++ b/arch/riscv/include/asm/cmpxchg.h > > @@ -16,7 +16,43 @@ > > __typeof__(ptr) __ptr = (ptr); \ > > __typeof__(new) __new = (new); \ > > __typeof__(*(ptr)) __ret; \ > > + register unsigned long __rc, tmp, align, addr; \ > > switch (size) { \ > > + case 2: \ > > + align = ((unsigned long) __ptr & 0x3); \ > > + addr = ((unsigned long) __ptr & ~0x3); \ > > + if (align) { \ > > + __asm__ __volatile__ ( \ > > + "0: lr.w %0, 0(%z4)\n" \ > > + " move %1, %0\n" \ > > + " slli %1, %1, 16\n" \ > > + " srli %1, %1, 16\n" \ > > + " move %2, %z3\n" \ > > + " slli %2, %2, 16\n" \ > > + " or %1, %2, %1\n" \ > > + " sc.w %2, %1, 0(%z4)\n" \ > > + " bnez %2, 0b\n" \ > > + " srli %0, %0, 16\n" \ > > + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ > > + : "rJ" (__new), "rJ"(addr) \ > > + : "memory"); \ > > + } else { \ > > + __asm__ __volatile__ ( \ > > + "0: lr.w %0, (%z4)\n" \ > > + " move %1, %0\n" \ > > + " srli %1, %1, 16\n" \ > > + " slli %1, %1, 16\n" \ > > + " move %2, %z3\n" \ > > + " or %1, %2, %1\n" \ > > + " sc.w %2, %1, 0(%z4)\n" \ > > + " bnez %2, 0b\n" \ > > + " slli %0, %0, 16\n" \ > > + " srli %0, %0, 16\n" \ > > + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ > > + : "rJ" (__new), "rJ"(addr) \ > > + : "memory"); \ > > + } \ > > + break; \ > > case 4: \ > > __asm__ __volatile__ ( \ > > " amoswap.w %0, %2, %1\n" \ > > I'm pretty sure there's a handfull of implementations like this out > there... if only we could share. Michael has sent qspinlock before, ref to Link below. He reused mips' code. Link: https://lore.kernel.org/linux-riscv/20190211043829.30096-1-michaeljclark@mac.com/ Which short xchg implementation do you prefer (Mine or his)? > > Anyway, this too should be an independent patch. Ok, I'll separate it into two patches, 1. implement short xchg 2. qspinlock enabled based on Michael's patch
On Tue, Nov 24, 2020 at 11:00 PM Arnd Bergmann <arnd@kernel.org> wrote: > > On Tue, Nov 24, 2020 at 3:39 PM Peter Zijlstra <peterz@infradead.org> wrote: > > On Tue, Nov 24, 2020 at 01:43:54PM +0000, guoren@kernel.org wrote: > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild > > > > + if (align) { \ > > > + __asm__ __volatile__ ( \ > > > + "0: lr.w %0, 0(%z4)\n" \ > > > + " move %1, %0\n" \ > > > + " slli %1, %1, 16\n" \ > > > + " srli %1, %1, 16\n" \ > > > + " move %2, %z3\n" \ > > > + " slli %2, %2, 16\n" \ > > > + " or %1, %2, %1\n" \ > > > + " sc.w %2, %1, 0(%z4)\n" \ > > > + " bnez %2, 0b\n" \ > > > + " srli %0, %0, 16\n" \ > > > + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ > > > + : "rJ" (__new), "rJ"(addr) \ > > > + : "memory"); \ > > > > I'm pretty sure there's a handfull of implementations like this out > > there... if only we could share. > > Isn't this effectively the same as the "_Q_PENDING_BITS != 8" > version of xchg_tail()? This can be concluded as the different effectiveness between cmpxchg and xchg. For the arch which only has lr/sc instructions, the cmpxchg & xchg are similar. #if _Q_PENDING_BITS == 8 static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) { /* * We can use relaxed semantics since the caller ensures that the * MCS node is properly initialized before updating the tail. */ return (u32)xchg_relaxed(&lock->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; } #else /* _Q_PENDING_BITS == 8 */ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) { u32 old, new, val = atomic_read(&lock->val); for (;;) { new = (val & _Q_LOCKED_PENDING_MASK) | tail; /* * We can use relaxed semantics since the caller ensures that * the MCS node is properly initialized before updating the * tail. */ old = atomic_cmpxchg_relaxed(&lock->val, val, new); if (old == val) break; val = old; } return old; } #endif /* _Q_PENDING_BITS == 8 */ > > If nothing else needs xchg() on a 16-bit value, maybe > changing the #ifdef in the qspinlock code is enough. > > Only around half the architectures actually implement 8-bit > and 16-bit cmpxchg() and xchg(), it might even be worth trying > to eventually change the interface to not do it at all, but > instead have explicit cmpxchg8() and cmpxchg16() helpers > for the few files that do use them. > > Arnd
On Tue, Nov 24, 2020 at 04:00:14PM +0100, Arnd Bergmann wrote: > On Tue, Nov 24, 2020 at 3:39 PM Peter Zijlstra <peterz@infradead.org> wrote: > > On Tue, Nov 24, 2020 at 01:43:54PM +0000, guoren@kernel.org wrote: > > > diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild > > > > + if (align) { \ > > > + __asm__ __volatile__ ( \ > > > + "0: lr.w %0, 0(%z4)\n" \ > > > + " move %1, %0\n" \ > > > + " slli %1, %1, 16\n" \ > > > + " srli %1, %1, 16\n" \ > > > + " move %2, %z3\n" \ > > > + " slli %2, %2, 16\n" \ > > > + " or %1, %2, %1\n" \ > > > + " sc.w %2, %1, 0(%z4)\n" \ > > > + " bnez %2, 0b\n" \ > > > + " srli %0, %0, 16\n" \ > > > + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ > > > + : "rJ" (__new), "rJ"(addr) \ > > > + : "memory"); \ > > > > I'm pretty sure there's a handfull of implementations like this out > > there... if only we could share. > > Isn't this effectively the same as the "_Q_PENDING_BITS != 8" > version of xchg_tail()? Pretty much I suppose. > If nothing else needs xchg() on a 16-bit value, maybe > changing the #ifdef in the qspinlock code is enough. Like the below? I think the native LL/SC variant is slightly better than the cmpxchg loop. The problem is that cmpxchg() is ll/sc and then we loop that, instead of only having the ll/sc loop. > Only around half the architectures actually implement 8-bit > and 16-bit cmpxchg() and xchg(), it might even be worth trying > to eventually change the interface to not do it at all, but > instead have explicit cmpxchg8() and cmpxchg16() helpers > for the few files that do use them. Yeah, many RISCs don't have sub-word atomics. Not sure how many other users there are. qspinlock certainly is the most popular I suppose. --- diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index cbff6ba53d56..7049fb2af0b2 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -163,26 +163,6 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL); } -/* - * xchg_tail - Put in the new queue tail code word & retrieve previous one - * @lock : Pointer to queued spinlock structure - * @tail : The new queue tail code word - * Return: The previous queue tail code word - * - * xchg(lock, tail), which heads an address dependency - * - * p,*,* -> n,*,* ; prev = xchg(lock, node) - */ -static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) -{ - /* - * We can use relaxed semantics since the caller ensures that the - * MCS node is properly initialized before updating the tail. - */ - return (u32)xchg_relaxed(&lock->tail, - tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; -} - #else /* _Q_PENDING_BITS == 8 */ /** @@ -207,6 +187,32 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); } +#endif /* _Q_PENDING_BITS == 8 */ + +#if _Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16 + +/* + * xchg_tail - Put in the new queue tail code word & retrieve previous one + * @lock : Pointer to queued spinlock structure + * @tail : The new queue tail code word + * Return: The previous queue tail code word + * + * xchg(lock, tail), which heads an address dependency + * + * p,*,* -> n,*,* ; prev = xchg(lock, node) + */ +static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) +{ + /* + * We can use relaxed semantics since the caller ensures that the + * MCS node is properly initialized before updating the tail. 
+ */ + return (u32)xchg_relaxed(&lock->tail, + tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; +} + +#else /* !(_Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16) */ + /** * xchg_tail - Put in the new queue tail code word & retrieve previous one * @lock : Pointer to queued spinlock structure @@ -236,7 +242,8 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) } return old; } -#endif /* _Q_PENDING_BITS == 8 */ + +#endif /* _Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16 */ /** * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
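If a scheme along these lines were adopted, an architecture that does have a native 16-bit xchg() would presumably opt back in to the single-instruction xchg_tail() by making the symbol visible to the qspinlock core, for example from its own asm/qspinlock.h before pulling in the generic header. A hypothetical sketch: ARCH_HAS_XCHG16 exists only in the RFC diff above, not in the mainline kernel, and the file path is invented.

```c
/* arch/foo/include/asm/qspinlock.h (hypothetical) */
#ifndef _ASM_FOO_QSPINLOCK_H
#define _ASM_FOO_QSPINLOCK_H

/* This arch has a real 16-bit xchg(), so keep the xchg_relaxed() tail. */
#define ARCH_HAS_XCHG16	1

#include <asm-generic/qspinlock.h>

#endif /* _ASM_FOO_QSPINLOCK_H */
```

An architecture that never defines the symbol sees it evaluate to 0 in the #if above and automatically falls back to the cmpxchg() loop.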
On Wed, Nov 25, 2020 at 08:52:23AM +0800, Guo Ren wrote: > > I'm pretty sure there's a handfull of implementations like this out > > there... if only we could share. > Michael has sent qspinlock before, ref to Link below. He reused mips' code. > > Link: https://lore.kernel.org/linux-riscv/20190211043829.30096-1-michaeljclark@mac.com/ > > Which short xchg implementation do you prefer (Mine or his)? Well, it would be very nice to have mips/riscv/csky all use the same header to implement these I suppose. But then we're back to a cmpxchg-loop, in which case Arnd's suggestion isn't worse.
On Wed, Nov 25, 2020 at 03:16:45PM +0100, Peter Zijlstra wrote: > @@ -207,6 +187,32 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) > atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); > } > > +#endif /* _Q_PENDING_BITS == 8 */ > + > +#if _Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16 > + > +/* > + * xchg_tail - Put in the new queue tail code word & retrieve previous one > + * @lock : Pointer to queued spinlock structure > + * @tail : The new queue tail code word > + * Return: The previous queue tail code word > + * > + * xchg(lock, tail), which heads an address dependency > + * > + * p,*,* -> n,*,* ; prev = xchg(lock, node) > + */ > +static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) > +{ > + /* > + * We can use relaxed semantics since the caller ensures that the > + * MCS node is properly initialized before updating the tail. > + */ > + return (u32)xchg_relaxed(&lock->tail, > + tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; > +} > + > +#else /* !(_Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16) */ Why can't architectures just implement this with a 32-bit xchg instruction if they don't have one that operates on 16 bits? Sure, they'll store more data, but it's atomic so you shouldn't be able to tell... (ignoring parisc crazy). Also, I'm surprised qspinlock benefits riscv. On arm64, there's nothing in it over tickets for <= 16 CPUs. Will
Hi Will, On Wed, Nov 25, 2020 at 10:31 PM Will Deacon <will@kernel.org> wrote: > > On Wed, Nov 25, 2020 at 03:16:45PM +0100, Peter Zijlstra wrote: > > @@ -207,6 +187,32 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) > > atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); > > } > > > > +#endif /* _Q_PENDING_BITS == 8 */ > > + > > +#if _Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16 > > + > > +/* > > + * xchg_tail - Put in the new queue tail code word & retrieve previous one > > + * @lock : Pointer to queued spinlock structure > > + * @tail : The new queue tail code word > > + * Return: The previous queue tail code word > > + * > > + * xchg(lock, tail), which heads an address dependency > > + * > > + * p,*,* -> n,*,* ; prev = xchg(lock, node) > > + */ > > +static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) > > +{ > > + /* > > + * We can use relaxed semantics since the caller ensures that the > > + * MCS node is properly initialized before updating the tail. > > + */ > > + return (u32)xchg_relaxed(&lock->tail, > > + tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; > > +} > > + > > +#else /* !(_Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16) */ > > Why can't architectures just implement this with a 32-bit xchg instruction > if they don't have one that operates on 16 bits? Sure, they'll store more > data, but it's atomic so you shouldn't be able to tell... (ignoring parisc > crazy). > > Also, I'm surprised qspinlock benefits riscv. On arm64, there's nothing in > it over tickets for <= 16 CPUs. NUMA is on the way: https://lore.kernel.org/linux-riscv/20201119003829.1282810-1-atish.patra@wdc.com/ With your advice, I think we could using tickets lock when <= 16 CPUs and using qspinlock when > 16 CPUs. Is that right? The next patchset plan is: - Using tickets & qspinlock together in riscv. Abandon the 16-bit xchg/cmpxchg implementation. - Abandon qspinlock in csky, because it only supports up to 4-CPU SMP. > > Will
On Thu, Nov 26, 2020 at 09:36:34AM +0800, Guo Ren wrote: > On Wed, Nov 25, 2020 at 10:31 PM Will Deacon <will@kernel.org> wrote: > > On Wed, Nov 25, 2020 at 03:16:45PM +0100, Peter Zijlstra wrote: > > > @@ -207,6 +187,32 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock) > > > atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val); > > > } > > > > > > +#endif /* _Q_PENDING_BITS == 8 */ > > > + > > > +#if _Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16 > > > + > > > +/* > > > + * xchg_tail - Put in the new queue tail code word & retrieve previous one > > > + * @lock : Pointer to queued spinlock structure > > > + * @tail : The new queue tail code word > > > + * Return: The previous queue tail code word > > > + * > > > + * xchg(lock, tail), which heads an address dependency > > > + * > > > + * p,*,* -> n,*,* ; prev = xchg(lock, node) > > > + */ > > > +static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) > > > +{ > > > + /* > > > + * We can use relaxed semantics since the caller ensures that the > > > + * MCS node is properly initialized before updating the tail. > > > + */ > > > + return (u32)xchg_relaxed(&lock->tail, > > > + tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET; > > > +} > > > + > > > +#else /* !(_Q_PENDING_BITS == 8 && ARCH_HAS_XCHG16) */ > > > > Why can't architectures just implement this with a 32-bit xchg instruction > > if they don't have one that operates on 16 bits? Sure, they'll store more > > data, but it's atomic so you shouldn't be able to tell... (ignoring parisc > > crazy). > > > > Also, I'm surprised qspinlock benefits riscv. On arm64, there's nothing in > > it over tickets for <= 16 CPUs. > NUMA is on the way: > https://lore.kernel.org/linux-riscv/20201119003829.1282810-1-atish.patra@wdc.com/ Well, they're patches not hardware (and I only see mention of QEMU over there for the RISCV platform) ;) > With your advice, I think we could using tickets lock when <= 16 CPUs > and using qspinlock when > 16 CPUs. > Is that right? No, when I say "there's nothing in it", it means they're interchangeable. It's just that qspinlock introduces a lot of complexity and I'm not keen massively keen on changing the core code (which is used by many other architectures) just because you don't have a 16-bit xchg() implementation. So if you need qspinlock on riscv (not sure you do), then go ahead and make that your one true lock implementation, but implement 16-bit xchg() at the same time. Bonus points if you implement that in terms of 32-bit xchg() in generic code (might need to watch out for endianness when shifting the new value after aligning the base pointer). Will
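Will's endianness caveat concerns where the 16-bit field lands inside the aligned 32-bit word: on a little-endian machine the halfword at the lower address occupies bits 0..15, on a big-endian machine it occupies bits 16..31. A minimal sketch of that shift computation, using the compiler's predefined byte-order macros; the helper name is invented for illustration and this is not an existing kernel interface.

```c
/*
 * Bit position of the 16-bit value at 'addr' within the aligned 32-bit
 * word that contains it.  Hypothetical helper for illustration only.
 */
static inline unsigned int halfword_shift(unsigned long addr)
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
	return (2 - (addr & 0x2)) * 8;	/* lower address -> upper bits */
#else
	return (addr & 0x2) * 8;	/* lower address -> lower bits */
#endif
}
```

Feeding this shift into a 32-bit cmpxchg() loop like the sketch earlier in the thread would give a generic xchg16() that any architecture with 32-bit atomics could share, which appears to be the direction Will is suggesting.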
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 2d61c4c..d757ba4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -36,6 +36,8 @@ config RISCV select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS select CLONE_BACKWARDS select CLINT_TIMER if !MMU select COMMON_CLK diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 59dd7be..6f5f438 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -6,3 +6,6 @@ generic-y += kvm_para.h generic-y += local64.h generic-y += user.h generic-y += vmlinux.lds.h +generic-y += mcs_spinlock.h +generic-y += qrwlock.h +generic-y += qspinlock.h diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 5609185..e178700 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -16,7 +16,43 @@ __typeof__(ptr) __ptr = (ptr); \ __typeof__(new) __new = (new); \ __typeof__(*(ptr)) __ret; \ + register unsigned long __rc, tmp, align, addr; \ switch (size) { \ + case 2: \ + align = ((unsigned long) __ptr & 0x3); \ + addr = ((unsigned long) __ptr & ~0x3); \ + if (align) { \ + __asm__ __volatile__ ( \ + "0: lr.w %0, 0(%z4)\n" \ + " move %1, %0\n" \ + " slli %1, %1, 16\n" \ + " srli %1, %1, 16\n" \ + " move %2, %z3\n" \ + " slli %2, %2, 16\n" \ + " or %1, %2, %1\n" \ + " sc.w %2, %1, 0(%z4)\n" \ + " bnez %2, 0b\n" \ + " srli %0, %0, 16\n" \ + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ + : "rJ" (__new), "rJ"(addr) \ + : "memory"); \ + } else { \ + __asm__ __volatile__ ( \ + "0: lr.w %0, (%z4)\n" \ + " move %1, %0\n" \ + " srli %1, %1, 16\n" \ + " slli %1, %1, 16\n" \ + " move %2, %z3\n" \ + " or %1, %2, %1\n" \ + " sc.w %2, %1, 0(%z4)\n" \ + " bnez %2, 0b\n" \ + " slli %0, %0, 16\n" \ + " srli %0, %0, 16\n" \ + : "=&r" (__ret), "=&r" (tmp), "=&r" (__rc) \ + : "rJ" (__new), "rJ"(addr) \ + : "memory"); \ + } \ + break; \ case 4: \ __asm__ __volatile__ ( \ " amoswap.w %0, %2, %1\n" \ diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index f4f7fa1..b8deb3a 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -7,129 +7,7 @@ #ifndef _ASM_RISCV_SPINLOCK_H #define _ASM_RISCV_SPINLOCK_H -#include <linux/kernel.h> -#include <asm/current.h> -#include <asm/fence.h> - -/* - * Simple spin lock operations. These provide no fairness guarantees. - */ - -/* FIXME: Replace this with a ticket lock, like MIPS. 
*/ - -#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0) - -static inline void arch_spin_unlock(arch_spinlock_t *lock) -{ - smp_store_release(&lock->lock, 0); -} - -static inline int arch_spin_trylock(arch_spinlock_t *lock) -{ - int tmp = 1, busy; - - __asm__ __volatile__ ( - " amoswap.w %0, %2, %1\n" - RISCV_ACQUIRE_BARRIER - : "=r" (busy), "+A" (lock->lock) - : "r" (tmp) - : "memory"); - - return !busy; -} - -static inline void arch_spin_lock(arch_spinlock_t *lock) -{ - while (1) { - if (arch_spin_is_locked(lock)) - continue; - - if (arch_spin_trylock(lock)) - break; - } -} - -/***********************************************************/ - -static inline void arch_read_lock(arch_rwlock_t *lock) -{ - int tmp; - - __asm__ __volatile__( - "1: lr.w %1, %0\n" - " bltz %1, 1b\n" - " addi %1, %1, 1\n" - " sc.w %1, %1, %0\n" - " bnez %1, 1b\n" - RISCV_ACQUIRE_BARRIER - : "+A" (lock->lock), "=&r" (tmp) - :: "memory"); -} - -static inline void arch_write_lock(arch_rwlock_t *lock) -{ - int tmp; - - __asm__ __volatile__( - "1: lr.w %1, %0\n" - " bnez %1, 1b\n" - " li %1, -1\n" - " sc.w %1, %1, %0\n" - " bnez %1, 1b\n" - RISCV_ACQUIRE_BARRIER - : "+A" (lock->lock), "=&r" (tmp) - :: "memory"); -} - -static inline int arch_read_trylock(arch_rwlock_t *lock) -{ - int busy; - - __asm__ __volatile__( - "1: lr.w %1, %0\n" - " bltz %1, 1f\n" - " addi %1, %1, 1\n" - " sc.w %1, %1, %0\n" - " bnez %1, 1b\n" - RISCV_ACQUIRE_BARRIER - "1:\n" - : "+A" (lock->lock), "=&r" (busy) - :: "memory"); - - return !busy; -} - -static inline int arch_write_trylock(arch_rwlock_t *lock) -{ - int busy; - - __asm__ __volatile__( - "1: lr.w %1, %0\n" - " bnez %1, 1f\n" - " li %1, -1\n" - " sc.w %1, %1, %0\n" - " bnez %1, 1b\n" - RISCV_ACQUIRE_BARRIER - "1:\n" - : "+A" (lock->lock), "=&r" (busy) - :: "memory"); - - return !busy; -} - -static inline void arch_read_unlock(arch_rwlock_t *lock) -{ - __asm__ __volatile__( - RISCV_RELEASE_BARRIER - " amoadd.w x0, %1, %0\n" - : "+A" (lock->lock) - : "r" (-1) - : "memory"); -} - -static inline void arch_write_unlock(arch_rwlock_t *lock) -{ - smp_store_release(&lock->lock, 0); -} +#include <asm/qrwlock.h> +#include <asm/qspinlock.h> #endif /* _ASM_RISCV_SPINLOCK_H */ diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h index f398e76..d033a97 100644 --- a/arch/riscv/include/asm/spinlock_types.h +++ b/arch/riscv/include/asm/spinlock_types.h @@ -6,20 +6,11 @@ #ifndef _ASM_RISCV_SPINLOCK_TYPES_H #define _ASM_RISCV_SPINLOCK_TYPES_H -#ifndef __LINUX_SPINLOCK_TYPES_H +#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(_ASM_RISCV_SPINLOCK_H) # error "please don't include this file directly" #endif -typedef struct { - volatile unsigned int lock; -} arch_spinlock_t; - -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } - -typedef struct { - volatile unsigned int lock; -} arch_rwlock_t; - -#define __ARCH_RW_LOCK_UNLOCKED { 0 } +#include <asm-generic/qspinlock_types.h> +#include <asm-generic/qrwlock_types.h> #endif /* _ASM_RISCV_SPINLOCK_TYPES_H */