@@ -1396,6 +1396,21 @@ config LOONGSON3_ENHANCEMENT
please say 'N' here. If you want a high-performance kernel to run on
new Loongson 3 machines only, please say 'Y' here.
+config CPU_LOONGSON3_WORKAROUNDS
+ bool "Old Loongson 3 LLSC Workarounds"
+ default y if SMP
+ depends on CPU_LOONGSON3
+ help
+ Loongson 3 processors have LL/SC errata which require workarounds.
+ Without the workarounds the system may hang unexpectedly.
+
+ Newer Loongson 3 processors fix these issues, so no workarounds are
+ needed there. The workarounds have no significant side effects on such
+ machines, but they may decrease system performance, so this option
+ should be disabled unless the kernel is intended to run on old systems.
+
+ If unsure, please say Y.
+
config CPU_LOONGSON2E
bool "Loongson 2E"
depends on SYS_HAS_CPU_LOONGSON2E
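For context, whether the workaround code is built is decided entirely by this Kconfig symbol. A hypothetical defconfig fragment for a kernel that must still boot on pre-3A2000 (3A R1) boards might therefore carry, assuming the usual Loongson 3 platform symbols:

	CONFIG_CPU_LOONGSON3=y
	CONFIG_CPU_LOONGSON3_WORKAROUNDS=y

whereas a kernel targeting only newer machines would leave the second symbol unset to avoid the extra sync instructions.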
@@ -59,6 +59,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
} else if (kernel_uses_llsc) { \
int temp; \
\
+ loongson_llsc_mb(); \
do { \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
@@ -100,6 +101,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
} else if (kernel_uses_llsc) { \
int temp; \
\
+ loongson_llsc_mb(); \
do { \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
@@ -148,6 +150,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \
} else if (kernel_uses_llsc) { \
int temp; \
\
+ loongson_llsc_mb(); \
do { \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
@@ -401,6 +404,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
} else if (kernel_uses_llsc) { \
long temp; \
\
+ loongson_llsc_mb(); \
do { \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
@@ -442,6 +446,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
} else if (kernel_uses_llsc) { \
long temp; \
\
+ loongson_llsc_mb(); \
do { \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
@@ -491,6 +496,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
} else if (kernel_uses_llsc) { \
long temp; \
\
+ loongson_llsc_mb(); \
do { \
__asm__ __volatile__( \
" .set "MIPS_ISA_LEVEL" \n" \
@@ -222,6 +222,42 @@
#define __smp_mb__before_atomic() __smp_mb__before_llsc()
#define __smp_mb__after_atomic() smp_llsc_mb()
+/*
+ * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
+ * store or pref) in between an ll & sc can cause the sc instruction to
+ * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
+ * containing such sequences, this bug bites harder than we might otherwise
+ * expect due to reordering & speculation:
+ *
+ * 1) A memory access appearing prior to the ll in program order may actually
+ * be executed after the ll - this is the reordering case.
+ *
+ * In order to avoid this we need to place a memory barrier (ie. a sync
+ * instruction) prior to every ll instruction, in between it & any earlier
+ * memory access instructions. Many of these cases are already covered by
+ * smp_mb__before_llsc() but for the remaining cases, typically ones in
+ * which multiple CPUs may operate on a memory location but ordering is not
+ * usually guaranteed, we use loongson_llsc_mb() below.
+ *
+ * This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
+ *
+ * 2) If a conditional branch exists between an ll & sc with a target outside
+ * of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
+ * or similar, then misprediction of the branch may allow speculative
+ * execution of memory accesses from outside of the ll-sc loop.
+ *
+ * In order to avoid this we need a memory barrier (ie. a sync instruction)
+ * at each affected branch target, for which we also use loongson_llsc_mb()
+ * defined below.
+ *
+ * This case affects all current Loongson 3 CPUs.
+ */
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
+#define loongson_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#else
+#define loongson_llsc_mb() do { } while (0)
+#endif
+
#include <asm-generic/barrier.h>
#endif /* __ASM_BARRIER_H */
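For reference, every hunk below applies loongson_llsc_mb() in one of the two ways the comment above describes. A minimal sketch of the case-1 placement, modelled on the atomic_##op() loops in this patch but with the .set ISA directives and R10000_LLSC_WAR variants trimmed (example_atomic_add is a made-up name, not something the patch adds):

	static inline void example_atomic_add(int i, atomic_t *v)
	{
		int temp;

		loongson_llsc_mb();	/* case 1: order earlier accesses before the ll */
		do {
			__asm__ __volatile__(
			"	ll	%0, %1	# example_atomic_add	\n"
			"	addu	%0, %2				\n"
			"	sc	%0, %1				\n"
			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)
			: "Ir" (i));
		} while (unlikely(!temp));
	}

The loongson_llsc_mb() calls placed after the asm blocks in the futex and set_pte() hunks below are the case-2 placement: they sit at the branch target reached when the ll/sc sequence exits early (e.g. on a cmpxchg value mismatch), so memory accesses from outside the loop cannot be speculatively executed in between the ll and the sc.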
@@ -68,6 +68,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" " __LL "%0, %1 # set_bit \n"
@@ -78,6 +79,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
} while (unlikely(!temp));
#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
} else if (kernel_uses_llsc) {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set "MIPS_ISA_ARCH_LEVEL" \n"
@@ -120,6 +122,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
: "ir" (~(1UL << bit)));
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" " __LL "%0, %1 # clear_bit \n"
@@ -130,6 +133,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
} while (unlikely(!temp));
#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
} else if (kernel_uses_llsc) {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set "MIPS_ISA_ARCH_LEVEL" \n"
@@ -188,6 +192,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
unsigned long temp;
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" .set "MIPS_ISA_ARCH_LEVEL" \n"
@@ -50,6 +50,7 @@
"i" (-EFAULT) \
: "memory"); \
} else if (cpu_has_llsc) { \
+ loongson_llsc_mb(); \
__asm__ __volatile__( \
" .set push \n" \
" .set noat \n" \
@@ -162,6 +163,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
"i" (-EFAULT)
: "memory");
} else if (cpu_has_llsc) {
+ loongson_llsc_mb();
__asm__ __volatile__(
"# futex_atomic_cmpxchg_inatomic \n"
" .set push \n"
@@ -190,6 +192,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
"i" (-EFAULT)
: "memory");
+ loongson_llsc_mb();
} else
return -ENOSYS;
@@ -229,6 +229,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
: [global] "r" (page_global));
} else if (kernel_uses_llsc) {
+ loongson_llsc_mb();
__asm__ __volatile__ (
" .set "MIPS_ISA_ARCH_LEVEL" \n"
" .set push \n"
@@ -244,6 +245,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
" .set mips0 \n"
: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
: [global] "r" (page_global));
+ loongson_llsc_mb();
}
#else /* !CONFIG_SMP */
if (pte_none(*buddy))
@@ -115,6 +115,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
[my_ticket] "=&r" (my_ticket)
: [inc] "r" (inc));
} else {
+ loongson_llsc_mb();
__asm__ __volatile__ (
" .set push # arch_spin_lock \n"
" .set noreorder \n"
@@ -190,6 +191,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
[now_serving] "=&r" (tmp3)
: [inc] "r" (inc));
} else {
+ loongson_llsc_mb();
__asm__ __volatile__ (
" .set push # arch_spin_trylock \n"
" .set noreorder \n"
@@ -259,6 +261,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
: GCC_OFF_SMALL_ASM() (rw->lock)
: "memory");
} else {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
"1: ll %1, %2 # arch_read_lock \n"
@@ -320,6 +323,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
: GCC_OFF_SMALL_ASM() (rw->lock)
: "memory");
} else {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
"1: ll %1, %2 # arch_write_lock \n"
@@ -345,6 +349,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
: "=m" (rw->lock)
: "m" (rw->lock)
: "memory");
+ nudge_writes();
}
static inline int arch_read_trylock(arch_rwlock_t *rw)
@@ -370,6 +375,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
: GCC_OFF_SMALL_ASM() (rw->lock)
: "memory");
} else {
+ loongson_llsc_mb();
__asm__ __volatile__(
" .set noreorder # arch_read_trylock \n"
" li %2, 0 \n"
@@ -414,6 +420,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
: GCC_OFF_SMALL_ASM() (rw->lock)
: "memory");
} else {
+ loongson_llsc_mb();
do {
__asm__ __volatile__(
" ll %1, %3 # arch_write_trylock \n"
@@ -23,6 +23,29 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
endif
cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+
+#
+# Some versions of binutils, not currently mainline as of 2019/02/04, support
+# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction
+# to work around a CPU bug (see loongson_llsc_mb() in asm/barrier.h for a
+# description).
+#
+# We disable this in order to prevent the assembler meddling with the
+# instruction that labels refer to, ie. if we label an ll instruction:
+#
+# 1: ll v0, 0(a0)
+#
+# ...then with the assembler fix applied the label may actually point at a sync
+# instruction inserted by the assembler, and if we were using the label in an
+# exception table the table would no longer contain the address of the ll
+# instruction.
+#
+# Avoid this by explicitly disabling that assembler behaviour. If upstream
+# binutils does not merge support for the flag then we can revisit & remove
+# this later - for now it ensures vendor toolchains don't cause problems.
+#
+cflags-$(CONFIG_CPU_LOONGSON3) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
+
#
# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
# as MIPS64 R2; older versions as just R1. This leaves the possibility open
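To make the exception-table concern concrete, here is a hedged sketch modelled on the MIPS futex/uaccess helpers touched by this patch (ll_user_word is a made-up helper, not something the patch adds). The __ex_table entry records the address of label 1, which must be the ll itself; an assembler-inserted sync at that label would leave the table pointing at the sync rather than at the instruction that can actually fault:

	/* Hypothetical example only - not part of this patch. */
	static inline int ll_user_word(u32 __user *uaddr, u32 *val)
	{
		int ret = 0;
		u32 tmp = 0;

		__asm__ __volatile__(
		"1:	"__LL"	%[tmp], %[mem]			\n"
		"2:						\n"
		"	.section .fixup, \"ax\"			\n"
		"3:	li	%[ret], %[efault]		\n"
		"	j	2b				\n"
		"	.previous				\n"
		"	.section __ex_table, \"a\"		\n"
		"	"__UA_ADDR"	1b, 3b			\n"
		"	.previous				\n"
		: [ret] "+r" (ret), [tmp] "=&r" (tmp)
		: [mem] GCC_OFF_SMALL_ASM() (*uaddr), [efault] "i" (-EFAULT)
		: "memory");

		*val = tmp;
		return ret;
	}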
@@ -931,6 +931,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
* to mimic that here by taking a load/istream page
* fault.
*/
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(p, 0);
UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
uasm_i_jr(p, ptr);
@@ -1637,6 +1639,8 @@ static void
iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
{
#ifdef CONFIG_SMP
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(p, 0);
# ifdef CONFIG_PHYS_ADDR_T_64BIT
if (cpu_has_64bits)
uasm_i_lld(p, pte, 0, ptr);
@@ -2218,6 +2222,8 @@ static void build_r4000_tlb_load_handler(void)
#endif
uasm_l_nopage_tlbl(&l, p);
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(&p, 0);
build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
if ((unsigned long)tlb_do_page_fault_0 & 1) {
@@ -2273,6 +2279,8 @@ static void build_r4000_tlb_store_handler(void)
#endif
uasm_l_nopage_tlbs(&l, p);
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(&p, 0);
build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
if ((unsigned long)tlb_do_page_fault_1 & 1) {
@@ -2329,6 +2337,8 @@ static void build_r4000_tlb_modify_handler(void)
#endif
uasm_l_nopage_tlbm(&l, p);
+ if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+ uasm_i_sync(&p, 0);
build_restore_work_registers(&p);
#ifdef CONFIG_CPU_MICROMIPS
if ((unsigned long)tlb_do_page_fault_1 & 1) {