Message ID: 20170725114901.31403-1-ard.biesheuvel@linaro.org (mailing list archive)
State: New, archived
On 25 July 2017 at 12:49, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> Hi all,
>
> I had a stab at porting the fast refcount checks to arm64. It is slightly
> less straight-forward than x86 given that we need to support both LSE and
> LL/SC, and fallback to the latter if running a kernel built with support
> for the former on hardware that does not support it.
>
> It is build tested with and without LSE support, and boots fine on non-LSE
> hardware in both cases.
>
> Suggestions welcome as to how to test and/or benchmark this,

I discovered this awesome tool called lkdtm, and noticed that the patch
does not quite work in its current form: the condition check is incorrect,
and it uses the adrp instruction, which is not allowed in modules in a
standard build.

Updated patch here:
https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/log/?h=arm64-fast-refcount
On Tue, Jul 25, 2017 at 4:49 AM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> Hi all,
>
> I had a stab at porting the fast refcount checks to arm64. It is slightly
> less straight-forward than x86 given that we need to support both LSE and
> LL/SC, and fallback to the latter if running a kernel built with support
> for the former on hardware that does not support it.
>
> It is build tested with and without LSE support, and boots fine on non-LSE
> hardware in both cases.

Ah! Very cool. Hopefully you and Li can compare notes; I think they've
been working on an implementation too.

> Suggestions welcome as to how to test and/or benchmark this,

I'll post a patch for LKDTM that I've been using. It's more
comprehensive than the existing ATOMIC checks (which predated the
refcount-only protection).

-Kees
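As a rough illustration of what an LKDTM-style overflow test boils down to
-- this is not the actual LKDTM code, and lkdtm_style_refcount_overflow()
is an invented name for this sketch -- the idea is to seed a counter just
below the upper boundary and push it across, expecting the arch hook to
trap rather than let the counter wrap:

#include <linux/kernel.h>
#include <linux/refcount.h>

static refcount_t over_test;

static void lkdtm_style_refcount_overflow(void)
{
        /* Seed the counter just below the (signed) overflow boundary. */
        refcount_set(&over_test, INT_MAX - 1);

        refcount_inc(&over_test);       /* reaches INT_MAX: should succeed */
        refcount_inc(&over_test);       /* would wrap negative: should trap */

        pr_info("refcount is now %u\n", refcount_read(&over_test));
}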
On 25 July 2017 at 18:13, Kees Cook <keescook@chromium.org> wrote:
> On Tue, Jul 25, 2017 at 4:49 AM, Ard Biesheuvel
> <ard.biesheuvel@linaro.org> wrote:
>> Hi all,
>>
>> I had a stab at porting the fast refcount checks to arm64. It is slightly
>> less straight-forward than x86 given that we need to support both LSE and
>> LL/SC, and fallback to the latter if running a kernel built with support
>> for the former on hardware that does not support it.
>>
>> It is build tested with and without LSE support, and boots fine on non-LSE
>> hardware in both cases.
>
> Ah! Very cool. Hopefully you and Li can compare notes; I think they've
> been working on an implementation too.
>

I wasn't aware of that.

>> Suggestions welcome as to how to test and/or benchmark this,
>
> I'll post a patch for LKDTM that I've been using. It's more
> comprehensive than the existing ATOMIC checks (which predated the
> refcount-only protection).
>

OK. One thing I couldn't figure out: is refcount_t signed or not? The
saturate tests set the initial value to UINT_MAX - 1, but this is
interpreted as a negative value and so the refcount manipulations that
are expected to succeed also fail in my case.
On Tue, Jul 25, 2017 at 10:20 AM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 25 July 2017 at 18:13, Kees Cook <keescook@chromium.org> wrote:
>> On Tue, Jul 25, 2017 at 4:49 AM, Ard Biesheuvel
>> <ard.biesheuvel@linaro.org> wrote:
>>> Hi all,
>>>
>>> I had a stab at porting the fast refcount checks to arm64. It is slightly
>>> less straight-forward than x86 given that we need to support both LSE and
>>> LL/SC, and fallback to the latter if running a kernel built with support
>>> for the former on hardware that does not support it.
>>>
>>> It is build tested with and without LSE support, and boots fine on non-LSE
>>> hardware in both cases.
>>
>> Ah! Very cool. Hopefully you and Li can compare notes; I think they've
>> been working on an implementation too.
>>
>
> I wasn't aware of that.
>
>>> Suggestions welcome as to how to test and/or benchmark this,
>>
>> I'll post a patch for LKDTM that I've been using. It's more
>> comprehensive than the existing ATOMIC checks (which predated the
>> refcount-only protection).
>>
>
> OK. One thing I couldn't figure out: is refcount_t signed or not? The
> saturate tests set the initial value to UINT_MAX - 1, but this is
> interpreted as a negative value and so the refcount manipulations that
> are expected to succeed also fail in my case.

refcount_t under REFCOUNT_FULL is unsigned. Under the x86 fast refcount,
it's signed to gain the CPU flag detection for overflow. The understanding
is basically "omg, if you've got INT_MAX-many references to something you
already DoSed your machine".

I'll have the full LKDTM tests up in a moment here, just doing another
pass on them now...

-Kees
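To make the sign confusion concrete: UINT_MAX - 1 has the bit pattern of -2,
so a scheme that checks the sign of the result (as the add_lt/sub_lt ops in
this patch do) treats every operation on such a counter as already past the
boundary. A stand-alone user-space demo of just the arithmetic (nothing
kernel-specific here):

#include <limits.h>
#include <stdio.h>

int main(void)
{
        unsigned int seed = UINT_MAX - 1;       /* what the saturate tests start from */
        int as_signed = (int)seed;              /* how a signed (fast) scheme sees it */

        /* Prints: seed=4294967294 as_signed=-2 */
        printf("seed=%u as_signed=%d\n", seed, as_signed);

        /* Incrementing -2 yields -1, still negative, so a "b.lt" check
         * after the adds fires even though the unsigned test expected
         * the increment to succeed. */
        printf("after inc: %d\n", as_signed + 1);
        return 0;
}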
Hi Ard and Kees,

Yes, I have been working on this for several days, but I think Ard's patch
is better than mine in some ways, so I'll help review the patch and try to
give some suggestions. Never mind. :)

On 2017/7/26 1:20, Ard Biesheuvel wrote:
> On 25 July 2017 at 18:13, Kees Cook <keescook@chromium.org> wrote:
>> On Tue, Jul 25, 2017 at 4:49 AM, Ard Biesheuvel
>> <ard.biesheuvel@linaro.org> wrote:
>>> Hi all,
>>>
>>> I had a stab at porting the fast refcount checks to arm64. It is slightly
>>> less straight-forward than x86 given that we need to support both LSE and
>>> LL/SC, and fallback to the latter if running a kernel built with support
>>> for the former on hardware that does not support it.
>>>
>>> It is build tested with and without LSE support, and boots fine on non-LSE
>>> hardware in both cases.
>> Ah! Very cool. Hopefully you and Li can compare notes; I think they've
>> been working on an implementation too.
>>
> I wasn't aware of that.
>
>>> Suggestions welcome as to how to test and/or benchmark this,
>> I'll post a patch for LKDTM that I've been using. It's more
>> comprehensive than the existing ATOMIC checks (which predated the
>> refcount-only protection).
>>
> OK. One thing I couldn't figure out: is refcount_t signed or not? The
> saturate tests set the initial value to UINT_MAX - 1, but this is
> interpreted as a negative value and so the refcount manipulations that
> are expected to succeed also fail in my case.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index dfd908630631..53b9a8f5277b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -16,6 +16,7 @@ config ARM64
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	select ARCH_HAS_KCOV
+	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c0235e0ff849..66dc66399630 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -24,10 +24,25 @@
 #include <linux/types.h>
 
 #include <asm/barrier.h>
+#include <asm/brk-imm.h>
 #include <asm/lse.h>
 
 #ifdef __KERNEL__
 
+#define REFCOUNT_CHECK(cond) \
+"22: b." #cond " 33f\n" \
+" .pushsection \".text.unlikely\"\n" \
+"33: mov x16, %[counter]\n" \
+" adrp x17, 22b\n" \
+" add x17, x17, :lo12:22b\n" \
+" brk %[brk_imm]\n" \
+" .popsection\n"
+
+#define REFCOUNT_INPUTS(r) \
+	[counter] "r" (&(r)->counter), [brk_imm] "i" (REFCOUNT_BRK_IMM),
+
+#define REFCOUNT_CLOBBERS : "cc", "x16", "x17"
+
 #define __ARM64_IN_ATOMIC_IMPL
 
 #if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f5a2d09afb38..7b1cb901986c 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -327,4 +327,31 @@ __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
 
 #undef __CMPXCHG_DBL
 
+#define REFCOUNT_OP(op, asm_op, cond, clobber...) \
+__LL_SC_INLINE int \
+__LL_SC_PREFIX(__refcount_##op(int i, atomic_t *r)) \
+{ \
+	unsigned long tmp; \
+	int result; \
+ \
+	asm volatile("// refcount_" #op "\n" \
+" prfm pstl1strm, %2\n" \
+"1: ldxr %w0, %2\n" \
+" " #asm_op " %w0, %w0, %w[i]\n" \
+" stxr %w1, %w0, %2\n" \
+" cbnz %w1, 1b\n" \
+	REFCOUNT_CHECK(cond) \
+	: "=&r" (result), "=&r" (tmp), "+Q" (r->counter) \
+	: REFCOUNT_INPUTS(r) [i] "Ir" (i) \
+	clobber); \
+ \
+	return result; \
+} \
+__LL_SC_EXPORT(__refcount_##op);
+
+REFCOUNT_OP(add_lt, adds, lt, REFCOUNT_CLOBBERS);
+REFCOUNT_OP(add_le, adds, le, REFCOUNT_CLOBBERS);
+REFCOUNT_OP(sub_lt, subs, lt, REFCOUNT_CLOBBERS);
+REFCOUNT_OP(sub_le, subs, le, REFCOUNT_CLOBBERS);
+
 #endif /* __ASM_ATOMIC_LL_SC_H */
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 99fa69c9c3cf..f64bb51f1860 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -531,4 +531,55 @@ __CMPXCHG_DBL(_mb, al, "memory")
 #undef __LL_SC_CMPXCHG_DBL
 #undef __CMPXCHG_DBL
 
+#define REFCOUNT_ADD_OP(op, cond) \
+static inline int __refcount_##op(int i, atomic_t *r) \
+{ \
+	register int w0 asm ("w0") = i; \
+	register atomic_t *x1 asm ("x1") = r; \
+ \
+	asm volatile(ARM64_LSE_ATOMIC_INSN( \
+	/* LL/SC */ \
+	__LL_SC_CALL(__refcount_##op) \
+	__nops(1), \
+	/* LSE atomics */ \
+	" ldadd %w[i], w30, %[v]\n" \
+	" adds %w[i], %w[i], w30") \
+	REFCOUNT_CHECK(cond) \
+	: [i] "+r" (w0), [v] "+Q" (r->counter) \
+	: REFCOUNT_INPUTS(r) "r" (x1) \
+	: __LL_SC_CLOBBERS, "cc"); \
+ \
+	return w0; \
+}
+
+#define REFCOUNT_SUB_OP(op, cond, fbop) \
+static inline int __refcount_##op(int i, atomic_t *r) \
+{ \
+	register int w0 asm ("w0") = i; \
+	register atomic_t *x1 asm ("x1") = r; \
+ \
+	if (__builtin_constant_p(i)) \
+		return __refcount_##fbop(-i, r); \
+ \
+	asm volatile(ARM64_LSE_ATOMIC_INSN( \
+	/* LL/SC */ \
+	__LL_SC_CALL(__refcount_##op) \
+	__nops(2), \
+	/* LSE atomics */ \
+	" neg %w[i], %w[i]\n" \
+	" ldadd %w[i], w30, %[v]\n" \
+	" adds %w[i], %w[i], w30") \
+	REFCOUNT_CHECK(cond) \
+	: [i] "+r" (w0), [v] "+Q" (r->counter) \
+	: REFCOUNT_INPUTS(r) "r" (x1) \
+	: __LL_SC_CLOBBERS, "cc"); \
+ \
+	return w0; \
+}
+
+REFCOUNT_ADD_OP(add_lt, lt);
+REFCOUNT_ADD_OP(add_le, le);
+REFCOUNT_SUB_OP(sub_lt, lt, add_lt);
+REFCOUNT_SUB_OP(sub_le, le, add_le);
+
 #endif /* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
index ed693c5bcec0..0bce57737ff1 100644
--- a/arch/arm64/include/asm/brk-imm.h
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -18,6 +18,7 @@
  * 0x800: kernel-mode BUG() and WARN() traps
  */
 #define FAULT_BRK_IMM 0x100
+#define REFCOUNT_BRK_IMM 0x101
 #define KGDB_DYN_DBG_BRK_IMM 0x400
 #define KGDB_COMPILED_DBG_BRK_IMM 0x401
 #define BUG_BRK_IMM 0x800
diff --git a/arch/arm64/include/asm/refcount.h b/arch/arm64/include/asm/refcount.h
new file mode 100644
index 000000000000..3d69537ff2e7
--- /dev/null
+++ b/arch/arm64/include/asm/refcount.h
@@ -0,0 +1,88 @@
+/*
+ * arm64-specific implementation of refcount_t. Based on x86 version and
+ * PAX_REFCOUNT from PaX/grsecurity.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_REFCOUNT_H
+#define __ASM_REFCOUNT_H
+
+#include <linux/refcount.h>
+
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+static __always_inline void refcount_add(int i, refcount_t *r)
+{
+	__refcount_add_lt(i, &r->refs);
+}
+
+static __always_inline void refcount_inc(refcount_t *r)
+{
+	__refcount_add_lt(1, &r->refs);
+}
+
+static __always_inline void refcount_dec(refcount_t *r)
+{
+	__refcount_sub_le(1, &r->refs);
+}
+
+static __always_inline __must_check bool refcount_sub_and_test(unsigned int i,
+								refcount_t *r)
+{
+	return __refcount_sub_lt(i, &r->refs) == 0;
+}
+
+static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
+{
+	return __refcount_sub_lt(1, &r->refs) == 0;
+}
+
+/**
+ * __refcount_add_unless - add unless the number is already a given value
+ * @r: pointer of type refcount_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @r, so long as @r was not already @u.
+ * Returns the old value of @r.
+ */
+static __always_inline __must_check
+int __refcount_add_unless(refcount_t *r, int a, int u)
+{
+	int c, new;
+
+	c = atomic_read(&(r->refs));
+	do {
+		if (unlikely(c == u))
+			break;
+
+		asm volatile(
+			"adds %0, %0, %2 ;"
+			REFCOUNT_CHECK(lt)
+			: "=r" (new)
+			: "0" (c), "Ir" (a),
+			  [counter] "r" (&r->refs.counter),
+			  [brk_imm] "i" (REFCOUNT_BRK_IMM)
+			: "cc", "x16", "x17");
+
+	} while (!atomic_try_cmpxchg(&(r->refs), &c, new));
+
+	return c;
+}
+
+static __always_inline __must_check
+bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+{
+	return __refcount_add_unless(r, i, 0) != 0;
+}
+
+static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+{
+	return refcount_add_not_zero(1, r);
+}
+
+#endif
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index c7c7088097be..6b5a3658d050 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -758,8 +758,43 @@ int __init early_brk64(unsigned long addr, unsigned int esr,
 	return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
 }
 
+static int refcount_overflow_handler(struct pt_regs *regs, unsigned int esr)
+{
+	/* First unconditionally saturate the refcount. */
+	*(int *)regs->regs[16] = INT_MIN / 2;
+
+	/*
+	 * This function has been called because either a negative refcount
+	 * value was seen by any of the refcount functions, or a zero
+	 * refcount value was seen by refcount_dec().
+	 *
+	 * If we crossed from INT_MAX to INT_MIN, V (oVerflow: signed overflow)
+	 * will be set. Additionally, seeing the refcount reach 0 will set Z
+	 * (Zero: result was zero). In each of these cases we want a report,
+	 * since it's a boundary condition.
+	 */
+	if (regs->pstate & (PSR_Z_BIT | PSR_V_BIT)) {
+		bool zero = regs->pstate & PSR_Z_BIT;
+
+		/* point pc to the branch instruction that brought us here */
+		regs->pc = regs->regs[17];
+		refcount_error_report(regs, zero ? "hit zero" : "overflow");
+	}
+
+	/* advance pc and proceed */
+	regs->pc += 4;
+	return DBG_HOOK_HANDLED;
+}
+
+static struct break_hook refcount_break_hook = {
+	.esr_val = 0xf2000000 | REFCOUNT_BRK_IMM,
+	.esr_mask = 0xffffffff,
+	.fn = refcount_overflow_handler,
+};
+
 /* This registration must happen early, before debug_traps_init(). */
 void __init trap_init(void)
 {
 	register_break_hook(&bug_break_hook);
+	register_break_hook(&refcount_break_hook);
 }
diff --git a/arch/arm64/lib/atomic_ll_sc.c b/arch/arm64/lib/atomic_ll_sc.c
index b0c538b0da28..5f038abdc635 100644
--- a/arch/arm64/lib/atomic_ll_sc.c
+++ b/arch/arm64/lib/atomic_ll_sc.c
@@ -1,3 +1,9 @@
 #include <asm/atomic.h>
 #define __ARM64_IN_ATOMIC_IMPL
+#undef REFCOUNT_CHECK
+#undef REFCOUNT_INPUTS
+#undef REFCOUNT_CLOBBERS
+#define REFCOUNT_CHECK(cond)
+#define REFCOUNT_INPUTS(r)
+#define REFCOUNT_CLOBBERS : "cc"
 #include <asm/atomic_ll_sc.h>
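Read together, the REFCOUNT_CHECK() sequence and refcount_overflow_handler()
implement roughly the logic below. This is only a C model of the semantics,
not the implementation: the real code keeps the comparison in the condition
flags set by the atomic op and performs the saturation inside the brk
handler, and the sketch shows the add_lt flavour (refcount_dec() uses the
"le" variant, so a result of zero traps as well):

#include <linux/atomic.h>
#include <linux/kernel.h>

static inline int model_refcount_add_lt(int i, atomic_t *r)
{
	int old = atomic_fetch_add(i, r);	/* the atomic "adds" */
	int new = old + i;			/* kernel builds use -fno-strict-overflow,
						 * so this wrap is well defined */

	if (new < 0) {				/* the "b.lt 33f" branch is taken */
		/* What the brk handler does: saturate the counter first... */
		atomic_set(r, INT_MIN / 2);
		/* ...and report only if a boundary was actually crossed,
		 * i.e. V was set because old was still non-negative. */
		if (old >= 0)
			WARN(1, "refcount overflow\n");
	}
	return new;				/* the register value, not the saturated one */
}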
Hi all,

I had a stab at porting the fast refcount checks to arm64. It is slightly
less straight-forward than x86 given that we need to support both LSE and
LL/SC, and fall back to the latter if running a kernel built with support
for the former on hardware that does not support it.

It is build tested with and without LSE support, and boots fine on non-LSE
hardware in both cases.

Suggestions welcome as to how to test and/or benchmark this,

Thanks,
Ard.

---------8<----------------
This adds support to arm64 for fast refcount checking, as proposed by
Kees for x86.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/Kconfig                    |  1 +
 arch/arm64/include/asm/atomic.h       | 15 ++++
 arch/arm64/include/asm/atomic_ll_sc.h | 27 ++++++
 arch/arm64/include/asm/atomic_lse.h   | 51 ++++++++++++
 arch/arm64/include/asm/brk-imm.h      |  1 +
 arch/arm64/include/asm/refcount.h     | 88 ++++++++++++++++++++
 arch/arm64/kernel/traps.c             | 35 ++++++++
 arch/arm64/lib/atomic_ll_sc.c         |  6 ++
 8 files changed, 224 insertions(+)
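For reference, the generic refcount_t API that these arch hooks back is
what ordinary kernel code uses for object lifetimes. The struct and helper
names below are invented for illustration, but the refcount_* calls are the
real API; with checking like the above, an over- or underflow traps and
saturates instead of silently wrapping into a use-after-free:

#include <linux/refcount.h>
#include <linux/slab.h>

/* my_obj and my_obj_get/put are made-up names for this example. */
struct my_obj {
	refcount_t refs;
	/* ... payload ... */
};

static struct my_obj *my_obj_get(struct my_obj *obj)
{
	refcount_inc(&obj->refs);		/* would trap instead of wrapping */
	return obj;
}

static void my_obj_put(struct my_obj *obj)
{
	if (refcount_dec_and_test(&obj->refs))	/* traps if it drops below zero */
		kfree(obj);
}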