@@ -190,6 +190,7 @@ config LOONGARCH
select TRACE_IRQFLAGS_SUPPORT
select USE_PERCPU_NUMA_NODE_ID
select USER_STACKTRACE_SUPPORT
+ select VDSO_GETRANDOM if CPU_HAS_LSX
select ZONE_DMA32
config 32BIT
new file mode 100644
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ */
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <asm/vdso/vdso.h>
+
+static __always_inline ssize_t getrandom_syscall(void *_buffer,
+ size_t _len,
+ unsigned int _flags)
+{
+ register long ret asm("a0");
+ register long int nr asm("a7") = __NR_getrandom;
+ register void *buffer asm("a0") = _buffer;
+ register size_t len asm("a1") = _len;
+ register unsigned int flags asm("a2") = _flags;
+
+ asm volatile(
+ " syscall 0\n"
+ : "+r" (ret)
+ : "r" (nr), "r" (buffer), "r" (len), "r" (flags)
+ : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8",
+ "memory");
+
+ return ret;
+}
+
+static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(
+ void)
+{
+ return (const struct vdso_rng_data *)(
+ get_vdso_data() +
+ VVAR_LOONGARCH_PAGES_START * PAGE_SIZE +
+ offsetof(struct loongarch_vdso_data, rng_data));
+}
+
+extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key,
+ u32 *counter, size_t nblocks);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
@@ -4,6 +4,9 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#ifndef _ASM_VDSO_VDSO_H
+#define _ASM_VDSO_VDSO_H
+
#ifndef __ASSEMBLY__
#include <asm/asm.h>
@@ -16,6 +19,9 @@ struct vdso_pcpu_data {
struct loongarch_vdso_data {
struct vdso_pcpu_data pdata[NR_CPUS];
+#ifdef CONFIG_VDSO_GETRANDOM
+ struct vdso_rng_data rng_data;
+#endif
};
/*
@@ -63,3 +69,5 @@ static inline unsigned long get_vdso_data(void)
}
#endif /* __ASSEMBLY__ */
+
+#endif
@@ -14,6 +14,7 @@
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/ftrace.h>
+#include <asm/vdso/vdso.h>
static void __used output_ptreg_defines(void)
{
@@ -321,3 +322,12 @@ static void __used output_kvm_defines(void)
OFFSET(KVM_GPGD, kvm, arch.pgd);
BLANK();
}
+
+#ifdef CONFIG_VDSO_GETRANDOM
+static void __used output_vdso_rng_defines(void)
+{
+ COMMENT("LoongArch VDSO getrandom offsets.");
+ OFFSET(VDSO_RNG_DATA, loongarch_vdso_data, rng_data);
+ BLANK();
+}
+#endif
@@ -23,6 +23,7 @@
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>
#include <vdso/datapage.h>
+#include <generated/asm-offsets.h>
#include <generated/vdso-offsets.h>
extern char vdso_start[], vdso_end[];
@@ -35,6 +36,11 @@ static union {
struct loongarch_vdso_data vdata;
} loongarch_vdso_data __page_aligned_data;
+#ifdef CONFIG_VDSO_GETRANDOM
+asm(".globl _vdso_rng_data\n"
+ ".set _vdso_rng_data, loongarch_vdso_data + " __stringify(VDSO_RNG_DATA));
+#endif
+
static struct page *vdso_pages[] = { NULL };
struct vdso_data *vdso_data = generic_vdso_data.data;
struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata;
@@ -6,6 +6,8 @@ include $(srctree)/lib/vdso/Makefile
obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o
+obj-vdso-$(CONFIG_VDSO_GETRANDOM) += vgetrandom.o vgetrandom-chacha.o vgetrandom-alt.o memset.o
+
# Common compiler flags between ABIs.
ccflags-vdso := \
$(filter -I%,$(KBUILD_CFLAGS)) \
new file mode 100644
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * A copy of __memset_generic from arch/loongarch/lib/memset.S for vDSO.
+ *
+ * Copyright (C) 2020-2024 Loongson Technology Corporation Limited
+ */
+
+#include <asm/regdef.h>
+#include <linux/linkage.h>
+
+SYM_FUNC_START(memset)
+ move a3, a0
+ beqz a2, 2f
+
+1: st.b a1, a0, 0
+ addi.d a0, a0, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 1b
+
+2: move a0, a3
+ jr ra
+SYM_FUNC_END(memset)
+
+.hidden memset
@@ -69,6 +69,7 @@ VERSION
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_rt_sigreturn;
+ __vdso_getrandom;
local: *;
};
}
new file mode 100644
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ *
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/cpu.h>
+#include <asm/unistd.h>
+#include <asm/regdef.h>
+#include <linux/linkage.h>
+
+SYM_FUNC_START(__vdso_getrandom)
+ ALTERNATIVE __stringify(li.w a7, __NR_getrandom; syscall 0; jr ra), \
+ "b __vdso_getrandom_lsx", CPU_FEATURE_LSX
+SYM_FUNC_END(__vdso_getrandom)
+
+.weak getrandom
+.set getrandom, __vdso_getrandom
new file mode 100644
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ *
+ * Based on arch/x86/entry/vdso/vgetrandom-chacha.S:
+ *
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights
+ * Reserved.
+ */
+
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <linux/linkage.h>
+
+.section .rodata
+.align 4
+CONSTANTS: .octa 0x6b20657479622d323320646e61707865
+
+.text
+
+/*
+ * Very basic LSX implementation of ChaCha20. Produces a given positive
+ * number of blocks of output with a nonce of 0, taking an input key and
+ * 8-byte counter. Importantly does not spill to the stack. Its arguments
+ * are:
+ *
+ * a0: output bytes
+ * a1: 32-byte key input
+ * a2: 8-byte counter input/output
+ * a3: number of 64-byte blocks to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+#define output a0
+#define key a1
+#define counter a2
+#define nblocks a3
+#define i t0
+/* LSX registers vr0-vr23 are caller-save. */
+#define state0 $vr0
+#define state1 $vr1
+#define state2 $vr2
+#define state3 $vr3
+#define copy0 $vr4
+#define copy1 $vr5
+#define copy2 $vr6
+#define copy3 $vr7
+#define one $vr8
+
+ /* copy0 = "expand 32-byte k" */
+ la.pcrel t1, CONSTANTS
+ vld copy0, t1, 0
+ /* copy1, copy2 = key */
+ vld copy1, key, 0
+ vld copy2, key, 0x10
+ /* copy3 = counter || zero nonce */
+ vldrepl.d copy3, counter, 0
+ vinsgr2vr.d copy3, zero, 1
+ /* one = 1 || 0 */
+ vldi one, 0b0110000000001
+ vinsgr2vr.d one, zero, 1
+
+.Lblock:
+ /* state = copy */
+ vori.b state0, copy0, 0
+ vori.b state1, copy1, 0
+ vori.b state2, copy2, 0
+ vori.b state3, copy3, 0
+
+ li.w i, 10
+.Lpermute:
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ vadd.w state0, state0, state1
+ vxor.v state3, state3, state0
+ vrotri.w state3, state3, 16
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ vadd.w state2, state2, state3
+ vxor.v state1, state1, state2
+ vrotri.w state1, state1, 20
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ vadd.w state0, state0, state1
+ vxor.v state3, state3, state0
+ vrotri.w state3, state3, 24
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ vadd.w state2, state2, state3
+ vxor.v state1, state1, state2
+ vrotri.w state1, state1, 25
+
+ /* state1[0,1,2,3] = state1[1,2,3,0] */
+ vshuf4i.w state1, state1, 0b00111001
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ vshuf4i.w state2, state2, 0b01001110
+ /* state3[0,1,2,3] = state3[3,0,1,2] */
+ vshuf4i.w state3, state3, 0b10010011
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
+ vadd.w state0, state0, state1
+ vxor.v state3, state3, state0
+ vrotri.w state3, state3, 16
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
+ vadd.w state2, state2, state3
+ vxor.v state1, state1, state2
+ vrotri.w state1, state1, 20
+
+ /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
+ vadd.w state0, state0, state1
+ vxor.v state3, state3, state0
+ vrotri.w state3, state3, 24
+
+ /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
+ vadd.w state2, state2, state3
+ vxor.v state1, state1, state2
+ vrotri.w state1, state1, 25
+
+ /* state1[0,1,2,3] = state1[3,0,1,2] */
+ vshuf4i.w state1, state1, 0b10010011
+ /* state2[0,1,2,3] = state2[2,3,0,1] */
+ vshuf4i.w state2, state2, 0b01001110
+ /* state3[0,1,2,3] = state3[1,2,3,0] */
+ vshuf4i.w state3, state3, 0b00111001
+
+ addi.w i, i, -1
+ bnez i, .Lpermute
+
+ /* output0 = state0 + copy0 */
+ vadd.w state0, state0, copy0
+ vst state0, output, 0
+ /* output1 = state1 + copy1 */
+ vadd.w state1, state1, copy1
+ vst state1, output, 0x10
+ /* output2 = state2 + copy2 */
+ vadd.w state2, state2, copy2
+ vst state2, output, 0x20
+ /* output3 = state3 + copy3 */
+ vadd.w state3, state3, copy3
+ vst state3, output, 0x30
+
+ /* ++copy3.counter */
+ vadd.d copy3, copy3, one
+
+ /* output += 64 */
+ PTR_ADDI output, output, 64
+ /* --nblocks */
+ PTR_ADDI nblocks, nblocks, -1
+ bnez nblocks, .Lblock
+
+ /* counter = copy3.counter */
+ vstelm.d copy3, counter, 0, 0
+
+ /* Zero out the potentially sensitive regs, in case nothing uses these again. */
+ vldi state0, 0
+ vldi state1, 0
+ vldi state2, 0
+ vldi state3, 0
+ vldi copy1, 0
+ vldi copy2, 0
+
+ jr ra
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
new file mode 100644
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Xi Ruoyao <xry111@xry111.site>. All Rights Reserved.
+ */
+#include <linux/types.h>
+
+#include "../../../../lib/vdso/getrandom.c"
+
+typeof(__cvdso_getrandom) __vdso_getrandom_lsx;
+
+ssize_t __vdso_getrandom_lsx(void *buffer, size_t len, unsigned int flags,
+ void *opaque_state, size_t opaque_len)
+{
+ return __cvdso_getrandom(buffer, len, flags, opaque_state,
+ opaque_len);
+}
Hook up the generic vDSO implementation to the LoongArch vDSO data page: embed struct vdso_rng_data into struct loongarch_vdso_data, and use assembler hack to resolve the symbol name "_vdso_rng_data" (which is expected by the generic vDSO implementation) to the rng_data field in loongarch_vdso_data. The vDSO function requires a ChaCha20 implementation that does not write to the stack, yet can still do an entire ChaCha20 permutation, so provide this using LSX. For processors lacking LSX just fallback to a getrandom() syscall. The compiler (GCC 14.2) calls memset() for initializing a "large" struct in a cold path of the generic vDSO getrandom() code. There seems no way to prevent it from calling memset(), and it's a cold path so the performance does not matter, so just provide a naive memset() implementation for vDSO. Signed-off-by: Xi Ruoyao <xry111@xry111.site> --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/vdso/getrandom.h | 47 ++++++ arch/loongarch/include/asm/vdso/vdso.h | 8 + arch/loongarch/kernel/asm-offsets.c | 10 ++ arch/loongarch/kernel/vdso.c | 6 + arch/loongarch/vdso/Makefile | 2 + arch/loongarch/vdso/memset.S | 24 +++ arch/loongarch/vdso/vdso.lds.S | 1 + arch/loongarch/vdso/vgetrandom-alt.S | 19 +++ arch/loongarch/vdso/vgetrandom-chacha.S | 162 ++++++++++++++++++++ arch/loongarch/vdso/vgetrandom.c | 16 ++ 11 files changed, 296 insertions(+) create mode 100644 arch/loongarch/include/asm/vdso/getrandom.h create mode 100644 arch/loongarch/vdso/memset.S create mode 100644 arch/loongarch/vdso/vgetrandom-alt.S create mode 100644 arch/loongarch/vdso/vgetrandom-chacha.S create mode 100644 arch/loongarch/vdso/vgetrandom.c