Message ID | 20240826181059.111536-1-adhemerval.zanella@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | aarch64: vdso: Wire up getrandom() vDSO implementation | expand |
Hi Adhemerval, Thanks for posting this! Exciting to have it here. Just some small nits for now: On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: > +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) > +{ > + register long int x8 asm ("x8") = __NR_getrandom; > + register long int x0 asm ("x0") = (long int) buffer; > + register long int x1 asm ("x1") = (long int) len; > + register long int x2 asm ("x2") = (long int) flags; Usually it's written just as `long` or `unsigned long`, and likewise with the cast. Also, no space after the cast. > +#define __VDSO_RND_DATA_OFFSET 480 This is the size of the data currently there? > #include <asm/page.h> > #include <asm/vdso.h> > #include <asm-generic/vmlinux.lds.h> > +#include <vdso/datapage.h> > +#include <asm/vdso/vsyscall.h> Possible to keep the asm/ together? > + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive > + * number of blocks of output with nonnce 0, taking an input key and 8-bytes nonnce -> nonce > -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) > SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > > TEST_GEN_PROGS := vdso_test_gettimeofday > @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) > TEST_GEN_PROGS += vdso_standalone_test_x86 > endif > TEST_GEN_PROGS += vdso_test_correctness > -ifeq ($(uname_M),x86_64) > +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) > TEST_GEN_PROGS += vdso_test_getrandom > ifneq ($(SODIUM),) > TEST_GEN_PROGS += vdso_test_chacha You'll need to add the symlink to get the chacha selftest running: $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso $ git add tools/arch/arm64/vdso Also, can you confirm that the chacha selftest runs and works? Jason
On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: > +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) > +{ > + register long int x8 asm ("x8") = __NR_getrandom; > + register long int x0 asm ("x0") = (long int) buffer; > + register long int x1 asm ("x1") = (long int) len; > + register long int x2 asm ("x2") = (long int) flags; > + > + asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2)); > + > + return x0; > +} More generally, it might be best to follow the format used by arch/arm64/include/asm/vdso/gettimeofday.h.
Le 26/08/2024 à 20:10, Adhemerval Zanella a écrit : > Hook up the generic vDSO implementation to the aarch64 vDSO data page. > The _vdso_rng_data required data is placed within the _vdso_data vvar > page, by using a offset larger than the vdso_data > (__VDSO_RND_DATA_OFFSET). > > The vDSO function requires a ChaCha20 implementation that does not > write to the stack, and that can do an entire ChaCha20 permutation. > The one provided is based on the current chacha-neon-core.S and uses NEON > on the permute operation. > > Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > --- > arch/arm64/Kconfig | 1 + > arch/arm64/include/asm/vdso/getrandom.h | 50 +++++++ > arch/arm64/include/asm/vdso/vsyscall.h | 9 ++ > arch/arm64/kernel/vdso/Makefile | 7 +- > arch/arm64/kernel/vdso/vdso.lds.S | 4 + > arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++ > arch/arm64/kernel/vdso/vgetrandom.c | 13 ++ > tools/testing/selftests/vDSO/Makefile | 4 +- > 8 files changed, 238 insertions(+), 3 deletions(-) > create mode 100644 arch/arm64/include/asm/vdso/getrandom.h > create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S > create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c Were you able to use the selftests? I think you are missing the symbolic link to the vdso directory (assuming you are using the latest master branch from https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index b3fc891f1544..e3f4c5bf0661 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -237,6 +237,7 @@ config ARM64 > select HAVE_KPROBES > select HAVE_KRETPROBES > select HAVE_GENERIC_VDSO > + select VDSO_GETRANDOM You don't keep things in alphabetical order here on ARM64?
> select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU > select IRQ_DOMAIN > select IRQ_FORCED_THREADING > diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h > new file mode 100644 > index 000000000000..6e2b136813ca > --- /dev/null > +++ b/arch/arm64/include/asm/vdso/getrandom.h > @@ -0,0 +1,50 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef __ASM_VDSO_GETRANDOM_H > +#define __ASM_VDSO_GETRANDOM_H > + > +#ifndef __ASSEMBLY__ > + > +#include <asm/unistd.h> > +#include <vdso/datapage.h> > + > +/** > + * getrandom_syscall - Invoke the getrandom() syscall. > + * @buffer: Destination buffer to fill with random bytes. > + * @len: Size of @buffer in bytes. > + * @flags: Zero or more GRND_* flags. > + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. > + */ > +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) > +{ > + register long int x8 asm ("x8") = __NR_getrandom; > + register long int x0 asm ("x0") = (long int) buffer; > + register long int x1 asm ("x1") = (long int) len; > + register long int x2 asm ("x2") = (long int) flags; > + > + asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2)); > + > + return x0; > +} > + > +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) > +{ > + return &_vdso_rng_data; > +} > + > +/** > + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. > + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. > + * @key: 32-byte input key. > + * @counter: 8-byte counter, read on input and updated on return. > + * @nblocks: Number of blocks to generate. > + * > + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write > + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data > + * leaking into forked child processes. 
> + */ > +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); For Jason: We all redefine this prototype, should we have it in a central place, or do you expect some architecture to provide some static inline for it ? > + > +#endif /* !__ASSEMBLY__ */ > + > +#endif /* __ASM_VDSO_GETRANDOM_H */ > diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h > index f94b1457c117..7ddb2bc3b57b 100644 > --- a/arch/arm64/include/asm/vdso/vsyscall.h > +++ b/arch/arm64/include/asm/vdso/vsyscall.h > @@ -2,6 +2,8 @@ > #ifndef __ASM_VDSO_VSYSCALL_H > #define __ASM_VDSO_VSYSCALL_H > > +#define __VDSO_RND_DATA_OFFSET 480 > + How is this offset calculated or defined ? What happens if the other structures grow ? Could you use some sizeof(something) instead of something from asm-offsets if you also need it in ASM ? > #ifndef __ASSEMBLY__ > > #include <linux/timekeeper_internal.h> > @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void) > } > #define __arch_get_k_vdso_data __arm64_get_k_vdso_data > > +static __always_inline > +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) > +{ > + return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET; > +} > +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data > + > static __always_inline > void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) > { > diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile > index d11da6461278..37dad3bb953a 100644 > --- a/arch/arm64/kernel/vdso/Makefile > +++ b/arch/arm64/kernel/vdso/Makefile > @@ -9,7 +9,7 @@ > # Include the generic Makefile to check the built vdso. 
> include $(srctree)/lib/vdso/Makefile > > -obj-vdso := vgettimeofday.o note.o sigreturn.o > +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o > > # Build rules > targets := $(obj-vdso) vdso.so vdso.so.dbg > @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ > $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ > $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ > -Wmissing-prototypes -Wmissing-declarations > +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ > + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ > + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ > + -Wmissing-prototypes -Wmissing-declarations > > CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables > +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables > > ifneq ($(c-gettimeofday-y),) > CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) > diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S > index 45354f2ddf70..f8dbcece20e2 100644 > --- a/arch/arm64/kernel/vdso/vdso.lds.S > +++ b/arch/arm64/kernel/vdso/vdso.lds.S > @@ -12,6 +12,8 @@ > #include <asm/page.h> > #include <asm/vdso.h> > #include <asm-generic/vmlinux.lds.h> > +#include <vdso/datapage.h> > +#include <asm/vdso/vsyscall.h> > > OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") > OUTPUT_ARCH(aarch64) > @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64) > SECTIONS > { > PROVIDE(_vdso_data = . 
- __VVAR_PAGES * PAGE_SIZE); > + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); > #ifdef CONFIG_TIME_NS > PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); > #endif > @@ -102,6 +105,7 @@ VERSION > __kernel_gettimeofday; > __kernel_clock_gettime; > __kernel_clock_getres; > + __kernel_getrandom; > local: *; > }; > } > diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S [skipped ASM as I have not spoken ARM asm since I was at school in the 90's] > diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c > new file mode 100644 > index 000000000000..b6d6f4db3a98 > --- /dev/null > +++ b/arch/arm64/kernel/vdso/vgetrandom.c > @@ -0,0 +1,13 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +#include <linux/types.h> > +#include <linux/mm.h> > + > +#include "../../../../lib/vdso/getrandom.c" For gettimeofday ARM64 uses c-gettimeofday-y in the Makefile instead. You should do the same with c-getrandom-y > + > +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); > + > +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) > +{ > + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); > +} > diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile > index 10ffdda3f2fa..f07ea679a4cc 100644 > --- a/tools/testing/selftests/vDSO/Makefile > +++ b/tools/testing/selftests/vDSO/Makefile > @@ -1,6 +1,6 @@ > # SPDX-License-Identifier: GPL-2.0 > uname_M := $(shell uname -m 2>/dev/null || echo not) > -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) > SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > > TEST_GEN_PROGS := vdso_test_gettimeofday > @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 
x86_64)) > TEST_GEN_PROGS += vdso_standalone_test_x86 > endif > TEST_GEN_PROGS += vdso_test_correctness > -ifeq ($(uname_M),x86_64) > +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) Does that work for you when you cross-compile? For powerpc, when I cross-compile I still get x86_64 from uname_M here, which is unexpected. > TEST_GEN_PROGS += vdso_test_getrandom > ifneq ($(SODIUM),) > TEST_GEN_PROGS += vdso_test_chacha Christophe
On Tue, Aug 27, 2024 at 10:46:21AM +0200, Christophe Leroy wrote: > > +/** > > + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. > > + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. > > + * @key: 32-byte input key. > > + * @counter: 8-byte counter, read on input and updated on return. > > + * @nblocks: Number of blocks to generate. > > + * > > + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write > > + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data > > + * leaking into forked child processes. > > + */ > > +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); > > For Jason: We all redefine this prototype, should we have it in a > central place, or do you expect some architecture to provide some static > inline for it ? Given the doc comment and such, that would be nice. But I didn't see a straightforward way of doing that when I tried before. If you want to try and send another fixup commit, that'd be welcome. > > +#define __VDSO_RND_DATA_OFFSET 480 > > + > > How is this offset calculated or defined ? What happens if the other > structures grow ? Could you use some sizeof(something) instead of > something from asm-offsets if you also need it in ASM ?
FYI, there's a similar static calculation like this in the x86 code: +#if !defined(_SINGLE_DATA) +#define _SINGLE_DATA +DECLARE_VVAR_SINGLE(640, struct vdso_rng_data, _vdso_rng_data) +#endif > > uname_M := $(shell uname -m 2>/dev/null || echo not) > > -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > > +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) > > > SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > > > > TEST_GEN_PROGS := vdso_test_gettimeofday > > @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) > > TEST_GEN_PROGS += vdso_standalone_test_x86 > > endif > > TEST_GEN_PROGS += vdso_test_correctness > > -ifeq ($(uname_M),x86_64) > > +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) > > Does that work for you when you cross-compile ? For powerpc when I cross > compile I still get the x86_64 from uname_M here, which is unexpected. That sounds like a legitimate bug you're pointing out, but not one with Adhemerval's code, right? Rather, it's something to be fixed inside of these self tests as a whole? Jason
On 26/08/24 17:27, Jason A. Donenfeld wrote: > Hi Adhemerval, > > Thanks for posting this! Exciting to have it here. > > Just some small nits for now: > > On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >> +{ >> + register long int x8 asm ("x8") = __NR_getrandom; >> + register long int x0 asm ("x0") = (long int) buffer; >> + register long int x1 asm ("x1") = (long int) len; >> + register long int x2 asm ("x2") = (long int) flags; > > Usually it's written just as `long` or `unsigned long`, and likewise > with the cast. Also, no space after the cast. Ack. > >> +#define __VDSO_RND_DATA_OFFSET 480 > > This is the size of the data currently there? Yes, I used the same strategy x86 did. > >> #include <asm/page.h> >> #include <asm/vdso.h> >> #include <asm-generic/vmlinux.lds.h> >> +#include <vdso/datapage.h> >> +#include <asm/vdso/vsyscall.h> > > Possible to keep the asm/ together? Ack. > >> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes > > nonnce -> nonce Ack.
> >> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >> >> TEST_GEN_PROGS := vdso_test_gettimeofday >> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >> TEST_GEN_PROGS += vdso_standalone_test_x86 >> endif >> TEST_GEN_PROGS += vdso_test_correctness >> -ifeq ($(uname_M),x86_64) >> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >> TEST_GEN_PROGS += vdso_test_getrandom >> ifneq ($(SODIUM),) >> TEST_GEN_PROGS += vdso_test_chacha > > You'll need to add the symlink to get the chacha selftest running: > > $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso > $ git add tools/arch/arm64/vdso > > Also, can you confirm that the chacha selftest runs and works? Yes, last time I had to build it manually since the Makefile machinery seems to be broken even on x86_64. In an Ubuntu VM I have: tools/testing/selftests/vDSO$ make CC vdso_test_gettimeofday CC vdso_test_getcpu CC vdso_test_abi CC vdso_test_clock_getres CC vdso_standalone_test_x86 CC vdso_test_correctness CC vdso_test_getrandom CC vdso_test_chacha In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, from /usr/include/limits.h:195, from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, from /usr/include/sodium/export.h:7, from /usr/include/sodium/crypto_stream_chacha20.h:14, from vdso_test_chacha.c:6: /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" 99 | # if INT_MAX == 32767 | ^~~~~~~ /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" 102 |
# if INT_MAX == 2147483647 | ^~~~~~~ /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" 126 | # if LONG_MAX == 2147483647 | ^~~~~~~~ make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 I will try to figure out how to build it correctly, but I think it would be better to build vgetrandom-chacha.S with a different rule.
On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: > > > On 26/08/24 17:27, Jason A. Donenfeld wrote: > > Hi Adhemerval, > > > > Thanks for posting this! Exciting to have it here. > > > > Just some small nits for now: > > > > On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: > >> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) > >> +{ > >> + register long int x8 asm ("x8") = __NR_getrandom; > >> + register long int x0 asm ("x0") = (long int) buffer; > >> + register long int x1 asm ("x1") = (long int) len; > >> + register long int x2 asm ("x2") = (long int) flags; > > > > Usually it's written just as `long` or `unsigned long`, and likewise > > with the cast. Also, no space after the cast. > > Ack. > > > > >> +#define __VDSO_RND_DATA_OFFSET 480 > > > > This is the size of the data currently there? > > Yes, I used the same strategy x86 did. > > > > >> #include <asm/page.h> > >> #include <asm/vdso.h> > >> #include <asm-generic/vmlinux.lds.h> > >> +#include <vdso/datapage.h> > >> +#include <asm/vdso/vsyscall.h> > > > > Possible to keep the asm/ together? > > Ack. > > > > >> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive > >> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes > > > > nonnce -> nonce > > Ack. 
> > > > >> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > >> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) > >> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > >> > >> TEST_GEN_PROGS := vdso_test_gettimeofday > >> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) > >> TEST_GEN_PROGS += vdso_standalone_test_x86 > >> endif > >> TEST_GEN_PROGS += vdso_test_correctness > >> -ifeq ($(uname_M),x86_64) > >> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) > >> TEST_GEN_PROGS += vdso_test_getrandom > >> ifneq ($(SODIUM),) > >> TEST_GEN_PROGS += vdso_test_chacha > > > > You'll need to add the symlink to get the chacha selftest running: > > > > $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso > > $ git add tools/arch/arm64/vdso > > > > Also, can you confirm that the chacha selftest runs and works? > > Yes, last time I has to built it manually since the Makefile machinery seem > to be broken even on x86_64. 
In a Ubuntu vm I have: > > tools/testing/selftests/vDSO$ make > CC vdso_test_gettimeofday > CC vdso_test_getcpu > CC vdso_test_abi > CC vdso_test_clock_getres > CC vdso_standalone_test_x86 > CC vdso_test_correctness > CC vdso_test_getrandom > CC vdso_test_chacha > In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, > from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, > from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, > from /usr/include/limits.h:195, > from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, > from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, > from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, > from /usr/include/sodium/export.h:7, > from /usr/include/sodium/crypto_stream_chacha20.h:14, > from vdso_test_chacha.c:6: > /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" > 99 | # if INT_MAX == 32767 > | ^~~~~~~ > /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" > 102 | # if INT_MAX == 2147483647 > | ^~~~~~~ > /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" > 126 | # if LONG_MAX == 2147483647 > | ^~~~~~~~ > make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 You get that even with the latest random.git? I thought Christophe's patch fixed that, but maybe not and I should just remove the dependency on the sodium header instead. Jason
On 27/08/24 10:34, Jason A. Donenfeld wrote: > On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: >> >> >> On 26/08/24 17:27, Jason A. Donenfeld wrote: >>> Hi Adhemerval, >>> >>> Thanks for posting this! Exciting to have it here. >>> >>> Just some small nits for now: >>> >>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >>>> +{ >>>> + register long int x8 asm ("x8") = __NR_getrandom; >>>> + register long int x0 asm ("x0") = (long int) buffer; >>>> + register long int x1 asm ("x1") = (long int) len; >>>> + register long int x2 asm ("x2") = (long int) flags; >>> >>> Usually it's written just as `long` or `unsigned long`, and likewise >>> with the cast. Also, no space after the cast. >> >> Ack. >> >>> >>>> +#define __VDSO_RND_DATA_OFFSET 480 >>> >>> This is the size of the data currently there? >> >> Yes, I used the same strategy x86 did. >> >>> >>>> #include <asm/page.h> >>>> #include <asm/vdso.h> >>>> #include <asm-generic/vmlinux.lds.h> >>>> +#include <vdso/datapage.h> >>>> +#include <asm/vdso/vsyscall.h> >>> >>> Possible to keep the asm/ together? >> >> Ack. >> >>> >>>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >>> >>> nonnce -> nonce >> >> Ack. 
>> >>> >>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>>> >>>> TEST_GEN_PROGS := vdso_test_gettimeofday >>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >>>> TEST_GEN_PROGS += vdso_standalone_test_x86 >>>> endif >>>> TEST_GEN_PROGS += vdso_test_correctness >>>> -ifeq ($(uname_M),x86_64) >>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >>>> TEST_GEN_PROGS += vdso_test_getrandom >>>> ifneq ($(SODIUM),) >>>> TEST_GEN_PROGS += vdso_test_chacha >>> >>> You'll need to add the symlink to get the chacha selftest running: >>> >>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso >>> $ git add tools/arch/arm64/vdso >>> >>> Also, can you confirm that the chacha selftest runs and works? >> >> Yes, last time I has to built it manually since the Makefile machinery seem >> to be broken even on x86_64. 
In a Ubuntu vm I have: >> >> tools/testing/selftests/vDSO$ make >> CC vdso_test_gettimeofday >> CC vdso_test_getcpu >> CC vdso_test_abi >> CC vdso_test_clock_getres >> CC vdso_standalone_test_x86 >> CC vdso_test_correctness >> CC vdso_test_getrandom >> CC vdso_test_chacha >> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, >> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, >> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, >> from /usr/include/limits.h:195, >> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, >> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, >> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, >> from /usr/include/sodium/export.h:7, >> from /usr/include/sodium/crypto_stream_chacha20.h:14, >> from vdso_test_chacha.c:6: >> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" >> 99 | # if INT_MAX == 32767 >> | ^~~~~~~ >> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" >> 102 | # if INT_MAX == 2147483647 >> | ^~~~~~~ >> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" >> 126 | # if LONG_MAX == 2147483647 >> | ^~~~~~~~ >> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 > > You get that even with the latest random.git? I thought Christophe's > patch fixed that, but maybe not and I should just remove the dependency > on the sodium header instead. On x86_64 I tested with Linux master. 
With random.git it is a different issue: linux-git/tools/testing/selftests/vDSO$ make CC vdso_test_gettimeofday CC vdso_test_getcpu CC vdso_test_abi CC vdso_test_clock_getres CC vdso_standalone_test_x86 CC vdso_test_correctness CC vdso_test_getrandom CC vdso_test_chacha /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' collect2: error: ld returned 1 exit status If I move -lsodium to the end of the compiler command it works.
Hi Adhemerval, ... > diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S > new file mode 100644 > index 000000000000..3fb9715dd6f0 > --- /dev/null > +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S > @@ -0,0 +1,153 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +#include <linux/linkage.h> > +#include <asm/cache.h> > + > + .text > + > +/* > + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive > + * number of blocks of output with nonnce 0, taking an input key and 8-bytes > + * counter. Importantly does not spill to the stack. > + * > + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, > + * const uint8_t *key, > + * uint32_t *counter, > + * size_t nblocks) > + * > + * x0: output bytes > + * x1: 32-byte key input > + * x2: 8-byte counter input/output > + * x3: number of 64-byte block to write to output > + */ > +SYM_FUNC_START(__arch_chacha20_blocks_nostack) > + Shouldn't we preserve d8-d15 here? > + /* v0 = "expand 32-byte k" */ > + adr_l x8, CTES > + ld1 {v5.4s}, [x8] > + /* v1,v2 = key */ > + ld1 { v6.4s, v7.4s }, [x1] > + /* v3 = counter || zero noonce */ > + ldr d8, [x2] > + > + adr_l x8, ONE > + ldr q13, [x8] > + > + adr_l x10, ROT8 > + ld1 {v12.4s}, [x10] > +.Lblock: > + /* copy state to auxiliary vectors for the final add after the permute. */ > + mov v0.16b, v5.16b > + mov v1.16b, v6.16b > + mov v2.16b, v7.16b > + mov v3.16b, v8.16b > + > + mov w4, 20 > +.Lpermute: > + /* > + * Permute one 64-byte block where the state matrix is stored in the four NEON > + * registers v0-v3. It performs matrix operations on four words in parallel, > + * but requires shuffling to rearrange the words after each round. 
> + */ > + > +.Ldoubleround: > + /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */ > + add v0.4s, v0.4s, v1.4s > + eor v3.16b, v3.16b, v0.16b > + rev32 v3.8h, v3.8h > + > + /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */ > + add v2.4s, v2.4s, v3.4s > + eor v4.16b, v1.16b, v2.16b > + shl v1.4s, v4.4s, #12 > + sri v1.4s, v4.4s, #20 > + > + /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */ > + add v0.4s, v0.4s, v1.4s > + eor v3.16b, v3.16b, v0.16b > + tbl v3.16b, {v3.16b}, v12.16b > + > + /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */ > + add v2.4s, v2.4s, v3.4s > + eor v4.16b, v1.16b, v2.16b > + shl v1.4s, v4.4s, #7 > + sri v1.4s, v4.4s, #25 > + > + /* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */ > + ext v1.16b, v1.16b, v1.16b, #4 > + /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */ > + ext v2.16b, v2.16b, v2.16b, #8 > + /* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */ > + ext v3.16b, v3.16b, v3.16b, #12 > + > + /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */ > + add v0.4s, v0.4s, v1.4s > + eor v3.16b, v3.16b, v0.16b > + rev32 v3.8h, v3.8h > + > + /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */ > + add v2.4s, v2.4s, v3.4s > + eor v4.16b, v1.16b, v2.16b > + shl v1.4s, v4.4s, #12 > + sri v1.4s, v4.4s, #20 > + > + /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */ > + add v0.4s, v0.4s, v1.4s > + eor v3.16b, v3.16b, v0.16b > + tbl v3.16b, {v3.16b}, v12.16b > + > + /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */ > + add v2.4s, v2.4s, v3.4s > + eor v4.16b, v1.16b, v2.16b > + shl v1.4s, v4.4s, #7 > + sri v1.4s, v4.4s, #25 > + > + /* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */ > + ext v1.16b, v1.16b, v1.16b, #12 > + /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */ > + ext v2.16b, v2.16b, v2.16b, #8 > + /* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */ > + ext v3.16b, v3.16b, v3.16b, #4 > + > + subs w4, w4, #2 > + b.ne .Ldoubleround > + > + /* output0 = state0 + v0 */ > + add v0.4s, v0.4s, v5.4s > + /* output1 = state1 + v1 */ > + add v1.4s, v1.4s, v6.4s > + /* output2 = state2 + v2 */ > + add v2.4s, v2.4s, v7.4s > + /* output2 = state3 + v3 */ > + add v3.4s, v3.4s, v8.4s > 
+ st1 { v0.4s - v3.4s }, [x0] > + > + /* ++copy3.counter */ > + add d8, d8, d13 > + > + /* output += 64, --nblocks */ > + add x0, x0, 64 > + subs x3, x3, #1 > + b.ne .Lblock > + > + /* counter = copy3.counter */ > + str d8, [x2] > + > + /* Zero out the potentially sensitive regs, in case nothing uses these again. */ > + eor v0.16b, v0.16b, v0.16b > + eor v1.16b, v1.16b, v1.16b > + eor v2.16b, v2.16b, v2.16b > + eor v3.16b, v3.16b, v3.16b > + eor v6.16b, v6.16b, v6.16b > + eor v7.16b, v7.16b, v7.16b > + ret > +SYM_FUNC_END(__arch_chacha20_blocks_nostack) > + > + .section ".rodata", "a", %progbits > + .align L1_CACHE_SHIFT > + > +CTES: .word 1634760805, 857760878, 2036477234, 1797285236 > +ONE: .xword 1, 0 > +ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f > + > +emit_aarch64_feature_1_and
Le 27/08/2024 à 15:17, Adhemerval Zanella Netto a écrit : > [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] > > On 26/08/24 17:27, Jason A. Donenfeld wrote: >> Hi Adhemerval, >> >> Thanks for posting this! Exciting to have it here. >> >> Just some small nits for now: >> >> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >>> +{ >>> + register long int x8 asm ("x8") = __NR_getrandom; >>> + register long int x0 asm ("x0") = (long int) buffer; >>> + register long int x1 asm ("x1") = (long int) len; >>> + register long int x2 asm ("x2") = (long int) flags; >> >> Usually it's written just as `long` or `unsigned long`, and likewise >> with the cast. Also, no space after the cast. > > Ack. > >> >>> +#define __VDSO_RND_DATA_OFFSET 480 >> >> This is the size of the data currently there? > > Yes, I used the same strategy x86 did. > >> >>> #include <asm/page.h> >>> #include <asm/vdso.h> >>> #include <asm-generic/vmlinux.lds.h> >>> +#include <vdso/datapage.h> >>> +#include <asm/vdso/vsyscall.h> >> >> Possible to keep the asm/ together? > > Ack. > >> >>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >> >> nonnce -> nonce > > Ack. 
> >> >>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>> >>> TEST_GEN_PROGS := vdso_test_gettimeofday >>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >>> TEST_GEN_PROGS += vdso_standalone_test_x86 >>> endif >>> TEST_GEN_PROGS += vdso_test_correctness >>> -ifeq ($(uname_M),x86_64) >>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >>> TEST_GEN_PROGS += vdso_test_getrandom >>> ifneq ($(SODIUM),) >>> TEST_GEN_PROGS += vdso_test_chacha >> >> You'll need to add the symlink to get the chacha selftest running: >> >> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso >> $ git add tools/arch/arm64/vdso >> >> Also, can you confirm that the chacha selftest runs and works? > > Yes, last time I has to built it manually since the Makefile machinery seem > to be broken even on x86_64. 
In a Ubuntu vm I have: > > tools/testing/selftests/vDSO$ make > CC vdso_test_gettimeofday > CC vdso_test_getcpu > CC vdso_test_abi > CC vdso_test_clock_getres > CC vdso_standalone_test_x86 > CC vdso_test_correctness > CC vdso_test_getrandom > CC vdso_test_chacha > In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, > from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, > from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, > from /usr/include/limits.h:195, > from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, > from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, > from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, > from /usr/include/sodium/export.h:7, > from /usr/include/sodium/crypto_stream_chacha20.h:14, > from vdso_test_chacha.c:6: > /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" > 99 | # if INT_MAX == 32767 > | ^~~~~~~ > /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" > 102 | # if INT_MAX == 2147483647 > | ^~~~~~~ > /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" > 126 | # if LONG_MAX == 2147483647 > | ^~~~~~~~ > make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 > > > I will try to figure out to be build it correctly, but I think it would be > better to vgetrandom-chacha.S with a different rule. Hi, can you try with the following commit : https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git/commit/?id=e1af61334ade39a9af3031b7189f9acb419648a4 Thanks Christophe
On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: > Hook up the generic vDSO implementation to the aarch64 vDSO data page. > The _vdso_rng_data required data is placed within the _vdso_data vvar > page, by using a offset larger than the vdso_data > (__VDSO_RND_DATA_OFFSET). > > The vDSO function requires a ChaCha20 implementation that does not > write to the stack, and that can do an entire ChaCha20 permutation. > The one provided is based on the current chacha-neon-core.S and uses NEON > on the permute operation. Is there a fallback for when NEON isn't present? The kernel supports some (deeply embedded) implementations where NEON is not present, and AFAICT this will UNDEF on those machines. Mark. > Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> > --- > arch/arm64/Kconfig | 1 + > arch/arm64/include/asm/vdso/getrandom.h | 50 +++++++ > arch/arm64/include/asm/vdso/vsyscall.h | 9 ++ > arch/arm64/kernel/vdso/Makefile | 7 +- > arch/arm64/kernel/vdso/vdso.lds.S | 4 + > arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++ > arch/arm64/kernel/vdso/vgetrandom.c | 13 ++ > tools/testing/selftests/vDSO/Makefile | 4 +- > 8 files changed, 238 insertions(+), 3 deletions(-) > create mode 100644 arch/arm64/include/asm/vdso/getrandom.h > create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S > create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index b3fc891f1544..e3f4c5bf0661 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -237,6 +237,7 @@ config ARM64 > select HAVE_KPROBES > select HAVE_KRETPROBES > select HAVE_GENERIC_VDSO > + select VDSO_GETRANDOM > select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU > select IRQ_DOMAIN > select IRQ_FORCED_THREADING > diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h > new file mode 100644 > index 000000000000..6e2b136813ca > --- /dev/null > +++ 
b/arch/arm64/include/asm/vdso/getrandom.h > @@ -0,0 +1,50 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef __ASM_VDSO_GETRANDOM_H > +#define __ASM_VDSO_GETRANDOM_H > + > +#ifndef __ASSEMBLY__ > + > +#include <asm/unistd.h> > +#include <vdso/datapage.h> > + > +/** > + * getrandom_syscall - Invoke the getrandom() syscall. > + * @buffer: Destination buffer to fill with random bytes. > + * @len: Size of @buffer in bytes. > + * @flags: Zero or more GRND_* flags. > + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. > + */ > +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) > +{ > + register long int x8 asm ("x8") = __NR_getrandom; > + register long int x0 asm ("x0") = (long int) buffer; > + register long int x1 asm ("x1") = (long int) len; > + register long int x2 asm ("x2") = (long int) flags; > + > + asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2)); > + > + return x0; > +} > + > +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) > +{ > + return &_vdso_rng_data; > +} > + > +/** > + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. > + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. > + * @key: 32-byte input key. > + * @counter: 8-byte counter, read on input and updated on return. > + * @nblocks: Number of blocks to generate. > + * > + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write > + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data > + * leaking into forked child processes. 
> + */ > +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); > + > +#endif /* !__ASSEMBLY__ */ > + > +#endif /* __ASM_VDSO_GETRANDOM_H */ > diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h > index f94b1457c117..7ddb2bc3b57b 100644 > --- a/arch/arm64/include/asm/vdso/vsyscall.h > +++ b/arch/arm64/include/asm/vdso/vsyscall.h > @@ -2,6 +2,8 @@ > #ifndef __ASM_VDSO_VSYSCALL_H > #define __ASM_VDSO_VSYSCALL_H > > +#define __VDSO_RND_DATA_OFFSET 480 > + > #ifndef __ASSEMBLY__ > > #include <linux/timekeeper_internal.h> > @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void) > } > #define __arch_get_k_vdso_data __arm64_get_k_vdso_data > > +static __always_inline > +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) > +{ > + return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET; > +} > +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data > + > static __always_inline > void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) > { > diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile > index d11da6461278..37dad3bb953a 100644 > --- a/arch/arm64/kernel/vdso/Makefile > +++ b/arch/arm64/kernel/vdso/Makefile > @@ -9,7 +9,7 @@ > # Include the generic Makefile to check the built vdso. 
> include $(srctree)/lib/vdso/Makefile > > -obj-vdso := vgettimeofday.o note.o sigreturn.o > +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o > > # Build rules > targets := $(obj-vdso) vdso.so vdso.so.dbg > @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ > $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ > $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ > -Wmissing-prototypes -Wmissing-declarations > +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ > + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ > + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ > + -Wmissing-prototypes -Wmissing-declarations > > CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables > +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables > > ifneq ($(c-gettimeofday-y),) > CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) > diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S > index 45354f2ddf70..f8dbcece20e2 100644 > --- a/arch/arm64/kernel/vdso/vdso.lds.S > +++ b/arch/arm64/kernel/vdso/vdso.lds.S > @@ -12,6 +12,8 @@ > #include <asm/page.h> > #include <asm/vdso.h> > #include <asm-generic/vmlinux.lds.h> > +#include <vdso/datapage.h> > +#include <asm/vdso/vsyscall.h> > > OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") > OUTPUT_ARCH(aarch64) > @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64) > SECTIONS > { > PROVIDE(_vdso_data = . 
- __VVAR_PAGES * PAGE_SIZE); > + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); > #ifdef CONFIG_TIME_NS > PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); > #endif > @@ -102,6 +105,7 @@ VERSION > __kernel_gettimeofday; > __kernel_clock_gettime; > __kernel_clock_getres; > + __kernel_getrandom; > local: *; > }; > } > diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S > new file mode 100644 > index 000000000000..3fb9715dd6f0 > --- /dev/null > +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S > @@ -0,0 +1,153 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +#include <linux/linkage.h> > +#include <asm/cache.h> > + > + .text > + > +/* > + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive > + * number of blocks of output with nonnce 0, taking an input key and 8-bytes > + * counter. Importantly does not spill to the stack. > + * > + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, > + * const uint8_t *key, > + * uint32_t *counter, > + * size_t nblocks) > + * > + * x0: output bytes > + * x1: 32-byte key input > + * x2: 8-byte counter input/output > + * x3: number of 64-byte block to write to output > + */ > +SYM_FUNC_START(__arch_chacha20_blocks_nostack) > + > + /* v0 = "expand 32-byte k" */ > + adr_l x8, CTES > + ld1 {v5.4s}, [x8] > + /* v1,v2 = key */ > + ld1 { v6.4s, v7.4s }, [x1] > + /* v3 = counter || zero noonce */ > + ldr d8, [x2] > + > + adr_l x8, ONE > + ldr q13, [x8] > + > + adr_l x10, ROT8 > + ld1 {v12.4s}, [x10] > +.Lblock: > + /* copy state to auxiliary vectors for the final add after the permute. */ > + mov v0.16b, v5.16b > + mov v1.16b, v6.16b > + mov v2.16b, v7.16b > + mov v3.16b, v8.16b > + > + mov w4, 20 > +.Lpermute: > + /* > + * Permute one 64-byte block where the state matrix is stored in the four NEON > + * registers v0-v3. It performs matrix operations on four words in parallel, > + * but requires shuffling to rearrange the words after each round. 
> + */ > + > +.Ldoubleround: > + /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */ > + add v0.4s, v0.4s, v1.4s > + eor v3.16b, v3.16b, v0.16b > + rev32 v3.8h, v3.8h > + > + /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */ > + add v2.4s, v2.4s, v3.4s > + eor v4.16b, v1.16b, v2.16b > + shl v1.4s, v4.4s, #12 > + sri v1.4s, v4.4s, #20 > + > + /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */ > + add v0.4s, v0.4s, v1.4s > + eor v3.16b, v3.16b, v0.16b > + tbl v3.16b, {v3.16b}, v12.16b > + > + /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */ > + add v2.4s, v2.4s, v3.4s > + eor v4.16b, v1.16b, v2.16b > + shl v1.4s, v4.4s, #7 > + sri v1.4s, v4.4s, #25 > + > + /* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */ > + ext v1.16b, v1.16b, v1.16b, #4 > + /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */ > + ext v2.16b, v2.16b, v2.16b, #8 > + /* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */ > + ext v3.16b, v3.16b, v3.16b, #12 > + > + /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */ > + add v0.4s, v0.4s, v1.4s > + eor v3.16b, v3.16b, v0.16b > + rev32 v3.8h, v3.8h > + > + /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */ > + add v2.4s, v2.4s, v3.4s > + eor v4.16b, v1.16b, v2.16b > + shl v1.4s, v4.4s, #12 > + sri v1.4s, v4.4s, #20 > + > + /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */ > + add v0.4s, v0.4s, v1.4s > + eor v3.16b, v3.16b, v0.16b > + tbl v3.16b, {v3.16b}, v12.16b > + > + /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */ > + add v2.4s, v2.4s, v3.4s > + eor v4.16b, v1.16b, v2.16b > + shl v1.4s, v4.4s, #7 > + sri v1.4s, v4.4s, #25 > + > + /* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */ > + ext v1.16b, v1.16b, v1.16b, #12 > + /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */ > + ext v2.16b, v2.16b, v2.16b, #8 > + /* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */ > + ext v3.16b, v3.16b, v3.16b, #4 > + > + subs w4, w4, #2 > + b.ne .Ldoubleround > + > + /* output0 = state0 + v0 */ > + add v0.4s, v0.4s, v5.4s > + /* output1 = state1 + v1 */ > + add v1.4s, v1.4s, v6.4s > + /* output2 = state2 + v2 */ > + add v2.4s, v2.4s, v7.4s > + /* output2 = state3 + v3 */ > + add v3.4s, v3.4s, v8.4s > 
+ st1 { v0.4s - v3.4s }, [x0] > + > + /* ++copy3.counter */ > + add d8, d8, d13 > + > + /* output += 64, --nblocks */ > + add x0, x0, 64 > + subs x3, x3, #1 > + b.ne .Lblock > + > + /* counter = copy3.counter */ > + str d8, [x2] > + > + /* Zero out the potentially sensitive regs, in case nothing uses these again. */ > + eor v0.16b, v0.16b, v0.16b > + eor v1.16b, v1.16b, v1.16b > + eor v2.16b, v2.16b, v2.16b > + eor v3.16b, v3.16b, v3.16b > + eor v6.16b, v6.16b, v6.16b > + eor v7.16b, v7.16b, v7.16b > + ret > +SYM_FUNC_END(__arch_chacha20_blocks_nostack) > + > + .section ".rodata", "a", %progbits > + .align L1_CACHE_SHIFT > + > +CTES: .word 1634760805, 857760878, 2036477234, 1797285236 > +ONE: .xword 1, 0 > +ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f > + > +emit_aarch64_feature_1_and > diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c > new file mode 100644 > index 000000000000..b6d6f4db3a98 > --- /dev/null > +++ b/arch/arm64/kernel/vdso/vgetrandom.c > @@ -0,0 +1,13 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +#include <linux/types.h> > +#include <linux/mm.h> > + > +#include "../../../../lib/vdso/getrandom.c" > + > +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); > + > +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) > +{ > + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); > +} > diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile > index 10ffdda3f2fa..f07ea679a4cc 100644 > --- a/tools/testing/selftests/vDSO/Makefile > +++ b/tools/testing/selftests/vDSO/Makefile > @@ -1,6 +1,6 @@ > # SPDX-License-Identifier: GPL-2.0 > uname_M := $(shell uname -m 2>/dev/null || echo not) > -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e 
s/aarch64.*/arm64/) > SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > > TEST_GEN_PROGS := vdso_test_gettimeofday > @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) > TEST_GEN_PROGS += vdso_standalone_test_x86 > endif > TEST_GEN_PROGS += vdso_test_correctness > -ifeq ($(uname_M),x86_64) > +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) > TEST_GEN_PROGS += vdso_test_getrandom > ifneq ($(SODIUM),) > TEST_GEN_PROGS += vdso_test_chacha > -- > 2.43.0 > >
Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit : > [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] > > On 27/08/24 10:34, Jason A. Donenfeld wrote: >> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: >>> >>> >>> On 26/08/24 17:27, Jason A. Donenfeld wrote: >>>> Hi Adhemerval, >>>> >>>> Thanks for posting this! Exciting to have it here. >>>> >>>> Just some small nits for now: >>>> >>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >>>>> +{ >>>>> + register long int x8 asm ("x8") = __NR_getrandom; >>>>> + register long int x0 asm ("x0") = (long int) buffer; >>>>> + register long int x1 asm ("x1") = (long int) len; >>>>> + register long int x2 asm ("x2") = (long int) flags; >>>> >>>> Usually it's written just as `long` or `unsigned long`, and likewise >>>> with the cast. Also, no space after the cast. >>> >>> Ack. >>> >>>> >>>>> +#define __VDSO_RND_DATA_OFFSET 480 >>>> >>>> This is the size of the data currently there? >>> >>> Yes, I used the same strategy x86 did. >>> >>>> >>>>> #include <asm/page.h> >>>>> #include <asm/vdso.h> >>>>> #include <asm-generic/vmlinux.lds.h> >>>>> +#include <vdso/datapage.h> >>>>> +#include <asm/vdso/vsyscall.h> >>>> >>>> Possible to keep the asm/ together? >>> >>> Ack. >>> >>>> >>>>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >>>> >>>> nonnce -> nonce >>> >>> Ack. 
>>> >>>> >>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >>>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>>>> >>>>> TEST_GEN_PROGS := vdso_test_gettimeofday >>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >>>>> TEST_GEN_PROGS += vdso_standalone_test_x86 >>>>> endif >>>>> TEST_GEN_PROGS += vdso_test_correctness >>>>> -ifeq ($(uname_M),x86_64) >>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >>>>> TEST_GEN_PROGS += vdso_test_getrandom >>>>> ifneq ($(SODIUM),) >>>>> TEST_GEN_PROGS += vdso_test_chacha >>>> >>>> You'll need to add the symlink to get the chacha selftest running: >>>> >>>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso >>>> $ git add tools/arch/arm64/vdso >>>> >>>> Also, can you confirm that the chacha selftest runs and works? >>> >>> Yes, last time I has to built it manually since the Makefile machinery seem >>> to be broken even on x86_64. 
In a Ubuntu vm I have: >>> >>> tools/testing/selftests/vDSO$ make >>> CC vdso_test_gettimeofday >>> CC vdso_test_getcpu >>> CC vdso_test_abi >>> CC vdso_test_clock_getres >>> CC vdso_standalone_test_x86 >>> CC vdso_test_correctness >>> CC vdso_test_getrandom >>> CC vdso_test_chacha >>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, >>> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, >>> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, >>> from /usr/include/limits.h:195, >>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, >>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, >>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, >>> from /usr/include/sodium/export.h:7, >>> from /usr/include/sodium/crypto_stream_chacha20.h:14, >>> from vdso_test_chacha.c:6: >>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" >>> 99 | # if INT_MAX == 32767 >>> | ^~~~~~~ >>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" >>> 102 | # if INT_MAX == 2147483647 >>> | ^~~~~~~ >>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" >>> 126 | # if LONG_MAX == 2147483647 >>> | ^~~~~~~~ >>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 >> >> You get that even with the latest random.git? I thought Christophe's >> patch fixed that, but maybe not and I should just remove the dependency >> on the sodium header instead. > > On x86_64 I tested with Linux master. 
With random.git it is a different issue: > > linux-git/tools/testing/selftests/vDSO$ make > CC vdso_test_gettimeofday > CC vdso_test_getcpu > CC vdso_test_abi > CC vdso_test_clock_getres > CC vdso_standalone_test_x86 > CC vdso_test_correctness > CC vdso_test_getrandom > CC vdso_test_chacha > /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': > vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' > collect2: error: ld returned 1 exit status > > If I move -lsodium to the end of the compiler command it works. > > Try a "make clean" maybe ? I have Fedora 38 and no build problem with latest random tree: $ make V=1 gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_getcpu.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_abi.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_clock_getres.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector vdso_standalone_test_x86.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86 gcc -std=gnu99 -D_GNU_SOURCE= -ldl vdso_test_correctness.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi vdso_test_getrandom.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include 
-idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha $
On 27/08/24 11:00, Christophe Leroy wrote: > > > Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit : >> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] >> >> On 27/08/24 10:34, Jason A. Donenfeld wrote: >>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: >>>> >>>> >>>> On 26/08/24 17:27, Jason A. Donenfeld wrote: >>>>> Hi Adhemerval, >>>>> >>>>> Thanks for posting this! Exciting to have it here. >>>>> >>>>> Just some small nits for now: >>>>> >>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >>>>>> +{ >>>>>> + register long int x8 asm ("x8") = __NR_getrandom; >>>>>> + register long int x0 asm ("x0") = (long int) buffer; >>>>>> + register long int x1 asm ("x1") = (long int) len; >>>>>> + register long int x2 asm ("x2") = (long int) flags; >>>>> >>>>> Usually it's written just as `long` or `unsigned long`, and likewise >>>>> with the cast. Also, no space after the cast. >>>> >>>> Ack. >>>> >>>>> >>>>>> +#define __VDSO_RND_DATA_OFFSET 480 >>>>> >>>>> This is the size of the data currently there? >>>> >>>> Yes, I used the same strategy x86 did. >>>> >>>>> >>>>>> #include <asm/page.h> >>>>>> #include <asm/vdso.h> >>>>>> #include <asm-generic/vmlinux.lds.h> >>>>>> +#include <vdso/datapage.h> >>>>>> +#include <asm/vdso/vsyscall.h> >>>>> >>>>> Possible to keep the asm/ together? >>>> >>>> Ack. >>>> >>>>> >>>>>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >>>>> >>>>> nonnce -> nonce >>>> >>>> Ack. 
>>>> >>>>> >>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >>>>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>>>>> >>>>>> TEST_GEN_PROGS := vdso_test_gettimeofday >>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >>>>>> TEST_GEN_PROGS += vdso_standalone_test_x86 >>>>>> endif >>>>>> TEST_GEN_PROGS += vdso_test_correctness >>>>>> -ifeq ($(uname_M),x86_64) >>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >>>>>> TEST_GEN_PROGS += vdso_test_getrandom >>>>>> ifneq ($(SODIUM),) >>>>>> TEST_GEN_PROGS += vdso_test_chacha >>>>> >>>>> You'll need to add the symlink to get the chacha selftest running: >>>>> >>>>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso >>>>> $ git add tools/arch/arm64/vdso >>>>> >>>>> Also, can you confirm that the chacha selftest runs and works? >>>> >>>> Yes, last time I has to built it manually since the Makefile machinery seem >>>> to be broken even on x86_64. 
In a Ubuntu vm I have: >>>> >>>> tools/testing/selftests/vDSO$ make >>>> CC vdso_test_gettimeofday >>>> CC vdso_test_getcpu >>>> CC vdso_test_abi >>>> CC vdso_test_clock_getres >>>> CC vdso_standalone_test_x86 >>>> CC vdso_test_correctness >>>> CC vdso_test_getrandom >>>> CC vdso_test_chacha >>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, >>>> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, >>>> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, >>>> from /usr/include/limits.h:195, >>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, >>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, >>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, >>>> from /usr/include/sodium/export.h:7, >>>> from /usr/include/sodium/crypto_stream_chacha20.h:14, >>>> from vdso_test_chacha.c:6: >>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" >>>> 99 | # if INT_MAX == 32767 >>>> | ^~~~~~~ >>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" >>>> 102 | # if INT_MAX == 2147483647 >>>> | ^~~~~~~ >>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" >>>> 126 | # if LONG_MAX == 2147483647 >>>> | ^~~~~~~~ >>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 >>> >>> You get that even with the latest random.git? I thought Christophe's >>> patch fixed that, but maybe not and I should just remove the dependency >>> on the sodium header instead. >> >> On x86_64 I tested with Linux master. 
With random.git it is a different issue: >> >> linux-git/tools/testing/selftests/vDSO$ make >> CC vdso_test_gettimeofday >> CC vdso_test_getcpu >> CC vdso_test_abi >> CC vdso_test_clock_getres >> CC vdso_standalone_test_x86 >> CC vdso_test_correctness >> CC vdso_test_getrandom >> CC vdso_test_chacha >> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': >> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' >> collect2: error: ld returned 1 exit status >> >> If I move -lsodium to the end of the compiler command it works. >> >> > > Try a "make clean" maybe ? > > I have Fedora 38 and no build problem with latest random tree: > > $ make V=1 > gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday > gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_getcpu.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu > gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_abi.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi > gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_clock_getres.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres > gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector vdso_standalone_test_x86.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86 > gcc -std=gnu99 -D_GNU_SOURCE= -ldl vdso_test_correctness.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness > gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi vdso_test_getrandom.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom > gcc -std=gnu99 -D_GNU_SOURCE= -idirafter 
/home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha > $ It is a clean tree (git clean -dfx), and I take it there is no need to build a kernel beforehand.
On 27/08/24 11:00, Mark Rutland wrote: > On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >> Hook up the generic vDSO implementation to the aarch64 vDSO data page. >> The _vdso_rng_data required data is placed within the _vdso_data vvar >> page, by using a offset larger than the vdso_data >> (__VDSO_RND_DATA_OFFSET). >> >> The vDSO function requires a ChaCha20 implementation that does not >> write to the stack, and that can do an entire ChaCha20 permutation. >> The one provided is based on the current chacha-neon-core.S and uses NEON >> on the permute operation. > > Is there a fallback for when NEON isn't present? The kernel supports > some (deeply embedded) implementations where NEON is not present, and > AFAICT this will UNDEF on those machines. > > Mark. Not right now; in this case I think it is better to just do something similar to LoongArch and fall back to the syscall. I will add this in the next version. > >> Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> >> --- >> arch/arm64/Kconfig | 1 + >> arch/arm64/include/asm/vdso/getrandom.h | 50 +++++++ >> arch/arm64/include/asm/vdso/vsyscall.h | 9 ++ >> arch/arm64/kernel/vdso/Makefile | 7 +- >> arch/arm64/kernel/vdso/vdso.lds.S | 4 + >> arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++ >> arch/arm64/kernel/vdso/vgetrandom.c | 13 ++ >> tools/testing/selftests/vDSO/Makefile | 4 +- >> 8 files changed, 238 insertions(+), 3 deletions(-) >> create mode 100644 arch/arm64/include/asm/vdso/getrandom.h >> create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S >> create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index b3fc891f1544..e3f4c5bf0661 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -237,6 +237,7 @@ config ARM64 >> select HAVE_KPROBES >> select HAVE_KRETPROBES >> select HAVE_GENERIC_VDSO >> + select VDSO_GETRANDOM >> select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU >> 
select IRQ_DOMAIN >> select IRQ_FORCED_THREADING >> diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h >> new file mode 100644 >> index 000000000000..6e2b136813ca >> --- /dev/null >> +++ b/arch/arm64/include/asm/vdso/getrandom.h >> @@ -0,0 +1,50 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> + >> +#ifndef __ASM_VDSO_GETRANDOM_H >> +#define __ASM_VDSO_GETRANDOM_H >> + >> +#ifndef __ASSEMBLY__ >> + >> +#include <asm/unistd.h> >> +#include <vdso/datapage.h> >> + >> +/** >> + * getrandom_syscall - Invoke the getrandom() syscall. >> + * @buffer: Destination buffer to fill with random bytes. >> + * @len: Size of @buffer in bytes. >> + * @flags: Zero or more GRND_* flags. >> + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. >> + */ >> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >> +{ >> + register long int x8 asm ("x8") = __NR_getrandom; >> + register long int x0 asm ("x0") = (long int) buffer; >> + register long int x1 asm ("x1") = (long int) len; >> + register long int x2 asm ("x2") = (long int) flags; >> + >> + asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2)); >> + >> + return x0; >> +} >> + >> +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) >> +{ >> + return &_vdso_rng_data; >> +} >> + >> +/** >> + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. >> + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. >> + * @key: 32-byte input key. >> + * @counter: 8-byte counter, read on input and updated on return. >> + * @nblocks: Number of blocks to generate. >> + * >> + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write >> + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data >> + * leaking into forked child processes. 
>> + */ >> +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); >> + >> +#endif /* !__ASSEMBLY__ */ >> + >> +#endif /* __ASM_VDSO_GETRANDOM_H */ >> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h >> index f94b1457c117..7ddb2bc3b57b 100644 >> --- a/arch/arm64/include/asm/vdso/vsyscall.h >> +++ b/arch/arm64/include/asm/vdso/vsyscall.h >> @@ -2,6 +2,8 @@ >> #ifndef __ASM_VDSO_VSYSCALL_H >> #define __ASM_VDSO_VSYSCALL_H >> >> +#define __VDSO_RND_DATA_OFFSET 480 >> + >> #ifndef __ASSEMBLY__ >> >> #include <linux/timekeeper_internal.h> >> @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void) >> } >> #define __arch_get_k_vdso_data __arm64_get_k_vdso_data >> >> +static __always_inline >> +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) >> +{ >> + return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET; >> +} >> +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data >> + >> static __always_inline >> void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) >> { >> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile >> index d11da6461278..37dad3bb953a 100644 >> --- a/arch/arm64/kernel/vdso/Makefile >> +++ b/arch/arm64/kernel/vdso/Makefile >> @@ -9,7 +9,7 @@ >> # Include the generic Makefile to check the built vdso. 
>> include $(srctree)/lib/vdso/Makefile >> >> -obj-vdso := vgettimeofday.o note.o sigreturn.o >> +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o >> >> # Build rules >> targets := $(obj-vdso) vdso.so vdso.so.dbg >> @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ >> $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ >> $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ >> -Wmissing-prototypes -Wmissing-declarations >> +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ >> + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ >> + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ >> + -Wmissing-prototypes -Wmissing-declarations >> >> CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables >> +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables >> >> ifneq ($(c-gettimeofday-y),) >> CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) >> diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S >> index 45354f2ddf70..f8dbcece20e2 100644 >> --- a/arch/arm64/kernel/vdso/vdso.lds.S >> +++ b/arch/arm64/kernel/vdso/vdso.lds.S >> @@ -12,6 +12,8 @@ >> #include <asm/page.h> >> #include <asm/vdso.h> >> #include <asm-generic/vmlinux.lds.h> >> +#include <vdso/datapage.h> >> +#include <asm/vdso/vsyscall.h> >> >> OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") >> OUTPUT_ARCH(aarch64) >> @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64) >> SECTIONS >> { >> PROVIDE(_vdso_data = . 
- __VVAR_PAGES * PAGE_SIZE); >> + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); >> #ifdef CONFIG_TIME_NS >> PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); >> #endif >> @@ -102,6 +105,7 @@ VERSION >> __kernel_gettimeofday; >> __kernel_clock_gettime; >> __kernel_clock_getres; >> + __kernel_getrandom; >> local: *; >> }; >> } >> diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S >> new file mode 100644 >> index 000000000000..3fb9715dd6f0 >> --- /dev/null >> +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S >> @@ -0,0 +1,153 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> + >> +#include <linux/linkage.h> >> +#include <asm/cache.h> >> + >> + .text >> + >> +/* >> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >> + * counter. Importantly does not spill to the stack. >> + * >> + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, >> + * const uint8_t *key, >> + * uint32_t *counter, >> + * size_t nblocks) >> + * >> + * x0: output bytes >> + * x1: 32-byte key input >> + * x2: 8-byte counter input/output >> + * x3: number of 64-byte block to write to output >> + */ >> +SYM_FUNC_START(__arch_chacha20_blocks_nostack) >> + >> + /* v0 = "expand 32-byte k" */ >> + adr_l x8, CTES >> + ld1 {v5.4s}, [x8] >> + /* v1,v2 = key */ >> + ld1 { v6.4s, v7.4s }, [x1] >> + /* v3 = counter || zero noonce */ >> + ldr d8, [x2] >> + >> + adr_l x8, ONE >> + ldr q13, [x8] >> + >> + adr_l x10, ROT8 >> + ld1 {v12.4s}, [x10] >> +.Lblock: >> + /* copy state to auxiliary vectors for the final add after the permute. */ >> + mov v0.16b, v5.16b >> + mov v1.16b, v6.16b >> + mov v2.16b, v7.16b >> + mov v3.16b, v8.16b >> + >> + mov w4, 20 >> +.Lpermute: >> + /* >> + * Permute one 64-byte block where the state matrix is stored in the four NEON >> + * registers v0-v3. 
It performs matrix operations on four words in parallel, >> + * but requires shuffling to rearrange the words after each round. >> + */ >> + >> +.Ldoubleround: >> + /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */ >> + add v0.4s, v0.4s, v1.4s >> + eor v3.16b, v3.16b, v0.16b >> + rev32 v3.8h, v3.8h >> + >> + /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */ >> + add v2.4s, v2.4s, v3.4s >> + eor v4.16b, v1.16b, v2.16b >> + shl v1.4s, v4.4s, #12 >> + sri v1.4s, v4.4s, #20 >> + >> + /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */ >> + add v0.4s, v0.4s, v1.4s >> + eor v3.16b, v3.16b, v0.16b >> + tbl v3.16b, {v3.16b}, v12.16b >> + >> + /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */ >> + add v2.4s, v2.4s, v3.4s >> + eor v4.16b, v1.16b, v2.16b >> + shl v1.4s, v4.4s, #7 >> + sri v1.4s, v4.4s, #25 >> + >> + /* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */ >> + ext v1.16b, v1.16b, v1.16b, #4 >> + /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */ >> + ext v2.16b, v2.16b, v2.16b, #8 >> + /* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */ >> + ext v3.16b, v3.16b, v3.16b, #12 >> + >> + /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */ >> + add v0.4s, v0.4s, v1.4s >> + eor v3.16b, v3.16b, v0.16b >> + rev32 v3.8h, v3.8h >> + >> + /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */ >> + add v2.4s, v2.4s, v3.4s >> + eor v4.16b, v1.16b, v2.16b >> + shl v1.4s, v4.4s, #12 >> + sri v1.4s, v4.4s, #20 >> + >> + /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */ >> + add v0.4s, v0.4s, v1.4s >> + eor v3.16b, v3.16b, v0.16b >> + tbl v3.16b, {v3.16b}, v12.16b >> + >> + /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */ >> + add v2.4s, v2.4s, v3.4s >> + eor v4.16b, v1.16b, v2.16b >> + shl v1.4s, v4.4s, #7 >> + sri v1.4s, v4.4s, #25 >> + >> + /* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */ >> + ext v1.16b, v1.16b, v1.16b, #12 >> + /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */ >> + ext v2.16b, v2.16b, v2.16b, #8 >> + /* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */ >> + ext v3.16b, v3.16b, v3.16b, #4 >> + >> + subs w4, w4, #2 >> + b.ne .Ldoubleround >> + >> + /* output0 = state0 + v0 */ >> + add v0.4s, 
v0.4s, v5.4s >> + /* output1 = state1 + v1 */ >> + add v1.4s, v1.4s, v6.4s >> + /* output2 = state2 + v2 */ >> + add v2.4s, v2.4s, v7.4s >> + /* output2 = state3 + v3 */ >> + add v3.4s, v3.4s, v8.4s >> + st1 { v0.4s - v3.4s }, [x0] >> + >> + /* ++copy3.counter */ >> + add d8, d8, d13 >> + >> + /* output += 64, --nblocks */ >> + add x0, x0, 64 >> + subs x3, x3, #1 >> + b.ne .Lblock >> + >> + /* counter = copy3.counter */ >> + str d8, [x2] >> + >> + /* Zero out the potentially sensitive regs, in case nothing uses these again. */ >> + eor v0.16b, v0.16b, v0.16b >> + eor v1.16b, v1.16b, v1.16b >> + eor v2.16b, v2.16b, v2.16b >> + eor v3.16b, v3.16b, v3.16b >> + eor v6.16b, v6.16b, v6.16b >> + eor v7.16b, v7.16b, v7.16b >> + ret >> +SYM_FUNC_END(__arch_chacha20_blocks_nostack) >> + >> + .section ".rodata", "a", %progbits >> + .align L1_CACHE_SHIFT >> + >> +CTES: .word 1634760805, 857760878, 2036477234, 1797285236 >> +ONE: .xword 1, 0 >> +ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f >> + >> +emit_aarch64_feature_1_and >> diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c >> new file mode 100644 >> index 000000000000..b6d6f4db3a98 >> --- /dev/null >> +++ b/arch/arm64/kernel/vdso/vgetrandom.c >> @@ -0,0 +1,13 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> + >> +#include <linux/types.h> >> +#include <linux/mm.h> >> + >> +#include "../../../../lib/vdso/getrandom.c" >> + >> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); >> + >> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) >> +{ >> + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); >> +} >> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile >> index 10ffdda3f2fa..f07ea679a4cc 100644 >> --- a/tools/testing/selftests/vDSO/Makefile >> +++ b/tools/testing/selftests/vDSO/Makefile >> 
@@ -1,6 +1,6 @@ >> # SPDX-License-Identifier: GPL-2.0 >> uname_M := $(shell uname -m 2>/dev/null || echo not) >> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >> >> TEST_GEN_PROGS := vdso_test_gettimeofday >> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >> TEST_GEN_PROGS += vdso_standalone_test_x86 >> endif >> TEST_GEN_PROGS += vdso_test_correctness >> -ifeq ($(uname_M),x86_64) >> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >> TEST_GEN_PROGS += vdso_test_getrandom >> ifneq ($(SODIUM),) >> TEST_GEN_PROGS += vdso_test_chacha >> -- >> 2.43.0 >> >>
On 27/08/24 05:46, Christophe Leroy wrote: > > > Le 26/08/2024 à 20:10, Adhemerval Zanella a écrit : >> Hook up the generic vDSO implementation to the aarch64 vDSO data page. >> The _vdso_rng_data required data is placed within the _vdso_data vvar >> page, by using a offset larger than the vdso_data >> (__VDSO_RND_DATA_OFFSET). >> >> The vDSO function requires a ChaCha20 implementation that does not >> write to the stack, and that can do an entire ChaCha20 permutation. >> The one provided is based on the current chacha-neon-core.S and uses NEON >> on the permute operation. >> >> Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> >> --- >> arch/arm64/Kconfig | 1 + >> arch/arm64/include/asm/vdso/getrandom.h | 50 +++++++ >> arch/arm64/include/asm/vdso/vsyscall.h | 9 ++ >> arch/arm64/kernel/vdso/Makefile | 7 +- >> arch/arm64/kernel/vdso/vdso.lds.S | 4 + >> arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++ >> arch/arm64/kernel/vdso/vgetrandom.c | 13 ++ >> tools/testing/selftests/vDSO/Makefile | 4 +- >> 8 files changed, 238 insertions(+), 3 deletions(-) >> create mode 100644 arch/arm64/include/asm/vdso/getrandom.h >> create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S >> create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c > > Were you able to use selftests ? I think you are missing the symbolic link to vdso directory (assuming you are using latest master branch from https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git) It is missing indeed; last time I used an old branch that had a different Makefile machinery, and I could build it more easily. > >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index b3fc891f1544..e3f4c5bf0661 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -237,6 +237,7 @@ config ARM64 >> select HAVE_KPROBES >> select HAVE_KRETPROBES >> select HAVE_GENERIC_VDSO >> + select VDSO_GETRANDOM > > You don't keep things in alphabetical here order on ARM64 ? 
It seems so for the most part, but the file does have some outliers (HAVE_SOFTIRQ_ON_OWN_STACK for instance). I moved it to the end of the list. > >> select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU >> select IRQ_DOMAIN >> select IRQ_FORCED_THREADING >> diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h >> new file mode 100644 >> index 000000000000..6e2b136813ca >> --- /dev/null >> +++ b/arch/arm64/include/asm/vdso/getrandom.h >> @@ -0,0 +1,50 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> + >> +#ifndef __ASM_VDSO_GETRANDOM_H >> +#define __ASM_VDSO_GETRANDOM_H >> + >> +#ifndef __ASSEMBLY__ >> + >> +#include <asm/unistd.h> >> +#include <vdso/datapage.h> >> + >> +/** >> + * getrandom_syscall - Invoke the getrandom() syscall. >> + * @buffer: Destination buffer to fill with random bytes. >> + * @len: Size of @buffer in bytes. >> + * @flags: Zero or more GRND_* flags. >> + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. >> + */ >> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >> +{ >> + register long int x8 asm ("x8") = __NR_getrandom; >> + register long int x0 asm ("x0") = (long int) buffer; >> + register long int x1 asm ("x1") = (long int) len; >> + register long int x2 asm ("x2") = (long int) flags; >> + >> + asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2)); >> + >> + return x0; >> +} >> + >> +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) >> +{ >> + return &_vdso_rng_data; >> +} >> + >> +/** >> + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. >> + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. >> + * @key: 32-byte input key. >> + * @counter: 8-byte counter, read on input and updated on return. >> + * @nblocks: Number of blocks to generate. 
>> + * >> + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write >> + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data >> + * leaking into forked child processes. >> + */ >> +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); > > For Jason: We all redefine this prototype, should we have it in a central place, or do you expect some architecture to provide some static inline for it ? > >> + >> +#endif /* !__ASSEMBLY__ */ >> + >> +#endif /* __ASM_VDSO_GETRANDOM_H */ >> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h >> index f94b1457c117..7ddb2bc3b57b 100644 >> --- a/arch/arm64/include/asm/vdso/vsyscall.h >> +++ b/arch/arm64/include/asm/vdso/vsyscall.h >> @@ -2,6 +2,8 @@ >> #ifndef __ASM_VDSO_VSYSCALL_H >> #define __ASM_VDSO_VSYSCALL_H >> +#define __VDSO_RND_DATA_OFFSET 480 >> + > > How is this offset calculated or defined ? What happens if the other structures grow ? Could you use some sizeof(something) instead of something from asm-offsets if you also need it in ASM ? That is something I talked to Jason about some time ago, since a similar strategy of using a 'magic' offset is used on x86_64. Ideally, I think the vdso_rnd_data should be moved to a common static structure along with _vdso_data, so that there is no need to come up with a magic offset like this. It seems that powerpc does follow this pattern, but other ports do not. However, since each architecture has some specific machinery for the vdso datapages, it would require some more extensive refactoring on multiple architectures to get this right. 
> >> #ifndef __ASSEMBLY__ >> #include <linux/timekeeper_internal.h> >> @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void) >> } >> #define __arch_get_k_vdso_data __arm64_get_k_vdso_data >> +static __always_inline >> +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) >> +{ >> + return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET; >> +} >> +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data >> + >> static __always_inline >> void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) >> { >> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile >> index d11da6461278..37dad3bb953a 100644 >> --- a/arch/arm64/kernel/vdso/Makefile >> +++ b/arch/arm64/kernel/vdso/Makefile >> @@ -9,7 +9,7 @@ >> # Include the generic Makefile to check the built vdso. >> include $(srctree)/lib/vdso/Makefile >> -obj-vdso := vgettimeofday.o note.o sigreturn.o >> +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o >> # Build rules >> targets := $(obj-vdso) vdso.so vdso.so.dbg >> @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ >> $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ >> $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ >> -Wmissing-prototypes -Wmissing-declarations >> +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ >> + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ >> + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ >> + -Wmissing-prototypes -Wmissing-declarations >> CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables >> +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables >> ifneq ($(c-gettimeofday-y),) >> CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) >> diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S >> index 45354f2ddf70..f8dbcece20e2 100644 >> --- a/arch/arm64/kernel/vdso/vdso.lds.S >> +++ b/arch/arm64/kernel/vdso/vdso.lds.S >> @@ -12,6 +12,8 @@ >> 
#include <asm/page.h> >> #include <asm/vdso.h> >> #include <asm-generic/vmlinux.lds.h> >> +#include <vdso/datapage.h> >> +#include <asm/vdso/vsyscall.h> >> OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") >> OUTPUT_ARCH(aarch64) >> @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64) >> SECTIONS >> { >> PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); >> + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); >> #ifdef CONFIG_TIME_NS >> PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); >> #endif >> @@ -102,6 +105,7 @@ VERSION >> __kernel_gettimeofday; >> __kernel_clock_gettime; >> __kernel_clock_getres; >> + __kernel_getrandom; >> local: *; >> }; >> } >> diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S > > [skipped ASM as I have not spoken ARM asm since I was at school in the 90's] > >> diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c >> new file mode 100644 >> index 000000000000..b6d6f4db3a98 >> --- /dev/null >> +++ b/arch/arm64/kernel/vdso/vgetrandom.c >> @@ -0,0 +1,13 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> + >> +#include <linux/types.h> >> +#include <linux/mm.h> >> + >> +#include "../../../../lib/vdso/getrandom.c" > > For gettimeofday ARM64 uses c-gettimeofday-y in the Makefile instead. > > You should do the same with c-getrandom-y Ack. 
> >> + >> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); >> + >> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) >> +{ >> + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); >> +} >> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile >> index 10ffdda3f2fa..f07ea679a4cc 100644 >> --- a/tools/testing/selftests/vDSO/Makefile >> +++ b/tools/testing/selftests/vDSO/Makefile >> @@ -1,6 +1,6 @@ >> # SPDX-License-Identifier: GPL-2.0 >> uname_M := $(shell uname -m 2>/dev/null || echo not) >> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) > >> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >> TEST_GEN_PROGS := vdso_test_gettimeofday >> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >> TEST_GEN_PROGS += vdso_standalone_test_x86 >> endif >> TEST_GEN_PROGS += vdso_test_correctness >> -ifeq ($(uname_M),x86_64) >> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) > > Does that work for you when you cross-compile ? For powerpc when I cross compile I still get the x86_64 from uname_M here, which is unexpected. Right, I haven't tested cross-compilation of the selftests, so I am not sure. I will check it. > >> TEST_GEN_PROGS += vdso_test_getrandom >> ifneq ($(SODIUM),) >> TEST_GEN_PROGS += vdso_test_chacha > > Christophe
Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit : > [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] > > On 27/08/24 11:00, Christophe Leroy wrote: >> >> >> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit : >>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] >>> >>> On 27/08/24 10:34, Jason A. Donenfeld wrote: >>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: >>>>> >>>>> >>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote: >>>>>> Hi Adhemerval, >>>>>> >>>>>> Thanks for posting this! Exciting to have it here. >>>>>> >>>>>> Just some small nits for now: >>>>>> >>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >>>>>>> +{ >>>>>>> + register long int x8 asm ("x8") = __NR_getrandom; >>>>>>> + register long int x0 asm ("x0") = (long int) buffer; >>>>>>> + register long int x1 asm ("x1") = (long int) len; >>>>>>> + register long int x2 asm ("x2") = (long int) flags; >>>>>> >>>>>> Usually it's written just as `long` or `unsigned long`, and likewise >>>>>> with the cast. Also, no space after the cast. >>>>> >>>>> Ack. >>>>> >>>>>> >>>>>>> +#define __VDSO_RND_DATA_OFFSET 480 >>>>>> >>>>>> This is the size of the data currently there? >>>>> >>>>> Yes, I used the same strategy x86 did. >>>>> >>>>>> >>>>>>> #include <asm/page.h> >>>>>>> #include <asm/vdso.h> >>>>>>> #include <asm-generic/vmlinux.lds.h> >>>>>>> +#include <vdso/datapage.h> >>>>>>> +#include <asm/vdso/vsyscall.h> >>>>>> >>>>>> Possible to keep the asm/ together? >>>>> >>>>> Ack. >>>>> >>>>>> >>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO. 
Produces a given positive >>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >>>>>> >>>>>> nonnce -> nonce >>>>> >>>>> Ack. >>>>> >>>>>> >>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >>>>>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>>>>>> >>>>>>> TEST_GEN_PROGS := vdso_test_gettimeofday >>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >>>>>>> TEST_GEN_PROGS += vdso_standalone_test_x86 >>>>>>> endif >>>>>>> TEST_GEN_PROGS += vdso_test_correctness >>>>>>> -ifeq ($(uname_M),x86_64) >>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >>>>>>> TEST_GEN_PROGS += vdso_test_getrandom >>>>>>> ifneq ($(SODIUM),) >>>>>>> TEST_GEN_PROGS += vdso_test_chacha >>>>>> >>>>>> You'll need to add the symlink to get the chacha selftest running: >>>>>> >>>>>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso >>>>>> $ git add tools/arch/arm64/vdso >>>>>> >>>>>> Also, can you confirm that the chacha selftest runs and works? >>>>> >>>>> Yes, last time I has to built it manually since the Makefile machinery seem >>>>> to be broken even on x86_64. 
In a Ubuntu vm I have: >>>>> >>>>> tools/testing/selftests/vDSO$ make >>>>> CC vdso_test_gettimeofday >>>>> CC vdso_test_getcpu >>>>> CC vdso_test_abi >>>>> CC vdso_test_clock_getres >>>>> CC vdso_standalone_test_x86 >>>>> CC vdso_test_correctness >>>>> CC vdso_test_getrandom >>>>> CC vdso_test_chacha >>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, >>>>> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, >>>>> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, >>>>> from /usr/include/limits.h:195, >>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, >>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, >>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, >>>>> from /usr/include/sodium/export.h:7, >>>>> from /usr/include/sodium/crypto_stream_chacha20.h:14, >>>>> from vdso_test_chacha.c:6: >>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" >>>>> 99 | # if INT_MAX == 32767 >>>>> | ^~~~~~~ >>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" >>>>> 102 | # if INT_MAX == 2147483647 >>>>> | ^~~~~~~ >>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" >>>>> 126 | # if LONG_MAX == 2147483647 >>>>> | ^~~~~~~~ >>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 >>>> >>>> You get that even with the latest random.git? I thought Christophe's >>>> patch fixed that, but maybe not and I should just remove the dependency >>>> on the sodium header instead. >>> >>> On x86_64 I tested with Linux master. 
With random.git it is a different issue: >>> >>> linux-git/tools/testing/selftests/vDSO$ make >>> CC vdso_test_gettimeofday >>> CC vdso_test_getcpu >>> CC vdso_test_abi >>> CC vdso_test_clock_getres >>> CC vdso_standalone_test_x86 >>> CC vdso_test_correctness >>> CC vdso_test_getrandom >>> CC vdso_test_chacha >>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': >>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' >>> collect2: error: ld returned 1 exit status >>> >>> If I move -lsodium to the end of the compiler command it works. >>> >>> >> >> Try a "make clean" maybe ? >> >> I have Fedora 38 and no build problem with latest random tree: >> >> $ make V=1 >> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday >> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_getcpu.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu >> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_abi.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi >> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_clock_getres.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres >> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector vdso_standalone_test_x86.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86 >> gcc -std=gnu99 -D_GNU_SOURCE= -ldl vdso_test_correctness.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness >> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi vdso_test_getrandom.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom >> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter 
/home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha >> $ > > It is a clean tree (git clean -dfx), and I take there is no need to build a kernel > prior hand. I meant 'make clean'. Right, I have not built any x86 kernel at the moment. Just : $ pwd /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO $ make clean then $ make V=1 Christophe
On 27/08/24 11:10, Christophe Leroy wrote: > > > Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit : >> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] >> >> On 27/08/24 11:00, Christophe Leroy wrote: >>> >>> >>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit : >>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] >>>> >>>> On 27/08/24 10:34, Jason A. Donenfeld wrote: >>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: >>>>>> >>>>>> >>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote: >>>>>>> Hi Adhemerval, >>>>>>> >>>>>>> Thanks for posting this! Exciting to have it here. >>>>>>> >>>>>>> Just some small nits for now: >>>>>>> >>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >>>>>>>> +{ >>>>>>>> + register long int x8 asm ("x8") = __NR_getrandom; >>>>>>>> + register long int x0 asm ("x0") = (long int) buffer; >>>>>>>> + register long int x1 asm ("x1") = (long int) len; >>>>>>>> + register long int x2 asm ("x2") = (long int) flags; >>>>>>> >>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise >>>>>>> with the cast. Also, no space after the cast. >>>>>> >>>>>> Ack. >>>>>> >>>>>>> >>>>>>>> +#define __VDSO_RND_DATA_OFFSET 480 >>>>>>> >>>>>>> This is the size of the data currently there? >>>>>> >>>>>> Yes, I used the same strategy x86 did. >>>>>> >>>>>>> >>>>>>>> #include <asm/page.h> >>>>>>>> #include <asm/vdso.h> >>>>>>>> #include <asm-generic/vmlinux.lds.h> >>>>>>>> +#include <vdso/datapage.h> >>>>>>>> +#include <asm/vdso/vsyscall.h> >>>>>>> >>>>>>> Possible to keep the asm/ together? >>>>>> >>>>>> Ack. 
>>>>>> >>>>>>> >>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >>>>>>> >>>>>>> nonnce -> nonce >>>>>> >>>>>> Ack. >>>>>> >>>>>>> >>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >>>>>>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>>>>>>> >>>>>>>> TEST_GEN_PROGS := vdso_test_gettimeofday >>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >>>>>>>> TEST_GEN_PROGS += vdso_standalone_test_x86 >>>>>>>> endif >>>>>>>> TEST_GEN_PROGS += vdso_test_correctness >>>>>>>> -ifeq ($(uname_M),x86_64) >>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >>>>>>>> TEST_GEN_PROGS += vdso_test_getrandom >>>>>>>> ifneq ($(SODIUM),) >>>>>>>> TEST_GEN_PROGS += vdso_test_chacha >>>>>>> >>>>>>> You'll need to add the symlink to get the chacha selftest running: >>>>>>> >>>>>>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso >>>>>>> $ git add tools/arch/arm64/vdso >>>>>>> >>>>>>> Also, can you confirm that the chacha selftest runs and works? >>>>>> >>>>>> Yes, last time I has to built it manually since the Makefile machinery seem >>>>>> to be broken even on x86_64. 
In a Ubuntu vm I have: >>>>>> >>>>>> tools/testing/selftests/vDSO$ make >>>>>> CC vdso_test_gettimeofday >>>>>> CC vdso_test_getcpu >>>>>> CC vdso_test_abi >>>>>> CC vdso_test_clock_getres >>>>>> CC vdso_standalone_test_x86 >>>>>> CC vdso_test_correctness >>>>>> CC vdso_test_getrandom >>>>>> CC vdso_test_chacha >>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, >>>>>> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, >>>>>> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, >>>>>> from /usr/include/limits.h:195, >>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, >>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, >>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, >>>>>> from /usr/include/sodium/export.h:7, >>>>>> from /usr/include/sodium/crypto_stream_chacha20.h:14, >>>>>> from vdso_test_chacha.c:6: >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" >>>>>> 99 | # if INT_MAX == 32767 >>>>>> | ^~~~~~~ >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" >>>>>> 102 | # if INT_MAX == 2147483647 >>>>>> | ^~~~~~~ >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" >>>>>> 126 | # if LONG_MAX == 2147483647 >>>>>> | ^~~~~~~~ >>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 >>>>> >>>>> You get that even with the latest random.git? I thought Christophe's >>>>> patch fixed that, but maybe not and I should just remove the dependency >>>>> on the sodium header instead. >>>> >>>> On x86_64 I tested with Linux master. 
With random.git it is a different issue: >>>> >>>> linux-git/tools/testing/selftests/vDSO$ make >>>> CC vdso_test_gettimeofday >>>> CC vdso_test_getcpu >>>> CC vdso_test_abi >>>> CC vdso_test_clock_getres >>>> CC vdso_standalone_test_x86 >>>> CC vdso_test_correctness >>>> CC vdso_test_getrandom >>>> CC vdso_test_chacha >>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': >>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' >>>> collect2: error: ld returned 1 exit status >>>> >>>> If I move -lsodium to the end of the compiler command it works. >>>> >>>> >>> >>> Try a "make clean" maybe ? >>> >>> I have Fedora 38 and no build problem with latest random tree: >>> >>> $ make V=1 >>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday >>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_getcpu.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu >>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_abi.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi >>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_clock_getres.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres >>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector vdso_standalone_test_x86.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86 >>> gcc -std=gnu99 -D_GNU_SOURCE= -ldl vdso_test_correctness.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness >>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi vdso_test_getrandom.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom >>> gcc -std=gnu99 
-D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha >>> $ >> >> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel >> prior hand. > > I meeant 'make clean' > > > Right, I have not built any x86 kernel at the moment. > > Just : > $ pwd > /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO > > $ make clean > > then > > $ make V=1 The issue is that the Ubuntu linker is configured to use --as-needed by default; this patch fixes the issue: diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 10ffdda3f2fa..151baf650e4c 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ -idirafter $(top_srcdir)/arch/$(ARCH)/include \ -idirafter $(top_srcdir)/include \ -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ - -Wa,--noexecstack $(SODIUM) + -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM)
On Tue, Aug 27, 2024 at 11:14:27AM -0300, Adhemerval Zanella Netto wrote: > > > On 27/08/24 11:10, Christophe Leroy wrote: > > > > > > Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit : > >> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] > >> > >> On 27/08/24 11:00, Christophe Leroy wrote: > >>> > >>> > >>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit : > >>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] > >>>> > >>>> On 27/08/24 10:34, Jason A. Donenfeld wrote: > >>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: > >>>>>> > >>>>>> > >>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote: > >>>>>>> Hi Adhemerval, > >>>>>>> > >>>>>>> Thanks for posting this! Exciting to have it here. > >>>>>>> > >>>>>>> Just some small nits for now: > >>>>>>> > >>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: > >>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) > >>>>>>>> +{ > >>>>>>>> + register long int x8 asm ("x8") = __NR_getrandom; > >>>>>>>> + register long int x0 asm ("x0") = (long int) buffer; > >>>>>>>> + register long int x1 asm ("x1") = (long int) len; > >>>>>>>> + register long int x2 asm ("x2") = (long int) flags; > >>>>>>> > >>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise > >>>>>>> with the cast. Also, no space after the cast. > >>>>>> > >>>>>> Ack. > >>>>>> > >>>>>>> > >>>>>>>> +#define __VDSO_RND_DATA_OFFSET 480 > >>>>>>> > >>>>>>> This is the size of the data currently there? > >>>>>> > >>>>>> Yes, I used the same strategy x86 did. 
> >>>>>> > >>>>>>> > >>>>>>>> #include <asm/page.h> > >>>>>>>> #include <asm/vdso.h> > >>>>>>>> #include <asm-generic/vmlinux.lds.h> > >>>>>>>> +#include <vdso/datapage.h> > >>>>>>>> +#include <asm/vdso/vsyscall.h> > >>>>>>> > >>>>>>> Possible to keep the asm/ together? > >>>>>> > >>>>>> Ack. > >>>>>> > >>>>>>> > >>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive > >>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes > >>>>>>> > >>>>>>> nonnce -> nonce > >>>>>> > >>>>>> Ack. > >>>>>> > >>>>>>> > >>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > >>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) > >>>>>>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > >>>>>>>> > >>>>>>>> TEST_GEN_PROGS := vdso_test_gettimeofday > >>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) > >>>>>>>> TEST_GEN_PROGS += vdso_standalone_test_x86 > >>>>>>>> endif > >>>>>>>> TEST_GEN_PROGS += vdso_test_correctness > >>>>>>>> -ifeq ($(uname_M),x86_64) > >>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) > >>>>>>>> TEST_GEN_PROGS += vdso_test_getrandom > >>>>>>>> ifneq ($(SODIUM),) > >>>>>>>> TEST_GEN_PROGS += vdso_test_chacha > >>>>>>> > >>>>>>> You'll need to add the symlink to get the chacha selftest running: > >>>>>>> > >>>>>>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso > >>>>>>> $ git add tools/arch/arm64/vdso > >>>>>>> > >>>>>>> Also, can you confirm that the chacha selftest runs and works? > >>>>>> > >>>>>> Yes, last time I has to built it manually since the Makefile machinery seem > >>>>>> to be broken even on x86_64. 
In a Ubuntu vm I have: > >>>>>> > >>>>>> tools/testing/selftests/vDSO$ make > >>>>>> CC vdso_test_gettimeofday > >>>>>> CC vdso_test_getcpu > >>>>>> CC vdso_test_abi > >>>>>> CC vdso_test_clock_getres > >>>>>> CC vdso_standalone_test_x86 > >>>>>> CC vdso_test_correctness > >>>>>> CC vdso_test_getrandom > >>>>>> CC vdso_test_chacha > >>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, > >>>>>> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, > >>>>>> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, > >>>>>> from /usr/include/limits.h:195, > >>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, > >>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, > >>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, > >>>>>> from /usr/include/sodium/export.h:7, > >>>>>> from /usr/include/sodium/crypto_stream_chacha20.h:14, > >>>>>> from vdso_test_chacha.c:6: > >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" > >>>>>> 99 | # if INT_MAX == 32767 > >>>>>> | ^~~~~~~ > >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" > >>>>>> 102 | # if INT_MAX == 2147483647 > >>>>>> | ^~~~~~~ > >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" > >>>>>> 126 | # if LONG_MAX == 2147483647 > >>>>>> | ^~~~~~~~ > >>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 > >>>>> > >>>>> You get that even with the latest random.git? I thought Christophe's > >>>>> patch fixed that, but maybe not and I should just remove the dependency > >>>>> on the sodium header instead. > >>>> > >>>> On x86_64 I tested with Linux master. 
With random.git it is a different issue: > >>>> > >>>> linux-git/tools/testing/selftests/vDSO$ make > >>>> CC vdso_test_gettimeofday > >>>> CC vdso_test_getcpu > >>>> CC vdso_test_abi > >>>> CC vdso_test_clock_getres > >>>> CC vdso_standalone_test_x86 > >>>> CC vdso_test_correctness > >>>> CC vdso_test_getrandom > >>>> CC vdso_test_chacha > >>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': > >>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' > >>>> collect2: error: ld returned 1 exit status > >>>> > >>>> If I move -lsodium to the end of the compiler command it works. > >>>> > >>>> > >>> > >>> Try a "make clean" maybe ? > >>> > >>> I have Fedora 38 and no build problem with latest random tree: > >>> > >>> $ make V=1 > >>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday > >>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_getcpu.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu > >>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_abi.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi > >>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_clock_getres.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres > >>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector vdso_standalone_test_x86.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86 > >>> gcc -std=gnu99 -D_GNU_SOURCE= -ldl vdso_test_correctness.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness > >>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi vdso_test_getrandom.c parse_vdso.c -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom > >>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha > >>> $ > >> > >> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel > >> prior hand. > > > > I meeant 'make clean' > > > > > > Right, I have not built any x86 kernel at the moment. > > > > Just : > > $ pwd > > /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO > > > > $ make clean > > > > then > > > > $ make V=1 > > The issue is Ubuntu linker is configure to use --as-needed by default, this > patch fixes the issue: > > diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile > index 10ffdda3f2fa..151baf650e4c 100644 > --- a/tools/testing/selftests/vDSO/Makefile > +++ b/tools/testing/selftests/vDSO/Makefile > @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ > -idirafter $(top_srcdir)/arch/$(ARCH)/include \ > -idirafter $(top_srcdir)/include \ > -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ > - -Wa,--noexecstack $(SODIUM) > + -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM) Oh, it's an as-needed thing. In that case, does this fix it for you? 
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 10ffdda3f2fa..834aa862ba2c 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) +SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null) +SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null) TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu @@ -13,7 +14,7 @@ endif TEST_GEN_PROGS += vdso_test_correctness ifeq ($(uname_M),x86_64) TEST_GEN_PROGS += vdso_test_getrandom -ifneq ($(SODIUM),) +ifneq ($(SODIUM_LIBS),) TEST_GEN_PROGS += vdso_test_chacha endif endif @@ -41,8 +42,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ -isystem $(top_srcdir)/include/uapi $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S +$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS) $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ -idirafter $(top_srcdir)/arch/$(ARCH)/include \ -idirafter $(top_srcdir)/include \ -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ - -Wa,--noexecstack $(SODIUM) + -Wa,--noexecstack $(SODIUM_CFLAGS)
On 27/08/24 11:28, Jason A. Donenfeld wrote: > On Tue, Aug 27, 2024 at 11:14:27AM -0300, Adhemerval Zanella Netto wrote: >> >> >> On 27/08/24 11:10, Christophe Leroy wrote: >>> >>> >>> Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit : >>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] >>>> >>>> On 27/08/24 11:00, Christophe Leroy wrote: >>>>> >>>>> >>>>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit : >>>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] >>>>>> >>>>>> On 27/08/24 10:34, Jason A. Donenfeld wrote: >>>>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: >>>>>>>> >>>>>>>> >>>>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote: >>>>>>>>> Hi Adhemerval, >>>>>>>>> >>>>>>>>> Thanks for posting this! Exciting to have it here. >>>>>>>>> >>>>>>>>> Just some small nits for now: >>>>>>>>> >>>>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >>>>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >>>>>>>>>> +{ >>>>>>>>>> + register long int x8 asm ("x8") = __NR_getrandom; >>>>>>>>>> + register long int x0 asm ("x0") = (long int) buffer; >>>>>>>>>> + register long int x1 asm ("x1") = (long int) len; >>>>>>>>>> + register long int x2 asm ("x2") = (long int) flags; >>>>>>>>> >>>>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise >>>>>>>>> with the cast. Also, no space after the cast. >>>>>>>> >>>>>>>> Ack. >>>>>>>> >>>>>>>>> >>>>>>>>>> +#define __VDSO_RND_DATA_OFFSET 480 >>>>>>>>> >>>>>>>>> This is the size of the data currently there? >>>>>>>> >>>>>>>> Yes, I used the same strategy x86 did. 
>>>>>>>> >>>>>>>>> >>>>>>>>>> #include <asm/page.h> >>>>>>>>>> #include <asm/vdso.h> >>>>>>>>>> #include <asm-generic/vmlinux.lds.h> >>>>>>>>>> +#include <vdso/datapage.h> >>>>>>>>>> +#include <asm/vdso/vsyscall.h> >>>>>>>>> >>>>>>>>> Possible to keep the asm/ together? >>>>>>>> >>>>>>>> Ack. >>>>>>>> >>>>>>>>> >>>>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >>>>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >>>>>>>>> >>>>>>>>> nonnce -> nonce >>>>>>>> >>>>>>>> Ack. >>>>>>>> >>>>>>>>> >>>>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>>>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >>>>>>>>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>>>>>>>>> >>>>>>>>>> TEST_GEN_PROGS := vdso_test_gettimeofday >>>>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >>>>>>>>>> TEST_GEN_PROGS += vdso_standalone_test_x86 >>>>>>>>>> endif >>>>>>>>>> TEST_GEN_PROGS += vdso_test_correctness >>>>>>>>>> -ifeq ($(uname_M),x86_64) >>>>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >>>>>>>>>> TEST_GEN_PROGS += vdso_test_getrandom >>>>>>>>>> ifneq ($(SODIUM),) >>>>>>>>>> TEST_GEN_PROGS += vdso_test_chacha >>>>>>>>> >>>>>>>>> You'll need to add the symlink to get the chacha selftest running: >>>>>>>>> >>>>>>>>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso >>>>>>>>> $ git add tools/arch/arm64/vdso >>>>>>>>> >>>>>>>>> Also, can you confirm that the chacha selftest runs and works? >>>>>>>> >>>>>>>> Yes, last time I has to built it manually since the Makefile machinery seem >>>>>>>> to be broken even on x86_64. 
In a Ubuntu vm I have: >>>>>>>> >>>>>>>> tools/testing/selftests/vDSO$ make >>>>>>>> CC vdso_test_gettimeofday >>>>>>>> CC vdso_test_getcpu >>>>>>>> CC vdso_test_abi >>>>>>>> CC vdso_test_clock_getres >>>>>>>> CC vdso_standalone_test_x86 >>>>>>>> CC vdso_test_correctness >>>>>>>> CC vdso_test_getrandom >>>>>>>> CC vdso_test_chacha >>>>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, >>>>>>>> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, >>>>>>>> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, >>>>>>>> from /usr/include/limits.h:195, >>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, >>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, >>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, >>>>>>>> from /usr/include/sodium/export.h:7, >>>>>>>> from /usr/include/sodium/crypto_stream_chacha20.h:14, >>>>>>>> from vdso_test_chacha.c:6: >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" >>>>>>>> 99 | # if INT_MAX == 32767 >>>>>>>> | ^~~~~~~ >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" >>>>>>>> 102 | # if INT_MAX == 2147483647 >>>>>>>> | ^~~~~~~ >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" >>>>>>>> 126 | # if LONG_MAX == 2147483647 >>>>>>>> | ^~~~~~~~ >>>>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 >>>>>>> >>>>>>> You get that even with the latest random.git? I thought Christophe's >>>>>>> patch fixed that, but maybe not and I should just remove the dependency >>>>>>> on the sodium header instead. >>>>>> >>>>>> On x86_64 I tested with Linux master. 
With random.git it is a different issue: >>>>>> >>>>>> linux-git/tools/testing/selftests/vDSO$ make >>>>>> CC vdso_test_gettimeofday >>>>>> CC vdso_test_getcpu >>>>>> CC vdso_test_abi >>>>>> CC vdso_test_clock_getres >>>>>> CC vdso_standalone_test_x86 >>>>>> CC vdso_test_correctness >>>>>> CC vdso_test_getrandom >>>>>> CC vdso_test_chacha >>>>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': >>>>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' >>>>>> collect2: error: ld returned 1 exit status >>>>>> >>>>>> If I move -lsodium to the end of the compiler command it works. >>>>>> >>>>>> >>>>> >>>>> Try a "make clean" maybe ? >>>>> >>>>> I have Fedora 38 and no build problem with latest random tree: >>>>> >>>>> $ make V=1 >>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday >>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_getcpu.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu >>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_abi.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi >>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_clock_getres.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector vdso_standalone_test_x86.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86 >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -ldl vdso_test_correctness.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi vdso_test_getrandom.c parse_vdso.c -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha >>>>> $ >>>> >>>> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel >>>> prior hand. >>> >>> I meeant 'make clean' >>> >>> >>> Right, I have not built any x86 kernel at the moment. >>> >>> Just : >>> $ pwd >>> /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO >>> >>> $ make clean >>> >>> then >>> >>> $ make V=1 >> >> The issue is Ubuntu linker is configure to use --as-needed by default, this >> patch fixes the issue: >> >> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile >> index 10ffdda3f2fa..151baf650e4c 100644 >> --- a/tools/testing/selftests/vDSO/Makefile >> +++ b/tools/testing/selftests/vDSO/Makefile >> @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ >> -idirafter $(top_srcdir)/arch/$(ARCH)/include \ >> -idirafter $(top_srcdir)/include \ >> -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ >> - -Wa,--noexecstack $(SODIUM) >> + -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM) > > Oh, it's an as-needed thing. In that case, does this fix it for you? 
> > diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile > index 10ffdda3f2fa..834aa862ba2c 100644 > --- a/tools/testing/selftests/vDSO/Makefile > +++ b/tools/testing/selftests/vDSO/Makefile > @@ -1,7 +1,8 @@ > # SPDX-License-Identifier: GPL-2.0 > uname_M := $(shell uname -m 2>/dev/null || echo not) > ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > +SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null) > +SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null) > > TEST_GEN_PROGS := vdso_test_gettimeofday > TEST_GEN_PROGS += vdso_test_getcpu > @@ -13,7 +14,7 @@ endif > TEST_GEN_PROGS += vdso_test_correctness > ifeq ($(uname_M),x86_64) > TEST_GEN_PROGS += vdso_test_getrandom > -ifneq ($(SODIUM),) > +ifneq ($(SODIUM_LIBS),) > TEST_GEN_PROGS += vdso_test_chacha > endif > endif > @@ -41,8 +42,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ > -isystem $(top_srcdir)/include/uapi > > $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S > +$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS) > $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ > -idirafter $(top_srcdir)/arch/$(ARCH)/include \ > -idirafter $(top_srcdir)/include \ > -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ > - -Wa,--noexecstack $(SODIUM) > + -Wa,--noexecstack $(SODIUM_CFLAGS) > Nope, 'pkg-config --cflags libsodium' is empty. The -Wl,-no-as-needed approach is simpler, I think.
On Tue, Aug 27, 2024 at 4:30 PM Adhemerval Zanella Netto <adhemerval.zanella@linaro.org> wrote: > > > > On 27/08/24 11:28, Jason A. Donenfeld wrote: > > On Tue, Aug 27, 2024 at 11:14:27AM -0300, Adhemerval Zanella Netto wrote: > >> > >> > >> On 27/08/24 11:10, Christophe Leroy wrote: > >>> > >>> > >>> Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit : > >>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] > >>>> > >>>> On 27/08/24 11:00, Christophe Leroy wrote: > >>>>> > >>>>> > >>>>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit : > >>>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] > >>>>>> > >>>>>> On 27/08/24 10:34, Jason A. Donenfeld wrote: > >>>>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: > >>>>>>>> > >>>>>>>> > >>>>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote: > >>>>>>>>> Hi Adhemerval, > >>>>>>>>> > >>>>>>>>> Thanks for posting this! Exciting to have it here. > >>>>>>>>> > >>>>>>>>> Just some small nits for now: > >>>>>>>>> > >>>>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: > >>>>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) > >>>>>>>>>> +{ > >>>>>>>>>> + register long int x8 asm ("x8") = __NR_getrandom; > >>>>>>>>>> + register long int x0 asm ("x0") = (long int) buffer; > >>>>>>>>>> + register long int x1 asm ("x1") = (long int) len; > >>>>>>>>>> + register long int x2 asm ("x2") = (long int) flags; > >>>>>>>>> > >>>>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise > >>>>>>>>> with the cast. Also, no space after the cast. > >>>>>>>> > >>>>>>>> Ack. 
> >>>>>>>> > >>>>>>>>> > >>>>>>>>>> +#define __VDSO_RND_DATA_OFFSET 480 > >>>>>>>>> > >>>>>>>>> This is the size of the data currently there? > >>>>>>>> > >>>>>>>> Yes, I used the same strategy x86 did. > >>>>>>>> > >>>>>>>>> > >>>>>>>>>> #include <asm/page.h> > >>>>>>>>>> #include <asm/vdso.h> > >>>>>>>>>> #include <asm-generic/vmlinux.lds.h> > >>>>>>>>>> +#include <vdso/datapage.h> > >>>>>>>>>> +#include <asm/vdso/vsyscall.h> > >>>>>>>>> > >>>>>>>>> Possible to keep the asm/ together? > >>>>>>>> > >>>>>>>> Ack. > >>>>>>>> > >>>>>>>>> > >>>>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive > >>>>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes > >>>>>>>>> > >>>>>>>>> nonnce -> nonce > >>>>>>>> > >>>>>>>> Ack. > >>>>>>>> > >>>>>>>>> > >>>>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > >>>>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) > >>>>>>>>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > >>>>>>>>>> > >>>>>>>>>> TEST_GEN_PROGS := vdso_test_gettimeofday > >>>>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) > >>>>>>>>>> TEST_GEN_PROGS += vdso_standalone_test_x86 > >>>>>>>>>> endif > >>>>>>>>>> TEST_GEN_PROGS += vdso_test_correctness > >>>>>>>>>> -ifeq ($(uname_M),x86_64) > >>>>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) > >>>>>>>>>> TEST_GEN_PROGS += vdso_test_getrandom > >>>>>>>>>> ifneq ($(SODIUM),) > >>>>>>>>>> TEST_GEN_PROGS += vdso_test_chacha > >>>>>>>>> > >>>>>>>>> You'll need to add the symlink to get the chacha selftest running: > >>>>>>>>> > >>>>>>>>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso > >>>>>>>>> $ git add tools/arch/arm64/vdso > >>>>>>>>> > >>>>>>>>> Also, can you confirm that the chacha selftest runs and works? 
> >>>>>>>> > >>>>>>>> Yes, last time I has to built it manually since the Makefile machinery seem > >>>>>>>> to be broken even on x86_64. In a Ubuntu vm I have: > >>>>>>>> > >>>>>>>> tools/testing/selftests/vDSO$ make > >>>>>>>> CC vdso_test_gettimeofday > >>>>>>>> CC vdso_test_getcpu > >>>>>>>> CC vdso_test_abi > >>>>>>>> CC vdso_test_clock_getres > >>>>>>>> CC vdso_standalone_test_x86 > >>>>>>>> CC vdso_test_correctness > >>>>>>>> CC vdso_test_getrandom > >>>>>>>> CC vdso_test_chacha > >>>>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, > >>>>>>>> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, > >>>>>>>> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, > >>>>>>>> from /usr/include/limits.h:195, > >>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, > >>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, > >>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, > >>>>>>>> from /usr/include/sodium/export.h:7, > >>>>>>>> from /usr/include/sodium/crypto_stream_chacha20.h:14, > >>>>>>>> from vdso_test_chacha.c:6: > >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" > >>>>>>>> 99 | # if INT_MAX == 32767 > >>>>>>>> | ^~~~~~~ > >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" > >>>>>>>> 102 | # if INT_MAX == 2147483647 > >>>>>>>> | ^~~~~~~ > >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" > >>>>>>>> 126 | # if LONG_MAX == 2147483647 > >>>>>>>> | ^~~~~~~~ > >>>>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 > >>>>>>> > >>>>>>> You get that even with the latest random.git? 
I thought Christophe's > >>>>>>> patch fixed that, but maybe not and I should just remove the dependency > >>>>>>> on the sodium header instead. > >>>>>> > >>>>>> On x86_64 I tested with Linux master. With random.git it is a different issue: > >>>>>> > >>>>>> linux-git/tools/testing/selftests/vDSO$ make > >>>>>> CC vdso_test_gettimeofday > >>>>>> CC vdso_test_getcpu > >>>>>> CC vdso_test_abi > >>>>>> CC vdso_test_clock_getres > >>>>>> CC vdso_standalone_test_x86 > >>>>>> CC vdso_test_correctness > >>>>>> CC vdso_test_getrandom > >>>>>> CC vdso_test_chacha > >>>>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': > >>>>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' > >>>>>> collect2: error: ld returned 1 exit status > >>>>>> > >>>>>> If I move -lsodium to the end of the compiler command it works. > >>>>>> > >>>>>> > >>>>> > >>>>> Try a "make clean" maybe ? > >>>>> > >>>>> I have Fedora 38 and no build problem with latest random tree: > >>>>> > >>>>> $ make V=1 > >>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday > >>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_getcpu.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu > >>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_abi.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi > >>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_clock_getres.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres > >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector vdso_standalone_test_x86.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86 > >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -ldl vdso_test_correctness.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness > >>>>> gcc 
-std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi vdso_test_getrandom.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom > >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha > >>>>> $ > >>>> > >>>> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel > >>>> prior hand. > >>> > >>> I meeant 'make clean' > >>> > >>> > >>> Right, I have not built any x86 kernel at the moment. 
> >>> > >>> Just : > >>> $ pwd > >>> /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO > >>> > >>> $ make clean > >>> > >>> then > >>> > >>> $ make V=1 > >> > >> The issue is Ubuntu linker is configure to use --as-needed by default, this > >> patch fixes the issue: > >> > >> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile > >> index 10ffdda3f2fa..151baf650e4c 100644 > >> --- a/tools/testing/selftests/vDSO/Makefile > >> +++ b/tools/testing/selftests/vDSO/Makefile > >> @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ > >> -idirafter $(top_srcdir)/arch/$(ARCH)/include \ > >> -idirafter $(top_srcdir)/include \ > >> -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ > >> - -Wa,--noexecstack $(SODIUM) > >> + -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM) > > > > Oh, it's an as-needed thing. In that case, does this fix it for you? > > > > diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile > > index 10ffdda3f2fa..834aa862ba2c 100644 > > --- a/tools/testing/selftests/vDSO/Makefile > > +++ b/tools/testing/selftests/vDSO/Makefile > > @@ -1,7 +1,8 @@ > > # SPDX-License-Identifier: GPL-2.0 > > uname_M := $(shell uname -m 2>/dev/null || echo not) > > ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) > > -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) > > +SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null) > > +SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null) > > > > TEST_GEN_PROGS := vdso_test_gettimeofday > > TEST_GEN_PROGS += vdso_test_getcpu > > @@ -13,7 +14,7 @@ endif > > TEST_GEN_PROGS += vdso_test_correctness > > ifeq ($(uname_M),x86_64) > > TEST_GEN_PROGS += vdso_test_getrandom > > -ifneq ($(SODIUM),) > > +ifneq ($(SODIUM_LIBS),) > > TEST_GEN_PROGS += vdso_test_chacha > > endif > > endif > > @@ -41,8 +42,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem 
$(top_srcdir)/tools/include \ > > -isystem $(top_srcdir)/include/uapi > > > > $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S > > +$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS) > > $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ > > -idirafter $(top_srcdir)/arch/$(ARCH)/include \ > > -idirafter $(top_srcdir)/include \ > > -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ > > - -Wa,--noexecstack $(SODIUM) > > + -Wa,--noexecstack $(SODIUM_CFLAGS) > > > > Nops, 'pkg-config --cflags libsodium' is empty. The -Wl,-no-as-needed is simpler > I think. The --cflags thing is for a different issue Ruoyao found. My intended fix here was the LDLIBS += $(SODIUM_LIBS) part, which moves the `-lsodium` closer to the end of the command line. But it still doesn't work? Surprising...
On 27/08/24 11:32, Jason A. Donenfeld wrote: > On Tue, Aug 27, 2024 at 4:30 PM Adhemerval Zanella Netto > <adhemerval.zanella@linaro.org> wrote: >> >> >> >> On 27/08/24 11:28, Jason A. Donenfeld wrote: >>> On Tue, Aug 27, 2024 at 11:14:27AM -0300, Adhemerval Zanella Netto wrote: >>>> >>>> >>>> On 27/08/24 11:10, Christophe Leroy wrote: >>>>> >>>>> >>>>> Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit : >>>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] >>>>>> >>>>>> On 27/08/24 11:00, Christophe Leroy wrote: >>>>>>> >>>>>>> >>>>>>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit : >>>>>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ] >>>>>>>> >>>>>>>> On 27/08/24 10:34, Jason A. Donenfeld wrote: >>>>>>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote: >>>>>>>>>> >>>>>>>>>> >>>>>>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote: >>>>>>>>>>> Hi Adhemerval, >>>>>>>>>>> >>>>>>>>>>> Thanks for posting this! Exciting to have it here. >>>>>>>>>>> >>>>>>>>>>> Just some small nits for now: >>>>>>>>>>> >>>>>>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote: >>>>>>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) >>>>>>>>>>>> +{ >>>>>>>>>>>> + register long int x8 asm ("x8") = __NR_getrandom; >>>>>>>>>>>> + register long int x0 asm ("x0") = (long int) buffer; >>>>>>>>>>>> + register long int x1 asm ("x1") = (long int) len; >>>>>>>>>>>> + register long int x2 asm ("x2") = (long int) flags; >>>>>>>>>>> >>>>>>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise >>>>>>>>>>> with the cast. Also, no space after the cast. >>>>>>>>>> >>>>>>>>>> Ack. 
>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>> +#define __VDSO_RND_DATA_OFFSET 480 >>>>>>>>>>> >>>>>>>>>>> This is the size of the data currently there? >>>>>>>>>> >>>>>>>>>> Yes, I used the same strategy x86 did. >>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>> #include <asm/page.h> >>>>>>>>>>>> #include <asm/vdso.h> >>>>>>>>>>>> #include <asm-generic/vmlinux.lds.h> >>>>>>>>>>>> +#include <vdso/datapage.h> >>>>>>>>>>>> +#include <asm/vdso/vsyscall.h> >>>>>>>>>>> >>>>>>>>>>> Possible to keep the asm/ together? >>>>>>>>>> >>>>>>>>>> Ack. >>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive >>>>>>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes >>>>>>>>>>> >>>>>>>>>>> nonnce -> nonce >>>>>>>>>> >>>>>>>>>> Ack. >>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>>>>>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) >>>>>>>>>>>> SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>>>>>>>>>>> >>>>>>>>>>>> TEST_GEN_PROGS := vdso_test_gettimeofday >>>>>>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) >>>>>>>>>>>> TEST_GEN_PROGS += vdso_standalone_test_x86 >>>>>>>>>>>> endif >>>>>>>>>>>> TEST_GEN_PROGS += vdso_test_correctness >>>>>>>>>>>> -ifeq ($(uname_M),x86_64) >>>>>>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) >>>>>>>>>>>> TEST_GEN_PROGS += vdso_test_getrandom >>>>>>>>>>>> ifneq ($(SODIUM),) >>>>>>>>>>>> TEST_GEN_PROGS += vdso_test_chacha >>>>>>>>>>> >>>>>>>>>>> You'll need to add the symlink to get the chacha selftest running: >>>>>>>>>>> >>>>>>>>>>> $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso >>>>>>>>>>> $ git add tools/arch/arm64/vdso >>>>>>>>>>> >>>>>>>>>>> Also, can you confirm that the chacha selftest runs and works? 
>>>>>>>>>> >>>>>>>>>> Yes, last time I has to built it manually since the Makefile machinery seem >>>>>>>>>> to be broken even on x86_64. In a Ubuntu vm I have: >>>>>>>>>> >>>>>>>>>> tools/testing/selftests/vDSO$ make >>>>>>>>>> CC vdso_test_gettimeofday >>>>>>>>>> CC vdso_test_getcpu >>>>>>>>>> CC vdso_test_abi >>>>>>>>>> CC vdso_test_clock_getres >>>>>>>>>> CC vdso_standalone_test_x86 >>>>>>>>>> CC vdso_test_correctness >>>>>>>>>> CC vdso_test_getrandom >>>>>>>>>> CC vdso_test_chacha >>>>>>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7, >>>>>>>>>> from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, >>>>>>>>>> from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, >>>>>>>>>> from /usr/include/limits.h:195, >>>>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205, >>>>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7, >>>>>>>>>> from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34, >>>>>>>>>> from /usr/include/sodium/export.h:7, >>>>>>>>>> from /usr/include/sodium/crypto_stream_chacha20.h:14, >>>>>>>>>> from vdso_test_chacha.c:6: >>>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" >>>>>>>>>> 99 | # if INT_MAX == 32767 >>>>>>>>>> | ^~~~~~~ >>>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" >>>>>>>>>> 102 | # if INT_MAX == 2147483647 >>>>>>>>>> | ^~~~~~~ >>>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" >>>>>>>>>> 126 | # if LONG_MAX == 2147483647 >>>>>>>>>> | ^~~~~~~~ >>>>>>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1 >>>>>>>>> >>>>>>>>> You get that even with the latest random.git? 
I thought Christophe's >>>>>>>>> patch fixed that, but maybe not and I should just remove the dependency >>>>>>>>> on the sodium header instead. >>>>>>>> >>>>>>>> On x86_64 I tested with Linux master. With random.git it is a different issue: >>>>>>>> >>>>>>>> linux-git/tools/testing/selftests/vDSO$ make >>>>>>>> CC vdso_test_gettimeofday >>>>>>>> CC vdso_test_getcpu >>>>>>>> CC vdso_test_abi >>>>>>>> CC vdso_test_clock_getres >>>>>>>> CC vdso_standalone_test_x86 >>>>>>>> CC vdso_test_correctness >>>>>>>> CC vdso_test_getrandom >>>>>>>> CC vdso_test_chacha >>>>>>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main': >>>>>>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20' >>>>>>>> collect2: error: ld returned 1 exit status >>>>>>>> >>>>>>>> If I move -lsodium to the end of the compiler command it works. >>>>>>>> >>>>>>>> >>>>>>> >>>>>>> Try a "make clean" maybe ? >>>>>>> >>>>>>> I have Fedora 38 and no build problem with latest random tree: >>>>>>> >>>>>>> $ make V=1 >>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday >>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_getcpu.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu >>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_abi.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi >>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= vdso_test_clock_getres.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres >>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector vdso_standalone_test_x86.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86 >>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -ldl vdso_test_correctness.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness >>>>>>> gcc 
-std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi vdso_test_getrandom.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom >>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha >>>>>>> $ >>>>>> >>>>>> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel >>>>>> prior hand. >>>>> >>>>> I meeant 'make clean' >>>>> >>>>> >>>>> Right, I have not built any x86 kernel at the moment. 
>>>>> >>>>> Just : >>>>> $ pwd >>>>> /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO >>>>> >>>>> $ make clean >>>>> >>>>> then >>>>> >>>>> $ make V=1 >>>> >>>> The issue is Ubuntu linker is configure to use --as-needed by default, this >>>> patch fixes the issue: >>>> >>>> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile >>>> index 10ffdda3f2fa..151baf650e4c 100644 >>>> --- a/tools/testing/selftests/vDSO/Makefile >>>> +++ b/tools/testing/selftests/vDSO/Makefile >>>> @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ >>>> -idirafter $(top_srcdir)/arch/$(ARCH)/include \ >>>> -idirafter $(top_srcdir)/include \ >>>> -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ >>>> - -Wa,--noexecstack $(SODIUM) >>>> + -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM) >>> >>> Oh, it's an as-needed thing. In that case, does this fix it for you? >>> >>> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile >>> index 10ffdda3f2fa..834aa862ba2c 100644 >>> --- a/tools/testing/selftests/vDSO/Makefile >>> +++ b/tools/testing/selftests/vDSO/Makefile >>> @@ -1,7 +1,8 @@ >>> # SPDX-License-Identifier: GPL-2.0 >>> uname_M := $(shell uname -m 2>/dev/null || echo not) >>> ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) >>> -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) >>> +SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null) >>> +SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null) >>> >>> TEST_GEN_PROGS := vdso_test_gettimeofday >>> TEST_GEN_PROGS += vdso_test_getcpu >>> @@ -13,7 +14,7 @@ endif >>> TEST_GEN_PROGS += vdso_test_correctness >>> ifeq ($(uname_M),x86_64) >>> TEST_GEN_PROGS += vdso_test_getrandom >>> -ifneq ($(SODIUM),) >>> +ifneq ($(SODIUM_LIBS),) >>> TEST_GEN_PROGS += vdso_test_chacha >>> endif >>> endif >>> @@ -41,8 +42,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem 
$(top_srcdir)/tools/include \ >>> -isystem $(top_srcdir)/include/uapi >>> >>> $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S >>> +$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS) >>> $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ >>> -idirafter $(top_srcdir)/arch/$(ARCH)/include \ >>> -idirafter $(top_srcdir)/include \ >>> -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ >>> - -Wa,--noexecstack $(SODIUM) >>> + -Wa,--noexecstack $(SODIUM_CFLAGS) >>> >> >> Nops, 'pkg-config --cflags libsodium' is empty. The -Wl,-no-as-needed is simpler >> I think. > > The --cflags thing is for a different issue Ruoyao found. My intended > fix here was the LDLIBS += $(SODIUM_LIBS) part, which moves the > `-lsodium` closer to the end of the command line. But it still doesn't > work? Surprising... Oops, it does work indeed (my mistake here).
On Tue, Aug 27, 2024 at 11:02 AM Jason A. Donenfeld <Jason@zx2c4.com> wrote: > > On Tue, Aug 27, 2024 at 10:46:21AM +0200, Christophe Leroy wrote: > > > +/** > > > + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. > > > + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. > > > + * @key: 32-byte input key. > > > + * @counter: 8-byte counter, read on input and updated on return. > > > + * @nblocks: Number of blocks to generate. > > > + * > > > + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write > > > + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data > > > + * leaking into forked child processes. > > > + */ > > > +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); > > > > For Jason: We all redefine this prototype; should we have it in a > > central place, or do you expect some architecture to provide some static > > inline for it? > > Given the doc comment and such, that would be nice. But I didn't see a > straightforward way of doing that when I tried before. If you want to > try and send another fixup commit, that'd be welcome. I'll give it a shot.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b3fc891f1544..e3f4c5bf0661 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -237,6 +237,7 @@ config ARM64 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_GENERIC_VDSO + select VDSO_GETRANDOM select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..6e2b136813ca --- /dev/null +++ b/arch/arm64/include/asm/vdso/getrandom.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <vdso/datapage.h> + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. + */ +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) +{ + register long int x8 asm ("x8") = __NR_getrandom; + register long int x0 asm ("x0") = (long int) buffer; + register long int x1 asm ("x1") = (long int) len; + register long int x2 asm ("x2") = (long int) flags; + + asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2)); + + return x0; +} + +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) +{ + return &_vdso_rng_data; +} + +/** + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. + * @key: 32-byte input key. + * @counter: 8-byte counter, read on input and updated on return. + * @nblocks: Number of blocks to generate. 
+ * + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data + * leaking into forked child processes. + */ +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index f94b1457c117..7ddb2bc3b57b 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -2,6 +2,8 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H +#define __VDSO_RND_DATA_OFFSET 480 + #ifndef __ASSEMBLY__ #include <linux/timekeeper_internal.h> @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __arm64_get_k_vdso_data +static __always_inline +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) +{ + return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET; +} +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data + static __always_inline void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) { diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d11da6461278..37dad3bb953a 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -9,7 +9,7 @@ # Include the generic Makefile to check the built vdso. 
include $(srctree)/lib/vdso/Makefile -obj-vdso := vgettimeofday.o note.o sigreturn.o +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ -Wmissing-prototypes -Wmissing-declarations +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + -Wmissing-prototypes -Wmissing-declarations CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 45354f2ddf70..f8dbcece20e2 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -12,6 +12,8 @@ #include <asm/page.h> #include <asm/vdso.h> #include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> +#include <asm/vdso/vsyscall.h> OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") OUTPUT_ARCH(aarch64) @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64) SECTIONS { PROVIDE(_vdso_data = . 
- __VVAR_PAGES * PAGE_SIZE); + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); #ifdef CONFIG_TIME_NS PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); #endif @@ -102,6 +105,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_getrandom; local: *; }; } diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..3fb9715dd6f0 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/linkage.h> +#include <asm/cache.h> + + .text + +/* + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive + * number of blocks of output with nonnce 0, taking an input key and 8-bytes + * counter. Importantly does not spill to the stack. + * + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, + * const uint8_t *key, + * uint32_t *counter, + * size_t nblocks) + * + * x0: output bytes + * x1: 32-byte key input + * x2: 8-byte counter input/output + * x3: number of 64-byte block to write to output + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + + /* v0 = "expand 32-byte k" */ + adr_l x8, CTES + ld1 {v5.4s}, [x8] + /* v1,v2 = key */ + ld1 { v6.4s, v7.4s }, [x1] + /* v3 = counter || zero noonce */ + ldr d8, [x2] + + adr_l x8, ONE + ldr q13, [x8] + + adr_l x10, ROT8 + ld1 {v12.4s}, [x10] +.Lblock: + /* copy state to auxiliary vectors for the final add after the permute. */ + mov v0.16b, v5.16b + mov v1.16b, v6.16b + mov v2.16b, v7.16b + mov v3.16b, v8.16b + + mov w4, 20 +.Lpermute: + /* + * Permute one 64-byte block where the state matrix is stored in the four NEON + * registers v0-v3. It performs matrix operations on four words in parallel, + * but requires shuffling to rearrange the words after each round. 
+ */ + +.Ldoubleround: + /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */ + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, v3.8h + + /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */ + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */ + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + tbl v3.16b, {v3.16b}, v12.16b + + /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */ + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + /* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */ + ext v1.16b, v1.16b, v1.16b, #4 + /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */ + ext v2.16b, v2.16b, v2.16b, #8 + /* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */ + ext v3.16b, v3.16b, v3.16b, #12 + + /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */ + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + rev32 v3.8h, v3.8h + + /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */ + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #12 + sri v1.4s, v4.4s, #20 + + /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */ + add v0.4s, v0.4s, v1.4s + eor v3.16b, v3.16b, v0.16b + tbl v3.16b, {v3.16b}, v12.16b + + /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */ + add v2.4s, v2.4s, v3.4s + eor v4.16b, v1.16b, v2.16b + shl v1.4s, v4.4s, #7 + sri v1.4s, v4.4s, #25 + + /* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */ + ext v1.16b, v1.16b, v1.16b, #12 + /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */ + ext v2.16b, v2.16b, v2.16b, #8 + /* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */ + ext v3.16b, v3.16b, v3.16b, #4 + + subs w4, w4, #2 + b.ne .Ldoubleround + + /* output0 = state0 + v0 */ + add v0.4s, v0.4s, v5.4s + /* output1 = state1 + v1 */ + add v1.4s, v1.4s, v6.4s + /* output2 = state2 + v2 */ + add v2.4s, v2.4s, v7.4s + /* output2 = state3 + v3 */ + add v3.4s, v3.4s, v8.4s + st1 { v0.4s - v3.4s }, [x0] + + /* ++copy3.counter */ + add d8, d8, d13 + + /* output += 64, --nblocks */ + add x0, x0, 64 + subs x3, x3, #1 + 
b.ne .Lblock + + /* counter = copy3.counter */ + str d8, [x2] + + /* Zero out the potentially sensitive regs, in case nothing uses these again. */ + eor v0.16b, v0.16b, v0.16b + eor v1.16b, v1.16b, v1.16b + eor v2.16b, v2.16b, v2.16b + eor v3.16b, v3.16b, v3.16b + eor v6.16b, v6.16b, v6.16b + eor v7.16b, v7.16b, v7.16b + ret +SYM_FUNC_END(__arch_chacha20_blocks_nostack) + + .section ".rodata", "a", %progbits + .align L1_CACHE_SHIFT + +CTES: .word 1634760805, 857760878, 2036477234, 1797285236 +ONE: .xword 1, 0 +ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..b6d6f4db3a98 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgetrandom.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/types.h> +#include <linux/mm.h> + +#include "../../../../lib/vdso/getrandom.c" + +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); + +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 10ffdda3f2fa..f07ea679a4cc 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/) SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) TEST_GEN_PROGS := vdso_test_gettimeofday @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif TEST_GEN_PROGS += 
vdso_test_correctness -ifeq ($(uname_M),x86_64) +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M))) TEST_GEN_PROGS += vdso_test_getrandom ifneq ($(SODIUM),) TEST_GEN_PROGS += vdso_test_chacha
Hook up the generic vDSO implementation to the aarch64 vDSO data page. The required _vdso_rng_data is placed within the _vdso_data vvar page, at an offset larger than the size of vdso_data (__VDSO_RND_DATA_OFFSET). The vDSO function requires a ChaCha20 implementation that does not write to the stack, and that can do an entire ChaCha20 permutation. The one provided is based on the current chacha-neon-core.S and uses NEON on the permute operation. Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/vdso/getrandom.h | 50 +++++++ arch/arm64/include/asm/vdso/vsyscall.h | 9 ++ arch/arm64/kernel/vdso/Makefile | 7 +- arch/arm64/kernel/vdso/vdso.lds.S | 4 + arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++ arch/arm64/kernel/vdso/vgetrandom.c | 13 ++ tools/testing/selftests/vDSO/Makefile | 4 +- 8 files changed, 238 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/include/asm/vdso/getrandom.h create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c