diff mbox series

aarch64: vdso: Wire up getrandom() vDSO implementation

Message ID 20240826181059.111536-1-adhemerval.zanella@linaro.org (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series aarch64: vdso: Wire up getrandom() vDSO implementation | expand

Commit Message

Adhemerval Zanella Netto Aug. 26, 2024, 6:10 p.m. UTC
Hook up the generic vDSO implementation to the aarch64 vDSO data page.
The _vdso_rng_data required data is placed within the _vdso_data vvar
page, by using a offset larger than the vdso_data
(__VDSO_RND_DATA_OFFSET).

The vDSO function requires a ChaCha20 implementation that does not
write to the stack, and that can do an entire ChaCha20 permutation.
The one provided is based on the current chacha-neon-core.S and uses NEON
on the permute operation.

Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
---
 arch/arm64/Kconfig                         |   1 +
 arch/arm64/include/asm/vdso/getrandom.h    |  50 +++++++
 arch/arm64/include/asm/vdso/vsyscall.h     |   9 ++
 arch/arm64/kernel/vdso/Makefile            |   7 +-
 arch/arm64/kernel/vdso/vdso.lds.S          |   4 +
 arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++
 arch/arm64/kernel/vdso/vgetrandom.c        |  13 ++
 tools/testing/selftests/vDSO/Makefile      |   4 +-
 8 files changed, 238 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/include/asm/vdso/getrandom.h
 create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S
 create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c

Comments

Jason A. Donenfeld Aug. 26, 2024, 8:27 p.m. UTC | #1
Hi Adhemerval,

Thanks for posting this! Exciting to have it here.

Just some small nits for now:

On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
> +{
> +	register long int x8 asm ("x8") = __NR_getrandom;
> +	register long int x0 asm ("x0") = (long int) buffer;
> +	register long int x1 asm ("x1") = (long int) len;
> +	register long int x2 asm ("x2") = (long int) flags;

Usually it's written just as `long` or `unsigned long`, and likewise
with the cast. Also, no space after the cast.

> +#define __VDSO_RND_DATA_OFFSET  480

This is the size of the data currently there?

>  #include <asm/page.h>
>  #include <asm/vdso.h>
>  #include <asm-generic/vmlinux.lds.h>
> +#include <vdso/datapage.h>
> +#include <asm/vdso/vsyscall.h>

Possible to keep the asm/ together?

> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes

nonnce -> nonce

> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>  SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>  
>  TEST_GEN_PROGS := vdso_test_gettimeofday
> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>  TEST_GEN_PROGS += vdso_standalone_test_x86
>  endif
>  TEST_GEN_PROGS += vdso_test_correctness
> -ifeq ($(uname_M),x86_64)
> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>  TEST_GEN_PROGS += vdso_test_getrandom
>  ifneq ($(SODIUM),)
>  TEST_GEN_PROGS += vdso_test_chacha

You'll need to add the symlink to get the chacha selftest running:

  $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
  $ git add tools/arch/arm64/vdso

Also, can you confirm that the chacha selftest runs and works?

Jason
Jason A. Donenfeld Aug. 26, 2024, 8:55 p.m. UTC | #2
On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
> +{
> +	register long int x8 asm ("x8") = __NR_getrandom;
> +	register long int x0 asm ("x0") = (long int) buffer;
> +	register long int x1 asm ("x1") = (long int) len;
> +	register long int x2 asm ("x2") = (long int) flags;
> +
> +	asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2));
> +
> +	return x0;
> +}

More generally, it might be best to follow the format used by
arch/arm64/include/asm/vdso/gettimeofday.h.
Christophe Leroy Aug. 27, 2024, 8:46 a.m. UTC | #3
Le 26/08/2024 à 20:10, Adhemerval Zanella a écrit :
> Hook up the generic vDSO implementation to the aarch64 vDSO data page.
> The _vdso_rng_data required data is placed within the _vdso_data vvar
> page, by using a offset larger than the vdso_data
> (__VDSO_RND_DATA_OFFSET).
> 
> The vDSO function requires a ChaCha20 implementation that does not
> write to the stack, and that can do an entire ChaCha20 permutation.
> The one provided is based on the current chacha-neon-core.S and uses NEON
> on the permute operation.
> 
> Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
>   arch/arm64/Kconfig                         |   1 +
>   arch/arm64/include/asm/vdso/getrandom.h    |  50 +++++++
>   arch/arm64/include/asm/vdso/vsyscall.h     |   9 ++
>   arch/arm64/kernel/vdso/Makefile            |   7 +-
>   arch/arm64/kernel/vdso/vdso.lds.S          |   4 +
>   arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++
>   arch/arm64/kernel/vdso/vgetrandom.c        |  13 ++
>   tools/testing/selftests/vDSO/Makefile      |   4 +-
>   8 files changed, 238 insertions(+), 3 deletions(-)
>   create mode 100644 arch/arm64/include/asm/vdso/getrandom.h
>   create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S
>   create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c

Were you able to use selftests ? I think you are missing the symbolic 
link to vdso directory (assuming you are using latest master branch from 
https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git)

> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index b3fc891f1544..e3f4c5bf0661 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -237,6 +237,7 @@ config ARM64
>   	select HAVE_KPROBES
>   	select HAVE_KRETPROBES
>   	select HAVE_GENERIC_VDSO
> +	select VDSO_GETRANDOM

You don't keep things in alphabetical here order on ARM64 ?

>   	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
>   	select IRQ_DOMAIN
>   	select IRQ_FORCED_THREADING
> diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
> new file mode 100644
> index 000000000000..6e2b136813ca
> --- /dev/null
> +++ b/arch/arm64/include/asm/vdso/getrandom.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_VDSO_GETRANDOM_H
> +#define __ASM_VDSO_GETRANDOM_H
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <asm/unistd.h>
> +#include <vdso/datapage.h>
> +
> +/**
> + * getrandom_syscall - Invoke the getrandom() syscall.
> + * @buffer:	Destination buffer to fill with random bytes.
> + * @len:	Size of @buffer in bytes.
> + * @flags:	Zero or more GRND_* flags.
> + * Returns:	The number of random bytes written to @buffer, or a negative value indicating an error.
> + */
> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
> +{
> +	register long int x8 asm ("x8") = __NR_getrandom;
> +	register long int x0 asm ("x0") = (long int) buffer;
> +	register long int x1 asm ("x1") = (long int) len;
> +	register long int x2 asm ("x2") = (long int) flags;
> +
> +	asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2));
> +
> +	return x0;
> +}
> +
> +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
> +{
> +	return &_vdso_rng_data;
> +}
> +
> +/**
> + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
> + * @dst_bytes:	Destination buffer to hold @nblocks * 64 bytes of output.
> + * @key:	32-byte input key.
> + * @counter:	8-byte counter, read on input and updated on return.
> + * @nblocks:	Number of blocks to generate.
> + *
> + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
> + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
> + * leaking into forked child processes.
> + */
> +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);

For Jason: We all redefine this prototype, should we have it in a 
central place, or do you expect some architecture to provide some static 
inline for it ?

> +
> +#endif /* !__ASSEMBLY__ */
> +
> +#endif /* __ASM_VDSO_GETRANDOM_H */
> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
> index f94b1457c117..7ddb2bc3b57b 100644
> --- a/arch/arm64/include/asm/vdso/vsyscall.h
> +++ b/arch/arm64/include/asm/vdso/vsyscall.h
> @@ -2,6 +2,8 @@
>   #ifndef __ASM_VDSO_VSYSCALL_H
>   #define __ASM_VDSO_VSYSCALL_H
>   
> +#define __VDSO_RND_DATA_OFFSET  480
> +

How is this offset calculated or defined ? What happens if the other 
structures grow ? Could you use some sizeof(something) instead of 
something from asm-offsets if you also need it in ASM ?

>   #ifndef __ASSEMBLY__
>   
>   #include <linux/timekeeper_internal.h>
> @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void)
>   }
>   #define __arch_get_k_vdso_data __arm64_get_k_vdso_data
>   
> +static __always_inline
> +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
> +{
> +	return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET;
> +}
> +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
> +
>   static __always_inline
>   void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
>   {
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index d11da6461278..37dad3bb953a 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -9,7 +9,7 @@
>   # Include the generic Makefile to check the built vdso.
>   include $(srctree)/lib/vdso/Makefile
>   
> -obj-vdso := vgettimeofday.o note.o sigreturn.o
> +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
>   
>   # Build rules
>   targets := $(obj-vdso) vdso.so vdso.so.dbg
> @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
>   				$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
>   				$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
>   				-Wmissing-prototypes -Wmissing-declarations
> +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
> +			     $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
> +			     $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
> +			     -Wmissing-prototypes -Wmissing-declarations
>   
>   CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
> +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
>   
>   ifneq ($(c-gettimeofday-y),)
>     CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
> diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
> index 45354f2ddf70..f8dbcece20e2 100644
> --- a/arch/arm64/kernel/vdso/vdso.lds.S
> +++ b/arch/arm64/kernel/vdso/vdso.lds.S
> @@ -12,6 +12,8 @@
>   #include <asm/page.h>
>   #include <asm/vdso.h>
>   #include <asm-generic/vmlinux.lds.h>
> +#include <vdso/datapage.h>
> +#include <asm/vdso/vsyscall.h>
>   
>   OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
>   OUTPUT_ARCH(aarch64)
> @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64)
>   SECTIONS
>   {
>   	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
> +	PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
>   #ifdef CONFIG_TIME_NS
>   	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
>   #endif
> @@ -102,6 +105,7 @@ VERSION
>   		__kernel_gettimeofday;
>   		__kernel_clock_gettime;
>   		__kernel_clock_getres;
> +		__kernel_getrandom;
>   	local: *;
>   	};
>   }
> diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S

[skipped ASM as I have not spoken ARM asm since I was at school in the 90's]

> diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c
> new file mode 100644
> index 000000000000..b6d6f4db3a98
> --- /dev/null
> +++ b/arch/arm64/kernel/vdso/vgetrandom.c
> @@ -0,0 +1,13 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/types.h>
> +#include <linux/mm.h>
> +
> +#include "../../../../lib/vdso/getrandom.c"

For gettimeofday ARM64 uses c-gettimeofday-y in the Makefile instead.

You should do the same with c-getrandom-y

> +
> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
> +
> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
> +{
> +	return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
> +}
> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
> index 10ffdda3f2fa..f07ea679a4cc 100644
> --- a/tools/testing/selftests/vDSO/Makefile
> +++ b/tools/testing/selftests/vDSO/Makefile
> @@ -1,6 +1,6 @@
>   # SPDX-License-Identifier: GPL-2.0
>   uname_M := $(shell uname -m 2>/dev/null || echo not)
> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)

>   SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>   
>   TEST_GEN_PROGS := vdso_test_gettimeofday
> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>   TEST_GEN_PROGS += vdso_standalone_test_x86
>   endif
>   TEST_GEN_PROGS += vdso_test_correctness
> -ifeq ($(uname_M),x86_64)
> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))

Does that work for you when you cross-compile ? For powerpc when I cross 
compile I still get the x86_64 from uname_M here, which is unexpected.

>   TEST_GEN_PROGS += vdso_test_getrandom
>   ifneq ($(SODIUM),)
>   TEST_GEN_PROGS += vdso_test_chacha

Christophe
Jason A. Donenfeld Aug. 27, 2024, 8:53 a.m. UTC | #4
On Tue, Aug 27, 2024 at 10:46:21AM +0200, Christophe Leroy wrote:
> > +/**
> > + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
> > + * @dst_bytes:	Destination buffer to hold @nblocks * 64 bytes of output.
> > + * @key:	32-byte input key.
> > + * @counter:	8-byte counter, read on input and updated on return.
> > + * @nblocks:	Number of blocks to generate.
> > + *
> > + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
> > + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
> > + * leaking into forked child processes.
> > + */
> > +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
> 
> For Jason: We all redefine this prototype, should we have it in a 
> central place, or do you expect some architecture to provide some static 
> inline for it ?

Given the doc comment and such, that would be nice. But I didn't see a
straight forward way of doing that when I tried before. If you want to
try and send another fixup commit, that'd be welcomed.

> > +#define __VDSO_RND_DATA_OFFSET  480
> > +
> 
> How is this offset calculated or defined ? What happens if the other 
> structures grow ? Could you use some sizeof(something) instead of 
> something from asm-offsets if you also need it in ASM ?

FYI, there's a similar static calculation like this in the x86 code:

+#if !defined(_SINGLE_DATA)
+#define _SINGLE_DATA
+DECLARE_VVAR_SINGLE(640, struct vdso_rng_data, _vdso_rng_data)
+#endif

> >   uname_M := $(shell uname -m 2>/dev/null || echo not)
> > -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> > +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
> 
> >   SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
> >   
> >   TEST_GEN_PROGS := vdso_test_gettimeofday
> > @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
> >   TEST_GEN_PROGS += vdso_standalone_test_x86
> >   endif
> >   TEST_GEN_PROGS += vdso_test_correctness
> > -ifeq ($(uname_M),x86_64)
> > +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
> 
> Does that work for you when you cross-compile ? For powerpc when I cross 
> compile I still get the x86_64 from uname_M here, which is unexpected.

That sounds like a legitimate bug you're pointing out, but not one with
Adhemerval's code, right? Rather, it's something to be fixed inside of
these self tests as a whole?

Jason
Adhemerval Zanella Netto Aug. 27, 2024, 1:17 p.m. UTC | #5
On 26/08/24 17:27, Jason A. Donenfeld wrote:
> Hi Adhemerval,
> 
> Thanks for posting this! Exciting to have it here.
> 
> Just some small nits for now:
> 
> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>> +{
>> +	register long int x8 asm ("x8") = __NR_getrandom;
>> +	register long int x0 asm ("x0") = (long int) buffer;
>> +	register long int x1 asm ("x1") = (long int) len;
>> +	register long int x2 asm ("x2") = (long int) flags;
> 
> Usually it's written just as `long` or `unsigned long`, and likewise
> with the cast. Also, no space after the cast.

Ack.

> 
>> +#define __VDSO_RND_DATA_OFFSET  480
> 
> This is the size of the data currently there?

Yes, I used the same strategy x86 did.

> 
>>  #include <asm/page.h>
>>  #include <asm/vdso.h>
>>  #include <asm-generic/vmlinux.lds.h>
>> +#include <vdso/datapage.h>
>> +#include <asm/vdso/vsyscall.h>
> 
> Possible to keep the asm/ together?

Ack.

> 
>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
> 
> nonnce -> nonce

Ack.

> 
>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>  SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>  
>>  TEST_GEN_PROGS := vdso_test_gettimeofday
>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>  TEST_GEN_PROGS += vdso_standalone_test_x86
>>  endif
>>  TEST_GEN_PROGS += vdso_test_correctness
>> -ifeq ($(uname_M),x86_64)
>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>  TEST_GEN_PROGS += vdso_test_getrandom
>>  ifneq ($(SODIUM),)
>>  TEST_GEN_PROGS += vdso_test_chacha
> 
> You'll need to add the symlink to get the chacha selftest running:
> 
>   $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>   $ git add tools/arch/arm64/vdso
> 
> Also, can you confirm that the chacha selftest runs and works?

Yes, last time I has to built it manually since the Makefile machinery seem 
to be broken even on x86_64.  In a Ubuntu vm I have:

tools/testing/selftests/vDSO$ make
  CC       vdso_test_gettimeofday
  CC       vdso_test_getcpu
  CC       vdso_test_abi
  CC       vdso_test_clock_getres
  CC       vdso_standalone_test_x86
  CC       vdso_test_correctness
  CC       vdso_test_getrandom
  CC       vdso_test_chacha
In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
                 from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
                 from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
                 from /usr/include/limits.h:195,
                 from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
                 from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
                 from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
                 from /usr/include/sodium/export.h:7,
                 from /usr/include/sodium/crypto_stream_chacha20.h:14,
                 from vdso_test_chacha.c:6:
/usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
   99 | # if INT_MAX == 32767
      |      ^~~~~~~
/usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
  102 | #  if INT_MAX == 2147483647
      |       ^~~~~~~
/usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
  126 | # if LONG_MAX == 2147483647
      |      ^~~~~~~~
make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1


I will try to figure out to be build it correctly, but I think it would be
better to vgetrandom-chacha.S with a different rule.
Jason A. Donenfeld Aug. 27, 2024, 1:34 p.m. UTC | #6
On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
> 
> 
> On 26/08/24 17:27, Jason A. Donenfeld wrote:
> > Hi Adhemerval,
> > 
> > Thanks for posting this! Exciting to have it here.
> > 
> > Just some small nits for now:
> > 
> > On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
> >> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
> >> +{
> >> +	register long int x8 asm ("x8") = __NR_getrandom;
> >> +	register long int x0 asm ("x0") = (long int) buffer;
> >> +	register long int x1 asm ("x1") = (long int) len;
> >> +	register long int x2 asm ("x2") = (long int) flags;
> > 
> > Usually it's written just as `long` or `unsigned long`, and likewise
> > with the cast. Also, no space after the cast.
> 
> Ack.
> 
> > 
> >> +#define __VDSO_RND_DATA_OFFSET  480
> > 
> > This is the size of the data currently there?
> 
> Yes, I used the same strategy x86 did.
> 
> > 
> >>  #include <asm/page.h>
> >>  #include <asm/vdso.h>
> >>  #include <asm-generic/vmlinux.lds.h>
> >> +#include <vdso/datapage.h>
> >> +#include <asm/vdso/vsyscall.h>
> > 
> > Possible to keep the asm/ together?
> 
> Ack.
> 
> > 
> >> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
> >> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
> > 
> > nonnce -> nonce
> 
> Ack.
> 
> > 
> >> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> >> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
> >>  SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
> >>  
> >>  TEST_GEN_PROGS := vdso_test_gettimeofday
> >> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
> >>  TEST_GEN_PROGS += vdso_standalone_test_x86
> >>  endif
> >>  TEST_GEN_PROGS += vdso_test_correctness
> >> -ifeq ($(uname_M),x86_64)
> >> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
> >>  TEST_GEN_PROGS += vdso_test_getrandom
> >>  ifneq ($(SODIUM),)
> >>  TEST_GEN_PROGS += vdso_test_chacha
> > 
> > You'll need to add the symlink to get the chacha selftest running:
> > 
> >   $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
> >   $ git add tools/arch/arm64/vdso
> > 
> > Also, can you confirm that the chacha selftest runs and works?
> 
> Yes, last time I has to built it manually since the Makefile machinery seem 
> to be broken even on x86_64.  In a Ubuntu vm I have:
> 
> tools/testing/selftests/vDSO$ make
>   CC       vdso_test_gettimeofday
>   CC       vdso_test_getcpu
>   CC       vdso_test_abi
>   CC       vdso_test_clock_getres
>   CC       vdso_standalone_test_x86
>   CC       vdso_test_correctness
>   CC       vdso_test_getrandom
>   CC       vdso_test_chacha
> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>                  from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>                  from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>                  from /usr/include/limits.h:195,
>                  from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>                  from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>                  from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>                  from /usr/include/sodium/export.h:7,
>                  from /usr/include/sodium/crypto_stream_chacha20.h:14,
>                  from vdso_test_chacha.c:6:
> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>    99 | # if INT_MAX == 32767
>       |      ^~~~~~~
> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>   102 | #  if INT_MAX == 2147483647
>       |       ^~~~~~~
> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>   126 | # if LONG_MAX == 2147483647
>       |      ^~~~~~~~
> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1

You get that even with the latest random.git? I thought Christophe's
patch fixed that, but maybe not and I should just remove the dependency
on the sodium header instead.

Jason
Adhemerval Zanella Netto Aug. 27, 2024, 1:39 p.m. UTC | #7
On 27/08/24 10:34, Jason A. Donenfeld wrote:
> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
>>
>>
>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
>>> Hi Adhemerval,
>>>
>>> Thanks for posting this! Exciting to have it here.
>>>
>>> Just some small nits for now:
>>>
>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>>>> +{
>>>> +	register long int x8 asm ("x8") = __NR_getrandom;
>>>> +	register long int x0 asm ("x0") = (long int) buffer;
>>>> +	register long int x1 asm ("x1") = (long int) len;
>>>> +	register long int x2 asm ("x2") = (long int) flags;
>>>
>>> Usually it's written just as `long` or `unsigned long`, and likewise
>>> with the cast. Also, no space after the cast.
>>
>> Ack.
>>
>>>
>>>> +#define __VDSO_RND_DATA_OFFSET  480
>>>
>>> This is the size of the data currently there?
>>
>> Yes, I used the same strategy x86 did.
>>
>>>
>>>>  #include <asm/page.h>
>>>>  #include <asm/vdso.h>
>>>>  #include <asm-generic/vmlinux.lds.h>
>>>> +#include <vdso/datapage.h>
>>>> +#include <asm/vdso/vsyscall.h>
>>>
>>> Possible to keep the asm/ together?
>>
>> Ack.
>>
>>>
>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>>>
>>> nonnce -> nonce
>>
>> Ack.
>>
>>>
>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>>>  SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>>>  
>>>>  TEST_GEN_PROGS := vdso_test_gettimeofday
>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>>>  TEST_GEN_PROGS += vdso_standalone_test_x86
>>>>  endif
>>>>  TEST_GEN_PROGS += vdso_test_correctness
>>>> -ifeq ($(uname_M),x86_64)
>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>>>  TEST_GEN_PROGS += vdso_test_getrandom
>>>>  ifneq ($(SODIUM),)
>>>>  TEST_GEN_PROGS += vdso_test_chacha
>>>
>>> You'll need to add the symlink to get the chacha selftest running:
>>>
>>>   $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>>>   $ git add tools/arch/arm64/vdso
>>>
>>> Also, can you confirm that the chacha selftest runs and works?
>>
>> Yes, last time I has to built it manually since the Makefile machinery seem 
>> to be broken even on x86_64.  In a Ubuntu vm I have:
>>
>> tools/testing/selftests/vDSO$ make
>>   CC       vdso_test_gettimeofday
>>   CC       vdso_test_getcpu
>>   CC       vdso_test_abi
>>   CC       vdso_test_clock_getres
>>   CC       vdso_standalone_test_x86
>>   CC       vdso_test_correctness
>>   CC       vdso_test_getrandom
>>   CC       vdso_test_chacha
>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>>                  from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>>                  from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>>                  from /usr/include/limits.h:195,
>>                  from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>>                  from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>>                  from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>>                  from /usr/include/sodium/export.h:7,
>>                  from /usr/include/sodium/crypto_stream_chacha20.h:14,
>>                  from vdso_test_chacha.c:6:
>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>>    99 | # if INT_MAX == 32767
>>       |      ^~~~~~~
>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>>   102 | #  if INT_MAX == 2147483647
>>       |       ^~~~~~~
>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>>   126 | # if LONG_MAX == 2147483647
>>       |      ^~~~~~~~
>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
> 
> You get that even with the latest random.git? I thought Christophe's
> patch fixed that, but maybe not and I should just remove the dependency
> on the sodium header instead.

On x86_64 I tested with Linux master.  With random.git it is a different issue:

linux-git/tools/testing/selftests/vDSO$ make
  CC       vdso_test_gettimeofday
  CC       vdso_test_getcpu
  CC       vdso_test_abi
  CC       vdso_test_clock_getres
  CC       vdso_standalone_test_x86
  CC       vdso_test_correctness
  CC       vdso_test_getrandom
  CC       vdso_test_chacha
/usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
collect2: error: ld returned 1 exit status

If I move -lsodium to the end of the compiler command it works.
Ard Biesheuvel Aug. 27, 2024, 1:51 p.m. UTC | #8
Hi Adhemerval,

...

> diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
> new file mode 100644
> index 000000000000..3fb9715dd6f0
> --- /dev/null
> +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
> @@ -0,0 +1,153 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/linkage.h>
> +#include <asm/cache.h>
> +
> +       .text
> +
> +/*
> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
> + * counter.  Importantly does not spill to the stack.
> + *
> + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
> + *                                    const uint8_t *key,
> + *                                    uint32_t *counter,
> + *                                    size_t nblocks)
> + *
> + *     x0: output bytes
> + *     x1: 32-byte key input
> + *     x2: 8-byte counter input/output
> + *     x3: number of 64-byte block to write to output
> + */
> +SYM_FUNC_START(__arch_chacha20_blocks_nostack)
> +

Shouldn't we preserve d8-d15 here?

> +       /* v0 = "expand 32-byte k" */
> +       adr_l           x8, CTES
> +       ld1             {v5.4s}, [x8]
> +       /* v1,v2 = key */
> +       ld1             { v6.4s, v7.4s }, [x1]
> +       /* v3 = counter || zero noonce  */
> +       ldr             d8, [x2]
> +
> +       adr_l           x8, ONE
> +       ldr             q13, [x8]
> +
> +       adr_l           x10, ROT8
> +       ld1             {v12.4s}, [x10]
> +.Lblock:
> +       /* copy state to auxiliary vectors for the final add after the permute.  */
> +       mov             v0.16b, v5.16b
> +       mov             v1.16b, v6.16b
> +       mov             v2.16b, v7.16b
> +       mov             v3.16b, v8.16b
> +
> +       mov             w4, 20
> +.Lpermute:
> +       /*
> +        * Permute one 64-byte block where the state matrix is stored in the four NEON
> +        * registers v0-v3.  It performs matrix operations on four words in parallel,
> +        * but requires shuffling to rearrange the words after each round.
> +        */
> +
> +.Ldoubleround:
> +       /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
> +       add             v0.4s, v0.4s, v1.4s
> +       eor             v3.16b, v3.16b, v0.16b
> +       rev32           v3.8h, v3.8h
> +
> +       /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
> +       add             v2.4s, v2.4s, v3.4s
> +       eor             v4.16b, v1.16b, v2.16b
> +       shl             v1.4s, v4.4s, #12
> +       sri             v1.4s, v4.4s, #20
> +
> +       /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
> +       add             v0.4s, v0.4s, v1.4s
> +       eor             v3.16b, v3.16b, v0.16b
> +       tbl             v3.16b, {v3.16b}, v12.16b
> +
> +       /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
> +       add             v2.4s, v2.4s, v3.4s
> +       eor             v4.16b, v1.16b, v2.16b
> +       shl             v1.4s, v4.4s, #7
> +       sri             v1.4s, v4.4s, #25
> +
> +       /* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */
> +       ext             v1.16b, v1.16b, v1.16b, #4
> +       /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
> +       ext             v2.16b, v2.16b, v2.16b, #8
> +       /* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */
> +       ext             v3.16b, v3.16b, v3.16b, #12
> +
> +       /* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
> +       add             v0.4s, v0.4s, v1.4s
> +       eor             v3.16b, v3.16b, v0.16b
> +       rev32           v3.8h, v3.8h
> +
> +       /* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
> +       add             v2.4s, v2.4s, v3.4s
> +       eor             v4.16b, v1.16b, v2.16b
> +       shl             v1.4s, v4.4s, #12
> +       sri             v1.4s, v4.4s, #20
> +
> +       /* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
> +       add             v0.4s, v0.4s, v1.4s
> +       eor             v3.16b, v3.16b, v0.16b
> +       tbl             v3.16b, {v3.16b}, v12.16b
> +
> +       /* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
> +       add             v2.4s, v2.4s, v3.4s
> +       eor             v4.16b, v1.16b, v2.16b
> +       shl             v1.4s, v4.4s, #7
> +       sri             v1.4s, v4.4s, #25
> +
> +       /* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */
> +       ext             v1.16b, v1.16b, v1.16b, #12
> +       /* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
> +       ext             v2.16b, v2.16b, v2.16b, #8
> +       /* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */
> +       ext             v3.16b, v3.16b, v3.16b, #4
> +
> +       subs            w4, w4, #2
> +       b.ne            .Ldoubleround
> +
> +       /* output0 = state0 + v0 */
> +       add             v0.4s, v0.4s, v5.4s
> +       /* output1 = state1 + v1 */
> +       add             v1.4s, v1.4s, v6.4s
> +       /* output2 = state2 + v2 */
> +       add             v2.4s, v2.4s, v7.4s
> +       /* output2 = state3 + v3 */
> +       add             v3.4s, v3.4s, v8.4s
> +       st1             { v0.4s - v3.4s }, [x0]
> +
> +       /* ++copy3.counter */
> +       add             d8, d8, d13
> +
> +       /* output += 64, --nblocks */
> +       add             x0, x0, 64
> +       subs            x3, x3, #1
> +       b.ne            .Lblock
> +
> +       /* counter = copy3.counter */
> +       str             d8, [x2]
> +
> +       /* Zero out the potentially sensitive regs, in case nothing uses these again. */
> +       eor             v0.16b, v0.16b, v0.16b
> +       eor             v1.16b, v1.16b, v1.16b
> +       eor             v2.16b, v2.16b, v2.16b
> +       eor             v3.16b, v3.16b, v3.16b
> +       eor             v6.16b, v6.16b, v6.16b
> +       eor             v7.16b, v7.16b, v7.16b
> +       ret
> +SYM_FUNC_END(__arch_chacha20_blocks_nostack)
> +
> +        .section        ".rodata", "a", %progbits
> +        .align          L1_CACHE_SHIFT
> +
> +CTES:  .word           1634760805, 857760878,  2036477234, 1797285236
> +ONE:    .xword         1, 0
> +ROT8:  .word           0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
> +
> +emit_aarch64_feature_1_and
Christophe Leroy Aug. 27, 2024, 1:52 p.m. UTC | #9
Le 27/08/2024 à 15:17, Adhemerval Zanella Netto a écrit :
> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> 
> On 26/08/24 17:27, Jason A. Donenfeld wrote:
>> Hi Adhemerval,
>>
>> Thanks for posting this! Exciting to have it here.
>>
>> Just some small nits for now:
>>
>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>>> +{
>>> +    register long int x8 asm ("x8") = __NR_getrandom;
>>> +    register long int x0 asm ("x0") = (long int) buffer;
>>> +    register long int x1 asm ("x1") = (long int) len;
>>> +    register long int x2 asm ("x2") = (long int) flags;
>>
>> Usually it's written just as `long` or `unsigned long`, and likewise
>> with the cast. Also, no space after the cast.
> 
> Ack.
> 
>>
>>> +#define __VDSO_RND_DATA_OFFSET  480
>>
>> This is the size of the data currently there?
> 
> Yes, I used the same strategy x86 did.
> 
>>
>>>   #include <asm/page.h>
>>>   #include <asm/vdso.h>
>>>   #include <asm-generic/vmlinux.lds.h>
>>> +#include <vdso/datapage.h>
>>> +#include <asm/vdso/vsyscall.h>
>>
>> Possible to keep the asm/ together?
> 
> Ack.
> 
>>
>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>>
>> nonnce -> nonce
> 
> Ack.
> 
>>
>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>>   SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>>
>>>   TEST_GEN_PROGS := vdso_test_gettimeofday
>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>>   TEST_GEN_PROGS += vdso_standalone_test_x86
>>>   endif
>>>   TEST_GEN_PROGS += vdso_test_correctness
>>> -ifeq ($(uname_M),x86_64)
>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>>   TEST_GEN_PROGS += vdso_test_getrandom
>>>   ifneq ($(SODIUM),)
>>>   TEST_GEN_PROGS += vdso_test_chacha
>>
>> You'll need to add the symlink to get the chacha selftest running:
>>
>>    $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>>    $ git add tools/arch/arm64/vdso
>>
>> Also, can you confirm that the chacha selftest runs and works?
> 
> Yes, last time I has to built it manually since the Makefile machinery seem
> to be broken even on x86_64.  In a Ubuntu vm I have:
> 
> tools/testing/selftests/vDSO$ make
>    CC       vdso_test_gettimeofday
>    CC       vdso_test_getcpu
>    CC       vdso_test_abi
>    CC       vdso_test_clock_getres
>    CC       vdso_standalone_test_x86
>    CC       vdso_test_correctness
>    CC       vdso_test_getrandom
>    CC       vdso_test_chacha
> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>                   from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>                   from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>                   from /usr/include/limits.h:195,
>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>                   from /usr/include/sodium/export.h:7,
>                   from /usr/include/sodium/crypto_stream_chacha20.h:14,
>                   from vdso_test_chacha.c:6:
> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>     99 | # if INT_MAX == 32767
>        |      ^~~~~~~
> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>    102 | #  if INT_MAX == 2147483647
>        |       ^~~~~~~
> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>    126 | # if LONG_MAX == 2147483647
>        |      ^~~~~~~~
> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
> 
> 
> I will try to figure out to be build it correctly, but I think it would be
> better to vgetrandom-chacha.S with a different rule.

Hi, can you try with the following commit : 
https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git/commit/?id=e1af61334ade39a9af3031b7189f9acb419648a4

Thanks
Christophe
Mark Rutland Aug. 27, 2024, 2 p.m. UTC | #10
On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
> Hook up the generic vDSO implementation to the aarch64 vDSO data page.
> The _vdso_rng_data required data is placed within the _vdso_data vvar
> page, by using a offset larger than the vdso_data
> (__VDSO_RND_DATA_OFFSET).
> 
> The vDSO function requires a ChaCha20 implementation that does not
> write to the stack, and that can do an entire ChaCha20 permutation.
> The one provided is based on the current chacha-neon-core.S and uses NEON
> on the permute operation.

Is there a fallback for when NEON isn't present? The kernel supports
some (deeply embedded) implementations where NEON is not present, and 
AFAICT this will UNDEF on those machines.

Mark.

> Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
>  arch/arm64/Kconfig                         |   1 +
>  arch/arm64/include/asm/vdso/getrandom.h    |  50 +++++++
>  arch/arm64/include/asm/vdso/vsyscall.h     |   9 ++
>  arch/arm64/kernel/vdso/Makefile            |   7 +-
>  arch/arm64/kernel/vdso/vdso.lds.S          |   4 +
>  arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++
>  arch/arm64/kernel/vdso/vgetrandom.c        |  13 ++
>  tools/testing/selftests/vDSO/Makefile      |   4 +-
>  8 files changed, 238 insertions(+), 3 deletions(-)
>  create mode 100644 arch/arm64/include/asm/vdso/getrandom.h
>  create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S
>  create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index b3fc891f1544..e3f4c5bf0661 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -237,6 +237,7 @@ config ARM64
>  	select HAVE_KPROBES
>  	select HAVE_KRETPROBES
>  	select HAVE_GENERIC_VDSO
> +	select VDSO_GETRANDOM
>  	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
>  	select IRQ_DOMAIN
>  	select IRQ_FORCED_THREADING
> diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
> new file mode 100644
> index 000000000000..6e2b136813ca
> --- /dev/null
> +++ b/arch/arm64/include/asm/vdso/getrandom.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_VDSO_GETRANDOM_H
> +#define __ASM_VDSO_GETRANDOM_H
> +
> +#ifndef __ASSEMBLY__
> +
> +#include <asm/unistd.h>
> +#include <vdso/datapage.h>
> +
> +/**
> + * getrandom_syscall - Invoke the getrandom() syscall.
> + * @buffer:	Destination buffer to fill with random bytes.
> + * @len:	Size of @buffer in bytes.
> + * @flags:	Zero or more GRND_* flags.
> + * Returns:	The number of random bytes written to @buffer, or a negative value indicating an error.
> + */
> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
> +{
> +	register long int x8 asm ("x8") = __NR_getrandom;
> +	register long int x0 asm ("x0") = (long int) buffer;
> +	register long int x1 asm ("x1") = (long int) len;
> +	register long int x2 asm ("x2") = (long int) flags;
> +
> +	asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2));
> +
> +	return x0;
> +}
> +
> +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
> +{
> +	return &_vdso_rng_data;
> +}
> +
> +/**
> + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
> + * @dst_bytes:	Destination buffer to hold @nblocks * 64 bytes of output.
> + * @key:	32-byte input key.
> + * @counter:	8-byte counter, read on input and updated on return.
> + * @nblocks:	Number of blocks to generate.
> + *
> + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
> + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
> + * leaking into forked child processes.
> + */
> +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
> +
> +#endif /* !__ASSEMBLY__ */
> +
> +#endif /* __ASM_VDSO_GETRANDOM_H */
> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
> index f94b1457c117..7ddb2bc3b57b 100644
> --- a/arch/arm64/include/asm/vdso/vsyscall.h
> +++ b/arch/arm64/include/asm/vdso/vsyscall.h
> @@ -2,6 +2,8 @@
>  #ifndef __ASM_VDSO_VSYSCALL_H
>  #define __ASM_VDSO_VSYSCALL_H
>  
> +#define __VDSO_RND_DATA_OFFSET  480
> +
>  #ifndef __ASSEMBLY__
>  
>  #include <linux/timekeeper_internal.h>
> @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void)
>  }
>  #define __arch_get_k_vdso_data __arm64_get_k_vdso_data
>  
> +static __always_inline
> +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
> +{
> +	return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET;
> +}
> +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
> +
>  static __always_inline
>  void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
>  {
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index d11da6461278..37dad3bb953a 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -9,7 +9,7 @@
>  # Include the generic Makefile to check the built vdso.
>  include $(srctree)/lib/vdso/Makefile
>  
> -obj-vdso := vgettimeofday.o note.o sigreturn.o
> +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
>  
>  # Build rules
>  targets := $(obj-vdso) vdso.so vdso.so.dbg
> @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
>  				$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
>  				$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
>  				-Wmissing-prototypes -Wmissing-declarations
> +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
> +			     $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
> +			     $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
> +			     -Wmissing-prototypes -Wmissing-declarations
>  
>  CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
> +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
>  
>  ifneq ($(c-gettimeofday-y),)
>    CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
> diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
> index 45354f2ddf70..f8dbcece20e2 100644
> --- a/arch/arm64/kernel/vdso/vdso.lds.S
> +++ b/arch/arm64/kernel/vdso/vdso.lds.S
> @@ -12,6 +12,8 @@
>  #include <asm/page.h>
>  #include <asm/vdso.h>
>  #include <asm-generic/vmlinux.lds.h>
> +#include <vdso/datapage.h>
> +#include <asm/vdso/vsyscall.h>
>  
>  OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
>  OUTPUT_ARCH(aarch64)
> @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64)
>  SECTIONS
>  {
>  	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
> +	PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
>  #ifdef CONFIG_TIME_NS
>  	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
>  #endif
> @@ -102,6 +105,7 @@ VERSION
>  		__kernel_gettimeofday;
>  		__kernel_clock_gettime;
>  		__kernel_clock_getres;
> +		__kernel_getrandom;
>  	local: *;
>  	};
>  }
> diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
> new file mode 100644
> index 000000000000..3fb9715dd6f0
> --- /dev/null
> +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
> @@ -0,0 +1,153 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/linkage.h>
> +#include <asm/cache.h>
> +
> +	.text
> +
> +/*
> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
> + * counter.  Importantly does not spill to the stack.
> + *
> + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
> + *				       const uint8_t *key,
> + * 				       uint32_t *counter,
> + *				       size_t nblocks)
> + *
> + * 	x0: output bytes
> + *	x1: 32-byte key input
> + *	x2: 8-byte counter input/output
> + *	x3: number of 64-byte block to write to output
> + */
> +SYM_FUNC_START(__arch_chacha20_blocks_nostack)
> +
> +	/* v0 = "expand 32-byte k" */
> +	adr_l		x8, CTES
> +	ld1		{v5.4s}, [x8]
> +	/* v1,v2 = key */
> +	ld1		{ v6.4s, v7.4s }, [x1]
> +	/* v3 = counter || zero noonce  */
> +	ldr		d8, [x2]
> +
> +	adr_l		x8, ONE
> +	ldr		q13, [x8]
> +
> +	adr_l		x10, ROT8
> +	ld1		{v12.4s}, [x10]
> +.Lblock:
> +	/* copy state to auxiliary vectors for the final add after the permute.  */
> +	mov		v0.16b, v5.16b
> +	mov		v1.16b, v6.16b
> +	mov		v2.16b, v7.16b
> +	mov		v3.16b, v8.16b
> +
> +	mov		w4, 20
> +.Lpermute:
> +	/*
> +	 * Permute one 64-byte block where the state matrix is stored in the four NEON
> +	 * registers v0-v3.  It performs matrix operations on four words in parallel,
> +	 * but requires shuffling to rearrange the words after each round.
> +	 */
> +
> +.Ldoubleround:
> +	/* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
> +	add		v0.4s, v0.4s, v1.4s
> +	eor		v3.16b, v3.16b, v0.16b
> +	rev32		v3.8h, v3.8h
> +
> +	/* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
> +	add		v2.4s, v2.4s, v3.4s
> +	eor		v4.16b, v1.16b, v2.16b
> +	shl		v1.4s, v4.4s, #12
> +	sri		v1.4s, v4.4s, #20
> +
> +	/* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
> +	add		v0.4s, v0.4s, v1.4s
> +	eor		v3.16b, v3.16b, v0.16b
> +	tbl		v3.16b, {v3.16b}, v12.16b
> +
> +	/* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
> +	add		v2.4s, v2.4s, v3.4s
> +	eor		v4.16b, v1.16b, v2.16b
> +	shl		v1.4s, v4.4s, #7
> +	sri		v1.4s, v4.4s, #25
> +
> +	/* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */
> +	ext		v1.16b, v1.16b, v1.16b, #4
> +	/* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
> +	ext		v2.16b, v2.16b, v2.16b, #8
> +	/* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */
> +	ext		v3.16b, v3.16b, v3.16b, #12
> +
> +	/* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
> +	add		v0.4s, v0.4s, v1.4s
> +	eor		v3.16b, v3.16b, v0.16b
> +	rev32		v3.8h, v3.8h
> +
> +	/* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
> +	add		v2.4s, v2.4s, v3.4s
> +	eor		v4.16b, v1.16b, v2.16b
> +	shl		v1.4s, v4.4s, #12
> +	sri		v1.4s, v4.4s, #20
> +
> +	/* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
> +	add		v0.4s, v0.4s, v1.4s
> +	eor		v3.16b, v3.16b, v0.16b
> +	tbl		v3.16b, {v3.16b}, v12.16b
> +
> +	/* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
> +	add		v2.4s, v2.4s, v3.4s
> +	eor		v4.16b, v1.16b, v2.16b
> +	shl		v1.4s, v4.4s, #7
> +	sri		v1.4s, v4.4s, #25
> +
> +	/* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */
> +	ext		v1.16b, v1.16b, v1.16b, #12
> +	/* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
> +	ext		v2.16b, v2.16b, v2.16b, #8
> +	/* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */
> +	ext		v3.16b, v3.16b, v3.16b, #4
> +
> +	subs		w4, w4, #2
> +	b.ne		.Ldoubleround
> +
> +	/* output0 = state0 + v0 */
> +	add		v0.4s, v0.4s, v5.4s
> +	/* output1 = state1 + v1 */
> +	add		v1.4s, v1.4s, v6.4s
> +	/* output2 = state2 + v2 */
> +	add		v2.4s, v2.4s, v7.4s
> +	/* output2 = state3 + v3 */
> +	add		v3.4s, v3.4s, v8.4s
> +	st1		{ v0.4s - v3.4s }, [x0]
> +
> +	/* ++copy3.counter */
> +	add		d8, d8, d13
> +
> +	/* output += 64, --nblocks */
> +	add		x0, x0, 64
> +	subs		x3, x3, #1
> +	b.ne		.Lblock
> +
> +	/* counter = copy3.counter */
> +	str		d8, [x2]
> +
> +	/* Zero out the potentially sensitive regs, in case nothing uses these again. */
> +	eor		v0.16b, v0.16b, v0.16b
> +	eor		v1.16b, v1.16b, v1.16b
> +	eor		v2.16b, v2.16b, v2.16b
> +	eor		v3.16b, v3.16b, v3.16b
> +	eor		v6.16b, v6.16b, v6.16b
> +	eor		v7.16b, v7.16b, v7.16b
> +	ret
> +SYM_FUNC_END(__arch_chacha20_blocks_nostack)
> +
> +        .section        ".rodata", "a", %progbits
> +        .align          L1_CACHE_SHIFT
> +
> +CTES:	.word		1634760805, 857760878, 	2036477234, 1797285236
> +ONE:    .xword		1, 0
> +ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
> +
> +emit_aarch64_feature_1_and
> diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c
> new file mode 100644
> index 000000000000..b6d6f4db3a98
> --- /dev/null
> +++ b/arch/arm64/kernel/vdso/vgetrandom.c
> @@ -0,0 +1,13 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <linux/types.h>
> +#include <linux/mm.h>
> +
> +#include "../../../../lib/vdso/getrandom.c"
> +
> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
> +
> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
> +{
> +	return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
> +}
> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
> index 10ffdda3f2fa..f07ea679a4cc 100644
> --- a/tools/testing/selftests/vDSO/Makefile
> +++ b/tools/testing/selftests/vDSO/Makefile
> @@ -1,6 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
>  uname_M := $(shell uname -m 2>/dev/null || echo not)
> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>  SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>  
>  TEST_GEN_PROGS := vdso_test_gettimeofday
> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>  TEST_GEN_PROGS += vdso_standalone_test_x86
>  endif
>  TEST_GEN_PROGS += vdso_test_correctness
> -ifeq ($(uname_M),x86_64)
> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>  TEST_GEN_PROGS += vdso_test_getrandom
>  ifneq ($(SODIUM),)
>  TEST_GEN_PROGS += vdso_test_chacha
> -- 
> 2.43.0
> 
>
Christophe Leroy Aug. 27, 2024, 2 p.m. UTC | #11
Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit :
> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> 
> On 27/08/24 10:34, Jason A. Donenfeld wrote:
>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
>>>
>>>
>>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
>>>> Hi Adhemerval,
>>>>
>>>> Thanks for posting this! Exciting to have it here.
>>>>
>>>> Just some small nits for now:
>>>>
>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>>>>> +{
>>>>> +  register long int x8 asm ("x8") = __NR_getrandom;
>>>>> +  register long int x0 asm ("x0") = (long int) buffer;
>>>>> +  register long int x1 asm ("x1") = (long int) len;
>>>>> +  register long int x2 asm ("x2") = (long int) flags;
>>>>
>>>> Usually it's written just as `long` or `unsigned long`, and likewise
>>>> with the cast. Also, no space after the cast.
>>>
>>> Ack.
>>>
>>>>
>>>>> +#define __VDSO_RND_DATA_OFFSET  480
>>>>
>>>> This is the size of the data currently there?
>>>
>>> Yes, I used the same strategy x86 did.
>>>
>>>>
>>>>>   #include <asm/page.h>
>>>>>   #include <asm/vdso.h>
>>>>>   #include <asm-generic/vmlinux.lds.h>
>>>>> +#include <vdso/datapage.h>
>>>>> +#include <asm/vdso/vsyscall.h>
>>>>
>>>> Possible to keep the asm/ together?
>>>
>>> Ack.
>>>
>>>>
>>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>>>>
>>>> nonnce -> nonce
>>>
>>> Ack.
>>>
>>>>
>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>>>>   SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>>>>
>>>>>   TEST_GEN_PROGS := vdso_test_gettimeofday
>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>>>>   TEST_GEN_PROGS += vdso_standalone_test_x86
>>>>>   endif
>>>>>   TEST_GEN_PROGS += vdso_test_correctness
>>>>> -ifeq ($(uname_M),x86_64)
>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>>>>   TEST_GEN_PROGS += vdso_test_getrandom
>>>>>   ifneq ($(SODIUM),)
>>>>>   TEST_GEN_PROGS += vdso_test_chacha
>>>>
>>>> You'll need to add the symlink to get the chacha selftest running:
>>>>
>>>>    $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>>>>    $ git add tools/arch/arm64/vdso
>>>>
>>>> Also, can you confirm that the chacha selftest runs and works?
>>>
>>> Yes, last time I has to built it manually since the Makefile machinery seem
>>> to be broken even on x86_64.  In a Ubuntu vm I have:
>>>
>>> tools/testing/selftests/vDSO$ make
>>>    CC       vdso_test_gettimeofday
>>>    CC       vdso_test_getcpu
>>>    CC       vdso_test_abi
>>>    CC       vdso_test_clock_getres
>>>    CC       vdso_standalone_test_x86
>>>    CC       vdso_test_correctness
>>>    CC       vdso_test_getrandom
>>>    CC       vdso_test_chacha
>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>>>                   from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>>>                   from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>>>                   from /usr/include/limits.h:195,
>>>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>>>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>>>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>>>                   from /usr/include/sodium/export.h:7,
>>>                   from /usr/include/sodium/crypto_stream_chacha20.h:14,
>>>                   from vdso_test_chacha.c:6:
>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>>>     99 | # if INT_MAX == 32767
>>>        |      ^~~~~~~
>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>>>    102 | #  if INT_MAX == 2147483647
>>>        |       ^~~~~~~
>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>>>    126 | # if LONG_MAX == 2147483647
>>>        |      ^~~~~~~~
>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
>>
>> You get that even with the latest random.git? I thought Christophe's
>> patch fixed that, but maybe not and I should just remove the dependency
>> on the sodium header instead.
> 
> On x86_64 I tested with Linux master.  With random.git it is a different issue:
> 
> linux-git/tools/testing/selftests/vDSO$ make
>    CC       vdso_test_gettimeofday
>    CC       vdso_test_getcpu
>    CC       vdso_test_abi
>    CC       vdso_test_clock_getres
>    CC       vdso_standalone_test_x86
>    CC       vdso_test_correctness
>    CC       vdso_test_getrandom
>    CC       vdso_test_chacha
> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
> collect2: error: ld returned 1 exit status
> 
> If I move -lsodium to the end of the compiler command it works.
> 
> 

Try a "make clean" maybe ?

I have Fedora 38 and no build problem with latest random tree:

$ make V=1
gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_gettimeofday.c parse_vdso.c 
-o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday
gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_getcpu.c parse_vdso.c  -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu
gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_abi.c parse_vdso.c  -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi
gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_clock_getres.c  -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres
gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables 
-fno-stack-protector    vdso_standalone_test_x86.c parse_vdso.c  -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86
gcc -std=gnu99 -D_GNU_SOURCE=  -ldl  vdso_test_correctness.c  -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness
gcc -std=gnu99 -D_GNU_SOURCE= -isystem 
/home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include 
-isystem 
/home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi 
    vdso_test_getrandom.c parse_vdso.c  -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom
gcc -std=gnu99 -D_GNU_SOURCE= -idirafter 
/home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include 
-idirafter 
/home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include 
-idirafter 
/home/chleroy/linux-powerpc/tools/testing/selftests/../../../include 
-D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 
-Wa,--noexecstack -lsodium     vdso_test_chacha.c 
/home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S 
  -o 
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha
$
Adhemerval Zanella Netto Aug. 27, 2024, 2:01 p.m. UTC | #12
On 27/08/24 11:00, Christophe Leroy wrote:
> 
> 
> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit :
>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>>
>> On 27/08/24 10:34, Jason A. Donenfeld wrote:
>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
>>>>
>>>>
>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
>>>>> Hi Adhemerval,
>>>>>
>>>>> Thanks for posting this! Exciting to have it here.
>>>>>
>>>>> Just some small nits for now:
>>>>>
>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>>>>>> +{
>>>>>> +  register long int x8 asm ("x8") = __NR_getrandom;
>>>>>> +  register long int x0 asm ("x0") = (long int) buffer;
>>>>>> +  register long int x1 asm ("x1") = (long int) len;
>>>>>> +  register long int x2 asm ("x2") = (long int) flags;
>>>>>
>>>>> Usually it's written just as `long` or `unsigned long`, and likewise
>>>>> with the cast. Also, no space after the cast.
>>>>
>>>> Ack.
>>>>
>>>>>
>>>>>> +#define __VDSO_RND_DATA_OFFSET  480
>>>>>
>>>>> This is the size of the data currently there?
>>>>
>>>> Yes, I used the same strategy x86 did.
>>>>
>>>>>
>>>>>>   #include <asm/page.h>
>>>>>>   #include <asm/vdso.h>
>>>>>>   #include <asm-generic/vmlinux.lds.h>
>>>>>> +#include <vdso/datapage.h>
>>>>>> +#include <asm/vdso/vsyscall.h>
>>>>>
>>>>> Possible to keep the asm/ together?
>>>>
>>>> Ack.
>>>>
>>>>>
>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>>>>>
>>>>> nonnce -> nonce
>>>>
>>>> Ack.
>>>>
>>>>>
>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>>>>>   SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>>>>>
>>>>>>   TEST_GEN_PROGS := vdso_test_gettimeofday
>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>>>>>   TEST_GEN_PROGS += vdso_standalone_test_x86
>>>>>>   endif
>>>>>>   TEST_GEN_PROGS += vdso_test_correctness
>>>>>> -ifeq ($(uname_M),x86_64)
>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>>>>>   TEST_GEN_PROGS += vdso_test_getrandom
>>>>>>   ifneq ($(SODIUM),)
>>>>>>   TEST_GEN_PROGS += vdso_test_chacha
>>>>>
>>>>> You'll need to add the symlink to get the chacha selftest running:
>>>>>
>>>>>    $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>>>>>    $ git add tools/arch/arm64/vdso
>>>>>
>>>>> Also, can you confirm that the chacha selftest runs and works?
>>>>
>>>> Yes, last time I has to built it manually since the Makefile machinery seem
>>>> to be broken even on x86_64.  In a Ubuntu vm I have:
>>>>
>>>> tools/testing/selftests/vDSO$ make
>>>>    CC       vdso_test_gettimeofday
>>>>    CC       vdso_test_getcpu
>>>>    CC       vdso_test_abi
>>>>    CC       vdso_test_clock_getres
>>>>    CC       vdso_standalone_test_x86
>>>>    CC       vdso_test_correctness
>>>>    CC       vdso_test_getrandom
>>>>    CC       vdso_test_chacha
>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>>>>                   from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>>>>                   from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>>>>                   from /usr/include/limits.h:195,
>>>>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>>>>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>>>>                   from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>>>>                   from /usr/include/sodium/export.h:7,
>>>>                   from /usr/include/sodium/crypto_stream_chacha20.h:14,
>>>>                   from vdso_test_chacha.c:6:
>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>>>>     99 | # if INT_MAX == 32767
>>>>        |      ^~~~~~~
>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>>>>    102 | #  if INT_MAX == 2147483647
>>>>        |       ^~~~~~~
>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>>>>    126 | # if LONG_MAX == 2147483647
>>>>        |      ^~~~~~~~
>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
>>>
>>> You get that even with the latest random.git? I thought Christophe's
>>> patch fixed that, but maybe not and I should just remove the dependency
>>> on the sodium header instead.
>>
>> On x86_64 I tested with Linux master.  With random.git it is a different issue:
>>
>> linux-git/tools/testing/selftests/vDSO$ make
>>    CC       vdso_test_gettimeofday
>>    CC       vdso_test_getcpu
>>    CC       vdso_test_abi
>>    CC       vdso_test_clock_getres
>>    CC       vdso_standalone_test_x86
>>    CC       vdso_test_correctness
>>    CC       vdso_test_getrandom
>>    CC       vdso_test_chacha
>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
>> collect2: error: ld returned 1 exit status
>>
>> If I move -lsodium to the end of the compiler command it works.
>>
>>
> 
> Try a "make clean" maybe ?
> 
> I have Fedora 38 and no build problem with latest random tree:
> 
> $ make V=1
> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday
> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_getcpu.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu
> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_abi.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi
> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_clock_getres.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres
> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector    vdso_standalone_test_x86.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86
> gcc -std=gnu99 -D_GNU_SOURCE=  -ldl  vdso_test_correctness.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness
> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi    vdso_test_getrandom.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom
> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium     vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha
> $

It is a clean tree (git clean -dfx), and I take there is no need to build a kernel
prior hand.
Adhemerval Zanella Netto Aug. 27, 2024, 2:05 p.m. UTC | #13
On 27/08/24 11:00, Mark Rutland wrote:
> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>> Hook up the generic vDSO implementation to the aarch64 vDSO data page.
>> The _vdso_rng_data required data is placed within the _vdso_data vvar
>> page, by using a offset larger than the vdso_data
>> (__VDSO_RND_DATA_OFFSET).
>>
>> The vDSO function requires a ChaCha20 implementation that does not
>> write to the stack, and that can do an entire ChaCha20 permutation.
>> The one provided is based on the current chacha-neon-core.S and uses NEON
>> on the permute operation.
> 
> Is there a fallback for when NEON isn't present? The kernel supports
> some (deeply embedded) implementations where NEON is not present, and 
> AFAICT this will UNDEF on those machines.
> 
> Mark.

Not right know, in this case I think it better to just do something similar
to Loongarch and fallback to the syscall. I will add this on the next version.

> 
>> Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
>> ---
>>  arch/arm64/Kconfig                         |   1 +
>>  arch/arm64/include/asm/vdso/getrandom.h    |  50 +++++++
>>  arch/arm64/include/asm/vdso/vsyscall.h     |   9 ++
>>  arch/arm64/kernel/vdso/Makefile            |   7 +-
>>  arch/arm64/kernel/vdso/vdso.lds.S          |   4 +
>>  arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++
>>  arch/arm64/kernel/vdso/vgetrandom.c        |  13 ++
>>  tools/testing/selftests/vDSO/Makefile      |   4 +-
>>  8 files changed, 238 insertions(+), 3 deletions(-)
>>  create mode 100644 arch/arm64/include/asm/vdso/getrandom.h
>>  create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S
>>  create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index b3fc891f1544..e3f4c5bf0661 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -237,6 +237,7 @@ config ARM64
>>  	select HAVE_KPROBES
>>  	select HAVE_KRETPROBES
>>  	select HAVE_GENERIC_VDSO
>> +	select VDSO_GETRANDOM
>>  	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
>>  	select IRQ_DOMAIN
>>  	select IRQ_FORCED_THREADING
>> diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
>> new file mode 100644
>> index 000000000000..6e2b136813ca
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/vdso/getrandom.h
>> @@ -0,0 +1,50 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +
>> +#ifndef __ASM_VDSO_GETRANDOM_H
>> +#define __ASM_VDSO_GETRANDOM_H
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +#include <asm/unistd.h>
>> +#include <vdso/datapage.h>
>> +
>> +/**
>> + * getrandom_syscall - Invoke the getrandom() syscall.
>> + * @buffer:	Destination buffer to fill with random bytes.
>> + * @len:	Size of @buffer in bytes.
>> + * @flags:	Zero or more GRND_* flags.
>> + * Returns:	The number of random bytes written to @buffer, or a negative value indicating an error.
>> + */
>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>> +{
>> +	register long int x8 asm ("x8") = __NR_getrandom;
>> +	register long int x0 asm ("x0") = (long int) buffer;
>> +	register long int x1 asm ("x1") = (long int) len;
>> +	register long int x2 asm ("x2") = (long int) flags;
>> +
>> +	asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2));
>> +
>> +	return x0;
>> +}
>> +
>> +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
>> +{
>> +	return &_vdso_rng_data;
>> +}
>> +
>> +/**
>> + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
>> + * @dst_bytes:	Destination buffer to hold @nblocks * 64 bytes of output.
>> + * @key:	32-byte input key.
>> + * @counter:	8-byte counter, read on input and updated on return.
>> + * @nblocks:	Number of blocks to generate.
>> + *
>> + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
>> + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
>> + * leaking into forked child processes.
>> + */
>> +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
>> +
>> +#endif /* !__ASSEMBLY__ */
>> +
>> +#endif /* __ASM_VDSO_GETRANDOM_H */
>> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
>> index f94b1457c117..7ddb2bc3b57b 100644
>> --- a/arch/arm64/include/asm/vdso/vsyscall.h
>> +++ b/arch/arm64/include/asm/vdso/vsyscall.h
>> @@ -2,6 +2,8 @@
>>  #ifndef __ASM_VDSO_VSYSCALL_H
>>  #define __ASM_VDSO_VSYSCALL_H
>>  
>> +#define __VDSO_RND_DATA_OFFSET  480
>> +
>>  #ifndef __ASSEMBLY__
>>  
>>  #include <linux/timekeeper_internal.h>
>> @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void)
>>  }
>>  #define __arch_get_k_vdso_data __arm64_get_k_vdso_data
>>  
>> +static __always_inline
>> +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
>> +{
>> +	return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET;
>> +}
>> +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
>> +
>>  static __always_inline
>>  void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
>>  {
>> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
>> index d11da6461278..37dad3bb953a 100644
>> --- a/arch/arm64/kernel/vdso/Makefile
>> +++ b/arch/arm64/kernel/vdso/Makefile
>> @@ -9,7 +9,7 @@
>>  # Include the generic Makefile to check the built vdso.
>>  include $(srctree)/lib/vdso/Makefile
>>  
>> -obj-vdso := vgettimeofday.o note.o sigreturn.o
>> +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
>>  
>>  # Build rules
>>  targets := $(obj-vdso) vdso.so vdso.so.dbg
>> @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
>>  				$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
>>  				$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
>>  				-Wmissing-prototypes -Wmissing-declarations
>> +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
>> +			     $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
>> +			     $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
>> +			     -Wmissing-prototypes -Wmissing-declarations
>>  
>>  CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
>> +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
>>  
>>  ifneq ($(c-gettimeofday-y),)
>>    CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
>> diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
>> index 45354f2ddf70..f8dbcece20e2 100644
>> --- a/arch/arm64/kernel/vdso/vdso.lds.S
>> +++ b/arch/arm64/kernel/vdso/vdso.lds.S
>> @@ -12,6 +12,8 @@
>>  #include <asm/page.h>
>>  #include <asm/vdso.h>
>>  #include <asm-generic/vmlinux.lds.h>
>> +#include <vdso/datapage.h>
>> +#include <asm/vdso/vsyscall.h>
>>  
>>  OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
>>  OUTPUT_ARCH(aarch64)
>> @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64)
>>  SECTIONS
>>  {
>>  	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
>> +	PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
>>  #ifdef CONFIG_TIME_NS
>>  	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
>>  #endif
>> @@ -102,6 +105,7 @@ VERSION
>>  		__kernel_gettimeofday;
>>  		__kernel_clock_gettime;
>>  		__kernel_clock_getres;
>> +		__kernel_getrandom;
>>  	local: *;
>>  	};
>>  }
>> diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
>> new file mode 100644
>> index 000000000000..3fb9715dd6f0
>> --- /dev/null
>> +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
>> @@ -0,0 +1,153 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +#include <linux/linkage.h>
>> +#include <asm/cache.h>
>> +
>> +	.text
>> +
>> +/*
>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>> + * counter.  Importantly does not spill to the stack.
>> + *
>> + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
>> + *				       const uint8_t *key,
>> + * 				       uint32_t *counter,
>> + *				       size_t nblocks)
>> + *
>> + * 	x0: output bytes
>> + *	x1: 32-byte key input
>> + *	x2: 8-byte counter input/output
>> + *	x3: number of 64-byte block to write to output
>> + */
>> +SYM_FUNC_START(__arch_chacha20_blocks_nostack)
>> +
>> +	/* v0 = "expand 32-byte k" */
>> +	adr_l		x8, CTES
>> +	ld1		{v5.4s}, [x8]
>> +	/* v1,v2 = key */
>> +	ld1		{ v6.4s, v7.4s }, [x1]
>> +	/* v3 = counter || zero noonce  */
>> +	ldr		d8, [x2]
>> +
>> +	adr_l		x8, ONE
>> +	ldr		q13, [x8]
>> +
>> +	adr_l		x10, ROT8
>> +	ld1		{v12.4s}, [x10]
>> +.Lblock:
>> +	/* copy state to auxiliary vectors for the final add after the permute.  */
>> +	mov		v0.16b, v5.16b
>> +	mov		v1.16b, v6.16b
>> +	mov		v2.16b, v7.16b
>> +	mov		v3.16b, v8.16b
>> +
>> +	mov		w4, 20
>> +.Lpermute:
>> +	/*
>> +	 * Permute one 64-byte block where the state matrix is stored in the four NEON
>> +	 * registers v0-v3.  It performs matrix operations on four words in parallel,
>> +	 * but requires shuffling to rearrange the words after each round.
>> +	 */
>> +
>> +.Ldoubleround:
>> +	/* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
>> +	add		v0.4s, v0.4s, v1.4s
>> +	eor		v3.16b, v3.16b, v0.16b
>> +	rev32		v3.8h, v3.8h
>> +
>> +	/* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
>> +	add		v2.4s, v2.4s, v3.4s
>> +	eor		v4.16b, v1.16b, v2.16b
>> +	shl		v1.4s, v4.4s, #12
>> +	sri		v1.4s, v4.4s, #20
>> +
>> +	/* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
>> +	add		v0.4s, v0.4s, v1.4s
>> +	eor		v3.16b, v3.16b, v0.16b
>> +	tbl		v3.16b, {v3.16b}, v12.16b
>> +
>> +	/* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
>> +	add		v2.4s, v2.4s, v3.4s
>> +	eor		v4.16b, v1.16b, v2.16b
>> +	shl		v1.4s, v4.4s, #7
>> +	sri		v1.4s, v4.4s, #25
>> +
>> +	/* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */
>> +	ext		v1.16b, v1.16b, v1.16b, #4
>> +	/* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
>> +	ext		v2.16b, v2.16b, v2.16b, #8
>> +	/* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */
>> +	ext		v3.16b, v3.16b, v3.16b, #12
>> +
>> +	/* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
>> +	add		v0.4s, v0.4s, v1.4s
>> +	eor		v3.16b, v3.16b, v0.16b
>> +	rev32		v3.8h, v3.8h
>> +
>> +	/* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
>> +	add		v2.4s, v2.4s, v3.4s
>> +	eor		v4.16b, v1.16b, v2.16b
>> +	shl		v1.4s, v4.4s, #12
>> +	sri		v1.4s, v4.4s, #20
>> +
>> +	/* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
>> +	add		v0.4s, v0.4s, v1.4s
>> +	eor		v3.16b, v3.16b, v0.16b
>> +	tbl		v3.16b, {v3.16b}, v12.16b
>> +
>> +	/* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
>> +	add		v2.4s, v2.4s, v3.4s
>> +	eor		v4.16b, v1.16b, v2.16b
>> +	shl		v1.4s, v4.4s, #7
>> +	sri		v1.4s, v4.4s, #25
>> +
>> +	/* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */
>> +	ext		v1.16b, v1.16b, v1.16b, #12
>> +	/* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
>> +	ext		v2.16b, v2.16b, v2.16b, #8
>> +	/* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */
>> +	ext		v3.16b, v3.16b, v3.16b, #4
>> +
>> +	subs		w4, w4, #2
>> +	b.ne		.Ldoubleround
>> +
>> +	/* output0 = state0 + v0 */
>> +	add		v0.4s, v0.4s, v5.4s
>> +	/* output1 = state1 + v1 */
>> +	add		v1.4s, v1.4s, v6.4s
>> +	/* output2 = state2 + v2 */
>> +	add		v2.4s, v2.4s, v7.4s
>> +	/* output2 = state3 + v3 */
>> +	add		v3.4s, v3.4s, v8.4s
>> +	st1		{ v0.4s - v3.4s }, [x0]
>> +
>> +	/* ++copy3.counter */
>> +	add		d8, d8, d13
>> +
>> +	/* output += 64, --nblocks */
>> +	add		x0, x0, 64
>> +	subs		x3, x3, #1
>> +	b.ne		.Lblock
>> +
>> +	/* counter = copy3.counter */
>> +	str		d8, [x2]
>> +
>> +	/* Zero out the potentially sensitive regs, in case nothing uses these again. */
>> +	eor		v0.16b, v0.16b, v0.16b
>> +	eor		v1.16b, v1.16b, v1.16b
>> +	eor		v2.16b, v2.16b, v2.16b
>> +	eor		v3.16b, v3.16b, v3.16b
>> +	eor		v6.16b, v6.16b, v6.16b
>> +	eor		v7.16b, v7.16b, v7.16b
>> +	ret
>> +SYM_FUNC_END(__arch_chacha20_blocks_nostack)
>> +
>> +        .section        ".rodata", "a", %progbits
>> +        .align          L1_CACHE_SHIFT
>> +
>> +CTES:	.word		1634760805, 857760878, 	2036477234, 1797285236
>> +ONE:    .xword		1, 0
>> +ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
>> +
>> +emit_aarch64_feature_1_and
>> diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c
>> new file mode 100644
>> index 000000000000..b6d6f4db3a98
>> --- /dev/null
>> +++ b/arch/arm64/kernel/vdso/vgetrandom.c
>> @@ -0,0 +1,13 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +
>> +#include <linux/types.h>
>> +#include <linux/mm.h>
>> +
>> +#include "../../../../lib/vdso/getrandom.c"
>> +
>> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
>> +
>> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
>> +{
>> +	return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
>> +}
>> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
>> index 10ffdda3f2fa..f07ea679a4cc 100644
>> --- a/tools/testing/selftests/vDSO/Makefile
>> +++ b/tools/testing/selftests/vDSO/Makefile
>> @@ -1,6 +1,6 @@
>>  # SPDX-License-Identifier: GPL-2.0
>>  uname_M := $(shell uname -m 2>/dev/null || echo not)
>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>  SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>  
>>  TEST_GEN_PROGS := vdso_test_gettimeofday
>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>  TEST_GEN_PROGS += vdso_standalone_test_x86
>>  endif
>>  TEST_GEN_PROGS += vdso_test_correctness
>> -ifeq ($(uname_M),x86_64)
>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>  TEST_GEN_PROGS += vdso_test_getrandom
>>  ifneq ($(SODIUM),)
>>  TEST_GEN_PROGS += vdso_test_chacha
>> -- 
>> 2.43.0
>>
>>
Adhemerval Zanella Netto Aug. 27, 2024, 2:07 p.m. UTC | #14
On 27/08/24 05:46, Christophe Leroy wrote:
> 
> 
> Le 26/08/2024 à 20:10, Adhemerval Zanella a écrit :
>> Hook up the generic vDSO implementation to the aarch64 vDSO data page.
>> The _vdso_rng_data required data is placed within the _vdso_data vvar
>> page, by using a offset larger than the vdso_data
>> (__VDSO_RND_DATA_OFFSET).
>>
>> The vDSO function requires a ChaCha20 implementation that does not
>> write to the stack, and that can do an entire ChaCha20 permutation.
>> The one provided is based on the current chacha-neon-core.S and uses NEON
>> on the permute operation.
>>
>> Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
>> ---
>>   arch/arm64/Kconfig                         |   1 +
>>   arch/arm64/include/asm/vdso/getrandom.h    |  50 +++++++
>>   arch/arm64/include/asm/vdso/vsyscall.h     |   9 ++
>>   arch/arm64/kernel/vdso/Makefile            |   7 +-
>>   arch/arm64/kernel/vdso/vdso.lds.S          |   4 +
>>   arch/arm64/kernel/vdso/vgetrandom-chacha.S | 153 +++++++++++++++++++++
>>   arch/arm64/kernel/vdso/vgetrandom.c        |  13 ++
>>   tools/testing/selftests/vDSO/Makefile      |   4 +-
>>   8 files changed, 238 insertions(+), 3 deletions(-)
>>   create mode 100644 arch/arm64/include/asm/vdso/getrandom.h
>>   create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S
>>   create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c
> 
> Were you able to use selftests ? I think you are missing the symbolic link to vdso directory (assuming you are using latest master branch from https://git.kernel.org/pub/scm/linux/kernel/git/crng/random.git)

It is missing indeed, last time I use a old brach that has a different Makefile
machinery and it I could it built more easily.

> 
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index b3fc891f1544..e3f4c5bf0661 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -237,6 +237,7 @@ config ARM64
>>       select HAVE_KPROBES
>>       select HAVE_KRETPROBES
>>       select HAVE_GENERIC_VDSO
>> +    select VDSO_GETRANDOM
> 
> You don't keep things in alphabetical here order on ARM64 ?

It seems to most part, but the file does have some outliers (HAVE_SOFTIRQ_ON_OWN_STACK
for instance).   I moved to the end of the list.

> 
>>       select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
>>       select IRQ_DOMAIN
>>       select IRQ_FORCED_THREADING
>> diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
>> new file mode 100644
>> index 000000000000..6e2b136813ca
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/vdso/getrandom.h
>> @@ -0,0 +1,50 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +
>> +#ifndef __ASM_VDSO_GETRANDOM_H
>> +#define __ASM_VDSO_GETRANDOM_H
>> +
>> +#ifndef __ASSEMBLY__
>> +
>> +#include <asm/unistd.h>
>> +#include <vdso/datapage.h>
>> +
>> +/**
>> + * getrandom_syscall - Invoke the getrandom() syscall.
>> + * @buffer:    Destination buffer to fill with random bytes.
>> + * @len:    Size of @buffer in bytes.
>> + * @flags:    Zero or more GRND_* flags.
>> + * Returns:    The number of random bytes written to @buffer, or a negative value indicating an error.
>> + */
>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>> +{
>> +    register long int x8 asm ("x8") = __NR_getrandom;
>> +    register long int x0 asm ("x0") = (long int) buffer;
>> +    register long int x1 asm ("x1") = (long int) len;
>> +    register long int x2 asm ("x2") = (long int) flags;
>> +
>> +    asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2));
>> +
>> +    return x0;
>> +}
>> +
>> +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
>> +{
>> +    return &_vdso_rng_data;
>> +}
>> +
>> +/**
>> + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
>> + * @dst_bytes:    Destination buffer to hold @nblocks * 64 bytes of output.
>> + * @key:    32-byte input key.
>> + * @counter:    8-byte counter, read on input and updated on return.
>> + * @nblocks:    Number of blocks to generate.
>> + *
>> + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
>> + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
>> + * leaking into forked child processes.
>> + */
>> +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
> 
> For Jason: We all redefine this prototype, should we have it in a central place, or do you expect some architecture to provide some static inline for it ?
> 
>> +
>> +#endif /* !__ASSEMBLY__ */
>> +
>> +#endif /* __ASM_VDSO_GETRANDOM_H */
>> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
>> index f94b1457c117..7ddb2bc3b57b 100644
>> --- a/arch/arm64/include/asm/vdso/vsyscall.h
>> +++ b/arch/arm64/include/asm/vdso/vsyscall.h
>> @@ -2,6 +2,8 @@
>>   #ifndef __ASM_VDSO_VSYSCALL_H
>>   #define __ASM_VDSO_VSYSCALL_H
>>   +#define __VDSO_RND_DATA_OFFSET  480
>> +
> 
> How is this offset calculated or defined ? What happens if the other structures grow ? Could you use some sizeof(something) instead of something from asm-offsets if you also need it in ASM ?

That is something I talked to Jason some time ago, since a similar strategy
to use a 'magic' offset is used on x86_64.  Ideally I think the vdso_rnd_data
should be moved to  a common static structure along with _vdso_data, so the
there is no need to come up with magic offset like this.  It seems that the
powerpc does follow this pattern, but other ports no.

However, since each architecture does some specific machinery with the vdso
datapages; it would require some more extensive refactoring on multiple
architectures to get this right.

> 
>>   #ifndef __ASSEMBLY__
>>     #include <linux/timekeeper_internal.h>
>> @@ -21,6 +23,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void)
>>   }
>>   #define __arch_get_k_vdso_data __arm64_get_k_vdso_data
>>   +static __always_inline
>> +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
>> +{
>> +    return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET;
>> +}
>> +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
>> +
>>   static __always_inline
>>   void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
>>   {
>> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
>> index d11da6461278..37dad3bb953a 100644
>> --- a/arch/arm64/kernel/vdso/Makefile
>> +++ b/arch/arm64/kernel/vdso/Makefile
>> @@ -9,7 +9,7 @@
>>   # Include the generic Makefile to check the built vdso.
>>   include $(srctree)/lib/vdso/Makefile
>>   -obj-vdso := vgettimeofday.o note.o sigreturn.o
>> +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
>>     # Build rules
>>   targets := $(obj-vdso) vdso.so vdso.so.dbg
>> @@ -40,8 +40,13 @@ CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
>>                   $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
>>                   $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
>>                   -Wmissing-prototypes -Wmissing-declarations
>> +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
>> +                 $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
>> +                 $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
>> +                 -Wmissing-prototypes -Wmissing-declarations
>>     CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
>> +CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
>>     ifneq ($(c-gettimeofday-y),)
>>     CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
>> diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
>> index 45354f2ddf70..f8dbcece20e2 100644
>> --- a/arch/arm64/kernel/vdso/vdso.lds.S
>> +++ b/arch/arm64/kernel/vdso/vdso.lds.S
>> @@ -12,6 +12,8 @@
>>   #include <asm/page.h>
>>   #include <asm/vdso.h>
>>   #include <asm-generic/vmlinux.lds.h>
>> +#include <vdso/datapage.h>
>> +#include <asm/vdso/vsyscall.h>
>>     OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
>>   OUTPUT_ARCH(aarch64)
>> @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64)
>>   SECTIONS
>>   {
>>       PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
>> +    PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
>>   #ifdef CONFIG_TIME_NS
>>       PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
>>   #endif
>> @@ -102,6 +105,7 @@ VERSION
>>           __kernel_gettimeofday;
>>           __kernel_clock_gettime;
>>           __kernel_clock_getres;
>> +        __kernel_getrandom;
>>       local: *;
>>       };
>>   }
>> diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
> 
> [skipped ASM as I have not spoken ARM asm since I was at school in the 90's]
> 
>> diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c
>> new file mode 100644
>> index 000000000000..b6d6f4db3a98
>> --- /dev/null
>> +++ b/arch/arm64/kernel/vdso/vgetrandom.c
>> @@ -0,0 +1,13 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +
>> +#include <linux/types.h>
>> +#include <linux/mm.h>
>> +
>> +#include "../../../../lib/vdso/getrandom.c"
> 
> For gettimeofday ARM64 uses c-gettimeofday-y in the Makefile instead.
> 
> You should do the same with c-getrandom-y

Ack.

> 
>> +
>> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
>> +
>> +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
>> +{
>> +    return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
>> +}
>> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
>> index 10ffdda3f2fa..f07ea679a4cc 100644
>> --- a/tools/testing/selftests/vDSO/Makefile
>> +++ b/tools/testing/selftests/vDSO/Makefile
>> @@ -1,6 +1,6 @@
>>   # SPDX-License-Identifier: GPL-2.0
>>   uname_M := $(shell uname -m 2>/dev/null || echo not)
>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
> 
>>   SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>     TEST_GEN_PROGS := vdso_test_gettimeofday
>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>   TEST_GEN_PROGS += vdso_standalone_test_x86
>>   endif
>>   TEST_GEN_PROGS += vdso_test_correctness
>> -ifeq ($(uname_M),x86_64)
>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
> 
> Does that work for you when you cross-compile ? For powerpc when I cross compile I still get the x86_64 from uname_M here, which is unexpected.

Right, I haven't test cross-compilation on the selftests so I
am not sure.  I will check it.

> 
>>   TEST_GEN_PROGS += vdso_test_getrandom
>>   ifneq ($(SODIUM),)
>>   TEST_GEN_PROGS += vdso_test_chacha
> 
> Christophe
Christophe Leroy Aug. 27, 2024, 2:10 p.m. UTC | #15
Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit :
> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> 
> On 27/08/24 11:00, Christophe Leroy wrote:
>>
>>
>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit :
>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>>>
>>> On 27/08/24 10:34, Jason A. Donenfeld wrote:
>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
>>>>>
>>>>>
>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
>>>>>> Hi Adhemerval,
>>>>>>
>>>>>> Thanks for posting this! Exciting to have it here.
>>>>>>
>>>>>> Just some small nits for now:
>>>>>>
>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>>>>>>> +{
>>>>>>> +  register long int x8 asm ("x8") = __NR_getrandom;
>>>>>>> +  register long int x0 asm ("x0") = (long int) buffer;
>>>>>>> +  register long int x1 asm ("x1") = (long int) len;
>>>>>>> +  register long int x2 asm ("x2") = (long int) flags;
>>>>>>
>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise
>>>>>> with the cast. Also, no space after the cast.
>>>>>
>>>>> Ack.
>>>>>
>>>>>>
>>>>>>> +#define __VDSO_RND_DATA_OFFSET  480
>>>>>>
>>>>>> This is the size of the data currently there?
>>>>>
>>>>> Yes, I used the same strategy x86 did.
>>>>>
>>>>>>
>>>>>>>    #include <asm/page.h>
>>>>>>>    #include <asm/vdso.h>
>>>>>>>    #include <asm-generic/vmlinux.lds.h>
>>>>>>> +#include <vdso/datapage.h>
>>>>>>> +#include <asm/vdso/vsyscall.h>
>>>>>>
>>>>>> Possible to keep the asm/ together?
>>>>>
>>>>> Ack.
>>>>>
>>>>>>
>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>>>>>>
>>>>>> nonnce -> nonce
>>>>>
>>>>> Ack.
>>>>>
>>>>>>
>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>>>>>>    SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>>>>>>
>>>>>>>    TEST_GEN_PROGS := vdso_test_gettimeofday
>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>>>>>>    TEST_GEN_PROGS += vdso_standalone_test_x86
>>>>>>>    endif
>>>>>>>    TEST_GEN_PROGS += vdso_test_correctness
>>>>>>> -ifeq ($(uname_M),x86_64)
>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>>>>>>    TEST_GEN_PROGS += vdso_test_getrandom
>>>>>>>    ifneq ($(SODIUM),)
>>>>>>>    TEST_GEN_PROGS += vdso_test_chacha
>>>>>>
>>>>>> You'll need to add the symlink to get the chacha selftest running:
>>>>>>
>>>>>>     $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>>>>>>     $ git add tools/arch/arm64/vdso
>>>>>>
>>>>>> Also, can you confirm that the chacha selftest runs and works?
>>>>>
>>>>> Yes, last time I has to built it manually since the Makefile machinery seem
>>>>> to be broken even on x86_64.  In a Ubuntu vm I have:
>>>>>
>>>>> tools/testing/selftests/vDSO$ make
>>>>>     CC       vdso_test_gettimeofday
>>>>>     CC       vdso_test_getcpu
>>>>>     CC       vdso_test_abi
>>>>>     CC       vdso_test_clock_getres
>>>>>     CC       vdso_standalone_test_x86
>>>>>     CC       vdso_test_correctness
>>>>>     CC       vdso_test_getrandom
>>>>>     CC       vdso_test_chacha
>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>>>>>                    from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>>>>>                    from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>>>>>                    from /usr/include/limits.h:195,
>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>>>>>                    from /usr/include/sodium/export.h:7,
>>>>>                    from /usr/include/sodium/crypto_stream_chacha20.h:14,
>>>>>                    from vdso_test_chacha.c:6:
>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>>>>>      99 | # if INT_MAX == 32767
>>>>>         |      ^~~~~~~
>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>>>>>     102 | #  if INT_MAX == 2147483647
>>>>>         |       ^~~~~~~
>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>>>>>     126 | # if LONG_MAX == 2147483647
>>>>>         |      ^~~~~~~~
>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
>>>>
>>>> You get that even with the latest random.git? I thought Christophe's
>>>> patch fixed that, but maybe not and I should just remove the dependency
>>>> on the sodium header instead.
>>>
>>> On x86_64 I tested with Linux master.  With random.git it is a different issue:
>>>
>>> linux-git/tools/testing/selftests/vDSO$ make
>>>     CC       vdso_test_gettimeofday
>>>     CC       vdso_test_getcpu
>>>     CC       vdso_test_abi
>>>     CC       vdso_test_clock_getres
>>>     CC       vdso_standalone_test_x86
>>>     CC       vdso_test_correctness
>>>     CC       vdso_test_getrandom
>>>     CC       vdso_test_chacha
>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
>>> collect2: error: ld returned 1 exit status
>>>
>>> If I move -lsodium to the end of the compiler command it works.
>>>
>>>
>>
>> Try a "make clean" maybe ?
>>
>> I have Fedora 38 and no build problem with latest random tree:
>>
>> $ make V=1
>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday
>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_getcpu.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu
>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_abi.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi
>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_clock_getres.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres
>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector    vdso_standalone_test_x86.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86
>> gcc -std=gnu99 -D_GNU_SOURCE=  -ldl  vdso_test_correctness.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness
>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi    vdso_test_getrandom.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom
>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium     vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha
>> $
> 
> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel
> prior hand.

I meeant 'make clean'


Right, I have not built any x86 kernel at the moment.

Just :
$ pwd
/home/chleroy/linux-powerpc/tools/testing/selftests/vDSO

$ make clean

then

$ make V=1

Christophe
Adhemerval Zanella Netto Aug. 27, 2024, 2:14 p.m. UTC | #16
On 27/08/24 11:10, Christophe Leroy wrote:
> 
> 
> Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit :
>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>>
>> On 27/08/24 11:00, Christophe Leroy wrote:
>>>
>>>
>>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit :
>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>>>>
>>>> On 27/08/24 10:34, Jason A. Donenfeld wrote:
>>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
>>>>>>
>>>>>>
>>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
>>>>>>> Hi Adhemerval,
>>>>>>>
>>>>>>> Thanks for posting this! Exciting to have it here.
>>>>>>>
>>>>>>> Just some small nits for now:
>>>>>>>
>>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>>>>>>>> +{
>>>>>>>> +  register long int x8 asm ("x8") = __NR_getrandom;
>>>>>>>> +  register long int x0 asm ("x0") = (long int) buffer;
>>>>>>>> +  register long int x1 asm ("x1") = (long int) len;
>>>>>>>> +  register long int x2 asm ("x2") = (long int) flags;
>>>>>>>
>>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise
>>>>>>> with the cast. Also, no space after the cast.
>>>>>>
>>>>>> Ack.
>>>>>>
>>>>>>>
>>>>>>>> +#define __VDSO_RND_DATA_OFFSET  480
>>>>>>>
>>>>>>> This is the size of the data currently there?
>>>>>>
>>>>>> Yes, I used the same strategy x86 did.
>>>>>>
>>>>>>>
>>>>>>>>    #include <asm/page.h>
>>>>>>>>    #include <asm/vdso.h>
>>>>>>>>    #include <asm-generic/vmlinux.lds.h>
>>>>>>>> +#include <vdso/datapage.h>
>>>>>>>> +#include <asm/vdso/vsyscall.h>
>>>>>>>
>>>>>>> Possible to keep the asm/ together?
>>>>>>
>>>>>> Ack.
>>>>>>
>>>>>>>
>>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>>>>>>>
>>>>>>> nonnce -> nonce
>>>>>>
>>>>>> Ack.
>>>>>>
>>>>>>>
>>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>>>>>>>    SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>>>>>>>
>>>>>>>>    TEST_GEN_PROGS := vdso_test_gettimeofday
>>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>>>>>>>    TEST_GEN_PROGS += vdso_standalone_test_x86
>>>>>>>>    endif
>>>>>>>>    TEST_GEN_PROGS += vdso_test_correctness
>>>>>>>> -ifeq ($(uname_M),x86_64)
>>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>>>>>>>    TEST_GEN_PROGS += vdso_test_getrandom
>>>>>>>>    ifneq ($(SODIUM),)
>>>>>>>>    TEST_GEN_PROGS += vdso_test_chacha
>>>>>>>
>>>>>>> You'll need to add the symlink to get the chacha selftest running:
>>>>>>>
>>>>>>>     $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>>>>>>>     $ git add tools/arch/arm64/vdso
>>>>>>>
>>>>>>> Also, can you confirm that the chacha selftest runs and works?
>>>>>>
>>>>>> Yes, last time I has to built it manually since the Makefile machinery seem
>>>>>> to be broken even on x86_64.  In a Ubuntu vm I have:
>>>>>>
>>>>>> tools/testing/selftests/vDSO$ make
>>>>>>     CC       vdso_test_gettimeofday
>>>>>>     CC       vdso_test_getcpu
>>>>>>     CC       vdso_test_abi
>>>>>>     CC       vdso_test_clock_getres
>>>>>>     CC       vdso_standalone_test_x86
>>>>>>     CC       vdso_test_correctness
>>>>>>     CC       vdso_test_getrandom
>>>>>>     CC       vdso_test_chacha
>>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>>>>>>                    from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>>>>>>                    from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>>>>>>                    from /usr/include/limits.h:195,
>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>>>>>>                    from /usr/include/sodium/export.h:7,
>>>>>>                    from /usr/include/sodium/crypto_stream_chacha20.h:14,
>>>>>>                    from vdso_test_chacha.c:6:
>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>>>>>>      99 | # if INT_MAX == 32767
>>>>>>         |      ^~~~~~~
>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>>>>>>     102 | #  if INT_MAX == 2147483647
>>>>>>         |       ^~~~~~~
>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>>>>>>     126 | # if LONG_MAX == 2147483647
>>>>>>         |      ^~~~~~~~
>>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
>>>>>
>>>>> You get that even with the latest random.git? I thought Christophe's
>>>>> patch fixed that, but maybe not and I should just remove the dependency
>>>>> on the sodium header instead.
>>>>
>>>> On x86_64 I tested with Linux master.  With random.git it is a different issue:
>>>>
>>>> linux-git/tools/testing/selftests/vDSO$ make
>>>>     CC       vdso_test_gettimeofday
>>>>     CC       vdso_test_getcpu
>>>>     CC       vdso_test_abi
>>>>     CC       vdso_test_clock_getres
>>>>     CC       vdso_standalone_test_x86
>>>>     CC       vdso_test_correctness
>>>>     CC       vdso_test_getrandom
>>>>     CC       vdso_test_chacha
>>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
>>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
>>>> collect2: error: ld returned 1 exit status
>>>>
>>>> If I move -lsodium to the end of the compiler command it works.
>>>>
>>>>
>>>
>>> Try a "make clean" maybe ?
>>>
>>> I have Fedora 38 and no build problem with latest random tree:
>>>
>>> $ make V=1
>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday
>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_getcpu.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu
>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_abi.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi
>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_clock_getres.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres
>>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector    vdso_standalone_test_x86.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86
>>> gcc -std=gnu99 -D_GNU_SOURCE=  -ldl  vdso_test_correctness.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness
>>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi    vdso_test_getrandom.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom
>>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium     vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha
>>> $
>>
>> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel
>> prior hand.
> 
> I meeant 'make clean'
> 
> 
> Right, I have not built any x86 kernel at the moment.
> 
> Just :
> $ pwd
> /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO
> 
> $ make clean
> 
> then
> 
> $ make V=1

The issue is Ubuntu linker is configure to use --as-needed by default, this
patch fixes the issue:

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 10ffdda3f2fa..151baf650e4c 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
                                       -idirafter $(top_srcdir)/arch/$(ARCH)/include \
                                       -idirafter $(top_srcdir)/include \
                                       -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
-                                      -Wa,--noexecstack $(SODIUM)
+                                      -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM)
Jason A. Donenfeld Aug. 27, 2024, 2:28 p.m. UTC | #17
On Tue, Aug 27, 2024 at 11:14:27AM -0300, Adhemerval Zanella Netto wrote:
> 
> 
> On 27/08/24 11:10, Christophe Leroy wrote:
> > 
> > 
> > Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit :
> >> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> >>
> >> On 27/08/24 11:00, Christophe Leroy wrote:
> >>>
> >>>
> >>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit :
> >>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> >>>>
> >>>> On 27/08/24 10:34, Jason A. Donenfeld wrote:
> >>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
> >>>>>>
> >>>>>>
> >>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
> >>>>>>> Hi Adhemerval,
> >>>>>>>
> >>>>>>> Thanks for posting this! Exciting to have it here.
> >>>>>>>
> >>>>>>> Just some small nits for now:
> >>>>>>>
> >>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
> >>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
> >>>>>>>> +{
> >>>>>>>> +  register long int x8 asm ("x8") = __NR_getrandom;
> >>>>>>>> +  register long int x0 asm ("x0") = (long int) buffer;
> >>>>>>>> +  register long int x1 asm ("x1") = (long int) len;
> >>>>>>>> +  register long int x2 asm ("x2") = (long int) flags;
> >>>>>>>
> >>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise
> >>>>>>> with the cast. Also, no space after the cast.
> >>>>>>
> >>>>>> Ack.
> >>>>>>
> >>>>>>>
> >>>>>>>> +#define __VDSO_RND_DATA_OFFSET  480
> >>>>>>>
> >>>>>>> This is the size of the data currently there?
> >>>>>>
> >>>>>> Yes, I used the same strategy x86 did.
> >>>>>>
> >>>>>>>
> >>>>>>>>    #include <asm/page.h>
> >>>>>>>>    #include <asm/vdso.h>
> >>>>>>>>    #include <asm-generic/vmlinux.lds.h>
> >>>>>>>> +#include <vdso/datapage.h>
> >>>>>>>> +#include <asm/vdso/vsyscall.h>
> >>>>>>>
> >>>>>>> Possible to keep the asm/ together?
> >>>>>>
> >>>>>> Ack.
> >>>>>>
> >>>>>>>
> >>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
> >>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
> >>>>>>>
> >>>>>>> nonnce -> nonce
> >>>>>>
> >>>>>> Ack.
> >>>>>>
> >>>>>>>
> >>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> >>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
> >>>>>>>>    SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
> >>>>>>>>
> >>>>>>>>    TEST_GEN_PROGS := vdso_test_gettimeofday
> >>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
> >>>>>>>>    TEST_GEN_PROGS += vdso_standalone_test_x86
> >>>>>>>>    endif
> >>>>>>>>    TEST_GEN_PROGS += vdso_test_correctness
> >>>>>>>> -ifeq ($(uname_M),x86_64)
> >>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
> >>>>>>>>    TEST_GEN_PROGS += vdso_test_getrandom
> >>>>>>>>    ifneq ($(SODIUM),)
> >>>>>>>>    TEST_GEN_PROGS += vdso_test_chacha
> >>>>>>>
> >>>>>>> You'll need to add the symlink to get the chacha selftest running:
> >>>>>>>
> >>>>>>>     $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
> >>>>>>>     $ git add tools/arch/arm64/vdso
> >>>>>>>
> >>>>>>> Also, can you confirm that the chacha selftest runs and works?
> >>>>>>
> >>>>>> Yes, last time I has to built it manually since the Makefile machinery seem
> >>>>>> to be broken even on x86_64.  In a Ubuntu vm I have:
> >>>>>>
> >>>>>> tools/testing/selftests/vDSO$ make
> >>>>>>     CC       vdso_test_gettimeofday
> >>>>>>     CC       vdso_test_getcpu
> >>>>>>     CC       vdso_test_abi
> >>>>>>     CC       vdso_test_clock_getres
> >>>>>>     CC       vdso_standalone_test_x86
> >>>>>>     CC       vdso_test_correctness
> >>>>>>     CC       vdso_test_getrandom
> >>>>>>     CC       vdso_test_chacha
> >>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
> >>>>>>                    from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
> >>>>>>                    from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
> >>>>>>                    from /usr/include/limits.h:195,
> >>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
> >>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
> >>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
> >>>>>>                    from /usr/include/sodium/export.h:7,
> >>>>>>                    from /usr/include/sodium/crypto_stream_chacha20.h:14,
> >>>>>>                    from vdso_test_chacha.c:6:
> >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
> >>>>>>      99 | # if INT_MAX == 32767
> >>>>>>         |      ^~~~~~~
> >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
> >>>>>>     102 | #  if INT_MAX == 2147483647
> >>>>>>         |       ^~~~~~~
> >>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
> >>>>>>     126 | # if LONG_MAX == 2147483647
> >>>>>>         |      ^~~~~~~~
> >>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
> >>>>>
> >>>>> You get that even with the latest random.git? I thought Christophe's
> >>>>> patch fixed that, but maybe not and I should just remove the dependency
> >>>>> on the sodium header instead.
> >>>>
> >>>> On x86_64 I tested with Linux master.  With random.git it is a different issue:
> >>>>
> >>>> linux-git/tools/testing/selftests/vDSO$ make
> >>>>     CC       vdso_test_gettimeofday
> >>>>     CC       vdso_test_getcpu
> >>>>     CC       vdso_test_abi
> >>>>     CC       vdso_test_clock_getres
> >>>>     CC       vdso_standalone_test_x86
> >>>>     CC       vdso_test_correctness
> >>>>     CC       vdso_test_getrandom
> >>>>     CC       vdso_test_chacha
> >>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
> >>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
> >>>> collect2: error: ld returned 1 exit status
> >>>>
> >>>> If I move -lsodium to the end of the compiler command it works.
> >>>>
> >>>>
> >>>
> >>> Try a "make clean" maybe ?
> >>>
> >>> I have Fedora 38 and no build problem with latest random tree:
> >>>
> >>> $ make V=1
> >>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday
> >>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_getcpu.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu
> >>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_abi.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi
> >>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_clock_getres.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres
> >>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector    vdso_standalone_test_x86.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86
> >>> gcc -std=gnu99 -D_GNU_SOURCE=  -ldl  vdso_test_correctness.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness
> >>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi    vdso_test_getrandom.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom
> >>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium     vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha
> >>> $
> >>
> >> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel
> >> prior hand.
> > 
> > I meeant 'make clean'
> > 
> > 
> > Right, I have not built any x86 kernel at the moment.
> > 
> > Just :
> > $ pwd
> > /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO
> > 
> > $ make clean
> > 
> > then
> > 
> > $ make V=1
> 
> The issue is Ubuntu linker is configure to use --as-needed by default, this
> patch fixes the issue:
> 
> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
> index 10ffdda3f2fa..151baf650e4c 100644
> --- a/tools/testing/selftests/vDSO/Makefile
> +++ b/tools/testing/selftests/vDSO/Makefile
> @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
>                                        -idirafter $(top_srcdir)/arch/$(ARCH)/include \
>                                        -idirafter $(top_srcdir)/include \
>                                        -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
> -                                      -Wa,--noexecstack $(SODIUM)
> +                                      -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM)

Oh, it's an as-needed thing. In that case, does this fix it for you?

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 10ffdda3f2fa..834aa862ba2c 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
+SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null)
+SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null)

 TEST_GEN_PROGS := vdso_test_gettimeofday
 TEST_GEN_PROGS += vdso_test_getcpu
@@ -13,7 +14,7 @@ endif
 TEST_GEN_PROGS += vdso_test_correctness
 ifeq ($(uname_M),x86_64)
 TEST_GEN_PROGS += vdso_test_getrandom
-ifneq ($(SODIUM),)
+ifneq ($(SODIUM_LIBS),)
 TEST_GEN_PROGS += vdso_test_chacha
 endif
 endif
@@ -41,8 +42,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
                                          -isystem $(top_srcdir)/include/uapi

 $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S
+$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS)
 $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
                                       -idirafter $(top_srcdir)/arch/$(ARCH)/include \
                                       -idirafter $(top_srcdir)/include \
                                       -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
-                                      -Wa,--noexecstack $(SODIUM)
+                                      -Wa,--noexecstack $(SODIUM_CFLAGS)
Adhemerval Zanella Netto Aug. 27, 2024, 2:30 p.m. UTC | #18
On 27/08/24 11:28, Jason A. Donenfeld wrote:
> On Tue, Aug 27, 2024 at 11:14:27AM -0300, Adhemerval Zanella Netto wrote:
>>
>>
>> On 27/08/24 11:10, Christophe Leroy wrote:
>>>
>>>
>>> Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit :
>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>>>>
>>>> On 27/08/24 11:00, Christophe Leroy wrote:
>>>>>
>>>>>
>>>>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit :
>>>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>>>>>>
>>>>>> On 27/08/24 10:34, Jason A. Donenfeld wrote:
>>>>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
>>>>>>>>> Hi Adhemerval,
>>>>>>>>>
>>>>>>>>> Thanks for posting this! Exciting to have it here.
>>>>>>>>>
>>>>>>>>> Just some small nits for now:
>>>>>>>>>
>>>>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>>>>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>>>>>>>>>> +{
>>>>>>>>>> +  register long int x8 asm ("x8") = __NR_getrandom;
>>>>>>>>>> +  register long int x0 asm ("x0") = (long int) buffer;
>>>>>>>>>> +  register long int x1 asm ("x1") = (long int) len;
>>>>>>>>>> +  register long int x2 asm ("x2") = (long int) flags;
>>>>>>>>>
>>>>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise
>>>>>>>>> with the cast. Also, no space after the cast.
>>>>>>>>
>>>>>>>> Ack.
>>>>>>>>
>>>>>>>>>
>>>>>>>>>> +#define __VDSO_RND_DATA_OFFSET  480
>>>>>>>>>
>>>>>>>>> This is the size of the data currently there?
>>>>>>>>
>>>>>>>> Yes, I used the same strategy x86 did.
>>>>>>>>
>>>>>>>>>
>>>>>>>>>>    #include <asm/page.h>
>>>>>>>>>>    #include <asm/vdso.h>
>>>>>>>>>>    #include <asm-generic/vmlinux.lds.h>
>>>>>>>>>> +#include <vdso/datapage.h>
>>>>>>>>>> +#include <asm/vdso/vsyscall.h>
>>>>>>>>>
>>>>>>>>> Possible to keep the asm/ together?
>>>>>>>>
>>>>>>>> Ack.
>>>>>>>>
>>>>>>>>>
>>>>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>>>>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>>>>>>>>>
>>>>>>>>> nonnce -> nonce
>>>>>>>>
>>>>>>>> Ack.
>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>>>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>>>>>>>>>    SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>>>>>>>>>
>>>>>>>>>>    TEST_GEN_PROGS := vdso_test_gettimeofday
>>>>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>>>>>>>>>    TEST_GEN_PROGS += vdso_standalone_test_x86
>>>>>>>>>>    endif
>>>>>>>>>>    TEST_GEN_PROGS += vdso_test_correctness
>>>>>>>>>> -ifeq ($(uname_M),x86_64)
>>>>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>>>>>>>>>    TEST_GEN_PROGS += vdso_test_getrandom
>>>>>>>>>>    ifneq ($(SODIUM),)
>>>>>>>>>>    TEST_GEN_PROGS += vdso_test_chacha
>>>>>>>>>
>>>>>>>>> You'll need to add the symlink to get the chacha selftest running:
>>>>>>>>>
>>>>>>>>>     $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>>>>>>>>>     $ git add tools/arch/arm64/vdso
>>>>>>>>>
>>>>>>>>> Also, can you confirm that the chacha selftest runs and works?
>>>>>>>>
>>>>>>>> Yes, last time I has to built it manually since the Makefile machinery seem
>>>>>>>> to be broken even on x86_64.  In a Ubuntu vm I have:
>>>>>>>>
>>>>>>>> tools/testing/selftests/vDSO$ make
>>>>>>>>     CC       vdso_test_gettimeofday
>>>>>>>>     CC       vdso_test_getcpu
>>>>>>>>     CC       vdso_test_abi
>>>>>>>>     CC       vdso_test_clock_getres
>>>>>>>>     CC       vdso_standalone_test_x86
>>>>>>>>     CC       vdso_test_correctness
>>>>>>>>     CC       vdso_test_getrandom
>>>>>>>>     CC       vdso_test_chacha
>>>>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>>>>>>>>                    from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>>>>>>>>                    from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>>>>>>>>                    from /usr/include/limits.h:195,
>>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>>>>>>>>                    from /usr/include/sodium/export.h:7,
>>>>>>>>                    from /usr/include/sodium/crypto_stream_chacha20.h:14,
>>>>>>>>                    from vdso_test_chacha.c:6:
>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>>>>>>>>      99 | # if INT_MAX == 32767
>>>>>>>>         |      ^~~~~~~
>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>>>>>>>>     102 | #  if INT_MAX == 2147483647
>>>>>>>>         |       ^~~~~~~
>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>>>>>>>>     126 | # if LONG_MAX == 2147483647
>>>>>>>>         |      ^~~~~~~~
>>>>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
>>>>>>>
>>>>>>> You get that even with the latest random.git? I thought Christophe's
>>>>>>> patch fixed that, but maybe not and I should just remove the dependency
>>>>>>> on the sodium header instead.
>>>>>>
>>>>>> On x86_64 I tested with Linux master.  With random.git it is a different issue:
>>>>>>
>>>>>> linux-git/tools/testing/selftests/vDSO$ make
>>>>>>     CC       vdso_test_gettimeofday
>>>>>>     CC       vdso_test_getcpu
>>>>>>     CC       vdso_test_abi
>>>>>>     CC       vdso_test_clock_getres
>>>>>>     CC       vdso_standalone_test_x86
>>>>>>     CC       vdso_test_correctness
>>>>>>     CC       vdso_test_getrandom
>>>>>>     CC       vdso_test_chacha
>>>>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
>>>>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
>>>>>> collect2: error: ld returned 1 exit status
>>>>>>
>>>>>> If I move -lsodium to the end of the compiler command it works.
>>>>>>
>>>>>>
>>>>>
>>>>> Try a "make clean" maybe ?
>>>>>
>>>>> I have Fedora 38 and no build problem with latest random tree:
>>>>>
>>>>> $ make V=1
>>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday
>>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_getcpu.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu
>>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_abi.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi
>>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_clock_getres.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres
>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector    vdso_standalone_test_x86.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86
>>>>> gcc -std=gnu99 -D_GNU_SOURCE=  -ldl  vdso_test_correctness.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness
>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi    vdso_test_getrandom.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom
>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium     vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha
>>>>> $
>>>>
>>>> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel
>>>> prior hand.
>>>
>>> I meeant 'make clean'
>>>
>>>
>>> Right, I have not built any x86 kernel at the moment.
>>>
>>> Just :
>>> $ pwd
>>> /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO
>>>
>>> $ make clean
>>>
>>> then
>>>
>>> $ make V=1
>>
>> The issue is Ubuntu linker is configure to use --as-needed by default, this
>> patch fixes the issue:
>>
>> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
>> index 10ffdda3f2fa..151baf650e4c 100644
>> --- a/tools/testing/selftests/vDSO/Makefile
>> +++ b/tools/testing/selftests/vDSO/Makefile
>> @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
>>                                        -idirafter $(top_srcdir)/arch/$(ARCH)/include \
>>                                        -idirafter $(top_srcdir)/include \
>>                                        -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
>> -                                      -Wa,--noexecstack $(SODIUM)
>> +                                      -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM)
> 
> Oh, it's an as-needed thing. In that case, does this fix it for you?
> 
> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
> index 10ffdda3f2fa..834aa862ba2c 100644
> --- a/tools/testing/selftests/vDSO/Makefile
> +++ b/tools/testing/selftests/vDSO/Makefile
> @@ -1,7 +1,8 @@
>  # SPDX-License-Identifier: GPL-2.0
>  uname_M := $(shell uname -m 2>/dev/null || echo not)
>  ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
> +SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null)
> +SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null)
> 
>  TEST_GEN_PROGS := vdso_test_gettimeofday
>  TEST_GEN_PROGS += vdso_test_getcpu
> @@ -13,7 +14,7 @@ endif
>  TEST_GEN_PROGS += vdso_test_correctness
>  ifeq ($(uname_M),x86_64)
>  TEST_GEN_PROGS += vdso_test_getrandom
> -ifneq ($(SODIUM),)
> +ifneq ($(SODIUM_LIBS),)
>  TEST_GEN_PROGS += vdso_test_chacha
>  endif
>  endif
> @@ -41,8 +42,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
>                                           -isystem $(top_srcdir)/include/uapi
> 
>  $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S
> +$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS)
>  $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
>                                        -idirafter $(top_srcdir)/arch/$(ARCH)/include \
>                                        -idirafter $(top_srcdir)/include \
>                                        -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
> -                                      -Wa,--noexecstack $(SODIUM)
> +                                      -Wa,--noexecstack $(SODIUM_CFLAGS)
> 

Nops, 'pkg-config --cflags libsodium' is empty. The -Wl,-no-as-needed is simpler
I think.
Jason A. Donenfeld Aug. 27, 2024, 2:32 p.m. UTC | #19
On Tue, Aug 27, 2024 at 4:30 PM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 27/08/24 11:28, Jason A. Donenfeld wrote:
> > On Tue, Aug 27, 2024 at 11:14:27AM -0300, Adhemerval Zanella Netto wrote:
> >>
> >>
> >> On 27/08/24 11:10, Christophe Leroy wrote:
> >>>
> >>>
> >>> Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit :
> >>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> >>>>
> >>>> On 27/08/24 11:00, Christophe Leroy wrote:
> >>>>>
> >>>>>
> >>>>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit :
> >>>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> >>>>>>
> >>>>>> On 27/08/24 10:34, Jason A. Donenfeld wrote:
> >>>>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
> >>>>>>>>> Hi Adhemerval,
> >>>>>>>>>
> >>>>>>>>> Thanks for posting this! Exciting to have it here.
> >>>>>>>>>
> >>>>>>>>> Just some small nits for now:
> >>>>>>>>>
> >>>>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
> >>>>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
> >>>>>>>>>> +{
> >>>>>>>>>> +  register long int x8 asm ("x8") = __NR_getrandom;
> >>>>>>>>>> +  register long int x0 asm ("x0") = (long int) buffer;
> >>>>>>>>>> +  register long int x1 asm ("x1") = (long int) len;
> >>>>>>>>>> +  register long int x2 asm ("x2") = (long int) flags;
> >>>>>>>>>
> >>>>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise
> >>>>>>>>> with the cast. Also, no space after the cast.
> >>>>>>>>
> >>>>>>>> Ack.
> >>>>>>>>
> >>>>>>>>>
> >>>>>>>>>> +#define __VDSO_RND_DATA_OFFSET  480
> >>>>>>>>>
> >>>>>>>>> This is the size of the data currently there?
> >>>>>>>>
> >>>>>>>> Yes, I used the same strategy x86 did.
> >>>>>>>>
> >>>>>>>>>
> >>>>>>>>>>    #include <asm/page.h>
> >>>>>>>>>>    #include <asm/vdso.h>
> >>>>>>>>>>    #include <asm-generic/vmlinux.lds.h>
> >>>>>>>>>> +#include <vdso/datapage.h>
> >>>>>>>>>> +#include <asm/vdso/vsyscall.h>
> >>>>>>>>>
> >>>>>>>>> Possible to keep the asm/ together?
> >>>>>>>>
> >>>>>>>> Ack.
> >>>>>>>>
> >>>>>>>>>
> >>>>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
> >>>>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
> >>>>>>>>>
> >>>>>>>>> nonnce -> nonce
> >>>>>>>>
> >>>>>>>> Ack.
> >>>>>>>>
> >>>>>>>>>
> >>>>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> >>>>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
> >>>>>>>>>>    SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
> >>>>>>>>>>
> >>>>>>>>>>    TEST_GEN_PROGS := vdso_test_gettimeofday
> >>>>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
> >>>>>>>>>>    TEST_GEN_PROGS += vdso_standalone_test_x86
> >>>>>>>>>>    endif
> >>>>>>>>>>    TEST_GEN_PROGS += vdso_test_correctness
> >>>>>>>>>> -ifeq ($(uname_M),x86_64)
> >>>>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
> >>>>>>>>>>    TEST_GEN_PROGS += vdso_test_getrandom
> >>>>>>>>>>    ifneq ($(SODIUM),)
> >>>>>>>>>>    TEST_GEN_PROGS += vdso_test_chacha
> >>>>>>>>>
> >>>>>>>>> You'll need to add the symlink to get the chacha selftest running:
> >>>>>>>>>
> >>>>>>>>>     $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
> >>>>>>>>>     $ git add tools/arch/arm64/vdso
> >>>>>>>>>
> >>>>>>>>> Also, can you confirm that the chacha selftest runs and works?
> >>>>>>>>
> >>>>>>>> Yes, last time I has to built it manually since the Makefile machinery seem
> >>>>>>>> to be broken even on x86_64.  In a Ubuntu vm I have:
> >>>>>>>>
> >>>>>>>> tools/testing/selftests/vDSO$ make
> >>>>>>>>     CC       vdso_test_gettimeofday
> >>>>>>>>     CC       vdso_test_getcpu
> >>>>>>>>     CC       vdso_test_abi
> >>>>>>>>     CC       vdso_test_clock_getres
> >>>>>>>>     CC       vdso_standalone_test_x86
> >>>>>>>>     CC       vdso_test_correctness
> >>>>>>>>     CC       vdso_test_getrandom
> >>>>>>>>     CC       vdso_test_chacha
> >>>>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
> >>>>>>>>                    from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
> >>>>>>>>                    from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
> >>>>>>>>                    from /usr/include/limits.h:195,
> >>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
> >>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
> >>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
> >>>>>>>>                    from /usr/include/sodium/export.h:7,
> >>>>>>>>                    from /usr/include/sodium/crypto_stream_chacha20.h:14,
> >>>>>>>>                    from vdso_test_chacha.c:6:
> >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
> >>>>>>>>      99 | # if INT_MAX == 32767
> >>>>>>>>         |      ^~~~~~~
> >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
> >>>>>>>>     102 | #  if INT_MAX == 2147483647
> >>>>>>>>         |       ^~~~~~~
> >>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
> >>>>>>>>     126 | # if LONG_MAX == 2147483647
> >>>>>>>>         |      ^~~~~~~~
> >>>>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
> >>>>>>>
> >>>>>>> You get that even with the latest random.git? I thought Christophe's
> >>>>>>> patch fixed that, but maybe not and I should just remove the dependency
> >>>>>>> on the sodium header instead.
> >>>>>>
> >>>>>> On x86_64 I tested with Linux master.  With random.git it is a different issue:
> >>>>>>
> >>>>>> linux-git/tools/testing/selftests/vDSO$ make
> >>>>>>     CC       vdso_test_gettimeofday
> >>>>>>     CC       vdso_test_getcpu
> >>>>>>     CC       vdso_test_abi
> >>>>>>     CC       vdso_test_clock_getres
> >>>>>>     CC       vdso_standalone_test_x86
> >>>>>>     CC       vdso_test_correctness
> >>>>>>     CC       vdso_test_getrandom
> >>>>>>     CC       vdso_test_chacha
> >>>>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
> >>>>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
> >>>>>> collect2: error: ld returned 1 exit status
> >>>>>>
> >>>>>> If I move -lsodium to the end of the compiler command it works.
> >>>>>>
> >>>>>>
> >>>>>
> >>>>> Try a "make clean" maybe ?
> >>>>>
> >>>>> I have Fedora 38 and no build problem with latest random tree:
> >>>>>
> >>>>> $ make V=1
> >>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday
> >>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_getcpu.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu
> >>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_abi.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi
> >>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_clock_getres.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres
> >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector    vdso_standalone_test_x86.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86
> >>>>> gcc -std=gnu99 -D_GNU_SOURCE=  -ldl  vdso_test_correctness.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness
> >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi    vdso_test_getrandom.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom
> >>>>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium     vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha
> >>>>> $
> >>>>
> >>>> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel
> >>>> prior hand.
> >>>
> >>> I meeant 'make clean'
> >>>
> >>>
> >>> Right, I have not built any x86 kernel at the moment.
> >>>
> >>> Just :
> >>> $ pwd
> >>> /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO
> >>>
> >>> $ make clean
> >>>
> >>> then
> >>>
> >>> $ make V=1
> >>
> >> The issue is Ubuntu linker is configure to use --as-needed by default, this
> >> patch fixes the issue:
> >>
> >> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
> >> index 10ffdda3f2fa..151baf650e4c 100644
> >> --- a/tools/testing/selftests/vDSO/Makefile
> >> +++ b/tools/testing/selftests/vDSO/Makefile
> >> @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
> >>                                        -idirafter $(top_srcdir)/arch/$(ARCH)/include \
> >>                                        -idirafter $(top_srcdir)/include \
> >>                                        -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
> >> -                                      -Wa,--noexecstack $(SODIUM)
> >> +                                      -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM)
> >
> > Oh, it's an as-needed thing. In that case, does this fix it for you?
> >
> > diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
> > index 10ffdda3f2fa..834aa862ba2c 100644
> > --- a/tools/testing/selftests/vDSO/Makefile
> > +++ b/tools/testing/selftests/vDSO/Makefile
> > @@ -1,7 +1,8 @@
> >  # SPDX-License-Identifier: GPL-2.0
> >  uname_M := $(shell uname -m 2>/dev/null || echo not)
> >  ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
> > -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
> > +SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null)
> > +SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null)
> >
> >  TEST_GEN_PROGS := vdso_test_gettimeofday
> >  TEST_GEN_PROGS += vdso_test_getcpu
> > @@ -13,7 +14,7 @@ endif
> >  TEST_GEN_PROGS += vdso_test_correctness
> >  ifeq ($(uname_M),x86_64)
> >  TEST_GEN_PROGS += vdso_test_getrandom
> > -ifneq ($(SODIUM),)
> > +ifneq ($(SODIUM_LIBS),)
> >  TEST_GEN_PROGS += vdso_test_chacha
> >  endif
> >  endif
> > @@ -41,8 +42,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
> >                                           -isystem $(top_srcdir)/include/uapi
> >
> >  $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S
> > +$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS)
> >  $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
> >                                        -idirafter $(top_srcdir)/arch/$(ARCH)/include \
> >                                        -idirafter $(top_srcdir)/include \
> >                                        -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
> > -                                      -Wa,--noexecstack $(SODIUM)
> > +                                      -Wa,--noexecstack $(SODIUM_CFLAGS)
> >
>
> Nops, 'pkg-config --cflags libsodium' is empty. The -Wl,-no-as-needed is simpler
> I think.

The --cflags thing is for a different issue Ruoyao found. My intended
fix here was the LDLIBS += $(SODIUM_LIBS) part, which moves the
`-lsodium` closer to the end of the command line. But it still doesn't
work? Surprising...
Adhemerval Zanella Netto Aug. 27, 2024, 2:35 p.m. UTC | #20
On 27/08/24 11:32, Jason A. Donenfeld wrote:
> On Tue, Aug 27, 2024 at 4:30 PM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 27/08/24 11:28, Jason A. Donenfeld wrote:
>>> On Tue, Aug 27, 2024 at 11:14:27AM -0300, Adhemerval Zanella Netto wrote:
>>>>
>>>>
>>>> On 27/08/24 11:10, Christophe Leroy wrote:
>>>>>
>>>>>
>>>>> Le 27/08/2024 à 16:01, Adhemerval Zanella Netto a écrit :
>>>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>>>>>>
>>>>>> On 27/08/24 11:00, Christophe Leroy wrote:
>>>>>>>
>>>>>>>
>>>>>>> Le 27/08/2024 à 15:39, Adhemerval Zanella Netto a écrit :
>>>>>>>> [Vous ne recevez pas souvent de courriers de adhemerval.zanella@linaro.org. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>>>>>>>>
>>>>>>>> On 27/08/24 10:34, Jason A. Donenfeld wrote:
>>>>>>>>> On Tue, Aug 27, 2024 at 10:17:18AM -0300, Adhemerval Zanella Netto wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 26/08/24 17:27, Jason A. Donenfeld wrote:
>>>>>>>>>>> Hi Adhemerval,
>>>>>>>>>>>
>>>>>>>>>>> Thanks for posting this! Exciting to have it here.
>>>>>>>>>>>
>>>>>>>>>>> Just some small nits for now:
>>>>>>>>>>>
>>>>>>>>>>> On Mon, Aug 26, 2024 at 06:10:40PM +0000, Adhemerval Zanella wrote:
>>>>>>>>>>>> +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
>>>>>>>>>>>> +{
>>>>>>>>>>>> +  register long int x8 asm ("x8") = __NR_getrandom;
>>>>>>>>>>>> +  register long int x0 asm ("x0") = (long int) buffer;
>>>>>>>>>>>> +  register long int x1 asm ("x1") = (long int) len;
>>>>>>>>>>>> +  register long int x2 asm ("x2") = (long int) flags;
>>>>>>>>>>>
>>>>>>>>>>> Usually it's written just as `long` or `unsigned long`, and likewise
>>>>>>>>>>> with the cast. Also, no space after the cast.
>>>>>>>>>>
>>>>>>>>>> Ack.
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> +#define __VDSO_RND_DATA_OFFSET  480
>>>>>>>>>>>
>>>>>>>>>>> This is the size of the data currently there?
>>>>>>>>>>
>>>>>>>>>> Yes, I used the same strategy x86 did.
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>>    #include <asm/page.h>
>>>>>>>>>>>>    #include <asm/vdso.h>
>>>>>>>>>>>>    #include <asm-generic/vmlinux.lds.h>
>>>>>>>>>>>> +#include <vdso/datapage.h>
>>>>>>>>>>>> +#include <asm/vdso/vsyscall.h>
>>>>>>>>>>>
>>>>>>>>>>> Possible to keep the asm/ together?
>>>>>>>>>>
>>>>>>>>>> Ack.
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> + * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
>>>>>>>>>>>> + * number of blocks of output with nonnce 0, taking an input key and 8-bytes
>>>>>>>>>>>
>>>>>>>>>>> nonnce -> nonce
>>>>>>>>>>
>>>>>>>>>> Ack.
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>>>>>>>>>>> +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
>>>>>>>>>>>>    SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>>>>>>>>>>>
>>>>>>>>>>>>    TEST_GEN_PROGS := vdso_test_gettimeofday
>>>>>>>>>>>> @@ -11,7 +11,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
>>>>>>>>>>>>    TEST_GEN_PROGS += vdso_standalone_test_x86
>>>>>>>>>>>>    endif
>>>>>>>>>>>>    TEST_GEN_PROGS += vdso_test_correctness
>>>>>>>>>>>> -ifeq ($(uname_M),x86_64)
>>>>>>>>>>>> +ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
>>>>>>>>>>>>    TEST_GEN_PROGS += vdso_test_getrandom
>>>>>>>>>>>>    ifneq ($(SODIUM),)
>>>>>>>>>>>>    TEST_GEN_PROGS += vdso_test_chacha
>>>>>>>>>>>
>>>>>>>>>>> You'll need to add the symlink to get the chacha selftest running:
>>>>>>>>>>>
>>>>>>>>>>>     $ ln -s ../../../arch/arm64/kernel/vdso tools/arch/arm64/vdso
>>>>>>>>>>>     $ git add tools/arch/arm64/vdso
>>>>>>>>>>>
>>>>>>>>>>> Also, can you confirm that the chacha selftest runs and works?
>>>>>>>>>>
>>>>>>>>>> Yes, last time I has to built it manually since the Makefile machinery seem
>>>>>>>>>> to be broken even on x86_64.  In a Ubuntu vm I have:
>>>>>>>>>>
>>>>>>>>>> tools/testing/selftests/vDSO$ make
>>>>>>>>>>     CC       vdso_test_gettimeofday
>>>>>>>>>>     CC       vdso_test_getcpu
>>>>>>>>>>     CC       vdso_test_abi
>>>>>>>>>>     CC       vdso_test_clock_getres
>>>>>>>>>>     CC       vdso_standalone_test_x86
>>>>>>>>>>     CC       vdso_test_correctness
>>>>>>>>>>     CC       vdso_test_getrandom
>>>>>>>>>>     CC       vdso_test_chacha
>>>>>>>>>> In file included from /home/azanella/Projects/linux/linux-git/include/linux/limits.h:7,
>>>>>>>>>>                    from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38,
>>>>>>>>>>                    from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161,
>>>>>>>>>>                    from /usr/include/limits.h:195,
>>>>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:205,
>>>>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/syslimits.h:7,
>>>>>>>>>>                    from /usr/lib/gcc/x86_64-linux-gnu/13/include/limits.h:34,
>>>>>>>>>>                    from /usr/include/sodium/export.h:7,
>>>>>>>>>>                    from /usr/include/sodium/crypto_stream_chacha20.h:14,
>>>>>>>>>>                    from vdso_test_chacha.c:6:
>>>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:99:6: error: missing binary operator before token "("
>>>>>>>>>>      99 | # if INT_MAX == 32767
>>>>>>>>>>         |      ^~~~~~~
>>>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:102:7: error: missing binary operator before token "("
>>>>>>>>>>     102 | #  if INT_MAX == 2147483647
>>>>>>>>>>         |       ^~~~~~~
>>>>>>>>>> /usr/include/x86_64-linux-gnu/bits/xopen_lim.h:126:6: error: missing binary operator before token "("
>>>>>>>>>>     126 | # if LONG_MAX == 2147483647
>>>>>>>>>>         |      ^~~~~~~~
>>>>>>>>>> make: *** [../lib.mk:222: /home/azanella/Projects/linux/linux-git/tools/testing/selftests/vDSO/vdso_test_chacha] Error 1
>>>>>>>>>
>>>>>>>>> You get that even with the latest random.git? I thought Christophe's
>>>>>>>>> patch fixed that, but maybe not and I should just remove the dependency
>>>>>>>>> on the sodium header instead.
>>>>>>>>
>>>>>>>> On x86_64 I tested with Linux master.  With random.git it is a different issue:
>>>>>>>>
>>>>>>>> linux-git/tools/testing/selftests/vDSO$ make
>>>>>>>>     CC       vdso_test_gettimeofday
>>>>>>>>     CC       vdso_test_getcpu
>>>>>>>>     CC       vdso_test_abi
>>>>>>>>     CC       vdso_test_clock_getres
>>>>>>>>     CC       vdso_standalone_test_x86
>>>>>>>>     CC       vdso_test_correctness
>>>>>>>>     CC       vdso_test_getrandom
>>>>>>>>     CC       vdso_test_chacha
>>>>>>>> /usr/bin/ld: /tmp/ccKpjnSM.o: in function `main':
>>>>>>>> vdso_test_chacha.c:(.text+0x276): undefined reference to `crypto_stream_chacha20'
>>>>>>>> collect2: error: ld returned 1 exit status
>>>>>>>>
>>>>>>>> If I move -lsodium to the end of the compiler command it works.
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>> Try a "make clean" maybe ?
>>>>>>>
>>>>>>> I have Fedora 38 and no build problem with latest random tree:
>>>>>>>
>>>>>>> $ make V=1
>>>>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_gettimeofday.c parse_vdso.c -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_gettimeofday
>>>>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_getcpu.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getcpu
>>>>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_abi.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_abi
>>>>>>> gcc -std=gnu99 -D_GNU_SOURCE=    vdso_test_clock_getres.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_clock_getres
>>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector    vdso_standalone_test_x86.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_standalone_test_x86
>>>>>>> gcc -std=gnu99 -D_GNU_SOURCE=  -ldl  vdso_test_correctness.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_correctness
>>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -isystem /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include/uapi    vdso_test_getrandom.c parse_vdso.c  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_getrandom
>>>>>>> gcc -std=gnu99 -D_GNU_SOURCE= -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../arch/x86/include -idirafter /home/chleroy/linux-powerpc/tools/testing/selftests/../../../include -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 -Wa,--noexecstack -lsodium     vdso_test_chacha.c /home/chleroy/linux-powerpc/tools/testing/selftests/../../../tools/arch/x86/vdso/vgetrandom-chacha.S  -o /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO/vdso_test_chacha
>>>>>>> $
>>>>>>
>>>>>> It is a clean tree (git clean -dfx), and I take there is no need to build a kernel
>>>>>> prior hand.
>>>>>
>>>>> I meeant 'make clean'
>>>>>
>>>>>
>>>>> Right, I have not built any x86 kernel at the moment.
>>>>>
>>>>> Just :
>>>>> $ pwd
>>>>> /home/chleroy/linux-powerpc/tools/testing/selftests/vDSO
>>>>>
>>>>> $ make clean
>>>>>
>>>>> then
>>>>>
>>>>> $ make V=1
>>>>
>>>> The issue is Ubuntu linker is configure to use --as-needed by default, this
>>>> patch fixes the issue:
>>>>
>>>> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
>>>> index 10ffdda3f2fa..151baf650e4c 100644
>>>> --- a/tools/testing/selftests/vDSO/Makefile
>>>> +++ b/tools/testing/selftests/vDSO/Makefile
>>>> @@ -45,4 +45,4 @@ $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
>>>>                                        -idirafter $(top_srcdir)/arch/$(ARCH)/include \
>>>>                                        -idirafter $(top_srcdir)/include \
>>>>                                        -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
>>>> -                                      -Wa,--noexecstack $(SODIUM)
>>>> +                                      -Wa,--noexecstack -Wl,-no-as-needed $(SODIUM)
>>>
>>> Oh, it's an as-needed thing. In that case, does this fix it for you?
>>>
>>> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
>>> index 10ffdda3f2fa..834aa862ba2c 100644
>>> --- a/tools/testing/selftests/vDSO/Makefile
>>> +++ b/tools/testing/selftests/vDSO/Makefile
>>> @@ -1,7 +1,8 @@
>>>  # SPDX-License-Identifier: GPL-2.0
>>>  uname_M := $(shell uname -m 2>/dev/null || echo not)
>>>  ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
>>> -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
>>> +SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null)
>>> +SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null)
>>>
>>>  TEST_GEN_PROGS := vdso_test_gettimeofday
>>>  TEST_GEN_PROGS += vdso_test_getcpu
>>> @@ -13,7 +14,7 @@ endif
>>>  TEST_GEN_PROGS += vdso_test_correctness
>>>  ifeq ($(uname_M),x86_64)
>>>  TEST_GEN_PROGS += vdso_test_getrandom
>>> -ifneq ($(SODIUM),)
>>> +ifneq ($(SODIUM_LIBS),)
>>>  TEST_GEN_PROGS += vdso_test_chacha
>>>  endif
>>>  endif
>>> @@ -41,8 +42,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
>>>                                           -isystem $(top_srcdir)/include/uapi
>>>
>>>  $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S
>>> +$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS)
>>>  $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
>>>                                        -idirafter $(top_srcdir)/arch/$(ARCH)/include \
>>>                                        -idirafter $(top_srcdir)/include \
>>>                                        -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
>>> -                                      -Wa,--noexecstack $(SODIUM)
>>> +                                      -Wa,--noexecstack $(SODIUM_CFLAGS)
>>>
>>
>> Nops, 'pkg-config --cflags libsodium' is empty. The -Wl,-no-as-needed is simpler
>> I think.
> 
> The --cflags thing is for a different issue Ruoyao found. My intended
> fix here was the LDLIBS += $(SODIUM_LIBS) part, which moves the
> `-lsodium` closer to the end of the command line. But it still doesn't
> work? Surprising...

Oops, it does work indeed (my mistake here).
Jason A. Donenfeld Aug. 27, 2024, 3:16 p.m. UTC | #21
On Tue, Aug 27, 2024 at 11:02 AM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>
> On Tue, Aug 27, 2024 at 10:46:21AM +0200, Christophe Leroy wrote:
> > > +/**
> > > + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
> > > + * @dst_bytes:     Destination buffer to hold @nblocks * 64 bytes of output.
> > > + * @key:   32-byte input key.
> > > + * @counter:       8-byte counter, read on input and updated on return.
> > > + * @nblocks:       Number of blocks to generate.
> > > + *
> > > + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
> > > + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
> > > + * leaking into forked child processes.
> > > + */
> > > +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
> >
> > For Jason: We all redefine this prototype, should we have it in a
> > central place, or do you expect some architecture to provide some static
> > inline for it ?
>
> Given the doc comment and such, that would be nice. But I didn't see a
> straight forward way of doing that when I tried before. If you want to
> try and send another fixup commit, that'd be welcomed.

I'll give it a shot.
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b3fc891f1544..e3f4c5bf0661 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -237,6 +237,7 @@  config ARM64
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_GENERIC_VDSO
+	select VDSO_GETRANDOM
 	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..6e2b136813ca
--- /dev/null
+++ b/arch/arm64/include/asm/vdso/getrandom.h
@@ -0,0 +1,50 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <vdso/datapage.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer:	Destination buffer to fill with random bytes.
+ * @len:	Size of @buffer in bytes.
+ * @flags:	Zero or more GRND_* flags.
+ * Returns:	The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+	register long int x8 asm ("x8") = __NR_getrandom;
+	register long int x0 asm ("x0") = (long int) buffer;
+	register long int x1 asm ("x1") = (long int) len;
+	register long int x2 asm ("x2") = (long int) flags;
+
+	asm ("svc 0" : "=r"(x0) : "r"(x8), "0"(x0), "r"(x1), "r"(x2));
+
+	return x0;
+}
+
+static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
+{
+	return &_vdso_rng_data;
+}
+
+/**
+ * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack.
+ * @dst_bytes:	Destination buffer to hold @nblocks * 64 bytes of output.
+ * @key:	32-byte input key.
+ * @counter:	8-byte counter, read on input and updated on return.
+ * @nblocks:	Number of blocks to generate.
+ *
+ * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write
+ * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data
+ * leaking into forked child processes.
+ */
+extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index f94b1457c117..7ddb2bc3b57b 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -2,6 +2,8 @@ 
 #ifndef __ASM_VDSO_VSYSCALL_H
 #define __ASM_VDSO_VSYSCALL_H
 
+#define __VDSO_RND_DATA_OFFSET  480
+
 #ifndef __ASSEMBLY__
 
 #include <linux/timekeeper_internal.h>
@@ -21,6 +23,13 @@  struct vdso_data *__arm64_get_k_vdso_data(void)
 }
 #define __arch_get_k_vdso_data __arm64_get_k_vdso_data
 
+static __always_inline
+struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
+{
+	return (void *)__arm64_get_k_vdso_data() + __VDSO_RND_DATA_OFFSET;
+}
+#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
+
 static __always_inline
 void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
 {
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index d11da6461278..37dad3bb953a 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -9,7 +9,7 @@ 
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
 
-obj-vdso := vgettimeofday.o note.o sigreturn.o
+obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
 
 # Build rules
 targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -40,8 +40,13 @@  CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
 				$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
 				$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
 				-Wmissing-prototypes -Wmissing-declarations
+CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
+			     $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
+			     $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
+			     -Wmissing-prototypes -Wmissing-declarations
 
 CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
+CFLAGS_vgetrandom.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
 
 ifneq ($(c-gettimeofday-y),)
   CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 45354f2ddf70..f8dbcece20e2 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -12,6 +12,8 @@ 
 #include <asm/page.h>
 #include <asm/vdso.h>
 #include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
+#include <asm/vdso/vsyscall.h>
 
 OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
 OUTPUT_ARCH(aarch64)
@@ -19,6 +21,7 @@  OUTPUT_ARCH(aarch64)
 SECTIONS
 {
 	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+	PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
 #ifdef CONFIG_TIME_NS
 	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
 #endif
@@ -102,6 +105,7 @@  VERSION
 		__kernel_gettimeofday;
 		__kernel_clock_gettime;
 		__kernel_clock_getres;
+		__kernel_getrandom;
 	local: *;
 	};
 }
diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..3fb9715dd6f0
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,153 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <asm/cache.h>
+
+	.text
+
+/*
+ * ARM64 ChaCha20 implementation meant for vDSO.  Produces a given positive
+ * number of blocks of output with nonnce 0, taking an input key and 8-bytes
+ * counter.  Importantly does not spill to the stack.
+ *
+ * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
+ *				       const uint8_t *key,
+ * 				       uint32_t *counter,
+ *				       size_t nblocks)
+ *
+ * 	x0: output bytes
+ *	x1: 32-byte key input
+ *	x2: 8-byte counter input/output
+ *	x3: number of 64-byte block to write to output
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+
+	/* v0 = "expand 32-byte k" */
+	adr_l		x8, CTES
+	ld1		{v5.4s}, [x8]
+	/* v1,v2 = key */
+	ld1		{ v6.4s, v7.4s }, [x1]
+	/* v3 = counter || zero noonce  */
+	ldr		d8, [x2]
+
+	adr_l		x8, ONE
+	ldr		q13, [x8]
+
+	adr_l		x10, ROT8
+	ld1		{v12.4s}, [x10]
+.Lblock:
+	/* copy state to auxiliary vectors for the final add after the permute.  */
+	mov		v0.16b, v5.16b
+	mov		v1.16b, v6.16b
+	mov		v2.16b, v7.16b
+	mov		v3.16b, v8.16b
+
+	mov		w4, 20
+.Lpermute:
+	/*
+	 * Permute one 64-byte block where the state matrix is stored in the four NEON
+	 * registers v0-v3.  It performs matrix operations on four words in parallel,
+	 * but requires shuffling to rearrange the words after each round.
+	 */
+
+.Ldoubleround:
+	/* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
+	add		v0.4s, v0.4s, v1.4s
+	eor		v3.16b, v3.16b, v0.16b
+	rev32		v3.8h, v3.8h
+
+	/* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
+	add		v2.4s, v2.4s, v3.4s
+	eor		v4.16b, v1.16b, v2.16b
+	shl		v1.4s, v4.4s, #12
+	sri		v1.4s, v4.4s, #20
+
+	/* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
+	add		v0.4s, v0.4s, v1.4s
+	eor		v3.16b, v3.16b, v0.16b
+	tbl		v3.16b, {v3.16b}, v12.16b
+
+	/* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
+	add		v2.4s, v2.4s, v3.4s
+	eor		v4.16b, v1.16b, v2.16b
+	shl		v1.4s, v4.4s, #7
+	sri		v1.4s, v4.4s, #25
+
+	/* x1 = shuffle32(x1, MASK(0, 3, 2, 1)) */
+	ext		v1.16b, v1.16b, v1.16b, #4
+	/* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
+	ext		v2.16b, v2.16b, v2.16b, #8
+	/* x3 = shuffle32(x3, MASK(2, 1, 0, 3)) */
+	ext		v3.16b, v3.16b, v3.16b, #12
+
+	/* x0 += x1, x3 = rotl32(x3 ^ x0, 16) */
+	add		v0.4s, v0.4s, v1.4s
+	eor		v3.16b, v3.16b, v0.16b
+	rev32		v3.8h, v3.8h
+
+	/* x2 += x3, x1 = rotl32(x1 ^ x2, 12) */
+	add		v2.4s, v2.4s, v3.4s
+	eor		v4.16b, v1.16b, v2.16b
+	shl		v1.4s, v4.4s, #12
+	sri		v1.4s, v4.4s, #20
+
+	/* x0 += x1, x3 = rotl32(x3 ^ x0, 8) */
+	add		v0.4s, v0.4s, v1.4s
+	eor		v3.16b, v3.16b, v0.16b
+	tbl		v3.16b, {v3.16b}, v12.16b
+
+	/* x2 += x3, x1 = rotl32(x1 ^ x2, 7) */
+	add		v2.4s, v2.4s, v3.4s
+	eor		v4.16b, v1.16b, v2.16b
+	shl		v1.4s, v4.4s, #7
+	sri		v1.4s, v4.4s, #25
+
+	/* x1 = shuffle32(x1, MASK(2, 1, 0, 3)) */
+	ext		v1.16b, v1.16b, v1.16b, #12
+	/* x2 = shuffle32(x2, MASK(1, 0, 3, 2)) */
+	ext		v2.16b, v2.16b, v2.16b, #8
+	/* x3 = shuffle32(x3, MASK(0, 3, 2, 1)) */
+	ext		v3.16b, v3.16b, v3.16b, #4
+
+	subs		w4, w4, #2
+	b.ne		.Ldoubleround
+
+	/* output0 = state0 + v0 */
+	add		v0.4s, v0.4s, v5.4s
+	/* output1 = state1 + v1 */
+	add		v1.4s, v1.4s, v6.4s
+	/* output2 = state2 + v2 */
+	add		v2.4s, v2.4s, v7.4s
+	/* output2 = state3 + v3 */
+	add		v3.4s, v3.4s, v8.4s
+	st1		{ v0.4s - v3.4s }, [x0]
+
+	/* ++copy3.counter */
+	add		d8, d8, d13
+
+	/* output += 64, --nblocks */
+	add		x0, x0, 64
+	subs		x3, x3, #1
+	b.ne		.Lblock
+
+	/* counter = copy3.counter */
+	str		d8, [x2]
+
+	/* Zero out the potentially sensitive regs, in case nothing uses these again. */
+	eor		v0.16b, v0.16b, v0.16b
+	eor		v1.16b, v1.16b, v1.16b
+	eor		v2.16b, v2.16b, v2.16b
+	eor		v3.16b, v3.16b, v3.16b
+	eor		v6.16b, v6.16b, v6.16b
+	eor		v7.16b, v7.16b, v7.16b
+	ret
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
+
+        .section        ".rodata", "a", %progbits
+        .align          L1_CACHE_SHIFT
+
+CTES:	.word		1634760805, 857760878, 	2036477234, 1797285236
+ONE:    .xword		1, 0
+ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
+
+emit_aarch64_feature_1_and
diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c
new file mode 100644
index 000000000000..b6d6f4db3a98
--- /dev/null
+++ b/arch/arm64/kernel/vdso/vgetrandom.c
@@ -0,0 +1,13 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/types.h>
+#include <linux/mm.h>
+
+#include "../../../../lib/vdso/getrandom.c"
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
+
+ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
+{
+	return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+}
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 10ffdda3f2fa..f07ea679a4cc 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,6 +1,6 @@ 
 # SPDX-License-Identifier: GPL-2.0
 uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/ -e s/aarch64.*/arm64/)
 SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
 
 TEST_GEN_PROGS := vdso_test_gettimeofday
@@ -11,7 +11,7 @@  ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
 TEST_GEN_PROGS += vdso_standalone_test_x86
 endif
 TEST_GEN_PROGS += vdso_test_correctness
-ifeq ($(uname_M),x86_64)
+ifeq ($(uname_M), $(filter x86_64 aarch64, $(uname_M)))
 TEST_GEN_PROGS += vdso_test_getrandom
 ifneq ($(SODIUM),)
 TEST_GEN_PROGS += vdso_test_chacha