[v4,5/5] powerpc/vdso: Wire up getrandom() vDSO implementation on PPC64

Message ID 27de70dcc356e56754a03a2887a97597f5e840a4.1725278148.git.christophe.leroy@csgroup.eu (mailing list archive)
State New
Series Wire up getrandom() vDSO implementation on powerpc

Commit Message

Christophe Leroy Sept. 2, 2024, 12:04 p.m. UTC
Extend getrandom() vDSO implementation to powerpc64.

Tested on QEMU on both ppc64_defconfig and ppc64le_defconfig.

The timings are not precise, as this is QEMU on an x86 laptop, but
precision is not needed to see the benefit.

~ # ./vdso_test_getrandom bench-single
   vdso: 25000000 times in 4.977777162 seconds
   libc: 25000000 times in 75.516749981 seconds
syscall: 25000000 times in 86.842242014 seconds

~ # ./vdso_test_getrandom bench-single
   vdso: 25000000 times in 6.473814156 seconds
   libc: 25000000 times in 73.875109463 seconds
syscall: 25000000 times in 71.805066229 seconds

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v4:
- Use __BIG_ENDIAN__, which is defined by GCC, instead of CONFIG_CPU_BIG_ENDIAN, which is unknown to selftests
- Implement a cleaner/smaller output copy for little endian instead of keeping the compat macro.

v3: New (split out of previous patch)
---
 arch/powerpc/Kconfig                         |   2 +-
 arch/powerpc/include/asm/mman.h              |   2 +-
 arch/powerpc/kernel/vdso/Makefile            |  11 +-
 arch/powerpc/kernel/vdso/getrandom.S         |  16 +--
 arch/powerpc/kernel/vdso/vgetrandom-chacha.S | 117 ++++++++++++++++++-
 arch/powerpc/kernel/vdso/vgetrandom.c        |   2 -
 6 files changed, 132 insertions(+), 18 deletions(-)
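
For context, here is a minimal sketch of how userspace reaches this
vDSO entry point; it is an illustration, not part of the patch.
vdso_sym() stands in for a vDSO symbol lookup (for instance the
selftests' parse_vdso.c helper), and the NULL opaque state is only to
keep the sketch short. The five-argument prototype and the
__kernel_getrandom name match what this patch wires up:

#include <stddef.h>
#include <sys/types.h>

/* Assumed helper: resolve a versioned symbol in the mapped vDSO. */
extern void *vdso_sym(const char *version, const char *name);

typedef ssize_t (*getrandom_fn)(void *buf, size_t len, unsigned int flags,
				void *opaque_state, size_t opaque_len);

static ssize_t vdso_getrandom(void *buf, size_t len)
{
	getrandom_fn fn = (getrandom_fn)vdso_sym("LINUX_2.6.15",
						 "__kernel_getrandom");

	if (!fn)
		return -1;
	/*
	 * A real caller must pass per-thread opaque state allocated
	 * according to the vgetrandom() ABI; NULL/0 is illustrative.
	 */
	return fn(buf, len, 0, NULL, 0);
}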

Comments

Jason A. Donenfeld Sept. 2, 2024, 12:41 p.m. UTC | #1
On Mon, Sep 02, 2024 at 02:04:42PM +0200, Christophe Leroy wrote:
>  SYM_FUNC_START(__arch_chacha20_blocks_nostack)
>  #ifdef __powerpc64__
> -	blr
> +	std	r5, -216(r1)
> +
> +	std	r14, -144(r1)
> +	std	r15, -136(r1)
> +	std	r16, -128(r1)
> +	std	r17, -120(r1)
> +	std	r18, -112(r1)
> +	std	r19, -104(r1)
> +	std	r20, -96(r1)
> +	std	r21, -88(r1)
> +	std	r22, -80(r1)
> +	std	r23, -72(r1)
> +	std	r24, -64(r1)
> +	std	r25, -56(r1)
> +	std	r26, -48(r1)
> +	std	r27, -40(r1)
> +	std	r28, -32(r1)
> +	std	r29, -24(r1)
> +	std	r30, -16(r1)
> +	std	r31, -8(r1)
>  #else
>  	stwu	r1, -96(r1)
>  	stw	r5, 20(r1)
> +#ifdef __BIG_ENDIAN__
>  	stmw	r14, 24(r1)
> +#else
> +	stw	r14, 24(r1)
> +	stw	r15, 28(r1)
> +	stw	r16, 32(r1)
> +	stw	r17, 36(r1)
> +	stw	r18, 40(r1)
> +	stw	r19, 44(r1)
> +	stw	r20, 48(r1)
> +	stw	r21, 52(r1)
> +	stw	r22, 56(r1)
> +	stw	r23, 60(r1)
> +	stw	r24, 64(r1)
> +	stw	r25, 68(r1)
> +	stw	r26, 72(r1)
> +	stw	r27, 76(r1)
> +	stw	r28, 80(r1)
> +	stw	r29, 84(r1)
> +	stw	r30, 88(r1)
> +	stw	r31, 92(r1)
> +#endif
> +#endif

This confuses me. Why are you adding code to the !__powerpc64__ branch
in this commit? (Also, why does stmw not work on LE?)
Christophe Leroy Sept. 2, 2024, 1:12 p.m. UTC | #2
On 02/09/2024 at 14:41, Jason A. Donenfeld wrote:
> On Mon, Sep 02, 2024 at 02:04:42PM +0200, Christophe Leroy wrote:
>>   SYM_FUNC_START(__arch_chacha20_blocks_nostack)
>> [...]
> 
> This confuses me. Why are you adding code to the !__powerpc64__ branch
> in this commit? (Also, why does stmw not work on LE?)

That's for the VDSO32, i.e. running 32-bit binaries on a 64-bit kernel.

"Programming Environments Manual for 32-Bit Implementations of the 
PowerPC™ Architecture" say: In some implementations operating with 
little-endian byte order, execution of an lmw or stmw instruction
causes the system alignment error handler to be invoked

And GCC doesn't like it either:

tools/arch/powerpc/vdso/vgetrandom-chacha.S:84: Error: `stmw' invalid 
when little-endian
Jason A. Donenfeld Sept. 2, 2024, 2 p.m. UTC | #3
On Mon, Sep 02, 2024 at 03:12:47PM +0200, Christophe Leroy wrote:
> 
> 
> On 02/09/2024 at 14:41, Jason A. Donenfeld wrote:
> > On Mon, Sep 02, 2024 at 02:04:42PM +0200, Christophe Leroy wrote:
> >>   SYM_FUNC_START(__arch_chacha20_blocks_nostack)
> >> [...]
> > 
> > This confuses me. Why are you adding code to the !__powerpc64__ branch
> > in this commit? (Also, why does stmw not work on LE?)
> 
> That's for the VDSO32, i.e. running 32-bit binaries on a 64-bit kernel.
> 
> "Programming Environments Manual for 32-Bit Implementations of the 
> PowerPC™ Architecture" say: In some implementations operating with 
> little-endian byte order, execution of an lmw or stmw instruction
> causes the system alignment error handler to be invoked
> 
> And GCC doesn't like it either:
> 
> tools/arch/powerpc/vdso/vgetrandom-chacha.S:84: Error: `stmw' invalid 
> when little-endian

Does it make sense to do all the 32-bit stuff in the PPC32 commit (and
then you can introduce the selftests there without the error you
mentioned), and then add the 64-bit stuff in this commit?
Christophe Leroy Sept. 2, 2024, 2:16 p.m. UTC | #4
On 02/09/2024 at 16:00, Jason A. Donenfeld wrote:
> On Mon, Sep 02, 2024 at 03:12:47PM +0200, Christophe Leroy wrote:
>>
>>
>> On 02/09/2024 at 14:41, Jason A. Donenfeld wrote:
>>> On Mon, Sep 02, 2024 at 02:04:42PM +0200, Christophe Leroy wrote:
>>>>    SYM_FUNC_START(__arch_chacha20_blocks_nostack)
>>>> [...]
>>>
>>> This confuses me. Why are you adding code to the !__powerpc64__ branch
>>> in this commit? (Also, why does stmw not work on LE?)
>>
>> That's for the VDSO32, i.e. running 32-bit binaries on a 64-bit kernel.
>>
>> "Programming Environments Manual for 32-Bit Implementations of the
>> PowerPC™ Architecture" say: In some implementations operating with
>> little-endian byte order, execution of an lmw or stmw instruction
>> causes the system alignment error handler to be invoked
>>
>> And GCC doesn't like it either:
>>
>> tools/arch/powerpc/vdso/vgetrandom-chacha.S:84: Error: `stmw' invalid
>> when little-endian
> 
> Does it make sense to do all the 32-bit stuff in the PPC32 commit (and
> then you can introduce the selftests there without the error you
> mentioned), and then add the 64-bit stuff in this commit?

Can do that, but there will still be a problem with the chacha selftests
if I don't opt out the entire function body when it is ppc64. It will
build properly, but if someone runs it on ppc64 it will likely crash
because only the low 32 bits of the registers will be saved.

That's the reason why I really preferred the approach where I set
something in vdso_config.h so that the assembly is used only for
powerpc32; when building for powerpc64 the assembly part is kept out and
vdso_test_chacha simply reports that it is not supported.

Christophe
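
For illustration, a sketch of the vdso_config.h opt-out described
above; the macro name is an assumption, not taken from an actual
patch:

/* Hypothetical guard: run the chacha selftest only where the
 * assembly saves the full register width, i.e. 32-bit powerpc. */
#if defined(__powerpc__) && !defined(__powerpc64__)
#define VDSO_TEST_CHACHA_SUPPORTED	1
#else
#define VDSO_TEST_CHACHA_SUPPORTED	0	/* report "not supported" */
#endif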
Jason A. Donenfeld Sept. 2, 2024, 2:19 p.m. UTC | #5
On Mon, Sep 02, 2024 at 04:16:48PM +0200, Christophe Leroy wrote:
> Can do that, but there will still be a problem with the chacha selftests
> if I don't opt out the entire function body when it is ppc64. It will
> build properly, but if someone runs it on ppc64 it will likely crash
> because only the low 32 bits of the registers will be saved.

What if you don't wire up the selftests _at all_ until the ppc64 commit?
Then there'll be no risk.

(And I think I would prefer to see the 32-bit code all in the 32-bit
commit; that'd make it more straightforward to review too.)
Christophe Leroy Sept. 2, 2024, 2:27 p.m. UTC | #6
Hi Jason, hi Michael,

On 02/09/2024 at 16:19, Jason A. Donenfeld wrote:
> On Mon, Sep 02, 2024 at 04:16:48PM +0200, Christophe Leroy wrote:
>> Can do that, but there will still be a problem with the chacha selftests
>> if I don't opt out the entire function body when it is ppc64. It will
>> build properly, but if someone runs it on ppc64 it will likely crash
>> because only the low 32 bits of the registers will be saved.
> 
> What if you don't wire up the selftests _at all_ until the ppc64 commit?
> Then there'll be no risk.
> 
> (And I think I would prefer to see the 32-bit code all in the 32-bit
> commit; that'd make it more straightforward to review too.)

I'd be fine with that, but I'd like feedback from Michael on it: is
there a risk that only the PPC32 part gets merged as a first step, or
will both PPC32 and PPC64 go together anyway?

I would prefer not to delay PPC32 because someone doesn't feel confident 
with PPC64.

Christophe
Jason A. Donenfeld Sept. 2, 2024, 2:37 p.m. UTC | #7
On Mon, Sep 02, 2024 at 04:27:12PM +0200, Christophe Leroy wrote:
> Hi Jason, hi Michael,
> 
> On 02/09/2024 at 16:19, Jason A. Donenfeld wrote:
> > On Mon, Sep 02, 2024 at 04:16:48PM +0200, Christophe Leroy wrote:
> >> Can do that, but there will still be a problem with the chacha selftests
> >> if I don't opt out the entire function body when it is ppc64. It will
> >> build properly, but if someone runs it on ppc64 it will likely crash
> >> because only the low 32 bits of the registers will be saved.
> > 
> > What if you don't wire up the selftests _at all_ until the ppc64 commit?
> > Then there'll be no risk.
> > 
> > (And I think I would prefer to see the 32-bit code all in the 32-bit
> > commit; that'd make it more straightforward to review too.)
> 
> I'd be fine with that, but I'd like feedback from Michael on it: is
> there a risk that only the PPC32 part gets merged as a first step, or
> will both PPC32 and PPC64 go together anyway?
> 
> I would prefer not to delay PPC32 because someone doesn't feel confident 
> with PPC64.

I have no objection to applying these at the same time.

Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 54b270ef18b1..b45452ac4a73 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -311,7 +311,7 @@  config PPC
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select TRACE_IRQFLAGS_SUPPORT
-	select VDSO_GETRANDOM			if PPC32
+	select VDSO_GETRANDOM
 	#
 	# Please keep this list sorted alphabetically.
 	#
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 17a77d47ed6d..42a51a993d94 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -6,7 +6,7 @@ 
 
 #include <uapi/asm/mman.h>
 
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) && !defined(BUILD_VDSO)
 
 #include <asm/cputable.h>
 #include <linux/mm.h>
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index af3ba61b022e..56fb1633529a 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -9,7 +9,7 @@  obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o not
 obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o
 
 obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o
-obj-vdso64 += getrandom-64.o
+obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o
 
 ifneq ($(c-gettimeofday-y),)
   CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y)
@@ -22,6 +22,7 @@  endif
 
 ifneq ($(c-getrandom-y),)
   CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y)
+  CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) $(call cc-option, -ffixed-r30)
 endif
 
 # Build rules
@@ -35,10 +36,10 @@  endif
 targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o
 targets += crtsavres-32.o
 obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o
+targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o
 obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
 
-ccflags-y := -fno-common -fno-builtin
+ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO
 ccflags-y += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 ccflags-y += $(call cc-option, -fno-stack-protector)
 ccflags-y += -DDISABLE_BRANCH_PROFILING
@@ -72,7 +73,7 @@  CPPFLAGS_vdso64.lds += -P -C
 # link rule for the .so file, .lds has to be first
 $(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE
 	$(call if_changed,vdso32ld_and_check)
-$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE
 	$(call if_changed,vdso64ld_and_check)
 
 # assembly rules for the .S files
@@ -88,6 +89,8 @@  $(obj-vdso64): %-64.o: %.S FORCE
 	$(call if_changed_dep,vdso64as)
 $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE
 	$(call if_changed_dep,cc_o_c)
+$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE
+	$(call if_changed_dep,cc_o_c)
 
 # Generate VDSO offsets using helper script
 gen-vdso32sym := $(src)/gen_vdso32_offsets.sh
diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S
index 7db51c0635a5..a957cd2b2b03 100644
--- a/arch/powerpc/kernel/vdso/getrandom.S
+++ b/arch/powerpc/kernel/vdso/getrandom.S
@@ -5,8 +5,6 @@ 
  *
  * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France
  */
-#include <linux/errno.h>
-
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/vdso.h>
@@ -29,10 +27,18 @@ 
   .cfi_adjust_cfa_offset PPC_MIN_STKFRM
 	PPC_STL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
   .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
+#ifdef __powerpc64__
+	PPC_STL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
+  .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
+#endif
 	get_datapage	r8
 	addi		r8, r8, VDSO_RNG_DATA_OFFSET
 	bl		CFUNC(DOTSYM(\funct))
 	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+#ifdef __powerpc64__
+	PPC_LL		r2, PPC_MIN_STKFRM + STK_GOT(r1)
+  .cfi_restore r2
+#endif
 	cmpwi		r3, 0
 	mtlr		r0
 	addi		r1, r1, 2 * PPC_MIN_STKFRM
@@ -48,11 +54,5 @@ 
 
 	.text
 V_FUNCTION_BEGIN(__kernel_getrandom)
-#ifdef CONFIG_PPC64
-	li	r3, ENOSYS
-	crset	so
-	blr
-#else
 	cvdso_call __c_kernel_getrandom
-#endif
 V_FUNCTION_END(__kernel_getrandom)
diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
index 17a2f586223a..6b334bcef017 100644
--- a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
+++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
@@ -80,14 +80,58 @@ 
  */
 SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 #ifdef __powerpc64__
-	blr
+	std	r5, -216(r1)
+
+	std	r14, -144(r1)
+	std	r15, -136(r1)
+	std	r16, -128(r1)
+	std	r17, -120(r1)
+	std	r18, -112(r1)
+	std	r19, -104(r1)
+	std	r20, -96(r1)
+	std	r21, -88(r1)
+	std	r22, -80(r1)
+	std	r23, -72(r1)
+	std	r24, -64(r1)
+	std	r25, -56(r1)
+	std	r26, -48(r1)
+	std	r27, -40(r1)
+	std	r28, -32(r1)
+	std	r29, -24(r1)
+	std	r30, -16(r1)
+	std	r31, -8(r1)
 #else
 	stwu	r1, -96(r1)
 	stw	r5, 20(r1)
+#ifdef __BIG_ENDIAN__
 	stmw	r14, 24(r1)
+#else
+	stw	r14, 24(r1)
+	stw	r15, 28(r1)
+	stw	r16, 32(r1)
+	stw	r17, 36(r1)
+	stw	r18, 40(r1)
+	stw	r19, 44(r1)
+	stw	r20, 48(r1)
+	stw	r21, 52(r1)
+	stw	r22, 56(r1)
+	stw	r23, 60(r1)
+	stw	r24, 64(r1)
+	stw	r25, 68(r1)
+	stw	r26, 72(r1)
+	stw	r27, 76(r1)
+	stw	r28, 80(r1)
+	stw	r29, 84(r1)
+	stw	r30, 88(r1)
+	stw	r31, 92(r1)
+#endif
+#endif
 
 	lwz	r14, 0(r5)
 	lwz	r15, 4(r5)
+#ifdef __powerpc64__
+	rldimi	r14, r15, 32, 0
+#endif
 	mr	r0, r6
 	subi	r3, r3, 4
 
@@ -156,6 +200,7 @@  SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 	add	r28, r28, r14
 	add	r29, r29, r15
 
+#ifdef __BIG_ENDIAN__
 	stwbrx	r16, r4, r3
 	addi	r3, r3, 8
 	stwbrx	r17, 0, r3
@@ -180,15 +225,42 @@  SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 	stwbrx	r30, r4, r3
 	addi	r3, r3, 8
 	stwbrx	r31, 0, r3
+#else
+	stw	r16, 4(r3)
+	stw	r17, 8(r3)
+	stw	r18, 12(r3)
+	stw	r19, 16(r3)
+	stw	r20, 20(r3)
+	stw	r21, 24(r3)
+	stw	r22, 28(r3)
+	stw	r23, 32(r3)
+	stw	r24, 36(r3)
+	stw	r25, 40(r3)
+	stw	r26, 44(r3)
+	stw	r27, 48(r3)
+	stw	r28, 52(r3)
+	stw	r29, 56(r3)
+	stw	r30, 60(r3)
+	stwu	r31, 64(r3)
+#endif
 
 	subic.	r0, r0, 1	/* subi. can't use r0 as source */
 
+#ifdef __powerpc64__
+	addi	r14, r14, 1
+	srdi	r15, r14, 32
+#else
 	addic	r14, r14, 1
 	addze	r15, r15
+#endif
 
 	bne	.Lblock
 
+#ifdef __powerpc64__
+	ld	r5, -216(r1)
+#else
 	lwz	r5, 20(r1)
+#endif
 	stw	r14, 0(r5)
 	stw	r15, 4(r5)
 
@@ -200,8 +272,49 @@  SYM_FUNC_START(__arch_chacha20_blocks_nostack)
 	li	r11, 0
 	li	r12, 0
 
+#ifdef __powerpc64__
+	ld	r14, -144(r1)
+	ld	r15, -136(r1)
+	ld	r16, -128(r1)
+	ld	r17, -120(r1)
+	ld	r18, -112(r1)
+	ld	r19, -104(r1)
+	ld	r20, -96(r1)
+	ld	r21, -88(r1)
+	ld	r22, -80(r1)
+	ld	r23, -72(r1)
+	ld	r24, -64(r1)
+	ld	r25, -56(r1)
+	ld	r26, -48(r1)
+	ld	r27, -40(r1)
+	ld	r28, -32(r1)
+	ld	r29, -24(r1)
+	ld	r30, -16(r1)
+	ld	r31, -8(r1)
+#else
+#ifdef __BIG_ENDIAN__
 	lmw	r14, 24(r1)
+#else
+	lwz	r14, 24(r1)
+	lwz	r15, 28(r1)
+	lwz	r16, 32(r1)
+	lwz	r17, 36(r1)
+	lwz	r18, 40(r1)
+	lwz	r19, 44(r1)
+	lwz	r20, 48(r1)
+	lwz	r21, 52(r1)
+	lwz	r22, 56(r1)
+	lwz	r23, 60(r1)
+	lwz	r24, 64(r1)
+	lwz	r25, 68(r1)
+	lwz	r26, 72(r1)
+	lwz	r27, 76(r1)
+	lwz	r28, 80(r1)
+	lwz	r29, 84(r1)
+	lwz	r30, 88(r1)
+	lwz	r31, 92(r1)
+#endif
 	addi	r1, r1, 96
-	blr
 #endif
+	blr
 SYM_FUNC_END(__arch_chacha20_blocks_nostack)
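
A note on the hunks above: rldimi merges the two 32-bit counter words
into a single 64-bit register so that each per-block increment is one
addi, with srdi re-deriving the high word for the final store. On
little-endian, the plain stw/stwu stores already produce ChaCha's
little-endian output words, which is why the stwbrx byte swaps are
big-endian only. An illustrative C model of the 64-bit counter path
(not kernel code):

#include <stdint.h>

/* counter[0]/counter[1] are the 32-bit words loaded into r14/r15. */
static void increment_block_counter(uint32_t counter[2])
{
	/* rldimi r14, r15, 32, 0: pack into one 64-bit value */
	uint64_t c = ((uint64_t)counter[1] << 32) | counter[0];

	c++;					/* addi r14, r14, 1 */

	counter[0] = (uint32_t)c;		/* stw r14, 0(r5) */
	counter[1] = (uint32_t)(c >> 32);	/* srdi r15, r14, 32 */
}
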
diff --git a/arch/powerpc/kernel/vdso/vgetrandom.c b/arch/powerpc/kernel/vdso/vgetrandom.c
index 923330845b2c..5f855d45fb7b 100644
--- a/arch/powerpc/kernel/vdso/vgetrandom.c
+++ b/arch/powerpc/kernel/vdso/vgetrandom.c
@@ -7,10 +7,8 @@ 
 #include <linux/time.h>
 #include <linux/types.h>
 
-#ifndef CONFIG_PPC64
 ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state,
 			     size_t opaque_len, const struct vdso_rng_data *vd)
 {
 	return __cvdso_getrandom_data(vd, buffer, len, flags, opaque_state, opaque_len);
 }
-#endif