[v4,2/3] arm64/crc32: Reorganize bit/byte ordering macros

Message ID: 20241018075347.2821102-7-ardb+git@google.com
State: New
Series: arm64: Speed up CRC-32 using PMULL instructions

Commit Message

Ard Biesheuvel Oct. 18, 2024, 7:53 a.m. UTC
From: Ard Biesheuvel <ardb@kernel.org>

In preparation for a new user, reorganize the bit/byte ordering macros
that are used to parameterize the crc32 template code and instantiate
CRC-32, CRC-32c and 'big endian' CRC-32.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
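
For reference, the trick the new 'be' macros rely on can be modeled in a
few lines of C: the arm64 crc32 instructions implement the reflected
(LSB-first) CRC-32, so the MSB-first "big endian" variant falls out of
bit-reversing the accumulator on entry and exit and bit-reversing each
input byte in between. The sketch below is illustrative only; the helper
names (bitrev32, bitrev8, crc32_be_ref, crc32_le_step, crc32_be_via_le)
are made up for this example and are not kernel APIs.

/*
 * Standalone model of the reflection trick behind the 'be' macros.
 * All names are illustrative, not kernel APIs.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define CRC32_POLY_BE	0x04c11db7u	/* MSB-first polynomial */
#define CRC32_POLY_LE	0xedb88320u	/* same polynomial, bit-reversed */

static uint32_t bitrev32(uint32_t x)	/* models the rbit instruction */
{
	uint32_t r = 0;

	for (int i = 0; i < 32; i++)
		r |= ((x >> i) & 1u) << (31 - i);
	return r;
}

static uint8_t bitrev8(uint8_t b)	/* models rbit + lsr #24 (bytebe) */
{
	return bitrev32(b) >> 24;
}

/* Reference MSB-first ("big endian") CRC-32, one bit at a time */
static uint32_t crc32_be_ref(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= (uint32_t)*p++ << 24;
		for (int i = 0; i < 8; i++)
			crc = (crc & 0x80000000u) ?
			      (crc << 1) ^ CRC32_POLY_BE : crc << 1;
	}
	return crc;
}

/* Reflected LSB-first step: what the crc32b instruction computes */
static uint32_t crc32_le_step(uint32_t crc, uint8_t b)
{
	crc ^= b;
	for (int i = 0; i < 8; i++)
		crc = (crc & 1u) ? (crc >> 1) ^ CRC32_POLY_LE : crc >> 1;
	return crc;
}

/*
 * The big-endian CRC via the reflected engine: bit-reverse the
 * accumulator on entry and exit (the bit\order macros) and each
 * data byte in between (the byte\order / \order macros).
 */
static uint32_t crc32_be_via_le(uint32_t crc, const uint8_t *p, size_t len)
{
	crc = bitrev32(crc);
	while (len--)
		crc = crc32_le_step(crc, bitrev8(*p++));
	return bitrev32(crc);
}

int main(void)
{
	const uint8_t buf[] = "123456789";

	assert(crc32_be_ref(~0u, buf, sizeof(buf) - 1) ==
	       crc32_be_via_le(~0u, buf, sizeof(buf) - 1));
	return 0;
}

On a little-endian CPU, rev followed by rbit on a full register performs
exactly this per-byte bit reversal while preserving byte order, which is
why the vararg 'be' macro needs only those two instructions per register;
the sub-word bytebe and hwordbe variants additionally shift the reflected
value back down to the low bits that crc32b and crc32h consume.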
---
 arch/arm64/lib/crc32.S | 91 +++++++++-----------
 1 file changed, 39 insertions(+), 52 deletions(-)

Patch

diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
index 22139691c7ae..f9920492f135 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32.S
@@ -10,44 +10,48 @@ 
 
 	.arch		armv8-a+crc
 
-	.macro		byteorder, reg, be
-	.if		\be
-CPU_LE( rev		\reg, \reg	)
-	.else
-CPU_BE( rev		\reg, \reg	)
-	.endif
+	.macro		bitle, reg
 	.endm
 
-	.macro		byteorder16, reg, be
-	.if		\be
-CPU_LE( rev16		\reg, \reg	)
-	.else
-CPU_BE( rev16		\reg, \reg	)
-	.endif
+	.macro		bitbe, reg
+	rbit		\reg, \reg
 	.endm
 
-	.macro		bitorder, reg, be
-	.if		\be
-	rbit		\reg, \reg
-	.endif
+	.macro		bytele, reg
 	.endm
 
-	.macro		bitorder16, reg, be
-	.if		\be
+	.macro		bytebe, reg
 	rbit		\reg, \reg
-	lsr		\reg, \reg, #16
-	.endif
+	lsr		\reg, \reg, #24
+	.endm
+
+	.macro		hwordle, reg
+CPU_BE(	rev16		\reg, \reg	)
 	.endm
 
-	.macro		bitorder8, reg, be
-	.if		\be
+	.macro		hwordbe, reg
+CPU_LE(	rev		\reg, \reg	)
 	rbit		\reg, \reg
-	lsr		\reg, \reg, #24
-	.endif
+CPU_BE(	lsr		\reg, \reg, #16	)
+	.endm
+
+	.macro		le, regs:vararg
+	.irp		r, \regs
+CPU_BE(	rev		\r, \r		)
+	.endr
+	.endm
+
+	.macro		be, regs:vararg
+	.irp		r, \regs
+CPU_LE(	rev		\r, \r		)
+	.endr
+	.irp		r, \regs
+	rbit		\r, \r
+	.endr
 	.endm
 
-	.macro		__crc32, c, be=0
-	bitorder	w0, \be
+	.macro		__crc32, c, order=le
+	bit\order	w0
 	cmp		x2, #16
 	b.lt		8f			// less than 16 bytes
 
@@ -60,14 +64,7 @@  CPU_BE( rev16		\reg, \reg	)
 	add		x8, x8, x1
 	add		x1, x1, x7
 	ldp		x5, x6, [x8]
-	byteorder	x3, \be
-	byteorder	x4, \be
-	byteorder	x5, \be
-	byteorder	x6, \be
-	bitorder	x3, \be
-	bitorder	x4, \be
-	bitorder	x5, \be
-	bitorder	x6, \be
+	\order		x3, x4, x5, x6
 
 	tst		x7, #8
 	crc32\c\()x	w8, w0, x3
@@ -95,42 +92,32 @@  CPU_BE( rev16		\reg, \reg	)
 32:	ldp		x3, x4, [x1], #32
 	sub		x2, x2, #32
 	ldp		x5, x6, [x1, #-16]
-	byteorder	x3, \be
-	byteorder	x4, \be
-	byteorder	x5, \be
-	byteorder	x6, \be
-	bitorder	x3, \be
-	bitorder	x4, \be
-	bitorder	x5, \be
-	bitorder	x6, \be
+	\order		x3, x4, x5, x6
 	crc32\c\()x	w0, w0, x3
 	crc32\c\()x	w0, w0, x4
 	crc32\c\()x	w0, w0, x5
 	crc32\c\()x	w0, w0, x6
 	cbnz		x2, 32b
-0:	bitorder	w0, \be
+0:	bit\order	w0
 	ret
 
 8:	tbz		x2, #3, 4f
 	ldr		x3, [x1], #8
-	byteorder	x3, \be
-	bitorder	x3, \be
+	\order		x3
 	crc32\c\()x	w0, w0, x3
 4:	tbz		x2, #2, 2f
 	ldr		w3, [x1], #4
-	byteorder	w3, \be
-	bitorder	w3, \be
+	\order		w3
 	crc32\c\()w	w0, w0, w3
 2:	tbz		x2, #1, 1f
 	ldrh		w3, [x1], #2
-	byteorder16	w3, \be
-	bitorder16	w3, \be
+	hword\order	w3
 	crc32\c\()h	w0, w0, w3
 1:	tbz		x2, #0, 0f
 	ldrb		w3, [x1]
-	bitorder8	w3, \be
+	byte\order	w3
 	crc32\c\()b	w0, w0, w3
-0:	bitorder	w0, \be
+0:	bit\order	w0
 	ret
 	.endm
 
@@ -146,5 +133,5 @@  SYM_FUNC_END(crc32c_le_arm64)
 
 	.align		5
 SYM_FUNC_START(crc32_be_arm64)
-	__crc32		be=1
+	__crc32		order=be
 SYM_FUNC_END(crc32_be_arm64)