diff mbox series

crypto/x86: Use CRC32 mnemonic in crc32c-intel_glue.c

Message ID 20200805103932.255524-1-ubizjak@gmail.com (mailing list archive)
State Superseded
Delegated to: Herbert Xu
Headers show
Series crypto/x86: Use CRC32 mnemonic in crc32c-intel_glue.c | expand

Commit Message

Uros Bizjak Aug. 5, 2020, 10:39 a.m. UTC
Current minimum required version of binutils is 2.23,
which supports CRC32 instruction mnemonic.

Replace the byte-wise specification of CRC32 with this proper mnemonic.
The compiler is now able to pass memory operand to the instruction,
so there is no need for a temporary register anymore.

Some examples of the improvement:

 12a:	48 8b 08             	mov    (%rax),%rcx
 12d:	f2 48 0f 38 f1 f1    	crc32q %rcx,%rsi
 133:	48 83 c0 08          	add    $0x8,%rax
 137:	48 39 d0             	cmp    %rdx,%rax
 13a:	75 ee                	jne    12a <crc32c_intel_update+0x1a>

to:

 125:	f2 48 0f 38 f1 06    	crc32q (%rsi),%rax
 12b:	48 83 c6 08          	add    $0x8,%rsi
 12f:	48 39 d6             	cmp    %rdx,%rsi
 132:	75 f1                	jne    125 <crc32c_intel_update+0x15>

and:

 146:	0f b6 08             	movzbl (%rax),%ecx
 149:	f2 0f 38 f0 f1       	crc32b %cl,%esi
 14e:	48 83 c0 01          	add    $0x1,%rax
 152:	48 39 d0             	cmp    %rdx,%rax
 155:	75 ef                	jne    146 <crc32c_intel_update+0x36>

to:

 13b:	f2 0f 38 f0 02       	crc32b (%rdx),%eax
 140:	48 83 c2 01          	add    $0x1,%rdx
 144:	48 39 ca             	cmp    %rcx,%rdx
 147:	75 f2                	jne    13b <crc32c_intel_update+0x2b>

As the compiler has some more freedom w.r.t. register allocation,
there is also a couple of reg-reg moves removed.

There are no hidden states for CRC32 insn, so there is no need to mark
assembly as volatile.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
---
 arch/x86/crypto/crc32c-intel_glue.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)
diff mbox series

Patch

diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index d2d069bd459b..3a34b2351559 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -27,12 +27,6 @@ 
 
 #define SCALE_F	sizeof(unsigned long)
 
-#ifdef CONFIG_X86_64
-#define REX_PRE "0x48, "
-#else
-#define REX_PRE
-#endif
-
 #ifdef CONFIG_X86_64
 /*
  * use carryless multiply version of crc32c when buffer
@@ -48,11 +42,8 @@  asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
 {
 	while (length--) {
-		__asm__ __volatile__(
-			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
-			:"=S"(crc)
-			:"0"(crc), "c"(*data)
-		);
+		asm("crc32b %1, %0"
+		    : "+r" (crc) : "rm" (*data));
 		data++;
 	}
 
@@ -66,11 +57,12 @@  static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len
 	unsigned long *ptmp = (unsigned long *)p;
 
 	while (iquotient--) {
-		__asm__ __volatile__(
-			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
-			:"=S"(crc)
-			:"0"(crc), "c"(*ptmp)
-		);
+#ifdef CONFIG_X86_64
+		asm("crc32q %1, %q0"
+#else
+		asm("crc32l %1, %0"
+#endif
+		    : "+r" (crc) : "rm" (*ptmp));
 		ptmp++;
 	}