diff mbox series

crypto/x86: Use XORL r32,r32 in poly1305-x86_64-cryptogams.pl

Message ID 20200827173831.95039-1-ubizjak@gmail.com (mailing list archive)
State Accepted
Delegated to: Herbert Xu
Headers show
Series crypto/x86: Use XORL r32,r32 in poly1305-x86_64-cryptogams.pl | expand

Commit Message

Uros Bizjak Aug. 27, 2020, 5:38 p.m. UTC
x86_64 zero-extends the result of 32-bit operations, so for 64-bit operands,
XORL r32,r32 is functionally equivalent to XORQ r64,r64, but avoids
a REX prefix byte when legacy registers are used.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
---
 arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Comments

Jason A. Donenfeld Sept. 1, 2020, 7:16 p.m. UTC | #1
Hi Uros,

Any benchmarks for this? Seems like it's all in initialization code,
right? I'm CC'ing Andy into this.

Jason

On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: "David S. Miller" <davem@davemloft.net>
> ---
>  arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> index 137edcf038cb..7d568012cc15 100644
> --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
>  ___
>  &declare_function("poly1305_init_x86_64", 32, 3);
>  $code.=<<___;
> -	xor	%rax,%rax
> +	xor	%eax,%eax
>  	mov	%rax,0($ctx)		# initialize hash value
>  	mov	%rax,8($ctx)
>  	mov	%rax,16($ctx)
> @@ -2853,7 +2853,7 @@ $code.=<<___;
>  .type	poly1305_init_base2_44,\@function,3
>  .align	32
>  poly1305_init_base2_44:
> -	xor	%rax,%rax
> +	xor	%eax,%eax
>  	mov	%rax,0($ctx)		# initialize hash value
>  	mov	%rax,8($ctx)
>  	mov	%rax,16($ctx)
> @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
>  	mov	\$16,$len
>  	sub	%r10,$len
>  	xor	%eax,%eax
> -	xor	%r11,%r11
> +	xor	%r11d,%r11d
>  .Loop_dec_byte:
>  	mov	($inp,$otp),%r11b
>  	mov	($otp),%al
> @@ -4085,7 +4085,7 @@ avx_handler:
>  	.long	0xa548f3fc		# cld; rep movsq
>  
>  	mov	$disp,%rsi
> -	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
> +	xor	%ecx,%ecx		# arg1, UNW_FLAG_NHANDLER
>  	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
>  	mov	0(%rsi),%r8		# arg3, disp->ControlPc
>  	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
> -- 
> 2.26.2
>
Uros Bizjak Sept. 2, 2020, 5:52 a.m. UTC | #2
On Tue, Sep 1, 2020 at 9:16 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>
> Hi Uros,
>
> Any benchmarks for this? Seems like it's all in initialization code,
> right? I'm CC'ing Andy into this.

This patch should have no performance effect; it saves a REX prefix byte
when the optimization is applied to legacy registers.

Uros.

> Jason
>
> On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> > x86_64 zero extends 32bit operations, so for 64bit operands,
> > XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> > a REX prefix byte when legacy registers are used.
> >
> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> > Cc: Herbert Xu <herbert@gondor.apana.org.au>
> > Cc: "David S. Miller" <davem@davemloft.net>
> > ---
> >  arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > index 137edcf038cb..7d568012cc15 100644
> > --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
> >  ___
> >  &declare_function("poly1305_init_x86_64", 32, 3);
> >  $code.=<<___;
> > -     xor     %rax,%rax
> > +     xor     %eax,%eax
> >       mov     %rax,0($ctx)            # initialize hash value
> >       mov     %rax,8($ctx)
> >       mov     %rax,16($ctx)
> > @@ -2853,7 +2853,7 @@ $code.=<<___;
> >  .type        poly1305_init_base2_44,\@function,3
> >  .align       32
> >  poly1305_init_base2_44:
> > -     xor     %rax,%rax
> > +     xor     %eax,%eax
> >       mov     %rax,0($ctx)            # initialize hash value
> >       mov     %rax,8($ctx)
> >       mov     %rax,16($ctx)
> > @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
> >       mov     \$16,$len
> >       sub     %r10,$len
> >       xor     %eax,%eax
> > -     xor     %r11,%r11
> > +     xor     %r11d,%r11d
> >  .Loop_dec_byte:
> >       mov     ($inp,$otp),%r11b
> >       mov     ($otp),%al
> > @@ -4085,7 +4085,7 @@ avx_handler:
> >       .long   0xa548f3fc              # cld; rep movsq
> >
> >       mov     $disp,%rsi
> > -     xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
> > +     xor     %ecx,%ecx               # arg1, UNW_FLAG_NHANDLER
> >       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
> >       mov     0(%rsi),%r8             # arg3, disp->ControlPc
> >       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
> > --
> > 2.26.2
> >
>
> --
> Jason A. Donenfeld
> Deep Space Explorer
> fr: +33 6 51 90 82 66
> us: +1 513 476 1200
> www.jasondonenfeld.com
> www.zx2c4.com
> zx2c4.com/keys/AB9942E6D4A4CFC3412620A749FC7012A5DE03AE.asc
Jason A. Donenfeld Sept. 7, 2020, 1:16 p.m. UTC | #3
Hi Uros, Herbert,

On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: "David S. Miller" <davem@davemloft.net>
> ---
>  arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> index 137edcf038cb..7d568012cc15 100644
> --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
>  ___
>  &declare_function("poly1305_init_x86_64", 32, 3);
>  $code.=<<___;
> -	xor	%rax,%rax
> +	xor	%eax,%eax
>  	mov	%rax,0($ctx)		# initialize hash value
>  	mov	%rax,8($ctx)
>  	mov	%rax,16($ctx)
> @@ -2853,7 +2853,7 @@ $code.=<<___;
>  .type	poly1305_init_base2_44,\@function,3
>  .align	32
>  poly1305_init_base2_44:
> -	xor	%rax,%rax
> +	xor	%eax,%eax
>  	mov	%rax,0($ctx)		# initialize hash value
>  	mov	%rax,8($ctx)
>  	mov	%rax,16($ctx)
> @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
>  	mov	\$16,$len
>  	sub	%r10,$len
>  	xor	%eax,%eax
> -	xor	%r11,%r11
> +	xor	%r11d,%r11d
>  .Loop_dec_byte:
>  	mov	($inp,$otp),%r11b
>  	mov	($otp),%al
> @@ -4085,7 +4085,7 @@ avx_handler:
>  	.long	0xa548f3fc		# cld; rep movsq
>  
>  	mov	$disp,%rsi
> -	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
> +	xor	%ecx,%ecx		# arg1, UNW_FLAG_NHANDLER
>  	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
>  	mov	0(%rsi),%r8		# arg3, disp->ControlPc
>  	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
> -- 
> 2.26.2
> 

Per the discussion elsewhere,

Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>

for cryptodev-2.6.git, rather than crypto-2.6.git

Thanks,
Jason
Herbert Xu Sept. 11, 2020, 6:56 a.m. UTC | #4
On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: "David S. Miller" <davem@davemloft.net>
> ---
>  arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)

Patch applied.  Thanks.
diff mbox series

Patch

diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 137edcf038cb..7d568012cc15 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -246,7 +246,7 @@  $code.=<<___ if (!$kernel);
 ___
 &declare_function("poly1305_init_x86_64", 32, 3);
 $code.=<<___;
-	xor	%rax,%rax
+	xor	%eax,%eax
 	mov	%rax,0($ctx)		# initialize hash value
 	mov	%rax,8($ctx)
 	mov	%rax,16($ctx)
@@ -2853,7 +2853,7 @@  $code.=<<___;
 .type	poly1305_init_base2_44,\@function,3
 .align	32
 poly1305_init_base2_44:
-	xor	%rax,%rax
+	xor	%eax,%eax
 	mov	%rax,0($ctx)		# initialize hash value
 	mov	%rax,8($ctx)
 	mov	%rax,16($ctx)
@@ -3947,7 +3947,7 @@  xor128_decrypt_n_pad:
 	mov	\$16,$len
 	sub	%r10,$len
 	xor	%eax,%eax
-	xor	%r11,%r11
+	xor	%r11d,%r11d
 .Loop_dec_byte:
 	mov	($inp,$otp),%r11b
 	mov	($otp),%al
@@ -4085,7 +4085,7 @@  avx_handler:
 	.long	0xa548f3fc		# cld; rep movsq
 
 	mov	$disp,%rsi
-	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
+	xor	%ecx,%ecx		# arg1, UNW_FLAG_NHANDLER
 	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
 	mov	0(%rsi),%r8		# arg3, disp->ControlPc
 	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry