diff mbox series

[RFC,01/12] crypto: Remove u128 usage

Message ID 20221219154118.889543494@infradead.org (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() | expand

Commit Message

Peter Zijlstra Dec. 19, 2022, 3:35 p.m. UTC
Use __uint128_t instead of u128, as this seems to be the common
(majority) usage in crypto.

This frees up u128 for definition in linux/types.h.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 lib/crypto/curve25519-hacl64.c |  142 ++++++++++++++++++++---------------------
 lib/crypto/poly1305-donna64.c  |   22 ++----
 2 files changed, 80 insertions(+), 84 deletions(-)

Comments

Jason A. Donenfeld Dec. 19, 2022, 3:56 p.m. UTC | #1
On Mon, Dec 19, 2022 at 04:35:26PM +0100, Peter Zijlstra wrote:
> As seems to be the common (majority) usage in crypto, use __uint128_t
> instead of u128.
> 
> This frees up u128 for definition in linux/types.h.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  lib/crypto/curve25519-hacl64.c |  142 ++++++++++++++++++++---------------------
>  lib/crypto/poly1305-donna64.c  |   22 ++----
>  2 files changed, 80 insertions(+), 84 deletions(-)
> 
> --- a/lib/crypto/curve25519-hacl64.c
> +++ b/lib/crypto/curve25519-hacl64.c
> @@ -14,8 +14,6 @@
>  #include <crypto/curve25519.h>
>  #include <linux/string.h>
>  
> -typedef __uint128_t u128;
> -
>  static __always_inline u64 u64_eq_mask(u64 a, u64 b)
>  {
>  	u64 x = a ^ b;
> @@ -50,77 +48,77 @@ static __always_inline void modulo_carry
>  	b[0] = b0_;
>  }
>  
> -static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
> +static __always_inline void fproduct_copy_from_wide_(u64 *output, __uint128_t *input)
>  {
>  	{
> -		u128 xi = input[0];
> +		__uint128_t xi = input[0];

Why not just use `u128` from types.h in this file?

Jason
Peter Zijlstra Dec. 19, 2022, 5 p.m. UTC | #2
On Mon, Dec 19, 2022 at 04:56:33PM +0100, Jason A. Donenfeld wrote:

> Why not just use `u128` from types.h in this file?

Ordering: I can't very well introduce it in types.h while other
definitions still exist in the tree, so I first have to clean up the
u128 namespace.
Jason A. Donenfeld Dec. 19, 2022, 5:03 p.m. UTC | #3
On Mon, Dec 19, 2022 at 6:01 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, Dec 19, 2022 at 04:56:33PM +0100, Jason A. Donenfeld wrote:
>
> > Why not just use `u128` from types.h in this file?
>
> Ordering, I can't very well introduce it in types.h while other
> definitions exist in the tree. So I first have to clean up the u128
> namespace.

Is there a patch at the end of the series that adds it back in to use u128?
Herbert Xu Dec. 20, 2022, 3:50 a.m. UTC | #4
On Mon, Dec 19, 2022 at 06:03:04PM +0100, Jason A. Donenfeld wrote:
>
> Is there a patch at the end of the series that adds it back in to use u128?

Could we do some ifdef trickery to reduce the amount of code churn
please? Changing everything away from u128 and then back to it seems
silly.

Thanks,
H. Peter Anvin Dec. 20, 2022, 4:11 a.m. UTC | #5
On December 19, 2022 7:50:47 PM PST, Herbert Xu <herbert@gondor.apana.org.au> wrote:
>On Mon, Dec 19, 2022 at 06:03:04PM +0100, Jason A. Donenfeld wrote:
>>
>> Is there a patch at the end of the series that adds it back in to use u128?
>
>Could we do some ifdef trickery to reduce the amount of code churn
>please? Changing everything away from u128 and then back to it seems
>silly.
>
>Thanks,

Seems like "merging common code snippets" is something we at least used to do with single patches...
Herbert Xu Dec. 20, 2022, 4:15 a.m. UTC | #6
On Mon, Dec 19, 2022 at 08:11:37PM -0800, H. Peter Anvin wrote:
>
> Seems like "merging common code snippets" is something we at least used to do with single patches...

I certainly don't have any objections if we go down this route.

Thanks,
diff mbox series

Patch

--- a/lib/crypto/curve25519-hacl64.c
+++ b/lib/crypto/curve25519-hacl64.c
@@ -14,8 +14,6 @@ 
 #include <crypto/curve25519.h>
 #include <linux/string.h>
 
-typedef __uint128_t u128;
-
 static __always_inline u64 u64_eq_mask(u64 a, u64 b)
 {
 	u64 x = a ^ b;
@@ -50,77 +48,77 @@  static __always_inline void modulo_carry
 	b[0] = b0_;
 }
 
-static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
+static __always_inline void fproduct_copy_from_wide_(u64 *output, __uint128_t *input)
 {
 	{
-		u128 xi = input[0];
+		__uint128_t xi = input[0];
 		output[0] = ((u64)(xi));
 	}
 	{
-		u128 xi = input[1];
+		__uint128_t xi = input[1];
 		output[1] = ((u64)(xi));
 	}
 	{
-		u128 xi = input[2];
+		__uint128_t xi = input[2];
 		output[2] = ((u64)(xi));
 	}
 	{
-		u128 xi = input[3];
+		__uint128_t xi = input[3];
 		output[3] = ((u64)(xi));
 	}
 	{
-		u128 xi = input[4];
+		__uint128_t xi = input[4];
 		output[4] = ((u64)(xi));
 	}
 }
 
 static __always_inline void
-fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s)
+fproduct_sum_scalar_multiplication_(__uint128_t *output, u64 *input, u64 s)
 {
-	output[0] += (u128)input[0] * s;
-	output[1] += (u128)input[1] * s;
-	output[2] += (u128)input[2] * s;
-	output[3] += (u128)input[3] * s;
-	output[4] += (u128)input[4] * s;
+	output[0] += (__uint128_t)input[0] * s;
+	output[1] += (__uint128_t)input[1] * s;
+	output[2] += (__uint128_t)input[2] * s;
+	output[3] += (__uint128_t)input[3] * s;
+	output[4] += (__uint128_t)input[4] * s;
 }
 
-static __always_inline void fproduct_carry_wide_(u128 *tmp)
+static __always_inline void fproduct_carry_wide_(__uint128_t *tmp)
 {
 	{
 		u32 ctr = 0;
-		u128 tctr = tmp[ctr];
-		u128 tctrp1 = tmp[ctr + 1];
+		__uint128_t tctr = tmp[ctr];
+		__uint128_t tctrp1 = tmp[ctr + 1];
 		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
-		u128 c = ((tctr) >> (51));
-		tmp[ctr] = ((u128)(r0));
+		__uint128_t c = ((tctr) >> (51));
+		tmp[ctr] = ((__uint128_t)(r0));
 		tmp[ctr + 1] = ((tctrp1) + (c));
 	}
 	{
 		u32 ctr = 1;
-		u128 tctr = tmp[ctr];
-		u128 tctrp1 = tmp[ctr + 1];
+		__uint128_t tctr = tmp[ctr];
+		__uint128_t tctrp1 = tmp[ctr + 1];
 		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
-		u128 c = ((tctr) >> (51));
-		tmp[ctr] = ((u128)(r0));
+		__uint128_t c = ((tctr) >> (51));
+		tmp[ctr] = ((__uint128_t)(r0));
 		tmp[ctr + 1] = ((tctrp1) + (c));
 	}
 
 	{
 		u32 ctr = 2;
-		u128 tctr = tmp[ctr];
-		u128 tctrp1 = tmp[ctr + 1];
+		__uint128_t tctr = tmp[ctr];
+		__uint128_t tctrp1 = tmp[ctr + 1];
 		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
-		u128 c = ((tctr) >> (51));
-		tmp[ctr] = ((u128)(r0));
+		__uint128_t c = ((tctr) >> (51));
+		tmp[ctr] = ((__uint128_t)(r0));
 		tmp[ctr + 1] = ((tctrp1) + (c));
 	}
 	{
 		u32 ctr = 3;
-		u128 tctr = tmp[ctr];
-		u128 tctrp1 = tmp[ctr + 1];
+		__uint128_t tctr = tmp[ctr];
+		__uint128_t tctrp1 = tmp[ctr + 1];
 		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
-		u128 c = ((tctr) >> (51));
-		tmp[ctr] = ((u128)(r0));
+		__uint128_t c = ((tctr) >> (51));
+		tmp[ctr] = ((__uint128_t)(r0));
 		tmp[ctr + 1] = ((tctrp1) + (c));
 	}
 }
@@ -154,7 +152,7 @@  static __always_inline void fmul_shift_r
 	output[0] = 19 * b0;
 }
 
-static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input,
+static __always_inline void fmul_mul_shift_reduce_(__uint128_t *output, u64 *input,
 						   u64 *input21)
 {
 	u32 i;
@@ -188,21 +186,21 @@  static __always_inline void fmul_fmul(u6
 {
 	u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] };
 	{
-		u128 b4;
-		u128 b0;
-		u128 b4_;
-		u128 b0_;
+		__uint128_t b4;
+		__uint128_t b0;
+		__uint128_t b4_;
+		__uint128_t b0_;
 		u64 i0;
 		u64 i1;
 		u64 i0_;
 		u64 i1_;
-		u128 t[5] = { 0 };
+		__uint128_t t[5] = { 0 };
 		fmul_mul_shift_reduce_(t, tmp, input21);
 		fproduct_carry_wide_(t);
 		b4 = t[4];
 		b0 = t[0];
-		b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
-		b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+		b4_ = ((b4) & (((__uint128_t)(0x7ffffffffffffLLU))));
+		b0_ = ((b0) + (((__uint128_t)(19) * (((u64)(((b4) >> (51))))))));
 		t[4] = b4_;
 		t[0] = b0_;
 		fproduct_copy_from_wide_(output, t);
@@ -215,7 +213,7 @@  static __always_inline void fmul_fmul(u6
 	}
 }
 
-static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output)
+static __always_inline void fsquare_fsquare__(__uint128_t *tmp, u64 *output)
 {
 	u64 r0 = output[0];
 	u64 r1 = output[1];
@@ -227,16 +225,16 @@  static __always_inline void fsquare_fsqu
 	u64 d2 = r2 * 2 * 19;
 	u64 d419 = r4 * 19;
 	u64 d4 = d419 * 2;
-	u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) +
-		   (((u128)(d2) * (r3))));
-	u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) +
-		   (((u128)(r3 * 19) * (r3))));
-	u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) +
-		   (((u128)(d4) * (r3))));
-	u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) +
-		   (((u128)(r4) * (d419))));
-	u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) +
-		   (((u128)(r2) * (r2))));
+	__uint128_t s0 = ((((((__uint128_t)(r0) * (r0))) + (((__uint128_t)(d4) * (r1))))) +
+		   (((__uint128_t)(d2) * (r3))));
+	__uint128_t s1 = ((((((__uint128_t)(d0) * (r1))) + (((__uint128_t)(d4) * (r2))))) +
+		   (((__uint128_t)(r3 * 19) * (r3))));
+	__uint128_t s2 = ((((((__uint128_t)(d0) * (r2))) + (((__uint128_t)(r1) * (r1))))) +
+		   (((__uint128_t)(d4) * (r3))));
+	__uint128_t s3 = ((((((__uint128_t)(d0) * (r3))) + (((__uint128_t)(d1) * (r2))))) +
+		   (((__uint128_t)(r4) * (d419))));
+	__uint128_t s4 = ((((((__uint128_t)(d0) * (r4))) + (((__uint128_t)(d1) * (r3))))) +
+		   (((__uint128_t)(r2) * (r2))));
 	tmp[0] = s0;
 	tmp[1] = s1;
 	tmp[2] = s2;
@@ -244,12 +242,12 @@  static __always_inline void fsquare_fsqu
 	tmp[4] = s4;
 }
 
-static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output)
+static __always_inline void fsquare_fsquare_(__uint128_t *tmp, u64 *output)
 {
-	u128 b4;
-	u128 b0;
-	u128 b4_;
-	u128 b0_;
+	__uint128_t b4;
+	__uint128_t b0;
+	__uint128_t b4_;
+	__uint128_t b0_;
 	u64 i0;
 	u64 i1;
 	u64 i0_;
@@ -258,8 +256,8 @@  static __always_inline void fsquare_fsqu
 	fproduct_carry_wide_(tmp);
 	b4 = tmp[4];
 	b0 = tmp[0];
-	b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
-	b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+	b4_ = ((b4) & (((__uint128_t)(0x7ffffffffffffLLU))));
+	b0_ = ((b0) + (((__uint128_t)(19) * (((u64)(((b4) >> (51))))))));
 	tmp[4] = b4_;
 	tmp[0] = b0_;
 	fproduct_copy_from_wide_(output, tmp);
@@ -271,7 +269,7 @@  static __always_inline void fsquare_fsqu
 	output[1] = i1_;
 }
 
-static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp,
+static __always_inline void fsquare_fsquare_times_(u64 *output, __uint128_t *tmp,
 						   u32 count1)
 {
 	u32 i;
@@ -283,7 +281,7 @@  static __always_inline void fsquare_fsqu
 static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input,
 						  u32 count1)
 {
-	u128 t[5];
+	__uint128_t t[5];
 	memcpy(output, input, 5 * sizeof(*input));
 	fsquare_fsquare_times_(output, t, count1);
 }
@@ -291,7 +289,7 @@  static __always_inline void fsquare_fsqu
 static __always_inline void fsquare_fsquare_times_inplace(u64 *output,
 							  u32 count1)
 {
-	u128 t[5];
+	__uint128_t t[5];
 	fsquare_fsquare_times_(output, t, count1);
 }
 
@@ -396,36 +394,36 @@  static __always_inline void fdifference(
 
 static __always_inline void fscalar(u64 *output, u64 *b, u64 s)
 {
-	u128 tmp[5];
-	u128 b4;
-	u128 b0;
-	u128 b4_;
-	u128 b0_;
+	__uint128_t tmp[5];
+	__uint128_t b4;
+	__uint128_t b0;
+	__uint128_t b4_;
+	__uint128_t b0_;
 	{
 		u64 xi = b[0];
-		tmp[0] = ((u128)(xi) * (s));
+		tmp[0] = ((__uint128_t)(xi) * (s));
 	}
 	{
 		u64 xi = b[1];
-		tmp[1] = ((u128)(xi) * (s));
+		tmp[1] = ((__uint128_t)(xi) * (s));
 	}
 	{
 		u64 xi = b[2];
-		tmp[2] = ((u128)(xi) * (s));
+		tmp[2] = ((__uint128_t)(xi) * (s));
 	}
 	{
 		u64 xi = b[3];
-		tmp[3] = ((u128)(xi) * (s));
+		tmp[3] = ((__uint128_t)(xi) * (s));
 	}
 	{
 		u64 xi = b[4];
-		tmp[4] = ((u128)(xi) * (s));
+		tmp[4] = ((__uint128_t)(xi) * (s));
 	}
 	fproduct_carry_wide_(tmp);
 	b4 = tmp[4];
 	b0 = tmp[0];
-	b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
-	b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+	b4_ = ((b4) & (((__uint128_t)(0x7ffffffffffffLLU))));
+	b0_ = ((b0) + (((__uint128_t)(19) * (((u64)(((b4) >> (51))))))));
 	tmp[4] = b4_;
 	tmp[0] = b0_;
 	fproduct_copy_from_wide_(output, tmp);
--- a/lib/crypto/poly1305-donna64.c
+++ b/lib/crypto/poly1305-donna64.c
@@ -10,8 +10,6 @@ 
 #include <asm/unaligned.h>
 #include <crypto/internal/poly1305.h>
 
-typedef __uint128_t u128;
-
 void poly1305_core_setkey(struct poly1305_core_key *key,
 			  const u8 raw_key[POLY1305_BLOCK_SIZE])
 {
@@ -41,7 +39,7 @@  void poly1305_core_blocks(struct poly130
 	u64 s1, s2;
 	u64 h0, h1, h2;
 	u64 c;
-	u128 d0, d1, d2, d;
+	__uint128_t d0, d1, d2, d;
 
 	if (!nblocks)
 		return;
@@ -71,20 +69,20 @@  void poly1305_core_blocks(struct poly130
 		h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit64;
 
 		/* h *= r */
-		d0 = (u128)h0 * r0;
-		d = (u128)h1 * s2;
+		d0 = (__uint128_t)h0 * r0;
+		d = (__uint128_t)h1 * s2;
 		d0 += d;
-		d = (u128)h2 * s1;
+		d = (__uint128_t)h2 * s1;
 		d0 += d;
-		d1 = (u128)h0 * r1;
-		d = (u128)h1 * r0;
+		d1 = (__uint128_t)h0 * r1;
+		d = (__uint128_t)h1 * r0;
 		d1 += d;
-		d = (u128)h2 * s2;
+		d = (__uint128_t)h2 * s2;
 		d1 += d;
-		d2 = (u128)h0 * r2;
-		d = (u128)h1 * r1;
+		d2 = (__uint128_t)h0 * r2;
+		d = (__uint128_t)h1 * r1;
 		d2 += d;
-		d = (u128)h2 * r0;
+		d = (__uint128_t)h2 * r0;
 		d2 += d;
 
 		/* (partial) h %= p */