diff mbox series

[RFC,v3,10/15] crypto: poly1305 - use structures for key and accumulator

Message ID 20181105232526.173947-11-ebiggers@kernel.org (mailing list archive)
State Superseded
Delegated to: Herbert Xu
Headers show
Series crypto: Adiantum support | expand

Commit Message

Eric Biggers Nov. 5, 2018, 11:25 p.m. UTC
From: Eric Biggers <ebiggers@google.com>

In preparation for exposing a low-level Poly1305 API which implements
the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305
MAC and supports block-aligned inputs only, create structures
poly1305_key and poly1305_state which hold the limbs of the Poly1305
"r" key and accumulator, respectively.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/poly1305_glue.c | 20 +++++++------
 crypto/poly1305_generic.c       | 52 ++++++++++++++++-----------------
 include/crypto/poly1305.h       | 12 ++++++--
 3 files changed, 47 insertions(+), 37 deletions(-)

Comments

Ard Biesheuvel Nov. 6, 2018, 2:28 p.m. UTC | #1
On 6 November 2018 at 00:25, Eric Biggers <ebiggers@kernel.org> wrote:
> From: Eric Biggers <ebiggers@google.com>
>
> In preparation for exposing a low-level Poly1305 API which implements
> the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305
> MAC and supports block-aligned inputs only, create structures
> poly1305_key and poly1305_state which hold the limbs of the Poly1305
> "r" key and accumulator, respectively.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  arch/x86/crypto/poly1305_glue.c | 20 +++++++------
>  crypto/poly1305_generic.c       | 52 ++++++++++++++++-----------------
>  include/crypto/poly1305.h       | 12 ++++++--
>  3 files changed, 47 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
> index f012b7e28ad1..88cc01506c84 100644
> --- a/arch/x86/crypto/poly1305_glue.c
> +++ b/arch/x86/crypto/poly1305_glue.c
> @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
>         if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
>                 if (unlikely(!sctx->wset)) {
>                         if (!sctx->uset) {
> -                               memcpy(sctx->u, dctx->r, sizeof(sctx->u));
> -                               poly1305_simd_mult(sctx->u, dctx->r);
> +                               memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
> +                               poly1305_simd_mult(sctx->u, dctx->r.r);
>                                 sctx->uset = true;
>                         }
>                         memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
> -                       poly1305_simd_mult(sctx->u + 5, dctx->r);
> +                       poly1305_simd_mult(sctx->u + 5, dctx->r.r);
>                         memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
> -                       poly1305_simd_mult(sctx->u + 10, dctx->r);
> +                       poly1305_simd_mult(sctx->u + 10, dctx->r.r);
>                         sctx->wset = true;
>                 }
>                 blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
> -               poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
> +               poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
> +                                    sctx->u);
>                 src += POLY1305_BLOCK_SIZE * 4 * blocks;
>                 srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
>         }
>  #endif
>         if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
>                 if (unlikely(!sctx->uset)) {
> -                       memcpy(sctx->u, dctx->r, sizeof(sctx->u));
> -                       poly1305_simd_mult(sctx->u, dctx->r);
> +                       memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
> +                       poly1305_simd_mult(sctx->u, dctx->r.r);
>                         sctx->uset = true;
>                 }
>                 blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
> -               poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
> +               poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
> +                                    sctx->u);
>                 src += POLY1305_BLOCK_SIZE * 2 * blocks;
>                 srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
>         }
>         if (srclen >= POLY1305_BLOCK_SIZE) {
> -               poly1305_block_sse2(dctx->h, src, dctx->r, 1);
> +               poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
>                 srclen -= POLY1305_BLOCK_SIZE;
>         }
>         return srclen;
> diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
> index 47d3a6b83931..a23173f351b7 100644
> --- a/crypto/poly1305_generic.c
> +++ b/crypto/poly1305_generic.c
> @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
>  {
>         struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
>
> -       memset(dctx->h, 0, sizeof(dctx->h));
> +       memset(dctx->h.h, 0, sizeof(dctx->h.h));
>         dctx->buflen = 0;
>         dctx->rset = false;
>         dctx->sset = false;
> @@ -50,11 +50,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_init);
>  static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
>  {
>         /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
> -       dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
> -       dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
> -       dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
> -       dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
> -       dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
> +       dctx->r.r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
> +       dctx->r.r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
> +       dctx->r.r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
> +       dctx->r.r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
> +       dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
>  }
>
>  static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
> @@ -107,22 +107,22 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
>                 srclen = datalen;
>         }
>
> -       r0 = dctx->r[0];
> -       r1 = dctx->r[1];
> -       r2 = dctx->r[2];
> -       r3 = dctx->r[3];
> -       r4 = dctx->r[4];
> +       r0 = dctx->r.r[0];
> +       r1 = dctx->r.r[1];
> +       r2 = dctx->r.r[2];
> +       r3 = dctx->r.r[3];
> +       r4 = dctx->r.r[4];
>
>         s1 = r1 * 5;
>         s2 = r2 * 5;
>         s3 = r3 * 5;
>         s4 = r4 * 5;
>
> -       h0 = dctx->h[0];
> -       h1 = dctx->h[1];
> -       h2 = dctx->h[2];
> -       h3 = dctx->h[3];
> -       h4 = dctx->h[4];
> +       h0 = dctx->h.h[0];
> +       h1 = dctx->h.h[1];
> +       h2 = dctx->h.h[2];
> +       h3 = dctx->h.h[3];
> +       h4 = dctx->h.h[4];
>
>         while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
>
> @@ -157,11 +157,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
>                 srclen -= POLY1305_BLOCK_SIZE;
>         }
>
> -       dctx->h[0] = h0;
> -       dctx->h[1] = h1;
> -       dctx->h[2] = h2;
> -       dctx->h[3] = h3;
> -       dctx->h[4] = h4;
> +       dctx->h.h[0] = h0;
> +       dctx->h.h[1] = h1;
> +       dctx->h.h[2] = h2;
> +       dctx->h.h[3] = h3;
> +       dctx->h.h[4] = h4;
>
>         return srclen;
>  }
> @@ -220,11 +220,11 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
>         }
>
>         /* fully carry h */
> -       h0 = dctx->h[0];
> -       h1 = dctx->h[1];
> -       h2 = dctx->h[2];
> -       h3 = dctx->h[3];
> -       h4 = dctx->h[4];
> +       h0 = dctx->h.h[0];
> +       h1 = dctx->h.h[1];
> +       h2 = dctx->h.h[2];
> +       h3 = dctx->h.h[3];
> +       h4 = dctx->h.h[4];
>
>         h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
>         h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
> diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
> index f718a19da82f..493244c46664 100644
> --- a/include/crypto/poly1305.h
> +++ b/include/crypto/poly1305.h
> @@ -13,13 +13,21 @@
>  #define POLY1305_KEY_SIZE      32
>  #define POLY1305_DIGEST_SIZE   16
>
> +struct poly1305_key {
> +       u32 r[5];       /* key, base 2^26 */
> +};
> +
> +struct poly1305_state {
> +       u32 h[5];       /* accumulator, base 2^26 */
> +};
> +

Sorry to bikeshed, but wouldn't it make more sense to have single definition

struct poly1305_val {
    u32  v[5]; /* base 2^26 */
};

and have 'key' and 'accum[ulator]' fields of type struct poly1305_val
in the struct below?

>  struct poly1305_desc_ctx {
>         /* key */
> -       u32 r[5];
> +       struct poly1305_key r;
>         /* finalize key */
>         u32 s[4];
>         /* accumulator */
> -       u32 h[5];
> +       struct poly1305_state h;
>         /* partial buffer */
>         u8 buf[POLY1305_BLOCK_SIZE];
>         /* bytes used in partial buffer */
> --
> 2.19.1.930.g4563a0d9d0-goog
>
Eric Biggers Nov. 12, 2018, 6:58 p.m. UTC | #2
Hi Ard,

On Tue, Nov 06, 2018 at 03:28:24PM +0100, Ard Biesheuvel wrote:
> On 6 November 2018 at 00:25, Eric Biggers <ebiggers@kernel.org> wrote:
> > From: Eric Biggers <ebiggers@google.com>
> >
> > In preparation for exposing a low-level Poly1305 API which implements
> > the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305
> > MAC and supports block-aligned inputs only, create structures
> > poly1305_key and poly1305_state which hold the limbs of the Poly1305
> > "r" key and accumulator, respectively.
> >
> > Signed-off-by: Eric Biggers <ebiggers@google.com>
> > ---
> >  arch/x86/crypto/poly1305_glue.c | 20 +++++++------
> >  crypto/poly1305_generic.c       | 52 ++++++++++++++++-----------------
> >  include/crypto/poly1305.h       | 12 ++++++--
> >  3 files changed, 47 insertions(+), 37 deletions(-)
> >
> > diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
> > index f012b7e28ad1..88cc01506c84 100644
> > --- a/arch/x86/crypto/poly1305_glue.c
> > +++ b/arch/x86/crypto/poly1305_glue.c
> > @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
> >         if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
> >                 if (unlikely(!sctx->wset)) {
> >                         if (!sctx->uset) {
> > -                               memcpy(sctx->u, dctx->r, sizeof(sctx->u));
> > -                               poly1305_simd_mult(sctx->u, dctx->r);
> > +                               memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
> > +                               poly1305_simd_mult(sctx->u, dctx->r.r);
> >                                 sctx->uset = true;
> >                         }
> >                         memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
> > -                       poly1305_simd_mult(sctx->u + 5, dctx->r);
> > +                       poly1305_simd_mult(sctx->u + 5, dctx->r.r);
> >                         memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
> > -                       poly1305_simd_mult(sctx->u + 10, dctx->r);
> > +                       poly1305_simd_mult(sctx->u + 10, dctx->r.r);
> >                         sctx->wset = true;
> >                 }
> >                 blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
> > -               poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
> > +               poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
> > +                                    sctx->u);
> >                 src += POLY1305_BLOCK_SIZE * 4 * blocks;
> >                 srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
> >         }
> >  #endif
> >         if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
> >                 if (unlikely(!sctx->uset)) {
> > -                       memcpy(sctx->u, dctx->r, sizeof(sctx->u));
> > -                       poly1305_simd_mult(sctx->u, dctx->r);
> > +                       memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
> > +                       poly1305_simd_mult(sctx->u, dctx->r.r);
> >                         sctx->uset = true;
> >                 }
> >                 blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
> > -               poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
> > +               poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
> > +                                    sctx->u);
> >                 src += POLY1305_BLOCK_SIZE * 2 * blocks;
> >                 srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
> >         }
> >         if (srclen >= POLY1305_BLOCK_SIZE) {
> > -               poly1305_block_sse2(dctx->h, src, dctx->r, 1);
> > +               poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
> >                 srclen -= POLY1305_BLOCK_SIZE;
> >         }
> >         return srclen;
> > diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
> > index 47d3a6b83931..a23173f351b7 100644
> > --- a/crypto/poly1305_generic.c
> > +++ b/crypto/poly1305_generic.c
> > @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
> >  {
> >         struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
> >
> > -       memset(dctx->h, 0, sizeof(dctx->h));
> > +       memset(dctx->h.h, 0, sizeof(dctx->h.h));
> >         dctx->buflen = 0;
> >         dctx->rset = false;
> >         dctx->sset = false;
> > @@ -50,11 +50,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_init);
> >  static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
> >  {
> >         /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
> > -       dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
> > -       dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
> > -       dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
> > -       dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
> > -       dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
> > +       dctx->r.r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
> > +       dctx->r.r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
> > +       dctx->r.r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
> > +       dctx->r.r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
> > +       dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
> >  }
> >
> >  static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
> > @@ -107,22 +107,22 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
> >                 srclen = datalen;
> >         }
> >
> > -       r0 = dctx->r[0];
> > -       r1 = dctx->r[1];
> > -       r2 = dctx->r[2];
> > -       r3 = dctx->r[3];
> > -       r4 = dctx->r[4];
> > +       r0 = dctx->r.r[0];
> > +       r1 = dctx->r.r[1];
> > +       r2 = dctx->r.r[2];
> > +       r3 = dctx->r.r[3];
> > +       r4 = dctx->r.r[4];
> >
> >         s1 = r1 * 5;
> >         s2 = r2 * 5;
> >         s3 = r3 * 5;
> >         s4 = r4 * 5;
> >
> > -       h0 = dctx->h[0];
> > -       h1 = dctx->h[1];
> > -       h2 = dctx->h[2];
> > -       h3 = dctx->h[3];
> > -       h4 = dctx->h[4];
> > +       h0 = dctx->h.h[0];
> > +       h1 = dctx->h.h[1];
> > +       h2 = dctx->h.h[2];
> > +       h3 = dctx->h.h[3];
> > +       h4 = dctx->h.h[4];
> >
> >         while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
> >
> > @@ -157,11 +157,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
> >                 srclen -= POLY1305_BLOCK_SIZE;
> >         }
> >
> > -       dctx->h[0] = h0;
> > -       dctx->h[1] = h1;
> > -       dctx->h[2] = h2;
> > -       dctx->h[3] = h3;
> > -       dctx->h[4] = h4;
> > +       dctx->h.h[0] = h0;
> > +       dctx->h.h[1] = h1;
> > +       dctx->h.h[2] = h2;
> > +       dctx->h.h[3] = h3;
> > +       dctx->h.h[4] = h4;
> >
> >         return srclen;
> >  }
> > @@ -220,11 +220,11 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
> >         }
> >
> >         /* fully carry h */
> > -       h0 = dctx->h[0];
> > -       h1 = dctx->h[1];
> > -       h2 = dctx->h[2];
> > -       h3 = dctx->h[3];
> > -       h4 = dctx->h[4];
> > +       h0 = dctx->h.h[0];
> > +       h1 = dctx->h.h[1];
> > +       h2 = dctx->h.h[2];
> > +       h3 = dctx->h.h[3];
> > +       h4 = dctx->h.h[4];
> >
> >         h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
> >         h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
> > diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
> > index f718a19da82f..493244c46664 100644
> > --- a/include/crypto/poly1305.h
> > +++ b/include/crypto/poly1305.h
> > @@ -13,13 +13,21 @@
> >  #define POLY1305_KEY_SIZE      32
> >  #define POLY1305_DIGEST_SIZE   16
> >
> > +struct poly1305_key {
> > +       u32 r[5];       /* key, base 2^26 */
> > +};
> > +
> > +struct poly1305_state {
> > +       u32 h[5];       /* accumulator, base 2^26 */
> > +};
> > +
> 
> Sorry to bikeshed, but wouldn't it make more sense to have single definition
> 
> struct poly1305_val {
>     u32  v[5]; /* base 2^26 */
> };
> 
> and have 'key' and 'accum[ulator]' fields of type struct poly1305_val
> in the struct below?
> 

I prefer separate types so that the type system enforces that a key is never
accidentally used as an accumulator, and vice versa.  Then the poly1305_core_*
functions will be harder to misuse, and the Poly1305 MAC implementations harder
to get wrong.

The key also has certain bits clear whereas the accumulator does not.  In the
future, the Poly1305 C implementation might use base 2^32 and take advantage of
this.  In that case, the two inputs to each multiplication won't be
interchangeable, so using the same type for both would be extra confusing.

Having a poly1305_val nested inside poly1305_key and poly1305_state would work,
but seemed excessive.

> >  struct poly1305_desc_ctx {
> >         /* key */
> > -       u32 r[5];
> > +       struct poly1305_key r;
> >         /* finalize key */
> >         u32 s[4];
> >         /* accumulator */
> > -       u32 h[5];
> > +       struct poly1305_state h;
> >         /* partial buffer */
> >         u8 buf[POLY1305_BLOCK_SIZE];
> >         /* bytes used in partial buffer */
> > --

Thanks,

- Eric
Herbert Xu Nov. 16, 2018, 6:02 a.m. UTC | #3
Hi Eric:

On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote:
>
> I prefer separate types so that the type system enforces that a key is never
> accidentally used as an accumulator, and vice versa.  Then the poly1305_core_*
> functions will be harder to misuse, and the Poly1305 MAC implementations harder
> to get wrong.
> 
> The key also has certain bits clear whereas the accumulator does not.  In the
> future, the Poly1305 C implementation might use base 2^32 and take advantage of
> this.  In that case, the two inputs to each multiplication won't be
> interchangeable, so using the same type for both would be extra confusing.
> 
> Having a poly1305_val nested inside poly1305_key and poly1305_state would work,
> but seemed excessive.

So it looks like there are no more unresolved issues with this
patch series.  Please let me know when you want it to go in.

Thanks,
Eric Biggers Nov. 17, 2018, 12:17 a.m. UTC | #4
Hi Herbert,

On Fri, Nov 16, 2018 at 02:02:27PM +0800, Herbert Xu wrote:
> Hi Eric:
> 
> On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote:
> >
> > I prefer separate types so that the type system enforces that a key is never
> > accidentally used as an accumulator, and vice versa.  Then the poly1305_core_*
> > functions will be harder to misuse, and the Poly1305 MAC implementations harder
> > to get wrong.
> > 
> > The key also has certain bits clear whereas the accumulator does not.  In the
> > future, the Poly1305 C implementation might use base 2^32 and take advantage of
> > this.  In that case, the two inputs to each multiplication won't be
> > interchangeable, so using the same type for both would be extra confusing.
> > 
> > Having a poly1305_val nested inside poly1305_key and poly1305_state would work,
> > but seemed excessive.
> 
> So it looks like there are no more unresolved issues with this
> patch series.  Please let me know when you want it to go in.
> 

I believe it's ready to go in.  I'll need to rebase it onto cryptodev, to
resolve some small conflicts with the recent ChaCha20-related changes.  Also
I'll probably omit the fscrypt patch (patch 15/15) so that that patch can be
taken through the fscrypt tree instead.

Do you prefer that this be merged before or after Zinc?  It seems it may still
be a while before the community is satisfied with Zinc (and Wireguard which is
in the same patchset), and I don't want this blocked unnecessarily...  So on my
part I'd prefer to just have this merged as-is.

Of course, it's always possible to change the xchacha12 and xchacha20
implementations later, whether that's to use "Zinc" or otherwise.  And
NHPoly1305 and Adiantum itself will be the same either way, except that the
Poly1305 helpers may change slightly.

What do you think?

Thanks!

- Eric
Ard Biesheuvel Nov. 17, 2018, 12:30 a.m. UTC | #5
On Fri, 16 Nov 2018 at 16:17, Eric Biggers <ebiggers@kernel.org> wrote:
>
> Hi Herbert,
>
> On Fri, Nov 16, 2018 at 02:02:27PM +0800, Herbert Xu wrote:
> > Hi Eric:
> >
> > On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote:
> > >
> > > I prefer separate types so that the type system enforces that a key is never
> > > accidentally used as an accumulator, and vice versa.  Then the poly1305_core_*
> > > functions will be harder to misuse, and the Poly1305 MAC implementations harder
> > > to get wrong.
> > >
> > > The key also has certain bits clear whereas the accumulator does not.  In the
> > > future, the Poly1305 C implementation might use base 2^32 and take advantage of
> > > this.  In that case, the two inputs to each multiplication won't be
> > > interchangeable, so using the same type for both would be extra confusing.
> > >
> > > Having a poly1305_val nested inside poly1305_key and poly1305_state would work,
> > > but seemed excessive.
> >
> > So it looks like there are no more unresolved issues with this
> > patch series.  Please let me know when you want it to go in.
> >
>
> I believe it's ready to go in.  I'll need to rebase it onto cryptodev, to
> resolve some small conflicts with the recent ChaCha20-related changes.  Also
> I'll probably omit the fscrypt patch (patch 15/15) so that that patch can be
> taken through the fscrypt tree instead.
>
> Do you prefer that this be merged before or after Zinc?  It seems it may still
> be a while before the community is satisfied with Zinc (and Wireguard which is
> in the same patchset), and I don't want this blocked unnecessarily...  So on my
> part I'd prefer to just have this merged as-is.
>
> Of course, it's always possible to change the xchacha12 and xchacha20
> implementations later, whether that's to use "Zinc" or otherwise.  And
> NHPoly1305 and Adiantum itself will be the same either way, except that the
> Poly1305 helpers may change slightly.
>
> What do you think?
>

I think this is ready to go in. Most of Zinc itself will not be
blocked by this, only the changes to the crypto API ChaCha20
implementation will have to wait until the Zinc version gains support
for the reduced round variant, but that will not block WireGuard.
Jason A. Donenfeld Nov. 18, 2018, 1:46 p.m. UTC | #6
Hi Eric,

On Sat, Nov 17, 2018 at 1:17 AM Eric Biggers <ebiggers@kernel.org> wrote:
> Do you prefer that this be merged before or after Zinc?  It seems it may still
> be a while before the community is satisfied with Zinc (and Wireguard which is
> in the same patchset), and I don't want this blocked unnecessarily...  So on my
> part I'd prefer to just have this merged as-is.

Personally I'd prefer this be merged after Zinc, since there's work to
be done on adjusting the 20->12 in chacha20. That's not really much of
a reason though. But maybe we can just sidestep the ordering concern
all together:

What I suspect we should do is make the initial Zinc merge _without_
those top two patches that replace the crypto api's implementations
with Zinc, and defer those until after the initial merges. Those
commits are already written -- so there's no chance it won't happen
due to laziness or something -- and I think the general merge will go
a bit more smoothly if we wait until after. (Somebody suggested we do
it this way at Plumbers, and was actually a bit surprised I had
already ported the crypto API stuff to Zinc.) This way, Adiantum and
Zinc aren't potentially co-dependent in their initial merges and we
can work on the details carefully with each other after both have
landed. I figure this might make things a little bit less stressful
for both of us. How would you feel about doing it that?

Jason
diff mbox series

Patch

diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index f012b7e28ad1..88cc01506c84 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -83,35 +83,37 @@  static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
 	if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
 		if (unlikely(!sctx->wset)) {
 			if (!sctx->uset) {
-				memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-				poly1305_simd_mult(sctx->u, dctx->r);
+				memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+				poly1305_simd_mult(sctx->u, dctx->r.r);
 				sctx->uset = true;
 			}
 			memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 5, dctx->r);
+			poly1305_simd_mult(sctx->u + 5, dctx->r.r);
 			memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 10, dctx->r);
+			poly1305_simd_mult(sctx->u + 10, dctx->r.r);
 			sctx->wset = true;
 		}
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
-		poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
+		poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
+				     sctx->u);
 		src += POLY1305_BLOCK_SIZE * 4 * blocks;
 		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
 	}
 #endif
 	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
 		if (unlikely(!sctx->uset)) {
-			memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u, dctx->r);
+			memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+			poly1305_simd_mult(sctx->u, dctx->r.r);
 			sctx->uset = true;
 		}
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
-		poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
+		poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
+				     sctx->u);
 		src += POLY1305_BLOCK_SIZE * 2 * blocks;
 		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
 	}
 	if (srclen >= POLY1305_BLOCK_SIZE) {
-		poly1305_block_sse2(dctx->h, src, dctx->r, 1);
+		poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
 		srclen -= POLY1305_BLOCK_SIZE;
 	}
 	return srclen;
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 47d3a6b83931..a23173f351b7 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -38,7 +38,7 @@  int crypto_poly1305_init(struct shash_desc *desc)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 
-	memset(dctx->h, 0, sizeof(dctx->h));
+	memset(dctx->h.h, 0, sizeof(dctx->h.h));
 	dctx->buflen = 0;
 	dctx->rset = false;
 	dctx->sset = false;
@@ -50,11 +50,11 @@  EXPORT_SYMBOL_GPL(crypto_poly1305_init);
 static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-	dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
-	dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
-	dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
-	dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
-	dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
+	dctx->r.r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
+	dctx->r.r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
+	dctx->r.r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
+	dctx->r.r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
+	dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
 }
 
 static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
@@ -107,22 +107,22 @@  static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
 		srclen = datalen;
 	}
 
-	r0 = dctx->r[0];
-	r1 = dctx->r[1];
-	r2 = dctx->r[2];
-	r3 = dctx->r[3];
-	r4 = dctx->r[4];
+	r0 = dctx->r.r[0];
+	r1 = dctx->r.r[1];
+	r2 = dctx->r.r[2];
+	r3 = dctx->r.r[3];
+	r4 = dctx->r.r[4];
 
 	s1 = r1 * 5;
 	s2 = r2 * 5;
 	s3 = r3 * 5;
 	s4 = r4 * 5;
 
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
+	h0 = dctx->h.h[0];
+	h1 = dctx->h.h[1];
+	h2 = dctx->h.h[2];
+	h3 = dctx->h.h[3];
+	h4 = dctx->h.h[4];
 
 	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
 
@@ -157,11 +157,11 @@  static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
 		srclen -= POLY1305_BLOCK_SIZE;
 	}
 
-	dctx->h[0] = h0;
-	dctx->h[1] = h1;
-	dctx->h[2] = h2;
-	dctx->h[3] = h3;
-	dctx->h[4] = h4;
+	dctx->h.h[0] = h0;
+	dctx->h.h[1] = h1;
+	dctx->h.h[2] = h2;
+	dctx->h.h[3] = h3;
+	dctx->h.h[4] = h4;
 
 	return srclen;
 }
@@ -220,11 +220,11 @@  int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 	}
 
 	/* fully carry h */
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
+	h0 = dctx->h.h[0];
+	h1 = dctx->h.h[1];
+	h2 = dctx->h.h[2];
+	h3 = dctx->h.h[3];
+	h4 = dctx->h.h[4];
 
 	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
 	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index f718a19da82f..493244c46664 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -13,13 +13,21 @@ 
 #define POLY1305_KEY_SIZE	32
 #define POLY1305_DIGEST_SIZE	16
 
+struct poly1305_key {
+	u32 r[5];	/* key, base 2^26 */
+};
+
+struct poly1305_state {
+	u32 h[5];	/* accumulator, base 2^26 */
+};
+
 struct poly1305_desc_ctx {
 	/* key */
-	u32 r[5];
+	struct poly1305_key r;
 	/* finalize key */
 	u32 s[4];
 	/* accumulator */
-	u32 h[5];
+	struct poly1305_state h;
 	/* partial buffer */
 	u8 buf[POLY1305_BLOCK_SIZE];
 	/* bytes used in partial buffer */