diff mbox series

[RFC,v3,10/15] crypto: poly1305 - use structures for key and accumulator

Message ID 20181105232526.173947-11-ebiggers@kernel.org (mailing list archive)
State Superseded
Headers show
Series crypto: Adiantum support | expand

Commit Message

Eric Biggers Nov. 5, 2018, 11:25 p.m. UTC
From: Eric Biggers <ebiggers@google.com>

In preparation for exposing a low-level Poly1305 API which implements
the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305
MAC and supports block-aligned inputs only, create structures
poly1305_key and poly1305_state which hold the limbs of the Poly1305
"r" key and accumulator, respectively.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/poly1305_glue.c | 20 +++++++------
 crypto/poly1305_generic.c       | 52 ++++++++++++++++-----------------
 include/crypto/poly1305.h       | 12 ++++++--
 3 files changed, 47 insertions(+), 37 deletions(-)

Comments

Ard Biesheuvel Nov. 6, 2018, 2:28 p.m. UTC | #1
On 6 November 2018 at 00:25, Eric Biggers <ebiggers@kernel.org> wrote:
> From: Eric Biggers <ebiggers@google.com>
>
> In preparation for exposing a low-level Poly1305 API which implements
> the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305
> MAC and supports block-aligned inputs only, create structures
> poly1305_key and poly1305_state which hold the limbs of the Poly1305
> "r" key and accumulator, respectively.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  arch/x86/crypto/poly1305_glue.c | 20 +++++++------
>  crypto/poly1305_generic.c       | 52 ++++++++++++++++-----------------
>  include/crypto/poly1305.h       | 12 ++++++--
>  3 files changed, 47 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
> index f012b7e28ad1..88cc01506c84 100644
> --- a/arch/x86/crypto/poly1305_glue.c
> +++ b/arch/x86/crypto/poly1305_glue.c
> @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
>         if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
>                 if (unlikely(!sctx->wset)) {
>                         if (!sctx->uset) {
> -                               memcpy(sctx->u, dctx->r, sizeof(sctx->u));
> -                               poly1305_simd_mult(sctx->u, dctx->r);
> +                               memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
> +                               poly1305_simd_mult(sctx->u, dctx->r.r);
>                                 sctx->uset = true;
>                         }
>                         memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
> -                       poly1305_simd_mult(sctx->u + 5, dctx->r);
> +                       poly1305_simd_mult(sctx->u + 5, dctx->r.r);
>                         memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
> -                       poly1305_simd_mult(sctx->u + 10, dctx->r);
> +                       poly1305_simd_mult(sctx->u + 10, dctx->r.r);
>                         sctx->wset = true;
>                 }
>                 blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
> -               poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
> +               poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
> +                                    sctx->u);
>                 src += POLY1305_BLOCK_SIZE * 4 * blocks;
>                 srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
>         }
>  #endif
>         if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
>                 if (unlikely(!sctx->uset)) {
> -                       memcpy(sctx->u, dctx->r, sizeof(sctx->u));
> -                       poly1305_simd_mult(sctx->u, dctx->r);
> +                       memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
> +                       poly1305_simd_mult(sctx->u, dctx->r.r);
>                         sctx->uset = true;
>                 }
>                 blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
> -               poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
> +               poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
> +                                    sctx->u);
>                 src += POLY1305_BLOCK_SIZE * 2 * blocks;
>                 srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
>         }
>         if (srclen >= POLY1305_BLOCK_SIZE) {
> -               poly1305_block_sse2(dctx->h, src, dctx->r, 1);
> +               poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
>                 srclen -= POLY1305_BLOCK_SIZE;
>         }
>         return srclen;
> diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
> index 47d3a6b83931..a23173f351b7 100644
> --- a/crypto/poly1305_generic.c
> +++ b/crypto/poly1305_generic.c
> @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
>  {
>         struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
>
> -       memset(dctx->h, 0, sizeof(dctx->h));
> +       memset(dctx->h.h, 0, sizeof(dctx->h.h));
>         dctx->buflen = 0;
>         dctx->rset = false;
>         dctx->sset = false;
> @@ -50,11 +50,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_init);
>  static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
>  {
>         /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
> -       dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
> -       dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
> -       dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
> -       dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
> -       dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
> +       dctx->r.r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
> +       dctx->r.r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
> +       dctx->r.r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
> +       dctx->r.r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
> +       dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
>  }
>
>  static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
> @@ -107,22 +107,22 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
>                 srclen = datalen;
>         }
>
> -       r0 = dctx->r[0];
> -       r1 = dctx->r[1];
> -       r2 = dctx->r[2];
> -       r3 = dctx->r[3];
> -       r4 = dctx->r[4];
> +       r0 = dctx->r.r[0];
> +       r1 = dctx->r.r[1];
> +       r2 = dctx->r.r[2];
> +       r3 = dctx->r.r[3];
> +       r4 = dctx->r.r[4];
>
>         s1 = r1 * 5;
>         s2 = r2 * 5;
>         s3 = r3 * 5;
>         s4 = r4 * 5;
>
> -       h0 = dctx->h[0];
> -       h1 = dctx->h[1];
> -       h2 = dctx->h[2];
> -       h3 = dctx->h[3];
> -       h4 = dctx->h[4];
> +       h0 = dctx->h.h[0];
> +       h1 = dctx->h.h[1];
> +       h2 = dctx->h.h[2];
> +       h3 = dctx->h.h[3];
> +       h4 = dctx->h.h[4];
>
>         while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
>
> @@ -157,11 +157,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
>                 srclen -= POLY1305_BLOCK_SIZE;
>         }
>
> -       dctx->h[0] = h0;
> -       dctx->h[1] = h1;
> -       dctx->h[2] = h2;
> -       dctx->h[3] = h3;
> -       dctx->h[4] = h4;
> +       dctx->h.h[0] = h0;
> +       dctx->h.h[1] = h1;
> +       dctx->h.h[2] = h2;
> +       dctx->h.h[3] = h3;
> +       dctx->h.h[4] = h4;
>
>         return srclen;
>  }
> @@ -220,11 +220,11 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
>         }
>
>         /* fully carry h */
> -       h0 = dctx->h[0];
> -       h1 = dctx->h[1];
> -       h2 = dctx->h[2];
> -       h3 = dctx->h[3];
> -       h4 = dctx->h[4];
> +       h0 = dctx->h.h[0];
> +       h1 = dctx->h.h[1];
> +       h2 = dctx->h.h[2];
> +       h3 = dctx->h.h[3];
> +       h4 = dctx->h.h[4];
>
>         h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
>         h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
> diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
> index f718a19da82f..493244c46664 100644
> --- a/include/crypto/poly1305.h
> +++ b/include/crypto/poly1305.h
> @@ -13,13 +13,21 @@
>  #define POLY1305_KEY_SIZE      32
>  #define POLY1305_DIGEST_SIZE   16
>
> +struct poly1305_key {
> +       u32 r[5];       /* key, base 2^26 */
> +};
> +
> +struct poly1305_state {
> +       u32 h[5];       /* accumulator, base 2^26 */
> +};
> +

Sorry to bikeshed, but wouldn't it make more sense to have single definition

struct poly1305_val {
    u32  v[5]; /* base 2^26 */
};

and have 'key' and 'accum[ulator]' fields of type struct poly1305_val
in the struct below?

>  struct poly1305_desc_ctx {
>         /* key */
> -       u32 r[5];
> +       struct poly1305_key r;
>         /* finalize key */
>         u32 s[4];
>         /* accumulator */
> -       u32 h[5];
> +       struct poly1305_state h;
>         /* partial buffer */
>         u8 buf[POLY1305_BLOCK_SIZE];
>         /* bytes used in partial buffer */
> --
> 2.19.1.930.g4563a0d9d0-goog
>
Eric Biggers Nov. 12, 2018, 6:58 p.m. UTC | #2
Hi Ard,

On Tue, Nov 06, 2018 at 03:28:24PM +0100, Ard Biesheuvel wrote:
> On 6 November 2018 at 00:25, Eric Biggers <ebiggers@kernel.org> wrote:
> > From: Eric Biggers <ebiggers@google.com>
> >
> > In preparation for exposing a low-level Poly1305 API which implements
> > the ε-almost-∆-universal (εA∆U) hash function underlying the Poly1305
> > MAC and supports block-aligned inputs only, create structures
> > poly1305_key and poly1305_state which hold the limbs of the Poly1305
> > "r" key and accumulator, respectively.
> >
> > Signed-off-by: Eric Biggers <ebiggers@google.com>
> > ---
> >  arch/x86/crypto/poly1305_glue.c | 20 +++++++------
> >  crypto/poly1305_generic.c       | 52 ++++++++++++++++-----------------
> >  include/crypto/poly1305.h       | 12 ++++++--
> >  3 files changed, 47 insertions(+), 37 deletions(-)
> >
> > diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
> > index f012b7e28ad1..88cc01506c84 100644
> > --- a/arch/x86/crypto/poly1305_glue.c
> > +++ b/arch/x86/crypto/poly1305_glue.c
> > @@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
> >         if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
> >                 if (unlikely(!sctx->wset)) {
> >                         if (!sctx->uset) {
> > -                               memcpy(sctx->u, dctx->r, sizeof(sctx->u));
> > -                               poly1305_simd_mult(sctx->u, dctx->r);
> > +                               memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
> > +                               poly1305_simd_mult(sctx->u, dctx->r.r);
> >                                 sctx->uset = true;
> >                         }
> >                         memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
> > -                       poly1305_simd_mult(sctx->u + 5, dctx->r);
> > +                       poly1305_simd_mult(sctx->u + 5, dctx->r.r);
> >                         memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
> > -                       poly1305_simd_mult(sctx->u + 10, dctx->r);
> > +                       poly1305_simd_mult(sctx->u + 10, dctx->r.r);
> >                         sctx->wset = true;
> >                 }
> >                 blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
> > -               poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
> > +               poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
> > +                                    sctx->u);
> >                 src += POLY1305_BLOCK_SIZE * 4 * blocks;
> >                 srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
> >         }
> >  #endif
> >         if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
> >                 if (unlikely(!sctx->uset)) {
> > -                       memcpy(sctx->u, dctx->r, sizeof(sctx->u));
> > -                       poly1305_simd_mult(sctx->u, dctx->r);
> > +                       memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
> > +                       poly1305_simd_mult(sctx->u, dctx->r.r);
> >                         sctx->uset = true;
> >                 }
> >                 blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
> > -               poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
> > +               poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
> > +                                    sctx->u);
> >                 src += POLY1305_BLOCK_SIZE * 2 * blocks;
> >                 srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
> >         }
> >         if (srclen >= POLY1305_BLOCK_SIZE) {
> > -               poly1305_block_sse2(dctx->h, src, dctx->r, 1);
> > +               poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
> >                 srclen -= POLY1305_BLOCK_SIZE;
> >         }
> >         return srclen;
> > diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
> > index 47d3a6b83931..a23173f351b7 100644
> > --- a/crypto/poly1305_generic.c
> > +++ b/crypto/poly1305_generic.c
> > @@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
> >  {
> >         struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
> >
> > -       memset(dctx->h, 0, sizeof(dctx->h));
> > +       memset(dctx->h.h, 0, sizeof(dctx->h.h));
> >         dctx->buflen = 0;
> >         dctx->rset = false;
> >         dctx->sset = false;
> > @@ -50,11 +50,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_init);
> >  static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
> >  {
> >         /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
> > -       dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
> > -       dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
> > -       dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
> > -       dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
> > -       dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
> > +       dctx->r.r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
> > +       dctx->r.r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
> > +       dctx->r.r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
> > +       dctx->r.r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
> > +       dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
> >  }
> >
> >  static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
> > @@ -107,22 +107,22 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
> >                 srclen = datalen;
> >         }
> >
> > -       r0 = dctx->r[0];
> > -       r1 = dctx->r[1];
> > -       r2 = dctx->r[2];
> > -       r3 = dctx->r[3];
> > -       r4 = dctx->r[4];
> > +       r0 = dctx->r.r[0];
> > +       r1 = dctx->r.r[1];
> > +       r2 = dctx->r.r[2];
> > +       r3 = dctx->r.r[3];
> > +       r4 = dctx->r.r[4];
> >
> >         s1 = r1 * 5;
> >         s2 = r2 * 5;
> >         s3 = r3 * 5;
> >         s4 = r4 * 5;
> >
> > -       h0 = dctx->h[0];
> > -       h1 = dctx->h[1];
> > -       h2 = dctx->h[2];
> > -       h3 = dctx->h[3];
> > -       h4 = dctx->h[4];
> > +       h0 = dctx->h.h[0];
> > +       h1 = dctx->h.h[1];
> > +       h2 = dctx->h.h[2];
> > +       h3 = dctx->h.h[3];
> > +       h4 = dctx->h.h[4];
> >
> >         while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
> >
> > @@ -157,11 +157,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
> >                 srclen -= POLY1305_BLOCK_SIZE;
> >         }
> >
> > -       dctx->h[0] = h0;
> > -       dctx->h[1] = h1;
> > -       dctx->h[2] = h2;
> > -       dctx->h[3] = h3;
> > -       dctx->h[4] = h4;
> > +       dctx->h.h[0] = h0;
> > +       dctx->h.h[1] = h1;
> > +       dctx->h.h[2] = h2;
> > +       dctx->h.h[3] = h3;
> > +       dctx->h.h[4] = h4;
> >
> >         return srclen;
> >  }
> > @@ -220,11 +220,11 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
> >         }
> >
> >         /* fully carry h */
> > -       h0 = dctx->h[0];
> > -       h1 = dctx->h[1];
> > -       h2 = dctx->h[2];
> > -       h3 = dctx->h[3];
> > -       h4 = dctx->h[4];
> > +       h0 = dctx->h.h[0];
> > +       h1 = dctx->h.h[1];
> > +       h2 = dctx->h.h[2];
> > +       h3 = dctx->h.h[3];
> > +       h4 = dctx->h.h[4];
> >
> >         h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
> >         h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
> > diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
> > index f718a19da82f..493244c46664 100644
> > --- a/include/crypto/poly1305.h
> > +++ b/include/crypto/poly1305.h
> > @@ -13,13 +13,21 @@
> >  #define POLY1305_KEY_SIZE      32
> >  #define POLY1305_DIGEST_SIZE   16
> >
> > +struct poly1305_key {
> > +       u32 r[5];       /* key, base 2^26 */
> > +};
> > +
> > +struct poly1305_state {
> > +       u32 h[5];       /* accumulator, base 2^26 */
> > +};
> > +
> 
> Sorry to bikeshed, but wouldn't it make more sense to have single definition
> 
> struct poly1305_val {
>     u32  v[5]; /* base 2^26 */
> };
> 
> and have 'key' and 'accum[ulator]' fields of type struct poly1305_val
> in the struct below?
> 

I prefer separate types so that the type system enforces that a key is never
accidentally used as an accumulator, and vice versa.  Then the poly1305_core_*
functions will be harder to misuse, and the Poly1305 MAC implementations harder
to get wrong.

The key also has certain bits clear whereas the accumulator does not.  In the
future, the Poly1305 C implementation might use base 2^32 and take advantage of
this.  In that case, the two inputs to each multiplication won't be
interchangeable, so using the same type for both would be extra confusing.

Having a poly1305_val nested inside poly1305_key and poly1305_state would work,
but seemed excessive.

> >  struct poly1305_desc_ctx {
> >         /* key */
> > -       u32 r[5];
> > +       struct poly1305_key r;
> >         /* finalize key */
> >         u32 s[4];
> >         /* accumulator */
> > -       u32 h[5];
> > +       struct poly1305_state h;
> >         /* partial buffer */
> >         u8 buf[POLY1305_BLOCK_SIZE];
> >         /* bytes used in partial buffer */
> > --

Thanks,

- Eric
Herbert Xu Nov. 16, 2018, 6:02 a.m. UTC | #3
Hi Eric:

On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote:
>
> I prefer separate types so that the type system enforces that a key is never
> accidentally used as an accumulator, and vice versa.  Then the poly1305_core_*
> functions will be harder to misuse, and the Poly1305 MAC implementations harder
> to get wrong.
> 
> The key also has certain bits clear whereas the accumulator does not.  In the
> future, the Poly1305 C implementation might use base 2^32 and take advantage of
> this.  In that case, the two inputs to each multiplication won't be
> interchangeable, so using the same type for both would be extra confusing.
> 
> Having a poly1305_val nested inside poly1305_key and poly1305_state would work,
> but seemed excessive.

So it looks like there are no more unresolved issues with this
patch series.  Please let me know when you want it to go in.

Thanks,
Eric Biggers Nov. 17, 2018, 12:17 a.m. UTC | #4
Hi Herbert,

On Fri, Nov 16, 2018 at 02:02:27PM +0800, Herbert Xu wrote:
> Hi Eric:
> 
> On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote:
> >
> > I prefer separate types so that the type system enforces that a key is never
> > accidentally used as an accumulator, and vice versa.  Then the poly1305_core_*
> > functions will be harder to misuse, and the Poly1305 MAC implementations harder
> > to get wrong.
> > 
> > The key also has certain bits clear whereas the accumulator does not.  In the
> > future, the Poly1305 C implementation might use base 2^32 and take advantage of
> > this.  In that case, the two inputs to each multiplication won't be
> > interchangeable, so using the same type for both would be extra confusing.
> > 
> > Having a poly1305_val nested inside poly1305_key and poly1305_state would work,
> > but seemed excessive.
> 
> So it looks like there are no more unresolved issues with this
> patch series.  Please let me know when you want it to go in.
> 

I believe it's ready to go in.  I'll need to rebase it onto cryptodev, to
resolve some small conflicts with the recent ChaCha20-related changes.  Also
I'll probably omit the fscrypt patch (patch 15/15) so that that patch can be
taken through the fscrypt tree instead.

Do you prefer that this be merged before or after Zinc?  It seems it may still
be a while before the community is satisfied with Zinc (and Wireguard which is
in the same patchset), and I don't want this blocked unnecessarily...  So on my
part I'd prefer to just have this merged as-is.

Of course, it's always possible to change the xchacha12 and xchacha20
implementations later, whether that's to use "Zinc" or otherwise.  And
NHPoly1305 and Adiantum itself will be the same either way, except that the
Poly1305 helpers may change slightly.

What do you think?

Thanks!

- Eric
Ard Biesheuvel Nov. 17, 2018, 12:30 a.m. UTC | #5
On Fri, 16 Nov 2018 at 16:17, Eric Biggers <ebiggers@kernel.org> wrote:
>
> Hi Herbert,
>
> On Fri, Nov 16, 2018 at 02:02:27PM +0800, Herbert Xu wrote:
> > Hi Eric:
> >
> > On Mon, Nov 12, 2018 at 10:58:17AM -0800, Eric Biggers wrote:
> > >
> > > I prefer separate types so that the type system enforces that a key is never
> > > accidentally used as an accumulator, and vice versa.  Then the poly1305_core_*
> > > functions will be harder to misuse, and the Poly1305 MAC implementations harder
> > > to get wrong.
> > >
> > > The key also has certain bits clear whereas the accumulator does not.  In the
> > > future, the Poly1305 C implementation might use base 2^32 and take advantage of
> > > this.  In that case, the two inputs to each multiplication won't be
> > > interchangeable, so using the same type for both would be extra confusing.
> > >
> > > Having a poly1305_val nested inside poly1305_key and poly1305_state would work,
> > > but seemed excessive.
> >
> > So it looks like there are no more unresolved issues with this
> > patch series.  Please let me know when you want it to go in.
> >
>
> I believe it's ready to go in.  I'll need to rebase it onto cryptodev, to
> resolve some small conflicts with the recent ChaCha20-related changes.  Also
> I'll probably omit the fscrypt patch (patch 15/15) so that that patch can be
> taken through the fscrypt tree instead.
>
> Do you prefer that this be merged before or after Zinc?  It seems it may still
> be a while before the community is satisfied with Zinc (and Wireguard which is
> in the same patchset), and I don't want this blocked unnecessarily...  So on my
> part I'd prefer to just have this merged as-is.
>
> Of course, it's always possible to change the xchacha12 and xchacha20
> implementations later, whether that's to use "Zinc" or otherwise.  And
> NHPoly1305 and Adiantum itself will be the same either way, except that the
> Poly1305 helpers may change slightly.
>
> What do you think?
>

I think this is ready to go in. Most of Zinc itself will not be
blocked by this, only the changes to the crypto API ChaCha20
implementation will have to wait until the Zinc version gains support
for the reduced round variant, but that will not block WireGuard.
Jason A. Donenfeld Nov. 18, 2018, 1:46 p.m. UTC | #6
Hi Eric,

On Sat, Nov 17, 2018 at 1:17 AM Eric Biggers <ebiggers@kernel.org> wrote:
> Do you prefer that this be merged before or after Zinc?  It seems it may still
> be a while before the community is satisfied with Zinc (and Wireguard which is
> in the same patchset), and I don't want this blocked unnecessarily...  So on my
> part I'd prefer to just have this merged as-is.

Personally I'd prefer this be merged after Zinc, since there's work to
be done on adjusting the 20->12 in chacha20. That's not really much of
a reason though. But maybe we can just sidestep the ordering concern
all together:

What I suspect we should do is make the initial Zinc merge _without_
those top two patches that replace the crypto api's implementations
with Zinc, and defer those until after the initial merges. Those
commits are already written -- so there's no chance it won't happen
due to laziness or something -- and I think the general merge will go
a bit more smoothly if we wait until after. (Somebody suggested we do
it this way at Plumbers, and was actually a bit surprised I had
already ported the crypto API stuff to Zinc.) This way, Adiantum and
Zinc aren't potentially co-dependent in their initial merges and we
can work on the details carefully with each other after both have
landed. I figure this might make things a little bit less stressful
for both of us. How would you feel about doing it that?

Jason
Jason A. Donenfeld Nov. 19, 2018, 6:13 a.m. UTC | #7
Hi Herbert,

On Mon, Nov 19, 2018 at 6:25 AM Herbert Xu <herbert@gondor.apana.org.au> wrote:
> My proposal is to merge the zinc interface as is, but to invert
> how we place the algorithm implementations.  IOW the implementations
> should stay where they are now, with in the crypto API.  However,
> we will provide direct access to them for zinc without going through
> the crypto API.  IOW we'll just export the functions directly.

Sorry, but there isn't a chance I'll agree to this. The whole idea is
to have a clean place to focus on synchronous software
implementations, and not keep it tangled up with the asynchronous API.
If WireGuard and Zinc go in, it's going to be done right, not in a way
that introduces more problems and organizational complexity. Avoiding
the solution proposed here is kind of the point. And given that
cryptographic code is so security sensitive, I want to make sure this
is done right, as opposed to rushing it with some half-baked
concoction that will be maybe possibly be replaced "later". From
talking to folks at Plumbers, I believe most of the remaining issues
are taken care of by the upcoming v9, and that you're overstating the
status of discussions regarding that.

I think the remaining issue right now is how to order my series and
Eric's series. If Eric's goes in first, it means that I can either a)
spend some time developing Zinc further _now_ to support chacha12 and
keep the top two conversion patches, or b) just drop the top two
conversion patches and work that out carefully with Eric after. I
think (b) would be better, but I'm happy to do (a) if you disagree.
And as I mentioned in the last email, I'd prefer Eric's work to go in
after Zinc, but I don't think the reasoning for that is particularly
strong, so it seems fine to merge Eric's work first.

I'll post v9 pretty soon and you can see how things are shaping up.
Hopefully before then Eric/Ard/you can provide some feedback on
whether you'd prefer (a) or (b) above.

Regards,
Jason
Herbert Xu Nov. 19, 2018, 6:22 a.m. UTC | #8
Hi Jason:

On Mon, Nov 19, 2018 at 07:13:07AM +0100, Jason A. Donenfeld wrote:
>
> Sorry, but there isn't a chance I'll agree to this. The whole idea is
> to have a clean place to focus on synchronous software
> implementations, and not keep it tangled up with the asynchronous API.

There is nothing asynchronous in the relevant crypto code at all.
The underlying CPU-based software implementations are all completely
synchronous, including the architecture-specific ones such as
x86/arm.

So I don't understand why you are rejecting it.

Cheers,
Eric Biggers Nov. 19, 2018, 10:54 p.m. UTC | #9
On Mon, Nov 19, 2018 at 07:13:07AM +0100, Jason A. Donenfeld wrote:
> Hi Herbert,
> 
> On Mon, Nov 19, 2018 at 6:25 AM Herbert Xu <herbert@gondor.apana.org.au> wrote:
> > My proposal is to merge the zinc interface as is, but to invert
> > how we place the algorithm implementations.  IOW the implementations
> > should stay where they are now, with in the crypto API.  However,
> > we will provide direct access to them for zinc without going through
> > the crypto API.  IOW we'll just export the functions directly.
> 
> Sorry, but there isn't a chance I'll agree to this. The whole idea is
> to have a clean place to focus on synchronous software
> implementations, and not keep it tangled up with the asynchronous API.
> If WireGuard and Zinc go in, it's going to be done right, not in a way
> that introduces more problems and organizational complexity. Avoiding
> the solution proposed here is kind of the point. And given that
> cryptographic code is so security sensitive, I want to make sure this
> is done right, as opposed to rushing it with some half-baked
> concoction that will be maybe possibly be replaced "later". From
> talking to folks at Plumbers, I believe most of the remaining issues
> are taken care of by the upcoming v9, and that you're overstating the
> status of discussions regarding that.

Will v9 include a documentation file for Zinc in Documentation/crypto/?
That's been suggested several times.

> 
> I think the remaining issue right now is how to order my series and
> Eric's series. If Eric's goes in first, it means that I can either a)
> spend some time developing Zinc further _now_ to support chacha12 and
> keep the top two conversion patches, or b) just drop the top two
> conversion patches and work that out carefully with Eric after. I
> think (b) would be better, but I'm happy to do (a) if you disagree.
> And as I mentioned in the last email, I'd prefer Eric's work to go in
> after Zinc, but I don't think the reasoning for that is particularly
> strong, so it seems fine to merge Eric's work first.
> 
> I'll post v9 pretty soon and you can see how things are shaping up.
> Hopefully before then Eric/Ard/you can provide some feedback on
> whether you'd prefer (a) or (b) above.
> 

I'd still prefer to see the conversion patches included.  Skipping them would be
kicking the can down the road and avoiding issues that will need to be addressed
anyway.  Like you, I don't want a "half-baked concoction that will be maybe
possibly be replaced 'later'" :-)

Either way though, it would make things much easier if you at least named the
files, structures, constants, etc. "ChaCha" rather than "ChaCha20" from the
start where appropriate.  For an example, see the commit "crypto: chacha -
prepare for supporting non-20-round variants" on my "adiantum-zinc" branch:
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/commit/?h=adiantum-zinc&id=754af8d7d39f31238114426e39786c84d7cc0f98
Then the actual introduction of the 12-round variant is much less noisy.

- Eric
Jason A. Donenfeld Nov. 19, 2018, 11:15 p.m. UTC | #10
Hi Eric,

On Mon, Nov 19, 2018 at 11:54 PM Eric Biggers <ebiggers@kernel.org> wrote:
> Will v9 include a documentation file for Zinc in Documentation/crypto/?
> That's been suggested several times.

I had started writing that there, but then thought that the requested
information could go in the commit message instead. But I'm guessing
you're asking again now because you poked into the repo and didn't
find the Documentation/, so presumably you still want it. I can
reorganize the presentation of that to be more suitable for
Documentation/, and I'll have that for v9.

> I'd still prefer to see the conversion patches included.  Skipping them would be
> kicking the can down the road and avoiding issues that will need to be addressed
> anyway.  Like you, I don't want a "half-baked concoction that will be maybe
> possibly be replaced 'later'" :-)

Okay, fair enough. Will do.

> Either way though, it would make things much easier if you at least named the
> files, structures, constants, etc. "ChaCha" rather than "ChaCha20" from the
> start where appropriate.  For an example, see the commit "crypto: chacha -
> prepare for supporting non-20-round variants" on my "adiantum-zinc" branch:
> https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/commit/?h=adiantum-zinc&id=754af8d7d39f31238114426e39786c84d7cc0f98
> Then the actual introduction of the 12-round variant is much less noisy.

That's a good idea. I'll do it like that. I'll likely order it as what
we have now (renamed to omit the 20), and then put the 12 stuff on top
of that, so it's easier to see what's changed in the process. I
noticed in that branch, you didn't port the assembly to support fewer
rounds. Shall I follow suite, and then expect patches from you later
doing that? Or were you expecting me to also port the architecture
implementations to chacha12 as well?

Regards,
Jason
Eric Biggers Nov. 19, 2018, 11:23 p.m. UTC | #11
On Tue, Nov 20, 2018 at 12:15:17AM +0100, Jason A. Donenfeld wrote:
> Hi Eric,
> 
> On Mon, Nov 19, 2018 at 11:54 PM Eric Biggers <ebiggers@kernel.org> wrote:
> > Will v9 include a documentation file for Zinc in Documentation/crypto/?
> > That's been suggested several times.
> 
> I had started writing that there, but then thought that the requested
> information could go in the commit message instead. But I'm guessing
> you're asking again now because you poked into the repo and didn't
> find the Documentation/, so presumably you still want it. I can
> reorganize the presentation of that to be more suitable for
> Documentation/, and I'll have that for v9.
> 

It's much better to have the documentation in a permanent location.

> > I'd still prefer to see the conversion patches included.  Skipping them would be
> > kicking the can down the road and avoiding issues that will need to be addressed
> > anyway.  Like you, I don't want a "half-baked concoction that will be maybe
> > possibly be replaced 'later'" :-)
> 
> Okay, fair enough. Will do.
> 
> > Either way though, it would make things much easier if you at least named the
> > files, structures, constants, etc. "ChaCha" rather than "ChaCha20" from the
> > start where appropriate.  For an example, see the commit "crypto: chacha -
> > prepare for supporting non-20-round variants" on my "adiantum-zinc" branch:
> > https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/commit/?h=adiantum-zinc&id=754af8d7d39f31238114426e39786c84d7cc0f98
> > Then the actual introduction of the 12-round variant is much less noisy.
> 
> That's a good idea. I'll do it like that. I'll likely order it as what
> we have now (renamed to omit the 20), and then put the 12 stuff on top
> of that, so it's easier to see what's changed in the process. I
> noticed in that branch, you didn't port the assembly to support fewer
> rounds. Shall I follow suite, and then expect patches from you later
> doing that? Or were you expecting me to also port the architecture
> implementations to chacha12 as well?
> 

I actually did add ChaCha12 support to most of the Zinc assembly in
"[WIP] crypto: assembly support for ChaCha12"
(https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/commit/?h=adiantum-zinc&id=0a7787a515a977e11b680f1752b430ca1744e399).
But I skipped AVX-512 and MIPS since I didn't have a way to test those yet,
and I haven't ported the changes to your new perl scripts yet.

- Eric
Jason A. Donenfeld Nov. 19, 2018, 11:31 p.m. UTC | #12
Hi Eric,

On Tue, Nov 20, 2018 at 12:23 AM Eric Biggers <ebiggers@kernel.org> wrote:
> It's much better to have the documentation in a permanent location.

Agreed.

> I actually did add ChaCha12 support to most of the Zinc assembly in
> "[WIP] crypto: assembly support for ChaCha12"
> (https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/commit/?h=adiantum-zinc&id=0a7787a515a977e11b680f1752b430ca1744e399).
> But I skipped AVX-512 and MIPS since I didn't have a way to test those yet,
> and I haven't ported the changes to your new perl scripts yet.

Oh, great. If you're playing with this in the context of the WireGuard
repo, you can run `ARCH=mips make test-qemu -j$(nproc)` (which is
what's running on https://www.wireguard.com/build-status/ ). For
AVX-512 testing, I've got a box I'm happy to give you access to, if
you want to send an SSH key and your employer allows for that kind of
thing etc. I can have a stab at all of this too if you like.

Jason
Herbert Xu Nov. 20, 2018, 3:06 a.m. UTC | #13
On Mon, Nov 19, 2018 at 02:54:15PM -0800, Eric Biggers wrote:
>
> > I think the remaining issue right now is how to order my series and
> > Eric's series. If Eric's goes in first, it means that I can either a)
> > spend some time developing Zinc further _now_ to support chacha12 and
> > keep the top two conversion patches, or b) just drop the top two
> > conversion patches and work that out carefully with Eric after. I
> > think (b) would be better, but I'm happy to do (a) if you disagree.
> > And as I mentioned in the last email, I'd prefer Eric's work to go in
> > after Zinc, but I don't think the reasoning for that is particularly
> > strong, so it seems fine to merge Eric's work first.
> > 
> > I'll post v9 pretty soon and you can see how things are shaping up.
> > Hopefully before then Eric/Ard/you can provide some feedback on
> > whether you'd prefer (a) or (b) above.
> > 
> 
> I'd still prefer to see the conversion patches included.  Skipping them would be
> kicking the can down the road and avoiding issues that will need to be addressed
> anyway.  Like you, I don't want a "half-baked concoction that will be maybe
> possibly be replaced 'later'" :-)

Are you guys talking about the conversion patches to eliminate the
two copies of chacha code that would otherwise exist in crypto as
well as in zinc?

If so I think that's not negotiable.  Having two copies of the same
code is just not acceptable.

Cheers,
Jason A. Donenfeld Nov. 20, 2018, 3:08 a.m. UTC | #14
Hi Herbert,

On Tue, Nov 20, 2018 at 4:06 AM Herbert Xu <herbert@gondor.apana.org.au> wrote:
> > I'd still prefer to see the conversion patches included.  Skipping them would be
> > kicking the can down the road and avoiding issues that will need to be addressed
> > anyway.  Like you, I don't want a "half-baked concoction that will be maybe
> > possibly be replaced 'later'" :-)
>
> Are you guys talking about the conversion patches to eliminate the
> two copies of chacha code that would otherwise exist in crypto as
> well as in zinc?
>
> If so I think that's not negotiable.  Having two copies of the same
> code is just not acceptable.

Yes, that's the topic. Eric already expressed his preference to keep
them, and I agreed, so the plan is to keep them.

Jason
diff mbox series

Patch

diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index f012b7e28ad1..88cc01506c84 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -83,35 +83,37 @@  static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
 	if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
 		if (unlikely(!sctx->wset)) {
 			if (!sctx->uset) {
-				memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-				poly1305_simd_mult(sctx->u, dctx->r);
+				memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+				poly1305_simd_mult(sctx->u, dctx->r.r);
 				sctx->uset = true;
 			}
 			memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 5, dctx->r);
+			poly1305_simd_mult(sctx->u + 5, dctx->r.r);
 			memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u + 10, dctx->r);
+			poly1305_simd_mult(sctx->u + 10, dctx->r.r);
 			sctx->wset = true;
 		}
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
-		poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
+		poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
+				     sctx->u);
 		src += POLY1305_BLOCK_SIZE * 4 * blocks;
 		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
 	}
 #endif
 	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
 		if (unlikely(!sctx->uset)) {
-			memcpy(sctx->u, dctx->r, sizeof(sctx->u));
-			poly1305_simd_mult(sctx->u, dctx->r);
+			memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+			poly1305_simd_mult(sctx->u, dctx->r.r);
 			sctx->uset = true;
 		}
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
-		poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
+		poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
+				     sctx->u);
 		src += POLY1305_BLOCK_SIZE * 2 * blocks;
 		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
 	}
 	if (srclen >= POLY1305_BLOCK_SIZE) {
-		poly1305_block_sse2(dctx->h, src, dctx->r, 1);
+		poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
 		srclen -= POLY1305_BLOCK_SIZE;
 	}
 	return srclen;
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 47d3a6b83931..a23173f351b7 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -38,7 +38,7 @@  int crypto_poly1305_init(struct shash_desc *desc)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 
-	memset(dctx->h, 0, sizeof(dctx->h));
+	memset(dctx->h.h, 0, sizeof(dctx->h.h));
 	dctx->buflen = 0;
 	dctx->rset = false;
 	dctx->sset = false;
@@ -50,11 +50,11 @@  EXPORT_SYMBOL_GPL(crypto_poly1305_init);
 static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-	dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
-	dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
-	dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
-	dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
-	dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
+	dctx->r.r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
+	dctx->r.r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
+	dctx->r.r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
+	dctx->r.r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
+	dctx->r.r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
 }
 
 static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
@@ -107,22 +107,22 @@  static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
 		srclen = datalen;
 	}
 
-	r0 = dctx->r[0];
-	r1 = dctx->r[1];
-	r2 = dctx->r[2];
-	r3 = dctx->r[3];
-	r4 = dctx->r[4];
+	r0 = dctx->r.r[0];
+	r1 = dctx->r.r[1];
+	r2 = dctx->r.r[2];
+	r3 = dctx->r.r[3];
+	r4 = dctx->r.r[4];
 
 	s1 = r1 * 5;
 	s2 = r2 * 5;
 	s3 = r3 * 5;
 	s4 = r4 * 5;
 
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
+	h0 = dctx->h.h[0];
+	h1 = dctx->h.h[1];
+	h2 = dctx->h.h[2];
+	h3 = dctx->h.h[3];
+	h4 = dctx->h.h[4];
 
 	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
 
@@ -157,11 +157,11 @@  static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
 		srclen -= POLY1305_BLOCK_SIZE;
 	}
 
-	dctx->h[0] = h0;
-	dctx->h[1] = h1;
-	dctx->h[2] = h2;
-	dctx->h[3] = h3;
-	dctx->h[4] = h4;
+	dctx->h.h[0] = h0;
+	dctx->h.h[1] = h1;
+	dctx->h.h[2] = h2;
+	dctx->h.h[3] = h3;
+	dctx->h.h[4] = h4;
 
 	return srclen;
 }
@@ -220,11 +220,11 @@  int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 	}
 
 	/* fully carry h */
-	h0 = dctx->h[0];
-	h1 = dctx->h[1];
-	h2 = dctx->h[2];
-	h3 = dctx->h[3];
-	h4 = dctx->h[4];
+	h0 = dctx->h.h[0];
+	h1 = dctx->h.h[1];
+	h2 = dctx->h.h[2];
+	h3 = dctx->h.h[3];
+	h4 = dctx->h.h[4];
 
 	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
 	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index f718a19da82f..493244c46664 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -13,13 +13,21 @@ 
 #define POLY1305_KEY_SIZE	32
 #define POLY1305_DIGEST_SIZE	16
 
+struct poly1305_key {
+	u32 r[5];	/* key, base 2^26 */
+};
+
+struct poly1305_state {
+	u32 h[5];	/* accumulator, base 2^26 */
+};
+
 struct poly1305_desc_ctx {
 	/* key */
-	u32 r[5];
+	struct poly1305_key r;
 	/* finalize key */
 	u32 s[4];
 	/* accumulator */
-	u32 h[5];
+	struct poly1305_state h;
 	/* partial buffer */
 	u8 buf[POLY1305_BLOCK_SIZE];
 	/* bytes used in partial buffer */