diff mbox series

[net-next,v3,07/17] zinc: Poly1305 generic C implementations and selftest

Message ID 20180911010838.8818-9-Jason@zx2c4.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series None | expand

Commit Message

Jason A. Donenfeld Sept. 11, 2018, 1:08 a.m. UTC
These two C implementations -- a 32x32 one and a 64x64 one, depending on
the platform -- come from Andrew Moon's public domain poly1305-donna
portable code, modified for usage in the kernel and for usage with
accelerated primitives.

Information: https://cr.yp.to/mac.html

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: linux-crypto@vger.kernel.org
---
 include/zinc/poly1305.h              |  38 ++
 lib/zinc/Kconfig                     |   4 +
 lib/zinc/Makefile                    |   4 +
 lib/zinc/main.c                      |   5 +
 lib/zinc/poly1305/poly1305-donna32.h | 198 ++++++
 lib/zinc/poly1305/poly1305-donna64.h | 177 ++++++
 lib/zinc/poly1305/poly1305.c         | 131 ++++
 lib/zinc/selftest/poly1305.h         | 874 +++++++++++++++++++++++++++
 8 files changed, 1431 insertions(+)
 create mode 100644 include/zinc/poly1305.h
 create mode 100644 lib/zinc/poly1305/poly1305-donna32.h
 create mode 100644 lib/zinc/poly1305/poly1305-donna64.h
 create mode 100644 lib/zinc/poly1305/poly1305.c
 create mode 100644 lib/zinc/selftest/poly1305.h
diff mbox series

Patch

diff --git a/include/zinc/poly1305.h b/include/zinc/poly1305.h
new file mode 100644
index 000000000000..5c9220f3f736
--- /dev/null
+++ b/include/zinc/poly1305.h
@@ -0,0 +1,38 @@ 
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _ZINC_POLY1305_H
+#define _ZINC_POLY1305_H
+
+#include <linux/simd.h>
+#include <linux/types.h>
+
+enum poly1305_lengths {
+	POLY1305_BLOCK_SIZE = 16,
+	POLY1305_KEY_SIZE = 32,
+	POLY1305_MAC_SIZE = 16
+};
+
+struct poly1305_ctx {
+	u8 opaque[24 * sizeof(u64)];
+	u32 nonce[4];
+	u8 data[POLY1305_BLOCK_SIZE];
+	size_t num;
+} __aligned(8);
+
+void poly1305_fpu_init(void);
+
+void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE],
+		   simd_context_t simd_context);
+void poly1305_update(struct poly1305_ctx *ctx, const u8 *inp, const size_t len,
+		     simd_context_t simd_context);
+void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
+		     simd_context_t simd_context);
+
+#ifdef DEBUG
+bool poly1305_selftest(void);
+#endif
+
+#endif /* _ZINC_POLY1305_H */
diff --git a/lib/zinc/Kconfig b/lib/zinc/Kconfig
index 5311a0d6ba8b..86936739a05f 100644
--- a/lib/zinc/Kconfig
+++ b/lib/zinc/Kconfig
@@ -10,6 +10,10 @@  config ZINC_CHACHA20
 	bool
 	select ZINC
 
+config ZINC_POLY1305
+	bool
+	select ZINC
+
 config ZINC_DEBUG
 	bool "Zinc cryptography library debugging and self-tests"
 	depends on ZINC
diff --git a/lib/zinc/Makefile b/lib/zinc/Makefile
index ba4aebdada1b..ec5fa39d1d79 100644
--- a/lib/zinc/Makefile
+++ b/lib/zinc/Makefile
@@ -23,6 +23,10 @@  CFLAGS_chacha20.o += -include $(srctree)/$(src)/chacha20/chacha20-mips-glue.h
 endif
 endif
 
+ifeq ($(CONFIG_ZINC_POLY1305),y)
+zinc-y += poly1305/poly1305.o
+endif
+
 zinc-y += main.o
 
 obj-$(CONFIG_ZINC) := zinc.o
diff --git a/lib/zinc/main.c b/lib/zinc/main.c
index 7e8e84b706b7..d871dd406a5c 100644
--- a/lib/zinc/main.c
+++ b/lib/zinc/main.c
@@ -4,6 +4,7 @@ 
  */
 
 #include <zinc/chacha20.h>
+#include <zinc/poly1305.h>
 
 #include <linux/init.h>
 #include <linux/module.h>
@@ -21,6 +22,10 @@  static int __init mod_init(void)
 {
 #ifdef CONFIG_ZINC_CHACHA20
 	chacha20_fpu_init();
+#endif
+#ifdef CONFIG_ZINC_POLY1305
+	poly1305_fpu_init();
+	selftest(poly1305);
 #endif
 	return 0;
 }
diff --git a/lib/zinc/poly1305/poly1305-donna32.h b/lib/zinc/poly1305/poly1305-donna32.h
new file mode 100644
index 000000000000..1decb3385fdf
--- /dev/null
+++ b/lib/zinc/poly1305/poly1305-donna32.h
@@ -0,0 +1,198 @@ 
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is based in part on Andrew Moon's poly1305-donna, which is in the
+ * public domain.
+ */
+
+struct poly1305_internal {
+	u32 h[5];
+	u32 r[5];
+};
+
+static void poly1305_init_generic(void *ctx, const u8 key[16])
+{
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+
+	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+	st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
+	st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
+	st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
+	st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
+	st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
+
+	/* h = 0 */
+	st->h[0] = 0;
+	st->h[1] = 0;
+	st->h[2] = 0;
+	st->h[3] = 0;
+	st->h[4] = 0;
+}
+
+static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
+				    const u32 padbit)
+{
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+	const u32 hibit = padbit << 24;
+	u32 r0, r1, r2, r3, r4;
+	u32 s1, s2, s3, s4;
+	u32 h0, h1, h2, h3, h4;
+	u64 d0, d1, d2, d3, d4;
+	u32 c;
+
+	r0 = st->r[0];
+	r1 = st->r[1];
+	r2 = st->r[2];
+	r3 = st->r[3];
+	r4 = st->r[4];
+
+	s1 = r1 * 5;
+	s2 = r2 * 5;
+	s3 = r3 * 5;
+	s4 = r4 * 5;
+
+	h0 = st->h[0];
+	h1 = st->h[1];
+	h2 = st->h[2];
+	h3 = st->h[3];
+	h4 = st->h[4];
+
+	while (len >= POLY1305_BLOCK_SIZE) {
+		/* h += m[i] */
+		h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
+		h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
+		h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
+		h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
+		h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
+
+		/* h *= r */
+		d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
+		     ((u64)h2 * s3) + ((u64)h3 * s2) +
+		     ((u64)h4 * s1);
+		d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
+		     ((u64)h2 * s4) + ((u64)h3 * s3) +
+		     ((u64)h4 * s2);
+		d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
+		     ((u64)h2 * r0) + ((u64)h3 * s4) +
+		     ((u64)h4 * s3);
+		d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
+		     ((u64)h2 * r1) + ((u64)h3 * r0) +
+		     ((u64)h4 * s4);
+		d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
+		     ((u64)h2 * r2) + ((u64)h3 * r1) +
+		     ((u64)h4 * r0);
+
+		/* (partial) h %= p */
+		c = (u32)(d0 >> 26);
+		h0 = (u32)d0 & 0x3ffffff;
+		d1 += c;
+		c = (u32)(d1 >> 26);
+		h1 = (u32)d1 & 0x3ffffff;
+		d2 += c;
+		c = (u32)(d2 >> 26);
+		h2 = (u32)d2 & 0x3ffffff;
+		d3 += c;
+		c = (u32)(d3 >> 26);
+		h3 = (u32)d3 & 0x3ffffff;
+		d4 += c;
+		c = (u32)(d4 >> 26);
+		h4 = (u32)d4 & 0x3ffffff;
+		h0 += c * 5;
+		c = (h0 >> 26);
+		h0 = h0 & 0x3ffffff;
+		h1 += c;
+
+		input += POLY1305_BLOCK_SIZE;
+		len -= POLY1305_BLOCK_SIZE;
+	}
+
+	st->h[0] = h0;
+	st->h[1] = h1;
+	st->h[2] = h2;
+	st->h[3] = h3;
+	st->h[4] = h4;
+}
+
+static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
+{
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+	u32 h0, h1, h2, h3, h4, c;
+	u32 g0, g1, g2, g3, g4;
+	u64 f;
+	u32 mask;
+
+	/* fully carry h */
+	h0 = st->h[0];
+	h1 = st->h[1];
+	h2 = st->h[2];
+	h3 = st->h[3];
+	h4 = st->h[4];
+
+	c = h1 >> 26;
+	h1 = h1 & 0x3ffffff;
+	h2 += c;
+	c = h2 >> 26;
+	h2 = h2 & 0x3ffffff;
+	h3 += c;
+	c = h3 >> 26;
+	h3 = h3 & 0x3ffffff;
+	h4 += c;
+	c = h4 >> 26;
+	h4 = h4 & 0x3ffffff;
+	h0 += c * 5;
+	c = h0 >> 26;
+	h0 = h0 & 0x3ffffff;
+	h1 += c;
+
+	/* compute h + -p */
+	g0 = h0 + 5;
+	c = g0 >> 26;
+	g0 &= 0x3ffffff;
+	g1 = h1 + c;
+	c = g1 >> 26;
+	g1 &= 0x3ffffff;
+	g2 = h2 + c;
+	c = g2 >> 26;
+	g2 &= 0x3ffffff;
+	g3 = h3 + c;
+	c = g3 >> 26;
+	g3 &= 0x3ffffff;
+	g4 = h4 + c - (1UL << 26);
+
+	/* select h if h < p, or h + -p if h >= p */
+	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
+	g0 &= mask;
+	g1 &= mask;
+	g2 &= mask;
+	g3 &= mask;
+	g4 &= mask;
+	mask = ~mask;
+
+	h0 = (h0 & mask) | g0;
+	h1 = (h1 & mask) | g1;
+	h2 = (h2 & mask) | g2;
+	h3 = (h3 & mask) | g3;
+	h4 = (h4 & mask) | g4;
+
+	/* h = h % (2^128) */
+	h0 = ((h0) | (h1 << 26)) & 0xffffffff;
+	h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
+	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
+	h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
+
+	/* mac = (h + nonce) % (2^128) */
+	f = (u64)h0 + nonce[0];
+	h0 = (u32)f;
+	f = (u64)h1 + nonce[1] + (f >> 32);
+	h1 = (u32)f;
+	f = (u64)h2 + nonce[2] + (f >> 32);
+	h2 = (u32)f;
+	f = (u64)h3 + nonce[3] + (f >> 32);
+	h3 = (u32)f;
+
+	put_unaligned_le32(h0, &mac[0]);
+	put_unaligned_le32(h1, &mac[4]);
+	put_unaligned_le32(h2, &mac[8]);
+	put_unaligned_le32(h3, &mac[12]);
+}
diff --git a/lib/zinc/poly1305/poly1305-donna64.h b/lib/zinc/poly1305/poly1305-donna64.h
new file mode 100644
index 000000000000..2aa2570873a4
--- /dev/null
+++ b/lib/zinc/poly1305/poly1305-donna64.h
@@ -0,0 +1,177 @@ 
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * This is based in part on Andrew Moon's poly1305-donna, which is in the
+ * public domain.
+ */
+
+typedef __uint128_t u128;
+
+struct poly1305_internal {
+	u64 r[3];
+	u64 h[3];
+};
+
+static void poly1305_init_generic(void *ctx, const u8 key[16])
+{
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+	u64 t0, t1;
+
+	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+	t0 = get_unaligned_le64(&key[0]);
+	t1 = get_unaligned_le64(&key[8]);
+
+	st->r[0] = t0 & 0xffc0fffffff;
+	st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
+	st->r[2] = ((t1 >> 24)) & 0x00ffffffc0f;
+
+	/* h = 0 */
+	st->h[0] = 0;
+	st->h[1] = 0;
+	st->h[2] = 0;
+}
+
+static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
+				    const u32 padbit)
+{
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+	const u64 hibit = ((u64)padbit) << 40;
+	u64 r0, r1, r2;
+	u64 s1, s2;
+	u64 h0, h1, h2;
+	u64 c;
+	u128 d0, d1, d2, d;
+
+	r0 = st->r[0];
+	r1 = st->r[1];
+	r2 = st->r[2];
+
+	h0 = st->h[0];
+	h1 = st->h[1];
+	h2 = st->h[2];
+
+	s1 = r1 * (5 << 2);
+	s2 = r2 * (5 << 2);
+
+	while (len >= POLY1305_BLOCK_SIZE) {
+		u64 t0, t1;
+
+		/* h += m[i] */
+		t0 = get_unaligned_le64(&input[0]);
+		t1 = get_unaligned_le64(&input[8]);
+
+		h0 += t0 & 0xfffffffffff;
+		h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffff;
+		h2 += (((t1 >> 24)) & 0x3ffffffffff) | hibit;
+
+		/* h *= r */
+		d0 = (u128)h0 * r0;
+		d = (u128)h1 * s2;
+		d0 += d;
+		d = (u128)h2 * s1;
+		d0 += d;
+		d1 = (u128)h0 * r1;
+		d = (u128)h1 * r0;
+		d1 += d;
+		d = (u128)h2 * s2;
+		d1 += d;
+		d2 = (u128)h0 * r2;
+		d = (u128)h1 * r1;
+		d2 += d;
+		d = (u128)h2 * r0;
+		d2 += d;
+
+		/* (partial) h %= p */
+		c = (u64)(d0 >> 44);
+		h0 = (u64)d0 & 0xfffffffffff;
+		d1 += c;
+		c = (u64)(d1 >> 44);
+		h1 = (u64)d1 & 0xfffffffffff;
+		d2 += c;
+		c = (u64)(d2 >> 42);
+		h2 = (u64)d2 & 0x3ffffffffff;
+		h0 += c * 5;
+		c = h0 >> 44;
+		h0 = h0 & 0xfffffffffff;
+		h1 += c;
+
+		input += POLY1305_BLOCK_SIZE;
+		len -= POLY1305_BLOCK_SIZE;
+	}
+
+	st->h[0] = h0;
+	st->h[1] = h1;
+	st->h[2] = h2;
+}
+
+static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
+{
+	struct poly1305_internal *st = (struct poly1305_internal *)ctx;
+	u64 h0, h1, h2, c;
+	u64 g0, g1, g2;
+	u64 t0, t1;
+
+	/* fully carry h */
+	h0 = st->h[0];
+	h1 = st->h[1];
+	h2 = st->h[2];
+
+	c = h1 >> 44;
+	h1 &= 0xfffffffffff;
+	h2 += c;
+	c = h2 >> 42;
+	h2 &= 0x3ffffffffff;
+	h0 += c * 5;
+	c = h0 >> 44;
+	h0 &= 0xfffffffffff;
+	h1 += c;
+	c = h1 >> 44;
+	h1 &= 0xfffffffffff;
+	h2 += c;
+	c = h2 >> 42;
+	h2 &= 0x3ffffffffff;
+	h0 += c * 5;
+	c = h0 >> 44;
+	h0 &= 0xfffffffffff;
+	h1 += c;
+
+	/* compute h + -p */
+	g0 = h0 + 5;
+	c  = g0 >> 44;
+	g0 &= 0xfffffffffff;
+	g1 = h1 + c;
+	c  = g1 >> 44;
+	g1 &= 0xfffffffffff;
+	g2 = h2 + c - (1ULL << 42);
+
+	/* select h if h < p, or h + -p if h >= p */
+	c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1;
+	g0 &= c;
+	g1 &= c;
+	g2 &= c;
+	c  = ~c;
+	h0 = (h0 & c) | g0;
+	h1 = (h1 & c) | g1;
+	h2 = (h2 & c) | g2;
+
+	/* h = (h + nonce) */
+	t0 = ((u64)nonce[1] << 32) | nonce[0];
+	t1 = ((u64)nonce[3] << 32) | nonce[2];
+
+	h0 += t0 & 0xfffffffffff;
+	c = h0 >> 44;
+	h0 &= 0xfffffffffff;
+	h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
+	c = h1 >> 44;
+	h1 &= 0xfffffffffff;
+	h2 += (((t1 >> 24)) & 0x3ffffffffff) + c;
+	h2 &= 0x3ffffffffff;
+
+	/* mac = h % (2^128) */
+	h0 = h0 | (h1 << 44);
+	h1 = (h1 >> 20) | (h2 << 24);
+
+	put_unaligned_le64(h0, &mac[0]);
+	put_unaligned_le64(h1, &mac[8]);
+}
diff --git a/lib/zinc/poly1305/poly1305.c b/lib/zinc/poly1305/poly1305.c
new file mode 100644
index 000000000000..c09b09260c40
--- /dev/null
+++ b/lib/zinc/poly1305/poly1305.c
@@ -0,0 +1,131 @@ 
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Implementation of the Poly1305 message authenticator.
+ *
+ * Information: https://cr.yp.to/mac.html
+ */
+
+#include <zinc/poly1305.h>
+
+#include <asm/unaligned.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#ifndef HAVE_POLY1305_ARCH_IMPLEMENTATION
+static inline bool poly1305_init_arch(void *ctx,
+				      const u8 key[POLY1305_KEY_SIZE],
+				      simd_context_t simd_context)
+{
+	return false;
+}
+static inline bool poly1305_blocks_arch(void *ctx, const u8 *input,
+					const size_t len, const u32 padbit,
+					simd_context_t simd_context)
+{
+	return false;
+}
+static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
+				      const u32 nonce[4],
+				      simd_context_t simd_context)
+{
+	return false;
+}
+void __init poly1305_fpu_init(void)
+{
+}
+#endif
+
+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+#include "poly1305-donna64.h"
+#else
+#include "poly1305-donna32.h"
+#endif
+
+void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE],
+		   simd_context_t simd_context)
+{
+	ctx->nonce[0] = get_unaligned_le32(&key[16]);
+	ctx->nonce[1] = get_unaligned_le32(&key[20]);
+	ctx->nonce[2] = get_unaligned_le32(&key[24]);
+	ctx->nonce[3] = get_unaligned_le32(&key[28]);
+
+	if (!poly1305_init_arch(ctx->opaque, key, simd_context))
+		poly1305_init_generic(ctx->opaque, key);
+
+	ctx->num = 0;
+}
+EXPORT_SYMBOL(poly1305_init);
+
+static inline void poly1305_blocks(void *ctx, const u8 *input, const size_t len,
+				   const u32 padbit,
+				   simd_context_t simd_context)
+{
+	if (!poly1305_blocks_arch(ctx, input, len, padbit, simd_context))
+		poly1305_blocks_generic(ctx, input, len, padbit);
+}
+
+static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE],
+				 const u32 nonce[4],
+				 simd_context_t simd_context)
+{
+	if (!poly1305_emit_arch(ctx, mac, nonce, simd_context))
+		poly1305_emit_generic(ctx, mac, nonce);
+}
+
+void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
+		     simd_context_t simd_context)
+{
+	const size_t num = ctx->num % POLY1305_BLOCK_SIZE;
+	size_t rem;
+
+	if (num) {
+		rem = POLY1305_BLOCK_SIZE - num;
+		if (len < rem) {
+			memcpy(ctx->data + num, input, len);
+			ctx->num = num + len;
+			return;
+		}
+		memcpy(ctx->data + num, input, rem);
+		poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1,
+				simd_context);
+		input += rem;
+		len -= rem;
+	}
+
+	rem = len % POLY1305_BLOCK_SIZE;
+	len -= rem;
+
+	if (len >= POLY1305_BLOCK_SIZE) {
+		poly1305_blocks(ctx->opaque, input, len, 1, simd_context);
+		input += len;
+	}
+
+	if (rem)
+		memcpy(ctx->data, input, rem);
+
+	ctx->num = rem;
+}
+EXPORT_SYMBOL(poly1305_update);
+
+void poly1305_finish(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
+		     simd_context_t simd_context)
+{
+	size_t num = ctx->num % POLY1305_BLOCK_SIZE;
+
+	if (num) {
+		ctx->data[num++] = 1;
+		while (num < POLY1305_BLOCK_SIZE)
+			ctx->data[num++] = 0;
+		poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0,
+				simd_context);
+	}
+
+	poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context);
+
+	memzero_explicit(ctx, sizeof(*ctx));
+}
+EXPORT_SYMBOL(poly1305_finish);
+
+#include "../selftest/poly1305.h"
diff --git a/lib/zinc/selftest/poly1305.h b/lib/zinc/selftest/poly1305.h
new file mode 100644
index 000000000000..28b8da4d9c44
--- /dev/null
+++ b/lib/zinc/selftest/poly1305.h
@@ -0,0 +1,874 @@ 
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+struct poly1305_testvec {
+	u8 *input, *output, *key;
+	size_t ilen;
+};
+
+static const struct poly1305_testvec poly1305_testvecs[] __initconst = {
+{ /* RFC7539 */
+	.input	= (u8[]){ 0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72,
+			  0x61, 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f,
+			  0x72, 0x75, 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65,
+			  0x61, 0x72, 0x63, 0x68, 0x20, 0x47, 0x72, 0x6f,
+			  0x75, 0x70 },
+	.ilen	= 34,
+	.output	= (u8[]){ 0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6,
+			  0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9 },
+	.key	= (u8[]){ 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
+			  0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
+			  0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
+			  0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b },
+}, { /* "The Poly1305-AES message-authentication code" */
+	.input	= (u8[]){ 0xf3, 0xf6 },
+	.ilen	= 2,
+	.output	= (u8[]){ 0xf4, 0xc6, 0x33, 0xc3, 0x04, 0x4f, 0xc1, 0x45,
+			  0xf8, 0x4f, 0x33, 0x5c, 0xb8, 0x19, 0x53, 0xde },
+	.key	= (u8[]){ 0x85, 0x1f, 0xc4, 0x0c, 0x34, 0x67, 0xac, 0x0b,
+			  0xe0, 0x5c, 0xc2, 0x04, 0x04, 0xf3, 0xf7, 0x00,
+			  0x58, 0x0b, 0x3b, 0x0f, 0x94, 0x47, 0xbb, 0x1e,
+			  0x69, 0xd0, 0x95, 0xb5, 0x92, 0x8b, 0x6d, 0xbc },
+}, {
+	.input	= "",
+	.ilen	= 0,
+	.output	= (u8[]){ 0xdd, 0x3f, 0xab, 0x22, 0x51, 0xf1, 0x1a, 0xc7,
+			  0x59, 0xf0, 0x88, 0x71, 0x29, 0xcc, 0x2e, 0xe7 },
+	.key	= (u8[]){ 0xa0, 0xf3, 0x08, 0x00, 0x00, 0xf4, 0x64, 0x00,
+			  0xd0, 0xc7, 0xe9, 0x07, 0x6c, 0x83, 0x44, 0x03,
+			  0xdd, 0x3f, 0xab, 0x22, 0x51, 0xf1, 0x1a, 0xc7,
+			  0x59, 0xf0, 0x88, 0x71, 0x29, 0xcc, 0x2e, 0xe7 },
+}, {
+	.input	= (u8[]){ 0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36 },
+	.ilen	= 32,
+	.output	= (u8[]){ 0x0e, 0xe1, 0xc1, 0x6b, 0xb7, 0x3f, 0x0f, 0x4f,
+			  0xd1, 0x98, 0x81, 0x75, 0x3c, 0x01, 0xcd, 0xbe },
+	.key	= (u8[]){ 0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9 },
+	.ilen	= 63,
+	.output	= (u8[]){ 0x51, 0x54, 0xad, 0x0d, 0x2c, 0xb2, 0x6e, 0x01,
+			  0x27, 0x4f, 0xc5, 0x11, 0x48, 0x49, 0x1f, 0x1b },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, { /* self-generated vectors exercise "significant" lengths, such that they
+      * are handled by different code paths */
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf },
+	.ilen	= 64,
+	.output	= (u8[]){ 0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
+			  0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67 },
+	.ilen	= 48,
+	.output	= (u8[]){ 0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
+			  0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36 },
+	.ilen	= 96,
+	.output	= (u8[]){ 0xbb, 0xb6, 0x13, 0xb2, 0xb6, 0xd7, 0x53, 0xba,
+			  0x07, 0x39, 0x5b, 0x91, 0x6a, 0xae, 0xce, 0x15 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24 },
+	.ilen	= 112,
+	.output	= (u8[]){ 0xc7, 0x94, 0xd7, 0x05, 0x7d, 0x17, 0x78, 0xc4,
+			  0xbb, 0xee, 0x0a, 0x39, 0xb3, 0xd9, 0x73, 0x42 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36 },
+	.ilen	= 128,
+	.output	= (u8[]){ 0xff, 0xbc, 0xb9, 0xb3, 0x71, 0x42, 0x31, 0x52,
+			  0xd7, 0xfc, 0xa5, 0xad, 0x04, 0x2f, 0xba, 0xa9 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
+			  0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
+			  0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66 },
+	.ilen	= 144,
+	.output	= (u8[]){ 0x06, 0x9e, 0xd6, 0xb8, 0xef, 0x0f, 0x20, 0x7b,
+			  0x3e, 0x24, 0x3b, 0xb1, 0x01, 0x9f, 0xe6, 0x32 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
+			  0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
+			  0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66,
+			  0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
+			  0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61 },
+	.ilen	= 160,
+	.output	= (u8[]){ 0xcc, 0xa3, 0x39, 0xd9, 0xa4, 0x5f, 0xa2, 0x36,
+			  0x8c, 0x2c, 0x68, 0xb3, 0xa4, 0x17, 0x91, 0x33 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
+			  0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
+			  0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66,
+			  0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
+			  0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61,
+			  0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36 },
+	.ilen	= 288,
+	.output	= (u8[]){ 0x53, 0xf6, 0xe8, 0x28, 0xa2, 0xf0, 0xfe, 0x0e,
+			  0xe8, 0x15, 0xbf, 0x0b, 0xd5, 0x84, 0x1a, 0x34 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, {
+	.input	= (u8[]){ 0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
+			  0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
+			  0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66,
+			  0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
+			  0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61,
+			  0xab, 0x08, 0x12, 0x72, 0x4a, 0x7f, 0x1e, 0x34,
+			  0x27, 0x42, 0xcb, 0xed, 0x37, 0x4d, 0x94, 0xd1,
+			  0x36, 0xc6, 0xb8, 0x79, 0x5d, 0x45, 0xb3, 0x81,
+			  0x98, 0x30, 0xf2, 0xc0, 0x44, 0x91, 0xfa, 0xf0,
+			  0x99, 0x0c, 0x62, 0xe4, 0x8b, 0x80, 0x18, 0xb2,
+			  0xc3, 0xe4, 0xa0, 0xfa, 0x31, 0x34, 0xcb, 0x67,
+			  0xfa, 0x83, 0xe1, 0x58, 0xc9, 0x94, 0xd9, 0x61,
+			  0xc4, 0xcb, 0x21, 0x09, 0x5c, 0x1b, 0xf9, 0xaf,
+			  0x48, 0x44, 0x3d, 0x0b, 0xb0, 0xd2, 0x11, 0x09,
+			  0xc8, 0x9a, 0x10, 0x0b, 0x5c, 0xe2, 0xc2, 0x08,
+			  0x83, 0x14, 0x9c, 0x69, 0xb5, 0x61, 0xdd, 0x88,
+			  0x29, 0x8a, 0x17, 0x98, 0xb1, 0x07, 0x16, 0xef,
+			  0x66, 0x3c, 0xea, 0x19, 0x0f, 0xfb, 0x83, 0xd8,
+			  0x95, 0x93, 0xf3, 0xf4, 0x76, 0xb6, 0xbc, 0x24,
+			  0xd7, 0xe6, 0x79, 0x10, 0x7e, 0xa2, 0x6a, 0xdb,
+			  0x8c, 0xaf, 0x66, 0x52, 0xd0, 0x65, 0x61, 0x36,
+			  0x81, 0x20, 0x59, 0xa5, 0xda, 0x19, 0x86, 0x37,
+			  0xca, 0xc7, 0xc4, 0xa6, 0x31, 0xbe, 0xe4, 0x66,
+			  0x5b, 0x88, 0xd7, 0xf6, 0x22, 0x8b, 0x11, 0xe2,
+			  0xe2, 0x85, 0x79, 0xa5, 0xc0, 0xc1, 0xf7, 0x61 },
+	.ilen	= 320,
+	.output	= (u8[]){ 0xb8, 0x46, 0xd4, 0x4e, 0x9b, 0xbd, 0x53, 0xce,
+			  0xdf, 0xfb, 0xfb, 0xb6, 0xb7, 0xfa, 0x49, 0x33 },
+	.key	= (u8[]){ 0x12, 0x97, 0x6a, 0x08, 0xc4, 0x42, 0x6d, 0x0c,
+			  0xe8, 0xa8, 0x24, 0x07, 0xc4, 0xf4, 0x82, 0x07,
+			  0x80, 0xf8, 0xc2, 0x0a, 0xa7, 0x12, 0x02, 0xd1,
+			  0xe2, 0x91, 0x79, 0xcb, 0xcb, 0x55, 0x5a, 0x57 },
+}, { /* 4th power of the key spills to 131th bit in SIMD key setup */
+	.input	= (u8[]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.ilen	= 256,
+	.output	= (u8[]){ 0x07, 0x14, 0x5a, 0x4c, 0x02, 0xfe, 0x5f, 0xa3,
+			  0x20, 0x36, 0xde, 0x68, 0xfa, 0xbe, 0x90, 0x66 },
+	.key	= (u8[]){ 0xad, 0x62, 0x81, 0x07, 0xe8, 0x35, 0x1d, 0x0f,
+			  0x2c, 0x23, 0x1a, 0x05, 0xdc, 0x4a, 0x41, 0x06,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+}, { /* OpenSSL's poly1305_ieee754.c failed this in final stage */
+	.input	= (u8[]){ 0x84, 0x23, 0x64, 0xe1, 0x56, 0x33, 0x6c, 0x09,
+			  0x98, 0xb9, 0x33, 0xa6, 0x23, 0x77, 0x26, 0x18,
+			  0x0d, 0x9e, 0x3f, 0xdc, 0xbd, 0xe4, 0xcd, 0x5d,
+			  0x17, 0x08, 0x0f, 0xc3, 0xbe, 0xb4, 0x96, 0x14,
+			  0xd7, 0x12, 0x2c, 0x03, 0x74, 0x63, 0xff, 0x10,
+			  0x4d, 0x73, 0xf1, 0x9c, 0x12, 0x70, 0x46, 0x28,
+			  0xd4, 0x17, 0xc4, 0xc5, 0x4a, 0x3f, 0xe3, 0x0d,
+			  0x3c, 0x3d, 0x77, 0x14, 0x38, 0x2d, 0x43, 0xb0,
+			  0x38, 0x2a, 0x50, 0xa5, 0xde, 0xe5, 0x4b, 0xe8,
+			  0x44, 0xb0, 0x76, 0xe8, 0xdf, 0x88, 0x20, 0x1a,
+			  0x1c, 0xd4, 0x3b, 0x90, 0xeb, 0x21, 0x64, 0x3f,
+			  0xa9, 0x6f, 0x39, 0xb5, 0x18, 0xaa, 0x83, 0x40,
+			  0xc9, 0x42, 0xff, 0x3c, 0x31, 0xba, 0xf7, 0xc9,
+			  0xbd, 0xbf, 0x0f, 0x31, 0xae, 0x3f, 0xa0, 0x96,
+			  0xbf, 0x8c, 0x63, 0x03, 0x06, 0x09, 0x82, 0x9f,
+			  0xe7, 0x2e, 0x17, 0x98, 0x24, 0x89, 0x0b, 0xc8,
+			  0xe0, 0x8c, 0x31, 0x5c, 0x1c, 0xce, 0x2a, 0x83,
+			  0x14, 0x4d, 0xbb, 0xff, 0x09, 0xf7, 0x4e, 0x3e,
+			  0xfc, 0x77, 0x0b, 0x54, 0xd0, 0x98, 0x4a, 0x8f,
+			  0x19, 0xb1, 0x47, 0x19, 0xe6, 0x36, 0x35, 0x64,
+			  0x1d, 0x6b, 0x1e, 0xed, 0xf6, 0x3e, 0xfb, 0xf0,
+			  0x80, 0xe1, 0x78, 0x3d, 0x32, 0x44, 0x54, 0x12,
+			  0x11, 0x4c, 0x20, 0xde, 0x0b, 0x83, 0x7a, 0x0d,
+			  0xfa, 0x33, 0xd6, 0xb8, 0x28, 0x25, 0xff, 0xf4,
+			  0x4c, 0x9a, 0x70, 0xea, 0x54, 0xce, 0x47, 0xf0,
+			  0x7d, 0xf6, 0x98, 0xe6, 0xb0, 0x33, 0x23, 0xb5,
+			  0x30, 0x79, 0x36, 0x4a, 0x5f, 0xc3, 0xe9, 0xdd,
+			  0x03, 0x43, 0x92, 0xbd, 0xde, 0x86, 0xdc, 0xcd,
+			  0xda, 0x94, 0x32, 0x1c, 0x5e, 0x44, 0x06, 0x04,
+			  0x89, 0x33, 0x6c, 0xb6, 0x5b, 0xf3, 0x98, 0x9c,
+			  0x36, 0xf7, 0x28, 0x2c, 0x2f, 0x5d, 0x2b, 0x88,
+			  0x2c, 0x17, 0x1e, 0x74 },
+	.ilen	= 252,
+	.output	= (u8[]){ 0xf2, 0x48, 0x31, 0x2e, 0x57, 0x8d, 0x9d, 0x58,
+			  0xf8, 0xb7, 0xbb, 0x4d, 0x19, 0x10, 0x54, 0x31 },
+	.key	= (u8[]){ 0x95, 0xd5, 0xc0, 0x05, 0x50, 0x3e, 0x51, 0x0d,
+			  0x8c, 0xd0, 0xaa, 0x07, 0x2c, 0x4a, 0x4d, 0x06,
+			  0x6e, 0xab, 0xc5, 0x2d, 0x11, 0x65, 0x3d, 0xf4,
+			  0x7f, 0xbf, 0x63, 0xab, 0x19, 0x8b, 0xcc, 0x26 },
+}, { /* AVX2 in OpenSSL's poly1305-x86.pl failed this with 176+32 split */
+	.input	= (u8[]){ 0x24, 0x8a, 0xc3, 0x10, 0x85, 0xb6, 0xc2, 0xad,
+			  0xaa, 0xa3, 0x82, 0x59, 0xa0, 0xd7, 0x19, 0x2c,
+			  0x5c, 0x35, 0xd1, 0xbb, 0x4e, 0xf3, 0x9a, 0xd9,
+			  0x4c, 0x38, 0xd1, 0xc8, 0x24, 0x79, 0xe2, 0xdd,
+			  0x21, 0x59, 0xa0, 0x77, 0x02, 0x4b, 0x05, 0x89,
+			  0xbc, 0x8a, 0x20, 0x10, 0x1b, 0x50, 0x6f, 0x0a,
+			  0x1a, 0xd0, 0xbb, 0xab, 0x76, 0xe8, 0x3a, 0x83,
+			  0xf1, 0xb9, 0x4b, 0xe6, 0xbe, 0xae, 0x74, 0xe8,
+			  0x74, 0xca, 0xb6, 0x92, 0xc5, 0x96, 0x3a, 0x75,
+			  0x43, 0x6b, 0x77, 0x61, 0x21, 0xec, 0x9f, 0x62,
+			  0x39, 0x9a, 0x3e, 0x66, 0xb2, 0xd2, 0x27, 0x07,
+			  0xda, 0xe8, 0x19, 0x33, 0xb6, 0x27, 0x7f, 0x3c,
+			  0x85, 0x16, 0xbc, 0xbe, 0x26, 0xdb, 0xbd, 0x86,
+			  0xf3, 0x73, 0x10, 0x3d, 0x7c, 0xf4, 0xca, 0xd1,
+			  0x88, 0x8c, 0x95, 0x21, 0x18, 0xfb, 0xfb, 0xd0,
+			  0xd7, 0xb4, 0xbe, 0xdc, 0x4a, 0xe4, 0x93, 0x6a,
+			  0xff, 0x91, 0x15, 0x7e, 0x7a, 0xa4, 0x7c, 0x54,
+			  0x44, 0x2e, 0xa7, 0x8d, 0x6a, 0xc2, 0x51, 0xd3,
+			  0x24, 0xa0, 0xfb, 0xe4, 0x9d, 0x89, 0xcc, 0x35,
+			  0x21, 0xb6, 0x6d, 0x16, 0xe9, 0xc6, 0x6a, 0x37,
+			  0x09, 0x89, 0x4e, 0x4e, 0xb0, 0xa4, 0xee, 0xdc,
+			  0x4a, 0xe1, 0x94, 0x68, 0xe6, 0x6b, 0x81, 0xf2,
+			  0x71, 0x35, 0x1b, 0x1d, 0x92, 0x1e, 0xa5, 0x51,
+			  0x04, 0x7a, 0xbc, 0xc6, 0xb8, 0x7a, 0x90, 0x1f,
+			  0xde, 0x7d, 0xb7, 0x9f, 0xa1, 0x81, 0x8c, 0x11,
+			  0x33, 0x6d, 0xbc, 0x07, 0x24, 0x4a, 0x40, 0xeb },
+	.ilen	= 208,
+	.output	= (u8[]){ 0xbc, 0x93, 0x9b, 0xc5, 0x28, 0x14, 0x80, 0xfa,
+			  0x99, 0xc6, 0xd6, 0x8c, 0x25, 0x8e, 0xc4, 0x2f },
+	.key	= (u8[]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+			  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+}, { /* test vectors from Google */
+	.input	= "",
+	.ilen	= 0,
+	.output	= (u8[]){ 0x47, 0x10, 0x13, 0x0e, 0x9f, 0x6f, 0xea, 0x8d,
+			  0x72, 0x29, 0x38, 0x50, 0xa6, 0x67, 0xd8, 0x6c },
+	.key	= (u8[]){ 0xc8, 0xaf, 0xaa, 0xc3, 0x31, 0xee, 0x37, 0x2c,
+			  0xd6, 0x08, 0x2d, 0xe1, 0x34, 0x94, 0x3b, 0x17,
+			  0x47, 0x10, 0x13, 0x0e, 0x9f, 0x6f, 0xea, 0x8d,
+			  0x72, 0x29, 0x38, 0x50, 0xa6, 0x67, 0xd8, 0x6c },
+}, {
+	.input	= (u8[]){ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+			  0x72, 0x6c, 0x64, 0x21 },
+	.ilen	= 12,
+	.output	= (u8[]){ 0xa6, 0xf7, 0x45, 0x00, 0x8f, 0x81, 0xc9, 0x16,
+			  0xa2, 0x0d, 0xcc, 0x74, 0xee, 0xf2, 0xb2, 0xf0 },
+	.key	= (u8[]){ 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20,
+			  0x33, 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20,
+			  0x6b, 0x65, 0x79, 0x20, 0x66, 0x6f, 0x72, 0x20,
+			  0x50, 0x6f, 0x6c, 0x79, 0x31, 0x33, 0x30, 0x35 },
+}, {
+	.input	= (u8[]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.ilen	= 32,
+	.output	= (u8[]){ 0x49, 0xec, 0x78, 0x09, 0x0e, 0x48, 0x1e, 0xc6,
+			  0xc2, 0x6b, 0x33, 0xb9, 0x1c, 0xcc, 0x03, 0x07 },
+	.key	= (u8[]){ 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20,
+			  0x33, 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20,
+			  0x6b, 0x65, 0x79, 0x20, 0x66, 0x6f, 0x72, 0x20,
+			  0x50, 0x6f, 0x6c, 0x79, 0x31, 0x33, 0x30, 0x35 },
+}, {
+	.input	= (u8[]){ 0x89, 0xda, 0xb8, 0x0b, 0x77, 0x17, 0xc1, 0xdb,
+			  0x5d, 0xb4, 0x37, 0x86, 0x0a, 0x3f, 0x70, 0x21,
+			  0x8e, 0x93, 0xe1, 0xb8, 0xf4, 0x61, 0xfb, 0x67,
+			  0x7f, 0x16, 0xf3, 0x5f, 0x6f, 0x87, 0xe2, 0xa9,
+			  0x1c, 0x99, 0xbc, 0x3a, 0x47, 0xac, 0xe4, 0x76,
+			  0x40, 0xcc, 0x95, 0xc3, 0x45, 0xbe, 0x5e, 0xcc,
+			  0xa5, 0xa3, 0x52, 0x3c, 0x35, 0xcc, 0x01, 0x89,
+			  0x3a, 0xf0, 0xb6, 0x4a, 0x62, 0x03, 0x34, 0x27,
+			  0x03, 0x72, 0xec, 0x12, 0x48, 0x2d, 0x1b, 0x1e,
+			  0x36, 0x35, 0x61, 0x69, 0x8a, 0x57, 0x8b, 0x35,
+			  0x98, 0x03, 0x49, 0x5b, 0xb4, 0xe2, 0xef, 0x19,
+			  0x30, 0xb1, 0x7a, 0x51, 0x90, 0xb5, 0x80, 0xf1,
+			  0x41, 0x30, 0x0d, 0xf3, 0x0a, 0xdb, 0xec, 0xa2,
+			  0x8f, 0x64, 0x27, 0xa8, 0xbc, 0x1a, 0x99, 0x9f,
+			  0xd5, 0x1c, 0x55, 0x4a, 0x01, 0x7d, 0x09, 0x5d,
+			  0x8c, 0x3e, 0x31, 0x27, 0xda, 0xf9, 0xf5, 0x95 },
+	.ilen	= 128,
+	.output	= (u8[]){ 0xc8, 0x5d, 0x15, 0xed, 0x44, 0xc3, 0x78, 0xd6,
+			  0xb0, 0x0e, 0x23, 0x06, 0x4c, 0x7b, 0xcd, 0x51 },
+	.key	= (u8[]){ 0x2d, 0x77, 0x3b, 0xe3, 0x7a, 0xdb, 0x1e, 0x4d,
+			  0x68, 0x3b, 0xf0, 0x07, 0x5e, 0x79, 0xc4, 0xee,
+			  0x03, 0x79, 0x18, 0x53, 0x5a, 0x7f, 0x99, 0xcc,
+			  0xb7, 0x04, 0x0f, 0xb5, 0xf5, 0xf4, 0x3a, 0xea },
+}, {
+	.input	= (u8[]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0b,
+			  0x17, 0x03, 0x03, 0x02, 0x00, 0x00, 0x00, 0x00,
+			  0x06, 0xdb, 0x1f, 0x1f, 0x36, 0x8d, 0x69, 0x6a,
+			  0x81, 0x0a, 0x34, 0x9c, 0x0c, 0x71, 0x4c, 0x9a,
+			  0x5e, 0x78, 0x50, 0xc2, 0x40, 0x7d, 0x72, 0x1a,
+			  0xcd, 0xed, 0x95, 0xe0, 0x18, 0xd7, 0xa8, 0x52,
+			  0x66, 0xa6, 0xe1, 0x28, 0x9c, 0xdb, 0x4a, 0xeb,
+			  0x18, 0xda, 0x5a, 0xc8, 0xa2, 0xb0, 0x02, 0x6d,
+			  0x24, 0xa5, 0x9a, 0xd4, 0x85, 0x22, 0x7f, 0x3e,
+			  0xae, 0xdb, 0xb2, 0xe7, 0xe3, 0x5e, 0x1c, 0x66,
+			  0xcd, 0x60, 0xf9, 0xab, 0xf7, 0x16, 0xdc, 0xc9,
+			  0xac, 0x42, 0x68, 0x2d, 0xd7, 0xda, 0xb2, 0x87,
+			  0xa7, 0x02, 0x4c, 0x4e, 0xef, 0xc3, 0x21, 0xcc,
+			  0x05, 0x74, 0xe1, 0x67, 0x93, 0xe3, 0x7c, 0xec,
+			  0x03, 0xc5, 0xbd, 0xa4, 0x2b, 0x54, 0xc1, 0x14,
+			  0xa8, 0x0b, 0x57, 0xaf, 0x26, 0x41, 0x6c, 0x7b,
+			  0xe7, 0x42, 0x00, 0x5e, 0x20, 0x85, 0x5c, 0x73,
+			  0xe2, 0x1d, 0xc8, 0xe2, 0xed, 0xc9, 0xd4, 0x35,
+			  0xcb, 0x6f, 0x60, 0x59, 0x28, 0x00, 0x11, 0xc2,
+			  0x70, 0xb7, 0x15, 0x70, 0x05, 0x1c, 0x1c, 0x9b,
+			  0x30, 0x52, 0x12, 0x66, 0x20, 0xbc, 0x1e, 0x27,
+			  0x30, 0xfa, 0x06, 0x6c, 0x7a, 0x50, 0x9d, 0x53,
+			  0xc6, 0x0e, 0x5a, 0xe1, 0xb4, 0x0a, 0xa6, 0xe3,
+			  0x9e, 0x49, 0x66, 0x92, 0x28, 0xc9, 0x0e, 0xec,
+			  0xb4, 0xa5, 0x0d, 0xb3, 0x2a, 0x50, 0xbc, 0x49,
+			  0xe9, 0x0b, 0x4f, 0x4b, 0x35, 0x9a, 0x1d, 0xfd,
+			  0x11, 0x74, 0x9c, 0xd3, 0x86, 0x7f, 0xcf, 0x2f,
+			  0xb7, 0xbb, 0x6c, 0xd4, 0x73, 0x8f, 0x6a, 0x4a,
+			  0xd6, 0xf7, 0xca, 0x50, 0x58, 0xf7, 0x61, 0x88,
+			  0x45, 0xaf, 0x9f, 0x02, 0x0f, 0x6c, 0x3b, 0x96,
+			  0x7b, 0x8f, 0x4c, 0xd4, 0xa9, 0x1e, 0x28, 0x13,
+			  0xb5, 0x07, 0xae, 0x66, 0xf2, 0xd3, 0x5c, 0x18,
+			  0x28, 0x4f, 0x72, 0x92, 0x18, 0x60, 0x62, 0xe1,
+			  0x0f, 0xd5, 0x51, 0x0d, 0x18, 0x77, 0x53, 0x51,
+			  0xef, 0x33, 0x4e, 0x76, 0x34, 0xab, 0x47, 0x43,
+			  0xf5, 0xb6, 0x8f, 0x49, 0xad, 0xca, 0xb3, 0x84,
+			  0xd3, 0xfd, 0x75, 0xf7, 0x39, 0x0f, 0x40, 0x06,
+			  0xef, 0x2a, 0x29, 0x5c, 0x8c, 0x7a, 0x07, 0x6a,
+			  0xd5, 0x45, 0x46, 0xcd, 0x25, 0xd2, 0x10, 0x7f,
+			  0xbe, 0x14, 0x36, 0xc8, 0x40, 0x92, 0x4a, 0xae,
+			  0xbe, 0x5b, 0x37, 0x08, 0x93, 0xcd, 0x63, 0xd1,
+			  0x32, 0x5b, 0x86, 0x16, 0xfc, 0x48, 0x10, 0x88,
+			  0x6b, 0xc1, 0x52, 0xc5, 0x32, 0x21, 0xb6, 0xdf,
+			  0x37, 0x31, 0x19, 0x39, 0x32, 0x55, 0xee, 0x72,
+			  0xbc, 0xaa, 0x88, 0x01, 0x74, 0xf1, 0x71, 0x7f,
+			  0x91, 0x84, 0xfa, 0x91, 0x64, 0x6f, 0x17, 0xa2,
+			  0x4a, 0xc5, 0x5d, 0x16, 0xbf, 0xdd, 0xca, 0x95,
+			  0x81, 0xa9, 0x2e, 0xda, 0x47, 0x92, 0x01, 0xf0,
+			  0xed, 0xbf, 0x63, 0x36, 0x00, 0xd6, 0x06, 0x6d,
+			  0x1a, 0xb3, 0x6d, 0x5d, 0x24, 0x15, 0xd7, 0x13,
+			  0x51, 0xbb, 0xcd, 0x60, 0x8a, 0x25, 0x10, 0x8d,
+			  0x25, 0x64, 0x19, 0x92, 0xc1, 0xf2, 0x6c, 0x53,
+			  0x1c, 0xf9, 0xf9, 0x02, 0x03, 0xbc, 0x4c, 0xc1,
+			  0x9f, 0x59, 0x27, 0xd8, 0x34, 0xb0, 0xa4, 0x71,
+			  0x16, 0xd3, 0x88, 0x4b, 0xbb, 0x16, 0x4b, 0x8e,
+			  0xc8, 0x83, 0xd1, 0xac, 0x83, 0x2e, 0x56, 0xb3,
+			  0x91, 0x8a, 0x98, 0x60, 0x1a, 0x08, 0xd1, 0x71,
+			  0x88, 0x15, 0x41, 0xd5, 0x94, 0xdb, 0x39, 0x9c,
+			  0x6a, 0xe6, 0x15, 0x12, 0x21, 0x74, 0x5a, 0xec,
+			  0x81, 0x4c, 0x45, 0xb0, 0xb0, 0x5b, 0x56, 0x54,
+			  0x36, 0xfd, 0x6f, 0x13, 0x7a, 0xa1, 0x0a, 0x0c,
+			  0x0b, 0x64, 0x37, 0x61, 0xdb, 0xd6, 0xf9, 0xa9,
+			  0xdc, 0xb9, 0x9b, 0x1a, 0x6e, 0x69, 0x08, 0x54,
+			  0xce, 0x07, 0x69, 0xcd, 0xe3, 0x97, 0x61, 0xd8,
+			  0x2f, 0xcd, 0xec, 0x15, 0xf0, 0xd9, 0x2d, 0x7d,
+			  0x8e, 0x94, 0xad, 0xe8, 0xeb, 0x83, 0xfb, 0xe0 },
+	.ilen	= 528,
+	.output	= (u8[]){ 0x26, 0x37, 0x40, 0x8f, 0xe1, 0x30, 0x86, 0xea,
+			  0x73, 0xf9, 0x71, 0xe3, 0x42, 0x5e, 0x28, 0x20 },
+	.key	= (u8[]){ 0x99, 0xe5, 0x82, 0x2d, 0xd4, 0x17, 0x3c, 0x99,
+			  0x5e, 0x3d, 0xae, 0x0d, 0xde, 0xfb, 0x97, 0x74,
+			  0x3f, 0xde, 0x3b, 0x08, 0x01, 0x34, 0xb3, 0x9f,
+			  0x76, 0xe9, 0xbf, 0x8d, 0x0e, 0x88, 0xd5, 0x46 },
+}, { /* test vectors from Hanno Böck */
+	.input	= (u8[]){ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0x80, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xc5,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xe3, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xac, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xe6,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x00, 0x00, 0x00,
+			  0xaf, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc,
+			  0xcc, 0xcc, 0xff, 0xff, 0xff, 0xf5, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0xff, 0xff, 0xff, 0xe7, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x71, 0x92, 0x05, 0xa8, 0x52, 0x1d,
+			  0xfc },
+	.ilen	= 257,
+	.output	= (u8[]){ 0x85, 0x59, 0xb8, 0x76, 0xec, 0xee, 0xd6, 0x6e,
+			  0xb3, 0x77, 0x98, 0xc0, 0x45, 0x7b, 0xaf, 0xf9 },
+	.key	= (u8[]){ 0x7f, 0x1b, 0x02, 0x64, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc },
+}, {
+	.input	= (u8[]){ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+			  0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+			  0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+			  0xaa, 0xaa, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x80, 0x02, 0x64 },
+	.ilen	= 39,
+	.output	= (u8[]){ 0x00, 0xbd, 0x12, 0x58, 0x97, 0x8e, 0x20, 0x54,
+			  0x44, 0xc9, 0xaa, 0xaa, 0x82, 0x00, 0x6f, 0xed },
+	.key	= (u8[]){ 0xe0, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+			  0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa },
+}, {
+	.input	= (u8[]){ 0x02, 0xfc },
+	.ilen	= 2,
+	.output	= (u8[]){ 0x06, 0x12, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+			  0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c },
+	.key	= (u8[]){ 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+			  0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+			  0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+			  0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c },
+}, {
+	.input	= (u8[]){ 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7a, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x5c, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x6e, 0x7b, 0x00, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7a, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x5c,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b, 0x7b,
+			  0x7b, 0x6e, 0x7b, 0x00, 0x13, 0x00, 0x00, 0x00,
+			  0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0xf2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x20, 0x00, 0xef, 0xff, 0x00,
+			  0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00,
+			  0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x64, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00,
+			  0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf2,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x20, 0x00, 0xef, 0xff, 0x00, 0x09,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x7a, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+			  0x00, 0x09, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc },
+	.ilen	= 415,
+	.output	= (u8[]){ 0x33, 0x20, 0x5b, 0xbf, 0x9e, 0x9f, 0x8f, 0x72,
+			  0x12, 0xab, 0x9e, 0x2a, 0xb9, 0xb7, 0xe4, 0xa5 },
+	.key	= (u8[]){ 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7b, 0x7b },
+}, {
+	.input	= (u8[]){ 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0xff, 0xff, 0xff, 0xe9,
+			  0xe9, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac,
+			  0xac, 0xac, 0xac, 0xac, 0x00, 0x00, 0xac, 0xac,
+			  0xec, 0x01, 0x00, 0xac, 0xac, 0xac, 0x2c, 0xac,
+			  0xa2, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac, 0xac,
+			  0xac, 0xac, 0xac, 0xac, 0x64, 0xf2 },
+	.ilen	= 118,
+	.output	= (u8[]){ 0x02, 0xee, 0x7c, 0x8c, 0x54, 0x6d, 0xde, 0xb1,
+			  0xa4, 0x67, 0xe4, 0xc3, 0x98, 0x11, 0x58, 0xb9 },
+	.key	= (u8[]){ 0x00, 0x00, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x7f,
+			  0x01, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0xcf, 0x77, 0x77, 0x77, 0x77, 0x77,
+			  0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77, 0x77 },
+}, { /* nacl */
+	.input	= (u8[]){ 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73,
+			  0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce,
+			  0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4,
+			  0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a,
+			  0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b,
+			  0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72,
+			  0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2,
+			  0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38,
+			  0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a,
+			  0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae,
+			  0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea,
+			  0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda,
+			  0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde,
+			  0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3,
+			  0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6,
+			  0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74,
+			  0xe3, 0x55, 0xa5 },
+	.ilen	= 131,
+	.output	= (u8[]){ 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5,
+			  0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9 },
+	.key	= (u8[]){ 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91,
+			  0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25,
+			  0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65,
+			  0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80 },
+}, { /* wrap 2^130-5 */
+	.input	= (u8[]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.ilen	= 16,
+	.output	= (u8[]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.key	= (u8[]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+}, { /* wrap 2^128 */
+	.input	= (u8[]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.ilen	= 16,
+	.output	= (u8[]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.key	= (u8[]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+}, { /* limb carry */
+	.input	= (u8[]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.ilen	= 48,
+	.output	= (u8[]){ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.key	= (u8[]){ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+}, { /* 2^130-5 */
+	.input	= (u8[]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xfb, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
+			  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
+			  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+			  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 },
+	.ilen	= 48,
+	.output	= (u8[]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.key	= (u8[]){ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+}, { /* 2^130-6 */
+	.input	= (u8[]){ 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.ilen	= 16,
+	.output	= (u8[]){ 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+			  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+	.key	= (u8[]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+}, { /* 5*H+L reduction intermediate */
+	.input	= (u8[]){ 0xe3, 0x35, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0xb9,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x33, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0x79, 0xcd,
+			  0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.ilen	= 64,
+	.output	= (u8[]){ 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.key	= (u8[]){ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+}, { /* 5*H+L reduction final */
+	.input	= (u8[]){ 0xe3, 0x35, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0xb9,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x33, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0x79, 0xcd,
+			  0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.ilen	= 48,
+	.output	= (u8[]){ 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+	.key	= (u8[]){ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+} };
+
+bool __init poly1305_selftest(void)
+{
+	simd_context_t simd_context = simd_get();
+	bool success = true;
+	size_t i, j;
+
+	for (i = 0; i < ARRAY_SIZE(poly1305_testvecs); ++i) {
+		struct poly1305_ctx poly1305;
+		u8 out[POLY1305_MAC_SIZE];
+
+		memset(out, 0, sizeof(out));
+		memset(&poly1305, 0, sizeof(poly1305));
+		poly1305_init(&poly1305, poly1305_testvecs[i].key,
+			      simd_context);
+		poly1305_update(&poly1305, poly1305_testvecs[i].input,
+				poly1305_testvecs[i].ilen, simd_context);
+		poly1305_finish(&poly1305, out, simd_context);
+		if (memcmp(out, poly1305_testvecs[i].output,
+			   POLY1305_MAC_SIZE)) {
+			pr_info("poly1305 self-test %zu: FAIL\n", i + 1);
+			success = false;
+		}
+		simd_context = simd_relax(simd_context);
+
+		if (poly1305_testvecs[i].ilen <= 1)
+			continue;
+
+		for (j = 1; j < poly1305_testvecs[i].ilen - 1; ++j) {
+			memset(out, 0, sizeof(out));
+			memset(&poly1305, 0, sizeof(poly1305));
+			poly1305_init(&poly1305, poly1305_testvecs[i].key,
+				      simd_context);
+			poly1305_update(&poly1305, poly1305_testvecs[i].input,
+					j, simd_context);
+			poly1305_update(&poly1305,
+					poly1305_testvecs[i].input + j,
+					poly1305_testvecs[i].ilen - j,
+					simd_context);
+			poly1305_finish(&poly1305, out, simd_context);
+			if (memcmp(out, poly1305_testvecs[i].output,
+				   POLY1305_MAC_SIZE)) {
+				pr_info("poly1305 self-test %zu (split %zu): FAIL\n",
+					i + 1, j);
+				success = false;
+			}
+			simd_context = simd_relax(simd_context);
+		}
+	}
+	simd_put(simd_context);
+
+	if (success)
+		pr_info("poly1305 self-tests: pass\n");
+
+	return success;
+}
+#endif