Message ID | 20181009125541.24455-4-berrange@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | crypto: improve performance of XTS cipher mode | expand |
On Tue 09 Oct 2018 02:55:38 PM CEST, Daniel P. Berrangé wrote: > The new type is designed to allow use of 64-bit arithmetic instead > of operating 1-byte at a time. The following patches will use this to > improve performance. > > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> I suppose that the fixes for the endianness problem may end up requiring you to change this, but the patch itself is fine as it is now. Reviewed-by: Alberto Garcia <berto@igalia.com> Berto
On Tue 09 Oct 2018 02:55:38 PM CEST, Daniel P. Berrangé wrote: > @@ -85,7 +90,7 @@ void xts_decrypt(const void *datactx, > uint8_t *dst, > const uint8_t *src) > { > - uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE]; > + xts_uint128 PP, CC, T; > unsigned long i, m, mo, lim; [...] > /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */ > for (i = 0; i < mo; i++) { > - CC[i] = src[XTS_BLOCK_SIZE + i]; > - dst[XTS_BLOCK_SIZE + i] = PP[i]; > + ((uint8_t *)&CC)[i] = src[XTS_BLOCK_SIZE + i]; > + dst[XTS_BLOCK_SIZE + i] = ((uint8_t *)&PP)[i]; > } On second thoughts, these casts are a bit cumbersome. I wonder if it isn't better to keep the array a uint8_t[] and only treat it as xts_uint128 in the places where you actually do 64-bit operations (xts_uint128_xor, xts_mult_x). Berto
On Tue, Oct 09, 2018 at 04:50:16PM +0200, Alberto Garcia wrote: > On Tue 09 Oct 2018 02:55:38 PM CEST, Daniel P. Berrangé wrote: > > > @@ -85,7 +90,7 @@ void xts_decrypt(const void *datactx, > > uint8_t *dst, > > const uint8_t *src) > > { > > - uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE]; > > + xts_uint128 PP, CC, T; > > unsigned long i, m, mo, lim; > > [...] > > > /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */ > > for (i = 0; i < mo; i++) { > > - CC[i] = src[XTS_BLOCK_SIZE + i]; > > - dst[XTS_BLOCK_SIZE + i] = PP[i]; > > + ((uint8_t *)&CC)[i] = src[XTS_BLOCK_SIZE + i]; > > + dst[XTS_BLOCK_SIZE + i] = ((uint8_t *)&PP)[i]; > > } > > On second thoughts, these casts are a bit cumbersome. I wonder if it > isn't better to keep the array a uint8_t[] and only treat it as > xts_uint128 in the places where you actually do 64-bit operations > (xts_uint128_xor, xts_mult_x). I had done that originally, but it just shifts ugly casts from one place to another place in the code. I preferred the idea of storing it all as a 128-bit data type since that's matching the operational block size. A further alternative is for xts_uint128 to be a union providing both, and then have an extra level of access for respective fields, which I had also tried at one time but ultimately I decided I didn't mind the casts. Regards, Daniel
On Tue 09 Oct 2018 04:58:39 PM CEST, Daniel P. Berrangé wrote: >> > @@ -85,7 +90,7 @@ void xts_decrypt(const void *datactx, >> > uint8_t *dst, >> > const uint8_t *src) >> > { >> > - uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE]; >> > + xts_uint128 PP, CC, T; >> > unsigned long i, m, mo, lim; >> >> [...] >> >> > /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */ >> > for (i = 0; i < mo; i++) { >> > - CC[i] = src[XTS_BLOCK_SIZE + i]; >> > - dst[XTS_BLOCK_SIZE + i] = PP[i]; >> > + ((uint8_t *)&CC)[i] = src[XTS_BLOCK_SIZE + i]; >> > + dst[XTS_BLOCK_SIZE + i] = ((uint8_t *)&PP)[i]; >> > } >> >> On second thoughts, these casts are a bit cumbersome. I wonder if it >> isn't better to keep the array a uint8_t[] and only treat it as >> xts_uint128 in the places where you actually do 64-bit operations >> (xts_uint128_xor, xts_mult_x). > > I had done that originally, but it just shifts ugly casts from one > place to another place in the code. Does it really? There are a dozen casts to uint8_t * in different places. If you use uint8_t[] you would only need something like this: static void xts_mult_x(uint8_t *I8) { xts_uint128 *I = (xts_uint128 *) I8; /* ... the rest of the function remains the same ... */ } And something similar in xts_uint128_xor(), which could be an inline function instead of a macro. Berto
diff --git a/crypto/xts.c b/crypto/xts.c index 3c1a92f01d..ded4365191 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -26,6 +26,11 @@ #include "qemu/osdep.h" #include "crypto/xts.h" +typedef struct { + uint64_t a; + uint64_t b; +} xts_uint128; + static void xts_mult_x(uint8_t *I) { int x; @@ -85,7 +90,7 @@ void xts_decrypt(const void *datactx, uint8_t *dst, const uint8_t *src) { - uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE]; + xts_uint128 PP, CC, T; unsigned long i, m, mo, lim; /* get number of blocks */ @@ -102,10 +107,10 @@ void xts_decrypt(const void *datactx, } /* encrypt the iv */ - encfunc(tweakctx, XTS_BLOCK_SIZE, T, iv); + encfunc(tweakctx, XTS_BLOCK_SIZE, (uint8_t *)&T, iv); for (i = 0; i < lim; i++) { - xts_tweak_encdec(datactx, decfunc, src, dst, T); + xts_tweak_encdec(datactx, decfunc, src, dst, (uint8_t *)&T); src += XTS_BLOCK_SIZE; dst += XTS_BLOCK_SIZE; @@ -113,27 +118,27 @@ void xts_decrypt(const void *datactx, /* if length is not a multiple of XTS_BLOCK_SIZE then */ if (mo > 0) { - memcpy(CC, T, XTS_BLOCK_SIZE); - xts_mult_x(CC); + memcpy(&CC, &T, XTS_BLOCK_SIZE); + xts_mult_x((uint8_t *)&CC); /* PP = tweak decrypt block m-1 */ - xts_tweak_encdec(datactx, decfunc, src, PP, CC); + xts_tweak_encdec(datactx, decfunc, src, (uint8_t *)&PP, (uint8_t *)&CC); /* Pm = first length % XTS_BLOCK_SIZE bytes of PP */ for (i = 0; i < mo; i++) { - CC[i] = src[XTS_BLOCK_SIZE + i]; - dst[XTS_BLOCK_SIZE + i] = PP[i]; + ((uint8_t *)&CC)[i] = src[XTS_BLOCK_SIZE + i]; + dst[XTS_BLOCK_SIZE + i] = ((uint8_t *)&PP)[i]; } for (; i < XTS_BLOCK_SIZE; i++) { - CC[i] = PP[i]; + ((uint8_t *)&CC)[i] = ((uint8_t *)&PP)[i]; } /* Pm-1 = Tweak uncrypt CC */ - xts_tweak_encdec(datactx, decfunc, CC, dst, T); + xts_tweak_encdec(datactx, decfunc, (uint8_t *)&CC, dst, (uint8_t *)&T); } /* Decrypt the iv back */ - decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T); + decfunc(tweakctx, XTS_BLOCK_SIZE, iv, (uint8_t *)&T); } @@ -146,7 +151,7 @@ void xts_encrypt(const void 
*datactx, uint8_t *dst, const uint8_t *src) { - uint8_t PP[XTS_BLOCK_SIZE], CC[XTS_BLOCK_SIZE], T[XTS_BLOCK_SIZE]; + xts_uint128 PP, CC, T; unsigned long i, m, mo, lim; /* get number of blocks */ @@ -163,10 +168,10 @@ void xts_encrypt(const void *datactx, } /* encrypt the iv */ - encfunc(tweakctx, XTS_BLOCK_SIZE, T, iv); + encfunc(tweakctx, XTS_BLOCK_SIZE, (uint8_t *)&T, iv); for (i = 0; i < lim; i++) { - xts_tweak_encdec(datactx, encfunc, src, dst, T); + xts_tweak_encdec(datactx, encfunc, src, dst, (uint8_t *)&T); dst += XTS_BLOCK_SIZE; src += XTS_BLOCK_SIZE; @@ -175,22 +180,22 @@ void xts_encrypt(const void *datactx, /* if length is not a multiple of XTS_BLOCK_SIZE then */ if (mo > 0) { /* CC = tweak encrypt block m-1 */ - xts_tweak_encdec(datactx, encfunc, src, CC, T); + xts_tweak_encdec(datactx, encfunc, src, (uint8_t *)&CC, (uint8_t *)&T); /* Cm = first length % XTS_BLOCK_SIZE bytes of CC */ for (i = 0; i < mo; i++) { - PP[i] = src[XTS_BLOCK_SIZE + i]; - dst[XTS_BLOCK_SIZE + i] = CC[i]; + ((uint8_t *)&PP)[i] = src[XTS_BLOCK_SIZE + i]; + dst[XTS_BLOCK_SIZE + i] = ((uint8_t *)&CC)[i]; } for (; i < XTS_BLOCK_SIZE; i++) { - PP[i] = CC[i]; + ((uint8_t *)&PP)[i] = ((uint8_t *)&CC)[i]; } /* Cm-1 = Tweak encrypt PP */ - xts_tweak_encdec(datactx, encfunc, PP, dst, T); + xts_tweak_encdec(datactx, encfunc, (uint8_t *)&PP, dst, (uint8_t *)&T); } /* Decrypt the iv back */ - decfunc(tweakctx, XTS_BLOCK_SIZE, iv, T); + decfunc(tweakctx, XTS_BLOCK_SIZE, iv, (uint8_t *)&T); }
The new type is designed to allow use of 64-bit arithmetic instead of operating 1-byte at a time. The following patches will use this to improve performance. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> --- crypto/xts.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-)