Message ID | 20180926095159.22135-1-ard.biesheuvel@linaro.org (mailing list archive) |
---|---|
State | Accepted |
Delegated to: | Herbert Xu |
Headers | show |
Series | crypto: qat - move temp buffers off the stack | expand |
On Wed, 26 Sep 2018 at 11:53, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: > > Arnd reports that with Kees's latest VLA patches applied, the HMAC > handling in the QAT driver uses a worst case estimate of 160 bytes > for the SHA blocksize, allowing the compiler to determine the size > of the stack frame at runtime and throw a warning: > s/runtime/compile time/ > drivers/crypto/qat/qat_common/qat_algs.c: In function 'qat_alg_do_precomputes': > drivers/crypto/qat/qat_common/qat_algs.c:257:1: error: the frame size > of 1112 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] > > Given that this worst case estimate is only 32 bytes larger than the > actual block size of SHA-512, the use of a VLA here was hiding the > excessive size of the stack frame from the compiler, and so we should > try to move these buffers off the stack. > > So move the ipad/opad buffers and the various SHA state descriptors > into the tfm context struct. Since qat_alg_do_precomputes() is only > called in the context of a setkey() operation, this should be safe. > Using SHA512_BLOCK_SIZE for the size of the ipad/opad buffers allows > them to be used by SHA-1/SHA-256 as well. > > Reported-by: Arnd Bergmann <arnd@arndb.de> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> > --- > This applies against v4.19-rc while Arnd's report was about -next. However, > since Kees's VLA change results in a warning about a pre-existing condition, > we may decide to apply it as a fix, and handle the conflict with Kees's > patch in cryptodev. Otherwise, I can respin it to apply onto cryptodev > directly. The patch was build tested only - I don't have the hardware. > > Thoughts anyone? > > drivers/crypto/qat/qat_common/qat_algs.c | 60 ++++++++++---------- > 1 file changed, 31 insertions(+), 29 deletions(-) > > diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c > index 1138e41d6805..d2698299896f 100644 > --- a/drivers/crypto/qat/qat_common/qat_algs.c > +++ b/drivers/crypto/qat/qat_common/qat_algs.c > @@ -113,6 +113,13 @@ struct qat_alg_aead_ctx { > struct crypto_shash *hash_tfm; > enum icp_qat_hw_auth_algo qat_hash_alg; > struct qat_crypto_instance *inst; > + union { > + struct sha1_state sha1; > + struct sha256_state sha256; > + struct sha512_state sha512; > + }; > + char ipad[SHA512_BLOCK_SIZE]; /* sufficient for SHA-1/SHA-256 as well */ > + char opad[SHA512_BLOCK_SIZE]; > }; > > struct qat_alg_ablkcipher_ctx { > @@ -148,37 +155,32 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, > unsigned int auth_keylen) > { > SHASH_DESC_ON_STACK(shash, ctx->hash_tfm); > - struct sha1_state sha1; > - struct sha256_state sha256; > - struct sha512_state sha512; > int block_size = crypto_shash_blocksize(ctx->hash_tfm); > int digest_size = crypto_shash_digestsize(ctx->hash_tfm); > - char ipad[block_size]; > - char opad[block_size]; > __be32 *hash_state_out; > __be64 *hash512_state_out; > int i, offset; > > - memset(ipad, 0, block_size); > - memset(opad, 0, block_size); > + memset(ctx->ipad, 0, block_size); > + memset(ctx->opad, 0, block_size); > shash->tfm = ctx->hash_tfm; > shash->flags = 0x0; > > if (auth_keylen > block_size) { > int ret = crypto_shash_digest(shash, auth_key, > - auth_keylen, ipad); > + auth_keylen, ctx->ipad); > if (ret) > return ret; > > - memcpy(opad, ipad, digest_size); > + memcpy(ctx->opad, ctx->ipad, digest_size); > } else { > - memcpy(ipad, auth_key, auth_keylen); > - memcpy(opad, auth_key, auth_keylen); > + memcpy(ctx->ipad, auth_key, auth_keylen); > + memcpy(ctx->opad, auth_key, auth_keylen); > } > > for (i = 0; i < block_size; i++) { > - char *ipad_ptr = ipad + i; > - char *opad_ptr = opad + i; > + char *ipad_ptr = ctx->ipad + i; > + char *opad_ptr = ctx->opad + i; > *ipad_ptr ^= HMAC_IPAD_VALUE; > *opad_ptr ^= HMAC_OPAD_VALUE; > } > @@ -186,7 +188,7 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, > if (crypto_shash_init(shash)) > return -EFAULT; > > - if (crypto_shash_update(shash, ipad, block_size)) > + if (crypto_shash_update(shash, ctx->ipad, block_size)) > return -EFAULT; > > hash_state_out = (__be32 *)hash->sha.state1; > @@ -194,22 +196,22 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, > > switch (ctx->qat_hash_alg) { > case ICP_QAT_HW_AUTH_ALGO_SHA1: > - if (crypto_shash_export(shash, &sha1)) > + if (crypto_shash_export(shash, &ctx->sha1)) > return -EFAULT; > for (i = 0; i < digest_size >> 2; i++, hash_state_out++) > - *hash_state_out = cpu_to_be32(*(sha1.state + i)); > + *hash_state_out = cpu_to_be32(ctx->sha1.state[i]); > break; > case ICP_QAT_HW_AUTH_ALGO_SHA256: > - if (crypto_shash_export(shash, &sha256)) > + if (crypto_shash_export(shash, &ctx->sha256)) > return -EFAULT; > for (i = 0; i < digest_size >> 2; i++, hash_state_out++) > - *hash_state_out = cpu_to_be32(*(sha256.state + i)); > + *hash_state_out = cpu_to_be32(ctx->sha256.state[i]); > break; > case ICP_QAT_HW_AUTH_ALGO_SHA512: > - if (crypto_shash_export(shash, &sha512)) > + if (crypto_shash_export(shash, &ctx->sha512)) > return -EFAULT; > for (i = 0; i < digest_size >> 3; i++, hash512_state_out++) > - *hash512_state_out = cpu_to_be64(*(sha512.state + i)); > + *hash512_state_out = cpu_to_be64(ctx->sha512.state[i]); > break; > default: > return -EFAULT; > @@ -218,7 +220,7 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, > if (crypto_shash_init(shash)) > return -EFAULT; > > - if (crypto_shash_update(shash, opad, block_size)) > + if (crypto_shash_update(shash, ctx->opad, block_size)) > return -EFAULT; > > offset = round_up(qat_get_inter_state_size(ctx->qat_hash_alg), 8); > @@ -227,28 +229,28 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, > > switch (ctx->qat_hash_alg) { > case ICP_QAT_HW_AUTH_ALGO_SHA1: > - if (crypto_shash_export(shash, &sha1)) > + if (crypto_shash_export(shash, &ctx->sha1)) > return -EFAULT; > for (i = 0; i < digest_size >> 2; i++, hash_state_out++) > - *hash_state_out = cpu_to_be32(*(sha1.state + i)); > + *hash_state_out = cpu_to_be32(ctx->sha1.state[i]); > break; > case ICP_QAT_HW_AUTH_ALGO_SHA256: > - if (crypto_shash_export(shash, &sha256)) > + if (crypto_shash_export(shash, &ctx->sha256)) > return -EFAULT; > for (i = 0; i < digest_size >> 2; i++, hash_state_out++) > - *hash_state_out = cpu_to_be32(*(sha256.state + i)); > + *hash_state_out = cpu_to_be32(ctx->sha256.state[i]); > break; > case ICP_QAT_HW_AUTH_ALGO_SHA512: > - if (crypto_shash_export(shash, &sha512)) > + if (crypto_shash_export(shash, &ctx->sha512)) > return -EFAULT; > for (i = 0; i < digest_size >> 3; i++, hash512_state_out++) > - *hash512_state_out = cpu_to_be64(*(sha512.state + i)); > + *hash512_state_out = cpu_to_be64(ctx->sha512.state[i]); > break; > default: > return -EFAULT; > } > - memzero_explicit(ipad, block_size); > - memzero_explicit(opad, block_size); > + memzero_explicit(ctx->ipad, block_size); > + memzero_explicit(ctx->opad, block_size); > return 0; > } > > -- > 2.18.0 >
On Wed, Sep 26, 2018 at 2:51 AM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: > Arnd reports that with Kees's latest VLA patches applied, the HMAC > handling in the QAT driver uses a worst case estimate of 160 bytes > for the SHA blocksize, allowing the compiler to determine the size > of the stack frame at runtime and throw a warning: > > drivers/crypto/qat/qat_common/qat_algs.c: In function 'qat_alg_do_precomputes': > drivers/crypto/qat/qat_common/qat_algs.c:257:1: error: the frame size > of 1112 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] > > Given that this worst case estimate is only 32 bytes larger than the > actual block size of SHA-512, the use of a VLA here was hiding the > excessive size of the stack frame from the compiler, and so we should > try to move these buffers off the stack. > > So move the ipad/opad buffers and the various SHA state descriptors > into the tfm context struct. Since qat_alg_do_precomputes() is only > called in the context of a setkey() operation, this should be safe. > Using SHA512_BLOCK_SIZE for the size of the ipad/opad buffers allows > them to be used by SHA-1/SHA-256 as well. > > Reported-by: Arnd Bergmann <arnd@arndb.de> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> > --- > This applies against v4.19-rc while Arnd's report was about -next. However, > since Kees's VLA change results in a warning about a pre-existing condition, > we may decide to apply it as a fix, and handle the conflict with Kees's > patch in cryptodev. Otherwise, I can respin it to apply onto cryptodev > directly. The patch was build tested only - I don't have the hardware. I think the depth warning is minor (90 bytes over), so I don't think it's high priority to backport the fix. I'm fine either way, of course. Reviewed-by: Kees Cook <keescook@chromium.org> -Kees
On Wed, Sep 26, 2018 at 5:43 PM Kees Cook <keescook@chromium.org> wrote: > On Wed, Sep 26, 2018 at 2:51 AM, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote: > > I think the depth warning is minor (90 bytes over), so I don't think > it's high priority to backport the fix. I'm fine either way, of > course. The way I see these warnings, anything that is close to 1 kilobyte of stack usage is a serious issue. The warning limit was just picked because we have some existing functions that are known to be safe and close to the limit. Arnd
On Wed, Sep 26, 2018 at 11:51:59AM +0200, Ard Biesheuvel wrote: > Arnd reports that with Kees's latest VLA patches applied, the HMAC > handling in the QAT driver uses a worst case estimate of 160 bytes > for the SHA blocksize, allowing the compiler to determine the size > of the stack frame at runtime and throw a warning: > > drivers/crypto/qat/qat_common/qat_algs.c: In function 'qat_alg_do_precomputes': > drivers/crypto/qat/qat_common/qat_algs.c:257:1: error: the frame size > of 1112 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] > > Given that this worst case estimate is only 32 bytes larger than the > actual block size of SHA-512, the use of a VLA here was hiding the > excessive size of the stack frame from the compiler, and so we should > try to move these buffers off the stack. > > So move the ipad/opad buffers and the various SHA state descriptors > into the tfm context struct. Since qat_alg_do_precomputes() is only > called in the context of a setkey() operation, this should be safe. > Using SHA512_BLOCK_SIZE for the size of the ipad/opad buffers allows > them to be used by SHA-1/SHA-256 as well. > > Reported-by: Arnd Bergmann <arnd@arndb.de> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> > --- > This applies against v4.19-rc while Arnd's report was about -next. However, > since Kees's VLA change results in a warning about a pre-existing condition, > we may decide to apply it as a fix, and handle the conflict with Kees's > patch in cryptodev. Otherwise, I can respin it to apply onto cryptodev > directly. The patch was build tested only - I don't have the hardware. > > Thoughts anyone? I applied it against cryptodev only. I don't think it's bad enough to warrant a backport to stable though. But if you guys disagree we could always send the backport to stable after this goes upstream. > drivers/crypto/qat/qat_common/qat_algs.c | 60 ++++++++++---------- > 1 file changed, 31 insertions(+), 29 deletions(-) Patch applied. Thanks.
On 5 October 2018 at 04:29, Herbert Xu <herbert@gondor.apana.org.au> wrote: > On Wed, Sep 26, 2018 at 11:51:59AM +0200, Ard Biesheuvel wrote: >> Arnd reports that with Kees's latest VLA patches applied, the HMAC >> handling in the QAT driver uses a worst case estimate of 160 bytes >> for the SHA blocksize, allowing the compiler to determine the size >> of the stack frame at runtime and throw a warning: >> >> drivers/crypto/qat/qat_common/qat_algs.c: In function 'qat_alg_do_precomputes': >> drivers/crypto/qat/qat_common/qat_algs.c:257:1: error: the frame size >> of 1112 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] >> >> Given that this worst case estimate is only 32 bytes larger than the >> actual block size of SHA-512, the use of a VLA here was hiding the >> excessive size of the stack frame from the compiler, and so we should >> try to move these buffers off the stack. >> >> So move the ipad/opad buffers and the various SHA state descriptors >> into the tfm context struct. Since qat_alg_do_precomputes() is only >> called in the context of a setkey() operation, this should be safe. >> Using SHA512_BLOCK_SIZE for the size of the ipad/opad buffers allows >> them to be used by SHA-1/SHA-256 as well. >> >> Reported-by: Arnd Bergmann <arnd@arndb.de> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> >> --- >> This applies against v4.19-rc while Arnd's report was about -next. However, >> since Kees's VLA change results in a warning about a pre-existing condition, >> we may decide to apply it as a fix, and handle the conflict with Kees's >> patch in cryptodev. Otherwise, I can respin it to apply onto cryptodev >> directly. The patch was build tested only - I don't have the hardware. >> >> Thoughts anyone? > > I applied it against cryptodev only. I don't think it's bad enough > to warrant a backport to stable though. But if you guys disagree we > could always send the backport to stable after this goes upstream. > Works for me.
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 1138e41d6805..d2698299896f 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -113,6 +113,13 @@ struct qat_alg_aead_ctx { struct crypto_shash *hash_tfm; enum icp_qat_hw_auth_algo qat_hash_alg; struct qat_crypto_instance *inst; + union { + struct sha1_state sha1; + struct sha256_state sha256; + struct sha512_state sha512; + }; + char ipad[SHA512_BLOCK_SIZE]; /* sufficient for SHA-1/SHA-256 as well */ + char opad[SHA512_BLOCK_SIZE]; }; struct qat_alg_ablkcipher_ctx { @@ -148,37 +155,32 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, unsigned int auth_keylen) { SHASH_DESC_ON_STACK(shash, ctx->hash_tfm); - struct sha1_state sha1; - struct sha256_state sha256; - struct sha512_state sha512; int block_size = crypto_shash_blocksize(ctx->hash_tfm); int digest_size = crypto_shash_digestsize(ctx->hash_tfm); - char ipad[block_size]; - char opad[block_size]; __be32 *hash_state_out; __be64 *hash512_state_out; int i, offset; - memset(ipad, 0, block_size); - memset(opad, 0, block_size); + memset(ctx->ipad, 0, block_size); + memset(ctx->opad, 0, block_size); shash->tfm = ctx->hash_tfm; shash->flags = 0x0; if (auth_keylen > block_size) { int ret = crypto_shash_digest(shash, auth_key, - auth_keylen, ipad); + auth_keylen, ctx->ipad); if (ret) return ret; - memcpy(opad, ipad, digest_size); + memcpy(ctx->opad, ctx->ipad, digest_size); } else { - memcpy(ipad, auth_key, auth_keylen); - memcpy(opad, auth_key, auth_keylen); + memcpy(ctx->ipad, auth_key, auth_keylen); + memcpy(ctx->opad, auth_key, auth_keylen); } for (i = 0; i < block_size; i++) { - char *ipad_ptr = ipad + i; - char *opad_ptr = opad + i; + char *ipad_ptr = ctx->ipad + i; + char *opad_ptr = ctx->opad + i; *ipad_ptr ^= HMAC_IPAD_VALUE; *opad_ptr ^= HMAC_OPAD_VALUE; } @@ -186,7 +188,7 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, if (crypto_shash_init(shash)) return -EFAULT; - if (crypto_shash_update(shash, ipad, block_size)) + if (crypto_shash_update(shash, ctx->ipad, block_size)) return -EFAULT; hash_state_out = (__be32 *)hash->sha.state1; @@ -194,22 +196,22 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, switch (ctx->qat_hash_alg) { case ICP_QAT_HW_AUTH_ALGO_SHA1: - if (crypto_shash_export(shash, &sha1)) + if (crypto_shash_export(shash, &ctx->sha1)) return -EFAULT; for (i = 0; i < digest_size >> 2; i++, hash_state_out++) - *hash_state_out = cpu_to_be32(*(sha1.state + i)); + *hash_state_out = cpu_to_be32(ctx->sha1.state[i]); break; case ICP_QAT_HW_AUTH_ALGO_SHA256: - if (crypto_shash_export(shash, &sha256)) + if (crypto_shash_export(shash, &ctx->sha256)) return -EFAULT; for (i = 0; i < digest_size >> 2; i++, hash_state_out++) - *hash_state_out = cpu_to_be32(*(sha256.state + i)); + *hash_state_out = cpu_to_be32(ctx->sha256.state[i]); break; case ICP_QAT_HW_AUTH_ALGO_SHA512: - if (crypto_shash_export(shash, &sha512)) + if (crypto_shash_export(shash, &ctx->sha512)) return -EFAULT; for (i = 0; i < digest_size >> 3; i++, hash512_state_out++) - *hash512_state_out = cpu_to_be64(*(sha512.state + i)); + *hash512_state_out = cpu_to_be64(ctx->sha512.state[i]); break; default: return -EFAULT; @@ -218,7 +220,7 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, if (crypto_shash_init(shash)) return -EFAULT; - if (crypto_shash_update(shash, opad, block_size)) + if (crypto_shash_update(shash, ctx->opad, block_size)) return -EFAULT; offset = round_up(qat_get_inter_state_size(ctx->qat_hash_alg), 8); @@ -227,28 +229,28 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, switch (ctx->qat_hash_alg) { case ICP_QAT_HW_AUTH_ALGO_SHA1: - if (crypto_shash_export(shash, &sha1)) + if (crypto_shash_export(shash, &ctx->sha1)) return -EFAULT; for (i = 0; i < digest_size >> 2; i++, hash_state_out++) - *hash_state_out = cpu_to_be32(*(sha1.state + i)); + *hash_state_out = cpu_to_be32(ctx->sha1.state[i]); break; case ICP_QAT_HW_AUTH_ALGO_SHA256: - if (crypto_shash_export(shash, &sha256)) + if (crypto_shash_export(shash, &ctx->sha256)) return -EFAULT; for (i = 0; i < digest_size >> 2; i++, hash_state_out++) - *hash_state_out = cpu_to_be32(*(sha256.state + i)); + *hash_state_out = cpu_to_be32(ctx->sha256.state[i]); break; case ICP_QAT_HW_AUTH_ALGO_SHA512: - if (crypto_shash_export(shash, &sha512)) + if (crypto_shash_export(shash, &ctx->sha512)) return -EFAULT; for (i = 0; i < digest_size >> 3; i++, hash512_state_out++) - *hash512_state_out = cpu_to_be64(*(sha512.state + i)); + *hash512_state_out = cpu_to_be64(ctx->sha512.state[i]); break; default: return -EFAULT; } - memzero_explicit(ipad, block_size); - memzero_explicit(opad, block_size); + memzero_explicit(ctx->ipad, block_size); + memzero_explicit(ctx->opad, block_size); return 0; }
Arnd reports that with Kees's latest VLA patches applied, the HMAC handling in the QAT driver uses a worst case estimate of 160 bytes for the SHA blocksize, allowing the compiler to determine the size of the stack frame at runtime and throw a warning: drivers/crypto/qat/qat_common/qat_algs.c: In function 'qat_alg_do_precomputes': drivers/crypto/qat/qat_common/qat_algs.c:257:1: error: the frame size of 1112 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Given that this worst case estimate is only 32 bytes larger than the actual block size of SHA-512, the use of a VLA here was hiding the excessive size of the stack frame from the compiler, and so we should try to move these buffers off the stack. So move the ipad/opad buffers and the various SHA state descriptors into the tfm context struct. Since qat_alg_do_precomputes() is only called in the context of a setkey() operation, this should be safe. Using SHA512_BLOCK_SIZE for the size of the ipad/opad buffers allows them to be used by SHA-1/SHA-256 as well. Reported-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> --- This applies against v4.19-rc while Arnd's report was about -next. However, since Kees's VLA change results in a warning about a pre-existing condition, we may decide to apply it as a fix, and handle the conflict with Kees's patch in cryptodev. Otherwise, I can respin it to apply onto cryptodev directly. The patch was build tested only - I don't have the hardware. Thoughts anyone? drivers/crypto/qat/qat_common/qat_algs.c | 60 ++++++++++---------- 1 file changed, 31 insertions(+), 29 deletions(-)