diff mbox

crypto: ctr: avoid VLA use

Message ID 1521033450-14447-1-git-send-email-s.mesoraca16@gmail.com (mailing list archive)
State Superseded
Headers show

Commit Message

Salvatore Mesoraca March 14, 2018, 1:17 p.m. UTC
All ciphers implemented in Linux have a block size less than or
equal to 16 bytes, and the most demanding hardware requires 16-byte
alignment for the block buffer.
We avoid 2 VLAs[1] by always allocating 16 bytes with 16-byte
alignment, unless the architecture supports efficient unaligned
accesses.
We also check, at runtime, that our assumptions still stand,
possibly dynamically allocating a new buffer, just in case
something changes in the future.

[1] https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
---

Notes:
    Can we maybe skip the runtime check?

 crypto/ctr.c | 50 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 8 deletions(-)

Comments

Stephan Mueller March 14, 2018, 1:31 p.m. UTC | #1
Am Mittwoch, 14. März 2018, 14:17:30 CET schrieb Salvatore Mesoraca:

Hi Salvatore,

>  	if (walk.nbytes) {
> -		crypto_ctr_crypt_final(&walk, child);
> -		err = blkcipher_walk_done(desc, &walk, 0);
> +		err = crypto_ctr_crypt_final(&walk, child);
> +		err = blkcipher_walk_done(desc, &walk, err);

I guess you either want to handle the error from crypto_ctr_crypt_final or do 
an err |= blkcipher_walk_done.

>  	}
> 
>  	return err;



Ciao
Stephan
Salvatore Mesoraca March 14, 2018, 1:46 p.m. UTC | #2
2018-03-14 14:31 GMT+01:00 Stephan Mueller <smueller@chronox.de>:
> Am Mittwoch, 14. März 2018, 14:17:30 CET schrieb Salvatore Mesoraca:
>
> Hi Salvatore,
>
>>       if (walk.nbytes) {
>> -             crypto_ctr_crypt_final(&walk, child);
>> -             err = blkcipher_walk_done(desc, &walk, 0);
>> +             err = crypto_ctr_crypt_final(&walk, child);
>> +             err = blkcipher_walk_done(desc, &walk, err);
>
> I guess you either want to handle the error from crypto_ctr_crypt_final or do
> an err |= blkcipher_walk_done.

I think that blkcipher_walk_done handles and returns the error for me.
Am I wrong?

Best regards,

Salvatore
Stephan Mueller March 14, 2018, 1:52 p.m. UTC | #3
Am Mittwoch, 14. März 2018, 14:46:29 CET schrieb Salvatore Mesoraca:

Hi Salvatore,

> 2018-03-14 14:31 GMT+01:00 Stephan Mueller <smueller@chronox.de>:
> > Am Mittwoch, 14. März 2018, 14:17:30 CET schrieb Salvatore Mesoraca:
> > 
> > Hi Salvatore,
> > 
> >>       if (walk.nbytes) {
> >> 
> >> -             crypto_ctr_crypt_final(&walk, child);
> >> -             err = blkcipher_walk_done(desc, &walk, 0);
> >> +             err = crypto_ctr_crypt_final(&walk, child);
> >> +             err = blkcipher_walk_done(desc, &walk, err);
> > 
> > I guess you either want to handle the error from crypto_ctr_crypt_final or
> > do an err |= blkcipher_walk_done.
> 
> I think that blkcipher_walk_done handles and returns the error for me.
> Am I wrong?

You are right, as you want to finalize the crypto operation even if the
encryption fails.

Please disregard my comment.
> 
> Best regards,
> 
> Salvatore



Ciao
Stephan
Eric Biggers March 14, 2018, 6:31 p.m. UTC | #4
On Wed, Mar 14, 2018 at 02:17:30PM +0100, Salvatore Mesoraca wrote:
> All ciphers implemented in Linux have a block size less than or
> equal to 16 bytes and the most demanding hw require 16 bits
> alignment for the block buffer.
> We avoid 2 VLAs[1] by always allocating 16 bytes with 16 bits
> alignment, unless the architecture support efficient unaligned
> accesses.
> We also check, at runtime, that our assumptions still stand,
> possibly dynamically allocating a new buffer, just in case
> something changes in the future.
> 
> [1] https://lkml.org/lkml/2018/3/7/621
> 
> Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
> ---
> 
> Notes:
>     Can we maybe skip the runtime check?
> 
>  crypto/ctr.c | 50 ++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 42 insertions(+), 8 deletions(-)
> 
> diff --git a/crypto/ctr.c b/crypto/ctr.c
> index 854d924..f37adf0 100644
> --- a/crypto/ctr.c
> +++ b/crypto/ctr.c
> @@ -35,6 +35,16 @@ struct crypto_rfc3686_req_ctx {
>  	struct skcipher_request subreq CRYPTO_MINALIGN_ATTR;
>  };
>  
> +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
> +#define DECLARE_CIPHER_BUFFER(name) u8 name[16]
> +#else
> +#define DECLARE_CIPHER_BUFFER(name) u8 __aligned(16) name[16]
> +#endif
> +
> +#define CHECK_CIPHER_BUFFER(name, size, align)			\
> +	likely(size <= sizeof(name) &&				\
> +	       name == PTR_ALIGN(((u8 *) name), align + 1))
> +
>  static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key,
>  			     unsigned int keylen)
>  {
> @@ -52,22 +62,35 @@ static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key,
>  	return err;
>  }
>  
> -static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
> -				   struct crypto_cipher *tfm)
> +static int crypto_ctr_crypt_final(struct blkcipher_walk *walk,
> +				  struct crypto_cipher *tfm)
>  {
>  	unsigned int bsize = crypto_cipher_blocksize(tfm);
>  	unsigned long alignmask = crypto_cipher_alignmask(tfm);
>  	u8 *ctrblk = walk->iv;
> -	u8 tmp[bsize + alignmask];
> -	u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
>  	u8 *src = walk->src.virt.addr;
>  	u8 *dst = walk->dst.virt.addr;
>  	unsigned int nbytes = walk->nbytes;
> +	DECLARE_CIPHER_BUFFER(tmp);
> +	u8 *keystream, *tmp2;
> +
> +	if (CHECK_CIPHER_BUFFER(tmp, bsize, alignmask))
> +		keystream = tmp;
> +	else {
> +		tmp2 = kmalloc(bsize + alignmask, GFP_ATOMIC);
> +		if (!tmp2)
> +			return -ENOMEM;
> +		keystream = PTR_ALIGN(tmp2 + 0, alignmask + 1);
> +	}
>  
>  	crypto_cipher_encrypt_one(tfm, keystream, ctrblk);
>  	crypto_xor_cpy(dst, keystream, src, nbytes);
>  
>  	crypto_inc(ctrblk, bsize);
> +
> +	if (unlikely(keystream != tmp))
> +		kfree(tmp2);
> +	return 0;
>  }

This seems silly; isn't the !CHECK_CIPHER_BUFFER() case unreachable?  Did you
even test it?  If there's going to be limits, the crypto API ought to enforce
them when registering an algorithm.

A better alternative may be to move the keystream buffer into the request
context, which is allowed to be variable length.  It looks like that would
require converting the ctr template over to the skcipher API, since the
blkcipher API doesn't have a request context.  But my understanding is that that
will need to be done eventually anyway, since the blkcipher (and ablkcipher) API
is going away.  I converted a bunch of algorithms recently and I can look at the
remaining ones in crypto/*.c if no one else gets to it first, but it may be a
little while until I have time.

Also, I recall there being a long discussion a while back about how
__aligned(16) doesn't work on local variables because the kernel's stack pointer
isn't guaranteed to maintain the alignment assumed by the compiler (see commit
b8fbe71f7535)...

Eric
Salvatore Mesoraca March 14, 2018, 7:25 p.m. UTC | #5
2018-03-14 19:31 GMT+01:00 Eric Biggers <ebiggers3@gmail.com>:
> On Wed, Mar 14, 2018 at 02:17:30PM +0100, Salvatore Mesoraca wrote:
>> All ciphers implemented in Linux have a block size less than or
>> equal to 16 bytes and the most demanding hw require 16 bits
>> alignment for the block buffer.
>> We avoid 2 VLAs[1] by always allocating 16 bytes with 16 bits
>> alignment, unless the architecture support efficient unaligned
>> accesses.
>> We also check, at runtime, that our assumptions still stand,
>> possibly dynamically allocating a new buffer, just in case
>> something changes in the future.
>>
>> [1] https://lkml.org/lkml/2018/3/7/621
>>
>> Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
>> ---
>>
>> Notes:
>>     Can we maybe skip the runtime check?
>>
>>  crypto/ctr.c | 50 ++++++++++++++++++++++++++++++++++++++++++--------
>>  1 file changed, 42 insertions(+), 8 deletions(-)
>>
>> diff --git a/crypto/ctr.c b/crypto/ctr.c
>> index 854d924..f37adf0 100644
>> --- a/crypto/ctr.c
>> +++ b/crypto/ctr.c
>> @@ -35,6 +35,16 @@ struct crypto_rfc3686_req_ctx {
>>       struct skcipher_request subreq CRYPTO_MINALIGN_ATTR;
>>  };
>>
>> +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
>> +#define DECLARE_CIPHER_BUFFER(name) u8 name[16]
>> +#else
>> +#define DECLARE_CIPHER_BUFFER(name) u8 __aligned(16) name[16]
>> +#endif
>> +
>> +#define CHECK_CIPHER_BUFFER(name, size, align)                       \
>> +     likely(size <= sizeof(name) &&                          \
>> +            name == PTR_ALIGN(((u8 *) name), align + 1))
>> +
>>  static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key,
>>                            unsigned int keylen)
>>  {
>> @@ -52,22 +62,35 @@ static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key,
>>       return err;
>>  }
>>
>> -static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
>> -                                struct crypto_cipher *tfm)
>> +static int crypto_ctr_crypt_final(struct blkcipher_walk *walk,
>> +                               struct crypto_cipher *tfm)
>>  {
>>       unsigned int bsize = crypto_cipher_blocksize(tfm);
>>       unsigned long alignmask = crypto_cipher_alignmask(tfm);
>>       u8 *ctrblk = walk->iv;
>> -     u8 tmp[bsize + alignmask];
>> -     u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
>>       u8 *src = walk->src.virt.addr;
>>       u8 *dst = walk->dst.virt.addr;
>>       unsigned int nbytes = walk->nbytes;
>> +     DECLARE_CIPHER_BUFFER(tmp);
>> +     u8 *keystream, *tmp2;
>> +
>> +     if (CHECK_CIPHER_BUFFER(tmp, bsize, alignmask))
>> +             keystream = tmp;
>> +     else {
>> +             tmp2 = kmalloc(bsize + alignmask, GFP_ATOMIC);
>> +             if (!tmp2)
>> +                     return -ENOMEM;
>> +             keystream = PTR_ALIGN(tmp2 + 0, alignmask + 1);
>> +     }
>>
>>       crypto_cipher_encrypt_one(tfm, keystream, ctrblk);
>>       crypto_xor_cpy(dst, keystream, src, nbytes);
>>
>>       crypto_inc(ctrblk, bsize);
>> +
>> +     if (unlikely(keystream != tmp))
>> +             kfree(tmp2);
>> +     return 0;
>>  }
>
> This seems silly; isn't the !CHECK_CIPHER_BUFFER() case unreachable?  Did you
> even test it? If there's going to be limits, the crypto API ought to enforce
> them when registering an algorithm.

Yes, as I wrote in the commit log, I put that code there just in case
something changes (e.g. someone adds a cipher with a bigger block size),
so that it won't fail but will just work as is. I didn't really like it,
though, hence the note.

> A better alternative may be to move the keystream buffer into the request
> context, which is allowed to be variable length.  It looks like that would
> require converting the ctr template over to the skcipher API, since the
> blkcipher API doesn't have a request context.  But my understanding is that that
> will need to be done eventually anyway, since the blkcipher (and ablkcipher) API
> is going away.  I converted a bunch of algorithms recently and I can look at the
> remaining ones in crypto/*.c if no one else gets to it first, but it may be a
> little while until I have time.

This seems much better. I don't think that removing these VLAs is
urgent; after all, their sizes are limited and not under user control,
so we can just wait.
I might help port some of crypto/*.c to the skcipher API.

> Also, I recall there being a long discussion a while back about how
> __aligned(16) doesn't work on local variables because the kernel's stack pointer
> isn't guaranteed to maintain the alignment assumed by the compiler (see commit
> b8fbe71f7535)...

Oh... didn't know this! Interesting...

Thank you for your time,

Salvatore
Herbert Xu March 15, 2018, 9:54 a.m. UTC | #6
On Wed, Mar 14, 2018 at 02:17:30PM +0100, Salvatore Mesoraca wrote:
> All ciphers implemented in Linux have a block size less than or
> equal to 16 bytes and the most demanding hw require 16 bits
> alignment for the block buffer.
> We avoid 2 VLAs[1] by always allocating 16 bytes with 16 bits
> alignment, unless the architecture support efficient unaligned
> accesses.
> We also check, at runtime, that our assumptions still stand,
> possibly dynamically allocating a new buffer, just in case
> something changes in the future.

Please move the check to ctr instance creation time.  That is,
if the underlying blocksize is greater than 16 bytes then simply
fail the creation.

Thanks,
Salvatore Mesoraca March 15, 2018, 10:42 a.m. UTC | #7
2018-03-15 10:54 GMT+01:00 Herbert Xu <herbert@gondor.apana.org.au>:
> On Wed, Mar 14, 2018 at 02:17:30PM +0100, Salvatore Mesoraca wrote:
>> All ciphers implemented in Linux have a block size less than or
>> equal to 16 bytes and the most demanding hw require 16 bits
>> alignment for the block buffer.
>> We avoid 2 VLAs[1] by always allocating 16 bytes with 16 bits
>> alignment, unless the architecture support efficient unaligned
>> accesses.
>> We also check, at runtime, that our assumptions still stand,
>> possibly dynamically allocating a new buffer, just in case
>> something changes in the future.
>
> Please move the check to ctr instance creation time.  That is,
> if the underlying blocksize is greater than 16 bytes than simply
> fail the creation.

Good, I'll send a v2.
Thank you for your help,

Salvatore
David Laight March 15, 2018, 11:55 a.m. UTC | #8
From: Eric Biggers
> Sent: 14 March 2018 18:32
...
> Also, I recall there being a long discussion a while back about how
> __aligned(16) doesn't work on local variables because the kernel's stack pointer
> isn't guaranteed to maintain the alignment assumed by the compiler (see commit
> b8fbe71f7535)...

ISTR that gcc arbitrarily decided that the x86 stack (for 32 bit) would be
kept aligned to more than 4 bytes (16??) - probably so that xmm registers
could be written to stack locations.
This was a massive ABI change that they didn't tell anyone about!
While gcc compiled code maintained the alignment a lot of asm code didn't.
I don't know about Linux, but NetBSD didn't even align user stacks.

There is a gcc option to not assume that the stack is 'appropriately aligned',
but ISTR that it generates rather more code than one might have wished.

If the compiler does align the stack, it does so by generating a double
stack frame - not pretty at all.

	David
kernel test robot March 15, 2018, 2:41 p.m. UTC | #9
Hi Salvatore,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v4.16-rc5 next-20180314]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Salvatore-Mesoraca/crypto-ctr-avoid-VLA-use/20180315-213008
config: x86_64-randconfig-x014-201810 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings

All warnings (new ones prefixed by >>):

   crypto/ctr.c: In function 'crypto_ctr_crypt':
>> crypto/ctr.c:156:3: warning: 'tmp2' may be used uninitialized in this function [-Wmaybe-uninitialized]
      kfree(tmp2);
      ^~~~~~~~~~~
   crypto/ctr.c:133:18: note: 'tmp2' was declared here
     u8 *keystream, *tmp2;
                     ^~~~

vim +/tmp2 +156 crypto/ctr.c

   121	
   122	static int crypto_ctr_crypt_inplace(struct blkcipher_walk *walk,
   123					    struct crypto_cipher *tfm)
   124	{
   125		void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
   126			   crypto_cipher_alg(tfm)->cia_encrypt;
   127		unsigned int bsize = crypto_cipher_blocksize(tfm);
   128		unsigned long alignmask = crypto_cipher_alignmask(tfm);
   129		unsigned int nbytes = walk->nbytes;
   130		u8 *ctrblk = walk->iv;
   131		u8 *src = walk->src.virt.addr;
   132		DECLARE_CIPHER_BUFFER(tmp);
   133		u8 *keystream, *tmp2;
   134	
   135		if (CHECK_CIPHER_BUFFER(tmp, bsize, alignmask))
   136			keystream = tmp;
   137		else {
   138			tmp2 = kmalloc(bsize + alignmask, GFP_ATOMIC);
   139			if (!tmp2)
   140				return -ENOMEM;
   141			keystream = PTR_ALIGN(tmp2 + 0, alignmask + 1);
   142		}
   143	
   144		do {
   145			/* create keystream */
   146			fn(crypto_cipher_tfm(tfm), keystream, ctrblk);
   147			crypto_xor(src, keystream, bsize);
   148	
   149			/* increment counter in counterblock */
   150			crypto_inc(ctrblk, bsize);
   151	
   152			src += bsize;
   153		} while ((nbytes -= bsize) >= bsize);
   154	
   155		if (unlikely(keystream != tmp))
 > 156			kfree(tmp2);
   157		return nbytes;
   158	}
   159	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/crypto/ctr.c b/crypto/ctr.c
index 854d924..f37adf0 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -35,6 +35,16 @@  struct crypto_rfc3686_req_ctx {
 	struct skcipher_request subreq CRYPTO_MINALIGN_ATTR;
 };
 
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#define DECLARE_CIPHER_BUFFER(name) u8 name[16]
+#else
+#define DECLARE_CIPHER_BUFFER(name) u8 __aligned(16) name[16]
+#endif
+
+#define CHECK_CIPHER_BUFFER(name, size, align)			\
+	likely(size <= sizeof(name) &&				\
+	       name == PTR_ALIGN(((u8 *) name), align + 1))
+
 static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key,
 			     unsigned int keylen)
 {
@@ -52,22 +62,35 @@  static int crypto_ctr_setkey(struct crypto_tfm *parent, const u8 *key,
 	return err;
 }
 
-static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
-				   struct crypto_cipher *tfm)
+static int crypto_ctr_crypt_final(struct blkcipher_walk *walk,
+				  struct crypto_cipher *tfm)
 {
 	unsigned int bsize = crypto_cipher_blocksize(tfm);
 	unsigned long alignmask = crypto_cipher_alignmask(tfm);
 	u8 *ctrblk = walk->iv;
-	u8 tmp[bsize + alignmask];
-	u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
 	u8 *src = walk->src.virt.addr;
 	u8 *dst = walk->dst.virt.addr;
 	unsigned int nbytes = walk->nbytes;
+	DECLARE_CIPHER_BUFFER(tmp);
+	u8 *keystream, *tmp2;
+
+	if (CHECK_CIPHER_BUFFER(tmp, bsize, alignmask))
+		keystream = tmp;
+	else {
+		tmp2 = kmalloc(bsize + alignmask, GFP_ATOMIC);
+		if (!tmp2)
+			return -ENOMEM;
+		keystream = PTR_ALIGN(tmp2 + 0, alignmask + 1);
+	}
 
 	crypto_cipher_encrypt_one(tfm, keystream, ctrblk);
 	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, bsize);
+
+	if (unlikely(keystream != tmp))
+		kfree(tmp2);
+	return 0;
 }
 
 static int crypto_ctr_crypt_segment(struct blkcipher_walk *walk,
@@ -106,8 +129,17 @@  static int crypto_ctr_crypt_inplace(struct blkcipher_walk *walk,
 	unsigned int nbytes = walk->nbytes;
 	u8 *ctrblk = walk->iv;
 	u8 *src = walk->src.virt.addr;
-	u8 tmp[bsize + alignmask];
-	u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
+	DECLARE_CIPHER_BUFFER(tmp);
+	u8 *keystream, *tmp2;
+
+	if (CHECK_CIPHER_BUFFER(tmp, bsize, alignmask))
+		keystream = tmp;
+	else {
+		tmp2 = kmalloc(bsize + alignmask, GFP_ATOMIC);
+		if (!tmp2)
+			return -ENOMEM;
+		keystream = PTR_ALIGN(tmp2 + 0, alignmask + 1);
+	}
 
 	do {
 		/* create keystream */
@@ -120,6 +152,8 @@  static int crypto_ctr_crypt_inplace(struct blkcipher_walk *walk,
 		src += bsize;
 	} while ((nbytes -= bsize) >= bsize);
 
+	if (unlikely(keystream != tmp))
+		kfree(tmp2);
 	return nbytes;
 }
 
@@ -147,8 +181,8 @@  static int crypto_ctr_crypt(struct blkcipher_desc *desc,
 	}
 
 	if (walk.nbytes) {
-		crypto_ctr_crypt_final(&walk, child);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = crypto_ctr_crypt_final(&walk, child);
+		err = blkcipher_walk_done(desc, &walk, err);
 	}
 
 	return err;