diff mbox

libceph: do not crash on large auth tickets

Message ID 1414425929-10625-1-git-send-email-idryomov@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ilya Dryomov Oct. 27, 2014, 4:05 p.m. UTC
Large (greater than 32k, the value of PAGE_ALLOC_COSTLY_ORDER) auth
tickets will have their buffers vmalloc'ed, which leads to the
following crash in crypto:

[   28.685082] BUG: unable to handle kernel paging request at ffffeb04000032c0
[   28.686032] IP: [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
[   28.686032] PGD 0
[   28.688088] Oops: 0000 [#1] PREEMPT SMP
[   28.688088] Modules linked in:
[   28.688088] CPU: 0 PID: 878 Comm: kworker/0:2 Not tainted 3.17.0-vm+ #305
[   28.688088] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[   28.688088] Workqueue: ceph-msgr con_work
[   28.688088] task: ffff88011a7f9030 ti: ffff8800d903c000 task.ti: ffff8800d903c000
[   28.688088] RIP: 0010:[<ffffffff81392b42>]  [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
[   28.688088] RSP: 0018:ffff8800d903f688  EFLAGS: 00010286
[   28.688088] RAX: ffffeb04000032c0 RBX: ffff8800d903f718 RCX: ffffeb04000032c0
[   28.688088] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8800d903f750
[   28.688088] RBP: ffff8800d903f688 R08: 00000000000007de R09: ffff8800d903f880
[   28.688088] R10: 18df467c72d6257b R11: 0000000000000000 R12: 0000000000000010
[   28.688088] R13: ffff8800d903f750 R14: ffff8800d903f8a0 R15: 0000000000000000
[   28.688088] FS:  00007f50a41c7700(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000
[   28.688088] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[   28.688088] CR2: ffffeb04000032c0 CR3: 00000000da3f3000 CR4: 00000000000006b0
[   28.688088] Stack:
[   28.688088]  ffff8800d903f698 ffffffff81392ca8 ffff8800d903f6e8 ffffffff81395d32
[   28.688088]  ffff8800dac96000 ffff880000000000 ffff8800d903f980 ffff880119b7e020
[   28.688088]  ffff880119b7e010 0000000000000000 0000000000000010 0000000000000010
[   28.688088] Call Trace:
[   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
[   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
[   28.688088]  [<ffffffff81395d32>] blkcipher_walk_done+0x182/0x220
[   28.688088]  [<ffffffff813990bf>] crypto_cbc_encrypt+0x15f/0x180
[   28.688088]  [<ffffffff81399780>] ? crypto_aes_set_key+0x30/0x30
[   28.688088]  [<ffffffff8156c40c>] ceph_aes_encrypt2+0x29c/0x2e0
[   28.688088]  [<ffffffff8156d2a3>] ceph_encrypt2+0x93/0xb0
[   28.688088]  [<ffffffff8156d7da>] ceph_x_encrypt+0x4a/0x60
[   28.688088]  [<ffffffff8155b39d>] ? ceph_buffer_new+0x5d/0xf0
[   28.688088]  [<ffffffff8156e837>] ceph_x_build_authorizer.isra.6+0x297/0x360
[   28.688088]  [<ffffffff8112089b>] ? kmem_cache_alloc_trace+0x11b/0x1c0
[   28.688088]  [<ffffffff8156b496>] ? ceph_auth_create_authorizer+0x36/0x80
[   28.688088]  [<ffffffff8156ed83>] ceph_x_create_authorizer+0x63/0xd0
[   28.688088]  [<ffffffff8156b4b4>] ceph_auth_create_authorizer+0x54/0x80
[   28.688088]  [<ffffffff8155f7c0>] get_authorizer+0x80/0xd0
[   28.688088]  [<ffffffff81555a8b>] prepare_write_connect+0x18b/0x2b0
[   28.688088]  [<ffffffff81559289>] try_read+0x1e59/0x1f10

This is because we set up crypto scatterlists as if all buffers were
kmalloc'ed.  Fix it.

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
---
 net/ceph/crypto.c |   33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

Comments

Sage Weil Oct. 30, 2014, 3:10 p.m. UTC | #1
On Mon, 27 Oct 2014, Ilya Dryomov wrote:
> Large (greater than 32k, the value of PAGE_ALLOC_COSTLY_ORDER) auth
> tickets will have their buffers vmalloc'ed, which leads to the
> following crash in crypto:
> 
> [   28.685082] BUG: unable to handle kernel paging request at ffffeb04000032c0
> [   28.686032] IP: [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
> [   28.686032] PGD 0
> [   28.688088] Oops: 0000 [#1] PREEMPT SMP
> [   28.688088] Modules linked in:
> [   28.688088] CPU: 0 PID: 878 Comm: kworker/0:2 Not tainted 3.17.0-vm+ #305
> [   28.688088] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
> [   28.688088] Workqueue: ceph-msgr con_work
> [   28.688088] task: ffff88011a7f9030 ti: ffff8800d903c000 task.ti: ffff8800d903c000
> [   28.688088] RIP: 0010:[<ffffffff81392b42>]  [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
> [   28.688088] RSP: 0018:ffff8800d903f688  EFLAGS: 00010286
> [   28.688088] RAX: ffffeb04000032c0 RBX: ffff8800d903f718 RCX: ffffeb04000032c0
> [   28.688088] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8800d903f750
> [   28.688088] RBP: ffff8800d903f688 R08: 00000000000007de R09: ffff8800d903f880
> [   28.688088] R10: 18df467c72d6257b R11: 0000000000000000 R12: 0000000000000010
> [   28.688088] R13: ffff8800d903f750 R14: ffff8800d903f8a0 R15: 0000000000000000
> [   28.688088] FS:  00007f50a41c7700(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000
> [   28.688088] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [   28.688088] CR2: ffffeb04000032c0 CR3: 00000000da3f3000 CR4: 00000000000006b0
> [   28.688088] Stack:
> [   28.688088]  ffff8800d903f698 ffffffff81392ca8 ffff8800d903f6e8 ffffffff81395d32
> [   28.688088]  ffff8800dac96000 ffff880000000000 ffff8800d903f980 ffff880119b7e020
> [   28.688088]  ffff880119b7e010 0000000000000000 0000000000000010 0000000000000010
> [   28.688088] Call Trace:
> [   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
> [   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
> [   28.688088]  [<ffffffff81395d32>] blkcipher_walk_done+0x182/0x220
> [   28.688088]  [<ffffffff813990bf>] crypto_cbc_encrypt+0x15f/0x180
> [   28.688088]  [<ffffffff81399780>] ? crypto_aes_set_key+0x30/0x30
> [   28.688088]  [<ffffffff8156c40c>] ceph_aes_encrypt2+0x29c/0x2e0
> [   28.688088]  [<ffffffff8156d2a3>] ceph_encrypt2+0x93/0xb0
> [   28.688088]  [<ffffffff8156d7da>] ceph_x_encrypt+0x4a/0x60
> [   28.688088]  [<ffffffff8155b39d>] ? ceph_buffer_new+0x5d/0xf0
> [   28.688088]  [<ffffffff8156e837>] ceph_x_build_authorizer.isra.6+0x297/0x360
> [   28.688088]  [<ffffffff8112089b>] ? kmem_cache_alloc_trace+0x11b/0x1c0
> [   28.688088]  [<ffffffff8156b496>] ? ceph_auth_create_authorizer+0x36/0x80
> [   28.688088]  [<ffffffff8156ed83>] ceph_x_create_authorizer+0x63/0xd0
> [   28.688088]  [<ffffffff8156b4b4>] ceph_auth_create_authorizer+0x54/0x80
> [   28.688088]  [<ffffffff8155f7c0>] get_authorizer+0x80/0xd0
> [   28.688088]  [<ffffffff81555a8b>] prepare_write_connect+0x18b/0x2b0
> [   28.688088]  [<ffffffff81559289>] try_read+0x1e59/0x1f10
> 
> This is because we set up crypto scatterlists as if all buffers were
> kmalloc'ed.  Fix it.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
> ---
>  net/ceph/crypto.c |   33 +++++++++++++++++++++++++--------
>  1 file changed, 25 insertions(+), 8 deletions(-)
> 
> diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
> index 62fc5e7a9acf..37a9b5eea3c3 100644
> --- a/net/ceph/crypto.c
> +++ b/net/ceph/crypto.c
> @@ -90,6 +90,27 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
>  
>  static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
>  
> +/*
> + * Should be used for buffers allocated with ceph_kvmalloc().
> + * Currently these are encrypt out-buffer (ceph_buffer) and decrypt
> + * in-buffer (msg front).  @buf has to fit in a single page.
> + */
> +static void set_kvmalloc_buf(struct scatterlist *sg, const void *buf,
> +			     size_t len)
> +{
> +	const void *sg_buf;
> +	unsigned long off = offset_in_page(buf);
> +
> +	BUG_ON(off + len > PAGE_SIZE);
> +
> +	if (is_vmalloc_addr(buf))
> +		sg_buf = page_address(vmalloc_to_page(buf)) + off;

I'm not very familiar with the vm stuff, but this confuses me.  It looks 
like it's taking the low memory (physical?) address of the first page in 
the vmalloc'ed range.  But the whole point of vmalloc is that it is 
allocating non-contiguous physical memory.  How does the sg code 
traverse the rest of the buffer if it isn't using the virtual addresses 
that vmalloc set up?

Thanks!
sage



> +	else
> +		sg_buf = buf;
> +
> +	sg_init_one(sg, sg_buf, len);
> +}
> +
>  static int ceph_aes_encrypt(const void *key, int key_len,
>  			    void *dst, size_t *dst_len,
>  			    const void *src, size_t src_len)
> @@ -114,8 +135,7 @@ static int ceph_aes_encrypt(const void *key, int key_len,
>  	sg_init_table(sg_in, 2);
>  	sg_set_buf(&sg_in[0], src, src_len);
>  	sg_set_buf(&sg_in[1], pad, zero_padding);
> -	sg_init_table(sg_out, 1);
> -	sg_set_buf(sg_out, dst, *dst_len);
> +	set_kvmalloc_buf(sg_out, dst, *dst_len);
>  	iv = crypto_blkcipher_crt(tfm)->iv;
>  	ivsize = crypto_blkcipher_ivsize(tfm);
>  
> @@ -166,8 +186,7 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
>  	sg_set_buf(&sg_in[0], src1, src1_len);
>  	sg_set_buf(&sg_in[1], src2, src2_len);
>  	sg_set_buf(&sg_in[2], pad, zero_padding);
> -	sg_init_table(sg_out, 1);
> -	sg_set_buf(sg_out, dst, *dst_len);
> +	set_kvmalloc_buf(sg_out, dst, *dst_len);
>  	iv = crypto_blkcipher_crt(tfm)->iv;
>  	ivsize = crypto_blkcipher_ivsize(tfm);
>  
> @@ -211,9 +230,8 @@ static int ceph_aes_decrypt(const void *key, int key_len,
>  		return PTR_ERR(tfm);
>  
>  	crypto_blkcipher_setkey((void *)tfm, key, key_len);
> -	sg_init_table(sg_in, 1);
> +	set_kvmalloc_buf(sg_in, src, src_len);
>  	sg_init_table(sg_out, 2);
> -	sg_set_buf(sg_in, src, src_len);
>  	sg_set_buf(&sg_out[0], dst, *dst_len);
>  	sg_set_buf(&sg_out[1], pad, sizeof(pad));
>  
> @@ -271,8 +289,7 @@ static int ceph_aes_decrypt2(const void *key, int key_len,
>  	if (IS_ERR(tfm))
>  		return PTR_ERR(tfm);
>  
> -	sg_init_table(sg_in, 1);
> -	sg_set_buf(sg_in, src, src_len);
> +	set_kvmalloc_buf(sg_in, src, src_len);
>  	sg_init_table(sg_out, 3);
>  	sg_set_buf(&sg_out[0], dst1, *dst1_len);
>  	sg_set_buf(&sg_out[1], dst2, *dst2_len);
> -- 
> 1.7.10.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov Oct. 30, 2014, 3:18 p.m. UTC | #2
On Thu, Oct 30, 2014 at 6:10 PM, Sage Weil <sage@newdream.net> wrote:
> On Mon, 27 Oct 2014, Ilya Dryomov wrote:
>> Large (greater than 32k, the value of PAGE_ALLOC_COSTLY_ORDER) auth
>> tickets will have their buffers vmalloc'ed, which leads to the
>> following crash in crypto:
>>
>> [   28.685082] BUG: unable to handle kernel paging request at ffffeb04000032c0
>> [   28.686032] IP: [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
>> [   28.686032] PGD 0
>> [   28.688088] Oops: 0000 [#1] PREEMPT SMP
>> [   28.688088] Modules linked in:
>> [   28.688088] CPU: 0 PID: 878 Comm: kworker/0:2 Not tainted 3.17.0-vm+ #305
>> [   28.688088] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
>> [   28.688088] Workqueue: ceph-msgr con_work
>> [   28.688088] task: ffff88011a7f9030 ti: ffff8800d903c000 task.ti: ffff8800d903c000
>> [   28.688088] RIP: 0010:[<ffffffff81392b42>]  [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
>> [   28.688088] RSP: 0018:ffff8800d903f688  EFLAGS: 00010286
>> [   28.688088] RAX: ffffeb04000032c0 RBX: ffff8800d903f718 RCX: ffffeb04000032c0
>> [   28.688088] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8800d903f750
>> [   28.688088] RBP: ffff8800d903f688 R08: 00000000000007de R09: ffff8800d903f880
>> [   28.688088] R10: 18df467c72d6257b R11: 0000000000000000 R12: 0000000000000010
>> [   28.688088] R13: ffff8800d903f750 R14: ffff8800d903f8a0 R15: 0000000000000000
>> [   28.688088] FS:  00007f50a41c7700(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000
>> [   28.688088] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
>> [   28.688088] CR2: ffffeb04000032c0 CR3: 00000000da3f3000 CR4: 00000000000006b0
>> [   28.688088] Stack:
>> [   28.688088]  ffff8800d903f698 ffffffff81392ca8 ffff8800d903f6e8 ffffffff81395d32
>> [   28.688088]  ffff8800dac96000 ffff880000000000 ffff8800d903f980 ffff880119b7e020
>> [   28.688088]  ffff880119b7e010 0000000000000000 0000000000000010 0000000000000010
>> [   28.688088] Call Trace:
>> [   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
>> [   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
>> [   28.688088]  [<ffffffff81395d32>] blkcipher_walk_done+0x182/0x220
>> [   28.688088]  [<ffffffff813990bf>] crypto_cbc_encrypt+0x15f/0x180
>> [   28.688088]  [<ffffffff81399780>] ? crypto_aes_set_key+0x30/0x30
>> [   28.688088]  [<ffffffff8156c40c>] ceph_aes_encrypt2+0x29c/0x2e0
>> [   28.688088]  [<ffffffff8156d2a3>] ceph_encrypt2+0x93/0xb0
>> [   28.688088]  [<ffffffff8156d7da>] ceph_x_encrypt+0x4a/0x60
>> [   28.688088]  [<ffffffff8155b39d>] ? ceph_buffer_new+0x5d/0xf0
>> [   28.688088]  [<ffffffff8156e837>] ceph_x_build_authorizer.isra.6+0x297/0x360
>> [   28.688088]  [<ffffffff8112089b>] ? kmem_cache_alloc_trace+0x11b/0x1c0
>> [   28.688088]  [<ffffffff8156b496>] ? ceph_auth_create_authorizer+0x36/0x80
>> [   28.688088]  [<ffffffff8156ed83>] ceph_x_create_authorizer+0x63/0xd0
>> [   28.688088]  [<ffffffff8156b4b4>] ceph_auth_create_authorizer+0x54/0x80
>> [   28.688088]  [<ffffffff8155f7c0>] get_authorizer+0x80/0xd0
>> [   28.688088]  [<ffffffff81555a8b>] prepare_write_connect+0x18b/0x2b0
>> [   28.688088]  [<ffffffff81559289>] try_read+0x1e59/0x1f10
>>
>> This is because we set up crypto scatterlists as if all buffers were
>> kmalloc'ed.  Fix it.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
>> ---
>>  net/ceph/crypto.c |   33 +++++++++++++++++++++++++--------
>>  1 file changed, 25 insertions(+), 8 deletions(-)
>>
>> diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
>> index 62fc5e7a9acf..37a9b5eea3c3 100644
>> --- a/net/ceph/crypto.c
>> +++ b/net/ceph/crypto.c
>> @@ -90,6 +90,27 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
>>
>>  static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
>>
>> +/*
>> + * Should be used for buffers allocated with ceph_kvmalloc().
>> + * Currently these are encrypt out-buffer (ceph_buffer) and decrypt
>> + * in-buffer (msg front).  @buf has to fit in a single page.

                               ^^^^

>> + */
>> +static void set_kvmalloc_buf(struct scatterlist *sg, const void *buf,
>> +                          size_t len)
>> +{
>> +     const void *sg_buf;
>> +     unsigned long off = offset_in_page(buf);
>> +
>> +     BUG_ON(off + len > PAGE_SIZE);

         ^^^^

>> +
>> +     if (is_vmalloc_addr(buf))
>> +             sg_buf = page_address(vmalloc_to_page(buf)) + off;
>
> I'm not very familiar with the vm stuff, but this confuses me.  It looks
> like it's taking the low memory (physical?) address of the first page in
> the vmalloc'ed range.  But the whole point of vmalloc is that it is
> allocating non-contiguous physical memory.  How does the sg code
> traverse the rest of the buffer if it isn't using the virtual addresses
> that vmalloc set up?

It doesn't - the buffer has to fit in a single page, works for the
current users.  To make it work with multiple pages we'd have to
allocate one sg per page and init each of them in this (or similar)
fashion.

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov Oct. 30, 2014, 3:47 p.m. UTC | #3
On Thu, Oct 30, 2014 at 6:18 PM, Ilya Dryomov <ilya.dryomov@inktank.com> wrote:
> On Thu, Oct 30, 2014 at 6:10 PM, Sage Weil <sage@newdream.net> wrote:
>> On Mon, 27 Oct 2014, Ilya Dryomov wrote:
>>> Large (greater than 32k, the value of PAGE_ALLOC_COSTLY_ORDER) auth
>>> tickets will have their buffers vmalloc'ed, which leads to the
>>> following crash in crypto:
>>>
>>> [   28.685082] BUG: unable to handle kernel paging request at ffffeb04000032c0
>>> [   28.686032] IP: [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
>>> [   28.686032] PGD 0
>>> [   28.688088] Oops: 0000 [#1] PREEMPT SMP
>>> [   28.688088] Modules linked in:
>>> [   28.688088] CPU: 0 PID: 878 Comm: kworker/0:2 Not tainted 3.17.0-vm+ #305
>>> [   28.688088] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
>>> [   28.688088] Workqueue: ceph-msgr con_work
>>> [   28.688088] task: ffff88011a7f9030 ti: ffff8800d903c000 task.ti: ffff8800d903c000
>>> [   28.688088] RIP: 0010:[<ffffffff81392b42>]  [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
>>> [   28.688088] RSP: 0018:ffff8800d903f688  EFLAGS: 00010286
>>> [   28.688088] RAX: ffffeb04000032c0 RBX: ffff8800d903f718 RCX: ffffeb04000032c0
>>> [   28.688088] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8800d903f750
>>> [   28.688088] RBP: ffff8800d903f688 R08: 00000000000007de R09: ffff8800d903f880
>>> [   28.688088] R10: 18df467c72d6257b R11: 0000000000000000 R12: 0000000000000010
>>> [   28.688088] R13: ffff8800d903f750 R14: ffff8800d903f8a0 R15: 0000000000000000
>>> [   28.688088] FS:  00007f50a41c7700(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000
>>> [   28.688088] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
>>> [   28.688088] CR2: ffffeb04000032c0 CR3: 00000000da3f3000 CR4: 00000000000006b0
>>> [   28.688088] Stack:
>>> [   28.688088]  ffff8800d903f698 ffffffff81392ca8 ffff8800d903f6e8 ffffffff81395d32
>>> [   28.688088]  ffff8800dac96000 ffff880000000000 ffff8800d903f980 ffff880119b7e020
>>> [   28.688088]  ffff880119b7e010 0000000000000000 0000000000000010 0000000000000010
>>> [   28.688088] Call Trace:
>>> [   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
>>> [   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
>>> [   28.688088]  [<ffffffff81395d32>] blkcipher_walk_done+0x182/0x220
>>> [   28.688088]  [<ffffffff813990bf>] crypto_cbc_encrypt+0x15f/0x180
>>> [   28.688088]  [<ffffffff81399780>] ? crypto_aes_set_key+0x30/0x30
>>> [   28.688088]  [<ffffffff8156c40c>] ceph_aes_encrypt2+0x29c/0x2e0
>>> [   28.688088]  [<ffffffff8156d2a3>] ceph_encrypt2+0x93/0xb0
>>> [   28.688088]  [<ffffffff8156d7da>] ceph_x_encrypt+0x4a/0x60
>>> [   28.688088]  [<ffffffff8155b39d>] ? ceph_buffer_new+0x5d/0xf0
>>> [   28.688088]  [<ffffffff8156e837>] ceph_x_build_authorizer.isra.6+0x297/0x360
>>> [   28.688088]  [<ffffffff8112089b>] ? kmem_cache_alloc_trace+0x11b/0x1c0
>>> [   28.688088]  [<ffffffff8156b496>] ? ceph_auth_create_authorizer+0x36/0x80
>>> [   28.688088]  [<ffffffff8156ed83>] ceph_x_create_authorizer+0x63/0xd0
>>> [   28.688088]  [<ffffffff8156b4b4>] ceph_auth_create_authorizer+0x54/0x80
>>> [   28.688088]  [<ffffffff8155f7c0>] get_authorizer+0x80/0xd0
>>> [   28.688088]  [<ffffffff81555a8b>] prepare_write_connect+0x18b/0x2b0
>>> [   28.688088]  [<ffffffff81559289>] try_read+0x1e59/0x1f10
>>>
>>> This is because we set up crypto scatterlists as if all buffers were
>>> kmalloc'ed.  Fix it.
>>>
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
>>> ---
>>>  net/ceph/crypto.c |   33 +++++++++++++++++++++++++--------
>>>  1 file changed, 25 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
>>> index 62fc5e7a9acf..37a9b5eea3c3 100644
>>> --- a/net/ceph/crypto.c
>>> +++ b/net/ceph/crypto.c
>>> @@ -90,6 +90,27 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
>>>
>>>  static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
>>>
>>> +/*
>>> + * Should be used for buffers allocated with ceph_kvmalloc().
>>> + * Currently these are encrypt out-buffer (ceph_buffer) and decrypt
>>> + * in-buffer (msg front).  @buf has to fit in a single page.
>
>                                ^^^^
>
>>> + */
>>> +static void set_kvmalloc_buf(struct scatterlist *sg, const void *buf,
>>> +                          size_t len)
>>> +{
>>> +     const void *sg_buf;
>>> +     unsigned long off = offset_in_page(buf);
>>> +
>>> +     BUG_ON(off + len > PAGE_SIZE);
>
>          ^^^^
>
>>> +
>>> +     if (is_vmalloc_addr(buf))
>>> +             sg_buf = page_address(vmalloc_to_page(buf)) + off;
>>
>> I'm not very familiar with the vm stuff, but this confuses me.  It looks
>> like it's taking the low memory (physical?) address of the first page in
>> the vmalloc'ed range.  But the whole point of vmalloc is that it is
>> allocating non-contiguous physical memory.  How does the sg code
>> traverse the rest of the buffer if it isn't using the virtual addresses
>> that vmalloc set up?
>
> It doesn't - the buffer has to fit in a single page, works for the
> current users.  To make it work with multiple pages we'd have to
> allocate one sg per page and init each of them in this (or similar)
> fashion.

Or we could use sg_alloc_table_from_pages() which it looks like will coalesce
physically adjacent pages into a single sg.  I went with a simpler solution
because all current users of ceph_{encrypt,decrypt}() are fine with a single
page constraint.

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Oct. 30, 2014, 4:33 p.m. UTC | #4
On Thu, 30 Oct 2014, Ilya Dryomov wrote:
> On Thu, Oct 30, 2014 at 6:10 PM, Sage Weil <sage@newdream.net> wrote:
> > On Mon, 27 Oct 2014, Ilya Dryomov wrote:
> >> Large (greater than 32k, the value of PAGE_ALLOC_COSTLY_ORDER) auth
> >> tickets will have their buffers vmalloc'ed, which leads to the
> >> following crash in crypto:
> >>
> >> [   28.685082] BUG: unable to handle kernel paging request at ffffeb04000032c0
> >> [   28.686032] IP: [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
> >> [   28.686032] PGD 0
> >> [   28.688088] Oops: 0000 [#1] PREEMPT SMP
> >> [   28.688088] Modules linked in:
> >> [   28.688088] CPU: 0 PID: 878 Comm: kworker/0:2 Not tainted 3.17.0-vm+ #305
> >> [   28.688088] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
> >> [   28.688088] Workqueue: ceph-msgr con_work
> >> [   28.688088] task: ffff88011a7f9030 ti: ffff8800d903c000 task.ti: ffff8800d903c000
> >> [   28.688088] RIP: 0010:[<ffffffff81392b42>]  [<ffffffff81392b42>] scatterwalk_pagedone+0x22/0x80
> >> [   28.688088] RSP: 0018:ffff8800d903f688  EFLAGS: 00010286
> >> [   28.688088] RAX: ffffeb04000032c0 RBX: ffff8800d903f718 RCX: ffffeb04000032c0
> >> [   28.688088] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8800d903f750
> >> [   28.688088] RBP: ffff8800d903f688 R08: 00000000000007de R09: ffff8800d903f880
> >> [   28.688088] R10: 18df467c72d6257b R11: 0000000000000000 R12: 0000000000000010
> >> [   28.688088] R13: ffff8800d903f750 R14: ffff8800d903f8a0 R15: 0000000000000000
> >> [   28.688088] FS:  00007f50a41c7700(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000
> >> [   28.688088] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> >> [   28.688088] CR2: ffffeb04000032c0 CR3: 00000000da3f3000 CR4: 00000000000006b0
> >> [   28.688088] Stack:
> >> [   28.688088]  ffff8800d903f698 ffffffff81392ca8 ffff8800d903f6e8 ffffffff81395d32
> >> [   28.688088]  ffff8800dac96000 ffff880000000000 ffff8800d903f980 ffff880119b7e020
> >> [   28.688088]  ffff880119b7e010 0000000000000000 0000000000000010 0000000000000010
> >> [   28.688088] Call Trace:
> >> [   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
> >> [   28.688088]  [<ffffffff81392ca8>] scatterwalk_done+0x38/0x40
> >> [   28.688088]  [<ffffffff81395d32>] blkcipher_walk_done+0x182/0x220
> >> [   28.688088]  [<ffffffff813990bf>] crypto_cbc_encrypt+0x15f/0x180
> >> [   28.688088]  [<ffffffff81399780>] ? crypto_aes_set_key+0x30/0x30
> >> [   28.688088]  [<ffffffff8156c40c>] ceph_aes_encrypt2+0x29c/0x2e0
> >> [   28.688088]  [<ffffffff8156d2a3>] ceph_encrypt2+0x93/0xb0
> >> [   28.688088]  [<ffffffff8156d7da>] ceph_x_encrypt+0x4a/0x60
> >> [   28.688088]  [<ffffffff8155b39d>] ? ceph_buffer_new+0x5d/0xf0
> >> [   28.688088]  [<ffffffff8156e837>] ceph_x_build_authorizer.isra.6+0x297/0x360
> >> [   28.688088]  [<ffffffff8112089b>] ? kmem_cache_alloc_trace+0x11b/0x1c0
> >> [   28.688088]  [<ffffffff8156b496>] ? ceph_auth_create_authorizer+0x36/0x80
> >> [   28.688088]  [<ffffffff8156ed83>] ceph_x_create_authorizer+0x63/0xd0
> >> [   28.688088]  [<ffffffff8156b4b4>] ceph_auth_create_authorizer+0x54/0x80
> >> [   28.688088]  [<ffffffff8155f7c0>] get_authorizer+0x80/0xd0
> >> [   28.688088]  [<ffffffff81555a8b>] prepare_write_connect+0x18b/0x2b0
> >> [   28.688088]  [<ffffffff81559289>] try_read+0x1e59/0x1f10
> >>
> >> This is because we set up crypto scatterlists as if all buffers were
> >> kmalloc'ed.  Fix it.
> >>
> >> Cc: stable@vger.kernel.org
> >> Signed-off-by: Ilya Dryomov <idryomov@redhat.com>
> >> ---
> >>  net/ceph/crypto.c |   33 +++++++++++++++++++++++++--------
> >>  1 file changed, 25 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
> >> index 62fc5e7a9acf..37a9b5eea3c3 100644
> >> --- a/net/ceph/crypto.c
> >> +++ b/net/ceph/crypto.c
> >> @@ -90,6 +90,27 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
> >>
> >>  static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
> >>
> >> +/*
> >> + * Should be used for buffers allocated with ceph_kvmalloc().
> >> + * Currently these are encrypt out-buffer (ceph_buffer) and decrypt
> >> + * in-buffer (msg front).  @buf has to fit in a single page.
> 
>                                ^^^^
> 
> >> + */
> >> +static void set_kvmalloc_buf(struct scatterlist *sg, const void *buf,
> >> +                          size_t len)
> >> +{
> >> +     const void *sg_buf;
> >> +     unsigned long off = offset_in_page(buf);
> >> +
> >> +     BUG_ON(off + len > PAGE_SIZE);
> 
>          ^^^^
> 
> >> +
> >> +     if (is_vmalloc_addr(buf))
> >> +             sg_buf = page_address(vmalloc_to_page(buf)) + off;
> >
> > I'm not very familiar with the vm stuff, but this confuses me.  It looks
> > like it's taking the low memory (physical?) address of the first page in
> > the vmalloc'ed range.  But the whole point of vmalloc is that it is
> > allocating non-contiguous physical memory.  How does the sg code
> > traverse the rest of the buffer if it isn't using the virtual addresses
> > that vmalloc set up?
> 
> It doesn't - the buffer has to fit in a single page, works for the
> current users.  To make it work with multiple pages we'd have to
> allocate one sg per page and init each of them in this (or similar)
> fashion.

I see now (and I should read the comments next time :).  We talked about 
this in standup, but I'm a bit worried that this breaks when the small 
range we care about spans as page boundary in the vmalloc range.  Even if 
we don't ever trigger that today, we should at least call it out clearly 
so that it's less of a surprise when a new user trips over it...

sage

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 62fc5e7a9acf..37a9b5eea3c3 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -90,6 +90,27 @@  static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
 
 static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
 
+/*
+ * Should be used for buffers allocated with ceph_kvmalloc().
+ * Currently these are encrypt out-buffer (ceph_buffer) and decrypt
+ * in-buffer (msg front).  @buf has to fit in a single page.
+ */
+static void set_kvmalloc_buf(struct scatterlist *sg, const void *buf,
+			     size_t len)
+{
+	const void *sg_buf;
+	unsigned long off = offset_in_page(buf);
+
+	BUG_ON(off + len > PAGE_SIZE);
+
+	if (is_vmalloc_addr(buf))
+		sg_buf = page_address(vmalloc_to_page(buf)) + off;
+	else
+		sg_buf = buf;
+
+	sg_init_one(sg, sg_buf, len);
+}
+
 static int ceph_aes_encrypt(const void *key, int key_len,
 			    void *dst, size_t *dst_len,
 			    const void *src, size_t src_len)
@@ -114,8 +135,7 @@  static int ceph_aes_encrypt(const void *key, int key_len,
 	sg_init_table(sg_in, 2);
 	sg_set_buf(&sg_in[0], src, src_len);
 	sg_set_buf(&sg_in[1], pad, zero_padding);
-	sg_init_table(sg_out, 1);
-	sg_set_buf(sg_out, dst, *dst_len);
+	set_kvmalloc_buf(sg_out, dst, *dst_len);
 	iv = crypto_blkcipher_crt(tfm)->iv;
 	ivsize = crypto_blkcipher_ivsize(tfm);
 
@@ -166,8 +186,7 @@  static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
 	sg_set_buf(&sg_in[0], src1, src1_len);
 	sg_set_buf(&sg_in[1], src2, src2_len);
 	sg_set_buf(&sg_in[2], pad, zero_padding);
-	sg_init_table(sg_out, 1);
-	sg_set_buf(sg_out, dst, *dst_len);
+	set_kvmalloc_buf(sg_out, dst, *dst_len);
 	iv = crypto_blkcipher_crt(tfm)->iv;
 	ivsize = crypto_blkcipher_ivsize(tfm);
 
@@ -211,9 +230,8 @@  static int ceph_aes_decrypt(const void *key, int key_len,
 		return PTR_ERR(tfm);
 
 	crypto_blkcipher_setkey((void *)tfm, key, key_len);
-	sg_init_table(sg_in, 1);
+	set_kvmalloc_buf(sg_in, src, src_len);
 	sg_init_table(sg_out, 2);
-	sg_set_buf(sg_in, src, src_len);
 	sg_set_buf(&sg_out[0], dst, *dst_len);
 	sg_set_buf(&sg_out[1], pad, sizeof(pad));
 
@@ -271,8 +289,7 @@  static int ceph_aes_decrypt2(const void *key, int key_len,
 	if (IS_ERR(tfm))
 		return PTR_ERR(tfm);
 
-	sg_init_table(sg_in, 1);
-	sg_set_buf(sg_in, src, src_len);
+	set_kvmalloc_buf(sg_in, src, src_len);
 	sg_init_table(sg_out, 3);
 	sg_set_buf(&sg_out[0], dst1, *dst1_len);
 	sg_set_buf(&sg_out[1], dst2, *dst2_len);