diff mbox series

[v2] proc: use vmalloc for our kernel buffer

Message ID 20200813153356.857625-1-josef@toxicpanda.com (mailing list archive)
State New, archived
Headers show
Series [v2] proc: use vmalloc for our kernel buffer | expand

Commit Message

Josef Bacik Aug. 13, 2020, 3:33 p.m. UTC
Since

  sysctl: pass kernel pointers to ->proc_handler

we have been pre-allocating a buffer to copy the data from the proc
handlers into, and then copying that to userspace.  The problem is that this
just blindly kmalloc()s a buffer of the size passed in from the read, which in
the case of our 'cat' binary was 64KiB.  Order-4 allocations are not
awesome, and since we can potentially allocate up to our maximum order,
use vmalloc for these buffers.

Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler")
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
v1->v2:
- Make vmemdup_user_nul actually do the right thing...sorry about that.

 fs/proc/proc_sysctl.c  |  6 +++---
 include/linux/string.h |  1 +
 mm/util.c              | 27 +++++++++++++++++++++++++++
 3 files changed, 31 insertions(+), 3 deletions(-)

Comments

Christoph Hellwig Aug. 13, 2020, 3:37 p.m. UTC | #1
On Thu, Aug 13, 2020 at 11:33:56AM -0400, Josef Bacik wrote:
> Since
> 
>   sysctl: pass kernel pointers to ->proc_handler
> 
> we have been pre-allocating a buffer to copy the data from the proc
> handlers into, and then copying that to userspace.  The problem is this
> just blind kmalloc()'s the buffer size passed in from the read, which in
> the case of our 'cat' binary was 64kib.  Order-4 allocations are not
> awesome, and since we can potentially allocate up to our maximum order,
> use vmalloc for these buffers.
> 
> Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler")
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
> v1->v2:
> - Make vmemdup_user_nul actually do the right thing...sorry about that.
> 
>  fs/proc/proc_sysctl.c  |  6 +++---
>  include/linux/string.h |  1 +
>  mm/util.c              | 27 +++++++++++++++++++++++++++
>  3 files changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> index 6c1166ccdaea..207ac6e6e028 100644
> --- a/fs/proc/proc_sysctl.c
> +++ b/fs/proc/proc_sysctl.c
> @@ -571,13 +571,13 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
>  		goto out;
>  
>  	if (write) {
> -		kbuf = memdup_user_nul(ubuf, count);
> +		kbuf = vmemdup_user_nul(ubuf, count);

Given that this can also do a kmalloc and thus needs to be paired
with kvfree, shouldn't it be kvmemdup_user_nul?
Josef Bacik Aug. 13, 2020, 3:40 p.m. UTC | #2
On 8/13/20 11:37 AM, Christoph Hellwig wrote:
> On Thu, Aug 13, 2020 at 11:33:56AM -0400, Josef Bacik wrote:
>> Since
>>
>>    sysctl: pass kernel pointers to ->proc_handler
>>
>> we have been pre-allocating a buffer to copy the data from the proc
>> handlers into, and then copying that to userspace.  The problem is this
>> just blind kmalloc()'s the buffer size passed in from the read, which in
>> the case of our 'cat' binary was 64kib.  Order-4 allocations are not
>> awesome, and since we can potentially allocate up to our maximum order,
>> use vmalloc for these buffers.
>>
>> Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler")
>> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
>> ---
>> v1->v2:
>> - Make vmemdup_user_nul actually do the right thing...sorry about that.
>>
>>   fs/proc/proc_sysctl.c  |  6 +++---
>>   include/linux/string.h |  1 +
>>   mm/util.c              | 27 +++++++++++++++++++++++++++
>>   3 files changed, 31 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
>> index 6c1166ccdaea..207ac6e6e028 100644
>> --- a/fs/proc/proc_sysctl.c
>> +++ b/fs/proc/proc_sysctl.c
>> @@ -571,13 +571,13 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
>>   		goto out;
>>   
>>   	if (write) {
>> -		kbuf = memdup_user_nul(ubuf, count);
>> +		kbuf = vmemdup_user_nul(ubuf, count);
> 
> Given that this can also do a kmalloc and thus needs to be paired
> with kvfree shouldn't it be kvmemdup_user_nul?
> 

There's an existing vmemdup_user that does kvmalloc, so I followed the existing 
naming convention.  Do you want me to change them both?  Thanks,

Josef
Christoph Hellwig Aug. 13, 2020, 3:41 p.m. UTC | #3
On Thu, Aug 13, 2020 at 11:40:00AM -0400, Josef Bacik wrote:
> On 8/13/20 11:37 AM, Christoph Hellwig wrote:
>> On Thu, Aug 13, 2020 at 11:33:56AM -0400, Josef Bacik wrote:
>>> Since
>>>
>>>    sysctl: pass kernel pointers to ->proc_handler
>>>
>>> we have been pre-allocating a buffer to copy the data from the proc
>>> handlers into, and then copying that to userspace.  The problem is this
>>> just blind kmalloc()'s the buffer size passed in from the read, which in
>>> the case of our 'cat' binary was 64kib.  Order-4 allocations are not
>>> awesome, and since we can potentially allocate up to our maximum order,
>>> use vmalloc for these buffers.
>>>
>>> Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler")
>>> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
>>> ---
>>> v1->v2:
>>> - Make vmemdup_user_nul actually do the right thing...sorry about that.
>>>
>>>   fs/proc/proc_sysctl.c  |  6 +++---
>>>   include/linux/string.h |  1 +
>>>   mm/util.c              | 27 +++++++++++++++++++++++++++
>>>   3 files changed, 31 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
>>> index 6c1166ccdaea..207ac6e6e028 100644
>>> --- a/fs/proc/proc_sysctl.c
>>> +++ b/fs/proc/proc_sysctl.c
>>> @@ -571,13 +571,13 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
>>>   		goto out;
>>>     	if (write) {
>>> -		kbuf = memdup_user_nul(ubuf, count);
>>> +		kbuf = vmemdup_user_nul(ubuf, count);
>>
>> Given that this can also do a kmalloc and thus needs to be paired
>> with kvfree shouldn't it be kvmemdup_user_nul?
>>
>
> There's an existing vmemdup_user that does kvmalloc, so I followed the 
> existing naming convention.  Do you want me to change them both?  Thanks,

I personally would, and given that it only has a few users it might
even be feasible.
Al Viro Aug. 13, 2020, 4:20 p.m. UTC | #4
On Thu, Aug 13, 2020 at 05:41:17PM +0200, Christoph Hellwig wrote:
> On Thu, Aug 13, 2020 at 11:40:00AM -0400, Josef Bacik wrote:
> > On 8/13/20 11:37 AM, Christoph Hellwig wrote:
> >> On Thu, Aug 13, 2020 at 11:33:56AM -0400, Josef Bacik wrote:
> >>> Since
> >>>
> >>>    sysctl: pass kernel pointers to ->proc_handler
> >>>
> >>> we have been pre-allocating a buffer to copy the data from the proc
> >>> handlers into, and then copying that to userspace.  The problem is this
> >>> just blind kmalloc()'s the buffer size passed in from the read, which in
> >>> the case of our 'cat' binary was 64kib.  Order-4 allocations are not
> >>> awesome, and since we can potentially allocate up to our maximum order,
> >>> use vmalloc for these buffers.
> >>>
> >>> Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler")
> >>> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> >>> ---
> >>> v1->v2:
> >>> - Make vmemdup_user_nul actually do the right thing...sorry about that.
> >>>
> >>>   fs/proc/proc_sysctl.c  |  6 +++---
> >>>   include/linux/string.h |  1 +
> >>>   mm/util.c              | 27 +++++++++++++++++++++++++++
> >>>   3 files changed, 31 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
> >>> index 6c1166ccdaea..207ac6e6e028 100644
> >>> --- a/fs/proc/proc_sysctl.c
> >>> +++ b/fs/proc/proc_sysctl.c
> >>> @@ -571,13 +571,13 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
> >>>   		goto out;
> >>>     	if (write) {
> >>> -		kbuf = memdup_user_nul(ubuf, count);
> >>> +		kbuf = vmemdup_user_nul(ubuf, count);
> >>
> >> Given that this can also do a kmalloc and thus needs to be paired
> >> with kvfree shouldn't it be kvmemdup_user_nul?
> >>
> >
> > There's an existing vmemdup_user that does kvmalloc, so I followed the 
> > existing naming convention.  Do you want me to change them both?  Thanks,
> 
> I personally would, and given that it only has a few users it might
> even be feasible.

FWIW, how about following or combining that with "allocate count + 1 bytes on
the read side"?  Allows some nice cleanups - e.g.
                len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data);
                if (len > left)
                        len = left;
                memcpy(buffer, tmpbuf, len);
                if ((left -= len) > 0) {
                        *((char *)buffer + len) = '\n';
                        left--;
                }
in sunrpc proc_dodebug() turns into
		left -= snprintf(buffer, left, "0x%04x\n",
				 *(unsigned int *) table->data);
and that's not the only example.
Josef Bacik Aug. 13, 2020, 5:19 p.m. UTC | #5
On 8/13/20 12:20 PM, Al Viro wrote:
> On Thu, Aug 13, 2020 at 05:41:17PM +0200, Christoph Hellwig wrote:
>> On Thu, Aug 13, 2020 at 11:40:00AM -0400, Josef Bacik wrote:
>>> On 8/13/20 11:37 AM, Christoph Hellwig wrote:
>>>> On Thu, Aug 13, 2020 at 11:33:56AM -0400, Josef Bacik wrote:
>>>>> Since
>>>>>
>>>>>     sysctl: pass kernel pointers to ->proc_handler
>>>>>
>>>>> we have been pre-allocating a buffer to copy the data from the proc
>>>>> handlers into, and then copying that to userspace.  The problem is this
>>>>> just blind kmalloc()'s the buffer size passed in from the read, which in
>>>>> the case of our 'cat' binary was 64kib.  Order-4 allocations are not
>>>>> awesome, and since we can potentially allocate up to our maximum order,
>>>>> use vmalloc for these buffers.
>>>>>
>>>>> Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler")
>>>>> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
>>>>> ---
>>>>> v1->v2:
>>>>> - Make vmemdup_user_nul actually do the right thing...sorry about that.
>>>>>
>>>>>    fs/proc/proc_sysctl.c  |  6 +++---
>>>>>    include/linux/string.h |  1 +
>>>>>    mm/util.c              | 27 +++++++++++++++++++++++++++
>>>>>    3 files changed, 31 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
>>>>> index 6c1166ccdaea..207ac6e6e028 100644
>>>>> --- a/fs/proc/proc_sysctl.c
>>>>> +++ b/fs/proc/proc_sysctl.c
>>>>> @@ -571,13 +571,13 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
>>>>>    		goto out;
>>>>>      	if (write) {
>>>>> -		kbuf = memdup_user_nul(ubuf, count);
>>>>> +		kbuf = vmemdup_user_nul(ubuf, count);
>>>>
>>>> Given that this can also do a kmalloc and thus needs to be paired
>>>> with kvfree shouldn't it be kvmemdup_user_nul?
>>>>
>>>
>>> There's an existing vmemdup_user that does kvmalloc, so I followed the
>>> existing naming convention.  Do you want me to change them both?  Thanks,
>>
>> I personally would, and given that it only has a few users it might
>> even be feasible.
> 
> FWIW, how about following or combining that with "allocate count + 1 bytes on
> the read side"?  Allows some nice cleanups - e.g.
>                  len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data);
>                  if (len > left)
>                          len = left;
>                  memcpy(buffer, tmpbuf, len);
>                  if ((left -= len) > 0) {
>                          *((char *)buffer + len) = '\n';
>                          left--;
>                  }
> in sunrpc proc_dodebug() turns into
> 		left -= snprintf(buffer, left, "0x%04x\n",
> 				 *(unsigned int *) table->data);
> and that's not the only example.
> 

We wouldn't even need the extra +1 part, since we're only copying in how much 
the user wants anyway, we could just go ahead and convert this to

left -= snprintf(buffer, left, "0x%04x\n", *(unsigned int *) table->data);

and be fine, right?  Or am I misunderstanding what you're looking for?  Thanks,

Josef
Al Viro Aug. 13, 2020, 5:31 p.m. UTC | #6
On Thu, Aug 13, 2020 at 01:19:18PM -0400, Josef Bacik wrote:

> > in sunrpc proc_dodebug() turns into
> > 		left -= snprintf(buffer, left, "0x%04x\n",
					 ^^^^
					 left + 1, that is.

> > 				 *(unsigned int *) table->data);
> > and that's not the only example.
> > 
> 
> We wouldn't even need the extra +1 part, since we're only copying in how
> much the user wants anyway, we could just go ahead and convert this to
> 
> left -= snprintf(buffer, left, "0x%04x\n", *(unsigned int *) table->data);
> 
> and be fine, right?  Or am I misunderstanding what you're looking for?  Thanks,

snprintf() always produces a NUL-terminated string.  And if you are passing 7 as
len, you want 0xf0ad\n to be copied to user.  For that you need 8 passed to
snprintf, and 8-byte buffer given to it.
Josef Bacik Aug. 13, 2020, 5:36 p.m. UTC | #7
On 8/13/20 1:31 PM, Al Viro wrote:
> On Thu, Aug 13, 2020 at 01:19:18PM -0400, Josef Bacik wrote:
> 
>>> in sunrpc proc_dodebug() turns into
>>> 		left -= snprintf(buffer, left, "0x%04x\n",
> 					 ^^^^
> 					 left + 1, that is.
> 
>>> 				 *(unsigned int *) table->data);
>>> and that's not the only example.
>>>
>>
>> We wouldn't even need the extra +1 part, since we're only copying in how
>> much the user wants anyway, we could just go ahead and convert this to
>>
>> left -= snprintf(buffer, left, "0x%04x\n", *(unsigned int *) table->data);
>>
>> and be fine, right?  Or am I misunderstanding what you're looking for?  Thanks,
> 
> snprintf() always produces a NUL-terminated string.  And if you are passing 7 as
> len, you want 0xf0ad\n to be copied to user.  For that you need 8 passed to
> snprintf, and 8-byte buffer given to it.
> 

Right, gotcha.  I'll rig that up and see how it looks.  I'd recommend looking 
through what I do with a fine-tooth comb, I'm obviously not batting 1000 today.
Thanks,

Josef
David Laight Aug. 13, 2020, 9:10 p.m. UTC | #8
From: Josef Bacik
> Sent: 13 August 2020 18:19
...
> We wouldn't even need the extra +1 part, since we're only copying in how much
> the user wants anyway, we could just go ahead and convert this to
> 
> left -= snprintf(buffer, left, "0x%04x\n", *(unsigned int *) table->data);
> 
> and be fine, right?  Or am I misunderstanding what you're looking for?  Thanks,

Doesn't that need to be scnprintf()?
IIRC snprintf() returns the number of bytes that would have been
written if the buffer were of infinite size?
(I suspect this is an 'accidental' return value from the original
SYSV? userspace implementation that just dumped characters that
wouldn't fit in the buffer somewhere.)

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Josef Bacik Aug. 13, 2020, 9:31 p.m. UTC | #9
On 8/13/20 5:10 PM, David Laight wrote:
> From: Josef Bacik
>> Sent: 13 August 2020 18:19
> ...
>> We wouldn't even need the extra +1 part, since we're only copying in how much
>> the user wants anyway, we could just go ahead and convert this to
>>
>> left -= snprintf(buffer, left, "0x%04x\n", *(unsigned int *) table->data);
>>
>> and be fine, right?  Or am I misunderstanding what you're looking for?  Thanks,
> 
> Doesn't that need to be scnprintf()?
> IIRC snprintf() returns the number of bytes that would have been
> written were the buffer infinite size?
> (I suspect this is an 'accidental' return value from the original
> SYSV? userspace implementation that just dumped characters that
> wouldn't fit in the buffer somewhere.)
> 

Yeah, if you look at the patches I just sent you'll notice I used scnprintf() 
everywhere.  Thanks,

Josef
diff mbox series

Patch

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 6c1166ccdaea..207ac6e6e028 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -571,13 +571,13 @@  static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
 		goto out;
 
 	if (write) {
-		kbuf = memdup_user_nul(ubuf, count);
+		kbuf = vmemdup_user_nul(ubuf, count);
 		if (IS_ERR(kbuf)) {
 			error = PTR_ERR(kbuf);
 			goto out;
 		}
 	} else {
-		kbuf = kzalloc(count, GFP_KERNEL);
+		kbuf = kvzalloc(count, GFP_KERNEL);
 		if (!kbuf)
 			goto out;
 	}
@@ -600,7 +600,7 @@  static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
 
 	error = count;
 out_free_buf:
-	kfree(kbuf);
+	kvfree(kbuf);
 out:
 	sysctl_head_finish(head);
 
diff --git a/include/linux/string.h b/include/linux/string.h
index 9b7a0632e87a..aee3689fb865 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -12,6 +12,7 @@ 
 extern char *strndup_user(const char __user *, long);
 extern void *memdup_user(const void __user *, size_t);
 extern void *vmemdup_user(const void __user *, size_t);
+extern void *vmemdup_user_nul(const void __user *, size_t);
 extern void *memdup_user_nul(const void __user *, size_t);
 
 /*
diff --git a/mm/util.c b/mm/util.c
index 5ef378a2a038..9d0ad7aafc27 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -208,6 +208,33 @@  void *vmemdup_user(const void __user *src, size_t len)
 }
 EXPORT_SYMBOL(vmemdup_user);
 
+/**
+ * vmemdup_user_nul - duplicate memory region from user space and NUL-terminate
+ *
+ * @src: source address in user space
+ * @len: number of bytes to copy
+ *
+ * Return: the NUL-terminated copy, or an ERR_PTR() on failure.  Result may
+ * not be physically contiguous.  Use kvfree() to free.
+ */
+void *vmemdup_user_nul(const void __user *src, size_t len)
+{
+	char *p;
+
+	p = kvmalloc(len + 1, GFP_USER);
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(p, src, len)) {
+		kvfree(p);
+		return ERR_PTR(-EFAULT);
+	}
+	p[len] = '\0';
+
+	return p;
+}
+EXPORT_SYMBOL(vmemdup_user_nul);
+
 /**
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate