diff mbox series

mm/page_owner: use vmalloc instead of kmalloc

Message ID 1540492481-4144-1-git-send-email-miles.chen@mediatek.com (mailing list archive)
State New, archived
Headers show
Series mm/page_owner: use vmalloc instead of kmalloc | expand

Commit Message

Miles Chen Oct. 25, 2018, 6:34 p.m. UTC
From: Miles Chen <miles.chen@mediatek.com>

The kbuf used by page owner is allocated by kmalloc(),
which means it can use only normal memory and there might
be an "out of memory" issue when we're out of normal memory.

Use vmalloc() so we can also allocate kbuf from highmem
on 32bit kernel.

Signed-off-by: Miles Chen <miles.chen@mediatek.com>
---
 mm/page_owner.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Comments

Joe Perches Oct. 25, 2018, 6:44 p.m. UTC | #1
On Fri, 2018-10-26 at 02:34 +0800, miles.chen@mediatek.com wrote:
> From: Miles Chen <miles.chen@mediatek.com>
> 
> The kbuf used by page owner is allocated by kmalloc(),
> which means it can use only normal memory and there might
> be a "out of memory" issue when we're out of normal memory.
> 
> Use vmalloc() so we can also allocate kbuf from highmem
> on 32bit kernel.

If this is really necessary, using kvmalloc/kvfree would
be better as the vmalloc space is also limited.

> diff --git a/mm/page_owner.c b/mm/page_owner.c
[]
> @@ -1,7 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <linux/debugfs.h>
>  #include <linux/mm.h>
> -#include <linux/slab.h>
>  #include <linux/uaccess.h>
>  #include <linux/bootmem.h>
>  #include <linux/stacktrace.h>
> @@ -10,6 +9,7 @@
>  #include <linux/migrate.h>
>  #include <linux/stackdepot.h>
>  #include <linux/seq_file.h>
> +#include <linux/vmalloc.h>
>  
>  #include "internal.h"
>  
> @@ -351,7 +351,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
>  		.skip = 0
>  	};
>  
> -	kbuf = kmalloc(count, GFP_KERNEL);
> +	kbuf = vmalloc(count);
>  	if (!kbuf)
>  		return -ENOMEM;
>  
> @@ -397,11 +397,11 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
>  	if (copy_to_user(buf, kbuf, ret))
>  		ret = -EFAULT;
>  
> -	kfree(kbuf);
> +	vfree(kbuf);
>  	return ret;
>  
>  err:
> -	kfree(kbuf);
> +	vfree(kbuf);
>  	return -ENOMEM;
>  }
>
Matthew Wilcox Oct. 25, 2018, 7:27 p.m. UTC | #2
On Fri, Oct 26, 2018 at 02:34:41AM +0800, miles.chen@mediatek.com wrote:
> The kbuf used by page owner is allocated by kmalloc(),
> which means it can use only normal memory and there might
> be a "out of memory" issue when we're out of normal memory.
> 
> Use vmalloc() so we can also allocate kbuf from highmem
> on 32bit kernel.

... hang on, there's a bigger problem here.

static const struct file_operations proc_page_owner_operations = {
        .read           = read_page_owner,
};

read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
...
                return print_page_owner(buf, count, pfn, page,
                                page_owner, handle);
}

static ssize_t
print_page_owner(char __user *buf, size_t count, unsigned long pfn,
                struct page *page, struct page_owner *page_owner,
                depot_stack_handle_t handle)
{
...
      kbuf = kmalloc(count, GFP_KERNEL);

So I can force the kernel to make an arbitrary size allocation, triggering
OOMs and forcing swapping if I can get a file handle to this file.
The only saving grace is that (a) this is a debugfs file and (b) it's
root-only (mode 0400).  Nevertheless, I feel some clamping is called
for here.  Do we really need to output more than 4kB worth of text here?
Miles Chen Oct. 26, 2018, 8:01 a.m. UTC | #3
On Thu, 2018-10-25 at 11:44 -0700, Joe Perches wrote:
> On Fri, 2018-10-26 at 02:34 +0800, miles.chen@mediatek.com wrote:
> > From: Miles Chen <miles.chen@mediatek.com>
> > 
> > The kbuf used by page owner is allocated by kmalloc(),
> > which means it can use only normal memory and there might
> > be a "out of memory" issue when we're out of normal memory.
> > 
> > Use vmalloc() so we can also allocate kbuf from highmem
> > on 32bit kernel.
> 
> If this is really necessary, using kvmalloc/kvfree would
> be better as the vmalloc space is also limited.

Thanks for the advice.
kvmalloc/kvfree is better here.

> 
> > diff --git a/mm/page_owner.c b/mm/page_owner.c
> []
> > @@ -1,7 +1,6 @@
> >  // SPDX-License-Identifier: GPL-2.0
> >  #include <linux/debugfs.h>
> >  #include <linux/mm.h>
> > -#include <linux/slab.h>
> >  #include <linux/uaccess.h>
> >  #include <linux/bootmem.h>
> >  #include <linux/stacktrace.h>
> > @@ -10,6 +9,7 @@
> >  #include <linux/migrate.h>
> >  #include <linux/stackdepot.h>
> >  #include <linux/seq_file.h>
> > +#include <linux/vmalloc.h>
> >  
> >  #include "internal.h"
> >  
> > @@ -351,7 +351,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
> >  		.skip = 0
> >  	};
> >  
> > -	kbuf = kmalloc(count, GFP_KERNEL);
> > +	kbuf = vmalloc(count);
> >  	if (!kbuf)
> >  		return -ENOMEM;
> >  
> > @@ -397,11 +397,11 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
> >  	if (copy_to_user(buf, kbuf, ret))
> >  		ret = -EFAULT;
> >  
> > -	kfree(kbuf);
> > +	vfree(kbuf);
> >  	return ret;
> >  
> >  err:
> > -	kfree(kbuf);
> > +	vfree(kbuf);
> >  	return -ENOMEM;
> >  }
> >  
>
Miles Chen Oct. 26, 2018, 8:50 a.m. UTC | #4
On Thu, 2018-10-25 at 12:27 -0700, Matthew Wilcox wrote:
> On Fri, Oct 26, 2018 at 02:34:41AM +0800, miles.chen@mediatek.com wrote:
> > The kbuf used by page owner is allocated by kmalloc(),
> > which means it can use only normal memory and there might
> > be a "out of memory" issue when we're out of normal memory.
> > 
> > Use vmalloc() so we can also allocate kbuf from highmem
> > on 32bit kernel.
> 
> ... hang on, there's a bigger problem here.
> 
> static const struct file_operations proc_page_owner_operations = {
>         .read           = read_page_owner,
> };
> 
> read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
> {
> ...
>                 return print_page_owner(buf, count, pfn, page,
>                                 page_owner, handle);
> }
> 
> static ssize_t
> print_page_owner(char __user *buf, size_t count, unsigned long pfn,
>                 struct page *page, struct page_owner *page_owner,
>                 depot_stack_handle_t handle)
> {
> ...
>       kbuf = kmalloc(count, GFP_KERNEL);
> 
> So I can force the kernel to make an arbitrary size allocation, triggering
> OOMs and forcing swapping if I can get a file handle to this file.
> The only saving grace is that (a) this is a debugfs file and (b) it's
> root-only (mode 0400).  Nevertheless, I feel some clamping is called
> for here.  Do we really need to output more than 4kB worth of text here?
> 
I ran a test on my device: the allocation count is 4096, and around 6xx
bytes are used each time print_page_owner() is called. It looks like
clamping the read count to PAGE_SIZE is OK.

The following output from print_page_owner() is 660 bytes long, so I think
PAGE_SIZE should be enough to print the information we need.

Page allocated via order 0, mask 0x6200ca(GFP_HIGHUSER_MOVABLE)
PFN 262199 type Movable Block 512 type Movable Flags 0x4003c(referenced|
uptodate|dirty|lru|swapbacked)
 get_page_from_freelist+0x1580/0x1650
 __alloc_pages_nodemask+0xcc/0xfa4
 shmem_alloc_page+0xa4/0xc8
 shmem_alloc_and_acct_page+0x138/0x2b8
 shmem_getpage_gfp.isra.54+0x164/0xfc8
 shmem_write_begin+0x84/0xcc
 generic_perform_write+0xe8/0x210
 __generic_file_write_iter+0x1d4/0x230
 generic_file_write_iter+0x184/0x2e8
 new_sync_write+0x144/0x1c4
 vfs_write+0x194/0x278
 ksys_write+0x64/0xd4
 xwrite+0x34/0x84
 do_copy+0xf4/0x168
 flush_buffer+0x68/0xec
 __gunzip+0x370/0x448
Michal Hocko Oct. 26, 2018, 10:56 a.m. UTC | #5
On Thu 25-10-18 12:27:01, Matthew Wilcox wrote:
> On Fri, Oct 26, 2018 at 02:34:41AM +0800, miles.chen@mediatek.com wrote:
> > The kbuf used by page owner is allocated by kmalloc(),
> > which means it can use only normal memory and there might
> > be a "out of memory" issue when we're out of normal memory.
> > 
> > Use vmalloc() so we can also allocate kbuf from highmem
> > on 32bit kernel.
> 
> ... hang on, there's a bigger problem here.
> 
> static const struct file_operations proc_page_owner_operations = {
>         .read           = read_page_owner,
> };
> 
> read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
> {
> ...
>                 return print_page_owner(buf, count, pfn, page,
>                                 page_owner, handle);
> }
> 
> static ssize_t
> print_page_owner(char __user *buf, size_t count, unsigned long pfn,
>                 struct page *page, struct page_owner *page_owner,
>                 depot_stack_handle_t handle)
> {
> ...
>       kbuf = kmalloc(count, GFP_KERNEL);
> 
> So I can force the kernel to make an arbitrary size allocation, triggering
> OOMs and forcing swapping if I can get a file handle to this file.
> The only saving grace is that (a) this is a debugfs file and (b) it's
> root-only (mode 0400).  Nevertheless, I feel some clamping is called
> for here.  Do we really need to output more than 4kB worth of text here?

Completely agreed. Let's just clamp it to a single page. Userspace can
easily loop around the syscall.
diff mbox series

Patch

diff --git a/mm/page_owner.c b/mm/page_owner.c
index d80adfe702d3..7e6962adaa79 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -1,7 +1,6 @@ 
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/debugfs.h>
 #include <linux/mm.h>
-#include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/bootmem.h>
 #include <linux/stacktrace.h>
@@ -10,6 +9,7 @@ 
 #include <linux/migrate.h>
 #include <linux/stackdepot.h>
 #include <linux/seq_file.h>
+#include <linux/vmalloc.h>
 
 #include "internal.h"
 
@@ -351,7 +351,7 @@  print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 		.skip = 0
 	};
 
-	kbuf = kmalloc(count, GFP_KERNEL);
+	kbuf = vmalloc(count);
 	if (!kbuf)
 		return -ENOMEM;
 
@@ -397,11 +397,11 @@  print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 	if (copy_to_user(buf, kbuf, ret))
 		ret = -EFAULT;
 
-	kfree(kbuf);
+	vfree(kbuf);
 	return ret;
 
 err:
-	kfree(kbuf);
+	vfree(kbuf);
 	return -ENOMEM;
 }