
[v6,3/4] vmalloc: Add debugfs modfraginfo

Message ID 1536874298-23492-4-git-send-email-rick.p.edgecombe@intel.com (mailing list archive)
State New, archived
Series KASLR feature to randomize each loadable module

Commit Message

Edgecombe, Rick P Sept. 13, 2018, 9:31 p.m. UTC
Add a debugfs file, "modfraginfo", that provides information on module space
fragmentation. This can be used to determine whether loadable module
randomization is causing problems in extreme module loading situations, such as
huge numbers of modules or extremely large modules.

Sample output when KASLR is enabled and X86_64 is configured:
	Largest free space:	897912 kB
	  Total free space:	1025424 kB
Allocations in backup area:	0

Sample output when just X86_64 is configured:
	Largest free space:	897912 kB
	  Total free space:	1025424 kB

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
 mm/vmalloc.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 101 insertions(+), 1 deletion(-)

Comments

Kees Cook Sept. 21, 2018, 6:56 p.m. UTC | #1
On Thu, Sep 13, 2018 at 2:31 PM, Rick Edgecombe
<rick.p.edgecombe@intel.com> wrote:
> Add debugfs file "modfraginfo" for providing info on module space fragmentation.
> This can be used for determining if loadable module randomization is causing any
> problems for extreme module loading situations, like huge numbers of modules or
> extremely large modules.
>
> Sample output when KASLR is enabled and X86_64 is configured:
>         Largest free space:     897912 kB
>           Total free space:     1025424 kB
> Allocations in backup area:     0
>
> Sample output when just X86_64:
>         Largest free space:     897912 kB
>           Total free space:     1025424 kB
>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>

I like having these statistics available!

> ---
>  mm/vmalloc.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 101 insertions(+), 1 deletion(-)
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 1954458..a44b902 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -18,6 +18,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/proc_fs.h>
>  #include <linux/seq_file.h>
> +#include <linux/debugfs.h>
>  #include <linux/debugobjects.h>
>  #include <linux/kallsyms.h>
>  #include <linux/list.h>
> @@ -33,6 +34,7 @@
>  #include <linux/bitops.h>
>
>  #include <linux/uaccess.h>
> +#include <asm/setup.h>
>  #include <asm/tlbflush.h>
>  #include <asm/shmparam.h>
>
> @@ -2919,7 +2921,105 @@ static int __init proc_vmalloc_init(void)
>                 proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
>         return 0;
>  }
> -module_init(proc_vmalloc_init);
> +#else
> +static int __init proc_vmalloc_init(void)
> +{
> +       return 0;
> +}
> +#endif
> +
> +#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_X86_64)
> +static inline unsigned long is_in_backup(unsigned long addr)
> +{
> +       return addr >= MODULES_VADDR + MODULES_RAND_LEN;
> +}
> +#else
> +static inline unsigned long is_in_backup(unsigned long addr)
> +{
> +       return 0;
> +}
>
> +inline bool kaslr_enabled(void);
>  #endif
>
> +
> +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_64)
> +static int modulefraginfo_debug_show(struct seq_file *m, void *v)
> +{
> +       unsigned long last_end = MODULES_VADDR;
> +       unsigned long total_free = 0;
> +       unsigned long largest_free = 0;
> +       unsigned long backup_cnt = 0;
> +       unsigned long gap;
> +       struct vmap_area *prev, *cur = NULL;
> +
> +       spin_lock(&vmap_area_lock);
> +
> +       if (!pvm_find_next_prev(MODULES_VADDR, &cur, &prev) || !cur)
> +               goto done;
> +
> +       for (; cur->va_end <= MODULES_END; cur = list_next_entry(cur, list)) {
> +               /* Don't count areas that are marked to be lazily freed */
> +               if (!(cur->flags & VM_LAZY_FREE)) {
> +                       backup_cnt += is_in_backup(cur->va_start);
> +                       gap = cur->va_start - last_end;
> +                       if (gap > largest_free)
> +                               largest_free = gap;
> +                       total_free += gap;
> +                       last_end = cur->va_end;
> +               }
> +
> +               if (list_is_last(&cur->list, &vmap_area_list))
> +                       break;
> +       }
> +
> +done:
> +       gap = (MODULES_END - last_end);
> +       if (gap > largest_free)
> +               largest_free = gap;
> +       total_free += gap;
> +
> +       spin_unlock(&vmap_area_lock);
> +
> +       seq_printf(m, "\tLargest free space:\t%lu kB\n", largest_free / 1024);
> +       seq_printf(m, "\t  Total free space:\t%lu kB\n", total_free / 1024);
> +
> +       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled())
> +               seq_printf(m, "Allocations in backup area:\t%lu\n", backup_cnt);

I don't think the IS_ENABLED is needed here?

I wonder if there is a better way to arrange this code that uses fewer
ifdefs, etc. Maybe a single CONFIG that captures whether or not
fine-grained module randomization is built in, like:

config RANDOMIZE_FINE_MODULE
    def_bool y if RANDOMIZE_BASE && X86_64

#ifdef CONFIG_RANDOMIZE_FINE_MODULE
...
#endif

But that doesn't capture the DEBUG_FS and PROC_FS bits ... so ...
maybe not worth it. I guess, either way:

Reviewed-by: Kees Cook <keescook@chromium.org>

-Kees
Edgecombe, Rick P Sept. 24, 2018, 6:58 p.m. UTC | #2
On Fri, 2018-09-21 at 11:56 -0700, Kees Cook wrote:
> On Thu, Sep 13, 2018 at 2:31 PM, Rick Edgecombe
> <rick.p.edgecombe@intel.com> wrote:
> > +done:
> > +       gap = (MODULES_END - last_end);
> > +       if (gap > largest_free)
> > +               largest_free = gap;
> > +       total_free += gap;
> > +
> > +       spin_unlock(&vmap_area_lock);
> > +
> > +       seq_printf(m, "\tLargest free space:\t%lu kB\n", largest_free /
> > 1024);
> > +       seq_printf(m, "\t  Total free space:\t%lu kB\n", total_free / 1024);
> > +
> > +       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled())
> > +               seq_printf(m, "Allocations in backup area:\t%lu\n",
> > backup_cnt);
> I don't think the IS_ENABLED is needed here?
The reason for this is that for ARCH=um, CONFIG_X86_64 is defined but
kaslr_enabled is not. kaslr_enabled is declared above to protect against a
compiler error.

So IS_ENABLED(CONFIG_RANDOMIZE_BASE) is protecting kaslr_enabled from causing a
linker error: it gets constant-evaluated to 0 and the compiler optimizes out the
kaslr_enabled call. I thought it was better to guard with CONFIG_RANDOMIZE_BASE
than with CONFIG_UM, to try to catch the broader situation. I guess I could move
it to a helper inside ifdefs instead; I was trying to keep the ifdef-ed code down.
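
For illustration, the helper version would look roughly like this
(mod_rand_active() is a made-up name, not something in the patch):

/*
 * Sketch only: keep the kaslr_enabled() call inside the same #ifdef
 * pair the patch already has, so a !RANDOMIZE_BASE build never
 * references the missing symbol and the caller needs no IS_ENABLED().
 */
#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_X86_64)
static inline bool mod_rand_active(void)
{
	return kaslr_enabled();
}
#else
static inline bool mod_rand_active(void)
{
	return false;
}
#endif

and then in modulefraginfo_debug_show():

	if (mod_rand_active())
		seq_printf(m, "Allocations in backup area:\t%lu\n", backup_cnt);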

> I wonder if there is a better way to arrange this code that uses fewer
> ifdefs, etc. Maybe a single CONFIG that capture whether or not
> fine-grained module randomization is built in, like:
> 
> config RANDOMIZE_FINE_MODULE
>     def_bool y if RANDOMIZE_BASE && X86_64
> 
> #ifdef CONFIG_RANDOMIZE_FINE_MODULE
> ...
> #endif
> 
> But that doesn't capture the DEBUG_FS and PROC_FS bits ... so ...
> maybe not worth it. I guess, either way:
Hmmm, didn't know about that. Would clean it up some at least.

I wish the debugfs info could be in module.c to help with the ifdefs, but it
needs vmalloc internals. MODULES_VADDR is not standardized across architectures
either, so this was my best attempt at implementing this without having to make
changes in other architectures.
> Reviewed-by: Kees Cook <keescook@chromium.org>
> 
> -Kees
>
Kees Cook Sept. 24, 2018, 8:03 p.m. UTC | #3
On Mon, Sep 24, 2018 at 11:58 AM, Edgecombe, Rick P
<rick.p.edgecombe@intel.com> wrote:
> On Fri, 2018-09-21 at 11:56 -0700, Kees Cook wrote:
>> On Thu, Sep 13, 2018 at 2:31 PM, Rick Edgecombe
>> <rick.p.edgecombe@intel.com> wrote:
>> > +done:
>> > +       gap = (MODULES_END - last_end);
>> > +       if (gap > largest_free)
>> > +               largest_free = gap;
>> > +       total_free += gap;
>> > +
>> > +       spin_unlock(&vmap_area_lock);
>> > +
>> > +       seq_printf(m, "\tLargest free space:\t%lu kB\n", largest_free /
>> > 1024);
>> > +       seq_printf(m, "\t  Total free space:\t%lu kB\n", total_free / 1024);
>> > +
>> > +       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled())
>> > +               seq_printf(m, "Allocations in backup area:\t%lu\n",
>> > backup_cnt);
>> I don't think the IS_ENABLED is needed here?
> The reason for this is that for ARCH=um, CONFIG_X86_64 is defined but
> kaslr_enabled is not. kaslr_enabled is declared above to protect against a
> compiler error.
>
> So IS_ENABLED(CONFIG_RANDOMIZE_BASE) is protecting kaslr_enabled from causing a
> linker error. It gets constant evaluated to 0 and the compiler optimizes out the
> kaslr_enabled call. Thought it was better to guard with CONFIG_RANDOMIZE_BASE
> than with CONFIG_UM, to try to catch the broader situation. I guess I could move
> it to a helper inside ifdefs instead. Was trying to keep the ifdef-ed code down.

Ah yes, UM. Perhaps kaslr_enabled() could be defined somewhere so that
it would link sanely? (Maybe in module.h?)
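
Something like this in a common header, perhaps (untested sketch;
HAVE_ARCH_KASLR_ENABLED is a made-up guard an arch would define when it
provides the real thing):

#ifndef HAVE_ARCH_KASLR_ENABLED
/* Fallback so configs without a real kaslr_enabled() (e.g. ARCH=um) still link. */
static inline bool kaslr_enabled(void)
{
	return false;
}
#endif

Then the seq_printf() in vmalloc.c could just call kaslr_enabled() directly,
without the IS_ENABLED() guard or the extra declaration in the #else branch.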

>> I wonder if there is a better way to arrange this code that uses fewer
>> ifdefs, etc. Maybe a single CONFIG that capture whether or not
>> fine-grained module randomization is built in, like:
>>
>> config RANDOMIZE_FINE_MODULE
>>     def_bool y if RANDOMIZE_BASE && X86_64
>>
>> #ifdef CONFIG_RANDOMIZE_FINE_MODULE
>> ...
>> #endif
>>
>> But that doesn't capture the DEBUG_FS and PROC_FS bits ... so ...
>> maybe not worth it. I guess, either way:
> Hmmm, didn't know about that. Would clean it up some at least.
>
> I wish the debugfs info could be in module.c to help with this IFDEFs, but it
> needs vmalloc internals. MODULES_VADDR is not standardized across the ARCH's as
> well, so this was my best attempt to implement this without having to make
> changes in other architectures.

Yeah, I've long wanted to try to standardize the module+vmalloc guts,
but it's just different enough in each architecture that it eludes
people.

-Kees

Patch

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1954458..a44b902 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -18,6 +18,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/debugfs.h>
 #include <linux/debugobjects.h>
 #include <linux/kallsyms.h>
 #include <linux/list.h>
@@ -33,6 +34,7 @@ 
 #include <linux/bitops.h>
 
 #include <linux/uaccess.h>
+#include <asm/setup.h>
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
@@ -2919,7 +2921,105 @@  static int __init proc_vmalloc_init(void)
 		proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
 	return 0;
 }
-module_init(proc_vmalloc_init);
+#else
+static int __init proc_vmalloc_init(void)
+{
+	return 0;
+}
+#endif
+
+#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_X86_64)
+static inline unsigned long is_in_backup(unsigned long addr)
+{
+	return addr >= MODULES_VADDR + MODULES_RAND_LEN;
+}
+#else
+static inline unsigned long is_in_backup(unsigned long addr)
+{
+	return 0;
+}
 
+inline bool kaslr_enabled(void);
 #endif
 
+
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_64)
+static int modulefraginfo_debug_show(struct seq_file *m, void *v)
+{
+	unsigned long last_end = MODULES_VADDR;
+	unsigned long total_free = 0;
+	unsigned long largest_free = 0;
+	unsigned long backup_cnt = 0;
+	unsigned long gap;
+	struct vmap_area *prev, *cur = NULL;
+
+	spin_lock(&vmap_area_lock);
+
+	if (!pvm_find_next_prev(MODULES_VADDR, &cur, &prev) || !cur)
+		goto done;
+
+	for (; cur->va_end <= MODULES_END; cur = list_next_entry(cur, list)) {
+		/* Don't count areas that are marked to be lazily freed */
+		if (!(cur->flags & VM_LAZY_FREE)) {
+			backup_cnt += is_in_backup(cur->va_start);
+			gap = cur->va_start - last_end;
+			if (gap > largest_free)
+				largest_free = gap;
+			total_free += gap;
+			last_end = cur->va_end;
+		}
+
+		if (list_is_last(&cur->list, &vmap_area_list))
+			break;
+	}
+
+done:
+	gap = (MODULES_END - last_end);
+	if (gap > largest_free)
+		largest_free = gap;
+	total_free += gap;
+
+	spin_unlock(&vmap_area_lock);
+
+	seq_printf(m, "\tLargest free space:\t%lu kB\n", largest_free / 1024);
+	seq_printf(m, "\t  Total free space:\t%lu kB\n", total_free / 1024);
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled())
+		seq_printf(m, "Allocations in backup area:\t%lu\n", backup_cnt);
+
+	return 0;
+}
+
+static int proc_module_frag_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, modulefraginfo_debug_show, NULL);
+}
+
+static const struct file_operations debug_module_frag_operations = {
+	.open       = proc_module_frag_debug_open,
+	.read       = seq_read,
+	.llseek     = seq_lseek,
+	.release    = single_release,
+};
+
+static void __init debug_modfrag_init(void)
+{
+	debugfs_create_file("modfraginfo", 0400, NULL, NULL,
+			&debug_module_frag_operations);
+}
+#else /* defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_64) */
+static void __init debug_modfrag_init(void)
+{
+}
+#endif
+
+#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_PROC_FS)
+static int __init info_vmalloc_init(void)
+{
+	proc_vmalloc_init();
+	debug_modfrag_init();
+	return 0;
+}
+
+module_init(info_vmalloc_init);
+#endif