[v7,13/14] module: Do not set nx for module memory before freeing
diff mbox series

Message ID 20181205013408.47725-14-namit@vmware.com
State New
Headers show
Series
  • x86/alternative: text_poke() enhancements
Related show

Commit Message

Nadav Amit Dec. 5, 2018, 1:34 a.m. UTC
When module memory is about to be freed, there is no apparent reason to
make it (and its data) executable, but that's exactly what is done
today. This is not efficient and not secure.

There are various theories about why it was done, but none of them seems to
be something that really requires it today. nios2 uses kmalloc for module
memory, but in any case it does not change the PTEs of the module memory. On
x86, changing vmalloc'd memory mappings also modifies the direct-mapping
alias, but the NX bit is not modified in that way.

So let's remove it. Andy suggested that the changes of the PTEs can be
avoided (excluding the direct-mapping alias), which is true. However,
on x86 it requires some cleanup of the change_page_attr() (CPA) code, which
is outside the scope of this patch set.

Cc: Rick P Edgecombe <rick.p.edgecombe@intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
---
 kernel/module.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

Comments

Peter Zijlstra Dec. 6, 2018, 9:57 a.m. UTC | #1
On Tue, Dec 04, 2018 at 05:34:07PM -0800, Nadav Amit wrote:

> So let's remove it. Andy suggested that the changes of the PTEs can be
> avoided (excluding the direct-mapping alias), which is true. However,
> in x86 it requires some cleanup of the contiguous page allocator, which
> is outside of the scope of this patch-set.

I think x86-cpa stands for change_page_attr() :-)
Andrea Parri Dec. 6, 2018, 11:13 a.m. UTC | #2
On Tue, Dec 04, 2018 at 05:34:07PM -0800, Nadav Amit wrote:
> When module memory is about to be freed, there is no apparent reason to
> make it (and its data) executable, but that's exactly what is done
> today. This is not efficient and not secure.

Looks to me like you forgot to Cc the maintainer of this file: doing it
now.  The same consideration would hold for 14/14.

  Andrea


> 
> There are various theories why it was done, but none of them seem as
> something that really require it today. nios2 uses kmalloc for module
> memory, but anyhow it does not change the PTEs of the module memory.  In
> x86, changing vmalloc'd memory mappings also modifies the direct mapping
> alias, but the NX-bit is not modified in such way.
> 
> So let's remove it. Andy suggested that the changes of the PTEs can be
> avoided (excluding the direct-mapping alias), which is true. However,
> in x86 it requires some cleanup of the contiguous page allocator, which
> is outside of the scope of this patch-set.
> 
> Cc: Rick P Edgecombe <rick.p.edgecombe@intel.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Andy Lutomirski <luto@kernel.org>
> Signed-off-by: Nadav Amit <namit@vmware.com>
> ---
>  kernel/module.c | 35 ++++++++++++++++++++++-------------
>  1 file changed, 22 insertions(+), 13 deletions(-)
> 
> diff --git a/kernel/module.c b/kernel/module.c
> index 7cb207249437..57c5b23746e7 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -2027,20 +2027,29 @@ void set_all_modules_text_ro(void)
>  	mutex_unlock(&module_mutex);
>  }
>  
> -static void disable_ro_nx(const struct module_layout *layout)
> +static void module_restore_mappings(const struct module_layout *layout)
>  {
> -	if (rodata_enabled) {
> -		frob_text(layout, set_memory_rw);
> -		frob_rodata(layout, set_memory_rw);
> -		frob_ro_after_init(layout, set_memory_rw);
> -	}
> -	frob_rodata(layout, set_memory_x);
> -	frob_ro_after_init(layout, set_memory_x);
> -	frob_writable_data(layout, set_memory_x);
> +	/*
> +	 * First, make the mappings of the code non-executable to prevent
> +	 * transient W+X mappings from being set when the text is set as RW.
> +	 */
> +	frob_text(layout, set_memory_nx);
> +
> +	if (!rodata_enabled)
> +		return;
> +
> +	/*
> +	 * Second, set the memory as writable. Although the module memory is
> +	 * about to be freed, these calls are required (at least on x86) to
> +	 * restore the direct map to its "correct" state.
> +	 */
> +	frob_text(layout, set_memory_rw);
> +	frob_rodata(layout, set_memory_rw);
> +	frob_ro_after_init(layout, set_memory_rw);
>  }
>  
>  #else
> -static void disable_ro_nx(const struct module_layout *layout) { }
> +static void module_restore_mappings(const struct module_layout *layout) { }
>  static void module_enable_nx(const struct module *mod) { }
>  static void module_disable_nx(const struct module *mod) { }
>  #endif
> @@ -2173,7 +2182,7 @@ static void free_module(struct module *mod)
>  	mutex_unlock(&module_mutex);
>  
>  	/* This may be empty, but that's OK */
> -	disable_ro_nx(&mod->init_layout);
> +	module_restore_mappings(&mod->init_layout);
>  	module_arch_freeing_init(mod);
>  	module_memfree(mod->init_layout.base);
>  	kfree(mod->args);
> @@ -2183,7 +2192,7 @@ static void free_module(struct module *mod)
>  	lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
>  
>  	/* Finally, free the core (containing the module structure) */
> -	disable_ro_nx(&mod->core_layout);
> +	module_restore_mappings(&mod->core_layout);
>  	module_memfree(mod->core_layout.base);
>  }
>  
> @@ -3507,7 +3516,7 @@ static noinline int do_init_module(struct module *mod)
>  #endif
>  	module_enable_ro(mod, true);
>  	mod_tree_remove_init(mod);
> -	disable_ro_nx(&mod->init_layout);
> +	module_restore_mappings(&mod->init_layout);
>  	module_arch_freeing_init(mod);
>  	mod->init_layout.base = NULL;
>  	mod->init_layout.size = 0;
> -- 
> 2.17.1
>
Nadav Amit Dec. 6, 2018, 5:28 p.m. UTC | #3
> On Dec 6, 2018, at 1:57 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> 
> On Tue, Dec 04, 2018 at 05:34:07PM -0800, Nadav Amit wrote:
> 
>> So let's remove it. Andy suggested that the changes of the PTEs can be
>> avoided (excluding the direct-mapping alias), which is true. However,
>> in x86 it requires some cleanup of the contiguous page allocator, which
>> is outside of the scope of this patch-set.
> 
> I think x86-cpa stands for change_page_attr() :-)

Thanks - it makes much more sense… I took the first thing that Google
showed.
Andy Lutomirski Dec. 6, 2018, 6:52 p.m. UTC | #4
On Wed, Dec 5, 2018 at 12:52 AM Nadav Amit <namit@vmware.com> wrote:
>
> When module memory is about to be freed, there is no apparent reason to
> make it (and its data) executable, but that's exactly what is done
> today. This is not efficient and not secure.
>
> There are various theories why it was done, but none of them seem as
> something that really require it today. nios2 uses kmalloc for module
> memory, but anyhow it does not change the PTEs of the module memory.  In
> x86, changing vmalloc'd memory mappings also modifies the direct mapping
> alias, but the NX-bit is not modified in such way.
>
> So let's remove it. Andy suggested that the changes of the PTEs can be
> avoided (excluding the direct-mapping alias), which is true. However,
> in x86 it requires some cleanup of the contiguous page allocator, which
> is outside of the scope of this patch-set.
>


I'm okay with this, but I'd like to see Rick's stuff get rebased on
top of it and clean it up for real.
Nadav Amit Dec. 6, 2018, 6:56 p.m. UTC | #5
> On Dec 6, 2018, at 10:52 AM, Andy Lutomirski <luto@kernel.org> wrote:
> 
> On Wed, Dec 5, 2018 at 12:52 AM Nadav Amit <namit@vmware.com> wrote:
>> When module memory is about to be freed, there is no apparent reason to
>> make it (and its data) executable, but that's exactly what is done
>> today. This is not efficient and not secure.
>> 
>> There are various theories why it was done, but none of them seem as
>> something that really require it today. nios2 uses kmalloc for module
>> memory, but anyhow it does not change the PTEs of the module memory.  In
>> x86, changing vmalloc'd memory mappings also modifies the direct mapping
>> alias, but the NX-bit is not modified in such way.
>> 
>> So let's remove it. Andy suggested that the changes of the PTEs can be
>> avoided (excluding the direct-mapping alias), which is true. However,
>> in x86 it requires some cleanup of the contiguous page allocator, which
>> is outside of the scope of this patch-set.
> 
> 
> I'm okay with this, but I'd like to see Rick's stuff get rebased on
> top of it and clean it up for real.

Sorry for my laziness. It just seems that every small thing I touch in
regard to W^X or text_poke() is broken, and I need to finish some other
“chores” first.
Edgecombe, Rick P Dec. 6, 2018, 8:21 p.m. UTC | #6
On Thu, 2018-12-06 at 10:52 -0800, Andy Lutomirski wrote:
> On Wed, Dec 5, 2018 at 12:52 AM Nadav Amit <namit@vmware.com> wrote:
> > 
> > When module memory is about to be freed, there is no apparent reason to
> > make it (and its data) executable, but that's exactly what is done
> > today. This is not efficient and not secure.
> > 
> > There are various theories why it was done, but none of them seem as
> > something that really require it today. nios2 uses kmalloc for module
> > memory, but anyhow it does not change the PTEs of the module memory.  In
> > x86, changing vmalloc'd memory mappings also modifies the direct mapping
> > alias, but the NX-bit is not modified in such way.
> > 
> > So let's remove it. Andy suggested that the changes of the PTEs can be
> > avoided (excluding the direct-mapping alias), which is true. However,
> > in x86 it requires some cleanup of the contiguous page allocator, which
> > is outside of the scope of this patch-set.
> > 
> 
> 
> I'm okay with this, but I'd like to see Rick's stuff get rebased on
> top of it and clean it up for real.

Nadav,

Hmm, since you are trying to move things forward and not close all cases in one
swoop, would it make sense to split the modules W^X mission from this patchset?

Thanks,

Rick
Nadav Amit Dec. 6, 2018, 8:29 p.m. UTC | #7
> On Dec 6, 2018, at 12:21 PM, Edgecombe, Rick P <rick.p.edgecombe@intel.com> wrote:
> 
> On Thu, 2018-12-06 at 10:52 -0800, Andy Lutomirski wrote:
>> On Wed, Dec 5, 2018 at 12:52 AM Nadav Amit <namit@vmware.com> wrote:
>>> When module memory is about to be freed, there is no apparent reason to
>>> make it (and its data) executable, but that's exactly what is done
>>> today. This is not efficient and not secure.
>>> 
>>> There are various theories why it was done, but none of them seem as
>>> something that really require it today. nios2 uses kmalloc for module
>>> memory, but anyhow it does not change the PTEs of the module memory.  In
>>> x86, changing vmalloc'd memory mappings also modifies the direct mapping
>>> alias, but the NX-bit is not modified in such way.
>>> 
>>> So let's remove it. Andy suggested that the changes of the PTEs can be
>>> avoided (excluding the direct-mapping alias), which is true. However,
>>> in x86 it requires some cleanup of the contiguous page allocator, which
>>> is outside of the scope of this patch-set.
>> 
>> 
>> I'm okay with this, but I'd like to see Rick's stuff get rebased on
>> top of it and clean it up for real.
> 
> Nadav,
> 
> Hmm, since you are trying to move things forward and not close all cases in one
> swoop, would it make sense to split the modules W^X mission from this patchset?

That’s what I tried to “hint”. Tglx asked for the module stuff in one of the
previous versions.
Jessica Yu Dec. 13, 2018, 2:10 p.m. UTC | #8
+++ Nadav Amit [04/12/18 17:34 -0800]:
>When module memory is about to be freed, there is no apparent reason to
>make it (and its data) executable, but that's exactly what is done
>today. This is not efficient and not secure.
>
>There are various theories why it was done, but none of them seem as
>something that really require it today. nios2 uses kmalloc for module
>memory, but anyhow it does not change the PTEs of the module memory.  In
>x86, changing vmalloc'd memory mappings also modifies the direct mapping
>alias, but the NX-bit is not modified in such way.
>
>So let's remove it. Andy suggested that the changes of the PTEs can be
>avoided (excluding the direct-mapping alias), which is true. However,
>in x86 it requires some cleanup of the contiguous page allocator, which
>is outside of the scope of this patch-set.
>
>Cc: Rick P Edgecombe <rick.p.edgecombe@intel.com>
>Cc: Will Deacon <will.deacon@arm.com>
>Cc: Andy Lutomirski <luto@kernel.org>
>Signed-off-by: Nadav Amit <namit@vmware.com>

[ Thanks Andrea Parri for the cc ]

Regarding the patch subject, don't you mean "Do not make module
memory executable" or "Do not unset nx" instead of "Do not set nx"?
Hm, these double negatives are confusing :-)

I think this also needs to be done in the load_module() error path.
See the bug_cleanup label. There, module_disable_{ro,nx}() are called
before module deallocation.

I am not sure why all this was made executable before freeing in the
first place.  Tried to dig through the commit history and the first
commit that introduced this behavior was 448694a1d50 ("module: undo
module RONX protection correctly"). There, the behavior was changed
from W+NX to W+X before releasing the module. But AFAIK from the
changelog, there was no real technical reason behind it, it stemmed
out of the complaint of code asymmetry :-/

Jessica

>---
> kernel/module.c | 35 ++++++++++++++++++++++-------------
> 1 file changed, 22 insertions(+), 13 deletions(-)
>
>diff --git a/kernel/module.c b/kernel/module.c
>index 7cb207249437..57c5b23746e7 100644
>--- a/kernel/module.c
>+++ b/kernel/module.c
>@@ -2027,20 +2027,29 @@ void set_all_modules_text_ro(void)
> 	mutex_unlock(&module_mutex);
> }
>
>-static void disable_ro_nx(const struct module_layout *layout)
>+static void module_restore_mappings(const struct module_layout *layout)
> {
>-	if (rodata_enabled) {
>-		frob_text(layout, set_memory_rw);
>-		frob_rodata(layout, set_memory_rw);
>-		frob_ro_after_init(layout, set_memory_rw);
>-	}
>-	frob_rodata(layout, set_memory_x);
>-	frob_ro_after_init(layout, set_memory_x);
>-	frob_writable_data(layout, set_memory_x);
>+	/*
>+	 * First, make the mappings of the code non-executable to prevent
>+	 * transient W+X mappings from being set when the text is set as RW.
>+	 */
>+	frob_text(layout, set_memory_nx);
>+
>+	if (!rodata_enabled)
>+		return;
>+
>+	/*
>+	 * Second, set the memory as writable. Although the module memory is
>+	 * about to be freed, these calls are required (at least on x86) to
>+	 * restore the direct map to its "correct" state.
>+	 */
>+	frob_text(layout, set_memory_rw);
>+	frob_rodata(layout, set_memory_rw);
>+	frob_ro_after_init(layout, set_memory_rw);
> }
>
> #else
>-static void disable_ro_nx(const struct module_layout *layout) { }
>+static void module_restore_mappings(const struct module_layout *layout) { }
> static void module_enable_nx(const struct module *mod) { }
> static void module_disable_nx(const struct module *mod) { }
> #endif
>@@ -2173,7 +2182,7 @@ static void free_module(struct module *mod)
> 	mutex_unlock(&module_mutex);
>
> 	/* This may be empty, but that's OK */
>-	disable_ro_nx(&mod->init_layout);
>+	module_restore_mappings(&mod->init_layout);
> 	module_arch_freeing_init(mod);
> 	module_memfree(mod->init_layout.base);
> 	kfree(mod->args);
>@@ -2183,7 +2192,7 @@ static void free_module(struct module *mod)
> 	lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
>
> 	/* Finally, free the core (containing the module structure) */
>-	disable_ro_nx(&mod->core_layout);
>+	module_restore_mappings(&mod->core_layout);
> 	module_memfree(mod->core_layout.base);
> }
>
>@@ -3507,7 +3516,7 @@ static noinline int do_init_module(struct module *mod)
> #endif
> 	module_enable_ro(mod, true);
> 	mod_tree_remove_init(mod);
>-	disable_ro_nx(&mod->init_layout);
>+	module_restore_mappings(&mod->init_layout);
> 	module_arch_freeing_init(mod);
> 	mod->init_layout.base = NULL;
> 	mod->init_layout.size = 0;
>-- 
>2.17.1
>
Nadav Amit Dec. 13, 2018, 5:25 p.m. UTC | #9
> On Dec 13, 2018, at 6:10 AM, Jessica Yu <jeyu@kernel.org> wrote:
> 
> +++ Nadav Amit [04/12/18 17:34 -0800]:
>> When module memory is about to be freed, there is no apparent reason to
>> make it (and its data) executable, but that's exactly what is done
>> today. This is not efficient and not secure.
>> 
>> There are various theories why it was done, but none of them seem as
>> something that really require it today. nios2 uses kmalloc for module
>> memory, but anyhow it does not change the PTEs of the module memory.  In
>> x86, changing vmalloc'd memory mappings also modifies the direct mapping
>> alias, but the NX-bit is not modified in such way.
>> 
>> So let's remove it. Andy suggested that the changes of the PTEs can be
>> avoided (excluding the direct-mapping alias), which is true. However,
>> in x86 it requires some cleanup of the contiguous page allocator, which
>> is outside of the scope of this patch-set.
>> 
>> Cc: Rick P Edgecombe <rick.p.edgecombe@intel.com>
>> Cc: Will Deacon <will.deacon@arm.com>
>> Cc: Andy Lutomirski <luto@kernel.org>
>> Signed-off-by: Nadav Amit <namit@vmware.com>
> 
> [ Thanks Andrea Parri for the cc ]
> 
> Regarding the patch subject, don't you mean "Do not make module
> memory executable" or "Do not unset nx" instead of "Do not set nx"?
> Hm, these double negatives are confusing :-)

I guess it is just plain wrong in this case… ;-)

> 
> I think this also needs to be done in the load_module() error path.
> See the bug_cleanup label. There, module_disable_{ro,nx}() are called
> before module deallocation.

Yes, I missed this one. I think Rick Edgecombe has a better version of this
patch that also takes care of this case (see
https://lkml.org/lkml/2018/12/11/1573 ). I think he will merge the rest of
this series (although I’m still waiting for Thomas/Ingo to tell me what is
going to happen with the first patches).

> I am not sure why all this was made executable before freeing in the
> first place.  Tried to dig through the commit history and the first
> commit that introduced this behavior was 448694a1d50 ("module: undo
> module RONX protection correctly"). There, the behavior was changed
> from W+NX to W+X before releasing the module. But AFAIK from the
> changelog, there was no real technical reason behind it, it stemmed
> out of the complaint of code asymmetry :-/

Thanks for looking into it. I gave up after I saw there should be no
architectural reason for it (on x86) and could not think of one (on any
arch., certainly not for the data). Anyhow, that’s what automated tests are
for. If this is wrong, things should crash and burn very fast.

Patch
diff mbox series

diff --git a/kernel/module.c b/kernel/module.c
index 7cb207249437..57c5b23746e7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2027,20 +2027,29 @@  void set_all_modules_text_ro(void)
 	mutex_unlock(&module_mutex);
 }
 
-static void disable_ro_nx(const struct module_layout *layout)
+static void module_restore_mappings(const struct module_layout *layout)
 {
-	if (rodata_enabled) {
-		frob_text(layout, set_memory_rw);
-		frob_rodata(layout, set_memory_rw);
-		frob_ro_after_init(layout, set_memory_rw);
-	}
-	frob_rodata(layout, set_memory_x);
-	frob_ro_after_init(layout, set_memory_x);
-	frob_writable_data(layout, set_memory_x);
+	/*
+	 * First, make the mappings of the code non-executable to prevent
+	 * transient W+X mappings from being set when the text is set as RW.
+	 */
+	frob_text(layout, set_memory_nx);
+
+	if (!rodata_enabled)
+		return;
+
+	/*
+	 * Second, set the memory as writable. Although the module memory is
+	 * about to be freed, these calls are required (at least on x86) to
+	 * restore the direct map to its "correct" state.
+	 */
+	frob_text(layout, set_memory_rw);
+	frob_rodata(layout, set_memory_rw);
+	frob_ro_after_init(layout, set_memory_rw);
 }
 
 #else
-static void disable_ro_nx(const struct module_layout *layout) { }
+static void module_restore_mappings(const struct module_layout *layout) { }
 static void module_enable_nx(const struct module *mod) { }
 static void module_disable_nx(const struct module *mod) { }
 #endif
@@ -2173,7 +2182,7 @@  static void free_module(struct module *mod)
 	mutex_unlock(&module_mutex);
 
 	/* This may be empty, but that's OK */
-	disable_ro_nx(&mod->init_layout);
+	module_restore_mappings(&mod->init_layout);
 	module_arch_freeing_init(mod);
 	module_memfree(mod->init_layout.base);
 	kfree(mod->args);
@@ -2183,7 +2192,7 @@  static void free_module(struct module *mod)
 	lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
 
 	/* Finally, free the core (containing the module structure) */
-	disable_ro_nx(&mod->core_layout);
+	module_restore_mappings(&mod->core_layout);
 	module_memfree(mod->core_layout.base);
 }
 
@@ -3507,7 +3516,7 @@  static noinline int do_init_module(struct module *mod)
 #endif
 	module_enable_ro(mod, true);
 	mod_tree_remove_init(mod);
-	disable_ro_nx(&mod->init_layout);
+	module_restore_mappings(&mod->init_layout);
 	module_arch_freeing_init(mod);
 	mod->init_layout.base = NULL;
 	mod->init_layout.size = 0;