diff mbox

x86: fix kaslr and memmap collision

Message ID 147977413859.13657.2181994710415174471.stgit@djiang5-desk3.ch.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Jiang Nov. 22, 2016, 12:22 a.m. UTC
CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
However it does not take into account the memmap= parameter passed in from
the kernel commandline. This results in the kernel sometimes being put in
the middle of the user memmap. Check has been added in the kaslr in order
to avoid the region marked by memmap.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 arch/x86/boot/boot.h             |    2 ++
 arch/x86/boot/compressed/kaslr.c |   45 ++++++++++++++++++++++++++++++++++++++
 arch/x86/boot/string.c           |   25 +++++++++++++++++++++
 3 files changed, 72 insertions(+)

Comments

Ingo Molnar Nov. 22, 2016, 8:47 a.m. UTC | #1
* Dave Jiang <dave.jiang@intel.com> wrote:

> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
> However it does not take into account the memmap= parameter passed in from
> the kernel commandline.

memmap= parameters are often used as a list.

> [...] This results in the kernel sometimes being put in the middle of the user 
> memmap. [...]

What does this mean? If memmap= is used to re-define the memory map then the 
kernel getting in the middle of a RAM area is what we want, isn't it? What we 
don't want is for the kernel to get into reserved areas, right?

> [...] Check has been added in the kaslr in order to avoid the region marked by 
> memmap.

What does this mean?

> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  arch/x86/boot/boot.h             |    2 ++
>  arch/x86/boot/compressed/kaslr.c |   45 ++++++++++++++++++++++++++++++++++++++
>  arch/x86/boot/string.c           |   25 +++++++++++++++++++++
>  3 files changed, 72 insertions(+)
> 
> diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
> index e5612f3..0d5fe5b 100644
> --- a/arch/x86/boot/boot.h
> +++ b/arch/x86/boot/boot.h
> @@ -332,6 +332,8 @@ int strncmp(const char *cs, const char *ct, size_t count);
>  size_t strnlen(const char *s, size_t maxlen);
>  unsigned int atou(const char *s);
>  unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
> +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base);
> +long simple_strtol(const char *cp, char **endp, unsigned int base);
>  size_t strlen(const char *s);
>  
>  /* tty.c */
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index a66854d..6fb8f1ec 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -11,6 +11,7 @@
>   */
>  #include "misc.h"
>  #include "error.h"
> +#include "../boot.h"
>  
>  #include <generated/compile.h>
>  #include <linux/module.h>
> @@ -61,6 +62,7 @@ enum mem_avoid_index {
>  	MEM_AVOID_INITRD,
>  	MEM_AVOID_CMDLINE,
>  	MEM_AVOID_BOOTPARAMS,
> +	MEM_AVOID_MEMMAP,
>  	MEM_AVOID_MAX,
>  };
>  
> @@ -77,6 +79,37 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
>  	return true;
>  }
>  
> +#include "../../../../lib/cmdline.c"
> +
> +static int
> +parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
> +{
> +	char *oldp;
> +
> +	if (!p)
> +		return -EINVAL;
> +
> +	/* we don't care about this option here */
> +	if (!strncmp(p, "exactmap", 8))
> +		return -EINVAL;
> +
> +	oldp = p;
> +	*size = memparse(p, &p);
> +	if (p == oldp)
> +		return -EINVAL;
> +
> +	switch (*p) {
> +	case '@':
> +	case '#':
> +	case '$':
> +	case '!':
> +		*start = memparse(p+1, &p);
> +		return 0;
> +	}
> +
> +	return -EINVAL;
> +}
> +
>  /*
>   * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
>   * The mem_avoid array is used to store the ranges that need to be avoided
> @@ -158,6 +191,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>  	u64 initrd_start, initrd_size;
>  	u64 cmd_line, cmd_line_size;
>  	char *ptr;
> +	char arg[38];

Where does the magic '38' come from?

> +	unsigned long long memmap_start, memmap_size;
>  
>  	/*
>  	 * Avoid the region that is unsafe to overlap during
> @@ -195,6 +230,16 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>  	add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
>  			 mem_avoid[MEM_AVOID_BOOTPARAMS].size);
>  
> +	/* see if we have any memmap areas */
> +	if (cmdline_find_option("memmap", arg, sizeof(arg)) > 0) {
> +		int rc = parse_memmap(arg, &memmap_start, &memmap_size);
> +
> +		if (!rc) {
> +			mem_avoid[MEM_AVOID_MEMMAP].start = memmap_start;
> +			mem_avoid[MEM_AVOID_MEMMAP].size = memmap_size;
> +		}
> +	}
> +

This only handles a single (first) memmap argument, is that sufficient?

Thanks,

	Ingo
Dan Williams Nov. 22, 2016, 5:26 p.m. UTC | #2
[ replying for Dave since he's offline today and tomorrow ]

On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
>
> * Dave Jiang <dave.jiang@intel.com> wrote:
>
>> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
>> However it does not take into account the memmap= parameter passed in from
>> the kernel commandline.
>
> memmap= parameters are often used as a list.
>
>> [...] This results in the kernel sometimes being put in the middle of the user
>> memmap. [...]
>
> What does this mean? If memmap= is used to re-define the memory map then the
> kernel getting in the middle of a RAM area is what we want, isn't it? What we
> don't want is for the kernel to get into reserved areas, right?

Right, this is about teaching kaslr to not land the kernel in newly
defined reserved regions that were not marked reserved in the initial
e820 map from platform firmware.

>> [...] Check has been added in the kaslr in order to avoid the region marked by
>> memmap.
>
> What does this mean?

Is this clearer? "Update the set of 'mem_avoid' entries to exclude
'memmap=' defined reserved regions from the set of valid address range
to land the kernel image."

>
>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>> ---
>>  arch/x86/boot/boot.h             |    2 ++
>>  arch/x86/boot/compressed/kaslr.c |   45 ++++++++++++++++++++++++++++++++++++++
>>  arch/x86/boot/string.c           |   25 +++++++++++++++++++++
>>  3 files changed, 72 insertions(+)
>>
>> diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
>> index e5612f3..0d5fe5b 100644
>> --- a/arch/x86/boot/boot.h
>> +++ b/arch/x86/boot/boot.h
>> @@ -332,6 +332,8 @@ int strncmp(const char *cs, const char *ct, size_t count);
>>  size_t strnlen(const char *s, size_t maxlen);
>>  unsigned int atou(const char *s);
>>  unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
>> +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base);
>> +long simple_strtol(const char *cp, char **endp, unsigned int base);
>>  size_t strlen(const char *s);
>>
>>  /* tty.c */
>> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>> index a66854d..6fb8f1ec 100644
>> --- a/arch/x86/boot/compressed/kaslr.c
>> +++ b/arch/x86/boot/compressed/kaslr.c
>> @@ -11,6 +11,7 @@
>>   */
>>  #include "misc.h"
>>  #include "error.h"
>> +#include "../boot.h"
>>
>>  #include <generated/compile.h>
>>  #include <linux/module.h>
>> @@ -61,6 +62,7 @@ enum mem_avoid_index {
>>       MEM_AVOID_INITRD,
>>       MEM_AVOID_CMDLINE,
>>       MEM_AVOID_BOOTPARAMS,
>> +     MEM_AVOID_MEMMAP,
>>       MEM_AVOID_MAX,
>>  };
>>
>> @@ -77,6 +79,37 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
>>       return true;
>>  }
>>
>> +#include "../../../../lib/cmdline.c"
>> +
>> +static int
>> +parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
>> +{
>> +     char *oldp;
>> +
>> +     if (!p)
>> +             return -EINVAL;
>> +
>> +     /* we don't care about this option here */
>> +     if (!strncmp(p, "exactmap", 8))
>> +             return -EINVAL;
>> +
>> +     oldp = p;
>> +     *size = memparse(p, &p);
>> +     if (p == oldp)
>> +             return -EINVAL;
>> +
>> +     switch (*p) {
>> +     case '@':
>> +     case '#':
>> +     case '$':
>> +     case '!':
>> +             *start = memparse(p+1, &p);
>> +             return 0;
>> +     }
>> +
>> +     return -EINVAL;
>> +}
>> +
>>  /*
>>   * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
>>   * The mem_avoid array is used to store the ranges that need to be avoided
>> @@ -158,6 +191,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>>       u64 initrd_start, initrd_size;
>>       u64 cmd_line, cmd_line_size;
>>       char *ptr;
>> +     char arg[38];
>
> Where does the magic '38' come from?
>
>> +     unsigned long long memmap_start, memmap_size;
>>
>>       /*
>>        * Avoid the region that is unsafe to overlap during
>> @@ -195,6 +230,16 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>>       add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
>>                        mem_avoid[MEM_AVOID_BOOTPARAMS].size);
>>
>> +     /* see if we have any memmap areas */
>> +     if (cmdline_find_option("memmap", arg, sizeof(arg)) > 0) {
>> +             int rc = parse_memmap(arg, &memmap_start, &memmap_size);
>> +
>> +             if (!rc) {
>> +                     mem_avoid[MEM_AVOID_MEMMAP].start = memmap_start;
>> +                     mem_avoid[MEM_AVOID_MEMMAP].size = memmap_size;
>> +             }
>> +     }
>> +
>
> This only handles a single (first) memmap argument, is that sufficient?

No, you're right, we need to handle multiple ranges.  Since the
mem_avoid array is statically allocated perhaps we can handle up to 4
memmap= entries, but past that point disable kaslr for that boot?
Kees Cook Nov. 22, 2016, 6:54 p.m. UTC | #3
On Tue, Nov 22, 2016 at 9:26 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> [ replying for Dave since he's offline today and tomorrow ]
>
> On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
>>
>> * Dave Jiang <dave.jiang@intel.com> wrote:
>>
>>> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
>>> However it does not take into account the memmap= parameter passed in from
>>> the kernel commandline.
>>
>> memmap= parameters are often used as a list.
>>
>>> [...] This results in the kernel sometimes being put in the middle of the user
>>> memmap. [...]
>>
>> What does this mean? If memmap= is used to re-define the memory map then the
>> kernel getting in the middle of a RAM area is what we want, isn't it? What we
>> don't want is for the kernel to get into reserved areas, right?
>
> Right, this is about teaching kaslr to not land the kernel in newly
> defined reserved regions that were not marked reserved in the initial
> e820 map from platform firmware.
>
>>> [...] Check has been added in the kaslr in order to avoid the region marked by
>>> memmap.
>>
>> What does this mean?
>
> Is this clearer? "Update the set of 'mem_avoid' entries to exclude
> 'memmap=' defined reserved regions from the set of valid address range
> to land the kernel image."
>
>>
>>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>>> ---
>>>  arch/x86/boot/boot.h             |    2 ++
>>>  arch/x86/boot/compressed/kaslr.c |   45 ++++++++++++++++++++++++++++++++++++++
>>>  arch/x86/boot/string.c           |   25 +++++++++++++++++++++
>>>  3 files changed, 72 insertions(+)
>>>
>>> diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
>>> index e5612f3..0d5fe5b 100644
>>> --- a/arch/x86/boot/boot.h
>>> +++ b/arch/x86/boot/boot.h
>>> @@ -332,6 +332,8 @@ int strncmp(const char *cs, const char *ct, size_t count);
>>>  size_t strnlen(const char *s, size_t maxlen);
>>>  unsigned int atou(const char *s);
>>>  unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
>>> +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base);
>>> +long simple_strtol(const char *cp, char **endp, unsigned int base);
>>>  size_t strlen(const char *s);
>>>
>>>  /* tty.c */
>>> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>>> index a66854d..6fb8f1ec 100644
>>> --- a/arch/x86/boot/compressed/kaslr.c
>>> +++ b/arch/x86/boot/compressed/kaslr.c
>>> @@ -11,6 +11,7 @@
>>>   */
>>>  #include "misc.h"
>>>  #include "error.h"
>>> +#include "../boot.h"
>>>
>>>  #include <generated/compile.h>
>>>  #include <linux/module.h>
>>> @@ -61,6 +62,7 @@ enum mem_avoid_index {
>>>       MEM_AVOID_INITRD,
>>>       MEM_AVOID_CMDLINE,
>>>       MEM_AVOID_BOOTPARAMS,
>>> +     MEM_AVOID_MEMMAP,
>>>       MEM_AVOID_MAX,
>>>  };
>>>
>>> @@ -77,6 +79,37 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
>>>       return true;
>>>  }
>>>
>>> +#include "../../../../lib/cmdline.c"
>>> +
>>> +static int
>>> +parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
>>> +{
>>> +     char *oldp;
>>> +
>>> +     if (!p)
>>> +             return -EINVAL;
>>> +
>>> +     /* we don't care about this option here */
>>> +     if (!strncmp(p, "exactmap", 8))
>>> +             return -EINVAL;
>>> +
>>> +     oldp = p;
>>> +     *size = memparse(p, &p);
>>> +     if (p == oldp)
>>> +             return -EINVAL;
>>> +
>>> +     switch (*p) {
>>> +     case '@':
>>> +     case '#':
>>> +     case '$':
>>> +     case '!':
>>> +             *start = memparse(p+1, &p);
>>> +             return 0;
>>> +     }
>>> +
>>> +     return -EINVAL;
>>> +}
>>> +
>>>  /*
>>>   * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
>>>   * The mem_avoid array is used to store the ranges that need to be avoided
>>> @@ -158,6 +191,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>>>       u64 initrd_start, initrd_size;
>>>       u64 cmd_line, cmd_line_size;
>>>       char *ptr;
>>> +     char arg[38];
>>
>> Where does the magic '38' come from?
>>
>>> +     unsigned long long memmap_start, memmap_size;
>>>
>>>       /*
>>>        * Avoid the region that is unsafe to overlap during
>>> @@ -195,6 +230,16 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>>>       add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
>>>                        mem_avoid[MEM_AVOID_BOOTPARAMS].size);
>>>
>>> +     /* see if we have any memmap areas */
>>> +     if (cmdline_find_option("memmap", arg, sizeof(arg)) > 0) {
>>> +             int rc = parse_memmap(arg, &memmap_start, &memmap_size);
>>> +
>>> +             if (!rc) {
>>> +                     mem_avoid[MEM_AVOID_MEMMAP].start = memmap_start;
>>> +                     mem_avoid[MEM_AVOID_MEMMAP].size = memmap_size;
>>> +             }
>>> +     }
>>> +
>>
>> This only handles a single (first) memmap argument, is that sufficient?
>
> No, you're right, we need to handle multiple ranges.  Since the
> mem_avoid array is statically allocated perhaps we can handle up to 4
> memmap= entries, but past that point disable kaslr for that boot?

Yeah, that seems fine to me. I assume it's rare to have 4?

-Kees
Dan Williams Nov. 22, 2016, 7:01 p.m. UTC | #4
On Tue, Nov 22, 2016 at 10:54 AM, Kees Cook <keescook@chromium.org> wrote:
> On Tue, Nov 22, 2016 at 9:26 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>> [ replying for Dave since he's offline today and tomorrow ]
>>
>> On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
>>>
>>> * Dave Jiang <dave.jiang@intel.com> wrote:
>>>
>>>> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
>>>> However it does not take into account the memmap= parameter passed in from
>>>> the kernel commandline.
>>>
>>> memmap= parameters are often used as a list.
>>>
>>>> [...] This results in the kernel sometimes being put in the middle of the user
>>>> memmap. [...]
>>>
>>> What does this mean? If memmap= is used to re-define the memory map then the
>>> kernel getting in the middle of a RAM area is what we want, isn't it? What we
>>> don't want is for the kernel to get into reserved areas, right?
>>
>> Right, this is about teaching kaslr to not land the kernel in newly
>> defined reserved regions that were not marked reserved in the initial
>> e820 map from platform firmware.
>>
>>>> [...] Check has been added in the kaslr in order to avoid the region marked by
>>>> memmap.
>>>
>>> What does this mean?
>>
>> Is this clearer? "Update the set of 'mem_avoid' entries to exclude
>> 'memmap=' defined reserved regions from the set of valid address range
>> to land the kernel image."
>>
>>>
>>>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>>>> ---
>>>>  arch/x86/boot/boot.h             |    2 ++
>>>>  arch/x86/boot/compressed/kaslr.c |   45 ++++++++++++++++++++++++++++++++++++++
>>>>  arch/x86/boot/string.c           |   25 +++++++++++++++++++++
>>>>  3 files changed, 72 insertions(+)
>>>>
>>>> diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
>>>> index e5612f3..0d5fe5b 100644
>>>> --- a/arch/x86/boot/boot.h
>>>> +++ b/arch/x86/boot/boot.h
>>>> @@ -332,6 +332,8 @@ int strncmp(const char *cs, const char *ct, size_t count);
>>>>  size_t strnlen(const char *s, size_t maxlen);
>>>>  unsigned int atou(const char *s);
>>>>  unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
>>>> +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base);
>>>> +long simple_strtol(const char *cp, char **endp, unsigned int base);
>>>>  size_t strlen(const char *s);
>>>>
>>>>  /* tty.c */
>>>> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>>>> index a66854d..6fb8f1ec 100644
>>>> --- a/arch/x86/boot/compressed/kaslr.c
>>>> +++ b/arch/x86/boot/compressed/kaslr.c
>>>> @@ -11,6 +11,7 @@
>>>>   */
>>>>  #include "misc.h"
>>>>  #include "error.h"
>>>> +#include "../boot.h"
>>>>
>>>>  #include <generated/compile.h>
>>>>  #include <linux/module.h>
>>>> @@ -61,6 +62,7 @@ enum mem_avoid_index {
>>>>       MEM_AVOID_INITRD,
>>>>       MEM_AVOID_CMDLINE,
>>>>       MEM_AVOID_BOOTPARAMS,
>>>> +     MEM_AVOID_MEMMAP,
>>>>       MEM_AVOID_MAX,
>>>>  };
>>>>
>>>> @@ -77,6 +79,37 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
>>>>       return true;
>>>>  }
>>>>
>>>> +#include "../../../../lib/cmdline.c"
>>>> +
>>>> +static int
>>>> +parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
>>>> +{
>>>> +     char *oldp;
>>>> +
>>>> +     if (!p)
>>>> +             return -EINVAL;
>>>> +
>>>> +     /* we don't care about this option here */
>>>> +     if (!strncmp(p, "exactmap", 8))
>>>> +             return -EINVAL;
>>>> +
>>>> +     oldp = p;
>>>> +     *size = memparse(p, &p);
>>>> +     if (p == oldp)
>>>> +             return -EINVAL;
>>>> +
>>>> +     switch (*p) {
>>>> +     case '@':
>>>> +     case '#':
>>>> +     case '$':
>>>> +     case '!':
>>>> +             *start = memparse(p+1, &p);
>>>> +             return 0;
>>>> +     }
>>>> +
>>>> +     return -EINVAL;
>>>> +}
>>>> +
>>>>  /*
>>>>   * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
>>>>   * The mem_avoid array is used to store the ranges that need to be avoided
>>>> @@ -158,6 +191,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>>>>       u64 initrd_start, initrd_size;
>>>>       u64 cmd_line, cmd_line_size;
>>>>       char *ptr;
>>>> +     char arg[38];
>>>
>>> Where does the magic '38' come from?
>>>
>>>> +     unsigned long long memmap_start, memmap_size;
>>>>
>>>>       /*
>>>>        * Avoid the region that is unsafe to overlap during
>>>> @@ -195,6 +230,16 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>>>>       add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
>>>>                        mem_avoid[MEM_AVOID_BOOTPARAMS].size);
>>>>
>>>> +     /* see if we have any memmap areas */
>>>> +     if (cmdline_find_option("memmap", arg, sizeof(arg)) > 0) {
>>>> +             int rc = parse_memmap(arg, &memmap_start, &memmap_size);
>>>> +
>>>> +             if (!rc) {
>>>> +                     mem_avoid[MEM_AVOID_MEMMAP].start = memmap_start;
>>>> +                     mem_avoid[MEM_AVOID_MEMMAP].size = memmap_size;
>>>> +             }
>>>> +     }
>>>> +
>>>
>>> This only handles a single (first) memmap argument, is that sufficient?
>>
>> No, you're right, we need to handle multiple ranges.  Since the
>> mem_avoid array is statically allocated perhaps we can handle up to 4
>> memmap= entries, but past that point disable kaslr for that boot?
>
> Yeah, that seems fine to me. I assume it's rare to have 4?
>

It should be rare to have *one* since ACPI 6.0 added support for
communicating persistent memory ranges.  However there are legacy
nvdimm users that I know are doing at least 2, but I have hard time
imagining they would ever do more than 4.
Kees Cook Nov. 22, 2016, 10:37 p.m. UTC | #5
On Tue, Nov 22, 2016 at 11:01 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> On Tue, Nov 22, 2016 at 10:54 AM, Kees Cook <keescook@chromium.org> wrote:
>> On Tue, Nov 22, 2016 at 9:26 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>>> [ replying for Dave since he's offline today and tomorrow ]
>>>
>>> On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
>>>>
>>>> * Dave Jiang <dave.jiang@intel.com> wrote:
>>>>
>>>>> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
>>>>> However it does not take into account the memmap= parameter passed in from
>>>>> the kernel commandline.
>>>>
>>>> memmap= parameters are often used as a list.
>>>>
>>>>> [...] This results in the kernel sometimes being put in the middle of the user
>>>>> memmap. [...]
>>>>
>>>> What does this mean? If memmap= is used to re-define the memory map then the
>>>> kernel getting in the middle of a RAM area is what we want, isn't it? What we
>>>> don't want is for the kernel to get into reserved areas, right?
>>>
>>> Right, this is about teaching kaslr to not land the kernel in newly
>>> defined reserved regions that were not marked reserved in the initial
>>> e820 map from platform firmware.
>>>
>>>>> [...] Check has been added in the kaslr in order to avoid the region marked by
>>>>> memmap.
>>>>
>>>> What does this mean?
>>>
>>> Is this clearer? "Update the set of 'mem_avoid' entries to exclude
>>> 'memmap=' defined reserved regions from the set of valid address range
>>> to land the kernel image."
>>>
>>>>
>>>>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>>>>> ---
>>>>>  arch/x86/boot/boot.h             |    2 ++
>>>>>  arch/x86/boot/compressed/kaslr.c |   45 ++++++++++++++++++++++++++++++++++++++
>>>>>  arch/x86/boot/string.c           |   25 +++++++++++++++++++++
>>>>>  3 files changed, 72 insertions(+)
>>>>>
>>>>> diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
>>>>> index e5612f3..0d5fe5b 100644
>>>>> --- a/arch/x86/boot/boot.h
>>>>> +++ b/arch/x86/boot/boot.h
>>>>> @@ -332,6 +332,8 @@ int strncmp(const char *cs, const char *ct, size_t count);
>>>>>  size_t strnlen(const char *s, size_t maxlen);
>>>>>  unsigned int atou(const char *s);
>>>>>  unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
>>>>> +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base);
>>>>> +long simple_strtol(const char *cp, char **endp, unsigned int base);
>>>>>  size_t strlen(const char *s);
>>>>>
>>>>>  /* tty.c */
>>>>> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>>>>> index a66854d..6fb8f1ec 100644
>>>>> --- a/arch/x86/boot/compressed/kaslr.c
>>>>> +++ b/arch/x86/boot/compressed/kaslr.c
>>>>> @@ -11,6 +11,7 @@
>>>>>   */
>>>>>  #include "misc.h"
>>>>>  #include "error.h"
>>>>> +#include "../boot.h"
>>>>>
>>>>>  #include <generated/compile.h>
>>>>>  #include <linux/module.h>
>>>>> @@ -61,6 +62,7 @@ enum mem_avoid_index {
>>>>>       MEM_AVOID_INITRD,
>>>>>       MEM_AVOID_CMDLINE,
>>>>>       MEM_AVOID_BOOTPARAMS,
>>>>> +     MEM_AVOID_MEMMAP,
>>>>>       MEM_AVOID_MAX,
>>>>>  };
>>>>>
>>>>> @@ -77,6 +79,37 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
>>>>>       return true;
>>>>>  }
>>>>>
>>>>> +#include "../../../../lib/cmdline.c"
>>>>> +
>>>>> +static int
>>>>> +parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
>>>>> +{
>>>>> +     char *oldp;
>>>>> +
>>>>> +     if (!p)
>>>>> +             return -EINVAL;
>>>>> +
>>>>> +     /* we don't care about this option here */
>>>>> +     if (!strncmp(p, "exactmap", 8))
>>>>> +             return -EINVAL;
>>>>> +
>>>>> +     oldp = p;
>>>>> +     *size = memparse(p, &p);
>>>>> +     if (p == oldp)
>>>>> +             return -EINVAL;
>>>>> +
>>>>> +     switch (*p) {
>>>>> +     case '@':
>>>>> +     case '#':
>>>>> +     case '$':
>>>>> +     case '!':
>>>>> +             *start = memparse(p+1, &p);
>>>>> +             return 0;
>>>>> +     }
>>>>> +
>>>>> +     return -EINVAL;
>>>>> +}
>>>>> +
>>>>>  /*
>>>>>   * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
>>>>>   * The mem_avoid array is used to store the ranges that need to be avoided
>>>>> @@ -158,6 +191,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>>>>>       u64 initrd_start, initrd_size;
>>>>>       u64 cmd_line, cmd_line_size;
>>>>>       char *ptr;
>>>>> +     char arg[38];
>>>>
>>>> Where does the magic '38' come from?
>>>>
>>>>> +     unsigned long long memmap_start, memmap_size;
>>>>>
>>>>>       /*
>>>>>        * Avoid the region that is unsafe to overlap during
>>>>> @@ -195,6 +230,16 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>>>>>       add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
>>>>>                        mem_avoid[MEM_AVOID_BOOTPARAMS].size);
>>>>>
>>>>> +     /* see if we have any memmap areas */
>>>>> +     if (cmdline_find_option("memmap", arg, sizeof(arg)) > 0) {
>>>>> +             int rc = parse_memmap(arg, &memmap_start, &memmap_size);
>>>>> +
>>>>> +             if (!rc) {
>>>>> +                     mem_avoid[MEM_AVOID_MEMMAP].start = memmap_start;
>>>>> +                     mem_avoid[MEM_AVOID_MEMMAP].size = memmap_size;
>>>>> +             }
>>>>> +     }
>>>>> +
>>>>
>>>> This only handles a single (first) memmap argument, is that sufficient?
>>>
>>> No, you're right, we need to handle multiple ranges.  Since the
>>> mem_avoid array is statically allocated perhaps we can handle up to 4
>>> memmap= entries, but past that point disable kaslr for that boot?
>>
>> Yeah, that seems fine to me. I assume it's rare to have 4?
>>
>
> It should be rare to have *one* since ACPI 6.0 added support for
> communicating persistent memory ranges.  However there are legacy
> nvdimm users that I know are doing at least 2, but I have hard time
> imagining they would ever do more than 4.

Cool. As long as it announces KASLR being disabled (as in some of the
other conditions) that should be fine.

-Kees
Dave Chinner Nov. 24, 2016, 12:04 a.m. UTC | #6
On Tue, Nov 22, 2016 at 11:01:32AM -0800, Dan Williams wrote:
> On Tue, Nov 22, 2016 at 10:54 AM, Kees Cook <keescook@chromium.org> wrote:
> > On Tue, Nov 22, 2016 at 9:26 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> >> No, you're right, we need to handle multiple ranges.  Since the
> >> mem_avoid array is statically allocated perhaps we can handle up to 4
> >> memmap= entries, but past that point disable kaslr for that boot?
> >
> > Yeah, that seems fine to me. I assume it's rare to have 4?
> >
> 
> It should be rare to have *one* since ACPI 6.0 added support for
> communicating persistent memory ranges.  However there are legacy
> nvdimm users that I know are doing at least 2, but I have hard time
> imagining they would ever do more than 4.

I doubt it's rare amongst the people using RAM to emulate pmem for
filesystem testing purposes. My "pmem" test VM always has at least 2
ranges set to give me two discrete pmem devices, and I have used 4
from time to time to do things like test multi-volume scratch XFS
filesystems in xfstests (i.e. data, log and realtime volumes) so I
didn't need to play games with partitioning or DM...

Cheers,

Dave.
Dan Williams Nov. 24, 2016, 7:30 p.m. UTC | #7
On Wed, Nov 23, 2016 at 4:04 PM, Dave Chinner <david@fromorbit.com> wrote:
> On Tue, Nov 22, 2016 at 11:01:32AM -0800, Dan Williams wrote:
>> On Tue, Nov 22, 2016 at 10:54 AM, Kees Cook <keescook@chromium.org> wrote:
>> > On Tue, Nov 22, 2016 at 9:26 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>> >> No, you're right, we need to handle multiple ranges.  Since the
>> >> mem_avoid array is statically allocated perhaps we can handle up to 4
>> >> memmap= entries, but past that point disable kaslr for that boot?
>> >
>> > Yeah, that seems fine to me. I assume it's rare to have 4?
>> >
>>
>> It should be rare to have *one* since ACPI 6.0 added support for
>> communicating persistent memory ranges.  However there are legacy
>> nvdimm users that I know are doing at least 2, but I have hard time
>> imagining they would ever do more than 4.
>
> I doubt it's rare amongst the people using RAM to emulate pmem for
> filesystem testing purposes. My "pmem" test VM always has at least 2
> ranges set to give me two discrete pmem devices, and I have used 4
> from time to time to do things like test multi-volume scratch XFS
> filesystems in xfstests (i.e. data, log and realtime volumes) so I
> didn't need to play games with partitioning or DM...

Right, but for testing do you need kaslr to be active? You can have as
many memmap regions as you want, we'll just stop trying to find a
random kernel base address after you've defined 4.
Baoquan He Jan. 3, 2017, 8:31 a.m. UTC | #8
Hi Dan,

On 11/22/16 at 09:26am, Dan Williams wrote:
> [ replying for Dave since he's offline today and tomorrow ]
> 
> On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >
> > * Dave Jiang <dave.jiang@intel.com> wrote:
> >
> >> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
> >> However it does not take into account the memmap= parameter passed in from
> >> the kernel commandline.
> >
> > memmap= parameters are often used as a list.
> >
> >> [...] This results in the kernel sometimes being put in the middle of the user
> >> memmap. [...]
> >
> > What does this mean? If memmap= is used to re-define the memory map then the
> > kernel getting in the middle of a RAM area is what we want, isn't it? What we
> > don't want is for the kernel to get into reserved areas, right?
> 
> Right, this is about teaching kaslr to not land the kernel in newly
> defined reserved regions that were not marked reserved in the initial
> e820 map from platform firmware.

If only tell kaslr to not land kernel in newly defined reserved regions,
memory added by "memmap=nn[KMG]@ss[KMG]" should not be avoided since
it's usable memory. Kernel randomized into this region is also what we
want. Not sure if I understand it right.

Thanks
Baoquan
Ross Zwisler Jan. 3, 2017, 4:27 p.m. UTC | #9
On Tue, Jan 03, 2017 at 04:31:37PM +0800, Baoquan He wrote:
> Hi Dan,
> 
> On 11/22/16 at 09:26am, Dan Williams wrote:
> > [ replying for Dave since he's offline today and tomorrow ]
> > 
> > On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
> > >
> > > * Dave Jiang <dave.jiang@intel.com> wrote:
> > >
> > >> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
> > >> However it does not take into account the memmap= parameter passed in from
> > >> the kernel commandline.
> > >
> > > memmap= parameters are often used as a list.
> > >
> > >> [...] This results in the kernel sometimes being put in the middle of the user
> > >> memmap. [...]
> > >
> > > What does this mean? If memmap= is used to re-define the memory map then the
> > > kernel getting in the middle of a RAM area is what we want, isn't it? What we
> > > don't want is for the kernel to get into reserved areas, right?
> > 
> > Right, this is about teaching kaslr to not land the kernel in newly
> > defined reserved regions that were not marked reserved in the initial
> > e820 map from platform firmware.
> 
> If only tell kaslr to not land kernel in newly defined reserved regions,
> memory added by "memmap=nn[KMG]@ss[KMG]" should not be avoided since
> it's usable memory. Kernel randomized into this region is also what we
> want. Not sure if I understand it right.

The following text is from:
https://nvdimm.wiki.kernel.org/how_to_choose_the_correct_memmap_kernel_parameter_for_pmem_on_your_system

Hopefully this will make it clearer.

---

Another thing that you may need to be aware of is the CONFIG_RANDOMIZE_BASE
kernel config option. When enabled, this randomizes the physical address at
which the kernel image is decompressed and the virtual address where the kernel
image is mapped. Currently this random address is chosen without regard to the
memmap kernel command line parameter.

This means that the kernel can choose to put itself in the middle of your
reserved memmap area. You can observe this behavior via /proc/iomem.

Here is /proc/iomem from a system with CONFIG_RANDOMIZE_BASE turned off:

# cat /proc/iomem
00000000-00000fff : reserved
00001000-0009fbff : System RAM
0009fc00-0009ffff : reserved
000a0000-000bffff : PCI Bus 0000:00
000c0000-000c97ff : Video ROM
000c9800-000ca5ff : Adapter ROM
000ca800-000ccbff : Adapter ROM
000f0000-000fffff : reserved
  000f0000-000fffff : System ROM
00100000-bffd8fff : System RAM
  01000000-01b18598 : Kernel code
  01b18599-023f53ff : Kernel data
  0276d000-0365efff : Kernel bss
bffd9000-bfffffff : reserved
c0000000-febfffff : PCI Bus 0000:00
  f4000000-f7ffffff : 0000:00:02.0
  f8000000-fbffffff : 0000:00:02.0
  fc000000-fc03ffff : 0000:00:03.0
  fc050000-fc051fff : 0000:00:02.0
  fc052000-fc052fff : 0000:00:03.0
  fc053000-fc053fff : 0000:00:04.0
  fc054000-fc054fff : 0000:00:05.7
    fc054000-fc054fff : ehci_hcd
  fc055000-fc055fff : 0000:00:06.0
fec00000-fec003ff : IOAPIC 0
fee00000-fee00fff : Local APIC
feffc000-feffffff : reserved
fffc0000-ffffffff : reserved
100000000-4ffffffff : Persistent Memory (legacy)
  100000000-4ffffffff : namespace0.0
500000000-53fffffff : System RAM

The interesting bits for us are the “System RAM” region from 00100000-bffd8fff,
and the “Persistent Memory (legacy)” region from 100000000-4ffffffff.

If I turn on CONFIG_RANDOMIZE_BASE on this same system, I get the following:

# cat /proc/iomem
00000000-00000fff : reserved
00001000-0009fbff : System RAM
0009fc00-0009ffff : reserved
000a0000-000bffff : PCI Bus 0000:00
000c0000-000c97ff : Video ROM
000c9800-000ca5ff : Adapter ROM
000ca800-000ccbff : Adapter ROM
000f0000-000fffff : reserved
  000f0000-000fffff : System ROM
00100000-bffd8fff : System RAM
bffd9000-bfffffff : reserved
c0000000-febfffff : PCI Bus 0000:00
  f4000000-f7ffffff : 0000:00:02.0
  f8000000-fbffffff : 0000:00:02.0
  fc000000-fc03ffff : 0000:00:03.0
  fc050000-fc051fff : 0000:00:02.0
  fc052000-fc052fff : 0000:00:03.0
  fc053000-fc053fff : 0000:00:04.0
  fc054000-fc054fff : 0000:00:05.7
    fc054000-fc054fff : ehci_hcd
  fc055000-fc055fff : 0000:00:06.0
fec00000-fec003ff : IOAPIC 0
fee00000-fee00fff : Local APIC
feffc000-feffffff : reserved
fffc0000-ffffffff : reserved
100000000-4e6ffffff : Persistent Memory (legacy)
4e7000000-4e968bfff : System RAM
  4e7000000-4e7b185d8 : Kernel code
  4e7b185d9-4e83f54bf : Kernel data
  4e876d000-4e965efff : Kernel bss
4e968c000-4ffffffff : Persistent Memory (legacy)
500000000-53fffffff : System RAM

The “System RAM” region now sits in the middle of my “Persistent Memory
(legacy)” region, splitting it in half. This results in the following kernel
WARNING:

[    6.356180] WARNING: CPU: 4 PID: 689 at kernel/memremap.c:300 devm_memremap_pages+0x3b2/0x4c0
[    6.357757] devm_memremap_pages attempted on mixed region [mem 0x4e968c000-0x4ffffffff flags 0x200]

and no /dev/pmem* devices being created.
Dan Williams Jan. 3, 2017, 6:24 p.m. UTC | #10
On Tue, Jan 3, 2017 at 12:31 AM, Baoquan He <bhe@redhat.com> wrote:
> Hi Dan,
>
> On 11/22/16 at 09:26am, Dan Williams wrote:
>> [ replying for Dave since he's offline today and tomorrow ]
>>
>> On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
>> >
>> > * Dave Jiang <dave.jiang@intel.com> wrote:
>> >
>> >> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
>> >> However it does not take into account the memmap= parameter passed in from
>> >> the kernel commandline.
>> >
>> > memmap= parameters are often used as a list.
>> >
>> >> [...] This results in the kernel sometimes being put in the middle of the user
>> >> memmap. [...]
>> >
>> > What does this mean? If memmap= is used to re-define the memory map then the
>> > kernel getting in the middle of a RAM area is what we want, isn't it? What we
>> > don't want is for the kernel to get into reserved areas, right?
>>
>> Right, this is about teaching kaslr to not land the kernel in newly
>> defined reserved regions that were not marked reserved in the initial
>> e820 map from platform firmware.
>
> If only tell kaslr to not land kernel in newly defined reserved regions,
> memory added by "memmap=nn[KMG]@ss[KMG]" should not be avoided since
> it's usable memory. Kernel randomized into this region is also what we
> want. Not sure if I understand it right.

You're right, this is supposed to be for memmap=nn!ss cases which
defines reserved persistent memory ranges, not memmap=nn@ss which
defines usable memory.

We need to fix mem_avoid_memmap() to only skip memmap= statements that
specify reserved memory.
Dave Jiang Jan. 3, 2017, 8:15 p.m. UTC | #11
On 01/03/2017 11:24 AM, Dan Williams wrote:
> On Tue, Jan 3, 2017 at 12:31 AM, Baoquan He <bhe@redhat.com> wrote:
>> Hi Dan,
>>
>> On 11/22/16 at 09:26am, Dan Williams wrote:
>>> [ replying for Dave since he's offline today and tomorrow ]
>>>
>>> On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
>>>>
>>>> * Dave Jiang <dave.jiang@intel.com> wrote:
>>>>
>>>>> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
>>>>> However it does not take into account the memmap= parameter passed in from
>>>>> the kernel commandline.
>>>>
>>>> memmap= parameters are often used as a list.
>>>>
>>>>> [...] This results in the kernel sometimes being put in the middle of the user
>>>>> memmap. [...]
>>>>
>>>> What does this mean? If memmap= is used to re-define the memory map then the
>>>> kernel getting in the middle of a RAM area is what we want, isn't it? What we
>>>> don't want is for the kernel to get into reserved areas, right?
>>>
>>> Right, this is about teaching kaslr to not land the kernel in newly
>>> defined reserved regions that were not marked reserved in the initial
>>> e820 map from platform firmware.
>>
>> If only tell kaslr to not land kernel in newly defined reserved regions,
>> memory added by "memmap=nn[KMG]@ss[KMG]" should not be avoided since
>> it's usable memory. Kernel randomized into this region is also what we
>> want. Not sure if I understand it right.
> 
> You're right, this is supposed to be for memmap=nn!ss cases which
> defines reserved persistent memory ranges, not memmap=nn@ss which
> defines usable memory.
> 
> We need to fix mem_avoid_memmap() to only skip memmap= statements that
> specify reserved memory.
> 

I think nn@ss is the only one that we should skip over, otherwise
everything else looks like should be avoided. I'll update.
Baoquan He Jan. 4, 2017, 1:57 a.m. UTC | #12
On 01/03/17 at 01:15pm, Dave Jiang wrote:
> 
> 
> On 01/03/2017 11:24 AM, Dan Williams wrote:
> > On Tue, Jan 3, 2017 at 12:31 AM, Baoquan He <bhe@redhat.com> wrote:
> >> Hi Dan,
> >>
> >> On 11/22/16 at 09:26am, Dan Williams wrote:
> >>> [ replying for Dave since he's offline today and tomorrow ]
> >>>
> >>> On Tue, Nov 22, 2016 at 12:47 AM, Ingo Molnar <mingo@kernel.org> wrote:
> >>>>
> >>>> * Dave Jiang <dave.jiang@intel.com> wrote:
> >>>>
> >>>>> CONFIG_RANDOMIZE_BASE relocates the kernel to a random base address.
> >>>>> However it does not take into account the memmap= parameter passed in from
> >>>>> the kernel commandline.
> >>>>
> >>>> memmap= parameters are often used as a list.
> >>>>
> >>>>> [...] This results in the kernel sometimes being put in the middle of the user
> >>>>> memmap. [...]
> >>>>
> >>>> What does this mean? If memmap= is used to re-define the memory map then the
> >>>> kernel getting in the middle of a RAM area is what we want, isn't it? What we
> >>>> don't want is for the kernel to get into reserved areas, right?
> >>>
> >>> Right, this is about teaching kaslr to not land the kernel in newly
> >>> defined reserved regions that were not marked reserved in the initial
> >>> e820 map from platform firmware.
> >>
> >> If only tell kaslr to not land kernel in newly defined reserved regions,
> >> memory added by "memmap=nn[KMG]@ss[KMG]" should not be avoided since
> >> it's usable memory. Kernel randomized into this region is also what we
> >> want. Not sure if I understand it right.
> > 
> > You're right, this is supposed to be for memmap=nn!ss cases which
> > defines reserved persistent memory ranges, not memmap=nn@ss which
> > defines usable memory.
> > 
> > We need to fix mem_avoid_memmap() to only skip memmap= statements that
> > specify reserved memory.

Thanks for confirmation, Dan!

> > 
> 
> I think nn@ss is the only one that we should skip over, otherwise
> everything else looks like should be avoided. I'll update.
Hi Dave,

I guess your purpose is to avoid the user defined reserved memory and
pmem which I am not very sure about since kaslr won't stamp on ACPI
region reported by BIOS. Seems OK to avoid them all except of nn@ss.

I have other concerns, will directly comment in your v4 post.

Thanks
Baoquan
diff mbox

Patch

diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index e5612f3..0d5fe5b 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -332,6 +332,8 @@  int strncmp(const char *cs, const char *ct, size_t count);
 size_t strnlen(const char *s, size_t maxlen);
 unsigned int atou(const char *s);
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
+unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base);
+long simple_strtol(const char *cp, char **endp, unsigned int base);
 size_t strlen(const char *s);
 
 /* tty.c */
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a66854d..6fb8f1ec 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -11,6 +11,7 @@ 
  */
 #include "misc.h"
 #include "error.h"
+#include "../boot.h"
 
 #include <generated/compile.h>
 #include <linux/module.h>
@@ -61,6 +62,7 @@  enum mem_avoid_index {
 	MEM_AVOID_INITRD,
 	MEM_AVOID_CMDLINE,
 	MEM_AVOID_BOOTPARAMS,
+	MEM_AVOID_MEMMAP,
 	MEM_AVOID_MAX,
 };
 
@@ -77,6 +79,37 @@  static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
 	return true;
 }
 
+#include "../../../../lib/cmdline.c"
+
+static int
+parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
+{
+	char *oldp;
+
+	if (!p)
+		return -EINVAL;
+
+	/* we don't care about this option here */
+	if (!strncmp(p, "exactmap", 8))
+		return -EINVAL;
+
+	oldp = p;
+	*size = memparse(p, &p);
+	if (p == oldp)
+		return -EINVAL;
+
+	switch (*p) {
+	case '@':
+	case '#':
+	case '$':
+	case '!':
+		*start = memparse(p+1, &p);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
 /*
  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
  * The mem_avoid array is used to store the ranges that need to be avoided
@@ -158,6 +191,8 @@  static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	u64 initrd_start, initrd_size;
 	u64 cmd_line, cmd_line_size;
 	char *ptr;
+	char arg[38];
+	unsigned long long memmap_start, memmap_size;
 
 	/*
 	 * Avoid the region that is unsafe to overlap during
@@ -195,6 +230,16 @@  static void mem_avoid_init(unsigned long input, unsigned long input_size,
 	add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
 			 mem_avoid[MEM_AVOID_BOOTPARAMS].size);
 
+	/* see if we have any memmap areas */
+	if (cmdline_find_option("memmap", arg, sizeof(arg)) > 0) {
+		int rc = parse_memmap(arg, &memmap_start, &memmap_size);
+
+		if (!rc) {
+			mem_avoid[MEM_AVOID_MEMMAP].start = memmap_start;
+			mem_avoid[MEM_AVOID_MEMMAP].size = memmap_size;
+		}
+	}
+
 	/* We don't need to set a mapping for setup_data. */
 
 #ifdef CONFIG_X86_VERBOSE_BOOTUP
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index cc3bd58..7a376c1 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -122,6 +122,31 @@  unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas
 }
 
 /**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
+{
+	return simple_strtoull(cp, endp, base);
+}
+
+/**
+ * simple_strtol - convert a string to a signed long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+	if (*cp == '-')
+		return -simple_strtoul(cp + 1, endp, base);
+
+	return simple_strtoul(cp, endp, base);
+}
+
+/**
  * strlen - Find the length of a string
  * @s: The string to be sized
  */