[3/3] x86/kasan: support KASAN_VMALLOC

Message ID 20190725055503.19507-4-dja@axtens.net (mailing list archive)
State New, archived
Series kasan: support backing vmalloc space with real shadow memory

Commit Message

Daniel Axtens July 25, 2019, 5:55 a.m. UTC
In the case where KASAN directly allocates memory to back vmalloc
space, don't map the early shadow page over it.

Not mapping the early shadow page over the whole shadow space means
that there are some pgds that are not populated on boot. Allow the
vmalloc fault handler to also fault in vmalloc shadow as needed.

Signed-off-by: Daniel Axtens <dja@axtens.net>
---
 arch/x86/Kconfig            |  1 +
 arch/x86/mm/fault.c         | 13 +++++++++++++
 arch/x86/mm/kasan_init_64.c | 10 ++++++++++
 3 files changed, 24 insertions(+)

Comments

Dmitry Vyukov July 25, 2019, 7:49 a.m. UTC | #1
On Thu, Jul 25, 2019 at 7:55 AM Daniel Axtens <dja@axtens.net> wrote:
>
> In the case where KASAN directly allocates memory to back vmalloc
> space, don't map the early shadow page over it.
>
> Not mapping the early shadow page over the whole shadow space means
> that there are some pgds that are not populated on boot. Allow the
> vmalloc fault handler to also fault in vmalloc shadow as needed.
>
> Signed-off-by: Daniel Axtens <dja@axtens.net>


Would it make things simpler if we pre-populate the top level page
tables for the whole vmalloc region? That would be
(16<<40)/4096/512/512*8 = 131072 bytes?
The check in vmalloc_fault is not really a big burden, so I am not
sure. Just bringing it up as an option.

Acked-by: Dmitry Vyukov <dvyukov@google.com>
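
As a quick sanity check of that figure (a standalone userspace calculation,
not kernel code): a 16 TiB vmalloc region covered at 4096 * 512 * 512 bytes
= 1 GiB per entry comes to 16384 entries of 8 bytes each, i.e. 131072 bytes
of page tables.

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long long vmalloc_size = 16ULL << 40;          /* 16 TiB vmalloc area     */
	unsigned long long per_entry = 4096ULL * 512 * 512;     /* 1 GiB covered per entry */
	unsigned long long entries = vmalloc_size / per_entry;  /* 16384 entries           */
	unsigned long long table_bytes = entries * 8;           /* 8 bytes per entry       */

	assert(entries == 16384 && table_bytes == 131072);
	printf("%llu entries -> %llu bytes of page tables\n", entries, table_bytes);
	return 0;
}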

Andy Lutomirski July 25, 2019, 3:08 p.m. UTC | #2
> On Jul 25, 2019, at 12:49 AM, Dmitry Vyukov <dvyukov@google.com> wrote:
> 
>> On Thu, Jul 25, 2019 at 7:55 AM Daniel Axtens <dja@axtens.net> wrote:
>> 
>> In the case where KASAN directly allocates memory to back vmalloc
>> space, don't map the early shadow page over it.
>> 
>> Not mapping the early shadow page over the whole shadow space means
>> that there are some pgds that are not populated on boot. Allow the
>> vmalloc fault handler to also fault in vmalloc shadow as needed.
>> 
>> Signed-off-by: Daniel Axtens <dja@axtens.net>
> 
> 
> Would it make things simpler if we pre-populate the top level page
> tables for the whole vmalloc region? That would be
> (16<<40)/4096/512/512*8 = 131072 bytes?
> The check in vmalloc_fault is not really a big burden, so I am not
> sure. Just bringing it up as an option.

I prefer pre-populating them. In particular, I have already spent far too much time debugging the awful explosions when the stack doesn’t have KASAN backing, and the vmap stack code is very careful to pre-populate the stack pgds — vmalloc_fault fundamentally can’t recover when the stack itself isn’t mapped.

So the vmalloc_fault code, if it stays, needs some careful analysis to make sure it will actually survive all the various context switch cases.  Or you can pre-populate it.

Daniel Axtens July 25, 2019, 3:39 p.m. UTC | #3
>> Would it make things simpler if we pre-populate the top level page
>> tables for the whole vmalloc region? That would be
>> (16<<40)/4096/512/512*8 = 131072 bytes?
>> The check in vmalloc_fault is not really a big burden, so I am not
>> sure. Just bringing it up as an option.
>
> I prefer pre-populating them. In particular, I have already spent far too much time debugging the awful explosions when the stack doesn’t have KASAN backing, and the vmap stack code is very careful to pre-populate the stack pgds — vmalloc_fault fundamentally can’t recover when the stack itself isn’t mapped.
>
> So the vmalloc_fault code, if it stays, needs some careful analysis to make sure it will actually survive all the various context switch cases.  Or you can pre-populate it.
>

No worries - I'll have another crack at prepopulating them for v2. 

I tried prepopulating them at first, but because I'm really a powerpc
developer rather than an x86 developer (and because I find mm code
confusing at the best of times) I didn't have a lot of luck. I think on
reflection I stuffed up the pgd/p4d stuff and I think I know how to fix
it. So I'll give it another go and ask for help here if I get stuck :)

Regards,
Daniel


Andy Lutomirski July 25, 2019, 4:32 p.m. UTC | #4
On Thu, Jul 25, 2019 at 8:39 AM Daniel Axtens <dja@axtens.net> wrote:
>
>
> >> Would it make things simpler if we pre-populate the top level page
> >> tables for the whole vmalloc region? That would be
> >> (16<<40)/4096/512/512*8 = 131072 bytes?
> >> The check in vmalloc_fault is not really a big burden, so I am not
> >> sure. Just bringing it up as an option.
> >
> > I prefer pre-populating them. In particular, I have already spent far too much time debugging the awful explosions when the stack doesn’t have KASAN backing, and the vmap stack code is very careful to pre-populate the stack pgds — vmalloc_fault fundamentally can’t recover when the stack itself isn’t mapped.
> >
> > So the vmalloc_fault code, if it stays, needs some careful analysis to make sure it will actually survive all the various context switch cases.  Or you can pre-populate it.
> >
>
> No worries - I'll have another crack at prepopulating them for v2.
>
> I tried prepopulating them at first, but because I'm really a powerpc
> developer rather than an x86 developer (and because I find mm code
> confusing at the best of times) I didn't have a lot of luck. I think on
> reflection I stuffed up the pgd/p4d stuff and I think I know how to fix
> it. So I'll give it another go and ask for help here if I get stuck :)
>

I looked at this a bit more, and I think the vmalloc_fault approach is
fine with one tweak.  In prepare_switch_to(), you'll want to add
something like:

kasan_probe_shadow(next->thread.sp);

where kasan_probe_shadow() is a new function that, depending on kernel
config, either does nothing or reads the shadow associated with the
passed-in address.  Also, if you take this approach, I think you
should refactor vmalloc_fault() to push the address check to a new
helper:

static bool is_vmalloc_fault_addr(unsigned long addr)
{
  if (addr >= VMALLOC_START && addr < VMALLOC_END)
    return true;

#ifdef CONFIG_WHATEVER
  if (addr >= whatever && etc)
    return true;
#endif

 return false;
}

and call that from vmalloc_fault() rather than duplicating the logic.

Also, thanks for doing this series!
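
To make that suggestion concrete, here is one way the two helpers could
look, with the placeholders filled in from the range check this patch adds
(CONFIG_KASAN_VMALLOC guarding the KASAN_SHADOW_START/KASAN_SHADOW_END
window). kasan_probe_shadow() is only a sketch of the behaviour described
above and is not part of this series:

static bool is_vmalloc_fault_addr(unsigned long addr)
{
	if (addr >= VMALLOC_START && addr < VMALLOC_END)
		return true;

#ifdef CONFIG_KASAN_VMALLOC
	/* Dynamically created vmalloc shadow may also need to be faulted in. */
	if (addr >= KASAN_SHADOW_START && addr < KASAN_SHADOW_END)
		return true;
#endif

	return false;
}

static inline void kasan_probe_shadow(unsigned long addr)
{
#ifdef CONFIG_KASAN_VMALLOC
	/*
	 * Touch the shadow byte for the next task's stack so any missing
	 * pgd is faulted in while we are still running on a stack that is
	 * known to be mapped.
	 */
	volatile u8 *shadow = kasan_mem_to_shadow((void *)addr);

	(void)*shadow;
#endif
}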

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 222855cc0158..40562cc3771f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -134,6 +134,7 @@  config X86
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN			if X86_64
+	select HAVE_ARCH_KASAN_VMALLOC		if X86_64
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS		if MMU
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if MMU && COMPAT
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6c46095cd0d9..d722230121c3 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -340,8 +340,21 @@  static noinline int vmalloc_fault(unsigned long address)
 	pte_t *pte;
 
 	/* Make sure we are in vmalloc area: */
+#ifndef CONFIG_KASAN_VMALLOC
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
 		return -1;
+#else
+	/*
+	 * Some of the shadow mapping for the vmalloc area lives outside the
+	 * pgds populated by kasan init. They are created dynamically and so
+	 * we may need to fault them in.
+	 *
+	 * You can observe this with test_vmalloc's align_shift_alloc_test
+	 */
+	if (!((address >= VMALLOC_START && address < VMALLOC_END) ||
+	      (address >= KASAN_SHADOW_START && address < KASAN_SHADOW_END)))
+		return -1;
+#endif
 
 	/*
 	 * Copy kernel mappings over when needed. This can also
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 296da58f3013..e2fe1c1b805c 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -352,9 +352,19 @@  void __init kasan_init(void)
 	shadow_cpu_entry_end = (void *)round_up(
 			(unsigned long)shadow_cpu_entry_end, PAGE_SIZE);
 
+	/*
+	 * If we're in full vmalloc mode, don't back vmalloc space with early
+	 * shadow pages.
+	 */
+#ifdef CONFIG_KASAN_VMALLOC
+	kasan_populate_early_shadow(
+		kasan_mem_to_shadow((void *)VMALLOC_END+1),
+		shadow_cpu_entry_begin);
+#else
 	kasan_populate_early_shadow(
 		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
 		shadow_cpu_entry_begin);
+#endif
 
 	kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
 			      (unsigned long)shadow_cpu_entry_end, 0);