diff mbox series

[RFC,1/3] x86/xen: add basic KASAN support for PV kernel

Message ID 20191217140804.27364-2-sergey.dyasli@citrix.com (mailing list archive)
State Superseded
Headers show
Series basic KASAN support for Xen PV domains | expand

Commit Message

Sergey Dyasli Dec. 17, 2019, 2:08 p.m. UTC
This enables to use Outline instrumentation for Xen PV kernels.

KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
and hence disabled.

Rough edges in the patch are marked with XXX.

Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
---
 arch/x86/mm/init.c          | 14 ++++++++++++++
 arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++
 arch/x86/xen/Makefile       |  7 +++++++
 arch/x86/xen/enlighten_pv.c |  3 +++
 arch/x86/xen/mmu_pv.c       | 13 +++++++++++--
 arch/x86/xen/multicalls.c   | 10 ++++++++++
 drivers/xen/Makefile        |  2 ++
 kernel/Makefile             |  2 ++
 lib/Kconfig.kasan           |  3 ++-
 9 files changed, 79 insertions(+), 3 deletions(-)

Comments

Jürgen Groß Dec. 18, 2019, 9:24 a.m. UTC | #1
On 17.12.19 15:08, Sergey Dyasli wrote:
> This enables to use Outline instrumentation for Xen PV kernels.
> 
> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
> and hence disabled.
> 
> Rough edges in the patch are marked with XXX.
> 
> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
> ---
>   arch/x86/mm/init.c          | 14 ++++++++++++++
>   arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++
>   arch/x86/xen/Makefile       |  7 +++++++
>   arch/x86/xen/enlighten_pv.c |  3 +++
>   arch/x86/xen/mmu_pv.c       | 13 +++++++++++--
>   arch/x86/xen/multicalls.c   | 10 ++++++++++
>   drivers/xen/Makefile        |  2 ++
>   kernel/Makefile             |  2 ++
>   lib/Kconfig.kasan           |  3 ++-
>   9 files changed, 79 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index e7bb483557c9..0c98a45eec6c 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -8,6 +8,8 @@
>   #include <linux/kmemleak.h>
>   #include <linux/sched/task.h>
>   
> +#include <xen/xen.h>
> +
>   #include <asm/set_memory.h>
>   #include <asm/e820/api.h>
>   #include <asm/init.h>
> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end)
>   	unsigned long end_ul = (unsigned long)end;
>   	unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
>   
> +	/*
> +	 * XXX: skip this for now. Otherwise it leads to:
> +	 *
> +	 * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40)
> +	 * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
> +	 * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157
> +	 *
> +	 * and further #PF error: [PROT] [WRITE] in the kernel.
> +	 */
> +	if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
> +		return;
> +

I guess this is related to freeing some kasan page tables without
unpinning them?

>   	free_init_pages(what, begin_ul, end_ul);
>   
>   	/*
> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
> index cf5bc37c90ac..caee2022f8b0 100644
> --- a/arch/x86/mm/kasan_init_64.c
> +++ b/arch/x86/mm/kasan_init_64.c
> @@ -13,6 +13,8 @@
>   #include <linux/sched/task.h>
>   #include <linux/vmalloc.h>
>   
> +#include <xen/xen.h>
> +
>   #include <asm/e820/types.h>
>   #include <asm/pgalloc.h>
>   #include <asm/tlbflush.h>
> @@ -20,6 +22,9 @@
>   #include <asm/pgtable.h>
>   #include <asm/cpu_entry_area.h>
>   
> +#include <xen/interface/xen.h>
> +#include <asm/xen/hypervisor.h>
> +
>   extern struct range pfn_mapped[E820_MAX_ENTRIES];
>   
>   static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = {
>   };
>   #endif
>   
> +#ifdef CONFIG_XEN
> +/* XXX: this should go to some header */
> +void __init set_page_prot(void *addr, pgprot_t prot);
> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
> +#endif
> +

Instead of exporting those, why don't you ...

>   void __init kasan_early_init(void)
>   {
>   	int i;
> @@ -332,6 +343,16 @@ void __init kasan_early_init(void)
>   	for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>   		kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>   
> +	if (xen_pv_domain()) {
> +		/* PV page tables must have PAGE_KERNEL_RO */
> +		set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
> +		set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
> +		set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);

add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())?

> +
> +		/* Add mappings to the initial PV page tables */
> +		kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
> +	}
> +
>   	kasan_map_early_shadow(early_top_pgt);
>   	kasan_map_early_shadow(init_top_pgt);
>   }
> @@ -369,6 +390,13 @@ void __init kasan_init(void)
>   				__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
>   	}
>   
> +	if (xen_pv_domain()) {
> +		/* PV page tables must be pinned */
> +		set_page_prot(early_top_pgt, PAGE_KERNEL_RO);
> +		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
> +				  PFN_DOWN(__pa_symbol(early_top_pgt)));

and another one like xen_pv_kasan_init() here.

> +	}
> +
>   	load_cr3(early_top_pgt);
>   	__flush_tlb_all();
>   
> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
> index 084de77a109e..102fad0b0bca 100644
> --- a/arch/x86/xen/Makefile
> +++ b/arch/x86/xen/Makefile
> @@ -1,3 +1,10 @@
> +KASAN_SANITIZE_enlighten_pv.o := n
> +KASAN_SANITIZE_enlighten.o := n
> +KASAN_SANITIZE_irq.o := n
> +KASAN_SANITIZE_mmu_pv.o := n
> +KASAN_SANITIZE_p2m.o := n
> +KASAN_SANITIZE_multicalls.o := n
> +
>   # SPDX-License-Identifier: GPL-2.0
>   OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
>   
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index ae4a41ca19f6..27de55699f24 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -72,6 +72,7 @@
>   #include <asm/mwait.h>
>   #include <asm/pci_x86.h>
>   #include <asm/cpu.h>
> +#include <asm/kasan.h>
>   
>   #ifdef CONFIG_ACPI
>   #include <linux/acpi.h>
> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
>   	/* Get mfn list */
>   	xen_build_dynamic_phys_to_machine();
>   
> +	kasan_early_init();
> +
>   	/*
>   	 * Set up kernel GDT and segment registers, mainly so that
>   	 * -fstack-protector code can be executed.
> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
> index c8dbee62ec2a..eaf63f1f26af 100644
> --- a/arch/x86/xen/mmu_pv.c
> +++ b/arch/x86/xen/mmu_pv.c
> @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
>   
>   static void xen_post_allocator_init(void);
>   
> -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>   {
>   	struct mmuext_op op;
>   
> @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
>   	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
>   		BUG();
>   }
> -static void __init set_page_prot(void *addr, pgprot_t prot)
> +void __init set_page_prot(void *addr, pgprot_t prot)
>   {
>   	return set_page_prot_flags(addr, prot, UVMF_NONE);
>   }
> @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
>   	if (i && i < pgd_index(__START_KERNEL_map))
>   		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>   
> +#ifdef CONFIG_KASAN
> +	/*
> +	 * Copy KASAN mappings
> +	 * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
> +	 */
> +	for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
> +		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
> +#endif
> +
>   	/* Make pagetable pieces RO */
>   	set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
>   	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
> index 07054572297f..5e4729efbbe2 100644
> --- a/arch/x86/xen/multicalls.c
> +++ b/arch/x86/xen/multicalls.c
> @@ -99,6 +99,15 @@ void xen_mc_flush(void)
>   				ret++;
>   	}
>   
> +	/*
> +	 * XXX: Kasan produces quite a lot (~2000) of warnings in a form of:
> +	 *
> +	 *     (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
> +	 *
> +	 * during kasan_init(). They are benign, but silence them for now.
> +	 * Otherwise, booting takes too long due to printk() spam.
> +	 */
> +#ifndef CONFIG_KASAN

It might be interesting to identify the problematic page tables.

I guess this would require some hacking to avoid the multicalls in order
to identify which page table should not be pinned again.


Juergen
Sergey Dyasli Dec. 19, 2019, 4:42 p.m. UTC | #2
On 18/12/2019 09:24, Jürgen Groß wrote:
> On 17.12.19 15:08, Sergey Dyasli wrote:
>> This enables to use Outline instrumentation for Xen PV kernels.
>>
>> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
>> and hence disabled.
>>
>> Rough edges in the patch are marked with XXX.
>>
>> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
>> ---
>>   arch/x86/mm/init.c          | 14 ++++++++++++++
>>   arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++
>>   arch/x86/xen/Makefile       |  7 +++++++
>>   arch/x86/xen/enlighten_pv.c |  3 +++
>>   arch/x86/xen/mmu_pv.c       | 13 +++++++++++--
>>   arch/x86/xen/multicalls.c   | 10 ++++++++++
>>   drivers/xen/Makefile        |  2 ++
>>   kernel/Makefile             |  2 ++
>>   lib/Kconfig.kasan           |  3 ++-
>>   9 files changed, 79 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
>> index e7bb483557c9..0c98a45eec6c 100644
>> --- a/arch/x86/mm/init.c
>> +++ b/arch/x86/mm/init.c
>> @@ -8,6 +8,8 @@
>>   #include <linux/kmemleak.h>
>>   #include <linux/sched/task.h>
>>   +#include <xen/xen.h>
>> +
>>   #include <asm/set_memory.h>
>>   #include <asm/e820/api.h>
>>   #include <asm/init.h>
>> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end)
>>       unsigned long end_ul = (unsigned long)end;
>>       unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
>>   +    /*
>> +     * XXX: skip this for now. Otherwise it leads to:
>> +     *
>> +     * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40)
>> +     * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
>> +     * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157
>> +     *
>> +     * and further #PF error: [PROT] [WRITE] in the kernel.
>> +     */
>> +    if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
>> +        return;
>> +
> 
> I guess this is related to freeing some kasan page tables without
> unpinning them?

Your guess was correct. Turned out that early_top_pgt which I pinned and made RO
is located in .init section and that was causing issues. Unpinning it and making
RW again right after kasan_init() switches to use init_top_pgt seem to fix this
issue.

> 
>>       free_init_pages(what, begin_ul, end_ul);
>>         /*
>> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
>> index cf5bc37c90ac..caee2022f8b0 100644
>> --- a/arch/x86/mm/kasan_init_64.c
>> +++ b/arch/x86/mm/kasan_init_64.c
>> @@ -13,6 +13,8 @@
>>   #include <linux/sched/task.h>
>>   #include <linux/vmalloc.h>
>>   +#include <xen/xen.h>
>> +
>>   #include <asm/e820/types.h>
>>   #include <asm/pgalloc.h>
>>   #include <asm/tlbflush.h>
>> @@ -20,6 +22,9 @@
>>   #include <asm/pgtable.h>
>>   #include <asm/cpu_entry_area.h>
>>   +#include <xen/interface/xen.h>
>> +#include <asm/xen/hypervisor.h>
>> +
>>   extern struct range pfn_mapped[E820_MAX_ENTRIES];
>>     static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
>> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = {
>>   };
>>   #endif
>>   +#ifdef CONFIG_XEN
>> +/* XXX: this should go to some header */
>> +void __init set_page_prot(void *addr, pgprot_t prot);
>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
>> +#endif
>> +
> 
> Instead of exporting those, why don't you ...
> 
>>   void __init kasan_early_init(void)
>>   {
>>       int i;
>> @@ -332,6 +343,16 @@ void __init kasan_early_init(void)
>>       for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>>           kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>>   +    if (xen_pv_domain()) {
>> +        /* PV page tables must have PAGE_KERNEL_RO */
>> +        set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
>> +        set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
>> +        set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
> 
> add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())?

Sounds like a good suggestion, but new functions still need some header for
declarations (xen/xen.h?). And kasan_map_early_shadow() will need exporting
through kasan.h as well, but that's probably not an issue.

> 
>> +
>> +        /* Add mappings to the initial PV page tables */
>> +        kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
>> +    }
>> +
>>       kasan_map_early_shadow(early_top_pgt);
>>       kasan_map_early_shadow(init_top_pgt);
>>   }
>> @@ -369,6 +390,13 @@ void __init kasan_init(void)
>>                   __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
>>       }
>>   +    if (xen_pv_domain()) {
>> +        /* PV page tables must be pinned */
>> +        set_page_prot(early_top_pgt, PAGE_KERNEL_RO);
>> +        pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
>> +                  PFN_DOWN(__pa_symbol(early_top_pgt)));
> 
> and another one like xen_pv_kasan_init() here.

Now there needs to be a 3rd function to unpin early_top_pgt.

> 
>> +    }
>> +
>>       load_cr3(early_top_pgt);
>>       __flush_tlb_all();
>>   diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
>> index 084de77a109e..102fad0b0bca 100644
>> --- a/arch/x86/xen/Makefile
>> +++ b/arch/x86/xen/Makefile
>> @@ -1,3 +1,10 @@
>> +KASAN_SANITIZE_enlighten_pv.o := n
>> +KASAN_SANITIZE_enlighten.o := n
>> +KASAN_SANITIZE_irq.o := n
>> +KASAN_SANITIZE_mmu_pv.o := n
>> +KASAN_SANITIZE_p2m.o := n
>> +KASAN_SANITIZE_multicalls.o := n
>> +
>>   # SPDX-License-Identifier: GPL-2.0
>>   OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
>>   diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
>> index ae4a41ca19f6..27de55699f24 100644
>> --- a/arch/x86/xen/enlighten_pv.c
>> +++ b/arch/x86/xen/enlighten_pv.c
>> @@ -72,6 +72,7 @@
>>   #include <asm/mwait.h>
>>   #include <asm/pci_x86.h>
>>   #include <asm/cpu.h>
>> +#include <asm/kasan.h>
>>     #ifdef CONFIG_ACPI
>>   #include <linux/acpi.h>
>> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
>>       /* Get mfn list */
>>       xen_build_dynamic_phys_to_machine();
>>   +    kasan_early_init();
>> +
>>       /*
>>        * Set up kernel GDT and segment registers, mainly so that
>>        * -fstack-protector code can be executed.
>> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
>> index c8dbee62ec2a..eaf63f1f26af 100644
>> --- a/arch/x86/xen/mmu_pv.c
>> +++ b/arch/x86/xen/mmu_pv.c
>> @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
>>     static void xen_post_allocator_init(void);
>>   -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>>   {
>>       struct mmuext_op op;
>>   @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
>>       if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
>>           BUG();
>>   }
>> -static void __init set_page_prot(void *addr, pgprot_t prot)
>> +void __init set_page_prot(void *addr, pgprot_t prot)
>>   {
>>       return set_page_prot_flags(addr, prot, UVMF_NONE);
>>   }
>> @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
>>       if (i && i < pgd_index(__START_KERNEL_map))
>>           init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>>   +#ifdef CONFIG_KASAN
>> +    /*
>> +     * Copy KASAN mappings
>> +     * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
>> +     */
>> +    for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
>> +        init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>> +#endif
>> +
>>       /* Make pagetable pieces RO */
>>       set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
>>       set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
>> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
>> index 07054572297f..5e4729efbbe2 100644
>> --- a/arch/x86/xen/multicalls.c
>> +++ b/arch/x86/xen/multicalls.c
>> @@ -99,6 +99,15 @@ void xen_mc_flush(void)
>>                   ret++;
>>       }
>>   +    /*
>> +     * XXX: Kasan produces quite a lot (~2000) of warnings in a form of:
>> +     *
>> +     *     (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
>> +     *
>> +     * during kasan_init(). They are benign, but silence them for now.
>> +     * Otherwise, booting takes too long due to printk() spam.
>> +     */
>> +#ifndef CONFIG_KASAN
> 
> It might be interesting to identify the problematic page tables.
> 
> I guess this would require some hacking to avoid the multicalls in order
> to identify which page table should not be pinned again.

I tracked this down to xen_alloc_ptpage() in mmu_pv.c:

			if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
				__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);

kasan_populate_early_shadow() is doing lots pmd_populate_kernel() with
kasan_early_shadow_pte (mfn of which is reported by Xen). Currently I'm not
sure how to fix that. Is it possible to check that pfn has already been pinned
from Linux kernel? xen_page_pinned() seems to be an incorrect way to check that.

--
Thanks,
Sergey
Jürgen Groß Dec. 20, 2019, 8:43 a.m. UTC | #3
On 19.12.19 17:42, Sergey Dyasli wrote:
> On 18/12/2019 09:24, Jürgen Groß wrote:
>> On 17.12.19 15:08, Sergey Dyasli wrote:
>>> This enables to use Outline instrumentation for Xen PV kernels.
>>>
>>> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes
>>> and hence disabled.
>>>
>>> Rough edges in the patch are marked with XXX.
>>>
>>> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com>
>>> ---
>>>    arch/x86/mm/init.c          | 14 ++++++++++++++
>>>    arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++
>>>    arch/x86/xen/Makefile       |  7 +++++++
>>>    arch/x86/xen/enlighten_pv.c |  3 +++
>>>    arch/x86/xen/mmu_pv.c       | 13 +++++++++++--
>>>    arch/x86/xen/multicalls.c   | 10 ++++++++++
>>>    drivers/xen/Makefile        |  2 ++
>>>    kernel/Makefile             |  2 ++
>>>    lib/Kconfig.kasan           |  3 ++-
>>>    9 files changed, 79 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
>>> index e7bb483557c9..0c98a45eec6c 100644
>>> --- a/arch/x86/mm/init.c
>>> +++ b/arch/x86/mm/init.c
>>> @@ -8,6 +8,8 @@
>>>    #include <linux/kmemleak.h>
>>>    #include <linux/sched/task.h>
>>>    +#include <xen/xen.h>
>>> +
>>>    #include <asm/set_memory.h>
>>>    #include <asm/e820/api.h>
>>>    #include <asm/init.h>
>>> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end)
>>>        unsigned long end_ul = (unsigned long)end;
>>>        unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
>>>    +    /*
>>> +     * XXX: skip this for now. Otherwise it leads to:
>>> +     *
>>> +     * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40)
>>> +     * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
>>> +     * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157
>>> +     *
>>> +     * and further #PF error: [PROT] [WRITE] in the kernel.
>>> +     */
>>> +    if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
>>> +        return;
>>> +
>>
>> I guess this is related to freeing some kasan page tables without
>> unpinning them?
> 
> Your guess was correct. Turned out that early_top_pgt which I pinned and made RO
> is located in .init section and that was causing issues. Unpinning it and making
> RW again right after kasan_init() switches to use init_top_pgt seem to fix this
> issue.
> 
>>
>>>        free_init_pages(what, begin_ul, end_ul);
>>>          /*
>>> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
>>> index cf5bc37c90ac..caee2022f8b0 100644
>>> --- a/arch/x86/mm/kasan_init_64.c
>>> +++ b/arch/x86/mm/kasan_init_64.c
>>> @@ -13,6 +13,8 @@
>>>    #include <linux/sched/task.h>
>>>    #include <linux/vmalloc.h>
>>>    +#include <xen/xen.h>
>>> +
>>>    #include <asm/e820/types.h>
>>>    #include <asm/pgalloc.h>
>>>    #include <asm/tlbflush.h>
>>> @@ -20,6 +22,9 @@
>>>    #include <asm/pgtable.h>
>>>    #include <asm/cpu_entry_area.h>
>>>    +#include <xen/interface/xen.h>
>>> +#include <asm/xen/hypervisor.h>
>>> +
>>>    extern struct range pfn_mapped[E820_MAX_ENTRIES];
>>>      static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
>>> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = {
>>>    };
>>>    #endif
>>>    +#ifdef CONFIG_XEN
>>> +/* XXX: this should go to some header */
>>> +void __init set_page_prot(void *addr, pgprot_t prot);
>>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
>>> +#endif
>>> +
>>
>> Instead of exporting those, why don't you ...
>>
>>>    void __init kasan_early_init(void)
>>>    {
>>>        int i;
>>> @@ -332,6 +343,16 @@ void __init kasan_early_init(void)
>>>        for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
>>>            kasan_early_shadow_p4d[i] = __p4d(p4d_val);
>>>    +    if (xen_pv_domain()) {
>>> +        /* PV page tables must have PAGE_KERNEL_RO */
>>> +        set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
>>> +        set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
>>> +        set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
>>
>> add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())?
> 
> Sounds like a good suggestion, but new functions still need some header for
> declarations (xen/xen.h?). And kasan_map_early_shadow() will need exporting

xen/xen-ops.h

> through kasan.h as well, but that's probably not an issue.

You could let the new function return (pgd_t *)xen_start_info->pt_base
and use that here, e.g.:

if (xen_pv_domain()) {
     pgd_t *pgd;

     pgd = xen_kasan_early_init();
     kasan_map_early_shadow(pgd);
}

> 
>>
>>> +
>>> +        /* Add mappings to the initial PV page tables */
>>> +        kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
>>> +    }
>>> +
>>>        kasan_map_early_shadow(early_top_pgt);
>>>        kasan_map_early_shadow(init_top_pgt);
>>>    }
>>> @@ -369,6 +390,13 @@ void __init kasan_init(void)
>>>                    __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
>>>        }
>>>    +    if (xen_pv_domain()) {
>>> +        /* PV page tables must be pinned */
>>> +        set_page_prot(early_top_pgt, PAGE_KERNEL_RO);
>>> +        pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
>>> +                  PFN_DOWN(__pa_symbol(early_top_pgt)));
>>
>> and another one like xen_pv_kasan_init() here.
> 
> Now there needs to be a 3rd function to unpin early_top_pgt.

Not if you do the load_cr3 in the xen pv case in the new function:

if (xen_pv_domain())
     xen_kasan_load_cr3(early_top_pgt);
else
     load_cr3(early_top_pgt);

> 
>>
>>> +    }
>>> +
>>>        load_cr3(early_top_pgt);
>>>        __flush_tlb_all();
>>>    diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
>>> index 084de77a109e..102fad0b0bca 100644
>>> --- a/arch/x86/xen/Makefile
>>> +++ b/arch/x86/xen/Makefile
>>> @@ -1,3 +1,10 @@
>>> +KASAN_SANITIZE_enlighten_pv.o := n
>>> +KASAN_SANITIZE_enlighten.o := n
>>> +KASAN_SANITIZE_irq.o := n
>>> +KASAN_SANITIZE_mmu_pv.o := n
>>> +KASAN_SANITIZE_p2m.o := n
>>> +KASAN_SANITIZE_multicalls.o := n
>>> +
>>>    # SPDX-License-Identifier: GPL-2.0
>>>    OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
>>>    diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
>>> index ae4a41ca19f6..27de55699f24 100644
>>> --- a/arch/x86/xen/enlighten_pv.c
>>> +++ b/arch/x86/xen/enlighten_pv.c
>>> @@ -72,6 +72,7 @@
>>>    #include <asm/mwait.h>
>>>    #include <asm/pci_x86.h>
>>>    #include <asm/cpu.h>
>>> +#include <asm/kasan.h>
>>>      #ifdef CONFIG_ACPI
>>>    #include <linux/acpi.h>
>>> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
>>>        /* Get mfn list */
>>>        xen_build_dynamic_phys_to_machine();
>>>    +    kasan_early_init();
>>> +
>>>        /*
>>>         * Set up kernel GDT and segment registers, mainly so that
>>>         * -fstack-protector code can be executed.
>>> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
>>> index c8dbee62ec2a..eaf63f1f26af 100644
>>> --- a/arch/x86/xen/mmu_pv.c
>>> +++ b/arch/x86/xen/mmu_pv.c
>>> @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
>>>      static void xen_post_allocator_init(void);
>>>    -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
>>>    {
>>>        struct mmuext_op op;
>>>    @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot,
>>>        if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
>>>            BUG();
>>>    }
>>> -static void __init set_page_prot(void *addr, pgprot_t prot)
>>> +void __init set_page_prot(void *addr, pgprot_t prot)
>>>    {
>>>        return set_page_prot_flags(addr, prot, UVMF_NONE);
>>>    }
>>> @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
>>>        if (i && i < pgd_index(__START_KERNEL_map))
>>>            init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>>>    +#ifdef CONFIG_KASAN
>>> +    /*
>>> +     * Copy KASAN mappings
>>> +     * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
>>> +     */
>>> +    for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
>>> +        init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
>>> +#endif
>>> +
>>>        /* Make pagetable pieces RO */
>>>        set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
>>>        set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
>>> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
>>> index 07054572297f..5e4729efbbe2 100644
>>> --- a/arch/x86/xen/multicalls.c
>>> +++ b/arch/x86/xen/multicalls.c
>>> @@ -99,6 +99,15 @@ void xen_mc_flush(void)
>>>                    ret++;
>>>        }
>>>    +    /*
>>> +     * XXX: Kasan produces quite a lot (~2000) of warnings in a form of:
>>> +     *
>>> +     *     (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
>>> +     *
>>> +     * during kasan_init(). They are benign, but silence them for now.
>>> +     * Otherwise, booting takes too long due to printk() spam.
>>> +     */
>>> +#ifndef CONFIG_KASAN
>>
>> It might be interesting to identify the problematic page tables.
>>
>> I guess this would require some hacking to avoid the multicalls in order
>> to identify which page table should not be pinned again.
> 
> I tracked this down to xen_alloc_ptpage() in mmu_pv.c:
> 
> 			if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
> 				__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
> 
> kasan_populate_early_shadow() is doing lots pmd_populate_kernel() with
> kasan_early_shadow_pte (mfn of which is reported by Xen). Currently I'm not
> sure how to fix that. Is it possible to check that pfn has already been pinned
> from Linux kernel? xen_page_pinned() seems to be an incorrect way to check that.

Right, xen_page_pinned() is not yet working at this stage of booting.

But using pmd_populate_kernel() with the same page table multiple times
is just wrong. Doing so the first time is fine, all the other cases
should just use set_pmd().


Juergen
diff mbox series

Patch

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index e7bb483557c9..0c98a45eec6c 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -8,6 +8,8 @@ 
 #include <linux/kmemleak.h>
 #include <linux/sched/task.h>
 
+#include <xen/xen.h>
+
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
 #include <asm/init.h>
@@ -835,6 +837,18 @@  void free_kernel_image_pages(const char *what, void *begin, void *end)
 	unsigned long end_ul = (unsigned long)end;
 	unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
 
+	/*
+	 * XXX: skip this for now. Otherwise it leads to:
+	 *
+	 * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40)
+	 * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page
+	 * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157
+	 *
+	 * and further #PF error: [PROT] [WRITE] in the kernel.
+	 */
+	if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN))
+		return;
+
 	free_init_pages(what, begin_ul, end_ul);
 
 	/*
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index cf5bc37c90ac..caee2022f8b0 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -13,6 +13,8 @@ 
 #include <linux/sched/task.h>
 #include <linux/vmalloc.h>
 
+#include <xen/xen.h>
+
 #include <asm/e820/types.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -20,6 +22,9 @@ 
 #include <asm/pgtable.h>
 #include <asm/cpu_entry_area.h>
 
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
@@ -305,6 +310,12 @@  static struct notifier_block kasan_die_notifier = {
 };
 #endif
 
+#ifdef CONFIG_XEN
+/* XXX: this should go to some header */
+void __init set_page_prot(void *addr, pgprot_t prot);
+void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn);
+#endif
+
 void __init kasan_early_init(void)
 {
 	int i;
@@ -332,6 +343,16 @@  void __init kasan_early_init(void)
 	for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
 		kasan_early_shadow_p4d[i] = __p4d(p4d_val);
 
+	if (xen_pv_domain()) {
+		/* PV page tables must have PAGE_KERNEL_RO */
+		set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO);
+		set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO);
+		set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+
+		/* Add mappings to the initial PV page tables */
+		kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base);
+	}
+
 	kasan_map_early_shadow(early_top_pgt);
 	kasan_map_early_shadow(init_top_pgt);
 }
@@ -369,6 +390,13 @@  void __init kasan_init(void)
 				__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
 	}
 
+	if (xen_pv_domain()) {
+		/* PV page tables must be pinned */
+		set_page_prot(early_top_pgt, PAGE_KERNEL_RO);
+		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+				  PFN_DOWN(__pa_symbol(early_top_pgt)));
+	}
+
 	load_cr3(early_top_pgt);
 	__flush_tlb_all();
 
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 084de77a109e..102fad0b0bca 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,10 @@ 
+KASAN_SANITIZE_enlighten_pv.o := n
+KASAN_SANITIZE_enlighten.o := n
+KASAN_SANITIZE_irq.o := n
+KASAN_SANITIZE_mmu_pv.o := n
+KASAN_SANITIZE_p2m.o := n
+KASAN_SANITIZE_multicalls.o := n
+
 # SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
 
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ae4a41ca19f6..27de55699f24 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -72,6 +72,7 @@ 
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
 #include <asm/cpu.h>
+#include <asm/kasan.h>
 
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
@@ -1231,6 +1232,8 @@  asmlinkage __visible void __init xen_start_kernel(void)
 	/* Get mfn list */
 	xen_build_dynamic_phys_to_machine();
 
+	kasan_early_init();
+
 	/*
 	 * Set up kernel GDT and segment registers, mainly so that
 	 * -fstack-protector code can be executed.
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index c8dbee62ec2a..eaf63f1f26af 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1079,7 +1079,7 @@  static void xen_exit_mmap(struct mm_struct *mm)
 
 static void xen_post_allocator_init(void);
 
-static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
 {
 	struct mmuext_op op;
 
@@ -1767,7 +1767,7 @@  static void __init set_page_prot_flags(void *addr, pgprot_t prot,
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
 		BUG();
 }
-static void __init set_page_prot(void *addr, pgprot_t prot)
+void __init set_page_prot(void *addr, pgprot_t prot)
 {
 	return set_page_prot_flags(addr, prot, UVMF_NONE);
 }
@@ -1943,6 +1943,15 @@  void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	if (i && i < pgd_index(__START_KERNEL_map))
 		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
 
+#ifdef CONFIG_KASAN
+	/*
+	 * Copy KASAN mappings
+	 * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
+	 */
+	for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++)
+		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+#endif
+
 	/* Make pagetable pieces RO */
 	set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 07054572297f..5e4729efbbe2 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -99,6 +99,15 @@  void xen_mc_flush(void)
 				ret++;
 	}
 
+	/*
+	 * XXX: Kasan produces quite a lot (~2000) of warnings in a form of:
+	 *
+	 *     (XEN) mm.c:3222:d155v0 mfn 3704b already pinned
+	 *
+	 * during kasan_init(). They are benign, but silence them for now.
+	 * Otherwise, booting takes too long due to printk() spam.
+	 */
+#ifndef CONFIG_KASAN
 	if (WARN_ON(ret)) {
 		pr_err("%d of %d multicall(s) failed: cpu %d\n",
 		       ret, b->mcidx, smp_processor_id());
@@ -121,6 +130,7 @@  void xen_mc_flush(void)
 			}
 		}
 	}
+#endif /* CONFIG_KASAN */
 
 	b->mcidx = 0;
 	b->argidx = 0;
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 0c4efa6fe450..1e9e1e41c0a8 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,4 +1,6 @@ 
 # SPDX-License-Identifier: GPL-2.0
+KASAN_SANITIZE_features.o := n
+
 obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
 obj-y	+= grant-table.o features.o balloon.o manage.o preempt.o time.o
 obj-y	+= mem-reservation.o
diff --git a/kernel/Makefile b/kernel/Makefile
index f2cc0d118a0b..1da6fd93c00c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -12,6 +12,8 @@  obj-y     = fork.o exec_domain.o panic.o \
 	    notifier.o ksysfs.o cred.o reboot.o \
 	    async.o range.o smpboot.o ucount.o
 
+KASAN_SANITIZE_cpu.o := n
+
 obj-$(CONFIG_MODULES) += kmod.o
 obj-$(CONFIG_MULTIUSER) += groups.o
 
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 81f5464ea9e1..429a638625ea 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -98,6 +98,7 @@  config KASAN_OUTLINE
 
 config KASAN_INLINE
 	bool "Inline instrumentation"
+	depends on !XEN_PV
 	help
 	  Compiler directly inserts code checking shadow memory before
 	  memory accesses. This is faster than outline (in some workloads
@@ -147,7 +148,7 @@  config KASAN_SW_TAGS_IDENTIFY
 
 config KASAN_VMALLOC
 	bool "Back mappings in vmalloc space with real shadow memory"
-	depends on KASAN && HAVE_ARCH_KASAN_VMALLOC
+	depends on KASAN && HAVE_ARCH_KASAN_VMALLOC && !XEN_PV
 	help
 	  By default, the shadow region for vmalloc space is the read-only
 	  zero page. This means that KASAN cannot detect errors involving