Message ID | 20191217140804.27364-2-sergey.dyasli@citrix.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | basic KASAN support for Xen PV domains | expand |
On 17.12.19 15:08, Sergey Dyasli wrote: > This enables to use Outline instrumentation for Xen PV kernels. > > KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes > and hence disabled. > > Rough edges in the patch are marked with XXX. > > Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com> > --- > arch/x86/mm/init.c | 14 ++++++++++++++ > arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++ > arch/x86/xen/Makefile | 7 +++++++ > arch/x86/xen/enlighten_pv.c | 3 +++ > arch/x86/xen/mmu_pv.c | 13 +++++++++++-- > arch/x86/xen/multicalls.c | 10 ++++++++++ > drivers/xen/Makefile | 2 ++ > kernel/Makefile | 2 ++ > lib/Kconfig.kasan | 3 ++- > 9 files changed, 79 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c > index e7bb483557c9..0c98a45eec6c 100644 > --- a/arch/x86/mm/init.c > +++ b/arch/x86/mm/init.c > @@ -8,6 +8,8 @@ > #include <linux/kmemleak.h> > #include <linux/sched/task.h> > > +#include <xen/xen.h> > + > #include <asm/set_memory.h> > #include <asm/e820/api.h> > #include <asm/init.h> > @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end) > unsigned long end_ul = (unsigned long)end; > unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT; > > + /* > + * XXX: skip this for now. Otherwise it leads to: > + * > + * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40) > + * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page > + * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157 > + * > + * and further #PF error: [PROT] [WRITE] in the kernel. > + */ > + if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN)) > + return; > + I guess this is related to freeing some kasan page tables without unpinning them? 
> free_init_pages(what, begin_ul, end_ul); > > /* > diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c > index cf5bc37c90ac..caee2022f8b0 100644 > --- a/arch/x86/mm/kasan_init_64.c > +++ b/arch/x86/mm/kasan_init_64.c > @@ -13,6 +13,8 @@ > #include <linux/sched/task.h> > #include <linux/vmalloc.h> > > +#include <xen/xen.h> > + > #include <asm/e820/types.h> > #include <asm/pgalloc.h> > #include <asm/tlbflush.h> > @@ -20,6 +22,9 @@ > #include <asm/pgtable.h> > #include <asm/cpu_entry_area.h> > > +#include <xen/interface/xen.h> > +#include <asm/xen/hypervisor.h> > + > extern struct range pfn_mapped[E820_MAX_ENTRIES]; > > static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); > @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = { > }; > #endif > > +#ifdef CONFIG_XEN > +/* XXX: this should go to some header */ > +void __init set_page_prot(void *addr, pgprot_t prot); > +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn); > +#endif > + Instead of exporting those, why don't you ... > void __init kasan_early_init(void) > { > int i; > @@ -332,6 +343,16 @@ void __init kasan_early_init(void) > for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++) > kasan_early_shadow_p4d[i] = __p4d(p4d_val); > > + if (xen_pv_domain()) { > + /* PV page tables must have PAGE_KERNEL_RO */ > + set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO); > + set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO); > + set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO); add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())? 
> + > + /* Add mappings to the initial PV page tables */ > + kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base); > + } > + > kasan_map_early_shadow(early_top_pgt); > kasan_map_early_shadow(init_top_pgt); > } > @@ -369,6 +390,13 @@ void __init kasan_init(void) > __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE)); > } > > + if (xen_pv_domain()) { > + /* PV page tables must be pinned */ > + set_page_prot(early_top_pgt, PAGE_KERNEL_RO); > + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, > + PFN_DOWN(__pa_symbol(early_top_pgt))); and another one like xen_pv_kasan_init() here. > + } > + > load_cr3(early_top_pgt); > __flush_tlb_all(); > > diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile > index 084de77a109e..102fad0b0bca 100644 > --- a/arch/x86/xen/Makefile > +++ b/arch/x86/xen/Makefile > @@ -1,3 +1,10 @@ > +KASAN_SANITIZE_enlighten_pv.o := n > +KASAN_SANITIZE_enlighten.o := n > +KASAN_SANITIZE_irq.o := n > +KASAN_SANITIZE_mmu_pv.o := n > +KASAN_SANITIZE_p2m.o := n > +KASAN_SANITIZE_multicalls.o := n > + > # SPDX-License-Identifier: GPL-2.0 > OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y > > diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c > index ae4a41ca19f6..27de55699f24 100644 > --- a/arch/x86/xen/enlighten_pv.c > +++ b/arch/x86/xen/enlighten_pv.c > @@ -72,6 +72,7 @@ > #include <asm/mwait.h> > #include <asm/pci_x86.h> > #include <asm/cpu.h> > +#include <asm/kasan.h> > > #ifdef CONFIG_ACPI > #include <linux/acpi.h> > @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void) > /* Get mfn list */ > xen_build_dynamic_phys_to_machine(); > > + kasan_early_init(); > + > /* > * Set up kernel GDT and segment registers, mainly so that > * -fstack-protector code can be executed. 
> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c > index c8dbee62ec2a..eaf63f1f26af 100644 > --- a/arch/x86/xen/mmu_pv.c > +++ b/arch/x86/xen/mmu_pv.c > @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm) > > static void xen_post_allocator_init(void); > > -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) > +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) > { > struct mmuext_op op; > > @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot, > if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) > BUG(); > } > -static void __init set_page_prot(void *addr, pgprot_t prot) > +void __init set_page_prot(void *addr, pgprot_t prot) > { > return set_page_prot_flags(addr, prot, UVMF_NONE); > } > @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) > if (i && i < pgd_index(__START_KERNEL_map)) > init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; > > +#ifdef CONFIG_KASAN > + /* > + * Copy KASAN mappings > + * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) > + */ > + for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++) > + init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; > +#endif > + > /* Make pagetable pieces RO */ > set_page_prot(init_top_pgt, PAGE_KERNEL_RO); > set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); > diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c > index 07054572297f..5e4729efbbe2 100644 > --- a/arch/x86/xen/multicalls.c > +++ b/arch/x86/xen/multicalls.c > @@ -99,6 +99,15 @@ void xen_mc_flush(void) > ret++; > } > > + /* > + * XXX: Kasan produces quite a lot (~2000) of warnings in a form of: > + * > + * (XEN) mm.c:3222:d155v0 mfn 3704b already pinned > + * > + * during kasan_init(). They are benign, but silence them for now. > + * Otherwise, booting takes too long due to printk() spam. 
> + */ > +#ifndef CONFIG_KASAN It might be interesting to identify the problematic page tables. I guess this would require some hacking to avoid the multicalls in order to identify which page table should not be pinned again. Juergen
On 18/12/2019 09:24, Jürgen Groß wrote: > On 17.12.19 15:08, Sergey Dyasli wrote: >> This enables to use Outline instrumentation for Xen PV kernels. >> >> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes >> and hence disabled. >> >> Rough edges in the patch are marked with XXX. >> >> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com> >> --- >> arch/x86/mm/init.c | 14 ++++++++++++++ >> arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++ >> arch/x86/xen/Makefile | 7 +++++++ >> arch/x86/xen/enlighten_pv.c | 3 +++ >> arch/x86/xen/mmu_pv.c | 13 +++++++++++-- >> arch/x86/xen/multicalls.c | 10 ++++++++++ >> drivers/xen/Makefile | 2 ++ >> kernel/Makefile | 2 ++ >> lib/Kconfig.kasan | 3 ++- >> 9 files changed, 79 insertions(+), 3 deletions(-) >> >> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c >> index e7bb483557c9..0c98a45eec6c 100644 >> --- a/arch/x86/mm/init.c >> +++ b/arch/x86/mm/init.c >> @@ -8,6 +8,8 @@ >> #include <linux/kmemleak.h> >> #include <linux/sched/task.h> >> +#include <xen/xen.h> >> + >> #include <asm/set_memory.h> >> #include <asm/e820/api.h> >> #include <asm/init.h> >> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end) >> unsigned long end_ul = (unsigned long)end; >> unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT; >> + /* >> + * XXX: skip this for now. Otherwise it leads to: >> + * >> + * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40) >> + * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page >> + * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157 >> + * >> + * and further #PF error: [PROT] [WRITE] in the kernel. >> + */ >> + if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN)) >> + return; >> + > > I guess this is related to freeing some kasan page tables without > unpinning them? Your guess was correct. 
Turned out that early_top_pgt which I pinned and made RO is located in .init section and that was causing issues. Unpinning it and making RW again right after kasan_init() switches to use init_top_pgt seem to fix this issue. > >> free_init_pages(what, begin_ul, end_ul); >> /* >> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c >> index cf5bc37c90ac..caee2022f8b0 100644 >> --- a/arch/x86/mm/kasan_init_64.c >> +++ b/arch/x86/mm/kasan_init_64.c >> @@ -13,6 +13,8 @@ >> #include <linux/sched/task.h> >> #include <linux/vmalloc.h> >> +#include <xen/xen.h> >> + >> #include <asm/e820/types.h> >> #include <asm/pgalloc.h> >> #include <asm/tlbflush.h> >> @@ -20,6 +22,9 @@ >> #include <asm/pgtable.h> >> #include <asm/cpu_entry_area.h> >> +#include <xen/interface/xen.h> >> +#include <asm/xen/hypervisor.h> >> + >> extern struct range pfn_mapped[E820_MAX_ENTRIES]; >> static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); >> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = { >> }; >> #endif >> +#ifdef CONFIG_XEN >> +/* XXX: this should go to some header */ >> +void __init set_page_prot(void *addr, pgprot_t prot); >> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn); >> +#endif >> + > > Instead of exporting those, why don't you ... > >> void __init kasan_early_init(void) >> { >> int i; >> @@ -332,6 +343,16 @@ void __init kasan_early_init(void) >> for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++) >> kasan_early_shadow_p4d[i] = __p4d(p4d_val); >> + if (xen_pv_domain()) { >> + /* PV page tables must have PAGE_KERNEL_RO */ >> + set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO); >> + set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO); >> + set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO); > > add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())? Sounds like a good suggestion, but new functions still need some header for declarations (xen/xen.h?). 
And kasan_map_early_shadow() will need exporting through kasan.h as well, but that's probably not an issue. > >> + >> + /* Add mappings to the initial PV page tables */ >> + kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base); >> + } >> + >> kasan_map_early_shadow(early_top_pgt); >> kasan_map_early_shadow(init_top_pgt); >> } >> @@ -369,6 +390,13 @@ void __init kasan_init(void) >> __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE)); >> } >> + if (xen_pv_domain()) { >> + /* PV page tables must be pinned */ >> + set_page_prot(early_top_pgt, PAGE_KERNEL_RO); >> + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, >> + PFN_DOWN(__pa_symbol(early_top_pgt))); > > and another one like xen_pv_kasan_init() here. Now there needs to be a 3rd function to unpin early_top_pgt. > >> + } >> + >> load_cr3(early_top_pgt); >> __flush_tlb_all(); >> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile >> index 084de77a109e..102fad0b0bca 100644 >> --- a/arch/x86/xen/Makefile >> +++ b/arch/x86/xen/Makefile >> @@ -1,3 +1,10 @@ >> +KASAN_SANITIZE_enlighten_pv.o := n >> +KASAN_SANITIZE_enlighten.o := n >> +KASAN_SANITIZE_irq.o := n >> +KASAN_SANITIZE_mmu_pv.o := n >> +KASAN_SANITIZE_p2m.o := n >> +KASAN_SANITIZE_multicalls.o := n >> + >> # SPDX-License-Identifier: GPL-2.0 >> OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y >> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c >> index ae4a41ca19f6..27de55699f24 100644 >> --- a/arch/x86/xen/enlighten_pv.c >> +++ b/arch/x86/xen/enlighten_pv.c >> @@ -72,6 +72,7 @@ >> #include <asm/mwait.h> >> #include <asm/pci_x86.h> >> #include <asm/cpu.h> >> +#include <asm/kasan.h> >> #ifdef CONFIG_ACPI >> #include <linux/acpi.h> >> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void) >> /* Get mfn list */ >> xen_build_dynamic_phys_to_machine(); >> + kasan_early_init(); >> + >> /* >> * Set up kernel GDT and segment registers, mainly so that >> * -fstack-protector code can be executed. 
>> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c >> index c8dbee62ec2a..eaf63f1f26af 100644 >> --- a/arch/x86/xen/mmu_pv.c >> +++ b/arch/x86/xen/mmu_pv.c >> @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm) >> static void xen_post_allocator_init(void); >> -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) >> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) >> { >> struct mmuext_op op; >> @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot, >> if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) >> BUG(); >> } >> -static void __init set_page_prot(void *addr, pgprot_t prot) >> +void __init set_page_prot(void *addr, pgprot_t prot) >> { >> return set_page_prot_flags(addr, prot, UVMF_NONE); >> } >> @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) >> if (i && i < pgd_index(__START_KERNEL_map)) >> init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; >> +#ifdef CONFIG_KASAN >> + /* >> + * Copy KASAN mappings >> + * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) >> + */ >> + for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++) >> + init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; >> +#endif >> + >> /* Make pagetable pieces RO */ >> set_page_prot(init_top_pgt, PAGE_KERNEL_RO); >> set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); >> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c >> index 07054572297f..5e4729efbbe2 100644 >> --- a/arch/x86/xen/multicalls.c >> +++ b/arch/x86/xen/multicalls.c >> @@ -99,6 +99,15 @@ void xen_mc_flush(void) >> ret++; >> } >> + /* >> + * XXX: Kasan produces quite a lot (~2000) of warnings in a form of: >> + * >> + * (XEN) mm.c:3222:d155v0 mfn 3704b already pinned >> + * >> + * during kasan_init(). They are benign, but silence them for now. >> + * Otherwise, booting takes too long due to printk() spam. 
>> + */ >> +#ifndef CONFIG_KASAN > > It might be interesting to identify the problematic page tables. > > I guess this would require some hacking to avoid the multicalls in order > to identify which page table should not be pinned again. I tracked this down to xen_alloc_ptpage() in mmu_pv.c: > > if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) > __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); > > kasan_populate_early_shadow() is doing lots of pmd_populate_kernel() calls with > kasan_early_shadow_pte (mfn of which is reported by Xen). Currently I'm not > sure how to fix that. Is it possible to check that pfn has already been pinned > from Linux kernel? xen_page_pinned() seems to be an incorrect way to check that. -- Thanks, Sergey
On 19.12.19 17:42, Sergey Dyasli wrote: > On 18/12/2019 09:24, Jürgen Groß wrote: >> On 17.12.19 15:08, Sergey Dyasli wrote: >>> This enables to use Outline instrumentation for Xen PV kernels. >>> >>> KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes >>> and hence disabled. >>> >>> Rough edges in the patch are marked with XXX. >>> >>> Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com> >>> --- >>> arch/x86/mm/init.c | 14 ++++++++++++++ >>> arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++ >>> arch/x86/xen/Makefile | 7 +++++++ >>> arch/x86/xen/enlighten_pv.c | 3 +++ >>> arch/x86/xen/mmu_pv.c | 13 +++++++++++-- >>> arch/x86/xen/multicalls.c | 10 ++++++++++ >>> drivers/xen/Makefile | 2 ++ >>> kernel/Makefile | 2 ++ >>> lib/Kconfig.kasan | 3 ++- >>> 9 files changed, 79 insertions(+), 3 deletions(-) >>> >>> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c >>> index e7bb483557c9..0c98a45eec6c 100644 >>> --- a/arch/x86/mm/init.c >>> +++ b/arch/x86/mm/init.c >>> @@ -8,6 +8,8 @@ >>> #include <linux/kmemleak.h> >>> #include <linux/sched/task.h> >>> +#include <xen/xen.h> >>> + >>> #include <asm/set_memory.h> >>> #include <asm/e820/api.h> >>> #include <asm/init.h> >>> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end) >>> unsigned long end_ul = (unsigned long)end; >>> unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT; >>> + /* >>> + * XXX: skip this for now. Otherwise it leads to: >>> + * >>> + * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40) >>> + * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page >>> + * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157 >>> + * >>> + * and further #PF error: [PROT] [WRITE] in the kernel. 
>>> + */ >>> + if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN)) >>> + return; >>> + >> >> I guess this is related to freeing some kasan page tables without >> unpinning them? > > Your guess was correct. Turned out that early_top_pgt which I pinned and made RO > is located in .init section and that was causing issues. Unpinning it and making > RW again right after kasan_init() switches to use init_top_pgt seem to fix this > issue. > >> >>> free_init_pages(what, begin_ul, end_ul); >>> /* >>> diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c >>> index cf5bc37c90ac..caee2022f8b0 100644 >>> --- a/arch/x86/mm/kasan_init_64.c >>> +++ b/arch/x86/mm/kasan_init_64.c >>> @@ -13,6 +13,8 @@ >>> #include <linux/sched/task.h> >>> #include <linux/vmalloc.h> >>> +#include <xen/xen.h> >>> + >>> #include <asm/e820/types.h> >>> #include <asm/pgalloc.h> >>> #include <asm/tlbflush.h> >>> @@ -20,6 +22,9 @@ >>> #include <asm/pgtable.h> >>> #include <asm/cpu_entry_area.h> >>> +#include <xen/interface/xen.h> >>> +#include <asm/xen/hypervisor.h> >>> + >>> extern struct range pfn_mapped[E820_MAX_ENTRIES]; >>> static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); >>> @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = { >>> }; >>> #endif >>> +#ifdef CONFIG_XEN >>> +/* XXX: this should go to some header */ >>> +void __init set_page_prot(void *addr, pgprot_t prot); >>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn); >>> +#endif >>> + >> >> Instead of exporting those, why don't you ... 
>> >>> void __init kasan_early_init(void) >>> { >>> int i; >>> @@ -332,6 +343,16 @@ void __init kasan_early_init(void) >>> for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++) >>> kasan_early_shadow_p4d[i] = __p4d(p4d_val); >>> + if (xen_pv_domain()) { >>> + /* PV page tables must have PAGE_KERNEL_RO */ >>> + set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO); >>> + set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO); >>> + set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO); >> >> add a function doing that to mmu_pv.c (e.g. xen_pv_kasan_early_init())? > > Sounds like a good suggestion, but new functions still need some header for > declarations (xen/xen.h?). And kasan_map_early_shadow() will need exporting xen/xen-ops.h > through kasan.h as well, but that's probably not an issue. You could let the new function return (pgd_t *)xen_start_info->pt_base and use that here, e.g.: if (xen_pv_domain()) { pgd_t *pgd; pgd = xen_kasan_early_init(); kasan_map_early_shadow(pgd); } > >> >>> + >>> + /* Add mappings to the initial PV page tables */ >>> + kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base); >>> + } >>> + >>> kasan_map_early_shadow(early_top_pgt); >>> kasan_map_early_shadow(init_top_pgt); >>> } >>> @@ -369,6 +390,13 @@ void __init kasan_init(void) >>> __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE)); >>> } >>> + if (xen_pv_domain()) { >>> + /* PV page tables must be pinned */ >>> + set_page_prot(early_top_pgt, PAGE_KERNEL_RO); >>> + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, >>> + PFN_DOWN(__pa_symbol(early_top_pgt))); >> >> and another one like xen_pv_kasan_init() here. > > Now there needs to be a 3rd function to unpin early_top_pgt. 
Not if you do the load_cr3 in the xen pv case in the new function: if (xen_pv_domain()) xen_kasan_load_cr3(early_top_pgt); else load_cr3(early_top_pgt); > >> >>> + } >>> + >>> load_cr3(early_top_pgt); >>> __flush_tlb_all(); >>> diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile >>> index 084de77a109e..102fad0b0bca 100644 >>> --- a/arch/x86/xen/Makefile >>> +++ b/arch/x86/xen/Makefile >>> @@ -1,3 +1,10 @@ >>> +KASAN_SANITIZE_enlighten_pv.o := n >>> +KASAN_SANITIZE_enlighten.o := n >>> +KASAN_SANITIZE_irq.o := n >>> +KASAN_SANITIZE_mmu_pv.o := n >>> +KASAN_SANITIZE_p2m.o := n >>> +KASAN_SANITIZE_multicalls.o := n >>> + >>> # SPDX-License-Identifier: GPL-2.0 >>> OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y >>> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c >>> index ae4a41ca19f6..27de55699f24 100644 >>> --- a/arch/x86/xen/enlighten_pv.c >>> +++ b/arch/x86/xen/enlighten_pv.c >>> @@ -72,6 +72,7 @@ >>> #include <asm/mwait.h> >>> #include <asm/pci_x86.h> >>> #include <asm/cpu.h> >>> +#include <asm/kasan.h> >>> #ifdef CONFIG_ACPI >>> #include <linux/acpi.h> >>> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void) >>> /* Get mfn list */ >>> xen_build_dynamic_phys_to_machine(); >>> + kasan_early_init(); >>> + >>> /* >>> * Set up kernel GDT and segment registers, mainly so that >>> * -fstack-protector code can be executed. 
>>> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c >>> index c8dbee62ec2a..eaf63f1f26af 100644 >>> --- a/arch/x86/xen/mmu_pv.c >>> +++ b/arch/x86/xen/mmu_pv.c >>> @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm) >>> static void xen_post_allocator_init(void); >>> -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) >>> +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) >>> { >>> struct mmuext_op op; >>> @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot, >>> if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) >>> BUG(); >>> } >>> -static void __init set_page_prot(void *addr, pgprot_t prot) >>> +void __init set_page_prot(void *addr, pgprot_t prot) >>> { >>> return set_page_prot_flags(addr, prot, UVMF_NONE); >>> } >>> @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) >>> if (i && i < pgd_index(__START_KERNEL_map)) >>> init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; >>> +#ifdef CONFIG_KASAN >>> + /* >>> + * Copy KASAN mappings >>> + * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) >>> + */ >>> + for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++) >>> + init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; >>> +#endif >>> + >>> /* Make pagetable pieces RO */ >>> set_page_prot(init_top_pgt, PAGE_KERNEL_RO); >>> set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); >>> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c >>> index 07054572297f..5e4729efbbe2 100644 >>> --- a/arch/x86/xen/multicalls.c >>> +++ b/arch/x86/xen/multicalls.c >>> @@ -99,6 +99,15 @@ void xen_mc_flush(void) >>> ret++; >>> } >>> + /* >>> + * XXX: Kasan produces quite a lot (~2000) of warnings in a form of: >>> + * >>> + * (XEN) mm.c:3222:d155v0 mfn 3704b already pinned >>> + * >>> + * during kasan_init(). They are benign, but silence them for now. 
>>> + * Otherwise, booting takes too long due to printk() spam. >>> + */ >>> +#ifndef CONFIG_KASAN >> >> It might be interesting to identify the problematic page tables. >> >> I guess this would require some hacking to avoid the multicalls in order >> to identify which page table should not be pinned again. > > I tracked this down to xen_alloc_ptpage() in mmu_pv.c: > > if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) > __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); > > kasan_populate_early_shadow() is doing lots pmd_populate_kernel() with > kasan_early_shadow_pte (mfn of which is reported by Xen). Currently I'm not > sure how to fix that. Is it possible to check that pfn has already been pinned > from Linux kernel? xen_page_pinned() seems to be an incorrect way to check that. Right, xen_page_pinned() is not yet working at this stage of booting. But using pmd_populate_kernel() with the same page table multiple times is just wrong. Doing so the first time is fine, all the other cases should just use set_pmd(). Juergen
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e7bb483557c9..0c98a45eec6c 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -8,6 +8,8 @@ #include <linux/kmemleak.h> #include <linux/sched/task.h> +#include <xen/xen.h> + #include <asm/set_memory.h> #include <asm/e820/api.h> #include <asm/init.h> @@ -835,6 +837,18 @@ void free_kernel_image_pages(const char *what, void *begin, void *end) unsigned long end_ul = (unsigned long)end; unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT; + /* + * XXX: skip this for now. Otherwise it leads to: + * + * (XEN) mm.c:2713:d157v0 Bad type (saw 8c00000000000001 != exp e000000000000000) for mfn 36f40 (pfn 02f40) + * (XEN) mm.c:1043:d157v0 Could not get page type PGT_writable_page + * (XEN) mm.c:1096:d157v0 Error getting mfn 36f40 (pfn 02f40) from L1 entry 8010000036f40067 for l1e_owner d157, pg_owner d157 + * + * and further #PF error: [PROT] [WRITE] in the kernel. + */ + if (xen_pv_domain() && IS_ENABLED(CONFIG_KASAN)) + return; + free_init_pages(what, begin_ul, end_ul); /* diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index cf5bc37c90ac..caee2022f8b0 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -13,6 +13,8 @@ #include <linux/sched/task.h> #include <linux/vmalloc.h> +#include <xen/xen.h> + #include <asm/e820/types.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> @@ -20,6 +22,9 @@ #include <asm/pgtable.h> #include <asm/cpu_entry_area.h> +#include <xen/interface/xen.h> +#include <asm/xen/hypervisor.h> + extern struct range pfn_mapped[E820_MAX_ENTRIES]; static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); @@ -305,6 +310,12 @@ static struct notifier_block kasan_die_notifier = { }; #endif +#ifdef CONFIG_XEN +/* XXX: this should go to some header */ +void __init set_page_prot(void *addr, pgprot_t prot); +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn); +#endif + void __init kasan_early_init(void) { int i; @@ 
-332,6 +343,16 @@ void __init kasan_early_init(void) for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++) kasan_early_shadow_p4d[i] = __p4d(p4d_val); + if (xen_pv_domain()) { + /* PV page tables must have PAGE_KERNEL_RO */ + set_page_prot(kasan_early_shadow_pud, PAGE_KERNEL_RO); + set_page_prot(kasan_early_shadow_pmd, PAGE_KERNEL_RO); + set_page_prot(kasan_early_shadow_pte, PAGE_KERNEL_RO); + + /* Add mappings to the initial PV page tables */ + kasan_map_early_shadow((pgd_t *)xen_start_info->pt_base); + } + kasan_map_early_shadow(early_top_pgt); kasan_map_early_shadow(init_top_pgt); } @@ -369,6 +390,13 @@ void __init kasan_init(void) __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE)); } + if (xen_pv_domain()) { + /* PV page tables must be pinned */ + set_page_prot(early_top_pgt, PAGE_KERNEL_RO); + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa_symbol(early_top_pgt))); + } + load_cr3(early_top_pgt); __flush_tlb_all(); diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 084de77a109e..102fad0b0bca 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,3 +1,10 @@ +KASAN_SANITIZE_enlighten_pv.o := n +KASAN_SANITIZE_enlighten.o := n +KASAN_SANITIZE_irq.o := n +KASAN_SANITIZE_mmu_pv.o := n +KASAN_SANITIZE_p2m.o := n +KASAN_SANITIZE_multicalls.o := n + # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ae4a41ca19f6..27de55699f24 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -72,6 +72,7 @@ #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/cpu.h> +#include <asm/kasan.h> #ifdef CONFIG_ACPI #include <linux/acpi.h> @@ -1231,6 +1232,8 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Get mfn list */ xen_build_dynamic_phys_to_machine(); + kasan_early_init(); + /* * Set up kernel GDT and segment registers, mainly so that * -fstack-protector code can be executed. 
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index c8dbee62ec2a..eaf63f1f26af 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1079,7 +1079,7 @@ static void xen_exit_mmap(struct mm_struct *mm) static void xen_post_allocator_init(void); -static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { struct mmuext_op op; @@ -1767,7 +1767,7 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot, if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) BUG(); } -static void __init set_page_prot(void *addr, pgprot_t prot) +void __init set_page_prot(void *addr, pgprot_t prot) { return set_page_prot_flags(addr, prot, UVMF_NONE); } @@ -1943,6 +1943,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) if (i && i < pgd_index(__START_KERNEL_map)) init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; +#ifdef CONFIG_KASAN + /* + * Copy KASAN mappings + * ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) + */ + for (i = 0xec0 >> 3; i < 0xfc0 >> 3; i++) + init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i]; +#endif + /* Make pagetable pieces RO */ set_page_prot(init_top_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 07054572297f..5e4729efbbe2 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -99,6 +99,15 @@ void xen_mc_flush(void) ret++; } + /* + * XXX: Kasan produces quite a lot (~2000) of warnings in a form of: + * + * (XEN) mm.c:3222:d155v0 mfn 3704b already pinned + * + * during kasan_init(). They are benign, but silence them for now. + * Otherwise, booting takes too long due to printk() spam. 
+ */ +#ifndef CONFIG_KASAN if (WARN_ON(ret)) { pr_err("%d of %d multicall(s) failed: cpu %d\n", ret, b->mcidx, smp_processor_id()); @@ -121,6 +130,7 @@ void xen_mc_flush(void) } } } +#endif /* CONFIG_KASAN */ b->mcidx = 0; b->argidx = 0; diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 0c4efa6fe450..1e9e1e41c0a8 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +KASAN_SANITIZE_features.o := n + obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o obj-y += mem-reservation.o diff --git a/kernel/Makefile b/kernel/Makefile index f2cc0d118a0b..1da6fd93c00c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -12,6 +12,8 @@ obj-y = fork.o exec_domain.o panic.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o smpboot.o ucount.o +KASAN_SANITIZE_cpu.o := n + obj-$(CONFIG_MODULES) += kmod.o obj-$(CONFIG_MULTIUSER) += groups.o diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 81f5464ea9e1..429a638625ea 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -98,6 +98,7 @@ config KASAN_OUTLINE config KASAN_INLINE bool "Inline instrumentation" + depends on !XEN_PV help Compiler directly inserts code checking shadow memory before memory accesses. This is faster than outline (in some workloads @@ -147,7 +148,7 @@ config KASAN_SW_TAGS_IDENTIFY config KASAN_VMALLOC bool "Back mappings in vmalloc space with real shadow memory" - depends on KASAN && HAVE_ARCH_KASAN_VMALLOC + depends on KASAN && HAVE_ARCH_KASAN_VMALLOC && !XEN_PV help By default, the shadow region for vmalloc space is the read-only zero page. This means that KASAN cannot detect errors involving
This enables the use of Outline instrumentation for Xen PV kernels. The KASAN_INLINE and KASAN_VMALLOC options currently lead to boot crashes and hence are disabled. Rough edges in the patch are marked with XXX. Signed-off-by: Sergey Dyasli <sergey.dyasli@citrix.com> --- arch/x86/mm/init.c | 14 ++++++++++++++ arch/x86/mm/kasan_init_64.c | 28 ++++++++++++++++++++++++++++ arch/x86/xen/Makefile | 7 +++++++ arch/x86/xen/enlighten_pv.c | 3 +++ arch/x86/xen/mmu_pv.c | 13 +++++++++++-- arch/x86/xen/multicalls.c | 10 ++++++++++ drivers/xen/Makefile | 2 ++ kernel/Makefile | 2 ++ lib/Kconfig.kasan | 3 ++- 9 files changed, 79 insertions(+), 3 deletions(-)