Message ID | 20170216154508.19244.58580.stgit@tlendack-t1.amdoffice.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Feb 16, 2017 at 09:45:09AM -0600, Tom Lendacky wrote: > Boot data (such as EFI related data) is not encrypted when the system is > booted and needs to be mapped decrypted. Add support to apply the proper > attributes to the EFI page tables and to the early_memremap and memremap > APIs to identify the type of data being accessed so that the proper > encryption attribute can be applied. So this doesn't even begin to explain *why* we need this. The emphasis being on *why*. Lemme guess? kexec? And because of efi_reuse_config? If so, then that whole ad-hoc caching in parse_setup_data() needs to go. Especially if efi_reuse_config() already sees those addresses so while we're there, we could save them somewhere or whatnot. But not doing the whole thing again in parse_setup_data(). > Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> > --- > arch/x86/include/asm/io.h | 3 + > arch/x86/include/asm/setup.h | 8 +++ > arch/x86/kernel/setup.c | 33 ++++++++++++ > arch/x86/mm/ioremap.c | 111 ++++++++++++++++++++++++++++++++++++++++ > arch/x86/platform/efi/efi_64.c | 16 ++++-- > kernel/memremap.c | 11 ++++ > mm/early_ioremap.c | 18 +++++- > 7 files changed, 192 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h > index 7afb0e2..833f7cc 100644 > --- a/arch/x86/include/asm/io.h > +++ b/arch/x86/include/asm/io.h > @@ -381,4 +381,7 @@ extern int __must_check arch_phys_wc_add(unsigned long base, > #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc > #endif > > +extern bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size); > +#define arch_memremap_do_ram_remap arch_memremap_do_ram_remap > + > #endif /* _ASM_X86_IO_H */ > diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h > index ac1d5da..99998d9 100644 > --- a/arch/x86/include/asm/setup.h > +++ b/arch/x86/include/asm/setup.h > @@ -63,6 +63,14 @@ static inline void x86_ce4100_early_setup(void) { } > #include <asm/espfix.h> > 
#include <linux/kernel.h> > > +struct setup_data_attrs { > + u64 paddr; > + unsigned long size; > +}; > + > +extern struct setup_data_attrs setup_data_list[]; > +extern unsigned int setup_data_list_count; > + > /* > * This is set up by the setup-routine at boot-time > */ > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c > index bd5b9a7..d2234bf 100644 > --- a/arch/x86/kernel/setup.c > +++ b/arch/x86/kernel/setup.c > @@ -148,6 +148,9 @@ int default_check_phys_apicid_present(int phys_apicid) > > struct boot_params boot_params; > > +struct setup_data_attrs setup_data_list[32]; > +unsigned int setup_data_list_count; > + > /* > * Machine setup.. > */ > @@ -419,6 +422,32 @@ static void __init reserve_initrd(void) > } > #endif /* CONFIG_BLK_DEV_INITRD */ > > +static void __init update_setup_data_list(u64 pa_data, unsigned long size) > +{ > + unsigned int i; > + > + for (i = 0; i < setup_data_list_count; i++) { > + if (setup_data_list[i].paddr != pa_data) > + continue; > + > + setup_data_list[i].size = size; > + break; > + } > +} > + > +static void __init add_to_setup_data_list(u64 pa_data, unsigned long size) > +{ > + if (!sme_active()) > + return; > + > + if (!WARN(setup_data_list_count == ARRAY_SIZE(setup_data_list), > + "exceeded maximum setup data list slots")) { > + setup_data_list[setup_data_list_count].paddr = pa_data; > + setup_data_list[setup_data_list_count].size = size; > + setup_data_list_count++; > + } > +} > + > static void __init parse_setup_data(void) > { > struct setup_data *data; > @@ -428,12 +457,16 @@ static void __init parse_setup_data(void) > while (pa_data) { > u32 data_len, data_type; > > + add_to_setup_data_list(pa_data, sizeof(*data)); > + > data = early_memremap(pa_data, sizeof(*data)); > data_len = data->len + sizeof(struct setup_data); > data_type = data->type; > pa_next = data->next; > early_memunmap(data, sizeof(*data)); > > + update_setup_data_list(pa_data, data_len); > + > switch (data_type) { > case SETUP_E820_EXT: > 
e820__memory_setup_extended(pa_data, data_len); > diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c > index 2385e70..b0ff6bc 100644 > --- a/arch/x86/mm/ioremap.c > +++ b/arch/x86/mm/ioremap.c > @@ -13,6 +13,7 @@ > #include <linux/slab.h> > #include <linux/vmalloc.h> > #include <linux/mmiotrace.h> > +#include <linux/efi.h> > > #include <asm/cacheflush.h> > #include <asm/e820/api.h> > @@ -21,6 +22,7 @@ > #include <asm/tlbflush.h> > #include <asm/pgalloc.h> > #include <asm/pat.h> > +#include <asm/setup.h> > > #include "physaddr.h" > > @@ -419,6 +421,115 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) > iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); > } > > +/* > + * Examine the physical address to determine if it is boot data. Check > + * it against the boot params structure and EFI tables. > + */ > +static bool memremap_is_setup_data(resource_size_t phys_addr, > + unsigned long size) > +{ > + unsigned int i; > + u64 paddr; > + > + for (i = 0; i < setup_data_list_count; i++) { > + if (phys_addr < setup_data_list[i].paddr) > + continue; > + > + if (phys_addr >= (setup_data_list[i].paddr + > + setup_data_list[i].size)) > + continue; > + > + /* Address is within setup data range */ > + return true; > + } > + > + paddr = boot_params.efi_info.efi_memmap_hi; > + paddr <<= 32; > + paddr |= boot_params.efi_info.efi_memmap; > + if (phys_addr == paddr) > + return true; > + > + paddr = boot_params.efi_info.efi_systab_hi; > + paddr <<= 32; > + paddr |= boot_params.efi_info.efi_systab; > + if (phys_addr == paddr) > + return true; > + > + if (efi_table_address_match(phys_addr)) > + return true; > + > + return false; > +} > + > +/* > + * This function determines if an address should be mapped encrypted. > + * Boot setup data, EFI data and E820 areas are checked in making this > + * determination. 
> + */ > +static bool memremap_should_map_encrypted(resource_size_t phys_addr, > + unsigned long size) > +{ > + /* > + * SME is not active, return true: > + * - For early_memremap_pgprot_adjust(), returning true or false > + * results in the same protection value > + * - For arch_memremap_do_ram_remap(), returning true will allow > + * the RAM remap to occur instead of falling back to ioremap() > + */ > + if (!sme_active()) > + return true; > + > + /* Check if the address is part of the setup data */ > + if (memremap_is_setup_data(phys_addr, size)) > + return false; > + > + /* Check if the address is part of EFI boot/runtime data */ > + switch (efi_mem_type(phys_addr)) { arch/x86/built-in.o: In function `memremap_should_map_encrypted': /home/boris/kernel/alt-linux/arch/x86/mm/ioremap.c:487: undefined reference to `efi_mem_type' make: *** [vmlinux] Error 1 That's a !CONFIG_EFI .config. > + case EFI_BOOT_SERVICES_DATA: > + case EFI_RUNTIME_SERVICES_DATA: > + return false; > + default: > + break; > + } > + > + /* Check if the address is outside kernel usable area */ > + switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) { > + case E820_TYPE_RESERVED: > + case E820_TYPE_ACPI: > + case E820_TYPE_NVS: > + case E820_TYPE_UNUSABLE: > + return false; > + default: > + break; > + } > + > + return true; > +} > + > +/* > + * Architecure function to determine if RAM remap is allowed. > + */ > +bool arch_memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size) > +{ > + return memremap_should_map_encrypted(phys_addr, size); > +} > + > +/* > + * Architecure override of __weak function to adjust the protection attributes > + * used when remapping memory. 
> + */ > +pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, > + unsigned long size, > + pgprot_t prot) > +{ > + if (memremap_should_map_encrypted(phys_addr, size)) > + prot = pgprot_encrypted(prot); > + else > + prot = pgprot_decrypted(prot); > + > + return prot; > +} > + > #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT > /* Remap memory with encryption */ > void __init *early_memremap_encrypted(resource_size_t phys_addr, > diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c > index 2ee7694..2d8674d 100644 > --- a/arch/x86/platform/efi/efi_64.c > +++ b/arch/x86/platform/efi/efi_64.c > @@ -243,7 +243,7 @@ void efi_sync_low_kernel_mappings(void) > > int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) > { > - unsigned long pfn, text; > + unsigned long pfn, text, pf; > struct page *page; > unsigned npages; > pgd_t *pgd; > @@ -251,7 +251,13 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) > if (efi_enabled(EFI_OLD_MEMMAP)) > return 0; > > - efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); > + /* > + * Since the PGD is encrypted, set the encryption mask so that when > + * this value is loaded into cr3 the PGD will be decrypted during > + * the pagetable walk. > + */ > + efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd); > + > pgd = efi_pgd; > > /* > @@ -261,7 +267,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) > * phys_efi_set_virtual_address_map(). 
> */ > pfn = pa_memmap >> PAGE_SHIFT; > - if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) { > + pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC; > + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) { > pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); > return 1; > } > @@ -304,7 +311,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) > text = __pa(_text); > pfn = text >> PAGE_SHIFT; > > - if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) { > + pf = _PAGE_RW | _PAGE_ENC; > + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { > pr_err("Failed to map kernel text 1:1\n"); > return 1; > } Those changes should be in a separate patch IMHO.
On 2/21/2017 9:06 AM, Borislav Petkov wrote: > On Thu, Feb 16, 2017 at 09:45:09AM -0600, Tom Lendacky wrote: >> Boot data (such as EFI related data) is not encrypted when the system is >> booted and needs to be mapped decrypted. Add support to apply the proper >> attributes to the EFI page tables and to the early_memremap and memremap >> APIs to identify the type of data being accessed so that the proper >> encryption attribute can be applied. > > So this doesn't even begin to explain *why* we need this. The emphasis > being on *why*. > > Lemme guess? kexec? And because of efi_reuse_config? Hmm... maybe I'm missing something here. This doesn't have anything to do with kexec or efi_reuse_config. This has to do with the fact that when a system boots the setup data and the EFI data are not encrypted. Since it's not encrypted we need to be sure that any early_memremap() and memremap() calls remove the encryption mask from the resulting pagetable entry that is created so the data can be accessed properly. > > If so, then that whole ad-hoc caching in parse_setup_data() needs to go. > Especially if efi_reuse_config() already sees those addresses so while > we're there, we could save them somewhere or whatnot. But not doing the > whole thing again in parse_setup_data(). 
> >> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> >> --- >> arch/x86/include/asm/io.h | 3 + >> arch/x86/include/asm/setup.h | 8 +++ >> arch/x86/kernel/setup.c | 33 ++++++++++++ >> arch/x86/mm/ioremap.c | 111 ++++++++++++++++++++++++++++++++++++++++ >> arch/x86/platform/efi/efi_64.c | 16 ++++-- >> kernel/memremap.c | 11 ++++ >> mm/early_ioremap.c | 18 +++++- >> 7 files changed, 192 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h >> index 7afb0e2..833f7cc 100644 >> --- a/arch/x86/include/asm/io.h >> +++ b/arch/x86/include/asm/io.h >> @@ -381,4 +381,7 @@ extern int __must_check arch_phys_wc_add(unsigned long base, >> #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc >> #endif >> >> +extern bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size); >> +#define arch_memremap_do_ram_remap arch_memremap_do_ram_remap >> + >> #endif /* _ASM_X86_IO_H */ >> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h >> index ac1d5da..99998d9 100644 >> --- a/arch/x86/include/asm/setup.h >> +++ b/arch/x86/include/asm/setup.h >> @@ -63,6 +63,14 @@ static inline void x86_ce4100_early_setup(void) { } >> #include <asm/espfix.h> >> #include <linux/kernel.h> >> >> +struct setup_data_attrs { >> + u64 paddr; >> + unsigned long size; >> +}; >> + >> +extern struct setup_data_attrs setup_data_list[]; >> +extern unsigned int setup_data_list_count; >> + >> /* >> * This is set up by the setup-routine at boot-time >> */ >> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c >> index bd5b9a7..d2234bf 100644 >> --- a/arch/x86/kernel/setup.c >> +++ b/arch/x86/kernel/setup.c >> @@ -148,6 +148,9 @@ int default_check_phys_apicid_present(int phys_apicid) >> >> struct boot_params boot_params; >> >> +struct setup_data_attrs setup_data_list[32]; >> +unsigned int setup_data_list_count; >> + >> /* >> * Machine setup.. 
>> */ >> @@ -419,6 +422,32 @@ static void __init reserve_initrd(void) >> } >> #endif /* CONFIG_BLK_DEV_INITRD */ >> >> +static void __init update_setup_data_list(u64 pa_data, unsigned long size) >> +{ >> + unsigned int i; >> + >> + for (i = 0; i < setup_data_list_count; i++) { >> + if (setup_data_list[i].paddr != pa_data) >> + continue; >> + >> + setup_data_list[i].size = size; >> + break; >> + } >> +} >> + >> +static void __init add_to_setup_data_list(u64 pa_data, unsigned long size) >> +{ >> + if (!sme_active()) >> + return; >> + >> + if (!WARN(setup_data_list_count == ARRAY_SIZE(setup_data_list), >> + "exceeded maximum setup data list slots")) { >> + setup_data_list[setup_data_list_count].paddr = pa_data; >> + setup_data_list[setup_data_list_count].size = size; >> + setup_data_list_count++; >> + } >> +} >> + >> static void __init parse_setup_data(void) >> { >> struct setup_data *data; >> @@ -428,12 +457,16 @@ static void __init parse_setup_data(void) >> while (pa_data) { >> u32 data_len, data_type; >> >> + add_to_setup_data_list(pa_data, sizeof(*data)); >> + >> data = early_memremap(pa_data, sizeof(*data)); >> data_len = data->len + sizeof(struct setup_data); >> data_type = data->type; >> pa_next = data->next; >> early_memunmap(data, sizeof(*data)); >> >> + update_setup_data_list(pa_data, data_len); >> + >> switch (data_type) { >> case SETUP_E820_EXT: >> e820__memory_setup_extended(pa_data, data_len); >> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c >> index 2385e70..b0ff6bc 100644 >> --- a/arch/x86/mm/ioremap.c >> +++ b/arch/x86/mm/ioremap.c >> @@ -13,6 +13,7 @@ >> #include <linux/slab.h> >> #include <linux/vmalloc.h> >> #include <linux/mmiotrace.h> >> +#include <linux/efi.h> >> >> #include <asm/cacheflush.h> >> #include <asm/e820/api.h> >> @@ -21,6 +22,7 @@ >> #include <asm/tlbflush.h> >> #include <asm/pgalloc.h> >> #include <asm/pat.h> >> +#include <asm/setup.h> >> >> #include "physaddr.h" >> >> @@ -419,6 +421,115 @@ void 
unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) >> iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); >> } >> >> +/* >> + * Examine the physical address to determine if it is boot data. Check >> + * it against the boot params structure and EFI tables. >> + */ >> +static bool memremap_is_setup_data(resource_size_t phys_addr, >> + unsigned long size) >> +{ >> + unsigned int i; >> + u64 paddr; >> + >> + for (i = 0; i < setup_data_list_count; i++) { >> + if (phys_addr < setup_data_list[i].paddr) >> + continue; >> + >> + if (phys_addr >= (setup_data_list[i].paddr + >> + setup_data_list[i].size)) >> + continue; >> + >> + /* Address is within setup data range */ >> + return true; >> + } >> + >> + paddr = boot_params.efi_info.efi_memmap_hi; >> + paddr <<= 32; >> + paddr |= boot_params.efi_info.efi_memmap; >> + if (phys_addr == paddr) >> + return true; >> + >> + paddr = boot_params.efi_info.efi_systab_hi; >> + paddr <<= 32; >> + paddr |= boot_params.efi_info.efi_systab; >> + if (phys_addr == paddr) >> + return true; >> + >> + if (efi_table_address_match(phys_addr)) >> + return true; >> + >> + return false; >> +} >> + >> +/* >> + * This function determines if an address should be mapped encrypted. >> + * Boot setup data, EFI data and E820 areas are checked in making this >> + * determination. 
>> + */ >> +static bool memremap_should_map_encrypted(resource_size_t phys_addr, >> + unsigned long size) >> +{ >> + /* >> + * SME is not active, return true: >> + * - For early_memremap_pgprot_adjust(), returning true or false >> + * results in the same protection value >> + * - For arch_memremap_do_ram_remap(), returning true will allow >> + * the RAM remap to occur instead of falling back to ioremap() >> + */ >> + if (!sme_active()) >> + return true; >> + >> + /* Check if the address is part of the setup data */ >> + if (memremap_is_setup_data(phys_addr, size)) >> + return false; >> + >> + /* Check if the address is part of EFI boot/runtime data */ >> + switch (efi_mem_type(phys_addr)) { > > arch/x86/built-in.o: In function `memremap_should_map_encrypted': > /home/boris/kernel/alt-linux/arch/x86/mm/ioremap.c:487: undefined reference to `efi_mem_type' > make: *** [vmlinux] Error 1 > > That's a !CONFIG_EFI .config. Missed that, I'll fix it. > >> + case EFI_BOOT_SERVICES_DATA: >> + case EFI_RUNTIME_SERVICES_DATA: >> + return false; >> + default: >> + break; >> + } >> + >> + /* Check if the address is outside kernel usable area */ >> + switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) { >> + case E820_TYPE_RESERVED: >> + case E820_TYPE_ACPI: >> + case E820_TYPE_NVS: >> + case E820_TYPE_UNUSABLE: >> + return false; >> + default: >> + break; >> + } >> + >> + return true; >> +} >> + >> +/* >> + * Architecure function to determine if RAM remap is allowed. >> + */ >> +bool arch_memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size) >> +{ >> + return memremap_should_map_encrypted(phys_addr, size); >> +} >> + >> +/* >> + * Architecure override of __weak function to adjust the protection attributes >> + * used when remapping memory. 
>> + */ >> +pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, >> + unsigned long size, >> + pgprot_t prot) >> +{ >> + if (memremap_should_map_encrypted(phys_addr, size)) >> + prot = pgprot_encrypted(prot); >> + else >> + prot = pgprot_decrypted(prot); >> + >> + return prot; >> +} >> + >> #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT >> /* Remap memory with encryption */ >> void __init *early_memremap_encrypted(resource_size_t phys_addr, >> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c >> index 2ee7694..2d8674d 100644 >> --- a/arch/x86/platform/efi/efi_64.c >> +++ b/arch/x86/platform/efi/efi_64.c >> @@ -243,7 +243,7 @@ void efi_sync_low_kernel_mappings(void) >> >> int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) >> { >> - unsigned long pfn, text; >> + unsigned long pfn, text, pf; >> struct page *page; >> unsigned npages; >> pgd_t *pgd; >> @@ -251,7 +251,13 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) >> if (efi_enabled(EFI_OLD_MEMMAP)) >> return 0; >> >> - efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); >> + /* >> + * Since the PGD is encrypted, set the encryption mask so that when >> + * this value is loaded into cr3 the PGD will be decrypted during >> + * the pagetable walk. >> + */ >> + efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd); >> + >> pgd = efi_pgd; >> >> /* >> @@ -261,7 +267,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) >> * phys_efi_set_virtual_address_map(). 
>> */ >> pfn = pa_memmap >> PAGE_SHIFT; >> - if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) { >> + pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC; >> + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) { >> pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); >> return 1; >> } >> @@ -304,7 +311,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) >> text = __pa(_text); >> pfn = text >> PAGE_SHIFT; >> >> - if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) { >> + pf = _PAGE_RW | _PAGE_ENC; >> + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { >> pr_err("Failed to map kernel text 1:1\n"); >> return 1; >> } > > Those changes should be in a separate patch IMHO. I can break out the mapping changes from the EFI pagetable changes. Thanks, Tom >
On Thu, Feb 23, 2017 at 03:34:30PM -0600, Tom Lendacky wrote: > Hmm... maybe I'm missing something here. This doesn't have anything to > do with kexec or efi_reuse_config. This has to do with the fact that I said kexec because kexec uses the setup_data mechanism to pass config tables to the second kernel, for example. > when a system boots the setup data and the EFI data are not encrypted. > Since it's not encrypted we need to be sure that any early_memremap() > and memremap() calls remove the encryption mask from the resulting > pagetable entry that is created so the data can be accessed properly. Anyway, I'd prefer not to do this ad-hoc caching if it can be helped. You're imposing an arbitrary limit of 32 there which the setup_data linked list doesn't have. So if you really want to go inspect those elements, you could iterate over them starting from boot_params.hdr.setup_data, just like parse_setup_data() does. Most of the time that list should be non-existent and if it is, it will be short anyway. And if we really decide that we need to cache it for later inspection due to speed considerations, as you do in memremap_is_setup_data(), you could do that in the default: branch of parse_setup_data() and do it just once: I don't see why you need to do add_to_setup_data_list() *and* update_setup_data_list() when you could add both pointer and updated size once.
On 2/24/2017 4:21 AM, Borislav Petkov wrote: > On Thu, Feb 23, 2017 at 03:34:30PM -0600, Tom Lendacky wrote: >> Hmm... maybe I'm missing something here. This doesn't have anything to >> do with kexec or efi_reuse_config. This has to do with the fact that > > I said kexec because kexec uses the setup_data mechanism to pass config > tables to the second kernel, for example. > >> when a system boots the setup data and the EFI data are not encrypted. >> Since it's not encrypted we need to be sure that any early_memremap() >> and memremap() calls remove the encryption mask from the resulting >> pagetable entry that is created so the data can be accessed properly. > > Anyway, I'd prefer not to do this ad-hoc caching if it can be > helped. You're imposing an arbitrary limit of 32 there which the > setup_data linked list doesn't have. So if you really want to go > inspect those elements, you could iterate over them starting from > boot_params.hdr.setup_data, just like parse_setup_data() does. Most of > the time that list should be non-existent and if it is, it will be short > anyway. > I looked at doing that but you get into this cyclical situation unless you specifically map each setup data elemement as decrypted. This is ok for early_memremap since we have early_memremap_decrypted() but a new memremap_decrypted() would have to be added. But I was trying to avoid having to do multiple mapping calls inside the current mapping call. I can always look at converting the setup_data_list from an array into a list to eliminate the 32 entry limit, too. Let me look at adding the early_memremap_decrypted() type support to memremap() and see how that looks. 
> And if we really decide that we need to cache it for later inspection > due to speed considerations, as you do in memremap_is_setup_data(), you > could do that in the default: branch of parse_setup_data() and do it > just once: I don't see why you need to do add_to_setup_data_list() *and* > update_setup_data_list() when you could add both pointer and updated > size once. I do the add followed by the update because we can't determine the true size of the setup data until it is first mapped so that the data->len field can be accessed. In order to map it properly the physical address range needs to be added to the list before it is mapped. After it's mapped, the true physical address range can be calculated and updated. Thanks, Tom >
On Fri, Feb 24, 2017 at 09:04:21AM -0600, Tom Lendacky wrote: > I looked at doing that but you get into this cyclical situation unless > you specifically map each setup data elemement as decrypted. This is ok > for early_memremap since we have early_memremap_decrypted() but a new > memremap_decrypted() would have to be added. But I was trying to avoid > having to do multiple mapping calls inside the current mapping call. > > I can always look at converting the setup_data_list from an array > into a list to eliminate the 32 entry limit, too. > > Let me look at adding the early_memremap_decrypted() type support to > memremap() and see how that looks. Yes, so this sounds better than the cyclic thing you explained where you have to add and update since early_memremap() calls into memremap_should_map_encrypted() which touches the list we're updating at the same time. So in the case where you absolutely know that those ranges should be mapped decrypted, we should have special helpers which do that explicitly and they are called when we access those special regions. Well, special for SME. I'm thinking that should simplify the handling but you'll know better once you write it. :) Thanks.
On 02/16/17 at 09:45am, Tom Lendacky wrote: [snip] > + * This function determines if an address should be mapped encrypted. > + * Boot setup data, EFI data and E820 areas are checked in making this > + * determination. > + */ > +static bool memremap_should_map_encrypted(resource_size_t phys_addr, > + unsigned long size) > +{ > + /* > + * SME is not active, return true: > + * - For early_memremap_pgprot_adjust(), returning true or false > + * results in the same protection value > + * - For arch_memremap_do_ram_remap(), returning true will allow > + * the RAM remap to occur instead of falling back to ioremap() > + */ > + if (!sme_active()) > + return true; From the function name shouldn't above be return false? > + > + /* Check if the address is part of the setup data */ > + if (memremap_is_setup_data(phys_addr, size)) > + return false; > + > + /* Check if the address is part of EFI boot/runtime data */ > + switch (efi_mem_type(phys_addr)) { > + case EFI_BOOT_SERVICES_DATA: > + case EFI_RUNTIME_SERVICES_DATA: Only these two types needed? I'm not sure about this, just bring up the question. > + return false; > + default: > + break; > + } > + > + /* Check if the address is outside kernel usable area */ > + switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) { > + case E820_TYPE_RESERVED: > + case E820_TYPE_ACPI: > + case E820_TYPE_NVS: > + case E820_TYPE_UNUSABLE: > + return false; > + default: > + break; > + } > + > + return true; > +} > + Thanks Dave
On 3/8/2017 12:55 AM, Dave Young wrote: > On 02/16/17 at 09:45am, Tom Lendacky wrote: > [snip] >> + * This function determines if an address should be mapped encrypted. >> + * Boot setup data, EFI data and E820 areas are checked in making this >> + * determination. >> + */ >> +static bool memremap_should_map_encrypted(resource_size_t phys_addr, >> + unsigned long size) >> +{ >> + /* >> + * SME is not active, return true: >> + * - For early_memremap_pgprot_adjust(), returning true or false >> + * results in the same protection value >> + * - For arch_memremap_do_ram_remap(), returning true will allow >> + * the RAM remap to occur instead of falling back to ioremap() >> + */ >> + if (!sme_active()) >> + return true; > > From the function name shouldn't above be return false? I've re-worked this so that the check is in a different location and doesn't cause confusion. > >> + >> + /* Check if the address is part of the setup data */ >> + if (memremap_is_setup_data(phys_addr, size)) >> + return false; >> + >> + /* Check if the address is part of EFI boot/runtime data */ >> + switch (efi_mem_type(phys_addr)) { >> + case EFI_BOOT_SERVICES_DATA: >> + case EFI_RUNTIME_SERVICES_DATA: > > Only these two types needed? I'm not sure about this, just bring up the > question. I've re-worked this code so that there is a single EFI routine that checks boot_params.efi_info.efi_memmap/efi_systab, EFI tables and the EFI memtype. As for the EFI memtypes, I believe those are the only ones required. Some of the other types will be picked up by the e820 checks (ACPI, NVS, etc.). Thanks, Tom > >> + return false; >> + default: >> + break; >> + } >> + >> + /* Check if the address is outside kernel usable area */ >> + switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) { >> + case E820_TYPE_RESERVED: >> + case E820_TYPE_ACPI: >> + case E820_TYPE_NVS: >> + case E820_TYPE_UNUSABLE: >> + return false; >> + default: >> + break; >> + } >> + >> + return true; >> +} >> + > > Thanks > Dave >
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 7afb0e2..833f7cc 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -381,4 +381,7 @@ extern int __must_check arch_phys_wc_add(unsigned long base, #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc #endif +extern bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size); +#define arch_memremap_do_ram_remap arch_memremap_do_ram_remap + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ac1d5da..99998d9 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -63,6 +63,14 @@ static inline void x86_ce4100_early_setup(void) { } #include <asm/espfix.h> #include <linux/kernel.h> +struct setup_data_attrs { + u64 paddr; + unsigned long size; +}; + +extern struct setup_data_attrs setup_data_list[]; +extern unsigned int setup_data_list_count; + /* * This is set up by the setup-routine at boot-time */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bd5b9a7..d2234bf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -148,6 +148,9 @@ int default_check_phys_apicid_present(int phys_apicid) struct boot_params boot_params; +struct setup_data_attrs setup_data_list[32]; +unsigned int setup_data_list_count; + /* * Machine setup.. 
*/ @@ -419,6 +422,32 @@ static void __init reserve_initrd(void) } #endif /* CONFIG_BLK_DEV_INITRD */ +static void __init update_setup_data_list(u64 pa_data, unsigned long size) +{ + unsigned int i; + + for (i = 0; i < setup_data_list_count; i++) { + if (setup_data_list[i].paddr != pa_data) + continue; + + setup_data_list[i].size = size; + break; + } +} + +static void __init add_to_setup_data_list(u64 pa_data, unsigned long size) +{ + if (!sme_active()) + return; + + if (!WARN(setup_data_list_count == ARRAY_SIZE(setup_data_list), + "exceeded maximum setup data list slots")) { + setup_data_list[setup_data_list_count].paddr = pa_data; + setup_data_list[setup_data_list_count].size = size; + setup_data_list_count++; + } +} + static void __init parse_setup_data(void) { struct setup_data *data; @@ -428,12 +457,16 @@ static void __init parse_setup_data(void) while (pa_data) { u32 data_len, data_type; + add_to_setup_data_list(pa_data, sizeof(*data)); + data = early_memremap(pa_data, sizeof(*data)); data_len = data->len + sizeof(struct setup_data); data_type = data->type; pa_next = data->next; early_memunmap(data, sizeof(*data)); + update_setup_data_list(pa_data, data_len); + switch (data_type) { case SETUP_E820_EXT: e820__memory_setup_extended(pa_data, data_len); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2385e70..b0ff6bc 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mmiotrace.h> +#include <linux/efi.h> #include <asm/cacheflush.h> #include <asm/e820/api.h> @@ -21,6 +22,7 @@ #include <asm/tlbflush.h> #include <asm/pgalloc.h> #include <asm/pat.h> +#include <asm/setup.h> #include "physaddr.h" @@ -419,6 +421,115 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); } +/* + * Examine the physical address to determine if it is boot data. 
Check + * it against the boot params structure and EFI tables. + */ +static bool memremap_is_setup_data(resource_size_t phys_addr, + unsigned long size) +{ + unsigned int i; + u64 paddr; + + for (i = 0; i < setup_data_list_count; i++) { + if (phys_addr < setup_data_list[i].paddr) + continue; + + if (phys_addr >= (setup_data_list[i].paddr + + setup_data_list[i].size)) + continue; + + /* Address is within setup data range */ + return true; + } + + paddr = boot_params.efi_info.efi_memmap_hi; + paddr <<= 32; + paddr |= boot_params.efi_info.efi_memmap; + if (phys_addr == paddr) + return true; + + paddr = boot_params.efi_info.efi_systab_hi; + paddr <<= 32; + paddr |= boot_params.efi_info.efi_systab; + if (phys_addr == paddr) + return true; + + if (efi_table_address_match(phys_addr)) + return true; + + return false; +} + +/* + * This function determines if an address should be mapped encrypted. + * Boot setup data, EFI data and E820 areas are checked in making this + * determination. + */ +static bool memremap_should_map_encrypted(resource_size_t phys_addr, + unsigned long size) +{ + /* + * SME is not active, return true: + * - For early_memremap_pgprot_adjust(), returning true or false + * results in the same protection value + * - For arch_memremap_do_ram_remap(), returning true will allow + * the RAM remap to occur instead of falling back to ioremap() + */ + if (!sme_active()) + return true; + + /* Check if the address is part of the setup data */ + if (memremap_is_setup_data(phys_addr, size)) + return false; + + /* Check if the address is part of EFI boot/runtime data */ + switch (efi_mem_type(phys_addr)) { + case EFI_BOOT_SERVICES_DATA: + case EFI_RUNTIME_SERVICES_DATA: + return false; + default: + break; + } + + /* Check if the address is outside kernel usable area */ + switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) { + case E820_TYPE_RESERVED: + case E820_TYPE_ACPI: + case E820_TYPE_NVS: + case E820_TYPE_UNUSABLE: + return false; + default: + break; 
+ } + + return true; +} + +/* + * Architecture function to determine if RAM remap is allowed. + */ +bool arch_memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size) +{ + return memremap_should_map_encrypted(phys_addr, size); +} + +/* + * Architecture override of __weak function to adjust the protection attributes + * used when remapping memory. + */ +pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr, + unsigned long size, + pgprot_t prot) +{ + if (memremap_should_map_encrypted(phys_addr, size)) + prot = pgprot_encrypted(prot); + else + prot = pgprot_decrypted(prot); + + return prot; +} + #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT /* Remap memory with encryption */ void __init *early_memremap_encrypted(resource_size_t phys_addr, diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2ee7694..2d8674d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -243,7 +243,7 @@ void efi_sync_low_kernel_mappings(void) int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - unsigned long pfn, text; + unsigned long pfn, text, pf; struct page *page; unsigned npages; pgd_t *pgd; @@ -251,7 +251,13 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); + /* + * Since the PGD is encrypted, set the encryption mask so that when + * this value is loaded into cr3 the PGD will be decrypted during + * the pagetable walk. + */ + efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd); + pgd = efi_pgd; /* @@ -261,7 +267,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * phys_efi_set_virtual_address_map(). 
*/ pfn = pa_memmap >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) { + pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) { pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); return 1; } @@ -304,7 +311,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) text = __pa(_text); pfn = text >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) { + pf = _PAGE_RW | _PAGE_ENC; + if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { pr_err("Failed to map kernel text 1:1\n"); return 1; } diff --git a/kernel/memremap.c b/kernel/memremap.c index 9ecedc2..d7a27ea 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -34,13 +34,22 @@ static void *arch_memremap_wb(resource_size_t offset, unsigned long size) } #endif +#ifndef arch_memremap_do_ram_remap +static bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size) +{ + return true; +} +#endif + static void *try_ram_remap(resource_size_t offset, size_t size) { unsigned long pfn = PHYS_PFN(offset); /* In the simple case just return the existing linear address */ - if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn))) + if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) && + arch_memremap_do_ram_remap(offset, size)) return __va(offset); + return NULL; /* fallback to arch_memremap_wb */ } diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index d7d30da..b1dd4a9 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -30,6 +30,13 @@ static int __init early_ioremap_debug_setup(char *str) static int after_paging_init __initdata; +pgprot_t __init __weak early_memremap_pgprot_adjust(resource_size_t phys_addr, + unsigned long size, + pgprot_t prot) +{ + return prot; +} + void __init __weak early_ioremap_shutdown(void) { } @@ -215,14 +222,19 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) void __init * 
early_memremap(resource_size_t phys_addr, unsigned long size) { - return (__force void *)__early_ioremap(phys_addr, size, - FIXMAP_PAGE_NORMAL); + pgprot_t prot = early_memremap_pgprot_adjust(phys_addr, size, + FIXMAP_PAGE_NORMAL); + + return (__force void *)__early_ioremap(phys_addr, size, prot); } #ifdef FIXMAP_PAGE_RO void __init * early_memremap_ro(resource_size_t phys_addr, unsigned long size) { - return (__force void *)__early_ioremap(phys_addr, size, FIXMAP_PAGE_RO); + pgprot_t prot = early_memremap_pgprot_adjust(phys_addr, size, + FIXMAP_PAGE_RO); + + return (__force void *)__early_ioremap(phys_addr, size, prot); } #endif
Boot data (such as EFI related data) is not encrypted when the system is booted and needs to be mapped decrypted. Add support to apply the proper attributes to the EFI page tables and to the early_memremap and memremap APIs to identify the type of data being accessed so that the proper encryption attribute can be applied. Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com> --- arch/x86/include/asm/io.h | 3 + arch/x86/include/asm/setup.h | 8 +++ arch/x86/kernel/setup.c | 33 ++++++++++++ arch/x86/mm/ioremap.c | 111 ++++++++++++++++++++++++++++++++++++++++ arch/x86/platform/efi/efi_64.c | 16 ++++-- kernel/memremap.c | 11 ++++ mm/early_ioremap.c | 18 +++++- 7 files changed, 192 insertions(+), 8 deletions(-)