diff mbox

[RFC,v3,10/20] Add support to access boot related data in the clear

Message ID 20161110003631.3280.73292.stgit@tlendack-t1.amdoffice.net (mailing list archive)
State New, archived
Headers show

Commit Message

Tom Lendacky Nov. 10, 2016, 12:36 a.m. UTC
Boot data (such as EFI related data) is not encrypted when the system is
booted and needs to be accessed unencrypted.  Add support to apply the
proper attributes to the EFI page tables and to the early_memremap and
memremap APIs to identify the type of data being accessed so that the
proper encryption attribute can be applied.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/e820.h    |    1 
 arch/x86/kernel/e820.c         |   16 +++++++
 arch/x86/mm/ioremap.c          |   89 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/platform/efi/efi_64.c |   12 ++++-
 drivers/firmware/efi/efi.c     |   33 +++++++++++++++
 include/linux/efi.h            |    2 +
 kernel/memremap.c              |    8 +++-
 mm/early_ioremap.c             |   18 +++++++-
 8 files changed, 172 insertions(+), 7 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Kani, Toshi Nov. 11, 2016, 4:17 p.m. UTC | #1
On Wed, 2016-11-09 at 18:36 -0600, Tom Lendacky wrote:
> Boot data (such as EFI related data) is not encrypted when the system
> is booted and needs to be accessed unencrypted.  Add support to apply
> the proper attributes to the EFI page tables and to the
> early_memremap and memremap APIs to identify the type of data being
> accessed so that the proper encryption attribute can be applied.
 :
> +static bool memremap_apply_encryption(resource_size_t phys_addr,
> +				      unsigned long size)
> +{
> +	/* SME is not active, just return true */
> +	if (!sme_me_mask)
> +		return true;
> +
> +	/* Check if the address is part of the setup data */
> +	if (memremap_setup_data(phys_addr, size))
> +		return false;
> +
> +	/* Check if the address is part of EFI boot/runtime data */
> +	switch (efi_mem_type(phys_addr)) {
> +	case EFI_BOOT_SERVICES_DATA:
> +	case EFI_RUNTIME_SERVICES_DATA:
> +		return false;
> +	}
> +
> +	/* Check if the address is outside kernel usable area */
> +	switch (e820_get_entry_type(phys_addr, phys_addr + size -
> 1)) {
> +	case E820_RESERVED:
> +	case E820_ACPI:
> +	case E820_NVS:
> +	case E820_UNUSABLE:
> +		return false;
> +	}
> +
> +	return true;
> +}

Are you supporting encryption for E820_PMEM ranges?  If so, this
encryption will persist across a reboot and does not need to be
encrypted again, right?  Also, how do you keep a same key across a
reboot?

Thanks,
-Toshi
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Lendacky Nov. 14, 2016, 4:24 p.m. UTC | #2
On 11/11/2016 10:17 AM, Kani, Toshimitsu wrote:
> On Wed, 2016-11-09 at 18:36 -0600, Tom Lendacky wrote:
>> Boot data (such as EFI related data) is not encrypted when the system
>> is booted and needs to be accessed unencrypted.  Add support to apply
>> the proper attributes to the EFI page tables and to the
>> early_memremap and memremap APIs to identify the type of data being
>> accessed so that the proper encryption attribute can be applied.
>  :
>> +static bool memremap_apply_encryption(resource_size_t phys_addr,
>> +				      unsigned long size)
>> +{
>> +	/* SME is not active, just return true */
>> +	if (!sme_me_mask)
>> +		return true;
>> +
>> +	/* Check if the address is part of the setup data */
>> +	if (memremap_setup_data(phys_addr, size))
>> +		return false;
>> +
>> +	/* Check if the address is part of EFI boot/runtime data */
>> +	switch (efi_mem_type(phys_addr)) {
>> +	case EFI_BOOT_SERVICES_DATA:
>> +	case EFI_RUNTIME_SERVICES_DATA:
>> +		return false;
>> +	}
>> +
>> +	/* Check if the address is outside kernel usable area */
>> +	switch (e820_get_entry_type(phys_addr, phys_addr + size -
>> 1)) {
>> +	case E820_RESERVED:
>> +	case E820_ACPI:
>> +	case E820_NVS:
>> +	case E820_UNUSABLE:
>> +		return false;
>> +	}
>> +
>> +	return true;
>> +}
> 
> Are you supporting encryption for E820_PMEM ranges?  If so, this
> encryption will persist across a reboot and does not need to be
> encrypted again, right?  Also, how do you keep a same key across a
> reboot?

The key will change across a reboot... so I need to look into this
more for memory that isn't used as traditional system ram.

Thanks,
Tom

> 
> Thanks,
> -Toshi
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Borislav Petkov Nov. 17, 2016, 3:55 p.m. UTC | #3
On Wed, Nov 09, 2016 at 06:36:31PM -0600, Tom Lendacky wrote:
> Boot data (such as EFI related data) is not encrypted when the system is
> booted and needs to be accessed unencrypted.  Add support to apply the
> proper attributes to the EFI page tables and to the early_memremap and
> memremap APIs to identify the type of data being accessed so that the
> proper encryption attribute can be applied.
> 
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> ---
>  arch/x86/include/asm/e820.h    |    1 
>  arch/x86/kernel/e820.c         |   16 +++++++
>  arch/x86/mm/ioremap.c          |   89 ++++++++++++++++++++++++++++++++++++++++
>  arch/x86/platform/efi/efi_64.c |   12 ++++-
>  drivers/firmware/efi/efi.c     |   33 +++++++++++++++
>  include/linux/efi.h            |    2 +
>  kernel/memremap.c              |    8 +++-
>  mm/early_ioremap.c             |   18 +++++++-
>  8 files changed, 172 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
> index 476b574..186f1d04 100644
> --- a/arch/x86/include/asm/e820.h
> +++ b/arch/x86/include/asm/e820.h
> @@ -16,6 +16,7 @@ extern struct e820map *e820_saved;
>  extern unsigned long pci_mem_start;
>  extern int e820_any_mapped(u64 start, u64 end, unsigned type);
>  extern int e820_all_mapped(u64 start, u64 end, unsigned type);
> +extern unsigned int e820_get_entry_type(u64 start, u64 end);
>  extern void e820_add_region(u64 start, u64 size, int type);
>  extern void e820_print_map(char *who);
>  extern int
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index b85fe5f..92fce4e 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -107,6 +107,22 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
>  	return 0;
>  }
>  
> +unsigned int e820_get_entry_type(u64 start, u64 end)
> +{
> +	int i;
> +
> +	for (i = 0; i < e820->nr_map; i++) {
> +		struct e820entry *ei = &e820->map[i];
> +
> +		if (ei->addr >= end || ei->addr + ei->size <= start)
> +			continue;
> +
> +		return ei->type;
> +	}
> +
> +	return 0;

Please add a

#define E820_TYPE_INVALID	0

or so and return it instead of the naked number 0.

Also, this patch can be split in logical parts. The e820 stuff can be a
separate pre-patch.

efi_table_address_match() and the tables definitions is a second pre-patch.

The rest is then the third patch.

...

> +}
> +
>  /*
>   * Add a memory region to the kernel e820 map.
>   */
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index ff542cd..ee347c2 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -20,6 +20,9 @@
>  #include <asm/tlbflush.h>
>  #include <asm/pgalloc.h>
>  #include <asm/pat.h>
> +#include <asm/e820.h>
> +#include <asm/setup.h>
> +#include <linux/efi.h>
>  
>  #include "physaddr.h"
>  
> @@ -418,6 +421,92 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
>  	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
>  }
>  
> +static bool memremap_setup_data(resource_size_t phys_addr,
> +				unsigned long size)

This function name doesn't read like what the function does.

> +{
> +	u64 paddr;
> +
> +	if (phys_addr == boot_params.hdr.setup_data)
> +		return true;
> +
> +	paddr = boot_params.efi_info.efi_memmap_hi;
> +	paddr <<= 32;
> +	paddr |= boot_params.efi_info.efi_memmap;
> +	if (phys_addr == paddr)
> +		return true;
> +
> +	paddr = boot_params.efi_info.efi_systab_hi;
> +	paddr <<= 32;
> +	paddr |= boot_params.efi_info.efi_systab;
> +	if (phys_addr == paddr)
> +		return true;
> +
> +	if (efi_table_address_match(phys_addr))
> +		return true;
> +
> +	return false;
> +}

arch/x86/built-in.o: In function `memremap_setup_data':
/home/boris/kernel/alt-linux/arch/x86/mm/ioremap.c:444: undefined reference to `efi_table_address_match'
arch/x86/built-in.o: In function `memremap_apply_encryption':
/home/boris/kernel/alt-linux/arch/x86/mm/ioremap.c:462: undefined reference to `efi_mem_type'
make: *** [vmlinux] Error 1

I guess due to

# CONFIG_EFI is not set

> +
> +static bool memremap_apply_encryption(resource_size_t phys_addr,
> +				      unsigned long size)

This name is misleading too: it doesn't apply encryption but checks
whether to apply encryption for @phys_addr or not. So something like:

... memremap_should_encrypt(...)
{
	return true - for should
	return false - for should not

should make the whole thing much more straightforward. Or am I
misunderstanding you here?

> +{
> +	/* SME is not active, just return true */
> +	if (!sme_me_mask)
> +		return true;

I don't understand the logic here: SME is not active -> apply encryption?!

> +
> +	/* Check if the address is part of the setup data */

That comment belongs over the function definition of
memremap_setup_data() along with what it is supposed to do.

> +	if (memremap_setup_data(phys_addr, size))
> +		return false;
> +
> +	/* Check if the address is part of EFI boot/runtime data */
> +	switch (efi_mem_type(phys_addr)) {

Please send a pre-patch fix for efi_mem_type() to return
EFI_RESERVED_TYPE instead of naked 0 in the failure case.

> +	case EFI_BOOT_SERVICES_DATA:
> +	case EFI_RUNTIME_SERVICES_DATA:
> +		return false;
> +	}
> +
> +	/* Check if the address is outside kernel usable area */
> +	switch (e820_get_entry_type(phys_addr, phys_addr + size - 1)) {
> +	case E820_RESERVED:
> +	case E820_ACPI:
> +	case E820_NVS:
> +	case E820_UNUSABLE:
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * Architecure override of __weak function to prevent ram remap and use the

s/ram/RAM/

> + * architectural remap function.
> + */
> +bool memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size)
> +{
> +	if (!memremap_apply_encryption(phys_addr, size))
> +		return false;
> +
> +	return true;

Do I see it correctly that this could just very simply be:

	return memremap_apply_encryption(phys_addr, size);

?

> +}
> +
> +/*
> + * Architecure override of __weak function to adjust the protection attributes
> + * used when remapping memory.
> + */
> +pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
> +					     unsigned long size,
> +					     pgprot_t prot)
> +{
> +	unsigned long prot_val = pgprot_val(prot);
> +
> +	if (memremap_apply_encryption(phys_addr, size))
> +		prot_val |= _PAGE_ENC;
> +	else
> +		prot_val &= ~_PAGE_ENC;
> +
> +	return __pgprot(prot_val);
> +}
> +
>  /* Remap memory with encryption */
>  void __init *early_memremap_enc(resource_size_t phys_addr,
>  				unsigned long size)
> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> index 58b0f80..3f89179 100644
> --- a/arch/x86/platform/efi/efi_64.c
> +++ b/arch/x86/platform/efi/efi_64.c
> @@ -221,7 +221,13 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  	if (efi_enabled(EFI_OLD_MEMMAP))
>  		return 0;
>  
> -	efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
> +	/*
> +	 * Since the PGD is encrypted, set the encryption mask so that when
> +	 * this value is loaded into cr3 the PGD will be decrypted during
> +	 * the pagetable walk.
> +	 */
> +	efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
> +
>  	pgd = efi_pgd;
>  
>  	/*
> @@ -231,7 +237,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  	 * phys_efi_set_virtual_address_map().
>  	 */
>  	pfn = pa_memmap >> PAGE_SHIFT;
> -	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) {
> +	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW | _PAGE_ENC)) {

That line sticks too far out, let's shorten it:

	unsigned long pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;

	...

	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {


	..

	pf = _PAGE_RW | _PAGE_ENC;
	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {

	..


>  		pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
>  		return 1;
>  	}
> @@ -258,7 +264,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  	text = __pa(_text);
>  	pfn = text >> PAGE_SHIFT;
>  
> -	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
> +	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW | _PAGE_ENC)) {
>  		pr_err("Failed to map kernel text 1:1\n");
>  		return 1;
>  	}
> diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
> index 1ac199c..91c06ec 100644
> --- a/drivers/firmware/efi/efi.c
> +++ b/drivers/firmware/efi/efi.c
> @@ -51,6 +51,25 @@ struct efi __read_mostly efi = {
>  };
>  EXPORT_SYMBOL(efi);
>  
> +static unsigned long *efi_tables[] = {
> +	&efi.mps,
> +	&efi.acpi,
> +	&efi.acpi20,
> +	&efi.smbios,
> +	&efi.smbios3,
> +	&efi.sal_systab,
> +	&efi.boot_info,
> +	&efi.hcdp,
> +	&efi.uga,
> +	&efi.uv_systab,
> +	&efi.fw_vendor,
> +	&efi.runtime,
> +	&efi.config_table,
> +	&efi.esrt,
> +	&efi.properties_table,
> +	&efi.mem_attr_table,
> +};
> +
>  static bool disable_runtime;
>  static int __init setup_noefi(char *arg)
>  {
> @@ -822,3 +841,17 @@ int efi_status_to_err(efi_status_t status)
>  
>  	return err;
>  }
> +
> +bool efi_table_address_match(unsigned long phys_addr)
> +{
> +	int i;
> +
> +	if (phys_addr == EFI_INVALID_TABLE_ADDR)
> +		return false;
> +
> +	for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
> +		if (*(efi_tables[i]) == phys_addr)
> +			return true;
> +
> +	return false;
> +}
> diff --git a/include/linux/efi.h b/include/linux/efi.h
> index 2d08948..72d89bf 100644
> --- a/include/linux/efi.h
> +++ b/include/linux/efi.h
> @@ -1070,6 +1070,8 @@ efi_capsule_pending(int *reset_type)
>  
>  extern int efi_status_to_err(efi_status_t status);
>  
> +extern bool efi_table_address_match(unsigned long phys_addr);
> +
>  /*
>   * Variable Attributes
>   */
> diff --git a/kernel/memremap.c b/kernel/memremap.c
> index b501e39..ac1437e 100644
> --- a/kernel/memremap.c
> +++ b/kernel/memremap.c
> @@ -34,12 +34,18 @@ static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
>  }
>  #endif
>  
> +bool __weak memremap_do_ram_remap(resource_size_t offset, size_t size)
> +{
> +	return true;
> +}
> +

Why isn't this an inline in a header?

>  static void *try_ram_remap(resource_size_t offset, size_t size)
>  {
>  	unsigned long pfn = PHYS_PFN(offset);
>  
>  	/* In the simple case just return the existing linear address */
> -	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
> +	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) &&
> +	    memremap_do_ram_remap(offset, size))
>  		return __va(offset);

<---- newline here.

>  	return NULL; /* fallback to arch_memremap_wb */
>  }
> diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
> index d71b98b..34af5b6 100644
> --- a/mm/early_ioremap.c
> +++ b/mm/early_ioremap.c
> @@ -30,6 +30,13 @@ early_param("early_ioremap_debug", early_ioremap_debug_setup);
>  
>  static int after_paging_init __initdata;
>  
> +pgprot_t __init __weak early_memremap_pgprot_adjust(resource_size_t phys_addr,
> +						    unsigned long size,
> +						    pgprot_t prot)
> +{
> +	return prot;
> +}

Also, why isn't this an inline in a header somewhere?
Tom Lendacky Nov. 19, 2016, 6:33 p.m. UTC | #4
On 11/17/2016 9:55 AM, Borislav Petkov wrote:
> On Wed, Nov 09, 2016 at 06:36:31PM -0600, Tom Lendacky wrote:
>> Boot data (such as EFI related data) is not encrypted when the system is
>> booted and needs to be accessed unencrypted.  Add support to apply the
>> proper attributes to the EFI page tables and to the early_memremap and
>> memremap APIs to identify the type of data being accessed so that the
>> proper encryption attribute can be applied.
>>
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> ---
>>  arch/x86/include/asm/e820.h    |    1 
>>  arch/x86/kernel/e820.c         |   16 +++++++
>>  arch/x86/mm/ioremap.c          |   89 ++++++++++++++++++++++++++++++++++++++++
>>  arch/x86/platform/efi/efi_64.c |   12 ++++-
>>  drivers/firmware/efi/efi.c     |   33 +++++++++++++++
>>  include/linux/efi.h            |    2 +
>>  kernel/memremap.c              |    8 +++-
>>  mm/early_ioremap.c             |   18 +++++++-
>>  8 files changed, 172 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
>> index 476b574..186f1d04 100644
>> --- a/arch/x86/include/asm/e820.h
>> +++ b/arch/x86/include/asm/e820.h
>> @@ -16,6 +16,7 @@ extern struct e820map *e820_saved;
>>  extern unsigned long pci_mem_start;
>>  extern int e820_any_mapped(u64 start, u64 end, unsigned type);
>>  extern int e820_all_mapped(u64 start, u64 end, unsigned type);
>> +extern unsigned int e820_get_entry_type(u64 start, u64 end);
>>  extern void e820_add_region(u64 start, u64 size, int type);
>>  extern void e820_print_map(char *who);
>>  extern int
>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
>> index b85fe5f..92fce4e 100644
>> --- a/arch/x86/kernel/e820.c
>> +++ b/arch/x86/kernel/e820.c
>> @@ -107,6 +107,22 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
>>  	return 0;
>>  }
>>  
>> +unsigned int e820_get_entry_type(u64 start, u64 end)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < e820->nr_map; i++) {
>> +		struct e820entry *ei = &e820->map[i];
>> +
>> +		if (ei->addr >= end || ei->addr + ei->size <= start)
>> +			continue;
>> +
>> +		return ei->type;
>> +	}
>> +
>> +	return 0;
> 
> Please add a
> 
> #define E820_TYPE_INVALID	0
> 
> or so and return it instead of the naked number 0.
> 
> Also, this patch can be split in logical parts. The e820 stuff can be a
> separate pre-patch.
> 
> efi_table_address_match() and the tables definitions is a second pre-patch.
> 
> The rest is then the third patch.
> 

Ok, I'll add the new #define and split this into separate patches.

> ...
> 
>> +}
>> +
>>  /*
>>   * Add a memory region to the kernel e820 map.
>>   */
>> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
>> index ff542cd..ee347c2 100644
>> --- a/arch/x86/mm/ioremap.c
>> +++ b/arch/x86/mm/ioremap.c
>> @@ -20,6 +20,9 @@
>>  #include <asm/tlbflush.h>
>>  #include <asm/pgalloc.h>
>>  #include <asm/pat.h>
>> +#include <asm/e820.h>
>> +#include <asm/setup.h>
>> +#include <linux/efi.h>
>>  
>>  #include "physaddr.h"
>>  
>> @@ -418,6 +421,92 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
>>  	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
>>  }
>>  
>> +static bool memremap_setup_data(resource_size_t phys_addr,
>> +				unsigned long size)
> 
> This function name doesn't read like what the function does.
> 

Ok, I'll work on the naming.

>> +{
>> +	u64 paddr;
>> +
>> +	if (phys_addr == boot_params.hdr.setup_data)
>> +		return true;
>> +
>> +	paddr = boot_params.efi_info.efi_memmap_hi;
>> +	paddr <<= 32;
>> +	paddr |= boot_params.efi_info.efi_memmap;
>> +	if (phys_addr == paddr)
>> +		return true;
>> +
>> +	paddr = boot_params.efi_info.efi_systab_hi;
>> +	paddr <<= 32;
>> +	paddr |= boot_params.efi_info.efi_systab;
>> +	if (phys_addr == paddr)
>> +		return true;
>> +
>> +	if (efi_table_address_match(phys_addr))
>> +		return true;
>> +
>> +	return false;
>> +}
> 
> arch/x86/built-in.o: In function `memremap_setup_data':
> /home/boris/kernel/alt-linux/arch/x86/mm/ioremap.c:444: undefined reference to `efi_table_address_match'
> arch/x86/built-in.o: In function `memremap_apply_encryption':
> /home/boris/kernel/alt-linux/arch/x86/mm/ioremap.c:462: undefined reference to `efi_mem_type'
> make: *** [vmlinux] Error 1
> 
> I guess due to
> 
> # CONFIG_EFI is not set
> 

Good catch, I'll make sure this builds without CONFIG_EFI.

>> +
>> +static bool memremap_apply_encryption(resource_size_t phys_addr,
>> +				      unsigned long size)
> 
> This name is misleading too: it doesn't apply encryption but checks
> whether to apply encryption for @phys_addr or not. So something like:
> 
> ... memremap_should_encrypt(...)
> {
> 	return true - for should
> 	return false - for should not
> 
> should make the whole thing much more straightforward. Or am I
> misunderstanding you here?
> 

No, you got it.  Maybe even something like memremap_should_map_encrypted()
would be even better.

>> +{
>> +	/* SME is not active, just return true */
>> +	if (!sme_me_mask)
>> +		return true;
> 
> I don't understand the logic here: SME is not active -> apply encryption?!

It does seem counter-intuitive, but it is mainly because of the memremap
vs. early_memremap support. For the early_memremap support, if the
sme_me_mask is 0 it doesn't matter whether we return true or false since
the mask is zero even if you try to apply it. But for the memremap
support, it's used to determine whether to do the ram remap vs an
ioremap.

I'll pull the sme_me_mask check out of the function and put it in the
individual functions to remove the contradiction and make things
clearer.

> 
>> +
>> +	/* Check if the address is part of the setup data */
> 
> That comment belongs over the function definition of
> memremap_setup_data() along with what it is supposed to do.

Ok.

> 
>> +	if (memremap_setup_data(phys_addr, size))
>> +		return false;
>> +
>> +	/* Check if the address is part of EFI boot/runtime data */
>> +	switch (efi_mem_type(phys_addr)) {
> 
> Please send a pre-patch fix for efi_mem_type() to return
> EFI_RESERVED_TYPE instead of naked 0 in the failure case.

I can do that.

> 
>> +	case EFI_BOOT_SERVICES_DATA:
>> +	case EFI_RUNTIME_SERVICES_DATA:
>> +		return false;
>> +	}
>> +
>> +	/* Check if the address is outside kernel usable area */
>> +	switch (e820_get_entry_type(phys_addr, phys_addr + size - 1)) {
>> +	case E820_RESERVED:
>> +	case E820_ACPI:
>> +	case E820_NVS:
>> +	case E820_UNUSABLE:
>> +		return false;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +/*
>> + * Architecure override of __weak function to prevent ram remap and use the
> 
> s/ram/RAM/

Ok.  I'll check throughout the series, too.

> 
>> + * architectural remap function.
>> + */
>> +bool memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size)
>> +{
>> +	if (!memremap_apply_encryption(phys_addr, size))
>> +		return false;
>> +
>> +	return true;
> 
> Do I see it correctly that this could just very simply be:
> 
> 	return memremap_apply_encryption(phys_addr, size);
> 
> ?
> 

Yup, very true.

>> +}
>> +
>> +/*
>> + * Architecure override of __weak function to adjust the protection attributes
>> + * used when remapping memory.
>> + */
>> +pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
>> +					     unsigned long size,
>> +					     pgprot_t prot)
>> +{
>> +	unsigned long prot_val = pgprot_val(prot);
>> +
>> +	if (memremap_apply_encryption(phys_addr, size))
>> +		prot_val |= _PAGE_ENC;
>> +	else
>> +		prot_val &= ~_PAGE_ENC;
>> +
>> +	return __pgprot(prot_val);
>> +}
>> +
>>  /* Remap memory with encryption */
>>  void __init *early_memremap_enc(resource_size_t phys_addr,
>>  				unsigned long size)
>> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
>> index 58b0f80..3f89179 100644
>> --- a/arch/x86/platform/efi/efi_64.c
>> +++ b/arch/x86/platform/efi/efi_64.c
>> @@ -221,7 +221,13 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>>  	if (efi_enabled(EFI_OLD_MEMMAP))
>>  		return 0;
>>  
>> -	efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
>> +	/*
>> +	 * Since the PGD is encrypted, set the encryption mask so that when
>> +	 * this value is loaded into cr3 the PGD will be decrypted during
>> +	 * the pagetable walk.
>> +	 */
>> +	efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
>> +
>>  	pgd = efi_pgd;
>>  
>>  	/*
>> @@ -231,7 +237,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>>  	 * phys_efi_set_virtual_address_map().
>>  	 */
>>  	pfn = pa_memmap >> PAGE_SHIFT;
>> -	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) {
>> +	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW | _PAGE_ENC)) {
> 
> That line sticks too far out, let's shorten it:
> 
> 	unsigned long pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;
> 
> 	...
> 
> 	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {
> 
> 
> 	..
> 
> 	pf = _PAGE_RW | _PAGE_ENC;
> 	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {
> 
> 	..
> 
> 

Ok, will do.

>>  		pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
>>  		return 1;
>>  	}
>> @@ -258,7 +264,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>>  	text = __pa(_text);
>>  	pfn = text >> PAGE_SHIFT;
>>  
>> -	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
>> +	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW | _PAGE_ENC)) {
>>  		pr_err("Failed to map kernel text 1:1\n");
>>  		return 1;
>>  	}
>> diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
>> index 1ac199c..91c06ec 100644
>> --- a/drivers/firmware/efi/efi.c
>> +++ b/drivers/firmware/efi/efi.c
>> @@ -51,6 +51,25 @@ struct efi __read_mostly efi = {
>>  };
>>  EXPORT_SYMBOL(efi);
>>  
>> +static unsigned long *efi_tables[] = {
>> +	&efi.mps,
>> +	&efi.acpi,
>> +	&efi.acpi20,
>> +	&efi.smbios,
>> +	&efi.smbios3,
>> +	&efi.sal_systab,
>> +	&efi.boot_info,
>> +	&efi.hcdp,
>> +	&efi.uga,
>> +	&efi.uv_systab,
>> +	&efi.fw_vendor,
>> +	&efi.runtime,
>> +	&efi.config_table,
>> +	&efi.esrt,
>> +	&efi.properties_table,
>> +	&efi.mem_attr_table,
>> +};
>> +
>>  static bool disable_runtime;
>>  static int __init setup_noefi(char *arg)
>>  {
>> @@ -822,3 +841,17 @@ int efi_status_to_err(efi_status_t status)
>>  
>>  	return err;
>>  }
>> +
>> +bool efi_table_address_match(unsigned long phys_addr)
>> +{
>> +	int i;
>> +
>> +	if (phys_addr == EFI_INVALID_TABLE_ADDR)
>> +		return false;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
>> +		if (*(efi_tables[i]) == phys_addr)
>> +			return true;
>> +
>> +	return false;
>> +}
>> diff --git a/include/linux/efi.h b/include/linux/efi.h
>> index 2d08948..72d89bf 100644
>> --- a/include/linux/efi.h
>> +++ b/include/linux/efi.h
>> @@ -1070,6 +1070,8 @@ efi_capsule_pending(int *reset_type)
>>  
>>  extern int efi_status_to_err(efi_status_t status);
>>  
>> +extern bool efi_table_address_match(unsigned long phys_addr);
>> +
>>  /*
>>   * Variable Attributes
>>   */
>> diff --git a/kernel/memremap.c b/kernel/memremap.c
>> index b501e39..ac1437e 100644
>> --- a/kernel/memremap.c
>> +++ b/kernel/memremap.c
>> @@ -34,12 +34,18 @@ static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
>>  }
>>  #endif
>>  
>> +bool __weak memremap_do_ram_remap(resource_size_t offset, size_t size)
>> +{
>> +	return true;
>> +}
>> +
> 
> Why isn't this an inline in a header?

I'll take a look at doing that vs the __weak method.  It will mean
having to do some #ifndef stuff but hopefully it shouldn't be too bad.

> 
>>  static void *try_ram_remap(resource_size_t offset, size_t size)
>>  {
>>  	unsigned long pfn = PHYS_PFN(offset);
>>  
>>  	/* In the simple case just return the existing linear address */
>> -	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
>> +	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) &&
>> +	    memremap_do_ram_remap(offset, size))
>>  		return __va(offset);
> 
> <---- newline here.
> 

Ok.

>>  	return NULL; /* fallback to arch_memremap_wb */
>>  }
>> diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
>> index d71b98b..34af5b6 100644
>> --- a/mm/early_ioremap.c
>> +++ b/mm/early_ioremap.c
>> @@ -30,6 +30,13 @@ early_param("early_ioremap_debug", early_ioremap_debug_setup);
>>  
>>  static int after_paging_init __initdata;
>>  
>> +pgprot_t __init __weak early_memremap_pgprot_adjust(resource_size_t phys_addr,
>> +						    unsigned long size,
>> +						    pgprot_t prot)
>> +{
>> +	return prot;
>> +}
> 
> Also, why isn't this an inline in a header somewhere?

I'll look into it.

Thanks,
Tom

> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Borislav Petkov Nov. 20, 2016, 11:04 p.m. UTC | #5
On Sat, Nov 19, 2016 at 12:33:49PM -0600, Tom Lendacky wrote:
> >> +{
> >> +	/* SME is not active, just return true */
> >> +	if (!sme_me_mask)
> >> +		return true;
> > 
> > I don't understand the logic here: SME is not active -> apply encryption?!
> 
> It does seem counter-intuitive, but it is mainly because of the memremap
> vs. early_memremap support. For the early_memremap support, if the
> sme_me_mask is 0 it doesn't matter whether we return true or false since
> the mask is zero even if you try to apply it. But for the memremap
> support, it's used to determine whether to do the ram remap vs an
> ioremap.
> 
> I'll pull the sme_me_mask check out of the function and put it in the
> individual functions to remove the contradiction and make things
> clearer.

But that would be more code, right?

Instead, you could simply explain in a comment above it what you
mean exactly. Something along the lines of "if sme_me_mask is not
set, we should map encrypted because if not set, we can simply remap
RAM. Otherwise we have to ioremap because we need to access it in the
clear..."

I presume - I still don't grok that difference here completely.
Matt Fleming Dec. 7, 2016, 1:19 p.m. UTC | #6
On Wed, 09 Nov, at 06:36:31PM, Tom Lendacky wrote:
> Boot data (such as EFI related data) is not encrypted when the system is
> booted and needs to be accessed unencrypted.  Add support to apply the
> proper attributes to the EFI page tables and to the early_memremap and
> memremap APIs to identify the type of data being accessed so that the
> proper encryption attribute can be applied.
> 
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> ---
>  arch/x86/include/asm/e820.h    |    1 
>  arch/x86/kernel/e820.c         |   16 +++++++
>  arch/x86/mm/ioremap.c          |   89 ++++++++++++++++++++++++++++++++++++++++
>  arch/x86/platform/efi/efi_64.c |   12 ++++-
>  drivers/firmware/efi/efi.c     |   33 +++++++++++++++
>  include/linux/efi.h            |    2 +
>  kernel/memremap.c              |    8 +++-
>  mm/early_ioremap.c             |   18 +++++++-
>  8 files changed, 172 insertions(+), 7 deletions(-)
 
FWIW, I think this version is an improvement over all the previous
ones.

[...]

> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index ff542cd..ee347c2 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -20,6 +20,9 @@
>  #include <asm/tlbflush.h>
>  #include <asm/pgalloc.h>
>  #include <asm/pat.h>
> +#include <asm/e820.h>
> +#include <asm/setup.h>
> +#include <linux/efi.h>
>  
>  #include "physaddr.h"
>  
> @@ -418,6 +421,92 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
>  	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
>  }
>  
> +static bool memremap_setup_data(resource_size_t phys_addr,
> +				unsigned long size)
> +{
> +	u64 paddr;
> +
> +	if (phys_addr == boot_params.hdr.setup_data)
> +		return true;
> +

Why is the setup_data linked list not traversed when checking for
matching addresses? Am I reading this incorrectly? I don't see how
this can work.

> +	paddr = boot_params.efi_info.efi_memmap_hi;
> +	paddr <<= 32;
> +	paddr |= boot_params.efi_info.efi_memmap;
> +	if (phys_addr == paddr)
> +		return true;
> +
> +	paddr = boot_params.efi_info.efi_systab_hi;
> +	paddr <<= 32;
> +	paddr |= boot_params.efi_info.efi_systab;
> +	if (phys_addr == paddr)
> +		return true;
> +
> +	if (efi_table_address_match(phys_addr))
> +		return true;
> +
> +	return false;
> +}
> +
> +static bool memremap_apply_encryption(resource_size_t phys_addr,
> +				      unsigned long size)
> +{
> +	/* SME is not active, just return true */
> +	if (!sme_me_mask)
> +		return true;
> +
> +	/* Check if the address is part of the setup data */
> +	if (memremap_setup_data(phys_addr, size))
> +		return false;
> +
> +	/* Check if the address is part of EFI boot/runtime data */
> +	switch (efi_mem_type(phys_addr)) {
> +	case EFI_BOOT_SERVICES_DATA:
> +	case EFI_RUNTIME_SERVICES_DATA:
> +		return false;
> +	}

EFI_LOADER_DATA is notable by its absence.

We use that memory type for allocations inside of the EFI boot stub
that are then used while the kernel is running. One use that comes to
mind is for initrd files, see handle_cmdline_files().

Oh I see you handle that in PATCH 9, never mind.

> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> index 58b0f80..3f89179 100644
> --- a/arch/x86/platform/efi/efi_64.c
> +++ b/arch/x86/platform/efi/efi_64.c
> @@ -221,7 +221,13 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  	if (efi_enabled(EFI_OLD_MEMMAP))
>  		return 0;
>  
> -	efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
> +	/*
> +	 * Since the PGD is encrypted, set the encryption mask so that when
> +	 * this value is loaded into cr3 the PGD will be decrypted during
> +	 * the pagetable walk.
> +	 */
> +	efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
> +
>  	pgd = efi_pgd;
>  
>  	/*

Do all callers of __pa() in arch/x86 need fixing up like this?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Lendacky Dec. 9, 2016, 2:26 p.m. UTC | #7
On 12/7/2016 7:19 AM, Matt Fleming wrote:
> On Wed, 09 Nov, at 06:36:31PM, Tom Lendacky wrote:
>> Boot data (such as EFI related data) is not encrypted when the system is
>> booted and needs to be accessed unencrypted.  Add support to apply the
>> proper attributes to the EFI page tables and to the early_memremap and
>> memremap APIs to identify the type of data being accessed so that the
>> proper encryption attribute can be applied.
>>
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> ---
>>  arch/x86/include/asm/e820.h    |    1 
>>  arch/x86/kernel/e820.c         |   16 +++++++
>>  arch/x86/mm/ioremap.c          |   89 ++++++++++++++++++++++++++++++++++++++++
>>  arch/x86/platform/efi/efi_64.c |   12 ++++-
>>  drivers/firmware/efi/efi.c     |   33 +++++++++++++++
>>  include/linux/efi.h            |    2 +
>>  kernel/memremap.c              |    8 +++-
>>  mm/early_ioremap.c             |   18 +++++++-
>>  8 files changed, 172 insertions(+), 7 deletions(-)
>  
> FWIW, I think this version is an improvement over all the previous
> ones.
> 
> [...]
> 
>> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
>> index ff542cd..ee347c2 100644
>> --- a/arch/x86/mm/ioremap.c
>> +++ b/arch/x86/mm/ioremap.c
>> @@ -20,6 +20,9 @@
>>  #include <asm/tlbflush.h>
>>  #include <asm/pgalloc.h>
>>  #include <asm/pat.h>
>> +#include <asm/e820.h>
>> +#include <asm/setup.h>
>> +#include <linux/efi.h>
>>  
>>  #include "physaddr.h"
>>  
>> @@ -418,6 +421,92 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
>>  	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
>>  }
>>  
>> +static bool memremap_setup_data(resource_size_t phys_addr,
>> +				unsigned long size)
>> +{
>> +	u64 paddr;
>> +
>> +	if (phys_addr == boot_params.hdr.setup_data)
>> +		return true;
>> +
> 
> Why is the setup_data linked list not traversed when checking for
> matching addresses? Am I reading this incorrectly? I don't see how
> this can work.

Yeah, I caught that too after I sent this out. I think the best way to
handle this would be to create a list/array of setup data addresses in
the parse_setup_data() routine and then check the address against that
list in this routine.

> 
>> +	paddr = boot_params.efi_info.efi_memmap_hi;
>> +	paddr <<= 32;
>> +	paddr |= boot_params.efi_info.efi_memmap;
>> +	if (phys_addr == paddr)
>> +		return true;
>> +
>> +	paddr = boot_params.efi_info.efi_systab_hi;
>> +	paddr <<= 32;
>> +	paddr |= boot_params.efi_info.efi_systab;
>> +	if (phys_addr == paddr)
>> +		return true;
>> +
>> +	if (efi_table_address_match(phys_addr))
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>> +static bool memremap_apply_encryption(resource_size_t phys_addr,
>> +				      unsigned long size)
>> +{
>> +	/* SME is not active, just return true */
>> +	if (!sme_me_mask)
>> +		return true;
>> +
>> +	/* Check if the address is part of the setup data */
>> +	if (memremap_setup_data(phys_addr, size))
>> +		return false;
>> +
>> +	/* Check if the address is part of EFI boot/runtime data */
>> +	switch (efi_mem_type(phys_addr)) {
>> +	case EFI_BOOT_SERVICES_DATA:
>> +	case EFI_RUNTIME_SERVICES_DATA:
>> +		return false;
>> +	}
> 
> EFI_LOADER_DATA is notable by its absence.
> 
> We use that memory type for allocations inside of the EFI boot stub
> that are then used while the kernel is running. One use that comes to
> mind is for initrd files, see handle_cmdline_files().
> 
> Oh I see you handle that in PATCH 9, never mind.
> 
>> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
>> index 58b0f80..3f89179 100644
>> --- a/arch/x86/platform/efi/efi_64.c
>> +++ b/arch/x86/platform/efi/efi_64.c
>> @@ -221,7 +221,13 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>>  	if (efi_enabled(EFI_OLD_MEMMAP))
>>  		return 0;
>>  
>> -	efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
>> +	/*
>> +	 * Since the PGD is encrypted, set the encryption mask so that when
>> +	 * this value is loaded into cr3 the PGD will be decrypted during
>> +	 * the pagetable walk.
>> +	 */
>> +	efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
>> +
>>  	pgd = efi_pgd;
>>  
>>  	/*
> 
> Do all callers of __pa() in arch/x86 need fixing up like this?

No, currently this is only needed when we're dealing with values that
will be used in the cr3 register.

Thanks,
Tom

> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 476b574..186f1d04 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -16,6 +16,7 @@  extern struct e820map *e820_saved;
 extern unsigned long pci_mem_start;
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
 extern int e820_all_mapped(u64 start, u64 end, unsigned type);
+extern unsigned int e820_get_entry_type(u64 start, u64 end);
 extern void e820_add_region(u64 start, u64 size, int type);
 extern void e820_print_map(char *who);
 extern int
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index b85fe5f..92fce4e 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -107,6 +107,22 @@  int __init e820_all_mapped(u64 start, u64 end, unsigned type)
 	return 0;
 }
 
+unsigned int e820_get_entry_type(u64 start, u64 end)
+{
+	int i;
+
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *ei = &e820->map[i];
+
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+
+		return ei->type;
+	}
+
+	return 0;
+}
+
 /*
  * Add a memory region to the kernel e820 map.
  */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ff542cd..ee347c2 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -20,6 +20,9 @@ 
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 #include <asm/pat.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
+#include <linux/efi.h>
 
 #include "physaddr.h"
 
@@ -418,6 +421,92 @@  void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
 }
 
+static bool memremap_setup_data(resource_size_t phys_addr,
+				unsigned long size)
+{
+	u64 paddr;
+
+	if (phys_addr == boot_params.hdr.setup_data)
+		return true;
+
+	paddr = boot_params.efi_info.efi_memmap_hi;
+	paddr <<= 32;
+	paddr |= boot_params.efi_info.efi_memmap;
+	if (phys_addr == paddr)
+		return true;
+
+	paddr = boot_params.efi_info.efi_systab_hi;
+	paddr <<= 32;
+	paddr |= boot_params.efi_info.efi_systab;
+	if (phys_addr == paddr)
+		return true;
+
+	if (efi_table_address_match(phys_addr))
+		return true;
+
+	return false;
+}
+
+static bool memremap_apply_encryption(resource_size_t phys_addr,
+				      unsigned long size)
+{
+	/* SME is not active, just return true */
+	if (!sme_me_mask)
+		return true;
+
+	/* Check if the address is part of the setup data */
+	if (memremap_setup_data(phys_addr, size))
+		return false;
+
+	/* Check if the address is part of EFI boot/runtime data */
+	switch (efi_mem_type(phys_addr)) {
+	case EFI_BOOT_SERVICES_DATA:
+	case EFI_RUNTIME_SERVICES_DATA:
+		return false;
+	}
+
+	/* Check if the address is outside kernel usable area */
+	switch (e820_get_entry_type(phys_addr, phys_addr + size - 1)) {
+	case E820_RESERVED:
+	case E820_ACPI:
+	case E820_NVS:
+	case E820_UNUSABLE:
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Architecture override of __weak function to prevent ram remap and use the
+ * architectural remap function.
+ */
+bool memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size)
+{
+	if (!memremap_apply_encryption(phys_addr, size))
+		return false;
+
+	return true;
+}
+
+/*
+ * Architecture override of __weak function to adjust the protection attributes
+ * used when remapping memory.
+ */
+pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
+					     unsigned long size,
+					     pgprot_t prot)
+{
+	unsigned long prot_val = pgprot_val(prot);
+
+	if (memremap_apply_encryption(phys_addr, size))
+		prot_val |= _PAGE_ENC;
+	else
+		prot_val &= ~_PAGE_ENC;
+
+	return __pgprot(prot_val);
+}
+
 /* Remap memory with encryption */
 void __init *early_memremap_enc(resource_size_t phys_addr,
 				unsigned long size)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 58b0f80..3f89179 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -221,7 +221,13 @@  int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	if (efi_enabled(EFI_OLD_MEMMAP))
 		return 0;
 
-	efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
+	/*
+	 * Since the PGD is encrypted, set the encryption mask so that when
+	 * this value is loaded into cr3 the PGD will be decrypted during
+	 * the pagetable walk.
+	 */
+	efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
+
 	pgd = efi_pgd;
 
 	/*
@@ -231,7 +237,7 @@  int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	 * phys_efi_set_virtual_address_map().
 	 */
 	pfn = pa_memmap >> PAGE_SHIFT;
-	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) {
+	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW | _PAGE_ENC)) {
 		pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
 		return 1;
 	}
@@ -258,7 +264,7 @@  int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	text = __pa(_text);
 	pfn = text >> PAGE_SHIFT;
 
-	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
+	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW | _PAGE_ENC)) {
 		pr_err("Failed to map kernel text 1:1\n");
 		return 1;
 	}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 1ac199c..91c06ec 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -51,6 +51,25 @@  struct efi __read_mostly efi = {
 };
 EXPORT_SYMBOL(efi);
 
+static unsigned long *efi_tables[] = {
+	&efi.mps,
+	&efi.acpi,
+	&efi.acpi20,
+	&efi.smbios,
+	&efi.smbios3,
+	&efi.sal_systab,
+	&efi.boot_info,
+	&efi.hcdp,
+	&efi.uga,
+	&efi.uv_systab,
+	&efi.fw_vendor,
+	&efi.runtime,
+	&efi.config_table,
+	&efi.esrt,
+	&efi.properties_table,
+	&efi.mem_attr_table,
+};
+
 static bool disable_runtime;
 static int __init setup_noefi(char *arg)
 {
@@ -822,3 +841,17 @@  int efi_status_to_err(efi_status_t status)
 
 	return err;
 }
+
+bool efi_table_address_match(unsigned long phys_addr)
+{
+	int i;
+
+	if (phys_addr == EFI_INVALID_TABLE_ADDR)
+		return false;
+
+	for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
+		if (*(efi_tables[i]) == phys_addr)
+			return true;
+
+	return false;
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 2d08948..72d89bf 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1070,6 +1070,8 @@  efi_capsule_pending(int *reset_type)
 
 extern int efi_status_to_err(efi_status_t status);
 
+extern bool efi_table_address_match(unsigned long phys_addr);
+
 /*
  * Variable Attributes
  */
diff --git a/kernel/memremap.c b/kernel/memremap.c
index b501e39..ac1437e 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -34,12 +34,18 @@  static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
 }
 #endif
 
+bool __weak memremap_do_ram_remap(resource_size_t offset, size_t size)
+{
+	return true;
+}
+
 static void *try_ram_remap(resource_size_t offset, size_t size)
 {
 	unsigned long pfn = PHYS_PFN(offset);
 
 	/* In the simple case just return the existing linear address */
-	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
+	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) &&
+	    memremap_do_ram_remap(offset, size))
 		return __va(offset);
 	return NULL; /* fallback to arch_memremap_wb */
 }
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index d71b98b..34af5b6 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -30,6 +30,13 @@  early_param("early_ioremap_debug", early_ioremap_debug_setup);
 
 static int after_paging_init __initdata;
 
+pgprot_t __init __weak early_memremap_pgprot_adjust(resource_size_t phys_addr,
+						    unsigned long size,
+						    pgprot_t prot)
+{
+	return prot;
+}
+
 void __init __weak early_ioremap_shutdown(void)
 {
 }
@@ -215,14 +222,19 @@  early_ioremap(resource_size_t phys_addr, unsigned long size)
 void __init *
 early_memremap(resource_size_t phys_addr, unsigned long size)
 {
-	return (__force void *)__early_ioremap(phys_addr, size,
-					       FIXMAP_PAGE_NORMAL);
+	pgprot_t prot = early_memremap_pgprot_adjust(phys_addr, size,
+						     FIXMAP_PAGE_NORMAL);
+
+	return (__force void *)__early_ioremap(phys_addr, size, prot);
 }
 #ifdef FIXMAP_PAGE_RO
 void __init *
 early_memremap_ro(resource_size_t phys_addr, unsigned long size)
 {
-	return (__force void *)__early_ioremap(phys_addr, size, FIXMAP_PAGE_RO);
+	pgprot_t prot = early_memremap_pgprot_adjust(phys_addr, size,
+						     FIXMAP_PAGE_RO);
+
+	return (__force void *)__early_ioremap(phys_addr, size, prot);
 }
 #endif