diff mbox

[v5,17/32] x86/mm: Add support to access boot related data in the clear

Message ID 20170418211921.10190.1537.stgit@tlendack-t1.amdoffice.net (mailing list archive)
State New, archived
Headers show

Commit Message

Tom Lendacky April 18, 2017, 9:19 p.m. UTC
Boot data (such as EFI related data) is not encrypted when the system is
booted because UEFI/BIOS does not run with SME active. In order to access
this data properly it needs to be mapped decrypted.

The early_memremap() support is updated to provide an arch specific
routine to modify the pagetable protection attributes before they are
applied to the new mapping. This is used to remove the encryption mask
for boot related data.

The memremap() support is updated to provide an arch specific routine
to determine if RAM remapping is allowed.  RAM remapping will cause an
encrypted mapping to be generated. By preventing RAM remapping,
ioremap_cache() will be used instead, which will provide a decrypted
mapping of the boot related data.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
 arch/x86/include/asm/io.h |    4 +
 arch/x86/mm/ioremap.c     |  182 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/io.h        |    2 
 kernel/memremap.c         |   20 ++++-
 mm/early_ioremap.c        |   18 ++++
 5 files changed, 219 insertions(+), 7 deletions(-)

Comments

Borislav Petkov May 15, 2017, 6:35 p.m. UTC | #1
On Tue, Apr 18, 2017 at 04:19:21PM -0500, Tom Lendacky wrote:
> Boot data (such as EFI related data) is not encrypted when the system is
> booted because UEFI/BIOS does not run with SME active. In order to access
> this data properly it needs to be mapped decrypted.
> 
> The early_memremap() support is updated to provide an arch specific

"Update early_memremap() to provide... "

> routine to modify the pagetable protection attributes before they are
> applied to the new mapping. This is used to remove the encryption mask
> for boot related data.
> 
> The memremap() support is updated to provide an arch specific routine

Ditto. Passive tone always reads harder than an active tone,
"doer"-sentence.

> to determine if RAM remapping is allowed.  RAM remapping will cause an
> encrypted mapping to be generated. By preventing RAM remapping,
> ioremap_cache() will be used instead, which will provide a decrypted
> mapping of the boot related data.
> 
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
> ---
>  arch/x86/include/asm/io.h |    4 +
>  arch/x86/mm/ioremap.c     |  182 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/io.h        |    2 
>  kernel/memremap.c         |   20 ++++-
>  mm/early_ioremap.c        |   18 ++++
>  5 files changed, 219 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> index 7afb0e2..75f2858 100644
> --- a/arch/x86/include/asm/io.h
> +++ b/arch/x86/include/asm/io.h
> @@ -381,4 +381,8 @@ extern int __must_check arch_phys_wc_add(unsigned long base,
>  #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
>  #endif
>  
> +extern bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size,
> +				       unsigned long flags);
> +#define arch_memremap_do_ram_remap arch_memremap_do_ram_remap
> +
>  #endif /* _ASM_X86_IO_H */
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index 9bfcb1f..bce0604 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -13,6 +13,7 @@
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/mmiotrace.h>
> +#include <linux/efi.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/e820/api.h>
> @@ -21,6 +22,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/pgalloc.h>
>  #include <asm/pat.h>
> +#include <asm/setup.h>
>  
>  #include "physaddr.h"
>  
> @@ -419,6 +421,186 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
>  	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
>  }
>  
> +/*
> + * Examine the physical address to determine if it is an area of memory
> + * that should be mapped decrypted.  If the memory is not part of the
> + * kernel usable area it was accessed and created decrypted, so these
> + * areas should be mapped decrypted.
> + */
> +static bool memremap_should_map_decrypted(resource_size_t phys_addr,
> +					  unsigned long size)
> +{
> +	/* Check if the address is outside kernel usable area */
> +	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
> +	case E820_TYPE_RESERVED:
> +	case E820_TYPE_ACPI:
> +	case E820_TYPE_NVS:
> +	case E820_TYPE_UNUSABLE:
> +		return true;
> +	default:
> +		break;
> +	}
> +
> +	return false;
> +}
> +
> +/*
> + * Examine the physical address to determine if it is EFI data. Check
> + * it against the boot params structure and EFI tables and memory types.
> + */
> +static bool memremap_is_efi_data(resource_size_t phys_addr,
> +				 unsigned long size)
> +{
> +	u64 paddr;
> +
> +	/* Check if the address is part of EFI boot/runtime data */
> +	if (efi_enabled(EFI_BOOT)) {

Save indentation level:

	if (!efi_enabled(EFI_BOOT))
		return false;


> +		paddr = boot_params.efi_info.efi_memmap_hi;
> +		paddr <<= 32;
> +		paddr |= boot_params.efi_info.efi_memmap;
> +		if (phys_addr == paddr)
> +			return true;
> +
> +		paddr = boot_params.efi_info.efi_systab_hi;
> +		paddr <<= 32;
> +		paddr |= boot_params.efi_info.efi_systab;

So those two above look like could be two global vars which are
initialized somewhere in the EFI init path:

efi_memmap_phys and efi_systab_phys or so.

Matt ?

And then you won't need to create that paddr each time on the fly. I
mean, it's not a lot of instructions but still...

> +		if (phys_addr == paddr)
> +			return true;
> +
> +		if (efi_table_address_match(phys_addr))
> +			return true;
> +
> +		switch (efi_mem_type(phys_addr)) {
> +		case EFI_BOOT_SERVICES_DATA:
> +		case EFI_RUNTIME_SERVICES_DATA:
> +			return true;
> +		default:
> +			break;
> +		}
> +	}
> +
> +	return false;
> +}
> +
> +/*
> + * Examine the physical address to determine if it is boot data by checking
> + * it against the boot params setup_data chain.
> + */
> +static bool memremap_is_setup_data(resource_size_t phys_addr,
> +				   unsigned long size)
> +{
> +	struct setup_data *data;
> +	u64 paddr, paddr_next;
> +
> +	paddr = boot_params.hdr.setup_data;
> +	while (paddr) {
> +		bool is_setup_data = false;

You don't need that bool:

static bool memremap_is_setup_data(resource_size_t phys_addr,
                                   unsigned long size)
{
        struct setup_data *data;
        u64 paddr, paddr_next;

        paddr = boot_params.hdr.setup_data;
        while (paddr) {
                if (phys_addr == paddr)
                        return true;

                data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC);

                paddr_next = data->next;

                if ((phys_addr > paddr) && (phys_addr < (paddr + data->len))) {
                        memunmap(data);
                        return true;
                }

                memunmap(data);

                paddr = paddr_next;
        }
        return false;
}

Flow is a bit clearer.

> +/*
> + * Examine the physical address to determine if it is boot data by checking
> + * it against the boot params setup_data chain (early boot version).
> + */
> +static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
> +						unsigned long size)
> +{
> +	struct setup_data *data;
> +	u64 paddr, paddr_next;
> +
> +	paddr = boot_params.hdr.setup_data;
> +	while (paddr) {
> +		bool is_setup_data = false;
> +
> +		if (phys_addr == paddr)
> +			return true;
> +
> +		data = early_memremap_decrypted(paddr, sizeof(*data));
> +
> +		paddr_next = data->next;
> +
> +		if ((phys_addr > paddr) && (phys_addr < (paddr + data->len)))
> +			is_setup_data = true;
> +
> +		early_memunmap(data, sizeof(*data));
> +
> +		if (is_setup_data)
> +			return true;
> +
> +		paddr = paddr_next;
> +	}
> +
> +	return false;
> +}

This one is begging to be unified with memremap_is_setup_data() to both
call a __ worker function.

> +
> +/*
> + * Architecture function to determine if RAM remap is allowed. By default, a
> + * RAM remap will map the data as encrypted. Determine if a RAM remap should
> + * not be done so that the data will be mapped decrypted.
> + */
> +bool arch_memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size,
> +				unsigned long flags)

So this function doesn't do anything - it replies to a yes/no question.
So the name should not say "do" but sound like a question. Maybe:

	if (arch_memremap_can_remap( ... ))

or so...

> +{
> +	if (!sme_active())
> +		return true;
> +
> +	if (flags & MEMREMAP_ENC)
> +		return true;
> +
> +	if (flags & MEMREMAP_DEC)
> +		return false;

So this looks strange to me: both flags MEMREMAP_ENC and _DEC override
setup and efi data checking. But we want to remap setup and EFI  data
*always* decrypted because that data was not encrypted as, as you say,
firmware doesn't run with SME active.

So my simple logic says that EFI stuff should *always* be mapped DEC,
regardless of flags. Ditto for setup data. So that check below should
actually *override* the flags checks and go before them, no?

> +
> +	if (memremap_is_setup_data(phys_addr, size) ||
> +	    memremap_is_efi_data(phys_addr, size) ||
> +	    memremap_should_map_decrypted(phys_addr, size))
> +		return false;
> +
> +	return true;
> +}
> +
> +/*
> + * Architecture override of __weak function to adjust the protection attributes
> + * used when remapping memory. By default, early_memremp() will map the data

early_memremAp() - a is missing.
Tom Lendacky May 17, 2017, 6:54 p.m. UTC | #2
On 5/15/2017 1:35 PM, Borislav Petkov wrote:
> On Tue, Apr 18, 2017 at 04:19:21PM -0500, Tom Lendacky wrote:
>> Boot data (such as EFI related data) is not encrypted when the system is
>> booted because UEFI/BIOS does not run with SME active. In order to access
>> this data properly it needs to be mapped decrypted.
>>
>> The early_memremap() support is updated to provide an arch specific
>
> "Update early_memremap() to provide... "

Will do.

>
>> routine to modify the pagetable protection attributes before they are
>> applied to the new mapping. This is used to remove the encryption mask
>> for boot related data.
>>
>> The memremap() support is updated to provide an arch specific routine
>
> Ditto. Passive tone always reads harder than an active tone,
> "doer"-sentence.

Ditto.

>
>> to determine if RAM remapping is allowed.  RAM remapping will cause an
>> encrypted mapping to be generated. By preventing RAM remapping,
>> ioremap_cache() will be used instead, which will provide a decrypted
>> mapping of the boot related data.
>>
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>> ---
>>  arch/x86/include/asm/io.h |    4 +
>>  arch/x86/mm/ioremap.c     |  182 +++++++++++++++++++++++++++++++++++++++++++++
>>  include/linux/io.h        |    2
>>  kernel/memremap.c         |   20 ++++-
>>  mm/early_ioremap.c        |   18 ++++
>>  5 files changed, 219 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
>> index 7afb0e2..75f2858 100644
>> --- a/arch/x86/include/asm/io.h
>> +++ b/arch/x86/include/asm/io.h
>> @@ -381,4 +381,8 @@ extern int __must_check arch_phys_wc_add(unsigned long base,
>>  #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
>>  #endif
>>
>> +extern bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size,
>> +				       unsigned long flags);
>> +#define arch_memremap_do_ram_remap arch_memremap_do_ram_remap
>> +
>>  #endif /* _ASM_X86_IO_H */
>> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
>> index 9bfcb1f..bce0604 100644
>> --- a/arch/x86/mm/ioremap.c
>> +++ b/arch/x86/mm/ioremap.c
>> @@ -13,6 +13,7 @@
>>  #include <linux/slab.h>
>>  #include <linux/vmalloc.h>
>>  #include <linux/mmiotrace.h>
>> +#include <linux/efi.h>
>>
>>  #include <asm/cacheflush.h>
>>  #include <asm/e820/api.h>
>> @@ -21,6 +22,7 @@
>>  #include <asm/tlbflush.h>
>>  #include <asm/pgalloc.h>
>>  #include <asm/pat.h>
>> +#include <asm/setup.h>
>>
>>  #include "physaddr.h"
>>
>> @@ -419,6 +421,186 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
>>  	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
>>  }
>>
>> +/*
>> + * Examine the physical address to determine if it is an area of memory
>> + * that should be mapped decrypted.  If the memory is not part of the
>> + * kernel usable area it was accessed and created decrypted, so these
>> + * areas should be mapped decrypted.
>> + */
>> +static bool memremap_should_map_decrypted(resource_size_t phys_addr,
>> +					  unsigned long size)
>> +{
>> +	/* Check if the address is outside kernel usable area */
>> +	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
>> +	case E820_TYPE_RESERVED:
>> +	case E820_TYPE_ACPI:
>> +	case E820_TYPE_NVS:
>> +	case E820_TYPE_UNUSABLE:
>> +		return true;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	return false;
>> +}
>> +
>> +/*
>> + * Examine the physical address to determine if it is EFI data. Check
>> + * it against the boot params structure and EFI tables and memory types.
>> + */
>> +static bool memremap_is_efi_data(resource_size_t phys_addr,
>> +				 unsigned long size)
>> +{
>> +	u64 paddr;
>> +
>> +	/* Check if the address is part of EFI boot/runtime data */
>> +	if (efi_enabled(EFI_BOOT)) {
>
> Save indentation level:
>
> 	if (!efi_enabled(EFI_BOOT))
> 		return false;
>

I was worried what the compiler might do when CONFIG_EFI is not set,
but it appears to take care of it. I'll double check though.

>
>> +		paddr = boot_params.efi_info.efi_memmap_hi;
>> +		paddr <<= 32;
>> +		paddr |= boot_params.efi_info.efi_memmap;
>> +		if (phys_addr == paddr)
>> +			return true;
>> +
>> +		paddr = boot_params.efi_info.efi_systab_hi;
>> +		paddr <<= 32;
>> +		paddr |= boot_params.efi_info.efi_systab;
>
> So those two above look like could be two global vars which are
> initialized somewhere in the EFI init path:
>
> efi_memmap_phys and efi_systab_phys or so.
>
> Matt ?
>
> And then you won't need to create that paddr each time on the fly. I
> mean, it's not a lot of instructions but still...
>
>> +		if (phys_addr == paddr)
>> +			return true;
>> +
>> +		if (efi_table_address_match(phys_addr))
>> +			return true;
>> +
>> +		switch (efi_mem_type(phys_addr)) {
>> +		case EFI_BOOT_SERVICES_DATA:
>> +		case EFI_RUNTIME_SERVICES_DATA:
>> +			return true;
>> +		default:
>> +			break;
>> +		}
>> +	}
>> +
>> +	return false;
>> +}
>> +
>> +/*
>> + * Examine the physical address to determine if it is boot data by checking
>> + * it against the boot params setup_data chain.
>> + */
>> +static bool memremap_is_setup_data(resource_size_t phys_addr,
>> +				   unsigned long size)
>> +{
>> +	struct setup_data *data;
>> +	u64 paddr, paddr_next;
>> +
>> +	paddr = boot_params.hdr.setup_data;
>> +	while (paddr) {
>> +		bool is_setup_data = false;
>
> You don't need that bool:
>
> static bool memremap_is_setup_data(resource_size_t phys_addr,
>                                    unsigned long size)
> {
>         struct setup_data *data;
>         u64 paddr, paddr_next;
>
>         paddr = boot_params.hdr.setup_data;
>         while (paddr) {
>                 if (phys_addr == paddr)
>                         return true;
>
>                 data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC);
>
>                 paddr_next = data->next;
>
>                 if ((phys_addr > paddr) && (phys_addr < (paddr + data->len))) {
>                         memunmap(data);
>                         return true;
>                 }
>
>                 memunmap(data);
>
>                 paddr = paddr_next;
>         }
>         return false;
> }
>
> Flow is a bit clearer.

I may introduce a length variable to capture data->len right after
paddr_next is set and then have just a single memunmap() call before
the if check.

>
>> +/*
>> + * Examine the physical address to determine if it is boot data by checking
>> + * it against the boot params setup_data chain (early boot version).
>> + */
>> +static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
>> +						unsigned long size)
>> +{
>> +	struct setup_data *data;
>> +	u64 paddr, paddr_next;
>> +
>> +	paddr = boot_params.hdr.setup_data;
>> +	while (paddr) {
>> +		bool is_setup_data = false;
>> +
>> +		if (phys_addr == paddr)
>> +			return true;
>> +
>> +		data = early_memremap_decrypted(paddr, sizeof(*data));
>> +
>> +		paddr_next = data->next;
>> +
>> +		if ((phys_addr > paddr) && (phys_addr < (paddr + data->len)))
>> +			is_setup_data = true;
>> +
>> +		early_memunmap(data, sizeof(*data));
>> +
>> +		if (is_setup_data)
>> +			return true;
>> +
>> +		paddr = paddr_next;
>> +	}
>> +
>> +	return false;
>> +}
>
> This one is begging to be unified with memremap_is_setup_data() to both
> call a __ worker function.

I tried that, but calling an "__init" function (early_memremap()) from
a non "__init" function generated warnings. I suppose I can pass in a
function for the map and unmap but that looks worse to me (also the
unmap functions take different arguments).

>
>> +
>> +/*
>> + * Architecture function to determine if RAM remap is allowed. By default, a
>> + * RAM remap will map the data as encrypted. Determine if a RAM remap should
>> + * not be done so that the data will be mapped decrypted.
>> + */
>> +bool arch_memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size,
>> +				unsigned long flags)
>
> So this function doesn't do anything - it replies to a yes/no question.
> So the name should not say "do" but sound like a question. Maybe:
>
> 	if (arch_memremap_can_remap( ... ))
>
> or so...

Ok, I'll change that.

>
>> +{
>> +	if (!sme_active())
>> +		return true;
>> +
>> +	if (flags & MEMREMAP_ENC)
>> +		return true;
>> +
>> +	if (flags & MEMREMAP_DEC)
>> +		return false;
>
> So this looks strange to me: both flags MEMREMAP_ENC and _DEC override
> setup and efi data checking. But we want to remap setup and EFI  data
> *always* decrypted because that data was not encrypted as, as you say,
> firmware doesn't run with SME active.
>
> So my simple logic says that EFI stuff should *always* be mapped DEC,
> regardless of flags. Ditto for setup data. So that check below should
> actually *override* the flags checks and go before them, no?

This is like the chicken and the egg scenario. In order to determine if
an address is setup data I have to explicitly map the setup data chain
as decrypted. In order to do that I have to supply a flag to explicitly
map the data decrypted otherwise I wind up back in the
memremap_is_setup_data() function again and again and again...

>
>> +
>> +	if (memremap_is_setup_data(phys_addr, size) ||
>> +	    memremap_is_efi_data(phys_addr, size) ||
>> +	    memremap_should_map_decrypted(phys_addr, size))
>> +		return false;
>> +
>> +	return true;
>> +}
>> +
>> +/*
>> + * Architecture override of __weak function to adjust the protection attributes
>> + * used when remapping memory. By default, early_memremp() will map the data
>
> early_memremAp() - a is missing.

Got it.

Thanks,
Tom

>
Borislav Petkov May 18, 2017, 9:02 a.m. UTC | #3
On Wed, May 17, 2017 at 01:54:39PM -0500, Tom Lendacky wrote:
> I was worried what the compiler might do when CONFIG_EFI is not set,
> but it appears to take care of it. I'll double check though.

There's a efi_enabled() !CONFIG_EFI version too, so should be fine.

> I may introduce a length variable to capture data->len right after
> paddr_next is set and then have just a single memunmap() call before
> the if check.

Yap.

> I tried that, but calling an "__init" function (early_memremap()) from
> a non "__init" function generated warnings. I suppose I can pass in a
> function for the map and unmap but that looks worse to me (also the
> unmap functions take different arguments).

No, the other way around: the __init function should call the non-init
one and you need the non-init one anyway for memremap_is_setup_data().

> This is like the chicken and the egg scenario. In order to determine if
> an address is setup data I have to explicitly map the setup data chain
> as decrypted. In order to do that I have to supply a flag to explicitly
> map the data decrypted otherwise I wind up back in the
> memremap_is_setup_data() function again and again and again...

Oh, fun.
Matt Fleming May 18, 2017, 7:50 p.m. UTC | #4
On Mon, 15 May, at 08:35:17PM, Borislav Petkov wrote:
> On Tue, Apr 18, 2017 at 04:19:21PM -0500, Tom Lendacky wrote:
>
> > +		paddr = boot_params.efi_info.efi_memmap_hi;
> > +		paddr <<= 32;
> > +		paddr |= boot_params.efi_info.efi_memmap;
> > +		if (phys_addr == paddr)
> > +			return true;
> > +
> > +		paddr = boot_params.efi_info.efi_systab_hi;
> > +		paddr <<= 32;
> > +		paddr |= boot_params.efi_info.efi_systab;
> 
> So those two above look like could be two global vars which are
> initialized somewhere in the EFI init path:
> 
> efi_memmap_phys and efi_systab_phys or so.
> 
> Matt ?
> 
> And then you won't need to create that paddr each time on the fly. I
> mean, it's not a lot of instructions but still...
 
We should already have the physical memmap address available in
'efi.memmap.phys_map'.

And the physical address of the system table should be in
'efi_phys.systab'. See efi_init().
Tom Lendacky May 19, 2017, 8:50 p.m. UTC | #5
On 5/18/2017 4:02 AM, Borislav Petkov wrote:
> On Wed, May 17, 2017 at 01:54:39PM -0500, Tom Lendacky wrote:
>> I was worried what the compiler might do when CONFIG_EFI is not set,
>> but it appears to take care of it. I'll double check though.
>
> There's a efi_enabled() !CONFIG_EFI version too, so should be fine.
>
>> I may introduce a length variable to capture data->len right after
>> paddr_next is set and then have just a single memunmap() call before
>> the if check.
>
> Yap.
>
>> I tried that, but calling an "__init" function (early_memremap()) from
>> a non "__init" function generated warnings. I suppose I can pass in a
>> function for the map and unmap but that looks worse to me (also the
>> unmap functions take different arguments).
>
> No, the other way around: the __init function should call the non-init
> one and you need the non-init one anyway for memremap_is_setup_data().
>

The "worker" function would be doing the loop through the setup data,
but since the setup data is mapped inside the loop I can't do the __init
calling the non-init function and still hope to consolidate the code.
Maybe I'm missing something here...

Thanks,
Tom

>> This is like the chicken and the egg scenario. In order to determine if
>> an address is setup data I have to explicitly map the setup data chain
>> as decrypted. In order to do that I have to supply a flag to explicitly
>> map the data decrypted otherwise I wind up back in the
>> memremap_is_setup_data() function again and again and again...
>
> Oh, fun.
>
Tom Lendacky May 26, 2017, 4:22 p.m. UTC | #6
On 5/18/2017 2:50 PM, Matt Fleming wrote:
> On Mon, 15 May, at 08:35:17PM, Borislav Petkov wrote:
>> On Tue, Apr 18, 2017 at 04:19:21PM -0500, Tom Lendacky wrote:
>>
>>> +		paddr = boot_params.efi_info.efi_memmap_hi;
>>> +		paddr <<= 32;
>>> +		paddr |= boot_params.efi_info.efi_memmap;
>>> +		if (phys_addr == paddr)
>>> +			return true;
>>> +
>>> +		paddr = boot_params.efi_info.efi_systab_hi;
>>> +		paddr <<= 32;
>>> +		paddr |= boot_params.efi_info.efi_systab;
>>
>> So those two above look like could be two global vars which are
>> initialized somewhere in the EFI init path:
>>
>> efi_memmap_phys and efi_systab_phys or so.
>>
>> Matt ?
>>
>> And then you won't need to create that paddr each time on the fly. I
>> mean, it's not a lot of instructions but still...
>
> We should already have the physical memmap address available in
> 'efi.memmap.phys_map'.

Unfortunately memremap_is_efi_data() is called before the efi structure
gets initialized, so I can't use that value.

>
> And the physical address of the system table should be in
> 'efi_phys.systab'. See efi_init().

In addition to the same issue as efi.memmap.phys_map, efi_phys has
the __initdata attribute so it will be released/freed which will cause
problems in checks performed afterwards.

Thanks,
Tom

>
Borislav Petkov May 26, 2017, 4:35 p.m. UTC | #7
On Fri, May 26, 2017 at 11:22:36AM -0500, Tom Lendacky wrote:
> In addition to the same issue as efi.memmap.phys_map, efi_phys has
> the __initdata attribute so it will be released/freed which will cause
> problems in checks performed afterwards.

Sounds to me like we should drop the __initdata attr and prepare them
much earlier for use by the SME code.
Tom Lendacky May 30, 2017, 5:47 p.m. UTC | #8
On 5/26/2017 11:35 AM, Borislav Petkov wrote:
> On Fri, May 26, 2017 at 11:22:36AM -0500, Tom Lendacky wrote:
>> In addition to the same issue as efi.memmap.phys_map, efi_phys has
>> the __initdata attribute so it will be released/freed which will cause
>> problems in checks performed afterwards.
> 
> Sounds to me like we should drop the __initdata attr and prepare them
> much earlier for use by the SME code.

Probably something we can look at for a follow-on patch.

Thanks,
Tom

>
diff mbox

Patch

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 7afb0e2..75f2858 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -381,4 +381,8 @@  extern int __must_check arch_phys_wc_add(unsigned long base,
 #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
 #endif
 
+extern bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size,
+				       unsigned long flags);
+#define arch_memremap_do_ram_remap arch_memremap_do_ram_remap
+
 #endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 9bfcb1f..bce0604 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -13,6 +13,7 @@ 
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mmiotrace.h>
+#include <linux/efi.h>
 
 #include <asm/cacheflush.h>
 #include <asm/e820/api.h>
@@ -21,6 +22,7 @@ 
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 #include <asm/pat.h>
+#include <asm/setup.h>
 
 #include "physaddr.h"
 
@@ -419,6 +421,186 @@  void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
 }
 
+/*
+ * Examine the physical address to determine if it is an area of memory
+ * that should be mapped decrypted.  If the memory is not part of the
+ * kernel usable area it was accessed and created decrypted, so these
+ * areas should be mapped decrypted.
+ */
+static bool memremap_should_map_decrypted(resource_size_t phys_addr,
+					  unsigned long size)
+{
+	/* Check if the address is outside kernel usable area */
+	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
+	case E820_TYPE_RESERVED:
+	case E820_TYPE_ACPI:
+	case E820_TYPE_NVS:
+	case E820_TYPE_UNUSABLE:
+		return true;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+/*
+ * Examine the physical address to determine if it is EFI data. Check
+ * it against the boot params structure and EFI tables and memory types.
+ */
+static bool memremap_is_efi_data(resource_size_t phys_addr,
+				 unsigned long size)
+{
+	u64 paddr;
+
+	/* Check if the address is part of EFI boot/runtime data */
+	if (efi_enabled(EFI_BOOT)) {
+		paddr = boot_params.efi_info.efi_memmap_hi;
+		paddr <<= 32;
+		paddr |= boot_params.efi_info.efi_memmap;
+		if (phys_addr == paddr)
+			return true;
+
+		paddr = boot_params.efi_info.efi_systab_hi;
+		paddr <<= 32;
+		paddr |= boot_params.efi_info.efi_systab;
+		if (phys_addr == paddr)
+			return true;
+
+		if (efi_table_address_match(phys_addr))
+			return true;
+
+		switch (efi_mem_type(phys_addr)) {
+		case EFI_BOOT_SERVICES_DATA:
+		case EFI_RUNTIME_SERVICES_DATA:
+			return true;
+		default:
+			break;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params setup_data chain.
+ */
+static bool memremap_is_setup_data(resource_size_t phys_addr,
+				   unsigned long size)
+{
+	struct setup_data *data;
+	u64 paddr, paddr_next;
+
+	paddr = boot_params.hdr.setup_data;
+	while (paddr) {
+		bool is_setup_data = false;
+
+		if (phys_addr == paddr)
+			return true;
+
+		data = memremap(paddr, sizeof(*data),
+				MEMREMAP_WB | MEMREMAP_DEC);
+
+		paddr_next = data->next;
+
+		if ((phys_addr > paddr) && (phys_addr < (paddr + data->len)))
+			is_setup_data = true;
+
+		memunmap(data);
+
+		if (is_setup_data)
+			return true;
+
+		paddr = paddr_next;
+	}
+
+	return false;
+}
+
+/*
+ * Examine the physical address to determine if it is boot data by checking
+ * it against the boot params setup_data chain (early boot version).
+ */
+static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
+						unsigned long size)
+{
+	struct setup_data *data;
+	u64 paddr, paddr_next;
+
+	paddr = boot_params.hdr.setup_data;
+	while (paddr) {
+		bool is_setup_data = false;
+
+		if (phys_addr == paddr)
+			return true;
+
+		data = early_memremap_decrypted(paddr, sizeof(*data));
+
+		paddr_next = data->next;
+
+		if ((phys_addr > paddr) && (phys_addr < (paddr + data->len)))
+			is_setup_data = true;
+
+		early_memunmap(data, sizeof(*data));
+
+		if (is_setup_data)
+			return true;
+
+		paddr = paddr_next;
+	}
+
+	return false;
+}
+
+/*
+ * Architecture function to determine if RAM remap is allowed. By default, a
+ * RAM remap will map the data as encrypted. Determine if a RAM remap should
+ * not be done so that the data will be mapped decrypted.
+ */
+bool arch_memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size,
+				unsigned long flags)
+{
+	if (!sme_active())
+		return true;
+
+	if (flags & MEMREMAP_ENC)
+		return true;
+
+	if (flags & MEMREMAP_DEC)
+		return false;
+
+	if (memremap_is_setup_data(phys_addr, size) ||
+	    memremap_is_efi_data(phys_addr, size) ||
+	    memremap_should_map_decrypted(phys_addr, size))
+		return false;
+
+	return true;
+}
+
+/*
+ * Architecture override of __weak function to adjust the protection attributes
+ * used when remapping memory. By default, early_memremp() will map the data
+ * as encrypted. Determine if an encrypted mapping should not be done and set
+ * the appropriate protection attributes.
+ */
+pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
+					     unsigned long size,
+					     pgprot_t prot)
+{
+	if (!sme_active())
+		return prot;
+
+	if (early_memremap_is_setup_data(phys_addr, size) ||
+	    memremap_is_efi_data(phys_addr, size) ||
+	    memremap_should_map_decrypted(phys_addr, size))
+		prot = pgprot_decrypted(prot);
+	else
+		prot = pgprot_encrypted(prot);
+
+	return prot;
+}
+
 #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
 /* Remap memory with encryption */
 void __init *early_memremap_encrypted(resource_size_t phys_addr,
diff --git a/include/linux/io.h b/include/linux/io.h
index 82ef36e..deaeb1d 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -136,6 +136,8 @@  enum {
 	MEMREMAP_WB = 1 << 0,
 	MEMREMAP_WT = 1 << 1,
 	MEMREMAP_WC = 1 << 2,
+	MEMREMAP_ENC = 1 << 3,
+	MEMREMAP_DEC = 1 << 4,
 };
 
 void *memremap(resource_size_t offset, size_t size, unsigned long flags);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 07e85e5..2361bf7 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -34,13 +34,24 @@  static void *arch_memremap_wb(resource_size_t offset, unsigned long size)
 }
 #endif
 
-static void *try_ram_remap(resource_size_t offset, size_t size)
+#ifndef arch_memremap_do_ram_remap
+static bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size,
+				       unsigned long flags)
+{
+	return true;
+}
+#endif
+
+static void *try_ram_remap(resource_size_t offset, size_t size,
+			   unsigned long flags)
 {
 	unsigned long pfn = PHYS_PFN(offset);
 
 	/* In the simple case just return the existing linear address */
-	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
+	if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)) &&
+	    arch_memremap_do_ram_remap(offset, size, flags))
 		return __va(offset);
+
 	return NULL; /* fallback to arch_memremap_wb */
 }
 
@@ -48,7 +59,8 @@  static void *try_ram_remap(resource_size_t offset, size_t size)
  * memremap() - remap an iomem_resource as cacheable memory
  * @offset: iomem resource start address
  * @size: size of remap
- * @flags: any of MEMREMAP_WB, MEMREMAP_WT and MEMREMAP_WC
+ * @flags: any of MEMREMAP_WB, MEMREMAP_WT, MEMREMAP_WC,
+ *		  MEMREMAP_ENC, MEMREMAP_DEC
  *
  * memremap() is "ioremap" for cases where it is known that the resource
  * being mapped does not have i/o side effects and the __iomem
@@ -95,7 +107,7 @@  void *memremap(resource_size_t offset, size_t size, unsigned long flags)
 		 * the requested range is potentially in System RAM.
 		 */
 		if (is_ram == REGION_INTERSECTS)
-			addr = try_ram_remap(offset, size);
+			addr = try_ram_remap(offset, size, flags);
 		if (!addr)
 			addr = arch_memremap_wb(offset, size);
 	}
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index d7d30da..b1dd4a9 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -30,6 +30,13 @@  static int __init early_ioremap_debug_setup(char *str)
 
 static int after_paging_init __initdata;
 
+pgprot_t __init __weak early_memremap_pgprot_adjust(resource_size_t phys_addr,
+						    unsigned long size,
+						    pgprot_t prot)
+{
+	return prot;
+}
+
 void __init __weak early_ioremap_shutdown(void)
 {
 }
@@ -215,14 +222,19 @@  void __init early_iounmap(void __iomem *addr, unsigned long size)
 void __init *
 early_memremap(resource_size_t phys_addr, unsigned long size)
 {
-	return (__force void *)__early_ioremap(phys_addr, size,
-					       FIXMAP_PAGE_NORMAL);
+	pgprot_t prot = early_memremap_pgprot_adjust(phys_addr, size,
+						     FIXMAP_PAGE_NORMAL);
+
+	return (__force void *)__early_ioremap(phys_addr, size, prot);
 }
 #ifdef FIXMAP_PAGE_RO
 void __init *
 early_memremap_ro(resource_size_t phys_addr, unsigned long size)
 {
-	return (__force void *)__early_ioremap(phys_addr, size, FIXMAP_PAGE_RO);
+	pgprot_t prot = early_memremap_pgprot_adjust(phys_addr, size,
+						     FIXMAP_PAGE_RO);
+
+	return (__force void *)__early_ioremap(phys_addr, size, prot);
 }
 #endif