Message ID | 20180827185507.17087-8-jarkko.sakkinen@linux.intel.com (mailing list archive) |
---|---|
State | Deferred, archived |
Headers | show |
Series | Intel SGX1 support | expand |
On 08/27/2018 11:53 AM, Jarkko Sakkinen wrote: > Add data structures to track Enclave Page Cache (EPC) pages. EPC is > divided into multiple banks (1-N) of which addresses and sizes can be > enumerated with CPUID by the OS. > > On NUMA systems a node can have at most bank. A bank can be at most part of > two nodes. SGX supports both nodes with a single memory controller and also > sub-cluster nodes with severals memory controllers on a single die. > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > Co-developed-by: Serge Ayoun <serge.ayoun@intel.com> > Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com> > Signed-off-by: Serge Ayoun <serge.ayoun@intel.com> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> > --- > arch/x86/include/asm/sgx.h | 60 ++++++++++++++++++ > arch/x86/kernel/cpu/intel_sgx.c | 106 +++++++++++++++++++++++++++++++- > 2 files changed, 164 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h > index 2130e639ab49..17b7b3aa66bf 100644 > --- a/arch/x86/include/asm/sgx.h > +++ b/arch/x86/include/asm/sgx.h > @@ -4,9 +4,69 @@ > #ifndef _ASM_X86_SGX_H > #define _ASM_X86_SGX_H > > +#include <linux/bitops.h> > +#include <linux/err.h> > +#include <linux/rwsem.h> > #include <linux/types.h> > +#include <asm/sgx_arch.h> > +#include <asm/asm.h> > + > +#define SGX_MAX_EPC_BANKS 8 This is _still_ missing a meaningful description of what a bank is and whether it is a hardware or software structure. It would also help us to determine whether your bit packing below is really required. > +struct sgx_epc_page { > + unsigned long desc; > + struct list_head list; > +}; > + > +struct sgx_epc_bank { > + unsigned long pa; > + void *va; > + unsigned long size; Please add units. size could be bytes or pages, or who knows what. 
I can't tell you how many bugs I've tripped over in the past from simple unit conversions > + struct sgx_epc_page *pages_data; > + struct sgx_epc_page **pages; > + unsigned long free_cnt; > + spinlock_t lock; > +}; > > extern bool sgx_enabled; > extern bool sgx_lc_enabled; > +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS]; > + > +/* > + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc Why are you bothering packing these bits? This seems a rather convoluted way to store two integers. > +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long index, > + struct sgx_epc_bank *bank) > +{ > + unsigned long nr_pages = size >> PAGE_SHIFT; > + struct sgx_epc_page *pages_data; > + unsigned long i; > + void *va; > + > + va = ioremap_cache(addr, size); > + if (!va) > + return -ENOMEM; > + > + pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL); > + if (!pages_data) > + goto out_iomap; This looks like you're roughly limited by the page allocator to a bank size of ~1.4GB which seems kinda small. Is this really OK? 
> + bank->pages = kcalloc(nr_pages, sizeof(struct sgx_epc_page *), > + GFP_KERNEL); > + if (!bank->pages) > + goto out_pdata; > + > + for (i = 0; i < nr_pages; i++) { > + bank->pages[i] = &pages_data[i]; > + bank->pages[i]->desc = (addr + (i << PAGE_SHIFT)) | index; > + } > + > + bank->pa = addr; > + bank->size = size; > + bank->va = va; > + bank->free_cnt = nr_pages; > + bank->pages_data = pages_data; > + spin_lock_init(&bank->lock); > + return 0; > +out_pdata: > + kfree(pages_data); > +out_iomap: > + iounmap(va); > + return -ENOMEM; > +} > + > +static __init void sgx_page_cache_teardown(void) > +{ > + struct sgx_epc_bank *bank; > + int i; > + > + for (i = 0; i < sgx_nr_epc_banks; i++) { > + bank = &sgx_epc_banks[i]; > + iounmap((void *)bank->va); > + kfree(bank->pages); > + kfree(bank->pages_data); > + } > +} > + > +static inline u64 sgx_combine_bank_regs(u64 low, u64 high) > +{ > + return (low & 0xFFFFF000) + ((high & 0xFFFFF) << 32); > +} -ENOCOMMENT for a rather weird looking calculation > +static __init int sgx_page_cache_init(void) > +{ > + u32 eax, ebx, ecx, edx; > + u64 pa, size; > + int ret; > + int i; > + > + for (i = 0; i < SGX_MAX_EPC_BANKS; i++) { > + cpuid_count(SGX_CPUID, 2 + i, &eax, &ebx, &ecx, &edx); > + if (!(eax & 0xF)) > + break; So, we have random data coming out of a random CPUID leaf being called 'eax' and then being tested against a random hard-coded mask. This seems rather unfortunate for someone trying to understand the code. Can we do better? > + pa = sgx_combine_bank_regs(eax, ebx); > + size = sgx_combine_bank_regs(ecx, edx); > + pr_info("EPC bank 0x%llx-0x%llx\n", pa, pa + size - 1); > + ret = sgx_init_epc_bank(pa, size, i, &sgx_epc_banks[i]); > + if (ret) { > + sgx_page_cache_teardown(); > + return ret; > + } So if one bank fails, we tear down all banks, yet leave sgx_nr_epc_banks incremented? That sounds troublesome. 
> + sgx_nr_epc_banks++; > + } > + > + if (!sgx_nr_epc_banks) { > + pr_err("There are zero EPC banks.\n"); > + return -ENODEV; > + } > + > + return 0; > +} Does this support hot-addition of a bank? If not, why not?
On Mon, Aug 27, 2018 at 02:07:53PM -0700, Dave Hansen wrote: > On 08/27/2018 11:53 AM, Jarkko Sakkinen wrote: > > Add data structures to track Enclave Page Cache (EPC) pages. EPC is > > divided into multiple banks (1-N) of which addresses and sizes can be > > enumerated with CPUID by the OS. > > > > On NUMA systems a node can have at most bank. A bank can be at most part of > > two nodes. SGX supports both nodes with a single memory controller and also > > sub-cluster nodes with severals memory controllers on a single die. > > > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> > > Co-developed-by: Serge Ayoun <serge.ayoun@intel.com> > > Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com> > > Signed-off-by: Serge Ayoun <serge.ayoun@intel.com> > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> > > --- > > arch/x86/include/asm/sgx.h | 60 ++++++++++++++++++ > > arch/x86/kernel/cpu/intel_sgx.c | 106 +++++++++++++++++++++++++++++++- > > 2 files changed, 164 insertions(+), 2 deletions(-) > > > > diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h > > index 2130e639ab49..17b7b3aa66bf 100644 > > --- a/arch/x86/include/asm/sgx.h > > +++ b/arch/x86/include/asm/sgx.h > > @@ -4,9 +4,69 @@ > > #ifndef _ASM_X86_SGX_H > > #define _ASM_X86_SGX_H > > > > +#include <linux/bitops.h> > > +#include <linux/err.h> > > +#include <linux/rwsem.h> > > #include <linux/types.h> > > +#include <asm/sgx_arch.h> > > +#include <asm/asm.h> > > + > > +#define SGX_MAX_EPC_BANKS 8 > > This is _still_ missing a meaningful description of what a bank is and > whether it is a hardware or software structure. > > It would also help us to determine whether your bit packing below is > really required. I think a better name would be EPC section as this is what the SDM uses in the Table 3-8 when describing subleaves of EAX=0x12 (SGX specific leaf) starting from ECX=0x02. 
It is a software structure that contains the information given by these subleaves. These sections constitute the physical pages that are part of the EPC. > > +struct sgx_epc_page { > > + unsigned long desc; > > + struct list_head list; > > +}; > > + > > +struct sgx_epc_bank { > > + unsigned long pa; > > + void *va; > > + unsigned long size; > > Please add units. size could be bytes or pages, or who knows what. I > can't tell you how many bugs I've tripped over in the past from simple > unit conversions Will do. > > + struct sgx_epc_page *pages_data; > > + struct sgx_epc_page **pages; > > + unsigned long free_cnt; > > + spinlock_t lock; > > +}; > > > > extern bool sgx_enabled; > > extern bool sgx_lc_enabled; > > +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS]; > > + > > +/* > > + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc > > Why are you bothering packing these bits? This seems a rather > convoluted way to store two integers. To keep struct sgx_epc_page 64 bytes. > > +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long index, > > + struct sgx_epc_bank *bank) > > +{ > > + unsigned long nr_pages = size >> PAGE_SHIFT; > > + struct sgx_epc_page *pages_data; > > + unsigned long i; > > + void *va; > > + > > + va = ioremap_cache(addr, size); > > + if (!va) > > + return -ENOMEM; > > + > > + pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL); > > + if (!pages_data) > > + goto out_iomap; > > This looks like you're roughly limited by the page allocator to a bank > size of ~1.4GB which seems kinda small. Is this really OK? Where does this limitation come from? 
> > > + bank->pages = kcalloc(nr_pages, sizeof(struct sgx_epc_page *), > > + GFP_KERNEL); > > + if (!bank->pages) > > + goto out_pdata; > > + > > + for (i = 0; i < nr_pages; i++) { > > + bank->pages[i] = &pages_data[i]; > > + bank->pages[i]->desc = (addr + (i << PAGE_SHIFT)) | index; > > + } > > + > > + bank->pa = addr; > > + bank->size = size; > > + bank->va = va; > > + bank->free_cnt = nr_pages; > > + bank->pages_data = pages_data; > > + spin_lock_init(&bank->lock); > > + return 0; > > +out_pdata: > > + kfree(pages_data); > > +out_iomap: > > + iounmap(va); > > + return -ENOMEM; > > +} > > + > > +static __init void sgx_page_cache_teardown(void) > > +{ > > + struct sgx_epc_bank *bank; > > + int i; > > + > > + for (i = 0; i < sgx_nr_epc_banks; i++) { > > + bank = &sgx_epc_banks[i]; > > + iounmap((void *)bank->va); > > + kfree(bank->pages); > > + kfree(bank->pages_data); > > + } > > +} > > + > > +static inline u64 sgx_combine_bank_regs(u64 low, u64 high) > > +{ > > + return (low & 0xFFFFF000) + ((high & 0xFFFFF) << 32); > > +} > > -ENOCOMMENT for a rather weird looking calculation Yea, totally agreed... I'll think about how to make this cleaner. Maybe it would be anyway better idea to open code this to the call sites and explain the calculation in a comment. > > +static __init int sgx_page_cache_init(void) > > +{ > > + u32 eax, ebx, ecx, edx; > > + u64 pa, size; > > + int ret; > > + int i; > > + > > + for (i = 0; i < SGX_MAX_EPC_BANKS; i++) { > > + cpuid_count(SGX_CPUID, 2 + i, &eax, &ebx, &ecx, &edx); > > + if (!(eax & 0xF)) > > + break; > > So, we have random data coming out of a random CPUID leaf being called > 'eax' and then being tested against a random hard-coded mask. This > seems rather unfortunate for someone trying to understand the code. Can > we do better? 
Should probably do something along the lines: #define SGX_CPUID_SECTION(i) (2 + (i)) enum sgx_section { SGX_CPUID_SECTION_INVALID = 0x00, SGX_CPUID_SECTION_VALID = 0x1B, SGX_CPUID_SECTION_MASK = 0xFF, }; for (i = 0; i < SGX_MAX_EPC_BANKS; i++) { cpuid_count(SGX_CPUID, SGX_CPUID_SECTION(i), &eax, &ebx, &ecx, &edx); section = eax & SGX_SECTION_MASK; if (section != SGX_CPUID_SECTION_VALID) { if (section != SGX_CPUID_SECTION_INVALID) { /* Maybe a warning here for any other value as * they are reserved according to the SDM? */ } continue; } /* ... */ } > > + pa = sgx_combine_bank_regs(eax, ebx); > > + size = sgx_combine_bank_regs(ecx, edx); > > + pr_info("EPC bank 0x%llx-0x%llx\n", pa, pa + size - 1); > > + ret = sgx_init_epc_bank(pa, size, i, &sgx_epc_banks[i]); > > + if (ret) { > > + sgx_page_cache_teardown(); > > + return ret; > > + } > > So if one bank fails, we tear down all banks, yet leave sgx_nr_epc_banks > incremented? That sounds troublesome. It is. Thanks for spotting that out. > > + sgx_nr_epc_banks++; > > + } > > + > > + if (!sgx_nr_epc_banks) { > > + pr_err("There are zero EPC banks.\n"); > > + return -ENODEV; > > + } > > + > > + return 0; > > +} > > Does this support hot-addition of a bank? If not, why not? 
This is the DSDT for this data from my GLK NUC: Scope (_SB) { Device (EPC) { Name (_HID, EisaId ("INT0E0C")) // _HID: Hardware ID Name (_STR, Unicode ("Enclave Page Cache 1.0")) // _STR: Description String Name (_MLS, Package (0x01) // _MLS: Multiple Language String { Package (0x02) { "en", Unicode ("Enclave Page Cache 1.0") } }) Name (RBUF, ResourceTemplate () { QWordMemory (ResourceConsumer, PosDecode, MinNotFixed, MaxNotFixed, NonCacheable, ReadWrite, 0x0000000000000000, // Granularity 0x0000000000000000, // Range Minimum 0x0000000000000000, // Range Maximum 0x0000000000000000, // Translation Offset 0x0000000000000001, // Length ,, _Y18, AddressRangeMemory, TypeStatic) }) Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings { CreateQWordField (RBUF, \_SB.EPC._Y18._MIN, EMIN) // _MIN: Minimum Base Address CreateQWordField (RBUF, \_SB.EPC._Y18._MAX, EMAX) // _MAX: Maximum Base Address CreateQWordField (RBUF, \_SB.EPC._Y18._LEN, ELEN) // _LEN: Length EMIN = EMNA /* External reference */ ELEN = ELNG /* External reference */ EMAX = ((EMNA + ELNG) - One) Return (RBUF) /* \_SB_.EPC_.RBUF */ } Method (_STA, 0, NotSerialized) // _STA: Status { If ((EPCS != Zero)) { Return (0x0F) } Return (Zero) } } } I'm not aware that we would have an ACPI specification for SGX so this is all I have at the moment (does not show any ACPI event for hotplugging). /Jarkko
>>> extern bool sgx_enabled; >>> extern bool sgx_lc_enabled; >>> +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS]; >>> + >>> +/* >>> + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc >> >> Why are you bothering packing these bits? This seems a rather >> convoluted way to store two integers. > > To keep struct sgx_epc_page 64 bytes. It's a list_head and a ulong now. That doesn't add up to 64. If you properly describe the bounds and limits of banks we can possibly help you find a nice solution. As it stands, they are totally opaque and we have no idea what is going on. >>> +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long index, >>> + struct sgx_epc_bank *bank) >>> +{ >>> + unsigned long nr_pages = size >> PAGE_SHIFT; >>> + struct sgx_epc_page *pages_data; >>> + unsigned long i; >>> + void *va; >>> + >>> + va = ioremap_cache(addr, size); >>> + if (!va) >>> + return -ENOMEM; >>> + >>> + pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL); >>> + if (!pages_data) >>> + goto out_iomap; >> >> This looks like you're roughly limited by the page allocator to a bank >> size of ~1.4GB which seems kinda small. Is this really OK? > > Where does this limitation come from? The page allocator can only do 4MB at a time. Using your 64 byte numbers: 4MB/64 = 64k sgx_epc_pages. 64k*PAGE_SIZE = 256MB. So you can only handle 256MB banks with this code. BTW, if you only have 64k worth of pages, you can use a u16 for the index. >>> + u32 eax, ebx, ecx, edx; >>> + u64 pa, size; >>> + int ret; >>> + int i; >>> + >>> + for (i = 0; i < SGX_MAX_EPC_BANKS; i++) { >>> + cpuid_count(SGX_CPUID, 2 + i, &eax, &ebx, &ecx, &edx); >>> + if (!(eax & 0xF)) >>> + break; >> >> So, we have random data coming out of a random CPUID leaf being called >> 'eax' and then being tested against a random hard-coded mask. This >> seems rather unfortunate for someone trying to understand the code. Can >> we do better? 
> > Should probably do something along the lines: > > #define SGX_CPUID_SECTION(i) (2 + (i)) > > enum sgx_section { > SGX_CPUID_SECTION_INVALID = 0x00, > SGX_CPUID_SECTION_VALID = 0x1B, > SGX_CPUID_SECTION_MASK = 0xFF, > }; Plus comments, that would be nice. >>> + sgx_nr_epc_banks++; >>> + } >>> + >>> + if (!sgx_nr_epc_banks) { >>> + pr_err("There are zero EPC banks.\n"); >>> + return -ENODEV; >>> + } >>> + >>> + return 0; >>> +} >> >> Does this support hot-addition of a bank? If not, why not? ... > I'm not aware that we would have an ACPI specification for SGX so this > is all I have at the moment (does not show any ACPI event for > hotplugging). So you're saying the one platform you looked at doesn't support hotplug. I was looking for a broader statement about SGX.
On Tue, Aug 28, 2018 at 09:53:11AM -0700, Dave Hansen wrote: > >>> + sgx_nr_epc_banks++; > >>> + } > >>> + > >>> + if (!sgx_nr_epc_banks) { > >>> + pr_err("There are zero EPC banks.\n"); > >>> + return -ENODEV; > >>> + } > >>> + > >>> + return 0; > >>> +} > >> > >> Does this support hot-addition of a bank? If not, why not? > ... > > I'm not aware that we would have an ACPI specification for SGX so this > > is all I have at the moment (does not show any ACPI event for > > hotplugging). > > So you're saying the one platform you looked at don't support hotplug. > I was looking for a more broad statement about SGX. Hardware doesn't support hotplug of EPC as the EPC size and location is locked during activation of SGX. And IIRC, activation of SGX must be synchronized across all CPUs in a multi-socket platform, e.g. you can't late-enable SGX on a socket and do hotplugging that way. In a virtualized environment there are no such restrictions. I am not aware of any explicit requirements or use cases for supporting hotplug of EPC, but that's probably only because virtualization of SGX is fairly nascent.
On Tue, Aug 28, 2018 at 09:53:11AM -0700, Dave Hansen wrote: > >>> extern bool sgx_enabled; > >>> extern bool sgx_lc_enabled; > >>> +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS]; > >>> + > >>> +/* > >>> + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc > >> > >> Why are you bothering packing these bits? This seems a rather > >> convoluted way to store two integers. > > > > To keep struct sgx_epc_page 64 bytes. > > It's a list_head and a ulong now. That doesn't add up to 64. Ah, there used to be one more variable in it. > If you properly describe the bounds and limits of banks we can possibly > help you find a nice solution. As it stands, they are totally opaque > and we have no idea what is going on. Great, I see what I can do. I understand now better what you are looking for. Thanks Dave. /Jarkko
On Tue, Aug 28, 2018 at 02:34:32PM -0700, Sean Christopherson wrote: > On Tue, Aug 28, 2018 at 09:53:11AM -0700, Dave Hansen wrote: > > >>> + sgx_nr_epc_banks++; > > >>> + } > > >>> + > > >>> + if (!sgx_nr_epc_banks) { > > >>> + pr_err("There are zero EPC banks.\n"); > > >>> + return -ENODEV; > > >>> + } > > >>> + > > >>> + return 0; > > >>> +} > > >> > > >> Does this support hot-addition of a bank? If not, why not? > > ... > > > I'm not aware that we would have an ACPI specification for SGX so this > > > is all I have at the moment (does not show any ACPI event for > > > hotplugging). > > > > So you're saying the one platform you looked at don't support hotplug. > > I was looking for a more broad statement about SGX. > > Hardware doesn't support hotplug of EPC as the EPC size and location > is locked during activation of SGX. And IIRC, activation of SGX must > be synchronized across all CPUs in a multi-socket platform, e.g. you > can't late-enable SGX on a socket and due hotplugging that way. Makes me wonder how this would work on a multi-socket platform anyway, given that they have different fused keys? > In a virtualized environment there are no such restrictions. I am not > aware of any explicit requirements or use cases for supporting hotplug > of EPC, but that's probably only because virtualization of SGX is > fairly nascent. /Jarkko
On Mon, Aug 27, 2018 at 9:58 PM Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote: > > Add data structures to track Enclave Page Cache (EPC) pages. EPC is > divided into multiple banks (1-N) of which addresses and sizes can be > enumerated with CPUID by the OS. > > On NUMA systems a node can have at most one bank. A bank can be at most part of > two nodes. SGX supports both nodes with a single memory controller and also > sub-cluster nodes with several memory controllers on a single die. > -#include <asm/sgx.h> > -#include <asm/sgx_pr.h> > #include <linux/freezer.h> > #include <linux/highmem.h> > #include <linux/kthread.h> > +#include <linux/pagemap.h> > #include <linux/ratelimit.h> > #include <linux/sched/signal.h> > +#include <linux/shmem_fs.h> > #include <linux/slab.h> > +#include <asm/sgx.h> > +#include <asm/sgx_pr.h> Squash issues? > + va = ioremap_cache(addr, size); > + if (!va) > + return -ENOMEM; I'm not sure this is the right API. Do we operate with memory? Does it have I/O side effects? If no, memremap() would be better to use.
On Mon, Sep 03, 2018 at 05:41:53PM +0300, Andy Shevchenko wrote: > On Mon, Aug 27, 2018 at 9:58 PM Jarkko Sakkinen > <jarkko.sakkinen@linux.intel.com> wrote: > > > > Add data structures to track Enclave Page Cache (EPC) pages. EPC is > > divided into multiple banks (1-N) of which addresses and sizes can be > > enumerated with CPUID by the OS. > > > > On NUMA systems a node can have at most bank. A bank can be at most part of > > two nodes. SGX supports both nodes with a single memory controller and also > > sub-cluster nodes with severals memory controllers on a single die. > > > -#include <asm/sgx.h> > > -#include <asm/sgx_pr.h> > > #include <linux/freezer.h> > > #include <linux/highmem.h> > > #include <linux/kthread.h> > > +#include <linux/pagemap.h> > > #include <linux/ratelimit.h> > > #include <linux/sched/signal.h> > > +#include <linux/shmem_fs.h> > > #include <linux/slab.h> > > +#include <asm/sgx.h> > > +#include <asm/sgx_pr.h> > > Squash issues? Yes :-/ > > + va = ioremap_cache(addr, size); > > + if (!va) > > + return -ENOMEM; > > I'm not sure this is a right API. Do we operate with memory? Does it > have I/O side effects? > If no, memremap() would be better to use. Had this idea a long time ago but had forgotten it. EPC is from caching perspective like regular memory. > -- > With Best Regards, > Andy Shevchenko > /Jarkko
On Mon, Sep 03, 2018 at 05:41:53PM +0300, Andy Shevchenko wrote: > On Mon, Aug 27, 2018 at 9:58 PM Jarkko Sakkinen > <jarkko.sakkinen@linux.intel.com> wrote: > > > + va = ioremap_cache(addr, size); > > + if (!va) > > + return -ENOMEM; > > I'm not sure this is a right API. Do we operate with memory? Does it > have I/O side effects? > If no, memremap() would be better to use. Preserving __iomem is desirable. There aren't side effects per se, but direct non-enclave accesses to the EPC get abort page semantics so the kernel shouldn't be directly dereferencing a pointer to the EPC. Though by that argument, sgx_epc_bank.va, sgx_epc_addr's return and all ENCLS helpers should be tagged __iomem. For documentation purposes, maybe it would be better to use __private or "#define __sgx_epc __iomem" and use that?
On Tue, Sep 4, 2018 a> +/** > > > + va = ioremap_cache(addr, size); > > > + if (!va) > > > + return -ENOMEM; > > > > I'm not sure this is a right API. Do we operate with memory? Does it > > have I/O side effects? > > If no, memremap() would be better to use. > > Preserving __iomem is desirable. There aren't side effects per se, > but direct non-enclave accesses to the EPC get abort page semantics so > the kernel shouldn't be directly dereferencing a pointer to the EPC. > Though by that argument, sgx_epc_bank.va, sgx_epc_addr's return and > all ENCLS helpers should be tagged __iomem. Why? Does it related to *any* I/O?
On Tue, Sep 04, 2018 at 09:01:15PM +0300, Andy Shevchenko wrote: > On Tue, Sep 4, 2018 a> +/** > > > > > + va = ioremap_cache(addr, size); > > > > + if (!va) > > > > + return -ENOMEM; > > > > > > I'm not sure this is a right API. Do we operate with memory? Does it > > > have I/O side effects? > > > If no, memremap() would be better to use. > > > > Preserving __iomem is desirable. There aren't side effects per se, > > but direct non-enclave accesses to the EPC get abort page semantics so > > the kernel shouldn't be directly dereferencing a pointer to the EPC. > > Though by that argument, sgx_epc_bank.va, sgx_epc_addr's return and > > all ENCLS helpers should be tagged __iomem. > > Why? > Does it related to *any* I/O? No, hence my other comment that __private or a new tag altogether may be more appropriate. The noderef attribute is what we truly care about.
On Tue, Sep 04, 2018 at 11:17:35AM -0700, Sean Christopherson wrote: > On Tue, Sep 04, 2018 at 09:01:15PM +0300, Andy Shevchenko wrote: > > On Tue, Sep 4, 2018 a> +/** > > > > > > > + va = ioremap_cache(addr, size); > > > > > + if (!va) > > > > > + return -ENOMEM; > > > > > > > > I'm not sure this is a right API. Do we operate with memory? Does it > > > > have I/O side effects? > > > > If no, memremap() would be better to use. > > > > > > Preserving __iomem is desirable. There aren't side effects per se, > > > but direct non-enclave accesses to the EPC get abort page semantics so > > > the kernel shouldn't be directly dereferencing a pointer to the EPC. > > > Though by that argument, sgx_epc_bank.va, sgx_epc_addr's return and > > > all ENCLS helpers should be tagged __iomem. > > > > Why? > > Does it related to *any* I/O? > > No, hence my other comment that __private or a new tag altogether may > be more appropriate. The noderef attribute is what we truly care > about. My proposal is that we go with memremap() and use #define __sgx_epc __attribute__((noderef)) It makes sense to check that direct EPC pointers are not passed to functions when they are not supposed to. /Jarkko
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 2130e639ab49..17b7b3aa66bf 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -4,9 +4,69 @@ #ifndef _ASM_X86_SGX_H #define _ASM_X86_SGX_H +#include <linux/bitops.h> +#include <linux/err.h> +#include <linux/rwsem.h> #include <linux/types.h> +#include <asm/sgx_arch.h> +#include <asm/asm.h> + +#define SGX_MAX_EPC_BANKS 8 + +struct sgx_epc_page { + unsigned long desc; + struct list_head list; +}; + +struct sgx_epc_bank { + unsigned long pa; + void *va; + unsigned long size; + struct sgx_epc_page *pages_data; + struct sgx_epc_page **pages; + unsigned long free_cnt; + spinlock_t lock; +}; extern bool sgx_enabled; extern bool sgx_lc_enabled; +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS]; + +/* + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc + * @SGX_EPC_BANK_MASK: SGX allows a system to multiple EPC banks (at + * different physical locations). The index of a + * page's bank in its desc so that we can do a quick + * lookup of its virtual address (EPC is mapped via + * ioremap_cache() because it's non-standard memory). + * Current and near-future hardware defines at most + * eight banks, hence three bits to hold the bank. + * sgx_page_cache_init() asserts that the max bank + * index doesn't exceed SGX_EPC_BANK_MASK. + * @SGX_EPC_PAGE_RECLAIMABLE: When set, indicates a page is reclaimable. Used + * when freeing a page to know that we also need to + * remove the page from the active page list. + * + * Defines the layout of the desc field in the &struct sgx_epc_page, which + * contains EPC bank number, physical address of the page and the page status + * flag. 
+ */ +enum sgx_epc_page_desc { + SGX_EPC_BANK_MASK = GENMASK_ULL(3, 0), + SGX_EPC_PAGE_RECLAIMABLE = BIT(4), + /* bits 12-63 are reserved for the physical page address of the page */ +}; + +static inline struct sgx_epc_bank *sgx_epc_bank(struct sgx_epc_page *page) +{ + return &sgx_epc_banks[page->desc & SGX_EPC_BANK_MASK]; +} + +static inline void *sgx_epc_addr(struct sgx_epc_page *page) +{ + struct sgx_epc_bank *bank = sgx_epc_bank(page); + + return (void *)(bank->va + (page->desc & PAGE_MASK) - bank->pa); +} #endif /* _ASM_X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/intel_sgx.c b/arch/x86/kernel/cpu/intel_sgx.c index 17b46bec9c54..53ac172e8006 100644 --- a/arch/x86/kernel/cpu/intel_sgx.c +++ b/arch/x86/kernel/cpu/intel_sgx.c @@ -1,23 +1,121 @@ // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) // Copyright(c) 2016-17 Intel Corporation. -#include <asm/sgx.h> -#include <asm/sgx_pr.h> #include <linux/freezer.h> #include <linux/highmem.h> #include <linux/kthread.h> +#include <linux/pagemap.h> #include <linux/ratelimit.h> #include <linux/sched/signal.h> +#include <linux/shmem_fs.h> #include <linux/slab.h> +#include <asm/sgx.h> +#include <asm/sgx_pr.h> bool sgx_enabled __ro_after_init; EXPORT_SYMBOL_GPL(sgx_enabled); bool sgx_lc_enabled __ro_after_init; EXPORT_SYMBOL_GPL(sgx_lc_enabled); +struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS]; +EXPORT_SYMBOL_GPL(sgx_epc_banks); + +static int sgx_nr_epc_banks; + +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long index, + struct sgx_epc_bank *bank) +{ + unsigned long nr_pages = size >> PAGE_SHIFT; + struct sgx_epc_page *pages_data; + unsigned long i; + void *va; + + va = ioremap_cache(addr, size); + if (!va) + return -ENOMEM; + + pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL); + if (!pages_data) + goto out_iomap; + + bank->pages = kcalloc(nr_pages, sizeof(struct sgx_epc_page *), + GFP_KERNEL); + if (!bank->pages) + goto out_pdata; + + for (i = 0; i < nr_pages; i++) { + 
bank->pages[i] = &pages_data[i]; + bank->pages[i]->desc = (addr + (i << PAGE_SHIFT)) | index; + } + + bank->pa = addr; + bank->size = size; + bank->va = va; + bank->free_cnt = nr_pages; + bank->pages_data = pages_data; + spin_lock_init(&bank->lock); + return 0; +out_pdata: + kfree(pages_data); +out_iomap: + iounmap(va); + return -ENOMEM; +} + +static __init void sgx_page_cache_teardown(void) +{ + struct sgx_epc_bank *bank; + int i; + + for (i = 0; i < sgx_nr_epc_banks; i++) { + bank = &sgx_epc_banks[i]; + iounmap((void *)bank->va); + kfree(bank->pages); + kfree(bank->pages_data); + } +} + +static inline u64 sgx_combine_bank_regs(u64 low, u64 high) +{ + return (low & 0xFFFFF000) + ((high & 0xFFFFF) << 32); +} + +static __init int sgx_page_cache_init(void) +{ + u32 eax, ebx, ecx, edx; + u64 pa, size; + int ret; + int i; + + for (i = 0; i < SGX_MAX_EPC_BANKS; i++) { + cpuid_count(SGX_CPUID, 2 + i, &eax, &ebx, &ecx, &edx); + if (!(eax & 0xF)) + break; + + pa = sgx_combine_bank_regs(eax, ebx); + size = sgx_combine_bank_regs(ecx, edx); + pr_info("EPC bank 0x%llx-0x%llx\n", pa, pa + size - 1); + + ret = sgx_init_epc_bank(pa, size, i, &sgx_epc_banks[i]); + if (ret) { + sgx_page_cache_teardown(); + return ret; + } + + sgx_nr_epc_banks++; + } + + if (!sgx_nr_epc_banks) { + pr_err("There are zero EPC banks.\n"); + return -ENODEV; + } + + return 0; +} static __init int sgx_init(void) { unsigned long fc; + int ret; if (!boot_cpu_has(X86_FEATURE_SGX)) return false; @@ -39,6 +137,10 @@ static __init int sgx_init(void) if (!(fc & FEATURE_CONTROL_SGX_LE_WR)) pr_info("IA32_SGXLEPUBKEYHASHn MSRs are not writable\n"); + ret = sgx_page_cache_init(); + if (ret) + return ret; + sgx_enabled = true; sgx_lc_enabled = !!(fc & FEATURE_CONTROL_SGX_LE_WR); return 0;