diff mbox series

[v15,16/23] x86/sgx: Enumerate and track EPC sections

Message ID 20181102231320.29164-17-jarkko.sakkinen@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Intel SGX1 | expand

Commit Message

Jarkko Sakkinen Nov. 2, 2018, 11:11 p.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

Enumerate Enclave Page Cache (EPC) sections via CPUID and add the data
structures necessary to track EPC pages so that they can be allocated,
freed and managed.  As a system may have multiple EPC sections, invoke
CPUID on SGX sub-leafs until an invalid leaf is encountered.

On NUMA systems, a node can have at most one bank. A bank can be at
most part of two nodes.  SGX supports both nodes with a single memory
controller and also sub-cluster nodes with severals memory controllers
on a single die.

For simplicity, support a maximum of eight EPC sections.  Current
client hardware supports only a single section, while upcoming server
hardware will support at most eight sections.  Bounding the number of
sections also allows the section ID to be embedded along with a page's
offset in a single unsigned long, enabling easy retrieval of both the
VA and PA for a given page.

Co-developed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Co-developed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Co-developed-by: Serge Ayoun <serge.ayoun@intel.com>
Signed-off-by: Serge Ayoun <serge.ayoun@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/Kconfig                |  17 ++++
 arch/x86/include/asm/sgx.h      |  57 +++++++++++++
 arch/x86/kernel/cpu/Makefile    |   1 +
 arch/x86/kernel/cpu/intel_sgx.c | 137 ++++++++++++++++++++++++++++++++
 4 files changed, 212 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/intel_sgx.c

Comments

Jethro Beekman Nov. 3, 2018, 1:07 a.m. UTC | #1
On 2018-11-02 16:11, Jarkko Sakkinen wrote:
> diff --git a/arch/x86/kernel/cpu/intel_sgx.c b/arch/x86/kernel/cpu/intel_sgx.c
> new file mode 100644
> index 000000000000..b86aa4111592
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/intel_sgx.c
> 
> ...
> 
> +static __init int sgx_page_cache_init(void)
> +{
> +	u32 eax, ebx, ecx, edx, type;
> +	u64 pa, size;
> +	int ret;
> +	int i;
> +
> +	BUILD_BUG_ON(SGX_MAX_EPC_SECTIONS > (SGX_EPC_SECTION_MASK + 1));
> +
> +	for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) {

Perhaps print a warning if there are more than SGX_MAX_EPC_SECTIONS 
sections reported by CPUID.

> +		cpuid_count(SGX_CPUID, i + SGX_CPUID_FIRST_VARIABLE_SUB_LEAF,
> +			    &eax, &ebx, &ecx, &edx);
> +
> +		type = eax & SGX_CPUID_SUB_LEAF_TYPE_MASK;
> +		if (type == SGX_CPUID_SUB_LEAF_INVALID)
> +			break;
> +		if (type != SGX_CPUID_SUB_LEAF_EPC_SECTION) {
> +			pr_err_once("sgx: Unknown sub-leaf type: %u\n", type);
> +			continue;
> +		}
> +
> +		pa = sgx_calc_section_metric(eax, ebx);
> +		size = sgx_calc_section_metric(ecx, edx);
> +		pr_info("sgx: EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);
> +
> +		ret = sgx_init_epc_section(pa, size, i, &sgx_epc_sections[i]);
> +		if (ret) {
> +			sgx_page_cache_teardown();
> +			return ret;
> +		}
> +
> +		sgx_nr_epc_sections++;
> +	}
> +
> +	if (!sgx_nr_epc_sections) {
> +		pr_err("sgx: There are zero EPC sections.\n");
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}

--
Jethro Beekman | Fortanix
Andy Shevchenko Nov. 3, 2018, 1:22 p.m. UTC | #2
On Sat, Nov 3, 2018 at 1:17 AM Jarkko Sakkinen
<jarkko.sakkinen@linux.intel.com> wrote:
>
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> Enumerate Enclave Page Cache (EPC) sections via CPUID and add the data
> structures necessary to track EPC pages so that they can be allocated,
> freed and managed.  As a system may have multiple EPC sections, invoke
> CPUID on SGX sub-leafs until an invalid leaf is encountered.
>
> On NUMA systems, a node can have at most one bank. A bank can be at
> most part of two nodes.  SGX supports both nodes with a single memory
> controller and also sub-cluster nodes with severals memory controllers
> on a single die.
>
> For simplicity, support a maximum of eight EPC sections.  Current
> client hardware supports only a single section, while upcoming server
> hardware will support at most eight sections.  Bounding the number of
> sections also allows the section ID to be embedded along with a page's
> offset in a single unsigned long, enabling easy retrieval of both the
> VA and PA for a given page.

> +       iounmap(section->va);

> +       section->va = ioremap_cache(addr, size);
> +       if (!section->va)
> +               return -ENOMEM;

I forgot if it's a place in code on which we discussed ioremap() vs.
memremap() call.
And I forgot why the conclusion is to leave ioremap().

Sorry, if I'm mistaken.
Jarkko Sakkinen Nov. 5, 2018, 5:31 p.m. UTC | #3
On Sat, Nov 03, 2018 at 01:07:27AM +0000, Jethro Beekman wrote:
> > +	for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) {
> 
> Perhaps print a warning if there are more than SGX_MAX_EPC_SECTIONS sections
> reported by CPUID.

Makes sense. I'll add it.

/Jarkko
Jarkko Sakkinen Nov. 5, 2018, 5:35 p.m. UTC | #4
On Sat, Nov 03, 2018 at 03:22:34PM +0200, Andy Shevchenko wrote:
> On Sat, Nov 3, 2018 at 1:17 AM Jarkko Sakkinen
> <jarkko.sakkinen@linux.intel.com> wrote:
> >
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> >
> > Enumerate Enclave Page Cache (EPC) sections via CPUID and add the data
> > structures necessary to track EPC pages so that they can be allocated,
> > freed and managed.  As a system may have multiple EPC sections, invoke
> > CPUID on SGX sub-leafs until an invalid leaf is encountered.
> >
> > On NUMA systems, a node can have at most one bank. A bank can be at
> > most part of two nodes.  SGX supports both nodes with a single memory
> > controller and also sub-cluster nodes with severals memory controllers
> > on a single die.
> >
> > For simplicity, support a maximum of eight EPC sections.  Current
> > client hardware supports only a single section, while upcoming server
> > hardware will support at most eight sections.  Bounding the number of
> > sections also allows the section ID to be embedded along with a page's
> > offset in a single unsigned long, enabling easy retrieval of both the
> > VA and PA for a given page.
> 
> > +       iounmap(section->va);
> 
> > +       section->va = ioremap_cache(addr, size);
> > +       if (!section->va)
> > +               return -ENOMEM;
> 
> I forgot if it's a place in code on which we discussed ioremap() vs.
> memremap() call.
> And I forgot why the conclusion is to leave ioremap().
> 
> Sorry, if I'm mistaken.

Thanks for pointing this out. It very well might I have either

1. forgotten to do the switch.
2. had some open about it that I forgot to ask.

Anyway, I'll check this and at minimum document the reason to the
change log if there is one to keep the ioremap().

/Jarkko
Jarkko Sakkinen Nov. 6, 2018, 12:10 p.m. UTC | #5
On Sat, Nov 03, 2018 at 03:22:34PM +0200, Andy Shevchenko wrote:
> On Sat, Nov 3, 2018 at 1:17 AM Jarkko Sakkinen
> <jarkko.sakkinen@linux.intel.com> wrote:
> >
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> >
> > Enumerate Enclave Page Cache (EPC) sections via CPUID and add the data
> > structures necessary to track EPC pages so that they can be allocated,
> > freed and managed.  As a system may have multiple EPC sections, invoke
> > CPUID on SGX sub-leafs until an invalid leaf is encountered.
> >
> > On NUMA systems, a node can have at most one bank. A bank can be at
> > most part of two nodes.  SGX supports both nodes with a single memory
> > controller and also sub-cluster nodes with severals memory controllers
> > on a single die.
> >
> > For simplicity, support a maximum of eight EPC sections.  Current
> > client hardware supports only a single section, while upcoming server
> > hardware will support at most eight sections.  Bounding the number of
> > sections also allows the section ID to be embedded along with a page's
> > offset in a single unsigned long, enabling easy retrieval of both the
> > VA and PA for a given page.
> 
> > +       iounmap(section->va);
> 
> > +       section->va = ioremap_cache(addr, size);
> > +       if (!section->va)
> > +               return -ENOMEM;
> 
> I forgot if it's a place in code on which we discussed ioremap() vs.
> memremap() call.
> And I forgot why the conclusion is to leave ioremap().
> 
> Sorry, if I'm mistaken.

As far as I can tell you are not mistaken. It happened that in the patch
set version that you gave this comment I first the fixed missing __iomem
annotations because I like to do other changes on top of code that has
no known regressions. Then I guess I simply forgot to replace it with
memremap(). I will do it for the next version.

/Jarkko
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d..b47e1a144409 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1913,6 +1913,23 @@  config X86_INTEL_MEMORY_PROTECTION_KEYS
 
 	  If unsure, say y.
 
+config INTEL_SGX_CORE
+	bool "Intel SGX core functionality"
+	depends on X86_64 && CPU_SUP_INTEL
+	help
+	Intel Software Guard eXtensions (SGX) CPU feature that allows ring 3
+	applications to create enclaves: private regions of memory that are
+	architecturally protected from unauthorized access and/or modification.
+
+	This option enables kernel recognition of SGX, high-level management
+	of the Enclave Page Cache (EPC), tracking and writing of SGX Launch
+	Enclave Hash MSRs, and allows for virtualization of SGX via KVM. By
+	itself, this option does not provide SGX support to userspace.
+
+	For details, see Documentation/x86/intel_sgx.rst
+
+	If unsure, say N.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 23361d508348..aa51165eb3a8 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -5,9 +5,66 @@ 
 #ifndef _ASM_X86_SGX_H
 #define _ASM_X86_SGX_H
 
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/rwsem.h>
+#include <linux/types.h>
 #include <asm/asm.h>
 #include <asm/sgx_arch.h>
 
+struct sgx_epc_page {
+	unsigned long desc;
+	struct list_head list;
+};
+
+/**
+ * struct sgx_epc_section
+ *
+ * The firmware can define multiple chunks of EPC to the different areas of the
+ * physical memory e.g. for memory areas of the each node. This structure is
+ * used to store EPC pages for one EPC section and virtual memory area where
+ * the pages have been mapped.
+ */
+struct sgx_epc_section {
+	unsigned long pa;
+	void __iomem *va;
+	struct sgx_epc_page **pages;
+	unsigned long free_cnt;
+	spinlock_t lock;
+};
+
+#define SGX_MAX_EPC_SECTIONS	8
+
+extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
+
+/**
+ * enum sgx_epc_page_desc - bits and masks for an EPC page's descriptor
+ * %SGX_EPC_SECTION_MASK:	SGX allows to have multiple EPC sections in the
+ *				physical memory. The existing and near-future
+ *				hardware defines at most eight sections, hence
+ *				three bits to hold a section.
+ * %SGX_EPC_PAGE_RECLAIMABLE:	The page page is reclaimable. Used when freeing
+ *				a page to know that we also need to remove the
+ *				page from the list of reclaimable pages.
+ */
+enum sgx_epc_page_desc {
+	SGX_EPC_SECTION_MASK			= GENMASK_ULL(3, 0),
+	SGX_EPC_PAGE_RECLAIMABLE		= BIT(4),
+	/* bits 12-63 are reserved for the physical page address of the page */
+};
+
+static inline struct sgx_epc_section *sgx_epc_section(struct sgx_epc_page *page)
+{
+	return &sgx_epc_sections[page->desc & SGX_EPC_SECTION_MASK];
+}
+
+static inline void __iomem *sgx_epc_addr(struct sgx_epc_page *page)
+{
+	struct sgx_epc_section *section = sgx_epc_section(page);
+
+	return section->va + (page->desc & PAGE_MASK) - section->pa;
+}
+
 /**
  * ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr
  *
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 347137e80bf5..71876f2b35fc 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -38,6 +38,7 @@  obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 obj-$(CONFIG_INTEL_RDT)	+= intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o
 obj-$(CONFIG_INTEL_RDT)	+= intel_rdt_ctrlmondata.o intel_rdt_pseudo_lock.o
 CFLAGS_intel_rdt_pseudo_lock.o = -I$(src)
+obj-$(CONFIG_INTEL_SGX_CORE)		+= intel_sgx.o
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
diff --git a/arch/x86/kernel/cpu/intel_sgx.c b/arch/x86/kernel/cpu/intel_sgx.c
new file mode 100644
index 000000000000..b86aa4111592
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_sgx.c
@@ -0,0 +1,137 @@ 
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-17 Intel Corporation.
+
+#include <linux/freezer.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/ratelimit.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <asm/sgx.h>
+
+struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
+EXPORT_SYMBOL_GPL(sgx_epc_sections);
+
+static int sgx_nr_epc_sections;
+
+static __init void sgx_free_epc_section(struct sgx_epc_section *section)
+{
+	int i;
+
+	for (i = 0; i < section->free_cnt && section->pages[i]; i++)
+		kfree(section->pages[i]);
+	kfree(section->pages);
+	iounmap(section->va);
+}
+
+static __init int sgx_init_epc_section(u64 addr, u64 size, unsigned long index,
+				       struct sgx_epc_section *section)
+{
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	unsigned long i;
+
+	section->va = ioremap_cache(addr, size);
+	if (!section->va)
+		return -ENOMEM;
+
+	section->pa = addr;
+	section->free_cnt = nr_pages;
+	spin_lock_init(&section->lock);
+
+	section->pages = kcalloc(nr_pages, sizeof(struct sgx_epc_page *),
+				 GFP_KERNEL);
+	if (!section->pages)
+		goto out;
+
+	for (i = 0; i < nr_pages; i++) {
+		section->pages[i] = kzalloc(sizeof(struct sgx_epc_page),
+					    GFP_KERNEL);
+		if (!section->pages[i])
+			goto out;
+
+		section->pages[i]->desc = (addr + (i << PAGE_SHIFT)) | index;
+	}
+
+	return 0;
+out:
+	sgx_free_epc_section(section);
+	return -ENOMEM;
+}
+
+static __init void sgx_page_cache_teardown(void)
+{
+	int i;
+
+	for (i = 0; i < sgx_nr_epc_sections; i++)
+		sgx_free_epc_section(&sgx_epc_sections[i]);
+}
+
+/**
+ * A section metric is concatenated in a way that @low bits 12-31 define the
+ * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
+ * metric.
+ */
+static inline u64 sgx_calc_section_metric(u64 low, u64 high)
+{
+	return (low & GENMASK_ULL(31, 12)) +
+	       ((high & GENMASK_ULL(19, 0)) << 32);
+}
+
+static __init int sgx_page_cache_init(void)
+{
+	u32 eax, ebx, ecx, edx, type;
+	u64 pa, size;
+	int ret;
+	int i;
+
+	BUILD_BUG_ON(SGX_MAX_EPC_SECTIONS > (SGX_EPC_SECTION_MASK + 1));
+
+	for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) {
+		cpuid_count(SGX_CPUID, i + SGX_CPUID_FIRST_VARIABLE_SUB_LEAF,
+			    &eax, &ebx, &ecx, &edx);
+
+		type = eax & SGX_CPUID_SUB_LEAF_TYPE_MASK;
+		if (type == SGX_CPUID_SUB_LEAF_INVALID)
+			break;
+		if (type != SGX_CPUID_SUB_LEAF_EPC_SECTION) {
+			pr_err_once("sgx: Unknown sub-leaf type: %u\n", type);
+			continue;
+		}
+
+		pa = sgx_calc_section_metric(eax, ebx);
+		size = sgx_calc_section_metric(ecx, edx);
+		pr_info("sgx: EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);
+
+		ret = sgx_init_epc_section(pa, size, i, &sgx_epc_sections[i]);
+		if (ret) {
+			sgx_page_cache_teardown();
+			return ret;
+		}
+
+		sgx_nr_epc_sections++;
+	}
+
+	if (!sgx_nr_epc_sections) {
+		pr_err("sgx: There are zero EPC sections.\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static __init int sgx_init(void)
+{
+	int ret;
+
+	if (!boot_cpu_has(X86_FEATURE_SGX))
+		return false;
+
+	ret = sgx_page_cache_init();
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+arch_initcall(sgx_init);