diff mbox series

[v30,07/20] x86/sgx: Enumerate and track EPC sections

Message ID 20200515004410.723949-8-jarkko.sakkinen@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Intel SGX foundations | expand

Commit Message

Jarkko Sakkinen May 15, 2020, 12:43 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

Enumerate Enclave Page Cache (EPC) sections via CPUID and add the data
structures necessary to track EPC pages so that they can be allocated,
freed and managed. As a system may have multiple EPC sections, invoke CPUID
on SGX sub-leafs until an invalid leaf is encountered.

For simplicity, support a maximum of eight EPC sections. Existing client
hardware supports only a single section, while upcoming server hardware
will support at most eight sections. Bounding the number of sections also
allows the section ID to be embedded along with a page's offset in a single
unsigned long, enabling easy retrieval of both the VA and PA for a given
page.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Co-developed-by: Serge Ayoun <serge.ayoun@intel.com>
Signed-off-by: Serge Ayoun <serge.ayoun@intel.com>
Co-developed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Acked-by: Jethro Beekman <jethro@fortanix.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/Kconfig                  |  14 +++
 arch/x86/kernel/cpu/Makefile      |   1 +
 arch/x86/kernel/cpu/sgx/Makefile  |   3 +
 arch/x86/kernel/cpu/sgx/main.c    | 151 ++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/sgx/reclaim.c |  82 ++++++++++++++++
 arch/x86/kernel/cpu/sgx/sgx.h     |  70 ++++++++++++++
 6 files changed, 321 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/sgx/Makefile
 create mode 100644 arch/x86/kernel/cpu/sgx/main.c
 create mode 100644 arch/x86/kernel/cpu/sgx/reclaim.c
 create mode 100644 arch/x86/kernel/cpu/sgx/sgx.h

Comments

Borislav Petkov May 25, 2020, 9:23 a.m. UTC | #1
On Fri, May 15, 2020 at 03:43:57AM +0300, Jarkko Sakkinen wrote:
> +config INTEL_SGX
> +	bool "Intel SGX"
> +	depends on X86_64 && CPU_SUP_INTEL
> +	select SRCU
> +	select MMU_NOTIFIER
> +	help
> +	  Intel(R) SGX is a set of CPU instructions that can be used by
> +	  applications to set aside private regions of code and data, referred
> +	  to as enclaves. An enclave's private memory can only be accessed by
> +	  code running within the enclave. Accesses from outside the enclave,
> +	  including other enclaves, are disallowed by hardware.
> +
> +	  If unsure, say N.
> +

Enabling this gives:

In file included from arch/x86/kernel/cpu/sgx/main.c:11:
arch/x86/kernel/cpu/sgx/encls.h:189:51: warning: ‘struct sgx_einittoken’ declared inside parameter list will not be visible outside of this definition or declaration
  189 | static inline int __einit(void *sigstruct, struct sgx_einittoken *einittoken,
      |                                                   ^~~~~~~~~~~~~~
In file included from arch/x86/kernel/cpu/sgx/reclaim.c:12:
arch/x86/kernel/cpu/sgx/encls.h:189:51: warning: ‘struct sgx_einittoken’ declared inside parameter list will not be visible outside of this definition or declaration
  189 | static inline int __einit(void *sigstruct, struct sgx_einittoken *einittoken,
      |

You need a forward declaration somewhere.

>  config EFI
>  	bool "EFI runtime service support"
>  	depends on ACPI
> diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
> index 7dc4ad68eb41..45534fb81007 100644
> --- a/arch/x86/kernel/cpu/Makefile
> +++ b/arch/x86/kernel/cpu/Makefile
> @@ -46,6 +46,7 @@ obj-$(CONFIG_X86_MCE)			+= mce/
>  obj-$(CONFIG_MTRR)			+= mtrr/
>  obj-$(CONFIG_MICROCODE)			+= microcode/
>  obj-$(CONFIG_X86_CPU_RESCTRL)		+= resctrl/
> +obj-$(CONFIG_INTEL_SGX)			+= sgx/
>  
>  obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
>  
> diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
> new file mode 100644
> index 000000000000..2dec75916a5e
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/sgx/Makefile
> @@ -0,0 +1,3 @@
> +obj-y += \
> +	main.o \
> +	reclaim.o
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> new file mode 100644
> index 000000000000..38424c1e8341
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -0,0 +1,151 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> +// Copyright(c) 2016-17 Intel Corporation.
> +
> +#include <linux/freezer.h>
> +#include <linux/highmem.h>
> +#include <linux/kthread.h>
> +#include <linux/pagemap.h>
> +#include <linux/ratelimit.h>
> +#include <linux/sched/signal.h>
> +#include <linux/slab.h>
> +#include "encls.h"
> +
> +struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
> +int sgx_nr_epc_sections;

We have become very averse against global stuff. What is going to use
those, only sgx code I assume...?

> +static bool __init sgx_page_cache_init(void)
> +{
> +	u32 eax, ebx, ecx, edx, type;
> +	u64 pa, size;
> +	int i;
> +
> +	for (i = 0; i <= ARRAY_SIZE(sgx_epc_sections); i++) {
> +		cpuid_count(SGX_CPUID, i + SGX_CPUID_FIRST_VARIABLE_SUB_LEAF,
> +			    &eax, &ebx, &ecx, &edx);
> +
> +		type = eax & SGX_CPUID_SUB_LEAF_TYPE_MASK;
> +		if (type == SGX_CPUID_SUB_LEAF_INVALID)
> +			break;
> +
> +		if (type != SGX_CPUID_SUB_LEAF_EPC_SECTION) {
> +			pr_err_once("Unknown EPC section type: %u\n", type);
> +			break;
> +		}
> +
> +		if (i == ARRAY_SIZE(sgx_epc_sections)) {
> +			pr_warn("No free slot for an EPC section\n");
> +			break;
> +		}

This is also the loop termination: do we really need this warn or can
the loop simply do "i < ARRAY_SIZE" ?

If the warn is needed, it can be after the loop too.

> +
> +		pa = sgx_calc_section_metric(eax, ebx);
> +		size = sgx_calc_section_metric(ecx, edx);
> +
> +		pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);

I'm assuming that's useful information to issue in dmesg?

> diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
> new file mode 100644
> index 000000000000..215371588a25
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/sgx/reclaim.c
> @@ -0,0 +1,82 @@
> +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> +// Copyright(c) 2016-19 Intel Corporation.
> +
> +#include <linux/freezer.h>
> +#include <linux/highmem.h>
> +#include <linux/kthread.h>
> +#include <linux/pagemap.h>
> +#include <linux/ratelimit.h>
> +#include <linux/slab.h>
> +#include <linux/sched/mm.h>
> +#include <linux/sched/signal.h>
> +#include "encls.h"
> +
> +struct task_struct *ksgxswapd_tsk;

Same for this one: also shared only among sgx code?

> +/**
> + * enum sgx_epc_page_desc - bits and masks for an EPC page's descriptor
> + * %SGX_EPC_SECTION_MASK:	SGX allows to have multiple EPC sections in the
> + *				physical memory. The existing and near-future
> + *				hardware defines at most eight sections, hence
> + *				three bits to hold a section.
> + */
> +enum sgx_epc_page_desc {
> +	SGX_EPC_SECTION_MASK			= GENMASK_ULL(3, 0),

If that should be three bits, then it should be (2, 0). Because now you
have 4 bits:

# arch/x86/kernel/cpu/sgx/sgx.h:56:     return &sgx_epc_sections[page->desc & SGX_EPC_SECTION_MASK];
        andl    $15, %edx       #, _22
		^^^

> +	/* bits 12-63 are reserved for the physical page address of the page */
> +};
> +
> +#define SGX_MAX_EPC_SECTIONS (SGX_EPC_SECTION_MASK + 1)
> +
> +extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
> +
> +static inline struct sgx_epc_section *sgx_epc_section(struct sgx_epc_page *page)

This function name needs a verb. "get" sounds fitting. :)

> +{
> +	return &sgx_epc_sections[page->desc & SGX_EPC_SECTION_MASK];
> +}
> +
> +static inline void *sgx_epc_addr(struct sgx_epc_page *page)

Ditto.
Sean Christopherson May 27, 2020, 3:56 a.m. UTC | #2
On Mon, May 25, 2020 at 11:23:04AM +0200, Borislav Petkov wrote:
> On Fri, May 15, 2020 at 03:43:57AM +0300, Jarkko Sakkinen wrote:
> > +struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
> > +int sgx_nr_epc_sections;
> 
> We have become very averse against global stuff. What is going to use
> those, only sgx code I assume...?

Yes, only SGX code.  The reclaim/swap code needs access to the sections,
and that code is in a different file, reclaim.c.  I don't have a super
strong objection to sucking reclaim.c into main.c, but I'm somewhat
indifferent on code organization as a whole.  Jarkko likely has a stronger
opinion.

> > +static bool __init sgx_page_cache_init(void)
> > +{
> > +	u32 eax, ebx, ecx, edx, type;
> > +	u64 pa, size;
> > +	int i;
> > +
> > +	for (i = 0; i <= ARRAY_SIZE(sgx_epc_sections); i++) {
> > +		cpuid_count(SGX_CPUID, i + SGX_CPUID_FIRST_VARIABLE_SUB_LEAF,
> > +			    &eax, &ebx, &ecx, &edx);
> > +
> > +		type = eax & SGX_CPUID_SUB_LEAF_TYPE_MASK;
> > +		if (type == SGX_CPUID_SUB_LEAF_INVALID)
> > +			break;
> > +
> > +		if (type != SGX_CPUID_SUB_LEAF_EPC_SECTION) {
> > +			pr_err_once("Unknown EPC section type: %u\n", type);
> > +			break;
> > +		}
> > +
> > +		if (i == ARRAY_SIZE(sgx_epc_sections)) {
> > +			pr_warn("No free slot for an EPC section\n");
> > +			break;
> > +		}
> 
> This is also the loop termination: do we really need this warn or can
> the loop simply do "i < ARRAY_SIZE" ?

The warn alerts the user that there will effectively be lost/unused memory,
so IMO it's worth keeping.
 
> If the warn is needed, it can be after the loop too.
> > +
> > +		pa = sgx_calc_section_metric(eax, ebx);
> > +		size = sgx_calc_section_metric(ecx, edx);
> > +
> > +		pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);
> 
> I'm assuming that's useful information to issue in dmesg?

Yes, it's effectively the equivalent of dumping the e820 tables.  It might
not be as useful now that the code is stable, but I suspect it will come in
handy for debug/triage down the road.
 
> > diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
> > new file mode 100644
> > index 000000000000..215371588a25
> > --- /dev/null
> > +++ b/arch/x86/kernel/cpu/sgx/reclaim.c
> > @@ -0,0 +1,82 @@
> > +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
> > +// Copyright(c) 2016-19 Intel Corporation.
> > +
> > +#include <linux/freezer.h>
> > +#include <linux/highmem.h>
> > +#include <linux/kthread.h>
> > +#include <linux/pagemap.h>
> > +#include <linux/ratelimit.h>
> > +#include <linux/slab.h>
> > +#include <linux/sched/mm.h>
> > +#include <linux/sched/signal.h>
> > +#include "encls.h"
> > +
> > +struct task_struct *ksgxswapd_tsk;
> 
> Same for this one: also shared only among sgx code?

Yes, that one can definitely be buried behind a helper.

> > +/**
> > + * enum sgx_epc_page_desc - bits and masks for an EPC page's descriptor
> > + * %SGX_EPC_SECTION_MASK:	SGX allows to have multiple EPC sections in the
> > + *				physical memory. The existing and near-future
> > + *				hardware defines at most eight sections, hence
> > + *				three bits to hold a section.
> > + */
> > +enum sgx_epc_page_desc {
> > +	SGX_EPC_SECTION_MASK			= GENMASK_ULL(3, 0),
> 
> If that should be three bits, then it should be (2, 0). Because now you
> have 4 bits:

Apparently even pre-school math is hard these days.  I'm pretty sure that's
an ongoing brain fart, I don't think we ever conciously bumped it to 4 bits.

That being said, using 4 bits (or even 5+) isn't necessary a bad thing.  We
aren't tight on bits and the burned memory isn't awful, e.g. <100 bytes per
unused section.  Not having to update the kernel just to handle a system
with more EPC sections would be nice, though we (Intel) should check on our
end to see if nearish-future CPUs are still hard limited to eight sections.

One idea would be to provide a Kconfig a la NR_CPUS or NODES_SHIFT.  I.e.
carve out the bits in sgx_epc_page_desc to allow up to N sections, but let
the user limit the number of sections to recoup the unused memory.

> # arch/x86/kernel/cpu/sgx/sgx.h:56:     return &sgx_epc_sections[page->desc & SGX_EPC_SECTION_MASK];
>         andl    $15, %edx       #, _22
> 		^^^
> 
> > +	/* bits 12-63 are reserved for the physical page address of the page */
> > +};
Borislav Petkov May 27, 2020, 8:35 p.m. UTC | #3
On Tue, May 26, 2020 at 08:56:14PM -0700, Sean Christopherson wrote:
> > > +		if (i == ARRAY_SIZE(sgx_epc_sections)) {
> > > +			pr_warn("No free slot for an EPC section\n");
> > > +			break;
> > > +		}
> > 
> > This is also the loop termination: do we really need this warn or can
> > the loop simply do "i < ARRAY_SIZE" ?
> 
> The warn alerts the user that there will effectively be lost/unused memory,
> so IMO it's worth keeping.

Can the user do anything about it? If not, then probably no need to say
anything.

> > > +		pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);
> > 
> > I'm assuming that's useful information to issue in dmesg?
> 
> Yes, it's effectively the equivalent of dumping the e820 tables.  It might
> not be as useful now that the code is stable, but I suspect it will come in
> handy for debug/triage down the road.

Ok.

> > Same for this one: also shared only among sgx code?
> 
> Yes, that one can definitely be buried behind a helper.

Shared only within sgx is fine. 

<snip> 

> One idea would be to provide a Kconfig a la NR_CPUS or NODES_SHIFT.  I.e.
> carve out the bits in sgx_epc_page_desc to allow up to N sections, but let
> the user limit the number of sections to recoup the unused memory.

I'd prefer a good estimate of a highest value, say 8 bits, which should
suffice for the foreseeable future. That's the simplest thing to do and
our Kconfig symbols space is an abomination of gazillion symbols.

Thx.
Jarkko Sakkinen May 28, 2020, 5:13 a.m. UTC | #4
On Mon, May 25, 2020 at 11:23:04AM +0200, Borislav Petkov wrote:
> Enabling this gives:
> 
> In file included from arch/x86/kernel/cpu/sgx/main.c:11:
> arch/x86/kernel/cpu/sgx/encls.h:189:51: warning: ‘struct sgx_einittoken’ declared inside parameter list will not be visible outside of this definition or declaration
>   189 | static inline int __einit(void *sigstruct, struct sgx_einittoken *einittoken,
>       |                                                   ^~~~~~~~~~~~~~
> In file included from arch/x86/kernel/cpu/sgx/reclaim.c:12:
> arch/x86/kernel/cpu/sgx/encls.h:189:51: warning: ‘struct sgx_einittoken’ declared inside parameter list will not be visible outside of this definition or declaration
>   189 | static inline int __einit(void *sigstruct, struct sgx_einittoken *einittoken,
>       |
> 
> You need a forward declaration somewhere.

It is a left-over from v28 and should be "void *".

To backtrack what happened it looks that I squashed the change that
does this to "x86/sgx: Linux Enclave Driver".

This is fixed now in my tree.

/Jarkko
Jarkko Sakkinen May 28, 2020, 5:25 a.m. UTC | #5
On Tue, May 26, 2020 at 08:56:14PM -0700, Sean Christopherson wrote:
> On Mon, May 25, 2020 at 11:23:04AM +0200, Borislav Petkov wrote:
> > On Fri, May 15, 2020 at 03:43:57AM +0300, Jarkko Sakkinen wrote:
> > > +struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
> > > +int sgx_nr_epc_sections;
> > 
> > We have become very averse against global stuff. What is going to use
> > those, only sgx code I assume...?
> 
> Yes, only SGX code.  The reclaim/swap code needs access to the sections,
> and that code is in a different file, reclaim.c.  I don't have a super
> strong objection to sucking reclaim.c into main.c, but I'm somewhat
> indifferent on code organization as a whole.  Jarkko likely has a stronger
> opinion.

I'll change it.

It's not quite as easy as just "sucking the file in". All the commits
that touch the file need to be reworked:

$ git --no-pager log --format="%H %s" arch/x86/kernel/cpu/sgx/reclaim.c
5aeca6dabf767e9350ee3188ba25ceb21f3162b4 x86/sgx: Add a page reclaimer
de9b1088959f36ffdaf43a49bfea1c7f9f81cac7 x86/sgx: Linux Enclave Driver
08d8fcb74fe268059ee58fcc2a0833b244e1f22a x86/sgx: Enumerate and track EPC sections

/Jarkko
Jarkko Sakkinen May 28, 2020, 5:35 a.m. UTC | #6
On Thu, May 28, 2020 at 08:25:43AM +0300, Jarkko Sakkinen wrote:
> On Tue, May 26, 2020 at 08:56:14PM -0700, Sean Christopherson wrote:
> > On Mon, May 25, 2020 at 11:23:04AM +0200, Borislav Petkov wrote:
> > > On Fri, May 15, 2020 at 03:43:57AM +0300, Jarkko Sakkinen wrote:
> > > > +struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
> > > > +int sgx_nr_epc_sections;
> > > 
> > > We have become very averse against global stuff. What is going to use
> > > those, only sgx code I assume...?
> > 
> > Yes, only SGX code.  The reclaim/swap code needs access to the sections,
> > and that code is in a different file, reclaim.c.  I don't have a super
> > strong objection to sucking reclaim.c into main.c, but I'm somewhat
> > indifferent on code organization as a whole.  Jarkko likely has a stronger
> > opinion.
> 
> I'll change it.
> 
> It's not quite as easy as just "sucking the file in". All the commits
> that touch the file need to be reworked:
> 
> $ git --no-pager log --format="%H %s" arch/x86/kernel/cpu/sgx/reclaim.c
> 5aeca6dabf767e9350ee3188ba25ceb21f3162b4 x86/sgx: Add a page reclaimer
> de9b1088959f36ffdaf43a49bfea1c7f9f81cac7 x86/sgx: Linux Enclave Driver
> 08d8fcb74fe268059ee58fcc2a0833b244e1f22a x86/sgx: Enumerate and track EPC sections

Not that I haven't done this a lot last few years. A proven approach
is to do it in two "git rebase -i mainline/master" sweeps:

1. For each commit, remove reclaim.c entry from the Makefile and import
   reclaim.c contents to main.c.
2. For each commit, delete reclaim.c.

I've tried quite a few different angles and this what I've converged
into. Very hard to hit messy into messy merge conflicts.

/Jarkko
Jarkko Sakkinen May 28, 2020, 6:14 a.m. UTC | #7
On Thu, May 28, 2020 at 08:35:15AM +0300, Jarkko Sakkinen wrote:
> On Thu, May 28, 2020 at 08:25:43AM +0300, Jarkko Sakkinen wrote:
> > On Tue, May 26, 2020 at 08:56:14PM -0700, Sean Christopherson wrote:
> > > On Mon, May 25, 2020 at 11:23:04AM +0200, Borislav Petkov wrote:
> > > > On Fri, May 15, 2020 at 03:43:57AM +0300, Jarkko Sakkinen wrote:
> > > > > +struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
> > > > > +int sgx_nr_epc_sections;
> > > > 
> > > > We have become very averse against global stuff. What is going to use
> > > > those, only sgx code I assume...?
> > > 
> > > Yes, only SGX code.  The reclaim/swap code needs access to the sections,
> > > and that code is in a different file, reclaim.c.  I don't have a super
> > > strong objection to sucking reclaim.c into main.c, but I'm somewhat
> > > indifferent on code organization as a whole.  Jarkko likely has a stronger
> > > opinion.
> > 
> > I'll change it.
> > 
> > It's not quite as easy as just "sucking the file in". All the commits
> > that touch the file need to be reworked:
> > 
> > $ git --no-pager log --format="%H %s" arch/x86/kernel/cpu/sgx/reclaim.c
> > 5aeca6dabf767e9350ee3188ba25ceb21f3162b4 x86/sgx: Add a page reclaimer
> > de9b1088959f36ffdaf43a49bfea1c7f9f81cac7 x86/sgx: Linux Enclave Driver
> > 08d8fcb74fe268059ee58fcc2a0833b244e1f22a x86/sgx: Enumerate and track EPC sections
> 
> Not that I haven't done this a lot last few years. A proven approach
> is to do it in two "git rebase -i mainline/master" sweeps:
> 
> 1. For each commit, remove reclaim.c entry from the Makefile and import
>    reclaim.c contents to main.c.
> 2. For each commit, delete reclaim.c.
> 
> I've tried quite a few different angles and this what I've converged
> into. Very hard to hit messy into messy merge conflicts.

Remembered why the things are the way they are. Also ioctl.c needs these
symbols and I'd keep that separate from the contents of main.c and
reclaim.c. There the separation obviously makes sense.

I'll anyway merge main.c and reclaim.c as one for v31 because they are
strongly connected.

/Jarkko
Jarkko Sakkinen May 28, 2020, 6:16 a.m. UTC | #8
On Thu, May 28, 2020 at 09:14:43AM +0300, Jarkko Sakkinen wrote:
> On Thu, May 28, 2020 at 08:35:15AM +0300, Jarkko Sakkinen wrote:
> > On Thu, May 28, 2020 at 08:25:43AM +0300, Jarkko Sakkinen wrote:
> > > On Tue, May 26, 2020 at 08:56:14PM -0700, Sean Christopherson wrote:
> > > > On Mon, May 25, 2020 at 11:23:04AM +0200, Borislav Petkov wrote:
> > > > > On Fri, May 15, 2020 at 03:43:57AM +0300, Jarkko Sakkinen wrote:
> > > > > > +struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
> > > > > > +int sgx_nr_epc_sections;
> > > > > 
> > > > > We have become very averse against global stuff. What is going to use
> > > > > those, only sgx code I assume...?
> > > > 
> > > > Yes, only SGX code.  The reclaim/swap code needs access to the sections,
> > > > and that code is in a different file, reclaim.c.  I don't have a super
> > > > strong objection to sucking reclaim.c into main.c, but I'm somewhat
> > > > indifferent on code organization as a whole.  Jarkko likely has a stronger
> > > > opinion.
> > > 
> > > I'll change it.
> > > 
> > > It's not quite as easy as just "sucking the file in". All the commits
> > > that touch the file need to be reworked:
> > > 
> > > $ git --no-pager log --format="%H %s" arch/x86/kernel/cpu/sgx/reclaim.c
> > > 5aeca6dabf767e9350ee3188ba25ceb21f3162b4 x86/sgx: Add a page reclaimer
> > > de9b1088959f36ffdaf43a49bfea1c7f9f81cac7 x86/sgx: Linux Enclave Driver
> > > 08d8fcb74fe268059ee58fcc2a0833b244e1f22a x86/sgx: Enumerate and track EPC sections
> > 
> > Not that I haven't done this a lot last few years. A proven approach
> > is to do it in two "git rebase -i mainline/master" sweeps:
> > 
> > 1. For each commit, remove reclaim.c entry from the Makefile and import
> >    reclaim.c contents to main.c.
> > 2. For each commit, delete reclaim.c.
> > 
> > I've tried quite a few different angles and this what I've converged
> > into. Very hard to hit messy into messy merge conflicts.
> 
> Remembered why the things are the way they are. Also ioctl.c needs these
> symbols and I'd keep that separate from the contents of main.c and
> reclaim.c. There the separation obviously makes sense.
> 
> I'll anyway merge main.c and reclaim.c as one for v31 because they are
> strongly connected.

And more importantly for the reason that it allows to make ksgxswapd_tsk
making the whole thing way more cleaner.

/Jarkko
Jarkko Sakkinen May 28, 2020, 7:36 a.m. UTC | #9
On Wed, May 27, 2020 at 10:35:09PM +0200, Borislav Petkov wrote:
 
> > One idea would be to provide a Kconfig a la NR_CPUS or NODES_SHIFT.  I.e.
> > carve out the bits in sgx_epc_page_desc to allow up to N sections, but let
> > the user limit the number of sections to recoup the unused memory.
> 
> I'd prefer a good estimate of a highest value, say 8 bits, which should
> suffice for the foreseeable future. That's the simplest thing to do and
> our Kconfig symbols space is an abomination of gazillion symbols.

OK, will do.

/Jarkko
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1197b5596d5a..4a7e7e484783 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1947,6 +1947,20 @@  config X86_INTEL_TSX_MODE_AUTO
 	  side channel attacks- equals the tsx=auto command line parameter.
 endchoice
 
+config INTEL_SGX
+	bool "Intel SGX"
+	depends on X86_64 && CPU_SUP_INTEL
+	select SRCU
+	select MMU_NOTIFIER
+	help
+	  Intel(R) SGX is a set of CPU instructions that can be used by
+	  applications to set aside private regions of code and data, referred
+	  to as enclaves. An enclave's private memory can only be accessed by
+	  code running within the enclave. Accesses from outside the enclave,
+	  including other enclaves, are disallowed by hardware.
+
+	  If unsure, say N.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 7dc4ad68eb41..45534fb81007 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -46,6 +46,7 @@  obj-$(CONFIG_X86_MCE)			+= mce/
 obj-$(CONFIG_MTRR)			+= mtrr/
 obj-$(CONFIG_MICROCODE)			+= microcode/
 obj-$(CONFIG_X86_CPU_RESCTRL)		+= resctrl/
+obj-$(CONFIG_INTEL_SGX)			+= sgx/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
new file mode 100644
index 000000000000..2dec75916a5e
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -0,0 +1,3 @@ 
+obj-y += \
+	main.o \
+	reclaim.o
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
new file mode 100644
index 000000000000..38424c1e8341
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -0,0 +1,151 @@ 
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-17 Intel Corporation.
+
+#include <linux/freezer.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/ratelimit.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include "encls.h"
+
+struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
+int sgx_nr_epc_sections;
+
+static void __init sgx_free_epc_section(struct sgx_epc_section *section)
+{
+	struct sgx_epc_page *page;
+
+	while (!list_empty(&section->page_list)) {
+		page = list_first_entry(&section->page_list,
+					struct sgx_epc_page, list);
+		list_del(&page->list);
+		kfree(page);
+	}
+
+	while (!list_empty(&section->unsanitized_page_list)) {
+		page = list_first_entry(&section->unsanitized_page_list,
+					struct sgx_epc_page, list);
+		list_del(&page->list);
+		kfree(page);
+	}
+
+	memunmap(section->va);
+}
+
+static bool __init sgx_alloc_epc_section(u64 addr, u64 size,
+					 unsigned long index,
+					 struct sgx_epc_section *section)
+{
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct sgx_epc_page *page;
+	unsigned long i;
+
+	section->va = memremap(addr, size, MEMREMAP_WB);
+	if (!section->va)
+		return false;
+
+	section->pa = addr;
+	spin_lock_init(&section->lock);
+	INIT_LIST_HEAD(&section->page_list);
+	INIT_LIST_HEAD(&section->unsanitized_page_list);
+
+	for (i = 0; i < nr_pages; i++) {
+		page = kzalloc(sizeof(*page), GFP_KERNEL);
+		if (!page)
+			goto err_out;
+
+		page->desc = (addr + (i << PAGE_SHIFT)) | index;
+		list_add_tail(&page->list, &section->unsanitized_page_list);
+	}
+
+	return true;
+
+err_out:
+	sgx_free_epc_section(section);
+	return false;
+}
+
+static void __init sgx_page_cache_teardown(void)
+{
+	int i;
+
+	for (i = 0; i < sgx_nr_epc_sections; i++)
+		sgx_free_epc_section(&sgx_epc_sections[i]);
+}
+
+/**
+ * A section metric is concatenated in a way that @low bits 12-31 define the
+ * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
+ * metric.
+ */
+static inline u64 __init sgx_calc_section_metric(u64 low, u64 high)
+{
+	return (low & GENMASK_ULL(31, 12)) +
+	       ((high & GENMASK_ULL(19, 0)) << 32);
+}
+
+static bool __init sgx_page_cache_init(void)
+{
+	u32 eax, ebx, ecx, edx, type;
+	u64 pa, size;
+	int i;
+
+	for (i = 0; i <= ARRAY_SIZE(sgx_epc_sections); i++) {
+		cpuid_count(SGX_CPUID, i + SGX_CPUID_FIRST_VARIABLE_SUB_LEAF,
+			    &eax, &ebx, &ecx, &edx);
+
+		type = eax & SGX_CPUID_SUB_LEAF_TYPE_MASK;
+		if (type == SGX_CPUID_SUB_LEAF_INVALID)
+			break;
+
+		if (type != SGX_CPUID_SUB_LEAF_EPC_SECTION) {
+			pr_err_once("Unknown EPC section type: %u\n", type);
+			break;
+		}
+
+		if (i == ARRAY_SIZE(sgx_epc_sections)) {
+			pr_warn("No free slot for an EPC section\n");
+			break;
+		}
+
+		pa = sgx_calc_section_metric(eax, ebx);
+		size = sgx_calc_section_metric(ecx, edx);
+
+		pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1);
+
+		if (!sgx_alloc_epc_section(pa, size, i, &sgx_epc_sections[i])) {
+			pr_err("No free memory for an EPC section\n");
+			break;
+		}
+
+		sgx_nr_epc_sections++;
+	}
+
+	if (!sgx_nr_epc_sections) {
+		pr_err("There are zero EPC sections.\n");
+		return false;
+	}
+
+	return true;
+}
+
+static void __init sgx_init(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_SGX))
+		return;
+
+	if (!sgx_page_cache_init())
+		return;
+
+	if (!sgx_page_reclaimer_init())
+		goto err_page_cache;
+
+	return;
+
+err_page_cache:
+	sgx_page_cache_teardown();
+}
+
+arch_initcall(sgx_init);
diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
new file mode 100644
index 000000000000..215371588a25
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/reclaim.c
@@ -0,0 +1,82 @@ 
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2016-19 Intel Corporation.
+
+#include <linux/freezer.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/ratelimit.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/signal.h>
+#include "encls.h"
+
+struct task_struct *ksgxswapd_tsk;
+
+static void sgx_sanitize_section(struct sgx_epc_section *section)
+{
+	struct sgx_epc_page *page;
+	LIST_HEAD(secs_list);
+	int ret;
+
+	while (!list_empty(&section->unsanitized_page_list)) {
+		if (kthread_should_stop())
+			return;
+
+		spin_lock(&section->lock);
+
+		page = list_first_entry(&section->unsanitized_page_list,
+					struct sgx_epc_page, list);
+
+		ret = __eremove(sgx_epc_addr(page));
+		if (!ret)
+			list_move(&page->list, &section->page_list);
+		else
+			list_move_tail(&page->list, &secs_list);
+
+		spin_unlock(&section->lock);
+
+		cond_resched();
+	}
+}
+
+static int ksgxswapd(void *p)
+{
+	int i;
+
+	set_freezable();
+
+	/*
+	 * Reset all pages to uninitialized state. Pages could be in initialized
+	 * on kmemexec.
+	 */
+	for (i = 0; i < sgx_nr_epc_sections; i++)
+		sgx_sanitize_section(&sgx_epc_sections[i]);
+
+	/*
+	 * 2nd round for the SECS pages as they cannot be removed when they
+	 * still hold child pages.
+	 */
+	for (i = 0; i < sgx_nr_epc_sections; i++) {
+		sgx_sanitize_section(&sgx_epc_sections[i]);
+
+		/* Should never happen. */
+		if (!list_empty(&sgx_epc_sections[i].unsanitized_page_list))
+			WARN(1, "EPC section %d has unsanitized pages.\n", i);
+	}
+
+	return 0;
+}
+
+bool __init sgx_page_reclaimer_init(void)
+{
+	struct task_struct *tsk;
+
+	tsk = kthread_run(ksgxswapd, NULL, "ksgxswapd");
+	if (IS_ERR(tsk))
+		return false;
+
+	ksgxswapd_tsk = tsk;
+
+	return true;
+}
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
new file mode 100644
index 000000000000..aad30980be32
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -0,0 +1,70 @@ 
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+#ifndef _X86_SGX_H
+#define _X86_SGX_H
+
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/rwsem.h>
+#include <linux/types.h>
+#include <asm/asm.h>
+#include "arch.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "sgx: " fmt
+
+struct sgx_epc_page {
+	unsigned long desc;
+	struct list_head list;
+};
+
+/**
+ * struct sgx_epc_section
+ *
+ * The firmware can define multiple chunks of EPC to the different areas of the
+ * physical memory e.g. for memory areas of the each node. This structure is
+ * used to store EPC pages for one EPC section and virtual memory area where
+ * the pages have been mapped.
+ */
+struct sgx_epc_section {
+	unsigned long pa;
+	void *va;
+	struct list_head page_list;
+	struct list_head unsanitized_page_list;
+	spinlock_t lock;
+};
+
+/**
+ * enum sgx_epc_page_desc - bits and masks for an EPC page's descriptor
+ * %SGX_EPC_SECTION_MASK:	SGX allows to have multiple EPC sections in the
+ *				physical memory. The existing and near-future
+ *				hardware defines at most eight sections, hence
+ *				three bits to hold a section.
+ */
+enum sgx_epc_page_desc {
+	SGX_EPC_SECTION_MASK			= GENMASK_ULL(3, 0),
+	/* bits 12-63 are reserved for the physical page address of the page */
+};
+
+#define SGX_MAX_EPC_SECTIONS (SGX_EPC_SECTION_MASK + 1)
+
+extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
+
+static inline struct sgx_epc_section *sgx_epc_section(struct sgx_epc_page *page)
+{
+	return &sgx_epc_sections[page->desc & SGX_EPC_SECTION_MASK];
+}
+
+static inline void *sgx_epc_addr(struct sgx_epc_page *page)
+{
+	struct sgx_epc_section *section = sgx_epc_section(page);
+
+	return section->va + (page->desc & PAGE_MASK) - section->pa;
+}
+
+extern int sgx_nr_epc_sections;
+extern struct task_struct *ksgxswapd_tsk;
+
+bool __init sgx_page_reclaimer_init(void);
+
+#endif /* _X86_SGX_H */