diff mbox series

[v4,2/3] x86/sgx: Replace section local dirty page lists with a global list

Message ID 20210313160119.1318533-3-jarkko@kernel.org (mailing list archive)
State New, archived
Headers show
Series x86/sgx: NUMA | expand

Commit Message

Jarkko Sakkinen March 13, 2021, 4:01 p.m. UTC
Reset initialized EPC pages in sgx_dirty_page_list to uninitialized state,
and free them using sgx_free_epc_page(). Do two passes, as for SECS pages
the first round can fail, if all child pages have not yet been removed.
The driver puts all pages on startup first to sgx_dirty_page_list, as the
initialization could be triggered by kexec(), meaning that pages have been
reserved for active enclaves before the operation.

The section local lists are redundant, as sgx_free_epc_page() figures
out the correction by using epc_page->section.

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---

v4:
* Open coded sgx_santize_section() to ksgxd().
* Rewrote the commit message.

 arch/x86/kernel/cpu/sgx/main.c | 81 ++++++++++++++++------------------
 arch/x86/kernel/cpu/sgx/sgx.h  |  7 ---
 2 files changed, 37 insertions(+), 51 deletions(-)

Comments

Dave Hansen March 15, 2021, 4:03 p.m. UTC | #1
On 3/13/21 8:01 AM, Jarkko Sakkinen wrote:
> Reset initialized EPC pages in sgx_dirty_page_list to uninitialized state,
> and free them using sgx_free_epc_page(). Do two passes, as for SECS pages
> the first round can fail, if all child pages have not yet been removed.
> The driver puts all pages on startup first to sgx_dirty_page_list, as the
> initialization could be triggered by kexec(), meaning that pages have been
> reserved for active enclaves before the operation.
> 
> The section local lists are redundant, as sgx_free_epc_page() figures
> out the correction by using epc_page->section.

During normal runtime, the "ksgxd" daemon behaves like a  version of
kswapd just for SGX.  But, its first job is to initialize enclave
memory.  This is done in a a separate thread because this initialization
can be quite slow.

Currently, the SGX boot code places each enclave page on a
sgx_section-local list (init_laundry_list).  Once it starts up, the
ksgxd code walks over that list and populates the actual SGX page allocator.

However, the per-section structures are going away to make way for the
SGX NUMA allocator.  There's also little need to have a per-section
structure; the enclave pages are all treated identically, and they can
be placed on the correct allocator list from metadata stoered in the
enclave page itself.

> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 65004fb8a91f..cb4561444b96 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -27,39 +27,10 @@ static LIST_HEAD(sgx_active_page_list);
>  static DEFINE_SPINLOCK(sgx_reclaimer_lock);
>  
>  /*
> - * Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS
> - * pages whose child pages blocked EREMOVE.
> + * When the driver initialized, EPC pages go first here, as they could be
> + * initialized to an active enclave, on kexec entry.
>   */
> -static void sgx_sanitize_section(struct sgx_epc_section *section)
> -{
> -	struct sgx_epc_page *page;
> -	LIST_HEAD(dirty);
> -	int ret;
> -
> -	/* init_laundry_list is thread-local, no need for a lock: */
> -	while (!list_empty(&section->init_laundry_list)) {
> -		if (kthread_should_stop())
> -			return;
> -
> -		/* needed for access to ->page_list: */
> -		spin_lock(&section->lock);
> -
> -		page = list_first_entry(&section->init_laundry_list,
> -					struct sgx_epc_page, list);
> -
> -		ret = __eremove(sgx_get_epc_virt_addr(page));
> -		if (!ret)
> -			list_move(&page->list, &section->page_list);
> -		else
> -			list_move_tail(&page->list, &dirty);
> -
> -		spin_unlock(&section->lock);
> -
> -		cond_resched();
> -	}
> -
> -	list_splice(&dirty, &section->init_laundry_list);
> -}
> +static LIST_HEAD(sgx_dirty_page_list);
>  
>  static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
>  {
> @@ -400,25 +371,48 @@ static bool sgx_should_reclaim(unsigned long watermark)
>  
>  static int ksgxd(void *p)
>  {
> -	int i;
> +	struct sgx_epc_page *page;
> +	LIST_HEAD(dirty);
> +	int i, ret;
>  
>  	set_freezable();
>  
>  	/*
> -	 * Sanitize pages in order to recover from kexec(). The 2nd pass is
> -	 * required for SECS pages, whose child pages blocked EREMOVE.
> +	 * Reset initialized EPC pages in sgx_dirty_page_list to uninitialized state,
> +	 * and free them using sgx_free_epc_page(). 

I'm not a fan of comments that tell us verbatim what the code does.

>							Do two passes, as for SECS pages the
> +	 * first round can fail, if all child pages have not yet been removed.  The
> +	 * driver puts all pages on startup first to sgx_dirty_page_list, as the
> +	 * initialization could be triggered by kexec(), meaning that pages have been
> +	 * reserved for active enclaves before the operation.
>  	 */



> -	for (i = 0; i < sgx_nr_epc_sections; i++)
> -		sgx_sanitize_section(&sgx_epc_sections[i]);
>  
> -	for (i = 0; i < sgx_nr_epc_sections; i++) {
> -		sgx_sanitize_section(&sgx_epc_sections[i]);

FWIW, I don't like the removal of the helper here.  I really like kernel
threads' top-level function to be very understandable and clean.  This
makes it quite a bit harder to figure out what is going on.

For instance, we could just have a sgx_sanitize_pages() which has a
local dirty list and just calls:

void sgx_santitize_pages(void)
{
	LIST_HEAD(dirty);

	/*
	 * Comment about two passes
	 */
	__sgx_sanitize_pages(&dirty)
	__sgx_sanitize_pages(&dirty)
}

> +	/* sgx_dirty_page_list is thread-local to ksgxd, no need for a lock: */
> +	for (i = 0; i < 2 && !list_empty(&sgx_dirty_page_list); i++) {
> +		while (!list_empty(&sgx_dirty_page_list)) {
> +			if (kthread_should_stop())
> +				return 0;
> +
> +			page = list_first_entry(&sgx_dirty_page_list, struct sgx_epc_page, list);
> +
> +			ret = __eremove(sgx_get_epc_virt_addr(page));
> +			if (!ret) {
> +				/* The page is clean - move to the free list. */

I'd even say:
				/*
				 * page is now sanitized.  Make it
				 * available via the SGX page allocator:
				 */

See what that does?  It actually links the "cleaning" to the freeing.

> +				list_del(&page->list);
> +				sgx_free_epc_page(page);
> +			} else {
> +				/* The page is not yet clean - move to the dirty list. */
> +				list_move_tail(&page->list, &dirty);
> +			}
> +
> +			cond_resched();
> +		}
>  
> -		/* Should never happen. */
> -		if (!list_empty(&sgx_epc_sections[i].init_laundry_list))
> -			WARN(1, "EPC section %d has unsanitized pages.\n", i);
> +		list_splice(&dirty, &sgx_dirty_page_list);
>  	}
>  
> +	if (!list_empty(&sgx_dirty_page_list))
> +		WARN(1, "EPC section %d has unsanitized pages.\n", i);
> +
>  	while (!kthread_should_stop()) {
>  		if (try_to_freeze())
>  			continue;
> @@ -632,13 +626,12 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
>  	section->phys_addr = phys_addr;
>  	spin_lock_init(&section->lock);
>  	INIT_LIST_HEAD(&section->page_list);
> -	INIT_LIST_HEAD(&section->init_laundry_list);
>  
>  	for (i = 0; i < nr_pages; i++) {
>  		section->pages[i].section = index;
>  		section->pages[i].flags = 0;
>  		section->pages[i].owner = NULL;
> -		list_add_tail(&section->pages[i].list, &section->init_laundry_list);
> +		list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
>  	}
...
Jarkko Sakkinen March 15, 2021, 7:14 p.m. UTC | #2
On Mon, Mar 15, 2021 at 09:03:21AM -0700, Dave Hansen wrote:
> On 3/13/21 8:01 AM, Jarkko Sakkinen wrote:
> > Reset initialized EPC pages in sgx_dirty_page_list to uninitialized state,
> > and free them using sgx_free_epc_page(). Do two passes, as for SECS pages
> > the first round can fail, if all child pages have not yet been removed.
> > The driver puts all pages on startup first to sgx_dirty_page_list, as the
> > initialization could be triggered by kexec(), meaning that pages have been
> > reserved for active enclaves before the operation.
> > 
> > The section local lists are redundant, as sgx_free_epc_page() figures
> > out the correction by using epc_page->section.
> 
> During normal runtime, the "ksgxd" daemon behaves like a  version of
> kswapd just for SGX.  But, its first job is to initialize enclave
> memory.  This is done in a a separate thread because this initialization
> can be quite slow.
> 
> Currently, the SGX boot code places each enclave page on a
> sgx_section-local list (init_laundry_list).  Once it starts up, the
> ksgxd code walks over that list and populates the actual SGX page allocator.
> 
> However, the per-section structures are going away to make way for the
> SGX NUMA allocator.  There's also little need to have a per-section
> structure; the enclave pages are all treated identically, and they can
> be placed on the correct allocator list from metadata stoered in the
> enclave page itself.
> 
Is this a suggestion how to rephrase the commit message? :-)

> > diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> > index 65004fb8a91f..cb4561444b96 100644
> > --- a/arch/x86/kernel/cpu/sgx/main.c
> > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > @@ -27,39 +27,10 @@ static LIST_HEAD(sgx_active_page_list);
> >  static DEFINE_SPINLOCK(sgx_reclaimer_lock);
> >  
> >  /*
> > - * Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS
> > - * pages whose child pages blocked EREMOVE.
> > + * When the driver initialized, EPC pages go first here, as they could be
> > + * initialized to an active enclave, on kexec entry.
> >   */
> > -static void sgx_sanitize_section(struct sgx_epc_section *section)
> > -{
> > -	struct sgx_epc_page *page;
> > -	LIST_HEAD(dirty);
> > -	int ret;
> > -
> > -	/* init_laundry_list is thread-local, no need for a lock: */
> > -	while (!list_empty(&section->init_laundry_list)) {
> > -		if (kthread_should_stop())
> > -			return;
> > -
> > -		/* needed for access to ->page_list: */
> > -		spin_lock(&section->lock);
> > -
> > -		page = list_first_entry(&section->init_laundry_list,
> > -					struct sgx_epc_page, list);
> > -
> > -		ret = __eremove(sgx_get_epc_virt_addr(page));
> > -		if (!ret)
> > -			list_move(&page->list, &section->page_list);
> > -		else
> > -			list_move_tail(&page->list, &dirty);
> > -
> > -		spin_unlock(&section->lock);
> > -
> > -		cond_resched();
> > -	}
> > -
> > -	list_splice(&dirty, &section->init_laundry_list);
> > -}
> > +static LIST_HEAD(sgx_dirty_page_list);
> >  
> >  static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
> >  {
> > @@ -400,25 +371,48 @@ static bool sgx_should_reclaim(unsigned long watermark)
> >  
> >  static int ksgxd(void *p)
> >  {
> > -	int i;
> > +	struct sgx_epc_page *page;
> > +	LIST_HEAD(dirty);
> > +	int i, ret;
> >  
> >  	set_freezable();
> >  
> >  	/*
> > -	 * Sanitize pages in order to recover from kexec(). The 2nd pass is
> > -	 * required for SECS pages, whose child pages blocked EREMOVE.
> > +	 * Reset initialized EPC pages in sgx_dirty_page_list to uninitialized state,
> > +	 * and free them using sgx_free_epc_page(). 
> 
> I'm not a fan of comments that tell us verbatim what the code does.

So, what are you suggesting? Remove the whole comment or parts of it? I'm
presuming that you suggest removing the "top-level" part.

> 
> >							Do two passes, as for SECS pages the
> > +	 * first round can fail, if all child pages have not yet been removed.  The
> > +	 * driver puts all pages on startup first to sgx_dirty_page_list, as the
> > +	 * initialization could be triggered by kexec(), meaning that pages have been
> > +	 * reserved for active enclaves before the operation.
> >  	 */
> 
> 
> 
> > -	for (i = 0; i < sgx_nr_epc_sections; i++)
> > -		sgx_sanitize_section(&sgx_epc_sections[i]);
> >  
> > -	for (i = 0; i < sgx_nr_epc_sections; i++) {
> > -		sgx_sanitize_section(&sgx_epc_sections[i]);
> 
> FWIW, I don't like the removal of the helper here.  I really like kernel
> threads' top-level function to be very understandable and clean.  This
> makes it quite a bit harder to figure out what is going on.
> 
> For instance, we could just have a sgx_sanitize_pages() which has a
> local dirty list and just calls:
> 
> void sgx_santitize_pages(void)
> {
> 	LIST_HEAD(dirty);
> 
> 	/*
> 	 * Comment about two passes
> 	 */
> 	__sgx_sanitize_pages(&dirty)
> 	__sgx_sanitize_pages(&dirty)

OK.

> }
> 
> > +	/* sgx_dirty_page_list is thread-local to ksgxd, no need for a lock: */
> > +	for (i = 0; i < 2 && !list_empty(&sgx_dirty_page_list); i++) {
> > +		while (!list_empty(&sgx_dirty_page_list)) {
> > +			if (kthread_should_stop())
> > +				return 0;
> > +
> > +			page = list_first_entry(&sgx_dirty_page_list, struct sgx_epc_page, list);
> > +
> > +			ret = __eremove(sgx_get_epc_virt_addr(page));
> > +			if (!ret) {
> > +				/* The page is clean - move to the free list. */
> 
> I'd even say:
> 				/*
> 				 * page is now sanitized.  Make it
> 				 * available via the SGX page allocator:
> 				 */
> 
> See what that does?  It actually links the "cleaning" to the freeing.

I agree, thanks.

> > +				list_del(&page->list);
> > +				sgx_free_epc_page(page);
> > +			} else {
> > +				/* The page is not yet clean - move to the dirty list. */
> > +				list_move_tail(&page->list, &dirty);
> > +			}
> > +
> > +			cond_resched();
> > +		}
> >  
> > -		/* Should never happen. */
> > -		if (!list_empty(&sgx_epc_sections[i].init_laundry_list))
> > -			WARN(1, "EPC section %d has unsanitized pages.\n", i);
> > +		list_splice(&dirty, &sgx_dirty_page_list);
> >  	}
> >  
> > +	if (!list_empty(&sgx_dirty_page_list))
> > +		WARN(1, "EPC section %d has unsanitized pages.\n", i);
> > +
> >  	while (!kthread_should_stop()) {
> >  		if (try_to_freeze())
> >  			continue;
> > @@ -632,13 +626,12 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
> >  	section->phys_addr = phys_addr;
> >  	spin_lock_init(&section->lock);
> >  	INIT_LIST_HEAD(&section->page_list);
> > -	INIT_LIST_HEAD(&section->init_laundry_list);
> >  
> >  	for (i = 0; i < nr_pages; i++) {
> >  		section->pages[i].section = index;
> >  		section->pages[i].flags = 0;
> >  		section->pages[i].owner = NULL;
> > -		list_add_tail(&section->pages[i].list, &section->init_laundry_list);
> > +		list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
> >  	}
> ...
> 

/Jarkko
Dave Hansen March 15, 2021, 7:47 p.m. UTC | #3
On 3/15/21 12:14 PM, Jarkko Sakkinen wrote:
> On Mon, Mar 15, 2021 at 09:03:21AM -0700, Dave Hansen wrote:
>> On 3/13/21 8:01 AM, Jarkko Sakkinen wrote:
>>> Reset initialized EPC pages in sgx_dirty_page_list to uninitialized state,
>>> and free them using sgx_free_epc_page(). Do two passes, as for SECS pages
>>> the first round can fail, if all child pages have not yet been removed.
>>> The driver puts all pages on startup first to sgx_dirty_page_list, as the
>>> initialization could be triggered by kexec(), meaning that pages have been
>>> reserved for active enclaves before the operation.
>>>
>>> The section local lists are redundant, as sgx_free_epc_page() figures
>>> out the correction by using epc_page->section.
>>
>> During normal runtime, the "ksgxd" daemon behaves like a  version of
>> kswapd just for SGX.  But, its first job is to initialize enclave
>> memory.  This is done in a a separate thread because this initialization
>> can be quite slow.
>>
>> Currently, the SGX boot code places each enclave page on a
>> sgx_section-local list (init_laundry_list).  Once it starts up, the
>> ksgxd code walks over that list and populates the actual SGX page allocator.
>>
>> However, the per-section structures are going away to make way for the
>> SGX NUMA allocator.  There's also little need to have a per-section
>> structure; the enclave pages are all treated identically, and they can
>> be placed on the correct allocator list from metadata stoered in the
>> enclave page itself.
>>
> Is this a suggestion how to rephrase the commit message? :-)

Yep.

>>>  static int ksgxd(void *p)
>>>  {
>>> -	int i;
>>> +	struct sgx_epc_page *page;
>>> +	LIST_HEAD(dirty);
>>> +	int i, ret;
>>>  
>>>  	set_freezable();
>>>  
>>>  	/*
>>> -	 * Sanitize pages in order to recover from kexec(). The 2nd pass is
>>> -	 * required for SECS pages, whose child pages blocked EREMOVE.
>>> +	 * Reset initialized EPC pages in sgx_dirty_page_list to uninitialized state,
>>> +	 * and free them using sgx_free_epc_page(). 
>>
>> I'm not a fan of comments that tell us verbatim what the code does.
> 
> So, what are you suggesting? Remove the whole comment or parts of it? I'm
> presuming that you suggest removing the "top-level" part.

Just please make it more relevant and informative.  I can tell this is
processing 'sgx_dirty_page_list' and calling sgx_free_epc_page() from
the code.  I don't need a comment to tell me that.

A better comment would say:

	Take enclave pages from the init code, ensure they are clean,
	and free the back into the main SGX page allocator

That tells what *role* this code plays (connecting init code to the
allocator) in a way that just verbatim telling what code is called does not.
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 65004fb8a91f..cb4561444b96 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -27,39 +27,10 @@  static LIST_HEAD(sgx_active_page_list);
 static DEFINE_SPINLOCK(sgx_reclaimer_lock);
 
 /*
- * Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS
- * pages whose child pages blocked EREMOVE.
+ * When the driver initialized, EPC pages go first here, as they could be
+ * initialized to an active enclave, on kexec entry.
  */
-static void sgx_sanitize_section(struct sgx_epc_section *section)
-{
-	struct sgx_epc_page *page;
-	LIST_HEAD(dirty);
-	int ret;
-
-	/* init_laundry_list is thread-local, no need for a lock: */
-	while (!list_empty(&section->init_laundry_list)) {
-		if (kthread_should_stop())
-			return;
-
-		/* needed for access to ->page_list: */
-		spin_lock(&section->lock);
-
-		page = list_first_entry(&section->init_laundry_list,
-					struct sgx_epc_page, list);
-
-		ret = __eremove(sgx_get_epc_virt_addr(page));
-		if (!ret)
-			list_move(&page->list, &section->page_list);
-		else
-			list_move_tail(&page->list, &dirty);
-
-		spin_unlock(&section->lock);
-
-		cond_resched();
-	}
-
-	list_splice(&dirty, &section->init_laundry_list);
-}
+static LIST_HEAD(sgx_dirty_page_list);
 
 static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
 {
@@ -400,25 +371,48 @@  static bool sgx_should_reclaim(unsigned long watermark)
 
 static int ksgxd(void *p)
 {
-	int i;
+	struct sgx_epc_page *page;
+	LIST_HEAD(dirty);
+	int i, ret;
 
 	set_freezable();
 
 	/*
-	 * Sanitize pages in order to recover from kexec(). The 2nd pass is
-	 * required for SECS pages, whose child pages blocked EREMOVE.
+	 * Reset initialized EPC pages in sgx_dirty_page_list to uninitialized state,
+	 * and free them using sgx_free_epc_page(). Do two passes, as for SECS pages the
+	 * first round can fail, if all child pages have not yet been removed.  The
+	 * driver puts all pages on startup first to sgx_dirty_page_list, as the
+	 * initialization could be triggered by kexec(), meaning that pages have been
+	 * reserved for active enclaves before the operation.
 	 */
-	for (i = 0; i < sgx_nr_epc_sections; i++)
-		sgx_sanitize_section(&sgx_epc_sections[i]);
 
-	for (i = 0; i < sgx_nr_epc_sections; i++) {
-		sgx_sanitize_section(&sgx_epc_sections[i]);
+	/* sgx_dirty_page_list is thread-local to ksgxd, no need for a lock: */
+	for (i = 0; i < 2 && !list_empty(&sgx_dirty_page_list); i++) {
+		while (!list_empty(&sgx_dirty_page_list)) {
+			if (kthread_should_stop())
+				return 0;
+
+			page = list_first_entry(&sgx_dirty_page_list, struct sgx_epc_page, list);
+
+			ret = __eremove(sgx_get_epc_virt_addr(page));
+			if (!ret) {
+				/* The page is clean - move to the free list. */
+				list_del(&page->list);
+				sgx_free_epc_page(page);
+			} else {
+				/* The page is not yet clean - move to the dirty list. */
+				list_move_tail(&page->list, &dirty);
+			}
+
+			cond_resched();
+		}
 
-		/* Should never happen. */
-		if (!list_empty(&sgx_epc_sections[i].init_laundry_list))
-			WARN(1, "EPC section %d has unsanitized pages.\n", i);
+		list_splice(&dirty, &sgx_dirty_page_list);
 	}
 
+	if (!list_empty(&sgx_dirty_page_list))
+		WARN(1, "EPC section %d has unsanitized pages.\n", i);
+
 	while (!kthread_should_stop()) {
 		if (try_to_freeze())
 			continue;
@@ -632,13 +626,12 @@  static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
 	section->phys_addr = phys_addr;
 	spin_lock_init(&section->lock);
 	INIT_LIST_HEAD(&section->page_list);
-	INIT_LIST_HEAD(&section->init_laundry_list);
 
 	for (i = 0; i < nr_pages; i++) {
 		section->pages[i].section = index;
 		section->pages[i].flags = 0;
 		section->pages[i].owner = NULL;
-		list_add_tail(&section->pages[i].list, &section->init_laundry_list);
+		list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
 	}
 
 	section->free_cnt = nr_pages;
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 5fa42d143feb..bc8af0428640 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -45,13 +45,6 @@  struct sgx_epc_section {
 	spinlock_t lock;
 	struct list_head page_list;
 	unsigned long free_cnt;
-
-	/*
-	 * Pages which need EREMOVE run on them before they can be
-	 * used.  Only safe to be accessed in ksgxd and init code.
-	 * Not protected by locks.
-	 */
-	struct list_head init_laundry_list;
 };
 
 extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];