[intel-sgx-kernel-dev,v2] intel_sgx: Add lock to make EPC eviction atomic
diff mbox

Message ID 1480372367-12679-1-git-send-email-sean.j.christopherson@intel.com
State New
Headers show

Commit Message

Sean Christopherson Nov. 28, 2016, 10:32 p.m. UTC
Add an eviction specific, per-enclave lock so that the lock can be held
for the entire EPC eviction sequence without having to hold the overall
per-enclave lock for an extended duration.  Holding an enclave's overall
lock prevents any other driver operations on the enclave, e.g. faulting
a page into the enclave, and evicting 16 EPC pages takes upwards of half
a millisecond.

Holding a lock for the entire EBLOCK->ETRACK->EWB sequence is necessary
to make the sequence atomic with respect to other evictions on the same
enclave.  The SGX architecture does not allow multiple ETRACK sequences
to be in flight at the same time if the enclave being tracked has one or
more active threads in the enclave.  A second ETRACK will fail due to
PREV_TRK_INCMPL if it is attempted before EWB is executed on all blocked
pages (in the presence of active enclave threads).

Currently the driver releases the enclave's lock after ETRACK and then
reacquires the lock prior to EWB.  Releasing the lock from the thread
that performed ETRACK, T1, allows a different thread, T2, to acquire the
lock for its own ETRACK.  T2's ETRACK will fail due to the architectural
restrictions if a third thread, T3, is executing inside the enclave
whose pages are being swapped.

    T3: <active in enclave E1>
    T1: lock_mutex(E1)
    T1: ETRACK(E1)
    T1: unlock_mutex(E1)
    T2: lock_mutex(E1)
    T2: ETRACK(E1) fails

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 drivers/platform/x86/intel_sgx.h            |  1 +
 drivers/platform/x86/intel_sgx_ioctl.c      |  1 +
 drivers/platform/x86/intel_sgx_page_cache.c | 69 +++++++++++++++++------------
 3 files changed, 43 insertions(+), 28 deletions(-)

Comments

Jarkko Sakkinen Dec. 1, 2016, 11:32 a.m. UTC | #1
On Mon, Nov 28, 2016 at 02:32:47PM -0800, Sean Christopherson wrote:
> Add an eviction specific, per-enclave lock so that the lock can be held
> for the entire EPC eviction sequence without having to hold the overall
> per-enclave lock for an extended duration.  Holding an enclave's overall
> lock prevents any other driver operations on the enclave, e.g. faulting
> a page into the enclave, and evicting 16 EPC pages takes upwards of half
> a millisecond.
> 
> Holding a lock for the entire EBLOCK->ETRACK->EWB sequence is necessary
> to make the sequence atomic with respect to other evictions on the same
> enclave.  The SGX architecture does not allow multiple ETRACK sequences
> to be in flight at the same time if the enclave being tracked has one or
> more active threads in the enclave.  A second ETRACK will fail due to
> PREV_TRK_INCMPL if it is attempted before EWB is executed on all blocked
> pages (in the presence of active enclave threads).
> 
> Currently the driver releases the enclave's lock after ETRACK and then
> reacquires the lock prior to EWB.  Releasing the lock from the thread
> that performed ETRACK, T1, allows a different thread, T2, to acquire the
> lock for its own ETRACK.  T2's ETRACK will fail due to the architectural
> restrictions if a third thread, T3, is executing inside the enclave
> whose pages are being swapped.
> 
>     T3: <active in enclave E1>
>     T1: lock_mutex(E1)
>     T1: ETRACK(E1)
>     T1: unlock_mutex(E1)
>     T2: lock_mutex(E1)
>     T2: ETRACK(E1) fails
> 
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

The commit message is worse than in the earlier patch. There are no new
locks. I don't like the use of the word atomic here. It's too rounded a word
to be used.


> ---
>  drivers/platform/x86/intel_sgx.h            |  1 +
>  drivers/platform/x86/intel_sgx_ioctl.c      |  1 +
>  drivers/platform/x86/intel_sgx_page_cache.c | 69 +++++++++++++++++------------
>  3 files changed, 43 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
> index 464763d..d837789 100644
> --- a/drivers/platform/x86/intel_sgx.h
> +++ b/drivers/platform/x86/intel_sgx.h
> @@ -137,6 +137,7 @@ struct sgx_encl {
>  	unsigned int secs_child_cnt;
>  	unsigned int vma_cnt;
>  	struct mutex lock;
> +	struct mutex eviction_lock;
>  	struct task_struct *owner;
>  	struct mm_struct *mm;
>  	struct file *backing;
> diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c
> index d54c410..72fe9aa 100644
> --- a/drivers/platform/x86/intel_sgx_ioctl.c
> +++ b/drivers/platform/x86/intel_sgx_ioctl.c
> @@ -522,6 +522,7 @@ static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd,
>  	INIT_LIST_HEAD(&encl->load_list);
>  	INIT_LIST_HEAD(&encl->encl_list);
>  	mutex_init(&encl->lock);
> +	mutex_init(&encl->eviction_lock);
>  	INIT_WORK(&encl->add_page_work, sgx_add_page_worker);
>  
>  	encl->owner = current->group_leader;
> diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c
> index 8b1cc82..ce60b76 100644
> --- a/drivers/platform/x86/intel_sgx_page_cache.c
> +++ b/drivers/platform/x86/intel_sgx_page_cache.c
> @@ -224,9 +224,9 @@ static int sgx_ewb(struct sgx_encl *encl,
>  	return ret;
>  }
>  
> -void sgx_free_encl_page(struct sgx_encl_page *entry,
> -		    struct sgx_encl *encl,
> -		    unsigned int flags)
> +static void sgx_free_encl_page(struct sgx_encl_page *entry,
> +			       struct sgx_encl *encl,
> +			       unsigned int flags)
>  {
>  	sgx_free_page(entry->epc_page, encl, flags);
>  	entry->epc_page = NULL;
> @@ -238,7 +238,7 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
>  	struct sgx_encl_page *entry;
>  	struct sgx_encl_page *tmp;
>  	struct page *pages[SGX_NR_SWAP_CLUSTER_MAX + 1];
> -	struct vm_area_struct *evma;
> +	struct vm_area_struct *vma;
>  	int cnt = 0;
>  	int i = 0;
>  	int ret;
> @@ -261,13 +261,15 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
>  		return;
>  	}
>  
> -	/* EBLOCK */
> +	mutex_lock(&encl->eviction_lock);
>  
> +	/* EBLOCK */
>  	list_for_each_entry_safe(entry, tmp, src, load_list) {
> -		mutex_lock(&encl->lock);
> -		evma = sgx_find_vma(encl, entry->addr);
> -		if (!evma) {
> +		vma = sgx_find_vma(encl, entry->addr);
> +		if (!vma) {
>  			list_del(&entry->load_list);
> +
> +			mutex_lock(&encl->lock);
>  			sgx_free_encl_page(entry, encl, 0);
>  			mutex_unlock(&encl->lock);
>  			continue;
> @@ -276,36 +278,32 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
>  		pages[cnt] = sgx_get_backing(encl, entry);
>  		if (IS_ERR(pages[cnt])) {
>  			list_del(&entry->load_list);
> +
> +			mutex_lock(&encl->lock);
>  			list_add_tail(&entry->load_list, &encl->load_list);
>  			entry->flags &= ~SGX_ENCL_PAGE_RESERVED;
>  			mutex_unlock(&encl->lock);
>  			continue;
>  		}
>  
> -		zap_vma_ptes(evma, entry->addr, PAGE_SIZE);
> +		zap_vma_ptes(vma, entry->addr, PAGE_SIZE);
>  		sgx_eblock(entry->epc_page);
>  		cnt++;
> -		mutex_unlock(&encl->lock);
>  	}
>  
> -	/* ETRACK */
> +	if (!cnt) {
> +		mutex_unlock(&encl->eviction_lock);
> +		goto out;
> +	}

Unrelated change. Should be removed or sent as a separate patch if 
you think that this complication of the code flow gives value.

> -	mutex_lock(&encl->lock);
> +	/* ETRACK */
>  	sgx_etrack(encl->secs_page.epc_page);
> -	mutex_unlock(&encl->lock);
>  
>  	/* EWB */
> -
> -	mutex_lock(&encl->lock);
>  	i = 0;
> -
> -	while (!list_empty(src)) {
> -		entry = list_first_entry(src, struct sgx_encl_page,
> -					 load_list);
> -		list_del(&entry->load_list);
> -
> -		evma = sgx_find_vma(encl, entry->addr);
> -		if (evma) {
> +	list_for_each_entry(entry, src, load_list) {
> +		vma = sgx_find_vma(encl, entry->addr);
> +		if (vma) {
>  			ret = sgx_ewb(encl, entry, pages[i]);
>  			BUG_ON(ret != 0 && ret != SGX_NOT_TRACKED);
>  			/* Only kick out threads with an IPI if needed. */
> @@ -313,15 +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
>  				smp_call_function(sgx_ipi_cb, NULL, 1);
>  				BUG_ON(sgx_ewb(encl, entry, pages[i]));
>  			}
> -			encl->secs_child_cnt--;
>  		}
> +		sgx_put_backing(pages[i++], vma);
> +	}
> +
> +	mutex_unlock(&encl->eviction_lock);
> +
> +	mutex_lock(&encl->lock);
> +
> +	/* Bookeeping */

What this comment tells that is non-obvious?

> +	while (!list_empty(src)) {
> +		entry = list_first_entry(src, struct sgx_encl_page,
> +					 load_list);
> +		list_del(&entry->load_list);
>  
>  		sgx_free_encl_page(entry, encl,
> -				   evma ? SGX_FREE_SKIP_EREMOVE : 0);
> -		sgx_put_backing(pages[i++], evma);
> +				   vma ? SGX_FREE_SKIP_EREMOVE : 0);
>  	}
>  
> -	/* Allow SECS page eviction only when the encl is initialized. */
> +	encl->secs_child_cnt -= cnt;
> +
> +	/* Allow SECS page eviction only when the encl is initialized.
> +	 * The eviction lock does not need to be held to evict the SECS,
> +	 * conflict is impossible as there are no other pages to evict.
> +	 */
>  	if (!encl->secs_child_cnt &&
>  	    (encl->flags & SGX_ENCL_INITIALIZED)) {
>  		pages[cnt] = sgx_get_backing(encl, &encl->secs_page);
> @@ -336,9 +349,9 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
>  			sgx_put_backing(pages[cnt], true);
>  		}
>  	}
> -
>  	mutex_unlock(&encl->lock);
>  
> +out:

This label won't be needed if the snippet above is removed.

>  	sgx_unpin_mm(encl);
>  }
>  
> -- 
> 2.7.4
> 
> _______________________________________________
> intel-sgx-kernel-dev mailing list
> intel-sgx-kernel-dev@lists.01.org
> https://lists.01.org/mailman/listinfo/intel-sgx-kernel-dev

This is now worse than it was. I would rather apply the original
patch (with that code flow change removed).

/Jarkko
Sean Christopherson Dec. 1, 2016, 3:46 p.m. UTC | #2
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct
> > list_head *src) smp_call_function(sgx_ipi_cb, NULL, 1);
> >  				BUG_ON(sgx_ewb(encl, entry, pages[i]));
> >  			}
> > -			encl->secs_child_cnt--;
> >  		}
> > +		sgx_put_backing(pages[i++], vma);
> > +	}
> > +
> > +	mutex_unlock(&encl->eviction_lock);
> > +
> > +	mutex_lock(&encl->lock);
> > +
> > +	/* Bookeeping */
> 
> What this comment tells that is non-obvious?

Just trying to keep with the style of the rest of the function.  I would describe this section as less obvious than the EBLOCK, ETRACK and EWB sections.


> This is now worse than it was. I would rather apply the original patch (with
> that code flow change removed).

Can you elaborate as to how this is worse than holding the generic enclave lock for the entire flow?  I can do more in-depth analysis, but my initial testing showed that not holding encl->lock during eviction significantly reduced enclave startup time due to not blocking sgx_add_page_worker.
Jarkko Sakkinen Dec. 1, 2016, 7:55 p.m. UTC | #3
On Thu, Dec 01, 2016 at 03:46:11PM +0000, Christopherson, Sean J wrote:
> Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > > +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct
> > > list_head *src) smp_call_function(sgx_ipi_cb, NULL, 1);
> > >  				BUG_ON(sgx_ewb(encl, entry, pages[i]));
> > >  			}
> > > -			encl->secs_child_cnt--;
> > >  		}
> > > +		sgx_put_backing(pages[i++], vma);
> > > +	}
> > > +
> > > +	mutex_unlock(&encl->eviction_lock);
> > > +
> > > +	mutex_lock(&encl->lock);
> > > +
> > > +	/* Bookeeping */
> > 
> > What this comment tells that is non-obvious?
> 
> Just trying to keep with the style of the rest of the function.  I would describe this section as less obvious than the EBLOCK, ETRACK and EWB sections.
> 
> 
> > This is now worse than it was. I would rather apply the original patch (with
> > that code flow change removed).
> 
> Can you elaborate as to how this is worse than holding the generic
> enclave lock for the entire flow?  I can do more in-depth analysis,
> but my initial testing showed that not holding encl->lock during
> eviction significantly reduced enclave startup time due to not
> blocking sgx_add_page_worker.

I do not want a bug fix and potential performance optimization mixed
together. Even if this was considered it is now a mess. The bug
fix does not require adding eviction_lock. So even if I would accept
that optimization I never would accept this patch.

/Jarkko
Jarkko Sakkinen Dec. 2, 2016, 4:21 p.m. UTC | #4
On Thu, Dec 01, 2016 at 09:55:24PM +0200, Jarkko Sakkinen wrote:
> On Thu, Dec 01, 2016 at 03:46:11PM +0000, Christopherson, Sean J wrote:
> > Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > > > +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct
> > > > list_head *src) smp_call_function(sgx_ipi_cb, NULL, 1);
> > > >  				BUG_ON(sgx_ewb(encl, entry, pages[i]));
> > > >  			}
> > > > -			encl->secs_child_cnt--;
> > > >  		}
> > > > +		sgx_put_backing(pages[i++], vma);
> > > > +	}
> > > > +
> > > > +	mutex_unlock(&encl->eviction_lock);
> > > > +
> > > > +	mutex_lock(&encl->lock);
> > > > +
> > > > +	/* Bookeeping */
> > > 
> > > What this comment tells that is non-obvious?
> > 
> > Just trying to keep with the style of the rest of the function.  I would describe this section as less obvious than the EBLOCK, ETRACK and EWB sections.
> > 
> > 
> > > This is now worse than it was. I would rather apply the original patch (with
> > > that code flow change removed).
> > 
> > Can you elaborate as to how this is worse than holding the generic
> > enclave lock for the entire flow?  I can do more in-depth analysis,
> > but my initial testing showed that not holding encl->lock during
> > eviction significantly reduced enclave startup time due to not
> > blocking sgx_add_page_worker.
> 
> I do not want a bug fix and potential performance optimization mixed
> together. Even if this was considered it is now a mess. The bug
> fix does not require adding eviction_lock. So even if I would accept
> that optimization I never would accept this patch.

I'm happy to review the patch after all those fixes in queue have been
merged. In order to do that I need tested/reviewed-by's (or review
feedback if there's still some problem) to those patches that have me
as the author because I cannot review my own patches.

/Jarkko

Patch
diff mbox

diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
index 464763d..d837789 100644
--- a/drivers/platform/x86/intel_sgx.h
+++ b/drivers/platform/x86/intel_sgx.h
@@ -137,6 +137,7 @@  struct sgx_encl {
 	unsigned int secs_child_cnt;
 	unsigned int vma_cnt;
 	struct mutex lock;
+	struct mutex eviction_lock;
 	struct task_struct *owner;
 	struct mm_struct *mm;
 	struct file *backing;
diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c
index d54c410..72fe9aa 100644
--- a/drivers/platform/x86/intel_sgx_ioctl.c
+++ b/drivers/platform/x86/intel_sgx_ioctl.c
@@ -522,6 +522,7 @@  static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd,
 	INIT_LIST_HEAD(&encl->load_list);
 	INIT_LIST_HEAD(&encl->encl_list);
 	mutex_init(&encl->lock);
+	mutex_init(&encl->eviction_lock);
 	INIT_WORK(&encl->add_page_work, sgx_add_page_worker);
 
 	encl->owner = current->group_leader;
diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c
index 8b1cc82..ce60b76 100644
--- a/drivers/platform/x86/intel_sgx_page_cache.c
+++ b/drivers/platform/x86/intel_sgx_page_cache.c
@@ -224,9 +224,9 @@  static int sgx_ewb(struct sgx_encl *encl,
 	return ret;
 }
 
-void sgx_free_encl_page(struct sgx_encl_page *entry,
-		    struct sgx_encl *encl,
-		    unsigned int flags)
+static void sgx_free_encl_page(struct sgx_encl_page *entry,
+			       struct sgx_encl *encl,
+			       unsigned int flags)
 {
 	sgx_free_page(entry->epc_page, encl, flags);
 	entry->epc_page = NULL;
@@ -238,7 +238,7 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 	struct sgx_encl_page *entry;
 	struct sgx_encl_page *tmp;
 	struct page *pages[SGX_NR_SWAP_CLUSTER_MAX + 1];
-	struct vm_area_struct *evma;
+	struct vm_area_struct *vma;
 	int cnt = 0;
 	int i = 0;
 	int ret;
@@ -261,13 +261,15 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 		return;
 	}
 
-	/* EBLOCK */
+	mutex_lock(&encl->eviction_lock);
 
+	/* EBLOCK */
 	list_for_each_entry_safe(entry, tmp, src, load_list) {
-		mutex_lock(&encl->lock);
-		evma = sgx_find_vma(encl, entry->addr);
-		if (!evma) {
+		vma = sgx_find_vma(encl, entry->addr);
+		if (!vma) {
 			list_del(&entry->load_list);
+
+			mutex_lock(&encl->lock);
 			sgx_free_encl_page(entry, encl, 0);
 			mutex_unlock(&encl->lock);
 			continue;
@@ -276,36 +278,32 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 		pages[cnt] = sgx_get_backing(encl, entry);
 		if (IS_ERR(pages[cnt])) {
 			list_del(&entry->load_list);
+
+			mutex_lock(&encl->lock);
 			list_add_tail(&entry->load_list, &encl->load_list);
 			entry->flags &= ~SGX_ENCL_PAGE_RESERVED;
 			mutex_unlock(&encl->lock);
 			continue;
 		}
 
-		zap_vma_ptes(evma, entry->addr, PAGE_SIZE);
+		zap_vma_ptes(vma, entry->addr, PAGE_SIZE);
 		sgx_eblock(entry->epc_page);
 		cnt++;
-		mutex_unlock(&encl->lock);
 	}
 
-	/* ETRACK */
+	if (!cnt) {
+		mutex_unlock(&encl->eviction_lock);
+		goto out;
+	}
 
-	mutex_lock(&encl->lock);
+	/* ETRACK */
 	sgx_etrack(encl->secs_page.epc_page);
-	mutex_unlock(&encl->lock);
 
 	/* EWB */
-
-	mutex_lock(&encl->lock);
 	i = 0;
-
-	while (!list_empty(src)) {
-		entry = list_first_entry(src, struct sgx_encl_page,
-					 load_list);
-		list_del(&entry->load_list);
-
-		evma = sgx_find_vma(encl, entry->addr);
-		if (evma) {
+	list_for_each_entry(entry, src, load_list) {
+		vma = sgx_find_vma(encl, entry->addr);
+		if (vma) {
 			ret = sgx_ewb(encl, entry, pages[i]);
 			BUG_ON(ret != 0 && ret != SGX_NOT_TRACKED);
 			/* Only kick out threads with an IPI if needed. */
@@ -313,15 +311,30 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 				smp_call_function(sgx_ipi_cb, NULL, 1);
 				BUG_ON(sgx_ewb(encl, entry, pages[i]));
 			}
-			encl->secs_child_cnt--;
 		}
+		sgx_put_backing(pages[i++], vma);
+	}
+
+	mutex_unlock(&encl->eviction_lock);
+
+	mutex_lock(&encl->lock);
+
+	/* Bookeeping */
+	while (!list_empty(src)) {
+		entry = list_first_entry(src, struct sgx_encl_page,
+					 load_list);
+		list_del(&entry->load_list);
 
 		sgx_free_encl_page(entry, encl,
-				   evma ? SGX_FREE_SKIP_EREMOVE : 0);
-		sgx_put_backing(pages[i++], evma);
+				   vma ? SGX_FREE_SKIP_EREMOVE : 0);
 	}
 
-	/* Allow SECS page eviction only when the encl is initialized. */
+	encl->secs_child_cnt -= cnt;
+
+	/* Allow SECS page eviction only when the encl is initialized.
+	 * The eviction lock does not need to be held to evict the SECS,
+	 * conflict is impossible as there are no other pages to evict.
+	 */
 	if (!encl->secs_child_cnt &&
 	    (encl->flags & SGX_ENCL_INITIALIZED)) {
 		pages[cnt] = sgx_get_backing(encl, &encl->secs_page);
@@ -336,9 +349,9 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 			sgx_put_backing(pages[cnt], true);
 		}
 	}
-
 	mutex_unlock(&encl->lock);
 
+out:
 	sgx_unpin_mm(encl);
 }