[intel-sgx-kernel-dev,v8,01/10] intel_sgx: fallback more gracefully from EWB failure
diff mbox

Message ID 20161208123828.21834-2-jarkko.sakkinen@linux.intel.com
State New
Headers show

Commit Message

Jarkko Sakkinen Dec. 8, 2016, 12:38 p.m. UTC
Fall back from an EWB failure by killing the enclave (zeroing its TCS PTEs
and kicking out its threads) instead of crashing the driver with BUG_ON().

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/platform/x86/intel_sgx_page_cache.c | 57 ++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 21 deletions(-)

Comments

Sean Christopherson Dec. 13, 2016, 7:24 p.m. UTC | #1
On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote:
> Fallback from EWB failure by killing the enclave by zeroing TCS PTEs
> and kicking out threads instead of crashing the driver with BUG_ON().
> 
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  drivers/platform/x86/intel_sgx_page_cache.c | 57 ++++++++++++++++++-----------
>  1 file changed, 36 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c
> index 8b1cc82..0f63060 100644
> --- a/drivers/platform/x86/intel_sgx_page_cache.c
> +++ b/drivers/platform/x86/intel_sgx_page_cache.c
> @@ -195,9 +195,9 @@ static void sgx_etrack(struct sgx_epc_page *epc_page)
>  	sgx_put_epc_page(epc);
>  }
>  
> -static int sgx_ewb(struct sgx_encl *encl,
> -		   struct sgx_encl_page *encl_page,
> -		   struct page *backing)
> +static int __sgx_ewb(struct sgx_encl *encl,
> +		     struct sgx_encl_page *encl_page,
> +		     struct page *backing)
>  {
>  	struct sgx_page_info pginfo;
>  	void *epc;
> @@ -218,12 +218,31 @@ static int sgx_ewb(struct sgx_encl *encl,
>  	sgx_put_epc_page(epc);
>  	kunmap_atomic((void *)(unsigned long)pginfo.srcpge);
>  
> -	if (ret != 0 && ret != SGX_NOT_TRACKED)
> -		sgx_err(encl, "EWB returned %d\n", ret);
> -
>  	return ret;
>  }
>  
> +static bool sgx_ewb(struct sgx_encl *encl,
> +		    struct sgx_encl_page *entry,
> +		    struct page *backing)
> +{
> +	int ret = __sgx_ewb(encl, entry, backing);
> +
> +	/* Only kick out threads with an IPI if needed. */
> +	if (ret == SGX_NOT_TRACKED) {
> +		smp_call_function(sgx_ipi_cb, NULL, 1);
> +		ret = __sgx_ewb(encl, entry, backing);
> +	}
> +
> +	if (ret) {
> +		/* Make enclave inaccessible. */
> +		sgx_invalidate(encl);
> +		smp_call_function(sgx_ipi_cb, NULL, 1);
> +		return false;
> +	}
> +
> +	return true;
> +}
> +

If __sgx_ewb() fails after sending an IPI, shouldn't we display a kernel
warning, unload the driver and prevent reloading the driver until the
system is rebooted? A failing EWB after a system-wide IPI would indicate a
hardware or kernel bug; in either case, the user/admin should be alerted
and SGX should be effectively disabled. Such an abort flow could also be
used to replace the other BUG/BUG_ON calls.

Jarkko Sakkinen Dec. 14, 2016, 1:49 p.m. UTC | #2
On Tue, Dec 13, 2016 at 11:24:08AM -0800, Sean Christopherson wrote:
> On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote:
> > Fallback from EWB failure by killing the enclave by zeroing TCS PTEs
> > and kicking out threads instead of crashing the driver with BUG_ON().
> > 
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> > ---
> >  drivers/platform/x86/intel_sgx_page_cache.c | 57 ++++++++++++++++++-----------
> >  1 file changed, 36 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c
> > index 8b1cc82..0f63060 100644
> > --- a/drivers/platform/x86/intel_sgx_page_cache.c
> > +++ b/drivers/platform/x86/intel_sgx_page_cache.c
> > @@ -195,9 +195,9 @@ static void sgx_etrack(struct sgx_epc_page *epc_page)
> >  	sgx_put_epc_page(epc);
> >  }
> >  
> > -static int sgx_ewb(struct sgx_encl *encl,
> > -		   struct sgx_encl_page *encl_page,
> > -		   struct page *backing)
> > +static int __sgx_ewb(struct sgx_encl *encl,
> > +		     struct sgx_encl_page *encl_page,
> > +		     struct page *backing)
> >  {
> >  	struct sgx_page_info pginfo;
> >  	void *epc;
> > @@ -218,12 +218,31 @@ static int sgx_ewb(struct sgx_encl *encl,
> >  	sgx_put_epc_page(epc);
> >  	kunmap_atomic((void *)(unsigned long)pginfo.srcpge);
> >  
> > -	if (ret != 0 && ret != SGX_NOT_TRACKED)
> > -		sgx_err(encl, "EWB returned %d\n", ret);
> > -
> >  	return ret;
> >  }
> >  
> > +static bool sgx_ewb(struct sgx_encl *encl,
> > +		    struct sgx_encl_page *entry,
> > +		    struct page *backing)
> > +{
> > +	int ret = __sgx_ewb(encl, entry, backing);
> > +
> > +	/* Only kick out threads with an IPI if needed. */
> > +	if (ret == SGX_NOT_TRACKED) {
> > +		smp_call_function(sgx_ipi_cb, NULL, 1);
> > +		ret = __sgx_ewb(encl, entry, backing);
> > +	}
> > +
> > +	if (ret) {
> > +		/* Make enclave inaccessible. */
> > +		sgx_invalidate(encl);
> > +		smp_call_function(sgx_ipi_cb, NULL, 1);
> > +		return false;
> > +	}
> > +
> > +	return true;
> > +}
> > +
> 
> If __sgx_ewb() fails after sending an IPI, shouldn't we display a
> kernel warning, unload the driver and prevent reloading the driver
> until the system is rebooted?  A failing EWB after a system wide IPI
> would indicate a hardware or kernel bug; in either case, the

Showing an error message makes sense. Other actions are up to the sysadmin.

> user/admin should be alerted and SGX should be effectively disabled.
>  Such an abort flow could also be used to replace the other BUG/BUG_ON
> calls.

The situations are very different in terms of context. Aborting is
not a feature.

/Jarkko

Patch
diff mbox

diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c
index 8b1cc82..0f63060 100644
--- a/drivers/platform/x86/intel_sgx_page_cache.c
+++ b/drivers/platform/x86/intel_sgx_page_cache.c
@@ -195,9 +195,9 @@  static void sgx_etrack(struct sgx_epc_page *epc_page)
 	sgx_put_epc_page(epc);
 }
 
-static int sgx_ewb(struct sgx_encl *encl,
-		   struct sgx_encl_page *encl_page,
-		   struct page *backing)
+static int __sgx_ewb(struct sgx_encl *encl,
+		     struct sgx_encl_page *encl_page,
+		     struct page *backing)
 {
 	struct sgx_page_info pginfo;
 	void *epc;
@@ -218,12 +218,31 @@  static int sgx_ewb(struct sgx_encl *encl,
 	sgx_put_epc_page(epc);
 	kunmap_atomic((void *)(unsigned long)pginfo.srcpge);
 
-	if (ret != 0 && ret != SGX_NOT_TRACKED)
-		sgx_err(encl, "EWB returned %d\n", ret);
-
 	return ret;
 }
 
+static bool sgx_ewb(struct sgx_encl *encl,
+		    struct sgx_encl_page *entry,
+		    struct page *backing)
+{
+	int ret = __sgx_ewb(encl, entry, backing);
+
+	/* Only kick out threads with an IPI if needed. */
+	if (ret == SGX_NOT_TRACKED) {
+		smp_call_function(sgx_ipi_cb, NULL, 1);
+		ret = __sgx_ewb(encl, entry, backing);
+	}
+
+	if (ret) {
+		/* Make enclave inaccessible. */
+		sgx_invalidate(encl);
+		smp_call_function(sgx_ipi_cb, NULL, 1);
+		return false;
+	}
+
+	return true;
+}
+
 void sgx_free_encl_page(struct sgx_encl_page *entry,
 		    struct sgx_encl *encl,
 		    unsigned int flags)
@@ -239,9 +258,9 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 	struct sgx_encl_page *tmp;
 	struct page *pages[SGX_NR_SWAP_CLUSTER_MAX + 1];
 	struct vm_area_struct *evma;
+	unsigned int free_flags;
 	int cnt = 0;
 	int i = 0;
-	int ret;
 
 	if (list_empty(src))
 		return;
@@ -304,20 +323,16 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 					 load_list);
 		list_del(&entry->load_list);
 
+		free_flags = 0;
+
 		evma = sgx_find_vma(encl, entry->addr);
 		if (evma) {
-			ret = sgx_ewb(encl, entry, pages[i]);
-			BUG_ON(ret != 0 && ret != SGX_NOT_TRACKED);
-			/* Only kick out threads with an IPI if needed. */
-			if (ret) {
-				smp_call_function(sgx_ipi_cb, NULL, 1);
-				BUG_ON(sgx_ewb(encl, entry, pages[i]));
-			}
+			if (sgx_ewb(encl, entry, pages[i]))
+				free_flags = SGX_FREE_SKIP_EREMOVE;
 			encl->secs_child_cnt--;
 		}
 
-		sgx_free_encl_page(entry, encl,
-				   evma ? SGX_FREE_SKIP_EREMOVE : 0);
+		sgx_free_encl_page(entry, encl, free_flags);
 		sgx_put_backing(pages[i++], evma);
 	}
 
@@ -326,13 +341,13 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 	    (encl->flags & SGX_ENCL_INITIALIZED)) {
 		pages[cnt] = sgx_get_backing(encl, &encl->secs_page);
 		if (!IS_ERR(pages[cnt])) {
-			ret = sgx_ewb(encl, &encl->secs_page,
-				      pages[cnt]);
-			BUG_ON(ret);
+			free_flags = 0;
+			if (sgx_ewb(encl, &encl->secs_page, pages[cnt]))
+				free_flags = SGX_FREE_SKIP_EREMOVE;
+
 			encl->flags |= SGX_ENCL_SECS_EVICTED;
 
-			sgx_free_encl_page(&encl->secs_page, encl,
-					      SGX_FREE_SKIP_EREMOVE);
+			sgx_free_encl_page(&encl->secs_page, encl, free_flags);
 			sgx_put_backing(pages[cnt], true);
 		}
 	}