diff mbox

[intel-sgx-kernel-dev] intel_sgx: correctly handle vm_insert_pfn failure

Message ID 1490213241-8017-1-git-send-email-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sean Christopherson March 22, 2017, 8:07 p.m. UTC
Update the EPC page tracking immediately after sgx_eldu, and retry
vm_insert_pfn on a future fault if vm_insert_pfn fails.  Previously
we tried to EREMOVE the EPC page if vm_insert_pfn return an error,
but EREMOVE fails if there are active cpus in the enclave, in which
case the driver would effectively lose track of the EPC page.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 drivers/platform/x86/intel_sgx.h      |  1 +
 drivers/platform/x86/intel_sgx_util.c | 25 +++++++++++++++++--------
 2 files changed, 18 insertions(+), 8 deletions(-)

Comments

Jarkko Sakkinen March 23, 2017, 3:44 p.m. UTC | #1
On Wed, Mar 22, 2017 at 01:07:21PM -0700, Sean Christopherson wrote:
> Update the EPC page tracking immediately after sgx_eldu, and retry
> vm_insert_pfn on a future fault if vm_insert_pfn fails.  Previously
> we tried to EREMOVE the EPC page if vm_insert_pfn return an error,
> but EREMOVE fails if there are active cpus in the enclave, in which
> case the driver would effectively lose track of the EPC page.
> 
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

I'll prioritize reviewing this patch it is a fix for a regression.


> ---
>  drivers/platform/x86/intel_sgx.h      |  1 +
>  drivers/platform/x86/intel_sgx_util.c | 25 +++++++++++++++++--------
>  2 files changed, 18 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
> index adb5b17..8b14b1f 100644
> --- a/drivers/platform/x86/intel_sgx.h
> +++ b/drivers/platform/x86/intel_sgx.h
> @@ -106,6 +106,7 @@ static inline void sgx_free_va_slot(struct sgx_va_page *page,
>  enum sgx_encl_page_flags {
>  	SGX_ENCL_PAGE_TCS	= BIT(0),
>  	SGX_ENCL_PAGE_RESERVED	= BIT(1),
> +	SGX_ENCL_PAGE_PTE_VALID	= BIT(2),
>  };
>  
>  struct sgx_encl_page {
> diff --git a/drivers/platform/x86/intel_sgx_util.c b/drivers/platform/x86/intel_sgx_util.c
> index 234a5fb..096d33c 100644
> --- a/drivers/platform/x86/intel_sgx_util.c
> +++ b/drivers/platform/x86/intel_sgx_util.c
> @@ -340,6 +340,8 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
>  
>  	/* Legal race condition, page is already faulted. */
>  	if (entry->epc_page) {
> +		if (!(entry->flags & SGX_ENCL_PAGE_PTE_VALID))
> +			goto insert_pfn;

Please create a helper function sgx_fault_insert_pfn() and call it here.
It should do all the checks and always called. If the PTE is valid it
can just return zero.

/Jarkko
Jarkko Sakkinen March 31, 2017, 8:07 a.m. UTC | #2
On Thu, Mar 23, 2017 at 05:44:13PM +0200, Jarkko Sakkinen wrote:
> On Wed, Mar 22, 2017 at 01:07:21PM -0700, Sean Christopherson wrote:
> > Update the EPC page tracking immediately after sgx_eldu, and retry
> > vm_insert_pfn on a future fault if vm_insert_pfn fails.  Previously
> > we tried to EREMOVE the EPC page if vm_insert_pfn return an error,
> > but EREMOVE fails if there are active cpus in the enclave, in which
> > case the driver would effectively lose track of the EPC page.
> > 
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> 
> I'll prioritize reviewing this patch it is a fix for a regression.
> 
> 
> > ---
> >  drivers/platform/x86/intel_sgx.h      |  1 +
> >  drivers/platform/x86/intel_sgx_util.c | 25 +++++++++++++++++--------
> >  2 files changed, 18 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
> > index adb5b17..8b14b1f 100644
> > --- a/drivers/platform/x86/intel_sgx.h
> > +++ b/drivers/platform/x86/intel_sgx.h
> > @@ -106,6 +106,7 @@ static inline void sgx_free_va_slot(struct sgx_va_page *page,
> >  enum sgx_encl_page_flags {
> >  	SGX_ENCL_PAGE_TCS	= BIT(0),
> >  	SGX_ENCL_PAGE_RESERVED	= BIT(1),
> > +	SGX_ENCL_PAGE_PTE_VALID	= BIT(2),
> >  };
> >  
> >  struct sgx_encl_page {
> > diff --git a/drivers/platform/x86/intel_sgx_util.c b/drivers/platform/x86/intel_sgx_util.c
> > index 234a5fb..096d33c 100644
> > --- a/drivers/platform/x86/intel_sgx_util.c
> > +++ b/drivers/platform/x86/intel_sgx_util.c
> > @@ -340,6 +340,8 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
> >  
> >  	/* Legal race condition, page is already faulted. */
> >  	if (entry->epc_page) {
> > +		if (!(entry->flags & SGX_ENCL_PAGE_PTE_VALID))
> > +			goto insert_pfn;
> 
> Please create a helper function sgx_fault_insert_pfn() and call it here.
> It should do all the checks and always called. If the PTE is valid it
> can just return zero.
> 
> /Jarkko

When are you going to revise this or are you going to revise it?

I'll start looking into other patches only after this has been merged.

/Jarkko
Sean Christopherson March 31, 2017, 2:34 p.m. UTC | #3
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> On Thu, Mar 23, 2017 at 05:44:13PM +0200, Jarkko Sakkinen wrote:
> > On Wed, Mar 22, 2017 at 01:07:21PM -0700, Sean Christopherson wrote:
> > > Update the EPC page tracking immediately after sgx_eldu, and retry
> > > vm_insert_pfn on a future fault if vm_insert_pfn fails.  Previously
> > > we tried to EREMOVE the EPC page if vm_insert_pfn return an error,
> > > but EREMOVE fails if there are active cpus in the enclave, in which
> > > case the driver would effectively lose track of the EPC page.
> > > 
> > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > 
> > I'll prioritize reviewing this patch it is a fix for a regression.
> > 
> > 
> > > ---
> > >  drivers/platform/x86/intel_sgx.h      |  1 +
> > >  drivers/platform/x86/intel_sgx_util.c | 25 +++++++++++++++++--------
> > >  2 files changed, 18 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/drivers/platform/x86/intel_sgx.h
> > > b/drivers/platform/x86/intel_sgx.h index adb5b17..8b14b1f 100644
> > > --- a/drivers/platform/x86/intel_sgx.h
> > > +++ b/drivers/platform/x86/intel_sgx.h
> > > @@ -106,6 +106,7 @@ static inline void sgx_free_va_slot(struct
> > > sgx_va_page *page, enum sgx_encl_page_flags {
> > >  	SGX_ENCL_PAGE_TCS	= BIT(0),
> > >  	SGX_ENCL_PAGE_RESERVED	= BIT(1),
> > > +	SGX_ENCL_PAGE_PTE_VALID	= BIT(2),
> > >  };
> > >  
> > >  struct sgx_encl_page {
> > > diff --git a/drivers/platform/x86/intel_sgx_util.c
> > > b/drivers/platform/x86/intel_sgx_util.c index 234a5fb..096d33c 100644
> > > --- a/drivers/platform/x86/intel_sgx_util.c
> > > +++ b/drivers/platform/x86/intel_sgx_util.c
> > > @@ -340,6 +340,8 @@ static struct sgx_encl_page *sgx_do_fault(struct
> > > vm_area_struct *vma, 
> > >  	/* Legal race condition, page is already faulted. */
> > >  	if (entry->epc_page) {
> > > +		if (!(entry->flags & SGX_ENCL_PAGE_PTE_VALID))
> > > +			goto insert_pfn;
> > 
> > Please create a helper function sgx_fault_insert_pfn() and call it here.
> > It should do all the checks and always called. If the PTE is valid it
> > can just return zero.
> > 
> > /Jarkko
> 
> When are you going to revise this or are you going to revise it?
> 
> I'll start looking into other patches only after this has been merged.
> 
> /Jarkko

Yikes, sorry, I completely missed your previous comment.  I'll look at this ASAP.
Jarkko Sakkinen March 31, 2017, 4:44 p.m. UTC | #4
On Fri, Mar 31, 2017 at 02:34:29PM +0000, Christopherson, Sean J wrote:
> Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > On Thu, Mar 23, 2017 at 05:44:13PM +0200, Jarkko Sakkinen wrote:
> > > On Wed, Mar 22, 2017 at 01:07:21PM -0700, Sean Christopherson wrote:
> > > > Update the EPC page tracking immediately after sgx_eldu, and retry
> > > > vm_insert_pfn on a future fault if vm_insert_pfn fails.  Previously
> > > > we tried to EREMOVE the EPC page if vm_insert_pfn return an error,
> > > > but EREMOVE fails if there are active cpus in the enclave, in which
> > > > case the driver would effectively lose track of the EPC page.
> > > > 
> > > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > > 
> > > I'll prioritize reviewing this patch it is a fix for a regression.
> > > 
> > > 
> > > > ---
> > > >  drivers/platform/x86/intel_sgx.h      |  1 +
> > > >  drivers/platform/x86/intel_sgx_util.c | 25 +++++++++++++++++--------
> > > >  2 files changed, 18 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/drivers/platform/x86/intel_sgx.h
> > > > b/drivers/platform/x86/intel_sgx.h index adb5b17..8b14b1f 100644
> > > > --- a/drivers/platform/x86/intel_sgx.h
> > > > +++ b/drivers/platform/x86/intel_sgx.h
> > > > @@ -106,6 +106,7 @@ static inline void sgx_free_va_slot(struct
> > > > sgx_va_page *page, enum sgx_encl_page_flags {
> > > >  	SGX_ENCL_PAGE_TCS	= BIT(0),
> > > >  	SGX_ENCL_PAGE_RESERVED	= BIT(1),
> > > > +	SGX_ENCL_PAGE_PTE_VALID	= BIT(2),
> > > >  };
> > > >  
> > > >  struct sgx_encl_page {
> > > > diff --git a/drivers/platform/x86/intel_sgx_util.c
> > > > b/drivers/platform/x86/intel_sgx_util.c index 234a5fb..096d33c 100644
> > > > --- a/drivers/platform/x86/intel_sgx_util.c
> > > > +++ b/drivers/platform/x86/intel_sgx_util.c
> > > > @@ -340,6 +340,8 @@ static struct sgx_encl_page *sgx_do_fault(struct
> > > > vm_area_struct *vma, 
> > > >  	/* Legal race condition, page is already faulted. */
> > > >  	if (entry->epc_page) {
> > > > +		if (!(entry->flags & SGX_ENCL_PAGE_PTE_VALID))
> > > > +			goto insert_pfn;
> > > 
> > > Please create a helper function sgx_fault_insert_pfn() and call it here.
> > > It should do all the checks and always called. If the PTE is valid it
> > > can just return zero.
> > > 
> > > /Jarkko
> > 
> > When are you going to revise this or are you going to revise it?
> > 
> > I'll start looking into other patches only after this has been merged.
> > 
> > /Jarkko
> 
> Yikes, sorry, I completely missed your previous comment.  I'll look at this ASAP.

Thank you :-) No worries.

/Jarkko
diff mbox

Patch

diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
index adb5b17..8b14b1f 100644
--- a/drivers/platform/x86/intel_sgx.h
+++ b/drivers/platform/x86/intel_sgx.h
@@ -106,6 +106,7 @@  static inline void sgx_free_va_slot(struct sgx_va_page *page,
 enum sgx_encl_page_flags {
 	SGX_ENCL_PAGE_TCS	= BIT(0),
 	SGX_ENCL_PAGE_RESERVED	= BIT(1),
+	SGX_ENCL_PAGE_PTE_VALID	= BIT(2),
 };
 
 struct sgx_encl_page {
diff --git a/drivers/platform/x86/intel_sgx_util.c b/drivers/platform/x86/intel_sgx_util.c
index 234a5fb..096d33c 100644
--- a/drivers/platform/x86/intel_sgx_util.c
+++ b/drivers/platform/x86/intel_sgx_util.c
@@ -340,6 +340,8 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 
 	/* Legal race condition, page is already faulted. */
 	if (entry->epc_page) {
+		if (!(entry->flags & SGX_ENCL_PAGE_PTE_VALID))
+			goto insert_pfn;
 		if (reserve)
 			entry->flags |= SGX_ENCL_PAGE_RESERVED;
 		goto out;
@@ -369,22 +371,29 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 	if (rc)
 		goto out;
 
-	rc = vm_insert_pfn(vma, entry->addr, PFN_DOWN(epc_page->pa));
-	if (rc)
-		goto out;
-
+        /* Track the page, even if vm_insert_pfn fails.  We can't EREMOVE
+         * the page because EREMOVE may fail due to an active cpu in the
+         * enclave.  We can't call vm_insert_pfn before sgx_eldu because
+         * SKL platforms signal #GP instead of #PF if the EPC page is invalid.
+         */
 	encl->secs_child_cnt++;
 
 	entry->epc_page = epc_page;
-
-	if (reserve)
-		entry->flags |= SGX_ENCL_PAGE_RESERVED;
+	entry->flags &= ~SGX_ENCL_PAGE_PTE_VALID;
 
 	/* Do not free */
 	epc_page = NULL;
+	list_add_tail(&entry->load_list, &encl->load_list);
 
+insert_pfn:
+	rc = vm_insert_pfn(vma, entry->addr, PFN_DOWN(entry->epc_page->pa));
+	if (rc)
+		goto out;
+
+	entry->flags |= SGX_ENCL_PAGE_PTE_VALID;
+	if (reserve)
+		entry->flags |= SGX_ENCL_PAGE_RESERVED;
 	sgx_test_and_clear_young(entry, encl);
-	list_add_tail(&entry->load_list, &encl->load_list);
 out:
 	mutex_unlock(&encl->lock);
 	if (epc_page)