[intel-sgx-kernel-dev,v8,06/10] intel_sgx: kill the enclave when any of its VMAs are closed
diff mbox

Message ID 20161208123828.21834-7-jarkko.sakkinen@linux.intel.com
State New
Headers show

Commit Message

Jarkko Sakkinen Dec. 8, 2016, 12:38 p.m. UTC
ELRANGE protects from non-enclave access but still closing any of the
VMAs leaves the enclave and remaining VMAs to a state that is not wanted
for any legit application using enclaves. Therefore it is in this
situation advisable to kill the whole enclave.

One could also consider hardening the vma_open callback in a way that it
would also kill the enclave if any new VMAs areis created after the
first EADD or in the corner case after the first EINIT (if enclave does
only have the SECS page).

This makes sense because you could enforce that EPCM entries and PTEs
stay in sync, which is a good property. However, most of the time you
want to implement a relocator for the user space that runs inside the
enclave because the ELRANGE position varies based on which address
mmap() gives. Since the enclave code is signed and measured the
relocation can only happen after EINIT.

That's why we will not do such enforcement. The ramifications of this
are not too bad. In the worst case you could end up crashing your user
space process in a safe way (#GP due to EPCM/PTE mismatch).

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/platform/x86/intel_sgx.h       |  2 +-
 drivers/platform/x86/intel_sgx_ioctl.c | 10 +++++---
 drivers/platform/x86/intel_sgx_util.c  | 13 ++++------
 drivers/platform/x86/intel_sgx_vma.c   | 45 +++++++++++++++-------------------
 4 files changed, 32 insertions(+), 38 deletions(-)

Comments

Jarkko Sakkinen Dec. 8, 2016, 3:10 p.m. UTC | #1
On Thu, Dec 08, 2016 at 02:38:24PM +0200, Jarkko Sakkinen wrote:
> ELRANGE protects from non-enclave access but still closing any of the
> VMAs leaves the enclave and remaining VMAs to a state that is not wanted
> for any legit application using enclaves. Therefore it is in this
> situation advisable to kill the whole enclave.
> 
> One could also consider hardening the vma_open callback in a way that it
> would also kill the enclave if any new VMAs areis created after the
> first EADD or in the corner case after the first EINIT (if enclave does
> only have the SECS page).
> 
> This makes sense because you could enforce that EPCM entries and PTEs
> stay in sync, which is a good property. However, most of the time you
> want to implement a relocator for the user space that runs inside the
> enclave because the ELRANGE position varies based on which address
> mmap() gives. Since the enclave code is signed and measured the
> relocation can only happen after EINIT.
> 
> That's why we will not do such enforcement. The ramifications of this
> are not too bad. In the worst case you could end up crashing your user
> space process in a safe way (#GP due to EPCM/PTE mismatch).
> 
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  drivers/platform/x86/intel_sgx.h       |  2 +-
>  drivers/platform/x86/intel_sgx_ioctl.c | 10 +++++---
>  drivers/platform/x86/intel_sgx_util.c  | 13 ++++------
>  drivers/platform/x86/intel_sgx_vma.c   | 45 +++++++++++++++-------------------
>  4 files changed, 32 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
> index 464763d..018cd03 100644
> --- a/drivers/platform/x86/intel_sgx.h
> +++ b/drivers/platform/x86/intel_sgx.h
> @@ -130,12 +130,12 @@ enum sgx_encl_flags {
>  	SGX_ENCL_DEBUG		= BIT(1),
>  	SGX_ENCL_SECS_EVICTED	= BIT(2),
>  	SGX_ENCL_SUSPEND	= BIT(3),
> +	SGX_ENCL_DEAD		= BIT(4),
>  };
>  
>  struct sgx_encl {
>  	unsigned int flags;
>  	unsigned int secs_child_cnt;
> -	unsigned int vma_cnt;
>  	struct mutex lock;
>  	struct task_struct *owner;
>  	struct mm_struct *mm;
> diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c
> index b377200..d0113b0 100644
> --- a/drivers/platform/x86/intel_sgx_ioctl.c
> +++ b/drivers/platform/x86/intel_sgx_ioctl.c
> @@ -255,7 +255,10 @@ static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
>  
>  	mutex_lock(&encl->lock);
>  
> -	if (!encl->vma_cnt || sgx_find_encl(encl->mm, encl_page->addr, &vma))
> +	if (encl->flags & SGX_ENCL_DEAD)
> +		goto out;
> +
> +	if (sgx_find_encl(encl->mm, encl_page->addr, &vma))
>  		goto out;
>  
>  	backing = sgx_get_backing(encl, encl_page);
> @@ -317,7 +320,7 @@ static void sgx_add_page_worker(struct work_struct *work)
>  	do {
>  		schedule();
>  
> -		if (encl->flags & SGX_ENCL_SUSPEND)
> +		if (encl->flags & SGX_ENCL_DEAD)
>  			skip_rest = true;
>  
>  		mutex_lock(&encl->lock);
> @@ -578,7 +581,6 @@ static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd,
>  		up_read(&current->mm->mmap_sem);
>  		goto out;
>  	}
> -	encl->vma_cnt++;
>  	vma->vm_private_data = encl;
>  	up_read(&current->mm->mmap_sem);
>  
> @@ -682,7 +684,7 @@ static int __encl_add_page(struct sgx_encl *encl,
>  
>  	mutex_lock(&encl->lock);
>  
> -	if (encl->flags & SGX_ENCL_INITIALIZED) {
> +	if (encl->flags & (SGX_ENCL_INITIALIZED | SGX_ENCL_DEAD)) {
>  		ret = -EINVAL;
>  		goto out;
>  	}
> diff --git a/drivers/platform/x86/intel_sgx_util.c b/drivers/platform/x86/intel_sgx_util.c
> index 3878d9a..d5238c2 100644
> --- a/drivers/platform/x86/intel_sgx_util.c
> +++ b/drivers/platform/x86/intel_sgx_util.c
> @@ -135,21 +135,18 @@ void sgx_zap_tcs_ptes(struct sgx_encl *encl, struct vm_area_struct *vma)
>  
>  bool sgx_pin_mm(struct sgx_encl *encl)
>  {
> -	if (encl->flags & SGX_ENCL_SUSPEND)
> -		return false;
> -
>  	mutex_lock(&encl->lock);
> -	if (encl->vma_cnt) {
> -		atomic_inc(&encl->mm->mm_count);
> -	} else {
> +	if (encl->flags & SGX_ENCL_DEAD) {
>  		mutex_unlock(&encl->lock);
>  		return false;
>  	}
> +
> +	atomic_inc(&encl->mm->mm_count);
>  	mutex_unlock(&encl->lock);
>  
>  	down_read(&encl->mm->mmap_sem);
>  
> -	if (!encl->vma_cnt) {
> +	if (encl->flags & SGX_ENCL_DEAD) {
>  		sgx_unpin_mm(encl);
>  		return false;
>  	}
> @@ -177,7 +174,7 @@ void sgx_invalidate(struct sgx_encl *encl)
>  			break;
>  	}
>  
> -	encl->vma_cnt = 0;
> +	encl->flags |= SGX_ENCL_DEAD;
>  }
>  
>  int sgx_find_encl(struct mm_struct *mm, unsigned long addr,
> diff --git a/drivers/platform/x86/intel_sgx_vma.c b/drivers/platform/x86/intel_sgx_vma.c
> index 0649978..f273282 100644
> --- a/drivers/platform/x86/intel_sgx_vma.c
> +++ b/drivers/platform/x86/intel_sgx_vma.c
> @@ -70,48 +70,43 @@
>  
>  static void sgx_vma_open(struct vm_area_struct *vma)
>  {
> -	struct sgx_encl *encl;
> +	struct sgx_encl *encl = vma->vm_private_data;
>  
> -	/* Was vm_private_data nullified as a result of the previous fork? */
> -	encl = vma->vm_private_data;
> -	if (!encl)
> -		goto out_fork;
> +	/* When forking for the second time vm_private_data is already set to
> +	 * NULL.
> +	 */
> +	if (!encl) {
> +		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> +		return;
> +	}
>  
> -	/* Was the process forked? mm_struct changes when the process is
> -	 * forked.
> +	/* Invalidate enclave when the process has been forked for the first
> +	 * time or pages have been added because PTEs must bee in sync with
> +	 * the EPCM entries.
>  	 */
>  	mutex_lock(&encl->lock);
>  	if (encl->mm != vma->vm_mm) {
> -		mutex_unlock(&encl->lock);
> -		goto out_fork;
> +		sgx_invalidate(encl);
> +		vma->vm_private_data = NULL;
>  	}
> -	encl->vma_cnt++;
>  	mutex_unlock(&encl->lock);
>  
> -	kref_get(&encl->refcount);
> -	return;
> -out_fork:
> -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> -	vma->vm_private_data = NULL;
> +	if (vma->vm_private_data)
> +		kref_get(&encl->refcount);

For the sake of consistency I will update this for the final patch:

if (encl->flags & SGX_ENCL_DEAD)
	kref_get(&encl->refcount);

/Jarkko
Sean Christopherson Dec. 12, 2016, 6:08 p.m. UTC | #2
On Thu, 2016-12-08 at 17:10 +0200, Jarkko Sakkinen wrote:
> On Thu, Dec 08, 2016 at 02:38:24PM +0200, Jarkko Sakkinen wrote:
> > diff --git a/drivers/platform/x86/intel_sgx_vma.c b/drivers/platform/x86/intel_sgx_vma.c
> > index 0649978..f273282 100644
> > --- a/drivers/platform/x86/intel_sgx_vma.c
> > +++ b/drivers/platform/x86/intel_sgx_vma.c
> > @@ -70,48 +70,43 @@
> >  
> >  static void sgx_vma_open(struct vm_area_struct *vma)
> >  {
> > -	struct sgx_encl *encl;
> > +	struct sgx_encl *encl = vma->vm_private_data;
> >  
> > -	/* Was vm_private_data nullified as a result of the previous fork? */
> > -	encl = vma->vm_private_data;
> > -	if (!encl)
> > -		goto out_fork;
> > +	/* When forking for the second time vm_private_data is already set to
> > +	 * NULL.
> > +	 */
> > +	if (!encl) {
> > +		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > +		return;
> > +	}
> >  
> > -	/* Was the process forked? mm_struct changes when the process is
> > -	 * forked.
> > +	/* Invalidate enclave when the process has been forked for the first
> > +	 * time or pages have been added because PTEs must bee in sync with
> > +	 * the EPCM entries.
> >  	 */
> >  	mutex_lock(&encl->lock);
> >  	if (encl->mm != vma->vm_mm) {
> > -		mutex_unlock(&encl->lock);
> > -		goto out_fork;
> > +		sgx_invalidate(encl);
> > +		vma->vm_private_data = NULL;
> >  	}
> > -	encl->vma_cnt++;
> >  	mutex_unlock(&encl->lock);
> >  
> > -	kref_get(&encl->refcount);
> > -	return;
> > -out_fork:
> > -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > -	vma->vm_private_data = NULL;
> > +	if (vma->vm_private_data)
> > +		kref_get(&encl->refcount);
> For the sake of consistency I will update this for the final patch:
> 
> if (encl->flags & SGX_ENCL_DEAD)
> 	kref_get(&encl->refcount);

I assume you mean "if (!(encl->flags & SGX_ENCL_DEAD))"?
Sean Christopherson Dec. 12, 2016, 6:50 p.m. UTC | #3
On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote:
>  static void sgx_vma_close(struct vm_area_struct *vma)
>  {
>  	struct sgx_encl *encl = vma->vm_private_data;
>  
> -	/* If process was forked, VMA is still there but
> -	 * vm_private_data is set to NULL.
> +	/* When forking for the second time vm_private_data is already set to
> +	 * NULL.
>  	 */
>  	if (!encl)
>  		return;
>  
>  	mutex_lock(&encl->lock);
> -	encl->vma_cnt--;
> -	vma->vm_private_data = NULL;
> -
> -	sgx_zap_tcs_ptes(encl, vma);
> -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> -
> +	sgx_invalidate(encl);
>  	mutex_unlock(&encl->lock);
>  
>  	kref_put(&encl->refcount, sgx_encl_release);


On Fri, 2016-12-02 at 19:49 +0000, Christopherson, Sean J wrote:
> Invoking sgx_invalidate in sgx_vma_close causes NULL pointer faults in find_vma when sgx_vma_close is called via
> do_exit->mmput->exit_mmap->remove_vma.  The exit_mmap flow doesn't dismantle mm->mm_rb, it simply nukes the vma
> structs, i.e. calling find_vma via sgx_vma_close results in walking mm->mm_rb when it contains one or more invalid
> nodes.

Calling sgx_invalidate in sgx_vma_close triggers the exact same null pointer fault that I described in a previous email.
 Referencing the mm is flat out illegal in the vma close callback.



On Wed, 2016-12-07 at 10:31 +0200, Jarkko Sakkinen wrote:
> I'll change this back to using sgx_invalidate() as I had in earlier
> patch version. It was a wrong choice to convert it to this. If the
> enclave is killed, we want to first destroy TCS pages to prevent new
> hardware threads entering and we want to do it for *all* VMAs when the
> enclave becomes get invalidated.

> We do not want to start destroying regular pages before all new HW
> threads have been blocked. This is just more senseful and stable way
> to rollback.

Regarding your previous email that stated your reasoning behind using sgx_invalidate, I do not think that allowing VMAs
and/or hardware threads to linger in the enclave will negatively affect performance.  In normal operation, sgx_vma_close
will either be (a) invoked in quick succession for all VMAs, e.g. do_exit, or (b) invoked only after all hardware
threads have exited the enclave, e.g. application exits naturally.
Jarkko Sakkinen Dec. 13, 2016, 7:51 a.m. UTC | #4
On Mon, Dec 12, 2016 at 10:08:42AM -0800, Sean Christopherson wrote:
> On Thu, 2016-12-08 at 17:10 +0200, Jarkko Sakkinen wrote:
> > On Thu, Dec 08, 2016 at 02:38:24PM +0200, Jarkko Sakkinen wrote:
> > > diff --git a/drivers/platform/x86/intel_sgx_vma.c b/drivers/platform/x86/intel_sgx_vma.c
> > > index 0649978..f273282 100644
> > > --- a/drivers/platform/x86/intel_sgx_vma.c
> > > +++ b/drivers/platform/x86/intel_sgx_vma.c
> > > @@ -70,48 +70,43 @@
> > >  
> > >  static void sgx_vma_open(struct vm_area_struct *vma)
> > >  {
> > > -	struct sgx_encl *encl;
> > > +	struct sgx_encl *encl = vma->vm_private_data;
> > >  
> > > -	/* Was vm_private_data nullified as a result of the previous fork? */
> > > -	encl = vma->vm_private_data;
> > > -	if (!encl)
> > > -		goto out_fork;
> > > +	/* When forking for the second time vm_private_data is already set to
> > > +	 * NULL.
> > > +	 */
> > > +	if (!encl) {
> > > +		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > +		return;
> > > +	}
> > >  
> > > -	/* Was the process forked? mm_struct changes when the process is
> > > -	 * forked.
> > > +	/* Invalidate enclave when the process has been forked for the first
> > > +	 * time or pages have been added because PTEs must bee in sync with
> > > +	 * the EPCM entries.
> > >  	 */
> > >  	mutex_lock(&encl->lock);
> > >  	if (encl->mm != vma->vm_mm) {
> > > -		mutex_unlock(&encl->lock);
> > > -		goto out_fork;
> > > +		sgx_invalidate(encl);
> > > +		vma->vm_private_data = NULL;
> > >  	}
> > > -	encl->vma_cnt++;
> > >  	mutex_unlock(&encl->lock);
> > >  
> > > -	kref_get(&encl->refcount);
> > > -	return;
> > > -out_fork:
> > > -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > -	vma->vm_private_data = NULL;
> > > +	if (vma->vm_private_data)
> > > +		kref_get(&encl->refcount);
> > For the sake of consistency I will update this for the final patch:
> > 
> > if (encl->flags & SGX_ENCL_DEAD)
> > 	kref_get(&encl->refcount);
> 
> I assume you mean "if (!(encl->flags & SGX_ENCL_DEAD))"?

Yeah, thanks :) Have you tested these patches? I cannot apply them
to my master branch before they have Reviewed-by and Tested-by.

/Jarkko
Jarkko Sakkinen Dec. 13, 2016, 1:48 p.m. UTC | #5
On Tue, Dec 13, 2016 at 09:51:30AM +0200, Jarkko Sakkinen wrote:
> On Mon, Dec 12, 2016 at 10:08:42AM -0800, Sean Christopherson wrote:
> > On Thu, 2016-12-08 at 17:10 +0200, Jarkko Sakkinen wrote:
> > > On Thu, Dec 08, 2016 at 02:38:24PM +0200, Jarkko Sakkinen wrote:
> > > > diff --git a/drivers/platform/x86/intel_sgx_vma.c b/drivers/platform/x86/intel_sgx_vma.c
> > > > index 0649978..f273282 100644
> > > > --- a/drivers/platform/x86/intel_sgx_vma.c
> > > > +++ b/drivers/platform/x86/intel_sgx_vma.c
> > > > @@ -70,48 +70,43 @@
> > > >  
> > > >  static void sgx_vma_open(struct vm_area_struct *vma)
> > > >  {
> > > > -	struct sgx_encl *encl;
> > > > +	struct sgx_encl *encl = vma->vm_private_data;
> > > >  
> > > > -	/* Was vm_private_data nullified as a result of the previous fork? */
> > > > -	encl = vma->vm_private_data;
> > > > -	if (!encl)
> > > > -		goto out_fork;
> > > > +	/* When forking for the second time vm_private_data is already set to
> > > > +	 * NULL.
> > > > +	 */
> > > > +	if (!encl) {
> > > > +		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > > +		return;
> > > > +	}
> > > >  
> > > > -	/* Was the process forked? mm_struct changes when the process is
> > > > -	 * forked.
> > > > +	/* Invalidate enclave when the process has been forked for the first
> > > > +	 * time or pages have been added because PTEs must bee in sync with
> > > > +	 * the EPCM entries.
> > > >  	 */
> > > >  	mutex_lock(&encl->lock);
> > > >  	if (encl->mm != vma->vm_mm) {
> > > > -		mutex_unlock(&encl->lock);
> > > > -		goto out_fork;
> > > > +		sgx_invalidate(encl);
> > > > +		vma->vm_private_data = NULL;
> > > >  	}
> > > > -	encl->vma_cnt++;
> > > >  	mutex_unlock(&encl->lock);
> > > >  
> > > > -	kref_get(&encl->refcount);
> > > > -	return;
> > > > -out_fork:
> > > > -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > > -	vma->vm_private_data = NULL;
> > > > +	if (vma->vm_private_data)
> > > > +		kref_get(&encl->refcount);
> > > For the sake of consistency I will update this for the final patch:
> > > 
> > > if (encl->flags & SGX_ENCL_DEAD)
> > > 	kref_get(&encl->refcount);
> > 
> > I assume you mean "if (!(encl->flags & SGX_ENCL_DEAD))"?
> 
> Yeah, thanks :) Have you tested these patches? I cannot apply them
> to my master branch before they have Reviewed-by and Tested-by.

Not going to make this change though. Would be a bad idea or actually
would cause regression (put/get sync). It's only for fork case. I.e.
I'll keep it in the same way as in the patch set.

/Jarkko
Jarkko Sakkinen Dec. 13, 2016, 1:55 p.m. UTC | #6
On Mon, Dec 12, 2016 at 10:50:10AM -0800, Sean Christopherson wrote:
> On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote:
> >  static void sgx_vma_close(struct vm_area_struct *vma)
> >  {
> >  	struct sgx_encl *encl = vma->vm_private_data;
> >  
> > -	/* If process was forked, VMA is still there but
> > -	 * vm_private_data is set to NULL.
> > +	/* When forking for the second time vm_private_data is already set to
> > +	 * NULL.
> >  	 */
> >  	if (!encl)
> >  		return;
> >  
> >  	mutex_lock(&encl->lock);
> > -	encl->vma_cnt--;
> > -	vma->vm_private_data = NULL;
> > -
> > -	sgx_zap_tcs_ptes(encl, vma);
> > -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > -
> > +	sgx_invalidate(encl);
> >  	mutex_unlock(&encl->lock);
> >  
> >  	kref_put(&encl->refcount, sgx_encl_release);
> 
> 
> On Fri, 2016-12-02 at 19:49 +0000, Christopherson, Sean J wrote:
> > Invoking sgx_invalidate in sgx_vma_close causes NULL pointer faults in find_vma when sgx_vma_close is called via
> > do_exit->mmput->exit_mmap->remove_vma.  The exit_mmap flow doesn't dismantle mm->mm_rb, it simply nukes the vma
> > structs, i.e. calling find_vma via sgx_vma_close results in walking mm->mm_rb when it contains one or more invalid
> > nodes.
> 
> Calling sgx_invalidate in sgx_vma_close triggers the exact same null
> pointer fault that I described in a previous email.   Referencing the
> mm is flat out illegal in the vma close callback.

Hmm very valid point.

I think if I could should zap TCS pages from the VMA being closed.

> On Wed, 2016-12-07 at 10:31 +0200, Jarkko Sakkinen wrote:
> > I'll change this back to using sgx_invalidate() as I had in earlier
> > patch version. It was a wrong choice to convert it to this. If the
> > enclave is killed, we want to first destroy TCS pages to prevent new
> > hardware threads entering and we want to do it for *all* VMAs when the
> > enclave becomes get invalidated.
> > 
> > We do not want to start destroying regular pages before all new HW
> > threads have been blocked. This is just more senseful and stable way
> > to rollback.
> 
> Regarding your previous email that stated your reasoning behind using
> sgx_invalidate, I do not think that allowing VMAs and/or hardware
> threads to linger in the enclave will negatively affect performance.
>  In normal operation, sgx_vma_close will either be (a) invoked in
> quick succession for all VMAs, e.g. do_exit, or (b) invoked only after
> all hardware threads have exited the enclave, e.g. application exits
> naturally.

This commit is not supposed to be a performance optimization.

/Jarkko
Sean Christopherson Dec. 13, 2016, 4:01 p.m. UTC | #7
On Tue, 2016-12-13 at 15:55 +0200, Jarkko Sakkinen wrote:
> On Mon, Dec 12, 2016 at 10:50:10AM -0800, Sean Christopherson wrote:
> > 
> > On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote:
> > > 
> > >  static void sgx_vma_close(struct vm_area_struct *vma)
> > >  {
> > >  	struct sgx_encl *encl = vma->vm_private_data;
> > >  
> > > -	/* If process was forked, VMA is still there but
> > > -	 * vm_private_data is set to NULL.
> > > +	/* When forking for the second time vm_private_data is already set to
> > > +	 * NULL.
> > >  	 */
> > >  	if (!encl)
> > >  		return;
> > >  
> > >  	mutex_lock(&encl->lock);
> > > -	encl->vma_cnt--;
> > > -	vma->vm_private_data = NULL;
> > > -
> > > -	sgx_zap_tcs_ptes(encl, vma);
> > > -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > -
> > > +	sgx_invalidate(encl);
> > >  	mutex_unlock(&encl->lock);
> > >  
> > >  	kref_put(&encl->refcount, sgx_encl_release);
> > 
> > On Fri, 2016-12-02 at 19:49 +0000, Christopherson, Sean J wrote:
> > > 
> > > Invoking sgx_invalidate in sgx_vma_close causes NULL pointer faults in find_vma when sgx_vma_close is called via
> > > do_exit->mmput->exit_mmap->remove_vma.  The exit_mmap flow doesn't dismantle mm->mm_rb, it simply nukes the vma
> > > structs, i.e. calling find_vma via sgx_vma_close results in walking mm->mm_rb when it contains one or more invalid
> > > nodes.
> > Calling sgx_invalidate in sgx_vma_close triggers the exact same null
> > pointer fault that I described in a previous email.   Referencing the
> > mm is flat out illegal in the vma close callback.
> Hmm very valid point.
> 
> I think if I could should zap TCS pages from the VMA being closed.

What's the motivation behind explicitly zapping TCS pages in this scenario?  I can't think of any functional issues with
lazily zapping all PTEs in the VMA.
Jarkko Sakkinen Dec. 13, 2016, 6:14 p.m. UTC | #8
On Tue, Dec 13, 2016 at 08:01:31AM -0800, Sean Christopherson wrote:
> On Tue, 2016-12-13 at 15:55 +0200, Jarkko Sakkinen wrote:
> > On Mon, Dec 12, 2016 at 10:50:10AM -0800, Sean Christopherson wrote:
> > > 
> > > On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote:
> > > > 
> > > >  static void sgx_vma_close(struct vm_area_struct *vma)
> > > >  {
> > > >  	struct sgx_encl *encl = vma->vm_private_data;
> > > >  
> > > > -	/* If process was forked, VMA is still there but
> > > > -	 * vm_private_data is set to NULL.
> > > > +	/* When forking for the second time vm_private_data is already set to
> > > > +	 * NULL.
> > > >  	 */
> > > >  	if (!encl)
> > > >  		return;
> > > >  
> > > >  	mutex_lock(&encl->lock);
> > > > -	encl->vma_cnt--;
> > > > -	vma->vm_private_data = NULL;
> > > > -
> > > > -	sgx_zap_tcs_ptes(encl, vma);
> > > > -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > > -
> > > > +	sgx_invalidate(encl);
> > > >  	mutex_unlock(&encl->lock);
> > > >  
> > > >  	kref_put(&encl->refcount, sgx_encl_release);
> > > 
> > > On Fri, 2016-12-02 at 19:49 +0000, Christopherson, Sean J wrote:
> > > > 
> > > > Invoking sgx_invalidate in sgx_vma_close causes NULL pointer faults in find_vma when sgx_vma_close is called via
> > > > do_exit->mmput->exit_mmap->remove_vma.  The exit_mmap flow doesn't dismantle mm->mm_rb, it simply nukes the vma
> > > > structs, i.e. calling find_vma via sgx_vma_close results in walking mm->mm_rb when it contains one or more invalid
> > > > nodes.
> > > Calling sgx_invalidate in sgx_vma_close triggers the exact same null
> > > pointer fault that I described in a previous email.   Referencing the
> > > mm is flat out illegal in the vma close callback.
> > Hmm very valid point.
> > 
> > I think if I could should zap TCS pages from the VMA being closed.
> 
> What's the motivation behind explicitly zapping TCS pages in this
> scenario?  I can't think of any functional issues with lazily zapping
> all PTEs in the VMA.

Agreed that this case it is not mandatory. Once all the patches are peer
reviewed and tested I'll update that. I won't roll a new round of patch
set just for that.

/Jarkko
Jarkko Sakkinen Dec. 13, 2016, 6:21 p.m. UTC | #9
On Tue, Dec 13, 2016 at 08:14:27PM +0200, Jarkko Sakkinen wrote:
> On Tue, Dec 13, 2016 at 08:01:31AM -0800, Sean Christopherson wrote:
> > On Tue, 2016-12-13 at 15:55 +0200, Jarkko Sakkinen wrote:
> > > On Mon, Dec 12, 2016 at 10:50:10AM -0800, Sean Christopherson wrote:
> > > > 
> > > > On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote:
> > > > > 
> > > > >  static void sgx_vma_close(struct vm_area_struct *vma)
> > > > >  {
> > > > >  	struct sgx_encl *encl = vma->vm_private_data;
> > > > >  
> > > > > -	/* If process was forked, VMA is still there but
> > > > > -	 * vm_private_data is set to NULL.
> > > > > +	/* When forking for the second time vm_private_data is already set to
> > > > > +	 * NULL.
> > > > >  	 */
> > > > >  	if (!encl)
> > > > >  		return;
> > > > >  
> > > > >  	mutex_lock(&encl->lock);
> > > > > -	encl->vma_cnt--;
> > > > > -	vma->vm_private_data = NULL;
> > > > > -
> > > > > -	sgx_zap_tcs_ptes(encl, vma);
> > > > > -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > > > -
> > > > > +	sgx_invalidate(encl);
> > > > >  	mutex_unlock(&encl->lock);
> > > > >  
> > > > >  	kref_put(&encl->refcount, sgx_encl_release);
> > > > 
> > > > On Fri, 2016-12-02 at 19:49 +0000, Christopherson, Sean J wrote:
> > > > > 
> > > > > Invoking sgx_invalidate in sgx_vma_close causes NULL pointer faults in find_vma when sgx_vma_close is called via
> > > > > do_exit->mmput->exit_mmap->remove_vma.  The exit_mmap flow doesn't dismantle mm->mm_rb, it simply nukes the vma
> > > > > structs, i.e. calling find_vma via sgx_vma_close results in walking mm->mm_rb when it contains one or more invalid
> > > > > nodes.
> > > > Calling sgx_invalidate in sgx_vma_close triggers the exact same null
> > > > pointer fault that I described in a previous email.   Referencing the
> > > > mm is flat out illegal in the vma close callback.
> > > Hmm very valid point.
> > > 
> > > I think if I could should zap TCS pages from the VMA being closed.
> > 
> > What's the motivation behind explicitly zapping TCS pages in this
> > scenario?  I can't think of any functional issues with lazily zapping
> > all PTEs in the VMA.
> 
> Agreed that this case it is not mandatory. Once all the patches are peer
> reviewed and tested I'll update that. I won't roll a new round of patch
> set just for that.

Ignore this haven't yet send v9 :) But agreed this can be done.

/Jarkko
Jarkko Sakkinen Dec. 13, 2016, 6:27 p.m. UTC | #10
On Tue, Dec 13, 2016 at 08:21:21PM +0200, Jarkko Sakkinen wrote:
> On Tue, Dec 13, 2016 at 08:14:27PM +0200, Jarkko Sakkinen wrote:
> > On Tue, Dec 13, 2016 at 08:01:31AM -0800, Sean Christopherson wrote:
> > > On Tue, 2016-12-13 at 15:55 +0200, Jarkko Sakkinen wrote:
> > > > On Mon, Dec 12, 2016 at 10:50:10AM -0800, Sean Christopherson wrote:
> > > > > 
> > > > > On Thu, 2016-12-08 at 14:38 +0200, Jarkko Sakkinen wrote:
> > > > > > 
> > > > > >  static void sgx_vma_close(struct vm_area_struct *vma)
> > > > > >  {
> > > > > >  	struct sgx_encl *encl = vma->vm_private_data;
> > > > > >  
> > > > > > -	/* If process was forked, VMA is still there but
> > > > > > -	 * vm_private_data is set to NULL.
> > > > > > +	/* When forking for the second time vm_private_data is already set to
> > > > > > +	 * NULL.
> > > > > >  	 */
> > > > > >  	if (!encl)
> > > > > >  		return;
> > > > > >  
> > > > > >  	mutex_lock(&encl->lock);
> > > > > > -	encl->vma_cnt--;
> > > > > > -	vma->vm_private_data = NULL;
> > > > > > -
> > > > > > -	sgx_zap_tcs_ptes(encl, vma);
> > > > > > -	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > > > > -
> > > > > > +	sgx_invalidate(encl);
> > > > > >  	mutex_unlock(&encl->lock);
> > > > > >  
> > > > > >  	kref_put(&encl->refcount, sgx_encl_release);
> > > > > 
> > > > > On Fri, 2016-12-02 at 19:49 +0000, Christopherson, Sean J wrote:
> > > > > > 
> > > > > > Invoking sgx_invalidate in sgx_vma_close causes NULL pointer faults in find_vma when sgx_vma_close is called via
> > > > > > do_exit->mmput->exit_mmap->remove_vma.  The exit_mmap flow doesn't dismantle mm->mm_rb, it simply nukes the vma
> > > > > > structs, i.e. calling find_vma via sgx_vma_close results in walking mm->mm_rb when it contains one or more invalid
> > > > > > nodes.
> > > > > Calling sgx_invalidate in sgx_vma_close triggers the exact same null
> > > > > pointer fault that I described in a previous email.   Referencing the
> > > > > mm is flat out illegal in the vma close callback.
> > > > Hmm very valid point.
> > > > 
> > > > I think if I could should zap TCS pages from the VMA being closed.
> > > 
> > > What's the motivation behind explicitly zapping TCS pages in this
> > > scenario?  I can't think of any functional issues with lazily zapping
> > > all PTEs in the VMA.
> > 
> > Agreed that this case it is not mandatory. Once all the patches are peer
> > reviewed and tested I'll update that. I won't roll a new round of patch
> > set just for that.
> 
> Ignore this haven't yet send v9 :) But agreed this can be done.

BTW, are any of the seven other unreviewed and untested patches
unacceptable? They all need Tested-by and Reviewed-by before I can
put this series into master branch. Now only 3/10 have been tested
and reviewed (I've done it).

/Jarkko

Patch
diff mbox

diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
index 464763d..018cd03 100644
--- a/drivers/platform/x86/intel_sgx.h
+++ b/drivers/platform/x86/intel_sgx.h
@@ -130,12 +130,12 @@  enum sgx_encl_flags {
 	SGX_ENCL_DEBUG		= BIT(1),
 	SGX_ENCL_SECS_EVICTED	= BIT(2),
 	SGX_ENCL_SUSPEND	= BIT(3),
+	SGX_ENCL_DEAD		= BIT(4),
 };
 
 struct sgx_encl {
 	unsigned int flags;
 	unsigned int secs_child_cnt;
-	unsigned int vma_cnt;
 	struct mutex lock;
 	struct task_struct *owner;
 	struct mm_struct *mm;
diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c
index b377200..d0113b0 100644
--- a/drivers/platform/x86/intel_sgx_ioctl.c
+++ b/drivers/platform/x86/intel_sgx_ioctl.c
@@ -255,7 +255,10 @@  static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
 
 	mutex_lock(&encl->lock);
 
-	if (!encl->vma_cnt || sgx_find_encl(encl->mm, encl_page->addr, &vma))
+	if (encl->flags & SGX_ENCL_DEAD)
+		goto out;
+
+	if (sgx_find_encl(encl->mm, encl_page->addr, &vma))
 		goto out;
 
 	backing = sgx_get_backing(encl, encl_page);
@@ -317,7 +320,7 @@  static void sgx_add_page_worker(struct work_struct *work)
 	do {
 		schedule();
 
-		if (encl->flags & SGX_ENCL_SUSPEND)
+		if (encl->flags & SGX_ENCL_DEAD)
 			skip_rest = true;
 
 		mutex_lock(&encl->lock);
@@ -578,7 +581,6 @@  static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd,
 		up_read(&current->mm->mmap_sem);
 		goto out;
 	}
-	encl->vma_cnt++;
 	vma->vm_private_data = encl;
 	up_read(&current->mm->mmap_sem);
 
@@ -682,7 +684,7 @@  static int __encl_add_page(struct sgx_encl *encl,
 
 	mutex_lock(&encl->lock);
 
-	if (encl->flags & SGX_ENCL_INITIALIZED) {
+	if (encl->flags & (SGX_ENCL_INITIALIZED | SGX_ENCL_DEAD)) {
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/platform/x86/intel_sgx_util.c b/drivers/platform/x86/intel_sgx_util.c
index 3878d9a..d5238c2 100644
--- a/drivers/platform/x86/intel_sgx_util.c
+++ b/drivers/platform/x86/intel_sgx_util.c
@@ -135,21 +135,18 @@  void sgx_zap_tcs_ptes(struct sgx_encl *encl, struct vm_area_struct *vma)
 
 bool sgx_pin_mm(struct sgx_encl *encl)
 {
-	if (encl->flags & SGX_ENCL_SUSPEND)
-		return false;
-
 	mutex_lock(&encl->lock);
-	if (encl->vma_cnt) {
-		atomic_inc(&encl->mm->mm_count);
-	} else {
+	if (encl->flags & SGX_ENCL_DEAD) {
 		mutex_unlock(&encl->lock);
 		return false;
 	}
+
+	atomic_inc(&encl->mm->mm_count);
 	mutex_unlock(&encl->lock);
 
 	down_read(&encl->mm->mmap_sem);
 
-	if (!encl->vma_cnt) {
+	if (encl->flags & SGX_ENCL_DEAD) {
 		sgx_unpin_mm(encl);
 		return false;
 	}
@@ -177,7 +174,7 @@  void sgx_invalidate(struct sgx_encl *encl)
 			break;
 	}
 
-	encl->vma_cnt = 0;
+	encl->flags |= SGX_ENCL_DEAD;
 }
 
 int sgx_find_encl(struct mm_struct *mm, unsigned long addr,
diff --git a/drivers/platform/x86/intel_sgx_vma.c b/drivers/platform/x86/intel_sgx_vma.c
index 0649978..f273282 100644
--- a/drivers/platform/x86/intel_sgx_vma.c
+++ b/drivers/platform/x86/intel_sgx_vma.c
@@ -70,48 +70,43 @@ 
 
 static void sgx_vma_open(struct vm_area_struct *vma)
 {
-	struct sgx_encl *encl;
+	struct sgx_encl *encl = vma->vm_private_data;
 
-	/* Was vm_private_data nullified as a result of the previous fork? */
-	encl = vma->vm_private_data;
-	if (!encl)
-		goto out_fork;
+	/* When forking for the second time vm_private_data is already set to
+	 * NULL.
+	 */
+	if (!encl) {
+		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+		return;
+	}
 
-	/* Was the process forked? mm_struct changes when the process is
-	 * forked.
+	/* Invalidate enclave when the process has been forked for the first
+	 * time or pages have been added because PTEs must bee in sync with
+	 * the EPCM entries.
 	 */
 	mutex_lock(&encl->lock);
 	if (encl->mm != vma->vm_mm) {
-		mutex_unlock(&encl->lock);
-		goto out_fork;
+		sgx_invalidate(encl);
+		vma->vm_private_data = NULL;
 	}
-	encl->vma_cnt++;
 	mutex_unlock(&encl->lock);
 
-	kref_get(&encl->refcount);
-	return;
-out_fork:
-	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
-	vma->vm_private_data = NULL;
+	if (vma->vm_private_data)
+		kref_get(&encl->refcount);
 }
 
 static void sgx_vma_close(struct vm_area_struct *vma)
 {
 	struct sgx_encl *encl = vma->vm_private_data;
 
-	/* If process was forked, VMA is still there but
-	 * vm_private_data is set to NULL.
+	/* When forking for the second time vm_private_data is already set to
+	 * NULL.
 	 */
 	if (!encl)
 		return;
 
 	mutex_lock(&encl->lock);
-	encl->vma_cnt--;
-	vma->vm_private_data = NULL;
-
-	sgx_zap_tcs_ptes(encl, vma);
-	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
-
+	sgx_invalidate(encl);
 	mutex_unlock(&encl->lock);
 
 	kref_put(&encl->refcount, sgx_encl_release);
@@ -187,7 +182,7 @@  static struct sgx_encl_page *sgx_vma_do_fault(struct vm_area_struct *vma,
 
 	mutex_lock(&encl->lock);
 
-	if (!encl->vma_cnt) {
+	if (encl->flags & SGX_ENCL_DEAD) {
 		entry = ERR_PTR(-EFAULT);
 		goto out;
 	}
@@ -370,7 +365,7 @@  static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
 
 	if (!(encl->flags & SGX_ENCL_DEBUG) ||
 	    !(encl->flags & SGX_ENCL_INITIALIZED) ||
-	    (encl->flags & SGX_ENCL_SUSPEND))
+	    (encl->flags & SGX_ENCL_DEAD))
 		return -EFAULT;
 
 	sgx_dbg(encl, "%s addr=0x%lx, len=%d\n", op_str, addr, len);