[intel-sgx-kernel-dev,v4,5/8] intel_sgx: freeze VMAs after EPC pages have been added
diff mbox

Message ID 37306EFA9975BE469F115FDE982C075B9B695ABE@ORSMSX108.amr.corp.intel.com
State New
Headers show

Commit Message

Sean Christopherson Dec. 2, 2016, 9:23 p.m. UTC
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > Below is my suggested version of sgx_vma_open/sgx_vma_close to 
> > eliminate the calls to sgx_invalidate.  I think it makes sense to 
> > remove sgx_invalidate from sgx_vma_open as well as sgx_vma_close to 
> > keep "invalidate" behavior consistent between the two functions, and 
> > within sgx_vma_open itself, i.e. jump to out_fork like the other 
> > failing cases.  This also saves us from grabbing an unnecessary 
> > reference to the encl and results in an early return in the subsequent 
> > sgx_vma_close since vma->vm_private_data is null.
> 
> Is the difference to above is that you set the flag instead of calling
> sgx_invalidate()? Are there other differences?

The code also reinstates the zap_vma_ptes() calls in both functions and does not get a reference to the encl in the open case (because it jumps to out_fork).


On top of your patch:

Comments

Jarkko Sakkinen Dec. 2, 2016, 10:04 p.m. UTC | #1
On Fri, Dec 02, 2016 at 09:23:54PM +0000, Christopherson, Sean J wrote:
> Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > > Below is my suggested version of sgx_vma_open/sgx_vma_close to 
> > > eliminate the calls to sgx_invalidate.  I think it makes sense to 
> > > remove sgx_invalidate from sgx_vma_open as well as sgx_vma_close to 
> > > keep "invalidate" behavior consistent between the two functions, and 
> > > within sgx_vma_open itself, i.e. jump to out_fork like the other 
> > > failing cases.  This also saves us from grabbing an unnecessary 
> > > reference to the encl and results in an early return in the subsequent 
> > > sgx_vma_close since vma->vm_private_data is null.
> > 
> > Is the difference to above is that you set the flag instead of calling
> > sgx_invalidate()? Are there other differences?
> 
> The code also reinstates the zap_vma_ptes() calls in both functions and does not get a reference to the encl in the open case (because it jumps to out_fork).
> 
> 
> On top of your patch:

There's something wrong in your diff below because the patch in question
rips of vma_cnt already.

However, I see your point. In these cases it's fine to call the
zap_vma_ptes() for the VMA in question and not use the same route as in
suspend. I think I'll update the patch by stripping of invalidate
function completely and moving of the implementation to the suspend
callback.

I'll also squash the subsequent patch and instead of adding flag for
added pages I'll use secs_child_cnt. Just noticed that adding that flag
is redundant.

I'll send v6 with this update once the v5 series is otherwise checked
in order not to flood the ML too much if there are other issues. Please
give Reviewed-by for those patches that make sense to you so that we can
move forward.

/Jarkko

> 
> diff --git a/intel_sgx_vma.c b/intel_sgx_vma.c
> index 1349d73..90c34b3 100644
> --- a/intel_sgx_vma.c
> +++ b/intel_sgx_vma.c
> @@ -83,8 +83,9 @@ static void sgx_vma_open(struct vm_area_struct *vma)
> 
>         if (test_bit(SGX_ENCL_PAGES_ADDED, &encl->flags)) {
>                 mutex_lock(&encl->lock);
> -               sgx_invalidate(encl);
> +               encl->flags |= SGX_ENCL_INVALIDATED;
>                 mutex_unlock(&encl->lock);
> +               goto out_fork;
>         }
> 
>         kref_get(&encl->refcount);
> @@ -103,8 +104,10 @@ static void sgx_vma_close(struct vm_area_struct *vma)
>                 return;
> 
>         mutex_lock(&encl->lock);
> -       sgx_invalidate(encl);
> +       encl->flags |= SGX_ENCL_INVALIDATED;
>         mutex_unlock(&encl->lock);
> +       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> +       vma->vm_private_data = NULL;
>         kref_put(&encl->refcount, sgx_encl_release);
>  }
> 
> 
> 
> Or from the base:
> 
> diff --git a/intel_sgx_vma.c b/intel_sgx_vma.c
> index 0649978..90c34b3 100644
> --- a/intel_sgx_vma.c
> +++ b/intel_sgx_vma.c
> @@ -72,21 +72,21 @@ static void sgx_vma_open(struct vm_area_struct *vma)
>  {
>         struct sgx_encl *encl;
> 
> -       /* Was vm_private_data nullified as a result of the previous fork? */
> +       /* fork after fork */
>         encl = vma->vm_private_data;
>         if (!encl)
>                 goto out_fork;
> 
> -       /* Was the process forked? mm_struct changes when the process is
> -        * forked.
> -        */
> -       mutex_lock(&encl->lock);
> -       if (encl->mm != vma->vm_mm) {
> +       /* mm_struct changes when the process is forked */
> +       if (encl->mm != vma->vm_mm)
> +               goto out_fork;
> +
> +       if (test_bit(SGX_ENCL_PAGES_ADDED, &encl->flags)) {
> +               mutex_lock(&encl->lock);
> +               encl->flags |= SGX_ENCL_INVALIDATED;
>                 mutex_unlock(&encl->lock);
>                 goto out_fork;
>         }
> -       encl->vma_cnt++;
> -       mutex_unlock(&encl->lock);
> 
>         kref_get(&encl->refcount);
>         return;
> @@ -99,21 +99,15 @@ static void sgx_vma_close(struct vm_area_struct *vma)
>  {
>         struct sgx_encl *encl = vma->vm_private_data;
> 
> -       /* If process was forked, VMA is still there but
> -        * vm_private_data is set to NULL.
> -        */
> +       /* fork */
>         if (!encl)
>                 return;
> 
>         mutex_lock(&encl->lock);
> -       encl->vma_cnt--;
> -       vma->vm_private_data = NULL;
> -
> -       sgx_zap_tcs_ptes(encl, vma);
> -       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> -
> +       encl->flags |= SGX_ENCL_INVALIDATED;
>         mutex_unlock(&encl->lock);
> -
> +       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> +       vma->vm_private_data = NULL;
>         kref_put(&encl->refcount, sgx_encl_release);
>  }
>
Jarkko Sakkinen Dec. 2, 2016, 10:15 p.m. UTC | #2
On Sat, Dec 03, 2016 at 12:04:44AM +0200, Jarkko Sakkinen wrote:
> On Fri, Dec 02, 2016 at 09:23:54PM +0000, Christopherson, Sean J wrote:
> > Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > > > Below is my suggested version of sgx_vma_open/sgx_vma_close to 
> > > > eliminate the calls to sgx_invalidate.  I think it makes sense to 
> > > > remove sgx_invalidate from sgx_vma_open as well as sgx_vma_close to 
> > > > keep "invalidate" behavior consistent between the two functions, and 
> > > > within sgx_vma_open itself, i.e. jump to out_fork like the other 
> > > > failing cases.  This also saves us from grabbing an unnecessary 
> > > > reference to the encl and results in an early return in the subsequent 
> > > > sgx_vma_close since vma->vm_private_data is null.
> > > 
> > > Is the difference to above is that you set the flag instead of calling
> > > sgx_invalidate()? Are there other differences?
> > 
> > The code also reinstates the zap_vma_ptes() calls in both functions and does not get a reference to the encl in the open case (because it jumps to out_fork).
> > 
> > 
> > On top of your patch:
> 
> There's something wrong in your diff below because the patch in question
> rips of vma_cnt already.
> 
> However, I see your point. In these cases it's fine to call the
> zap_vma_ptes() for the VMA in question and not use the same route as in
> suspend. I think I'll update the patch by stripping of invalidate
> function completely and moving of the implementation to the suspend
> callback.
> 
> I'll also squash the subsequent patch and instead of adding flag for
> added pages I'll use secs_child_cnt. Just noticed that adding that flag
> is redundant.
> 
> I'll send v6 with this update once the v5 series is otherwise checked
> in order not to flood the ML too much if there are other issues. Please
> give Reviewed-by for those patches that make sense to you so that we can
> move forward.
> 
> /Jarkko
> 
> > 
> > diff --git a/intel_sgx_vma.c b/intel_sgx_vma.c
> > index 1349d73..90c34b3 100644
> > --- a/intel_sgx_vma.c
> > +++ b/intel_sgx_vma.c
> > @@ -83,8 +83,9 @@ static void sgx_vma_open(struct vm_area_struct *vma)
> > 
> >         if (test_bit(SGX_ENCL_PAGES_ADDED, &encl->flags)) {
> >                 mutex_lock(&encl->lock);
> > -               sgx_invalidate(encl);
> > +               encl->flags |= SGX_ENCL_INVALIDATED;
> >                 mutex_unlock(&encl->lock);
> > +               goto out_fork;
> >         }
> > 
> >         kref_get(&encl->refcount);
> > @@ -103,8 +104,10 @@ static void sgx_vma_close(struct vm_area_struct *vma)
> >                 return;
> > 
> >         mutex_lock(&encl->lock);
> > -       sgx_invalidate(encl);
> > +       encl->flags |= SGX_ENCL_INVALIDATED;
> >         mutex_unlock(&encl->lock);
> > +       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > +       vma->vm_private_data = NULL;
            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Is there any specific reason for this particular line? Without
any deep analysis I do not see why the assignment would be
mandatory.

/Jarkko
Jarkko Sakkinen Dec. 2, 2016, 10:24 p.m. UTC | #3
On Sat, Dec 03, 2016 at 12:15:10AM +0200, Jarkko Sakkinen wrote:
> On Sat, Dec 03, 2016 at 12:04:44AM +0200, Jarkko Sakkinen wrote:
> > On Fri, Dec 02, 2016 at 09:23:54PM +0000, Christopherson, Sean J wrote:
> > > Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > > > > Below is my suggested version of sgx_vma_open/sgx_vma_close to 
> > > > > eliminate the calls to sgx_invalidate.  I think it makes sense to 
> > > > > remove sgx_invalidate from sgx_vma_open as well as sgx_vma_close to 
> > > > > keep "invalidate" behavior consistent between the two functions, and 
> > > > > within sgx_vma_open itself, i.e. jump to out_fork like the other 
> > > > > failing cases.  This also saves us from grabbing an unnecessary 
> > > > > reference to the encl and results in an early return in the subsequent 
> > > > > sgx_vma_close since vma->vm_private_data is null.
> > > > 
> > > > Is the difference to above is that you set the flag instead of calling
> > > > sgx_invalidate()? Are there other differences?
> > > 
> > > The code also reinstates the zap_vma_ptes() calls in both functions and does not get a reference to the encl in the open case (because it jumps to out_fork).
> > > 
> > > 
> > > On top of your patch:
> > 
> > There's something wrong in your diff below because the patch in question
> > rips of vma_cnt already.
> > 
> > However, I see your point. In these cases it's fine to call the
> > zap_vma_ptes() for the VMA in question and not use the same route as in
> > suspend. I think I'll update the patch by stripping of invalidate
> > function completely and moving of the implementation to the suspend
> > callback.
> > 
> > I'll also squash the subsequent patch and instead of adding flag for
> > added pages I'll use secs_child_cnt. Just noticed that adding that flag
> > is redundant.
> > 
> > I'll send v6 with this update once the v5 series is otherwise checked
> > in order not to flood the ML too much if there are other issues. Please
> > give Reviewed-by for those patches that make sense to you so that we can
> > move forward.
> > 
> > /Jarkko
> > 
> > > 
> > > diff --git a/intel_sgx_vma.c b/intel_sgx_vma.c
> > > index 1349d73..90c34b3 100644
> > > --- a/intel_sgx_vma.c
> > > +++ b/intel_sgx_vma.c
> > > @@ -83,8 +83,9 @@ static void sgx_vma_open(struct vm_area_struct *vma)
> > > 
> > >         if (test_bit(SGX_ENCL_PAGES_ADDED, &encl->flags)) {
> > >                 mutex_lock(&encl->lock);
> > > -               sgx_invalidate(encl);
> > > +               encl->flags |= SGX_ENCL_INVALIDATED;
> > >                 mutex_unlock(&encl->lock);
> > > +               goto out_fork;
> > >         }
> > > 
> > >         kref_get(&encl->refcount);
> > > @@ -103,8 +104,10 @@ static void sgx_vma_close(struct vm_area_struct *vma)
> > >                 return;
> > > 
> > >         mutex_lock(&encl->lock);
> > > -       sgx_invalidate(encl);
> > > +       encl->flags |= SGX_ENCL_INVALIDATED;
> > >         mutex_unlock(&encl->lock);
> > > +       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > +       vma->vm_private_data = NULL;
>             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> Is there any specific reason for this particular line? Without
> any deep analysis I do not see why the assignment would be
> mandatory.

There's now a new version of patches in the 'mmu' branch (temporary
branch). It's still untested. I'll send a new version once I've tested
it and fixed possible regressions if they come up.

/Jarkko
Sean Christopherson Dec. 2, 2016, 10:36 p.m. UTC | #4
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> There's something wrong in your diff below because the patch in question rips
> of vma_cnt already.

The second diff, "from base", is a diff against origin/master/.../intel_sgx_vma.c, sorry for not being clear on what "base" meant.


> I'll also squash the subsequent patch and instead of adding flag for added
> pages I'll use secs_child_cnt. Just noticed that adding that flag is
> redundant.

Checking secs_child_cnt will allow sgx_vma_open after adding pages if the driver evicts all of the enclave's pages.  Actually, now that I think about it, isn't it legal for a user to call mprotect after launching the enclave, e.g. to mark a range of memory not writable?   That could trigger splitting a vma and thus a call to sgx_vma_open.  The EPCM guarantees that permissions won't be promoted beyond what the was intended at signing time, but I don't think there is anything in the SGX spec that says it's illegal to demote permissions after launching the enclave.  For example, marking part of the heap read-only to prevent accidental corruption of a dynamically acquired/generated key.
Sean Christopherson Dec. 2, 2016, 10:38 p.m. UTC | #5
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > > @@ -103,8 +104,10 @@ static void sgx_vma_close(struct vm_area_struct *vma)
> > >                 return;
> > > 
> > >         mutex_lock(&encl->lock);
> > > -       sgx_invalidate(encl);
> > > +       encl->flags |= SGX_ENCL_INVALIDATED;
> > >         mutex_unlock(&encl->lock);
> > > +       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
> > > +       vma->vm_private_data = NULL;
>             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> Is there any specific reason for this particular line? Without any deep
> analysis I do not see why the assignment would be mandatory.

Not that I know of.  The "vma->vm_private_data = NULL" code exists in origin/master, I was just carrying it forward to avoid introducing more noise.
Jarkko Sakkinen Dec. 2, 2016, 11:05 p.m. UTC | #6
On Fri, Dec 02, 2016 at 10:36:42PM +0000, Christopherson, Sean J wrote:
> Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > There's something wrong in your diff below because the patch in question rips
> > of vma_cnt already.
> 
> The second diff, "from base", is a diff against origin/master/.../intel_sgx_vma.c, sorry for not being clear on what "base" meant.
> 
> 
> > I'll also squash the subsequent patch and instead of adding flag for added
> > pages I'll use secs_child_cnt. Just noticed that adding that flag is
> > redundant.
> 
> Checking secs_child_cnt will allow sgx_vma_open after adding pages if
> the driver evicts all of the enclave's pages.  Actually, now that I

My bad. Noticed this and won't do such an update for v6.

> think about it, isn't it legal for a user to call mprotect after
> launching the enclave, e.g. to mark a range of memory not writable?
> That could trigger splitting a vma and thus a call to sgx_vma_open.
> The EPCM guarantees that permissions won't be promoted beyond what the
> was intended at signing time, but I don't think there is anything in
> the SGX spec that says it's illegal to demote permissions after
> launching the enclave.  For example, marking part of the heap
> read-only to prevent accidental corruption of a dynamically
> acquired/generated key.

For SGX1 you can only set permissions at the time of EADD. The
permissions of a page in EPCM and PTE cannot conflict.

/Jarkko
Jarkko Sakkinen Dec. 2, 2016, 11:28 p.m. UTC | #7
On Sat, Dec 03, 2016 at 01:05:19AM +0200, Jarkko Sakkinen wrote:
> On Fri, Dec 02, 2016 at 10:36:42PM +0000, Christopherson, Sean J wrote:
> > Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > > There's something wrong in your diff below because the patch in question rips
> > > of vma_cnt already.
> > 
> > The second diff, "from base", is a diff against origin/master/.../intel_sgx_vma.c, sorry for not being clear on what "base" meant.
> > 
> > 
> > > I'll also squash the subsequent patch and instead of adding flag for added
> > > pages I'll use secs_child_cnt. Just noticed that adding that flag is
> > > redundant.
> > 
> > Checking secs_child_cnt will allow sgx_vma_open after adding pages if
> > the driver evicts all of the enclave's pages.  Actually, now that I
> 
> My bad. Noticed this and won't do such an update for v6.
> 
> > think about it, isn't it legal for a user to call mprotect after
> > launching the enclave, e.g. to mark a range of memory not writable?
> > That could trigger splitting a vma and thus a call to sgx_vma_open.
> > The EPCM guarantees that permissions won't be promoted beyond what the
> > was intended at signing time, but I don't think there is anything in
> > the SGX spec that says it's illegal to demote permissions after
> > launching the enclave.  For example, marking part of the heap
> > read-only to prevent accidental corruption of a dynamically
> > acquired/generated key.
> 
> For SGX1 you can only set permissions at the time of EADD. The
> permissions of a page in EPCM and PTE cannot conflict.

In this case I strongly see that enforcing a policy where you do your
mprotects etc before adding pages is a safe choice to start with to
put into nutshell. We could consider to ease to policy later on but
it is much harder to make stricter without breaking some weird usage
pattern from user space. That's why I did the patch.

/Jarkko

Patch
diff mbox

diff --git a/intel_sgx_vma.c b/intel_sgx_vma.c
index 1349d73..90c34b3 100644
--- a/intel_sgx_vma.c
+++ b/intel_sgx_vma.c
@@ -83,8 +83,9 @@  static void sgx_vma_open(struct vm_area_struct *vma)

        if (test_bit(SGX_ENCL_PAGES_ADDED, &encl->flags)) {
                mutex_lock(&encl->lock);
-               sgx_invalidate(encl);
+               encl->flags |= SGX_ENCL_INVALIDATED;
                mutex_unlock(&encl->lock);
+               goto out_fork;
        }

        kref_get(&encl->refcount);
@@ -103,8 +104,10 @@  static void sgx_vma_close(struct vm_area_struct *vma)
                return;

        mutex_lock(&encl->lock);
-       sgx_invalidate(encl);
+       encl->flags |= SGX_ENCL_INVALIDATED;
        mutex_unlock(&encl->lock);
+       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+       vma->vm_private_data = NULL;
        kref_put(&encl->refcount, sgx_encl_release);
 }



Or from the base:

diff --git a/intel_sgx_vma.c b/intel_sgx_vma.c
index 0649978..90c34b3 100644
--- a/intel_sgx_vma.c
+++ b/intel_sgx_vma.c
@@ -72,21 +72,21 @@  static void sgx_vma_open(struct vm_area_struct *vma)
 {
        struct sgx_encl *encl;

-       /* Was vm_private_data nullified as a result of the previous fork? */
+       /* fork after fork */
        encl = vma->vm_private_data;
        if (!encl)
                goto out_fork;

-       /* Was the process forked? mm_struct changes when the process is
-        * forked.
-        */
-       mutex_lock(&encl->lock);
-       if (encl->mm != vma->vm_mm) {
+       /* mm_struct changes when the process is forked */
+       if (encl->mm != vma->vm_mm)
+               goto out_fork;
+
+       if (test_bit(SGX_ENCL_PAGES_ADDED, &encl->flags)) {
+               mutex_lock(&encl->lock);
+               encl->flags |= SGX_ENCL_INVALIDATED;
                mutex_unlock(&encl->lock);
                goto out_fork;
        }
-       encl->vma_cnt++;
-       mutex_unlock(&encl->lock);

        kref_get(&encl->refcount);
        return;
@@ -99,21 +99,15 @@  static void sgx_vma_close(struct vm_area_struct *vma)
 {
        struct sgx_encl *encl = vma->vm_private_data;

-       /* If process was forked, VMA is still there but
-        * vm_private_data is set to NULL.
-        */
+       /* fork */
        if (!encl)
                return;

        mutex_lock(&encl->lock);
-       encl->vma_cnt--;
-       vma->vm_private_data = NULL;
-
-       sgx_zap_tcs_ptes(encl, vma);
-       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
-
+       encl->flags |= SGX_ENCL_INVALIDATED;
        mutex_unlock(&encl->lock);
-
+       zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+       vma->vm_private_data = NULL;
        kref_put(&encl->refcount, sgx_encl_release);
 }