[intel-sgx-kernel-dev,2/4] intel_sgx: delay VA slot allocation until EWB
diff mbox

Message ID 1492009821-22428-3-git-send-email-sean.j.christopherson@intel.com
State New
Headers show

Commit Message

Sean Christopherson April 12, 2017, 3:10 p.m. UTC
Reserve enough VA slots/pages at enclave creation, but delay actual
VA slot allocation until EWB.  Track the number of EPC and VA pages
that have been allocated and use those numbers to determine whether
or not enough VA slots have been reserved.

Rename sgx_init_page to sgx_alloc_va_page and check if we need a new
VA page outside of the function.  This allows __encl_add_page to do
a single mutex_lock for its fast path; the lock cannot be held while
allocating the VA page as allocating an EPC page may cause the thread
to yield.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 drivers/platform/x86/intel_sgx/sgx.h            |  12 +--
 drivers/platform/x86/intel_sgx/sgx_ioctl.c      | 103 +++++++++++-------------
 drivers/platform/x86/intel_sgx/sgx_page_cache.c |  43 +++++++++-
 drivers/platform/x86/intel_sgx/sgx_util.c       |   5 +-
 4 files changed, 94 insertions(+), 69 deletions(-)

Comments

Jarkko Sakkinen April 13, 2017, 1:12 p.m. UTC | #1
On Wed, Apr 12, 2017 at 08:10:19AM -0700, Sean Christopherson wrote:
> Reserve enough VA slots/pages at enclave creation, but delay actual
> VA slot allocation until EWB.  Track the number of EPC and VA pages
> that have been allocated and use those numbers to determine whether
> or not enough VA slots have been reserved.
> 
> Rename sgx_init_page to sgx_alloc_va_page and check if we need a new
> VA page outside of the function.  This allows __encl_add_page to do
> a single mutex_lock for its fast path; the lock cannot be held while
> allocating the VA page as allocating an EPC page may cause the thread
> to yield.
> 
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

NAK for two reasons:

- Doing this is not mandatory for upstreaming. Just don't want to change
  the flow at this point so radically.
- Not in the scope of the patch set as stated by the cover letter.

After the driver has been upstreamed you could resend this, fair enough?

/Jarkko


> ---
>  drivers/platform/x86/intel_sgx/sgx.h            |  12 +--
>  drivers/platform/x86/intel_sgx/sgx_ioctl.c      | 103 +++++++++++-------------
>  drivers/platform/x86/intel_sgx/sgx_page_cache.c |  43 +++++++++-
>  drivers/platform/x86/intel_sgx/sgx_util.c       |   5 +-
>  4 files changed, 94 insertions(+), 69 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel_sgx/sgx.h b/drivers/platform/x86/intel_sgx/sgx.h
> index 0397141..633526f 100644
> --- a/drivers/platform/x86/intel_sgx/sgx.h
> +++ b/drivers/platform/x86/intel_sgx/sgx.h
> @@ -82,16 +82,6 @@ struct sgx_va_page {
>  	struct list_head list;
>  };
>  
> -static inline unsigned int sgx_alloc_va_slot(struct sgx_va_page *page)
> -{
> -	int slot = find_first_zero_bit(page->slots, SGX_VA_SLOT_COUNT);
> -
> -	if (slot < SGX_VA_SLOT_COUNT)
> -		set_bit(slot, page->slots);
> -
> -	return slot << 3;
> -}
> -
>  static inline void sgx_free_va_slot(struct sgx_va_page *page,
>  				    unsigned int offset)
>  {
> @@ -129,6 +119,8 @@ enum sgx_encl_flags {
>  struct sgx_encl {
>  	unsigned int flags;
>  	unsigned int secs_child_cnt;
> +	unsigned int encl_page_cnt;
> +	unsigned int va_page_cnt;
>  	struct mutex lock;
>  	struct mm_struct *mm;
>  	struct file *backing;
> diff --git a/drivers/platform/x86/intel_sgx/sgx_ioctl.c b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
> index ba7c0d2..af80571 100644
> --- a/drivers/platform/x86/intel_sgx/sgx_ioctl.c
> +++ b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
> @@ -384,63 +384,48 @@ static int sgx_validate_secs(const struct sgx_secs *secs)
>  	return 0;
>  }
>  
> -static int sgx_init_page(struct sgx_encl *encl,
> -			 struct sgx_encl_page *entry,
> -			 unsigned long addr)
> +static int sgx_alloc_va_page(struct sgx_encl *encl)
>  {
>  	struct sgx_va_page *va_page;
>  	struct sgx_epc_page *epc_page = NULL;
> -	unsigned int va_offset = PAGE_SIZE;
>  	void *vaddr;
>  	int ret = 0;
>  
> -	list_for_each_entry(va_page, &encl->va_pages, list) {
> -		va_offset = sgx_alloc_va_slot(va_page);
> -		if (va_offset < PAGE_SIZE)
> -			break;
> -	}
> -
> -	if (va_offset == PAGE_SIZE) {
> -		va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
> -		if (!va_page)
> -			return -ENOMEM;
> -
> -		epc_page = sgx_alloc_page(0);
> -		if (IS_ERR(epc_page)) {
> -			kfree(va_page);
> -			return PTR_ERR(epc_page);
> -		}
> -
> -		vaddr = sgx_get_page(epc_page);
> -		if (!vaddr) {
> -			sgx_warn(encl, "kmap of a new VA page failed %d\n",
> -				 ret);
> -			sgx_free_page(epc_page, encl);
> -			kfree(va_page);
> -			return -EFAULT;
> -		}
> +	va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
> +	if (!va_page)
> +		return -ENOMEM;
>  
> -		ret = __epa(vaddr);
> -		sgx_put_page(vaddr);
> +	epc_page = sgx_alloc_page(0);
> +	if (IS_ERR(epc_page)) {
> +		kfree(va_page);
> +		return PTR_ERR(epc_page);
> +	}
>  
> -		if (ret) {
> -			sgx_warn(encl, "EPA returned %d\n", ret);
> -			sgx_free_page(epc_page, encl);
> -			kfree(va_page);
> -			return -EFAULT;
> -		}
> +	vaddr = sgx_get_page(epc_page);
> +	if (!vaddr) {
> +		sgx_warn(encl, "kmap of a new VA page failed %d\n",
> +				ret);
> +		sgx_free_page(epc_page, encl);
> +		kfree(va_page);
> +		return -EFAULT;
> +	}
>  
> -		va_page->epc_page = epc_page;
> -		va_offset = sgx_alloc_va_slot(va_page);
> +	ret = __epa(vaddr);
> +	sgx_put_page(vaddr);
>  
> -		mutex_lock(&encl->lock);
> -		list_add(&va_page->list, &encl->va_pages);
> -		mutex_unlock(&encl->lock);
> +	if (ret) {
> +		sgx_warn(encl, "EPA returned %d\n", ret);
> +		sgx_free_page(epc_page, encl);
> +		kfree(va_page);
> +		return -EFAULT;
>  	}
>  
> -	entry->va_page = va_page;
> -	entry->va_offset = va_offset;
> -	entry->addr = addr;
> +	va_page->epc_page = epc_page;
> +
> +	mutex_lock(&encl->lock);
> +	encl->va_page_cnt++;
> +	list_add(&va_page->list, &encl->va_pages);
> +	mutex_unlock(&encl->lock);
>  
>  	return 0;
>  }
> @@ -549,8 +534,10 @@ static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd,
>  	if (ret)
>  		goto out;
>  
> -	ret = sgx_init_page(encl, &encl->secs_page,
> -			    encl->base + encl->size);
> +	encl->encl_page_cnt++;
> +	encl->secs_page.addr = encl->base + encl->size;
> +
> +	ret = sgx_alloc_va_page(encl);
>  	if (ret)
>  		goto out;
>  
> @@ -690,14 +677,21 @@ static int __encl_add_page(struct sgx_encl *encl,
>  		}
>  	}
>  
> -	ret = sgx_init_page(encl, encl_page, addp->addr);
> -	if (ret) {
> -		__free_page(tmp_page);
> -		return -EINVAL;
> -	}
> -
>  	mutex_lock(&encl->lock);
>  
> +	encl_page->addr = addp->addr;
> +	encl->encl_page_cnt++;
> +
> +	if (encl->encl_page_cnt > (encl->va_page_cnt * SGX_VA_SLOT_COUNT)) {
> +		/* slow path, new VA page needed */
> +		mutex_unlock(&encl->lock);
> +		ret = sgx_alloc_va_page(encl);
> +		mutex_lock(&encl->lock);
> +		if (ret) {
> +			goto out;
> +		}
> +	}
> +
>  	if (encl->flags & (SGX_ENCL_INITIALIZED | SGX_ENCL_DEAD)) {
>  		ret = -EINVAL;
>  		goto out;
> @@ -752,8 +746,7 @@ static int __encl_add_page(struct sgx_encl *encl,
>  
>  	if (ret) {
>  		kfree(req);
> -		sgx_free_va_slot(encl_page->va_page,
> -				 encl_page->va_offset);
> +		encl->encl_page_cnt--;
>  	}
>  
>  	mutex_unlock(&encl->lock);
> diff --git a/drivers/platform/x86/intel_sgx/sgx_page_cache.c b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
> index 26b845b..a2e39a1 100644
> --- a/drivers/platform/x86/intel_sgx/sgx_page_cache.c
> +++ b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
> @@ -264,10 +264,36 @@ static void sgx_etrack(struct sgx_encl *encl)
>  	}
>  }
>  
> +
> +/**
> + * sgx_alloc_va_slot() - allocate VA slot from a VA page
> + *
> + * @encl:        Enclave to allocate from
> + * @va_page:     Pointer to a VA page pointer
> + *
> + * Returns the offset of a free VA slot and sets va_page to the corresponding
> + * VA page.  If there are no free slots, returns PAGE_SIZE.
> + */
> +static inline unsigned int sgx_alloc_va_slot(struct sgx_encl *encl,
> +					     struct sgx_va_page **va_page)
> +{
> +       unsigned int slot = SGX_VA_SLOT_COUNT;
> +       list_for_each_entry((*va_page), &encl->va_pages, list) {
> +               slot = find_first_zero_bit((*va_page)->slots, SGX_VA_SLOT_COUNT);
> +               if (slot < SGX_VA_SLOT_COUNT) {
> +                       set_bit(slot, (*va_page)->slots);
> +                       break;
> +               }
> +       }
> +
> +       return slot << 3;
> +}
> +
>  static int __sgx_ewb(struct sgx_encl *encl,
>  		     struct sgx_encl_page *encl_page)
>  {
>  	struct sgx_page_info pginfo;
> +	struct sgx_va_page *va_page;
>  	struct page *backing;
>  	struct page *pcmd;
>  	unsigned long pcmd_offset;
> @@ -293,8 +319,15 @@ static int __sgx_ewb(struct sgx_encl *encl,
>  		goto out;
>  	}
>  
> +	encl_page->va_offset = sgx_alloc_va_slot(encl, &va_page);
> +	if (encl_page->va_offset == PAGE_SIZE) {
> +		sgx_warn(encl, "allocating a VA slot for EWB failed\n");
> +		ret = -ENOMEM;
> +		goto out_pcmd;
> +	}
> +
>  	epc = sgx_get_page(encl_page->epc_page);
> -	va = sgx_get_page(encl_page->va_page->epc_page);
> +	va = sgx_get_page(va_page->epc_page);
>  
>  	pginfo.srcpge = (unsigned long)kmap_atomic(backing);
>  	pginfo.pcmd = (unsigned long)kmap_atomic(pcmd) + pcmd_offset;
> @@ -307,8 +340,14 @@ static int __sgx_ewb(struct sgx_encl *encl,
>  
>  	sgx_put_page(va);
>  	sgx_put_page(epc);
> -	sgx_put_backing(pcmd, true);
>  
> +	if (ret == SGX_SUCCESS)
> +		encl_page->va_page = va_page;
> +	else
> +		sgx_free_va_slot(va_page, encl_page->va_offset);
> +
> +out_pcmd:
> +	sgx_put_backing(pcmd, true);
>  out:
>  	sgx_put_backing(backing, true);
>  	return ret;
> diff --git a/drivers/platform/x86/intel_sgx/sgx_util.c b/drivers/platform/x86/intel_sgx/sgx_util.c
> index 420f96f..f4b2f1a 100644
> --- a/drivers/platform/x86/intel_sgx/sgx_util.c
> +++ b/drivers/platform/x86/intel_sgx/sgx_util.c
> @@ -218,6 +218,9 @@ static int sgx_eldu(struct sgx_encl *encl,
>  	if (ret) {
>  		sgx_err(encl, "ELDU returned %d\n", ret);
>  		ret = -EFAULT;
> +	} else {
> +		sgx_free_va_slot(encl_page->va_page, encl_page->va_offset);
> +		encl_page->epc_page = epc_page;
>  	}
>  
>  	kunmap_atomic((void *)(unsigned long)(pginfo.pcmd - pcmd_offset));
> @@ -304,8 +307,6 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
>  		if (rc)
>  			goto out;
>  
> -		encl->secs_page.epc_page = secs_epc_page;
> -
>  		/* Do not free */
>  		secs_epc_page = NULL;
>  	}
> -- 
> 2.7.4
> 
> _______________________________________________
> intel-sgx-kernel-dev mailing list
> intel-sgx-kernel-dev@lists.01.org
> https://lists.01.org/mailman/listinfo/intel-sgx-kernel-dev
Sean Christopherson April 13, 2017, 2:28 p.m. UTC | #2
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> On Wed, Apr 12, 2017 at 08:10:19AM -0700, Sean Christopherson wrote:
> > Reserve enough VA slots/pages at enclave creation, but delay actual
> > VA slot allocation until EWB.  Track the number of EPC and VA pages
> > that have been allocated and use those numbers to determine whether
> > or not enough VA slots have been reserved.
> > 
> > Rename sgx_init_page to sgx_alloc_va_page and check if we need a new
> > VA page outside of the function.  This allows __encl_add_page to do
> > a single mutex_lock for its fast path; the lock cannot be held while
> > allocating the VA page as allocating an EPC page may cause the thread
> > to yield.
> > 
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> 
> NAK for two reasons:
> 
> - Doing this is not mandatory for upstreaming. Just don't want to change
>   the flow at this point so radically.
> - Not in the scope of the patch set as stated by the cover letter.
> 
> After the driver has been upstreamed you could resend this, fair enough?
> 
> /Jarkko

The problem is that combining epc_page and va_page into a union is not
possible with the current code as the va_page pointer is allocated at
page creation and is not freed until the encl is released.  This patch
is what makes it possible to unionize the two pointers.  I can update
the cover letter if it's a sticking point.

In my opinion this is not a radical code change as the allocation and
tracking logic is not being modified.  Effectively the only functional
changes introduced by this patch are what VA page/offset of an encl's
pre-allocated VA pages are used when eviciting a page from the EPC.


> 
> 
> > ---
> >  drivers/platform/x86/intel_sgx/sgx.h            |  12 +--
> >  drivers/platform/x86/intel_sgx/sgx_ioctl.c      | 103
> > +++++++++++------------- drivers/platform/x86/intel_sgx/sgx_page_cache.c |
> > 43 +++++++++- drivers/platform/x86/intel_sgx/sgx_util.c       |   5 +-
> >  4 files changed, 94 insertions(+), 69 deletions(-)
> > 
> > diff --git a/drivers/platform/x86/intel_sgx/sgx.h
> > b/drivers/platform/x86/intel_sgx/sgx.h index 0397141..633526f 100644
> > --- a/drivers/platform/x86/intel_sgx/sgx.h
> > +++ b/drivers/platform/x86/intel_sgx/sgx.h
> > @@ -82,16 +82,6 @@ struct sgx_va_page {
> >     struct list_head list;
> >  };
> >  
> > -static inline unsigned int sgx_alloc_va_slot(struct sgx_va_page *page)
> > -{
> > -   int slot = find_first_zero_bit(page->slots, SGX_VA_SLOT_COUNT);
> > -
> > -   if (slot < SGX_VA_SLOT_COUNT)
> > -       set_bit(slot, page->slots);
> > -
> > -   return slot << 3;
> > -}
> > -
> >  static inline void sgx_free_va_slot(struct sgx_va_page *page,
> >                     unsigned int offset)
> >  {
> > @@ -129,6 +119,8 @@ enum sgx_encl_flags {
> >  struct sgx_encl {
> >     unsigned int flags;
> >     unsigned int secs_child_cnt;
> > +   unsigned int encl_page_cnt;
> > +   unsigned int va_page_cnt;
> >     struct mutex lock;
> >     struct mm_struct *mm;
> >     struct file *backing;
> > diff --git a/drivers/platform/x86/intel_sgx/sgx_ioctl.c
> > b/drivers/platform/x86/intel_sgx/sgx_ioctl.c index ba7c0d2..af80571 100644
> > --- a/drivers/platform/x86/intel_sgx/sgx_ioctl.c
> > +++ b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
> > @@ -384,63 +384,48 @@ static int sgx_validate_secs(const struct sgx_secs
> > *secs) return 0;
> >  }
> >  
> > -static int sgx_init_page(struct sgx_encl *encl,
> > -            struct sgx_encl_page *entry,
> > -            unsigned long addr)
> > +static int sgx_alloc_va_page(struct sgx_encl *encl)
> >  {
> >     struct sgx_va_page *va_page;
> >     struct sgx_epc_page *epc_page = NULL;
> > -   unsigned int va_offset = PAGE_SIZE;
> >     void *vaddr;
> >     int ret = 0;
> >  
> > -   list_for_each_entry(va_page, &encl->va_pages, list) {
> > -       va_offset = sgx_alloc_va_slot(va_page);
> > -       if (va_offset < PAGE_SIZE)
> > -           break;
> > -   }
> > -
> > -   if (va_offset == PAGE_SIZE) {
> > -       va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
> > -       if (!va_page)
> > -           return -ENOMEM;
> > -
> > -       epc_page = sgx_alloc_page(0);
> > -       if (IS_ERR(epc_page)) {
> > -           kfree(va_page);
> > -           return PTR_ERR(epc_page);
> > -       }
> > -
> > -       vaddr = sgx_get_page(epc_page);
> > -       if (!vaddr) {
> > -           sgx_warn(encl, "kmap of a new VA page failed %d\n",
> > -                ret);
> > -           sgx_free_page(epc_page, encl);
> > -           kfree(va_page);
> > -           return -EFAULT;
> > -       }
> > +   va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
> > +   if (!va_page)
> > +       return -ENOMEM;
> >  
> > -       ret = __epa(vaddr);
> > -       sgx_put_page(vaddr);
> > +   epc_page = sgx_alloc_page(0);
> > +   if (IS_ERR(epc_page)) {
> > +       kfree(va_page);
> > +       return PTR_ERR(epc_page);
> > +   }
> >  
> > -       if (ret) {
> > -           sgx_warn(encl, "EPA returned %d\n", ret);
> > -           sgx_free_page(epc_page, encl);
> > -           kfree(va_page);
> > -           return -EFAULT;
> > -       }
> > +   vaddr = sgx_get_page(epc_page);
> > +   if (!vaddr) {
> > +       sgx_warn(encl, "kmap of a new VA page failed %d\n",
> > +               ret);
> > +       sgx_free_page(epc_page, encl);
> > +       kfree(va_page);
> > +       return -EFAULT;
> > +   }
> >  
> > -       va_page->epc_page = epc_page;
> > -       va_offset = sgx_alloc_va_slot(va_page);
> > +   ret = __epa(vaddr);
> > +   sgx_put_page(vaddr);
> >  
> > -       mutex_lock(&encl->lock);
> > -       list_add(&va_page->list, &encl->va_pages);
> > -       mutex_unlock(&encl->lock);
> > +   if (ret) {
> > +       sgx_warn(encl, "EPA returned %d\n", ret);
> > +       sgx_free_page(epc_page, encl);
> > +       kfree(va_page);
> > +       return -EFAULT;
> >     }
> >  
> > -   entry->va_page = va_page;
> > -   entry->va_offset = va_offset;
> > -   entry->addr = addr;
> > +   va_page->epc_page = epc_page;
> > +
> > +   mutex_lock(&encl->lock);
> > +   encl->va_page_cnt++;
> > +   list_add(&va_page->list, &encl->va_pages);
> > +   mutex_unlock(&encl->lock);
> >  
> >     return 0;
> >  }
> > @@ -549,8 +534,10 @@ static long sgx_ioc_enclave_create(struct file *filep,
> > unsigned int cmd, if (ret)
> >         goto out;
> >  
> > -   ret = sgx_init_page(encl, &encl->secs_page,
> > -               encl->base + encl->size);
> > +   encl->encl_page_cnt++;
> > +   encl->secs_page.addr = encl->base + encl->size;
> > +
> > +   ret = sgx_alloc_va_page(encl);
> >     if (ret)
> >         goto out;
> >  
> > @@ -690,14 +677,21 @@ static int __encl_add_page(struct sgx_encl *encl,
> >         }
> >     }
> >  
> > -   ret = sgx_init_page(encl, encl_page, addp->addr);
> > -   if (ret) {
> > -       __free_page(tmp_page);
> > -       return -EINVAL;
> > -   }
> > -
> >     mutex_lock(&encl->lock);
> >  
> > +   encl_page->addr = addp->addr;
> > +   encl->encl_page_cnt++;
> > +
> > +   if (encl->encl_page_cnt > (encl->va_page_cnt * SGX_VA_SLOT_COUNT))
> > {
> > +       /* slow path, new VA page needed */
> > +       mutex_unlock(&encl->lock);
> > +       ret = sgx_alloc_va_page(encl);
> > +       mutex_lock(&encl->lock);
> > +       if (ret) {
> > +           goto out;
> > +       }
> > +   }
> > +
> >     if (encl->flags & (SGX_ENCL_INITIALIZED | SGX_ENCL_DEAD)) {
> >         ret = -EINVAL;
> >         goto out;
> > @@ -752,8 +746,7 @@ static int __encl_add_page(struct sgx_encl *encl,
> >  
> >     if (ret) {
> >         kfree(req);
> > -       sgx_free_va_slot(encl_page->va_page,
> > -                encl_page->va_offset);
> > +       encl->encl_page_cnt--;
> >     }
> >  
> >     mutex_unlock(&encl->lock);
> > diff --git a/drivers/platform/x86/intel_sgx/sgx_page_cache.c
> > b/drivers/platform/x86/intel_sgx/sgx_page_cache.c index 26b845b..a2e39a1
> > 100644 --- a/drivers/platform/x86/intel_sgx/sgx_page_cache.c
> > +++ b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
> > @@ -264,10 +264,36 @@ static void sgx_etrack(struct sgx_encl *encl)
> >     }
> >  }
> >  
> > +
> > +/**
> > + * sgx_alloc_va_slot() - allocate VA slot from a VA page
> > + *
> > + * @encl:        Enclave to allocate from
> > + * @va_page:     Pointer to a VA page pointer
> > + *
> > + * Returns the offset of a free VA slot and sets va_page to the
> > corresponding
> > + * VA page.  If there are no free slots, returns PAGE_SIZE.
> > + */
> > +static inline unsigned int sgx_alloc_va_slot(struct sgx_encl *encl,
> > +                        struct sgx_va_page **va_page)
> > +{
> > +       unsigned int slot = SGX_VA_SLOT_COUNT;
> > +       list_for_each_entry((*va_page), &encl->va_pages, list) {
> > +               slot = find_first_zero_bit((*va_page)->slots,
> > SGX_VA_SLOT_COUNT);
> > +               if (slot < SGX_VA_SLOT_COUNT) {
> > +                       set_bit(slot, (*va_page)->slots);
> > +                       break;
> > +               }
> > +       }
> > +
> > +       return slot << 3;
> > +}
> > +
> >  static int __sgx_ewb(struct sgx_encl *encl,
> >              struct sgx_encl_page *encl_page)
> >  {
> >     struct sgx_page_info pginfo;
> > +   struct sgx_va_page *va_page;
> >     struct page *backing;
> >     struct page *pcmd;
> >     unsigned long pcmd_offset;
> > @@ -293,8 +319,15 @@ static int __sgx_ewb(struct sgx_encl *encl,
> >         goto out;
> >     }
> >  
> > +   encl_page->va_offset = sgx_alloc_va_slot(encl, &va_page);
> > +   if (encl_page->va_offset == PAGE_SIZE) {
> > +       sgx_warn(encl, "allocating a VA slot for EWB failed\n");
> > +       ret = -ENOMEM;
> > +       goto out_pcmd;
> > +   }
> > +
> >     epc = sgx_get_page(encl_page->epc_page);
> > -   va = sgx_get_page(encl_page->va_page->epc_page);
> > +   va = sgx_get_page(va_page->epc_page);
> >  
> >     pginfo.srcpge = (unsigned long)kmap_atomic(backing);
> >     pginfo.pcmd = (unsigned long)kmap_atomic(pcmd) + pcmd_offset;
> > @@ -307,8 +340,14 @@ static int __sgx_ewb(struct sgx_encl *encl,
> >  
> >     sgx_put_page(va);
> >     sgx_put_page(epc);
> > -   sgx_put_backing(pcmd, true);
> >  
> > +   if (ret == SGX_SUCCESS)
> > +       encl_page->va_page = va_page;
> > +   else
> > +       sgx_free_va_slot(va_page, encl_page->va_offset);
> > +
> > +out_pcmd:
> > +   sgx_put_backing(pcmd, true);
> >  out:
> >     sgx_put_backing(backing, true);
> >     return ret;
> > diff --git a/drivers/platform/x86/intel_sgx/sgx_util.c
> > b/drivers/platform/x86/intel_sgx/sgx_util.c index 420f96f..f4b2f1a 100644
> > --- a/drivers/platform/x86/intel_sgx/sgx_util.c
> > +++ b/drivers/platform/x86/intel_sgx/sgx_util.c
> > @@ -218,6 +218,9 @@ static int sgx_eldu(struct sgx_encl *encl,
> >     if (ret) {
> >         sgx_err(encl, "ELDU returned %d\n", ret);
> >         ret = -EFAULT;
> > +   } else {
> > +       sgx_free_va_slot(encl_page->va_page, encl_page->va_offset);
> > +       encl_page->epc_page = epc_page;
> >     }
> >  
> >     kunmap_atomic((void *)(unsigned long)(pginfo.pcmd - pcmd_offset));
> > @@ -304,8 +307,6 @@ static struct sgx_encl_page *sgx_do_fault(struct
> > vm_area_struct *vma, if (rc)
> >             goto out;
> >  
> > -       encl->secs_page.epc_page = secs_epc_page;
> > -
> >         /* Do not free */
> >         secs_epc_page = NULL;
> >     }
> > -- 
> > 2.7.4
> > 
> > _______________________________________________
> > intel-sgx-kernel-dev mailing list
> > intel-sgx-kernel-dev@lists.01.org
> > https://lists.01.org/mailman/listinfo/intel-sgx-kernel-dev
Jarkko Sakkinen April 19, 2017, 12:11 p.m. UTC | #3
On Thu, Apr 13, 2017 at 02:28:59PM +0000, Christopherson, Sean J wrote:
> Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > On Wed, Apr 12, 2017 at 08:10:19AM -0700, Sean Christopherson wrote:
> > > Reserve enough VA slots/pages at enclave creation, but delay actual
> > > VA slot allocation until EWB.  Track the number of EPC and VA pages
> > > that have been allocated and use those numbers to determine whether
> > > or not enough VA slots have been reserved.
> > > 
> > > Rename sgx_init_page to sgx_alloc_va_page and check if we need a new
> > > VA page outside of the function.  This allows __encl_add_page to do
> > > a single mutex_lock for its fast path; the lock cannot be held while
> > > allocating the VA page as allocating an EPC page may cause the thread
> > > to yield.
> > > 
> > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > 
> > NAK for two reasons:
> > 
> > - Doing this is not mandatory for upstreaming. Just don't want to change
> >   the flow at this point so radically.
> > - Not in the scope of the patch set as stated by the cover letter.
> > 
> > After the driver has been upstreamed you could resend this, fair enough?
> > 
> > /Jarkko
> 
> The problem is that combining epc_page and va_page into a union is not
> possible with the current code as the va_page pointer is allocated at
> page creation and is not freed until the encl is released.  This patch
> is what makes it possible to unionize the two pointers.  I can update
> the cover letter if it's a sticking point.
> 
> In my opinion this is not a radical code change as the allocation and
> tracking logic is not being modified.  Effectively the only functional
> changes introduced by this patch are what VA page/offset of an encl's
> pre-allocated VA pages are used when eviciting a page from the EPC.

I see your point. Sorry I did not look carefully enough but you are
right. Can you do the updates and send an updated patch set? I'm not
yet sure at what point I'll merge these changes. They make obviously
sense but they are not *right now* high priority. Given your explanation
I'll probably anyway merge them before sending patches to upstream.

/Jarkko

Patch
diff mbox

diff --git a/drivers/platform/x86/intel_sgx/sgx.h b/drivers/platform/x86/intel_sgx/sgx.h
index 0397141..633526f 100644
--- a/drivers/platform/x86/intel_sgx/sgx.h
+++ b/drivers/platform/x86/intel_sgx/sgx.h
@@ -82,16 +82,6 @@  struct sgx_va_page {
 	struct list_head list;
 };
 
-static inline unsigned int sgx_alloc_va_slot(struct sgx_va_page *page)
-{
-	int slot = find_first_zero_bit(page->slots, SGX_VA_SLOT_COUNT);
-
-	if (slot < SGX_VA_SLOT_COUNT)
-		set_bit(slot, page->slots);
-
-	return slot << 3;
-}
-
 static inline void sgx_free_va_slot(struct sgx_va_page *page,
 				    unsigned int offset)
 {
@@ -129,6 +119,8 @@  enum sgx_encl_flags {
 struct sgx_encl {
 	unsigned int flags;
 	unsigned int secs_child_cnt;
+	unsigned int encl_page_cnt;
+	unsigned int va_page_cnt;
 	struct mutex lock;
 	struct mm_struct *mm;
 	struct file *backing;
diff --git a/drivers/platform/x86/intel_sgx/sgx_ioctl.c b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
index ba7c0d2..af80571 100644
--- a/drivers/platform/x86/intel_sgx/sgx_ioctl.c
+++ b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
@@ -384,63 +384,48 @@  static int sgx_validate_secs(const struct sgx_secs *secs)
 	return 0;
 }
 
-static int sgx_init_page(struct sgx_encl *encl,
-			 struct sgx_encl_page *entry,
-			 unsigned long addr)
+static int sgx_alloc_va_page(struct sgx_encl *encl)
 {
 	struct sgx_va_page *va_page;
 	struct sgx_epc_page *epc_page = NULL;
-	unsigned int va_offset = PAGE_SIZE;
 	void *vaddr;
 	int ret = 0;
 
-	list_for_each_entry(va_page, &encl->va_pages, list) {
-		va_offset = sgx_alloc_va_slot(va_page);
-		if (va_offset < PAGE_SIZE)
-			break;
-	}
-
-	if (va_offset == PAGE_SIZE) {
-		va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
-		if (!va_page)
-			return -ENOMEM;
-
-		epc_page = sgx_alloc_page(0);
-		if (IS_ERR(epc_page)) {
-			kfree(va_page);
-			return PTR_ERR(epc_page);
-		}
-
-		vaddr = sgx_get_page(epc_page);
-		if (!vaddr) {
-			sgx_warn(encl, "kmap of a new VA page failed %d\n",
-				 ret);
-			sgx_free_page(epc_page, encl);
-			kfree(va_page);
-			return -EFAULT;
-		}
+	va_page = kzalloc(sizeof(*va_page), GFP_KERNEL);
+	if (!va_page)
+		return -ENOMEM;
 
-		ret = __epa(vaddr);
-		sgx_put_page(vaddr);
+	epc_page = sgx_alloc_page(0);
+	if (IS_ERR(epc_page)) {
+		kfree(va_page);
+		return PTR_ERR(epc_page);
+	}
 
-		if (ret) {
-			sgx_warn(encl, "EPA returned %d\n", ret);
-			sgx_free_page(epc_page, encl);
-			kfree(va_page);
-			return -EFAULT;
-		}
+	vaddr = sgx_get_page(epc_page);
+	if (!vaddr) {
+		sgx_warn(encl, "kmap of a new VA page failed %d\n",
+				ret);
+		sgx_free_page(epc_page, encl);
+		kfree(va_page);
+		return -EFAULT;
+	}
 
-		va_page->epc_page = epc_page;
-		va_offset = sgx_alloc_va_slot(va_page);
+	ret = __epa(vaddr);
+	sgx_put_page(vaddr);
 
-		mutex_lock(&encl->lock);
-		list_add(&va_page->list, &encl->va_pages);
-		mutex_unlock(&encl->lock);
+	if (ret) {
+		sgx_warn(encl, "EPA returned %d\n", ret);
+		sgx_free_page(epc_page, encl);
+		kfree(va_page);
+		return -EFAULT;
 	}
 
-	entry->va_page = va_page;
-	entry->va_offset = va_offset;
-	entry->addr = addr;
+	va_page->epc_page = epc_page;
+
+	mutex_lock(&encl->lock);
+	encl->va_page_cnt++;
+	list_add(&va_page->list, &encl->va_pages);
+	mutex_unlock(&encl->lock);
 
 	return 0;
 }
@@ -549,8 +534,10 @@  static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd,
 	if (ret)
 		goto out;
 
-	ret = sgx_init_page(encl, &encl->secs_page,
-			    encl->base + encl->size);
+	encl->encl_page_cnt++;
+	encl->secs_page.addr = encl->base + encl->size;
+
+	ret = sgx_alloc_va_page(encl);
 	if (ret)
 		goto out;
 
@@ -690,14 +677,21 @@  static int __encl_add_page(struct sgx_encl *encl,
 		}
 	}
 
-	ret = sgx_init_page(encl, encl_page, addp->addr);
-	if (ret) {
-		__free_page(tmp_page);
-		return -EINVAL;
-	}
-
 	mutex_lock(&encl->lock);
 
+	encl_page->addr = addp->addr;
+	encl->encl_page_cnt++;
+
+	if (encl->encl_page_cnt > (encl->va_page_cnt * SGX_VA_SLOT_COUNT)) {
+		/* slow path, new VA page needed */
+		mutex_unlock(&encl->lock);
+		ret = sgx_alloc_va_page(encl);
+		mutex_lock(&encl->lock);
+		if (ret) {
+			goto out;
+		}
+	}
+
 	if (encl->flags & (SGX_ENCL_INITIALIZED | SGX_ENCL_DEAD)) {
 		ret = -EINVAL;
 		goto out;
@@ -752,8 +746,7 @@  static int __encl_add_page(struct sgx_encl *encl,
 
 	if (ret) {
 		kfree(req);
-		sgx_free_va_slot(encl_page->va_page,
-				 encl_page->va_offset);
+		encl->encl_page_cnt--;
 	}
 
 	mutex_unlock(&encl->lock);
diff --git a/drivers/platform/x86/intel_sgx/sgx_page_cache.c b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
index 26b845b..a2e39a1 100644
--- a/drivers/platform/x86/intel_sgx/sgx_page_cache.c
+++ b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
@@ -264,10 +264,36 @@  static void sgx_etrack(struct sgx_encl *encl)
 	}
 }
 
+
+/**
+ * sgx_alloc_va_slot() - allocate VA slot from a VA page
+ *
+ * @encl:        Enclave to allocate from
+ * @va_page:     Pointer to a VA page pointer
+ *
+ * Returns the offset of a free VA slot and sets va_page to the corresponding
+ * VA page.  If there are no free slots, returns PAGE_SIZE.
+ */
+static inline unsigned int sgx_alloc_va_slot(struct sgx_encl *encl,
+					     struct sgx_va_page **va_page)
+{
+       unsigned int slot = SGX_VA_SLOT_COUNT;
+       list_for_each_entry((*va_page), &encl->va_pages, list) {
+               slot = find_first_zero_bit((*va_page)->slots, SGX_VA_SLOT_COUNT);
+               if (slot < SGX_VA_SLOT_COUNT) {
+                       set_bit(slot, (*va_page)->slots);
+                       break;
+               }
+       }
+
+       return slot << 3;
+}
+
 static int __sgx_ewb(struct sgx_encl *encl,
 		     struct sgx_encl_page *encl_page)
 {
 	struct sgx_page_info pginfo;
+	struct sgx_va_page *va_page;
 	struct page *backing;
 	struct page *pcmd;
 	unsigned long pcmd_offset;
@@ -293,8 +319,15 @@  static int __sgx_ewb(struct sgx_encl *encl,
 		goto out;
 	}
 
+	encl_page->va_offset = sgx_alloc_va_slot(encl, &va_page);
+	if (encl_page->va_offset == PAGE_SIZE) {
+		sgx_warn(encl, "allocating a VA slot for EWB failed\n");
+		ret = -ENOMEM;
+		goto out_pcmd;
+	}
+
 	epc = sgx_get_page(encl_page->epc_page);
-	va = sgx_get_page(encl_page->va_page->epc_page);
+	va = sgx_get_page(va_page->epc_page);
 
 	pginfo.srcpge = (unsigned long)kmap_atomic(backing);
 	pginfo.pcmd = (unsigned long)kmap_atomic(pcmd) + pcmd_offset;
@@ -307,8 +340,14 @@  static int __sgx_ewb(struct sgx_encl *encl,
 
 	sgx_put_page(va);
 	sgx_put_page(epc);
-	sgx_put_backing(pcmd, true);
 
+	if (ret == SGX_SUCCESS)
+		encl_page->va_page = va_page;
+	else
+		sgx_free_va_slot(va_page, encl_page->va_offset);
+
+out_pcmd:
+	sgx_put_backing(pcmd, true);
 out:
 	sgx_put_backing(backing, true);
 	return ret;
diff --git a/drivers/platform/x86/intel_sgx/sgx_util.c b/drivers/platform/x86/intel_sgx/sgx_util.c
index 420f96f..f4b2f1a 100644
--- a/drivers/platform/x86/intel_sgx/sgx_util.c
+++ b/drivers/platform/x86/intel_sgx/sgx_util.c
@@ -218,6 +218,9 @@  static int sgx_eldu(struct sgx_encl *encl,
 	if (ret) {
 		sgx_err(encl, "ELDU returned %d\n", ret);
 		ret = -EFAULT;
+	} else {
+		sgx_free_va_slot(encl_page->va_page, encl_page->va_offset);
+		encl_page->epc_page = epc_page;
 	}
 
 	kunmap_atomic((void *)(unsigned long)(pginfo.pcmd - pcmd_offset));
@@ -304,8 +307,6 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 		if (rc)
 			goto out;
 
-		encl->secs_page.epc_page = secs_epc_page;
-
 		/* Do not free */
 		secs_epc_page = NULL;
 	}