Message ID | 1480372367-12679-1-git-send-email-sean.j.christopherson@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Nov 28, 2016 at 02:32:47PM -0800, Sean Christopherson wrote: > Add an eviction specific, per-enclave lock so that the lock can be held > for the entire EPC eviction sequence without having to hold the overall > per-enclave lock for an extended duration. Holding an enclave's overall > lock prevents any other driver operations on the enclave, e.g. faulting > a page into the enclave, and evicting 16 EPC pages takes upwards of half > a millisecond. > > Holding a lock for the entire EBLOCK->ETRACK->EWB sequence is necessary > to make the sequence atomic with respect to other evictions on the same > enclave. The SGX architecture does not allow multiple ETRACK sequences > to be in flight at the same time if the enclave being tracked has one or > more active threads in the enclave. A second ETRACK will fail due to > PREV_TRK_INCMPL if it is attempted before EWB is executed on all blocked > pages (in the presence of active enclave threads). > > Currently the driver releases the enclave's lock after ETRACK and then > reacquires the lock prior to EWB. Releasing the lock from the thread > that performed ETRACK, T1, allows a different thread, T2, to acquire the > lock for its own ETRACK. T2's ETRACK will fail due to the architectural > restrictions if a third thread, T3, is executing inside the enclave > whose pages are being swapped. > > T3: <active in enclave E1> > T1: lock_mutex(E1) > T1: ETRACK(E1) > T1: unlock_mutex(E1) > T2: lock_mutex(E1) > T2: ETRACK(E1) fails > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> The commit message is worse than in the earlier patch. There are no new locks. I don't like the use of word atomic here. It's too rounded word to be used. > --- > drivers/platform/x86/intel_sgx.h | 1 + > drivers/platform/x86/intel_sgx_ioctl.c | 1 + > drivers/platform/x86/intel_sgx_page_cache.c | 69 +++++++++++++++++------------ > 3 files changed, 43 insertions(+), 28 deletions(-) > > diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h > index 464763d..d837789 100644 > --- a/drivers/platform/x86/intel_sgx.h > +++ b/drivers/platform/x86/intel_sgx.h > @@ -137,6 +137,7 @@ struct sgx_encl { > unsigned int secs_child_cnt; > unsigned int vma_cnt; > struct mutex lock; > + struct mutex eviction_lock; > struct task_struct *owner; > struct mm_struct *mm; > struct file *backing; > diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c > index d54c410..72fe9aa 100644 > --- a/drivers/platform/x86/intel_sgx_ioctl.c > +++ b/drivers/platform/x86/intel_sgx_ioctl.c > @@ -522,6 +522,7 @@ static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd, > INIT_LIST_HEAD(&encl->load_list); > INIT_LIST_HEAD(&encl->encl_list); > mutex_init(&encl->lock); > + mutex_init(&encl->eviction_lock); > INIT_WORK(&encl->add_page_work, sgx_add_page_worker); > > encl->owner = current->group_leader; > diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c > index 8b1cc82..ce60b76 100644 > --- a/drivers/platform/x86/intel_sgx_page_cache.c > +++ b/drivers/platform/x86/intel_sgx_page_cache.c > @@ -224,9 +224,9 @@ static int sgx_ewb(struct sgx_encl *encl, > return ret; > } > > -void sgx_free_encl_page(struct sgx_encl_page *entry, > - struct sgx_encl *encl, > - unsigned int flags) > +static void sgx_free_encl_page(struct sgx_encl_page *entry, > + struct sgx_encl *encl, > + unsigned int flags) > { > sgx_free_page(entry->epc_page, encl, flags); > entry->epc_page = NULL; > @@ -238,7 +238,7 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) > struct sgx_encl_page *entry; > struct sgx_encl_page *tmp; > struct page *pages[SGX_NR_SWAP_CLUSTER_MAX + 1]; > - struct vm_area_struct *evma; > + struct vm_area_struct *vma; > int cnt = 0; > int i = 0; > int ret; > @@ -261,13 +261,15 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) > return; > } > > - /* EBLOCK */ > + mutex_lock(&encl->eviction_lock); > > + /* EBLOCK */ > list_for_each_entry_safe(entry, tmp, src, load_list) { > - mutex_lock(&encl->lock); > - evma = sgx_find_vma(encl, entry->addr); > - if (!evma) { > + vma = sgx_find_vma(encl, entry->addr); > + if (!vma) { > list_del(&entry->load_list); > + > + mutex_lock(&encl->lock); > sgx_free_encl_page(entry, encl, 0); > mutex_unlock(&encl->lock); > continue; > @@ -276,36 +278,32 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) > pages[cnt] = sgx_get_backing(encl, entry); > if (IS_ERR(pages[cnt])) { > list_del(&entry->load_list); > + > + mutex_lock(&encl->lock); > list_add_tail(&entry->load_list, &encl->load_list); > entry->flags &= ~SGX_ENCL_PAGE_RESERVED; > mutex_unlock(&encl->lock); > continue; > } > > - zap_vma_ptes(evma, entry->addr, PAGE_SIZE); > + zap_vma_ptes(vma, entry->addr, PAGE_SIZE); > sgx_eblock(entry->epc_page); > cnt++; > - mutex_unlock(&encl->lock); > } > > - /* ETRACK */ > + if (!cnt) { > + mutex_unlock(&encl->eviction_lock); > + goto out; > + } Unrelated change. Should be removed or sent as a separate patch if you think that his complication of the code flow gives value. > - mutex_lock(&encl->lock); > + /* ETRACK */ > sgx_etrack(encl->secs_page.epc_page); > - mutex_unlock(&encl->lock); > > /* EWB */ > - > - mutex_lock(&encl->lock); > i = 0; > - > - while (!list_empty(src)) { > - entry = list_first_entry(src, struct sgx_encl_page, > - load_list); > - list_del(&entry->load_list); > - > - evma = sgx_find_vma(encl, entry->addr); > - if (evma) { > + list_for_each_entry(entry, src, load_list) { > + vma = sgx_find_vma(encl, entry->addr); > + if (vma) { > ret = sgx_ewb(encl, entry, pages[i]); > BUG_ON(ret != 0 && ret != SGX_NOT_TRACKED); > /* Only kick out threads with an IPI if needed. */ > @@ -313,15 +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) > smp_call_function(sgx_ipi_cb, NULL, 1); > BUG_ON(sgx_ewb(encl, entry, pages[i])); > } > - encl->secs_child_cnt--; > } > + sgx_put_backing(pages[i++], vma); > + } > + > + mutex_unlock(&encl->eviction_lock); > + > + mutex_lock(&encl->lock); > + > + /* Bookeeping */ What this comment tells that is non-obvious? > + while (!list_empty(src)) { > + entry = list_first_entry(src, struct sgx_encl_page, > + load_list); > + list_del(&entry->load_list); > > sgx_free_encl_page(entry, encl, > - evma ? SGX_FREE_SKIP_EREMOVE : 0); > - sgx_put_backing(pages[i++], evma); > + vma ? SGX_FREE_SKIP_EREMOVE : 0); > } > > - /* Allow SECS page eviction only when the encl is initialized. */ > + encl->secs_child_cnt -= cnt; > + > + /* Allow SECS page eviction only when the encl is initialized. > + * The eviction lock does not need to be held to evict the SECS, > + * conflict is impossible as there are no other pages to evict. > + */ > if (!encl->secs_child_cnt && > (encl->flags & SGX_ENCL_INITIALIZED)) { > pages[cnt] = sgx_get_backing(encl, &encl->secs_page); > @@ -336,9 +349,9 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) > sgx_put_backing(pages[cnt], true); > } > } > - > mutex_unlock(&encl->lock); > > +out: This label won't be needed if the snippet above is removed. > sgx_unpin_mm(encl); > } > > -- > 2.7.4 > > _______________________________________________ > intel-sgx-kernel-dev mailing list > intel-sgx-kernel-dev@lists.01.org > https://lists.01.org/mailman/listinfo/intel-sgx-kernel-dev This is now worse than it was. I would rather apply the original patch (with that code flow change removed). /Jarkko
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote: > > +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct > > list_head *src) smp_call_function(sgx_ipi_cb, NULL, 1); > > BUG_ON(sgx_ewb(encl, entry, pages[i])); > > } > > - encl->secs_child_cnt--; > > } > > + sgx_put_backing(pages[i++], vma); > > + } > > + > > + mutex_unlock(&encl->eviction_lock); > > + > > + mutex_lock(&encl->lock); > > + > > + /* Bookeeping */ > > What this comment tells that is non-obvious? Just trying to keep with the style of the rest of the function. I would describe this section as less obvious than the EBLOCK, ETRACK and EWB sections. > This is now worse than it was. I would rather apply the original patch (with > that code flow change removed). Can you elaborate as to how this is worse than holding the generic enclave lock for the entire flow? I can do more in-depth analysis, but my initial testing showed that not holding encl->lock during eviction significantly reduced enclave startup time due to not blocking sgx_add_page_worker.
On Thu, Dec 01, 2016 at 03:46:11PM +0000, Christopherson, Sean J wrote: > Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote: > > > +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct > > > list_head *src) smp_call_function(sgx_ipi_cb, NULL, 1); > > > BUG_ON(sgx_ewb(encl, entry, pages[i])); > > > } > > > - encl->secs_child_cnt--; > > > } > > > + sgx_put_backing(pages[i++], vma); > > > + } > > > + > > > + mutex_unlock(&encl->eviction_lock); > > > + > > > + mutex_lock(&encl->lock); > > > + > > > + /* Bookeeping */ > > > > What this comment tells that is non-obvious? > > Just trying to keep with the style of the rest of the function. I would describe this section as less obvious than the EBLOCK, ETRACK and EWB sections. > > > > This is now worse than it was. I would rather apply the original patch (with > > that code flow change removed). > > Can you elaborate as to how this is worse than holding the generic > enclave lock for the entire flow? I can do more in-depth analysis, > but my initial testing showed that not holding encl->lock during > eviction significantly reduced enclave startup time due to not > blocking sgx_add_page_worker. I do not want a bug fix and potential performance optimization mixed into together. Even if this was considered it is now a mess. The bug fix does not require adding eviction_lock. So even if I would accept that optimization I never would accept this patch. /Jarkko
On Thu, Dec 01, 2016 at 09:55:24PM +0200, Jarkko Sakkinen wrote: > On Thu, Dec 01, 2016 at 03:46:11PM +0000, Christopherson, Sean J wrote: > > Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote: > > > > +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct > > > > list_head *src) smp_call_function(sgx_ipi_cb, NULL, 1); > > > > BUG_ON(sgx_ewb(encl, entry, pages[i])); > > > > } > > > > - encl->secs_child_cnt--; > > > > } > > > > + sgx_put_backing(pages[i++], vma); > > > > + } > > > > + > > > > + mutex_unlock(&encl->eviction_lock); > > > > + > > > > + mutex_lock(&encl->lock); > > > > + > > > > + /* Bookeeping */ > > > > > > What this comment tells that is non-obvious? > > > > Just trying to keep with the style of the rest of the function. I would describe this section as less obvious than the EBLOCK, ETRACK and EWB sections. > > > > > > > This is now worse than it was. I would rather apply the original patch (with > > > that code flow change removed). > > > > Can you elaborate as to how this is worse than holding the generic > > enclave lock for the entire flow? I can do more in-depth analysis, > > but my initial testing showed that not holding encl->lock during > > eviction significantly reduced enclave startup time due to not > > blocking sgx_add_page_worker. > > I do not want a bug fix and potential performance optimization mixed > into together. Even if this was considered it is now a mess. The bug > fix does not require adding eviction_lock. So even if I would accept > that optimization I never would accept this patch. I'm happy to review the patch after all those fixes in queue have been merged. In order to do that I need tested/reviewed-by's (or review feedback if there's still some problem) to those patches that have me as the author because I cannot review my own patches. /Jarkko
diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h index 464763d..d837789 100644 --- a/drivers/platform/x86/intel_sgx.h +++ b/drivers/platform/x86/intel_sgx.h @@ -137,6 +137,7 @@ struct sgx_encl { unsigned int secs_child_cnt; unsigned int vma_cnt; struct mutex lock; + struct mutex eviction_lock; struct task_struct *owner; struct mm_struct *mm; struct file *backing; diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c index d54c410..72fe9aa 100644 --- a/drivers/platform/x86/intel_sgx_ioctl.c +++ b/drivers/platform/x86/intel_sgx_ioctl.c @@ -522,6 +522,7 @@ static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd, INIT_LIST_HEAD(&encl->load_list); INIT_LIST_HEAD(&encl->encl_list); mutex_init(&encl->lock); + mutex_init(&encl->eviction_lock); INIT_WORK(&encl->add_page_work, sgx_add_page_worker); encl->owner = current->group_leader; diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c index 8b1cc82..ce60b76 100644 --- a/drivers/platform/x86/intel_sgx_page_cache.c +++ b/drivers/platform/x86/intel_sgx_page_cache.c @@ -224,9 +224,9 @@ static int sgx_ewb(struct sgx_encl *encl, return ret; } -void sgx_free_encl_page(struct sgx_encl_page *entry, - struct sgx_encl *encl, - unsigned int flags) +static void sgx_free_encl_page(struct sgx_encl_page *entry, + struct sgx_encl *encl, + unsigned int flags) { sgx_free_page(entry->epc_page, encl, flags); entry->epc_page = NULL; @@ -238,7 +238,7 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) struct sgx_encl_page *entry; struct sgx_encl_page *tmp; struct page *pages[SGX_NR_SWAP_CLUSTER_MAX + 1]; - struct vm_area_struct *evma; + struct vm_area_struct *vma; int cnt = 0; int i = 0; int ret; @@ -261,13 +261,15 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) return; } - /* EBLOCK */ + mutex_lock(&encl->eviction_lock); + /* EBLOCK */ list_for_each_entry_safe(entry, tmp, src, load_list) { - mutex_lock(&encl->lock); - evma = sgx_find_vma(encl, entry->addr); - if (!evma) { + vma = sgx_find_vma(encl, entry->addr); + if (!vma) { list_del(&entry->load_list); + + mutex_lock(&encl->lock); sgx_free_encl_page(entry, encl, 0); mutex_unlock(&encl->lock); continue; @@ -276,36 +278,32 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) pages[cnt] = sgx_get_backing(encl, entry); if (IS_ERR(pages[cnt])) { list_del(&entry->load_list); + + mutex_lock(&encl->lock); list_add_tail(&entry->load_list, &encl->load_list); entry->flags &= ~SGX_ENCL_PAGE_RESERVED; mutex_unlock(&encl->lock); continue; } - zap_vma_ptes(evma, entry->addr, PAGE_SIZE); + zap_vma_ptes(vma, entry->addr, PAGE_SIZE); sgx_eblock(entry->epc_page); cnt++; - mutex_unlock(&encl->lock); } - /* ETRACK */ + if (!cnt) { + mutex_unlock(&encl->eviction_lock); + goto out; + } - mutex_lock(&encl->lock); + /* ETRACK */ sgx_etrack(encl->secs_page.epc_page); - mutex_unlock(&encl->lock); /* EWB */ - - mutex_lock(&encl->lock); i = 0; - - while (!list_empty(src)) { - entry = list_first_entry(src, struct sgx_encl_page, - load_list); - list_del(&entry->load_list); - - evma = sgx_find_vma(encl, entry->addr); - if (evma) { + list_for_each_entry(entry, src, load_list) { + vma = sgx_find_vma(encl, entry->addr); + if (vma) { ret = sgx_ewb(encl, entry, pages[i]); BUG_ON(ret != 0 && ret != SGX_NOT_TRACKED); /* Only kick out threads with an IPI if needed. */ @@ -313,15 +311,30 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) smp_call_function(sgx_ipi_cb, NULL, 1); BUG_ON(sgx_ewb(encl, entry, pages[i])); } - encl->secs_child_cnt--; } + sgx_put_backing(pages[i++], vma); + } + + mutex_unlock(&encl->eviction_lock); + + mutex_lock(&encl->lock); + + /* Bookeeping */ + while (!list_empty(src)) { + entry = list_first_entry(src, struct sgx_encl_page, + load_list); + list_del(&entry->load_list); sgx_free_encl_page(entry, encl, - evma ? SGX_FREE_SKIP_EREMOVE : 0); - sgx_put_backing(pages[i++], evma); + vma ? SGX_FREE_SKIP_EREMOVE : 0); } - /* Allow SECS page eviction only when the encl is initialized. */ + encl->secs_child_cnt -= cnt; + + /* Allow SECS page eviction only when the encl is initialized. + * The eviction lock does not need to be held to evict the SECS, + * conflict is impossible as there are no other pages to evict. + */ if (!encl->secs_child_cnt && (encl->flags & SGX_ENCL_INITIALIZED)) { pages[cnt] = sgx_get_backing(encl, &encl->secs_page); @@ -336,9 +349,9 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src) sgx_put_backing(pages[cnt], true); } } - mutex_unlock(&encl->lock); +out: sgx_unpin_mm(encl); }
Add an eviction specific, per-enclave lock so that the lock can be held for the entire EPC eviction sequence without having to hold the overall per-enclave lock for an extended duration. Holding an enclave's overall lock prevents any other driver operations on the enclave, e.g. faulting a page into the enclave, and evicting 16 EPC pages takes upwards of half a millisecond. Holding a lock for the entire EBLOCK->ETRACK->EWB sequence is necessary to make the sequence atomic with respect to other evictions on the same enclave. The SGX architecture does not allow multiple ETRACK sequences to be in flight at the same time if the enclave being tracked has one or more active threads in the enclave. A second ETRACK will fail due to PREV_TRK_INCMPL if it is attempted before EWB is executed on all blocked pages (in the presence of active enclave threads). Currently the driver releases the enclave's lock after ETRACK and then reacquires the lock prior to EWB. Releasing the lock from the thread that performed ETRACK, T1, allows a different thread, T2, to acquire the lock for its own ETRACK. T2's ETRACK will fail due to the architectural restrictions if a third thread, T3, is executing inside the enclave whose pages are being swapped. T3: <active in enclave E1> T1: lock_mutex(E1) T1: ETRACK(E1) T1: unlock_mutex(E1) T2: lock_mutex(E1) T2: ETRACK(E1) fails Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> --- drivers/platform/x86/intel_sgx.h | 1 + drivers/platform/x86/intel_sgx_ioctl.c | 1 + drivers/platform/x86/intel_sgx_page_cache.c | 69 +++++++++++++++++------------ 3 files changed, 43 insertions(+), 28 deletions(-)