[intel-sgx-kernel-dev,v5,8/8] intel_sgx: add LRU algorithm to page swapping
diff mbox

Message ID 20161202200018.25552-9-jarkko.sakkinen@linux.intel.com
State New
Headers show

Commit Message

Jarkko Sakkinen Dec. 2, 2016, 8 p.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

Test and clear the A bit of an EPC page when isolating pages during EPC
page swap.  Move accessed pages to the end of the load list instead of
the eviction list, i.e. isolate only those pages that have not been
accessed since the last time the swapping flow was run.

This basic LRU algorithm yields a significant improvement in throughput
when the system is under a heavy EPC pressure.

This patch is based on the code originally written Serge Ayoun for the
out-of-tree driver with a bug fix that the EPC pages are moved always to
the end of the list if they have been lately accessed. For unknown
reason a version of out-of-tree driver slipped into the Github, which
this regression.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/platform/x86/intel_sgx.h            |  2 +-
 drivers/platform/x86/intel_sgx_ioctl.c      |  1 +
 drivers/platform/x86/intel_sgx_page_cache.c | 54 ++++++++++++++++++++---------
 drivers/platform/x86/intel_sgx_vma.c        |  1 +
 4 files changed, 41 insertions(+), 17 deletions(-)

Comments

Jarkko Sakkinen Dec. 2, 2016, 10:48 p.m. UTC | #1
On Fri, Dec 02, 2016 at 10:00:18PM +0200, Jarkko Sakkinen wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
> 
> Test and clear the A bit of an EPC page when isolating pages during EPC
> page swap.  Move accessed pages to the end of the load list instead of
> the eviction list, i.e. isolate only those pages that have not been
> accessed since the last time the swapping flow was run.
> 
> This basic LRU algorithm yields a significant improvement in throughput
> when the system is under a heavy EPC pressure.
> 
> This patch is based on the code originally written Serge Ayoun for the
> out-of-tree driver with a bug fix that the EPC pages are moved always to
> the end of the list if they have been lately accessed. For unknown
> reason a version of out-of-tree driver slipped into the Github, which
> this regression.
> 
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  drivers/platform/x86/intel_sgx.h            |  2 +-
>  drivers/platform/x86/intel_sgx_ioctl.c      |  1 +
>  drivers/platform/x86/intel_sgx_page_cache.c | 54 ++++++++++++++++++++---------
>  drivers/platform/x86/intel_sgx_vma.c        |  1 +
>  4 files changed, 41 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
> index fc52a6f..a5e54f8 100644
> --- a/drivers/platform/x86/intel_sgx.h
> +++ b/drivers/platform/x86/intel_sgx.h
> @@ -193,7 +193,7 @@ long sgx_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
>  #endif
>  
>  /* Utility functions */
> -
> +int sgx_test_and_clear_young(struct sgx_encl_page *page, struct sgx_encl *encl);
>  void *sgx_get_epc_page(struct sgx_epc_page *entry);
>  void sgx_put_epc_page(void *epc_page_vaddr);
>  struct page *sgx_get_backing(struct sgx_encl *encl,
> diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c
> index 3bceebd..363aa68 100644
> --- a/drivers/platform/x86/intel_sgx_ioctl.c
> +++ b/drivers/platform/x86/intel_sgx_ioctl.c
> @@ -296,6 +296,7 @@ static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
>  	}
>  
>  	encl_page->epc_page = epc_page;
> +	sgx_test_and_clear_young(encl_page, encl);
>  	list_add_tail(&encl_page->load_list, &encl->load_list);
>  
>  	mutex_unlock(&encl->lock);
> diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c
> index c4865d2..3c89f3a 100644
> --- a/drivers/platform/x86/intel_sgx_page_cache.c
> +++ b/drivers/platform/x86/intel_sgx_page_cache.c
> @@ -77,6 +77,37 @@ static unsigned int sgx_nr_high_pages;
>  struct task_struct *ksgxswapd_tsk;
>  static DECLARE_WAIT_QUEUE_HEAD(ksgxswapd_waitq);
>  
> +
> +static int sgx_test_and_clear_young_cb(pte_t *ptep, pgtable_t token,
> +				       unsigned long addr, void *data)
> +{
> +	int ret = pte_young(*ptep);
> +	if (ret) {
> +		pte_t pte = pte_mkold(*ptep);
> +		set_pte_at((struct mm_struct *) data, addr, ptep, pte);
> +	}
> +	return ret;
> +}
> +
> +/**
> + * sgx_test_and_clear_young() - Test and reset the accessed bit
> + * @page:	enclave EPC page to be tested for recent access
> + * @encl:	enclave which owns @page
> + *
> + * Checks the Access (A) bit from the PTE corresponding to the
> + * enclave page and clears it.  Returns 1 if the page has been
> + * recently accessed and 0 if not.
> + */
> +int sgx_test_and_clear_young(struct sgx_encl_page *page, struct sgx_encl *encl)
> +{
> +	struct vm_area_struct *vma = sgx_find_vma(encl, page->addr);
> +	if (!vma)
> +		return 0;
> +
> +	return apply_to_page_range(vma->vm_mm, page->addr, PAGE_SIZE,
> +				   sgx_test_and_clear_young_cb, vma->vm_mm);
> +}
> +
>  static struct sgx_tgid_ctx *sgx_isolate_tgid_ctx(unsigned long nr_to_scan)
>  {
>  	struct sgx_tgid_ctx *ctx = NULL;
> @@ -166,7 +197,8 @@ static void sgx_isolate_pages(struct sgx_encl *encl,
>  					 struct sgx_encl_page,
>  					 load_list);
>  
> -		if (!(entry->flags & SGX_ENCL_PAGE_RESERVED)) {
> +		if (!sgx_test_and_clear_young(entry, encl) &&
> +		    !(entry->flags & SGX_ENCL_PAGE_RESERVED)) {
>  			entry->flags |= SGX_ENCL_PAGE_RESERVED;
>  			list_move_tail(&entry->load_list, dst);
>  		} else {
> @@ -248,19 +280,6 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
>  
>  	entry = list_first_entry(src, struct sgx_encl_page, load_list);
>  
> -	if (!sgx_pin_mm(encl)) {
> -		while (!list_empty(src)) {
> -			entry = list_first_entry(src, struct sgx_encl_page,
> -						 load_list);
> -			list_del(&entry->load_list);
> -			mutex_lock(&encl->lock);
> -			sgx_free_encl_page(entry, encl, 0);
> -			mutex_unlock(&encl->lock);
> -		}
> -
> -		return;
> -	}
> -

Whoops this is still needed. Sorry about this. I'll revise for v6.

/Jarkko
Jarkko Sakkinen Dec. 2, 2016, 10:50 p.m. UTC | #2
On Sat, Dec 03, 2016 at 12:48:39AM +0200, Jarkko Sakkinen wrote:
> On Fri, Dec 02, 2016 at 10:00:18PM +0200, Jarkko Sakkinen wrote:
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> > 
> > Test and clear the A bit of an EPC page when isolating pages during EPC
> > page swap.  Move accessed pages to the end of the load list instead of
> > the eviction list, i.e. isolate only those pages that have not been
> > accessed since the last time the swapping flow was run.
> > 
> > This basic LRU algorithm yields a significant improvement in throughput
> > when the system is under a heavy EPC pressure.
> > 
> > This patch is based on the code originally written Serge Ayoun for the
> > out-of-tree driver with a bug fix that the EPC pages are moved always to
> > the end of the list if they have been lately accessed. For unknown
> > reason a version of out-of-tree driver slipped into the Github, which
> > this regression.
> > 
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> > ---
> >  drivers/platform/x86/intel_sgx.h            |  2 +-
> >  drivers/platform/x86/intel_sgx_ioctl.c      |  1 +
> >  drivers/platform/x86/intel_sgx_page_cache.c | 54 ++++++++++++++++++++---------
> >  drivers/platform/x86/intel_sgx_vma.c        |  1 +
> >  4 files changed, 41 insertions(+), 17 deletions(-)
> > 
> > diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
> > index fc52a6f..a5e54f8 100644
> > --- a/drivers/platform/x86/intel_sgx.h
> > +++ b/drivers/platform/x86/intel_sgx.h
> > @@ -193,7 +193,7 @@ long sgx_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
> >  #endif
> >  
> >  /* Utility functions */
> > -
> > +int sgx_test_and_clear_young(struct sgx_encl_page *page, struct sgx_encl *encl);
> >  void *sgx_get_epc_page(struct sgx_epc_page *entry);
> >  void sgx_put_epc_page(void *epc_page_vaddr);
> >  struct page *sgx_get_backing(struct sgx_encl *encl,
> > diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c
> > index 3bceebd..363aa68 100644
> > --- a/drivers/platform/x86/intel_sgx_ioctl.c
> > +++ b/drivers/platform/x86/intel_sgx_ioctl.c
> > @@ -296,6 +296,7 @@ static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
> >  	}
> >  
> >  	encl_page->epc_page = epc_page;
> > +	sgx_test_and_clear_young(encl_page, encl);
> >  	list_add_tail(&encl_page->load_list, &encl->load_list);
> >  
> >  	mutex_unlock(&encl->lock);
> > diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c
> > index c4865d2..3c89f3a 100644
> > --- a/drivers/platform/x86/intel_sgx_page_cache.c
> > +++ b/drivers/platform/x86/intel_sgx_page_cache.c
> > @@ -77,6 +77,37 @@ static unsigned int sgx_nr_high_pages;
> >  struct task_struct *ksgxswapd_tsk;
> >  static DECLARE_WAIT_QUEUE_HEAD(ksgxswapd_waitq);
> >  
> > +
> > +static int sgx_test_and_clear_young_cb(pte_t *ptep, pgtable_t token,
> > +				       unsigned long addr, void *data)
> > +{
> > +	int ret = pte_young(*ptep);
> > +	if (ret) {
> > +		pte_t pte = pte_mkold(*ptep);
> > +		set_pte_at((struct mm_struct *) data, addr, ptep, pte);
> > +	}
> > +	return ret;
> > +}
> > +
> > +/**
> > + * sgx_test_and_clear_young() - Test and reset the accessed bit
> > + * @page:	enclave EPC page to be tested for recent access
> > + * @encl:	enclave which owns @page
> > + *
> > + * Checks the Access (A) bit from the PTE corresponding to the
> > + * enclave page and clears it.  Returns 1 if the page has been
> > + * recently accessed and 0 if not.
> > + */
> > +int sgx_test_and_clear_young(struct sgx_encl_page *page, struct sgx_encl *encl)
> > +{
> > +	struct vm_area_struct *vma = sgx_find_vma(encl, page->addr);
> > +	if (!vma)
> > +		return 0;
> > +
> > +	return apply_to_page_range(vma->vm_mm, page->addr, PAGE_SIZE,
> > +				   sgx_test_and_clear_young_cb, vma->vm_mm);
> > +}
> > +
> >  static struct sgx_tgid_ctx *sgx_isolate_tgid_ctx(unsigned long nr_to_scan)
> >  {
> >  	struct sgx_tgid_ctx *ctx = NULL;
> > @@ -166,7 +197,8 @@ static void sgx_isolate_pages(struct sgx_encl *encl,
> >  					 struct sgx_encl_page,
> >  					 load_list);
> >  
> > -		if (!(entry->flags & SGX_ENCL_PAGE_RESERVED)) {
> > +		if (!sgx_test_and_clear_young(entry, encl) &&
> > +		    !(entry->flags & SGX_ENCL_PAGE_RESERVED)) {
> >  			entry->flags |= SGX_ENCL_PAGE_RESERVED;
> >  			list_move_tail(&entry->load_list, dst);
> >  		} else {
> > @@ -248,19 +280,6 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
> >  
> >  	entry = list_first_entry(src, struct sgx_encl_page, load_list);
> >  
> > -	if (!sgx_pin_mm(encl)) {
> > -		while (!list_empty(src)) {
> > -			entry = list_first_entry(src, struct sgx_encl_page,
> > -						 load_list);
> > -			list_del(&entry->load_list);
> > -			mutex_lock(&encl->lock);
> > -			sgx_free_encl_page(entry, encl, 0);
> > -			mutex_unlock(&encl->lock);
> > -		}
> > -
> > -		return;
> > -	}
> > -
> 
> Whoops this is still needed. Sorry about this. I'll revise for v6.

Actually really isn't but I need this as helper function for making
better recovery for failing EWB in order to get rid of BUG_ON(), which
almost strictly denied in the mainline.

/Jarkko

Patch
diff mbox

diff --git a/drivers/platform/x86/intel_sgx.h b/drivers/platform/x86/intel_sgx.h
index fc52a6f..a5e54f8 100644
--- a/drivers/platform/x86/intel_sgx.h
+++ b/drivers/platform/x86/intel_sgx.h
@@ -193,7 +193,7 @@  long sgx_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 #endif
 
 /* Utility functions */
-
+int sgx_test_and_clear_young(struct sgx_encl_page *page, struct sgx_encl *encl);
 void *sgx_get_epc_page(struct sgx_epc_page *entry);
 void sgx_put_epc_page(void *epc_page_vaddr);
 struct page *sgx_get_backing(struct sgx_encl *encl,
diff --git a/drivers/platform/x86/intel_sgx_ioctl.c b/drivers/platform/x86/intel_sgx_ioctl.c
index 3bceebd..363aa68 100644
--- a/drivers/platform/x86/intel_sgx_ioctl.c
+++ b/drivers/platform/x86/intel_sgx_ioctl.c
@@ -296,6 +296,7 @@  static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
 	}
 
 	encl_page->epc_page = epc_page;
+	sgx_test_and_clear_young(encl_page, encl);
 	list_add_tail(&encl_page->load_list, &encl->load_list);
 
 	mutex_unlock(&encl->lock);
diff --git a/drivers/platform/x86/intel_sgx_page_cache.c b/drivers/platform/x86/intel_sgx_page_cache.c
index c4865d2..3c89f3a 100644
--- a/drivers/platform/x86/intel_sgx_page_cache.c
+++ b/drivers/platform/x86/intel_sgx_page_cache.c
@@ -77,6 +77,37 @@  static unsigned int sgx_nr_high_pages;
 struct task_struct *ksgxswapd_tsk;
 static DECLARE_WAIT_QUEUE_HEAD(ksgxswapd_waitq);
 
+
+static int sgx_test_and_clear_young_cb(pte_t *ptep, pgtable_t token,
+				       unsigned long addr, void *data)
+{
+	int ret = pte_young(*ptep);
+	if (ret) {
+		pte_t pte = pte_mkold(*ptep);
+		set_pte_at((struct mm_struct *) data, addr, ptep, pte);
+	}
+	return ret;
+}
+
+/**
+ * sgx_test_and_clear_young() - Test and reset the accessed bit
+ * @page:	enclave EPC page to be tested for recent access
+ * @encl:	enclave which owns @page
+ *
+ * Checks the Access (A) bit from the PTE corresponding to the
+ * enclave page and clears it.  Returns 1 if the page has been
+ * recently accessed and 0 if not.
+ */
+int sgx_test_and_clear_young(struct sgx_encl_page *page, struct sgx_encl *encl)
+{
+	struct vm_area_struct *vma = sgx_find_vma(encl, page->addr);
+	if (!vma)
+		return 0;
+
+	return apply_to_page_range(vma->vm_mm, page->addr, PAGE_SIZE,
+				   sgx_test_and_clear_young_cb, vma->vm_mm);
+}
+
 static struct sgx_tgid_ctx *sgx_isolate_tgid_ctx(unsigned long nr_to_scan)
 {
 	struct sgx_tgid_ctx *ctx = NULL;
@@ -166,7 +197,8 @@  static void sgx_isolate_pages(struct sgx_encl *encl,
 					 struct sgx_encl_page,
 					 load_list);
 
-		if (!(entry->flags & SGX_ENCL_PAGE_RESERVED)) {
+		if (!sgx_test_and_clear_young(entry, encl) &&
+		    !(entry->flags & SGX_ENCL_PAGE_RESERVED)) {
 			entry->flags |= SGX_ENCL_PAGE_RESERVED;
 			list_move_tail(&entry->load_list, dst);
 		} else {
@@ -248,19 +280,6 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 
 	entry = list_first_entry(src, struct sgx_encl_page, load_list);
 
-	if (!sgx_pin_mm(encl)) {
-		while (!list_empty(src)) {
-			entry = list_first_entry(src, struct sgx_encl_page,
-						 load_list);
-			list_del(&entry->load_list);
-			mutex_lock(&encl->lock);
-			sgx_free_encl_page(entry, encl, 0);
-			mutex_unlock(&encl->lock);
-		}
-
-		return;
-	}
-
 	mutex_lock(&encl->lock);
 
 	/* EBLOCK */
@@ -330,8 +349,6 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 	}
 
 	mutex_unlock(&encl->lock);
-
-	sgx_unpin_mm(encl);
 }
 
 static void sgx_swap_pages(unsigned long nr_to_scan)
@@ -348,9 +365,14 @@  static void sgx_swap_pages(unsigned long nr_to_scan)
 	if (!encl)
 		goto out;
 
+	if (!sgx_pin_mm(encl))
+		goto out_enclave;
+
 	sgx_isolate_pages(encl, &cluster, nr_to_scan);
 	sgx_write_pages(encl, &cluster);
+	sgx_unpin_mm(encl);
 
+out_enclave:
 	kref_put(&encl->refcount, sgx_encl_release);
 out:
 	kref_put(&ctx->refcount, sgx_tgid_ctx_release);
diff --git a/drivers/platform/x86/intel_sgx_vma.c b/drivers/platform/x86/intel_sgx_vma.c
index 3ea200c..1349d73 100644
--- a/drivers/platform/x86/intel_sgx_vma.c
+++ b/drivers/platform/x86/intel_sgx_vma.c
@@ -256,6 +256,7 @@  static struct sgx_encl_page *sgx_vma_do_fault(struct vm_area_struct *vma,
 	/* Do not free */
 	epc_page = NULL;
 
+	sgx_test_and_clear_young(entry, encl);
 	list_add_tail(&entry->load_list, &encl->load_list);
 out:
 	mutex_unlock(&encl->lock);