diff mbox

[intel-sgx-kernel-dev,RFC,03/12] intel_sgx: swap pages using common active/lru list

Message ID 1497461858-20309-4-git-send-email-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sean Christopherson June 14, 2017, 5:37 p.m. UTC
Remove struct sgx_encl's load_list and instead track loaded EPC pages
via a shared list.  A shared list provides fairer and more accurate
LRU-based swapping, as determining a page's LRU status is no longer
dependent on the number of TGID contexts or enclaves.  That is, the
time between calls to age a given page is purely a function of the
number of EPC pages in use system-wide, as opposed to the current
approach, in which the time between calls may vary depending on the
number of contexts (processes) and enclaves.

For example, given enclave "A", running in a process with multiple
enclaves, and enclave "B", running in a process with one enclave,
when isolating per-process and then per-enclave, pages in "A" will
be aged less often than pages from "B", regardless of the number of
pages used by "A" and "B".  Processes can also abuse this behavior
by spawning dummy enclaves to further reduce the frequency of aging
for a single enclave's pages.

Enclave-agnostic swapping also allows for maintaining multiple LRU
lists that are not tied to a context/enclave, e.g. per-cgroup LRUs.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/include/asm/sgx.h                      |   1 +
 drivers/platform/x86/intel_sgx/sgx.h            |   5 +-
 drivers/platform/x86/intel_sgx/sgx_ioctl.c      |   5 +-
 drivers/platform/x86/intel_sgx/sgx_page_cache.c | 215 ++++++++++++------------
 drivers/platform/x86/intel_sgx/sgx_util.c       |   6 +-
 5 files changed, 118 insertions(+), 114 deletions(-)
diff mbox

Patch

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index e1f27fd..97119b8 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -344,6 +344,7 @@  struct sgx_encl;
 struct sgx_epc_page {
 	resource_size_t	pa;
 	struct list_head list;
+	struct sgx_encl *encl;
 	struct sgx_encl_page *encl_page;
 };
 
diff --git a/drivers/platform/x86/intel_sgx/sgx.h b/drivers/platform/x86/intel_sgx/sgx.h
index 4c18f9f..93c1730 100644
--- a/drivers/platform/x86/intel_sgx/sgx.h
+++ b/drivers/platform/x86/intel_sgx/sgx.h
@@ -133,7 +133,6 @@  struct sgx_encl {
 	struct mm_struct *mm;
 	struct file *backing;
 	struct file *pcmd;
-	struct list_head load_list;
 	struct kref refcount;
 	unsigned long base;
 	unsigned long size;
@@ -219,6 +218,10 @@  int ksgxswapd(void *p);
 int sgx_add_epc_bank(resource_size_t start, unsigned long size);
 int sgx_page_cache_init(void);
 void sgx_page_cache_teardown(void);
+void sgx_activate_page(struct sgx_epc_page *epc_page,
+		       struct sgx_encl *encl,
+		       struct sgx_encl_page *encl_page);
+void sgx_deactivate_page(struct sgx_epc_page *epc_page);
 struct sgx_epc_page *sgx_alloc_page(unsigned int flags);
 int sgx_free_page(struct sgx_epc_page *entry, struct sgx_encl *encl);
 void *sgx_get_page(struct sgx_epc_page *entry);
diff --git a/drivers/platform/x86/intel_sgx/sgx_ioctl.c b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
index 0741e6c..17999c6 100644
--- a/drivers/platform/x86/intel_sgx/sgx_ioctl.c
+++ b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
@@ -267,10 +267,8 @@  static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
 		goto out;
 	}
 
-	epc_page->encl_page = encl_page;
 	encl_page->epc_page = epc_page;
-	sgx_test_and_clear_young(encl_page, encl);
-	list_add_tail(&epc_page->list, &encl->load_list);
+	sgx_activate_page(epc_page, encl, encl_page);
 
 	mutex_unlock(&encl->lock);
 	up_read(&encl->mm->mmap_sem);
@@ -467,7 +465,6 @@  static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd,
 	INIT_LIST_HEAD(&encl->add_page_reqs);
 	INIT_LIST_HEAD(&encl->va_pages);
 	INIT_RADIX_TREE(&encl->page_tree, GFP_KERNEL);
-	INIT_LIST_HEAD(&encl->load_list);
 	INIT_LIST_HEAD(&encl->encl_list);
 	mutex_init(&encl->lock);
 	INIT_WORK(&encl->add_page_work, sgx_add_page_worker);
diff --git a/drivers/platform/x86/intel_sgx/sgx_page_cache.c b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
index 0829ee0..d47e4e6 100644
--- a/drivers/platform/x86/intel_sgx/sgx_page_cache.c
+++ b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
@@ -71,6 +71,8 @@ 
 
 static LIST_HEAD(sgx_free_list);
 static DEFINE_SPINLOCK(sgx_free_list_lock);
+static LIST_HEAD(sgx_active_list);
+static DEFINE_SPINLOCK(sgx_active_list_lock);
 
 LIST_HEAD(sgx_tgid_ctx_list);
 DEFINE_MUTEX(sgx_tgid_ctx_mutex);
@@ -117,108 +119,51 @@  int sgx_test_and_clear_young(struct sgx_encl_page *page, struct sgx_encl *encl)
 				   sgx_test_and_clear_young_cb, vma->vm_mm);
 }
 
-static struct sgx_tgid_ctx *sgx_isolate_tgid_ctx(unsigned long nr_to_scan)
+void sgx_activate_page(struct sgx_epc_page *epc_page,
+		       struct sgx_encl *encl,
+		       struct sgx_encl_page *encl_page)
 {
-	struct sgx_tgid_ctx *ctx = NULL;
-	int i;
-
-	mutex_lock(&sgx_tgid_ctx_mutex);
-
-	if (list_empty(&sgx_tgid_ctx_list)) {
-		mutex_unlock(&sgx_tgid_ctx_mutex);
-		return NULL;
-	}
-
-	for (i = 0; i < nr_to_scan; i++) {
-		/* Peek TGID context from the head. */
-		ctx = list_first_entry(&sgx_tgid_ctx_list,
-				       struct sgx_tgid_ctx,
-				       list);
-
-		/* Move to the tail so that we do not encounter it in the
-		 * next iteration.
-		 */
-		list_move_tail(&ctx->list, &sgx_tgid_ctx_list);
-
-		/* Non-empty TGID context? */
-		if (!list_empty(&ctx->encl_list) &&
-		    kref_get_unless_zero(&ctx->refcount))
-			break;
-
-		ctx = NULL;
-	}
+	epc_page->encl = encl;
+	epc_page->encl_page = encl_page;
 
-	mutex_unlock(&sgx_tgid_ctx_mutex);
+	sgx_test_and_clear_young(encl_page, encl);
 
-	return ctx;
+	spin_lock(&sgx_active_list_lock);
+	list_add_tail(&epc_page->list, &sgx_active_list);
+	spin_unlock(&sgx_active_list_lock);
 }
 
-static struct sgx_encl *sgx_isolate_encl(struct sgx_tgid_ctx *ctx,
-					       unsigned long nr_to_scan)
+void sgx_deactivate_page(struct sgx_epc_page *epc_page)
 {
-	struct sgx_encl *encl = NULL;
-	int i;
-
-	mutex_lock(&sgx_tgid_ctx_mutex);
-
-	if (list_empty(&ctx->encl_list)) {
-		mutex_unlock(&sgx_tgid_ctx_mutex);
-		return NULL;
-	}
-
-	for (i = 0; i < nr_to_scan; i++) {
-		/* Peek encl from the head. */
-		encl = list_first_entry(&ctx->encl_list, struct sgx_encl,
-					encl_list);
-
-		/* Move to the tail so that we do not encounter it in the
-		 * next iteration.
-		 */
-		list_move_tail(&encl->encl_list, &ctx->encl_list);
-
-		/* Enclave with faulted pages?  */
-		if (!list_empty(&encl->load_list) &&
-		    kref_get_unless_zero(&encl->refcount))
-			break;
-
-		encl = NULL;
-	}
-
-	mutex_unlock(&sgx_tgid_ctx_mutex);
-
-	return encl;
+	spin_lock(&sgx_active_list_lock);
+	list_del_init(&epc_page->list);
+	spin_unlock(&sgx_active_list_lock);
 }
 
-static void sgx_isolate_pages(struct sgx_encl *encl,
-			      struct list_head *dst,
+static void sgx_isolate_pages(struct list_head *dst,
 			      unsigned long nr_to_scan)
 {
+	unsigned long i;
 	struct sgx_epc_page *entry;
-	int i;
 
-	mutex_lock(&encl->lock);
-
-	if (encl->flags & SGX_ENCL_DEAD)
-		goto out;
+	spin_lock(&sgx_active_list_lock);
 
 	for (i = 0; i < nr_to_scan; i++) {
-		if (list_empty(&encl->load_list))
+		if (list_empty(&sgx_active_list))
 			break;
 
-		entry = list_first_entry(&encl->load_list,
+		entry = list_first_entry(&sgx_active_list,
 					 struct sgx_epc_page,
 					 list);
 
-		if (!sgx_test_and_clear_young(entry->encl_page, encl) &&
-		    !(entry->encl_page->flags & SGX_ENCL_PAGE_RESERVED)) {
-			entry->encl_page->flags |= SGX_ENCL_PAGE_RESERVED;
+		if ((entry->encl->flags & SGX_ENCL_DEAD) ||
+		    !kref_get_unless_zero(&entry->encl->refcount))
+			list_del_init(&entry->list);
+		else
 			list_move_tail(&entry->list, dst);
-		} else {
-			list_move_tail(&entry->list, &encl->load_list);
-		}
 	}
-out:
-	mutex_unlock(&encl->lock);
+
+	spin_unlock(&sgx_active_list_lock);
 }
 
 static int __sgx_ewb(struct sgx_encl *encl,
@@ -311,16 +256,11 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 	if (list_empty(src))
 		return;
 
-	entry = list_first_entry(src, struct sgx_epc_page, list);
-
-	mutex_lock(&encl->lock);
-
 	/* EBLOCK */
 	list_for_each_entry_safe(entry, tmp, src, list) {
 		vma = sgx_find_vma(encl, entry->encl_page->addr);
-		if (vma) {
+		if (vma)
 			zap_vma_ptes(vma, entry->encl_page->addr, PAGE_SIZE);
-		}
 
 		sgx_eblock(encl, entry);
 	}
@@ -331,7 +271,7 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 	/* EWB */
 	while (!list_empty(src)) {
 		entry = list_first_entry(src, struct sgx_epc_page, list);
-		list_del(&entry->list);
+		list_del_init(&entry->list);
 		sgx_evict_page(entry->encl_page, encl);
 		encl->secs_child_cnt--;
 	}
@@ -340,32 +280,94 @@  static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
 		sgx_evict_page(&encl->secs_page, encl);
 		encl->flags |= SGX_ENCL_SECS_EVICTED;
 	}
+}
+
+static inline void sgx_age_pages(struct list_head *swap, struct list_head *skip)
+{
+	struct sgx_epc_page *entry, *tmp;
+
+	if (list_empty(swap))
+		return;
+
+	list_for_each_entry_safe(entry, tmp, swap, list) {
+		if (sgx_test_and_clear_young(entry->encl_page, entry->encl))
+			list_move_tail(&entry->list, skip);
+	}
+}
+
+static inline void sgx_reserve_pages(struct list_head *swap, struct list_head *skip)
+{
+	struct sgx_epc_page *entry, *tmp;
 
-	mutex_unlock(&encl->lock);
+	if (list_empty(swap))
+		return;
+
+	list_for_each_entry_safe(entry, tmp, swap, list) {
+		if (entry->encl_page->flags & SGX_ENCL_PAGE_RESERVED)
+			list_move_tail(&entry->list, skip);
+		else
+			entry->encl_page->flags |= SGX_ENCL_PAGE_RESERVED;
+	}
+}
+
+static inline void sgx_del_if_dead(struct sgx_encl *encl,
+				   struct list_head *swap,
+				   struct list_head *skip)
+{
+	if (encl->flags & SGX_ENCL_DEAD) {
+		list_del_init(swap);
+		list_del_init(skip);
+	}
 }
 
 static void sgx_swap_pages(unsigned long nr_to_scan)
 {
-	struct sgx_tgid_ctx *ctx;
+	struct sgx_epc_page *entry, *tmp;
 	struct sgx_encl *encl;
-	LIST_HEAD(cluster);
 
-	ctx = sgx_isolate_tgid_ctx(nr_to_scan);
-	if (!ctx)
-		return;
+	LIST_HEAD(iso);
+	LIST_HEAD(swap);
+	LIST_HEAD(skip);
 
-	encl = sgx_isolate_encl(ctx, nr_to_scan);
-	if (!encl)
-		goto out;
+	sgx_isolate_pages(&iso, nr_to_scan);
 
-	down_read(&encl->mm->mmap_sem);
-	sgx_isolate_pages(encl, &cluster, nr_to_scan);
-	sgx_write_pages(encl, &cluster);
-	up_read(&encl->mm->mmap_sem);
+	while (!list_empty(&iso)) {
+		encl = list_first_entry(&iso, struct sgx_epc_page, list)->encl;
+		kref_get(&encl->refcount);
 
-	kref_put(&encl->refcount, sgx_encl_release);
-out:
-	kref_put(&ctx->refcount, sgx_tgid_ctx_release);
+		list_for_each_entry_safe(entry, tmp, &iso, list) {
+			if (entry->encl != encl)
+				continue;
+
+			kref_put(&encl->refcount, sgx_encl_release);
+			list_move_tail(&entry->list, &swap);
+		}
+
+		down_read(&encl->mm->mmap_sem);
+
+		sgx_del_if_dead(encl, &swap, &skip);
+		sgx_age_pages(&swap, &skip);
+
+		if (!list_empty(&swap)) {
+			mutex_lock(&encl->lock);
+
+			sgx_del_if_dead(encl, &swap, &skip);
+			sgx_reserve_pages(&swap, &skip);
+			sgx_write_pages(encl, &swap);
+
+			mutex_unlock(&encl->lock);
+		}
+
+		up_read(&encl->mm->mmap_sem);
+
+		if (!list_empty(&skip)) {
+			spin_lock(&sgx_active_list_lock);
+			list_splice_tail_init(&skip, &sgx_active_list);
+			spin_unlock(&sgx_active_list_lock);
+		}
+
+		kref_put(&encl->refcount, sgx_encl_release);
+	}
 }
 
 int ksgxswapd(void *p)
@@ -447,7 +449,7 @@  static struct sgx_epc_page *sgx_alloc_page_fast(void)
 	if (!list_empty(&sgx_free_list)) {
 		entry = list_first_entry(&sgx_free_list, struct sgx_epc_page,
 					 list);
-		list_del(&entry->list);
+		list_del_init(&entry->list);
 		sgx_nr_free_pages--;
 	}
 
@@ -531,6 +533,9 @@  int sgx_free_page(struct sgx_epc_page *entry, struct sgx_encl *encl)
 		return ret;
 	}
 
+	entry->encl = NULL;
+	entry->encl_page = NULL;
+
 	spin_lock(&sgx_free_list_lock);
 	list_add(&entry->list, &sgx_free_list);
 	sgx_nr_free_pages++;
diff --git a/drivers/platform/x86/intel_sgx/sgx_util.c b/drivers/platform/x86/intel_sgx/sgx_util.c
index d6d96b4..94efee8 100644
--- a/drivers/platform/x86/intel_sgx/sgx_util.c
+++ b/drivers/platform/x86/intel_sgx/sgx_util.c
@@ -330,7 +330,6 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 	 */
 	encl->secs_child_cnt++;
 
-	epc_page->encl_page = entry;
 	entry->epc_page = epc_page;
 
 	if (reserve)
@@ -338,7 +337,6 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 
 	/* Do not free */
 	epc_page = NULL;
-	list_add_tail(&entry->epc_page->list, &encl->load_list);
 
 	rc = vm_insert_pfn(vma, entry->addr, PFN_DOWN(entry->epc_page->pa));
 	if (rc) {
@@ -350,7 +348,7 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 		goto out;
 	}
 
-	sgx_test_and_clear_young(entry, encl);
+	sgx_activate_page(entry->epc_page, encl, entry);
 out:
 	mutex_unlock(&encl->lock);
 	if (epc_page)
@@ -395,7 +393,7 @@  void sgx_encl_release(struct kref *ref)
 	radix_tree_for_each_slot(slot, &encl->page_tree, &iter, 0) {
 		entry = *slot;
 		if (entry->epc_page) {
-			list_del(&entry->epc_page->list);
+			sgx_deactivate_page(entry->epc_page);
 			sgx_free_page(entry->epc_page, encl);
 		}
 		radix_tree_delete(&encl->page_tree, entry->addr >> PAGE_SHIFT);