@@ -344,6 +344,7 @@ struct sgx_encl;
/*
 * Descriptor for a single EPC page tracked by the driver.
 */
struct sgx_epc_page {
resource_size_t pa; /* address of the page; turned into a PFN with PFN_DOWN() when it is mapped */
struct list_head list; /* links the page into sgx_free_list, sgx_active_list or a temporary local list */
+ struct sgx_encl *encl; /* owning enclave; set by sgx_activate_page(), reset to NULL by sgx_free_page() */
struct sgx_encl_page *encl_page; /* enclave page held in this EPC page; set and cleared together with encl */
};
@@ -133,7 +133,6 @@ struct sgx_encl {
struct mm_struct *mm;
struct file *backing;
struct file *pcmd;
- struct list_head load_list;
struct kref refcount;
unsigned long base;
unsigned long size;
@@ -219,6 +218,10 @@ int ksgxswapd(void *p);
int sgx_add_epc_bank(resource_size_t start, unsigned long size);
int sgx_page_cache_init(void);
void sgx_page_cache_teardown(void);
+void sgx_activate_page(struct sgx_epc_page *epc_page,
+ struct sgx_encl *encl,
+ struct sgx_encl_page *encl_page);
+void sgx_deactivate_page(struct sgx_epc_page *epc_page);
struct sgx_epc_page *sgx_alloc_page(unsigned int flags);
int sgx_free_page(struct sgx_epc_page *entry, struct sgx_encl *encl);
void *sgx_get_page(struct sgx_epc_page *entry);
@@ -267,10 +267,8 @@ static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
goto out;
}
- epc_page->encl_page = encl_page;
encl_page->epc_page = epc_page;
- sgx_test_and_clear_young(encl_page, encl);
- list_add_tail(&epc_page->list, &encl->load_list);
+ sgx_activate_page(epc_page, encl, encl_page);
mutex_unlock(&encl->lock);
up_read(&encl->mm->mmap_sem);
@@ -467,7 +465,6 @@ static long sgx_ioc_enclave_create(struct file *filep, unsigned int cmd,
INIT_LIST_HEAD(&encl->add_page_reqs);
INIT_LIST_HEAD(&encl->va_pages);
INIT_RADIX_TREE(&encl->page_tree, GFP_KERNEL);
- INIT_LIST_HEAD(&encl->load_list);
INIT_LIST_HEAD(&encl->encl_list);
mutex_init(&encl->lock);
INIT_WORK(&encl->add_page_work, sgx_add_page_worker);
@@ -71,6 +71,8 @@
/* EPC pages handed back through sgx_free_page(); modified under sgx_free_list_lock. */
static LIST_HEAD(sgx_free_list);
static DEFINE_SPINLOCK(sgx_free_list_lock);
/*
 * List of EPC pages registered through sgx_activate_page(), shared by all
 * enclaves. Every access to this list in this file is made with
 * sgx_active_list_lock held.
 */
+static LIST_HEAD(sgx_active_list);
+static DEFINE_SPINLOCK(sgx_active_list_lock);
LIST_HEAD(sgx_tgid_ctx_list);
DEFINE_MUTEX(sgx_tgid_ctx_mutex);
@@ -117,108 +119,51 @@ int sgx_test_and_clear_young(struct sgx_encl_page *page, struct sgx_encl *encl)
sgx_test_and_clear_young_cb, vma->vm_mm);
}
-static struct sgx_tgid_ctx *sgx_isolate_tgid_ctx(unsigned long nr_to_scan)
/*
 * sgx_activate_page - bind an EPC page to its owner and queue it for swapping
 * @epc_page:  EPC page that now holds enclave data
 * @encl:      enclave that owns the page
 * @encl_page: enclave page stored in @epc_page
 *
 * Records the owner in @epc_page, calls sgx_test_and_clear_young() on the
 * page and appends the page to the tail of the shared sgx_active_list.
 * The callers visible in this change (sgx_process_add_page_req() and
 * sgx_do_fault()) invoke it before they release encl->lock.
 */
+void sgx_activate_page(struct sgx_epc_page *epc_page,
+ struct sgx_encl *encl,
+ struct sgx_encl_page *encl_page)
{
- struct sgx_tgid_ctx *ctx = NULL;
- int i;
-
- mutex_lock(&sgx_tgid_ctx_mutex);
-
- if (list_empty(&sgx_tgid_ctx_list)) {
- mutex_unlock(&sgx_tgid_ctx_mutex);
- return NULL;
- }
-
- for (i = 0; i < nr_to_scan; i++) {
- /* Peek TGID context from the head. */
- ctx = list_first_entry(&sgx_tgid_ctx_list,
- struct sgx_tgid_ctx,
- list);
-
- /* Move to the tail so that we do not encounter it in the
- * next iteration.
- */
- list_move_tail(&ctx->list, &sgx_tgid_ctx_list);
-
- /* Non-empty TGID context? */
- if (!list_empty(&ctx->encl_list) &&
- kref_get_unless_zero(&ctx->refcount))
- break;
-
- ctx = NULL;
- }
/* Back-pointers that the swapping code reads through entry->encl and entry->encl_page. */
+ epc_page->encl = encl;
+ epc_page->encl_page = encl_page;
- mutex_unlock(&sgx_tgid_ctx_mutex);
/*
 * The return value is ignored.
 * NOTE(review): presumably this resets the accessed state so the page
 * starts out "old" - confirm against sgx_test_and_clear_young().
 */
+ sgx_test_and_clear_young(encl_page, encl);
- return ctx;
/* Newly activated pages go to the tail; sgx_isolate_pages() takes pages from the head. */
+ spin_lock(&sgx_active_list_lock);
+ list_add_tail(&epc_page->list, &sgx_active_list);
+ spin_unlock(&sgx_active_list_lock);
}
-static struct sgx_encl *sgx_isolate_encl(struct sgx_tgid_ctx *ctx,
- unsigned long nr_to_scan)
/*
 * sgx_deactivate_page - unlink an EPC page from whatever list it is on
 * @epc_page: EPC page to unlink
 *
 * Performs list_del_init() on the page's list node while holding
 * sgx_active_list_lock. sgx_encl_release() calls this right before
 * handing the page to sgx_free_page().
 */
+void sgx_deactivate_page(struct sgx_epc_page *epc_page)
{
- struct sgx_encl *encl = NULL;
- int i;
-
- mutex_lock(&sgx_tgid_ctx_mutex);
-
- if (list_empty(&ctx->encl_list)) {
- mutex_unlock(&sgx_tgid_ctx_mutex);
- return NULL;
- }
-
- for (i = 0; i < nr_to_scan; i++) {
- /* Peek encl from the head. */
- encl = list_first_entry(&ctx->encl_list, struct sgx_encl,
- encl_list);
-
- /* Move to the tail so that we do not encounter it in the
- * next iteration.
- */
- list_move_tail(&encl->encl_list, &ctx->encl_list);
-
- /* Enclave with faulted pages? */
- if (!list_empty(&encl->load_list) &&
- kref_get_unless_zero(&encl->refcount))
- break;
-
- encl = NULL;
- }
-
- mutex_unlock(&sgx_tgid_ctx_mutex);
-
- return encl;
+ spin_lock(&sgx_active_list_lock);
+ list_del_init(&epc_page->list);
+ spin_unlock(&sgx_active_list_lock);
}
/*
 * sgx_isolate_pages - pull candidate pages off the shared active list
 * @dst:        list that receives the isolated pages
 * @nr_to_scan: maximum number of pages to examine
 *
 * With sgx_active_list_lock held, repeatedly takes the first entry of
 * sgx_active_list, stopping early when the list becomes empty. Pages that
 * are moved to @dst each hold one reference on their enclave, obtained
 * with kref_get_unless_zero(); sgx_swap_pages() drops that reference when
 * it sorts the page into its per-enclave list.
 */
-static void sgx_isolate_pages(struct sgx_encl *encl,
- struct list_head *dst,
+static void sgx_isolate_pages(struct list_head *dst,
unsigned long nr_to_scan)
{
+ unsigned long i;
struct sgx_epc_page *entry;
- int i;
- mutex_lock(&encl->lock);
-
- if (encl->flags & SGX_ENCL_DEAD)
- goto out;
+ spin_lock(&sgx_active_list_lock);
for (i = 0; i < nr_to_scan; i++) {
- if (list_empty(&encl->load_list))
+ if (list_empty(&sgx_active_list))
break;
- entry = list_first_entry(&encl->load_list,
+ entry = list_first_entry(&sgx_active_list,
struct sgx_epc_page,
list);
- if (!sgx_test_and_clear_young(entry->encl_page, encl) &&
- !(entry->encl_page->flags & SGX_ENCL_PAGE_RESERVED)) {
- entry->encl_page->flags |= SGX_ENCL_PAGE_RESERVED;
/*
 * Pages of a dead enclave, or of an enclave whose refcount has already
 * reached zero, are simply unlinked and not returned in dst. No
 * reference is taken when SGX_ENCL_DEAD is set, because the || short
 * circuits before kref_get_unless_zero().
 */
+ if ((entry->encl->flags & SGX_ENCL_DEAD) ||
+ !kref_get_unless_zero(&entry->encl->refcount))
+ list_del_init(&entry->list);
+ else
list_move_tail(&entry->list, dst);
- } else {
- list_move_tail(&entry->list, &encl->load_list);
- }
}
-out:
- mutex_unlock(&encl->lock);
+
+ spin_unlock(&sgx_active_list_lock);
}
static int __sgx_ewb(struct sgx_encl *encl,
@@ -311,16 +256,11 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
if (list_empty(src))
return;
- entry = list_first_entry(src, struct sgx_epc_page, list);
-
- mutex_lock(&encl->lock);
-
/* EBLOCK */
list_for_each_entry_safe(entry, tmp, src, list) {
vma = sgx_find_vma(encl, entry->encl_page->addr);
- if (vma) {
+ if (vma)
zap_vma_ptes(vma, entry->encl_page->addr, PAGE_SIZE);
- }
sgx_eblock(encl, entry);
}
@@ -331,7 +271,7 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
/* EWB */
while (!list_empty(src)) {
entry = list_first_entry(src, struct sgx_epc_page, list);
- list_del(&entry->list);
+ list_del_init(&entry->list);
sgx_evict_page(entry->encl_page, encl);
encl->secs_child_cnt--;
}
@@ -340,32 +280,94 @@ static void sgx_write_pages(struct sgx_encl *encl, struct list_head *src)
sgx_evict_page(&encl->secs_page, encl);
encl->flags |= SGX_ENCL_SECS_EVICTED;
}
+}
+
/*
 * sgx_age_pages - filter recently used pages out of the swap candidates
 * @swap: candidate pages; pages that are not young stay on this list
 * @skip: receives every page for which sgx_test_and_clear_young()
 *        returns non-zero
 *
 * Does nothing when @swap is empty.
 */
+static inline void sgx_age_pages(struct list_head *swap, struct list_head *skip)
+{
+ struct sgx_epc_page *entry, *tmp;
+
+ if (list_empty(swap))
+ return;
+
/* The _safe iterator is needed because entries are moved to skip while walking swap. */
+ list_for_each_entry_safe(entry, tmp, swap, list) {
+ if (sgx_test_and_clear_young(entry->encl_page, entry->encl))
+ list_move_tail(&entry->list, skip);
+ }
+}
+
/*
 * sgx_reserve_pages - mark swap candidates as reserved
 * @swap: candidate pages; pages that get reserved here stay on this list
 * @skip: receives pages whose enclave page already has
 *        SGX_ENCL_PAGE_RESERVED set
 *
 * Every page left on @swap has SGX_ENCL_PAGE_RESERVED set in its enclave
 * page flags on return. sgx_swap_pages() calls this with encl->lock held.
 */
+static inline void sgx_reserve_pages(struct list_head *swap, struct list_head *skip)
+{
+ struct sgx_epc_page *entry, *tmp;
- mutex_unlock(&encl->lock);
+ if (list_empty(swap))
+ return;
+
+ list_for_each_entry_safe(entry, tmp, swap, list) {
+ if (entry->encl_page->flags & SGX_ENCL_PAGE_RESERVED)
+ list_move_tail(&entry->list, skip);
+ else
+ entry->encl_page->flags |= SGX_ENCL_PAGE_RESERVED;
+ }
+}
+
/*
 * sgx_del_if_dead - drop both work lists when the enclave has died
 * @encl: enclave whose pages are on @swap and @skip
 * @swap: list of swap candidates
 * @skip: list of pages to be returned to the active list
 *
 * When SGX_ENCL_DEAD is set, both list heads are reset so the caller sees
 * them as empty and neither writes the pages out nor puts them back on
 * sgx_active_list.
 *
 * NOTE(review): list_del_init() is applied to the list heads, not to the
 * individual entries, so the pages remain linked to one another after the
 * heads are detached. Confirm that the later sgx_deactivate_page() calls
 * from sgx_encl_release() are the intended way these nodes get unlinked.
 */
+static inline void sgx_del_if_dead(struct sgx_encl *encl,
+ struct list_head *swap,
+ struct list_head *skip)
+{
+ if (encl->flags & SGX_ENCL_DEAD) {
+ list_del_init(swap);
+ list_del_init(skip);
+ }
}
/*
 * sgx_swap_pages - try to evict pages taken from the shared active list
 * @nr_to_scan: maximum number of pages to pull off sgx_active_list
 *
 * Isolates up to @nr_to_scan pages, then processes them one enclave at a
 * time: the pages of the enclave owning the first isolated page are
 * gathered on a local list, filtered by age and reservation state, and the
 * remainder is handed to sgx_write_pages(). Pages that were filtered out
 * are appended back to sgx_active_list.
 */
static void sgx_swap_pages(unsigned long nr_to_scan)
{
- struct sgx_tgid_ctx *ctx;
+ struct sgx_epc_page *entry, *tmp;
struct sgx_encl *encl;
- LIST_HEAD(cluster);
- ctx = sgx_isolate_tgid_ctx(nr_to_scan);
- if (!ctx)
- return;
/* iso: everything isolated; swap: current enclave's candidates; skip: pages to put back. */
+ LIST_HEAD(iso);
+ LIST_HEAD(swap);
+ LIST_HEAD(skip);
- encl = sgx_isolate_encl(ctx, nr_to_scan);
- if (!encl)
- goto out;
+ sgx_isolate_pages(&iso, nr_to_scan);
- down_read(&encl->mm->mmap_sem);
- sgx_isolate_pages(encl, &cluster, nr_to_scan);
- sgx_write_pages(encl, &cluster);
- up_read(&encl->mm->mmap_sem);
/*
 * Each pass handles the enclave that owns the first page still on iso.
 * One extra reference is taken for the duration of the pass; it is
 * dropped at the bottom of the loop.
 */
+ while (!list_empty(&iso)) {
+ encl = list_first_entry(&iso, struct sgx_epc_page, list)->encl;
+ kref_get(&encl->refcount);
- kref_put(&encl->refcount, sgx_encl_release);
-out:
- kref_put(&ctx->refcount, sgx_tgid_ctx_release);
/*
 * Collect this enclave's pages on swap. The kref_put() releases the
 * per-page reference that sgx_isolate_pages() took for the entry.
 */
+ list_for_each_entry_safe(entry, tmp, &iso, list) {
+ if (entry->encl != encl)
+ continue;
+
+ kref_put(&encl->refcount, sgx_encl_release);
+ list_move_tail(&entry->list, &swap);
+ }
+
+ down_read(&encl->mm->mmap_sem);
+
/* First dead check and the aging pass run without encl->lock held. */
+ sgx_del_if_dead(encl, &swap, &skip);
+ sgx_age_pages(&swap, &skip);
+
+ if (!list_empty(&swap)) {
+ mutex_lock(&encl->lock);
+
/* SGX_ENCL_DEAD is checked again now that encl->lock is held. */
+ sgx_del_if_dead(encl, &swap, &skip);
+ sgx_reserve_pages(&swap, &skip);
+ sgx_write_pages(encl, &swap);
+
+ mutex_unlock(&encl->lock);
+ }
+
+ up_read(&encl->mm->mmap_sem);
+
/* Young or already reserved pages go back to the tail of the active list. */
+ if (!list_empty(&skip)) {
+ spin_lock(&sgx_active_list_lock);
+ list_splice_tail_init(&skip, &sgx_active_list);
+ spin_unlock(&sgx_active_list_lock);
+ }
+
+ kref_put(&encl->refcount, sgx_encl_release);
+ }
}
int ksgxswapd(void *p)
@@ -447,7 +449,7 @@ static struct sgx_epc_page *sgx_alloc_page_fast(void)
if (!list_empty(&sgx_free_list)) {
entry = list_first_entry(&sgx_free_list, struct sgx_epc_page,
list);
- list_del(&entry->list);
+ list_del_init(&entry->list);
sgx_nr_free_pages--;
}
@@ -531,6 +533,9 @@ int sgx_free_page(struct sgx_epc_page *entry, struct sgx_encl *encl)
return ret;
}
+ entry->encl = NULL;
+ entry->encl_page = NULL;
+
spin_lock(&sgx_free_list_lock);
list_add(&entry->list, &sgx_free_list);
sgx_nr_free_pages++;
@@ -330,7 +330,6 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
*/
encl->secs_child_cnt++;
- epc_page->encl_page = entry;
entry->epc_page = epc_page;
if (reserve)
@@ -338,7 +337,6 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
/* Do not free */
epc_page = NULL;
- list_add_tail(&entry->epc_page->list, &encl->load_list);
rc = vm_insert_pfn(vma, entry->addr, PFN_DOWN(entry->epc_page->pa));
if (rc) {
@@ -350,7 +348,7 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
goto out;
}
- sgx_test_and_clear_young(entry, encl);
+ sgx_activate_page(entry->epc_page, encl, entry);
out:
mutex_unlock(&encl->lock);
if (epc_page)
@@ -395,7 +393,7 @@ void sgx_encl_release(struct kref *ref)
radix_tree_for_each_slot(slot, &encl->page_tree, &iter, 0) {
entry = *slot;
if (entry->epc_page) {
- list_del(&entry->epc_page->list);
+ sgx_deactivate_page(entry->epc_page);
sgx_free_page(entry->epc_page, encl);
}
radix_tree_delete(&encl->page_tree, entry->addr >> PAGE_SHIFT);
Remove struct sgx_encl's load_list and instead track loaded EPC pages via a shared list. A shared list provides fairer and more accurate LRU-based swapping, as determining a page's LRU status is no longer dependent on the number of TGID contexts or enclaves, i.e. the time between calls to age a given page is purely a function of the number of EPC pages in use system-wide, as opposed to the current approach in which the time between calls may vary depending on the number of contexts (processes) and enclaves.

For example, given enclave "A", running in a process with multiple enclaves, and enclave "B", running in a process with one enclave, when isolating per-process and then per-enclave, pages in "A" will be aged less often than pages from "B", regardless of the number of pages used by "A" and "B". Processes can also abuse this behavior by spawning dummy enclaves to further reduce the frequency of aging for a single enclave's pages.

Enclave-agnostic swapping also allows for maintaining multiple LRU lists that are not tied to a context/enclave, e.g. per-cgroup LRUs.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
arch/x86/include/asm/sgx.h | 1 +
drivers/platform/x86/intel_sgx/sgx.h | 5 +-
drivers/platform/x86/intel_sgx/sgx_ioctl.c | 5 +-
drivers/platform/x86/intel_sgx/sgx_page_cache.c | 215 ++++++++++++------------
drivers/platform/x86/intel_sgx/sgx_util.c | 6 +-
5 files changed, 118 insertions(+), 114 deletions(-)