diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -13,18 +13,17 @@
struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
int sgx_nr_epc_sections;
-unsigned long sgx_nr_free_pages;
static struct sgx_epc_page *__sgx_try_alloc_page(struct sgx_epc_section *section)
{
struct sgx_epc_page *page;
- if (list_empty(&section->page_list))
+ if (!section->free_cnt)
return NULL;
page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
list_del_init(&page->list);
- sgx_nr_free_pages--;
+ section->free_cnt--;
return page;
}
@@ -97,7 +96,7 @@ struct sgx_epc_page *sgx_alloc_page(void *owner, bool reclaim)
schedule();
}
- if (sgx_nr_free_pages < SGX_NR_LOW_PAGES)
+ if (!sgx_at_least_N_free_pages(SGX_NR_LOW_PAGES))
wake_up(&ksgxswapd_waitq);
return entry;
@@ -131,7 +130,7 @@ void __sgx_free_page(struct sgx_epc_page *page)
spin_lock(&section->lock);
list_add_tail(&page->list, &section->page_list);
- sgx_nr_free_pages++;
+ section->free_cnt++;
spin_unlock(&section->lock);
}
@@ -218,7 +217,7 @@ static bool __init sgx_alloc_epc_section(u64 addr, u64 size,
list_add_tail(&page->list, &section->unsanitized_page_list);
}
- sgx_nr_free_pages += nr_pages;
+ section->free_cnt = nr_pages;
return true;
diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
--- a/arch/x86/kernel/cpu/sgx/reclaim.c
+++ b/arch/x86/kernel/cpu/sgx/reclaim.c
@@ -68,7 +68,7 @@ static void sgx_sanitize_section(struct sgx_epc_section *section)
static inline bool sgx_should_reclaim(void)
{
- return sgx_nr_free_pages < SGX_NR_HIGH_PAGES &&
+ return !sgx_at_least_N_free_pages(SGX_NR_HIGH_PAGES) &&
!list_empty(&sgx_active_page_list);
}
@@ -430,7 +430,7 @@ void sgx_reclaim_pages(void)
section = sgx_epc_section(epc_page);
spin_lock(&section->lock);
list_add_tail(&epc_page->list, &section->page_list);
- sgx_nr_free_pages++;
+ section->free_cnt++;
spin_unlock(&section->lock);
}
}
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -30,6 +30,7 @@ struct sgx_epc_page {
struct sgx_epc_section {
unsigned long pa;
void *va;
+ unsigned long free_cnt;
struct list_head page_list;
struct list_head unsanitized_page_list;
spinlock_t lock;
@@ -73,12 +74,27 @@ static inline void *sgx_epc_addr(struct sgx_epc_page *page)
#define SGX_NR_HIGH_PAGES 64
extern int sgx_nr_epc_sections;
-extern unsigned long sgx_nr_free_pages;
extern struct task_struct *ksgxswapd_tsk;
extern struct wait_queue_head(ksgxswapd_waitq);
extern struct list_head sgx_active_page_list;
extern spinlock_t sgx_active_page_list_lock;
+static inline bool sgx_at_least_N_free_pages(unsigned long threshold)
+{
+ struct sgx_epc_section *section;
+ unsigned long free_cnt = 0;
+ int i;
+
+ for (i = 0; i < sgx_nr_epc_sections; i++) {
+ section = &sgx_epc_sections[i];
+ free_cnt += section->free_cnt;
+ if (free_cnt >= threshold)
+ return true;
+ }
+
+ return false;
+}
+
bool __init sgx_page_reclaimer_init(void);
void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
void sgx_reclaim_pages(void);

Track the free page count on a per EPC section basis so that the value is
properly protected by the section's spinlock.

As was pointed out when the change was proposed[*], using a global
non-atomic counter to track the number of free EPC pages is not safe.
The order of non-atomic reads and writes is not guaranteed, i.e.
concurrent RMW operations can write stale data. This causes a variety of
bad behavior, e.g. livelocks because the free page count wraps and causes
the swap thread to stop reclaiming.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kernel/cpu/sgx/main.c    | 11 +++++------
 arch/x86/kernel/cpu/sgx/reclaim.c |  4 ++--
 arch/x86/kernel/cpu/sgx/sgx.h     | 18 +++++++++++++++++-
 3 files changed, 24 insertions(+), 9 deletions(-)
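
Not part of the patch, but for anyone curious why a bare counter misbehaves:
below is a minimal user-space sketch of the race the changelog describes. The
file name (race_demo.c) and the iteration count are arbitrary; it assumes only
a POSIX toolchain, not the kernel sources. Two threads apply the same
non-atomic read-modify-write pattern the old global sgx_nr_free_pages relied
on, and the lost updates usually leave the counter far from zero; since it is
unsigned, it can also wrap to a huge value, which is the "count wraps and
reclaim stops" failure mode described above.

/*
 * race_demo.c - illustrative only, not kernel code.
 *
 * Two threads perform equal numbers of non-atomic ++ and -- on a shared
 * unsigned long. With no lock or atomic protecting the read-modify-write,
 * updates are lost and the final value is rarely the expected 0.
 *
 * Build: gcc -pthread race_demo.c -o race_demo
 */
#include <pthread.h>
#include <stdio.h>

#define ITERS 10000000UL

/* volatile keeps each ++/-- as a real load/store so the race stays visible */
static volatile unsigned long counter;

static void *inc_thread(void *arg)
{
        unsigned long i;

        (void)arg;
        for (i = 0; i < ITERS; i++)
                counter++;      /* load, add 1, store: not atomic */
        return NULL;
}

static void *dec_thread(void *arg)
{
        unsigned long i;

        (void)arg;
        for (i = 0; i < ITERS; i++)
                counter--;      /* races with the increments above */
        return NULL;
}

int main(void)
{
        pthread_t inc, dec;

        pthread_create(&inc, NULL, inc_thread, NULL);
        pthread_create(&dec, NULL, dec_thread, NULL);
        pthread_join(inc, NULL);
        pthread_join(dec, NULL);

        /* Expected 0; in practice almost always something else, often huge. */
        printf("counter = %lu\n", counter);
        return 0;
}

Making the counter per-section and only updating it under the section's
spinlock, as this patch does, serializes the read-modify-write and removes
the race.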