[intel-sgx-kernel-dev] intel_sgx: handle kthread_run failure during PM resume
diff mbox

Message ID 1500574115-7166-1-git-send-email-sean.j.christopherson@intel.com
State New
Headers show

Commit Message

Sean Christopherson July 20, 2017, 6:08 p.m. UTC
Add utilities for starting/stopping the swap kthread, and ensure that
ksgxswapd_tsk is either NULL or valid.  Attempt to run the swap thread
if it is not running when when sgx_alloc_page wants to wake it.  Return
sgx_run_swap_kthread's error if both it and sgx_alloc_page_fast fail so
the application sees the more critical error, e.g. -ENOMEM vs -EBUSY.

Previously, sgx_pm_resume didn't check the return of kthread_run, which
can fail with -ENOMEM.  Upon failure, SGX applications would hang due
to the lack of a kernel thread to perform EPC swapping, and the kernel
would crash on the next system suspend due to passing an error pointer
to kthread_stop.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 drivers/platform/x86/intel_sgx/sgx.h            |  4 +--
 drivers/platform/x86/intel_sgx/sgx_main.c       |  6 ++---
 drivers/platform/x86/intel_sgx/sgx_page_cache.c | 34 ++++++++++++++++++++-----
 3 files changed, 31 insertions(+), 13 deletions(-)

Comments

Jarkko Sakkinen Aug. 1, 2017, 1:02 p.m. UTC | #1
On Thu, Jul 20, 2017 at 11:08:35AM -0700, Sean Christopherson wrote:
> Add utilities for starting/stopping the swap kthread, and ensure that

These utility weaken the readability of the code more than help
reuse of anything non-trivial. You h

> ksgxswapd_tsk is either NULL or valid.  Attempt to run the swap thread
> if it is not running when when sgx_alloc_page wants to wake it.  Return
> sgx_run_swap_kthread's error if both it and sgx_alloc_page_fast fail so
> the application sees the more critical error, e.g. -ENOMEM vs -EBUSY.
> 
> Previously, sgx_pm_resume didn't check the return of kthread_run, which
> can fail with -ENOMEM.  Upon failure, SGX applications would hang due
> to the lack of a kernel thread to perform EPC swapping, and the kernel
> would crash on the next system suspend due to passing an error pointer
> to kthread_stop.

This other (separate) change makes sense.

> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

/Jarkko

Patch
diff mbox

diff --git a/drivers/platform/x86/intel_sgx/sgx.h b/drivers/platform/x86/intel_sgx/sgx.h
index b5b21d4..b0fd1c8 100644
--- a/drivers/platform/x86/intel_sgx/sgx.h
+++ b/drivers/platform/x86/intel_sgx/sgx.h
@@ -215,9 +215,9 @@  void sgx_tgid_ctx_release(struct kref *ref);
 
 extern struct mutex sgx_tgid_ctx_mutex;
 extern struct list_head sgx_tgid_ctx_list;
-extern struct task_struct *ksgxswapd_tsk;
 
-int ksgxswapd(void *p);
+int sgx_run_swap_kthread(void);
+void sgx_stop_swap_kthread(void);
 int sgx_add_epc_bank(resource_size_t start, unsigned long size);
 int sgx_page_cache_init(void);
 void sgx_page_cache_teardown(void);
diff --git a/drivers/platform/x86/intel_sgx/sgx_main.c b/drivers/platform/x86/intel_sgx/sgx_main.c
index 8124d02..be012b7 100644
--- a/drivers/platform/x86/intel_sgx/sgx_main.c
+++ b/drivers/platform/x86/intel_sgx/sgx_main.c
@@ -163,8 +163,7 @@  static int sgx_pm_suspend(struct device *dev)
 	struct sgx_tgid_ctx *ctx;
 	struct sgx_encl *encl;
 
-	kthread_stop(ksgxswapd_tsk);
-	ksgxswapd_tsk = NULL;
+	sgx_stop_swap_kthread();
 
 	list_for_each_entry(ctx, &sgx_tgid_ctx_list, list) {
 		list_for_each_entry(encl, &ctx->encl_list, encl_list) {
@@ -179,8 +178,7 @@  static int sgx_pm_suspend(struct device *dev)
 
 static int sgx_pm_resume(struct device *dev)
 {
-	ksgxswapd_tsk = kthread_run(ksgxswapd, NULL, "kswapd");
-	return 0;
+	return sgx_run_swap_kthread();
 }
 
 static SIMPLE_DEV_PM_OPS(sgx_drv_pm, sgx_pm_suspend, sgx_pm_resume);
diff --git a/drivers/platform/x86/intel_sgx/sgx_page_cache.c b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
index 356f2c2..cc4875e 100644
--- a/drivers/platform/x86/intel_sgx/sgx_page_cache.c
+++ b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
@@ -78,7 +78,7 @@  static unsigned int sgx_nr_total_epc_pages;
 static unsigned int sgx_nr_free_pages;
 static unsigned int sgx_nr_low_pages = SGX_NR_LOW_EPC_PAGES_DEFAULT;
 static unsigned int sgx_nr_high_pages;
-struct task_struct *ksgxswapd_tsk;
+static struct task_struct *ksgxswapd_tsk;
 static DECLARE_WAIT_QUEUE_HEAD(ksgxswapd_waitq);
 
 
@@ -368,7 +368,7 @@  static void sgx_swap_pages(unsigned long nr_to_scan)
 	kref_put(&ctx->refcount, sgx_tgid_ctx_release);
 }
 
-int ksgxswapd(void *p)
+static int ksgxswapd(void *p)
 {
 	while (!kthread_should_stop()) {
 		wait_event_interruptible(ksgxswapd_waitq,
@@ -383,6 +383,22 @@  int ksgxswapd(void *p)
 	return 0;
 }
 
+int sgx_run_swap_kthread(void)
+{
+	struct task_struct *tmp = kthread_run(ksgxswapd, NULL, "ksgxswapd");
+	if (!IS_ERR(tmp))
+		ksgxswapd_tsk = tmp;
+	return PTR_ERR_OR_ZERO(tmp);
+}
+
+void sgx_stop_swap_kthread(void)
+{
+	if (ksgxswapd_tsk) {
+		kthread_stop(ksgxswapd_tsk);
+		ksgxswapd_tsk = NULL;
+	}
+}
+
 int sgx_add_epc_bank(resource_size_t start, unsigned long size)
 {
 	unsigned long i;
@@ -417,8 +433,7 @@  int sgx_add_epc_bank(resource_size_t start, unsigned long size)
 int sgx_page_cache_init(void)
 {
 	sgx_nr_high_pages = 2 * sgx_nr_low_pages;
-	ksgxswapd_tsk = kthread_run(ksgxswapd, NULL, "ksgxswapd");
-	return PTR_ERR_OR_ZERO(ksgxswapd_tsk);
+	return sgx_run_swap_kthread();
 }
 
 void sgx_page_cache_teardown(void)
@@ -426,8 +441,7 @@  void sgx_page_cache_teardown(void)
 	struct sgx_epc_page *entry;
 	struct list_head *parser, *temp;
 
-	if (ksgxswapd_tsk)
-		kthread_stop(ksgxswapd_tsk);
+	sgx_stop_swap_kthread();
 
 	spin_lock(&sgx_free_list_lock);
 	list_for_each_safe(parser, temp, &sgx_free_list) {
@@ -491,8 +505,14 @@  struct sgx_epc_page *sgx_alloc_page(unsigned int flags)
 		schedule();
 	}
 
-	if (sgx_nr_free_pages < sgx_nr_low_pages)
+	if (sgx_nr_free_pages < sgx_nr_low_pages) {
+		if (!ksgxswapd_tsk) {
+			int ret = sgx_run_swap_kthread();
+			if (ret && IS_ERR(entry))
+				return ERR_PTR(ret);
+		}
 		wake_up(&ksgxswapd_waitq);
+	}
 
 	return entry;
 }