
[for_v23,v3,12/12] x86/sgx: Reinstate per EPC section free page counts

Message ID 20191016183745.8226-13-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series x86/sgx: Bug fixes for v23

Commit Message

Sean Christopherson Oct. 16, 2019, 6:37 p.m. UTC
Track the free page count on a per EPC section basis so that the value
is properly protected by the section's spinlock.

As was pointed out when the change was proposed[*], using a global
non-atomic counter to track the number of free EPC pages is not safe.
The order of non-atomic reads and writes is not guaranteed, i.e.
concurrent RMW operations can write stale data.  This causes a variety
of bad behavior, e.g. livelocks because the free page count wraps and
causes the swap thread to stop reclaiming.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kernel/cpu/sgx/main.c    | 11 +++++------
 arch/x86/kernel/cpu/sgx/reclaim.c |  4 ++--
 arch/x86/kernel/cpu/sgx/sgx.h     | 18 +++++++++++++++++-
 3 files changed, 24 insertions(+), 9 deletions(-)
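
As an aside to the race described in the commit message: the lost update
can be demonstrated with a trivial userspace sketch (illustrative only,
not part of the patch).  Two threads updating a plain counter with
non-atomic read-modify-write operations can drop some of the updates,
and an unsigned counter that goes below zero wraps:

/* Illustrative only; compile with: gcc -pthread race.c */
#include <pthread.h>
#include <stdio.h>

static volatile unsigned long counter;

static void *bump(void *arg)
{
	long delta = (long)arg;
	int i;

	/* Each update is a separate load and store, just like counter++/--. */
	for (i = 0; i < 1000000; i++)
		counter += delta;
	return NULL;
}

int main(void)
{
	pthread_t inc, dec;

	pthread_create(&inc, NULL, bump, (void *)+1L);
	pthread_create(&dec, NULL, bump, (void *)-1L);
	pthread_join(inc, NULL);
	pthread_join(dec, NULL);

	/*
	 * Should be 0; any other value means updates were lost, and a huge
	 * value means the counter wrapped below zero.
	 */
	printf("counter = %lu\n", counter);
	return 0;
}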

Comments

Jarkko Sakkinen Oct. 18, 2019, 12:49 p.m. UTC | #1
On Wed, Oct 16, 2019 at 11:37:45AM -0700, Sean Christopherson wrote:
> Track the free page count on a per EPC section basis so that the value
> is properly protected by the section's spinlock.
> 
> As was pointed out when the change was proposed[*], using a global
> non-atomic counter to track the number of free EPC pages is not safe.
> The order of non-atomic reads and writes is not guaranteed, i.e.
> concurrent RMW operations can write stale data.  This causes a variety
> of bad behavior, e.g. livelocks because the free page count wraps and
> causes the swap thread to stop reclaiming.
> 
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>

What is the reason not to just change it to an atomic?

For debugging the global is useful because it could be exposed
as a sysfs file.

> ---
>  arch/x86/kernel/cpu/sgx/main.c    | 11 +++++------
>  arch/x86/kernel/cpu/sgx/reclaim.c |  4 ++--
>  arch/x86/kernel/cpu/sgx/sgx.h     | 18 +++++++++++++++++-
>  3 files changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 6311aef10ec4..efbb52e4ecad 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -13,18 +13,17 @@
>  
>  struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
>  int sgx_nr_epc_sections;
> -unsigned long sgx_nr_free_pages;
>  
>  static struct sgx_epc_page *__sgx_try_alloc_page(struct sgx_epc_section *section)
>  {
>  	struct sgx_epc_page *page;
>  
> -	if (list_empty(&section->page_list))
> +	if (!section->free_cnt)
>  		return NULL;

 Why does this check need to be changed?

>  
>  	page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
>  	list_del_init(&page->list);
> -	sgx_nr_free_pages--;
> +	section->free_cnt--;
>  	return page;
>  }
>  
> @@ -97,7 +96,7 @@ struct sgx_epc_page *sgx_alloc_page(void *owner, bool reclaim)
>  		schedule();
>  	}
>  
> -	if (sgx_nr_free_pages < SGX_NR_LOW_PAGES)
> +	if (!sgx_at_least_N_free_pages(SGX_NR_LOW_PAGES))
>  		wake_up(&ksgxswapd_waitq);
>  
>  	return entry;
> @@ -131,7 +130,7 @@ void __sgx_free_page(struct sgx_epc_page *page)
>  
>  	spin_lock(&section->lock);
>  	list_add_tail(&page->list, &section->page_list);
> -	sgx_nr_free_pages++;
> +	section->free_cnt++;
>  	spin_unlock(&section->lock);
>  
>  }
> @@ -218,7 +217,7 @@ static bool __init sgx_alloc_epc_section(u64 addr, u64 size,
>  		list_add_tail(&page->list, &section->unsanitized_page_list);
>  	}
>  
> -	sgx_nr_free_pages += nr_pages;
> +	section->free_cnt = nr_pages;
>  
>  	return true;
>  
> diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
> index 3f183dd0e653..8619141f4bed 100644
> --- a/arch/x86/kernel/cpu/sgx/reclaim.c
> +++ b/arch/x86/kernel/cpu/sgx/reclaim.c
> @@ -68,7 +68,7 @@ static void sgx_sanitize_section(struct sgx_epc_section *section)
>  
>  static inline bool sgx_should_reclaim(void)
>  {
> -	return sgx_nr_free_pages < SGX_NR_HIGH_PAGES &&
> +	return !sgx_at_least_N_free_pages(SGX_NR_HIGH_PAGES) &&
>  	       !list_empty(&sgx_active_page_list);
>  }
>  
> @@ -430,7 +430,7 @@ void sgx_reclaim_pages(void)
>  		section = sgx_epc_section(epc_page);
>  		spin_lock(&section->lock);
>  		list_add_tail(&epc_page->list, &section->page_list);
> -		sgx_nr_free_pages++;
> +		section->free_cnt++;
>  		spin_unlock(&section->lock);
>  	}
>  }
> diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> index 87e375e8c25e..c7f0277299f6 100644
> --- a/arch/x86/kernel/cpu/sgx/sgx.h
> +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> @@ -30,6 +30,7 @@ struct sgx_epc_page {
>  struct sgx_epc_section {
>  	unsigned long pa;
>  	void *va;
> +	unsigned long free_cnt;
>  	struct list_head page_list;
>  	struct list_head unsanitized_page_list;
>  	spinlock_t lock;
> @@ -73,12 +74,27 @@ static inline void *sgx_epc_addr(struct sgx_epc_page *page)
>  #define SGX_NR_HIGH_PAGES	64
>  
>  extern int sgx_nr_epc_sections;
> -extern unsigned long sgx_nr_free_pages;
>  extern struct task_struct *ksgxswapd_tsk;
>  extern struct wait_queue_head(ksgxswapd_waitq);
>  extern struct list_head sgx_active_page_list;
>  extern spinlock_t sgx_active_page_list_lock;
>  
> +static inline bool sgx_at_least_N_free_pages(unsigned long threshold)

There is an upper case letter in the function name, and the name is also
weird overall.

> +{
> +	struct sgx_epc_section *section;
> +	unsigned long free_cnt = 0;
> +	int i;
> +
> +	for (i = 0; i < sgx_nr_epc_sections; i++) {
> +		section = &sgx_epc_sections[i];
> +		free_cnt += section->free_cnt;
> +		if (free_cnt >= threshold)
> +			return true;
> +	}
> +
> +	return false;
> +}

The complexity does not pay off here. Better to just revert back to this
if required:

unsigned long sgx_calc_free_cnt(void)
{
	struct sgx_epc_section *section;
	unsigned long free_cnt = 0;
	int i;

	for (i = 0; i < sgx_nr_epc_sections; i++) {
		section = &sgx_epc_sections[i];
		free_cnt += section->free_cnt;
	}

	return free_cnt;
}
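
A hypothetical sketch, not from any posted patch: with that helper the
call sites would presumably go back to comparing against the watermarks
directly, along the lines of

static inline bool sgx_should_reclaim(void)
{
	/* Global sum recomputed on each check; sketch only. */
	return sgx_calc_free_cnt() < SGX_NR_HIGH_PAGES &&
	       !list_empty(&sgx_active_page_list);
}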

/Jarkko
Jarkko Sakkinen Oct. 18, 2019, 12:55 p.m. UTC | #2
On Fri, Oct 18, 2019 at 03:49:42PM +0300, Jarkko Sakkinen wrote:
> On Wed, Oct 16, 2019 at 11:37:45AM -0700, Sean Christopherson wrote:
> > Track the free page count on a per EPC section basis so that the value
> > is properly protected by the section's spinlock.
> > 
> > As was pointed out when the change was proposed[*], using a global
> > non-atomic counter to track the number of free EPC pages is not safe.
> > The order of non-atomic reads and writes is not guaranteed, i.e.
> > concurrent RMW operations can write stale data.  This causes a variety
> > of bad behavior, e.g. livelocks because the free page count wraps and
> > causes the swap thread to stop reclaiming.
> > 
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> 
> What is the reason not to just change it to an atomic?
> 
> For debugging the global is useful because it could be exposed
> as a sysfs file.

So the regression is that a counter update is a read + store (btw, it was
not described in the commit message what regression you are speaking
of, which means that I'm making a guess myself).

This means that changing the variable to an atomic should IMHO be
sufficient.
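
For reference, a minimal sketch of that atomic variant (illustrative
only, nothing here comes from a posted patch; the call sites are the
ones touched by this diff):

static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);

/* __sgx_try_alloc_page(): */
	atomic_long_dec(&sgx_nr_free_pages);

/* __sgx_free_page() / sgx_reclaim_pages(): */
	atomic_long_inc(&sgx_nr_free_pages);

/* sgx_alloc_page() wakeup check: */
	if (atomic_long_read(&sgx_nr_free_pages) < SGX_NR_LOW_PAGES)
		wake_up(&ksgxswapd_waitq);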

/Jarkko
Sean Christopherson Oct. 18, 2019, 2:30 p.m. UTC | #3
On Fri, Oct 18, 2019 at 03:49:42PM +0300, Jarkko Sakkinen wrote:
> On Wed, Oct 16, 2019 at 11:37:45AM -0700, Sean Christopherson wrote:
> > Track the free page count on a per EPC section basis so that the value
> > is properly protected by the section's spinlock.
> > 
> > As was pointed out when the change was proposed[*], using a global
> > non-atomic counter to track the number of free EPC pages is not safe.
> > The order of non-atomic reads and writes is not guaranteed, i.e.
> > concurrent RMW operations can write stale data.  This causes a variety
> > of bad behavior, e.g. livelocks because the free page count wraps and
> > causes the swap thread to stop reclaiming.
> > 
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> 
> What is the reason not to just change it to an atomic?

The purpose of separate sections is to avoid bouncing locks and whatnot
across packages.  Adding a global atomic to the hotpath defeats that
purpose.

> For debugging the global is useful because it could be exposed
> as a sysfs file.

Can't the sysfs read helper aggregate the info?
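
As an aside, a hypothetical sketch of such a read helper (the function
name and the attribute hookup are invented here; only the per-section
aggregation comes from the discussion):

static ssize_t sgx_free_cnt_show(struct kobject *kobj,
				 struct kobj_attribute *attr, char *buf)
{
	unsigned long free_cnt = 0;
	int i;

	/* Sum the per-section counts only when userspace asks for them. */
	for (i = 0; i < sgx_nr_epc_sections; i++)
		free_cnt += sgx_epc_sections[i].free_cnt;

	return snprintf(buf, PAGE_SIZE, "%lu\n", free_cnt);
}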

> > ---
> >  arch/x86/kernel/cpu/sgx/main.c    | 11 +++++------
> >  arch/x86/kernel/cpu/sgx/reclaim.c |  4 ++--
> >  arch/x86/kernel/cpu/sgx/sgx.h     | 18 +++++++++++++++++-
> >  3 files changed, 24 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> > index 6311aef10ec4..efbb52e4ecad 100644
> > --- a/arch/x86/kernel/cpu/sgx/main.c
> > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > @@ -13,18 +13,17 @@
> >  
> >  struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
> >  int sgx_nr_epc_sections;
> > -unsigned long sgx_nr_free_pages;
> >  
> >  static struct sgx_epc_page *__sgx_try_alloc_page(struct sgx_epc_section *section)
> >  {
> >  	struct sgx_epc_page *page;
> >  
> > -	if (list_empty(&section->page_list))
> > +	if (!section->free_cnt)
> >  		return NULL;
> 
>  Why does this check need to be changed?

I was reverting to the previous behavior.

> >  
> >  	page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
> >  	list_del_init(&page->list);
> > -	sgx_nr_free_pages--;
> > +	section->free_cnt--;
> >  	return page;
> >  }
> >  
> > @@ -97,7 +96,7 @@ struct sgx_epc_page *sgx_alloc_page(void *owner, bool reclaim)
> >  		schedule();
> >  	}
> >  
> > -	if (sgx_nr_free_pages < SGX_NR_LOW_PAGES)
> > +	if (!sgx_at_least_N_free_pages(SGX_NR_LOW_PAGES))
> >  		wake_up(&ksgxswapd_waitq);
> >  
> >  	return entry;
> > @@ -131,7 +130,7 @@ void __sgx_free_page(struct sgx_epc_page *page)
> >  
> >  	spin_lock(&section->lock);
> >  	list_add_tail(&page->list, &section->page_list);
> > -	sgx_nr_free_pages++;
> > +	section->free_cnt++;
> >  	spin_unlock(&section->lock);
> >  
> >  }
> > @@ -218,7 +217,7 @@ static bool __init sgx_alloc_epc_section(u64 addr, u64 size,
> >  		list_add_tail(&page->list, &section->unsanitized_page_list);
> >  	}
> >  
> > -	sgx_nr_free_pages += nr_pages;
> > +	section->free_cnt = nr_pages;
> >  
> >  	return true;
> >  
> > diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
> > index 3f183dd0e653..8619141f4bed 100644
> > --- a/arch/x86/kernel/cpu/sgx/reclaim.c
> > +++ b/arch/x86/kernel/cpu/sgx/reclaim.c
> > @@ -68,7 +68,7 @@ static void sgx_sanitize_section(struct sgx_epc_section *section)
> >  
> >  static inline bool sgx_should_reclaim(void)
> >  {
> > -	return sgx_nr_free_pages < SGX_NR_HIGH_PAGES &&
> > +	return !sgx_at_least_N_free_pages(SGX_NR_HIGH_PAGES) &&
> >  	       !list_empty(&sgx_active_page_list);
> >  }
> >  
> > @@ -430,7 +430,7 @@ void sgx_reclaim_pages(void)
> >  		section = sgx_epc_section(epc_page);
> >  		spin_lock(&section->lock);
> >  		list_add_tail(&epc_page->list, &section->page_list);
> > -		sgx_nr_free_pages++;
> > +		section->free_cnt++;
> >  		spin_unlock(&section->lock);
> >  	}
> >  }
> > diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> > index 87e375e8c25e..c7f0277299f6 100644
> > --- a/arch/x86/kernel/cpu/sgx/sgx.h
> > +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> > @@ -30,6 +30,7 @@ struct sgx_epc_page {
> >  struct sgx_epc_section {
> >  	unsigned long pa;
> >  	void *va;
> > +	unsigned long free_cnt;
> >  	struct list_head page_list;
> >  	struct list_head unsanitized_page_list;
> >  	spinlock_t lock;
> > @@ -73,12 +74,27 @@ static inline void *sgx_epc_addr(struct sgx_epc_page *page)
> >  #define SGX_NR_HIGH_PAGES	64
> >  
> >  extern int sgx_nr_epc_sections;
> > -extern unsigned long sgx_nr_free_pages;
> >  extern struct task_struct *ksgxswapd_tsk;
> >  extern struct wait_queue_head(ksgxswapd_waitq);
> >  extern struct list_head sgx_active_page_list;
> >  extern spinlock_t sgx_active_page_list_lock;
> >  
> > +static inline bool sgx_at_least_N_free_pages(unsigned long threshold)
> 
> There is an upper case letter in the function name, and the name is also
> weird overall.

Ya, not a fan of the name either.

> > +{
> > +	struct sgx_epc_section *section;
> > +	unsigned long free_cnt = 0;
> > +	int i;
> > +
> > +	for (i = 0; i < sgx_nr_epc_sections; i++) {
> > +		section = &sgx_epc_sections[i];
> > +		free_cnt += section->free_cnt;
> > +		if (free_cnt >= threshold)
> > +			return true;
> > +	}
> > +
> > +	return false;
> > +}
> 
> The complexity does not pay off here. Better to just revert back to this
> if required:

I'd prefer to optimize for the happy case, even if it's minor.  But I'm
fine going with the below code.

> unsigned long sgx_calc_free_cnt(void)
> {
> 	struct sgx_epc_section *section;
> 	unsigned long free_cnt = 0;
> 	int i;
> 
> 	for (i = 0; i < sgx_nr_epc_sections; i++) {
> 		section = &sgx_epc_sections[i];
> 		free_cnt += section->free_cnt;
> 	}
> 
> 	return free_cnt;
> }
> 
> /Jarkko
Jarkko Sakkinen Oct. 21, 2019, 11:19 a.m. UTC | #4
On Fri, Oct 18, 2019 at 07:30:57AM -0700, Sean Christopherson wrote:
> On Fri, Oct 18, 2019 at 03:49:42PM +0300, Jarkko Sakkinen wrote:
> > On Wed, Oct 16, 2019 at 11:37:45AM -0700, Sean Christopherson wrote:
> > > Track the free page count on a per EPC section basis so that the value
> > > is properly protected by the section's spinlock.
> > > 
> > > As was pointed out when the change was proposed[*], using a global
> > > non-atomic counter to track the number of free EPC pages is not safe.
> > > The order of non-atomic reads and writes is not guaranteed, i.e.
> > > concurrent RMW operations can write stale data.  This causes a variety
> > > of bad behavior, e.g. livelocks because the free page count wraps and
> > > causes the swap thread to stop reclaiming.
> > > 
> > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > 
> > What is the reason not to just change it to an atomic?
> 
> The purpose of separate sections is to avoid bouncing locks and whatnot
> across packages.  Adding a global atomic to the hotpath defeats that
> purpose.

I do get that, but it does not actually cause incorrect behaviour,
right? Not being atomic obviously does, because the READ part of the
READ+STORE can get re-ordered.

> Can't the sysfs read helper aggregate the info?

Sure.

/Jarkko
Sean Christopherson Oct. 22, 2019, 7:35 p.m. UTC | #5
On Mon, Oct 21, 2019 at 02:19:08PM +0300, Jarkko Sakkinen wrote:
> On Fri, Oct 18, 2019 at 07:30:57AM -0700, Sean Christopherson wrote:
> > On Fri, Oct 18, 2019 at 03:49:42PM +0300, Jarkko Sakkinen wrote:
> > > On Wed, Oct 16, 2019 at 11:37:45AM -0700, Sean Christopherson wrote:
> > > > Track the free page count on a per EPC section basis so that the value
> > > > is properly protected by the section's spinlock.
> > > > 
> > > > As was pointed out when the change was proposed[*], using a global
> > > > non-atomic counter to track the number of free EPC pages is not safe.
> > > > The order of non-atomic reads and writes is not guaranteed, i.e.
> > > > concurrent RMW operations can write stale data.  This causes a variety
> > > > of bad behavior, e.g. livelocks because the free page count wraps and
> > > > causes the swap thread to stop reclaiming.
> > > > 
> > > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > > 
> > > What is the reason not to just change it to an atomic?
> > 
> > The purpose of separate sections is to avoid bouncing locks and whatnot
> > across packages.  Adding a global atomic to the hotpath defeats that
> > purpose.
> 
> I do get that, but it does not actually cause incorrect behaviour,
> right? Not being atomic obviously does, because the READ part of the
> READ+STORE can get re-ordered.

Haven't tested yet, but it should be functionally correct.  I just don't
understand the motivation for the change to a global free count.  I get
that we don't have any NUMA awareness whatsoever, but if that's the
argument, why bother with the complexity of per-section tracking in the
first place?
Jarkko Sakkinen Oct. 23, 2019, 12:02 p.m. UTC | #6
On Tue, Oct 22, 2019 at 12:35:30PM -0700, Sean Christopherson wrote:
> On Mon, Oct 21, 2019 at 02:19:08PM +0300, Jarkko Sakkinen wrote:
> > On Fri, Oct 18, 2019 at 07:30:57AM -0700, Sean Christopherson wrote:
> > > On Fri, Oct 18, 2019 at 03:49:42PM +0300, Jarkko Sakkinen wrote:
> > > > On Wed, Oct 16, 2019 at 11:37:45AM -0700, Sean Christopherson wrote:
> > > > > Track the free page count on a per EPC section basis so that the value
> > > > > is properly protected by the section's spinlock.
> > > > > 
> > > > > As was pointed out when the change was proposed[*], using a global
> > > > > non-atomic counter to track the number of free EPC pages is not safe.
> > > > > The order of non-atomic reads and writes is not guaranteed, i.e.
> > > > > concurrent RMW operations can write stale data.  This causes a variety
> > > > > of bad behavior, e.g. livelocks because the free page count wraps and
> > > > > causes the swap thread to stop reclaiming.
> > > > > 
> > > > > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > > > 
> > > > What is the reason not to just change it to an atomic?
> > > 
> > > The purpose of separate sections is to avoid bouncing locks and whatnot
> > > across packages.  Adding a global atomic to the hotpath defeats that
> > > purpose.
> > 
> > I do get that, but it does not actually cause incorrect behaviour,
> > right? Not being atomic obviously does, because the READ part of the
> > READ+STORE can get re-ordered.
> 
> Haven't tested yet, but it should be functionally correct.  I just don't
> understand the motivation for the change to a global free count.  I get
> that we don't have any NUMA awareness whatsoever, but if that's the
> argument, why bother with the complexity of per-section tracking in the
> first place?

You are right in what you are saying. We can revert to the aggregation
code. I'm just making sure that I get the point exactly when it comes
to the concurrency issues.

I can take care of reverting it as I broke it.

/Jarkko

Patch

diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 6311aef10ec4..efbb52e4ecad 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -13,18 +13,17 @@ 
 
 struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
 int sgx_nr_epc_sections;
-unsigned long sgx_nr_free_pages;
 
 static struct sgx_epc_page *__sgx_try_alloc_page(struct sgx_epc_section *section)
 {
 	struct sgx_epc_page *page;
 
-	if (list_empty(&section->page_list))
+	if (!section->free_cnt)
 		return NULL;
 
 	page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
 	list_del_init(&page->list);
-	sgx_nr_free_pages--;
+	section->free_cnt--;
 	return page;
 }
 
@@ -97,7 +96,7 @@  struct sgx_epc_page *sgx_alloc_page(void *owner, bool reclaim)
 		schedule();
 	}
 
-	if (sgx_nr_free_pages < SGX_NR_LOW_PAGES)
+	if (!sgx_at_least_N_free_pages(SGX_NR_LOW_PAGES))
 		wake_up(&ksgxswapd_waitq);
 
 	return entry;
@@ -131,7 +130,7 @@  void __sgx_free_page(struct sgx_epc_page *page)
 
 	spin_lock(&section->lock);
 	list_add_tail(&page->list, &section->page_list);
-	sgx_nr_free_pages++;
+	section->free_cnt++;
 	spin_unlock(&section->lock);
 
 }
@@ -218,7 +217,7 @@  static bool __init sgx_alloc_epc_section(u64 addr, u64 size,
 		list_add_tail(&page->list, &section->unsanitized_page_list);
 	}
 
-	sgx_nr_free_pages += nr_pages;
+	section->free_cnt = nr_pages;
 
 	return true;
 
diff --git a/arch/x86/kernel/cpu/sgx/reclaim.c b/arch/x86/kernel/cpu/sgx/reclaim.c
index 3f183dd0e653..8619141f4bed 100644
--- a/arch/x86/kernel/cpu/sgx/reclaim.c
+++ b/arch/x86/kernel/cpu/sgx/reclaim.c
@@ -68,7 +68,7 @@  static void sgx_sanitize_section(struct sgx_epc_section *section)
 
 static inline bool sgx_should_reclaim(void)
 {
-	return sgx_nr_free_pages < SGX_NR_HIGH_PAGES &&
+	return !sgx_at_least_N_free_pages(SGX_NR_HIGH_PAGES) &&
 	       !list_empty(&sgx_active_page_list);
 }
 
@@ -430,7 +430,7 @@  void sgx_reclaim_pages(void)
 		section = sgx_epc_section(epc_page);
 		spin_lock(&section->lock);
 		list_add_tail(&epc_page->list, &section->page_list);
-		sgx_nr_free_pages++;
+		section->free_cnt++;
 		spin_unlock(&section->lock);
 	}
 }
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 87e375e8c25e..c7f0277299f6 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -30,6 +30,7 @@  struct sgx_epc_page {
 struct sgx_epc_section {
 	unsigned long pa;
 	void *va;
+	unsigned long free_cnt;
 	struct list_head page_list;
 	struct list_head unsanitized_page_list;
 	spinlock_t lock;
@@ -73,12 +74,27 @@  static inline void *sgx_epc_addr(struct sgx_epc_page *page)
 #define SGX_NR_HIGH_PAGES	64
 
 extern int sgx_nr_epc_sections;
-extern unsigned long sgx_nr_free_pages;
 extern struct task_struct *ksgxswapd_tsk;
 extern struct wait_queue_head(ksgxswapd_waitq);
 extern struct list_head sgx_active_page_list;
 extern spinlock_t sgx_active_page_list_lock;
 
+static inline bool sgx_at_least_N_free_pages(unsigned long threshold)
+{
+	struct sgx_epc_section *section;
+	unsigned long free_cnt = 0;
+	int i;
+
+	for (i = 0; i < sgx_nr_epc_sections; i++) {
+		section = &sgx_epc_sections[i];
+		free_cnt += section->free_cnt;
+		if (free_cnt >= threshold)
+			return true;
+	}
+
+	return false;
+}
+
 bool __init sgx_page_reclaimer_init(void);
 void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
 void sgx_reclaim_pages(void);