diff mbox series

[v6,1/7] x86/sgx: Provide indication of life-cycle of EPC pages

Message ID 20210922182123.200105-2-tony.luck@intel.com (mailing list archive)
State New, archived
Headers show
Series Basic recovery for machine checks inside SGX | expand

Commit Message

Luck, Tony Sept. 22, 2021, 6:21 p.m. UTC
SGX EPC pages go through the following life cycle:

        DIRTY ---> FREE ---> IN-USE --\
                    ^                 |
                    \-----------------/

Recovery action for poison for a DIRTY or FREE page is simple. Just
make sure never to allocate the page. IN-USE pages need some extra
handling.

It would be good to use the sgx_epc_page->owner field as an indicator
of where an EPC page is currently in that cycle (owner != NULL means
the EPC page is IN-USE). But there is one caller, sgx_alloc_va_page(),
that calls with NULL.

Since there are multiple uses of the "owner" field with different types
change the type of sgx_epc_page.owner to "void *.

Start epc_pages out with a non-NULL owner while they are in DIRTY state.

Fix up the one holdout to provide a non-NULL owner.

Refactor the allocation sequence so that changes to/from NULL
value happen together with adding/removing the epc_page from
a free list while the node->lock is held.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/sgx/encl.c  |  5 +++--
 arch/x86/kernel/cpu/sgx/encl.h  |  2 +-
 arch/x86/kernel/cpu/sgx/ioctl.c |  2 +-
 arch/x86/kernel/cpu/sgx/main.c  | 21 +++++++++++----------
 arch/x86/kernel/cpu/sgx/sgx.h   |  4 ++--
 5 files changed, 18 insertions(+), 16 deletions(-)

Comments

Jarkko Sakkinen Sept. 23, 2021, 8:21 p.m. UTC | #1
On Wed, 2021-09-22 at 11:21 -0700, Tony Luck wrote:
> SGX EPC pages go through the following life cycle:
> 
>         DIRTY ---> FREE ---> IN-USE --\
>                     ^                 |
>                     \-----------------/
> 
> Recovery action for poison for a DIRTY or FREE page is simple. Just
> make sure never to allocate the page. IN-USE pages need some extra
> handling.
> 
> It would be good to use the sgx_epc_page->owner field as an indicator
> of where an EPC page is currently in that cycle (owner != NULL means
> the EPC page is IN-USE). But there is one caller, sgx_alloc_va_page(),
> that calls with NULL.
> 
> Since there are multiple uses of the "owner" field with different types
> change the type of sgx_epc_page.owner to "void *.
> 
> Start epc_pages out with a non-NULL owner while they are in DIRTY state.
> 
> Fix up the one holdout to provide a non-NULL owner.
> 
> Refactor the allocation sequence so that changes to/from NULL
> value happen together with adding/removing the epc_page from
> a free list while the node->lock is held.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/kernel/cpu/sgx/encl.c  |  5 +++--
>  arch/x86/kernel/cpu/sgx/encl.h  |  2 +-
>  arch/x86/kernel/cpu/sgx/ioctl.c |  2 +-
>  arch/x86/kernel/cpu/sgx/main.c  | 21 +++++++++++----------
>  arch/x86/kernel/cpu/sgx/sgx.h   |  4 ++--
>  5 files changed, 18 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 001808e3901c..62cf20d5fbf6 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -667,6 +667,7 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
>  
>  /**
>   * sgx_alloc_va_page() - Allocate a Version Array (VA) page
> + * @owner:	struct sgx_va_page connected to this VA page
>   *
>   * Allocate a free EPC page and convert it to a Version Array (VA) page.
>   *
> @@ -674,12 +675,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
>   *   a VA page,
>   *   -errno otherwise
>   */
> -struct sgx_epc_page *sgx_alloc_va_page(void)
> +struct sgx_epc_page *sgx_alloc_va_page(void *owner)
>  {
>  	struct sgx_epc_page *epc_page;
>  	int ret;
>  
> -	epc_page = sgx_alloc_epc_page(NULL, true);
> +	epc_page = sgx_alloc_epc_page(owner, true);
>  	if (IS_ERR(epc_page))
>  		return ERR_CAST(epc_page);
>  
> diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
> index fec43ca65065..2a972bc9b2d1 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.h
> +++ b/arch/x86/kernel/cpu/sgx/encl.h
> @@ -111,7 +111,7 @@ void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
>  int sgx_encl_test_and_clear_young(struct mm_struct *mm,
>  				  struct sgx_encl_page *page);
>  
> -struct sgx_epc_page *sgx_alloc_va_page(void);
> +struct sgx_epc_page *sgx_alloc_va_page(void *owner);
>  unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
>  void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
>  bool sgx_va_page_full(struct sgx_va_page *va_page);
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index 83df20e3e633..655ce0bb069d 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -30,7 +30,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
>  		if (!va_page)
>  			return ERR_PTR(-ENOMEM);
>  
> -		va_page->epc_page = sgx_alloc_va_page();
> +		va_page->epc_page = sgx_alloc_va_page(va_page);
>  		if (IS_ERR(va_page->epc_page)) {
>  			err = ERR_CAST(va_page->epc_page);
>  			kfree(va_page);
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index 63d3de02bbcc..69743709ec90 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -457,7 +457,7 @@ static bool __init sgx_page_reclaimer_init(void)
>  	return true;
>  }
>  
> -static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
> +static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(void *owner, int nid)
>  {
>  	struct sgx_numa_node *node = &sgx_numa_nodes[nid];
>  	struct sgx_epc_page *page = NULL;
> @@ -471,6 +471,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
>  
>  	page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
>  	list_del_init(&page->list);
> +	page->owner = owner;
>  	sgx_nr_free_pages--;
>  
>  	spin_unlock(&node->lock);
> @@ -480,6 +481,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
>  
>  /**
>   * __sgx_alloc_epc_page() - Allocate an EPC page
> + * @owner:	the owner of the EPC page
>   *
>   * Iterate through NUMA nodes and reserve ia free EPC page to the caller. Start
>   * from the NUMA node, where the caller is executing.
> @@ -488,14 +490,14 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
>   * - an EPC page:	A borrowed EPC pages were available.
>   * - NULL:		Out of EPC pages.
>   */
> -struct sgx_epc_page *__sgx_alloc_epc_page(void)
> +struct sgx_epc_page *__sgx_alloc_epc_page(void *owner)
>  {
>  	struct sgx_epc_page *page;
>  	int nid_of_current = numa_node_id();
>  	int nid = nid_of_current;
>  
>  	if (node_isset(nid_of_current, sgx_numa_mask)) {
> -		page = __sgx_alloc_epc_page_from_node(nid_of_current);
> +		page = __sgx_alloc_epc_page_from_node(owner, nid_of_current);
>  		if (page)
>  			return page;
>  	}
> @@ -506,7 +508,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
>  		if (nid == nid_of_current)
>  			break;
>  
> -		page = __sgx_alloc_epc_page_from_node(nid);
> +		page = __sgx_alloc_epc_page_from_node(owner, nid);
>  		if (page)
>  			return page;
>  	}
> @@ -559,7 +561,7 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
>  
>  /**
>   * sgx_alloc_epc_page() - Allocate an EPC page
> - * @owner:	the owner of the EPC page
> + * @owner:	per-caller page owner
>   * @reclaim:	reclaim pages if necessary
>   *
>   * Iterate through EPC sections and borrow a free EPC page to the caller. When a
> @@ -579,11 +581,9 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
>  	struct sgx_epc_page *page;
>  
>  	for ( ; ; ) {
> -		page = __sgx_alloc_epc_page();
> -		if (!IS_ERR(page)) {
> -			page->owner = owner;
> +		page = __sgx_alloc_epc_page(owner);
> +		if (!IS_ERR(page))
>  			break;
> -		}
>  
>  		if (list_empty(&sgx_active_page_list))
>  			return ERR_PTR(-ENOMEM);
> @@ -624,6 +624,7 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
>  
>  	spin_lock(&node->lock);
>  
> +	page->owner = NULL;
>  	list_add_tail(&page->list, &node->free_page_list);
>  	sgx_nr_free_pages++;
>  
> @@ -652,7 +653,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
>  	for (i = 0; i < nr_pages; i++) {
>  		section->pages[i].section = index;
>  		section->pages[i].flags = 0;
> -		section->pages[i].owner = NULL;
> +		section->pages[i].owner = (void *)-1;

Probably should have a named constant.

Anyway, I wonder why we want to do tricks with 'owner', when the
struct has a flags field?

Right now its use is so nice and straight forward, and most
importantly intuitive.

So what I would do instead of this, would be to add something
like

/* Pages, which are being tracked by the page reclaimer. */
#define SGX_EPC_PAGE_RECLAIMER_TRACKED	BIT(0)

/* Pages, which are allocated for use. */
#define SGX_EPC_PAGE_ALLOCATED		BIT(1)

This would be set by sgx_alloc_epc_page() and reset by
sgx_free_epc_page().

In the subsequent patch you could then instead of

       /*
        * If there is no owner, then the page is on a free list.
        * Move it to the poison page list.
        */
       if (!page->owner) {
               list_del(&page->list);
               list_add(&page->list, &sgx_poison_page_list);
               goto out;
       }

you would

       /*
        * If there is no owner, then the page is on a free list.
        * Move it to the poison page list.
        */
       if (!page->flags) {
               list_del(&page->list);
               list_add(&page->list, &sgx_poison_page_list);
               goto out;
       }

You don't actually need to compare to that flag because the
invariant would be that it is set, as long as the page is
not explicitly freed.

I think this is a better solution than in the patch set
because it does not introduce any unorthodox use of anything.

/Jarkko
Jarkko Sakkinen Sept. 23, 2021, 8:24 p.m. UTC | #2
On Thu, 2021-09-23 at 23:21 +0300, Jarkko Sakkinen wrote:
> On Wed, 2021-09-22 at 11:21 -0700, Tony Luck wrote:
> > SGX EPC pages go through the following life cycle:
> > 
> >         DIRTY ---> FREE ---> IN-USE --\
> >                     ^                 |
> >                     \-----------------/
> > 
> > Recovery action for poison for a DIRTY or FREE page is simple. Just
> > make sure never to allocate the page. IN-USE pages need some extra
> > handling.
> > 
> > It would be good to use the sgx_epc_page->owner field as an indicator
> > of where an EPC page is currently in that cycle (owner != NULL means
> > the EPC page is IN-USE). But there is one caller, sgx_alloc_va_page(),
> > that calls with NULL.
> > 
> > Since there are multiple uses of the "owner" field with different types
> > change the type of sgx_epc_page.owner to "void *.
> > 
> > Start epc_pages out with a non-NULL owner while they are in DIRTY state.
> > 
> > Fix up the one holdout to provide a non-NULL owner.
> > 
> > Refactor the allocation sequence so that changes to/from NULL
> > value happen together with adding/removing the epc_page from
> > a free list while the node->lock is held.
> > 
> > Signed-off-by: Tony Luck <tony.luck@intel.com>
> > ---
> >  arch/x86/kernel/cpu/sgx/encl.c  |  5 +++--
> >  arch/x86/kernel/cpu/sgx/encl.h  |  2 +-
> >  arch/x86/kernel/cpu/sgx/ioctl.c |  2 +-
> >  arch/x86/kernel/cpu/sgx/main.c  | 21 +++++++++++----------
> >  arch/x86/kernel/cpu/sgx/sgx.h   |  4 ++--
> >  5 files changed, 18 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> > index 001808e3901c..62cf20d5fbf6 100644
> > --- a/arch/x86/kernel/cpu/sgx/encl.c
> > +++ b/arch/x86/kernel/cpu/sgx/encl.c
> > @@ -667,6 +667,7 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
> >  
> >  /**
> >   * sgx_alloc_va_page() - Allocate a Version Array (VA) page
> > + * @owner:	struct sgx_va_page connected to this VA page
> >   *
> >   * Allocate a free EPC page and convert it to a Version Array (VA) page.
> >   *
> > @@ -674,12 +675,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
> >   *   a VA page,
> >   *   -errno otherwise
> >   */
> > -struct sgx_epc_page *sgx_alloc_va_page(void)
> > +struct sgx_epc_page *sgx_alloc_va_page(void *owner)
> >  {
> >  	struct sgx_epc_page *epc_page;
> >  	int ret;
> >  
> > -	epc_page = sgx_alloc_epc_page(NULL, true);
> > +	epc_page = sgx_alloc_epc_page(owner, true);
> >  	if (IS_ERR(epc_page))
> >  		return ERR_CAST(epc_page);
> >  
> > diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
> > index fec43ca65065..2a972bc9b2d1 100644
> > --- a/arch/x86/kernel/cpu/sgx/encl.h
> > +++ b/arch/x86/kernel/cpu/sgx/encl.h
> > @@ -111,7 +111,7 @@ void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
> >  int sgx_encl_test_and_clear_young(struct mm_struct *mm,
> >  				  struct sgx_encl_page *page);
> >  
> > -struct sgx_epc_page *sgx_alloc_va_page(void);
> > +struct sgx_epc_page *sgx_alloc_va_page(void *owner);
> >  unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
> >  void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
> >  bool sgx_va_page_full(struct sgx_va_page *va_page);
> > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> > index 83df20e3e633..655ce0bb069d 100644
> > --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> > @@ -30,7 +30,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
> >  		if (!va_page)
> >  			return ERR_PTR(-ENOMEM);
> >  
> > -		va_page->epc_page = sgx_alloc_va_page();
> > +		va_page->epc_page = sgx_alloc_va_page(va_page);
> >  		if (IS_ERR(va_page->epc_page)) {
> >  			err = ERR_CAST(va_page->epc_page);
> >  			kfree(va_page);
> > diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> > index 63d3de02bbcc..69743709ec90 100644
> > --- a/arch/x86/kernel/cpu/sgx/main.c
> > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > @@ -457,7 +457,7 @@ static bool __init sgx_page_reclaimer_init(void)
> >  	return true;
> >  }
> >  
> > -static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
> > +static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(void *owner, int nid)
> >  {
> >  	struct sgx_numa_node *node = &sgx_numa_nodes[nid];
> >  	struct sgx_epc_page *page = NULL;
> > @@ -471,6 +471,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
> >  
> >  	page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
> >  	list_del_init(&page->list);
> > +	page->owner = owner;
> >  	sgx_nr_free_pages--;
> >  
> >  	spin_unlock(&node->lock);
> > @@ -480,6 +481,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
> >  
> >  /**
> >   * __sgx_alloc_epc_page() - Allocate an EPC page
> > + * @owner:	the owner of the EPC page
> >   *
> >   * Iterate through NUMA nodes and reserve ia free EPC page to the caller. Start
> >   * from the NUMA node, where the caller is executing.
> > @@ -488,14 +490,14 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
> >   * - an EPC page:	A borrowed EPC pages were available.
> >   * - NULL:		Out of EPC pages.
> >   */
> > -struct sgx_epc_page *__sgx_alloc_epc_page(void)
> > +struct sgx_epc_page *__sgx_alloc_epc_page(void *owner)
> >  {
> >  	struct sgx_epc_page *page;
> >  	int nid_of_current = numa_node_id();
> >  	int nid = nid_of_current;
> >  
> >  	if (node_isset(nid_of_current, sgx_numa_mask)) {
> > -		page = __sgx_alloc_epc_page_from_node(nid_of_current);
> > +		page = __sgx_alloc_epc_page_from_node(owner, nid_of_current);
> >  		if (page)
> >  			return page;
> >  	}
> > @@ -506,7 +508,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
> >  		if (nid == nid_of_current)
> >  			break;
> >  
> > -		page = __sgx_alloc_epc_page_from_node(nid);
> > +		page = __sgx_alloc_epc_page_from_node(owner, nid);
> >  		if (page)
> >  			return page;
> >  	}
> > @@ -559,7 +561,7 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
> >  
> >  /**
> >   * sgx_alloc_epc_page() - Allocate an EPC page
> > - * @owner:	the owner of the EPC page
> > + * @owner:	per-caller page owner
> >   * @reclaim:	reclaim pages if necessary
> >   *
> >   * Iterate through EPC sections and borrow a free EPC page to the caller. When a
> > @@ -579,11 +581,9 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
> >  	struct sgx_epc_page *page;
> >  
> >  	for ( ; ; ) {
> > -		page = __sgx_alloc_epc_page();
> > -		if (!IS_ERR(page)) {
> > -			page->owner = owner;
> > +		page = __sgx_alloc_epc_page(owner);
> > +		if (!IS_ERR(page))
> >  			break;
> > -		}
> >  
> >  		if (list_empty(&sgx_active_page_list))
> >  			return ERR_PTR(-ENOMEM);
> > @@ -624,6 +624,7 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
> >  
> >  	spin_lock(&node->lock);
> >  
> > +	page->owner = NULL;
> >  	list_add_tail(&page->list, &node->free_page_list);
> >  	sgx_nr_free_pages++;
> >  
> > @@ -652,7 +653,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
> >  	for (i = 0; i < nr_pages; i++) {
> >  		section->pages[i].section = index;
> >  		section->pages[i].flags = 0;
> > -		section->pages[i].owner = NULL;
> > +		section->pages[i].owner = (void *)-1;
> 
> Probably should have a named constant.
> 
> Anyway, I wonder why we want to do tricks with 'owner', when the
> struct has a flags field?
> 
> Right now its use is so nice and straight forward, and most
> importantly intuitive.
> 
> So what I would do instead of this, would be to add something
> like
> 
> /* Pages, which are being tracked by the page reclaimer. */
> #define SGX_EPC_PAGE_RECLAIMER_TRACKED	BIT(0)
> 
> /* Pages, which are allocated for use. */
> #define SGX_EPC_PAGE_ALLOCATED		BIT(1)
> 
> This would be set by sgx_alloc_epc_page() and reset by
> sgx_free_epc_page().
> 
> In the subsequent patch you could then instead of
> 
>        /*
>         * If there is no owner, then the page is on a free list.
>         * Move it to the poison page list.
>         */
>        if (!page->owner) {
>                list_del(&page->list);
>                list_add(&page->list, &sgx_poison_page_list);
>                goto out;
>        }
> 
> you would
> 
>        /*
>         * If there is no owner, then the page is on a free list.
>         * Move it to the poison page list.
>         */
>        if (!page->flags) {
>                list_del(&page->list);
>                list_add(&page->list, &sgx_poison_page_list);
>                goto out;
>        }
> 
> You don't actually need to compare to that flag because the
> invariant would be that it is set, as long as the page is
> not explicitly freed.
> 
> I think this is a better solution than in the patch set
> because it does not introduce any unorthodox use of anything.

And does not contain any special cases, e.g. when you debug
something you can always assume that a valid owner pointer is
always a legit sgx_encl_page instance.

/Jarkko
Luck, Tony Sept. 23, 2021, 8:46 p.m. UTC | #3
On Thu, Sep 23, 2021 at 11:24:35PM +0300, Jarkko Sakkinen wrote:
> On Thu, 2021-09-23 at 23:21 +0300, Jarkko Sakkinen wrote:
> > On Wed, 2021-09-22 at 11:21 -0700, Tony Luck wrote:
> > > -		section->pages[i].owner = NULL;
> > > +		section->pages[i].owner = (void *)-1;
> > 
> > Probably should have a named constant.
> > 
> > Anyway, I wonder why we want to do tricks with 'owner', when the
> > struct has a flags field?
> > 
> > Right now its use is so nice and straight forward, and most
> > importantly intuitive.
> > 
> > So what I would do instead of this, would be to add something
> > like
> > 
> > /* Pages, which are being tracked by the page reclaimer. */
> > #define SGX_EPC_PAGE_RECLAIMER_TRACKED	BIT(0)
> > 
> > /* Pages, which are allocated for use. */
> > #define SGX_EPC_PAGE_ALLOCATED		BIT(1)
> > 
> > This would be set by sgx_alloc_epc_page() and reset by
> > sgx_free_epc_page().
> > 
> > In the subsequent patch you could then instead of
> > 
> >        /*
> >         * If there is no owner, then the page is on a free list.
> >         * Move it to the poison page list.
> >         */
> >        if (!page->owner) {
> >                list_del(&page->list);
> >                list_add(&page->list, &sgx_poison_page_list);
> >                goto out;
> >        }
> > 
> > you would
> > 
> >        /*
> >         * If there is no owner, then the page is on a free list.
> >         * Move it to the poison page list.
> >         */
> >        if (!page->flags) {
> >                list_del(&page->list);
> >                list_add(&page->list, &sgx_poison_page_list);
> >                goto out;
> >        }
> > 
> > You don't actually need to compare to that flag because the
> > invariant would be that it is set, as long as the page is
> > not explicitly freed.
> > 
> > I think this is a better solution than in the patch set
> > because it does not introduce any unorthodox use of anything.
> 
> And does not contain any special cases, e.g. when you debug
> something you can always assume that a valid owner pointer is
> always a legit sgx_encl_page instance.
> 
Jarkko,

That's nice. It avoids having to create a fictitious owner for
the dirty pages, and for the sgx_alloc_va_page() case. Which
in turn means that the owner field in struct sgx_epc_page can
remain as "struct sgx_encl_page *owner;" (neatly avoiding DaveH's
request that it be an anonymous union of all the possible types,
because it is back to just being one type).

Thanks!  Will include in next version.

-Tony
Luck, Tony Sept. 23, 2021, 10:11 p.m. UTC | #4
> That's nice. It avoids having to create a fictitious owner for
> the dirty pages, and for the sgx_alloc_va_page() case. Which
> in turn means that the owner field in struct sgx_epc_page can
> remain as "struct sgx_encl_page *owner;" (neatly avoiding DaveH's
> request that it be an anonymous union of all the possible types,
> because it is back to just being one type).
>
> Thanks!  Will include in next version.

Also avoids a bunch of refactoring to make sure to set the owner field
while holding zone->lock.

I roughly coded it up and the old part 0001 was:

 arch/x86/kernel/cpu/sgx/encl.c  |    5 +++--
 arch/x86/kernel/cpu/sgx/encl.h  |    2 +-
 arch/x86/kernel/cpu/sgx/ioctl.c |    2 +-
 arch/x86/kernel/cpu/sgx/main.c  |   21 +++++++++++----------
 arch/x86/kernel/cpu/sgx/sgx.h   |    4 ++--
 5 files changed, 18 insertions(+), 16 deletions(-)

which is by no means huge, but the new part 0001 is

 arch/x86/kernel/cpu/sgx/main.c |    4 +++-
 arch/x86/kernel/cpu/sgx/sgx.h  |    3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

-Tony
Jarkko Sakkinen Sept. 28, 2021, 2:13 a.m. UTC | #5
On Thu, 2021-09-23 at 22:11 +0000, Luck, Tony wrote:
> > That's nice. It avoids having to create a fictitious owner for
> > the dirty pages, and for the sgx_alloc_va_page() case. Which
> > in turn means that the owner field in struct sgx_epc_page can
> > remain as "struct sgx_encl_page *owner;" (neatly avoiding DaveH's
> > request that it be an anonymous union of all the possible types,
> > because it is back to just being one type).
> > 
> > Thanks!  Will include in next version.
> 
> Also avoids a bunch of refactoring to make sure to set the owner field
> while holding zone->lock.
> 
> I roughly coded it up and the old part 0001 was:
> 
>  arch/x86/kernel/cpu/sgx/encl.c  |    5 +++--
>  arch/x86/kernel/cpu/sgx/encl.h  |    2 +-
>  arch/x86/kernel/cpu/sgx/ioctl.c |    2 +-
>  arch/x86/kernel/cpu/sgx/main.c  |   21 +++++++++++----------
>  arch/x86/kernel/cpu/sgx/sgx.h   |    4 ++--
>  5 files changed, 18 insertions(+), 16 deletions(-)
> 
> which is by no means huge, but the new part 0001 is
> 
>  arch/x86/kernel/cpu/sgx/main.c |    4 +++-
>  arch/x86/kernel/cpu/sgx/sgx.h  |    3 +++
>  2 files changed, 6 insertions(+), 1 deletion(-)
> 
> -Tony

This is good to hear. I guess it is then a no brainer to move into
this direction then.

/Jarkko
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 001808e3901c..62cf20d5fbf6 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -667,6 +667,7 @@  int sgx_encl_test_and_clear_young(struct mm_struct *mm,
 
 /**
  * sgx_alloc_va_page() - Allocate a Version Array (VA) page
+ * @owner:	struct sgx_va_page connected to this VA page
  *
  * Allocate a free EPC page and convert it to a Version Array (VA) page.
  *
@@ -674,12 +675,12 @@  int sgx_encl_test_and_clear_young(struct mm_struct *mm,
  *   a VA page,
  *   -errno otherwise
  */
-struct sgx_epc_page *sgx_alloc_va_page(void)
+struct sgx_epc_page *sgx_alloc_va_page(void *owner)
 {
 	struct sgx_epc_page *epc_page;
 	int ret;
 
-	epc_page = sgx_alloc_epc_page(NULL, true);
+	epc_page = sgx_alloc_epc_page(owner, true);
 	if (IS_ERR(epc_page))
 		return ERR_CAST(epc_page);
 
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index fec43ca65065..2a972bc9b2d1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -111,7 +111,7 @@  void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write);
 int sgx_encl_test_and_clear_young(struct mm_struct *mm,
 				  struct sgx_encl_page *page);
 
-struct sgx_epc_page *sgx_alloc_va_page(void);
+struct sgx_epc_page *sgx_alloc_va_page(void *owner);
 unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
 void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
 bool sgx_va_page_full(struct sgx_va_page *va_page);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 83df20e3e633..655ce0bb069d 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -30,7 +30,7 @@  static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
 		if (!va_page)
 			return ERR_PTR(-ENOMEM);
 
-		va_page->epc_page = sgx_alloc_va_page();
+		va_page->epc_page = sgx_alloc_va_page(va_page);
 		if (IS_ERR(va_page->epc_page)) {
 			err = ERR_CAST(va_page->epc_page);
 			kfree(va_page);
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 63d3de02bbcc..69743709ec90 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -457,7 +457,7 @@  static bool __init sgx_page_reclaimer_init(void)
 	return true;
 }
 
-static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
+static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(void *owner, int nid)
 {
 	struct sgx_numa_node *node = &sgx_numa_nodes[nid];
 	struct sgx_epc_page *page = NULL;
@@ -471,6 +471,7 @@  static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
 
 	page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
 	list_del_init(&page->list);
+	page->owner = owner;
 	sgx_nr_free_pages--;
 
 	spin_unlock(&node->lock);
@@ -480,6 +481,7 @@  static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
 
 /**
  * __sgx_alloc_epc_page() - Allocate an EPC page
+ * @owner:	the owner of the EPC page
  *
  * Iterate through NUMA nodes and reserve ia free EPC page to the caller. Start
  * from the NUMA node, where the caller is executing.
@@ -488,14 +490,14 @@  static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
  * - an EPC page:	A borrowed EPC pages were available.
  * - NULL:		Out of EPC pages.
  */
-struct sgx_epc_page *__sgx_alloc_epc_page(void)
+struct sgx_epc_page *__sgx_alloc_epc_page(void *owner)
 {
 	struct sgx_epc_page *page;
 	int nid_of_current = numa_node_id();
 	int nid = nid_of_current;
 
 	if (node_isset(nid_of_current, sgx_numa_mask)) {
-		page = __sgx_alloc_epc_page_from_node(nid_of_current);
+		page = __sgx_alloc_epc_page_from_node(owner, nid_of_current);
 		if (page)
 			return page;
 	}
@@ -506,7 +508,7 @@  struct sgx_epc_page *__sgx_alloc_epc_page(void)
 		if (nid == nid_of_current)
 			break;
 
-		page = __sgx_alloc_epc_page_from_node(nid);
+		page = __sgx_alloc_epc_page_from_node(owner, nid);
 		if (page)
 			return page;
 	}
@@ -559,7 +561,7 @@  int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
 
 /**
  * sgx_alloc_epc_page() - Allocate an EPC page
- * @owner:	the owner of the EPC page
+ * @owner:	per-caller page owner
  * @reclaim:	reclaim pages if necessary
  *
  * Iterate through EPC sections and borrow a free EPC page to the caller. When a
@@ -579,11 +581,9 @@  struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
 	struct sgx_epc_page *page;
 
 	for ( ; ; ) {
-		page = __sgx_alloc_epc_page();
-		if (!IS_ERR(page)) {
-			page->owner = owner;
+		page = __sgx_alloc_epc_page(owner);
+		if (!IS_ERR(page))
 			break;
-		}
 
 		if (list_empty(&sgx_active_page_list))
 			return ERR_PTR(-ENOMEM);
@@ -624,6 +624,7 @@  void sgx_free_epc_page(struct sgx_epc_page *page)
 
 	spin_lock(&node->lock);
 
+	page->owner = NULL;
 	list_add_tail(&page->list, &node->free_page_list);
 	sgx_nr_free_pages++;
 
@@ -652,7 +653,7 @@  static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
 	for (i = 0; i < nr_pages; i++) {
 		section->pages[i].section = index;
 		section->pages[i].flags = 0;
-		section->pages[i].owner = NULL;
+		section->pages[i].owner = (void *)-1;
 		list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
 	}
 
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 4628acec0009..cc624778645f 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -29,7 +29,7 @@ 
 struct sgx_epc_page {
 	unsigned int section;
 	unsigned int flags;
-	struct sgx_encl_page *owner;
+	void *owner;
 	struct list_head list;
 };
 
@@ -77,7 +77,7 @@  static inline void *sgx_get_epc_virt_addr(struct sgx_epc_page *page)
 	return section->virt_addr + index * PAGE_SIZE;
 }
 
-struct sgx_epc_page *__sgx_alloc_epc_page(void);
+struct sgx_epc_page *__sgx_alloc_epc_page(void *owner);
 void sgx_free_epc_page(struct sgx_epc_page *page);
 
 void sgx_mark_page_reclaimable(struct sgx_epc_page *page);