Message ID | 20240328002229.30264-6-haitao.huang@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add Cgroup support for SGX EPC memory | expand |
> --- /dev/null > +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c > @@ -0,0 +1,74 @@ > +// SPDX-License-Identifier: GPL-2.0 > +// Copyright(c) 2022 Intel Corporation. It's 2024 now. And looks you need to use C style comment for /* Copyright ... */, after looking at some other C files. > + > +#include <linux/atomic.h> > +#include <linux/kernel.h> > +#include "epc_cgroup.h" > + > +/* The root SGX EPC cgroup */ > +static struct sgx_cgroup sgx_cg_root; > + > +/** > + * sgx_cgroup_try_charge() - try to charge cgroup for a single EPC page > + * > + * @sgx_cg: The EPC cgroup to be charged for the page. > + * Return: > + * * %0 - If successfully charged. > + * * -errno - for failures. > + */ > +int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg) > +{ > + return misc_cg_try_charge(MISC_CG_RES_SGX_EPC, sgx_cg->cg, PAGE_SIZE); > +} > + > +/** > + * sgx_cgroup_uncharge() - uncharge a cgroup for an EPC page > + * @sgx_cg: The charged sgx cgroup > + */ > +void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) > +{ > + misc_cg_uncharge(MISC_CG_RES_SGX_EPC, sgx_cg->cg, PAGE_SIZE); > +} > + > +static void sgx_cgroup_free(struct misc_cg *cg) > +{ > + struct sgx_cgroup *sgx_cg; > + > + sgx_cg = sgx_cgroup_from_misc_cg(cg); > + if (!sgx_cg) > + return; > + > + kfree(sgx_cg); > +} > + > +static int sgx_cgroup_alloc(struct misc_cg *cg); Again, this declaration can be removed if you move the below structure ... > + > +const struct misc_res_ops sgx_cgroup_ops = { > + .alloc = sgx_cgroup_alloc, > + .free = sgx_cgroup_free, > +}; > + > +static void sgx_cgroup_misc_init(struct misc_cg *cg, struct sgx_cgroup *sgx_cg) > +{ > + cg->res[MISC_CG_RES_SGX_EPC].priv = sgx_cg; > + sgx_cg->cg = cg; > +} > + > +static int sgx_cgroup_alloc(struct misc_cg *cg) > +{ > + struct sgx_cgroup *sgx_cg; > + > + sgx_cg = kzalloc(sizeof(*sgx_cg), GFP_KERNEL); > + if (!sgx_cg) > + return -ENOMEM; > + > + sgx_cgroup_misc_init(cg, sgx_cg); > + > + return 0; > +} ... here. > + > +void sgx_cgroup_init(void) > +{ > + misc_cg_set_ops(MISC_CG_RES_SGX_EPC, &sgx_cgroup_ops); > + sgx_cgroup_misc_init(misc_cg_root(), &sgx_cg_root); > +} > diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.h b/arch/x86/kernel/cpu/sgx/epc_cgroup.h > new file mode 100644 > index 000000000000..8f794e23fad6 > --- /dev/null > +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.h > @@ -0,0 +1,70 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* Copyright(c) 2022 Intel Corporation. */ > +#ifndef _SGX_EPC_CGROUP_H_ > +#define _SGX_EPC_CGROUP_H_ > + > +#include <asm/sgx.h> > +#include <linux/cgroup.h> > +#include <linux/misc_cgroup.h> > + > +#include "sgx.h" > + > +#ifndef CONFIG_CGROUP_SGX_EPC Nit: add an empty line to make text more breathable. > +#define MISC_CG_RES_SGX_EPC MISC_CG_RES_TYPES > +struct sgx_cgroup; > + > +static inline struct sgx_cgroup *sgx_get_current_cg(void) > +{ > + return NULL; > +} > + > +static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) { } > + > +static inline int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg) > +{ > + return 0; > +} > + > +static inline void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) { } > + > +static inline void sgx_cgroup_init(void) { } > +#else Nit: I prefer two empty lines before and after the 'else'. > +struct sgx_cgroup { > + struct misc_cg *cg; > +}; > + > +static inline struct sgx_cgroup *sgx_cgroup_from_misc_cg(struct misc_cg *cg) > +{ > + return (struct sgx_cgroup *)(cg->res[MISC_CG_RES_SGX_EPC].priv); > +} > + > +/** > + * sgx_get_current_cg() - get the EPC cgroup of current process. > + * > + * Returned cgroup has its ref count increased by 1. Caller must call > + * sgx_put_cg() to return the reference. > + * > + * Return: EPC cgroup to which the current task belongs to. > + */ > +static inline struct sgx_cgroup *sgx_get_current_cg(void) > +{ > + return sgx_cgroup_from_misc_cg(get_current_misc_cg()); > +} Again, I _think_ you need to check whether get_current_misc_cg() returns NULL? Misc cgroup can be disabled by command line even it is on in the Kconfig. I am not expert on cgroup, so could you check on this? > + > +/** > + * sgx_put_sgx_cg() - Put the EPC cgroup and reduce its ref count. > + * @sgx_cg - EPC cgroup to put. > + */ > +static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) > +{ > + if (sgx_cg) > + put_misc_cg(sgx_cg->cg); > +} > + > +int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg); > +void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg); > +void sgx_cgroup_init(void); > + > +#endif > + > +#endif /* _SGX_EPC_CGROUP_H_ */ > diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c > index d219f14365d4..023af54c1beb 100644 > --- a/arch/x86/kernel/cpu/sgx/main.c > +++ b/arch/x86/kernel/cpu/sgx/main.c > @@ -6,6 +6,7 @@ > #include <linux/highmem.h> > #include <linux/kthread.h> > #include <linux/miscdevice.h> > +#include <linux/misc_cgroup.h> > #include <linux/node.h> > #include <linux/pagemap.h> > #include <linux/ratelimit.h> > @@ -17,6 +18,7 @@ > #include "driver.h" > #include "encl.h" > #include "encls.h" > +#include "epc_cgroup.h" > > struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; > static int sgx_nr_epc_sections; > @@ -558,7 +560,16 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page) > */ > struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim) > { > + struct sgx_cgroup *sgx_cg; > struct sgx_epc_page *page; > + int ret; > + > + sgx_cg = sgx_get_current_cg(); > + ret = sgx_cgroup_try_charge(sgx_cg); > + if (ret) { > + sgx_put_cg(sgx_cg); > + return ERR_PTR(ret); > + } > > for ( ; ; ) { > page = __sgx_alloc_epc_page(); > @@ -567,8 +578,10 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim) > break; > } > > - if (list_empty(&sgx_active_page_list)) > - return ERR_PTR(-ENOMEM); > + if (list_empty(&sgx_active_page_list)) { > + page = ERR_PTR(-ENOMEM); > + break; > + } > > if (reclaim == SGX_NO_RECLAIM) { > page = ERR_PTR(-EBUSY); > @@ -580,10 +593,24 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim) > break; > } > > + /* > + * Need to do a global reclamation if cgroup was not full but free > + * physical pages run out, causing __sgx_alloc_epc_page() to fail. > + */ Again, to me this comment shouldn't be here, because it doesn't add any more information. If you can reach here, you have passed the charge(). In fact, I believe this doesn't matter: When you fail to allocate, you just need to reclaim. Now you only have the global reclamation, thus you need to reclaim from it. Perhaps it is useful when you have per-cgroup LRU list. In that case you can put this comment there. > sgx_reclaim_pages(); > cond_resched(); > } > > +#ifdef CONFIG_CGROUP_SGX_EPC > + if (!IS_ERR(page)) { > + WARN_ON_ONCE(page->sgx_cg); > + /* sgx_put_cg() in sgx_free_epc_page() */ > + page->sgx_cg = sgx_cg; > + } else { > + sgx_cgroup_uncharge(sgx_cg); > + sgx_put_cg(sgx_cg); > + } > +#endif Again, IMHO having CONFIG_CGROUP_SGX_EPC here is ugly, because it doesn't even match the try_charge() above, which doesn't have the CONFIG_CGROUP_SGX_EPC. If you add a wrapper in "epc_cgroup.h" static inline void sgx_epc_page_set_cgroup(struct epc_page *epc_page, struct sgx_cgroup *sgx_cg) { #ifdef CONFIG_CGROUP_SGX_EPC epc_page->sgx_cg = sgx_cg; #endif } Then I believe the above can be simplified to: if (!IS_ERR(page)) { sgx_epc_page_set_cgroup(page, sgx_cg); } else { sgx_cgroup_uncharge(sgx_cg); sgx_put_cg(sgx_cg); } > if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) > wake_up(&ksgxd_waitq); > > @@ -604,6 +631,14 @@ void sgx_free_epc_page(struct sgx_epc_page *page) > struct sgx_epc_section *section = &sgx_epc_sections[page->section]; > struct sgx_numa_node *node = section->node; > > +#ifdef CONFIG_CGROUP_SGX_EPC > + if (page->sgx_cg) { > + sgx_cgroup_uncharge(page->sgx_cg); > + sgx_put_cg(page->sgx_cg); > + page->sgx_cg = NULL; > + } > +#endif > + Similarly, how about adding a wrapper in "epc_cgroup.h" struct sgx_cgroup *sgx_epc_page_get_cgroup(struct sgx_epc_page *page) { #ifdef CONFIG_CGROUP_SGX_EPC return page->sgx_cg; #else return NULL; #endif } Then this can be: sgx_cg = sgx_epc_page_get_cgroup(page); sgx_cgroup_uncharge(sgx_cg); sgx_put_cg(sgx_cg); sgx_epc_page_set_cgroup(page, NULL); > spin_lock(&node->lock); > > page->owner = NULL; > @@ -643,6 +678,11 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, > section->pages[i].flags = 0; > section->pages[i].owner = NULL; > section->pages[i].poison = 0; > + > +#ifdef CONFIG_CGROUP_SGX_EPC > + section->pages[i].sgx_cg = NULL; > +#endif Can use the wrapper too. [...] (Btw, I'll be away for couple of days due to public holiday and will review the rest starting from late next week).
On Thu Mar 28, 2024 at 2:53 PM EET, Huang, Kai wrote: > > > --- /dev/null > > +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c > > @@ -0,0 +1,74 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +// Copyright(c) 2022 Intel Corporation. > > It's 2024 now. > > And looks you need to use C style comment for /* Copyright ... */, after looking > at some other C files. To be fair, this happens *all the time* to everyone :-) I've proposed this few times in SGX context and going to say it now. Given the nature of Git copyrights would anyway need to be sorted by the Git log not possibly incorrect copyright platters in the header and source files. BR, Jarkko
On Sat, 2024-03-30 at 13:17 +0200, Jarkko Sakkinen wrote: > On Thu Mar 28, 2024 at 2:53 PM EET, Huang, Kai wrote: > > > > > --- /dev/null > > > +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c > > > @@ -0,0 +1,74 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > +// Copyright(c) 2022 Intel Corporation. > > > > It's 2024 now. > > > > And looks you need to use C style comment for /* Copyright ... */, after looking > > at some other C files. > > To be fair, this happens *all the time* to everyone :-) > > I've proposed this few times in SGX context and going to say it now. > Given the nature of Git copyrights would anyway need to be sorted by > the Git log not possibly incorrect copyright platters in the header > and source files. > Sure fine to me either way. Thanks for pointing out. I have some vague memory that we should update the year but I guess I was wrong.
On Mon Apr 1, 2024 at 12:29 PM EEST, Huang, Kai wrote: > On Sat, 2024-03-30 at 13:17 +0200, Jarkko Sakkinen wrote: > > On Thu Mar 28, 2024 at 2:53 PM EET, Huang, Kai wrote: > > > > > > > --- /dev/null > > > > +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c > > > > @@ -0,0 +1,74 @@ > > > > +// SPDX-License-Identifier: GPL-2.0 > > > > +// Copyright(c) 2022 Intel Corporation. > > > > > > It's 2024 now. > > > > > > And looks you need to use C style comment for /* Copyright ... */, after looking > > > at some other C files. > > > > To be fair, this happens *all the time* to everyone :-) > > > > I've proposed this few times in SGX context and going to say it now. > > Given the nature of Git copyrights would anyway need to be sorted by > > the Git log not possibly incorrect copyright platters in the header > > and source files. > > > > Sure fine to me either way. Thanks for pointing out. > > I have some vague memory that we should update the year but I guess I was wrong. I think updating year makes sense! I'd be fine not having copyright platter at all since the commit is from Intel domain anyway but if it is kept then the year needs to be corrected. I mean Git commit stores all the data, including exact date. BR, Jarkko
On Thu, 28 Mar 2024 07:53:45 -0500, Huang, Kai <kai.huang@intel.com> wrote: > >> --- /dev/null >> +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c >> @@ -0,0 +1,74 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +// Copyright(c) 2022 Intel Corporation. > > It's 2024 now. > > And looks you need to use C style comment for /* Copyright ... */, after > looking > at some other C files. > Ok, will update years and use C style. >> + >> +#include <linux/atomic.h> >> +#include <linux/kernel.h> >> +#include "epc_cgroup.h" >> + >> +/* The root SGX EPC cgroup */ >> +static struct sgx_cgroup sgx_cg_root; >> + >> +/** >> + * sgx_cgroup_try_charge() - try to charge cgroup for a single EPC page >> + * >> + * @sgx_cg: The EPC cgroup to be charged for the page. >> + * Return: >> + * * %0 - If successfully charged. >> + * * -errno - for failures. >> + */ >> +int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg) >> +{ >> + return misc_cg_try_charge(MISC_CG_RES_SGX_EPC, sgx_cg->cg, PAGE_SIZE); >> +} >> + >> +/** >> + * sgx_cgroup_uncharge() - uncharge a cgroup for an EPC page >> + * @sgx_cg: The charged sgx cgroup >> + */ >> +void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) >> +{ >> + misc_cg_uncharge(MISC_CG_RES_SGX_EPC, sgx_cg->cg, PAGE_SIZE); >> +} >> + >> +static void sgx_cgroup_free(struct misc_cg *cg) >> +{ >> + struct sgx_cgroup *sgx_cg; >> + >> + sgx_cg = sgx_cgroup_from_misc_cg(cg); >> + if (!sgx_cg) >> + return; >> + >> + kfree(sgx_cg); >> +} >> + >> +static int sgx_cgroup_alloc(struct misc_cg *cg); > > Again, this declaration can be removed if you move the below structure > ... > >> + >> +const struct misc_res_ops sgx_cgroup_ops = { >> + .alloc = sgx_cgroup_alloc, >> + .free = sgx_cgroup_free, >> +}; >> + >> +static void sgx_cgroup_misc_init(struct misc_cg *cg, struct sgx_cgroup >> *sgx_cg) >> +{ >> + cg->res[MISC_CG_RES_SGX_EPC].priv = sgx_cg; >> + sgx_cg->cg = cg; >> +} >> + >> +static int sgx_cgroup_alloc(struct misc_cg *cg) >> +{ >> + struct sgx_cgroup *sgx_cg; >> + >> + sgx_cg = kzalloc(sizeof(*sgx_cg), GFP_KERNEL); >> + if (!sgx_cg) >> + return -ENOMEM; >> + >> + sgx_cgroup_misc_init(cg, sgx_cg); >> + >> + return 0; >> +} > > ... here. > yes, thanks >> + >> +void sgx_cgroup_init(void) >> +{ >> + misc_cg_set_ops(MISC_CG_RES_SGX_EPC, &sgx_cgroup_ops); >> + sgx_cgroup_misc_init(misc_cg_root(), &sgx_cg_root); >> +} >> diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.h >> b/arch/x86/kernel/cpu/sgx/epc_cgroup.h >> new file mode 100644 >> index 000000000000..8f794e23fad6 >> --- /dev/null >> +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.h >> @@ -0,0 +1,70 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* Copyright(c) 2022 Intel Corporation. */ >> +#ifndef _SGX_EPC_CGROUP_H_ >> +#define _SGX_EPC_CGROUP_H_ >> + >> +#include <asm/sgx.h> >> +#include <linux/cgroup.h> >> +#include <linux/misc_cgroup.h> >> + >> +#include "sgx.h" >> + >> +#ifndef CONFIG_CGROUP_SGX_EPC > > Nit: add an empty line to make text more breathable. > ok >> +#define MISC_CG_RES_SGX_EPC MISC_CG_RES_TYPES >> +struct sgx_cgroup; >> + >> +static inline struct sgx_cgroup *sgx_get_current_cg(void) >> +{ >> + return NULL; >> +} >> + >> +static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) { } >> + >> +static inline int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg) >> +{ >> + return 0; >> +} >> + >> +static inline void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) { } >> + >> +static inline void sgx_cgroup_init(void) { } >> +#else > > Nit: I prefer two empty lines before and after the 'else'. > ok >> +struct sgx_cgroup { >> + struct misc_cg *cg; >> +}; >> + >> +static inline struct sgx_cgroup *sgx_cgroup_from_misc_cg(struct >> misc_cg *cg) >> +{ >> + return (struct sgx_cgroup *)(cg->res[MISC_CG_RES_SGX_EPC].priv); >> +} >> + >> +/** >> + * sgx_get_current_cg() - get the EPC cgroup of current process. >> + * >> + * Returned cgroup has its ref count increased by 1. Caller must call >> + * sgx_put_cg() to return the reference. >> + * >> + * Return: EPC cgroup to which the current task belongs to. >> + */ >> +static inline struct sgx_cgroup *sgx_get_current_cg(void) >> +{ >> + return sgx_cgroup_from_misc_cg(get_current_misc_cg()); >> +} > > Again, I _think_ you need to check whether get_current_misc_cg() returns > NULL? > > Misc cgroup can be disabled by command line even it is on in the Kconfig. > > I am not expert on cgroup, so could you check on this? > Good catch. Will add NULL check in sgx_cgroup_from_misc_cg() >> + >> +/** >> + * sgx_put_sgx_cg() - Put the EPC cgroup and reduce its ref count. >> + * @sgx_cg - EPC cgroup to put. >> + */ >> +static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) >> +{ >> + if (sgx_cg) >> + put_misc_cg(sgx_cg->cg); >> +} >> + >> +int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg); >> +void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg); >> +void sgx_cgroup_init(void); >> + >> +#endif >> + >> +#endif /* _SGX_EPC_CGROUP_H_ */ >> diff --git a/arch/x86/kernel/cpu/sgx/main.c >> b/arch/x86/kernel/cpu/sgx/main.c >> index d219f14365d4..023af54c1beb 100644 >> --- a/arch/x86/kernel/cpu/sgx/main.c >> +++ b/arch/x86/kernel/cpu/sgx/main.c >> @@ -6,6 +6,7 @@ >> #include <linux/highmem.h> >> #include <linux/kthread.h> >> #include <linux/miscdevice.h> >> +#include <linux/misc_cgroup.h> >> #include <linux/node.h> >> #include <linux/pagemap.h> >> #include <linux/ratelimit.h> >> @@ -17,6 +18,7 @@ >> #include "driver.h" >> #include "encl.h" >> #include "encls.h" >> +#include "epc_cgroup.h" >> >> struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; >> static int sgx_nr_epc_sections; >> @@ -558,7 +560,16 @@ int sgx_unmark_page_reclaimable(struct >> sgx_epc_page *page) >> */ >> struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim >> reclaim) >> { >> + struct sgx_cgroup *sgx_cg; >> struct sgx_epc_page *page; >> + int ret; >> + >> + sgx_cg = sgx_get_current_cg(); >> + ret = sgx_cgroup_try_charge(sgx_cg); >> + if (ret) { >> + sgx_put_cg(sgx_cg); >> + return ERR_PTR(ret); >> + } >> >> for ( ; ; ) { >> page = __sgx_alloc_epc_page(); >> @@ -567,8 +578,10 @@ struct sgx_epc_page *sgx_alloc_epc_page(void >> *owner, enum sgx_reclaim reclaim) >> break; >> } >> >> - if (list_empty(&sgx_active_page_list)) >> - return ERR_PTR(-ENOMEM); >> + if (list_empty(&sgx_active_page_list)) { >> + page = ERR_PTR(-ENOMEM); >> + break; >> + } >> >> if (reclaim == SGX_NO_RECLAIM) { >> page = ERR_PTR(-EBUSY); >> @@ -580,10 +593,24 @@ struct sgx_epc_page *sgx_alloc_epc_page(void >> *owner, enum sgx_reclaim reclaim) >> break; >> } >> >> + /* >> + * Need to do a global reclamation if cgroup was not full but free >> + * physical pages run out, causing __sgx_alloc_epc_page() to fail. >> + */ > > Again, to me this comment shouldn't be here, because it doesn't add any > more > information. > > If you can reach here, you have passed the charge(). In fact, I believe > this > doesn't matter: > When you fail to allocate, you just need to reclaim. > > Now you only have the global reclamation, thus you need to reclaim from > it. > > Perhaps it is useful when you have per-cgroup LRU list. In that case > you can > put this comment there. > Ok >> sgx_reclaim_pages(); >> cond_resched(); >> } >> >> +#ifdef CONFIG_CGROUP_SGX_EPC >> + if (!IS_ERR(page)) { >> + WARN_ON_ONCE(page->sgx_cg); >> + /* sgx_put_cg() in sgx_free_epc_page() */ >> + page->sgx_cg = sgx_cg; >> + } else { >> + sgx_cgroup_uncharge(sgx_cg); >> + sgx_put_cg(sgx_cg); >> + } >> +#endif > > Again, IMHO having CONFIG_CGROUP_SGX_EPC here is ugly, because it > doesn't even > match the try_charge() above, which doesn't have the > CONFIG_CGROUP_SGX_EPC. > > If you add a wrapper in "epc_cgroup.h" > Agree. but in sgx.h so sgx_epc_page struct is not exposed in epc_cgroup.h. > static inline void sgx_epc_page_set_cgroup(struct epc_page *epc_page, > struct sgx_cgroup *sgx_cg) > { > #ifdef CONFIG_CGROUP_SGX_EPC > epc_page->sgx_cg = sgx_cg; > #endif > } > > Then I believe the above can be simplified to: > > if (!IS_ERR(page)) { > sgx_epc_page_set_cgroup(page, sgx_cg); > } else { > sgx_cgroup_uncharge(sgx_cg); > sgx_put_cg(sgx_cg); > } > >> if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) >> wake_up(&ksgxd_waitq); >> >> @@ -604,6 +631,14 @@ void sgx_free_epc_page(struct sgx_epc_page *page) >> struct sgx_epc_section *section = &sgx_epc_sections[page->section]; >> struct sgx_numa_node *node = section->node; >> >> +#ifdef CONFIG_CGROUP_SGX_EPC >> + if (page->sgx_cg) { >> + sgx_cgroup_uncharge(page->sgx_cg); >> + sgx_put_cg(page->sgx_cg); >> + page->sgx_cg = NULL; >> + } >> +#endif >> + > > Similarly, how about adding a wrapper in "epc_cgroup.h" > > struct sgx_cgroup *sgx_epc_page_get_cgroup(struct sgx_epc_page *page) > { > #ifdef CONFIG_CGROUP_SGX_EPC > return page->sgx_cg; > #else > return NULL; > #endif > } > > Then this can be: > > sgx_cg = sgx_epc_page_get_cgroup(page); > sgx_cgroup_uncharge(sgx_cg); > sgx_put_cg(sgx_cg); > sgx_epc_page_set_cgroup(page, NULL); > sure. >> spin_lock(&node->lock); >> >> page->owner = NULL; >> @@ -643,6 +678,11 @@ static bool __init sgx_setup_epc_section(u64 >> phys_addr, u64 size, >> section->pages[i].flags = 0; >> section->pages[i].owner = NULL; >> section->pages[i].poison = 0; >> + >> +#ifdef CONFIG_CGROUP_SGX_EPC >> + section->pages[i].sgx_cg = NULL; >> +#endif > > Can use the wrapper too. > yes. > [...] > > (Btw, I'll be away for couple of days due to public holiday and will > review the > rest starting from late next week). Thanks Haitao
On Thu, 2024-04-04 at 20:24 -0500, Haitao Huang wrote: > > Again, IMHO having CONFIG_CGROUP_SGX_EPC here is ugly, because it > > doesn't even > > match the try_charge() above, which doesn't have the > > CONFIG_CGROUP_SGX_EPC. > > > > If you add a wrapper in "epc_cgroup.h" > > > Agree. but in sgx.h so sgx_epc_page struct is not exposed in epc_cgroup.h. I am fine with any place that suits.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 39886bab943a..bda78255a7ab 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1941,6 +1941,19 @@ config X86_SGX If unsure, say N. +config CGROUP_SGX_EPC + bool "Miscellaneous Cgroup Controller for Enclave Page Cache (EPC) for Intel SGX" + depends on X86_SGX && CGROUP_MISC + help + Provides control over the EPC footprint of tasks in a cgroup via + the Miscellaneous cgroup controller. + + EPC is a subset of regular memory that is usable only by SGX + enclaves and is very limited in quantity, e.g. less than 1% + of total DRAM. + + Say N if unsure. + config X86_USER_SHADOW_STACK bool "X86 userspace shadow stack" depends on AS_WRUSS diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile index 9c1656779b2a..12901a488da7 100644 --- a/arch/x86/kernel/cpu/sgx/Makefile +++ b/arch/x86/kernel/cpu/sgx/Makefile @@ -4,3 +4,4 @@ obj-y += \ ioctl.o \ main.o obj-$(CONFIG_X86_SGX_KVM) += virt.o +obj-$(CONFIG_CGROUP_SGX_EPC) += epc_cgroup.o diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.c b/arch/x86/kernel/cpu/sgx/epc_cgroup.c new file mode 100644 index 000000000000..a1dd43c195b2 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright(c) 2022 Intel Corporation. + +#include <linux/atomic.h> +#include <linux/kernel.h> +#include "epc_cgroup.h" + +/* The root SGX EPC cgroup */ +static struct sgx_cgroup sgx_cg_root; + +/** + * sgx_cgroup_try_charge() - try to charge cgroup for a single EPC page + * + * @sgx_cg: The EPC cgroup to be charged for the page. + * Return: + * * %0 - If successfully charged. + * * -errno - for failures. + */ +int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg) +{ + return misc_cg_try_charge(MISC_CG_RES_SGX_EPC, sgx_cg->cg, PAGE_SIZE); +} + +/** + * sgx_cgroup_uncharge() - uncharge a cgroup for an EPC page + * @sgx_cg: The charged sgx cgroup + */ +void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) +{ + misc_cg_uncharge(MISC_CG_RES_SGX_EPC, sgx_cg->cg, PAGE_SIZE); +} + +static void sgx_cgroup_free(struct misc_cg *cg) +{ + struct sgx_cgroup *sgx_cg; + + sgx_cg = sgx_cgroup_from_misc_cg(cg); + if (!sgx_cg) + return; + + kfree(sgx_cg); +} + +static int sgx_cgroup_alloc(struct misc_cg *cg); + +const struct misc_res_ops sgx_cgroup_ops = { + .alloc = sgx_cgroup_alloc, + .free = sgx_cgroup_free, +}; + +static void sgx_cgroup_misc_init(struct misc_cg *cg, struct sgx_cgroup *sgx_cg) +{ + cg->res[MISC_CG_RES_SGX_EPC].priv = sgx_cg; + sgx_cg->cg = cg; +} + +static int sgx_cgroup_alloc(struct misc_cg *cg) +{ + struct sgx_cgroup *sgx_cg; + + sgx_cg = kzalloc(sizeof(*sgx_cg), GFP_KERNEL); + if (!sgx_cg) + return -ENOMEM; + + sgx_cgroup_misc_init(cg, sgx_cg); + + return 0; +} + +void sgx_cgroup_init(void) +{ + misc_cg_set_ops(MISC_CG_RES_SGX_EPC, &sgx_cgroup_ops); + sgx_cgroup_misc_init(misc_cg_root(), &sgx_cg_root); +} diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.h b/arch/x86/kernel/cpu/sgx/epc_cgroup.h new file mode 100644 index 000000000000..8f794e23fad6 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2022 Intel Corporation. */ +#ifndef _SGX_EPC_CGROUP_H_ +#define _SGX_EPC_CGROUP_H_ + +#include <asm/sgx.h> +#include <linux/cgroup.h> +#include <linux/misc_cgroup.h> + +#include "sgx.h" + +#ifndef CONFIG_CGROUP_SGX_EPC +#define MISC_CG_RES_SGX_EPC MISC_CG_RES_TYPES +struct sgx_cgroup; + +static inline struct sgx_cgroup *sgx_get_current_cg(void) +{ + return NULL; +} + +static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) { } + +static inline int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg) +{ + return 0; +} + +static inline void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) { } + +static inline void sgx_cgroup_init(void) { } +#else +struct sgx_cgroup { + struct misc_cg *cg; +}; + +static inline struct sgx_cgroup *sgx_cgroup_from_misc_cg(struct misc_cg *cg) +{ + return (struct sgx_cgroup *)(cg->res[MISC_CG_RES_SGX_EPC].priv); +} + +/** + * sgx_get_current_cg() - get the EPC cgroup of current process. + * + * Returned cgroup has its ref count increased by 1. Caller must call + * sgx_put_cg() to return the reference. + * + * Return: EPC cgroup to which the current task belongs to. + */ +static inline struct sgx_cgroup *sgx_get_current_cg(void) +{ + return sgx_cgroup_from_misc_cg(get_current_misc_cg()); +} + +/** + * sgx_put_sgx_cg() - Put the EPC cgroup and reduce its ref count. + * @sgx_cg - EPC cgroup to put. + */ +static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) +{ + if (sgx_cg) + put_misc_cg(sgx_cg->cg); +} + +int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg); +void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg); +void sgx_cgroup_init(void); + +#endif + +#endif /* _SGX_EPC_CGROUP_H_ */ diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index d219f14365d4..023af54c1beb 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -6,6 +6,7 @@ #include <linux/highmem.h> #include <linux/kthread.h> #include <linux/miscdevice.h> +#include <linux/misc_cgroup.h> #include <linux/node.h> #include <linux/pagemap.h> #include <linux/ratelimit.h> @@ -17,6 +18,7 @@ #include "driver.h" #include "encl.h" #include "encls.h" +#include "epc_cgroup.h" struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; static int sgx_nr_epc_sections; @@ -558,7 +560,16 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page) */ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim) { + struct sgx_cgroup *sgx_cg; struct sgx_epc_page *page; + int ret; + + sgx_cg = sgx_get_current_cg(); + ret = sgx_cgroup_try_charge(sgx_cg); + if (ret) { + sgx_put_cg(sgx_cg); + return ERR_PTR(ret); + } for ( ; ; ) { page = __sgx_alloc_epc_page(); @@ -567,8 +578,10 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim) break; } - if (list_empty(&sgx_active_page_list)) - return ERR_PTR(-ENOMEM); + if (list_empty(&sgx_active_page_list)) { + page = ERR_PTR(-ENOMEM); + break; + } if (reclaim == SGX_NO_RECLAIM) { page = ERR_PTR(-EBUSY); @@ -580,10 +593,24 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim) break; } + /* + * Need to do a global reclamation if cgroup was not full but free + * physical pages run out, causing __sgx_alloc_epc_page() to fail. + */ sgx_reclaim_pages(); cond_resched(); } +#ifdef CONFIG_CGROUP_SGX_EPC + if (!IS_ERR(page)) { + WARN_ON_ONCE(page->sgx_cg); + /* sgx_put_cg() in sgx_free_epc_page() */ + page->sgx_cg = sgx_cg; + } else { + sgx_cgroup_uncharge(sgx_cg); + sgx_put_cg(sgx_cg); + } +#endif if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) wake_up(&ksgxd_waitq); @@ -604,6 +631,14 @@ void sgx_free_epc_page(struct sgx_epc_page *page) struct sgx_epc_section *section = &sgx_epc_sections[page->section]; struct sgx_numa_node *node = section->node; +#ifdef CONFIG_CGROUP_SGX_EPC + if (page->sgx_cg) { + sgx_cgroup_uncharge(page->sgx_cg); + sgx_put_cg(page->sgx_cg); + page->sgx_cg = NULL; + } +#endif + spin_lock(&node->lock); page->owner = NULL; @@ -643,6 +678,11 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, section->pages[i].flags = 0; section->pages[i].owner = NULL; section->pages[i].poison = 0; + +#ifdef CONFIG_CGROUP_SGX_EPC + section->pages[i].sgx_cg = NULL; +#endif + list_add_tail(§ion->pages[i].list, &sgx_dirty_page_list); } @@ -787,6 +827,7 @@ static void __init arch_update_sysfs_visibility(int nid) {} static bool __init sgx_page_cache_init(void) { u32 eax, ebx, ecx, edx, type; + u64 capacity = 0; u64 pa, size; int nid; int i; @@ -837,6 +878,7 @@ static bool __init sgx_page_cache_init(void) sgx_epc_sections[i].node = &sgx_numa_nodes[nid]; sgx_numa_nodes[nid].size += size; + capacity += size; sgx_nr_epc_sections++; } @@ -846,6 +888,8 @@ static bool __init sgx_page_cache_init(void) return false; } + misc_cg_set_capacity(MISC_CG_RES_SGX_EPC, capacity); + return true; } @@ -942,6 +986,9 @@ static int __init sgx_init(void) if (sgx_vepc_init() && ret) goto err_provision; + /* Setup cgroup if either the native or vepc driver is active */ + sgx_cgroup_init(); + return 0; err_provision: diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h index ca34cd4f58ac..6accc81d19a9 100644 --- a/arch/x86/kernel/cpu/sgx/sgx.h +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -39,12 +39,17 @@ enum sgx_reclaim { SGX_DO_RECLAIM }; +struct sgx_cgroup; + struct sgx_epc_page { unsigned int section; u16 flags; u16 poison; struct sgx_encl_page *owner; struct list_head list; +#ifdef CONFIG_CGROUP_SGX_EPC + struct sgx_cgroup *sgx_cg; +#endif }; /* diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 2f6cc3a0ad23..1a16efdfcd3d 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -46,11 +46,13 @@ struct misc_res_ops { * @max: Maximum limit on the resource. * @usage: Current usage of the resource. * @events: Number of times, the resource limit exceeded. + * @priv: resource specific data. */ struct misc_res { u64 max; atomic64_t usage; atomic64_t events; + void *priv; }; /**