Message ID | 20240328002229.30264-13-haitao.huang@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add Cgroup support for SGX EPC memory | expand |
> --- a/arch/x86/kernel/cpu/sgx/epc_cgroup.h > +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.h > @@ -28,6 +28,10 @@ static inline int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_recl > static inline void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) { } > > static inline void sgx_cgroup_init(void) { } > + > +static inline void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct *charge_mm) > +{ > +} > #else > struct sgx_cgroup { > struct misc_cg *cg; > @@ -65,6 +69,9 @@ static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) > > int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_reclaim reclaim); > void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg); > +bool sgx_cgroup_lru_empty(struct misc_cg *root); > +bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg); > +void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct *charge_mm); Seems the decision to choose whether to implement a stub function for some function is quite random to me. My impression is people in general don't like #ifdef in the C file but prefer to implementing it in the header in some helper function. I guess you might want to just implement a stub function for each of the 3 functions exposed, so that we can eliminate some #ifdefs in the sgx/main.c (see below). > void sgx_cgroup_init(void); > > #endif > diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c > index 7f92455d957d..68f28ff2d5ef 100644 > --- a/arch/x86/kernel/cpu/sgx/main.c > +++ b/arch/x86/kernel/cpu/sgx/main.c > @@ -34,6 +34,16 @@ static struct sgx_epc_lru_list sgx_global_lru; > > static inline struct sgx_epc_lru_list *sgx_lru_list(struct sgx_epc_page *epc_page) > { > +#ifdef CONFIG_CGROUP_SGX_EPC > + if (epc_page->sgx_cg) > + return &epc_page->sgx_cg->lru; > + > + /* > + * This should not happen when cgroup is enabled: Every page belongs > + * to a cgroup, or the root by default. > + */ > + WARN_ON_ONCE(1); In the case MISC cgroup is enabled in Kconfig but disabled by command line, I think this becomes legal now? > +#endif > return &sgx_global_lru; > } > > @@ -42,7 +52,11 @@ static inline struct sgx_epc_lru_list *sgx_lru_list(struct sgx_epc_page *epc_pag > */ > static inline bool sgx_can_reclaim(void) > { > +#ifdef CONFIG_CGROUP_SGX_EPC > + return !sgx_cgroup_lru_empty(misc_cg_root()); > +#else > return !list_empty(&sgx_global_lru.reclaimable); > +#endif > } > Here you are using #ifdef CONFIG_CGRUP_SGX_EPC, but ... > static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0); > @@ -404,7 +418,10 @@ static bool sgx_should_reclaim(unsigned long watermark) > > static void sgx_reclaim_pages_global(struct mm_struct *charge_mm) > { > - sgx_reclaim_pages(&sgx_global_lru, charge_mm); > + if (IS_ENABLED(CONFIG_CGROUP_SGX_EPC)) > + sgx_cgroup_reclaim_pages(misc_cg_root(), charge_mm); > + else > + sgx_reclaim_pages(&sgx_global_lru, charge_mm); > } ... here you are using IS_ENABLED(CONFIG_CGROUP_SGX_EPC). Any reason they are not consistent? Also, in the case where MISC cgroup is disabled via commandline, I think it won't work, because misc_cg_root() should be NULL in this case while IS_ENABLED(CONFIG_CGROUP_SGX_EPC) is true. > > /* > @@ -414,6 +431,16 @@ static void sgx_reclaim_pages_global(struct mm_struct *charge_mm) > */ > void sgx_reclaim_direct(void) > { > +#ifdef CONFIG_CGROUP_SGX_EPC > + struct sgx_cgroup *sgx_cg = sgx_get_current_cg(); > + > + /* Make sure there are some free pages at cgroup level */ > + if (sgx_cg && sgx_cgroup_should_reclaim(sgx_cg)) { > + sgx_cgroup_reclaim_pages(sgx_cg->cg, current->mm); > + sgx_put_cg(sgx_cg); > + } > +#endif This #ifdef CONFIG_CGROUP_SGX_EPC can be removed if we implement a stub function for sgx_cgroup_should_reclaim(). > + /* Make sure there are some free pages at global level */ > if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) > sgx_reclaim_pages_global(current->mm); > }
On Mon, 08 Apr 2024 07:20:23 -0500, Huang, Kai <kai.huang@intel.com> wrote: > >> --- a/arch/x86/kernel/cpu/sgx/epc_cgroup.h >> +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.h >> @@ -28,6 +28,10 @@ static inline int sgx_cgroup_try_charge(struct >> sgx_cgroup *sgx_cg, enum sgx_recl >> static inline void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) { } >> >> static inline void sgx_cgroup_init(void) { } >> + >> +static inline void sgx_cgroup_reclaim_pages(struct misc_cg *root, >> struct mm_struct *charge_mm) >> +{ >> +} >> #else >> struct sgx_cgroup { >> struct misc_cg *cg; >> @@ -65,6 +69,9 @@ static inline void sgx_put_cg(struct sgx_cgroup >> *sgx_cg) >> >> int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_reclaim >> reclaim); >> void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg); >> +bool sgx_cgroup_lru_empty(struct misc_cg *root); >> +bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg); >> +void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct >> *charge_mm); > > Seems the decision to choose whether to implement a stub function for > some > function is quite random to me. > > My impression is people in general don't like #ifdef in the C file but > prefer to > implementing it in the header in some helper function. > > I guess you might want to just implement a stub function for each of the > 3 > functions exposed, so that we can eliminate some #ifdefs in the > sgx/main.c (see > below). > >> void sgx_cgroup_init(void); >> >> #endif >> diff --git a/arch/x86/kernel/cpu/sgx/main.c >> b/arch/x86/kernel/cpu/sgx/main.c >> index 7f92455d957d..68f28ff2d5ef 100644 >> --- a/arch/x86/kernel/cpu/sgx/main.c >> +++ b/arch/x86/kernel/cpu/sgx/main.c >> @@ -34,6 +34,16 @@ static struct sgx_epc_lru_list sgx_global_lru; >> >> static inline struct sgx_epc_lru_list *sgx_lru_list(struct >> sgx_epc_page *epc_page) >> { >> +#ifdef CONFIG_CGROUP_SGX_EPC >> + if (epc_page->sgx_cg) >> + return &epc_page->sgx_cg->lru; >> + >> + /* >> + * This should not happen when cgroup is enabled: Every page belongs >> + * to a cgroup, or the root by default. >> + */ >> + WARN_ON_ONCE(1); > > In the case MISC cgroup is enabled in Kconfig but disabled by command > line, I > think this becomes legal now? > I'm not sure actually. Never saw the warning even if I set "cgroup_disable=misc" in commandlibe. Tried both v1 and v2. Anyway, I think we can remove this warning and we handle the NULL sgx_cg now. >> +#endif >> return &sgx_global_lru; >> } >> >> @@ -42,7 +52,11 @@ static inline struct sgx_epc_lru_list >> *sgx_lru_list(struct sgx_epc_page *epc_pag >> */ >> static inline bool sgx_can_reclaim(void) >> { >> +#ifdef CONFIG_CGROUP_SGX_EPC >> + return !sgx_cgroup_lru_empty(misc_cg_root()); >> +#else >> return !list_empty(&sgx_global_lru.reclaimable); >> +#endif >> } >> > > Here you are using #ifdef CONFIG_CGRUP_SGX_EPC, but ... > >> static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0); >> @@ -404,7 +418,10 @@ static bool sgx_should_reclaim(unsigned long >> watermark) >> >> static void sgx_reclaim_pages_global(struct mm_struct *charge_mm) >> { >> - sgx_reclaim_pages(&sgx_global_lru, charge_mm); >> + if (IS_ENABLED(CONFIG_CGROUP_SGX_EPC)) >> + sgx_cgroup_reclaim_pages(misc_cg_root(), charge_mm); >> + else >> + sgx_reclaim_pages(&sgx_global_lru, charge_mm); >> } > > ... here you are using IS_ENABLED(CONFIG_CGROUP_SGX_EPC). > > Any reason they are not consistent? I was hesitant to expose sgx_global_lru needed for implementing sgx_cgroup_lru_empty() stub which would also be a random decision and overkill to just remove couple of #ifdefs in short functions. > > Also, in the case where MISC cgroup is disabled via commandline, I think > it > won't work, because misc_cg_root() should be NULL in this case while > IS_ENABLED(CONFIG_CGROUP_SGX_EPC) is true. > >> The misc root cgroup is a static similar to sgx_cg_root. So misc_cg_root() won't be NULL However, based on how css_misc() was check NULL, I suppose sgx_get_current_cg() may be NULL when cgroup is disabled (again not 100% sure but we handle it anyway) >> /* >> @@ -414,6 +431,16 @@ static void sgx_reclaim_pages_global(struct >> mm_struct *charge_mm) >> */ >> void sgx_reclaim_direct(void) >> { >> +#ifdef CONFIG_CGROUP_SGX_EPC >> + struct sgx_cgroup *sgx_cg = sgx_get_current_cg(); >> + >> + /* Make sure there are some free pages at cgroup level */ >> + if (sgx_cg && sgx_cgroup_should_reclaim(sgx_cg)) { >> + sgx_cgroup_reclaim_pages(sgx_cg->cg, current->mm); >> + sgx_put_cg(sgx_cg); >> + } >> +#endif > > This #ifdef CONFIG_CGROUP_SGX_EPC can be removed if we implement a stub > function > for sgx_cgroup_should_reclaim(). > Yes. >> + /* Make sure there are some free pages at global level */ >> if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) >> sgx_reclaim_pages_global(current->mm); >> } > Thanks Haitao
On 9/04/2024 6:03 am, Haitao Huang wrote: >>> > > The misc root cgroup is a static similar to sgx_cg_root. So > misc_cg_root() won't be NULL > However, based on how css_misc() was check NULL, I suppose > sgx_get_current_cg() may be NULL when cgroup is disabled (again not 100% > sure but we handle it anyway) Could you help to check? Sorry I am busy on something else thus won't be able to do any actual check.
On Mon, 08 Apr 2024 17:37:10 -0500, Huang, Kai <kai.huang@intel.com> wrote: > > > On 9/04/2024 6:03 am, Haitao Huang wrote: >>>> >> The misc root cgroup is a static similar to sgx_cg_root. So >> misc_cg_root() won't be NULL >> However, based on how css_misc() was check NULL, I suppose >> sgx_get_current_cg() may be NULL when cgroup is disabled (again not >> 100% sure but we handle it anyway) > > Could you help to check? Sorry I am busy on something else thus won't > be able to do any actual check. > It's always non-NULL based on testing. It's hard for me to say definitely by reading the code. But IIUC cgroup_disable command-line only blocks operations in /sys/fs/cgroup so user space can't set up controllers and config limits, etc., for the diasabled ones. Each task->cgroups would still have a non-NULL pointer to the static root object for each cgroup that is enabled by KConfig, so get_current_misc_cg() thus sgx_get_current_cg() should not return NULL regardless 'cgroup_disable=misc'. Maybe @Michal or @tj can confirm? Thanks Haitao
On Mon, Apr 08, 2024 at 11:23:21PM -0500, Haitao Huang <haitao.huang@linux.intel.com> wrote: > It's always non-NULL based on testing. > > It's hard for me to say definitely by reading the code. But IIUC > cgroup_disable command-line only blocks operations in /sys/fs/cgroup so user > space can't set up controllers and config limits, etc., for the diasabled > ones. Each task->cgroups would still have a non-NULL pointer to the static > root object for each cgroup that is enabled by KConfig, so > get_current_misc_cg() thus sgx_get_current_cg() should not return NULL > regardless 'cgroup_disable=misc'. > > Maybe @Michal or @tj can confirm? The current implementation creates root css object (see cgroup_init(), cgroup_ssid_enabled() check is after cgroup_init_subsys()). I.e. it will look like all tasks are members of root cgroup wrt given controller permanently and controller attribute files won't exist. (It is up to the controller implementation to do further optimization based on the boot-time disablement (e.g. see uses of mem_cgroup_disabled()). Not sure if this is useful for misc controller.) As for the WARN_ON(1), taking example from memcg -- NULL is best synonymous with root. It's a judgement call which of the values to store and when to intepret it. HTH, Michal
On Tue, 09 Apr 2024 04:03:22 -0500, Michal Koutný <mkoutny@suse.com> wrote: > On Mon, Apr 08, 2024 at 11:23:21PM -0500, Haitao Huang > <haitao.huang@linux.intel.com> wrote: >> It's always non-NULL based on testing. >> >> It's hard for me to say definitely by reading the code. But IIUC >> cgroup_disable command-line only blocks operations in /sys/fs/cgroup so >> user >> space can't set up controllers and config limits, etc., for the >> diasabled >> ones. Each task->cgroups would still have a non-NULL pointer to the >> static >> root object for each cgroup that is enabled by KConfig, so >> get_current_misc_cg() thus sgx_get_current_cg() should not return NULL >> regardless 'cgroup_disable=misc'. >> >> Maybe @Michal or @tj can confirm? > > The current implementation creates root css object (see cgroup_init(), > cgroup_ssid_enabled() check is after cgroup_init_subsys()). > I.e. it will look like all tasks are members of root cgroup wrt given > controller permanently and controller attribute files won't exist. > > (It is up to the controller implementation to do further optimization > based on the boot-time disablement (e.g. see uses of > mem_cgroup_disabled()). Not sure if this is useful for misc controller.) > > As for the WARN_ON(1), taking example from memcg -- NULL is best > synonymous with root. It's a judgement call which of the values to store > and when to intepret it. > > HTH, > Michal Thanks for the info. The way I see it, misc does not have special handling like memcg so every task at least belong to the root(default) group even if it's disabled by command line parameter. So we would not get NULL from get_current_misc_cg(). I think I'll keep the WARN_ON_ONCE for now as a reminder in case misc do have custom support for disabling in future. Thanks Haitao
On Tue, 09 Apr 2024 10:34:06 -0500, Haitao Huang <haitao.huang@linux.intel.com> wrote: > On Tue, 09 Apr 2024 04:03:22 -0500, Michal Koutný <mkoutny@suse.com> > wrote: > >> On Mon, Apr 08, 2024 at 11:23:21PM -0500, Haitao Huang >> <haitao.huang@linux.intel.com> wrote: >>> It's always non-NULL based on testing. >>> >>> It's hard for me to say definitely by reading the code. But IIUC >>> cgroup_disable command-line only blocks operations in /sys/fs/cgroup >>> so user >>> space can't set up controllers and config limits, etc., for the >>> diasabled >>> ones. Each task->cgroups would still have a non-NULL pointer to the >>> static >>> root object for each cgroup that is enabled by KConfig, so >>> get_current_misc_cg() thus sgx_get_current_cg() should not return NULL >>> regardless 'cgroup_disable=misc'. >>> >>> Maybe @Michal or @tj can confirm? >> >> The current implementation creates root css object (see cgroup_init(), >> cgroup_ssid_enabled() check is after cgroup_init_subsys()). >> I.e. it will look like all tasks are members of root cgroup wrt given >> controller permanently and controller attribute files won't exist. >> >> (It is up to the controller implementation to do further optimization >> based on the boot-time disablement (e.g. see uses of >> mem_cgroup_disabled()). Not sure if this is useful for misc controller.) >> >> As for the WARN_ON(1), taking example from memcg -- NULL is best >> synonymous with root. It's a judgement call which of the values to store >> and when to intepret it. >> >> HTH, >> Michal > Thanks for the info. > > The way I see it, misc does not have special handling like memcg so > every task at least belong to the root(default) group even if it's > disabled by command line parameter. So we would not get NULL from > get_current_misc_cg(). I think I'll keep the WARN_ON_ONCE for now as a > reminder in case misc do have custom support for disabling in future. > Actually I think it makes more sense just add some comments instead of WARN. That's what I did in v11 now. Thanks Haitao
diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.c b/arch/x86/kernel/cpu/sgx/epc_cgroup.c index 1defbf213e8d..cacd9e93344e 100644 --- a/arch/x86/kernel/cpu/sgx/epc_cgroup.c +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c @@ -72,7 +72,7 @@ static inline u64 sgx_cgroup_max_pages_to_root(struct sgx_cgroup *sgx_cg) * * Return: %true if all cgroups under the specified root have empty LRU lists. */ -static bool sgx_cgroup_lru_empty(struct misc_cg *root) +bool sgx_cgroup_lru_empty(struct misc_cg *root) { struct cgroup_subsys_state *css_root; struct cgroup_subsys_state *pos; @@ -125,7 +125,7 @@ static bool sgx_cgroup_lru_empty(struct misc_cg *root) * triggering reclamation, and call cond_resched() in between iterations to * avoid indefinite blocking. */ -static void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct *charge_mm) +void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct *charge_mm) { struct cgroup_subsys_state *css_root; struct cgroup_subsys_state *pos; @@ -166,7 +166,7 @@ static void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct *cha * threshold (%SGX_CG_MIN_FREE_PAGE) and there are reclaimable pages within the * cgroup. */ -static bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg) +bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg) { u64 cur, max; diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.h b/arch/x86/kernel/cpu/sgx/epc_cgroup.h index f66570d3ef42..8f55b38157da 100644 --- a/arch/x86/kernel/cpu/sgx/epc_cgroup.h +++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.h @@ -28,6 +28,10 @@ static inline int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_recl static inline void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) { } static inline void sgx_cgroup_init(void) { } + +static inline void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct *charge_mm) +{ +} #else struct sgx_cgroup { struct misc_cg *cg; @@ -65,6 +69,9 @@ static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_reclaim reclaim); void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg); +bool sgx_cgroup_lru_empty(struct misc_cg *root); +bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg); +void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct *charge_mm); void sgx_cgroup_init(void); #endif diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 7f92455d957d..68f28ff2d5ef 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -34,6 +34,16 @@ static struct sgx_epc_lru_list sgx_global_lru; static inline struct sgx_epc_lru_list *sgx_lru_list(struct sgx_epc_page *epc_page) { +#ifdef CONFIG_CGROUP_SGX_EPC + if (epc_page->sgx_cg) + return &epc_page->sgx_cg->lru; + + /* + * This should not happen when cgroup is enabled: Every page belongs + * to a cgroup, or the root by default. + */ + WARN_ON_ONCE(1); +#endif return &sgx_global_lru; } @@ -42,7 +52,11 @@ static inline struct sgx_epc_lru_list *sgx_lru_list(struct sgx_epc_page *epc_pag */ static inline bool sgx_can_reclaim(void) { +#ifdef CONFIG_CGROUP_SGX_EPC + return !sgx_cgroup_lru_empty(misc_cg_root()); +#else return !list_empty(&sgx_global_lru.reclaimable); +#endif } static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0); @@ -404,7 +418,10 @@ static bool sgx_should_reclaim(unsigned long watermark) static void sgx_reclaim_pages_global(struct mm_struct *charge_mm) { - sgx_reclaim_pages(&sgx_global_lru, charge_mm); + if (IS_ENABLED(CONFIG_CGROUP_SGX_EPC)) + sgx_cgroup_reclaim_pages(misc_cg_root(), charge_mm); + else + sgx_reclaim_pages(&sgx_global_lru, charge_mm); } /* @@ -414,6 +431,16 @@ static void sgx_reclaim_pages_global(struct mm_struct *charge_mm) */ void sgx_reclaim_direct(void) { +#ifdef CONFIG_CGROUP_SGX_EPC + struct sgx_cgroup *sgx_cg = sgx_get_current_cg(); + + /* Make sure there are some free pages at cgroup level */ + if (sgx_cg && sgx_cgroup_should_reclaim(sgx_cg)) { + sgx_cgroup_reclaim_pages(sgx_cg->cg, current->mm); + sgx_put_cg(sgx_cg); + } +#endif + /* Make sure there are some free pages at global level */ if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) sgx_reclaim_pages_global(current->mm); }