Message ID | 20231030182013.40086-2-haitao.huang@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add Cgroup support for SGX EPC memory | expand |
On Mon Oct 30, 2023 at 8:20 PM EET, Haitao Huang wrote: > From: Kristen Carlson Accardi <kristen@linux.intel.com> > > The misc cgroup controller (subsystem) currently does not perform > resource type specific action for Cgroups Subsystem State (CSS) events: > the 'css_alloc' event when a cgroup is created and the 'css_free' event > when a cgroup is destroyed. > > Define callbacks for those events and allow resource providers to > register the callbacks per resource type as needed. This will be > utilized later by the EPC misc cgroup support implemented in the SGX > driver. > > Also add per resource type private data for those callbacks to store and > access resource specific data. > > Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com> > Co-developed-by: Haitao Huang <haitao.huang@linux.intel.com> > Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com> > --- > V6: > - Create ops struct for per resource callbacks (Jarkko) > - Drop max_write callback (Dave, Michal) > - Style fixes (Kai) > --- > include/linux/misc_cgroup.h | 14 ++++++++++++++ > kernel/cgroup/misc.c | 27 ++++++++++++++++++++++++--- > 2 files changed, 38 insertions(+), 3 deletions(-) > > diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h > index e799b1f8d05b..5dc509c27c3d 100644 > --- a/include/linux/misc_cgroup.h > +++ b/include/linux/misc_cgroup.h > @@ -27,16 +27,30 @@ struct misc_cg; > > #include <linux/cgroup.h> > > +/** > + * struct misc_operations_struct: per resource callback ops. > + * @alloc: invoked for resource specific initialization when cgroup is allocated. > + * @free: invoked for resource specific cleanup when cgroup is deallocated. > + */ > +struct misc_operations_struct { > + int (*alloc)(struct misc_cg *cg); > + void (*free)(struct misc_cg *cg); > +}; Maybe just misc_operations, or even misc_ops? > + > /** > * struct misc_res: Per cgroup per misc type resource > * @max: Maximum limit on the resource. > * @usage: Current usage of the resource. > * @events: Number of times, the resource limit exceeded. > + * @priv: resource specific data. > + * @misc_ops: resource specific operations. > */ > struct misc_res { > u64 max; > atomic64_t usage; > atomic64_t events; > + void *priv; priv is the wrong patch, it just confuses the overall picture heere. please move it to 04/12. Let's deal with the callbacks here. > + const struct misc_operations_struct *misc_ops; > }; misc_ops would be at least consistent with this, as misc_res also has an acronym. > > /** > diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c > index 79a3717a5803..d971ede44ebf 100644 > --- a/kernel/cgroup/misc.c > +++ b/kernel/cgroup/misc.c > @@ -383,23 +383,37 @@ static struct cftype misc_cg_files[] = { > static struct cgroup_subsys_state * > misc_cg_alloc(struct cgroup_subsys_state *parent_css) > { > + struct misc_cg *parent_cg, *cg; > enum misc_res_type i; > - struct misc_cg *cg; > + int ret; > > if (!parent_css) { > - cg = &root_cg; > + parent_cg = cg = &root_cg; > } else { > cg = kzalloc(sizeof(*cg), GFP_KERNEL); > if (!cg) > return ERR_PTR(-ENOMEM); > + parent_cg = css_misc(parent_css); > } > > for (i = 0; i < MISC_CG_RES_TYPES; i++) { > WRITE_ONCE(cg->res[i].max, MAX_NUM); > atomic64_set(&cg->res[i].usage, 0); > + if (parent_cg->res[i].misc_ops && parent_cg->res[i].misc_ops->alloc) { > + ret = parent_cg->res[i].misc_ops->alloc(cg); > + if (ret) > + goto alloc_err; The patch set only has a use case for both operations defined - any partial combinations should never be allowed. To enforce this invariant you could create a set of operations (written out of top of my head): static int misc_res_init(struct misc_res *res, struct misc_ops *ops) { if (!misc_ops->alloc) { pr_err("%s: alloc missing\n", __func__); return -EINVAL; } if (!misc_ops->free) { pr_err("%s: free missing\n", __func__); return -EINVAL; } res->misc_ops = misc_ops; return 0; } static inline int misc_res_alloc(struct misc_cg *cg, struct misc_res *res) { int ret; if (!res->misc_ops) return 0; return res->misc_ops->alloc(cg); } static inline void misc_res_free(struct misc_cg *cg, struct misc_res *res) { int ret; if (!res->misc_ops) return 0; return res->misc_ops->alloc(cg); } Now if anything has misc_ops, it will also always have *both* callback, and nothing half-baked gets in. The above loops would be then: for (i = 0; i < MISC_CG_RES_TYPES; i++) { WRITE_ONCE(cg->res[i].max, MAX_NUM); atomic64_set(&cg->res[i].usage, 0); ret = misc_res_alloc(&parent_cg->res[i]); if (ret) goto alloc_err; Cleaner and better guards for state consistency. In 04/12 you need to then call misc_res_init() instead of direct assignment. BR, Jarkko
On Mon, Oct 30, 2023 at 11:20:02AM -0700, Haitao Huang <haitao.huang@linux.intel.com> wrote: > From: Kristen Carlson Accardi <kristen@linux.intel.com> > > The misc cgroup controller (subsystem) currently does not perform > resource type specific action for Cgroups Subsystem State (CSS) events: > the 'css_alloc' event when a cgroup is created and the 'css_free' event > when a cgroup is destroyed. > > Define callbacks for those events and allow resource providers to > register the callbacks per resource type as needed. This will be > utilized later by the EPC misc cgroup support implemented in the SGX > driver. I notice now that the callbacks are per resource and per cgroup. I think that is an overkill that complicates misc_cg allocation code with parent's callbacks while freeing is done by own callbacks. It's possibly too much liberal to keep objects' lifecycle understandable in the long term too. For some uniformity, I'd suggest struct blkcg_policy array in block/blk-cgroup.c as the precedent. (Perhaps indexed with static enum misc_res_type instead of dynamic index assignment as blkcg_policy_registeer() does.) I hope you don't really need per-cgroup callbacks :-) Michal
On Fri, 05 Jan 2024 03:45:02 -0600, Michal Koutný <mkoutny@suse.com> wrote: > On Mon, Oct 30, 2023 at 11:20:02AM -0700, Haitao Huang > <haitao.huang@linux.intel.com> wrote: >> From: Kristen Carlson Accardi <kristen@linux.intel.com> >> >> The misc cgroup controller (subsystem) currently does not perform >> resource type specific action for Cgroups Subsystem State (CSS) events: >> the 'css_alloc' event when a cgroup is created and the 'css_free' event >> when a cgroup is destroyed. >> >> Define callbacks for those events and allow resource providers to >> register the callbacks per resource type as needed. This will be >> utilized later by the EPC misc cgroup support implemented in the SGX >> driver. > > I notice now that the callbacks are per resource and per cgroup. > I think that is an overkill that complicates misc_cg allocation code > with parent's callbacks while freeing is done by own callbacks. > It's possibly too much liberal to keep objects' lifecycle understandable > in the long term too. > > For some uniformity, I'd suggest struct blkcg_policy array in > block/blk-cgroup.c as the precedent. (Perhaps indexed with static enum > misc_res_type instead of dynamic index assignment as > blkcg_policy_registeer() does.) > > I hope you don't really need per-cgroup callbacks :-) > > Michal Sure I can do that. IIUC, you are suggesting something similar how capacity is set via misc_cg_set_capacity for each resource type. Here we make a misc_cg_set_ops() which stores the ops pointers in a static array, then use them indexed with the resource type. Thanks Haitao
On Wed, 15 Nov 2023 14:25:59 -0600, Jarkko Sakkinen <jarkko@kernel.org> wrote: > On Mon Oct 30, 2023 at 8:20 PM EET, Haitao Huang wrote: >> From: Kristen Carlson Accardi <kristen@linux.intel.com> >> >> The misc cgroup controller (subsystem) currently does not perform >> resource type specific action for Cgroups Subsystem State (CSS) events: >> the 'css_alloc' event when a cgroup is created and the 'css_free' event >> when a cgroup is destroyed. >> >> Define callbacks for those events and allow resource providers to >> register the callbacks per resource type as needed. This will be >> utilized later by the EPC misc cgroup support implemented in the SGX >> driver. >> >> Also add per resource type private data for those callbacks to store and >> access resource specific data. >> >> Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com> >> Co-developed-by: Haitao Huang <haitao.huang@linux.intel.com> >> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com> >> --- >> V6: >> - Create ops struct for per resource callbacks (Jarkko) >> - Drop max_write callback (Dave, Michal) >> - Style fixes (Kai) >> --- >> include/linux/misc_cgroup.h | 14 ++++++++++++++ >> kernel/cgroup/misc.c | 27 ++++++++++++++++++++++++--- >> 2 files changed, 38 insertions(+), 3 deletions(-) >> >> diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h >> index e799b1f8d05b..5dc509c27c3d 100644 >> --- a/include/linux/misc_cgroup.h >> +++ b/include/linux/misc_cgroup.h >> @@ -27,16 +27,30 @@ struct misc_cg; >> >> #include <linux/cgroup.h> >> >> +/** >> + * struct misc_operations_struct: per resource callback ops. >> + * @alloc: invoked for resource specific initialization when cgroup is >> allocated. >> + * @free: invoked for resource specific cleanup when cgroup is >> deallocated. >> + */ >> +struct misc_operations_struct { >> + int (*alloc)(struct misc_cg *cg); >> + void (*free)(struct misc_cg *cg); >> +}; > > Maybe just misc_operations, or even misc_ops? > With Michal's suggestion to make ops per-resource-type, I'll rename this misc_res_ops (I was following vm_operations_struct as example) >> + >> /** >> * struct misc_res: Per cgroup per misc type resource >> * @max: Maximum limit on the resource. >> * @usage: Current usage of the resource. >> * @events: Number of times, the resource limit exceeded. >> + * @priv: resource specific data. >> + * @misc_ops: resource specific operations. >> */ >> struct misc_res { >> u64 max; >> atomic64_t usage; >> atomic64_t events; >> + void *priv; > > priv is the wrong patch, it just confuses the overall picture heere. > please move it to 04/12. Let's deal with the callbacks here. > Ok >> + const struct misc_operations_struct *misc_ops; >> }; > > misc_ops would be at least consistent with this, as misc_res also has an > acronym. > >> >> /** >> diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c >> index 79a3717a5803..d971ede44ebf 100644 >> --- a/kernel/cgroup/misc.c >> +++ b/kernel/cgroup/misc.c >> @@ -383,23 +383,37 @@ static struct cftype misc_cg_files[] = { >> static struct cgroup_subsys_state * >> misc_cg_alloc(struct cgroup_subsys_state *parent_css) >> { >> + struct misc_cg *parent_cg, *cg; >> enum misc_res_type i; >> - struct misc_cg *cg; >> + int ret; >> >> if (!parent_css) { >> - cg = &root_cg; >> + parent_cg = cg = &root_cg; >> } else { >> cg = kzalloc(sizeof(*cg), GFP_KERNEL); >> if (!cg) >> return ERR_PTR(-ENOMEM); >> + parent_cg = css_misc(parent_css); >> } >> >> for (i = 0; i < MISC_CG_RES_TYPES; i++) { >> WRITE_ONCE(cg->res[i].max, MAX_NUM); >> atomic64_set(&cg->res[i].usage, 0); >> + if (parent_cg->res[i].misc_ops && parent_cg->res[i].misc_ops->alloc) >> { >> + ret = parent_cg->res[i].misc_ops->alloc(cg); >> + if (ret) >> + goto alloc_err; > > The patch set only has a use case for both operations defined - any > partial combinations should never be allowed. > > To enforce this invariant you could create a set of operations (written > out of top of my head): > > static int misc_res_init(struct misc_res *res, struct misc_ops *ops) > { > if (!misc_ops->alloc) { > pr_err("%s: alloc missing\n", __func__); > return -EINVAL; > } > > if (!misc_ops->free) { > pr_err("%s: free missing\n", __func__); > return -EINVAL; > } > > res->misc_ops = misc_ops; > return 0; > } > > static inline int misc_res_alloc(struct misc_cg *cg, struct misc_res > *res) > { > int ret; > > if (!res->misc_ops) > return 0; > > return res->misc_ops->alloc(cg); > } > > static inline void misc_res_free(struct misc_cg *cg, struct misc_res > *res) > { > int ret; > > if (!res->misc_ops) > return 0; > > return res->misc_ops->alloc(cg); > } > > Now if anything has misc_ops, it will also always have *both* callback, > and nothing half-baked gets in. > > The above loops would be then: > > for (i = 0; i < MISC_CG_RES_TYPES; i++) { > WRITE_ONCE(cg->res[i].max, MAX_NUM); > atomic64_set(&cg->res[i].usage, 0); > ret = misc_res_alloc(&parent_cg->res[i]); > if (ret) > goto alloc_err; > > Cleaner and better guards for state consistency. In 04/12 you need to > then call misc_res_init() instead of direct assignment. > > BR, Jarkko Will combine these with the use of a static operations array suggested by Michal. Thanks Haitao
On Tue Jan 9, 2024 at 5:37 AM EET, Haitao Huang wrote: > On Wed, 15 Nov 2023 14:25:59 -0600, Jarkko Sakkinen <jarkko@kernel.org> > wrote: > > > On Mon Oct 30, 2023 at 8:20 PM EET, Haitao Huang wrote: > >> From: Kristen Carlson Accardi <kristen@linux.intel.com> > >> > >> The misc cgroup controller (subsystem) currently does not perform > >> resource type specific action for Cgroups Subsystem State (CSS) events: > >> the 'css_alloc' event when a cgroup is created and the 'css_free' event > >> when a cgroup is destroyed. > >> > >> Define callbacks for those events and allow resource providers to > >> register the callbacks per resource type as needed. This will be > >> utilized later by the EPC misc cgroup support implemented in the SGX > >> driver. > >> > >> Also add per resource type private data for those callbacks to store and > >> access resource specific data. > >> > >> Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com> > >> Co-developed-by: Haitao Huang <haitao.huang@linux.intel.com> > >> Signed-off-by: Haitao Huang <haitao.huang@linux.intel.com> > >> --- > >> V6: > >> - Create ops struct for per resource callbacks (Jarkko) > >> - Drop max_write callback (Dave, Michal) > >> - Style fixes (Kai) > >> --- > >> include/linux/misc_cgroup.h | 14 ++++++++++++++ > >> kernel/cgroup/misc.c | 27 ++++++++++++++++++++++++--- > >> 2 files changed, 38 insertions(+), 3 deletions(-) > >> > >> diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h > >> index e799b1f8d05b..5dc509c27c3d 100644 > >> --- a/include/linux/misc_cgroup.h > >> +++ b/include/linux/misc_cgroup.h > >> @@ -27,16 +27,30 @@ struct misc_cg; > >> > >> #include <linux/cgroup.h> > >> > >> +/** > >> + * struct misc_operations_struct: per resource callback ops. > >> + * @alloc: invoked for resource specific initialization when cgroup is > >> allocated. > >> + * @free: invoked for resource specific cleanup when cgroup is > >> deallocated. > >> + */ > >> +struct misc_operations_struct { > >> + int (*alloc)(struct misc_cg *cg); > >> + void (*free)(struct misc_cg *cg); > >> +}; > > > > Maybe just misc_operations, or even misc_ops? > > > > With Michal's suggestion to make ops per-resource-type, I'll rename this > misc_res_ops (I was following vm_operations_struct as example) > > >> + > >> /** > >> * struct misc_res: Per cgroup per misc type resource > >> * @max: Maximum limit on the resource. > >> * @usage: Current usage of the resource. > >> * @events: Number of times, the resource limit exceeded. > >> + * @priv: resource specific data. > >> + * @misc_ops: resource specific operations. > >> */ > >> struct misc_res { > >> u64 max; > >> atomic64_t usage; > >> atomic64_t events; > >> + void *priv; > > > > priv is the wrong patch, it just confuses the overall picture heere. > > please move it to 04/12. Let's deal with the callbacks here. > > > > Ok > > >> + const struct misc_operations_struct *misc_ops; > >> }; > > > > misc_ops would be at least consistent with this, as misc_res also has an > > acronym. > > > >> > >> /** > >> diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c > >> index 79a3717a5803..d971ede44ebf 100644 > >> --- a/kernel/cgroup/misc.c > >> +++ b/kernel/cgroup/misc.c > >> @@ -383,23 +383,37 @@ static struct cftype misc_cg_files[] = { > >> static struct cgroup_subsys_state * > >> misc_cg_alloc(struct cgroup_subsys_state *parent_css) > >> { > >> + struct misc_cg *parent_cg, *cg; > >> enum misc_res_type i; > >> - struct misc_cg *cg; > >> + int ret; > >> > >> if (!parent_css) { > >> - cg = &root_cg; > >> + parent_cg = cg = &root_cg; > >> } else { > >> cg = kzalloc(sizeof(*cg), GFP_KERNEL); > >> if (!cg) > >> return ERR_PTR(-ENOMEM); > >> + parent_cg = css_misc(parent_css); > >> } > >> > >> for (i = 0; i < MISC_CG_RES_TYPES; i++) { > >> WRITE_ONCE(cg->res[i].max, MAX_NUM); > >> atomic64_set(&cg->res[i].usage, 0); > >> + if (parent_cg->res[i].misc_ops && parent_cg->res[i].misc_ops->alloc) > >> { > >> + ret = parent_cg->res[i].misc_ops->alloc(cg); > >> + if (ret) > >> + goto alloc_err; > > > > The patch set only has a use case for both operations defined - any > > partial combinations should never be allowed. > > > > To enforce this invariant you could create a set of operations (written > > out of top of my head): > > > > static int misc_res_init(struct misc_res *res, struct misc_ops *ops) > > { > > if (!misc_ops->alloc) { > > pr_err("%s: alloc missing\n", __func__); > > return -EINVAL; > > } > > > > if (!misc_ops->free) { > > pr_err("%s: free missing\n", __func__); > > return -EINVAL; > > } > > > > res->misc_ops = misc_ops; > > return 0; > > } > > > > static inline int misc_res_alloc(struct misc_cg *cg, struct misc_res > > *res) > > { > > int ret; > > > > if (!res->misc_ops) > > return 0; > > > > return res->misc_ops->alloc(cg); > > } > > > > static inline void misc_res_free(struct misc_cg *cg, struct misc_res > > *res) > > { > > int ret; > > > > if (!res->misc_ops) > > return 0; > > > > return res->misc_ops->alloc(cg); > > } > > > > Now if anything has misc_ops, it will also always have *both* callback, > > and nothing half-baked gets in. > > > > The above loops would be then: > > > > for (i = 0; i < MISC_CG_RES_TYPES; i++) { > > WRITE_ONCE(cg->res[i].max, MAX_NUM); > > atomic64_set(&cg->res[i].usage, 0); > > ret = misc_res_alloc(&parent_cg->res[i]); > > if (ret) > > goto alloc_err; > > > > Cleaner and better guards for state consistency. In 04/12 you need to > > then call misc_res_init() instead of direct assignment. > > > > BR, Jarkko > > Will combine these with the use of a static operations array suggested by > Michal. OK, great, thanks! BR, Jarkko
diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index e799b1f8d05b..5dc509c27c3d 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -27,16 +27,30 @@ struct misc_cg; #include <linux/cgroup.h> +/** + * struct misc_operations_struct: per resource callback ops. + * @alloc: invoked for resource specific initialization when cgroup is allocated. + * @free: invoked for resource specific cleanup when cgroup is deallocated. + */ +struct misc_operations_struct { + int (*alloc)(struct misc_cg *cg); + void (*free)(struct misc_cg *cg); +}; + /** * struct misc_res: Per cgroup per misc type resource * @max: Maximum limit on the resource. * @usage: Current usage of the resource. * @events: Number of times, the resource limit exceeded. + * @priv: resource specific data. + * @misc_ops: resource specific operations. */ struct misc_res { u64 max; atomic64_t usage; atomic64_t events; + void *priv; + const struct misc_operations_struct *misc_ops; }; /** diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c index 79a3717a5803..d971ede44ebf 100644 --- a/kernel/cgroup/misc.c +++ b/kernel/cgroup/misc.c @@ -383,23 +383,37 @@ static struct cftype misc_cg_files[] = { static struct cgroup_subsys_state * misc_cg_alloc(struct cgroup_subsys_state *parent_css) { + struct misc_cg *parent_cg, *cg; enum misc_res_type i; - struct misc_cg *cg; + int ret; if (!parent_css) { - cg = &root_cg; + parent_cg = cg = &root_cg; } else { cg = kzalloc(sizeof(*cg), GFP_KERNEL); if (!cg) return ERR_PTR(-ENOMEM); + parent_cg = css_misc(parent_css); } for (i = 0; i < MISC_CG_RES_TYPES; i++) { WRITE_ONCE(cg->res[i].max, MAX_NUM); atomic64_set(&cg->res[i].usage, 0); + if (parent_cg->res[i].misc_ops && parent_cg->res[i].misc_ops->alloc) { + ret = parent_cg->res[i].misc_ops->alloc(cg); + if (ret) + goto alloc_err; + } } return &cg->css; + +alloc_err: + for (i = 0; i < MISC_CG_RES_TYPES; i++) + if (parent_cg->res[i].misc_ops && parent_cg->res[i].misc_ops->free) + cg->res[i].misc_ops->free(cg); + kfree(cg); + return ERR_PTR(ret); } /** @@ -410,7 +424,14 @@ misc_cg_alloc(struct cgroup_subsys_state *parent_css) */ static void misc_cg_free(struct cgroup_subsys_state *css) { - kfree(css_misc(css)); + struct misc_cg *cg = css_misc(css); + enum misc_res_type i; + + for (i = 0; i < MISC_CG_RES_TYPES; i++) + if (cg->res[i].misc_ops && cg->res[i].misc_ops->free) + cg->res[i].misc_ops->free(cg); + + kfree(cg); } /* Cgroup controller callbacks */