Message ID | 20231017162306.176586-4-thinker.li@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | Registrating struct_ops types from modules | expand |
On 10/17/23 9:23 AM, thinker.li@gmail.com wrote: > From: Kui-Feng Lee <thinker.li@gmail.com> > > To ensure that a module remains accessible whenever a struct_ops object of > a struct_ops type provided by the module is still in use. > > Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com> > --- > include/linux/bpf.h | 1 + > kernel/bpf/bpf_struct_ops.c | 21 ++++++++++++++++++--- > 2 files changed, 19 insertions(+), 3 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index e6a648af2daa..1e1647c8b0ce 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1627,6 +1627,7 @@ struct bpf_struct_ops { > int (*update)(void *kdata, void *old_kdata); > int (*validate)(void *kdata); > struct btf *btf; > + struct module *owner; > const struct btf_type *type; > const struct btf_type *value_type; > const char *name; > diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c > index 7758f66ad734..b561245fe235 100644 > --- a/kernel/bpf/bpf_struct_ops.c > +++ b/kernel/bpf/bpf_struct_ops.c > @@ -112,6 +112,7 @@ static const struct btf_type *module_type; > > static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, > struct btf *btf, > + struct module *owner, > struct bpf_verifier_log *log) > { > const struct btf_member *member; > @@ -186,6 +187,7 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, > st_ops->name); > } else { > st_ops->btf = btf; > + st_ops->owner = owner; I suspect it will turn out to be just "st_ops->owner = st_ops->owner;" in a latter patch. st_ops->owner should have already been initialized (with THIS_MODULE?). > st_ops->type_id = type_id; > st_ops->type = t; > st_ops->value_id = value_id; > @@ -193,6 +195,7 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, > value_id); > } > } > + nit. extra newline. > } > > void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) > @@ -215,7 +218,7 @@ void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) > > for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { > st_ops = bpf_struct_ops[i]; > - bpf_struct_ops_init_one(st_ops, btf, log); > + bpf_struct_ops_init_one(st_ops, btf, NULL, log); > } > } > > @@ -630,6 +633,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map) > bpf_jit_uncharge_modmem(PAGE_SIZE); > } > bpf_map_area_free(st_map->uvalue); > + module_put(st_map->st_ops->owner); > bpf_map_area_free(st_map); > } > > @@ -676,9 +680,18 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) > if (!st_ops) > return ERR_PTR(-ENOTSUPP); > > + /* If st_ops->owner is NULL, it means the struct_ops is > + * statically defined in the kernel. We don't need to > + * take a refcount on it. > + */ > + if (st_ops->owner && !btf_try_get_module(st_ops->btf)) This just came to my mind. Is the module refcnt needed during map alloc/free or it could be done during the reg/unreg instead? > + return ERR_PTR(-EINVAL); > + > vt = st_ops->value_type; > - if (attr->value_size != vt->size) > + if (attr->value_size != vt->size) { > + module_put(st_ops->owner); > return ERR_PTR(-EINVAL); > + } > > t = st_ops->type; > > @@ -689,8 +702,10 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) > (vt->size - sizeof(struct bpf_struct_ops_value)); > > st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); > - if (!st_map) > + if (!st_map) { > + module_put(st_ops->owner); > return ERR_PTR(-ENOMEM); > + } > > st_map->st_ops = st_ops; > map = &st_map->map;
On 10/18/23 17:36, Martin KaFai Lau wrote: > On 10/17/23 9:23 AM, thinker.li@gmail.com wrote: >> From: Kui-Feng Lee <thinker.li@gmail.com> >> >> To ensure that a module remains accessible whenever a struct_ops >> object of >> a struct_ops type provided by the module is still in use. >> >> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com> >> --- >> include/linux/bpf.h | 1 + >> kernel/bpf/bpf_struct_ops.c | 21 ++++++++++++++++++--- >> 2 files changed, 19 insertions(+), 3 deletions(-) >> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index e6a648af2daa..1e1647c8b0ce 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -1627,6 +1627,7 @@ struct bpf_struct_ops { >> int (*update)(void *kdata, void *old_kdata); >> int (*validate)(void *kdata); >> struct btf *btf; >> + struct module *owner; >> const struct btf_type *type; >> const struct btf_type *value_type; >> const char *name; >> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c >> index 7758f66ad734..b561245fe235 100644 >> --- a/kernel/bpf/bpf_struct_ops.c >> +++ b/kernel/bpf/bpf_struct_ops.c >> @@ -112,6 +112,7 @@ static const struct btf_type *module_type; >> static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, >> struct btf *btf, >> + struct module *owner, >> struct bpf_verifier_log *log) >> { >> const struct btf_member *member; >> @@ -186,6 +187,7 @@ static void bpf_struct_ops_init_one(struct >> bpf_struct_ops *st_ops, >> st_ops->name); >> } else { >> st_ops->btf = btf; >> + st_ops->owner = owner; > > I suspect it will turn out to be just "st_ops->owner = st_ops->owner;" > in a latter patch. st_ops->owner should have already been initialized > (with THIS_MODULE?). Yes, you are correct. It ends up st_ops->owner passing from the caller. I will remove this line and the argument. > >> st_ops->type_id = type_id; >> st_ops->type = t; >> st_ops->value_id = value_id; >> @@ -193,6 +195,7 @@ static void bpf_struct_ops_init_one(struct >> bpf_struct_ops *st_ops, >> value_id); >> } >> } >> + > > nit. extra newline. got it! > >> } >> void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) >> @@ -215,7 +218,7 @@ void bpf_struct_ops_init(struct btf *btf, struct >> bpf_verifier_log *log) >> for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { >> st_ops = bpf_struct_ops[i]; >> - bpf_struct_ops_init_one(st_ops, btf, log); >> + bpf_struct_ops_init_one(st_ops, btf, NULL, log); >> } >> } >> @@ -630,6 +633,7 @@ static void __bpf_struct_ops_map_free(struct >> bpf_map *map) >> bpf_jit_uncharge_modmem(PAGE_SIZE); >> } >> bpf_map_area_free(st_map->uvalue); >> + module_put(st_map->st_ops->owner); >> bpf_map_area_free(st_map); >> } >> @@ -676,9 +680,18 @@ static struct bpf_map >> *bpf_struct_ops_map_alloc(union bpf_attr *attr) >> if (!st_ops) >> return ERR_PTR(-ENOTSUPP); >> + /* If st_ops->owner is NULL, it means the struct_ops is >> + * statically defined in the kernel. We don't need to >> + * take a refcount on it. >> + */ >> + if (st_ops->owner && !btf_try_get_module(st_ops->btf)) > > This just came to my mind. Is the module refcnt needed during map > alloc/free or it could be done during the reg/unreg instead? Sure, I can move it to reg/unreg. > > >> + return ERR_PTR(-EINVAL); >> + >> vt = st_ops->value_type; >> - if (attr->value_size != vt->size) >> + if (attr->value_size != vt->size) { >> + module_put(st_ops->owner); >> return ERR_PTR(-EINVAL); >> + } >> t = st_ops->type; >> @@ -689,8 +702,10 @@ static struct bpf_map >> *bpf_struct_ops_map_alloc(union bpf_attr *attr) >> (vt->size - sizeof(struct bpf_struct_ops_value)); >> st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); >> - if (!st_map) >> + if (!st_map) { >> + module_put(st_ops->owner); >> return ERR_PTR(-ENOMEM); >> + } >> st_map->st_ops = st_ops; >> map = &st_map->map; >
On 10/19/23 09:29, Kui-Feng Lee wrote: > > > On 10/18/23 17:36, Martin KaFai Lau wrote: >> On 10/17/23 9:23 AM, thinker.li@gmail.com wrote: > >> >>> } >>> void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log >>> *log) >>> @@ -215,7 +218,7 @@ void bpf_struct_ops_init(struct btf *btf, struct >>> bpf_verifier_log *log) >>> for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { >>> st_ops = bpf_struct_ops[i]; >>> - bpf_struct_ops_init_one(st_ops, btf, log); >>> + bpf_struct_ops_init_one(st_ops, btf, NULL, log); >>> } >>> } >>> @@ -630,6 +633,7 @@ static void __bpf_struct_ops_map_free(struct >>> bpf_map *map) >>> bpf_jit_uncharge_modmem(PAGE_SIZE); >>> } >>> bpf_map_area_free(st_map->uvalue); >>> + module_put(st_map->st_ops->owner); >>> bpf_map_area_free(st_map); >>> } >>> @@ -676,9 +680,18 @@ static struct bpf_map >>> *bpf_struct_ops_map_alloc(union bpf_attr *attr) >>> if (!st_ops) >>> return ERR_PTR(-ENOTSUPP); >>> + /* If st_ops->owner is NULL, it means the struct_ops is >>> + * statically defined in the kernel. We don't need to >>> + * take a refcount on it. >>> + */ >>> + if (st_ops->owner && !btf_try_get_module(st_ops->btf)) >> >> This just came to my mind. Is the module refcnt needed during map >> alloc/free or it could be done during the reg/unreg instead? > > > Sure, I can move it to reg/unreg. Just found that we relies type information in st_ops to update element and clean up maps. We can not move get/put modules to reg/unreg except keeping a redundant copy in st_map or somewhere. It make the code much more complicated by introducing get/put module here and there. I prefer to keep as it is now. WDYT? >
On 10/19/23 10:07 PM, Kui-Feng Lee wrote: > > > On 10/19/23 09:29, Kui-Feng Lee wrote: >> >> >> On 10/18/23 17:36, Martin KaFai Lau wrote: >>> On 10/17/23 9:23 AM, thinker.li@gmail.com wrote: >> >>> >>>> } >>>> void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) >>>> @@ -215,7 +218,7 @@ void bpf_struct_ops_init(struct btf *btf, struct >>>> bpf_verifier_log *log) >>>> for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { >>>> st_ops = bpf_struct_ops[i]; >>>> - bpf_struct_ops_init_one(st_ops, btf, log); >>>> + bpf_struct_ops_init_one(st_ops, btf, NULL, log); >>>> } >>>> } >>>> @@ -630,6 +633,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map) >>>> bpf_jit_uncharge_modmem(PAGE_SIZE); >>>> } >>>> bpf_map_area_free(st_map->uvalue); >>>> + module_put(st_map->st_ops->owner); >>>> bpf_map_area_free(st_map); >>>> } >>>> @@ -676,9 +680,18 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union >>>> bpf_attr *attr) >>>> if (!st_ops) >>>> return ERR_PTR(-ENOTSUPP); >>>> + /* If st_ops->owner is NULL, it means the struct_ops is >>>> + * statically defined in the kernel. We don't need to >>>> + * take a refcount on it. >>>> + */ >>>> + if (st_ops->owner && !btf_try_get_module(st_ops->btf)) While replying and looking at it again, I don't think the btf_try_get_module(st_ops->btf) is safe. The module's owned st_ops itself could have been gone with the module. The same goes with the "st_ops->owner" test, so btf_is_module(btf) should be used instead. I am risking to act like a broken clock to repeat this question, does it really need to store btf back into the st_ops which may accidentally get into the above btf_try_get_module(st_ops->btf) usage? >>> >>> This just came to my mind. Is the module refcnt needed during map alloc/free >>> or it could be done during the reg/unreg instead? >> >> >> Sure, I can move it to reg/unreg. > > Just found that we relies type information in st_ops to update element and clean > up maps. > We can not move get/put modules to reg/unreg except keeping a redundant copy in > st_map or somewhere. It make the code much more complicated by > introducing get/put module here and there. > > I prefer to keep as it is now. WDYT? Yeah, sure. I was asking after seeing a longer wait time for the module to go away in patch 11 selftest and requires an explicit waiting for the tasks_trace period. Releasing the module refcnt earlier will help. Regardless of the module refcnt hold/free location, I think storing the type* and value* in the module's owned st_ops does not look correct now. It was fine and convenient to piggy back them into bpf_struct_ops when everything was built-in the kernel and no lifetime concern. It makes sense now to separate them out from the module's owned st_ops. Something like: struct btf_struct_ops_desc { struct bpf_struct_ops *ops; const struct btf_type *type; const struct btf_type *value_type; u32 type_id; u32 value_id; }; struct btf_struct_ops_tab { u32 cnt; u32 capacity; struct btf_struct_ops_desc *st_ops_desc[]; }; wdyt?
On 10/20/23 14:37, Martin KaFai Lau wrote: > On 10/19/23 10:07 PM, Kui-Feng Lee wrote: >> >> >> On 10/19/23 09:29, Kui-Feng Lee wrote: >>> >>> >>> On 10/18/23 17:36, Martin KaFai Lau wrote: >>>> On 10/17/23 9:23 AM, thinker.li@gmail.com wrote: >>> >>>> >>>>> } >>>>> void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log >>>>> *log) >>>>> @@ -215,7 +218,7 @@ void bpf_struct_ops_init(struct btf *btf, >>>>> struct bpf_verifier_log *log) >>>>> for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { >>>>> st_ops = bpf_struct_ops[i]; >>>>> - bpf_struct_ops_init_one(st_ops, btf, log); >>>>> + bpf_struct_ops_init_one(st_ops, btf, NULL, log); >>>>> } >>>>> } >>>>> @@ -630,6 +633,7 @@ static void __bpf_struct_ops_map_free(struct >>>>> bpf_map *map) >>>>> bpf_jit_uncharge_modmem(PAGE_SIZE); >>>>> } >>>>> bpf_map_area_free(st_map->uvalue); >>>>> + module_put(st_map->st_ops->owner); >>>>> bpf_map_area_free(st_map); >>>>> } >>>>> @@ -676,9 +680,18 @@ static struct bpf_map >>>>> *bpf_struct_ops_map_alloc(union bpf_attr *attr) >>>>> if (!st_ops) >>>>> return ERR_PTR(-ENOTSUPP); >>>>> + /* If st_ops->owner is NULL, it means the struct_ops is >>>>> + * statically defined in the kernel. We don't need to >>>>> + * take a refcount on it. >>>>> + */ >>>>> + if (st_ops->owner && !btf_try_get_module(st_ops->btf)) > > While replying and looking at it again, I don't think the > btf_try_get_module(st_ops->btf) is safe. The module's owned st_ops > itself could have been gone with the module. The same goes with the > "st_ops->owner" test, so btf_is_module(btf) should be used instead. I have change it locally. Here, it calls btf_try_get_module() after calling btf_struct_ops_find_value(). The new code will call btf_try_get_module() against the btf from attr->value_type_btf_obj_fd before btf_struct_ops_find_value(). Just like I mentioned earlier to ensure the callers of btf_struct_ops_find_value() and btf_struct_ops_find() hold a refcount to the module. > > I am risking to act like a broken clock to repeat this question, does it > really need to store btf back into the st_ops which may accidentally get > into the above btf_try_get_module(st_ops->btf) usage? > >>>> >>>> This just came to my mind. Is the module refcnt needed during map >>>> alloc/free or it could be done during the reg/unreg instead? >>> >>> >>> Sure, I can move it to reg/unreg. >> >> Just found that we relies type information in st_ops to update element >> and clean up maps. >> We can not move get/put modules to reg/unreg except keeping a >> redundant copy in >> st_map or somewhere. It make the code much more complicated by >> introducing get/put module here and there. >> >> I prefer to keep as it is now. WDYT? > > Yeah, sure. I was asking after seeing a longer wait time for the module > to go away in patch 11 selftest and requires an explicit waiting for the > tasks_trace period. Releasing the module refcnt earlier will help. > > Regardless of the module refcnt hold/free location, I think storing the > type* and value* in the module's owned st_ops does not look correct now. > It was fine and convenient to piggy back them into bpf_struct_ops when > everything was built-in the kernel and no lifetime concern. It makes > sense now to separate them out from the module's owned st_ops. Something > like: > > struct btf_struct_ops_desc { > struct bpf_struct_ops *ops; > const struct btf_type *type; > const struct btf_type *value_type; > u32 type_id; > u32 value_id; > }; > > struct btf_struct_ops_tab { > u32 cnt; > u32 capacity; > struct btf_struct_ops_desc *st_ops_desc[]; > }; > > wdyt? So, st_map should hold a pointer to a bpf_struct_ops_desc instead of st_ops, right? It would work!
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e6a648af2daa..1e1647c8b0ce 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1627,6 +1627,7 @@ struct bpf_struct_ops { int (*update)(void *kdata, void *old_kdata); int (*validate)(void *kdata); struct btf *btf; + struct module *owner; const struct btf_type *type; const struct btf_type *value_type; const char *name; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 7758f66ad734..b561245fe235 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -112,6 +112,7 @@ static const struct btf_type *module_type; static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, struct btf *btf, + struct module *owner, struct bpf_verifier_log *log) { const struct btf_member *member; @@ -186,6 +187,7 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, st_ops->name); } else { st_ops->btf = btf; + st_ops->owner = owner; st_ops->type_id = type_id; st_ops->type = t; st_ops->value_id = value_id; @@ -193,6 +195,7 @@ static void bpf_struct_ops_init_one(struct bpf_struct_ops *st_ops, value_id); } } + } void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) @@ -215,7 +218,7 @@ void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log) for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { st_ops = bpf_struct_ops[i]; - bpf_struct_ops_init_one(st_ops, btf, log); + bpf_struct_ops_init_one(st_ops, btf, NULL, log); } } @@ -630,6 +633,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map) bpf_jit_uncharge_modmem(PAGE_SIZE); } bpf_map_area_free(st_map->uvalue); + module_put(st_map->st_ops->owner); bpf_map_area_free(st_map); } @@ -676,9 +680,18 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (!st_ops) return ERR_PTR(-ENOTSUPP); + /* If st_ops->owner is NULL, it means the struct_ops is + * statically defined in the kernel. We don't need to + * take a refcount on it. + */ + if (st_ops->owner && !btf_try_get_module(st_ops->btf)) + return ERR_PTR(-EINVAL); + vt = st_ops->value_type; - if (attr->value_size != vt->size) + if (attr->value_size != vt->size) { + module_put(st_ops->owner); return ERR_PTR(-EINVAL); + } t = st_ops->type; @@ -689,8 +702,10 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) (vt->size - sizeof(struct bpf_struct_ops_value)); st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); - if (!st_map) + if (!st_map) { + module_put(st_ops->owner); return ERR_PTR(-ENOMEM); + } st_map->st_ops = st_ops; map = &st_map->map;