Message ID | 20230308005050.255859-6-kuifeng@meta.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | Transit between BPF TCP congestion controls. | expand |
On 3/7/23 4:50 PM, Kui-Feng Lee wrote: > By improving the BPF_LINK_UPDATE command of bpf(), it should allow you > to conveniently switch between different struct_ops on a single > bpf_link. This would enable smoother transitions from one struct_ops > to another. > > The struct_ops maps passing along with BPF_LINK_UPDATE should have the > BPF_F_LINK flag. > > Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> > --- > include/linux/bpf.h | 1 + > include/uapi/linux/bpf.h | 8 ++++-- > kernel/bpf/bpf_struct_ops.c | 46 ++++++++++++++++++++++++++++++++++ > kernel/bpf/syscall.c | 43 ++++++++++++++++++++++++++++--- > tools/include/uapi/linux/bpf.h | 7 +++++- > 5 files changed, 98 insertions(+), 7 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index afca6c526fe4..29d555a82bad 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1470,6 +1470,7 @@ struct bpf_link_ops { > void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); > int (*fill_link_info)(const struct bpf_link *link, > struct bpf_link_info *info); > + int (*update_map)(struct bpf_link *link, struct bpf_map *new_map); > }; > > struct bpf_tramp_link { > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index f9fc7b8af3c4..edef9cf7d596 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -1555,8 +1555,12 @@ union bpf_attr { > > struct { /* struct used by BPF_LINK_UPDATE command */ > __u32 link_fd; /* link fd */ > - /* new program fd to update link with */ > - __u32 new_prog_fd; > + union { > + /* new program fd to update link with */ > + __u32 new_prog_fd; > + /* new struct_ops map fd to update link with */ > + __u32 new_map_fd; > + }; > __u32 flags; /* extra flags */ > /* expected link's program fd; is specified only if > * BPF_F_REPLACE flag is set in flags */ > diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c > index 5a7e86cf67b5..79e663869e51 100644 > --- a/kernel/bpf/bpf_struct_ops.c > +++ b/kernel/bpf/bpf_struct_ops.c > @@ -775,10 +775,56 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, > return 0; > } > > +static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map) > +{ > + struct bpf_struct_ops_value *kvalue; > + struct bpf_struct_ops_map *st_map, *old_st_map; > + struct bpf_struct_ops_link *st_link; > + struct bpf_map *old_map; > + int err = 0; > + > + if (new_map->map_type != BPF_MAP_TYPE_STRUCT_OPS || > + !(new_map->map_flags & BPF_F_LINK)) > + return -EINVAL; > + > + mutex_lock(&update_mutex); > + > + st_link = container_of(link, struct bpf_struct_ops_link, link); > + > + /* The new and old struct_ops must be the same type. */ > + st_map = container_of(new_map, struct bpf_struct_ops_map, map); nit. move the st_link and st_map init out of the lock. > + > + old_map = st_link->map; rcu_dereference_protected(...) > + old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); > + if (st_map->st_ops != old_st_map->st_ops || > + /* Pair with smp_store_release() during map_update */ > + smp_load_acquire(&st_map->kvalue.state) != BPF_STRUCT_OPS_STATE_READY) { nit. test the smp_load_acquire(&st_map...) outside of the lock. Do it together with the new_map checking at the beginning of the func. > + err = -EINVAL; > + goto err_out; > + } > + > + kvalue = &st_map->kvalue; > + > + err = st_map->st_ops->update(kvalue->data, old_st_map->kvalue.data); > + if (err) > + goto err_out; > + > + bpf_map_inc(new_map); > + rcu_assign_pointer(st_link->map, new_map); > + > + bpf_map_put(old_map); > + > +err_out: > + mutex_unlock(&update_mutex); > + > + return err; > +} > + > static const struct bpf_link_ops bpf_struct_ops_map_lops = { > .dealloc = bpf_struct_ops_map_link_dealloc, > .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, > .fill_link_info = bpf_struct_ops_map_link_fill_link_info, > + .update_map = bpf_struct_ops_map_link_update, > }; > > int bpf_struct_ops_link_create(union bpf_attr *attr) > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 3a4503987a48..c087dd2e2c08 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -4658,6 +4658,30 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) > return ret; > } > > +static int link_update_map(struct bpf_link *link, union bpf_attr *attr) > +{ > + struct bpf_map *new_map; > + int ret = 0; > + > + new_map = bpf_map_get(attr->link_update.new_map_fd); > + if (IS_ERR(new_map)) > + return -EINVAL; > + > + if (new_map->map_type != BPF_MAP_TYPE_STRUCT_OPS) { This is unnecessary test. The individual '.update_map()' should test for its own map_type and the new bpf_struct_ops_map_link_update() does test it. > + ret = -EINVAL; > + goto out_put_map; > + } > + > + if (link->ops->update_map) This has just been tested in link_update() before calling link_update_map(). > + ret = link->ops->update_map(link, new_map); > + else > + ret = -EINVAL; > + > +out_put_map: > + bpf_map_put(new_map); > + return ret; > +} > + > #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd > > static int link_update(union bpf_attr *attr) > @@ -4670,14 +4694,25 @@ static int link_update(union bpf_attr *attr) > if (CHECK_ATTR(BPF_LINK_UPDATE)) > return -EINVAL; > > - flags = attr->link_update.flags; > - if (flags & ~BPF_F_REPLACE) > - return -EINVAL; > - > link = bpf_link_get_from_fd(attr->link_update.link_fd); > if (IS_ERR(link)) > return PTR_ERR(link); > > + flags = attr->link_update.flags; > + > + if (link->ops->update_map) { > + if (flags) /* always replace the existing one */ > + ret = -EINVAL; > + else > + ret = link_update_map(link, attr); > + goto out_put_link; > + } > + > + if (flags & ~BPF_F_REPLACE) { > + ret = -EINVAL; > + goto out_put_link; > + } > + > new_prog = bpf_prog_get(attr->link_update.new_prog_fd); > if (IS_ERR(new_prog)) { > ret = PTR_ERR(new_prog);
On 3/8/23 14:21, Martin KaFai Lau wrote: > On 3/7/23 4:50 PM, Kui-Feng Lee wrote: >> By improving the BPF_LINK_UPDATE command of bpf(), it should allow you >> to conveniently switch between different struct_ops on a single >> bpf_link. This would enable smoother transitions from one struct_ops >> to another. >> >> The struct_ops maps passing along with BPF_LINK_UPDATE should have the >> BPF_F_LINK flag. >> >> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> >> --- >> include/linux/bpf.h | 1 + >> include/uapi/linux/bpf.h | 8 ++++-- >> kernel/bpf/bpf_struct_ops.c | 46 ++++++++++++++++++++++++++++++++++ >> kernel/bpf/syscall.c | 43 ++++++++++++++++++++++++++++--- >> tools/include/uapi/linux/bpf.h | 7 +++++- >> 5 files changed, 98 insertions(+), 7 deletions(-) >> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index afca6c526fe4..29d555a82bad 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -1470,6 +1470,7 @@ struct bpf_link_ops { >> void (*show_fdinfo)(const struct bpf_link *link, struct seq_file >> *seq); >> int (*fill_link_info)(const struct bpf_link *link, >> struct bpf_link_info *info); >> + int (*update_map)(struct bpf_link *link, struct bpf_map *new_map); >> }; >> struct bpf_tramp_link { >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index f9fc7b8af3c4..edef9cf7d596 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -1555,8 +1555,12 @@ union bpf_attr { >> struct { /* struct used by BPF_LINK_UPDATE command */ >> __u32 link_fd; /* link fd */ >> - /* new program fd to update link with */ >> - __u32 new_prog_fd; >> + union { >> + /* new program fd to update link with */ >> + __u32 new_prog_fd; >> + /* new struct_ops map fd to update link with */ >> + __u32 new_map_fd; >> + }; >> __u32 flags; /* extra flags */ >> /* expected link's program fd; is specified only if >> * BPF_F_REPLACE flag is set in flags */ >> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c >> index 5a7e86cf67b5..79e663869e51 100644 >> --- a/kernel/bpf/bpf_struct_ops.c >> +++ b/kernel/bpf/bpf_struct_ops.c >> @@ -775,10 +775,56 @@ static int >> bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, >> return 0; >> } >> +static int bpf_struct_ops_map_link_update(struct bpf_link *link, >> struct bpf_map *new_map) >> +{ >> + struct bpf_struct_ops_value *kvalue; >> + struct bpf_struct_ops_map *st_map, *old_st_map; >> + struct bpf_struct_ops_link *st_link; >> + struct bpf_map *old_map; >> + int err = 0; >> + >> + if (new_map->map_type != BPF_MAP_TYPE_STRUCT_OPS || >> + !(new_map->map_flags & BPF_F_LINK)) >> + return -EINVAL; >> + >> + mutex_lock(&update_mutex); >> + >> + st_link = container_of(link, struct bpf_struct_ops_link, link); >> + >> + /* The new and old struct_ops must be the same type. */ >> + st_map = container_of(new_map, struct bpf_struct_ops_map, map); > > nit. move the st_link and st_map init out of the lock. > Ok >> + >> + old_map = st_link->map; > > rcu_dereference_protected(...) > >> + old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); >> + if (st_map->st_ops != old_st_map->st_ops || >> + /* Pair with smp_store_release() during map_update */ >> + smp_load_acquire(&st_map->kvalue.state) != >> BPF_STRUCT_OPS_STATE_READY) { > > nit. test the smp_load_acquire(&st_map...) outside of the lock. > Do it together with the new_map checking at the beginning of the func. Ok > >> + err = -EINVAL; >> + goto err_out; >> + } >> + >> + kvalue = &st_map->kvalue; >> + >> + err = st_map->st_ops->update(kvalue->data, old_st_map->kvalue.data); >> + if (err) >> + goto err_out; >> + >> + bpf_map_inc(new_map); >> + rcu_assign_pointer(st_link->map, new_map); >> + >> + bpf_map_put(old_map); >> + >> +err_out: >> + mutex_unlock(&update_mutex); >> + >> + return err; >> +} >> + >> static const struct bpf_link_ops bpf_struct_ops_map_lops = { >> .dealloc = bpf_struct_ops_map_link_dealloc, >> .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, >> .fill_link_info = bpf_struct_ops_map_link_fill_link_info, >> + .update_map = bpf_struct_ops_map_link_update, >> }; >> int bpf_struct_ops_link_create(union bpf_attr *attr) >> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c >> index 3a4503987a48..c087dd2e2c08 100644 >> --- a/kernel/bpf/syscall.c >> +++ b/kernel/bpf/syscall.c >> @@ -4658,6 +4658,30 @@ static int link_create(union bpf_attr *attr, >> bpfptr_t uattr) >> return ret; >> } >> +static int link_update_map(struct bpf_link *link, union bpf_attr *attr) >> +{ >> + struct bpf_map *new_map; >> + int ret = 0; >> + >> + new_map = bpf_map_get(attr->link_update.new_map_fd); >> + if (IS_ERR(new_map)) >> + return -EINVAL; >> + >> + if (new_map->map_type != BPF_MAP_TYPE_STRUCT_OPS) { > > This is unnecessary test. The individual '.update_map()' should test for > its own map_type and the new bpf_struct_ops_map_link_update() does test it. > Will remove it. >> + ret = -EINVAL; >> + goto out_put_map; >> + } >> + >> + if (link->ops->update_map) > > This has just been tested in link_update() before calling > link_update_map(). Sure > >> + ret = link->ops->update_map(link, new_map); >> + else >> + ret = -EINVAL; >> + >> +out_put_map: >> + bpf_map_put(new_map); >> + return ret; >> +} >> + >> #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd >> static int link_update(union bpf_attr *attr) >> @@ -4670,14 +4694,25 @@ static int link_update(union bpf_attr *attr) >> if (CHECK_ATTR(BPF_LINK_UPDATE)) >> return -EINVAL; >> - flags = attr->link_update.flags; >> - if (flags & ~BPF_F_REPLACE) >> - return -EINVAL; >> - >> link = bpf_link_get_from_fd(attr->link_update.link_fd); >> if (IS_ERR(link)) >> return PTR_ERR(link); >> + flags = attr->link_update.flags; >> + >> + if (link->ops->update_map) { >> + if (flags) /* always replace the existing one */ >> + ret = -EINVAL; >> + else >> + ret = link_update_map(link, attr); >> + goto out_put_link; >> + } >> + >> + if (flags & ~BPF_F_REPLACE) { >> + ret = -EINVAL; >> + goto out_put_link; >> + } >> + >> new_prog = bpf_prog_get(attr->link_update.new_prog_fd); >> if (IS_ERR(new_prog)) { >> ret = PTR_ERR(new_prog); >
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index afca6c526fe4..29d555a82bad 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1470,6 +1470,7 @@ struct bpf_link_ops { void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); int (*fill_link_info)(const struct bpf_link *link, struct bpf_link_info *info); + int (*update_map)(struct bpf_link *link, struct bpf_map *new_map); }; struct bpf_tramp_link { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f9fc7b8af3c4..edef9cf7d596 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1555,8 +1555,12 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ /* expected link's program fd; is specified only if * BPF_F_REPLACE flag is set in flags */ diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 5a7e86cf67b5..79e663869e51 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -775,10 +775,56 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, return 0; } +static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map) +{ + struct bpf_struct_ops_value *kvalue; + struct bpf_struct_ops_map *st_map, *old_st_map; + struct bpf_struct_ops_link *st_link; + struct bpf_map *old_map; + int err = 0; + + if (new_map->map_type != BPF_MAP_TYPE_STRUCT_OPS || + !(new_map->map_flags & BPF_F_LINK)) + return -EINVAL; + + mutex_lock(&update_mutex); + + st_link = container_of(link, struct bpf_struct_ops_link, link); + + /* The new and old struct_ops must be the same type. */ + st_map = container_of(new_map, struct bpf_struct_ops_map, map); + + old_map = st_link->map; + old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); + if (st_map->st_ops != old_st_map->st_ops || + /* Pair with smp_store_release() during map_update */ + smp_load_acquire(&st_map->kvalue.state) != BPF_STRUCT_OPS_STATE_READY) { + err = -EINVAL; + goto err_out; + } + + kvalue = &st_map->kvalue; + + err = st_map->st_ops->update(kvalue->data, old_st_map->kvalue.data); + if (err) + goto err_out; + + bpf_map_inc(new_map); + rcu_assign_pointer(st_link->map, new_map); + + bpf_map_put(old_map); + +err_out: + mutex_unlock(&update_mutex); + + return err; +} + static const struct bpf_link_ops bpf_struct_ops_map_lops = { .dealloc = bpf_struct_ops_map_link_dealloc, .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, .fill_link_info = bpf_struct_ops_map_link_fill_link_info, + .update_map = bpf_struct_ops_map_link_update, }; int bpf_struct_ops_link_create(union bpf_attr *attr) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3a4503987a48..c087dd2e2c08 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4658,6 +4658,30 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) return ret; } +static int link_update_map(struct bpf_link *link, union bpf_attr *attr) +{ + struct bpf_map *new_map; + int ret = 0; + + new_map = bpf_map_get(attr->link_update.new_map_fd); + if (IS_ERR(new_map)) + return -EINVAL; + + if (new_map->map_type != BPF_MAP_TYPE_STRUCT_OPS) { + ret = -EINVAL; + goto out_put_map; + } + + if (link->ops->update_map) + ret = link->ops->update_map(link, new_map); + else + ret = -EINVAL; + +out_put_map: + bpf_map_put(new_map); + return ret; +} + #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd static int link_update(union bpf_attr *attr) @@ -4670,14 +4694,25 @@ static int link_update(union bpf_attr *attr) if (CHECK_ATTR(BPF_LINK_UPDATE)) return -EINVAL; - flags = attr->link_update.flags; - if (flags & ~BPF_F_REPLACE) - return -EINVAL; - link = bpf_link_get_from_fd(attr->link_update.link_fd); if (IS_ERR(link)) return PTR_ERR(link); + flags = attr->link_update.flags; + + if (link->ops->update_map) { + if (flags) /* always replace the existing one */ + ret = -EINVAL; + else + ret = link_update_map(link, attr); + goto out_put_link; + } + + if (flags & ~BPF_F_REPLACE) { + ret = -EINVAL; + goto out_put_link; + } + new_prog = bpf_prog_get(attr->link_update.new_prog_fd); if (IS_ERR(new_prog)) { ret = PTR_ERR(new_prog); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 051b85525302..cfd1d2a9898d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1556,7 +1556,12 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ /* expected link's program fd; is specified only if * BPF_F_REPLACE flag is set in flags */
By improving the BPF_LINK_UPDATE command of bpf(), it should allow you to conveniently switch between different struct_ops on a single bpf_link. This would enable smoother transitions from one struct_ops to another. The struct_ops maps passing along with BPF_LINK_UPDATE should have the BPF_F_LINK flag. Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 8 ++++-- kernel/bpf/bpf_struct_ops.c | 46 ++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 43 ++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 7 +++++- 5 files changed, 98 insertions(+), 7 deletions(-)