Message ID | 20230214221718.503964-3-kuifeng@meta.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | Transit between BPF TCP congestion controls. | expand |
On 02/14, Kui-Feng Lee wrote: > This feature lets you immediately transition to another congestion > control algorithm or implementation with the same name. Once a name > is updated, new connections will apply this new algorithm. > Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> > --- > include/linux/bpf.h | 1 + > include/net/tcp.h | 2 ++ > net/bpf/bpf_dummy_struct_ops.c | 6 ++++++ > net/ipv4/bpf_tcp_ca.c | 6 ++++++ > net/ipv4/tcp_cong.c | 39 ++++++++++++++++++++++++++++++++++ > 5 files changed, 54 insertions(+) > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 13683584b071..5fe39f56a760 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1450,6 +1450,7 @@ struct bpf_struct_ops { > void *kdata, const void *udata); > int (*reg)(void *kdata); > void (*unreg)(void *kdata); > + int (*update)(void *kdata, void *old_kdata); > const struct btf_type *type; > const struct btf_type *value_type; > const char *name; > diff --git a/include/net/tcp.h b/include/net/tcp.h > index db9f828e9d1e..239cc0e2639c 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -1117,6 +1117,8 @@ struct tcp_congestion_ops { > int tcp_register_congestion_control(struct tcp_congestion_ops *type); > void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); > +int tcp_update_congestion_control(struct tcp_congestion_ops *type, > + struct tcp_congestion_ops *old_type); > void tcp_assign_congestion_control(struct sock *sk); > void tcp_init_congestion_control(struct sock *sk); > diff --git a/net/bpf/bpf_dummy_struct_ops.c > b/net/bpf/bpf_dummy_struct_ops.c > index ff4f89a2b02a..158f14e240d0 100644 > --- a/net/bpf/bpf_dummy_struct_ops.c > +++ b/net/bpf/bpf_dummy_struct_ops.c > @@ -222,12 +222,18 @@ static void bpf_dummy_unreg(void *kdata) > { > } > +static int bpf_dummy_update(void *kdata, void *old_kdata) > +{ > + return -EOPNOTSUPP; > +} > + > struct bpf_struct_ops bpf_bpf_dummy_ops = { > .verifier_ops = &bpf_dummy_verifier_ops, > .init = bpf_dummy_init, > .check_member = bpf_dummy_ops_check_member, > .init_member = bpf_dummy_init_member, > .reg = bpf_dummy_reg, > + .update = bpf_dummy_update, > .unreg = bpf_dummy_unreg, > .name = "bpf_dummy_ops", > }; > diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c > index 13fc0c185cd9..66ce5fadfe42 100644 > --- a/net/ipv4/bpf_tcp_ca.c > +++ b/net/ipv4/bpf_tcp_ca.c > @@ -266,10 +266,16 @@ static void bpf_tcp_ca_unreg(void *kdata) > tcp_unregister_congestion_control(kdata); > } > +static int bpf_tcp_ca_update(void *kdata, void *old_kdata) > +{ > + return tcp_update_congestion_control(kdata, old_kdata); > +} > + > struct bpf_struct_ops bpf_tcp_congestion_ops = { > .verifier_ops = &bpf_tcp_ca_verifier_ops, > .reg = bpf_tcp_ca_reg, > .unreg = bpf_tcp_ca_unreg, > + .update = bpf_tcp_ca_update, > .check_member = bpf_tcp_ca_check_member, > .init_member = bpf_tcp_ca_init_member, > .init = bpf_tcp_ca_init, > diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c > index db8b4b488c31..22fd7c12360e 100644 > --- a/net/ipv4/tcp_cong.c > +++ b/net/ipv4/tcp_cong.c > @@ -130,6 +130,45 @@ void tcp_unregister_congestion_control(struct > tcp_congestion_ops *ca) > } > EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); > +/* Replace a registered old ca with a new one. > + * > + * The new ca must have the same name as the old one, that has been > + * registered. > + */ > +int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct > tcp_congestion_ops *old_ca) > +{ > + struct tcp_congestion_ops *existing; > + int ret = 0; > + [..] > + /* all algorithms must implement these */ > + if (!ca->ssthresh || !ca->undo_cwnd || > + !(ca->cong_avoid || ca->cong_control)) { > + pr_err("%s does not implement required ops\n", old_ca->name); > + return -EINVAL; > + } > + > + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); Can we have this as some common _validate method to avoid copy-paste from tcp_register_congestion_control. Or, even better, can we can since function handle both cases? tcp_register_congestion_control(ca, old_ca); - when old_ca == NULL -> register - when old_ca != NULL -> try to update > + > + spin_lock(&tcp_cong_list_lock); > + existing = tcp_ca_find_key(ca->key); > + if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, > ca->name)) { > + pr_notice("%s not registered or non-unique key\n", > + ca->name); > + ret = -EINVAL; > + } else if (existing != old_ca) { > + pr_notice("invalid old congestion control algorithm to replace\n"); > + ret = -EINVAL; > + } else { > + list_del_rcu(&existing->list); > + list_add_tail_rcu(&ca->list, &tcp_cong_list); > + pr_debug("%s updated\n", ca->name); > + } > + spin_unlock(&tcp_cong_list_lock); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(tcp_update_congestion_control); > + > u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool > *ecn_ca) > { > const struct tcp_congestion_ops *ca; > -- > 2.30.2
On 2/14/23 18:43, Stanislav Fomichev wrote: > On 02/14, Kui-Feng Lee wrote: >> This feature lets you immediately transition to another congestion >> control algorithm or implementation with the same name. Once a name >> is updated, new connections will apply this new algorithm. > >> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> >> --- >> include/linux/bpf.h | 1 + >> include/net/tcp.h | 2 ++ >> net/bpf/bpf_dummy_struct_ops.c | 6 ++++++ >> net/ipv4/bpf_tcp_ca.c | 6 ++++++ >> net/ipv4/tcp_cong.c | 39 ++++++++++++++++++++++++++++++++++ >> 5 files changed, 54 insertions(+) > >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index 13683584b071..5fe39f56a760 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -1450,6 +1450,7 @@ struct bpf_struct_ops { >> void *kdata, const void *udata); >> int (*reg)(void *kdata); >> void (*unreg)(void *kdata); >> + int (*update)(void *kdata, void *old_kdata); >> const struct btf_type *type; >> const struct btf_type *value_type; >> const char *name; >> diff --git a/include/net/tcp.h b/include/net/tcp.h >> index db9f828e9d1e..239cc0e2639c 100644 >> --- a/include/net/tcp.h >> +++ b/include/net/tcp.h >> @@ -1117,6 +1117,8 @@ struct tcp_congestion_ops { > >> int tcp_register_congestion_control(struct tcp_congestion_ops *type); >> void tcp_unregister_congestion_control(struct tcp_congestion_ops >> *type); >> +int tcp_update_congestion_control(struct tcp_congestion_ops *type, >> + struct tcp_congestion_ops *old_type); > >> void tcp_assign_congestion_control(struct sock *sk); >> void tcp_init_congestion_control(struct sock *sk); >> diff --git a/net/bpf/bpf_dummy_struct_ops.c >> b/net/bpf/bpf_dummy_struct_ops.c >> index ff4f89a2b02a..158f14e240d0 100644 >> --- a/net/bpf/bpf_dummy_struct_ops.c >> +++ b/net/bpf/bpf_dummy_struct_ops.c >> @@ -222,12 +222,18 @@ static void bpf_dummy_unreg(void *kdata) >> { >> } > >> +static int bpf_dummy_update(void *kdata, void *old_kdata) >> +{ >> + return -EOPNOTSUPP; >> +} >> + >> struct bpf_struct_ops bpf_bpf_dummy_ops = { >> .verifier_ops = &bpf_dummy_verifier_ops, >> .init = bpf_dummy_init, >> .check_member = bpf_dummy_ops_check_member, >> .init_member = bpf_dummy_init_member, >> .reg = bpf_dummy_reg, >> + .update = bpf_dummy_update, >> .unreg = bpf_dummy_unreg, >> .name = "bpf_dummy_ops", >> }; >> diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c >> index 13fc0c185cd9..66ce5fadfe42 100644 >> --- a/net/ipv4/bpf_tcp_ca.c >> +++ b/net/ipv4/bpf_tcp_ca.c >> @@ -266,10 +266,16 @@ static void bpf_tcp_ca_unreg(void *kdata) >> tcp_unregister_congestion_control(kdata); >> } > >> +static int bpf_tcp_ca_update(void *kdata, void *old_kdata) >> +{ >> + return tcp_update_congestion_control(kdata, old_kdata); >> +} >> + >> struct bpf_struct_ops bpf_tcp_congestion_ops = { >> .verifier_ops = &bpf_tcp_ca_verifier_ops, >> .reg = bpf_tcp_ca_reg, >> .unreg = bpf_tcp_ca_unreg, >> + .update = bpf_tcp_ca_update, >> .check_member = bpf_tcp_ca_check_member, >> .init_member = bpf_tcp_ca_init_member, >> .init = bpf_tcp_ca_init, >> diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c >> index db8b4b488c31..22fd7c12360e 100644 >> --- a/net/ipv4/tcp_cong.c >> +++ b/net/ipv4/tcp_cong.c >> @@ -130,6 +130,45 @@ void tcp_unregister_congestion_control(struct >> tcp_congestion_ops *ca) >> } >> EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); > >> +/* Replace a registered old ca with a new one. >> + * >> + * The new ca must have the same name as the old one, that has been >> + * registered. >> + */ >> +int tcp_update_congestion_control(struct tcp_congestion_ops *ca, >> struct tcp_congestion_ops *old_ca) >> +{ >> + struct tcp_congestion_ops *existing; >> + int ret = 0; >> + > > [..] > >> + /* all algorithms must implement these */ >> + if (!ca->ssthresh || !ca->undo_cwnd || >> + !(ca->cong_avoid || ca->cong_control)) { >> + pr_err("%s does not implement required ops\n", old_ca->name); >> + return -EINVAL; >> + } >> + >> + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); > > Can we have this as some common _validate method to avoid copy-paste > from tcp_register_congestion_control. Sure! > > Or, even better, can we can since function handle both cases? > > tcp_register_congestion_control(ca, old_ca); > - when old_ca == NULL -> register > - when old_ca != NULL -> try to update We use this function in a lot of different places. To make it easier, I added a new function instead of changing the old one. > >> + >> + spin_lock(&tcp_cong_list_lock); >> + existing = tcp_ca_find_key(ca->key); >> + if (ca->key == TCP_CA_UNSPEC || !existing || >> strcmp(existing->name, ca->name)) { >> + pr_notice("%s not registered or non-unique key\n", >> + ca->name); >> + ret = -EINVAL; >> + } else if (existing != old_ca) { >> + pr_notice("invalid old congestion control algorithm to >> replace\n"); >> + ret = -EINVAL; >> + } else { >> + list_del_rcu(&existing->list); >> + list_add_tail_rcu(&ca->list, &tcp_cong_list); >> + pr_debug("%s updated\n", ca->name); >> + } >> + spin_unlock(&tcp_cong_list_lock); >> + >> + return ret; >> +} >> +EXPORT_SYMBOL_GPL(tcp_update_congestion_control); >> + >> u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool >> *ecn_ca) >> { >> const struct tcp_congestion_ops *ca; >> -- >> 2.30.2 >
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 13683584b071..5fe39f56a760 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1450,6 +1450,7 @@ struct bpf_struct_ops { void *kdata, const void *udata); int (*reg)(void *kdata); void (*unreg)(void *kdata); + int (*update)(void *kdata, void *old_kdata); const struct btf_type *type; const struct btf_type *value_type; const char *name; diff --git a/include/net/tcp.h b/include/net/tcp.h index db9f828e9d1e..239cc0e2639c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1117,6 +1117,8 @@ struct tcp_congestion_ops { int tcp_register_congestion_control(struct tcp_congestion_ops *type); void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); +int tcp_update_congestion_control(struct tcp_congestion_ops *type, + struct tcp_congestion_ops *old_type); void tcp_assign_congestion_control(struct sock *sk); void tcp_init_congestion_control(struct sock *sk); diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index ff4f89a2b02a..158f14e240d0 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -222,12 +222,18 @@ static void bpf_dummy_unreg(void *kdata) { } +static int bpf_dummy_update(void *kdata, void *old_kdata) +{ + return -EOPNOTSUPP; +} + struct bpf_struct_ops bpf_bpf_dummy_ops = { .verifier_ops = &bpf_dummy_verifier_ops, .init = bpf_dummy_init, .check_member = bpf_dummy_ops_check_member, .init_member = bpf_dummy_init_member, .reg = bpf_dummy_reg, + .update = bpf_dummy_update, .unreg = bpf_dummy_unreg, .name = "bpf_dummy_ops", }; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 13fc0c185cd9..66ce5fadfe42 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -266,10 +266,16 @@ static void bpf_tcp_ca_unreg(void *kdata) tcp_unregister_congestion_control(kdata); } +static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +{ + return tcp_update_congestion_control(kdata, old_kdata); +} + struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, .unreg = bpf_tcp_ca_unreg, + .update = bpf_tcp_ca_update, .check_member = bpf_tcp_ca_check_member, .init_member = bpf_tcp_ca_init_member, .init = bpf_tcp_ca_init, diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db8b4b488c31..22fd7c12360e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -130,6 +130,45 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +/* Replace a registered old ca with a new one. + * + * The new ca must have the same name as the old one, that has been + * registered. + */ +int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca) +{ + struct tcp_congestion_ops *existing; + int ret = 0; + + /* all algorithms must implement these */ + if (!ca->ssthresh || !ca->undo_cwnd || + !(ca->cong_avoid || ca->cong_control)) { + pr_err("%s does not implement required ops\n", old_ca->name); + return -EINVAL; + } + + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + + spin_lock(&tcp_cong_list_lock); + existing = tcp_ca_find_key(ca->key); + if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) { + pr_notice("%s not registered or non-unique key\n", + ca->name); + ret = -EINVAL; + } else if (existing != old_ca) { + pr_notice("invalid old congestion control algorithm to replace\n"); + ret = -EINVAL; + } else { + list_del_rcu(&existing->list); + list_add_tail_rcu(&ca->list, &tcp_cong_list); + pr_debug("%s updated\n", ca->name); + } + spin_unlock(&tcp_cong_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(tcp_update_congestion_control); + u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca;
This feature lets you immediately transition to another congestion control algorithm or implementation with the same name. Once a name is updated, new connections will apply this new algorithm. Signed-off-by: Kui-Feng Lee <kuifeng@meta.com> --- include/linux/bpf.h | 1 + include/net/tcp.h | 2 ++ net/bpf/bpf_dummy_struct_ops.c | 6 ++++++ net/ipv4/bpf_tcp_ca.c | 6 ++++++ net/ipv4/tcp_cong.c | 39 ++++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+)