Message ID | 4089672c8557e031d38306ef49e26e9902e7db57.1631011068.git.geliangtang@xiaomi.com (mailing list archive) |
---|---|
State | RFC, archived |
Headers | show |
Series | round-robin packet scheduler support | expand |
On Tue, 2021-09-07 at 18:41 +0800, Geliang Tang wrote: > From: Geliang Tang <geliangtang@xiaomi.com> > > This patch added struct mptcp_sched_ops. And define the scheduler > init, register and find functions. > > Signed-off-by: Geliang Tang <geliangtang@xiaomi.com> > --- > net/mptcp/protocol.c | 60 +++++++++++++++++++++++++++++++++++++++++--- > net/mptcp/protocol.h | 8 ++++++ > 2 files changed, 65 insertions(+), 3 deletions(-) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 2a525c7ae920..ab72a3950f2b 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -1515,6 +1515,58 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) > return NULL; > } > > +static struct mptcp_sched_ops mptcp_sched_default = { > + .get_subflow = mptcp_subflow_get_send, > + .name = "default", > + .owner = THIS_MODULE, > +}; > + > +static DEFINE_SPINLOCK(mptcp_sched_list_lock); > +static LIST_HEAD(mptcp_sched_list); > + > +static struct mptcp_sched_ops *mptcp_sched_find(const char *name) > +{ > + struct mptcp_sched_ops *ops; > + > + list_for_each_entry_rcu(ops, &mptcp_sched_list, list) { > + if (!strcmp(ops->name, name)) > + return ops; > + } > + > + return NULL; > +} > + > +static int mptcp_register_scheduler(struct mptcp_sched_ops *sched) > +{ > + int ret = 0; > + > + if (!sched->get_subflow) > + return -EINVAL; > + > + spin_lock(&mptcp_sched_list_lock); > + if (mptcp_sched_find(sched->name)) { > + pr_debug("%s already registered", sched->name); > + ret = -EEXIST; > + } else { > + list_add_tail_rcu(&sched->list, &mptcp_sched_list); > + pr_debug("%s registered", sched->name); > + } > + spin_unlock(&mptcp_sched_list_lock); > + return 0; > +} > + > +static void mptcp_sched_data_init(struct mptcp_sock *msk) > +{ > + struct net *net = sock_net((struct sock *)msk); > + > + msk->sched = mptcp_sched_find(mptcp_get_scheduler(net)); > +} > + > +static void mptcp_sched_init(void) > +{ > + mptcp_register_scheduler(&mptcp_sched_default); > +} > + I think 
it would be nice keeping this infrastructure in a separate file, but I have no strong opinion on it. > static void mptcp_push_release(struct sock *sk, struct sock *ssk, > struct mptcp_sendmsg_info *info) > { > @@ -1567,7 +1619,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) > > prev_ssk = ssk; > mptcp_flush_join_list(msk); > - ssk = mptcp_subflow_get_send(msk); > + ssk = msk->sched->get_subflow(msk); uhm... it would be nice to avoid the indirect call, but I don't see any solution other than IDC wrapper usage (see linux/indirect_call_wrapper.h) > /* First check. If the ssk has changed since > * the last round, release prev_ssk > @@ -1634,7 +1686,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) > * check for a different subflow usage only after > * spooling the first chunk of data > */ > - xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk)); > + xmit_ssk = first ? ssk : mptcp_sk(sk)->sched->get_subflow(mptcp_sk(sk)); > if (!xmit_ssk) > goto out; > if (xmit_ssk != ssk) { > @@ -2534,6 +2586,7 @@ static int __mptcp_init_sock(struct sock *sk) > msk->recovery = false; > > mptcp_pm_data_init(msk); > + mptcp_sched_data_init(msk); > > /* re-use the csk retrans timer for MPTCP-level retrans */ > timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); I think we should do the lookup in mptcp_init_sock(), and copy the scheduler from the parent in mptcp_sk_clone(). Cheers, Paolo
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2a525c7ae920..ab72a3950f2b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1515,6 +1515,58 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) return NULL; } +static struct mptcp_sched_ops mptcp_sched_default = { + .get_subflow = mptcp_subflow_get_send, + .name = "default", + .owner = THIS_MODULE, +}; + +static DEFINE_SPINLOCK(mptcp_sched_list_lock); +static LIST_HEAD(mptcp_sched_list); + +static struct mptcp_sched_ops *mptcp_sched_find(const char *name) +{ + struct mptcp_sched_ops *ops; + + list_for_each_entry_rcu(ops, &mptcp_sched_list, list) { + if (!strcmp(ops->name, name)) + return ops; + } + + return NULL; +} + +static int mptcp_register_scheduler(struct mptcp_sched_ops *sched) +{ + int ret = 0; + + if (!sched->get_subflow) + return -EINVAL; + + spin_lock(&mptcp_sched_list_lock); + if (mptcp_sched_find(sched->name)) { + pr_debug("%s already registered", sched->name); + ret = -EEXIST; + } else { + list_add_tail_rcu(&sched->list, &mptcp_sched_list); + pr_debug("%s registered", sched->name); + } + spin_unlock(&mptcp_sched_list_lock); + return 0; +} + +static void mptcp_sched_data_init(struct mptcp_sock *msk) +{ + struct net *net = sock_net((struct sock *)msk); + + msk->sched = mptcp_sched_find(mptcp_get_scheduler(net)); +} + +static void mptcp_sched_init(void) +{ + mptcp_register_scheduler(&mptcp_sched_default); +} + static void mptcp_push_release(struct sock *sk, struct sock *ssk, struct mptcp_sendmsg_info *info) { @@ -1567,7 +1619,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) prev_ssk = ssk; mptcp_flush_join_list(msk); - ssk = mptcp_subflow_get_send(msk); + ssk = msk->sched->get_subflow(msk); /* First check. 
If the ssk has changed since * the last round, release prev_ssk @@ -1634,7 +1686,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) * check for a different subflow usage only after * spooling the first chunk of data */ - xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk)); + xmit_ssk = first ? ssk : mptcp_sk(sk)->sched->get_subflow(mptcp_sk(sk)); if (!xmit_ssk) goto out; if (xmit_ssk != ssk) { @@ -2534,6 +2586,7 @@ static int __mptcp_init_sock(struct sock *sk) msk->recovery = false; mptcp_pm_data_init(msk); + mptcp_sched_data_init(msk); /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); @@ -3005,7 +3058,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) return; if (!sock_owned_by_user(sk)) { - struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk)); + struct sock *xmit_ssk = mptcp_sk(sk)->sched->get_subflow(mptcp_sk(sk)); if (xmit_ssk == ssk) __mptcp_subflow_push_pending(sk, ssk); @@ -3569,6 +3622,7 @@ void __init mptcp_proto_init(void) mptcp_subflow_init(); mptcp_pm_init(); + mptcp_sched_init(); mptcp_token_init(); if (proto_register(&mptcp_prot, MPTCP_USE_SLAB) != 0) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 305d373332b7..71237d1458ea 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -207,6 +207,13 @@ struct mptcp_pm_data { }; #define MPTCP_SCHED_NAME_MAX 16 +struct mptcp_sched_ops { + struct list_head list; + + struct sock * (*get_subflow)(struct mptcp_sock *msk); + char name[MPTCP_SCHED_NAME_MAX]; + struct module *owner; +}; struct mptcp_data_frag { struct list_head list; @@ -264,6 +271,7 @@ struct mptcp_sock { struct socket *subflow; /* outgoing connect/listener/!mp_capable */ struct sock *first; struct mptcp_pm_data pm; + struct mptcp_sched_ops *sched; struct { u32 space; /* bytes copied in last measurement window */ u32 copied; /* bytes copied in this measurement window */