diff mbox series

[RFC,mptcp-next,2/4] mptcp: add struct mptcp_sched_ops

Message ID 4089672c8557e031d38306ef49e26e9902e7db57.1631011068.git.geliangtang@xiaomi.com (mailing list archive)
State RFC, archived
Headers show
Series round-robin packet scheduler support | expand

Commit Message

Geliang Tang Sept. 7, 2021, 10:41 a.m. UTC
From: Geliang Tang <geliangtang@xiaomi.com>

This patch added struct mptcp_sched_ops. And define the scheduler
init, register and find functions.

Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
---
 net/mptcp/protocol.c | 60 +++++++++++++++++++++++++++++++++++++++++---
 net/mptcp/protocol.h |  8 ++++++
 2 files changed, 65 insertions(+), 3 deletions(-)

Comments

Paolo Abeni Sept. 7, 2021, 11:04 a.m. UTC | #1
On Tue, 2021-09-07 at 18:41 +0800, Geliang Tang wrote:
> From: Geliang Tang <geliangtang@xiaomi.com>
> 
> This patch added struct mptcp_sched_ops. And define the scheduler
> init, register and find functions.
> 
> Signed-off-by: Geliang Tang <geliangtang@xiaomi.com>
> ---
>  net/mptcp/protocol.c | 60 +++++++++++++++++++++++++++++++++++++++++---
>  net/mptcp/protocol.h |  8 ++++++
>  2 files changed, 65 insertions(+), 3 deletions(-)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 2a525c7ae920..ab72a3950f2b 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1515,6 +1515,58 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
>  	return NULL;
>  }
>  
> +static struct mptcp_sched_ops mptcp_sched_default = {
> +	.get_subflow	= mptcp_subflow_get_send,
> +	.name		= "default",
> +	.owner		= THIS_MODULE,
> +};
> +
> +static DEFINE_SPINLOCK(mptcp_sched_list_lock);
> +static LIST_HEAD(mptcp_sched_list);
> +
> +static struct mptcp_sched_ops *mptcp_sched_find(const char *name)
> +{
> +	struct mptcp_sched_ops *ops;
> +
> +	list_for_each_entry_rcu(ops, &mptcp_sched_list, list) {
> +		if (!strcmp(ops->name, name))
> +			return ops;
> +	}
> +
> +	return NULL;
> +}
> +
> +static int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
> +{
> +	int ret = 0;
> +
> +	if (!sched->get_subflow)
> +		return -EINVAL;
> +
> +	spin_lock(&mptcp_sched_list_lock);
> +	if (mptcp_sched_find(sched->name)) {
> +		pr_debug("%s already registered", sched->name);
> +		ret = -EEXIST;
> +	} else {
> +		list_add_tail_rcu(&sched->list, &mptcp_sched_list);
> +		pr_debug("%s registered", sched->name);
> +	}
> +	spin_unlock(&mptcp_sched_list_lock);
> +	return 0;
> +}
> +
> +static void mptcp_sched_data_init(struct mptcp_sock *msk)
> +{
> +	struct net *net = sock_net((struct sock *)msk);
> +
> +	msk->sched = mptcp_sched_find(mptcp_get_scheduler(net));
> +}
> +
> +static void mptcp_sched_init(void)
> +{
> +	mptcp_register_scheduler(&mptcp_sched_default);
> +}
> +

I think it would be nice keeping this infrastructure in a separate
file, but I have no strong opinion on it.

>  static void mptcp_push_release(struct sock *sk, struct sock *ssk,
>  			       struct mptcp_sendmsg_info *info)
>  {
> @@ -1567,7 +1619,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
>  
>  			prev_ssk = ssk;
>  			mptcp_flush_join_list(msk);
> -			ssk = mptcp_subflow_get_send(msk);
> +			ssk = msk->sched->get_subflow(msk);


uhm... it would be nice to avoid the indirect call, but I don't see any
solution other then IDC wrapper usage
(see linux/indirect_call_wrapper.h)

>  			/* First check. If the ssk has changed since
>  			 * the last round, release prev_ssk
> @@ -1634,7 +1686,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
>  			 * check for a different subflow usage only after
>  			 * spooling the first chunk of data
>  			 */
> -			xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
> +			xmit_ssk = first ? ssk : mptcp_sk(sk)->sched->get_subflow(mptcp_sk(sk));
>  			if (!xmit_ssk)
>  				goto out;
>  			if (xmit_ssk != ssk) {
> @@ -2534,6 +2586,7 @@ static int __mptcp_init_sock(struct sock *sk)
>  	msk->recovery = false;
>  
>  	mptcp_pm_data_init(msk);
> +	mptcp_sched_data_init(msk);
>  
>  	/* re-use the csk retrans timer for MPTCP-level retrans */
>  	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);

I think we should do the lookup in mptcp_init_sock(), and copy the
scheduler from the parent in mptcp_sk_clone().

Cheers,

Paolo
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2a525c7ae920..ab72a3950f2b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1515,6 +1515,58 @@  static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 	return NULL;
 }
 
+static struct mptcp_sched_ops mptcp_sched_default = {
+	.get_subflow	= mptcp_subflow_get_send,
+	.name		= "default",
+	.owner		= THIS_MODULE,
+};
+
+static DEFINE_SPINLOCK(mptcp_sched_list_lock);
+static LIST_HEAD(mptcp_sched_list);
+
+static struct mptcp_sched_ops *mptcp_sched_find(const char *name)
+{
+	struct mptcp_sched_ops *ops;
+
+	list_for_each_entry_rcu(ops, &mptcp_sched_list, list) {
+		if (!strcmp(ops->name, name))
+			return ops;
+	}
+
+	return NULL;
+}
+
+static int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+{
+	int ret = 0;
+
+	if (!sched->get_subflow)
+		return -EINVAL;
+
+	spin_lock(&mptcp_sched_list_lock);
+	if (mptcp_sched_find(sched->name)) {
+		pr_debug("%s already registered", sched->name);
+		ret = -EEXIST;
+	} else {
+		list_add_tail_rcu(&sched->list, &mptcp_sched_list);
+		pr_debug("%s registered", sched->name);
+	}
+	spin_unlock(&mptcp_sched_list_lock);
+	return 0;
+}
+
+static void mptcp_sched_data_init(struct mptcp_sock *msk)
+{
+	struct net *net = sock_net((struct sock *)msk);
+
+	msk->sched = mptcp_sched_find(mptcp_get_scheduler(net));
+}
+
+static void mptcp_sched_init(void)
+{
+	mptcp_register_scheduler(&mptcp_sched_default);
+}
+
 static void mptcp_push_release(struct sock *sk, struct sock *ssk,
 			       struct mptcp_sendmsg_info *info)
 {
@@ -1567,7 +1619,7 @@  void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 
 			prev_ssk = ssk;
 			mptcp_flush_join_list(msk);
-			ssk = mptcp_subflow_get_send(msk);
+			ssk = msk->sched->get_subflow(msk);
 
 			/* First check. If the ssk has changed since
 			 * the last round, release prev_ssk
@@ -1634,7 +1686,7 @@  static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 			 * check for a different subflow usage only after
 			 * spooling the first chunk of data
 			 */
-			xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+			xmit_ssk = first ? ssk : mptcp_sk(sk)->sched->get_subflow(mptcp_sk(sk));
 			if (!xmit_ssk)
 				goto out;
 			if (xmit_ssk != ssk) {
@@ -2534,6 +2586,7 @@  static int __mptcp_init_sock(struct sock *sk)
 	msk->recovery = false;
 
 	mptcp_pm_data_init(msk);
+	mptcp_sched_data_init(msk);
 
 	/* re-use the csk retrans timer for MPTCP-level retrans */
 	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
@@ -3005,7 +3058,7 @@  void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 		return;
 
 	if (!sock_owned_by_user(sk)) {
-		struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+		struct sock *xmit_ssk = mptcp_sk(sk)->sched->get_subflow(mptcp_sk(sk));
 
 		if (xmit_ssk == ssk)
 			__mptcp_subflow_push_pending(sk, ssk);
@@ -3569,6 +3622,7 @@  void __init mptcp_proto_init(void)
 
 	mptcp_subflow_init();
 	mptcp_pm_init();
+	mptcp_sched_init();
 	mptcp_token_init();
 
 	if (proto_register(&mptcp_prot, MPTCP_USE_SLAB) != 0)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 305d373332b7..71237d1458ea 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -207,6 +207,13 @@  struct mptcp_pm_data {
 };
 
 #define MPTCP_SCHED_NAME_MAX 16
+struct mptcp_sched_ops {
+	struct list_head list;
+
+	struct sock *	(*get_subflow)(struct mptcp_sock *msk);
+	char		name[MPTCP_SCHED_NAME_MAX];
+	struct module	*owner;
+};
 
 struct mptcp_data_frag {
 	struct list_head list;
@@ -264,6 +271,7 @@  struct mptcp_sock {
 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
 	struct sock	*first;
 	struct mptcp_pm_data	pm;
+	struct mptcp_sched_ops	*sched;
 	struct {
 		u32	space;	/* bytes copied in last measurement window */
 		u32	copied; /* bytes copied in this measurement window */