diff mbox series

[mptcp-next,v10,2/5] mptcp: redundant subflows push pending

Message ID ef3d52ff98c97b3c3e5b1efd84a1d4bf3448df5d.1656578856.git.geliang.tang@suse.com (mailing list archive)
State Superseded, archived
Delegated to: Mat Martineau
Headers show
Series BPF redundant scheduler | expand

Checks

Context Check Description
matttbe/checkpatch success total: 0 errors, 0 warnings, 0 checks, 129 lines checked
matttbe/build success Build and static analysis OK
matttbe/KVM_Validation__normal warning Unstable: 1 failed test(s): selftest_simult_flows - Critical: 2 Call Trace(s) ❌
matttbe/KVM_Validation__debug warning Unstable: 1 failed test(s): selftest_mptcp_join - Critical: 2 Call Trace(s) ❌

Commit Message

Geliang Tang June 30, 2022, 8:50 a.m. UTC
This patch adds the redundant subflows support for __mptcp_push_pending().
Use mptcp_sched_get_send() wrapper instead of mptcp_subflow_get_send()
in it.

Check the subflow scheduled flags to test which subflow or subflows are
picked by the scheduler, use them to send data.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c | 96 ++++++++++++++++++++++++++------------------
 net/mptcp/protocol.h | 10 +++++
 2 files changed, 66 insertions(+), 40 deletions(-)

Comments

Mat Martineau July 1, 2022, 9:01 p.m. UTC | #1
On Thu, 30 Jun 2022, Geliang Tang wrote:

> This patch adds the redundant subflows support for __mptcp_push_pending().
> Use mptcp_sched_get_send() wrapper instead of mptcp_subflow_get_send()
> in it.
>
> Check the subflow scheduled flags to test which subflow or subflows are
> picked by the scheduler, use them to send data.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 96 ++++++++++++++++++++++++++------------------
> net/mptcp/protocol.h | 10 +++++
> 2 files changed, 66 insertions(+), 40 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 6e44786e01fd..f97e6c318c0b 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1516,57 +1516,73 @@ void mptcp_check_and_set_pending(struct sock *sk)
>
> void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> {
> -	struct sock *prev_ssk = NULL, *ssk = NULL;
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> -	struct mptcp_sendmsg_info info = {
> -				.flags = flags,
> -	};
> -	struct mptcp_data_frag *dfrag;
> +	struct mptcp_subflow_context *subflow;
> 	int len, copied = 0;
>
> -	while ((dfrag = mptcp_send_head(sk))) {
> -		info.sent = dfrag->already_sent;
> -		info.limit = dfrag->data_len;
> -		len = dfrag->data_len - dfrag->already_sent;
> -		while (len > 0) {
> -			int ret = 0;
> +	if (!mptcp_sched_get_send(msk))
> +		goto out;
>
> -			prev_ssk = ssk;
> -			ssk = mptcp_subflow_get_send(msk);
> +	mptcp_for_each_subflow(msk, subflow) {
> +		if (READ_ONCE(subflow->scheduled)) {
> +			struct sock *prev_ssk = NULL, *ssk = NULL;
> +			struct mptcp_sendmsg_info info = {
> +				.flags = flags,
> +			};
> +			struct mptcp_data_frag *dfrag;
>
> -			/* First check. If the ssk has changed since
> -			 * the last round, release prev_ssk
> -			 */
> -			if (ssk != prev_ssk && prev_ssk)
> -				mptcp_push_release(prev_ssk, &info);
> -			if (!ssk)
> +			dfrag = mptcp_send_head(sk);
> +			if (!dfrag)
> 				goto out;
>
> -			/* Need to lock the new subflow only if different
> -			 * from the previous one, otherwise we are still
> -			 * helding the relevant lock
> -			 */
> -			if (ssk != prev_ssk)
> -				lock_sock(ssk);
> -
> -			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
> -			if (ret <= 0) {
> +			do {
> +				info.sent = dfrag->already_sent;
> +				info.limit = dfrag->data_len;
> +				len = dfrag->data_len - dfrag->already_sent;
> +				while (len > 0) {
> +					int ret = 0;
> +
> +					prev_ssk = ssk;
> +					ssk = mptcp_subflow_tcp_sock(subflow);

prev_ssk isn't needed any more. The old code could return different ssks 
from each scheduler call, but now the ssk is the same for each iteration 
of the mptcp_for_each_subflow() loop. The locking can be simplified to 
lock the ssk before the do-while loop, and to always call 
mptcp_push_release() after the do-while loop is done.

> +
> +					/* First check. If the ssk has changed since
> +					 * the last round, release prev_ssk
> +					 */
> +					if (ssk != prev_ssk && prev_ssk)
> +						mptcp_push_release(prev_ssk, &info);
> +					if (!ssk)

This check is no longer needed, mptcp_subflow_tcp_sock() won't be 
returning NULL.

> +						goto out;
> +
> +					/* Need to lock the new subflow only if different
> +					 * from the previous one, otherwise we are still
> +					 * helding the relevant lock
> +					 */
> +					if (ssk != prev_ssk)
> +						lock_sock(ssk);
> +
> +					ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
> +					if (ret <= 0) {
> +						mptcp_push_release(ssk, &info);
> +						goto out;
> +					}
> +
> +					info.sent += ret;
> +					copied += ret;

Since 'copied' is only used below to determine if 
__mptcp_check_send_data_fin() should be called, I think it would be 
clearer to use a bool variable do something like this:

if (ret > 0)
 	do_check_data_fin = true;

instead. 'copied' can be removed.

> +					len -= ret;
> +
> +					mptcp_update_post_push(msk, dfrag, ret);

This modifies dfrag->already_sent and msk->snd_nxt, which will interfere 
with sending data on the next subflow. All the dfrag modifications need to 
be made _after_ the mptcp_for_each_subflow() loop is done (right before 
the "out:" label).

Store a pointer to the last dfrag that was used, and how many bytes were 
sent from it (last_dfrag and last_sent?).

> +				}
> +			} while ((dfrag = mptcp_next_frag(sk, dfrag)));
> +
> +			/* at this point we held the socket lock for the last subflow we used */
> +			if (ssk) {

Like I mentioned above, this conditional is no longer needed.

> 				mptcp_push_release(ssk, &info);
> -				goto out;
> +				msk->last_snd = ssk;
> +				mptcp_subflow_set_scheduled(subflow, false);
> 			}
> -
> -			info.sent += ret;
> -			copied += ret;
> -			len -= ret;
> -
> -			mptcp_update_post_push(msk, dfrag, ret);
> 		}
> -		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> 	}
> -
> -	/* at this point we held the socket lock for the last subflow we used */
> -	if (ssk)
> -		mptcp_push_release(ssk, &info);

Here (before updating msk->first_pending) is the place to update the dfrag 
list. Walk the list from mptcp_send_head() to last_dfrag. All of the 
dfrags before last_dfrag need to assign dfrag->already_sent = 
dfrag->data_len (because they were fully sent). For last_dfrag, call 
mptcp_update_post_push().

Also double-check that msk->snd_burst is updated correctly.

> +	WRITE_ONCE(msk->first_pending, NULL);

This shouldn't be NULL - needs to be mptcp_send_next() like the old code, 
which should work if the dfrag list is updated.

>
> out:
> 	/* ensure the rtx timer is running */
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index af414cd8b7cd..09a9797d2dfb 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -355,6 +355,16 @@ static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
> 						     list_next_entry(cur, list);
> }
>
> +static inline struct mptcp_data_frag *mptcp_next_frag(struct sock *sk,
> +						      struct mptcp_data_frag *cur)
> +{
> +	if (!cur)
> +		return NULL;
> +
> +	return list_is_last(&cur->list, &mptcp_sk(sk)->rtx_queue) ? NULL :
> +						     list_next_entry(cur, list);
> +}
> +
> static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> -- 
> 2.35.3
>
>
>

--
Mat Martineau
Intel
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6e44786e01fd..f97e6c318c0b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1516,57 +1516,73 @@  void mptcp_check_and_set_pending(struct sock *sk)
 
 void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 {
-	struct sock *prev_ssk = NULL, *ssk = NULL;
 	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct mptcp_sendmsg_info info = {
-				.flags = flags,
-	};
-	struct mptcp_data_frag *dfrag;
+	struct mptcp_subflow_context *subflow;
 	int len, copied = 0;
 
-	while ((dfrag = mptcp_send_head(sk))) {
-		info.sent = dfrag->already_sent;
-		info.limit = dfrag->data_len;
-		len = dfrag->data_len - dfrag->already_sent;
-		while (len > 0) {
-			int ret = 0;
+	if (!mptcp_sched_get_send(msk))
+		goto out;
 
-			prev_ssk = ssk;
-			ssk = mptcp_subflow_get_send(msk);
+	mptcp_for_each_subflow(msk, subflow) {
+		if (READ_ONCE(subflow->scheduled)) {
+			struct sock *prev_ssk = NULL, *ssk = NULL;
+			struct mptcp_sendmsg_info info = {
+				.flags = flags,
+			};
+			struct mptcp_data_frag *dfrag;
 
-			/* First check. If the ssk has changed since
-			 * the last round, release prev_ssk
-			 */
-			if (ssk != prev_ssk && prev_ssk)
-				mptcp_push_release(prev_ssk, &info);
-			if (!ssk)
+			dfrag = mptcp_send_head(sk);
+			if (!dfrag)
 				goto out;
 
-			/* Need to lock the new subflow only if different
-			 * from the previous one, otherwise we are still
-			 * helding the relevant lock
-			 */
-			if (ssk != prev_ssk)
-				lock_sock(ssk);
-
-			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
-			if (ret <= 0) {
+			do {
+				info.sent = dfrag->already_sent;
+				info.limit = dfrag->data_len;
+				len = dfrag->data_len - dfrag->already_sent;
+				while (len > 0) {
+					int ret = 0;
+
+					prev_ssk = ssk;
+					ssk = mptcp_subflow_tcp_sock(subflow);
+
+					/* First check. If the ssk has changed since
+					 * the last round, release prev_ssk
+					 */
+					if (ssk != prev_ssk && prev_ssk)
+						mptcp_push_release(prev_ssk, &info);
+					if (!ssk)
+						goto out;
+
+					/* Need to lock the new subflow only if different
+					 * from the previous one, otherwise we are still
+					 * helding the relevant lock
+					 */
+					if (ssk != prev_ssk)
+						lock_sock(ssk);
+
+					ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+					if (ret <= 0) {
+						mptcp_push_release(ssk, &info);
+						goto out;
+					}
+
+					info.sent += ret;
+					copied += ret;
+					len -= ret;
+
+					mptcp_update_post_push(msk, dfrag, ret);
+				}
+			} while ((dfrag = mptcp_next_frag(sk, dfrag)));
+
+			/* at this point we held the socket lock for the last subflow we used */
+			if (ssk) {
 				mptcp_push_release(ssk, &info);
-				goto out;
+				msk->last_snd = ssk;
+				mptcp_subflow_set_scheduled(subflow, false);
 			}
-
-			info.sent += ret;
-			copied += ret;
-			len -= ret;
-
-			mptcp_update_post_push(msk, dfrag, ret);
 		}
-		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
 	}
-
-	/* at this point we held the socket lock for the last subflow we used */
-	if (ssk)
-		mptcp_push_release(ssk, &info);
+	WRITE_ONCE(msk->first_pending, NULL);
 
 out:
 	/* ensure the rtx timer is running */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index af414cd8b7cd..09a9797d2dfb 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -355,6 +355,16 @@  static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk)
 						     list_next_entry(cur, list);
 }
 
+static inline struct mptcp_data_frag *mptcp_next_frag(struct sock *sk,
+						      struct mptcp_data_frag *cur)
+{
+	if (!cur)
+		return NULL;
+
+	return list_is_last(&cur->list, &mptcp_sk(sk)->rtx_queue) ? NULL :
+						     list_next_entry(cur, list);
+}
+
 static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);