diff mbox series

[RFC,mptcp-next,v12,1/7] mptcp: introduce MSG_FASTOPEN flag.

Message ID 20220927225341.14165-2-dmytro@shytyi.net (mailing list archive)
State Superseded, archived
Delegated to: Paolo Abeni
Headers show
Series mptcp: Fast Open Mechanism | expand

Checks

Context Check Description
matttbe/checkpatch warning total: 0 errors, 1 warnings, 2 checks, 104 lines checked
matttbe/build fail Build error with: -Werror
matttbe/KVM_Validation__normal warning Unstable: 8 failed test(s): packetdrill_fastopen packetdrill_mp_join packetdrill_mp_prio packetdrill_sockopts packetdrill_syscalls selftest_mptcp_join selftest_mptfo selftest_simult_flows
matttbe/KVM_Validation__debug warning Unstable: 8 failed test(s): packetdrill_fastopen packetdrill_mp_join packetdrill_mp_prio packetdrill_sockopts packetdrill_syscalls selftest_mptcp_join selftest_mptfo selftest_simult_flows

Commit Message

Dmytro Shytyi Sept. 27, 2022, 10:53 p.m. UTC
In the following patches we will analyse the MSG_FASTOPEN flag
in the mptcp_sendmsg() and invoke the MPTFO.

Signed-of-by: Benjamin Hesmans <benjamin.hesmans@tessares.net>
Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net>
---
 include/net/mptcp.h  |  9 +++++++++
 include/net/tcp.h    |  3 +++
 net/ipv4/tcp.c       | 20 ++++++++++++++++----
 net/mptcp/protocol.c | 19 +++++++++++--------
 4 files changed, 39 insertions(+), 12 deletions(-)

Comments

Paolo Abeni Sept. 28, 2022, 9:01 a.m. UTC | #1
On Tue, 2022-09-27 at 22:53 +0000, Dmytro Shytyi wrote:
> In the following patches we will analyse the MSG_FASTOPEN flag
> in the mptcp_sendmsg() and invoke the MPTFO.
> 
> Signed-of-by: Benjamin Hesmans <benjamin.hesmans@tessares.net>
> Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net>
> ---
>  include/net/mptcp.h  |  9 +++++++++
>  include/net/tcp.h    |  3 +++
>  net/ipv4/tcp.c       | 20 ++++++++++++++++----
>  net/mptcp/protocol.c | 19 +++++++++++--------
>  4 files changed, 39 insertions(+), 12 deletions(-)
> 
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index c25939b2af68..ccf2b42837a1 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -150,6 +150,8 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
>  			 struct mptcp_out_options *opts);
>  
>  void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info);
> +int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags);
> +struct sock *mptcp_subflow_conn_sock(struct sock *sk);
>  
>  /* move the skb extension owership, with the assumption that 'to' is
>   * newly allocated
> @@ -286,6 +288,13 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
>  
>  static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { }
>  static inline void mptcp_seq_show(struct seq_file *seq) { }
> +static inline int mptcp_stream_connect(struct socket *sock,
> +				       struct sockaddr *uaddr,
> +				       int addr_len,
> +				       int flags)
> +{
> +
> +}
>  
>  static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
>  						const struct sock *sk_listener,
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 4f71cc15ff8e..e53d26d74dec 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1757,6 +1757,9 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
>  			      struct request_sock *req,
>  			      struct tcp_fastopen_cookie *foc,
>  			      const struct dst_entry *dst);
> +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
> +			 int *copied, size_t size,
> +			 struct ubuf_info *uarg);
>  void tcp_fastopen_init_key_once(struct net *net);
>  bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
>  			     struct tcp_fastopen_cookie *cookie);
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index 5237a3f08c94..daa611671d9a 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -1162,8 +1162,9 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
>  	}
>  }
>  
> -int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
> -			 size_t size, struct ubuf_info *uarg)
> +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
> +			 int *copied, size_t size,
> +			 struct ubuf_info *uarg)
>  {
>  	struct tcp_sock *tp = tcp_sk(sk);
>  	struct inet_sock *inet = inet_sk(sk);
> @@ -1196,8 +1197,19 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
>  		}
>  	}
>  	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
> -	err = __inet_stream_connect(sk->sk_socket, uaddr,
> -				    msg->msg_namelen, flags, 1);
> +	if (!sk_is_mptcp(sk)) {
> +		err = __inet_stream_connect(sk->sk_socket, uaddr,
> +					    msg->msg_namelen, flags, 1);
> +	} else {
> +		struct sock *parent = mptcp_subflow_conn_sock(sk);
> +
> +		release_sock(sk);
> +		release_sock(parent);
> +		err = mptcp_stream_connect(sk->sk_socket, uaddr,
> +					   msg->msg_namelen, msg->msg_flags);
> +		lock_sock(parent);
> +		lock_sock(sk);
> +	}
>  	/* fastopen_req could already be freed in __inet_stream_connect
>  	 * if the connection times out or gets rst
>  	 */

I'm sorry it looks like I was not clear enough in my previous replies. 

I really think we should avoid this chunk. I thought it only served for
updating the msk socket status, but now I see it is also needed to
properly allocate the token and update the MIBs, right? Does it serve
any other roles?

Anyway I still think you can avoid the above chunck, factoring out the
relevant slice of mptcp_stream_connect() in an helper:

static void __mptcp_pre_connect(struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *subflow;

	mptcp_token_destroy(msk);
        subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
        /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
         * TCP option space.
         */
        if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
                mptcp_subflow_early_fallback(msk, subflow);
#endif
        if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
                MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
                mptcp_subflow_early_fallback(msk, subflow);
        }
        if (likely(!__mptcp_check_fallback(msk)))
                MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVE);
}

and call the above in mptcp_sendmsg():

	if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
			       msg->msg_flags & MSG_FASTOPEN))) {
		struct sock *ssk = ssock->sk;
                int copied_syn = 0;

                lock_sock(ssk);
		if (msg->msg_flags & MSG_FASTOPEN && sk->sk_state == TCP_CLOSE)
			__mptcp_pre_connect(msk, ssk);
		
Likely this patch should be split in 2 separate ones: the new patch
will just create the helper and use it in mptcp_stream_connect.	


Cheers,

Paolo
Dmytro Shytyi Oct. 1, 2022, 3:08 a.m. UTC | #2
Hello,

I'm getting the next stack trace [1] when following this approach, yet 
it needs uarg to be filled, not NULL.

After Matthieu suggestion, I tried to implement the .connect in mptcp, 
but I'm getting errors like "ENOBUFF" or "EINPROGRESS".

Finally I decided to continue with "mptcp_stream_connect()" function in v13.


[1] Code starting with the faulting instruction
===========================================
[   27.736069] RSP: 0018:ffffc90000adfce0 EFLAGS: 00010246
[   27.737115] RAX: 0000000000000000 RBX: ffff888003894440 RCX: 
0000000000000000
[   27.738560] RDX: 0000000000000010 RSI: ffffc90000adfe80 RDI: 
ffff888027ce8000
[   27.739883] RBP: 0000000000000000 R08: 0000000000000001 R09: 
ffff888015710cf0
[   27.741286] R10: 0000000000000001 R11: ffff888003ca78c8 R12: 
00000000ffffff96
[   27.742718] R13: ffffc90000adfdb0 R14: ffffc90000adfe80 R15: 
ffff888027ce8000
[   27.744135] FS:  00007f7cc983a740(0000) GS:ffff88803da00000(0000) 
knlGS:0000000000000000
[   27.745612] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   27.746788] CR2: ffffffffffffffd6 CR3: 000000001d180006 CR4: 
0000000000370ef0
[   27.748125] Call Trace:
[   27.748655]  <TASK>
[   27.749086] __inet_stream_connect (net/ipv4/af_inet.c:663)
[   27.750093] ? sk_reset_timer (net/core/sock.c:3341)
[   27.750803] ? tcp_connect (net/ipv4/tcp_output.c:3882)
[   27.751590] ? kmem_cache_alloc_trace (mm/slub.c:3286)
[   27.752492] tcp_sendmsg_fastopen (net/ipv4/tcp.c:1198)
[   27.753347] mptcp_sendmsg (net/mptcp/protocol.c:1710)
[   27.754047] sock_sendmsg_nosec (net/socket.c:714)
[   27.754831] __sys_sendto (net/socket.c:2117)
[   27.755539] ? handle_mm_fault (mm/memory.c:5151)
[   27.756311] ? do_user_addr_fault (arch/x86/mm/fault.c:1426)
[   27.757175] __x64_sys_sendto (net/socket.c:2129 net/socket.c:2125 
net/socket.c:2125)
[   27.758029] do_syscall_64 (arch/x86/entry/common.c:50 
arch/x86/entry/common.c:80)
[   27.758739] entry_SYSCALL_64_after_hwframe 
(arch/x86/entry/entry_64.S:120)
[   27.759719] RIP: 0033:0x7f7cc99484e6
[ 27.760458] Code: 69 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 
1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0c

On 9/28/2022 11:01 AM, Paolo Abeni wrote:
> On Tue, 2022-09-27 at 22:53 +0000, Dmytro Shytyi wrote:
>> In the following patches we will analyse the MSG_FASTOPEN flag
>> in the mptcp_sendmsg() and invoke the MPTFO.
>>
>> Signed-of-by: Benjamin Hesmans <benjamin.hesmans@tessares.net>
>> Signed-off-by: Dmytro Shytyi <dmytro@shytyi.net>
>> ---
>>   include/net/mptcp.h  |  9 +++++++++
>>   include/net/tcp.h    |  3 +++
>>   net/ipv4/tcp.c       | 20 ++++++++++++++++----
>>   net/mptcp/protocol.c | 19 +++++++++++--------
>>   4 files changed, 39 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
>> index c25939b2af68..ccf2b42837a1 100644
>> --- a/include/net/mptcp.h
>> +++ b/include/net/mptcp.h
>> @@ -150,6 +150,8 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
>>   			 struct mptcp_out_options *opts);
>>   
>>   void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info);
>> +int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags);
>> +struct sock *mptcp_subflow_conn_sock(struct sock *sk);
>>   
>>   /* move the skb extension owership, with the assumption that 'to' is
>>    * newly allocated
>> @@ -286,6 +288,13 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
>>   
>>   static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { }
>>   static inline void mptcp_seq_show(struct seq_file *seq) { }
>> +static inline int mptcp_stream_connect(struct socket *sock,
>> +				       struct sockaddr *uaddr,
>> +				       int addr_len,
>> +				       int flags)
>> +{
>> +
>> +}
>>   
>>   static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
>>   						const struct sock *sk_listener,
>> diff --git a/include/net/tcp.h b/include/net/tcp.h
>> index 4f71cc15ff8e..e53d26d74dec 100644
>> --- a/include/net/tcp.h
>> +++ b/include/net/tcp.h
>> @@ -1757,6 +1757,9 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
>>   			      struct request_sock *req,
>>   			      struct tcp_fastopen_cookie *foc,
>>   			      const struct dst_entry *dst);
>> +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
>> +			 int *copied, size_t size,
>> +			 struct ubuf_info *uarg);
>>   void tcp_fastopen_init_key_once(struct net *net);
>>   bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
>>   			     struct tcp_fastopen_cookie *cookie);
>> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
>> index 5237a3f08c94..daa611671d9a 100644
>> --- a/net/ipv4/tcp.c
>> +++ b/net/ipv4/tcp.c
>> @@ -1162,8 +1162,9 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
>>   	}
>>   }
>>   
>> -int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
>> -			 size_t size, struct ubuf_info *uarg)
>> +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
>> +			 int *copied, size_t size,
>> +			 struct ubuf_info *uarg)
>>   {
>>   	struct tcp_sock *tp = tcp_sk(sk);
>>   	struct inet_sock *inet = inet_sk(sk);
>> @@ -1196,8 +1197,19 @@ int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
>>   		}
>>   	}
>>   	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
>> -	err = __inet_stream_connect(sk->sk_socket, uaddr,
>> -				    msg->msg_namelen, flags, 1);
>> +	if (!sk_is_mptcp(sk)) {
>> +		err = __inet_stream_connect(sk->sk_socket, uaddr,
>> +					    msg->msg_namelen, flags, 1);
>> +	} else {
>> +		struct sock *parent = mptcp_subflow_conn_sock(sk);
>> +
>> +		release_sock(sk);
>> +		release_sock(parent);
>> +		err = mptcp_stream_connect(sk->sk_socket, uaddr,
>> +					   msg->msg_namelen, msg->msg_flags);
>> +		lock_sock(parent);
>> +		lock_sock(sk);
>> +	}
>>   	/* fastopen_req could already be freed in __inet_stream_connect
>>   	 * if the connection times out or gets rst
>>   	 */
> I'm sorry it looks like I was not clear enough in my previous replies.
>
> I really think we should avoid this chunk. I thought it only served for
> updating the msk socket status, but now I see it is also needed to
> properly allocate the token and update the MIBs, right? Does it serve
> any other roles?
>
> Anyway I still think you can avoid the above chunck, factoring out the
> relevant slice of mptcp_stream_connect() in an helper:
>
> static void __mptcp_pre_connect(struct mptcp_sock *msk, struct sock *ssk)
> {
> 	struct mptcp_subflow_context *subflow;
>
> 	mptcp_token_destroy(msk);
>          subflow = mptcp_subflow_ctx(ssk);
> #ifdef CONFIG_TCP_MD5SIG
>          /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
>           * TCP option space.
>           */
>          if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
>                  mptcp_subflow_early_fallback(msk, subflow);
> #endif
>          if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
>                  MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
>                  mptcp_subflow_early_fallback(msk, subflow);
>          }
>          if (likely(!__mptcp_check_fallback(msk)))
>                  MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVE);
> }
>
> and call the above in mptcp_sendmsg():
>
> 	if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
> 			       msg->msg_flags & MSG_FASTOPEN))) {
> 		struct sock *ssk = ssock->sk;
>                  int copied_syn = 0;
>
>                  lock_sock(ssk);
> 		if (msg->msg_flags & MSG_FASTOPEN && sk->sk_state == TCP_CLOSE)
> 			__mptcp_pre_connect(msk, ssk);
> 		
> Likely this patch should be split in 2 separate ones: the new patch
> will just create the helper and use it in mptcp_stream_connect.	
>
>
> Cheers,
>
> Paolo
>
>
Paolo Abeni Oct. 3, 2022, 8:02 a.m. UTC | #3
Hello,

On Sat, 2022-10-01 at 05:08 +0200, Dmytro Shytyi wrote:
> I'm getting the next stack trace [1] when following this approach, yet 
> it needs uarg to be filled, not NULL.
> 
> After Matthieu suggestion, I tried to implement the .connect in mptcp, 
> but I'm getting errors like "ENOBUFF" or "EINPROGRESS".
> 
> Finally I decided to continue with "mptcp_stream_connect()" function in v13.

The reported error is not clear at all to me. It's better to clarify it
before moving to the next version, or steps could (likelly, will) be in
the wrong direction.

> [1] Code starting with the faulting instruction
> ===========================================
> [   27.736069] RSP: 0018:ffffc90000adfce0 EFLAGS: 00010246
> [   27.737115] RAX: 0000000000000000 RBX: ffff888003894440 RCX: 
> 0000000000000000
> [   27.738560] RDX: 0000000000000010 RSI: ffffc90000adfe80 RDI: 
> ffff888027ce8000
> [   27.739883] RBP: 0000000000000000 R08: 0000000000000001 R09: 
> ffff888015710cf0
> [   27.741286] R10: 0000000000000001 R11: ffff888003ca78c8 R12: 
> 00000000ffffff96
> [   27.742718] R13: ffffc90000adfdb0 R14: ffffc90000adfe80 R15: 
> ffff888027ce8000
> [   27.744135] FS:  00007f7cc983a740(0000) GS:ffff88803da00000(0000) 
> knlGS:0000000000000000
> [   27.745612] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   27.746788] CR2: ffffffffffffffd6 CR3: 000000001d180006 CR4: 
> 0000000000370ef0
> [   27.748125] Call Trace:
> [   27.748655]  <TASK>
> [   27.749086] __inet_stream_connect (net/ipv4/af_inet.c:663)
> [   27.750093] ? sk_reset_timer (net/core/sock.c:3341)
> [   27.750803] ? tcp_connect (net/ipv4/tcp_output.c:3882)
> [   27.751590] ? kmem_cache_alloc_trace (mm/slub.c:3286)
> [   27.752492] tcp_sendmsg_fastopen (net/ipv4/tcp.c:1198)
> [   27.753347] mptcp_sendmsg (net/mptcp/protocol.c:1710)
> [   27.754047] sock_sendmsg_nosec (net/socket.c:714)
> [   27.754831] __sys_sendto (net/socket.c:2117)
> [   27.755539] ? handle_mm_fault (mm/memory.c:5151)
> [   27.756311] ? do_user_addr_fault (arch/x86/mm/fault.c:1426)
> [   27.757175] __x64_sys_sendto (net/socket.c:2129 net/socket.c:2125 
> net/socket.c:2125)
> [   27.758029] do_syscall_64 (arch/x86/entry/common.c:50 
> arch/x86/entry/common.c:80)
> [   27.758739] entry_SYSCALL_64_after_hwframe 
> (arch/x86/entry/entry_64.S:120)
> [   27.759719] RIP: 0033:0x7f7cc99484e6
> [ 27.760458] Code: 69 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 
> 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0c

This backtrace is incomplete/lacks the oopsing address, so is unclear
what really went wrong. Please report the full backtrace.

Important: please include the code you used for this test. No need to
post the full old patches here, you can e.g. share an URL to your git
tree/branch.

The main point is that this backtrace does not look compatible with the
code suggested previously.


Thanks,

Paolo
Paolo Abeni Oct. 3, 2022, 11:26 a.m. UTC | #4
On Mon, 2022-10-03 at 10:02 +0200, Paolo Abeni wrote:
> Hello,
> 
> On Sat, 2022-10-01 at 05:08 +0200, Dmytro Shytyi wrote:
> > I'm getting the next stack trace [1] when following this approach, yet 
> > it needs uarg to be filled, not NULL.
> > 
> > After Matthieu suggestion, I tried to implement the .connect in mptcp, 
> > but I'm getting errors like "ENOBUFF" or "EINPROGRESS".
> > 
> > Finally I decided to continue with "mptcp_stream_connect()" function in v13.
> 
> The reported error is not clear at all to me. It's better to clarify it
> before moving to the next version, or steps could (likelly, will) be in
> the wrong direction.
> 
> > [1] Code starting with the faulting instruction
> > ===========================================
> > [   27.736069] RSP: 0018:ffffc90000adfce0 EFLAGS: 00010246
> > [   27.737115] RAX: 0000000000000000 RBX: ffff888003894440 RCX: 
> > 0000000000000000
> > [   27.738560] RDX: 0000000000000010 RSI: ffffc90000adfe80 RDI: 
> > ffff888027ce8000
> > [   27.739883] RBP: 0000000000000000 R08: 0000000000000001 R09: 
> > ffff888015710cf0
> > [   27.741286] R10: 0000000000000001 R11: ffff888003ca78c8 R12: 
> > 00000000ffffff96
> > [   27.742718] R13: ffffc90000adfdb0 R14: ffffc90000adfe80 R15: 
> > ffff888027ce8000
> > [   27.744135] FS:  00007f7cc983a740(0000) GS:ffff88803da00000(0000) 
> > knlGS:0000000000000000
> > [   27.745612] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [   27.746788] CR2: ffffffffffffffd6 CR3: 000000001d180006 CR4: 
> > 0000000000370ef0
> > [   27.748125] Call Trace:
> > [   27.748655]  <TASK>
> > [   27.749086] __inet_stream_connect (net/ipv4/af_inet.c:663)
> > [   27.750093] ? sk_reset_timer (net/core/sock.c:3341)
> > [   27.750803] ? tcp_connect (net/ipv4/tcp_output.c:3882)
> > [   27.751590] ? kmem_cache_alloc_trace (mm/slub.c:3286)
> > [   27.752492] tcp_sendmsg_fastopen (net/ipv4/tcp.c:1198)
> > [   27.753347] mptcp_sendmsg (net/mptcp/protocol.c:1710)
> > [   27.754047] sock_sendmsg_nosec (net/socket.c:714)
> > [   27.754831] __sys_sendto (net/socket.c:2117)
> > [   27.755539] ? handle_mm_fault (mm/memory.c:5151)
> > [   27.756311] ? do_user_addr_fault (arch/x86/mm/fault.c:1426)
> > [   27.757175] __x64_sys_sendto (net/socket.c:2129 net/socket.c:2125 
> > net/socket.c:2125)
> > [   27.758029] do_syscall_64 (arch/x86/entry/common.c:50 
> > arch/x86/entry/common.c:80)
> > [   27.758739] entry_SYSCALL_64_after_hwframe 
> > (arch/x86/entry/entry_64.S:120)
> > [   27.759719] RIP: 0033:0x7f7cc99484e6
> > [ 27.760458] Code: 69 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 
> > 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0c
> 
> This backtrace is incomplete/lacks the oopsing address, so is unclear
> what really went wrong. Please report the full backtrace.
> 
> Important: please include the code you used for this test. No need to
> post the full old patches here, you can e.g. share an URL to your git
> tree/branch.
> 
> The main point is that this backtrace does not look compatible with the
> code suggested previously.

I discussed this with Mat(ttbe) on IRC. It looks like the main problem
is that mptcp don't implement the struct proto/sk->sk_prot->connect,
and the defer_connect is not triggering such path as subflow the socket
is not in SS_UNCONNECTED state on defer connect.

The correct solution would be re-factor the mptcp connect code/hooking
to re-use the inet_stream_connect() infrastructure. I'll try to cook
the relevant patch - not strictily fastclose related - asap, so that
you can rebase this series on top of them. 

Cheers,

Paolo
diff mbox series

Patch

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index c25939b2af68..ccf2b42837a1 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -150,6 +150,8 @@  void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
 			 struct mptcp_out_options *opts);
 
 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info);
+int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags);
+struct sock *mptcp_subflow_conn_sock(struct sock *sk);
 
 /* move the skb extension owership, with the assumption that 'to' is
  * newly allocated
@@ -286,6 +288,13 @@  static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
 
 static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { }
 static inline void mptcp_seq_show(struct seq_file *seq) { }
+static inline int mptcp_stream_connect(struct socket *sock,
+				       struct sockaddr *uaddr,
+				       int addr_len,
+				       int flags)
+{
+
+}
 
 static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
 						const struct sock *sk_listener,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4f71cc15ff8e..e53d26d74dec 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1757,6 +1757,9 @@  struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
 			      struct request_sock *req,
 			      struct tcp_fastopen_cookie *foc,
 			      const struct dst_entry *dst);
+int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+			 int *copied, size_t size,
+			 struct ubuf_info *uarg);
 void tcp_fastopen_init_key_once(struct net *net);
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 			     struct tcp_fastopen_cookie *cookie);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5237a3f08c94..daa611671d9a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1162,8 +1162,9 @@  void tcp_free_fastopen_req(struct tcp_sock *tp)
 	}
 }
 
-int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
-			 size_t size, struct ubuf_info *uarg)
+int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+			 int *copied, size_t size,
+			 struct ubuf_info *uarg)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
@@ -1196,8 +1197,19 @@  int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied,
 		}
 	}
 	flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
-	err = __inet_stream_connect(sk->sk_socket, uaddr,
-				    msg->msg_namelen, flags, 1);
+	if (!sk_is_mptcp(sk)) {
+		err = __inet_stream_connect(sk->sk_socket, uaddr,
+					    msg->msg_namelen, flags, 1);
+	} else {
+		struct sock *parent = mptcp_subflow_conn_sock(sk);
+
+		release_sock(sk);
+		release_sock(parent);
+		err = mptcp_stream_connect(sk->sk_socket, uaddr,
+					   msg->msg_namelen, msg->msg_flags);
+		lock_sock(parent);
+		lock_sock(sk);
+	}
 	/* fastopen_req could already be freed in __inet_stream_connect
 	 * if the connection times out or gets rst
 	 */
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8feb684408f7..0db50712bad7 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -58,6 +58,12 @@  static void __mptcp_check_send_data_fin(struct sock *sk);
 DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 static struct net_device mptcp_napi_dev;
 
+struct sock *
+mptcp_subflow_conn_sock(struct sock *sk)
+{
+	return ((mptcp_subflow_ctx(sk))->conn);
+}
+
 /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
  * completed yet or has failed, return the subflow socket.
  * Otherwise return NULL.
@@ -1673,17 +1679,14 @@  static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int ret = 0;
 	long timeo;
 
-	/* we don't support FASTOPEN yet */
-	if (msg->msg_flags & MSG_FASTOPEN)
-		return -EOPNOTSUPP;
-
 	/* silently ignore everything else */
-	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
+	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN;
 
 	lock_sock(sk);
 
 	ssock = __mptcp_nmpc_socket(msk);
-	if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) {
+	if (unlikely(ssock && (inet_sk(ssock->sk)->defer_connect ||
+			       msg->msg_flags & MSG_FASTOPEN))) {
 		struct sock *ssk = ssock->sk;
 		int copied_syn = 0;
 
@@ -3568,8 +3571,8 @@  static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
 	__mptcp_do_fallback(msk);
 }
 
-static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
-				int addr_len, int flags)
+int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
+			 int addr_len, int flags)
 {
 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
 	struct mptcp_subflow_context *subflow;