diff mbox series

[net] net: prevent address overwrite in connect() and sendmsg()

Message ID 20230912013332.2048422-1-jrife@google.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net] net: prevent address overwrite in connect() and sendmsg() | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/fixes_present fail Series targets non-next tree, but doesn't contain any Fixes tags
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1350 this patch: 1350
netdev/cc_maintainers warning 2 maintainers not CCed: willemdebruijn.kernel@gmail.com dsahern@kernel.org
netdev/build_clang success Errors and warnings before: 1364 this patch: 1364
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1373 this patch: 1373
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Jordan Rife Sept. 12, 2023, 1:33 a.m. UTC
commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect")
ensured that kernel_connect() will not overwrite the address parameter
in cases where BPF connect hooks perform an address rewrite. However,
there remain other cases where BPF hooks can overwrite an address held
by a kernel client.

==Scenarios Tested==

* Code in the SMB and Ceph modules calls sock->ops->connect() directly,
  allowing the address overwrite to occur. In the case of SMB, this can
  lead to broken mounts.
* NFS v3 mounts with proto=udp call sock_sendmsg() for each RPC call,
  passing a pointer to the mount address in msg->msg_name which is
  later overwritten by a BPF sendmsg hook. This can lead to broken NFS
  mounts.

In order to more comprehensively fix this class of problems, this patch
pushes the address copy deeper into the stack and introduces an address
copy to both udp_sendmsg() and udpv6_sendmsg() to insulate all callers
from address rewrites.

Signed-off-by: Jordan Rife <jrife@google.com>
---
 net/ipv4/af_inet.c | 18 ++++++++++++++++++
 net/ipv4/udp.c     | 21 ++++++++++++++++-----
 net/ipv6/udp.c     | 23 +++++++++++++++++------
 net/socket.c       |  7 +------
 4 files changed, 52 insertions(+), 17 deletions(-)

Comments

Willem de Bruijn Sept. 12, 2023, 1:33 p.m. UTC | #1
Jordan Rife wrote:
> commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect")
> ensured that kernel_connect() will not overwrite the address parameter
> in cases where BPF connect hooks perform an address rewrite. However,
> there remain other cases where BPF hooks can overwrite an address held
> by a kernel client.
> 
> ==Scenarios Tested==
> 
> * Code in the SMB and Ceph modules calls sock->ops->connect() directly,
>   allowing the address overwrite to occur. In the case of SMB, this can
>   lead to broken mounts.

These should probably call kernel_connect instead.

> * NFS v3 mounts with proto=udp call sock_sendmsg() for each RPC call,
>   passing a pointer to the mount address in msg->msg_name which is
>   later overwritten by a BPF sendmsg hook. This can lead to broken NFS
>   mounts.

Similarly, this could call kernel_sendmsg, and the extra copy handled
in that wrapper. The arguments are not exacty the same, so not 100%
this is feasible.

But it's preferable if in-kernel callers use the kernel_.. API rather
than bypass it. Exactly for issues like the one you report.
 
> In order to more comprehensively fix this class of problems, this patch
> pushes the address copy deeper into the stack and introduces an address
> copy to both udp_sendmsg() and udpv6_sendmsg() to insulate all callers
> from address rewrites.
> 
> Signed-off-by: Jordan Rife <jrife@google.com>
> ---
>  net/ipv4/af_inet.c | 18 ++++++++++++++++++
>  net/ipv4/udp.c     | 21 ++++++++++++++++-----
>  net/ipv6/udp.c     | 23 +++++++++++++++++------
>  net/socket.c       |  7 +------
>  4 files changed, 52 insertions(+), 17 deletions(-)
> 
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index 3d2e30e204735..c37d484fbee34 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -568,6 +568,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
>  {
>  	struct sock *sk = sock->sk;
>  	const struct proto *prot;
> +	struct sockaddr_storage addr;
>  	int err;
>  
>  	if (addr_len < sizeof(uaddr->sa_family))
> @@ -580,6 +581,14 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
>  		return prot->disconnect(sk, flags);
>  
>  	if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
> +		if (uaddr && addr_len <= sizeof(addr)) {
> +			/* pre_connect can rewrite uaddr, so make a copy to
> +			 * insulate the caller.
> +			 */
> +			memcpy(&addr, uaddr, addr_len);
> +			uaddr = (struct sockaddr *)&addr;
> +		}
> +
>  		err = prot->pre_connect(sk, uaddr, addr_len);
>  		if (err)
>  			return err;
> @@ -625,6 +634,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>  			  int addr_len, int flags, int is_sendmsg)
>  {
>  	struct sock *sk = sock->sk;
> +	struct sockaddr_storage addr;
>  	int err;
>  	long timeo;
>  
> @@ -668,6 +678,14 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>  			goto out;
>  
>  		if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
> +			if (uaddr && addr_len <= sizeof(addr)) {
> +				/* pre_connect can rewrite uaddr, so make a copy to
> +				 * insulate the caller.
> +				 */
> +				memcpy(&addr, uaddr, addr_len);
> +				uaddr = (struct sockaddr *)&addr;
> +			}
> +
>  			err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
>  			if (err)
>  				goto out;
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index f39b9c8445808..5f5ee2752eeb7 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -1142,18 +1142,29 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>  	}
>  
>  	if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
> +		struct sockaddr_in tmp_addr;
> +		struct sockaddr_in *addr = usin;
> +
> +		/* BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK can rewrite usin, so make a
> +		 * copy to insulate the caller.
> +		 */
> +		if (usin && msg->msg_namelen <= sizeof(tmp_addr)) {
> +			memcpy(&tmp_addr, usin, msg->msg_namelen);
> +			addr = &tmp_addr;
> +		}
> +
>  		err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
> -					    (struct sockaddr *)usin, &ipc.addr);
> +					    (struct sockaddr *)addr, &ipc.addr);
>  		if (err)
>  			goto out_free;
> -		if (usin) {
> -			if (usin->sin_port == 0) {
> +		if (addr) {
> +			if (addr->sin_port == 0) {
>  				/* BPF program set invalid port. Reject it. */
>  				err = -EINVAL;
>  				goto out_free;
>  			}
> -			daddr = usin->sin_addr.s_addr;
> -			dport = usin->sin_port;
> +			daddr = addr->sin_addr.s_addr;
> +			dport = addr->sin_port;
>  		}
>  	}
>  
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 86b5d509a4688..cbc1917fad629 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -1506,26 +1506,37 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>  	fl6->fl6_sport = inet->inet_sport;
>  
>  	if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
> +		struct sockaddr_in6 tmp_addr;
> +		struct sockaddr_in6 *addr = sin6;
> +
> +		/* BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK can rewrite sin6, so make a
> +		 * copy to insulate the caller.
> +		 */
> +		if (sin6 && addr_len <= sizeof(tmp_addr)) {
> +			memcpy(&tmp_addr, sin6, addr_len);
> +			addr = &tmp_addr;
> +		}
> +
>  		err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
> -					   (struct sockaddr *)sin6,
> +					   (struct sockaddr *)addr,
>  					   &fl6->saddr);
>  		if (err)
>  			goto out_no_dst;
> -		if (sin6) {
> -			if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
> +		if (addr) {
> +			if (ipv6_addr_v4mapped(&addr->sin6_addr)) {
>  				/* BPF program rewrote IPv6-only by IPv4-mapped
>  				 * IPv6. It's currently unsupported.
>  				 */
>  				err = -ENOTSUPP;
>  				goto out_no_dst;
>  			}
> -			if (sin6->sin6_port == 0) {
> +			if (addr->sin6_port == 0) {
>  				/* BPF program set invalid port. Reject it. */
>  				err = -EINVAL;
>  				goto out_no_dst;
>  			}
> -			fl6->fl6_dport = sin6->sin6_port;
> -			fl6->daddr = sin6->sin6_addr;
> +			fl6->fl6_dport = addr->sin6_port;
> +			fl6->daddr = addr->sin6_addr;
>  		}
>  	}
>  
> diff --git a/net/socket.c b/net/socket.c
> index c8b08b32f097e..39794d026fa11 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -3570,12 +3570,7 @@ EXPORT_SYMBOL(kernel_accept);
>  int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
>  		   int flags)
>  {
> -	struct sockaddr_storage address;
> -
> -	memcpy(&address, addr, addrlen);
> -
> -	return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
> -					     addrlen, flags);
> +	return READ_ONCE(sock->ops)->connect(sock, addr, addrlen, flags);
>  }
>  EXPORT_SYMBOL(kernel_connect);
>  
> -- 
> 2.42.0.283.g2d96d420d3-goog
>
Daniel Borkmann Sept. 12, 2023, 2:22 p.m. UTC | #2
On 9/12/23 3:33 PM, Willem de Bruijn wrote:
> Jordan Rife wrote:
>> commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect")
>> ensured that kernel_connect() will not overwrite the address parameter
>> in cases where BPF connect hooks perform an address rewrite. However,
>> there remain other cases where BPF hooks can overwrite an address held
>> by a kernel client.
>>
>> ==Scenarios Tested==
>>
>> * Code in the SMB and Ceph modules calls sock->ops->connect() directly,
>>    allowing the address overwrite to occur. In the case of SMB, this can
>>    lead to broken mounts.
> 
> These should probably call kernel_connect instead.
> 
>> * NFS v3 mounts with proto=udp call sock_sendmsg() for each RPC call,
>>    passing a pointer to the mount address in msg->msg_name which is
>>    later overwritten by a BPF sendmsg hook. This can lead to broken NFS
>>    mounts.
> 
> Similarly, this could call kernel_sendmsg, and the extra copy handled
> in that wrapper. The arguments are not exacty the same, so not 100%
> this is feasible.
> 
> But it's preferable if in-kernel callers use the kernel_.. API rather
> than bypass it. Exactly for issues like the one you report.

Fully agree, if it's feasible it would be better to convert them over to
in-kernel API.

>> In order to more comprehensively fix this class of problems, this patch
>> pushes the address copy deeper into the stack and introduces an address
>> copy to both udp_sendmsg() and udpv6_sendmsg() to insulate all callers
>> from address rewrites.
>>
>> Signed-off-by: Jordan Rife <jrife@google.com>
Jordan Rife Sept. 12, 2023, 6:31 p.m. UTC | #3
> These should probably call kernel_connect instead.
> Similarly, this could call kernel_sendmsg, and the extra copy handled in that wrapper.

I was considering this approach initially, but one concern I had was
that there are other instances I see of modules calling ops->connect()
directly (bypassing kernel_connect()):

- net/netfilter/ipvs/ip_vs_sync.c: make_send_sock()
- drivers/block/drbd/drbd_receiver.c: drbd_try_connect()
- drivers/infiniband/hw/erdma/erdma_cm.c: kernel_bindconnect()
- drivers/infiniband/sw/siw/siw_cm.c: kernel_bindconnect()
- fs/ocfs2/cluster/tcp.c: o2net_start_connect()
- net/rds/tcp_connect.c: rds_tcp_conn_path_connect()

and ops->sendmsg():

- net/smc/af_smc.c: smc_sendmsg()
- drivers/vhost/net.c: vhost_tx_batch(), handle_tx_copy(), handle_tx_zerocopy()
- drivers/target/iscsi/iscsi_target_util.c: iscsit_fe_sendpage_sg()

which (at least in theory) leaves them open to the same problem I'm
seeing with NFS/SMB right now. I worry that even if all these
instances were swapped out with kernel_sendmsg() and kernel_connect(),
it would turn into a game of whac-a-mole in the future as new changes
or new modules may reintroduce direct calls to sock->ops->connect() or
sock->ops->sendmsg().

-Jordan



On Tue, Sep 12, 2023 at 7:22 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 9/12/23 3:33 PM, Willem de Bruijn wrote:
> > Jordan Rife wrote:
> >> commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect")
> >> ensured that kernel_connect() will not overwrite the address parameter
> >> in cases where BPF connect hooks perform an address rewrite. However,
> >> there remain other cases where BPF hooks can overwrite an address held
> >> by a kernel client.
> >>
> >> ==Scenarios Tested==
> >>
> >> * Code in the SMB and Ceph modules calls sock->ops->connect() directly,
> >>    allowing the address overwrite to occur. In the case of SMB, this can
> >>    lead to broken mounts.
> >
> > These should probably call kernel_connect instead.
> >
> >> * NFS v3 mounts with proto=udp call sock_sendmsg() for each RPC call,
> >>    passing a pointer to the mount address in msg->msg_name which is
> >>    later overwritten by a BPF sendmsg hook. This can lead to broken NFS
> >>    mounts.
> >
> > Similarly, this could call kernel_sendmsg, and the extra copy handled
> > in that wrapper. The arguments are not exacty the same, so not 100%
> > this is feasible.
> >
> > But it's preferable if in-kernel callers use the kernel_.. API rather
> > than bypass it. Exactly for issues like the one you report.
>
> Fully agree, if it's feasible it would be better to convert them over to
> in-kernel API.
>
> >> In order to more comprehensively fix this class of problems, this patch
> >> pushes the address copy deeper into the stack and introduces an address
> >> copy to both udp_sendmsg() and udpv6_sendmsg() to insulate all callers
> >> from address rewrites.
> >>
> >> Signed-off-by: Jordan Rife <jrife@google.com>
Willem de Bruijn Sept. 12, 2023, 7:35 p.m. UTC | #4
On Tue, Sep 12, 2023 at 2:31 PM Jordan Rife <jrife@google.com> wrote:
>
> > These should probably call kernel_connect instead.
> > Similarly, this could call kernel_sendmsg, and the extra copy handled in that wrapper.
>
> I was considering this approach initially, but one concern I had was
> that there are other instances I see of modules calling ops->connect()
> directly (bypassing kernel_connect()):
>
> - net/netfilter/ipvs/ip_vs_sync.c: make_send_sock()
> - drivers/block/drbd/drbd_receiver.c: drbd_try_connect()
> - drivers/infiniband/hw/erdma/erdma_cm.c: kernel_bindconnect()
> - drivers/infiniband/sw/siw/siw_cm.c: kernel_bindconnect()
> - fs/ocfs2/cluster/tcp.c: o2net_start_connect()
> - net/rds/tcp_connect.c: rds_tcp_conn_path_connect()
>
> and ops->sendmsg():
>
> - net/smc/af_smc.c: smc_sendmsg()
> - drivers/vhost/net.c: vhost_tx_batch(), handle_tx_copy(), handle_tx_zerocopy()
> - drivers/target/iscsi/iscsi_target_util.c: iscsit_fe_sendpage_sg()
>
> which (at least in theory) leaves them open to the same problem I'm
> seeing with NFS/SMB right now. I worry that even if all these
> instances were swapped out with kernel_sendmsg() and kernel_connect(),
> it would turn into a game of whac-a-mole in the future as new changes
> or new modules may reintroduce direct calls to sock->ops->connect() or
> sock->ops->sendmsg().

Ideally these all would use the proper kernel-internal APIs.

I care less about new code. If all examples are clear, that
will do the right thing, or is a simple fix-up worst case.

Question is if changing all these callers is suitable for a patch
targeting net.

The changes do seem like trivial one liners?

> -Jordan
>
>
>
> On Tue, Sep 12, 2023 at 7:22 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> >
> > On 9/12/23 3:33 PM, Willem de Bruijn wrote:
> > > Jordan Rife wrote:
> > >> commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect")
> > >> ensured that kernel_connect() will not overwrite the address parameter
> > >> in cases where BPF connect hooks perform an address rewrite. However,
> > >> there remain other cases where BPF hooks can overwrite an address held
> > >> by a kernel client.
> > >>
> > >> ==Scenarios Tested==
> > >>
> > >> * Code in the SMB and Ceph modules calls sock->ops->connect() directly,
> > >>    allowing the address overwrite to occur. In the case of SMB, this can
> > >>    lead to broken mounts.
> > >
> > > These should probably call kernel_connect instead.
> > >
> > >> * NFS v3 mounts with proto=udp call sock_sendmsg() for each RPC call,
> > >>    passing a pointer to the mount address in msg->msg_name which is
> > >>    later overwritten by a BPF sendmsg hook. This can lead to broken NFS
> > >>    mounts.
> > >
> > > Similarly, this could call kernel_sendmsg, and the extra copy handled
> > > in that wrapper. The arguments are not exacty the same, so not 100%
> > > this is feasible.
> > >
> > > But it's preferable if in-kernel callers use the kernel_.. API rather
> > > than bypass it. Exactly for issues like the one you report.
> >
> > Fully agree, if it's feasible it would be better to convert them over to
> > in-kernel API.
> >
> > >> In order to more comprehensively fix this class of problems, this patch
> > >> pushes the address copy deeper into the stack and introduces an address
> > >> copy to both udp_sendmsg() and udpv6_sendmsg() to insulate all callers
> > >> from address rewrites.
> > >>
> > >> Signed-off-by: Jordan Rife <jrife@google.com>
Jordan Rife Sept. 12, 2023, 8:07 p.m. UTC | #5
> Ideally these all would use the proper kernel-internal APIs.
> I care less about new code. If all examples are clear, that will do the right thing, or is a simple fix-up worst case.

Fair enough.

> The changes do seem like trivial one liners?

Looks like it. Still, it's going to take some time to patch+test each
of these. I'll start with NFS (SUNRPC), SMB, and Ceph, since I can
easily test them.

> Question is if changing all these callers is suitable for a patch targeting net.
One patch to net will be needed to add an address copy to
kernel_sendmsg(), but I guess I'll need to send some other patches to
the appropriate subsystem maintainers for SUNRPC, SMB, and Ceph to
swap out calls.

> Similarly, this could call kernel_sendmsg, and the extra copy handled in that wrapper.

Would it make more sense to do the address copy in sock_sendmsg()
instead? Are kernel callers "supposed" to use kernel_sendmsg() or is
it valid for them to use sock_sendmsg() as many of them seem to want
to do today. Seeing as sock_sendmsg() is called by kernel_sendmsg(),
adding the copy in sock_sendmsg() fixes this problem for callers to
both of these and avoids the need for patching all modules that call
sock_sendmsg().

-Jordan

On Tue, Sep 12, 2023 at 12:36 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Tue, Sep 12, 2023 at 2:31 PM Jordan Rife <jrife@google.com> wrote:
> >
> > > These should probably call kernel_connect instead.
> > > Similarly, this could call kernel_sendmsg, and the extra copy handled in that wrapper.
> >
> > I was considering this approach initially, but one concern I had was
> > that there are other instances I see of modules calling ops->connect()
> > directly (bypassing kernel_connect()):
> >
> > - net/netfilter/ipvs/ip_vs_sync.c: make_send_sock()
> > - drivers/block/drbd/drbd_receiver.c: drbd_try_connect()
> > - drivers/infiniband/hw/erdma/erdma_cm.c: kernel_bindconnect()
> > - drivers/infiniband/sw/siw/siw_cm.c: kernel_bindconnect()
> > - fs/ocfs2/cluster/tcp.c: o2net_start_connect()
> > - net/rds/tcp_connect.c: rds_tcp_conn_path_connect()
> >
> > and ops->sendmsg():
> >
> > - net/smc/af_smc.c: smc_sendmsg()
> > - drivers/vhost/net.c: vhost_tx_batch(), handle_tx_copy(), handle_tx_zerocopy()
> > - drivers/target/iscsi/iscsi_target_util.c: iscsit_fe_sendpage_sg()
> >
> > which (at least in theory) leaves them open to the same problem I'm
> > seeing with NFS/SMB right now. I worry that even if all these
> > instances were swapped out with kernel_sendmsg() and kernel_connect(),
> > it would turn into a game of whac-a-mole in the future as new changes
> > or new modules may reintroduce direct calls to sock->ops->connect() or
> > sock->ops->sendmsg().
>
> Ideally these all would use the proper kernel-internal APIs.
>
> I care less about new code. If all examples are clear, that
> will do the right thing, or is a simple fix-up worst case.
>
> Question is if changing all these callers is suitable for a patch
> targeting net.
>
> The changes do seem like trivial one liners?
>
> > -Jordan
> >
> >
> >
> > On Tue, Sep 12, 2023 at 7:22 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> > >
> > > On 9/12/23 3:33 PM, Willem de Bruijn wrote:
> > > > Jordan Rife wrote:
> > > >> commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect")
> > > >> ensured that kernel_connect() will not overwrite the address parameter
> > > >> in cases where BPF connect hooks perform an address rewrite. However,
> > > >> there remain other cases where BPF hooks can overwrite an address held
> > > >> by a kernel client.
> > > >>
> > > >> ==Scenarios Tested==
> > > >>
> > > >> * Code in the SMB and Ceph modules calls sock->ops->connect() directly,
> > > >>    allowing the address overwrite to occur. In the case of SMB, this can
> > > >>    lead to broken mounts.
> > > >
> > > > These should probably call kernel_connect instead.
> > > >
> > > >> * NFS v3 mounts with proto=udp call sock_sendmsg() for each RPC call,
> > > >>    passing a pointer to the mount address in msg->msg_name which is
> > > >>    later overwritten by a BPF sendmsg hook. This can lead to broken NFS
> > > >>    mounts.
> > > >
> > > > Similarly, this could call kernel_sendmsg, and the extra copy handled
> > > > in that wrapper. The arguments are not exacty the same, so not 100%
> > > > this is feasible.
> > > >
> > > > But it's preferable if in-kernel callers use the kernel_.. API rather
> > > > than bypass it. Exactly for issues like the one you report.
> > >
> > > Fully agree, if it's feasible it would be better to convert them over to
> > > in-kernel API.
> > >
> > > >> In order to more comprehensively fix this class of problems, this patch
> > > >> pushes the address copy deeper into the stack and introduces an address
> > > >> copy to both udp_sendmsg() and udpv6_sendmsg() to insulate all callers
> > > >> from address rewrites.
> > > >>
> > > >> Signed-off-by: Jordan Rife <jrife@google.com>
Willem de Bruijn Sept. 12, 2023, 8:48 p.m. UTC | #6
On Tue, Sep 12, 2023 at 4:07 PM Jordan Rife <jrife@google.com> wrote:
>
> > Ideally these all would use the proper kernel-internal APIs.
> > I care less about new code. If all examples are clear, that will do the right thing, or is a simple fix-up worst case.
>
> Fair enough.
>
> > The changes do seem like trivial one liners?
>
> Looks like it. Still, it's going to take some time to patch+test each
> of these. I'll start with NFS (SUNRPC), SMB, and Ceph, since I can
> easily test them.
>
> > Question is if changing all these callers is suitable for a patch targeting net.
> One patch to net will be needed to add an address copy to
> kernel_sendmsg(), but I guess I'll need to send some other patches to
> the appropriate subsystem maintainers for SUNRPC, SMB, and Ceph to
> swap out calls.

If we take this path, it could be a single patch. The subsystem
maintainers should be CC:ed so that they can (N)ACK it.

But I do not mean to ask to split it up and test each one separately.

The change from sock->ops->connect to kernel_connect is certainly
trivial enough that compile testing should suffice.

Note that kernel_connect actually uses READ_ONCE(sock->ops) because of
a data race. All callers that call a socket that may be subject to
IPV6_ADDRFORM should do that. Likely some of those open coded examples
are affected, and do not. This is another example why using a single
interface is preferable.

> > Similarly, this could call kernel_sendmsg, and the extra copy handled in that wrapper.
>
> Would it make more sense to do the address copy in sock_sendmsg()
> instead? Are kernel callers "supposed" to use kernel_sendmsg() or is
> it valid for them to use sock_sendmsg() as many of them seem to want
> to do today. Seeing as sock_sendmsg() is called by kernel_sendmsg(),
> adding the copy in sock_sendmsg() fixes this problem for callers to
> both of these and avoids the need for patching all modules that call
> sock_sendmsg().

I think it is "supposed" to be the API for these cases. But as you
show, clearly it isn't today. Practice trumps theory.

The only question is whether we should pursue your original patch and
accept that this will continue, or one that improves the situation,
but touches more files and thus has a higher risk of merge conflicts.

I'd like to give others some time to chime in. I've given my opinion,
but it's only one.



> -Jordan
>
> On Tue, Sep 12, 2023 at 12:36 PM Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
> >
> > On Tue, Sep 12, 2023 at 2:31 PM Jordan Rife <jrife@google.com> wrote:
> > >
> > > > These should probably call kernel_connect instead.
> > > > Similarly, this could call kernel_sendmsg, and the extra copy handled in that wrapper.
> > >
> > > I was considering this approach initially, but one concern I had was
> > > that there are other instances I see of modules calling ops->connect()
> > > directly (bypassing kernel_connect()):
> > >
> > > - net/netfilter/ipvs/ip_vs_sync.c: make_send_sock()
> > > - drivers/block/drbd/drbd_receiver.c: drbd_try_connect()
> > > - drivers/infiniband/hw/erdma/erdma_cm.c: kernel_bindconnect()
> > > - drivers/infiniband/sw/siw/siw_cm.c: kernel_bindconnect()
> > > - fs/ocfs2/cluster/tcp.c: o2net_start_connect()
> > > - net/rds/tcp_connect.c: rds_tcp_conn_path_connect()
> > >
> > > and ops->sendmsg():
> > >
> > > - net/smc/af_smc.c: smc_sendmsg()
> > > - drivers/vhost/net.c: vhost_tx_batch(), handle_tx_copy(), handle_tx_zerocopy()
> > > - drivers/target/iscsi/iscsi_target_util.c: iscsit_fe_sendpage_sg()
> > >
> > > which (at least in theory) leaves them open to the same problem I'm
> > > seeing with NFS/SMB right now. I worry that even if all these
> > > instances were swapped out with kernel_sendmsg() and kernel_connect(),
> > > it would turn into a game of whac-a-mole in the future as new changes
> > > or new modules may reintroduce direct calls to sock->ops->connect() or
> > > sock->ops->sendmsg().
> >
> > Ideally these all would use the proper kernel-internal APIs.
> >
> > I care less about new code. If all examples are clear, that
> > will do the right thing, or is a simple fix-up worst case.
> >
> > Question is if changing all these callers is suitable for a patch
> > targeting net.
> >
> > The changes do seem like trivial one liners?
> >
> > > -Jordan
> > >
> > >
> > >
> > > On Tue, Sep 12, 2023 at 7:22 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> > > >
> > > > On 9/12/23 3:33 PM, Willem de Bruijn wrote:
> > > > > Jordan Rife wrote:
> > > > >> commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect")
> > > > >> ensured that kernel_connect() will not overwrite the address parameter
> > > > >> in cases where BPF connect hooks perform an address rewrite. However,
> > > > >> there remain other cases where BPF hooks can overwrite an address held
> > > > >> by a kernel client.
> > > > >>
> > > > >> ==Scenarios Tested==
> > > > >>
> > > > >> * Code in the SMB and Ceph modules calls sock->ops->connect() directly,
> > > > >>    allowing the address overwrite to occur. In the case of SMB, this can
> > > > >>    lead to broken mounts.
> > > > >
> > > > > These should probably call kernel_connect instead.
> > > > >
> > > > >> * NFS v3 mounts with proto=udp call sock_sendmsg() for each RPC call,
> > > > >>    passing a pointer to the mount address in msg->msg_name which is
> > > > >>    later overwritten by a BPF sendmsg hook. This can lead to broken NFS
> > > > >>    mounts.
> > > > >
> > > > > Similarly, this could call kernel_sendmsg, and the extra copy handled
> > > > > in that wrapper. The arguments are not exacty the same, so not 100%
> > > > > this is feasible.
> > > > >
> > > > > But it's preferable if in-kernel callers use the kernel_.. API rather
> > > > > than bypass it. Exactly for issues like the one you report.
> > > >
> > > > Fully agree, if it's feasible it would be better to convert them over to
> > > > in-kernel API.
> > > >
> > > > >> In order to more comprehensively fix this class of problems, this patch
> > > > >> pushes the address copy deeper into the stack and introduces an address
> > > > >> copy to both udp_sendmsg() and udpv6_sendmsg() to insulate all callers
> > > > >> from address rewrites.
> > > > >>
> > > > >> Signed-off-by: Jordan Rife <jrife@google.com>
Jordan Rife Sept. 12, 2023, 9:08 p.m. UTC | #7
> If we take this path, it could be a single patch. The subsystem
> maintainers should be CC:ed so that they can (N)ACK it.
>
> But I do not mean to ask to split it up and test each one separately.
>
> The change from sock->ops->connect to kernel_connect is certainly
> trivial enough that compile testing should suffice.

Ack. Thanks for clarifying.

> The only question is whether we should pursue your original patch and
> accept that this will continue, or one that improves the situation,
> but touches more files and thus has a higher risk of merge conflicts.
>
> I'd like to give others some time to chime in. I've given my opinion,
> but it's only one.
>
> I'd like to give others some time to chime in. I've given my opinion,
> but it's only one.

Sounds good. I'll wait to hear others' opinions on the best path forward.

-Jordan

On Tue, Sep 12, 2023 at 1:49 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Tue, Sep 12, 2023 at 4:07 PM Jordan Rife <jrife@google.com> wrote:
> >
> > > Ideally these all would use the proper kernel-internal APIs.
> > > I care less about new code. If all examples are clear, that will do the right thing, or is a simple fix-up worst case.
> >
> > Fair enough.
> >
> > > The changes do seem like trivial one liners?
> >
> > Looks like it. Still, it's going to take some time to patch+test each
> > of these. I'll start with NFS (SUNRPC), SMB, and Ceph, since I can
> > easily test them.
> >
> > > Question is if changing all these callers is suitable for a patch targeting net.
> > One patch to net will be needed to add an address copy to
> > kernel_sendmsg(), but I guess I'll need to send some other patches to
> > the appropriate subsystem maintainers for SUNRPC, SMB, and Ceph to
> > swap out calls.
>
> If we take this path, it could be a single patch. The subsystem
> maintainers should be CC:ed so that they can (N)ACK it.
>
> But I do not mean to ask to split it up and test each one separately.
>
> The change from sock->ops->connect to kernel_connect is certainly
> trivial enough that compile testing should suffice.
>
> Note that kernel_connect actually uses READ_ONCE(sock->ops) because of
> a data race. All callers that call a socket that may be subject to
> IPV6_ADDRFORM should do that. Likely some of those open coded examples
> are affected, and do not. This is another example why using a single
> interface is preferable.
>
> > > Similarly, this could call kernel_sendmsg, and the extra copy handled in that wrapper.
> >
> > Would it make more sense to do the address copy in sock_sendmsg()
> > instead? Are kernel callers "supposed" to use kernel_sendmsg() or is
> > it valid for them to use sock_sendmsg() as many of them seem to want
> > to do today. Seeing as sock_sendmsg() is called by kernel_sendmsg(),
> > adding the copy in sock_sendmsg() fixes this problem for callers to
> > both of these and avoids the need for patching all modules that call
> > sock_sendmsg().
>
> I think it is "supposed" to be the API for these cases. But as you
> show, clearly it isn't today. Practice trumps theory.
>
> The only question is whether we should pursue your original patch and
> accept that this will continue, or one that improves the situation,
> but touches more files and thus has a higher risk of merge conflicts.
>
> I'd like to give others some time to chime in. I've given my opinion,
> but it's only one.
>
>
>
> > -Jordan
> >
> > On Tue, Sep 12, 2023 at 12:36 PM Willem de Bruijn
> > <willemdebruijn.kernel@gmail.com> wrote:
> > >
> > > On Tue, Sep 12, 2023 at 2:31 PM Jordan Rife <jrife@google.com> wrote:
> > > >
> > > > > These should probably call kernel_connect instead.
> > > > > Similarly, this could call kernel_sendmsg, and the extra copy handled in that wrapper.
> > > >
> > > > I was considering this approach initially, but one concern I had was
> > > > that there are other instances I see of modules calling ops->connect()
> > > > directly (bypassing kernel_connect()):
> > > >
> > > > - net/netfilter/ipvs/ip_vs_sync.c: make_send_sock()
> > > > - drivers/block/drbd/drbd_receiver.c: drbd_try_connect()
> > > > - drivers/infiniband/hw/erdma/erdma_cm.c: kernel_bindconnect()
> > > > - drivers/infiniband/sw/siw/siw_cm.c: kernel_bindconnect()
> > > > - fs/ocfs2/cluster/tcp.c: o2net_start_connect()
> > > > - net/rds/tcp_connect.c: rds_tcp_conn_path_connect()
> > > >
> > > > and ops->sendmsg():
> > > >
> > > > - net/smc/af_smc.c: smc_sendmsg()
> > > > - drivers/vhost/net.c: vhost_tx_batch(), handle_tx_copy(), handle_tx_zerocopy()
> > > > - drivers/target/iscsi/iscsi_target_util.c: iscsit_fe_sendpage_sg()
> > > >
> > > > which (at least in theory) leaves them open to the same problem I'm
> > > > seeing with NFS/SMB right now. I worry that even if all these
> > > > instances were swapped out with kernel_sendmsg() and kernel_connect(),
> > > > it would turn into a game of whac-a-mole in the future as new changes
> > > > or new modules may reintroduce direct calls to sock->ops->connect() or
> > > > sock->ops->sendmsg().
> > >
> > > Ideally these all would use the proper kernel-internal APIs.
> > >
> > > I care less about new code. If all examples are clear, that
> > > will do the right thing, or is a simple fix-up worst case.
> > >
> > > Question is if changing all these callers is suitable for a patch
> > > targeting net.
> > >
> > > The changes do seem like trivial one liners?
> > >
> > > > -Jordan
> > > >
> > > >
> > > >
> > > > On Tue, Sep 12, 2023 at 7:22 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
> > > > >
> > > > > On 9/12/23 3:33 PM, Willem de Bruijn wrote:
> > > > > > Jordan Rife wrote:
> > > > > >> commit 0bdf399342c5 ("net: Avoid address overwrite in kernel_connect")
> > > > > >> ensured that kernel_connect() will not overwrite the address parameter
> > > > > >> in cases where BPF connect hooks perform an address rewrite. However,
> > > > > >> there remain other cases where BPF hooks can overwrite an address held
> > > > > >> by a kernel client.
> > > > > >>
> > > > > >> ==Scenarios Tested==
> > > > > >>
> > > > > >> * Code in the SMB and Ceph modules calls sock->ops->connect() directly,
> > > > > >>    allowing the address overwrite to occur. In the case of SMB, this can
> > > > > >>    lead to broken mounts.
> > > > > >
> > > > > > These should probably call kernel_connect instead.
> > > > > >
> > > > > >> * NFS v3 mounts with proto=udp call sock_sendmsg() for each RPC call,
> > > > > >>    passing a pointer to the mount address in msg->msg_name which is
> > > > > >>    later overwritten by a BPF sendmsg hook. This can lead to broken NFS
> > > > > >>    mounts.
> > > > > >
> > > > > > Similarly, this could call kernel_sendmsg, and the extra copy handled
> > > > > > in that wrapper. The arguments are not exacty the same, so not 100%
> > > > > > this is feasible.
> > > > > >
> > > > > > But it's preferable if in-kernel callers use the kernel_.. API rather
> > > > > > than bypass it. Exactly for issues like the one you report.
> > > > >
> > > > > Fully agree, if it's feasible it would be better to convert them over to
> > > > > in-kernel API.
> > > > >
> > > > > >> In order to more comprehensively fix this class of problems, this patch
> > > > > >> pushes the address copy deeper into the stack and introduces an address
> > > > > >> copy to both udp_sendmsg() and udpv6_sendmsg() to insulate all callers
> > > > > >> from address rewrites.
> > > > > >>
> > > > > >> Signed-off-by: Jordan Rife <jrife@google.com>
Dan Carpenter Sept. 13, 2023, 7:19 a.m. UTC | #8
Hi Jordan,

kernel test robot noticed the following build warnings:

url:    https://github.com/intel-lab-lkp/linux/commits/Jordan-Rife/net-prevent-address-overwrite-in-connect-and-sendmsg/20230912-093550
base:   net/main
patch link:    https://lore.kernel.org/r/20230912013332.2048422-1-jrife%40google.com
patch subject: [PATCH net] net: prevent address overwrite in connect() and sendmsg()
config: x86_64-randconfig-161-20230912 (https://download.01.org/0day-ci/archive/20230913/202309131155.MonA0VTS-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce: (https://download.01.org/0day-ci/archive/20230913/202309131155.MonA0VTS-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202309131155.MonA0VTS-lkp@intel.com/

smatch warnings:
net/ipv4/af_inet.c:584 inet_dgram_connect() warn: variable dereferenced before check 'uaddr' (see line 580)

vim +/uaddr +584 net/ipv4/af_inet.c

^1da177e4c3f41 Linus Torvalds    2005-04-16  566  int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
^1da177e4c3f41 Linus Torvalds    2005-04-16  567  		       int addr_len, int flags)
^1da177e4c3f41 Linus Torvalds    2005-04-16  568  {
^1da177e4c3f41 Linus Torvalds    2005-04-16  569  	struct sock *sk = sock->sk;
364f997b5cfe1d Kuniyuki Iwashima 2022-10-06  570  	const struct proto *prot;
6113a07e1ad512 Jordan Rife       2023-09-11  571  	struct sockaddr_storage addr;
d74bad4e74ee37 Andrey Ignatov    2018-03-30  572  	int err;
^1da177e4c3f41 Linus Torvalds    2005-04-16  573  
6503d96168f891 Changli Gao       2010-03-31  574  	if (addr_len < sizeof(uaddr->sa_family))
6503d96168f891 Changli Gao       2010-03-31  575  		return -EINVAL;
364f997b5cfe1d Kuniyuki Iwashima 2022-10-06  576  
364f997b5cfe1d Kuniyuki Iwashima 2022-10-06  577  	/* IPV6_ADDRFORM can change sk->sk_prot under us. */
364f997b5cfe1d Kuniyuki Iwashima 2022-10-06  578  	prot = READ_ONCE(sk->sk_prot);
364f997b5cfe1d Kuniyuki Iwashima 2022-10-06  579  
^1da177e4c3f41 Linus Torvalds    2005-04-16 @580  	if (uaddr->sa_family == AF_UNSPEC)
                                                            ^^^^^^^^^^^^^^^^
"uaddr" better not be NULL

364f997b5cfe1d Kuniyuki Iwashima 2022-10-06  581  		return prot->disconnect(sk, flags);
^1da177e4c3f41 Linus Torvalds    2005-04-16  582  
d74bad4e74ee37 Andrey Ignatov    2018-03-30  583  	if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
6113a07e1ad512 Jordan Rife       2023-09-11 @584  		if (uaddr && addr_len <= sizeof(addr)) {
                                                                    ^^^^^
Remove this check?

6113a07e1ad512 Jordan Rife       2023-09-11  585  			/* pre_connect can rewrite uaddr, so make a copy to
6113a07e1ad512 Jordan Rife       2023-09-11  586  			 * insulate the caller.
6113a07e1ad512 Jordan Rife       2023-09-11  587  			 */
6113a07e1ad512 Jordan Rife       2023-09-11  588  			memcpy(&addr, uaddr, addr_len);
6113a07e1ad512 Jordan Rife       2023-09-11  589  			uaddr = (struct sockaddr *)&addr;
6113a07e1ad512 Jordan Rife       2023-09-11  590  		}
6113a07e1ad512 Jordan Rife       2023-09-11  591  
364f997b5cfe1d Kuniyuki Iwashima 2022-10-06  592  		err = prot->pre_connect(sk, uaddr, addr_len);
d74bad4e74ee37 Andrey Ignatov    2018-03-30  593  		if (err)
d74bad4e74ee37 Andrey Ignatov    2018-03-30  594  			return err;
d74bad4e74ee37 Andrey Ignatov    2018-03-30  595  	}
d74bad4e74ee37 Andrey Ignatov    2018-03-30  596  
dcd01eeac14486 Eric Dumazet      2021-06-09  597  	if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
^1da177e4c3f41 Linus Torvalds    2005-04-16  598  		return -EAGAIN;
364f997b5cfe1d Kuniyuki Iwashima 2022-10-06  599  	return prot->connect(sk, uaddr, addr_len);
^1da177e4c3f41 Linus Torvalds    2005-04-16  600  }
Willem de Bruijn Sept. 13, 2023, 2:02 p.m. UTC | #9
On Tue, Sep 12, 2023 at 5:09 PM Jordan Rife <jrife@google.com> wrote:
>
> > If we take this path, it could be a single patch. The subsystem
> > maintainers should be CC:ed so that they can (N)ACK it.
> >
> > But I do not mean to ask to split it up and test each one separately.
> >
> > The change from sock->ops->connect to kernel_connect is certainly
> > trivial enough that compile testing should suffice.
>
> Ack. Thanks for clarifying.
>
> > The only question is whether we should pursue your original patch and
> > accept that this will continue, or one that improves the situation,
> > but touches more files and thus has a higher risk of merge conflicts.
> >
> > I'd like to give others some time to chime in. I've given my opinion,
> > but it's only one.
> >
> > I'd like to give others some time to chime in. I've given my opinion,
> > but it's only one.
>
> Sounds good. I'll wait to hear others' opinions on the best path forward.

No other comments so far.

My hunch is that a short list of these changes

```
@@ -1328,7 +1328,7 @@ static int kernel_bindconnect(struct socket *s,
struct sockaddr *laddr,
        if (rv < 0)
                return rv;

-       rv = s->ops->connect(s, raddr, size, flags);
+       rv = kernel_connect(s, raddr, size, flags);
```

is no more invasive than your proposed patch, and gives a more robust outcome.

Please take a stab.

If it proves at all non-trivial, e.g., in the conversion of arguments
between kernel_sendmsg and sock_sendmsg/sock->ops->sendmsg, then let's
submit your original patch. And I will do the conversion in net-next
instead.
Jordan Rife Sept. 13, 2023, 6:04 p.m. UTC | #10
> Please take a stab.

To clarify, the plan is to:

1) Swap out calls to sock->ops->connect() with kernel_connect()
2) Move the address copy to kernel_sendmsg()
3) Swap out calls to sock_sendmsg()/sock->ops->sendmsg() with kernel_sendmsg()

> If it proves at all non-trivial, e.g., in the conversion of arguments
> between kernel_sendmsg and sock_sendmsg/sock->ops->sendmsg, then let's
> submit your original patch. And I will do the conversion in net-next
> instead.

I'll give it a try and see how trivial the changes are.

-Jordan

On Wed, Sep 13, 2023 at 7:03 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Tue, Sep 12, 2023 at 5:09 PM Jordan Rife <jrife@google.com> wrote:
> >
> > > If we take this path, it could be a single patch. The subsystem
> > > maintainers should be CC:ed so that they can (N)ACK it.
> > >
> > > But I do not mean to ask to split it up and test each one separately.
> > >
> > > The change from sock->ops->connect to kernel_connect is certainly
> > > trivial enough that compile testing should suffice.
> >
> > Ack. Thanks for clarifying.
> >
> > > The only question is whether we should pursue your original patch and
> > > accept that this will continue, or one that improves the situation,
> > > but touches more files and thus has a higher risk of merge conflicts.
> > >
> > > I'd like to give others some time to chime in. I've given my opinion,
> > > but it's only one.
> > >
> > > I'd like to give others some time to chime in. I've given my opinion,
> > > but it's only one.
> >
> > Sounds good. I'll wait to hear others' opinions on the best path forward.
>
> No other comments so far.
>
> My hunch is that a short list of these changes
>
> ```
> @@ -1328,7 +1328,7 @@ static int kernel_bindconnect(struct socket *s,
> struct sockaddr *laddr,
>         if (rv < 0)
>                 return rv;
>
> -       rv = s->ops->connect(s, raddr, size, flags);
> +       rv = kernel_connect(s, raddr, size, flags);
> ```
>
> is no more invasive than your proposed patch, and gives a more robust outcome.
>
> Please take a stab.
>
> If it proves at all non-trivial, e.g., in the conversion of arguments
> between kernel_sendmsg and sock_sendmsg/sock->ops->sendmsg, then let's
> submit your original patch. And I will do the conversion in net-next
> instead.
Willem de Bruijn Sept. 13, 2023, 7:10 p.m. UTC | #11
On Wed, Sep 13, 2023 at 2:04 PM Jordan Rife <jrife@google.com> wrote:
>
> > Please take a stab.
>
> To clarify, the plan is to:
>
> 1) Swap out calls to sock->ops->connect() with kernel_connect()
> 2) Move the address copy to kernel_sendmsg()
> 3) Swap out calls to sock_sendmsg()/sock->ops->sendmsg() with kernel_sendmsg()

Exactly.

> > If it proves at all non-trivial, e.g., in the conversion of arguments
> > between kernel_sendmsg and sock_sendmsg/sock->ops->sendmsg, then let's
> > submit your original patch. And I will do the conversion in net-next
> > instead.
>
> I'll give it a try and see how trivial the changes are.

Thanks!

> -Jordan
>
> On Wed, Sep 13, 2023 at 7:03 AM Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
> >
> > On Tue, Sep 12, 2023 at 5:09 PM Jordan Rife <jrife@google.com> wrote:
> > >
> > > > If we take this path, it could be a single patch. The subsystem
> > > > maintainers should be CC:ed so that they can (N)ACK it.
> > > >
> > > > But I do not mean to ask to split it up and test each one separately.
> > > >
> > > > The change from sock->ops->connect to kernel_connect is certainly
> > > > trivial enough that compile testing should suffice.
> > >
> > > Ack. Thanks for clarifying.
> > >
> > > > The only question is whether we should pursue your original patch and
> > > > accept that this will continue, or one that improves the situation,
> > > > but touches more files and thus has a higher risk of merge conflicts.
> > > >
> > > > I'd like to give others some time to chime in. I've given my opinion,
> > > > but it's only one.
> > > >
> > > > I'd like to give others some time to chime in. I've given my opinion,
> > > > but it's only one.
> > >
> > > Sounds good. I'll wait to hear others' opinions on the best path forward.
> >
> > No other comments so far.
> >
> > My hunch is that a short list of these changes
> >
> > ```
> > @@ -1328,7 +1328,7 @@ static int kernel_bindconnect(struct socket *s,
> > struct sockaddr *laddr,
> >         if (rv < 0)
> >                 return rv;
> >
> > -       rv = s->ops->connect(s, raddr, size, flags);
> > +       rv = kernel_connect(s, raddr, size, flags);
> > ```
> >
> > is no more invasive than your proposed patch, and gives a more robust outcome.
> >
> > Please take a stab.
> >
> > If it proves at all non-trivial, e.g., in the conversion of arguments
> > between kernel_sendmsg and sock_sendmsg/sock->ops->sendmsg, then let's
> > submit your original patch. And I will do the conversion in net-next
> > instead.
Paolo Abeni Sept. 14, 2023, 8:24 a.m. UTC | #12
On Wed, 2023-09-13 at 10:02 -0400, Willem de Bruijn wrote:
> On Tue, Sep 12, 2023 at 5:09 PM Jordan Rife <jrife@google.com> wrote:
> > 
> > > If we take this path, it could be a single patch. The subsystem
> > > maintainers should be CC:ed so that they can (N)ACK it.
> > > 
> > > But I do not mean to ask to split it up and test each one separately.
> > > 
> > > The change from sock->ops->connect to kernel_connect is certainly
> > > trivial enough that compile testing should suffice.
> > 
> > Ack. Thanks for clarifying.
> > 
> > > The only question is whether we should pursue your original patch and
> > > accept that this will continue, or one that improves the situation,
> > > but touches more files and thus has a higher risk of merge conflicts.
> > > 
> > > I'd like to give others some time to chime in. I've given my opinion,
> > > but it's only one.
> > > 
> > > I'd like to give others some time to chime in. I've given my opinion,
> > > but it's only one.
> > 
> > Sounds good. I'll wait to hear others' opinions on the best path forward.
> 
> No other comments so far.
> 
> My hunch is that a short list of these changes
> 
> ```
> @@ -1328,7 +1328,7 @@ static int kernel_bindconnect(struct socket *s,
> struct sockaddr *laddr,
>         if (rv < 0)
>                 return rv;
> 
> -       rv = s->ops->connect(s, raddr, size, flags);
> +       rv = kernel_connect(s, raddr, size, flags);
> ```
> 
> is no more invasive than your proposed patch, and gives a more robust outcome.
> 
> Please take a stab.

I'm sorry for the late feedback. For the records, I agree the cleanest
fix described above should be attempted first.

Thanks,

Paolo
Jordan Rife Sept. 14, 2023, 6:41 p.m. UTC | #13
> 1) Swap out calls to sock->ops->connect() with kernel_connect()

This is trivial, as expected. I have a patch ready that swaps out all
occurrences of sock->ops->connect().

> 2) Move the address copy to kernel_sendmsg()
> 3) Swap out calls to sock_sendmsg()/sock->ops->sendmsg() with kernel_sendmsg()

This turns out to be less trivial. kernel_sensmsg() looks to be a
special case of sock_sendmsg() with sock_sendmsg() being the more
generic of the two:

int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
  struct kvec *vec, size_t num, size_t size)
{
  iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
  return sock_sendmsg(sock, msg);
}

It populates msg->msg_iter with a kvec whereas most cases I could find
where sock_sendmsg() is used are using a bio_vec. Some examples:

==drivers/iscsi/iscsi_tcp.c: iscsi_sw_tcp_xmit_segment()==
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, copy);

r = sock_sendmsg(sk, &msg);

==fs/ocfs2/cluster: o2net_sendpage()==
bvec_set_virt(&bv, virt, size);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, size);

while (1) {
msg.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES;
mutex_lock(&sc->sc_send_lock);
ret = sock_sendmsg(sc->sc_sock, &msg);

==net/sunrpc/svcsock.c: svc_udp_sendto()==
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
count, 0);
err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
count, 0);
err = sock_sendmsg(svsk->sk_sock, &msg);
}

Maybe these two types are more interchangeable than I'm thinking, but
it seems like it might be simpler to just do the address copy inside
sock_sendmsg(). Does this revised plan sound reasonable:

1) Swap out calls to sock->ops->connect() with kernel_connect()
2) Move the address copy to sock_sendmsg()

I also noticed that BPF hooks inside bind() can rewrite the bind
address. Should we do something similar for kernel_bind:

1) Add an address copy to kernel_bind()
2) Swap out direct calls to ops->bind() with kernel_bind()

-Jordan

On Thu, Sep 14, 2023 at 1:24 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Wed, 2023-09-13 at 10:02 -0400, Willem de Bruijn wrote:
> > On Tue, Sep 12, 2023 at 5:09 PM Jordan Rife <jrife@google.com> wrote:
> > >
> > > > If we take this path, it could be a single patch. The subsystem
> > > > maintainers should be CC:ed so that they can (N)ACK it.
> > > >
> > > > But I do not mean to ask to split it up and test each one separately.
> > > >
> > > > The change from sock->ops->connect to kernel_connect is certainly
> > > > trivial enough that compile testing should suffice.
> > >
> > > Ack. Thanks for clarifying.
> > >
> > > > The only question is whether we should pursue your original patch and
> > > > accept that this will continue, or one that improves the situation,
> > > > but touches more files and thus has a higher risk of merge conflicts.
> > > >
> > > > I'd like to give others some time to chime in. I've given my opinion,
> > > > but it's only one.
> > > >
> > > > I'd like to give others some time to chime in. I've given my opinion,
> > > > but it's only one.
> > >
> > > Sounds good. I'll wait to hear others' opinions on the best path forward.
> >
> > No other comments so far.
> >
> > My hunch is that a short list of these changes
> >
> > ```
> > @@ -1328,7 +1328,7 @@ static int kernel_bindconnect(struct socket *s,
> > struct sockaddr *laddr,
> >         if (rv < 0)
> >                 return rv;
> >
> > -       rv = s->ops->connect(s, raddr, size, flags);
> > +       rv = kernel_connect(s, raddr, size, flags);
> > ```
> >
> > is no more invasive than your proposed patch, and gives a more robust outcome.
> >
> > Please take a stab.
>
> I'm sorry for the late feedback. For the records, I agree the cleanest
> fix described above should be attempted first.
>
> Thanks,
>
> Paolo
>
Willem de Bruijn Sept. 15, 2023, 12:16 a.m. UTC | #14
On Thu, Sep 14, 2023 at 2:41 PM Jordan Rife <jrife@google.com> wrote:
>
> > 1) Swap out calls to sock->ops->connect() with kernel_connect()
>
> This is trivial, as expected. I have a patch ready that swaps out all
> occurrences of sock->ops->connect().
>
> > 2) Move the address copy to kernel_sendmsg()
> > 3) Swap out calls to sock_sendmsg()/sock->ops->sendmsg() with kernel_sendmsg()
>
> This turns out to be less trivial. kernel_sensmsg() looks to be a
> special case of sock_sendmsg() with sock_sendmsg() being the more
> generic of the two:
>
> int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
>   struct kvec *vec, size_t num, size_t size)
> {
>   iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
>   return sock_sendmsg(sock, msg);
> }
>
> It populates msg->msg_iter with a kvec whereas most cases I could find
> where sock_sendmsg() is used are using a bio_vec. Some examples:
>
> ==drivers/iscsi/iscsi_tcp.c: iscsi_sw_tcp_xmit_segment()==
> iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, copy);
>
> r = sock_sendmsg(sk, &msg);
>
> ==fs/ocfs2/cluster: o2net_sendpage()==
> bvec_set_virt(&bv, virt, size);
> iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, size);
>
> while (1) {
> msg.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES;
> mutex_lock(&sc->sc_send_lock);
> ret = sock_sendmsg(sc->sc_sock, &msg);
>
> ==net/sunrpc/svcsock.c: svc_udp_sendto()==
> iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> count, 0);
> err = sock_sendmsg(svsk->sk_sock, &msg);
> if (err == -ECONNREFUSED) {
> /* ICMP error on earlier request. */
> iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> count, 0);
> err = sock_sendmsg(svsk->sk_sock, &msg);
> }
>
> Maybe these two types are more interchangeable than I'm thinking, but
> it seems like it might be simpler to just do the address copy inside
> sock_sendmsg(). Does this revised plan sound reasonable:
>
> 1) Swap out calls to sock->ops->connect() with kernel_connect()
> 2) Move the address copy to sock_sendmsg()

That also covers normal user code that is already protected.

How about a __kernel_sendmsg that follows the sock_sendmsg API and is
a thin wrapper exactly for in-kernel callers.

> I also noticed that BPF hooks inside bind() can rewrite the bind
> address. Should we do something similar for kernel_bind:
>
> 1) Add an address copy to kernel_bind()
> 2) Swap out direct calls to ops->bind() with kernel_bind()

Good catch. Sounds like it.

At that point it might be worth splitting into three patches.

> -Jordan
>
> On Thu, Sep 14, 2023 at 1:24 AM Paolo Abeni <pabeni@redhat.com> wrote:
> >
> > On Wed, 2023-09-13 at 10:02 -0400, Willem de Bruijn wrote:
> > > On Tue, Sep 12, 2023 at 5:09 PM Jordan Rife <jrife@google.com> wrote:
> > > >
> > > > > If we take this path, it could be a single patch. The subsystem
> > > > > maintainers should be CC:ed so that they can (N)ACK it.
> > > > >
> > > > > But I do not mean to ask to split it up and test each one separately.
> > > > >
> > > > > The change from sock->ops->connect to kernel_connect is certainly
> > > > > trivial enough that compile testing should suffice.
> > > >
> > > > Ack. Thanks for clarifying.
> > > >
> > > > > The only question is whether we should pursue your original patch and
> > > > > accept that this will continue, or one that improves the situation,
> > > > > but touches more files and thus has a higher risk of merge conflicts.
> > > > >
> > > > > I'd like to give others some time to chime in. I've given my opinion,
> > > > > but it's only one.
> > > > >
> > > > > I'd like to give others some time to chime in. I've given my opinion,
> > > > > but it's only one.
> > > >
> > > > Sounds good. I'll wait to hear others' opinions on the best path forward.
> > >
> > > No other comments so far.
> > >
> > > My hunch is that a short list of these changes
> > >
> > > ```
> > > @@ -1328,7 +1328,7 @@ static int kernel_bindconnect(struct socket *s,
> > > struct sockaddr *laddr,
> > >         if (rv < 0)
> > >                 return rv;
> > >
> > > -       rv = s->ops->connect(s, raddr, size, flags);
> > > +       rv = kernel_connect(s, raddr, size, flags);
> > > ```
> > >
> > > is no more invasive than your proposed patch, and gives a more robust outcome.
> > >
> > > Please take a stab.
> >
> > I'm sorry for the late feedback. For the records, I agree the cleanest
> > fix described above should be attempted first.
> >
> > Thanks,
> >
> > Paolo
> >
Jordan Rife Sept. 15, 2023, 12:21 a.m. UTC | #15
> How about a __kernel_sendmsg that follows the sock_sendmsg API and is
> a thin wrapper exactly for in-kernel callers.

Makes sense. That works for me.

> Good catch. Sounds like it.
> At that point it might be worth splitting into three patches.

Sounds good. I can do

Patch 1: kernel_connect() changes (ready to go)
Patch 2: sock_sendmsg() changes
Patch 3: kernel_bind() changes

-Jordan

On Thu, Sep 14, 2023 at 5:17 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> On Thu, Sep 14, 2023 at 2:41 PM Jordan Rife <jrife@google.com> wrote:
> >
> > > 1) Swap out calls to sock->ops->connect() with kernel_connect()
> >
> > This is trivial, as expected. I have a patch ready that swaps out all
> > occurrences of sock->ops->connect().
> >
> > > 2) Move the address copy to kernel_sendmsg()
> > > 3) Swap out calls to sock_sendmsg()/sock->ops->sendmsg() with kernel_sendmsg()
> >
> > This turns out to be less trivial. kernel_sensmsg() looks to be a
> > special case of sock_sendmsg() with sock_sendmsg() being the more
> > generic of the two:
> >
> > int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
> >   struct kvec *vec, size_t num, size_t size)
> > {
> >   iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
> >   return sock_sendmsg(sock, msg);
> > }
> >
> > It populates msg->msg_iter with a kvec whereas most cases I could find
> > where sock_sendmsg() is used are using a bio_vec. Some examples:
> >
> > ==drivers/iscsi/iscsi_tcp.c: iscsi_sw_tcp_xmit_segment()==
> > iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, copy);
> >
> > r = sock_sendmsg(sk, &msg);
> >
> > ==fs/ocfs2/cluster: o2net_sendpage()==
> > bvec_set_virt(&bv, virt, size);
> > iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, size);
> >
> > while (1) {
> > msg.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES;
> > mutex_lock(&sc->sc_send_lock);
> > ret = sock_sendmsg(sc->sc_sock, &msg);
> >
> > ==net/sunrpc/svcsock.c: svc_udp_sendto()==
> > iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> > count, 0);
> > err = sock_sendmsg(svsk->sk_sock, &msg);
> > if (err == -ECONNREFUSED) {
> > /* ICMP error on earlier request. */
> > iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
> > count, 0);
> > err = sock_sendmsg(svsk->sk_sock, &msg);
> > }
> >
> > Maybe these two types are more interchangeable than I'm thinking, but
> > it seems like it might be simpler to just do the address copy inside
> > sock_sendmsg(). Does this revised plan sound reasonable:
> >
> > 1) Swap out calls to sock->ops->connect() with kernel_connect()
> > 2) Move the address copy to sock_sendmsg()
>
> That also covers normal user code that is already protected.
>
> How about a __kernel_sendmsg that follows the sock_sendmsg API and is
> a thin wrapper exactly for in-kernel callers.
>
> > I also noticed that BPF hooks inside bind() can rewrite the bind
> > address. Should we do something similar for kernel_bind:
> >
> > 1) Add an address copy to kernel_bind()
> > 2) Swap out direct calls to ops->bind() with kernel_bind()
>
> Good catch. Sounds like it.
>
> At that point it might be worth splitting into three patches.
>
> > -Jordan
> >
> > On Thu, Sep 14, 2023 at 1:24 AM Paolo Abeni <pabeni@redhat.com> wrote:
> > >
> > > On Wed, 2023-09-13 at 10:02 -0400, Willem de Bruijn wrote:
> > > > On Tue, Sep 12, 2023 at 5:09 PM Jordan Rife <jrife@google.com> wrote:
> > > > >
> > > > > > If we take this path, it could be a single patch. The subsystem
> > > > > > maintainers should be CC:ed so that they can (N)ACK it.
> > > > > >
> > > > > > But I do not mean to ask to split it up and test each one separately.
> > > > > >
> > > > > > The change from sock->ops->connect to kernel_connect is certainly
> > > > > > trivial enough that compile testing should suffice.
> > > > >
> > > > > Ack. Thanks for clarifying.
> > > > >
> > > > > > The only question is whether we should pursue your original patch and
> > > > > > accept that this will continue, or one that improves the situation,
> > > > > > but touches more files and thus has a higher risk of merge conflicts.
> > > > > >
> > > > > > I'd like to give others some time to chime in. I've given my opinion,
> > > > > > but it's only one.
> > > > > >
> > > > > > I'd like to give others some time to chime in. I've given my opinion,
> > > > > > but it's only one.
> > > > >
> > > > > Sounds good. I'll wait to hear others' opinions on the best path forward.
> > > >
> > > > No other comments so far.
> > > >
> > > > My hunch is that a short list of these changes
> > > >
> > > > ```
> > > > @@ -1328,7 +1328,7 @@ static int kernel_bindconnect(struct socket *s,
> > > > struct sockaddr *laddr,
> > > >         if (rv < 0)
> > > >                 return rv;
> > > >
> > > > -       rv = s->ops->connect(s, raddr, size, flags);
> > > > +       rv = kernel_connect(s, raddr, size, flags);
> > > > ```
> > > >
> > > > is no more invasive than your proposed patch, and gives a more robust outcome.
> > > >
> > > > Please take a stab.
> > >
> > > I'm sorry for the late feedback. For the records, I agree the cleanest
> > > fix described above should be attempted first.
> > >
> > > Thanks,
> > >
> > > Paolo
> > >
diff mbox series

Patch

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3d2e30e204735..c37d484fbee34 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -568,6 +568,7 @@  int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 {
 	struct sock *sk = sock->sk;
 	const struct proto *prot;
+	struct sockaddr_storage addr;
 	int err;
 
 	if (addr_len < sizeof(uaddr->sa_family))
@@ -580,6 +581,14 @@  int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 		return prot->disconnect(sk, flags);
 
 	if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+		if (uaddr && addr_len <= sizeof(addr)) {
+			/* pre_connect can rewrite uaddr, so make a copy to
+			 * insulate the caller.
+			 */
+			memcpy(&addr, uaddr, addr_len);
+			uaddr = (struct sockaddr *)&addr;
+		}
+
 		err = prot->pre_connect(sk, uaddr, addr_len);
 		if (err)
 			return err;
@@ -625,6 +634,7 @@  int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 			  int addr_len, int flags, int is_sendmsg)
 {
 	struct sock *sk = sock->sk;
+	struct sockaddr_storage addr;
 	int err;
 	long timeo;
 
@@ -668,6 +678,14 @@  int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 			goto out;
 
 		if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
+			if (uaddr && addr_len <= sizeof(addr)) {
+				/* pre_connect can rewrite uaddr, so make a copy to
+				 * insulate the caller.
+				 */
+				memcpy(&addr, uaddr, addr_len);
+				uaddr = (struct sockaddr *)&addr;
+			}
+
 			err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
 			if (err)
 				goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f39b9c8445808..5f5ee2752eeb7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1142,18 +1142,29 @@  int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	}
 
 	if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
+		struct sockaddr_in tmp_addr;
+		struct sockaddr_in *addr = usin;
+
+		/* BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK can rewrite usin, so make a
+		 * copy to insulate the caller.
+		 */
+		if (usin && msg->msg_namelen <= sizeof(tmp_addr)) {
+			memcpy(&tmp_addr, usin, msg->msg_namelen);
+			addr = &tmp_addr;
+		}
+
 		err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
-					    (struct sockaddr *)usin, &ipc.addr);
+					    (struct sockaddr *)addr, &ipc.addr);
 		if (err)
 			goto out_free;
-		if (usin) {
-			if (usin->sin_port == 0) {
+		if (addr) {
+			if (addr->sin_port == 0) {
 				/* BPF program set invalid port. Reject it. */
 				err = -EINVAL;
 				goto out_free;
 			}
-			daddr = usin->sin_addr.s_addr;
-			dport = usin->sin_port;
+			daddr = addr->sin_addr.s_addr;
+			dport = addr->sin_port;
 		}
 	}
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 86b5d509a4688..cbc1917fad629 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1506,26 +1506,37 @@  int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6->fl6_sport = inet->inet_sport;
 
 	if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
+		struct sockaddr_in6 tmp_addr;
+		struct sockaddr_in6 *addr = sin6;
+
+		/* BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK can rewrite sin6, so make a
+		 * copy to insulate the caller.
+		 */
+		if (sin6 && addr_len <= sizeof(tmp_addr)) {
+			memcpy(&tmp_addr, sin6, addr_len);
+			addr = &tmp_addr;
+		}
+
 		err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
-					   (struct sockaddr *)sin6,
+					   (struct sockaddr *)addr,
 					   &fl6->saddr);
 		if (err)
 			goto out_no_dst;
-		if (sin6) {
-			if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
+		if (addr) {
+			if (ipv6_addr_v4mapped(&addr->sin6_addr)) {
 				/* BPF program rewrote IPv6-only by IPv4-mapped
 				 * IPv6. It's currently unsupported.
 				 */
 				err = -ENOTSUPP;
 				goto out_no_dst;
 			}
-			if (sin6->sin6_port == 0) {
+			if (addr->sin6_port == 0) {
 				/* BPF program set invalid port. Reject it. */
 				err = -EINVAL;
 				goto out_no_dst;
 			}
-			fl6->fl6_dport = sin6->sin6_port;
-			fl6->daddr = sin6->sin6_addr;
+			fl6->fl6_dport = addr->sin6_port;
+			fl6->daddr = addr->sin6_addr;
 		}
 	}
 
diff --git a/net/socket.c b/net/socket.c
index c8b08b32f097e..39794d026fa11 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3570,12 +3570,7 @@  EXPORT_SYMBOL(kernel_accept);
 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 		   int flags)
 {
-	struct sockaddr_storage address;
-
-	memcpy(&address, addr, addrlen);
-
-	return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
-					     addrlen, flags);
+	return READ_ONCE(sock->ops)->connect(sock, addr, addrlen, flags);
 }
 EXPORT_SYMBOL(kernel_connect);