diff mbox series

[bpf,v3] bpf: Support dual-stack sockets in bpf_tcp_check_syncookie

Message ID 20220222105156.231344-1-maximmi@nvidia.com (mailing list archive)
State New, archived
Delegated to: BPF
Headers show
Series [bpf,v3] bpf: Support dual-stack sockets in bpf_tcp_check_syncookie | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 24 this patch: 24
netdev/cc_maintainers warning 1 maintainers not CCed: linux-kselftest@vger.kernel.org
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 29 this patch: 29
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 191 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf success VM_Test

Commit Message

Maxim Mikityanskiy Feb. 22, 2022, 10:51 a.m. UTC
bpf_tcp_gen_syncookie looks at the IP version in the IP header and
validates the address family of the socket. It supports IPv4 packets in
AF_INET6 dual-stack sockets.

On the other hand, bpf_tcp_check_syncookie looks only at the address
family of the socket, ignoring the real IP version in headers, and
validates only the packet size. This implementation has some drawbacks:

1. Packets are not validated properly, allowing a BPF program to trick
   bpf_tcp_check_syncookie into handling an IPv6 packet on an IPv4
   socket.

2. Dual-stack sockets fail the checks on IPv4 packets. IPv4 clients end
   up receiving a SYNACK with the cookie, but the following ACK gets
   dropped.

This patch fixes these issues by changing the checks in
bpf_tcp_check_syncookie to match the ones in bpf_tcp_gen_syncookie. The
IP version from the header is taken into account, and it is validated
against the address family of the socket.

Fixes: 399040847084 ("bpf: add helper to check for a valid SYN cookie")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
---
 net/core/filter.c                             | 17 +++-
 .../bpf/test_tcp_check_syncookie_user.c       | 78 ++++++++++++++-----
 2 files changed, 72 insertions(+), 23 deletions(-)

v3 changes: Added a selftest.

Comments

Maxim Mikityanskiy March 11, 2022, 4:36 p.m. UTC | #1
This patch was submitted more than two weeks ago, and there were no new
comments. Can it be accepted?

> -----Original Message-----
> From: Maxim Mikityanskiy <maximmi@nvidia.com>
> Sent: 22 February, 2022 12:52
> 
> bpf_tcp_gen_syncookie looks at the IP version in the IP header and
> validates the address family of the socket. It supports IPv4 packets in
> AF_INET6 dual-stack sockets.
> 
> On the other hand, bpf_tcp_check_syncookie looks only at the address
> family of the socket, ignoring the real IP version in headers, and
> validates only the packet size. This implementation has some drawbacks:
> 
> 1. Packets are not validated properly, allowing a BPF program to trick
>    bpf_tcp_check_syncookie into handling an IPv6 packet on an IPv4
>    socket.
> 
> 2. Dual-stack sockets fail the checks on IPv4 packets. IPv4 clients end
>    up receiving a SYNACK with the cookie, but the following ACK gets
>    dropped.
> 
> This patch fixes these issues by changing the checks in
> bpf_tcp_check_syncookie to match the ones in bpf_tcp_gen_syncookie. IP
> version from the header is taken into account, and it is validated
> properly with address family.
> 
> Fixes: 399040847084 ("bpf: add helper to check for a valid SYN cookie")
> Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
> Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
> ---
>  net/core/filter.c                             | 17 +++-
>  .../bpf/test_tcp_check_syncookie_user.c       | 78 ++++++++++++++-----
>  2 files changed, 72 insertions(+), 23 deletions(-)
> 
> v3 changes: Added a selftest.
> 
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 9eb785842258..d1914c5c171c 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6777,24 +6777,33 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *,
> sk, void *, iph, u32, iph_len
>  	if (!th->ack || th->rst || th->syn)
>  		return -ENOENT;
> 
> +	if (unlikely(iph_len < sizeof(struct iphdr)))
> +		return -EINVAL;
> +
>  	if (tcp_synq_no_recent_overflow(sk))
>  		return -ENOENT;
> 
>  	cookie = ntohl(th->ack_seq) - 1;
> 
> -	switch (sk->sk_family) {
> -	case AF_INET:
> -		if (unlikely(iph_len < sizeof(struct iphdr)))
> +	/* Both struct iphdr and struct ipv6hdr have the version field at the
> +	 * same offset so we can cast to the shorter header (struct iphdr).
> +	 */
> +	switch (((struct iphdr *)iph)->version) {
> +	case 4:
> +		if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
>  			return -EINVAL;
> 
>  		ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
>  		break;
> 
>  #if IS_BUILTIN(CONFIG_IPV6)
> -	case AF_INET6:
> +	case 6:
>  		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
>  			return -EINVAL;
> 
> +		if (sk->sk_family != AF_INET6)
> +			return -EINVAL;
> +
>  		ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
>  		break;
>  #endif /* CONFIG_IPV6 */
> diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
> b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
> index b9e991d43155..e7775d3bbe08 100644
> --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
> +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
> @@ -18,8 +18,9 @@
>  #include "bpf_rlimit.h"
>  #include "cgroup_helpers.h"
> 
> -static int start_server(const struct sockaddr *addr, socklen_t len)
> +static int start_server(const struct sockaddr *addr, socklen_t len, bool
> dual)
>  {
> +	int mode = !dual;
>  	int fd;
> 
>  	fd = socket(addr->sa_family, SOCK_STREAM, 0);
> @@ -28,6 +29,14 @@ static int start_server(const struct sockaddr *addr,
> socklen_t len)
>  		goto out;
>  	}
> 
> +	if (addr->sa_family == AF_INET6) {
> +		if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode,
> +			       sizeof(mode)) == -1) {
> +			log_err("Failed to set the dual-stack mode");
> +			goto close_out;
> +		}
> +	}
> +
>  	if (bind(fd, addr, len) == -1) {
>  		log_err("Failed to bind server socket");
>  		goto close_out;
> @@ -47,24 +56,17 @@ static int start_server(const struct sockaddr *addr,
> socklen_t len)
>  	return fd;
>  }
> 
> -static int connect_to_server(int server_fd)
> +static int connect_to_server(const struct sockaddr *addr, socklen_t len)
>  {
> -	struct sockaddr_storage addr;
> -	socklen_t len = sizeof(addr);
>  	int fd = -1;
> 
> -	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
> -		log_err("Failed to get server addr");
> -		goto out;
> -	}
> -
> -	fd = socket(addr.ss_family, SOCK_STREAM, 0);
> +	fd = socket(addr->sa_family, SOCK_STREAM, 0);
>  	if (fd == -1) {
>  		log_err("Failed to create client socket");
>  		goto out;
>  	}
> 
> -	if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
> +	if (connect(fd, (const struct sockaddr *)addr, len) == -1) {
>  		log_err("Fail to connect to server");
>  		goto close_out;
>  	}
> @@ -116,7 +118,8 @@ static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
>  	return map_fd;
>  }
> 
> -static int run_test(int server_fd, int results_fd, bool xdp)
> +static int run_test(int server_fd, int results_fd, bool xdp,
> +		    const struct sockaddr *addr, socklen_t len)
>  {
>  	int client = -1, srv_client = -1;
>  	int ret = 0;
> @@ -142,7 +145,7 @@ static int run_test(int server_fd, int results_fd, bool
> xdp)
>  		goto err;
>  	}
> 
> -	client = connect_to_server(server_fd);
> +	client = connect_to_server(addr, len);
>  	if (client == -1)
>  		goto err;
> 
> @@ -199,12 +202,30 @@ static int run_test(int server_fd, int results_fd,
> bool xdp)
>  	return ret;
>  }
> 
> +static bool get_port(int server_fd, in_port_t *port)
> +{
> +	struct sockaddr_in addr;
> +	socklen_t len = sizeof(addr);
> +
> +	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
> +		log_err("Failed to get server addr");
> +		return false;
> +	}
> +
> +	/* sin_port and sin6_port are located at the same offset. */
> +	*port = addr.sin_port;
> +	return true;
> +}
> +
>  int main(int argc, char **argv)
>  {
>  	struct sockaddr_in addr4;
>  	struct sockaddr_in6 addr6;
> +	struct sockaddr_in addr4dual;
> +	struct sockaddr_in6 addr6dual;
>  	int server = -1;
>  	int server_v6 = -1;
> +	int server_dual = -1;
>  	int results = -1;
>  	int err = 0;
>  	bool xdp;
> @@ -224,25 +245,43 @@ int main(int argc, char **argv)
>  	addr4.sin_family = AF_INET;
>  	addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
>  	addr4.sin_port = 0;
> +	memcpy(&addr4dual, &addr4, sizeof(addr4dual));
> 
>  	memset(&addr6, 0, sizeof(addr6));
>  	addr6.sin6_family = AF_INET6;
>  	addr6.sin6_addr = in6addr_loopback;
>  	addr6.sin6_port = 0;
> 
> -	server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
> -	if (server == -1)
> +	memset(&addr6dual, 0, sizeof(addr6dual));
> +	addr6dual.sin6_family = AF_INET6;
> +	addr6dual.sin6_addr = in6addr_any;
> +	addr6dual.sin6_port = 0;
> +
> +	server = start_server((const struct sockaddr *)&addr4, sizeof(addr4),
> +			      false);
> +	if (server == -1 || !get_port(server, &addr4.sin_port))
>  		goto err;
> 
>  	server_v6 = start_server((const struct sockaddr *)&addr6,
> -				 sizeof(addr6));
> -	if (server_v6 == -1)
> +				 sizeof(addr6), false);
> +	if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port))
> +		goto err;
> +
> +	server_dual = start_server((const struct sockaddr *)&addr6dual,
> +				   sizeof(addr6dual), true);
> +	if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port))
> +		goto err;
> +
> +	if (run_test(server, results, xdp,
> +		     (const struct sockaddr *)&addr4, sizeof(addr4)))
>  		goto err;
> 
> -	if (run_test(server, results, xdp))
> +	if (run_test(server_v6, results, xdp,
> +		     (const struct sockaddr *)&addr6, sizeof(addr6)))
>  		goto err;
> 
> -	if (run_test(server_v6, results, xdp))
> +	if (run_test(server_dual, results, xdp,
> +		     (const struct sockaddr *)&addr4dual, sizeof(addr4dual)))
>  		goto err;
> 
>  	printf("ok\n");
> @@ -252,6 +291,7 @@ int main(int argc, char **argv)
>  out:
>  	close(server);
>  	close(server_v6);
> +	close(server_dual);
>  	close(results);
>  	return err;
>  }
> --
> 2.30.2
Alexei Starovoitov March 11, 2022, 5:32 p.m. UTC | #2
On Fri, Mar 11, 2022 at 8:36 AM Maxim Mikityanskiy <maximmi@nvidia.com> wrote:
>
> This patch was submitted more than two weeks ago, and there were no new
> comments. Can it be accepted?

The patch wasn't acked by anyone.
Please solicit reviews for your changes in time.
Maxim Mikityanskiy March 14, 2022, 5:49 p.m. UTC | #3
> -----Original Message-----
> From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> 
> On Fri, Mar 11, 2022 at 8:36 AM Maxim Mikityanskiy <maximmi@nvidia.com>
> wrote:
> >
> > This patch was submitted more than two weeks ago, and there were no new
> > comments. Can it be accepted?
> 
> The patch wasn't acked by anyone.
> Please solicit reviews for your changes in time.

Could you elaborate? I sent the patch to the mailing list and CCed the
relevant people. That worked for v1 and v2, I received comments,
addressed them and sent a v3. What extra steps should I have taken to
"solicit reviews"? What shall I do now?
Alexei Starovoitov March 15, 2022, 9:26 p.m. UTC | #4
On Mon, Mar 14, 2022 at 10:49 AM Maxim Mikityanskiy <maximmi@nvidia.com> wrote:
>
> > -----Original Message-----
> > From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> >
> > On Fri, Mar 11, 2022 at 8:36 AM Maxim Mikityanskiy <maximmi@nvidia.com>
> > wrote:
> > >
> > > This patch was submitted more than two weeks ago, and there were no new
> > > comments. Can it be accepted?
> >
> > The patch wasn't acked by anyone.
> > Please solicit reviews for your changes in time.
>
> Could you elaborate? I sent the patch to the mailing list and CCed the
> relevant people. That worked for v1 and v2, I received comments,
> addressed them and sent a v3. What extra steps should I have done to
> "solicit reviews"? What shall I do now?

Cloudflare folks are the original authors of this helper and
the de-facto owners of this piece of code.
They need to ack it.
So you have to rebase, resubmit and solicit reviews.
Jakub Sitnicki March 16, 2022, 8:48 a.m. UTC | #5
On Tue, Mar 15, 2022 at 02:26 PM -07, Alexei Starovoitov wrote:
> On Mon, Mar 14, 2022 at 10:49 AM Maxim Mikityanskiy <maximmi@nvidia.com> wrote:
>>
>> > -----Original Message-----
>> > From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
>> >
>> > On Fri, Mar 11, 2022 at 8:36 AM Maxim Mikityanskiy <maximmi@nvidia.com>
>> > wrote:
>> > >
>> > > This patch was submitted more than two weeks ago, and there were no new
>> > > comments. Can it be accepted?
>> >
>> > The patch wasn't acked by anyone.
>> > Please solicit reviews for your changes in time.
>>
>> Could you elaborate? I sent the patch to the mailing list and CCed the
>> relevant people. That worked for v1 and v2, I received comments,
>> addressed them and sent a v3. What extra steps should I have done to
>> "solicit reviews"? What shall I do now?
>
> cloudflare folks are original authors of this helper and
> de-facto owners of this piece of code.
> They need to ack it.
> So you have to rebase, resubmit and solicit reviews.

Thanks for pulling me into the loop.

It's just a case of unfortunate timing. Lorenz left Cloudflare at the
end of Feb and he must have missed the email.

Adding Arthur to CC, who will be able to help with review and testing
once the rebased patch gets resubmitted.
diff mbox series

Patch

diff --git a/net/core/filter.c b/net/core/filter.c
index 9eb785842258..d1914c5c171c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6777,24 +6777,33 @@  BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
 	if (!th->ack || th->rst || th->syn)
 		return -ENOENT;
 
+	if (unlikely(iph_len < sizeof(struct iphdr)))
+		return -EINVAL;
+
 	if (tcp_synq_no_recent_overflow(sk))
 		return -ENOENT;
 
 	cookie = ntohl(th->ack_seq) - 1;
 
-	switch (sk->sk_family) {
-	case AF_INET:
-		if (unlikely(iph_len < sizeof(struct iphdr)))
+	/* Both struct iphdr and struct ipv6hdr have the version field at the
+	 * same offset so we can cast to the shorter header (struct iphdr).
+	 */
+	switch (((struct iphdr *)iph)->version) {
+	case 4:
+		if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
 			return -EINVAL;
 
 		ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
 		break;
 
 #if IS_BUILTIN(CONFIG_IPV6)
-	case AF_INET6:
+	case 6:
 		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
 			return -EINVAL;
 
+		if (sk->sk_family != AF_INET6)
+			return -EINVAL;
+
 		ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
 		break;
 #endif /* CONFIG_IPV6 */
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index b9e991d43155..e7775d3bbe08 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -18,8 +18,9 @@ 
 #include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
-static int start_server(const struct sockaddr *addr, socklen_t len)
+static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
 {
+	int mode = !dual;
 	int fd;
 
 	fd = socket(addr->sa_family, SOCK_STREAM, 0);
@@ -28,6 +29,14 @@  static int start_server(const struct sockaddr *addr, socklen_t len)
 		goto out;
 	}
 
+	if (addr->sa_family == AF_INET6) {
+		if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode,
+			       sizeof(mode)) == -1) {
+			log_err("Failed to set the dual-stack mode");
+			goto close_out;
+		}
+	}
+
 	if (bind(fd, addr, len) == -1) {
 		log_err("Failed to bind server socket");
 		goto close_out;
@@ -47,24 +56,17 @@  static int start_server(const struct sockaddr *addr, socklen_t len)
 	return fd;
 }
 
-static int connect_to_server(int server_fd)
+static int connect_to_server(const struct sockaddr *addr, socklen_t len)
 {
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
 	int fd = -1;
 
-	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
-		log_err("Failed to get server addr");
-		goto out;
-	}
-
-	fd = socket(addr.ss_family, SOCK_STREAM, 0);
+	fd = socket(addr->sa_family, SOCK_STREAM, 0);
 	if (fd == -1) {
 		log_err("Failed to create client socket");
 		goto out;
 	}
 
-	if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+	if (connect(fd, (const struct sockaddr *)addr, len) == -1) {
 		log_err("Fail to connect to server");
 		goto close_out;
 	}
@@ -116,7 +118,8 @@  static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
 	return map_fd;
 }
 
-static int run_test(int server_fd, int results_fd, bool xdp)
+static int run_test(int server_fd, int results_fd, bool xdp,
+		    const struct sockaddr *addr, socklen_t len)
 {
 	int client = -1, srv_client = -1;
 	int ret = 0;
@@ -142,7 +145,7 @@  static int run_test(int server_fd, int results_fd, bool xdp)
 		goto err;
 	}
 
-	client = connect_to_server(server_fd);
+	client = connect_to_server(addr, len);
 	if (client == -1)
 		goto err;
 
@@ -199,12 +202,30 @@  static int run_test(int server_fd, int results_fd, bool xdp)
 	return ret;
 }
 
+static bool get_port(int server_fd, in_port_t *port)
+{
+	struct sockaddr_in addr;
+	socklen_t len = sizeof(addr);
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		return false;
+	}
+
+	/* sin_port and sin6_port are located at the same offset. */
+	*port = addr.sin_port;
+	return true;
+}
+
 int main(int argc, char **argv)
 {
 	struct sockaddr_in addr4;
 	struct sockaddr_in6 addr6;
+	struct sockaddr_in addr4dual;
+	struct sockaddr_in6 addr6dual;
 	int server = -1;
 	int server_v6 = -1;
+	int server_dual = -1;
 	int results = -1;
 	int err = 0;
 	bool xdp;
@@ -224,25 +245,43 @@  int main(int argc, char **argv)
 	addr4.sin_family = AF_INET;
 	addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 	addr4.sin_port = 0;
+	memcpy(&addr4dual, &addr4, sizeof(addr4dual));
 
 	memset(&addr6, 0, sizeof(addr6));
 	addr6.sin6_family = AF_INET6;
 	addr6.sin6_addr = in6addr_loopback;
 	addr6.sin6_port = 0;
 
-	server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
-	if (server == -1)
+	memset(&addr6dual, 0, sizeof(addr6dual));
+	addr6dual.sin6_family = AF_INET6;
+	addr6dual.sin6_addr = in6addr_any;
+	addr6dual.sin6_port = 0;
+
+	server = start_server((const struct sockaddr *)&addr4, sizeof(addr4),
+			      false);
+	if (server == -1 || !get_port(server, &addr4.sin_port))
 		goto err;
 
 	server_v6 = start_server((const struct sockaddr *)&addr6,
-				 sizeof(addr6));
-	if (server_v6 == -1)
+				 sizeof(addr6), false);
+	if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port))
+		goto err;
+
+	server_dual = start_server((const struct sockaddr *)&addr6dual,
+				   sizeof(addr6dual), true);
+	if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port))
+		goto err;
+
+	if (run_test(server, results, xdp,
+		     (const struct sockaddr *)&addr4, sizeof(addr4)))
 		goto err;
 
-	if (run_test(server, results, xdp))
+	if (run_test(server_v6, results, xdp,
+		     (const struct sockaddr *)&addr6, sizeof(addr6)))
 		goto err;
 
-	if (run_test(server_v6, results, xdp))
+	if (run_test(server_dual, results, xdp,
+		     (const struct sockaddr *)&addr4dual, sizeof(addr4dual)))
 		goto err;
 
 	printf("ok\n");
@@ -252,6 +291,7 @@  int main(int argc, char **argv)
 out:
 	close(server);
 	close(server_v6);
+	close(server_dual);
 	close(results);
 	return err;
 }