diff mbox series

[v2,bpf-next,14/17] bpf: Change bpf_getsockopt(SOL_TCP) to reuse do_tcp_getsockopt()

Message ID 20220902002918.2894511-1-kafai@fb.com (mailing list archive)
State Accepted
Commit 273b7f0fb44847c41814a59901977be284daa447
Delegated to: BPF
Headers show
Series bpf: net: Remove duplicated code from bpf_getsockopt() | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1004 this patch: 1004
netdev/cc_maintainers warning 10 maintainers not CCed: yoshfuji@linux-ipv6.org john.fastabend@gmail.com jolsa@kernel.org song@kernel.org yhs@fb.com dsahern@kernel.org haoluo@google.com martin.lau@linux.dev kpsingh@kernel.org sdf@google.com
netdev/build_clang success Errors and warnings before: 120 this patch: 120
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1011 this patch: 1011
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 124 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-4 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-PR success PR summary

Commit Message

Martin KaFai Lau Sept. 2, 2022, 12:29 a.m. UTC
From: Martin KaFai Lau <martin.lau@kernel.org>

This patch changes bpf_getsockopt(SOL_TCP) to reuse
do_tcp_getsockopt().  It removes the duplicated code from
bpf_getsockopt(SOL_TCP).

Before this patch, there were some optnames available to
bpf_setsockopt(SOL_TCP) but missing in bpf_getsockopt(SOL_TCP).
For example, TCP_NODELAY, TCP_MAXSEG, TCP_KEEPIDLE, TCP_KEEPINTVL,
and a few more.  It surprises users from time to time.  This patch
automatically closes this gap without duplicating more code.

bpf_getsockopt(TCP_SAVED_SYN) does not free the saved_syn,
so it stays in sol_tcp_sockopt().

For string name value like TCP_CONGESTION, bpf expects it
is always null terminated, so sol_tcp_sockopt() decrements
optlen by one before calling do_tcp_getsockopt() and
the 'if (optlen < saved_optlen) memset(..,0,..);'
in __bpf_getsockopt() will always do a null termination.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/net/tcp.h |  2 ++
 net/core/filter.c | 74 +++++++++++++++++++++++++++--------------------
 net/ipv4/tcp.c    |  4 +--
 3 files changed, 47 insertions(+), 33 deletions(-)
diff mbox series

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c03a50c72f40..735e957f7f4b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -402,6 +402,8 @@  void tcp_init_sock(struct sock *sk);
 void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb);
 __poll_t tcp_poll(struct file *file, struct socket *sock,
 		      struct poll_table_struct *wait);
+int do_tcp_getsockopt(struct sock *sk, int level,
+		      int optname, sockptr_t optval, sockptr_t optlen);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
 		   char __user *optval, int __user *optlen);
 bool tcp_bpf_bypass_getsockopt(int level, int optname);
diff --git a/net/core/filter.c b/net/core/filter.c
index 68b52243b306..e427f40f0cee 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5096,8 +5096,9 @@  static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
 	return 0;
 }
 
-static int sol_tcp_setsockopt(struct sock *sk, int optname,
-			      char *optval, int optlen)
+static int sol_tcp_sockopt(struct sock *sk, int optname,
+			   char *optval, int *optlen,
+			   bool getopt)
 {
 	if (sk->sk_prot->setsockopt != tcp_setsockopt)
 		return -EINVAL;
@@ -5114,17 +5115,51 @@  static int sol_tcp_setsockopt(struct sock *sk, int optname,
 	case TCP_USER_TIMEOUT:
 	case TCP_NOTSENT_LOWAT:
 	case TCP_SAVE_SYN:
-		if (optlen != sizeof(int))
+		if (*optlen != sizeof(int))
 			return -EINVAL;
 		break;
 	case TCP_CONGESTION:
+		if (*optlen < 2)
+			return -EINVAL;
+		break;
+	case TCP_SAVED_SYN:
+		if (*optlen < 1)
+			return -EINVAL;
 		break;
 	default:
-		return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen);
+		if (getopt)
+			return -EINVAL;
+		return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
+	}
+
+	if (getopt) {
+		if (optname == TCP_SAVED_SYN) {
+			struct tcp_sock *tp = tcp_sk(sk);
+
+			if (!tp->saved_syn ||
+			    *optlen > tcp_saved_syn_len(tp->saved_syn))
+				return -EINVAL;
+			memcpy(optval, tp->saved_syn->data, *optlen);
+			/* It cannot free tp->saved_syn here because it
+			 * does not know if the user space still needs it.
+			 */
+			return 0;
+		}
+
+		if (optname == TCP_CONGESTION) {
+			if (!inet_csk(sk)->icsk_ca_ops)
+				return -EINVAL;
+			/* BPF expects NULL-terminated tcp-cc string */
+			optval[--(*optlen)] = '\0';
+		}
+
+		return do_tcp_getsockopt(sk, SOL_TCP, optname,
+					 KERNEL_SOCKPTR(optval),
+					 KERNEL_SOCKPTR(optlen));
 	}
 
 	return do_tcp_setsockopt(sk, SOL_TCP, optname,
-				 KERNEL_SOCKPTR(optval), optlen);
+				 KERNEL_SOCKPTR(optval), *optlen);
 }
 
 static int sol_ip_setsockopt(struct sock *sk, int optname,
@@ -5179,7 +5214,7 @@  static int __bpf_setsockopt(struct sock *sk, int level, int optname,
 	else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
 		return sol_ipv6_setsockopt(sk, optname, optval, optlen);
 	else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
-		return sol_tcp_setsockopt(sk, optname, optval, optlen);
+		return sol_tcp_sockopt(sk, optname, optval, &optlen, false);
 
 	return -EINVAL;
 }
@@ -5202,31 +5237,8 @@  static int __bpf_getsockopt(struct sock *sk, int level, int optname,
 
 	if (level == SOL_SOCKET) {
 		err = sol_socket_sockopt(sk, optname, optval, &optlen, true);
-	} else if (IS_ENABLED(CONFIG_INET) &&
-		   level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
-		struct inet_connection_sock *icsk;
-		struct tcp_sock *tp;
-
-		switch (optname) {
-		case TCP_CONGESTION:
-			icsk = inet_csk(sk);
-
-			if (!icsk->icsk_ca_ops || optlen <= 1)
-				goto err_clear;
-			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
-			optval[optlen - 1] = 0;
-			break;
-		case TCP_SAVED_SYN:
-			tp = tcp_sk(sk);
-
-			if (optlen <= 0 || !tp->saved_syn ||
-			    optlen > tcp_saved_syn_len(tp->saved_syn))
-				goto err_clear;
-			memcpy(optval, tp->saved_syn->data, optlen);
-			break;
-		default:
-			goto err_clear;
-		}
+	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) {
+		err = sol_tcp_sockopt(sk, optname, optval, &optlen, true);
 	} else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) {
 		struct inet_sock *inet = inet_sk(sk);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 45c737ee95a1..a822cc627e2a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4043,8 +4043,8 @@  struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
 	return stats;
 }
 
-static int do_tcp_getsockopt(struct sock *sk, int level,
-			     int optname, sockptr_t optval, sockptr_t optlen)
+int do_tcp_getsockopt(struct sock *sk, int level,
+		      int optname, sockptr_t optval, sockptr_t optlen)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);