@@ -743,6 +743,8 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
struct ipcm_cookie *ipc, bool allow_ipv6);
DECLARE_STATIC_KEY_FALSE(ip4_min_ttl);
+int do_ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+ unsigned int optlen);
int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
unsigned int optlen);
int ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
@@ -5123,6 +5123,25 @@ static int sol_tcp_setsockopt(struct sock *sk, int optname,
KERNEL_SOCKPTR(optval), optlen);
}
+static int sol_ip_setsockopt(struct sock *sk, int optname,
+ char *optval, int optlen)
+{
+ if (sk->sk_family != AF_INET)
+ return -EINVAL;
+
+ switch (optname) {
+ case IP_TOS:
+ if (optlen != sizeof(int))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return do_ip_setsockopt(sk, SOL_IP, optname,
+ KERNEL_SOCKPTR(optval), optlen);
+}
+
static int __bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen)
{
@@ -5134,26 +5153,7 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
if (level == SOL_SOCKET) {
return sol_socket_setsockopt(sk, optname, optval, optlen);
} else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP) {
- if (optlen != sizeof(int) || sk->sk_family != AF_INET)
- return -EINVAL;
-
- val = *((int *)optval);
- /* Only some options are supported */
- switch (optname) {
- case IP_TOS:
- if (val < -1 || val > 0xff) {
- ret = -EINVAL;
- } else {
- struct inet_sock *inet = inet_sk(sk);
-
- if (val == -1)
- val = 0;
- inet->tos = val;
- }
- break;
- default:
- ret = -EINVAL;
- }
+ return sol_ip_setsockopt(sk, optname, optval, optlen);
} else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6) {
if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
return -EINVAL;
@@ -888,8 +888,8 @@ static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
DEFINE_STATIC_KEY_FALSE(ip4_min_ttl);
-static int do_ip_setsockopt(struct sock *sk, int level, int optname,
- sockptr_t optval, unsigned int optlen)
+int do_ip_setsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
After the prep work in the previous patches, this patch removes the dup code from bpf_setsockopt(SOL_IP) and reuses the implementation in do_ip_setsockopt(). The existing optname white-list is refactored into a new function sol_ip_setsockopt(). NOTE, the current bpf_setsockopt(IP_TOS) is quite different from the the do_ip_setsockopt(IP_TOS). For example, it does not take the INET_ECN_MASK into the account for tcp and also does not adjust sk->sk_priority. It looks like the current bpf_setsockopt(IP_TOS) was referencing the IPV6_TCLASS implementation instead of IP_TOS. This patch tries to rectify that by using the do_ip_setsockopt(IP_TOS). While this is a behavior change, the do_ip_setsockopt(IP_TOS) behavior is arguably what the user is expecting. At least, the INET_ECN_MASK bits should be masked out for tcp. Signed-off-by: Martin KaFai Lau <kafai@fb.com> --- include/net/ip.h | 2 ++ net/core/filter.c | 40 ++++++++++++++++++++-------------------- net/ipv4/ip_sockglue.c | 4 ++-- 3 files changed, 24 insertions(+), 22 deletions(-)