From patchwork Mon Nov 20 22:23:31 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462186 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="fBfR9EMV" Received: from smtp-fw-6002.amazon.com (smtp-fw-6002.amazon.com [52.95.49.90]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id ABFD7E8; Mon, 20 Nov 2023 14:24:31 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519072; x=1732055072; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=R0Dzozr/kVc8IJh5icMBhMQjmf7I4hZmQKcdHxwevzc=; b=fBfR9EMVasfwn0aNRLeLm+2GrU1hBqLQbyrnxVKgpEdEroECD1yrTlxa rQPLHxI2EB4EhNzjgqJ3MJ9HS3KocSbniTp0OBLQaeIbG2VdugMkU49dh CYbq3yTwVkHIOPD2VGTPOtcBUeHf2vjTN9luK08nVveiYvvf/kxf4YBb8 c=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="369176019" Received: from iad12-co-svc-p1-lb1-vlan3.amazon.com (HELO email-inbound-relay-pdx-2c-m6i4x-8c5b1df3.us-west-2.amazon.com) ([10.43.8.6]) by smtp-border-fw-6002.iad6.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:24:28 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (pdx2-ws-svc-p26-lb5-vlan3.pdx.amazon.com [10.39.38.70]) by email-inbound-relay-pdx-2c-m6i4x-8c5b1df3.us-west-2.amazon.com (Postfix) with ESMTPS id 3763B40BBD; Mon, 20 Nov 2023 22:24:26 +0000 (UTC) Received: from EX19MTAUWC001.ant.amazon.com [10.0.7.35:28056] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.54.99:2525] with esmtp (Farcaster) id b5c9d4ad-f973-4a77-bbb4-046bec1561c4; Mon, 20 Nov 2023 22:24:25 +0000 (UTC) X-Farcaster-Flow-ID: b5c9d4ad-f973-4a77-bbb4-046bec1561c4 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWC001.ant.amazon.com (10.250.64.174) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:24:25 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:24:21 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 01/11] tcp: Clean up reverse xmas tree in cookie_v[46]_check(). Date: Mon, 20 Nov 2023 14:23:31 -0800 Message-ID: <20231120222341.54776-2-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D038UWC001.ant.amazon.com (10.13.139.213) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net We will grow and cut the xmas tree in cookie_v[46]_check(). This patch cleans it up to make later patches tidy. Signed-off-by: Kuniyuki Iwashima --- net/ipv4/syncookies.c | 10 +++++----- net/ipv6/syncookies.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d37282c06e3d..a0118ea76734 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -331,18 +331,18 @@ EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; + const struct tcphdr *th = tcp_hdr(skb); + __u32 cookie = ntohl(th->ack_seq) - 1; struct tcp_options_received tcp_opt; + struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; struct tcp_request_sock *treq; - struct tcp_sock *tp = tcp_sk(sk); - const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; - struct sock *ret = sk; struct request_sock *req; + struct sock *ret = sk; int full_space, mss; + struct flowi4 fl4; struct rtable *rt; __u8 rcv_wscale; - struct flowi4 fl4; u32 tsoff = 0; int l3index; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 12eedc6ca2cc..aa5fb5486cde 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -127,17 +127,17 @@ EXPORT_SYMBOL_GPL(__cookie_v6_check); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { + const struct tcphdr *th = tcp_hdr(skb); + __u32 cookie = ntohl(th->ack_seq) - 1; + struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tcp_opt; + struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; struct tcp_request_sock *treq; - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; - struct sock *ret = sk; struct request_sock *req; - int full_space, mss; struct dst_entry *dst; + struct sock *ret = sk; + int full_space, mss; __u8 rcv_wscale; u32 tsoff = 0; int l3index; From patchwork Mon Nov 20 22:23:32 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462187 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="Qd0Nz24C" Received: from smtp-fw-80009.amazon.com (smtp-fw-80009.amazon.com [99.78.197.220]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id CD7FFA2; Mon, 20 Nov 2023 14:24:56 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519097; x=1732055097; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=QnpxzIipwhSuxYeF76lXOslv74Nh2iQ5cA36l7tkEMQ=; b=Qd0Nz24Cbq2KcLQGtnhjEdyvVFu66eSGTGhPC+95v0PXbS4qX1So3VLt KxF+lUdQLBbPFoIw0kvgtSMI+c+C3TkSw96rkwVYp57tKQf+H1PF2b4Fj zPKDszqDEdOrHA/aiJyMJUPjiaALXg/AMiWRQc6BZ7+0khLvHakPHoT7x c=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="44989961" Received: from pdx4-co-svc-p1-lb2-vlan2.amazon.com (HELO email-inbound-relay-pdx-2c-m6i4x-8c5b1df3.us-west-2.amazon.com) ([10.25.36.210]) by smtp-border-fw-80009.pdx80.corp.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:24:53 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (pdx2-ws-svc-p26-lb5-vlan3.pdx.amazon.com [10.39.38.70]) by email-inbound-relay-pdx-2c-m6i4x-8c5b1df3.us-west-2.amazon.com (Postfix) with ESMTPS id 385AB40D52; Mon, 20 Nov 2023 22:24:52 +0000 (UTC) Received: from EX19MTAUWA001.ant.amazon.com [10.0.21.151:11575] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.36.6:2525] with esmtp (Farcaster) id f8786bde-3035-4cce-a837-04f02d9be52c; Mon, 20 Nov 2023 22:24:51 +0000 (UTC) X-Farcaster-Flow-ID: f8786bde-3035-4cce-a837-04f02d9be52c Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWA001.ant.amazon.com (10.250.64.218) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:24:51 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:24:47 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 02/11] tcp: Cache sock_net(sk) in cookie_v[46]_check(). Date: Mon, 20 Nov 2023 14:23:32 -0800 Message-ID: <20231120222341.54776-3-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D044UWA003.ant.amazon.com (10.13.139.43) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net sock_net(sk) is used repeatedly in cookie_v[46]_check(). Let's cache it in a variable. Signed-off-by: Kuniyuki Iwashima --- net/ipv4/syncookies.c | 21 +++++++++++---------- net/ipv6/syncookies.c | 19 ++++++++++--------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a0118ea76734..fb41bb18fe6b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -336,6 +336,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct tcp_options_received tcp_opt; struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; + struct net *net = sock_net(sk); struct tcp_request_sock *treq; struct request_sock *req; struct sock *ret = sk; @@ -346,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) u32 tsoff = 0; int l3index; - if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) goto out; @@ -355,24 +356,24 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) mss = __cookie_v4_check(ip_hdr(skb), th, cookie); if (mss == 0) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(sock_net(sk), + tsoff = secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) + if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; ret = NULL; @@ -406,13 +407,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_iif = inet_request_bound_dev_if(sk, skb); - l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb)); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); @@ -433,7 +434,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) opt->srr ? opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); - rt = ip_route_output_key(sock_net(sk), &fl4); + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { reqsk_free(req); goto out; @@ -453,7 +454,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); + ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, &rt->dst); ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); /* ip_queue_xmit() depends on our flow being setup diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index aa5fb5486cde..ba394fa73f41 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -133,6 +133,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct tcp_options_received tcp_opt; struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; + struct net *net = sock_net(sk); struct tcp_request_sock *treq; struct request_sock *req; struct dst_entry *dst; @@ -142,7 +143,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) u32 tsoff = 0; int l3index; - if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) goto out; @@ -151,24 +152,24 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); if (mss == 0) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcpv6_ts_off(sock_net(sk), + tsoff = secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) + if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; ret = NULL; @@ -217,7 +218,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->ts_off = 0; treq->txhash = net_tx_rndhash(); - l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); if (IS_ENABLED(CONFIG_SMC)) @@ -243,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); - dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); + dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); if (IS_ERR(dst)) goto out_free; } @@ -261,7 +262,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); + ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, dst); ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); out: From patchwork Mon Nov 20 22:23:33 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462188 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="FQk+i8Z3" Received: from smtp-fw-2101.amazon.com (smtp-fw-2101.amazon.com [72.21.196.25]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DD30ED8; Mon, 20 Nov 2023 14:25:24 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519126; x=1732055126; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=yYLlb5n0UieUbkQyodcteK0sltQb9GBOvK/pMQSwaB8=; b=FQk+i8Z3ve9rgZJ0MTKIy8FKVPqj0J4sRmdgN08fH1tGfU5+c95DgsNL A0RgWmQ7KSqZaXZ+tbwEcf9XiHVJ7ft4LEU67vwGzjFU9hi8KneFJQHe4 yUQNpL/2TJw6jaiRLZ7cd7/D+RtXqLDaEbYhHMJNojBaxK3AgBfTADe9B w=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="363932543" Received: from iad12-co-svc-p1-lb1-vlan3.amazon.com (HELO email-inbound-relay-iad-1a-m6i4x-54a853e6.us-east-1.amazon.com) ([10.43.8.6]) by smtp-border-fw-2101.iad2.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:25:23 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (iad7-ws-svc-p70-lb3-vlan3.iad.amazon.com [10.32.235.38]) by email-inbound-relay-iad-1a-m6i4x-54a853e6.us-east-1.amazon.com (Postfix) with ESMTPS id 044BD49E78; Mon, 20 Nov 2023 22:25:17 +0000 (UTC) Received: from EX19MTAUWC001.ant.amazon.com [10.0.7.35:60936] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.36.6:2525] with esmtp (Farcaster) id 52d17316-ef6d-4aaf-b85a-0492a131bd8b; Mon, 20 Nov 2023 22:25:17 +0000 (UTC) X-Farcaster-Flow-ID: 52d17316-ef6d-4aaf-b85a-0492a131bd8b Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWC001.ant.amazon.com (10.250.64.174) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:25:16 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA) id 15.2.1118.39; Mon, 20 Nov 2023 22:25:12 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 03/11] tcp: Clean up goto labels in cookie_v[46]_check(). Date: Mon, 20 Nov 2023 14:23:33 -0800 Message-ID: <20231120222341.54776-4-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D037UWB002.ant.amazon.com (10.13.138.121) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net We will support arbitrary SYN Cookie with BPF, and then reqsk will be preallocated before cookie_v[46]_check(). Depending on how validation fails, we send RST or just drop skb. To make the error handling easier, let's clean up goto labels. Signed-off-by: Kuniyuki Iwashima --- net/ipv4/syncookies.c | 22 +++++++++++----------- net/ipv6/syncookies.c | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index fb41bb18fe6b..8b7d7d7788af 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -376,11 +376,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - ret = NULL; req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, &tcp_request_sock_ipv4_ops, sk, skb); if (!req) - goto out; + goto out_drop; ireq = inet_rsk(req); treq = tcp_rsk(req); @@ -415,10 +414,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) */ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); - if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); - goto out; - } + if (security_inet_conn_request(sk, skb, req)) + goto out_free; req->num_retrans = 0; @@ -435,10 +432,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - reqsk_free(req); - goto out; - } + if (IS_ERR(rt)) + goto out_free; /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); @@ -462,5 +457,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) */ if (ret) inet_sk(ret)->cork.fl.u.ip4 = fl4; -out: return ret; +out: + return ret; +out_free: + reqsk_free(req); +out_drop: + return NULL; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ba394fa73f41..106376cbc9de 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -172,11 +172,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - ret = NULL; req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, &tcp_request_sock_ipv6_ops, sk, skb); if (!req) - goto out; + goto out_drop; ireq = inet_rsk(req); treq = tcp_rsk(req); @@ -269,5 +268,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) return ret; out_free: reqsk_free(req); +out_drop: return NULL; } From patchwork Mon Nov 20 22:23:34 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462189 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="VJ2T0ZLT" Received: from smtp-fw-80008.amazon.com (smtp-fw-80008.amazon.com [99.78.197.219]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C77C3D8; Mon, 20 Nov 2023 14:25:57 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519157; x=1732055157; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=jlKUJrk73Mv3zbApSPHD3+Zd4V9jyvSXYH68b7XnrhQ=; b=VJ2T0ZLTsBTcQnRsGHW6uOcnPV7j9w85wZ5+GgUwAvH6dktaZb1rI/5s vbw53E9/PquJdFbDwBtHqGtD5b+S0cTudaLB6zkcR6Q1q72wr6kJrVTa4 MWxevpoDjqpUGkBHBc1tKk0H4IoN58BzYKiI/Pefp+6XD1qgOU0VCl/jQ A=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="45067081" Received: from pdx4-co-svc-p1-lb2-vlan3.amazon.com (HELO email-inbound-relay-pdx-2a-m6i4x-8a14c045.us-west-2.amazon.com) ([10.25.36.214]) by smtp-border-fw-80008.pdx80.corp.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:25:52 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (pdx2-ws-svc-p26-lb5-vlan3.pdx.amazon.com [10.39.38.70]) by email-inbound-relay-pdx-2a-m6i4x-8a14c045.us-west-2.amazon.com (Postfix) with ESMTPS id 183CB803D4; Mon, 20 Nov 2023 22:25:51 +0000 (UTC) Received: from EX19MTAUWA002.ant.amazon.com [10.0.21.151:2843] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.54.99:2525] with esmtp (Farcaster) id 9ed75e44-0066-4fb0-b8c0-8adfe43469a5; Mon, 20 Nov 2023 22:25:50 +0000 (UTC) X-Farcaster-Flow-ID: 9ed75e44-0066-4fb0-b8c0-8adfe43469a5 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWA002.ant.amazon.com (10.250.64.202) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:25:42 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA) id 15.2.1118.39; Mon, 20 Nov 2023 22:25:38 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 04/11] tcp: Don't pass cookie to __cookie_v[46]_check(). Date: Mon, 20 Nov 2023 14:23:34 -0800 Message-ID: <20231120222341.54776-5-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D035UWA004.ant.amazon.com (10.13.139.109) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net tcp_hdr(skb) and SYN Cookie are passed to __cookie_v[46]_check(), but none of the callers passes cookie other than ntohl(th->ack_seq) - 1. Let's fetch it in __cookie_v[46]_check() instead of passing the cookie over and over. Signed-off-by: Kuniyuki Iwashima --- include/linux/netfilter_ipv6.h | 8 ++++---- include/net/tcp.h | 6 ++---- net/core/filter.c | 15 ++++----------- net/ipv4/syncookies.c | 15 ++++++++------- net/ipv6/syncookies.c | 15 ++++++++------- net/netfilter/nf_synproxy_core.c | 4 ++-- 6 files changed, 28 insertions(+), 35 deletions(-) diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 7834c0be2831..61aa48f46dd7 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -51,7 +51,7 @@ struct nf_ipv6_ops { u32 (*cookie_init_sequence)(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); int (*cookie_v6_check)(const struct ipv6hdr *iph, - const struct tcphdr *th, __u32 cookie); + const struct tcphdr *th); #endif void (*route_input)(struct sk_buff *skb); int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, @@ -179,16 +179,16 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, } static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, - const struct tcphdr *th, __u32 cookie) + const struct tcphdr *th) { #if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) - return v6_ops->cookie_v6_check(iph, th, cookie); + return v6_ops->cookie_v6_check(iph, th); #elif IS_BUILTIN(CONFIG_IPV6) - return __cookie_v6_check(iph, th, cookie); + return __cookie_v6_check(iph, th); #endif #endif return 0; diff --git a/include/net/tcp.h b/include/net/tcp.h index d2f0736b76b8..2b2c79c7bbcd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -491,8 +491,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, u32 tsoff); -int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, - u32 cookie); +int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, const struct tcp_request_sock_ops *af_ops, @@ -586,8 +585,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *opt, const struct net *net, const struct dst_entry *dst); /* From net/ipv6/syncookies.c */ -int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, - u32 cookie); +int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, diff --git a/net/core/filter.c b/net/core/filter.c index 383f96b0a1c7..d64baa7ac6cd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7229,7 +7229,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len struct tcphdr *, th, u32, th_len) { #ifdef CONFIG_SYN_COOKIES - u32 cookie; int ret; if (unlikely(!sk || th_len < sizeof(*th))) @@ -7251,8 +7250,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; - cookie = ntohl(th->ack_seq) - 1; - /* Both struct iphdr and struct ipv6hdr have the version field at the * same offset so we can cast to the shorter header (struct iphdr). */ @@ -7261,7 +7258,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; - ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); + ret = __cookie_v4_check((struct iphdr *)iph, th); break; #if IS_BUILTIN(CONFIG_IPV6) @@ -7272,7 +7269,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_family != AF_INET6) return -EINVAL; - ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); + ret = __cookie_v6_check((struct ipv6hdr *)iph, th); break; #endif /* CONFIG_IPV6 */ @@ -7725,9 +7722,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, struct tcphdr *, th) { - u32 cookie = ntohl(th->ack_seq) - 1; - - if (__cookie_v4_check(iph, th, cookie) > 0) + if (__cookie_v4_check(iph, th) > 0) return 0; return -EACCES; @@ -7748,9 +7743,7 @@ BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, struct tcphdr *, th) { #if IS_BUILTIN(CONFIG_IPV6) - u32 cookie = ntohl(th->ack_seq) - 1; - - if (__cookie_v6_check(iph, th, cookie) > 0) + if (__cookie_v6_check(iph, th) > 0) return 0; return -EACCES; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8b7d7d7788af..c08428d63d11 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -189,12 +189,14 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) * Check if a ack sequence number is a valid syncookie. * Return the decoded mss if it is, or 0 if not. */ -int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, - u32 cookie) +int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th) { + __u32 cookie = ntohl(th->ack_seq) - 1; __u32 seq = ntohl(th->seq) - 1; - __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, - th->source, th->dest, seq); + __u32 mssind; + + mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } @@ -332,7 +334,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; struct tcp_options_received tcp_opt; struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; @@ -354,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v4_check(ip_hdr(skb), th, cookie); + mss = __cookie_v4_check(ip_hdr(skb), th); if (mss == 0) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; @@ -384,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = cookie; + treq->snt_isn = ntohl(th->ack_seq) - 1; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); req->mss = mss; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 106376cbc9de..4cd26c481168 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -114,12 +114,14 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp) return __cookie_v6_init_sequence(iph, th, mssp); } -int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, - __u32 cookie) +int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) { + __u32 cookie = ntohl(th->ack_seq) - 1; __u32 seq = ntohl(th->seq) - 1; - __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, - th->source, th->dest, seq); + __u32 mssind; + + mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } @@ -128,7 +130,6 @@ EXPORT_SYMBOL_GPL(__cookie_v6_check); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tcp_opt; struct tcp_sock *tp = tcp_sk(sk); @@ -150,7 +151,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); + mss = __cookie_v6_check(ipv6_hdr(skb), th); if (mss == 0) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; @@ -213,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = 0; treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = cookie; + treq->snt_isn = ntohl(th->ack_seq) - 1; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 467671f2d42f..fbbc4fd37349 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -617,7 +617,7 @@ synproxy_recv_client_ack(struct net *net, struct synproxy_net *snet = synproxy_pernet(net); int mss; - mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); + mss = __cookie_v4_check(ip_hdr(skb), th); if (mss == 0) { this_cpu_inc(snet->stats->cookie_invalid); return false; @@ -1034,7 +1034,7 @@ synproxy_recv_client_ack_ipv6(struct net *net, struct synproxy_net *snet = synproxy_pernet(net); int mss; - mss = nf_cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1); + mss = nf_cookie_v6_check(ipv6_hdr(skb), th); if (mss == 0) { this_cpu_inc(snet->stats->cookie_invalid); return false; From patchwork Mon Nov 20 22:23:35 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462190 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="n4vgT4Cw" Received: from smtp-fw-9106.amazon.com (smtp-fw-9106.amazon.com [207.171.188.206]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 54637A2; Mon, 20 Nov 2023 14:26:16 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519176; x=1732055176; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=hTXVYsjHlXxS4ygF6uxrwNsUA9p+2KM2DEkM38vXuYc=; b=n4vgT4CwaILibutFgmeeCqgWDKimbvfxSRafMcM9XdZeEqwfkBIG8jvX HGP92FApuv9BdaC1McDasNMSmWfeO/dgNY2kRH6LUKKLZyuujn5OXC1zV pdO01Dr5uc7M1mj/XRGVYO5QbIzOgXTchDah9x3W+dgXK1qK+W4QZwOgJ s=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="685326482" Received: from pdx4-co-svc-p1-lb2-vlan2.amazon.com (HELO email-inbound-relay-pdx-2a-m6i4x-83883bdb.us-west-2.amazon.com) ([10.25.36.210]) by smtp-border-fw-9106.sea19.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:26:10 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (pdx2-ws-svc-p26-lb5-vlan2.pdx.amazon.com [10.39.38.66]) by email-inbound-relay-pdx-2a-m6i4x-83883bdb.us-west-2.amazon.com (Postfix) with ESMTPS id 80D0760C9D; Mon, 20 Nov 2023 22:26:09 +0000 (UTC) Received: from EX19MTAUWB001.ant.amazon.com [10.0.21.151:25092] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.12.176:2525] with esmtp (Farcaster) id 58c38f50-0d78-4149-a99b-6abb269db523; Mon, 20 Nov 2023 22:26:09 +0000 (UTC) X-Farcaster-Flow-ID: 58c38f50-0d78-4149-a99b-6abb269db523 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWB001.ant.amazon.com (10.250.64.248) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:26:08 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:26:04 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 05/11] tcp: Don't initialise tp->tsoffset in tcp_get_cookie_sock(). Date: Mon, 20 Nov 2023 14:23:35 -0800 Message-ID: <20231120222341.54776-6-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D039UWB002.ant.amazon.com (10.13.138.79) To EX19D004ANA001.ant.amazon.com (10.37.240.138) X-Patchwork-Delegate: bpf@iogearbox.net When we create a full socket from SYN Cookie, we initialise tcp_sk(sk)->tsoffset redundantly in tcp_get_cookie_sock() as the field is inherited from tcp_rsk(req)->ts_off. cookie_v[46]_check |- treq->ts_off = 0 `- tcp_get_cookie_sock |- tcp_v[46]_syn_recv_sock | `- tcp_create_openreq_child | `- newtp->tsoffset = treq->ts_off `- tcp_sk(child)->tsoffset = tsoff Let's initialise tcp_rsk(req)->ts_off with the correct offset and remove the second initialisation of tcp_sk(sk)->tsoffset. Signed-off-by: Kuniyuki Iwashima --- include/net/tcp.h | 2 +- net/ipv4/syncookies.c | 7 +++---- net/ipv6/syncookies.c | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 2b2c79c7bbcd..cc7143a781da 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -490,7 +490,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst, u32 tsoff); + struct dst_entry *dst); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c08428d63d11..de051eb08db8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -204,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst, u32 tsoff) + struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -214,7 +214,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, NULL, &own_req); if (child) { refcount_set(&req->rsk_refcnt, 1); - tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); if (rsk_drop_req(req)) { @@ -386,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = ntohl(th->ack_seq) - 1; - treq->ts_off = 0; + treq->ts_off = tsoff; treq->txhash = net_tx_rndhash(); req->mss = mss; ireq->ir_num = ntohs(th->dest); @@ -452,7 +451,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, &rt->dst); - ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); + ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 4cd26c481168..18c2e3c1677b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -215,7 +215,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = ntohl(th->ack_seq) - 1; - treq->ts_off = 0; + treq->ts_off = tsoff; treq->txhash = net_tx_rndhash(); l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); @@ -264,7 +264,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, dst); - ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); + ret = tcp_get_cookie_sock(sk, skb, req, dst); out: return ret; out_free: From patchwork Mon Nov 20 22:23:36 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462192 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="jE8nlfzM" Received: from smtp-fw-52005.amazon.com (smtp-fw-52005.amazon.com [52.119.213.156]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2C36DA2; Mon, 20 Nov 2023 14:26:49 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519209; x=1732055209; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=GE1NGt33UkveFcasoYOwExD3r8TqRfqQ+Y3LDNLMYy8=; b=jE8nlfzMf2yrRtFNC95l1Ab9TAC4iNUqnLZnn+r9mUtXfaUWzIzxYI1+ 6gbUjX5esd9l1eWkdZZY+7ln4sLwZD/9HLTxfC9OfuFkdWpRGxvekOKUt Ey9rXSVjTfm85261xazWxlQqm158CCo7/AIZANQj8Q5O3VaglrcfzGTNC I=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="617190399" Received: from iad12-co-svc-p1-lb1-vlan3.amazon.com (HELO email-inbound-relay-pdx-2b-m6i4x-0ec33b60.us-west-2.amazon.com) ([10.43.8.6]) by smtp-border-fw-52005.iad7.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:26:40 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (pdx2-ws-svc-p26-lb5-vlan2.pdx.amazon.com [10.39.38.66]) by email-inbound-relay-pdx-2b-m6i4x-0ec33b60.us-west-2.amazon.com (Postfix) with ESMTPS id 78C85A0992; Mon, 20 Nov 2023 22:26:35 +0000 (UTC) Received: from EX19MTAUWC002.ant.amazon.com [10.0.21.151:53038] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.8.2:2525] with esmtp (Farcaster) id 5aac473f-44a5-4b29-bde7-a12a0067d238; Mon, 20 Nov 2023 22:26:34 +0000 (UTC) X-Farcaster-Flow-ID: 5aac473f-44a5-4b29-bde7-a12a0067d238 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWC002.ant.amazon.com (10.250.64.143) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:26:34 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:26:30 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 06/11] tcp: Move TCP-AO bits from cookie_v[46]_check() to tcp_ao_syncookie(). Date: Mon, 20 Nov 2023 14:23:36 -0800 Message-ID: <20231120222341.54776-7-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D043UWC003.ant.amazon.com (10.13.139.240) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net We initialise treq->af_specific in cookie_tcp_reqsk_alloc() so that we can look up a key later in tcp_create_openreq_child(). Initially, that change was added for MD5 by commit ba5a4fdd63ae ("tcp: make sure treq->af_specific is initialized"), but it has not been used since commit d0f2b7a9ca0a ("tcp: Disable header prediction for MD5 flow."). Now, treq->af_specific is used only by TCP-AO, so, we can move that initialisation into tcp_ao_syncookie(). In addition to that, l3index in cookie_v[46]_check() is only used for tcp_ao_syncookie(), so let's move it as well. While at it, we move down tcp_ao_syncookie() in cookie_v4_check() so that it will be called after security_inet_conn_request() to make functions order consistent with cookie_v6_check(). Signed-off-by: Kuniyuki Iwashima --- include/net/tcp.h | 1 - include/net/tcp_ao.h | 6 ++---- net/ipv4/syncookies.c | 16 ++++------------ net/ipv4/tcp_ao.c | 16 ++++++++++++++-- net/ipv6/syncookies.c | 7 ++----- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index cc7143a781da..d4d0e9763175 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -494,7 +494,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index b56be10838f0..4d900ef8dfc3 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -265,8 +265,7 @@ void tcp_ao_established(struct sock *sk); void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); void tcp_ao_connect_init(struct sock *sk); void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, - struct tcp_request_sock *treq, - unsigned short int family, int l3index); + struct request_sock *req, unsigned short int family); #else /* CONFIG_TCP_AO */ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, @@ -277,8 +276,7 @@ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, } static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, - struct tcp_request_sock *treq, - unsigned short int family, int l3index) + struct request_sock *req, unsigned short int family) { } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index de051eb08db8..1e3783c97e28 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -286,9 +286,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, EXPORT_SYMBOL(cookie_ecn_ok); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - const struct tcp_request_sock_ops *af_ops, - struct sock *sk, - struct sk_buff *skb) + struct sock *sk, struct sk_buff *skb) { struct tcp_request_sock *treq; struct request_sock *req; @@ -303,9 +301,6 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, treq = tcp_rsk(req); - /* treq->af_specific might be used to perform TCP_MD5 lookup */ - treq->af_specific = af_ops; - treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; treq->req_usec_ts = false; @@ -345,7 +340,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct rtable *rt; __u8 rcv_wscale; u32 tsoff = 0; - int l3index; if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -376,8 +370,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, - &tcp_request_sock_ipv4_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); if (!req) goto out_drop; @@ -406,9 +399,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_iif = inet_request_bound_dev_if(sk, skb); - l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); - tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index); - /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ @@ -417,6 +407,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (security_inet_conn_request(sk, skb, req)) goto out_free; + tcp_ao_syncookie(sk, skb, req, AF_INET); + req->num_retrans = 0; /* diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 7696417d0640..c4cd1e09eb6b 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -844,18 +844,30 @@ static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family, } void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, - struct tcp_request_sock *treq, - unsigned short int family, int l3index) + struct request_sock *req, unsigned short int family) { + struct tcp_request_sock *treq = tcp_rsk(req); const struct tcphdr *th = tcp_hdr(skb); const struct tcp_ao_hdr *aoh; struct tcp_ao_key *key; + int l3index; + + /* treq->af_specific is used to perform TCP_AO lookup + * in tcp_create_openreq_child(). + */ +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) + treq->af_specific = &tcp_request_sock_ipv6_ops; + else +#endif + treq->af_specific = &tcp_request_sock_ipv4_ops; treq->maclen = 0; if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) return; + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), inet_rsk(req)->ir_iif); key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); if (!key) /* Key not found, continue without TCP-AO */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 18c2e3c1677b..12b1809245f9 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -142,7 +142,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int full_space, mss; __u8 rcv_wscale; u32 tsoff = 0; - int l3index; if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -173,8 +172,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, - &tcp_request_sock_ipv6_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); if (!req) goto out_drop; @@ -218,8 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->ts_off = tsoff; treq->txhash = net_tx_rndhash(); - l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); - tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); + tcp_ao_syncookie(sk, skb, req, AF_INET6); if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; From patchwork Mon Nov 20 22:23:37 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462193 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="iN55E0Gk" Received: from smtp-fw-9102.amazon.com (smtp-fw-9102.amazon.com [207.171.184.29]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 675F2C7; Mon, 20 Nov 2023 14:27:10 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519230; x=1732055230; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=ogdre1bYMCb9SWGOxo4KI11NZiojqrX7LIjIUbvo+0A=; b=iN55E0Gk7YjFTbiS9wPIpprHjET+sigtRu4Wlt7LpwiS7uMWk9FvStq4 aCY1P/p6f7lYdA0SUo9mxkNQ2MF/txYjCTPgTa3UIQJOdj7e///hly+RI pXYsGhhQfS1uKhTiOpSbEmgDlEJ5fBFiE65A6mPtJzGgJJk6JvoZ7wCOK Y=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="377651486" Received: from pdx4-co-svc-p1-lb2-vlan3.amazon.com (HELO email-inbound-relay-pdx-2c-m6i4x-b1c0e1d0.us-west-2.amazon.com) ([10.25.36.214]) by smtp-border-fw-9102.sea19.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:27:02 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (pdx2-ws-svc-p26-lb5-vlan3.pdx.amazon.com [10.39.38.70]) by email-inbound-relay-pdx-2c-m6i4x-b1c0e1d0.us-west-2.amazon.com (Postfix) with ESMTPS id 6A22C8A35F; Mon, 20 Nov 2023 22:27:01 +0000 (UTC) Received: from EX19MTAUWA001.ant.amazon.com [10.0.7.35:40606] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.12.242:2525] with esmtp (Farcaster) id 02cb6594-e05f-4124-9a9e-3500c23824c0; Mon, 20 Nov 2023 22:27:01 +0000 (UTC) X-Farcaster-Flow-ID: 02cb6594-e05f-4124-9a9e-3500c23824c0 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWA001.ant.amazon.com (10.250.64.217) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:27:00 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:26:56 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 07/11] tcp: Factorise cookie req initialisation. Date: Mon, 20 Nov 2023 14:23:37 -0800 Message-ID: <20231120222341.54776-8-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D041UWA003.ant.amazon.com (10.13.139.105) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net We will support arbitrary SYN Cookie with BPF, and then some reqsk fields are initialised in kfunc, and others are done in cookie_v[46]_check(). This patch factorises the common part as cookie_tcp_reqsk_init() and calls it in cookie_tcp_reqsk_alloc() to minimise the discrepancy between cookie_v[46]_check(). Signed-off-by: Kuniyuki Iwashima --- net/ipv4/syncookies.c | 69 ++++++++++++++++++++++++------------------- net/ipv6/syncookies.c | 14 --------- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 1e3783c97e28..9bca1c026525 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -285,10 +285,44 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, } EXPORT_SYMBOL(cookie_ecn_ok); +static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_request_sock *treq = tcp_rsk(req); + const struct tcphdr *th = tcp_hdr(skb); + + req->num_retrans = 0; + + ireq->ir_num = ntohs(th->dest); + ireq->ir_rmt_port = th->source; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); + ireq->ir_mark = inet_request_mark(sk, skb); + + if (IS_ENABLED(CONFIG_SMC)) + ireq->smc_ok = 0; + + treq->snt_synack = 0; + treq->tfo_listener = false; + treq->txhash = net_tx_rndhash(); + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = ntohl(th->ack_seq) - 1; + treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; + treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; + treq->req_usec_ts = false; + +#if IS_ENABLED(CONFIG_MPTCP) + treq->is_mptcp = sk_is_mptcp(sk); + if (treq->is_mptcp) + return mptcp_subflow_init_cookie_req(req, sk, skb); +#endif + + return 0; +} + struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk, struct sk_buff *skb) { - struct tcp_request_sock *treq; struct request_sock *req; if (sk_is_mptcp(sk)) @@ -299,22 +333,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, if (!req) return NULL; - treq = tcp_rsk(req); - - treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = false; - -#if IS_ENABLED(CONFIG_MPTCP) - treq->is_mptcp = sk_is_mptcp(sk); - if (treq->is_mptcp) { - int err = mptcp_subflow_init_cookie_req(req, sk, skb); - - if (err) { - reqsk_free(req); - return NULL; - } + if (cookie_tcp_reqsk_init(sk, skb, req)) { + reqsk_free(req); + return NULL; } -#endif return req; } @@ -376,28 +398,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = ntohl(th->ack_seq) - 1; treq->ts_off = tsoff; - treq->txhash = net_tx_rndhash(); req->mss = mss; - ireq->ir_num = ntohs(th->dest); - ireq->ir_rmt_port = th->source; sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->ir_mark = inet_request_mark(sk, skb); ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = 0; - treq->tfo_listener = false; - - if (IS_ENABLED(CONFIG_SMC)) - ireq->smc_ok = 0; - - ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -409,8 +418,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) tcp_ao_syncookie(sk, skb, req, AF_INET); - req->num_retrans = 0; - /* * We need to lookup the route here to get at the correct * window size. We should better make sure that the window size diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 12b1809245f9..e0a9220d1536 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -178,11 +178,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->tfo_listener = false; req->mss = mss; - ireq->ir_rmt_port = th->source; - ireq->ir_num = ntohs(th->dest); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; @@ -196,31 +193,20 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->pktopts = skb; } - ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); - ireq->ir_mark = inet_request_mark(sk, skb); - - req->num_retrans = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = 0; - treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = ntohl(th->ack_seq) - 1; treq->ts_off = tsoff; - treq->txhash = net_tx_rndhash(); tcp_ao_syncookie(sk, skb, req, AF_INET6); - if (IS_ENABLED(CONFIG_SMC)) - ireq->smc_ok = 0; - /* * We need to lookup the dst_entry to get the correct window size. * This is taken from tcp_v6_syn_recv_sock. Somebody please enlighten From patchwork Mon Nov 20 22:23:38 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462194 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="PYhLbKf0" Received: from smtp-fw-80009.amazon.com (smtp-fw-80009.amazon.com [99.78.197.220]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id EE85E92; Mon, 20 Nov 2023 14:27:28 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519249; x=1732055249; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=/IDCn4vk4hlNpIMmgf+NBEzfSAq/I2fBO/TCBRwaARI=; b=PYhLbKf0+h/tYI8x24Ign6WNJEJ/WC3qf+v5k8vIrt570ABxDx2OJqHd D2zUhrmAA7ik3FihtbUM8Th+y1tIpvQbYifpYchoR8fImHRPBaHzp/7P7 F2TwoxdOZfVfTMPInGh/w8gTPv1xQscCAjEe8qZC2LeGwXNIymiQAjk+2 U=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="44990747" Received: from pdx4-co-svc-p1-lb2-vlan2.amazon.com (HELO email-inbound-relay-pdx-2b-m6i4x-cadc3fbd.us-west-2.amazon.com) ([10.25.36.210]) by smtp-border-fw-80009.pdx80.corp.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:27:28 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (pdx2-ws-svc-p26-lb5-vlan2.pdx.amazon.com [10.39.38.66]) by email-inbound-relay-pdx-2b-m6i4x-cadc3fbd.us-west-2.amazon.com (Postfix) with ESMTPS id 890C9A0975; Mon, 20 Nov 2023 22:27:27 +0000 (UTC) Received: from EX19MTAUWC001.ant.amazon.com [10.0.38.20:45441] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.12.176:2525] with esmtp (Farcaster) id 7e8454d7-50bb-4e59-bcbd-0697f8e0f015; Mon, 20 Nov 2023 22:27:27 +0000 (UTC) X-Farcaster-Flow-ID: 7e8454d7-50bb-4e59-bcbd-0697f8e0f015 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWC001.ant.amazon.com (10.250.64.174) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:27:26 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:27:22 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 08/11] tcp: Factorise non-BPF SYN Cookie handling. Date: Mon, 20 Nov 2023 14:23:38 -0800 Message-ID: <20231120222341.54776-9-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D044UWB001.ant.amazon.com (10.13.139.171) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net We will support arbitrary SYN Cookie with BPF, and then kfunc at TC will preallocate reqsk and initialise some fields that should not be overwritten later by cookie_v[46]_check(). To simplify the flow in cookie_v[46]_check(), we move such fields' initialisation to cookie_tcp_reqsk_alloc() and factorise non-BPF SYN Cookie handling into cookie_tcp_check(), where we validate the cookie and allocate reqsk, as done by kfunc later. Note that we set ireq->ecn_ok in two steps, the latter of which will be shared by the BPF case. As cookie_ecn_ok() is one-liner, now it's inlined. Signed-off-by: Kuniyuki Iwashima --- include/net/tcp.h | 13 ++++-- net/ipv4/syncookies.c | 106 +++++++++++++++++++++++------------------- net/ipv6/syncookies.c | 61 ++++++++++++------------ 3 files changed, 99 insertions(+), 81 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index d4d0e9763175..973555cb1d3f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -494,7 +494,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - struct sock *sk, struct sk_buff *skb); + struct sock *sk, struct sk_buff *skb, + struct tcp_options_received *tcp_opt, + int mss, u32 tsoff); + #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. @@ -580,8 +583,12 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); u64 cookie_init_timestamp(struct request_sock *req, u64 now); bool cookie_timestamp_decode(const struct net *net, struct tcp_options_received *opt); -bool cookie_ecn_ok(const struct tcp_options_received *opt, - const struct net *net, const struct dst_entry *dst); + +static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst) +{ + return READ_ONCE(net->ipv4.sysctl_tcp_ecn) || + dst_feature(dst, RTAX_FEATURE_ECN); +} /* From net/ipv6/syncookies.c */ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9bca1c026525..beea4d05fafc 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -270,21 +270,6 @@ bool cookie_timestamp_decode(const struct net *net, } EXPORT_SYMBOL(cookie_timestamp_decode); -bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, - const struct net *net, const struct dst_entry *dst) -{ - bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN; - - if (!ecn_ok) - return false; - - if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) - return true; - - return dst_feature(dst, RTAX_FEATURE_ECN); -} -EXPORT_SYMBOL(cookie_ecn_ok); - static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { @@ -321,8 +306,12 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, } struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - struct sock *sk, struct sk_buff *skb) + struct sock *sk, struct sk_buff *skb, + struct tcp_options_received *tcp_opt, + int mss, u32 tsoff) { + struct inet_request_sock *ireq; + struct tcp_request_sock *treq; struct request_sock *req; if (sk_is_mptcp(sk)) @@ -338,40 +327,36 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, return NULL; } + ireq = inet_rsk(req); + treq = tcp_rsk(req); + + req->mss = mss; + req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0; + + ireq->snd_wscale = tcp_opt->snd_wscale; + ireq->tstamp_ok = tcp_opt->saw_tstamp; + ireq->sack_ok = tcp_opt->sack_ok; + ireq->wscale_ok = tcp_opt->wscale_ok; + ireq->ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN; + + treq->ts_off = tsoff; + return req; } EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); -/* On input, sk is a listener. - * Output is listener if incoming packet would not create a child - * NULL if memory could not be allocated. - */ -struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, + struct sk_buff *skb) { - struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; - const struct tcphdr *th = tcp_hdr(skb); struct tcp_options_received tcp_opt; - struct tcp_sock *tp = tcp_sk(sk); - struct inet_request_sock *ireq; - struct net *net = sock_net(sk); - struct tcp_request_sock *treq; - struct request_sock *req; - struct sock *ret = sk; - int full_space, mss; - struct flowi4 fl4; - struct rtable *rt; - __u8 rcv_wscale; u32 tsoff = 0; - - if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || - !th->ack || th->rst) - goto out; + int mss; if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v4_check(ip_hdr(skb), th); - if (mss == 0) { + mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb)); + if (!mss) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } @@ -392,21 +377,44 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); + return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb, + &tcp_opt, mss, tsoff); +out: + return ERR_PTR(-EINVAL); +} + +/* On input, sk is a listener. + * Output is listener if incoming packet would not create a child + * NULL if memory could not be allocated. + */ +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +{ + struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; + const struct tcphdr *th = tcp_hdr(skb); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_request_sock *ireq; + struct net *net = sock_net(sk); + struct request_sock *req; + struct sock *ret = sk; + struct flowi4 fl4; + struct rtable *rt; + __u8 rcv_wscale; + int full_space; + + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) + goto out; + + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) + goto out; if (!req) goto out_drop; ireq = inet_rsk(req); - treq = tcp_rsk(req); - treq->ts_off = tsoff; - req->mss = mss; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -448,7 +456,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, &rt->dst); + ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst); ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); /* ip_queue_xmit() depends on our flow being setup diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index e0a9220d1536..c8d2ca27220c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -127,31 +127,18 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) } EXPORT_SYMBOL_GPL(__cookie_v6_check); -struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, + struct sk_buff *skb) { - const struct tcphdr *th = tcp_hdr(skb); - struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tcp_opt; - struct tcp_sock *tp = tcp_sk(sk); - struct inet_request_sock *ireq; - struct net *net = sock_net(sk); - struct tcp_request_sock *treq; - struct request_sock *req; - struct dst_entry *dst; - struct sock *ret = sk; - int full_space, mss; - __u8 rcv_wscale; u32 tsoff = 0; - - if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || - !th->ack || th->rst) - goto out; + int mss; if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v6_check(ipv6_hdr(skb), th); - if (mss == 0) { + mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb)); + if (!mss) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } @@ -172,14 +159,37 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); + return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb, + &tcp_opt, mss, tsoff); +out: + return ERR_PTR(-EINVAL); +} + +struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +{ + const struct tcphdr *th = tcp_hdr(skb); + struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_request_sock *ireq; + struct net *net = sock_net(sk); + struct request_sock *req; + struct dst_entry *dst; + struct sock *ret = sk; + __u8 rcv_wscale; + int full_space; + + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) + goto out; + + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) + goto out; if (!req) goto out_drop; ireq = inet_rsk(req); - treq = tcp_rsk(req); - req->mss = mss; ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; @@ -198,13 +208,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->ts_off = tsoff; - tcp_ao_syncookie(sk, skb, req, AF_INET6); /* @@ -245,7 +248,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, dst); + ireq->ecn_ok &= cookie_ecn_ok(net, dst); ret = tcp_get_cookie_sock(sk, skb, req, dst); out: From patchwork Mon Nov 20 22:23:39 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462196 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="RCIairo9" Received: from smtp-fw-80008.amazon.com (smtp-fw-80008.amazon.com [99.78.197.219]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 88ACD97; Mon, 20 Nov 2023 14:28:01 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519281; x=1732055281; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=+JRCveewSWjc65hVKXp90gQjdRBf1pSvCvgd9Qx0MPw=; b=RCIairo9KEeVn5JxJ+o8yY42n4mA7SxnTnh1pmqBSKU3ouDgDnoDqHaD awcHQx6A8va5I44GsP78LKWi/jsD9p9839wKonTNPbgXCkZTVmDYDGxk8 vFqrqVtRt/bIX3g6C0yiEBDhnj3poZ8mMZy0eVobkV78sDukzqTZH9DzN k=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="45067761" Received: from pdx4-co-svc-p1-lb2-vlan3.amazon.com (HELO email-inbound-relay-iad-1d-m6i4x-d23e07e8.us-east-1.amazon.com) ([10.25.36.214]) by smtp-border-fw-80008.pdx80.corp.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:27:58 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (iad7-ws-svc-p70-lb3-vlan3.iad.amazon.com [10.32.235.38]) by email-inbound-relay-iad-1d-m6i4x-d23e07e8.us-east-1.amazon.com (Postfix) with ESMTPS id B002A80E60; Mon, 20 Nov 2023 22:27:52 +0000 (UTC) Received: from EX19MTAUWB002.ant.amazon.com [10.0.7.35:8002] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.56.167:2525] with esmtp (Farcaster) id d572354e-59d5-4a3d-9e5c-984c7801c1f3; Mon, 20 Nov 2023 22:27:52 +0000 (UTC) X-Farcaster-Flow-ID: d572354e-59d5-4a3d-9e5c-984c7801c1f3 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWB002.ant.amazon.com (10.250.64.231) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:27:52 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:27:48 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 09/11] bpf: tcp: Handle BPF SYN Cookie in cookie_v[46]_check(). Date: Mon, 20 Nov 2023 14:23:39 -0800 Message-ID: <20231120222341.54776-10-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D039UWA002.ant.amazon.com (10.13.139.32) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net We will support arbitrary SYN Cookie with BPF in the following patch. If BPF prog validates ACK and kfunc allocates reqsk, it will be carried to cookie_[46]_check() as skb->sk. Then, we call cookie_bpf_check() to validate the configuration passed to kfunc. First, we clear skb->sk, skb->destructor, and req->rsk_listener, which are needed not to hold refcnt for reqsk and the listener. See the following patch for details. Then, we parse TCP options to check if tstamp_ok is discrepant. If it is invalid, we increment LINUX_MIB_SYNCOOKIESFAILED and send RST. If tstamp_ok is valid, we increment LINUX_MIB_SYNCOOKIESRECV. After that, we check sack_ok and wscale_ok with corresponding sysctl knobs. If the test fails, we send RST but do not increment LINUX_MIB_SYNCOOKIESFAILED. This behaviour is the same with the non-BPF cookie handling in cookie_tcp_check(). Finally, we finish initialisation for the remaining fields with cookie_tcp_reqsk_init(). Signed-off-by: Kuniyuki Iwashima --- include/net/tcp.h | 21 +++++++++++++++ net/ipv4/syncookies.c | 59 ++++++++++++++++++++++++++++++++++++++++++- net/ipv6/syncookies.c | 6 ++++- 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 973555cb1d3f..842791997f30 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -590,6 +590,27 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * dst_feature(dst, RTAX_FEATURE_ECN); } +#if IS_ENABLED(CONFIG_BPF) +static inline bool cookie_bpf_ok(struct sk_buff *skb) +{ + return skb->sk; +} + +struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk, + struct sk_buff *skb); +#else +static inline bool cookie_bpf_ok(struct sk_buff *skb) +{ + return false; +} + +static inline struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return NULL; +} +#endif + /* From net/ipv6/syncookies.c */ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index beea4d05fafc..b120eb4be8eb 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -305,6 +305,59 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, return 0; } +#if IS_ENABLED(CONFIG_BPF) +struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct request_sock *req = inet_reqsk(skb->sk); + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_request_sock *treq = tcp_rsk(req); + struct tcp_options_received tcp_opt; + int ret; + + skb->sk = NULL; + skb->destructor = NULL; + req->rsk_listener = NULL; + + memset(&tcp_opt, 0, sizeof(tcp_opt)); + tcp_parse_options(net, skb, &tcp_opt, 0, NULL); + + if (ireq->tstamp_ok ^ tcp_opt.saw_tstamp) { + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); + goto reset; + } + + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); + + if (ireq->tstamp_ok) { + if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) + goto reset; + + req->ts_recent = tcp_opt.rcv_tsval; + treq->ts_off = tcp_opt.rcv_tsecr - tcp_ns_to_ts(false, tcp_clock_ns()); + } + + if (ireq->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) + goto reset; + + if (ireq->wscale_ok && !READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) + goto reset; + + ret = cookie_tcp_reqsk_init(sk, skb, req); + if (ret) { + reqsk_free(req); + req = NULL; + } + + return req; + +reset: + reqsk_free(req); + return ERR_PTR(-EINVAL); +} +EXPORT_SYMBOL_GPL(cookie_bpf_check); +#endif + struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk, struct sk_buff *skb, struct tcp_options_received *tcp_opt, @@ -405,7 +458,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) !th->ack || th->rst) goto out; - req = cookie_tcp_check(net, sk, skb); + if (cookie_bpf_ok(skb)) + req = cookie_bpf_check(net, sk, skb); + else + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) goto out; if (!req) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index c8d2ca27220c..45f113994c4b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -182,7 +182,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) !th->ack || th->rst) goto out; - req = cookie_tcp_check(net, sk, skb); + if (cookie_bpf_ok(skb)) + req = cookie_bpf_check(net, sk, skb); + else + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) goto out; if (!req) From patchwork Mon Nov 20 22:23:40 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462197 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="MGQvU0+8" Received: from smtp-fw-80008.amazon.com (smtp-fw-80008.amazon.com [99.78.197.219]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 804FDAA; Mon, 20 Nov 2023 14:28:21 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519301; x=1732055301; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=Osg4PHAQtLbisrixvy+MHg4/u6PoowyVDp20WB03Bsc=; b=MGQvU0+80LXVdbWRM295RwFOyy8cNWmln2xof8aXATpcyYGmIJqD6BOA quR267wzkKRYgFamuNIpF1Mw3WJS6n8+OBhKxvtOBvwhq4pSl2FfjM8sj gXPR02bfl0Pjh01ghS8ArZo0HDNDmP43TkcVMLZlDt4jqbCJAxNoQM4dc g=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="45067859" Received: from pdx4-co-svc-p1-lb2-vlan3.amazon.com (HELO email-inbound-relay-iad-1e-m6i4x-245b69b1.us-east-1.amazon.com) ([10.25.36.214]) by smtp-border-fw-80008.pdx80.corp.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:28:19 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (iad7-ws-svc-p70-lb3-vlan3.iad.amazon.com [10.32.235.38]) by email-inbound-relay-iad-1e-m6i4x-245b69b1.us-east-1.amazon.com (Postfix) with ESMTPS id BDDA334005D; Mon, 20 Nov 2023 22:28:18 +0000 (UTC) Received: from EX19MTAUWA002.ant.amazon.com [10.0.7.35:20196] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.31.8:2525] with esmtp (Farcaster) id dbcdfb03-48c9-4df6-b3d0-6c0a0c58df03; Mon, 20 Nov 2023 22:28:18 +0000 (UTC) X-Farcaster-Flow-ID: dbcdfb03-48c9-4df6-b3d0-6c0a0c58df03 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWA002.ant.amazon.com (10.250.64.202) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:28:17 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA) id 15.2.1118.39; Mon, 20 Nov 2023 22:28:13 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 10/11] bpf: tcp: Support arbitrary SYN Cookie. Date: Mon, 20 Nov 2023 14:23:40 -0800 Message-ID: <20231120222341.54776-11-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D037UWC002.ant.amazon.com (10.13.139.250) To EX19D004ANA001.ant.amazon.com (10.37.240.138) X-Patchwork-Delegate: bpf@iogearbox.net This patch adds a new kfunc available at TC hook to support arbitrary SYN Cookie. The basic usage is as follows: struct tcp_cookie_attributes attr = { .tcp_opt = { .mss_clamp = mss, .wscale_ok = wscale_ok, .snd_scale = send_scale, /* < 15 */ .tstamp_ok = tstamp_ok, .sack_ok = sack_ok, }, .ecn_ok = ecn_ok, .usec_ts_ok = usec_ts_ok, }; skc = bpf_skc_lookup_tcp(...); sk = (struct sock *)bpf_skc_to_tcp_sock(skc); bpf_sk_assign_tcp_reqsk(skb, sk, attr, sizeof(attr)); bpf_sk_release(skc); bpf_sk_assign_tcp_reqsk() takes skb, a listener sk, and struct tcp_cookie_attributes and allocates reqsk and configures it. Then, bpf_sk_assign_tcp_reqsk() links reqsk with skb and the listener. The notable thing here is that we do not hold refcnt for both reqsk and listener. To differentiate that, we mark reqsk->syncookie, which is only used in TX for now. So, if reqsk->syncookie is 1 in RX, it means that the reqsk is allocated by kfunc. When skb is freed, sock_pfree() checks if reqsk->syncookie is 1, and in that case, we set NULL to reqsk->rsk_listener before calling reqsk_free() as reqsk does not hold a refcnt of the listener. When the TCP stack looks up a socket from the skb, we return inet_reqsk(skb->sk)->rsk_listener in inet6?_steal_sock(). However, we do not clear skb->sk and skb->destructor so that we can carry the reqsk to cookie_v[46]_check(). The refcnt of reqsk will finally be set to 1 in tcp_get_cookie_sock() after creating a full sk. Note that we can extend struct tcp_cookie_attributes in the future when we add a new attribute that is determined in 3WHS. Signed-off-by: Kuniyuki Iwashima --- include/net/inet6_hashtables.h | 14 ++++- include/net/inet_hashtables.h | 14 ++++- include/net/tcp.h | 6 +++ net/core/filter.c | 96 +++++++++++++++++++++++++++++++++- net/core/sock.c | 14 ++++- 5 files changed, 138 insertions(+), 6 deletions(-) diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 533a7337865a..b6c0ed5a1e3c 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -116,9 +116,21 @@ struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff, if (!sk) return NULL; - if (!prefetched || !sk_fullsock(sk)) + if (!prefetched) return sk; + if (sk->sk_state == TCP_NEW_SYN_RECV) { + if (inet_reqsk(sk)->syncookie) { + *refcounted = false; + skb->sk = sk; + skb->destructor = sock_pfree; + return inet_reqsk(sk)->rsk_listener; + } + return sk; + } else if (sk->sk_state == TCP_TIME_WAIT) { + return sk; + } + if (sk->sk_protocol == IPPROTO_TCP) { if (sk->sk_state != TCP_LISTEN) return sk; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3ecfeadbfa06..0f4091112967 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -462,9 +462,21 @@ struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff, if (!sk) return NULL; - if (!prefetched || !sk_fullsock(sk)) + if (!prefetched) return sk; + if (sk->sk_state == TCP_NEW_SYN_RECV) { + if (inet_reqsk(sk)->syncookie) { + *refcounted = false; + skb->sk = sk; + skb->destructor = sock_pfree; + return inet_reqsk(sk)->rsk_listener; + } + return sk; + } else if (sk->sk_state == TCP_TIME_WAIT) { + return sk; + } + if (sk->sk_protocol == IPPROTO_TCP) { if (sk->sk_state != TCP_LISTEN) return sk; diff --git a/include/net/tcp.h b/include/net/tcp.h index 842791997f30..373afcfaefa6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -591,6 +591,12 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * } #if IS_ENABLED(CONFIG_BPF) +struct tcp_cookie_attributes { + struct tcp_options_received tcp_opt; + bool ecn_ok; + bool usec_ts_ok; +} __packed; + static inline bool cookie_bpf_ok(struct sk_buff *skb) { return skb->sk; diff --git a/net/core/filter.c b/net/core/filter.c index d64baa7ac6cd..58b567aaf577 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11807,6 +11807,88 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } + +__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, + struct tcp_cookie_attributes *attr, + int attr__sz) +{ + const struct request_sock_ops *ops; + struct inet_request_sock *ireq; + struct tcp_request_sock *treq; + struct request_sock *req; + __u16 min_mss; + + if (attr__sz != sizeof(*attr)) + return -EINVAL; + + if (!sk) + return -EINVAL; + + if (!skb_at_tc_ingress(skb)) + return -EINVAL; + + if (dev_net(skb->dev) != sock_net(sk)) + return -ENETUNREACH; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ops = &tcp_request_sock_ops; + min_mss = 536; + break; +#if IS_BUILTIN(CONFIG_IPV6) + case htons(ETH_P_IPV6): + ops = &tcp6_request_sock_ops; + min_mss = IPV6_MIN_MTU - 60; + break; +#endif + default: + return -EINVAL; + } + + if (sk->sk_type != SOCK_STREAM || sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (attr->tcp_opt.mss_clamp < min_mss) { + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + return -EINVAL; + } + + if (attr->tcp_opt.wscale_ok && + attr->tcp_opt.snd_wscale > TCP_MAX_WSCALE) { + __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + return -EINVAL; + } + + if (sk_is_mptcp(sk)) + req = mptcp_subflow_reqsk_alloc(ops, sk, false); + else + req = inet_reqsk_alloc(ops, sk, false); + + if (!req) + return -ENOMEM; + + ireq = inet_rsk(req); + treq = tcp_rsk(req); + + req->syncookie = 1; + req->rsk_listener = sk; + req->mss = attr->tcp_opt.mss_clamp; + + ireq->snd_wscale = attr->tcp_opt.snd_wscale; + ireq->wscale_ok = attr->tcp_opt.wscale_ok; + ireq->tstamp_ok = attr->tcp_opt.tstamp_ok; + ireq->sack_ok = attr->tcp_opt.sack_ok; + ireq->ecn_ok = attr->ecn_ok; + + treq->req_usec_ts = attr->usec_ts_ok; + + skb_orphan(skb); + skb->sk = req_to_sk(req); + skb->destructor = sock_pfree; + + return 0; +} + __bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -11835,6 +11917,10 @@ BTF_SET8_START(bpf_kfunc_check_set_sock_addr) BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) BTF_SET8_END(bpf_kfunc_check_set_sock_addr) +BTF_SET8_START(bpf_kfunc_check_set_tcp_reqsk) +BTF_ID_FLAGS(func, bpf_sk_assign_tcp_reqsk) +BTF_SET8_END(bpf_kfunc_check_set_tcp_reqsk) + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -11850,6 +11936,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { .set = &bpf_kfunc_check_set_sock_addr, }; +static const struct btf_kfunc_id_set bpf_kfunc_set_tcp_reqsk = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_tcp_reqsk, +}; + static int __init bpf_kfunc_init(void) { int ret; @@ -11865,8 +11956,9 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, - &bpf_kfunc_set_sock_addr); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + &bpf_kfunc_set_sock_addr); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk); } late_initcall(bpf_kfunc_init); diff --git a/net/core/sock.c b/net/core/sock.c index fef349dd72fa..998950e97dfe 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2579,8 +2579,18 @@ EXPORT_SYMBOL(sock_efree); #ifdef CONFIG_INET void sock_pfree(struct sk_buff *skb) { - if (sk_is_refcounted(skb->sk)) - sock_gen_put(skb->sk); + struct sock *sk = skb->sk; + + if (!sk_is_refcounted(sk)) + return; + + if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { + inet_reqsk(sk)->rsk_listener = NULL; + reqsk_free(inet_reqsk(sk)); + return; + } + + sock_gen_put(sk); } EXPORT_SYMBOL(sock_pfree); #endif /* CONFIG_INET */ From patchwork Mon Nov 20 22:23:41 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kuniyuki Iwashima X-Patchwork-Id: 13462198 X-Patchwork-Delegate: bpf@iogearbox.net Authentication-Results: smtp.subspace.kernel.org; dkim=pass (1024-bit key) header.d=amazon.com header.i=@amazon.com header.b="SH3T7zWK" Received: from smtp-fw-9106.amazon.com (smtp-fw-9106.amazon.com [207.171.188.206]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0C0371A2; Mon, 20 Nov 2023 14:28:59 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=amazon.com; i=@amazon.com; q=dns/txt; s=amazon201209; t=1700519339; x=1732055339; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=4K/nLXfTB54gSJ9yhi4uKBeoGrvgL+Lu2x7i6ycVpPU=; b=SH3T7zWK636pM4qemEo7J8zKsKhyTffWT+m8jpQFlA/XTey1OndHbqSn DgXgQ5zRGSSz4cZvfsx3rBzmR1/sB6pbbaW0m+mNCRg198wK1hjYTNrcM iZrzgjgETp1+fpouVOTLvpUIVVtVs6wQVHuNER31hbYY0Mrwa6wXHk/nY g=; X-IronPort-AV: E=Sophos;i="6.04,214,1695686400"; d="scan'208";a="685327260" Received: from pdx4-co-svc-p1-lb2-vlan2.amazon.com (HELO email-inbound-relay-iad-1d-m6i4x-e651a362.us-east-1.amazon.com) ([10.25.36.210]) by smtp-border-fw-9106.sea19.amazon.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Nov 2023 22:28:57 +0000 Received: from smtpout.prod.us-west-2.prod.farcaster.email.amazon.dev (iad7-ws-svc-p70-lb3-vlan3.iad.amazon.com [10.32.235.38]) by email-inbound-relay-iad-1d-m6i4x-e651a362.us-east-1.amazon.com (Postfix) with ESMTPS id 9B41B80E44; Mon, 20 Nov 2023 22:28:51 +0000 (UTC) Received: from EX19MTAUWB001.ant.amazon.com [10.0.7.35:2567] by smtpin.naws.us-west-2.prod.farcaster.email.amazon.dev [10.0.48.37:2525] with esmtp (Farcaster) id 5889c9b6-8f9c-4bb3-9b83-8cd2316969e4; Mon, 20 Nov 2023 22:28:50 +0000 (UTC) X-Farcaster-Flow-ID: 5889c9b6-8f9c-4bb3-9b83-8cd2316969e4 Received: from EX19D004ANA001.ant.amazon.com (10.37.240.138) by EX19MTAUWB001.ant.amazon.com (10.250.64.248) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.2.1118.39; Mon, 20 Nov 2023 22:28:43 +0000 Received: from 88665a182662.ant.amazon.com.com (10.187.171.26) by EX19D004ANA001.ant.amazon.com (10.37.240.138) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA) id 15.2.1118.39; Mon, 20 Nov 2023 22:28:39 +0000 From: Kuniyuki Iwashima To: "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni , David Ahern , Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , John Fastabend , KP Singh , Stanislav Fomichev , Hao Luo , Jiri Olsa , Mykola Lysenko CC: Kuniyuki Iwashima , Kuniyuki Iwashima , , Subject: [PATCH v2 bpf-next 11/11] selftest: bpf: Test bpf_sk_assign_tcp_reqsk(). Date: Mon, 20 Nov 2023 14:23:41 -0800 Message-ID: <20231120222341.54776-12-kuniyu@amazon.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20231120222341.54776-1-kuniyu@amazon.com> References: <20231120222341.54776-1-kuniyu@amazon.com> Precedence: bulk X-Mailing-List: bpf@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Originating-IP: [10.187.171.26] X-ClientProxiedBy: EX19D045UWA004.ant.amazon.com (10.13.139.91) To EX19D004ANA001.ant.amazon.com (10.37.240.138) Precedence: Bulk X-Patchwork-Delegate: bpf@iogearbox.net This commit adds a sample selftest to demonstrate how we can use bpf_sk_assign_tcp_reqsk() as the backend of SYN Proxy. The test creates IPv4/IPv6 x TCP/MPTCP connections and transfer messages over them on lo with BPF tc prog attached. The tc prog will process SYN and returns SYN+ACK with the following ISN and TS. In a real use case, this part will be done by other hosts. MSB LSB ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 | | Hash_1 | MSS | ECN | SACK | WScale | TS: | 31 ... 8 | 7 ... 0 | | Random | Hash_2 | The client returns ACK, and tc prog will recalculate ISN and TS from ACK and validate SYN Cookie. If it's valid, the prog calls kfunc to allocate a reqsk to skb and configure the reqsk based on the argument created from SYN Cookie. Later, the reqsk will be processed in cookie_v[46]_check() to create a connection. Signed-off-by: Kuniyuki Iwashima --- tools/testing/selftests/bpf/bpf_kfuncs.h | 10 + .../bpf/prog_tests/tcp_custom_syncookie.c | 163 +++++ .../selftests/bpf/progs/test_siphash.h | 64 ++ .../bpf/progs/test_tcp_custom_syncookie.c | 570 ++++++++++++++++++ .../bpf/progs/test_tcp_custom_syncookie.h | 161 +++++ 5 files changed, 968 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c create mode 100644 tools/testing/selftests/bpf/progs/test_siphash.h create mode 100644 tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c create mode 100644 tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 5ca68ff0b59f..72a2521bb7ac 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -51,6 +51,16 @@ extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clo extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, const __u8 *sun_path, __u32 sun_path__sz) __ksym; +/* Description + * Allocate and configure a reqsk and link it with a listener and skb. + * Returns + * Error code + */ +struct sock; +struct tcp_cookie_attributes; +extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk, + struct tcp_cookie_attributes *attr, int attr__sz) __ksym; + void *bpf_cast_to_kern_ctx(void *) __ksym; void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c new file mode 100644 index 000000000000..fefb52d8222c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#define _GNU_SOURCE +#include +#include +#include + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "test_tcp_custom_syncookie.skel.h" + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +static struct test_tcp_custom_syncookie_case { + int family, type, protocol; + char addr[16]; + char name[10]; +} test_cases[] = { + { + .name = "IPv4 TCP ", + .family = AF_INET, + .type = SOCK_STREAM, + .addr = "127.0.0.1", + }, + { + .name = "IPv6 TCP ", + .family = AF_INET6, + .type = SOCK_STREAM, + .addr = "::1", + }, + { + .name = "IPv4 MPTCP", + .family = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + .addr = "127.0.0.1", + }, + { + .name = "IPv6 MPTCP", + .family = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + .addr = "::1", + }, +}; + +static int setup_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + if (!ASSERT_OK(system("ip link set dev lo up"), "ip")) + goto err; + + if (!ASSERT_OK(write_sysctl("/proc/sys/net/ipv4/tcp_ecn", "1"), + "write_sysctl")) + goto err; + + return 0; +err: + return -1; +} + +static int setup_tc(struct test_tcp_custom_syncookie *skel) +{ + LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach, + .prog_fd = bpf_program__fd(skel->progs.tcp_custom_syncookie)); + + qdisc_lo.ifindex = if_nametoindex("lo"); + if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact")) + goto err; + + if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach), + "filter add dev lo ingress")) + goto err; + + return 0; +err: + return -1; +} + +#define msg "Hello World" +#define msglen 11 + +static void transfer_message(int sender, int receiver) +{ + char buf[msglen]; + int ret; + + ret = send(sender, msg, msglen, 0); + if (!ASSERT_EQ(ret, msglen, "send")) + return; + + memset(buf, 0, sizeof(buf)); + + ret = recv(receiver, buf, msglen, 0); + if (!ASSERT_EQ(ret, msglen, "recv")) + return; + + ret = strncmp(buf, msg, msglen); + if (!ASSERT_EQ(ret, 0, "strncmp")) + return; +} + +static void create_connection(struct test_tcp_custom_syncookie_case *test_case) +{ + int server, client, child; + + if (test_case->protocol == IPPROTO_MPTCP) + server = start_mptcp_server(test_case->family, test_case->addr, 0, 0); + else + server = start_server(test_case->family, test_case->type, test_case->addr, 0, 0); + if (!ASSERT_NEQ(server, -1, "start_server")) + return; + + client = connect_to_fd(server, 0); + if (!ASSERT_NEQ(client, -1, "connect_to_fd")) + goto close_server; + + child = accept(server, NULL, 0); + if (!ASSERT_NEQ(child, -1, "accept")) + goto close_client; + + transfer_message(client, child); + transfer_message(child, client); + + close(child); +close_client: + close(client); +close_server: + close(server); +} + +void test_tcp_custom_syncookie(void) +{ + struct test_tcp_custom_syncookie *skel; + int i; + + if (setup_netns()) + return; + + skel = test_tcp_custom_syncookie__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (setup_tc(skel)) + goto destroy_skel; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + test__start_subtest(test_cases[i].name); + create_connection(&test_cases[i]); + } + +destroy_skel: + system("tc qdisc del dev lo clsact"); + + test_tcp_custom_syncookie__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_siphash.h b/tools/testing/selftests/bpf/progs/test_siphash.h new file mode 100644 index 000000000000..25334079f8be --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_siphash.h @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#ifndef _TEST_SIPHASH_H +#define _TEST_SIPHASH_H + +/* include/linux/bitops.h */ +static __always_inline __u64 rol64(__u64 word, unsigned int shift) +{ + return (word << (shift & 63)) | (word >> ((-shift) & 63)); +} + +/* include/linux/siphash.h */ +#define SIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ + (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ + (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ + (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) + +#define SIPHASH_CONST_0 0x736f6d6570736575ULL +#define SIPHASH_CONST_1 0x646f72616e646f6dULL +#define SIPHASH_CONST_2 0x6c7967656e657261ULL +#define SIPHASH_CONST_3 0x7465646279746573ULL + +/* lib/siphash.c */ +#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3) + +#define PREAMBLE(len) \ + u64 v0 = SIPHASH_CONST_0; \ + u64 v1 = SIPHASH_CONST_1; \ + u64 v2 = SIPHASH_CONST_2; \ + u64 v3 = SIPHASH_CONST_3; \ + u64 b = ((u64)(len)) << 56; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define POSTAMBLE \ + v3 ^= b; \ + SIPROUND; \ + SIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +static inline u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key) +{ + PREAMBLE(16) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + POSTAMBLE +} +#endif diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c new file mode 100644 index 000000000000..8b8eda301bf7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include "vmlinux.h" + +#include +#include +#include "bpf_kfuncs.h" +#include "test_siphash.h" +#include "test_tcp_custom_syncookie.h" + +/* Hash is calculated for each client and split into ISN and TS. + * + * MSB LSB + * ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 | + * | Hash_1 | MSS | ECN | SACK | WScale | + * + * TS: | 31 ... 8 | 7 ... 0 | + * | Random | Hash_2 | + */ +#define COOKIE_BITS 8 +#define COOKIE_MASK (((__u32)1 << COOKIE_BITS) - 1) + +enum { + /* 0xf is invalid thus means that SYN did not have WScale. */ + BPF_SYNCOOKIE_WSCALE_MASK = (1 << 4) - 1, + BPF_SYNCOOKIE_SACK = (1 << 4), + BPF_SYNCOOKIE_ECN = (1 << 5), +}; + +#define MSS_LOCAL_IPV4 65495 +#define MSS_LOCAL_IPV6 65476 + +const __u16 msstab4[] = { + 536, + 1300, + 1460, + MSS_LOCAL_IPV4, +}; + +const __u16 msstab6[] = { + 1280 - 60, /* IPV6_MIN_MTU - 60 */ + 1480 - 60, + 9000 - 60, + MSS_LOCAL_IPV6, +}; + +static siphash_key_t test_key_siphash = { + { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL } +}; + +struct tcp_syncookie { + struct __sk_buff *skb; + void *data_end; + struct ethhdr *eth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + union { + char *ptr; + __be32 *ptr32; + }; + struct tcp_cookie_attributes attr; + u32 cookie; + u64 first; +}; + +static __always_inline int tcp_load_headers(struct tcp_syncookie *ctx) +{ + ctx->data_end = (void *)(long)ctx->skb->data_end; + ctx->eth = (struct ethhdr *)(long)ctx->skb->data; + + if (ctx->eth + 1 > ctx->data_end) + goto err; + + switch (bpf_ntohs(ctx->eth->h_proto)) { + case ETH_P_IP: + ctx->ipv4 = (struct iphdr *)(ctx->eth + 1); + + if (ctx->ipv4 + 1 > ctx->data_end) + goto err; + + if (ctx->ipv4->ihl != sizeof(*ctx->ipv4) / 4) + goto err; + + if (ctx->ipv4->version != 4) + goto err; + + if (ctx->ipv4->protocol != IPPROTO_TCP) + goto err; + + ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1); + break; + case ETH_P_IPV6: + ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1); + + if (ctx->ipv6 + 1 > ctx->data_end) + goto err; + + if (ctx->ipv6->version != 6) + goto err; + + if (ctx->ipv6->nexthdr != NEXTHDR_TCP) + goto err; + + ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1); + break; + default: + goto err; + } + + if (ctx->tcp + 1 > ctx->data_end) + goto err; + + return 0; +err: + return -1; +} + +static __always_inline int tcp_reload_headers(struct tcp_syncookie *ctx) +{ + /* Without volatile, + * R3 32-bit pointer arithmetic prohibited + */ + volatile u64 data_len = ctx->skb->data_end - ctx->skb->data; + + if (ctx->tcp->doff < sizeof(*ctx->tcp) / 4) + goto err; + + /* Needed to calculate csum and parse TCP options. */ + if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0)) + goto err; + + ctx->data_end = (void *)(long)ctx->skb->data_end; + ctx->eth = (struct ethhdr *)(long)ctx->skb->data; + if (ctx->ipv4) { + ctx->ipv4 = (struct iphdr *)(ctx->eth + 1); + ctx->ipv6 = NULL; + ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1); + } else { + ctx->ipv4 = NULL; + ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1); + ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1); + } + + if ((void *)ctx->tcp + 60 > ctx->data_end) + goto err; + + return 0; +err: + return -1; +} + +static __always_inline __sum16 tcp_v4_csum(struct tcp_syncookie *ctx, __wsum csum) +{ + return csum_tcpudp_magic(ctx->ipv4->saddr, ctx->ipv4->daddr, + ctx->tcp->doff * 4, IPPROTO_TCP, csum); +} + +static __always_inline __sum16 tcp_v6_csum(struct tcp_syncookie *ctx, __wsum csum) +{ + return csum_ipv6_magic(&ctx->ipv6->saddr, &ctx->ipv6->daddr, + ctx->tcp->doff * 4, IPPROTO_TCP, csum); +} + +static __always_inline int tcp_validate_header(struct tcp_syncookie *ctx) +{ + s64 csum; + + if (tcp_reload_headers(ctx)) + goto err; + + csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0); + if (csum < 0) + goto err; + + if (ctx->ipv4) { + /* check tcp_v4_csum(csum) is 0 if not on lo. */ + + csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0); + if (csum < 0) + goto err; + + if (csum_fold(csum) != 0) + goto err; + } else if (ctx->ipv6) { + /* check tcp_v6_csum(csum) is 0 if not on lo. */ + } + + return 0; +err: + return -1; +} + +static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) +{ + struct tcp_options_received *tcp_opt = &ctx->attr.tcp_opt; + char opcode, opsize; + + if (ctx->ptr > ctx->data_end) + goto stop; + + opcode = *ctx->ptr++; + + if (opcode == TCPOPT_EOL) + goto stop; + + if (opcode == TCPOPT_NOP) + goto next; + + if (ctx->ptr > ctx->data_end) + goto stop; + + opsize = *ctx->ptr++; + if (opsize < 2) + goto stop; + + if (ctx->ptr + opsize - 2 > ctx->data_end) + goto stop; + + switch (opcode) { + case TCPOPT_MSS: + if (opsize == TCPOLEN_MSS && ctx->tcp->syn) + tcp_opt->mss_clamp = get_unaligned_be16(ctx->ptr); + break; + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && ctx->tcp->syn) { + tcp_opt->wscale_ok = 1; + tcp_opt->snd_wscale = *ctx->ptr; + } + break; + case TCPOPT_TIMESTAMP: + if (opsize == TCPOLEN_TIMESTAMP) { + tcp_opt->saw_tstamp = 1; + tcp_opt->rcv_tsval = get_unaligned_be32(ctx->ptr); + tcp_opt->rcv_tsecr = get_unaligned_be32(ctx->ptr + 4); + + if (ctx->tcp->syn && tcp_opt->rcv_tsecr) + tcp_opt->tstamp_ok = 0; + else + tcp_opt->tstamp_ok = 1; + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn) + tcp_opt->sack_ok = 1; + break; + } + + ctx->ptr += opsize - 2; +next: + return 0; +stop: + return 1; +} + +static __always_inline void tcp_parse_options(struct tcp_syncookie *ctx) +{ + ctx->ptr = (char *)(ctx->tcp + 1); + + bpf_loop(40, tcp_parse_option, ctx, 0); +} + +static __always_inline int tcp_validate_sysctl(struct tcp_syncookie *ctx) +{ + struct tcp_options_received *tcp_opt = &ctx->attr.tcp_opt; + + if ((ctx->ipv4 && tcp_opt->mss_clamp != MSS_LOCAL_IPV4) || + (ctx->ipv6 && tcp_opt->mss_clamp != MSS_LOCAL_IPV6)) + goto err; + + if (!tcp_opt->wscale_ok || tcp_opt->snd_wscale != 7) + goto err; + + if (!tcp_opt->tstamp_ok) + goto err; + + if (!tcp_opt->sack_ok) + goto err; + + if (!ctx->tcp->ece || !ctx->tcp->cwr) + goto err; + + return 0; +err: + return -1; +} + +static __always_inline void tcp_prepare_cookie(struct tcp_syncookie *ctx) +{ + struct tcp_options_received *tcp_opt = &ctx->attr.tcp_opt; + u32 seq = bpf_ntohl(ctx->tcp->seq); + u64 first = 0, second; + int mssind = 0; + u32 hash; + + if (ctx->ipv4) { + for (mssind = ARRAY_SIZE(msstab4) - 1; mssind; mssind--) + if (tcp_opt->mss_clamp >= msstab4[mssind]) + break; + + tcp_opt->mss_clamp = msstab4[mssind]; + + first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr; + } else if (ctx->ipv6) { + for (mssind = ARRAY_SIZE(msstab6) - 1; mssind; mssind--) + if (tcp_opt->mss_clamp >= msstab6[mssind]) + break; + + tcp_opt->mss_clamp = msstab6[mssind]; + + first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 | + ctx->ipv6->daddr.in6_u.u6_addr32[0]; + } + + second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest; + hash = siphash_2u64(first, second, &test_key_siphash); + + if (tcp_opt->tstamp_ok) { + tcp_opt->rcv_tsecr = bpf_get_prandom_u32(); + tcp_opt->rcv_tsecr &= ~COOKIE_MASK; + tcp_opt->rcv_tsecr |= hash & COOKIE_MASK; + } + + hash &= ~COOKIE_MASK; + hash |= mssind << 6; + + if (tcp_opt->wscale_ok) + hash |= tcp_opt->snd_wscale & BPF_SYNCOOKIE_WSCALE_MASK; + + if (tcp_opt->sack_ok) + hash |= BPF_SYNCOOKIE_SACK; + + if (tcp_opt->tstamp_ok && ctx->tcp->ece && ctx->tcp->cwr) + hash |= BPF_SYNCOOKIE_ECN; + + ctx->cookie = hash; +} + +static __always_inline void tcp_write_options(struct tcp_syncookie *ctx) +{ + struct tcp_options_received *tcp_opt = &ctx->attr.tcp_opt; + + ctx->ptr32 = (__be32 *)(ctx->tcp + 1); + + *ctx->ptr32++ = bpf_htonl(TCPOPT_MSS << 24 | TCPOLEN_MSS << 16 | + tcp_opt->mss_clamp); + + if (tcp_opt->wscale_ok) + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_WINDOW << 16 | + TCPOLEN_WINDOW << 8 | + tcp_opt->snd_wscale); + + if (tcp_opt->tstamp_ok) { + if (tcp_opt->sack_ok) + *ctx->ptr32++ = bpf_htonl(TCPOPT_SACK_PERM << 24 | + TCPOLEN_SACK_PERM << 16 | + TCPOPT_TIMESTAMP << 8 | + TCPOLEN_TIMESTAMP); + else + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_NOP << 16 | + TCPOPT_TIMESTAMP << 8 | + TCPOLEN_TIMESTAMP); + + *ctx->ptr32++ = bpf_htonl(tcp_opt->rcv_tsecr); + *ctx->ptr32++ = bpf_htonl(tcp_opt->rcv_tsval); + } else if (tcp_opt->sack_ok) { + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_NOP << 16 | + TCPOPT_SACK_PERM << 8 | + TCPOLEN_SACK_PERM); + } +} + +static __always_inline int tcp_handle_syn(struct tcp_syncookie *ctx) +{ + s64 csum; + + if (tcp_validate_header(ctx)) + goto err; + + tcp_parse_options(ctx); + + if (tcp_validate_sysctl(ctx)) + goto err; + + tcp_prepare_cookie(ctx); + tcp_write_options(ctx); + + swap(ctx->tcp->source, ctx->tcp->dest); + ctx->tcp->check = 0; + ctx->tcp->ack_seq = bpf_htonl(bpf_ntohl(ctx->tcp->seq) + 1); + ctx->tcp->seq = bpf_htonl(ctx->cookie); + ctx->tcp->doff = ((long)ctx->ptr32 - (long)ctx->tcp) >> 2; + ctx->tcp->ack = 1; + if (!ctx->attr.tcp_opt.tstamp_ok || !ctx->tcp->ece || !ctx->tcp->cwr) + ctx->tcp->ece = 0; + ctx->tcp->cwr = 0; + + csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0); + if (csum < 0) + goto err; + + if (ctx->ipv4) { + swap(ctx->ipv4->saddr, ctx->ipv4->daddr); + ctx->tcp->check = tcp_v4_csum(ctx, csum); + + ctx->ipv4->check = 0; + ctx->ipv4->tos = 0; + ctx->ipv4->tot_len = bpf_htons((long)ctx->ptr32 - (long)ctx->ipv4); + ctx->ipv4->id = 0; + ctx->ipv4->ttl = 64; + + csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, sizeof(*ctx->ipv4), 0); + if (csum < 0) + goto err; + + ctx->ipv4->check = csum_fold(csum); + } else if (ctx->ipv6) { + swap(ctx->ipv6->saddr, ctx->ipv6->daddr); + ctx->tcp->check = tcp_v6_csum(ctx, csum); + + *(__be32 *)ctx->ipv6 = bpf_htonl(0x60000000); + ctx->ipv6->payload_len = bpf_htons((long)ctx->ptr32 - (long)ctx->tcp); + ctx->ipv6->hop_limit = 64; + } + + swap_array(ctx->eth->h_source, ctx->eth->h_dest); + + if (bpf_skb_change_tail(ctx->skb, (long)ctx->ptr32 - (long)ctx->eth, 0)) + goto err; + + return bpf_redirect(ctx->skb->ifindex, 0); +err: + return TC_ACT_SHOT; +} + +static __always_inline int tcp_validate_cookie(struct tcp_syncookie *ctx) +{ + struct tcp_options_received *tcp_opt = &ctx->attr.tcp_opt; + u32 cookie = bpf_ntohl(ctx->tcp->ack_seq) - 1; + u32 seq = bpf_ntohl(ctx->tcp->seq) - 1; + u64 first = 0, second; + int mssind; + u32 hash; + + if (ctx->ipv4) + first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr; + else if (ctx->ipv6) + first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 | + ctx->ipv6->daddr.in6_u.u6_addr32[0]; + + second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest; + hash = siphash_2u64(first, second, &test_key_siphash); + + if (tcp_opt->saw_tstamp) + hash -= tcp_opt->rcv_tsecr & COOKIE_MASK; + else + hash &= ~COOKIE_MASK; + + hash -= cookie & ~COOKIE_MASK; + if (hash) + goto err; + + mssind = (cookie & (3 << 6)) >> 6; + if (ctx->ipv4) { + if (mssind > ARRAY_SIZE(msstab4)) + goto err; + + tcp_opt->mss_clamp = msstab4[mssind]; + } else { + if (mssind > ARRAY_SIZE(msstab6)) + goto err; + + tcp_opt->mss_clamp = msstab6[mssind]; + } + + tcp_opt->snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK; + tcp_opt->wscale_ok = tcp_opt->snd_wscale == BPF_SYNCOOKIE_WSCALE_MASK; + tcp_opt->sack_ok = cookie & BPF_SYNCOOKIE_SACK; + ctx->attr.ecn_ok = cookie & BPF_SYNCOOKIE_ECN; + + return 0; +err: + return -1; +} + +static __always_inline int tcp_handle_ack(struct tcp_syncookie *ctx) +{ + struct bpf_sock_tuple tuple; + struct bpf_sock *skc; + int ret = TC_ACT_OK; + struct sock *sk; + u32 tuple_size; + + if (ctx->ipv4) { + tuple.ipv4.saddr = ctx->ipv4->saddr; + tuple.ipv4.daddr = ctx->ipv4->daddr; + tuple.ipv4.sport = ctx->tcp->source; + tuple.ipv4.dport = ctx->tcp->dest; + tuple_size = sizeof(tuple.ipv4); + } else if (ctx->ipv6) { + __builtin_memcpy(tuple.ipv6.saddr, &ctx->ipv6->saddr, sizeof(tuple.ipv6.saddr)); + __builtin_memcpy(tuple.ipv6.daddr, &ctx->ipv6->daddr, sizeof(tuple.ipv6.daddr)); + tuple.ipv6.sport = ctx->tcp->source; + tuple.ipv6.dport = ctx->tcp->dest; + tuple_size = sizeof(tuple.ipv6); + } else { + goto out; + } + + skc = bpf_skc_lookup_tcp(ctx->skb, &tuple, tuple_size, BPF_F_CURRENT_NETNS, 0); + if (!skc) + goto out; + + if (skc->state != TCP_LISTEN) + goto release; + + sk = (struct sock *)bpf_skc_to_tcp_sock(skc); + if (!sk) + goto err; + + if (tcp_validate_header(ctx)) + goto err; + + tcp_parse_options(ctx); + + if (tcp_validate_cookie(ctx)) + goto err; + + ret = bpf_sk_assign_tcp_reqsk(ctx->skb, sk, &ctx->attr, sizeof(ctx->attr)); + if (ret < 0) + goto err; + +release: + bpf_sk_release(skc); +out: + return ret; + +err: + ret = TC_ACT_SHOT; + goto release; +} + +SEC("tc") +int tcp_custom_syncookie(struct __sk_buff *skb) +{ + struct tcp_syncookie ctx = { + .skb = skb, + }; + + if (tcp_load_headers(&ctx)) + return TC_ACT_OK; + + if (ctx.tcp->rst) + return TC_ACT_OK; + + if (ctx.tcp->syn) { + if (ctx.tcp->ack) + return TC_ACT_OK; + + return tcp_handle_syn(&ctx); + } + + return tcp_handle_ack(&ctx); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h new file mode 100644 index 000000000000..d00f50fb05ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#ifndef _TEST_TCP_SYNCOOKIE_H +#define _TEST_TCP_SYNCOOKIE_H + +#define TC_ACT_OK 0 +#define TC_ACT_SHOT 2 + +#define ETH_ALEN 6 +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD + +#define NEXTHDR_TCP 6 + +#define TCPOPT_NOP 1 +#define TCPOPT_EOL 0 +#define TCPOPT_MSS 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_TIMESTAMP 8 +#define TCPOPT_SACK_PERM 4 + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_SACK_PERM 2 + +#define __packed __attribute__((__packed__)) +#define __force + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define swap(a, b) \ + do { \ + typeof(a) __tmp = (a); \ + (a) = (b); \ + (b) = __tmp; \ + } while (0) + +#define swap_array(a, b) \ + do { \ + typeof(a) __tmp[sizeof(a)]; \ + __builtin_memcpy(__tmp, a, sizeof(a)); \ + __builtin_memcpy(a, b, sizeof(a)); \ + __builtin_memcpy(b, __tmp, sizeof(a)); \ + } while (0) + +/* asm-generic/unaligned.h */ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) + +static inline u16 get_unaligned_be16(const void *p) +{ + return bpf_ntohs(__get_unaligned_t(__be16, p)); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return bpf_ntohl(__get_unaligned_t(__be32, p)); +} + +/* lib/checksum.c */ +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN + s += proto + len; +#else + s += (proto + len) << 8; +#endif + return (__force __wsum)from64to32(s); +} + +/* asm-generic/checksum.h */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* net/ipv6/ip6_checksum.c */ +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + int carry; + __u32 ulen; + __u32 uproto; + __u32 sum = (__force u32)csum; + + sum += (__force u32)saddr->in6_u.u6_addr32[0]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[0]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[1]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[1]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[2]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[2]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[3]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[3]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[0]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[0]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[1]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[1]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[2]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[2]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[3]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[3]); + sum += carry; + + ulen = (__force u32)bpf_htonl((__u32)len); + sum += ulen; + carry = (sum < ulen); + sum += carry; + + uproto = (__force u32)bpf_htonl(proto); + sum += uproto; + carry = (sum < uproto); + sum += carry; + + return csum_fold((__force __wsum)sum); +} +#endif