Message ID | 20220831133758.3741187-1-leitao@debian.org (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [RESEND,net-next] tcp: socket-specific version of WARN_ON_ONCE() | expand |
On Wed, Aug 31, 2022 at 6:38 AM Breno Leitao <leitao@debian.org> wrote: > > There are cases where we need information about the socket during a > warning, so, it could help us to find bugs that happens that do not have > a easily repro. > > BPF congestion control algorithms can change socket state in unexpected > ways, leading to WARNings. Additional information about the socket state > is useful to identify the culprit. Well, this suggests we need to fix BPF side ? Not sure how this can happen, because TCP_BPF_IW has if (val <= 0 || tp->data_segs_out > tp->syn_data) ret = -EINVAL; else tcp_snd_cwnd_set(tp, val); It seems you already found the issue in an eBPF CC, can you share the details ? > > This diff creates a TCP socket-specific version of WARN_ON_ONCE(), and > attaches it to tcp_snd_cwnd_set(). Well, I feel this will need constant additions... the state of a custom BPF CC is opaque to core TCP stack anyway ? > > Signed-off-by: Breno Leitao <leitao@debian.org> > --- > include/net/tcp.h | 3 ++- > include/net/tcp_debug.h | 10 ++++++++++ > net/ipv4/tcp.c | 30 ++++++++++++++++++++++++++++++ > 3 files changed, 42 insertions(+), 1 deletion(-) > create mode 100644 include/net/tcp_debug.h > > diff --git a/include/net/tcp.h b/include/net/tcp.h > index d10962b9f0d0..73c3970d8839 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -40,6 +40,7 @@ > #include <net/inet_ecn.h> > #include <net/dst.h> > #include <net/mptcp.h> > +#include <net/tcp_debug.h> > > #include <linux/seq_file.h> > #include <linux/memcontrol.h> > @@ -1222,7 +1223,7 @@ static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) > > static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) > { > - WARN_ON_ONCE((int)val <= 0); > + TCP_SOCK_WARN_ON_ONCE(tp, (int)val <= 0); > tp->snd_cwnd = val; > } > > diff --git a/include/net/tcp_debug.h b/include/net/tcp_debug.h > new file mode 100644 > index 000000000000..50e96d87d335 > --- /dev/null > +++ b/include/net/tcp_debug.h > @@ -0,0 +1,10 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _LINUX_TCP_DEBUG_H > +#define _LINUX_TCP_DEBUG_H > + > +void tcp_sock_warn(const struct tcp_sock *tp); > + > +#define TCP_SOCK_WARN_ON_ONCE(tcp_sock, condition) \ > + DO_ONCE_LITE_IF(condition, tcp_sock_warn, tcp_sock) > + > +#endif /* _LINUX_TCP_DEBUG_H */ > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > index bbe218753662..71771fee72f7 100644 > --- a/net/ipv4/tcp.c > +++ b/net/ipv4/tcp.c > @@ -4684,6 +4684,36 @@ int tcp_abort(struct sock *sk, int err) > } > EXPORT_SYMBOL_GPL(tcp_abort); > > +void tcp_sock_warn(const struct tcp_sock *tp) > +{ > + const struct sock *sk = (const struct sock *)tp; > + struct inet_sock *inet = inet_sk(sk); > + struct inet_connection_sock *icsk = inet_csk(sk); > + > + WARN_ON(1); > + > + if (!tp) > + return; > + > + pr_warn("Socket Info: family=%u state=%d sport=%u dport=%u ccname=%s cwnd=%u", > + sk->sk_family, sk->sk_state, ntohs(inet->inet_sport), > + ntohs(inet->inet_dport), icsk->icsk_ca_ops->name, tcp_snd_cwnd(tp)); > + > + switch (sk->sk_family) { > + case AF_INET: > + pr_warn("saddr=%pI4 daddr=%pI4", &inet->inet_saddr, > + &inet->inet_daddr); > + break; > +#if IS_ENABLED(CONFIG_IPV6) > + case AF_INET6: > + pr_warn("saddr=%pI6 daddr=%pI6", &sk->sk_v6_rcv_saddr, > + &sk->sk_v6_daddr); > + break; > +#endif > + } > +} > +EXPORT_SYMBOL_GPL(tcp_sock_warn); > + > extern struct tcp_congestion_ops tcp_reno; > > static __initdata unsigned long thash_entries; > -- > 2.30.2 >
Hello Eric, On Sat, Sep 03, 2022 at 09:42:43AM -0700, Eric Dumazet wrote: > On Wed, Aug 31, 2022 at 6:38 AM Breno Leitao <leitao@debian.org> wrote: > > > > There are cases where we need information about the socket during a > > warning, so, it could help us to find bugs that happens that do not have > > a easily repro. > > > > BPF congestion control algorithms can change socket state in unexpected > > ways, leading to WARNings. Additional information about the socket state > > is useful to identify the culprit. A little bit of more context here. We hit this warning in production several hundred times a day. We don't know exactly where it is coming from, that is why this patch is being proposed. > Well, this suggests we need to fix BPF side ? This patch might help us to identify who is the culprit that is setting the wrong value in the congestion window. If the problem is on the BPF side, we probably need to Fix BPF side, for sure. > It seems you already found the issue in an eBPF CC, can you share the details ? Not really. I've applied this patch into our internal kernel, and we might soon find more information of what is causing this warning. > > This diff creates a TCP socket-specific version of WARN_ON_ONCE(), and > > attaches it to tcp_snd_cwnd_set(). > > Well, I feel this will need constant additions... the state of a > custom BPF CC is opaque to core TCP stack anyway ? > > > > > Signed-off-by: Breno Leitao <leitao@debian.org> > > --- > > include/net/tcp.h | 3 ++- > > include/net/tcp_debug.h | 10 ++++++++++ > > net/ipv4/tcp.c | 30 ++++++++++++++++++++++++++++++ > > 3 files changed, 42 insertions(+), 1 deletion(-) > > create mode 100644 include/net/tcp_debug.h > > > > diff --git a/include/net/tcp.h b/include/net/tcp.h > > index d10962b9f0d0..73c3970d8839 100644 > > --- a/include/net/tcp.h > > +++ b/include/net/tcp.h > > @@ -40,6 +40,7 @@ > > #include <net/inet_ecn.h> > > #include <net/dst.h> > > #include <net/mptcp.h> > > +#include <net/tcp_debug.h> > > > > #include <linux/seq_file.h> > > #include <linux/memcontrol.h> > > @@ -1222,7 +1223,7 @@ static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) > > > > static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) > > { > > - WARN_ON_ONCE((int)val <= 0); > > + TCP_SOCK_WARN_ON_ONCE(tp, (int)val <= 0); > > tp->snd_cwnd = val; > > } > > > > diff --git a/include/net/tcp_debug.h b/include/net/tcp_debug.h > > new file mode 100644 > > index 000000000000..50e96d87d335 > > --- /dev/null > > +++ b/include/net/tcp_debug.h > > @@ -0,0 +1,10 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +#ifndef _LINUX_TCP_DEBUG_H > > +#define _LINUX_TCP_DEBUG_H > > + > > +void tcp_sock_warn(const struct tcp_sock *tp); > > + > > +#define TCP_SOCK_WARN_ON_ONCE(tcp_sock, condition) \ > > + DO_ONCE_LITE_IF(condition, tcp_sock_warn, tcp_sock) > > + > > +#endif /* _LINUX_TCP_DEBUG_H */ > > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > > index bbe218753662..71771fee72f7 100644 > > --- a/net/ipv4/tcp.c > > +++ b/net/ipv4/tcp.c > > @@ -4684,6 +4684,36 @@ int tcp_abort(struct sock *sk, int err) > > } > > EXPORT_SYMBOL_GPL(tcp_abort); > > > > +void tcp_sock_warn(const struct tcp_sock *tp) > > +{ > > + const struct sock *sk = (const struct sock *)tp; > > + struct inet_sock *inet = inet_sk(sk); > > + struct inet_connection_sock *icsk = inet_csk(sk); > > + > > + WARN_ON(1); > > + > > + if (!tp) > > + return; > > + > > + pr_warn("Socket Info: family=%u state=%d sport=%u dport=%u ccname=%s cwnd=%u", > > + sk->sk_family, sk->sk_state, ntohs(inet->inet_sport), > > + ntohs(inet->inet_dport), icsk->icsk_ca_ops->name, tcp_snd_cwnd(tp)); > > + > > + switch (sk->sk_family) { > > + case AF_INET: > > + pr_warn("saddr=%pI4 daddr=%pI4", &inet->inet_saddr, > > + &inet->inet_daddr); > > + break; > > +#if IS_ENABLED(CONFIG_IPV6) > > + case AF_INET6: > > + pr_warn("saddr=%pI6 daddr=%pI6", &sk->sk_v6_rcv_saddr, > > + &sk->sk_v6_daddr); > > + break; > > +#endif > > + } > > +} > > +EXPORT_SYMBOL_GPL(tcp_sock_warn); > > + > > extern struct tcp_congestion_ops tcp_reno; > > > > static __initdata unsigned long thash_entries; > > -- > > 2.30.2 > >
On Sat, Sep 03, 2022 at 09:42:43AM -0700, Eric Dumazet wrote: > On Wed, Aug 31, 2022 at 6:38 AM Breno Leitao <leitao@debian.org> wrote: > > > > There are cases where we need information about the socket during a > > warning, so, it could help us to find bugs that happens that do not have > > a easily repro. > > > > BPF congestion control algorithms can change socket state in unexpected > > ways, leading to WARNings. Additional information about the socket state > > is useful to identify the culprit. > > Well, this suggests we need to fix BPF side ? > > Not sure how this can happen, because TCP_BPF_IW has > > if (val <= 0 || tp->data_segs_out > tp->syn_data) > ret = -EINVAL; > else > tcp_snd_cwnd_set(tp, val); I am not sure we are hitting this path, please check the stack below > It seems you already found the issue in an eBPF CC, can you share the details ? Sure, here is an example of what we are facing (with some obfuscation). Remeber that there are users' BPF application running: [155375.750105] ------------[ cut here ]------------ [155375.759526] WARNING: CPU: 19 PID: 0 at net/ipv4/tcp.c:4552 tcp_sock_warn+0x6/0x20 [155375.774700] Modules linked in: vhost_net tun vhost vhost_iotlb tap virtio_net net_failover failover mpls_gso mpls_iptunnel mpls_router ip_tunnel fou ip6_udp_tunnel udp_tunnel sch_fq sunrpc bpf_preload tls act_gact cls_bpf tcp_diag inet_diag skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp iTCO_wdt kvm_intel iTCO_vendor_support kvm evdev ses irqbypass enclosure i2c_i801 i2c_smbus ipmi_si ipmi_devintf ipmi_msghandler acpi_cpufreq button tpm_crb sch_fq_codel fuse sg nvme mpi3mr scsi_transport_sas nvme_core xhci_pci xhci_hcd loop efivarfs autofs4 [155375.874916] CPU: 19 PID: 0 Comm: swapper/19 Kdump: loaded Tainted: G S 5.12.0-0_XXXXXXXX_g0fed6f189e14 #1 [155375.898770] Hardware name: XXXX XXX XXXX XXXXX, BIOS BS_BIOS_XXX XX/XX/2022 [155375.916015] RIP: 0010:tcp_sock_warn+0x6/0x20 [155375.924755] Code: 4d 01 3e 85 c0 0f 84 57 ff ff ff 48 8b 0c 24 44 8b 01 eb 82 e8 eb b7 14 00 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 53 <0f> 0b 48 85 ff 0f 85 77 70 14 00 5b c3 66 66 2e 0f 1f 84 00 00 00 [155375.962518] RSP: 0018:ffffc90000d08988 EFLAGS: 00010246 [155375.973157] RAX: ffff88817432b5c0 RBX: ffff88828f748000 RCX: ffffc90000d08a34 [155375.987614] RDX: 0000000000000000 RSI: 00000000822be8c8 RDI: ffff88828f748000 [155376.002074] RBP: 0000000000000040 R08: ffffc90000d08a38 R09: 00000000822be334 [155376.016531] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 [155376.030988] R13: ffffc90000d08a34 R14: 0000000000005546 R15: 0000000000000000 [155376.045441] FS: 0000000000000000(0000) GS:ffff88903f8c0000(0000) knlGS:0000000000000000 [155376.061804] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [155376.073474] CR2: 00007fe407603080 CR3: 0000000b19993006 CR4: 00000000007706e0 [155376.087928] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [155376.102383] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [155376.116841] PKRU: 55555554 [155376.122427] Call Trace: [155376.127497] <IRQ> [155376.131697] tcp_fastretrans_alert+0x988/0x9e0 [155376.140774] tcp_ack+0x8b4/0x1360 [155376.147581] ? __cgroup_bpf_run_filter_skb+0x185/0x440 [155376.158037] tcp_rcv_established+0x135/0x660 [155376.166755] ? sk_filter_trim_cap+0xbc/0x220 [155376.175472] tcp_v6_do_rcv+0xbe/0x3e0 [155376.182974] tcp_v6_rcv+0xc01/0xc90 [155376.190128] ip6_protocol_deliver_rcu+0xbd/0x450 [155376.199541] ip6_input_finish+0x3d/0x60 [155376.207388] ip6_input+0xb5/0xc0 [155376.214019] ip6_sublist_rcv_finish+0x37/0x50 [155376.222912] ip6_sublist_rcv+0x1dd/0x270 [155376.230935] ipv6_list_rcv+0x113/0x140 [155376.238618] __netif_receive_skb_list_core+0x1a0/0x210 [155376.249080] netif_receive_skb_list_internal+0x186/0x2a0 [155376.259887] gro_normal_list.part.171+0x19/0x40 [155376.269137] napi_complete_done+0x65/0x150 [155376.277514] bnxt_poll_p5+0x25b/0x2b0 [155376.285027] ? tcp_write_xmit+0x278/0x1060 [155376.293398] __napi_poll+0x25/0x120 [155376.300552] net_rx_action+0x189/0x300 [155376.308227] __do_softirq+0xbb/0x271 [155376.315554] irq_exit_rcu+0x97/0xa0 [155376.322710] common_interrupt+0x7f/0xa0 [155376.330566] </IRQ> [155376.334947] asm_common_interrupt+0x1e/0x40 [155376.343499] RIP: 0010:cpuidle_enter_state+0xc2/0x340 [155376.353619] Code: 48 89 c5 0f 1f 44 00 00 31 ff e8 f9 8d 73 ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 38 02 00 00 31 ff e8 b2 36 79 ff fb 45 85 f6 <0f> 88 e8 00 00 00 49 63 d6 48 2b 2c 24 48 6b ca 68 48 8d 04 52 48 [155376.391367] RSP: 0018:ffffc90000293e90 EFLAGS: 00000202 [155376.402004] RAX: ffff88903f8eaa80 RBX: ffffe8ffff4c6c00 RCX: 000000000000001f [155376.416456] RDX: 00008d503c63f228 RSI: 000000005ba4b680 RDI: 0000000000000000 [155376.430910] RBP: 00008d503c63f228 R08: 0000000000000002 R09: 000000000002a300 [155376.445372] R10: 002435b781e62f4a R11: ffff88903f8e9a84 R12: 0000000000000001 [155376.459832] R13: ffffffff83621200 R14: 0000000000000001 R15: 0000000000000000 [155376.474307] cpuidle_enter+0x29/0x40 [155376.481654] do_idle+0x1bb/0x200 [155376.488297] cpu_startup_entry+0x19/0x20 [155376.496320] start_secondary+0x104/0x140 [155376.504342] secondary_startup_64_no_verify+0xb0/0xbb [155376.514629] ---[ end trace 9b428a0d7901c3ff ]--- [155376.524041] TCP: Socket Info: family=10 state=1 sport=12345 dport=57616 ccname=ned_tcp_dctcp01 cwnd=1 [155376.524045] TCP: saddr=XXXX:XXXX:XXXX:XXXX:XXXX:0000:00c8:0000 daddr=XXXX:XXXX:XXXX:XXXX:XXXX:0000:0288:00
On Wed, Dec 7, 2022 at 6:37 PM Breno Leitao <leitao@debian.org> wrote: > > On Sat, Sep 03, 2022 at 09:42:43AM -0700, Eric Dumazet wrote: > > On Wed, Aug 31, 2022 at 6:38 AM Breno Leitao <leitao@debian.org> wrote: > > > > > > There are cases where we need information about the socket during a > > > warning, so, it could help us to find bugs that happens that do not have > > > a easily repro. > > > > > > BPF congestion control algorithms can change socket state in unexpected > > > ways, leading to WARNings. Additional information about the socket state > > > is useful to identify the culprit. > > > > Well, this suggests we need to fix BPF side ? > > > > Not sure how this can happen, because TCP_BPF_IW has > > > > if (val <= 0 || tp->data_segs_out > tp->syn_data) > > ret = -EINVAL; > > else > > tcp_snd_cwnd_set(tp, val); > > I am not sure we are hitting this path, please check the stack below > > > It seems you already found the issue in an eBPF CC, can you share the details ? > > Sure, here is an example of what we are facing (with some obfuscation). > Remeber that there are users' BPF application running: > > [155375.750105] ------------[ cut here ]------------ > [155375.759526] WARNING: CPU: 19 PID: 0 at net/ipv4/tcp.c:4552 tcp_sock_warn+0x6/0x20 > [155375.774700] Modules linked in: vhost_net tun vhost vhost_iotlb tap virtio_net net_failover failover mpls_gso mpls_iptunnel mpls_router ip_tunnel fou ip6_udp_tunnel udp_tunnel sch_fq sunrpc bpf_preload tls act_gact cls_bpf tcp_diag inet_diag skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp iTCO_wdt kvm_intel iTCO_vendor_support kvm evdev ses irqbypass enclosure i2c_i801 i2c_smbus ipmi_si ipmi_devintf ipmi_msghandler acpi_cpufreq button tpm_crb sch_fq_codel fuse sg nvme mpi3mr scsi_transport_sas nvme_core xhci_pci xhci_hcd loop efivarfs autofs4 > [155375.874916] CPU: 19 PID: 0 Comm: swapper/19 Kdump: loaded Tainted: G S 5.12.0-0_XXXXXXXX_g0fed6f189e14 #1 > [155375.898770] Hardware name: XXXX XXX XXXX XXXXX, BIOS BS_BIOS_XXX XX/XX/2022 > [155375.916015] RIP: 0010:tcp_sock_warn+0x6/0x20 > [155375.924755] Code: 4d 01 3e 85 c0 0f 84 57 ff ff ff 48 8b 0c 24 44 8b 01 eb 82 e8 eb b7 14 00 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 53 <0f> 0b 48 85 ff 0f 85 77 70 14 00 5b c3 66 66 2e 0f 1f 84 00 00 00 > [155375.962518] RSP: 0018:ffffc90000d08988 EFLAGS: 00010246 > [155375.973157] RAX: ffff88817432b5c0 RBX: ffff88828f748000 RCX: ffffc90000d08a34 > [155375.987614] RDX: 0000000000000000 RSI: 00000000822be8c8 RDI: ffff88828f748000 > [155376.002074] RBP: 0000000000000040 R08: ffffc90000d08a38 R09: 00000000822be334 > [155376.016531] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 > [155376.030988] R13: ffffc90000d08a34 R14: 0000000000005546 R15: 0000000000000000 > [155376.045441] FS: 0000000000000000(0000) GS:ffff88903f8c0000(0000) knlGS:0000000000000000 > [155376.061804] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [155376.073474] CR2: 00007fe407603080 CR3: 0000000b19993006 CR4: 00000000007706e0 > [155376.087928] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [155376.102383] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > [155376.116841] PKRU: 55555554 > [155376.122427] Call Trace: > [155376.127497] <IRQ> > [155376.131697] tcp_fastretrans_alert+0x988/0x9e0 > [155376.140774] tcp_ack+0x8b4/0x1360 > [155376.147581] ? __cgroup_bpf_run_filter_skb+0x185/0x440 > [155376.158037] tcp_rcv_established+0x135/0x660 > [155376.166755] ? sk_filter_trim_cap+0xbc/0x220 > [155376.175472] tcp_v6_do_rcv+0xbe/0x3e0 > [155376.182974] tcp_v6_rcv+0xc01/0xc90 > [155376.190128] ip6_protocol_deliver_rcu+0xbd/0x450 > [155376.199541] ip6_input_finish+0x3d/0x60 > [155376.207388] ip6_input+0xb5/0xc0 > [155376.214019] ip6_sublist_rcv_finish+0x37/0x50 > [155376.222912] ip6_sublist_rcv+0x1dd/0x270 > [155376.230935] ipv6_list_rcv+0x113/0x140 > [155376.238618] __netif_receive_skb_list_core+0x1a0/0x210 > [155376.249080] netif_receive_skb_list_internal+0x186/0x2a0 > [155376.259887] gro_normal_list.part.171+0x19/0x40 > [155376.269137] napi_complete_done+0x65/0x150 > [155376.277514] bnxt_poll_p5+0x25b/0x2b0 > [155376.285027] ? tcp_write_xmit+0x278/0x1060 > [155376.293398] __napi_poll+0x25/0x120 > [155376.300552] net_rx_action+0x189/0x300 > [155376.308227] __do_softirq+0xbb/0x271 > [155376.315554] irq_exit_rcu+0x97/0xa0 > [155376.322710] common_interrupt+0x7f/0xa0 > [155376.330566] </IRQ> > [155376.334947] asm_common_interrupt+0x1e/0x40 > [155376.343499] RIP: 0010:cpuidle_enter_state+0xc2/0x340 > [155376.353619] Code: 48 89 c5 0f 1f 44 00 00 31 ff e8 f9 8d 73 ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 38 02 00 00 31 ff e8 b2 36 79 ff fb 45 85 f6 <0f> 88 e8 00 00 00 49 63 d6 48 2b 2c 24 48 6b ca 68 48 8d 04 52 48 > [155376.391367] RSP: 0018:ffffc90000293e90 EFLAGS: 00000202 > [155376.402004] RAX: ffff88903f8eaa80 RBX: ffffe8ffff4c6c00 RCX: 000000000000001f > [155376.416456] RDX: 00008d503c63f228 RSI: 000000005ba4b680 RDI: 0000000000000000 > [155376.430910] RBP: 00008d503c63f228 R08: 0000000000000002 R09: 000000000002a300 > [155376.445372] R10: 002435b781e62f4a R11: ffff88903f8e9a84 R12: 0000000000000001 > [155376.459832] R13: ffffffff83621200 R14: 0000000000000001 R15: 0000000000000000 > [155376.474307] cpuidle_enter+0x29/0x40 > [155376.481654] do_idle+0x1bb/0x200 > [155376.488297] cpu_startup_entry+0x19/0x20 > [155376.496320] start_secondary+0x104/0x140 > [155376.504342] secondary_startup_64_no_verify+0xb0/0xbb > [155376.514629] ---[ end trace 9b428a0d7901c3ff ]--- > [155376.524041] TCP: Socket Info: family=10 state=1 sport=12345 dport=57616 ccname=ned_tcp_dctcp01 cwnd=1 > [155376.524045] TCP: saddr=XXXX:XXXX:XXXX:XXXX:XXXX:0000:00c8:0000 daddr=XXXX:XXXX:XXXX:XXXX:XXXX:0000:0288:00 Try to give us symbols with scripts/decode_stacktrace.sh , thanks. Also 5.12 is kind of old :/
On Wed, Dec 07, 2022 at 06:59:38PM +0100, Eric Dumazet wrote: > On Wed, Dec 7, 2022 at 6:37 PM Breno Leitao <leitao@debian.org> wrote: > > > > On Sat, Sep 03, 2022 at 09:42:43AM -0700, Eric Dumazet wrote: > > > On Wed, Aug 31, 2022 at 6:38 AM Breno Leitao <leitao@debian.org> wrote: > > > > > > > > There are cases where we need information about the socket during a > > > > warning, so, it could help us to find bugs that happens that do not have > > > > a easily repro. > > > > > > > > BPF congestion control algorithms can change socket state in unexpected > > > > ways, leading to WARNings. Additional information about the socket state > > > > is useful to identify the culprit. > > > > > > Well, this suggests we need to fix BPF side ? > > > > > > Not sure how this can happen, because TCP_BPF_IW has > > > > > > if (val <= 0 || tp->data_segs_out > tp->syn_data) > > > ret = -EINVAL; > > > else > > > tcp_snd_cwnd_set(tp, val); > > > > I am not sure we are hitting this path, please check the stack below > > > > > It seems you already found the issue in an eBPF CC, can you share the details ? > > > > Sure, here is an example of what we are facing (with some obfuscation). > > Remeber that there are users' BPF application running: > > > > [155375.750105] ------------[ cut here ]------------ > > [155375.759526] WARNING: CPU: 19 PID: 0 at net/ipv4/tcp.c:4552 tcp_sock_warn+0x6/0x20 > > [155375.774700] Modules linked in: vhost_net tun vhost vhost_iotlb tap virtio_net net_failover failover mpls_gso mpls_iptunnel mpls_router ip_tunnel fou ip6_udp_tunnel udp_tunnel sch_fq sunrpc bpf_preload tls act_gact cls_bpf tcp_diag inet_diag skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp iTCO_wdt kvm_intel iTCO_vendor_support kvm evdev ses irqbypass enclosure i2c_i801 i2c_smbus ipmi_si ipmi_devintf ipmi_msghandler acpi_cpufreq button tpm_crb sch_fq_codel fuse sg nvme mpi3mr scsi_transport_sas nvme_core xhci_pci xhci_hcd loop efivarfs autofs4 > > [155375.874916] CPU: 19 PID: 0 Comm: swapper/19 Kdump: loaded Tainted: G S 5.12.0-0_XXXXXXXX_g0fed6f189e14 #1 > > [155375.898770] Hardware name: XXXX XXX XXXX XXXXX, BIOS BS_BIOS_XXX XX/XX/2022 > > [155375.916015] RIP: 0010:tcp_sock_warn+0x6/0x20 > > [155375.924755] Code: 4d 01 3e 85 c0 0f 84 57 ff ff ff 48 8b 0c 24 44 8b 01 eb 82 e8 eb b7 14 00 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 53 <0f> 0b 48 85 ff 0f 85 77 70 14 00 5b c3 66 66 2e 0f 1f 84 00 00 00 > > [155375.962518] RSP: 0018:ffffc90000d08988 EFLAGS: 00010246 > > [155375.973157] RAX: ffff88817432b5c0 RBX: ffff88828f748000 RCX: ffffc90000d08a34 > > [155375.987614] RDX: 0000000000000000 RSI: 00000000822be8c8 RDI: ffff88828f748000 > > [155376.002074] RBP: 0000000000000040 R08: ffffc90000d08a38 R09: 00000000822be334 > > [155376.016531] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 > > [155376.030988] R13: ffffc90000d08a34 R14: 0000000000005546 R15: 0000000000000000 > > [155376.045441] FS: 0000000000000000(0000) GS:ffff88903f8c0000(0000) knlGS:0000000000000000 > > [155376.061804] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > [155376.073474] CR2: 00007fe407603080 CR3: 0000000b19993006 CR4: 00000000007706e0 > > [155376.087928] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > > [155376.102383] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > > [155376.116841] PKRU: 55555554 > > [155376.122427] Call Trace: > > [155376.127497] <IRQ> > > [155376.131697] tcp_fastretrans_alert+0x988/0x9e0 > > [155376.140774] tcp_ack+0x8b4/0x1360 > > [155376.147581] ? __cgroup_bpf_run_filter_skb+0x185/0x440 > > [155376.158037] tcp_rcv_established+0x135/0x660 > > [155376.166755] ? sk_filter_trim_cap+0xbc/0x220 > > [155376.175472] tcp_v6_do_rcv+0xbe/0x3e0 > > [155376.182974] tcp_v6_rcv+0xc01/0xc90 > > [155376.190128] ip6_protocol_deliver_rcu+0xbd/0x450 > > [155376.199541] ip6_input_finish+0x3d/0x60 > > [155376.207388] ip6_input+0xb5/0xc0 > > [155376.214019] ip6_sublist_rcv_finish+0x37/0x50 > > [155376.222912] ip6_sublist_rcv+0x1dd/0x270 > > [155376.230935] ipv6_list_rcv+0x113/0x140 > > [155376.238618] __netif_receive_skb_list_core+0x1a0/0x210 > > [155376.249080] netif_receive_skb_list_internal+0x186/0x2a0 > > [155376.259887] gro_normal_list.part.171+0x19/0x40 > > [155376.269137] napi_complete_done+0x65/0x150 > > [155376.277514] bnxt_poll_p5+0x25b/0x2b0 > > [155376.285027] ? tcp_write_xmit+0x278/0x1060 > > [155376.293398] __napi_poll+0x25/0x120 > > [155376.300552] net_rx_action+0x189/0x300 > > [155376.308227] __do_softirq+0xbb/0x271 > > [155376.315554] irq_exit_rcu+0x97/0xa0 > > [155376.322710] common_interrupt+0x7f/0xa0 > > [155376.330566] </IRQ> > > [155376.334947] asm_common_interrupt+0x1e/0x40 > > [155376.343499] RIP: 0010:cpuidle_enter_state+0xc2/0x340 > > [155376.353619] Code: 48 89 c5 0f 1f 44 00 00 31 ff e8 f9 8d 73 ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 38 02 00 00 31 ff e8 b2 36 79 ff fb 45 85 f6 <0f> 88 e8 00 00 00 49 63 d6 48 2b 2c 24 48 6b ca 68 48 8d 04 52 48 > > [155376.391367] RSP: 0018:ffffc90000293e90 EFLAGS: 00000202 > > [155376.402004] RAX: ffff88903f8eaa80 RBX: ffffe8ffff4c6c00 RCX: 000000000000001f > > [155376.416456] RDX: 00008d503c63f228 RSI: 000000005ba4b680 RDI: 0000000000000000 > > [155376.430910] RBP: 00008d503c63f228 R08: 0000000000000002 R09: 000000000002a300 > > [155376.445372] R10: 002435b781e62f4a R11: ffff88903f8e9a84 R12: 0000000000000001 > > [155376.459832] R13: ffffffff83621200 R14: 0000000000000001 R15: 0000000000000000 > > [155376.474307] cpuidle_enter+0x29/0x40 > > [155376.481654] do_idle+0x1bb/0x200 > > [155376.488297] cpu_startup_entry+0x19/0x20 > > [155376.496320] start_secondary+0x104/0x140 > > [155376.504342] secondary_startup_64_no_verify+0xb0/0xbb > > [155376.514629] ---[ end trace 9b428a0d7901c3ff ]--- > > [155376.524041] TCP: Socket Info: family=10 state=1 sport=12345 dport=57616 ccname=ned_tcp_dctcp01 cwnd=1 > > [155376.524045] TCP: saddr=XXXX:XXXX:XXXX:XXXX:XXXX:0000:00c8:0000 daddr=XXXX:XXXX:XXXX:XXXX:XXXX:0000:0288:00 > > Try to give us symbols with scripts/decode_stacktrace.sh , thanks. Sorry, here it is: [749619.538804] WARNING: CPU: 19 PID: 0 at net/ipv4/tcp.c:4552 tcp_sock_warn+0x6/0x20 [749619.553969] Modules linked in: sch_fq sunrpc bpf_preload tls act_gact cls_bpf tcp_diag inet_diag skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel iTCO_wdt iTCO_vendor_support kvm evdev ses irqbypass enclosure i2c_i801 i2c_smbus ipmi_si ipmi_devintf ipmi_msghandler acpi_cpufreq button tpm_crb sch_fq_codel vhost_net tun tap vhost vhost_iotlb virtio_net net_failover failover mpls_gso mpls_iptunnel mpls_router ip_tunnel fou ip6_udp_tunnel udp_tunnel fuse sg nvme mpi3mr scsi_transport_sas nvme_core xhci_pci xhci_hcd loop efivarfs autofs4 [749619.678066] Hardware name: XXXXX [749619.695308] RIP: tcp_sock_warn+0x6/0x20 [749619.704034] Code: 4d 01 3e 85 c0 0f 84 57 ff ff ff 48 8b 0c 24 44 8b 01 eb 82 e8 eb b7 14 00 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 53 <0f> 0b 48 85 ff 0f 85 77 70 14 00 5b c3 66 66 2e 0f 1f 84 00 00 00 All code ======== 0: 4d 01 3e add %r15,(%r14) 3: 85 c0 test %eax,%eax 5: 0f 84 57 ff ff ff je 0xffffffffffffff62 b: 48 8b 0c 24 mov (%rsp),%rcx f: 44 8b 01 mov (%rcx),%r8d 12: eb 82 jmp 0xffffffffffffff96 14: e8 eb b7 14 00 callq 0x14b804 19: 66 66 2e 0f 1f 84 00 data16 nopw %cs:0x0(%rax,%rax,1) 20: 00 00 00 00 24: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 29: 53 push %rbx 2a:* 0f 0b ud2 <-- trapping instruction 2c: 48 85 ff test %rdi,%rdi 2f: 0f 85 77 70 14 00 jne 0x1470ac 35: 5b pop %rbx 36: c3 retq 37: 66 data16 38: 66 data16 39: 2e cs 3a: 0f .byte 0xf 3b: 1f (bad) 3c: 84 00 test %al,(%rax) ... Code starting with the faulting instruction =========================================== 0: 0f 0b ud2 2: 48 85 ff test %rdi,%rdi 5: 0f 85 77 70 14 00 jne 0x147082 b: 5b pop %rbx c: c3 retq d: 66 data16 e: 66 data16 f: 2e cs 10: 0f .byte 0xf 11: 1f (bad) 12: 84 00 test %al,(%rax) ... [749619.741779] RSP: 0018:ffffc90000d08988 EFLAGS: 00010246 [749619.752436] RAX: ffff88814b57f5c0 RBX: ffff8881bd2540c0 RCX: ffffc90000d08a34 [749619.766900] RDX: 0000000000000000 RSI: 00000000cda8f4af RDI: ffff8881bd2540c0 [749619.781364] RBP: 0000000000000000 R08: ffffc90000d08a38 R09: 00000000cda8f44f [749619.795831] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [749619.810300] R13: ffffc90000d08a34 R14: 0000000000011406 R15: 0000000000000000 [749619.824788] FS: 0000000000000000(0000) GS:ffff88903f8c0000(0000) knlGS:0000000000000000 [749619.841168] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [749619.852857] CR2: 000000000007c2e9 CR3: 0000000b82412002 CR4: 00000000007706e0 [749619.867331] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [749619.881800] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [749619.896260] PKRU: 55555554 [749619.901859] Call Trace: [749619.906927] <IRQ> [749619.911129] tcp_fastretrans_alert+0x988/0x9e0 [749619.920222] ? kmem_cache_free+0x33c/0x3d0 [749619.928606] tcp_ack+0x8b4/0x1360 [749619.935425] ? __cgroup_bpf_run_filter_skb+0x185/0x440 [749619.945910] tcp_rcv_established+0x2f3/0x660 [749619.954639] ? sk_filter_trim_cap+0xbc/0x220 [749619.963358] tcp_v6_do_rcv+0xbe/0x3e0 [749619.970863] tcp_v6_rcv+0xc01/0xc90 [749619.978029] ip6_protocol_deliver_rcu+0xbd/0x450 [749619.987453] ip6_input_finish+0x3d/0x60 [749619.995313] ip6_input+0xb5/0xc0 [749620.001958] ip6_sublist_rcv_finish+0x37/0x50 [749620.010851] ip6_sublist_rcv+0x1dd/0x270 [749620.018877] ipv6_list_rcv+0x113/0x140 [749620.026552] __netif_receive_skb_list_core+0x1a0/0x210 [749620.037025] netif_receive_skb_list_internal+0x186/0x2a0 [749620.047838] ? napi_gro_complete+0x6c/0xd0 [749620.056215] gro_normal_list.part.171+0x19/0x40 [749620.065471] napi_complete_done+0x65/0x150 [749620.073856] bnxt_poll_p5+0x25b/0x2b0 [749620.081369] __napi_poll+0x25/0x120 [749620.088537] net_rx_action+0x189/0x300 [749620.096224] __do_softirq+0xbb/0x271 [749620.103571] irq_exit_rcu+0x97/0xa0 [749620.110732] common_interrupt+0x7f/0xa0 [749620.118595] </IRQ> [749620.122964] asm_common_interrupt+0x1e/0x40 [749620.131511] RIP: cpuidle_enter_state+0xc2/0x340 [749620.141621] Code: 48 89 c5 0f 1f 44 00 00 31 ff e8 f9 8d 73 ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 38 02 00 00 31 ff e8 b2 36 79 ff fb 45 85 f6 <0f> 88 e8 00 00 00 49 63 d6 48 2b 2c 24 48 6b ca 68 48 8d 04 52 48 All code ======== 0: 48 89 c5 mov %rax,%rbp 3: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 8: 31 ff xor %edi,%edi a: e8 f9 8d 73 ff callq 0xffffffffff738e08 f: 45 84 ff test %r15b,%r15b 12: 74 12 je 0x26 14: 9c pushfq 15: 58 pop %rax 16: f6 c4 02 test $0x2,%ah 19: 0f 85 38 02 00 00 jne 0x257 1f: 31 ff xor %edi,%edi 21: e8 b2 36 79 ff callq 0xffffffffff7936d8 26: fb sti 27: 45 85 f6 test %r14d,%r14d 2a:* 0f 88 e8 00 00 00 js 0x118 <-- trapping instruction 30: 49 63 d6 movslq %r14d,%rdx 33: 48 2b 2c 24 sub (%rsp),%rbp 37: 48 6b ca 68 imul $0x68,%rdx,%rcx 3b: 48 8d 04 52 lea (%rdx,%rdx,2),%rax 3f: 48 rex.W Code starting with the faulting instruction =========================================== 0: 0f 88 e8 00 00 00 js 0xee 6: 49 63 d6 movslq %r14d,%rdx 9: 48 2b 2c 24 sub (%rsp),%rbp d: 48 6b ca 68 imul $0x68,%rdx,%rcx 11: 48 8d 04 52 lea (%rdx,%rdx,2),%rax 15: 48 rex.W [749620.179375] RSP: 0018:ffffc90000293e90 EFLAGS: 00000202 [749620.190016] RAX: ffff88903f8eaa80 RBX: ffffe8ffff4c6c00 RCX: 000000000000001f [749620.204491] RDX: 0002a9c665c30050 RSI: 000000005ba4b680 RDI: 0000000000000000 [749620.218949] RBP: 0002a9c665c30050 R08: 0000000000000002 R09: 000000000002a300 [749620.233403] R10: 00219fc7cf6dd4ca R11: ffff88903f8e9a84 R12: 0000000000000001 [749620.247854] R13: ffffffff83621200 R14: 0000000000000001 R15: 0000000000000000 [749620.262315] cpuidle_enter+0x29/0x40 [749620.269649] do_idle+0x1bb/0x200 [749620.276296] cpu_startup_entry+0x19/0x20 [749620.284333] start_secondary+0x104/0x140 [749620.292364] secondary_startup_64_no_verify+0xb0/0xbb [749620.302657] ---[ end trace 44f03a0d3a4e81fd ]--- > Also 5.12 is kind of old :/ Right. Unfortunately this problem is not easily reproducible, and, we need millions of machines running some specific workload to reproduce it. That said, the motivation for this patch is to dump more TCP information when we get such warnings, so, we have relevant data to understand more about the issues (when it happens). I.e, this patch is not this problem above, but, providing more information when we hit such warnings. That said, this is patch is about creating the TCP_SOCK_WARN_ON_ONCE() macro, and eventually move some of the WARN_ON_ONCE() to TCP_SOCK_WARN_ON_ONCE(). (I.e, this is not a patch to solve the case above.) Anyway, I got some feedbacks from the patch, let me send a V2 of the patch, which I expect to clarify the problem better.
On Thu, Dec 8, 2022 at 4:44 PM Breno Leitao <leitao@debian.org> wrote: > > On Wed, Dec 07, 2022 at 06:59:38PM +0100, Eric Dumazet wrote: > > Try to give us symbols with scripts/decode_stacktrace.sh , thanks. > > Sorry, here it is: > > [749619.538804] WARNING: CPU: 19 PID: 0 at net/ipv4/tcp.c:4552 tcp_sock_warn+0x6/0x20 > [749619.553969] Modules linked in: sch_fq sunrpc bpf_preload tls act_gact cls_bpf tcp_diag inet_diag skx_edac nfit libnvdimm x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel iTCO_wdt iTCO_vendor_support kvm evdev ses irqbypass enclosure i2c_i801 i2c_smbus ipmi_si ipmi_devintf ipmi_msghandler acpi_cpufreq button tpm_crb sch_fq_codel vhost_net tun tap vhost vhost_iotlb virtio_net net_failover failover mpls_gso mpls_iptunnel mpls_router ip_tunnel fou ip6_udp_tunnel udp_tunnel fuse sg nvme mpi3mr scsi_transport_sas nvme_core xhci_pci xhci_hcd loop efivarfs autofs4 > [749619.678066] Hardware name: XXXXX > [749619.695308] RIP: tcp_sock_warn+0x6/0x20 > [749619.704034] Code: 4d 01 3e 85 c0 0f 84 57 ff ff ff 48 8b 0c 24 44 8b 01 eb 82 e8 eb b7 14 00 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 53 <0f> 0b 48 85 ff 0f 85 77 70 14 00 5b c3 66 66 2e 0f 1f 84 00 00 00 > All code > ======== > 0: 4d 01 3e add %r15,(%r14) > 3: 85 c0 test %eax,%eax > 5: 0f 84 57 ff ff ff je 0xffffffffffffff62 > b: 48 8b 0c 24 mov (%rsp),%rcx > f: 44 8b 01 mov (%rcx),%r8d > 12: eb 82 jmp 0xffffffffffffff96 > 14: e8 eb b7 14 00 callq 0x14b804 > 19: 66 66 2e 0f 1f 84 00 data16 nopw %cs:0x0(%rax,%rax,1) > 20: 00 00 00 00 > 24: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) > 29: 53 push %rbx > 2a:* 0f 0b ud2 <-- trapping instruction > 2c: 48 85 ff test %rdi,%rdi > 2f: 0f 85 77 70 14 00 jne 0x1470ac > 35: 5b pop %rbx > 36: c3 retq > 37: 66 data16 > 38: 66 data16 > 39: 2e cs > 3a: 0f .byte 0xf > 3b: 1f (bad) > 3c: 84 00 test %al,(%rax) > ... > > Code starting with the faulting instruction > =========================================== > 0: 0f 0b ud2 > 2: 48 85 ff test %rdi,%rdi > 5: 0f 85 77 70 14 00 jne 0x147082 > b: 5b pop %rbx > c: c3 retq > d: 66 data16 > e: 66 data16 > f: 2e cs > 10: 0f .byte 0xf > 11: 1f (bad) > 12: 84 00 test %al,(%rax) > ... > [749619.741779] RSP: 0018:ffffc90000d08988 EFLAGS: 00010246 > [749619.752436] RAX: ffff88814b57f5c0 RBX: ffff8881bd2540c0 RCX: ffffc90000d08a34 > [749619.766900] RDX: 0000000000000000 RSI: 00000000cda8f4af RDI: ffff8881bd2540c0 > [749619.781364] RBP: 0000000000000000 R08: ffffc90000d08a38 R09: 00000000cda8f44f > [749619.795831] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 > [749619.810300] R13: ffffc90000d08a34 R14: 0000000000011406 R15: 0000000000000000 > [749619.824788] FS: 0000000000000000(0000) GS:ffff88903f8c0000(0000) knlGS:0000000000000000 > [749619.841168] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [749619.852857] CR2: 000000000007c2e9 CR3: 0000000b82412002 CR4: 00000000007706e0 > [749619.867331] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [749619.881800] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > [749619.896260] PKRU: 55555554 > [749619.901859] Call Trace: > [749619.906927] <IRQ> > [749619.911129] tcp_fastretrans_alert+0x988/0x9e0 > [749619.920222] ? kmem_cache_free+0x33c/0x3d0 > [749619.928606] tcp_ack+0x8b4/0x1360 > [749619.935425] ? __cgroup_bpf_run_filter_skb+0x185/0x440 > [749619.945910] tcp_rcv_established+0x2f3/0x660 > [749619.954639] ? sk_filter_trim_cap+0xbc/0x220 > [749619.963358] tcp_v6_do_rcv+0xbe/0x3e0 > [749619.970863] tcp_v6_rcv+0xc01/0xc90 Still no symbols (file name : line number). > [749619.978029] ip6_protocol_deliver_rcu+0xbd/0x450 > [749619.987453] ip6_input_finish+0x3d/0x60 > [749619.995313] ip6_input+0xb5/0xc0 > [749620.001958] ip6_sublist_rcv_finish+0x37/0x50 > [749620.010851] ip6_sublist_rcv+0x1dd/0x270 > [749620.018877] ipv6_list_rcv+0x113/0x140 > [749620.026552] __netif_receive_skb_list_core+0x1a0/0x210 > [749620.037025] netif_receive_skb_list_internal+0x186/0x2a0 > [749620.047838] ? napi_gro_complete+0x6c/0xd0 > [749620.056215] gro_normal_list.part.171+0x19/0x40 > [749620.065471] napi_complete_done+0x65/0x150 > [749620.073856] bnxt_poll_p5+0x25b/0x2b0 > [749620.081369] __napi_poll+0x25/0x120 > [749620.088537] net_rx_action+0x189/0x300 > [749620.096224] __do_softirq+0xbb/0x271 > [749620.103571] irq_exit_rcu+0x97/0xa0 > [749620.110732] common_interrupt+0x7f/0xa0 > [749620.118595] </IRQ> > [749620.122964] asm_common_interrupt+0x1e/0x40 > [749620.131511] RIP: cpuidle_enter_state+0xc2/0x340 > [749620.141621] Code: 48 89 c5 0f 1f 44 00 00 31 ff e8 f9 8d 73 ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 38 02 00 00 31 ff e8 b2 36 79 ff fb 45 85 f6 <0f> 88 e8 00 00 00 49 63 d6 48 2b 2c 24 48 6b ca 68 48 8d 04 52 48
diff --git a/include/net/tcp.h b/include/net/tcp.h index d10962b9f0d0..73c3970d8839 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -40,6 +40,7 @@ #include <net/inet_ecn.h> #include <net/dst.h> #include <net/mptcp.h> +#include <net/tcp_debug.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -1222,7 +1223,7 @@ static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp) static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val) { - WARN_ON_ONCE((int)val <= 0); + TCP_SOCK_WARN_ON_ONCE(tp, (int)val <= 0); tp->snd_cwnd = val; } diff --git a/include/net/tcp_debug.h b/include/net/tcp_debug.h new file mode 100644 index 000000000000..50e96d87d335 --- /dev/null +++ b/include/net/tcp_debug.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TCP_DEBUG_H +#define _LINUX_TCP_DEBUG_H + +void tcp_sock_warn(const struct tcp_sock *tp); + +#define TCP_SOCK_WARN_ON_ONCE(tcp_sock, condition) \ + DO_ONCE_LITE_IF(condition, tcp_sock_warn, tcp_sock) + +#endif /* _LINUX_TCP_DEBUG_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bbe218753662..71771fee72f7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4684,6 +4684,36 @@ int tcp_abort(struct sock *sk, int err) } EXPORT_SYMBOL_GPL(tcp_abort); +void tcp_sock_warn(const struct tcp_sock *tp) +{ + const struct sock *sk = (const struct sock *)tp; + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + + WARN_ON(1); + + if (!tp) + return; + + pr_warn("Socket Info: family=%u state=%d sport=%u dport=%u ccname=%s cwnd=%u", + sk->sk_family, sk->sk_state, ntohs(inet->inet_sport), + ntohs(inet->inet_dport), icsk->icsk_ca_ops->name, tcp_snd_cwnd(tp)); + + switch (sk->sk_family) { + case AF_INET: + pr_warn("saddr=%pI4 daddr=%pI4", &inet->inet_saddr, + &inet->inet_daddr); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + pr_warn("saddr=%pI6 daddr=%pI6", &sk->sk_v6_rcv_saddr, + &sk->sk_v6_daddr); + break; +#endif + } +} +EXPORT_SYMBOL_GPL(tcp_sock_warn); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries;
There are cases where we need information about the socket during a warning, so, it could help us to find bugs that happens that do not have a easily repro. BPF congestion control algorithms can change socket state in unexpected ways, leading to WARNings. Additional information about the socket state is useful to identify the culprit. This diff creates a TCP socket-specific version of WARN_ON_ONCE(), and attaches it to tcp_snd_cwnd_set(). Signed-off-by: Breno Leitao <leitao@debian.org> --- include/net/tcp.h | 3 ++- include/net/tcp_debug.h | 10 ++++++++++ net/ipv4/tcp.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 include/net/tcp_debug.h