Message ID | 202402271050366715988@zte.com.cn (mailing list archive) |
---|---|
State | Rejected |
Headers | show |
Series | net/ipv4: add tracepoint for icmp_send | expand |
On Tue, Feb 27, 2024 at 3:50 AM <xu.xin16@zte.com.cn> wrote: > > From: xu xin <xu.xin16@zte.com.cn> > > Introduce a tracepoint for icmp_send, which can help users to get more > detail information conveniently when icmp abnormal events happen. > > 1. Giving an usecase example: > ============================= > When an application experiences packet loss due to an unreachable UDP > destination port, the kernel will send an exception message through the > icmp_send function. By adding a trace point for icmp_send, developers or > system administrators can obtain the detailed information easily about the > UDP packet loss, including the type, code, source address, destination > address, source port, and destination port. This facilitates the > trouble-shooting of packet loss issues especially for those complicated > network-service applications. > > 2. Operation Instructions: > ========================== > Switch to the tracing directory. > cd /sys/kernel/debug/tracing > Filter for destination port unreachable. > echo "type==3 && code==3" > events/icmp/icmp_send/filter > Enable trace event. > echo 1 > events/icmp/icmp_send/enable > > 3. Result View: > ================ > udp_client_erro-11370 [002] ...s.12 124.728002: icmp_send: > icmp_send: type=3, code=3.From 127.0.0.1:41895 to 127.0.0.1:6666 ulen=23 > skbaddr=00000000589b167a > > Signed-off-by: He Peilin <he.peilin@zte.com.cn> > Reviewed-by: xu xin <xu.xin16@zte.com.cn> > Reviewed-by: Yunkai Zhang <zhang.yunkai@zte.com.cn> > Cc: Yang Yang <yang.yang29@zte.com.cn> > Cc: Liu Chun <liu.chun2@zte.com.cn> > Cc: Xuexin Jiang <jiang.xuexin@zte.com.cn> > --- > include/trace/events/icmp.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ > net/ipv4/icmp.c | 4 ++++ > 2 files changed, 61 insertions(+) > create mode 100644 include/trace/events/icmp.h > > diff --git a/include/trace/events/icmp.h b/include/trace/events/icmp.h > new file mode 100644 > index 000000000000..3d9af5769bc3 > --- /dev/null > +++ b/include/trace/events/icmp.h > @@ -0,0 +1,57 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#undef TRACE_SYSTEM > +#define TRACE_SYSTEM icmp > + > +#if !defined(_TRACE_ICMP_H) || defined(TRACE_HEADER_MULTI_READ) > +#define _TRACE_ICMP_H > + > +#include <linux/icmp.h> > +#include <linux/tracepoint.h> > + > +TRACE_EVENT(icmp_send, > + > + TP_PROTO(const struct sk_buff *skb, int type, int code), > + > + TP_ARGS(skb, type, code), > + > + TP_STRUCT__entry( > + __field(__u16, sport) > + __field(__u16, dport) > + __field(unsigned short, ulen) > + __field(const void *, skbaddr) > + __field(int, type) > + __field(int, code) > + __array(__u8, saddr, 4) > + __array(__u8, daddr, 4) > + ), > + > + TP_fast_assign( > + // Get UDP header > + struct udphdr *uh = udp_hdr(skb); > + struct iphdr *iph = ip_hdr(skb); > + __be32 *p32; > + > + __entry->sport = ntohs(uh->source); > + __entry->dport = ntohs(uh->dest); > + __entry->ulen = ntohs(uh->len); > + __entry->skbaddr = skb; > + __entry->type = type; > + __entry->code = code; > + > + p32 = (__be32 *) __entry->saddr; > + *p32 = iph->saddr; > + > + p32 = (__be32 *) __entry->daddr; > + *p32 = iph->daddr; > + ), > + FYI, ICMP can be generated for many other protocols than UDP. > + TP_printk("icmp_send: type=%d, code=%d. From %pI4:%u to %pI4:%u ulen=%d skbaddr=%p", > + __entry->type, __entry->code, > + __entry->saddr, __entry->sport, __entry->daddr, > + __entry->dport, __entry->ulen, __entry->skbaddr) > +); > + > +#endif /* _TRACE_ICMP_H */ > + > +/* This part must be outside protection */ > +#include <trace/define_trace.h> > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c > index e63a3bf99617..437bdb7e2650 100644 > --- a/net/ipv4/icmp.c > +++ b/net/ipv4/icmp.c > @@ -92,6 +92,8 @@ > #include <net/inet_common.h> > #include <net/ip_fib.h> > #include <net/l3mdev.h> > +#define CREATE_TRACE_POINTS > +#include <trace/events/icmp.h> > > /* > * Build xmit assembly blocks > @@ -599,6 +601,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, > struct net *net; > struct sock *sk; > > + trace_icmp_send(skb_in, type, code); I think you missed many sanity checks between lines 622 and 676 Honestly, a kprobe BPF based solution would be less risky, and less maintenance for us.
On Tue, Feb 27, 2024 at 1:49 PM Eric Dumazet <edumazet@google.com> wrote: > > On Tue, Feb 27, 2024 at 3:50 AM <xu.xin16@zte.com.cn> wrote: > > > > From: xu xin <xu.xin16@zte.com.cn> > > > > Introduce a tracepoint for icmp_send, which can help users to get more > > detail information conveniently when icmp abnormal events happen. > > > > 1. Giving an usecase example: > > ============================= > > When an application experiences packet loss due to an unreachable UDP > > destination port, the kernel will send an exception message through the > > icmp_send function. By adding a trace point for icmp_send, developers or > > system administrators can obtain the detailed information easily about the > > UDP packet loss, including the type, code, source address, destination > > address, source port, and destination port. This facilitates the > > trouble-shooting of packet loss issues especially for those complicated > > network-service applications. > > > > 2. Operation Instructions: > > ========================== > > Switch to the tracing directory. > > cd /sys/kernel/debug/tracing > > Filter for destination port unreachable. > > echo "type==3 && code==3" > events/icmp/icmp_send/filter > > Enable trace event. > > echo 1 > events/icmp/icmp_send/enable > > > > 3. Result View: > > ================ > > udp_client_erro-11370 [002] ...s.12 124.728002: icmp_send: > > icmp_send: type=3, code=3.From 127.0.0.1:41895 to 127.0.0.1:6666 ulen=23 > > skbaddr=00000000589b167a > > > > Signed-off-by: He Peilin <he.peilin@zte.com.cn> > > Reviewed-by: xu xin <xu.xin16@zte.com.cn> > > Reviewed-by: Yunkai Zhang <zhang.yunkai@zte.com.cn> > > Cc: Yang Yang <yang.yang29@zte.com.cn> > > Cc: Liu Chun <liu.chun2@zte.com.cn> > > Cc: Xuexin Jiang <jiang.xuexin@zte.com.cn> > > --- > > include/trace/events/icmp.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ > > net/ipv4/icmp.c | 4 ++++ > > 2 files changed, 61 insertions(+) > > create mode 100644 include/trace/events/icmp.h > > > > diff --git a/include/trace/events/icmp.h b/include/trace/events/icmp.h > > new file mode 100644 > > index 000000000000..3d9af5769bc3 > > --- /dev/null > > +++ b/include/trace/events/icmp.h > > @@ -0,0 +1,57 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +#undef TRACE_SYSTEM > > +#define TRACE_SYSTEM icmp > > + > > +#if !defined(_TRACE_ICMP_H) || defined(TRACE_HEADER_MULTI_READ) > > +#define _TRACE_ICMP_H > > + > > +#include <linux/icmp.h> > > +#include <linux/tracepoint.h> > > + > > +TRACE_EVENT(icmp_send, > > + > > + TP_PROTO(const struct sk_buff *skb, int type, int code), > > + > > + TP_ARGS(skb, type, code), > > + > > + TP_STRUCT__entry( > > + __field(__u16, sport) > > + __field(__u16, dport) > > + __field(unsigned short, ulen) > > + __field(const void *, skbaddr) > > + __field(int, type) > > + __field(int, code) > > + __array(__u8, saddr, 4) > > + __array(__u8, daddr, 4) > > + ), > > + > > + TP_fast_assign( > > + // Get UDP header > > + struct udphdr *uh = udp_hdr(skb); > > + struct iphdr *iph = ip_hdr(skb); > > + __be32 *p32; > > + > > + __entry->sport = ntohs(uh->source); > > + __entry->dport = ntohs(uh->dest); > > + __entry->ulen = ntohs(uh->len); > > + __entry->skbaddr = skb; > > + __entry->type = type; > > + __entry->code = code; > > + > > + p32 = (__be32 *) __entry->saddr; > > + *p32 = iph->saddr; > > + > > + p32 = (__be32 *) __entry->daddr; > > + *p32 = iph->daddr; > > + ), > > + > > FYI, ICMP can be generated for many other protocols than UDP. > > > + TP_printk("icmp_send: type=%d, code=%d. From %pI4:%u to %pI4:%u ulen=%d skbaddr=%p", > > + __entry->type, __entry->code, > > + __entry->saddr, __entry->sport, __entry->daddr, > > + __entry->dport, __entry->ulen, __entry->skbaddr) > > +); > > + > > +#endif /* _TRACE_ICMP_H */ > > + > > +/* This part must be outside protection */ > > +#include <trace/define_trace.h> > > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c > > index e63a3bf99617..437bdb7e2650 100644 > > --- a/net/ipv4/icmp.c > > +++ b/net/ipv4/icmp.c > > @@ -92,6 +92,8 @@ > > #include <net/inet_common.h> > > #include <net/ip_fib.h> > > #include <net/l3mdev.h> > > +#define CREATE_TRACE_POINTS > > +#include <trace/events/icmp.h> > > > > /* > > * Build xmit assembly blocks > > @@ -599,6 +601,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, > > struct net *net; > > struct sock *sk; > > > > + trace_icmp_send(skb_in, type, code); > > I think you missed many sanity checks between lines 622 and 676 [...] > > Honestly, a kprobe BPF based solution would be less risky, and less > maintenance for us. > I agreed. I wonder if we can remove some trace_* at the very beginning of its caller function since they can be easily replaced with bpf tools and then we make less effort to maintain them, say, trace_tcp_probe(), trace_tcp_rcv_space_adjust, etc. Thanks, Jason
On Tue, 27 Feb 2024 10:50:36 +0800 (CST) <xu.xin16@zte.com.cn> wrote: > include/trace/events/icmp.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ > net/ipv4/icmp.c | 4 ++++ > 2 files changed, 61 insertions(+) > create mode 100644 include/trace/events/icmp.h > > diff --git a/include/trace/events/icmp.h b/include/trace/events/icmp.h > new file mode 100644 > index 000000000000..3d9af5769bc3 > --- /dev/null > +++ b/include/trace/events/icmp.h > @@ -0,0 +1,57 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#undef TRACE_SYSTEM > +#define TRACE_SYSTEM icmp > + > +#if !defined(_TRACE_ICMP_H) || defined(TRACE_HEADER_MULTI_READ) > +#define _TRACE_ICMP_H > + > +#include <linux/icmp.h> > +#include <linux/tracepoint.h> > + > +TRACE_EVENT(icmp_send, > + > + TP_PROTO(const struct sk_buff *skb, int type, int code), > + > + TP_ARGS(skb, type, code), > + > + TP_STRUCT__entry( > + __field(__u16, sport) 2 bytes > + __field(__u16, dport) 2 bytes > + __field(unsigned short, ulen) 2 bytes [ 2 byte hole for alignment ] > + __field(const void *, skbaddr) 4/8 bytes It's best to keep the holes at the end of the TP_STRUCT__entry(). That is, I would move ulen to the end of the structure. It doesn't affect anything else. -- Steve > + __field(int, type) > + __field(int, code) > + __array(__u8, saddr, 4) > + __array(__u8, daddr, 4) > + ), > + > + TP_fast_assign( > + // Get UDP header > + struct udphdr *uh = udp_hdr(skb); > + struct iphdr *iph = ip_hdr(skb); > + __be32 *p32; > + > + __entry->sport = ntohs(uh->source); > + __entry->dport = ntohs(uh->dest); > + __entry->ulen = ntohs(uh->len); > + __entry->skbaddr = skb; > + __entry->type = type; > + __entry->code = code; > + > + p32 = (__be32 *) __entry->saddr; > + *p32 = iph->saddr; > + > + p32 = (__be32 *) __entry->daddr; > + *p32 = iph->daddr; > + ), > + > + TP_printk("icmp_send: type=%d, code=%d. From %pI4:%u to %pI4:%u ulen=%d skbaddr=%p", > + __entry->type, __entry->code, > + __entry->saddr, __entry->sport, __entry->daddr, > + __entry->dport, __entry->ulen, __entry->skbaddr) > +); > + > +#endif /* _TRACE_ICMP_H */ > + > +/* This part must be outside protection */ > +#include <trace/define_trace.h> > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c > index e63a3bf99617..437bdb7e2650 100644 > --- a/net/ipv4/icmp.c > +++ b/net/ipv4/icmp.c > @@ -92,6 +92,8 @@ > #include <net/inet_common.h> > #include <net/ip_fib.h> > #include <net/l3mdev.h> > +#define CREATE_TRACE_POINTS > +#include <trace/events/icmp.h> > > /* > * Build xmit assembly blocks > @@ -599,6 +601,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, > struct net *net; > struct sock *sk; > > + trace_icmp_send(skb_in, type, code); > + > if (!rt) > goto out; >
> > include/trace/events/icmp.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ > > net/ipv4/icmp.c | 4 ++++ > > 2 files changed, 61 insertions(+) > > create mode 100644 include/trace/events/icmp.h > > > > diff --git a/include/trace/events/icmp.h b/include/trace/events/icmp.h > > new file mode 100644 > > index 000000000000..3d9af5769bc3 > > --- /dev/null > > +++ b/include/trace/events/icmp.h > > @@ -0,0 +1,57 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +#undef TRACE_SYSTEM > > +#define TRACE_SYSTEM icmp > > + > > +#if !defined(_TRACE_ICMP_H) || defined(TRACE_HEADER_MULTI_READ) > > +#define _TRACE_ICMP_H > > + > > +#include <linux/icmp.h> > > +#include <linux/tracepoint.h> > > + > > +TRACE_EVENT(icmp_send, > > + > > + TP_PROTO(const struct sk_buff *skb, int type, int code), > > + > > + TP_ARGS(skb, type, code), > > + > > + TP_STRUCT__entry( > > + __field(__u16, sport) > > + __field(__u16, dport) > > + __field(unsigned short, ulen) > > + __field(const void *, skbaddr) > > + __field(int, type) > > + __field(int, code) > > + __array(__u8, saddr, 4) > > + __array(__u8, daddr, 4) > > + ), > > + > > + TP_fast_assign( > > + // Get UDP header > > + struct udphdr *uh = udp_hdr(skb); > > + struct iphdr *iph = ip_hdr(skb); > > + __be32 *p32; > > + > > + __entry->sport = ntohs(uh->source); > > + __entry->dport = ntohs(uh->dest); > > + __entry->ulen = ntohs(uh->len); > > + __entry->skbaddr = skb; > > + __entry->type = type; > > + __entry->code = code; > > + > > + p32 = (__be32 *) __entry->saddr; > > + *p32 = iph->saddr; > > + > > + p32 = (__be32 *) __entry->daddr; > > + *p32 = iph->daddr; > > + ), > > + > > FYI, ICMP can be generated for many other protocols than UDP. We have noted this issue. Therefore, a UDP judgment confition has been added in TP_fast_assign.This condition will only track abnormal messages when icmp_send is called with the UDP protocol. Otherwise, it will simply print the abnormal type and code. As follows: if(proto_4 == IPPROTO_UDP) { struct udphdr *uh = udp_hdr(skb); __entry->sport = nthos(uh->source); __entry_dport = nthos(uh->dest); __entry->ulen = nthos(uh->len); } else { __entry->sport = 0; __entry_dport = 0; __entry->ulen = 0; } > > > + TP_printk("icmp_send: type=%d, code=%d. From %pI4:%u to %pI4:%u ulen=%d skbaddr=%p", > > + __entry->type, __entry->code, > > + __entry->saddr, __entry->sport, __entry->daddr, > > + __entry->dport, __entry->ulen, __entry->skbaddr) > > +); > > + > > +#endif /* _TRACE_ICMP_H */ > > + > > +/* This part must be outside protection */ > > +#include <trace/define_trace.h> > > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c > > index e63a3bf99617..437bdb7e2650 100644 > > --- a/net/ipv4/icmp.c > > +++ b/net/ipv4/icmp.c > > @@ -92,6 +92,8 @@ > > #include <net/inet_common.h> > > #include <net/ip_fib.h> > > #include <net/l3mdev.h> > > +#define CREATE_TRACE_POINTS > > +#include <trace/events/icmp.h> > > > > /* > > * Build xmit assembly blocks > > @@ -599,6 +601,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, > > struct net *net; > > struct sock *sk; > > > > + trace_icmp_send(skb_in, type, code); > > I think you missed many sanity checks between lines 622 and 676 Thank you for the reminder. The next step is to move the trace point to line 676. > Honestly, a kprobe BPF based solution would be less risky, and less > maintenance for us. emm, yeah, but tracepoints has advantages on its convienice, especially for those Embedded Linux which doesn't support EBPF.
> > include/trace/events/icmp.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ > > net/ipv4/icmp.c | 4 ++++ > > 2 files changed, 61 insertions(+) > > create mode 100644 include/trace/events/icmp.h > > > > diff --git a/include/trace/events/icmp.h b/include/trace/events/icmp.h > > new file mode 100644 > > index 000000000000..3d9af5769bc3 > > --- /dev/null > > +++ b/include/trace/events/icmp.h > > @@ -0,0 +1,57 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +#undef TRACE_SYSTEM > > +#define TRACE_SYSTEM icmp > > + > > +#if !defined(_TRACE_ICMP_H) || defined(TRACE_HEADER_MULTI_READ) > > +#define _TRACE_ICMP_H > > + > > +#include <linux/icmp.h> > > +#include <linux/tracepoint.h> > > + > > +TRACE_EVENT(icmp_send, > > + > > + TP_PROTO(const struct sk_buff *skb, int type, int code), > > + > > + TP_ARGS(skb, type, code), > > + > > + TP_STRUCT__entry( > > + __field(__u16, sport) > > 2 bytes > > > + __field(__u16, dport) > > 2 bytes > > > + __field(unsigned short, ulen) > > 2 bytes > > [ 2 byte hole for alignment ] > > > + __field(const void *, skbaddr) > > 4/8 bytes > > It's best to keep the holes at the end of the TP_STRUCT__entry(). > > That is, I would move ulen to the end of the structure. It doesn't affect > anything else. > > -- Steve Thank you for pointing that out. The next step is to move __field(unsigned short, ulen) to the end of TP_STRUCT__entry(). > > > + __field(int, type) > > + __field(int, code) > > + __array(__u8, saddr, 4) > > + __array(__u8, daddr, 4) > > + ), > > + > > + TP_fast_assign( > > + // Get UDP header > > + struct udphdr *uh = udp_hdr(skb); > > + struct iphdr *iph = ip_hdr(skb); > > + __be32 *p32; > > + > > + __entry->sport = ntohs(uh->source); > > + __entry->dport = ntohs(uh->dest); > > + __entry->ulen = ntohs(uh->len); > > + __entry->skbaddr = skb; > > + __entry->type = type; > > + __entry->code = code; > > + > > + p32 = (__be32 *) __entry->saddr; > > + *p32 = iph->saddr; > > + > > + p32 = (__be32 *) __entry->daddr; > > + *p32 = iph->daddr; > > + ), > > + > > + TP_printk("icmp_send: type=%d, code=%d. From %pI4:%u to %pI4:%u ulen=%d skbaddr=%p", > > + __entry->type, __entry->code, > > + __entry->saddr, __entry->sport, __entry->daddr, > > + __entry->dport, __entry->ulen, __entry->skbaddr) > > +); > > + > > +#endif /* _TRACE_ICMP_H */ > > + > > +/* This part must be outside protection */ > > +#include <trace/define_trace.h> > > diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c > > index e63a3bf99617..437bdb7e2650 100644 > > --- a/net/ipv4/icmp.c > > +++ b/net/ipv4/icmp.c > > @@ -92,6 +92,8 @@ > > #include <net/inet_common.h> > > #include <net/ip_fib.h> > > #include <net/l3mdev.h> > > +#define CREATE_TRACE_POINTS > > +#include <trace/events/icmp.h> > > > > /* > > * Build xmit assembly blocks > > @@ -599,6 +601,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, > > struct net *net; > > struct sock *sk; > > > > + trace_icmp_send(skb_in, type, code); > > + > > if (!rt) > > goto out; > >
diff --git a/include/trace/events/icmp.h b/include/trace/events/icmp.h new file mode 100644 index 000000000000..3d9af5769bc3 --- /dev/null +++ b/include/trace/events/icmp.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM icmp + +#if !defined(_TRACE_ICMP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ICMP_H + +#include <linux/icmp.h> +#include <linux/tracepoint.h> + +TRACE_EVENT(icmp_send, + + TP_PROTO(const struct sk_buff *skb, int type, int code), + + TP_ARGS(skb, type, code), + + TP_STRUCT__entry( + __field(__u16, sport) + __field(__u16, dport) + __field(unsigned short, ulen) + __field(const void *, skbaddr) + __field(int, type) + __field(int, code) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + ), + + TP_fast_assign( + // Get UDP header + struct udphdr *uh = udp_hdr(skb); + struct iphdr *iph = ip_hdr(skb); + __be32 *p32; + + __entry->sport = ntohs(uh->source); + __entry->dport = ntohs(uh->dest); + __entry->ulen = ntohs(uh->len); + __entry->skbaddr = skb; + __entry->type = type; + __entry->code = code; + + p32 = (__be32 *) __entry->saddr; + *p32 = iph->saddr; + + p32 = (__be32 *) __entry->daddr; + *p32 = iph->daddr; + ), + + TP_printk("icmp_send: type=%d, code=%d. From %pI4:%u to %pI4:%u ulen=%d skbaddr=%p", + __entry->type, __entry->code, + __entry->saddr, __entry->sport, __entry->daddr, + __entry->dport, __entry->ulen, __entry->skbaddr) +); + +#endif /* _TRACE_ICMP_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e63a3bf99617..437bdb7e2650 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -92,6 +92,8 @@ #include <net/inet_common.h> #include <net/ip_fib.h> #include <net/l3mdev.h> +#define CREATE_TRACE_POINTS +#include <trace/events/icmp.h> /* * Build xmit assembly blocks @@ -599,6 +601,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, struct net *net; struct sock *sk; + trace_icmp_send(skb_in, type, code); + if (!rt) goto out;