diff mbox series

[bpf-next,v2,1/6] xsk: add tracepoints for packet drops

Message ID 20210126075239.25378-2-ciara.loftus@intel.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series AF_XDP Packet Drop Tracing | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 14 maintainers not CCed: songliubraving@fb.com andrii@kernel.org jonathan.lemon@gmail.com daniel@iogearbox.net ast@kernel.org hawk@kernel.org rostedt@goodmis.org mingo@redhat.com kpsingh@kernel.org davem@davemloft.net john.fastabend@gmail.com kuba@kernel.org kafai@fb.com yhs@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 94 this patch: 94
netdev/kdoc success Errors and warnings before: 1 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Lines should not end with a '(' WARNING: line length of 81 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 94 this patch: 94
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Ciara Loftus Jan. 26, 2021, 7:52 a.m. UTC
This commit introduces static perf tracepoints for AF_XDP which
report information about packet drops, such as the netdev, qid and
high level reason for the drop. The tracepoint can be
enabled/disabled by toggling
/sys/kernel/debug/tracing/events/xsk/xsk_packet_drop/enable

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 MAINTAINERS                       |  1 +
 include/linux/bpf_trace.h         |  1 +
 include/trace/events/xsk.h        | 45 +++++++++++++++++++++++++++++++
 include/uapi/linux/if_xdp.h       |  8 ++++++
 kernel/bpf/core.c                 |  1 +
 net/xdp/xsk.c                     |  5 ++++
 net/xdp/xsk_buff_pool.c           |  8 +++++-
 tools/include/uapi/linux/if_xdp.h |  8 ++++++
 8 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 include/trace/events/xsk.h

Comments

Daniel Borkmann Jan. 26, 2021, 10:51 p.m. UTC | #1
On 1/26/21 8:52 AM, Ciara Loftus wrote:
> This commit introduces static perf tracepoints for AF_XDP which
> report information about packet drops, such as the netdev, qid and
> high level reason for the drop. The tracepoint can be
> enabled/disabled by toggling
> /sys/kernel/debug/tracing/events/xsk/xsk_packet_drop/enable

Could you add a rationale to the commit log on why xsk diag stats dump
is insufficient here given you add tracepoints to most locations where
we also bump the counters already? And given diag infra also exposes the
ifindex, queue id, etc, why troubleshooting the xsk socket via ss tool
is not sufficient?

[...]
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index 4faabd1ecfd1..9b850716630b 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -11,6 +11,7 @@
>   
>   #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__
>   
> +#include <linux/bpf_trace.h>
>   #include <linux/if_xdp.h>
>   #include <linux/init.h>
>   #include <linux/sched/mm.h>
> @@ -158,6 +159,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
>   	addr = xp_get_handle(xskb);
>   	err = xskq_prod_reserve_desc(xs->rx, addr, len);
>   	if (err) {
> +		trace_xsk_packet_drop(xs->dev->name, xs->queue_id, XSK_TRACE_DROP_RXQ_FULL);
>   		xs->rx_queue_full++;
>   		return err;

I presume if this will be triggered under stress you'll likely also spam
your trace event log w/ potentially mio of msgs per sec?

>   	}
> @@ -192,6 +194,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
>   
>   	len = xdp->data_end - xdp->data;
>   	if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
> +		trace_xsk_packet_drop(xs->dev->name, xs->queue_id, XSK_TRACE_DROP_PKT_TOO_BIG);
>   		xs->rx_dropped++;
>   		return -ENOSPC;
>   	}
> @@ -516,6 +519,8 @@ static int xsk_generic_xmit(struct sock *sk)
>   		if (err == NET_XMIT_DROP) {
>   			/* SKB completed but not sent */
>   			err = -EBUSY;
> +			trace_xsk_packet_drop(xs->dev->name, xs->queue_id,
> +					      XSK_TRACE_DROP_DRV_ERR_TX);

Is there a reason to not bump error counter here?

>   			goto out;
>   		}
>   
> diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> index 8de01aaac4a0..d3c1ca83c75d 100644
> --- a/net/xdp/xsk_buff_pool.c
> +++ b/net/xdp/xsk_buff_pool.c
> @@ -1,5 +1,6 @@
>   // SPDX-License-Identifier: GPL-2.0
>
Ciara Loftus Jan. 27, 2021, 2:10 p.m. UTC | #2
> On 1/26/21 8:52 AM, Ciara Loftus wrote:
> > This commit introduces static perf tracepoints for AF_XDP which
> > report information about packet drops, such as the netdev, qid and
> > high level reason for the drop. The tracepoint can be
> > enabled/disabled by toggling
> > /sys/kernel/debug/tracing/events/xsk/xsk_packet_drop/enable
> 
> Could you add a rationale to the commit log on why xsk diag stats dump
> is insufficient here given you add tracepoints to most locations where
> we also bump the counters already? And given diag infra also exposes the
> ifindex, queue id, etc, why troubleshooting the xsk socket via ss tool
> is not sufficient?

Thanks for your feedback Daniel.

The stats tell us that there is *a* problem whereas the traces will shed
light on what that problem is. eg. The XSK_TRACE_DROP_PKT_TOO_BIG
trace tells us we dropped a packet on RX due to it being too big vs. ss
would just tell us the packet was dropped.
I will add this rationale in the v3.

> 
> [...]
> > diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> > index 4faabd1ecfd1..9b850716630b 100644
> > --- a/net/xdp/xsk.c
> > +++ b/net/xdp/xsk.c
> > @@ -11,6 +11,7 @@
> >
> >   #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__
> >
> > +#include <linux/bpf_trace.h>
> >   #include <linux/if_xdp.h>
> >   #include <linux/init.h>
> >   #include <linux/sched/mm.h>
> > @@ -158,6 +159,7 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct
> xdp_buff *xdp, u32 len)
> >   	addr = xp_get_handle(xskb);
> >   	err = xskq_prod_reserve_desc(xs->rx, addr, len);
> >   	if (err) {
> > +		trace_xsk_packet_drop(xs->dev->name, xs->queue_id,
> XSK_TRACE_DROP_RXQ_FULL);
> >   		xs->rx_queue_full++;
> >   		return err;
> 
> I presume if this will be triggered under stress you'll likely also spam
> your trace event log w/ potentially mio of msgs per sec?

You are correct. After some consideration I'm going to drop this
trace and some others in the next rev which are not technically
guaranteed drops and could end up spamming the log under
stress as you mentioned.

> 
> >   	}
> > @@ -192,6 +194,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct
> xdp_buff *xdp)
> >
> >   	len = xdp->data_end - xdp->data;
> >   	if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
> > +		trace_xsk_packet_drop(xs->dev->name, xs->queue_id,
> XSK_TRACE_DROP_PKT_TOO_BIG);
> >   		xs->rx_dropped++;
> >   		return -ENOSPC;
> >   	}
> > @@ -516,6 +519,8 @@ static int xsk_generic_xmit(struct sock *sk)
> >   		if (err == NET_XMIT_DROP) {
> >   			/* SKB completed but not sent */
> >   			err = -EBUSY;
> > +			trace_xsk_packet_drop(xs->dev->name, xs-
> >queue_id,
> > +					      XSK_TRACE_DROP_DRV_ERR_TX);
> 
> Is there a reason to not bump error counter here?

This too falls into the not-technically-a-drop category and will be
removed in the next rev. I think a stat would be useful though.
I'll draw up a separate patch. Thanks for the suggestion.

> 
> >   			goto out;
> >   		}
> >
> > diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
> > index 8de01aaac4a0..d3c1ca83c75d 100644
> > --- a/net/xdp/xsk_buff_pool.c
> > +++ b/net/xdp/xsk_buff_pool.c
> > @@ -1,5 +1,6 @@
> >   // SPDX-License-Identifier: GPL-2.0
> >
Daniel Borkmann Jan. 27, 2021, 11:15 p.m. UTC | #3
On 1/27/21 3:10 PM, Loftus, Ciara wrote:
[...]
> 
> Thanks for your feedback Daniel.
> 
> The stats tell us that there is *a* problem whereas the traces will shed
> light on what that problem is. eg. The XSK_TRACE_DROP_PKT_TOO_BIG
> trace tells us we dropped a packet on RX due to it being too big vs. ss
> would just tell us the packet was dropped.

But wouldn't that just be a matter of extending struct xdp_diag_stats +
xsk_diag_put_stats() with more fine-grained counters? Just wondering given
you add the trace_xsk_packet_drop() tracepoints at locations where we
bump most of these counters already for ss tool. I guess this was my confusion -
as far as I can see only the two XSK_TRACE_DROP_{PKT_TOO_BIG,DRV_ERR_TX} are
not covered in xdp_diag_stats. Is there any other reason that the diag is
not sufficient for your use case?

Thanks,
Daniel
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 1df56a32d2df..efe6662d4198 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19440,6 +19440,7 @@  S:	Maintained
 F:	Documentation/networking/af_xdp.rst
 F:	include/net/xdp_sock*
 F:	include/net/xsk_buff_pool.h
+F:	include/trace/events/xsk.h
 F:	include/uapi/linux/if_xdp.h
 F:	include/uapi/linux/xdp_diag.h
 F:	include/net/netns/xdp.h
diff --git a/include/linux/bpf_trace.h b/include/linux/bpf_trace.h
index ddf896abcfb6..477d29b6c2c1 100644
--- a/include/linux/bpf_trace.h
+++ b/include/linux/bpf_trace.h
@@ -3,5 +3,6 @@ 
 #define __LINUX_BPF_TRACE_H__
 
 #include <trace/events/xdp.h>
+#include <trace/events/xsk.h>
 
 #endif /* __LINUX_BPF_TRACE_H__ */
diff --git a/include/trace/events/xsk.h b/include/trace/events/xsk.h
new file mode 100644
index 000000000000..4f5629ba1b0f
--- /dev/null
+++ b/include/trace/events/xsk.h
@@ -0,0 +1,45 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2021 Intel Corporation. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xsk
+
+#if !defined(_TRACE_XSK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_XSK_H
+
+#include <linux/if_xdp.h>
+#include <linux/tracepoint.h>
+
+#define print_reason(val) \
+	__print_symbolic(val, \
+			{ XSK_TRACE_DROP_RXQ_FULL, "rxq full" }, \
+			{ XSK_TRACE_DROP_PKT_TOO_BIG, "packet too big" }, \
+			{ XSK_TRACE_DROP_FQ_EMPTY, "fq empty" }, \
+			{ XSK_TRACE_DROP_POOL_EMPTY, "xskb pool empty" }, \
+			{ XSK_TRACE_DROP_DRV_ERR_TX, "driver error on tx" })
+
+TRACE_EVENT(xsk_packet_drop,
+
+	TP_PROTO(char *name, u16 queue_id, u32 reason),
+
+	TP_ARGS(name, queue_id, reason),
+
+	TP_STRUCT__entry(
+		__field(char *, name)
+		__field(u16, queue_id)
+		__field(u32, reason)
+	),
+
+	TP_fast_assign(
+		__entry->name = name;
+		__entry->queue_id = queue_id;
+		__entry->reason = reason;
+	),
+
+	TP_printk("netdev: %s qid %u reason: %s", __entry->name,
+			__entry->queue_id, print_reason(__entry->reason))
+);
+
+#endif /* _TRACE_XSK_H */
+
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index a78a8096f4ce..5f1b8bf99bb5 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -108,4 +108,12 @@  struct xdp_desc {
 
 /* UMEM descriptor is __u64 */
 
+enum xdp_trace_reasons {
+	XSK_TRACE_DROP_RXQ_FULL,
+	XSK_TRACE_DROP_PKT_TOO_BIG,
+	XSK_TRACE_DROP_FQ_EMPTY,
+	XSK_TRACE_DROP_POOL_EMPTY,
+	XSK_TRACE_DROP_DRV_ERR_TX,
+};
+
 #endif /* _LINUX_IF_XDP_H */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5bbd4884ff7a..442b0d7f9bf8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2362,3 +2362,4 @@  EXPORT_SYMBOL(bpf_stats_enabled_key);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
 EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);
+EXPORT_TRACEPOINT_SYMBOL_GPL(xsk_packet_drop);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4faabd1ecfd1..9b850716630b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -11,6 +11,7 @@ 
 
 #define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__
 
+#include <linux/bpf_trace.h>
 #include <linux/if_xdp.h>
 #include <linux/init.h>
 #include <linux/sched/mm.h>
@@ -158,6 +159,7 @@  static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 	addr = xp_get_handle(xskb);
 	err = xskq_prod_reserve_desc(xs->rx, addr, len);
 	if (err) {
+		trace_xsk_packet_drop(xs->dev->name, xs->queue_id, XSK_TRACE_DROP_RXQ_FULL);
 		xs->rx_queue_full++;
 		return err;
 	}
@@ -192,6 +194,7 @@  static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 	len = xdp->data_end - xdp->data;
 	if (len > xsk_pool_get_rx_frame_size(xs->pool)) {
+		trace_xsk_packet_drop(xs->dev->name, xs->queue_id, XSK_TRACE_DROP_PKT_TOO_BIG);
 		xs->rx_dropped++;
 		return -ENOSPC;
 	}
@@ -516,6 +519,8 @@  static int xsk_generic_xmit(struct sock *sk)
 		if (err == NET_XMIT_DROP) {
 			/* SKB completed but not sent */
 			err = -EBUSY;
+			trace_xsk_packet_drop(xs->dev->name, xs->queue_id,
+					      XSK_TRACE_DROP_DRV_ERR_TX);
 			goto out;
 		}
 
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 8de01aaac4a0..d3c1ca83c75d 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -1,5 +1,6 @@ 
 // SPDX-License-Identifier: GPL-2.0
 
+#include <linux/bpf_trace.h>
 #include <net/xsk_buff_pool.h>
 #include <net/xdp_sock.h>
 #include <net/xdp_sock_drv.h>
@@ -445,8 +446,11 @@  static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
 	u64 addr;
 	bool ok;
 
-	if (pool->free_heads_cnt == 0)
+	if (pool->free_heads_cnt == 0) {
+		trace_xsk_packet_drop(pool->netdev->name, pool->queue_id,
+				      XSK_TRACE_DROP_POOL_EMPTY);
 		return NULL;
+	}
 
 	xskb = pool->free_heads[--pool->free_heads_cnt];
 
@@ -454,6 +458,8 @@  static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
 		if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
 			pool->fq->queue_empty_descs++;
 			xp_release(xskb);
+			trace_xsk_packet_drop(pool->netdev->name, pool->queue_id,
+					      XSK_TRACE_DROP_FQ_EMPTY);
 			return NULL;
 		}
 
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index a78a8096f4ce..5f1b8bf99bb5 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -108,4 +108,12 @@  struct xdp_desc {
 
 /* UMEM descriptor is __u64 */
 
+enum xdp_trace_reasons {
+	XSK_TRACE_DROP_RXQ_FULL,
+	XSK_TRACE_DROP_PKT_TOO_BIG,
+	XSK_TRACE_DROP_FQ_EMPTY,
+	XSK_TRACE_DROP_POOL_EMPTY,
+	XSK_TRACE_DROP_DRV_ERR_TX,
+};
+
 #endif /* _LINUX_IF_XDP_H */