[RFC,bpf-next,03/15] samples: bpf: split out common bpf progs to its own file

Message ID: 20210528235250.2635167-4-memxor@gmail.com
State: RFC
Delegated to: BPF
Series: Improve XDP samples usability and output

Checks

Context                         Check    Description
netdev/cover_letter             success
netdev/fixes_present            success
netdev/patch_count              success
netdev/tree_selection           success  Clearly marked for bpf-next
netdev/subject_prefix           success
netdev/cc_maintainers           warning  1 maintainers not CCed: hawk@kernel.org
netdev/source_inline            success  Was 0 now: 0
netdev/verify_signedoff         success
netdev/module_param             success  Was 0 now: 0
netdev/build_32bit              success  Errors and warnings before: 0 this patch: 0
netdev/kdoc                     success  Errors and warnings before: 0 this patch: 0
netdev/verify_fixes             success
netdev/checkpatch               warning  WARNING: Improper SPDX comment style for 'samples/bpf/xdp_sample_kern.h', please use '/*' instead
                                         WARNING: Missing or malformed SPDX-License-Identifier tag in line 1
                                         WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/build_allmodconfig_warn  success  Errors and warnings before: 0 this patch: 0
netdev/header_inline            success

Commit Message

Kumar Kartikeya Dwivedi May 28, 2021, 11:52 p.m. UTC
This is done so that these BPF programs can later be reused and shared
among multiple samples.

We use xdp_redirect_cpu_kern.c as the base and build further support on
top of it in subsequent patches (mostly adding a few things that
xdp_monitor does but that are still missing here).

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 samples/bpf/xdp_sample_kern.h | 220 ++++++++++++++++++++++++++++++++++
 1 file changed, 220 insertions(+)
 create mode 100644 samples/bpf/xdp_sample_kern.h

Comments

Andrii Nakryiko May 30, 2021, 3:05 a.m. UTC | #1
On Fri, May 28, 2021 at 4:53 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> This is done so that these BPF programs can later be reused and shared
> among multiple samples.
>
> We use xdp_redirect_cpu_kern.c as the base and build further support on
> top of it in subsequent patches (mostly adding a few things that
> xdp_monitor does but that are still missing here).
>
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
> ---
>  samples/bpf/xdp_sample_kern.h | 220 ++++++++++++++++++++++++++++++++++
>  1 file changed, 220 insertions(+)
>  create mode 100644 samples/bpf/xdp_sample_kern.h
>
> diff --git a/samples/bpf/xdp_sample_kern.h b/samples/bpf/xdp_sample_kern.h

Instead of doing it as a header, can you please use BPF static linking?
I think that's a better approach and a good showcase for anyone who
would like to use static linking for their BPF programs.
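
A rough sketch of what that flow could look like, with hypothetical file
names (the shared code becomes its own translation unit instead of a
header, and the objects are combined with the libbpf static linker):

/* Hypothetical build sketch, not part of this patch:
 *
 *   # compile the shared progs/maps and the per-sample progs separately
 *   clang -O2 -g -target bpf -c xdp_sample.bpf.c -o xdp_sample.bpf.o
 *   clang -O2 -g -target bpf -c xdp_redirect_cpu.bpf.c -o xdp_redirect_cpu.bpf.o
 *
 *   # statically link them into one object and generate a skeleton
 *   bpftool gen object xdp_redirect_cpu.combined.o \
 *           xdp_sample.bpf.o xdp_redirect_cpu.bpf.o
 *   bpftool gen skeleton xdp_redirect_cpu.combined.o > xdp_redirect_cpu.skel.h
 *
 * xdp_sample.bpf.c would carry the maps and trace_xdp_* programs that are
 * currently in xdp_sample_kern.h; each sample's .bpf.c keeps only its own
 * XDP program.
 */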

> new file mode 100644
> index 000000000000..bb809542ac20
> --- /dev/null
> +++ b/samples/bpf/xdp_sample_kern.h
> @@ -0,0 +1,220 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
> +#pragma once
> +
> +#include <uapi/linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define MAX_CPUS 64
> +
> +/* Common stats data record to keep userspace more simple */
> +struct datarec {
> +       __u64 processed;
> +       __u64 dropped;
> +       __u64 issue;
> +       __u64 xdp_pass;
> +       __u64 xdp_drop;
> +       __u64 xdp_redirect;
> +};
> +
> +/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
> + * feedback.  Redirect TX errors can be caught via a tracepoint.
> + */
> +struct {
> +       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +       __type(key, u32);
> +       __type(value, struct datarec);
> +       __uint(max_entries, 1);
> +} rx_cnt SEC(".maps");
> +
> +/* Used by trace point */
> +struct {
> +       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +       __type(key, u32);
> +       __type(value, struct datarec);
> +       __uint(max_entries, 2);
> +       /* TODO: have entries for all possible errno's */
> +} redirect_err_cnt SEC(".maps");
> +
> +/* Used by trace point */
> +struct {
> +       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +       __type(key, u32);
> +       __type(value, struct datarec);
> +       __uint(max_entries, MAX_CPUS);
> +} cpumap_enqueue_cnt SEC(".maps");

One way to squeeze a bit more performance would be to use global
variables instead of maps:

struct datarec cpu_map_enqueue_cnts[MAX_CPUS][MAX_CPUS];

and other PERCPU_ARRAY arrays could be just one-dimensional arrays.

You'd need to ensure each value sits on its own cache-line, of course.
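
A minimal sketch of that idea (illustrative only; the layout, names and
the 64-byte cache-line assumption are not from this patch):

/* Global per-CPU counters instead of BPF_MAP_TYPE_PERCPU_ARRAY maps.
 * Each slot is aligned to a cache line (assumed to be 64 bytes here)
 * so that CPUs updating adjacent slots do not false-share.
 */
struct datarec_cacheline {
	struct datarec d;
} __attribute__((aligned(64)));

/* Two-dimensional: one possible reading is [updating cpu][target cpu]. */
struct datarec_cacheline cpu_map_enqueue_cnts[MAX_CPUS][MAX_CPUS];

/* The single-entry per-CPU maps become one-dimensional arrays. */
struct datarec_cacheline rx_cnts[MAX_CPUS];
struct datarec_cacheline exception_cnts[MAX_CPUS];

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception_global(struct xdp_exception_ctx *ctx)
{
	u32 cpu = bpf_get_smp_processor_id();

	if (cpu < MAX_CPUS)
		exception_cnts[cpu].d.dropped++;	/* no map lookup */
	return 0;
}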

> +
> +/* Used by trace point */
> +struct {
> +       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +       __type(key, u32);
> +       __type(value, struct datarec);
> +       __uint(max_entries, 1);
> +} cpumap_kthread_cnt SEC(".maps");
> +

[...]

> +
> +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
> + * Code in:         kernel/include/trace/events/xdp.h
> + */
> +struct cpumap_enqueue_ctx {
> +       u64 __pad;              // First 8 bytes are not accessible by bpf code
> +       int map_id;             //      offset:8;  size:4; signed:1;
> +       u32 act;                //      offset:12; size:4; signed:0;
> +       int cpu;                //      offset:16; size:4; signed:1;
> +       unsigned int drops;     //      offset:20; size:4; signed:0;
> +       unsigned int processed; //      offset:24; size:4; signed:0;
> +       int to_cpu;             //      offset:28; size:4; signed:1;
> +};

If you used vmlinux.h, this is already in there as struct
trace_event_raw_xdp_cpumap_enqueue; similarly for the other tracepoints.
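
A sketch of how the handler could then look (assuming the sample is
switched over to vmlinux.h, which generally cannot be mixed with the
uapi/kernel header includes used above):

/* Sketch only: the hand-written ctx struct goes away entirely. */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct trace_event_raw_xdp_cpumap_enqueue *ctx)
{
	__u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;
	return 0;
}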

> +
> +SEC("tracepoint/xdp/xdp_cpumap_enqueue")
> +int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
> +{
> +       u32 to_cpu = ctx->to_cpu;
> +       struct datarec *rec;
> +
> +       if (to_cpu >= MAX_CPUS)
> +               return 1;
> +
> +       rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
> +       if (!rec)
> +               return 0;
> +       rec->processed += ctx->processed;
> +       rec->dropped   += ctx->drops;
> +
> +       /* Record bulk events, then userspace can calc average bulk size */
> +       if (ctx->processed > 0)
> +               rec->issue += 1;
> +
> +       /* Inception: It's possible to detect overload situations, via
> +        * this tracepoint.  This can be used for creating a feedback
> +        * loop to XDP, which can take appropriate actions to mitigate
> +        * this overload situation.
> +        */
> +       return 0;
> +}
> +

[...]

Patch

diff --git a/samples/bpf/xdp_sample_kern.h b/samples/bpf/xdp_sample_kern.h
new file mode 100644
index 000000000000..bb809542ac20
--- /dev/null
+++ b/samples/bpf/xdp_sample_kern.h
@@ -0,0 +1,220 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
+#pragma once
+
+#include <uapi/linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_CPUS 64
+
+/* Common stats data record to keep userspace more simple */
+struct datarec {
+	__u64 processed;
+	__u64 dropped;
+	__u64 issue;
+	__u64 xdp_pass;
+	__u64 xdp_drop;
+	__u64 xdp_redirect;
+};
+
+/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
+ * feedback.  Redirect TX errors can be caught via a tracepoint.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 1);
+} rx_cnt SEC(".maps");
+
+/* Used by trace point */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 2);
+	/* TODO: have entries for all possible errno's */
+} redirect_err_cnt SEC(".maps");
+
+/* Used by trace point */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, MAX_CPUS);
+} cpumap_enqueue_cnt SEC(".maps");
+
+/* Used by trace point */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 1);
+} cpumap_kthread_cnt SEC(".maps");
+
+/* Used by trace point */
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct datarec);
+	__uint(max_entries, 1);
+} exception_cnt SEC(".maps");
+
+/*** Trace point code ***/
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
+ * Code in:                kernel/include/trace/events/xdp.h
+ */
+struct xdp_redirect_ctx {
+	u64 __pad;	// First 8 bytes are not accessible by bpf code
+	int prog_id;	//	offset:8;  size:4; signed:1;
+	u32 act;	//	offset:12  size:4; signed:0;
+	int ifindex;	//	offset:16  size:4; signed:1;
+	int err;	//	offset:20  size:4; signed:1;
+	int to_ifindex;	//	offset:24  size:4; signed:1;
+	u32 map_id;	//	offset:28  size:4; signed:0;
+	int map_index;	//	offset:32  size:4; signed:1;
+};			//	offset:36
+
+enum {
+	XDP_REDIRECT_SUCCESS = 0,
+	XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline
+int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+{
+	u32 key = XDP_REDIRECT_ERROR;
+	struct datarec *rec;
+	int err = ctx->err;
+
+	if (!err)
+		key = XDP_REDIRECT_SUCCESS;
+
+	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
+	if (!rec)
+		return 0;
+	rec->dropped += 1;
+
+	return 0; /* Indicate event was filtered (no further processing) */
+	/*
+	 * Returning 1 here would allow e.g. a perf-record tracepoint
+	 * to see and record these events, but it doesn't work well
+	 * in practice, as stopping perf-record also unloads this
+	 * bpf_prog.  Plus, there is additional overhead of doing so.
+	 */
+}
+
+SEC("tracepoint/xdp/xdp_redirect_err")
+int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+SEC("tracepoint/xdp/xdp_redirect_map_err")
+int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+{
+	return xdp_redirect_collect_stat(ctx);
+}
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
+ * Code in:                kernel/include/trace/events/xdp.h
+ */
+struct xdp_exception_ctx {
+	u64 __pad;	// First 8 bytes are not accessible by bpf code
+	int prog_id;	//	offset:8;  size:4; signed:1;
+	u32 act;	//	offset:12; size:4; signed:0;
+	int ifindex;	//	offset:16; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_exception")
+int trace_xdp_exception(struct xdp_exception_ctx *ctx)
+{
+	struct datarec *rec;
+	u32 key = 0;
+
+	rec = bpf_map_lookup_elem(&exception_cnt, &key);
+	if (!rec)
+		return 1;
+	rec->dropped += 1;
+
+	return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
+ * Code in:         kernel/include/trace/events/xdp.h
+ */
+struct cpumap_enqueue_ctx {
+	u64 __pad;		// First 8 bytes are not accessible by bpf code
+	int map_id;		//	offset:8;  size:4; signed:1;
+	u32 act;		//	offset:12; size:4; signed:0;
+	int cpu;		//	offset:16; size:4; signed:1;
+	unsigned int drops;	//	offset:20; size:4; signed:0;
+	unsigned int processed;	//	offset:24; size:4; signed:0;
+	int to_cpu;		//	offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_enqueue")
+int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
+{
+	u32 to_cpu = ctx->to_cpu;
+	struct datarec *rec;
+
+	if (to_cpu >= MAX_CPUS)
+		return 1;
+
+	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
+	if (!rec)
+		return 0;
+	rec->processed += ctx->processed;
+	rec->dropped   += ctx->drops;
+
+	/* Record bulk events, then userspace can calc average bulk size */
+	if (ctx->processed > 0)
+		rec->issue += 1;
+
+	/* Inception: It's possible to detect overload situations, via
+	 * this tracepoint.  This can be used for creating a feedback
+	 * loop to XDP, which can take appropriate actions to mitigate
+	 * this overload situation.
+	 */
+	return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
+ * Code in:         kernel/include/trace/events/xdp.h
+ */
+struct cpumap_kthread_ctx {
+	u64 __pad;			// First 8 bytes are not accessible
+	int map_id;			//	offset:8;  size:4; signed:1;
+	u32 act;			//	offset:12; size:4; signed:0;
+	int cpu;			//	offset:16; size:4; signed:1;
+	unsigned int drops;		//	offset:20; size:4; signed:0;
+	unsigned int processed;		//	offset:24; size:4; signed:0;
+	int sched;			//	offset:28; size:4; signed:1;
+	unsigned int xdp_pass;		//	offset:32; size:4; signed:0;
+	unsigned int xdp_drop;		//	offset:36; size:4; signed:0;
+	unsigned int xdp_redirect;	//	offset:40; size:4; signed:0;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_kthread")
+int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
+{
+	struct datarec *rec;
+	u32 key = 0;
+
+	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
+	if (!rec)
+		return 0;
+	rec->processed += ctx->processed;
+	rec->dropped   += ctx->drops;
+	rec->xdp_pass  += ctx->xdp_pass;
+	rec->xdp_drop  += ctx->xdp_drop;
+	rec->xdp_redirect  += ctx->xdp_redirect;
+
+	/* Count times kthread yielded CPU via schedule call */
+	if (ctx->sched)
+		rec->issue++;
+
+	return 0;
+}