Message ID | 20211012135935.37054-1-lmb@cloudflare.com (mailing list archive) |
---|---|
Headers | show |
Series | Fix up bpf_jit_limit some more | expand |
On Tue, Oct 12, 2021 at 03:59 PM CEST, Lorenz Bauer wrote: > Some more cleanups around bpf_jit_limit to make it readable via sysctl. > > Jakub raised the point that a sysctl toggle is UAPI and therefore > can't be easily changed later on. I tried to find another place to stick > the info, but couldn't find a good one. All the current BPF knobs are in > sysctl. > > There are examples of read only sysctls: > $ sudo find /proc/sys -perm 0444 | wc -l > 90 > > There are no examples of sysctls with mode 0400 however: > $ sudo find /proc/sys -perm 0400 | wc -l > 0 > > Thoughts? I threw this idea out there during LPC already, that it would be cool to use BPF iterators for that. Pinned/preloaded iterators were made for dumping kernel data on demand after all. What is missing is a BPF iterator type that would run the program just once (there is just one thing to print), and a BPF helper to lookup symbol's address. I thought this would require a bit of work, but actually getting a PoC (see below) to work was rather pleasantly straightforward. Perhaps a bit of a hack but I'd consider it as an alternative. 
-- >8 -- >From bef52bec926ea08ccd32a3421d195210ae7d3b38 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki <jakub@cloudflare.com> Date: Wed, 13 Oct 2021 18:54:12 +0200 Subject: [PATCH] RFC: BPF iterator that always runs the program just once The test iterator loads the value of bpf_jit_current kernel global: # bpftool iter pin tools/testing/selftests/bpf/bpf_iter_once.o /sys/fs/bpf/bpf_jit_current libbpf: elf: skipping unrecognized data section(6) .rodata.str1.1 # cat /sys/fs/bpf/bpf_jit_current 2 # for ((i=0; i<10; i++)); do iptables -A OUTPUT -m bpf --bytecode '1,6 0 0 0' -j ACCEPT; done # cat /sys/fs/bpf/bpf_jit_current 12 # iptables -F OUTPUT # cat /sys/fs/bpf/bpf_jit_current 2 Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> --- include/uapi/linux/bpf.h | 7 ++ kernel/bpf/Makefile | 2 +- kernel/bpf/helpers.c | 22 ++++++ kernel/bpf/once_iter.c | 76 +++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 ++ .../selftests/bpf/progs/bpf_iter_once.c | 33 ++++++++ 6 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/once_iter.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_once.c diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fc59d61937a..ec117ebd3d58 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4909,6 +4909,12 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * long bpf_kallsyms_lookup_name(const char *name, u32 name_size) + * Description + * Lookup the address for a symbol. + * Return + * Returns 0 if not found. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5089,6 +5095,7 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(kallsyms_lookup_name), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 7f33098ca63f..f2dc86ea0f2d 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o once_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1ffd469c217f..d2524df54ab5 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -15,6 +15,7 @@ #include <linux/pid_namespace.h> #include <linux/proc_ns.h> #include <linux/security.h> +#include <linux/kallsyms.h> #include "../../lib/kstrtox.h" @@ -1328,6 +1329,25 @@ void bpf_timer_cancel_and_free(void *val) kfree(t); } +BPF_CALL_2(bpf_kallsyms_lookup_name, const char *, name, u32, name_size) +{ + const char *name_end; + + name_end = strnchr(name, name_size, 0); + if (!name_end) + return -EINVAL; + + return kallsyms_lookup_name(name); +} + +static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { + .func = bpf_kallsyms_lookup_name, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct 
bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1404,6 +1424,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_timer_start_proto; case BPF_FUNC_timer_cancel: return &bpf_timer_cancel_proto; + case BPF_FUNC_kallsyms_lookup_name: + return &bpf_kallsyms_lookup_name_proto; default: break; } diff --git a/kernel/bpf/once_iter.c b/kernel/bpf/once_iter.c new file mode 100644 index 000000000000..f2635f1b0043 --- /dev/null +++ b/kernel/bpf/once_iter.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021 Cloudflare, Inc. */ + +#include <linux/bpf.h> +#include <linux/init.h> +#include <linux/seq_file.h> + +static struct {} empty; + +static void *once_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos == 0) + ++*pos; + return ∅ +} + +static void *once_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} + +struct bpf_iter__once { + __bpf_md_ptr(struct bpf_iter_meta *, meta); +}; + +DEFINE_BPF_ITER_FUNC(once, struct bpf_iter_meta *meta) + +static int once_seq_show(struct seq_file *seq, void *v) +{ + return 0; +} + +static void once_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_meta meta; + struct bpf_iter__once ctx; + struct bpf_prog *prog; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, true); + if (!prog) + return; + + meta.seq = seq; + ctx.meta = &meta; + bpf_iter_run_prog(prog, &ctx); +} + +static const struct seq_operations once_seq_ops = { + .start = once_seq_start, + .next = once_seq_next, + .stop = once_seq_stop, + .show = once_seq_show, +}; + +static const struct bpf_iter_seq_info once_seq_info = { + .seq_ops = &once_seq_ops, + .init_seq_private = NULL, + .fini_seq_private = NULL, + .seq_priv_size = 0, +}; + +static struct bpf_iter_reg once_reg_info = { + .target = "once", + .feature = 0, + .ctx_arg_info_size = 0, + .ctx_arg_info = {}, + .seq_info = &once_seq_info, +}; + +static int __init 
once_iter_init(void) +{ + return bpf_iter_reg_target(&once_reg_info); +} +late_initcall(once_iter_init); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6fc59d61937a..ec117ebd3d58 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4909,6 +4909,12 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * long bpf_kallsyms_lookup_name(const char *name, u32 name_size) + * Description + * Lookup the address for a symbol. + * Return + * Returns 0 if not found. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5089,6 +5095,7 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(kallsyms_lookup_name), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_once.c b/tools/testing/selftests/bpf/progs/bpf_iter_once.c new file mode 100644 index 000000000000..e5e6d779eb51 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_once.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Cloudflare, Inc. */ + +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("iter/once") +int dump_once(struct bpf_iter__once *ctx) +{ + const char sym_name[] = "bpf_jit_current"; + struct seq_file *seq = ctx->meta->seq; + unsigned long sym_addr; + s64 value = 0; + int err; + + sym_addr = bpf_kallsyms_lookup_name(sym_name, sizeof(sym_name)); + if (!sym_addr) { + BPF_SEQ_PRINTF(seq, "failed to find %s address\n", sym_name); + return 0; + } + + err = bpf_probe_read_kernel(&value, sizeof(value), (void *)sym_addr); + if (err) { + BPF_SEQ_PRINTF(seq, "failed to read from %s address\n", sym_name); + return 0; + } + + BPF_SEQ_PRINTF(seq, "%ld\n", value); + + return 0; +} -- 2.31.1
On Wed, 13 Oct 2021 at 20:56, Jakub Sitnicki <jakub@cloudflare.com> wrote: > > On Tue, Oct 12, 2021 at 03:59 PM CEST, Lorenz Bauer wrote: > > Some more cleanups around bpf_jit_limit to make it readable via sysctl. > > > > Jakub raised the point that a sysctl toggle is UAPI and therefore > > can't be easily changed later on. I tried to find another place to stick > > the info, but couldn't find a good one. All the current BPF knobs are in > > sysctl. > > > > There are examples of read only sysctls: > > $ sudo find /proc/sys -perm 0444 | wc -l > > 90 > > > > There are no examples of sysctls with mode 0400 however: > > $ sudo find /proc/sys -perm 0400 | wc -l > > 0 > > > > Thoughts? > > I threw this idea out there during LPC already, that it would be cool to > use BPF iterators for that. Pinned/preloaded iterators were made for > dumping kernel data on demand after all. > > What is missing is a BPF iterator type that would run the program just > once (there is just one thing to print), and a BPF helper to lookup > symbol's address. > > I thought this would require a bit of work, but actually getting a PoC > (see below) to work was rather pleasantly straightforward. > > Perhaps a bit of a hack but I'd consider it as an alternative. I spoke to Jakub, I won't have time to work on this myself. So I'll drop this patch from the series and send a v3 with just the fixes to bpf_jit_limit.