
[bpf-next,7/8] libbpf: implement __arg_ctx fallback logic

Message ID 20231220233127.1990417-8-andrii@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Series Libbpf-side __arg_ctx fallback support

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 11 maintainers not CCed: hawk@kernel.org sdf@google.com haoluo@google.com kpsingh@kernel.org martin.lau@linux.dev jolsa@kernel.org netdev@vger.kernel.org kuba@kernel.org yonghong.song@linux.dev song@kernel.org john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch fail CHECK: No space is necessary after a cast ERROR: do not use assignment in if condition WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc

Commit Message

Andrii Nakryiko Dec. 20, 2023, 11:31 p.m. UTC
Out of all the special global func arg tag annotations, __arg_ctx is
practically the most immediately useful and the most critical to have
working across a multitude of kernel versions, if possible. This would
allow end users to write much simpler code if __arg_ctx semantics
worked for older kernels that don't natively understand
btf_decl_tag("arg:ctx") in verifier logic.

Luckily, it is possible to ensure __arg_ctx works on old kernels through
a bit of extra work done by libbpf, at least in a lot of common cases.

To explain the overall idea, we need to go back to how the context
argument was supported in global funcs before __arg_ctx support was
added. This was done based on special struct name checks in the kernel.
E.g., for BPF_PROG_TYPE_PERF_EVENT the expectation is that an argument
of type `struct bpf_perf_event_data *` marks that argument as
PTR_TO_CTX. This is all good as long as the global function is used
from only one BPF program type, which is often not the case. If the
same subprog has to be called from, say, kprobe and perf_event program
types, there is no single definition that would satisfy the BPF
verifier. The subprog will have a context argument either for kprobe
(if using the bpf_user_pt_regs_t struct name) or perf_event (with the
bpf_perf_event_data struct name), but not both.

This limitation was the reason to add btf_decl_tag("arg:ctx"), making
the actual argument type unimportant, so that the user can just define
a "generic" signature:

  __noinline int global_subprog(void *ctx __arg_ctx) { ... }
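
For illustration (not part of this patch), a minimal sketch of what
this enables, assuming __arg_ctx expands to the btf_decl_tag("arg:ctx")
attribute described above and that vmlinux.h provides struct pt_regs
and struct bpf_perf_event_data (attach point and names are arbitrary):

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  /* define it manually if the bpf_helpers.h in use doesn't provide it */
  #ifndef __arg_ctx
  #define __arg_ctx __attribute__((btf_decl_tag("arg:ctx")))
  #endif

  /* one generic global subprog, shared by both programs below */
  __noinline int global_subprog(void *ctx __arg_ctx)
  {
          return 0;
  }

  SEC("kprobe/do_nanosleep")
  int handle_kprobe(struct pt_regs *ctx)
  {
          return global_subprog(ctx);
  }

  SEC("perf_event")
  int handle_perf_event(struct bpf_perf_event_data *ctx)
  {
          return global_subprog(ctx);
  }

  char LICENSE[] SEC("license") = "GPL";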

I won't belabor how libbpf implements subprograms; see the huge
comment next to the bpf_object__relocate_calls() function. The idea is
that each main/entry BPF program gets its own copy of global_subprog's
code appended.

This per-program copy of the global subprog code *and* the associated
func_info .BTF.ext information, pointing to a FUNC -> FUNC_PROTO BTF
type chain, allows libbpf to simulate __arg_ctx behavior transparently,
even if the kernel doesn't yet support the __arg_ctx annotation
natively.

The idea is straightforward: each time we append a global subprog's
code and func_info information, we adjust its FUNC -> FUNC_PROTO type
information, if necessary (that is, libbpf can detect the presence of
btf_decl_tag("arg:ctx") just like the BPF verifier would).

The rest is just mechanical and somewhat painful BTF manipulation
code. It's painful because we need to clone FUNC -> FUNC_PROTO, instead
of reusing it, as the same FUNC -> FUNC_PROTO chain might be used by
another main BPF program within the same BPF object, so we can't just
modify it in place (and cloning BTF types within the same struct btf
object is painful due to constant memory invalidation, see comments in
the code). The uploaded BPF object's BTF information has to work for
all BPF programs at the same time.
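
The invalidation issue is the usual one with libbpf's growable BTF:
every btf__add_*() call may reallocate the underlying type and string
data. A tiny sketch of the resulting re-fetch pattern (public API only):

  void clone_sketch(void)
  {
          struct btf *btf = btf__new_empty();
          int int_id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
          const struct btf_type *t = btf__type_by_id(btf, int_id);

          btf__add_ptr(btf, int_id);        /* may invalidate 't'... */
          t = btf__type_by_id(btf, int_id); /* ...so always re-fetch */

          btf__free(btf);
  }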

Once we have FUNC -> FUNC_PROTO clones, we make sure that instead of
some `void *ctx` parameter definition, we have the expected `struct
bpf_perf_event_data *ctx` definition (as far as the BPF verifier and
kernel are concerned), which will mark it as context for the BPF
verifier. The same global subprog relocated and copied into another
main BPF program will get different type information according to that
main program's type. It all works out in the end in a completely
transparent way for the end user.
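
Dumped back as C, the net effect on the perf_event program's copy of
the subprog is roughly (a sketch, reusing the example above):

  /* as written by the user (and as compiled): */
  int global_subprog(void *ctx __arg_ctx);

  /* as the kernel sees it after libbpf's .BTF.ext fix up: */
  int global_subprog(struct bpf_perf_event_data *ctx);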

Libbpf maintains a program type -> expected context struct name
mapping internally. Note that not all BPF program types have a named
context struct, so this approach won't work for such programs (just
like it didn't before __arg_ctx). So native __arg_ctx support is still
important to have in the kernel to provide generic context support
across all BPF program types.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/lib/bpf/libbpf.c | 239 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 231 insertions(+), 8 deletions(-)

Comments

Alexei Starovoitov Dec. 22, 2023, 1:26 a.m. UTC | #1
On Wed, Dec 20, 2023 at 03:31:26PM -0800, Andrii Nakryiko wrote:
> This limitation was the reason to add btf_decl_tag("arg:ctx"), making
> the actual argument type not important, so that user can just define
> "generic" signature:
> 
>   __noinline int global_subprog(void *ctx __arg_ctx) { ... }

Just realized that we probably need to enforce, both in libbpf doing
the rewrite and in the kernel, that __arg_ctx is either a valid
'struct correct_type_for_bpf_prog *' or 'void *'.

Otherwise the user will get surprising behavior when
int foo(struct __sk_buff *ctx __arg_ctx)
{
  ctx->len;
}
gets rewritten to 'struct pt_regs *ctx' based on the prog type
while all asm instructions inside the prog were compiled with 'struct __sk_buff'
and CO-RE performs relocations against that type.
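
For reference, a hypothetical sketch of what such a check could look
like on the libbpf side (helper and parameter names are made up; a
real check would also need to skip modifiers/typedefs, as the patch's
skip_mods_and_typedefs() does):

static bool arg_ctx_type_is_ok(const struct btf *btf, const struct btf_param *p,
                               const char *exp_ctx_name)
{
        const struct btf_type *t = btf__type_by_id(btf, p->type);

        if (!t || !btf_is_ptr(t))
                return false;
        if (t->type == 0)       /* 'void *' is always acceptable */
                return true;
        t = btf__type_by_id(btf, t->type);
        return t && btf_is_struct(t) &&
               strcmp(btf__str_by_offset(btf, t->name_off), exp_ctx_name) == 0;
}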
 
> +static struct {
> +	enum bpf_prog_type prog_type;
> +	const char *ctx_name;
> +} global_ctx_map[] = {
> +	{ BPF_PROG_TYPE_CGROUP_DEVICE,           "bpf_cgroup_dev_ctx" },
> +	{ BPF_PROG_TYPE_CGROUP_SKB,              "__sk_buff" },
> +	{ BPF_PROG_TYPE_CGROUP_SOCK,             "bpf_sock" },
> +	{ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,        "bpf_sock_addr" },
> +	{ BPF_PROG_TYPE_CGROUP_SOCKOPT,          "bpf_sockopt" },
> +	{ BPF_PROG_TYPE_CGROUP_SYSCTL,           "bpf_sysctl" },
> +	{ BPF_PROG_TYPE_FLOW_DISSECTOR,          "__sk_buff" },
> +	{ BPF_PROG_TYPE_KPROBE,                  "bpf_user_pt_regs_t" },
> +	{ BPF_PROG_TYPE_LWT_IN,                  "__sk_buff" },
> +	{ BPF_PROG_TYPE_LWT_OUT,                 "__sk_buff" },
> +	{ BPF_PROG_TYPE_LWT_SEG6LOCAL,           "__sk_buff" },
> +	{ BPF_PROG_TYPE_LWT_XMIT,                "__sk_buff" },
> +	{ BPF_PROG_TYPE_NETFILTER,               "bpf_nf_ctx" },
> +	{ BPF_PROG_TYPE_PERF_EVENT,              "bpf_perf_event_data" },
> +	{ BPF_PROG_TYPE_RAW_TRACEPOINT,          "bpf_raw_tracepoint_args" },
> +	{ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
> +	{ BPF_PROG_TYPE_SCHED_ACT,               "__sk_buff" },
> +	{ BPF_PROG_TYPE_SCHED_CLS,               "__sk_buff" },
> +	{ BPF_PROG_TYPE_SK_LOOKUP,               "bpf_sk_lookup" },
> +	{ BPF_PROG_TYPE_SK_MSG,                  "sk_msg_md" },
> +	{ BPF_PROG_TYPE_SK_REUSEPORT,            "sk_reuseport_md" },
> +	{ BPF_PROG_TYPE_SK_SKB,                  "__sk_buff" },
> +	{ BPF_PROG_TYPE_SOCK_OPS,                "bpf_sock_ops" },
> +	{ BPF_PROG_TYPE_SOCKET_FILTER,           "__sk_buff" },
> +	{ BPF_PROG_TYPE_XDP,                     "xdp_md" },

We already share the .c files (like relo_core.c) between the kernel
and libbpf; let's share here as well to avoid copy-paste.
All of the above is available in include/linux/bpf_types.h

> +		/* clone fn/fn_proto, unless we already did it for another arg */
> +		if (func_rec->type_id == orig_fn_id) {

It feels like the body of this 'if' could be factored out into a separate helper function.

> -static int
> -bpf_object__load_progs(struct bpf_object *obj, int log_level)
> +static int bpf_object_load_progs(struct bpf_object *obj, int log_level)

pls keep __ convention.
Jiri Olsa Dec. 22, 2023, 1:15 p.m. UTC | #2
On Wed, Dec 20, 2023 at 03:31:26PM -0800, Andrii Nakryiko wrote:
> Out of all special global func arg tag annotations, __arg_ctx is
> practically is the most immediately useful and most critical to have
> working across multitude kernel version, if possible. This would allow
> end users to write much simpler code if __arg_ctx semantics worked for
> older kernels that don't natively understand btf_decl_tag("arg:ctx") in
> verifier logic.

curious what's the workaround now.. having a separate function for
each program type instead of just one global function? I wonder if the
ebpf/cilium library could do the same thing

whole patchset lgtm:

Acked-by: Jiri Olsa <jolsa@kernel.org>

jirka

> 
> Luckily, it is possible to ensure __arg_ctx works on old kernels through
> a bit of extra work done by libbpf, at least in a lot of common cases.
> 
> To explain the overall idea, we need to go back at how context argument
> was supported in global funcs before __arg_ctx support was added. This
> was done based on special struct name checks in kernel. E.g., for
> BPF_PROG_TYPE_PERF_EVENT the expectation is that argument type `struct
> bpf_perf_event_data *` mark that argument as PTR_TO_CTX. This is all
> good as long as global function is used from the same BPF program types
> only, which is often not the case. If the same subprog has to be called
> from, say, kprobe and perf_event program types, there is no single
> definition that would satisfy BPF verifier. Subprog will have context
> argument either for kprobe (if using bpf_user_pt_regs_t struct name) or
> perf_event (with bpf_perf_event_data struct name), but not both.
> 
> This limitation was the reason to add btf_decl_tag("arg:ctx"), making
> the actual argument type not important, so that user can just define
> "generic" signature:
> 
>   __noinline int global_subprog(void *ctx __arg_ctx) { ... }
> 
> I won't belabor how libbpf is implementing subprograms, see a huge
> comment next to bpf_object__relocate_calls() function. The idea is that
> each main/entry BPF program gets its own copy of global_subprog's code
> appended.
> 
> This per-program copy of global subprog code *and* associated func_info
> .BTF.ext information, pointing to FUNC -> FUNC_PROTO BTF type chain
> allows libbpf to simulate __arg_ctx behavior transparently, even if the
> kernel doesn't yet support __arg_ctx annotation natively.
> 
> The idea is straightforward: each time we append global subprog's code
> and func_info information, we adjust its FUNC -> FUNC_PROTO type
> information, if necessary (that is, libbpf can detect the presence of
> btf_decl_tag("arg:ctx") just like BPF verifier would do it).
> 
> The rest is just mechanical and somewhat painful BTF manipulation code.
> It's painful because we need to clone FUNC -> FUNC_PROTO, instead of
> reusing it, as same FUNC -> FUNC_PROTO chain might be used by another
> main BPF program within the same BPF object, so we can't just modify it
> in-place (and cloning BTF types within the same struct btf object is
> painful due to constant memory invalidation, see comments in code).
> Uploaded BPF object's BTF information has to work for all BPF
> programs at the same time.
> 
> Once we have FUNC -> FUNC_PROTO clones, we make sure that instead of
> using some `void *ctx` parameter definition, we have an expected `struct
> bpf_perf_event_data *ctx` definition (as far as BPF verifier and kernel
> is concerned), which will mark it as context for BPF verifier. Same
> global subprog relocated and copied into another main BPF program will
> get different type information according to main program's type. It all
> works out in the end in a completely transparent way for end user.
> 
> Libbpf maintains internal program type -> expected context struct name
> mapping internally. Note, not all BPF program types have named context
> struct, so this approach won't work for such programs (just like it
> didn't before __arg_ctx). So native __arg_ctx is still important to have
> in kernel to have generic context support across all BPF program types.
> 
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  tools/lib/bpf/libbpf.c | 239 +++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 231 insertions(+), 8 deletions(-)
> 
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 92171bcf4c25..1a7354b6a289 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -6168,7 +6168,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj,
>  	int err;
>  
>  	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
> -	 * supprot func/line info
> +	 * support func/line info
>  	 */
>  	if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
>  		return 0;
> @@ -6650,8 +6650,223 @@ static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *pr
>  	return 0;
>  }
>  
> -static int
> -bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
> +static struct {
> +	enum bpf_prog_type prog_type;
> +	const char *ctx_name;
> +} global_ctx_map[] = {
> +	{ BPF_PROG_TYPE_CGROUP_DEVICE,           "bpf_cgroup_dev_ctx" },
> +	{ BPF_PROG_TYPE_CGROUP_SKB,              "__sk_buff" },
> +	{ BPF_PROG_TYPE_CGROUP_SOCK,             "bpf_sock" },
> +	{ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,        "bpf_sock_addr" },
> +	{ BPF_PROG_TYPE_CGROUP_SOCKOPT,          "bpf_sockopt" },
> +	{ BPF_PROG_TYPE_CGROUP_SYSCTL,           "bpf_sysctl" },
> +	{ BPF_PROG_TYPE_FLOW_DISSECTOR,          "__sk_buff" },
> +	{ BPF_PROG_TYPE_KPROBE,                  "bpf_user_pt_regs_t" },
> +	{ BPF_PROG_TYPE_LWT_IN,                  "__sk_buff" },
> +	{ BPF_PROG_TYPE_LWT_OUT,                 "__sk_buff" },
> +	{ BPF_PROG_TYPE_LWT_SEG6LOCAL,           "__sk_buff" },
> +	{ BPF_PROG_TYPE_LWT_XMIT,                "__sk_buff" },
> +	{ BPF_PROG_TYPE_NETFILTER,               "bpf_nf_ctx" },
> +	{ BPF_PROG_TYPE_PERF_EVENT,              "bpf_perf_event_data" },
> +	{ BPF_PROG_TYPE_RAW_TRACEPOINT,          "bpf_raw_tracepoint_args" },
> +	{ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
> +	{ BPF_PROG_TYPE_SCHED_ACT,               "__sk_buff" },
> +	{ BPF_PROG_TYPE_SCHED_CLS,               "__sk_buff" },
> +	{ BPF_PROG_TYPE_SK_LOOKUP,               "bpf_sk_lookup" },
> +	{ BPF_PROG_TYPE_SK_MSG,                  "sk_msg_md" },
> +	{ BPF_PROG_TYPE_SK_REUSEPORT,            "sk_reuseport_md" },
> +	{ BPF_PROG_TYPE_SK_SKB,                  "__sk_buff" },
> +	{ BPF_PROG_TYPE_SOCK_OPS,                "bpf_sock_ops" },
> +	{ BPF_PROG_TYPE_SOCKET_FILTER,           "__sk_buff" },
> +	{ BPF_PROG_TYPE_XDP,                     "xdp_md" },
> +	/* all other program types don't have "named" context structs */
> +};
> +
> +/* Check if main program or global subprog's function prototype has `arg:ctx`
> + * argument tags, and, if necessary, substitute correct type to match what BPF
> + * verifier would expect, taking into account specific program type. This
> + * allows to support __arg_ctx tag transparently on old kernels that don't yet
> + * have a native support for it in the verifier, making user's life much
> + * easier.
> + */
> +static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
> +{
> +	const char *ctx_name = NULL, *ctx_tag = "arg:ctx";
> +	struct bpf_func_info_min *func_rec;
> +	struct btf_type *fn_t, *fn_proto_t;
> +	struct btf *btf = obj->btf;
> +	const struct btf_type *t;
> +	struct btf_param *p;
> +	int ptr_id = 0, struct_id, tag_id, orig_fn_id;
> +	int i, j, n, arg_idx, arg_cnt, err, name_off, rec_idx;
> +	int *orig_ids;
> +
> +	/* no .BTF.ext, no problem */
> +	if (!obj->btf_ext || !prog->func_info)
> +		return 0;
> +
> +	/* some BPF program types just don't have named context structs, so
> +	 * this fallback mechanism doesn't work for them
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
> +		if (global_ctx_map[i].prog_type != prog->type)
> +			continue;
> +		ctx_name = global_ctx_map[i].ctx_name;
> +		break;
> +	}
> +	if (!ctx_name)
> +		return 0;
> +
> +	/* remember original func BTF IDs to detect if we already cloned them */
> +	orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
> +	if (!orig_ids)
> +		return -ENOMEM;
> +	for (i = 0; i < prog->func_info_cnt; i++) {
> +		func_rec = prog->func_info + prog->func_info_rec_size * i;
> +		orig_ids[i] = func_rec->type_id;
> +	}
> +
> +	/* go through each DECL_TAG with "arg:ctx" and see if it points to one
> +	 * of our subprogs; if yes and subprog is global and needs adjustment,
> +	 * clone and adjust FUNC -> FUNC_PROTO combo
> +	 */
> +	for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
> +		/* only DECL_TAG with "arg:ctx" value are interesting */
> +		t = btf__type_by_id(btf, i);
> +		if (!btf_is_decl_tag(t))
> +			continue;
> +		if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
> +			continue;
> +
> +		/* only global funcs need adjustment, if at all */
> +		orig_fn_id = t->type;
> +		fn_t = btf_type_by_id(btf, orig_fn_id);
> +		if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
> +			continue;
> +
> +		/* sanity check FUNC -> FUNC_PROTO chain, just in case */
> +		fn_proto_t = btf_type_by_id(btf, fn_t->type);
> +		if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
> +			continue;
> +
> +		/* find corresponding func_info record */
> +		func_rec = NULL;
> +		for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
> +			if (orig_ids[rec_idx] == t->type) {
> +				func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
> +				break;
> +			}
> +		}
> +		/* current main program doesn't call into this subprog */
> +		if (!func_rec)
> +			continue;
> +
> +		/* some more sanity checking of DECL_TAG */
> +		arg_cnt = btf_vlen(fn_proto_t);
> +		arg_idx = btf_decl_tag(t)->component_idx;
> +		if (arg_idx < 0 || arg_idx >= arg_cnt)
> +			continue;
> +
> +		/* check if existing parameter already matches verifier expectations */
> +		p = &btf_params(fn_proto_t)[arg_idx];
> +		t = skip_mods_and_typedefs(btf, p->type, NULL);
> +		if (btf_is_ptr(t) &&
> +		    (t = skip_mods_and_typedefs(btf, t->type, NULL)) &&
> +		    btf_is_struct(t) &&
> +		    strcmp(btf__str_by_offset(btf, t->name_off), ctx_name) == 0) {
> +			continue; /* no need for fix up */
> +		}
> +
> +		/* clone fn/fn_proto, unless we already did it for another arg */
> +		if (func_rec->type_id == orig_fn_id) {
> +			int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
> +
> +			/* Note that each btf__add_xxx() operation invalidates
> +			 * all btf_type and string pointers, so we need to be
> +			 * very careful when cloning BTF types. BTF type
> +			 * pointers have to be always refetched. And to avoid
> +			 * problems with invalidated string pointers, we
> +			 * add empty strings initially, then just fix up
> +			 * name_off offsets in place. Offsets are stable for
> +			 * existing strings, so that works out.
> +			 */
> +			name_off = fn_t->name_off; /* we are about to invalidate fn_t */
> +			ret_type_id = fn_proto_t->type; /* and fn_proto_t as well */
> +			orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
> +
> +			/* clone FUNC first, btf__add_func() enforces
> +			 * non-empty name, so use entry program's name as
> +			 * a placeholder, which we replace immediately
> +			 */
> +			fn_id = btf__add_func(btf, prog->name, btf_func_linkage(fn_t), fn_t->type);
> +			if (fn_id < 0)
> +				return -EINVAL;
> +			fn_t = btf_type_by_id(btf, fn_id);
> +			fn_t->name_off = name_off; /* reuse original string */
> +			fn_t->type = fn_id + 1; /* we can predict FUNC_PROTO ID */
> +
> +			/* clone FUNC_PROTO and its params now */
> +			fn_proto_id = btf__add_func_proto(btf, ret_type_id);
> +			if (fn_proto_id < 0) {
> +				err = -EINVAL;
> +				goto err_out;
> +			}
> +			for (j = 0; j < arg_cnt; j++) {
> +				/* copy original parameter data */
> +				t = btf_type_by_id(btf, orig_proto_id);
> +				p = &btf_params(t)[j];
> +				name_off = p->name_off;
> +
> +				err = btf__add_func_param(btf, "", p->type);
> +				if (err)
> +					goto err_out;
> +				fn_proto_t = btf_type_by_id(btf, fn_proto_id);
> +				p = &btf_params(fn_proto_t)[j];
> +				p->name_off = name_off; /* use remembered str offset */
> +			}
> +
> +			/* point func_info record to a cloned FUNC type */
> +			func_rec->type_id = fn_id;
> +		}
> +
> +		/* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
> +		 * we do it just once per main BPF program, as all global
> +		 * funcs share the same program type, so need only PTR ->
> +		 * STRUCT type chain
> +		 */
> +		if (ptr_id == 0) {
> +			struct_id = btf__add_struct(btf, ctx_name, 0);
> +			ptr_id = btf__add_ptr(btf, struct_id);
> +			if (ptr_id < 0 || struct_id < 0) {
> +				err = -EINVAL;
> +				goto err_out;
> +			}
> +		}
> +
> +		/* for completeness, clone DECL_TAG and point it to cloned param */
> +		tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
> +		if (tag_id < 0) {
> +			err = -EINVAL;
> +			goto err_out;
> +		}
> +
> +		/* all the BTF manipulations invalidated pointers, refetch them */
> +		fn_t = btf_type_by_id(btf, func_rec->type_id);
> +		fn_proto_t = btf_type_by_id(btf, fn_t->type);
> +
> +		/* fix up type ID pointed to by param */
> +		p = &btf_params(fn_proto_t)[arg_idx];
> +		p->type = ptr_id;
> +	}
> +
> +	free(orig_ids);
> +	return 0;
> +err_out:
> +	free(orig_ids);
> +	return err;
> +}
> +
> +static int bpf_object_relocate(struct bpf_object *obj, const char *targ_btf_path)
>  {
>  	struct bpf_program *prog;
>  	size_t i, j;
> @@ -6732,19 +6947,28 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
>  			}
>  		}
>  	}
> -	/* Process data relos for main programs */
>  	for (i = 0; i < obj->nr_programs; i++) {
>  		prog = &obj->programs[i];
>  		if (prog_is_subprog(obj, prog))
>  			continue;
>  		if (!prog->autoload)
>  			continue;
> +
> +		/* Process data relos for main programs */
>  		err = bpf_object__relocate_data(obj, prog);
>  		if (err) {
>  			pr_warn("prog '%s': failed to relocate data references: %d\n",
>  				prog->name, err);
>  			return err;
>  		}
> +
> +		/* Fix up .BTF.ext information, if necessary */
> +		err = bpf_program_fixup_func_info(obj, prog);
> +		if (err) {
> +			pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
> +				prog->name, err);
> +			return err;
> +		}
>  	}
>  
>  	return 0;
> @@ -7456,8 +7680,7 @@ static int bpf_program_record_relos(struct bpf_program *prog)
>  	return 0;
>  }
>  
> -static int
> -bpf_object__load_progs(struct bpf_object *obj, int log_level)
> +static int bpf_object_load_progs(struct bpf_object *obj, int log_level)
>  {
>  	struct bpf_program *prog;
>  	size_t i;
> @@ -8093,10 +8316,10 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
>  	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
>  	err = err ? : bpf_object__sanitize_maps(obj);
>  	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
> -	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
> +	err = err ? : bpf_object_relocate(obj, obj->btf_custom_path ? : target_btf_path);
>  	err = err ? : bpf_object_load_btf(obj);
>  	err = err ? : bpf_object_create_maps(obj);
> -	err = err ? : bpf_object__load_progs(obj, extra_log_level);
> +	err = err ? : bpf_object_load_progs(obj, extra_log_level);
>  	err = err ? : bpf_object_init_prog_arrays(obj);
>  	err = err ? : bpf_object_prepare_struct_ops(obj);
>  
> -- 
> 2.34.1
> 
>
Andrii Nakryiko Jan. 2, 2024, 5:06 p.m. UTC | #3
On Thu, Dec 21, 2023 at 5:26 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Wed, Dec 20, 2023 at 03:31:26PM -0800, Andrii Nakryiko wrote:
> > This limitation was the reason to add btf_decl_tag("arg:ctx"), making
> > the actual argument type not important, so that user can just define
> > "generic" signature:
> >
> >   __noinline int global_subprog(void *ctx __arg_ctx) { ... }
>
> Just realized that we probably need to enforce in both libbpf doing
> rewrite and in the kernel that __arg_ctx is either valid
> 'struct correct_type_for_bpf_prog *' or 'void *'.
>
> Otherwise the user will get surprising behavior when
> int foo(struct __sk_buff *ctx __arg_ctx)
> {
>   ctx->len;
> }
> will get rewritten to 'struct pt_regs *ctx' based on prog type
> while all asm instructions inside prog were compiled with 'struct __sk_buff'
> and CO_RE performs relocations against that type.

Nothing really prevents users from misusing types even today, so it
doesn't seem like a common problem, luckily.

But really the problem is that some program types don't have an
associated struct name at all, but still have a valid context. Like for
LSM/TRACING programs it's a u64[]/u64 *. For tracepoints the context is
defined as just plain u64 (according to bpf_ctx_convert), and so on.

Oh, and there is the KPROBE program type, where it's a (typedef)
bpf_user_pt_regs_t, and for backwards compatibility reasons we also
allow the basically non-existent `struct bpf_user_pt_regs_t`.

So it gets messy. Either way, I have a patch set coming up for
kernel-side __arg_xxx tags support, so let's discuss it there?

>
> > +static struct {
> > +     enum bpf_prog_type prog_type;
> > +     const char *ctx_name;
> > +} global_ctx_map[] = {
> > +     { BPF_PROG_TYPE_CGROUP_DEVICE,           "bpf_cgroup_dev_ctx" },
> > +     { BPF_PROG_TYPE_CGROUP_SKB,              "__sk_buff" },
> > +     { BPF_PROG_TYPE_CGROUP_SOCK,             "bpf_sock" },
> > +     { BPF_PROG_TYPE_CGROUP_SOCK_ADDR,        "bpf_sock_addr" },
> > +     { BPF_PROG_TYPE_CGROUP_SOCKOPT,          "bpf_sockopt" },
> > +     { BPF_PROG_TYPE_CGROUP_SYSCTL,           "bpf_sysctl" },
> > +     { BPF_PROG_TYPE_FLOW_DISSECTOR,          "__sk_buff" },
> > +     { BPF_PROG_TYPE_KPROBE,                  "bpf_user_pt_regs_t" },
> > +     { BPF_PROG_TYPE_LWT_IN,                  "__sk_buff" },
> > +     { BPF_PROG_TYPE_LWT_OUT,                 "__sk_buff" },
> > +     { BPF_PROG_TYPE_LWT_SEG6LOCAL,           "__sk_buff" },
> > +     { BPF_PROG_TYPE_LWT_XMIT,                "__sk_buff" },
> > +     { BPF_PROG_TYPE_NETFILTER,               "bpf_nf_ctx" },
> > +     { BPF_PROG_TYPE_PERF_EVENT,              "bpf_perf_event_data" },
> > +     { BPF_PROG_TYPE_RAW_TRACEPOINT,          "bpf_raw_tracepoint_args" },
> > +     { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
> > +     { BPF_PROG_TYPE_SCHED_ACT,               "__sk_buff" },
> > +     { BPF_PROG_TYPE_SCHED_CLS,               "__sk_buff" },
> > +     { BPF_PROG_TYPE_SK_LOOKUP,               "bpf_sk_lookup" },
> > +     { BPF_PROG_TYPE_SK_MSG,                  "sk_msg_md" },
> > +     { BPF_PROG_TYPE_SK_REUSEPORT,            "sk_reuseport_md" },
> > +     { BPF_PROG_TYPE_SK_SKB,                  "__sk_buff" },
> > +     { BPF_PROG_TYPE_SOCK_OPS,                "bpf_sock_ops" },
> > +     { BPF_PROG_TYPE_SOCKET_FILTER,           "__sk_buff" },
> > +     { BPF_PROG_TYPE_XDP,                     "xdp_md" },
>
> We already share the .c files (like relo_core.c) between kernel and libbpf
> let's share here as well to avoid copy paste.
> All of the above is available in include/linux/bpf_types.h

True, but libbpf sources are built both as part of the kernel repo and
separately on GitHub, so we'd need to start syncing
include/linux/bpf_types.h into tools/include, which is a bit of an
inconvenience.

But most of all I don't want to do it for a few other reasons.

This table was manually constructed by inspecting struct bpf_ctx_convert with:

  bpftool btf dump file /sys/kernel/btf/vmlinux format c | \
      rg bpf_ctx_convert -A65 | rg _prog

And it has to be manual because of other program types that don't have
an associated struct for context. So even if bpf_types.h was synced, we
couldn't use it as is.

But if your concern is the maintainability of this, I don't think
that's a problem at all. Even if we add a new program type with its own
struct name for context, we don't even have to extend this table
(though we might, if we want to), because at that point the kernel is
guaranteed to have native in-kernel support for __arg_ctx, so libbpf
doesn't have to do type rewriting.

Also, this is probably the first explicit table that shows which
struct names should be used for a global subprog's context argument (if
not using __arg_ctx, of course). That's not really a reason per se, but
it beats reading kernel code and (non-trivially) figuring out that one
needs to look at how struct bpf_ctx_convert is generated, etc. Having
this explicit table makes it much easier to link as a reference for
those special context type names.

>
> > +             /* clone fn/fn_proto, unless we already did it for another arg */
> > +             if (func_rec->type_id == orig_fn_id) {
>
> It feels that body of this 'if' can be factored out as a separate helper function.
>

Sure, I'll try to factor it out.

> > -static int
> > -bpf_object__load_progs(struct bpf_object *obj, int log_level)
> > +static int bpf_object_load_progs(struct bpf_object *obj, int log_level)
>
> pls keep __ convention.

replied on another patch, I'll do a conversion to consistent naming in
the next revision
Andrii Nakryiko Jan. 2, 2024, 5:11 p.m. UTC | #4
On Fri, Dec 22, 2023 at 5:15 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Wed, Dec 20, 2023 at 03:31:26PM -0800, Andrii Nakryiko wrote:
> > Out of all special global func arg tag annotations, __arg_ctx is
> > practically is the most immediately useful and most critical to have
> > working across multitude kernel version, if possible. This would allow
> > end users to write much simpler code if __arg_ctx semantics worked for
> > older kernels that don't natively understand btf_decl_tag("arg:ctx") in
> > verifier logic.
>
> curious what's the workaround now.. having separate function for each
> program type instead of just one global function? I wonder ebpf/cilium
> library could do the same thing

You mean what users do today? Something like this:

/* static, so types don't matter */

static int common_logic(void *ctx, ...) { ... }

/* global */ int kprobe_logic(struct bpf_user_pt_regs_t *ctx)
{
    return common_logic(ctx);
}

/* global */ int perf_event_logic(struct bpf_perf_event_data *ctx)
{
    return common_logic(ctx);
}

And so on. So it's not great, but it works.

The problem arises when you have nested global functions that need to
pass context.


/* global */ int kprobe_logic_1(struct bpf_user_pt_regs_t *ctx)
{
    ...
}

/* global */ int kprobe_logic_2(struct bpf_user_pt_regs_t *ctx)
{
    int x;

    x = kprobe_logic_1(ctx);
    ...
}


With this nesting of global funcs the above trick doesn't work
anymore, because common_logic() can't call the per-program global
function.
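
For comparison, with __arg_ctx the nesting is not a problem, since
every level can use the generic signature (a sketch):

__noinline int kprobe_logic_1(void *ctx __arg_ctx)
{
        return 0;
}

__noinline int kprobe_logic_2(void *ctx __arg_ctx)
{
        return kprobe_logic_1(ctx);
}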

>
> whole patchset lgtm:
>
> Acked-by: Jiri Olsa <jolsa@kernel.org>
>

Thanks!


> jirka
>
> >
> > Luckily, it is possible to ensure __arg_ctx works on old kernels through
> > a bit of extra work done by libbpf, at least in a lot of common cases.
> >
> > To explain the overall idea, we need to go back at how context argument
> > was supported in global funcs before __arg_ctx support was added. This
> > was done based on special struct name checks in kernel. E.g., for
> > BPF_PROG_TYPE_PERF_EVENT the expectation is that argument type `struct
> > bpf_perf_event_data *` mark that argument as PTR_TO_CTX. This is all
> > good as long as global function is used from the same BPF program types
> > only, which is often not the case. If the same subprog has to be called
> > from, say, kprobe and perf_event program types, there is no single
> > definition that would satisfy BPF verifier. Subprog will have context
> > argument either for kprobe (if using bpf_user_pt_regs_t struct name) or
> > perf_event (with bpf_perf_event_data struct name), but not both.
> >
> > This limitation was the reason to add btf_decl_tag("arg:ctx"), making
> > the actual argument type not important, so that user can just define
> > "generic" signature:
> >
> >   __noinline int global_subprog(void *ctx __arg_ctx) { ... }
> >
> > I won't belabor how libbpf is implementing subprograms, see a huge
> > comment next to bpf_object__relocate_calls() function. The idea is that
> > each main/entry BPF program gets its own copy of global_subprog's code
> > appended.
> >
> > This per-program copy of global subprog code *and* associated func_info
> > .BTF.ext information, pointing to FUNC -> FUNC_PROTO BTF type chain
> > allows libbpf to simulate __arg_ctx behavior transparently, even if the
> > kernel doesn't yet support __arg_ctx annotation natively.
> >
> > The idea is straightforward: each time we append global subprog's code
> > and func_info information, we adjust its FUNC -> FUNC_PROTO type
> > information, if necessary (that is, libbpf can detect the presence of
> > btf_decl_tag("arg:ctx") just like BPF verifier would do it).
> >
> > The rest is just mechanical and somewhat painful BTF manipulation code.
> > It's painful because we need to clone FUNC -> FUNC_PROTO, instead of
> > reusing it, as same FUNC -> FUNC_PROTO chain might be used by another
> > main BPF program within the same BPF object, so we can't just modify it
> > in-place (and cloning BTF types within the same struct btf object is
> > painful due to constant memory invalidation, see comments in code).
> > Uploaded BPF object's BTF information has to work for all BPF
> > programs at the same time.
> >
> > Once we have FUNC -> FUNC_PROTO clones, we make sure that instead of
> > using some `void *ctx` parameter definition, we have an expected `struct
> > bpf_perf_event_data *ctx` definition (as far as BPF verifier and kernel
> > is concerned), which will mark it as context for BPF verifier. Same
> > global subprog relocated and copied into another main BPF program will
> > get different type information according to main program's type. It all
> > works out in the end in a completely transparent way for end user.
> >
> > Libbpf maintains internal program type -> expected context struct name
> > mapping internally. Note, not all BPF program types have named context
> > struct, so this approach won't work for such programs (just like it
> > didn't before __arg_ctx). So native __arg_ctx is still important to have
> > in kernel to have generic context support across all BPF program types.
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  tools/lib/bpf/libbpf.c | 239 +++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 231 insertions(+), 8 deletions(-)
> >

please trim

[...]
Andrii Nakryiko Jan. 3, 2024, 12:30 a.m. UTC | #5
On Tue, Jan 2, 2024 at 9:06 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Thu, Dec 21, 2023 at 5:26 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Wed, Dec 20, 2023 at 03:31:26PM -0800, Andrii Nakryiko wrote:
> > > This limitation was the reason to add btf_decl_tag("arg:ctx"), making
> > > the actual argument type not important, so that user can just define
> > > "generic" signature:
> > >
> > >   __noinline int global_subprog(void *ctx __arg_ctx) { ... }
> >
> > Just realized that we probably need to enforce in both libbpf doing
> > rewrite and in the kernel that __arg_ctx is either valid
> > 'struct correct_type_for_bpf_prog *' or 'void *'.
> >
> > Otherwise the user will get surprising behavior when
> > int foo(struct __sk_buff *ctx __arg_ctx)
> > {
> >   ctx->len;
> > }
> > will get rewritten to 'struct pt_regs *ctx' based on prog type
> > while all asm instructions inside prog were compiled with 'struct __sk_buff'
> > and CO_RE performs relocations against that type.
>
> Nothing really prevents users from misusing types even today, so it
> doesn't seem like a common problem, luckily.
>
> But really the problem is that some program types don't have an
> associated struct name at all, but still a valid context. Like for
> LSM/TRACING programs it's a u64[]/u64 *. For tracepoints context is
> defined as just plain u64 (according to bpf_ctx_convert), and so on.
>
> Oh, and there is KPROBE program type, where it's (typedef)
> bpf_user_pt_regs_t, and for backwards compatibility reasons we also
> allow basically non-existing `struct bpf_user_pt_regs_t`.
>
> So it gets messy. Either way, I have a patch set coming up for
> kernel-side __arg_xxx tags support, so let's discuss it there?
>
> >
> > > +static struct {
> > > +     enum bpf_prog_type prog_type;
> > > +     const char *ctx_name;
> > > +} global_ctx_map[] = {
> > > +     { BPF_PROG_TYPE_CGROUP_DEVICE,           "bpf_cgroup_dev_ctx" },
> > > +     { BPF_PROG_TYPE_CGROUP_SKB,              "__sk_buff" },
> > > +     { BPF_PROG_TYPE_CGROUP_SOCK,             "bpf_sock" },
> > > +     { BPF_PROG_TYPE_CGROUP_SOCK_ADDR,        "bpf_sock_addr" },
> > > +     { BPF_PROG_TYPE_CGROUP_SOCKOPT,          "bpf_sockopt" },
> > > +     { BPF_PROG_TYPE_CGROUP_SYSCTL,           "bpf_sysctl" },
> > > +     { BPF_PROG_TYPE_FLOW_DISSECTOR,          "__sk_buff" },
> > > +     { BPF_PROG_TYPE_KPROBE,                  "bpf_user_pt_regs_t" },
> > > +     { BPF_PROG_TYPE_LWT_IN,                  "__sk_buff" },
> > > +     { BPF_PROG_TYPE_LWT_OUT,                 "__sk_buff" },
> > > +     { BPF_PROG_TYPE_LWT_SEG6LOCAL,           "__sk_buff" },
> > > +     { BPF_PROG_TYPE_LWT_XMIT,                "__sk_buff" },
> > > +     { BPF_PROG_TYPE_NETFILTER,               "bpf_nf_ctx" },
> > > +     { BPF_PROG_TYPE_PERF_EVENT,              "bpf_perf_event_data" },
> > > +     { BPF_PROG_TYPE_RAW_TRACEPOINT,          "bpf_raw_tracepoint_args" },
> > > +     { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
> > > +     { BPF_PROG_TYPE_SCHED_ACT,               "__sk_buff" },
> > > +     { BPF_PROG_TYPE_SCHED_CLS,               "__sk_buff" },
> > > +     { BPF_PROG_TYPE_SK_LOOKUP,               "bpf_sk_lookup" },
> > > +     { BPF_PROG_TYPE_SK_MSG,                  "sk_msg_md" },
> > > +     { BPF_PROG_TYPE_SK_REUSEPORT,            "sk_reuseport_md" },
> > > +     { BPF_PROG_TYPE_SK_SKB,                  "__sk_buff" },
> > > +     { BPF_PROG_TYPE_SOCK_OPS,                "bpf_sock_ops" },
> > > +     { BPF_PROG_TYPE_SOCKET_FILTER,           "__sk_buff" },
> > > +     { BPF_PROG_TYPE_XDP,                     "xdp_md" },
> >
> > We already share the .c files (like relo_core.c) between kernel and libbpf
> > let's share here as well to avoid copy paste.
> > All of the above is available in include/linux/bpf_types.h
>
> True, but libbpf sources are built both as part of the kernel repo and
> separately on Github, so we'll need to start syncing
> include/linux/bpf_types.h into tools/include, so that's a bit of
> inconvenience.
>
> But most of all I don't want to do it for a few other reasons.
>
> This table was manually constructed by inspecting struct bpf_ctx_convert with:
>
>   bpftool btf dump file /sys/kernel/btf/vmlinux format c | rg
> bpf_ctx_convert -A65 | rg _prog
>
> And it has to be manual because of other program types that don't have
> associated struct for context. So even if there was bpf_types.h, we
> can't use it as is.

Another headache I realized as I was reading someone else's patch is
all the #ifdef CONFIG_xxx checks, which we'd need to fake to even get
a full list of program types. In short, it's more trouble than it's
worth.

>
> But, if your concern is maintainability of this, I don't think that's
> a problem at all. Even if we add a new program type with its own
> struct name for context, we don't even have to extend this table
> (though we might, if we want to), because at that point kernel is
> guaranteed to have in-kernel native support for __arg_ctx, so libbpf
> doesn't have to do type rewriting.
>
> Also, this probably is the first explicit table that shows which
> struct names should be used for global subprog context argument (if
> not using __arg_ctx, of course). Which is not really a reason per se,
> but it beats reading kernel code, and (non-trivially) figuring out
> that one needs to look how struct bpf_ctx_convert is generated, etc.
> Having this explicit table is much easier to link as a reference for
> those special context type names.
>
> >
> > > +             /* clone fn/fn_proto, unless we already did it for another arg */
> > > +             if (func_rec->type_id == orig_fn_id) {
> >
> > It feels that body of this 'if' can be factored out as a separate helper function.
> >
>
> Sure, I'll try to factor it out.
>
> > > -static int
> > > -bpf_object__load_progs(struct bpf_object *obj, int log_level)
> > > +static int bpf_object_load_progs(struct bpf_object *obj, int log_level)
> >
> > pls keep __ convention.
>
> replied on another patch, I'll do a conversion to consistent naming in
> the next revision

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 92171bcf4c25..1a7354b6a289 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6168,7 +6168,7 @@  reloc_prog_func_and_line_info(const struct bpf_object *obj,
 	int err;
 
 	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
-	 * supprot func/line info
+	 * support func/line info
 	 */
 	if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
 		return 0;
@@ -6650,8 +6650,223 @@  static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *pr
 	return 0;
 }
 
-static int
-bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
+static struct {
+	enum bpf_prog_type prog_type;
+	const char *ctx_name;
+} global_ctx_map[] = {
+	{ BPF_PROG_TYPE_CGROUP_DEVICE,           "bpf_cgroup_dev_ctx" },
+	{ BPF_PROG_TYPE_CGROUP_SKB,              "__sk_buff" },
+	{ BPF_PROG_TYPE_CGROUP_SOCK,             "bpf_sock" },
+	{ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,        "bpf_sock_addr" },
+	{ BPF_PROG_TYPE_CGROUP_SOCKOPT,          "bpf_sockopt" },
+	{ BPF_PROG_TYPE_CGROUP_SYSCTL,           "bpf_sysctl" },
+	{ BPF_PROG_TYPE_FLOW_DISSECTOR,          "__sk_buff" },
+	{ BPF_PROG_TYPE_KPROBE,                  "bpf_user_pt_regs_t" },
+	{ BPF_PROG_TYPE_LWT_IN,                  "__sk_buff" },
+	{ BPF_PROG_TYPE_LWT_OUT,                 "__sk_buff" },
+	{ BPF_PROG_TYPE_LWT_SEG6LOCAL,           "__sk_buff" },
+	{ BPF_PROG_TYPE_LWT_XMIT,                "__sk_buff" },
+	{ BPF_PROG_TYPE_NETFILTER,               "bpf_nf_ctx" },
+	{ BPF_PROG_TYPE_PERF_EVENT,              "bpf_perf_event_data" },
+	{ BPF_PROG_TYPE_RAW_TRACEPOINT,          "bpf_raw_tracepoint_args" },
+	{ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
+	{ BPF_PROG_TYPE_SCHED_ACT,               "__sk_buff" },
+	{ BPF_PROG_TYPE_SCHED_CLS,               "__sk_buff" },
+	{ BPF_PROG_TYPE_SK_LOOKUP,               "bpf_sk_lookup" },
+	{ BPF_PROG_TYPE_SK_MSG,                  "sk_msg_md" },
+	{ BPF_PROG_TYPE_SK_REUSEPORT,            "sk_reuseport_md" },
+	{ BPF_PROG_TYPE_SK_SKB,                  "__sk_buff" },
+	{ BPF_PROG_TYPE_SOCK_OPS,                "bpf_sock_ops" },
+	{ BPF_PROG_TYPE_SOCKET_FILTER,           "__sk_buff" },
+	{ BPF_PROG_TYPE_XDP,                     "xdp_md" },
+	/* all other program types don't have "named" context structs */
+};
+
+/* Check if main program or global subprog's function prototype has `arg:ctx`
+ * argument tags, and, if necessary, substitute correct type to match what BPF
+ * verifier would expect, taking into account specific program type. This
+ * allows to support __arg_ctx tag transparently on old kernels that don't yet
+ * have a native support for it in the verifier, making user's life much
+ * easier.
+ */
+static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
+{
+	const char *ctx_name = NULL, *ctx_tag = "arg:ctx";
+	struct bpf_func_info_min *func_rec;
+	struct btf_type *fn_t, *fn_proto_t;
+	struct btf *btf = obj->btf;
+	const struct btf_type *t;
+	struct btf_param *p;
+	int ptr_id = 0, struct_id, tag_id, orig_fn_id;
+	int i, j, n, arg_idx, arg_cnt, err, name_off, rec_idx;
+	int *orig_ids;
+
+	/* no .BTF.ext, no problem */
+	if (!obj->btf_ext || !prog->func_info)
+		return 0;
+
+	/* some BPF program types just don't have named context structs, so
+	 * this fallback mechanism doesn't work for them
+	 */
+	for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
+		if (global_ctx_map[i].prog_type != prog->type)
+			continue;
+		ctx_name = global_ctx_map[i].ctx_name;
+		break;
+	}
+	if (!ctx_name)
+		return 0;
+
+	/* remember original func BTF IDs to detect if we already cloned them */
+	orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
+	if (!orig_ids)
+		return -ENOMEM;
+	for (i = 0; i < prog->func_info_cnt; i++) {
+		func_rec = prog->func_info + prog->func_info_rec_size * i;
+		orig_ids[i] = func_rec->type_id;
+	}
+
+	/* go through each DECL_TAG with "arg:ctx" and see if it points to one
+	 * of our subprogs; if yes and subprog is global and needs adjustment,
+	 * clone and adjust FUNC -> FUNC_PROTO combo
+	 */
+	for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
+		/* only DECL_TAG with "arg:ctx" value are interesting */
+		t = btf__type_by_id(btf, i);
+		if (!btf_is_decl_tag(t))
+			continue;
+		if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
+			continue;
+
+		/* only global funcs need adjustment, if at all */
+		orig_fn_id = t->type;
+		fn_t = btf_type_by_id(btf, orig_fn_id);
+		if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
+			continue;
+
+		/* sanity check FUNC -> FUNC_PROTO chain, just in case */
+		fn_proto_t = btf_type_by_id(btf, fn_t->type);
+		if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
+			continue;
+
+		/* find corresponding func_info record */
+		func_rec = NULL;
+		for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
+			if (orig_ids[rec_idx] == t->type) {
+				func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
+				break;
+			}
+		}
+		/* current main program doesn't call into this subprog */
+		if (!func_rec)
+			continue;
+
+		/* some more sanity checking of DECL_TAG */
+		arg_cnt = btf_vlen(fn_proto_t);
+		arg_idx = btf_decl_tag(t)->component_idx;
+		if (arg_idx < 0 || arg_idx >= arg_cnt)
+			continue;
+
+		/* check if existing parameter already matches verifier expectations */
+		p = &btf_params(fn_proto_t)[arg_idx];
+		t = skip_mods_and_typedefs(btf, p->type, NULL);
+		if (btf_is_ptr(t) &&
+		    (t = skip_mods_and_typedefs(btf, t->type, NULL)) &&
+		    btf_is_struct(t) &&
+		    strcmp(btf__str_by_offset(btf, t->name_off), ctx_name) == 0) {
+			continue; /* no need for fix up */
+		}
+
+		/* clone fn/fn_proto, unless we already did it for another arg */
+		if (func_rec->type_id == orig_fn_id) {
+			int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
+
+			/* Note that each btf__add_xxx() operation invalidates
+			 * all btf_type and string pointers, so we need to be
+			 * very careful when cloning BTF types. BTF type
+			 * pointers have to be always refetched. And to avoid
+			 * problems with invalidated string pointers, we
+			 * add empty strings initially, then just fix up
+			 * name_off offsets in place. Offsets are stable for
+			 * existing strings, so that works out.
+			 */
+			name_off = fn_t->name_off; /* we are about to invalidate fn_t */
+			ret_type_id = fn_proto_t->type; /* and fn_proto_t as well */
+			orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
+
+			/* clone FUNC first, btf__add_func() enforces
+			 * non-empty name, so use entry program's name as
+			 * a placeholder, which we replace immediately
+			 */
+			fn_id = btf__add_func(btf, prog->name, btf_func_linkage(fn_t), fn_t->type);
+			if (fn_id < 0)
+				return -EINVAL;
+			fn_t = btf_type_by_id(btf, fn_id);
+			fn_t->name_off = name_off; /* reuse original string */
+			fn_t->type = fn_id + 1; /* we can predict FUNC_PROTO ID */
+
+			/* clone FUNC_PROTO and its params now */
+			fn_proto_id = btf__add_func_proto(btf, ret_type_id);
+			if (fn_proto_id < 0) {
+				err = -EINVAL;
+				goto err_out;
+			}
+			for (j = 0; j < arg_cnt; j++) {
+				/* copy original parameter data */
+				t = btf_type_by_id(btf, orig_proto_id);
+				p = &btf_params(t)[j];
+				name_off = p->name_off;
+
+				err = btf__add_func_param(btf, "", p->type);
+				if (err)
+					goto err_out;
+				fn_proto_t = btf_type_by_id(btf, fn_proto_id);
+				p = &btf_params(fn_proto_t)[j];
+				p->name_off = name_off; /* use remembered str offset */
+			}
+
+			/* point func_info record to a cloned FUNC type */
+			func_rec->type_id = fn_id;
+		}
+
+		/* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
+		 * we do it just once per main BPF program, as all global
+		 * funcs share the same program type, so need only PTR ->
+		 * STRUCT type chain
+		 */
+		if (ptr_id == 0) {
+			struct_id = btf__add_struct(btf, ctx_name, 0);
+			ptr_id = btf__add_ptr(btf, struct_id);
+			if (ptr_id < 0 || struct_id < 0) {
+				err = -EINVAL;
+				goto err_out;
+			}
+		}
+
+		/* for completeness, clone DECL_TAG and point it to cloned param */
+		tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
+		if (tag_id < 0) {
+			err = -EINVAL;
+			goto err_out;
+		}
+
+		/* all the BTF manipulations invalidated pointers, refetch them */
+		fn_t = btf_type_by_id(btf, func_rec->type_id);
+		fn_proto_t = btf_type_by_id(btf, fn_t->type);
+
+		/* fix up type ID pointed to by param */
+		p = &btf_params(fn_proto_t)[arg_idx];
+		p->type = ptr_id;
+	}
+
+	free(orig_ids);
+	return 0;
+err_out:
+	free(orig_ids);
+	return err;
+}
+
+static int bpf_object_relocate(struct bpf_object *obj, const char *targ_btf_path)
 {
 	struct bpf_program *prog;
 	size_t i, j;
@@ -6732,19 +6947,28 @@  bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 			}
 		}
 	}
-	/* Process data relos for main programs */
 	for (i = 0; i < obj->nr_programs; i++) {
 		prog = &obj->programs[i];
 		if (prog_is_subprog(obj, prog))
 			continue;
 		if (!prog->autoload)
 			continue;
+
+		/* Process data relos for main programs */
 		err = bpf_object__relocate_data(obj, prog);
 		if (err) {
 			pr_warn("prog '%s': failed to relocate data references: %d\n",
 				prog->name, err);
 			return err;
 		}
+
+		/* Fix up .BTF.ext information, if necessary */
+		err = bpf_program_fixup_func_info(obj, prog);
+		if (err) {
+			pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
+				prog->name, err);
+			return err;
+		}
 	}
 
 	return 0;
@@ -7456,8 +7680,7 @@  static int bpf_program_record_relos(struct bpf_program *prog)
 	return 0;
 }
 
-static int
-bpf_object__load_progs(struct bpf_object *obj, int log_level)
+static int bpf_object_load_progs(struct bpf_object *obj, int log_level)
 {
 	struct bpf_program *prog;
 	size_t i;
@@ -8093,10 +8316,10 @@  static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
 	err = err ? : bpf_object__sanitize_maps(obj);
 	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
-	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
+	err = err ? : bpf_object_relocate(obj, obj->btf_custom_path ? : target_btf_path);
 	err = err ? : bpf_object_load_btf(obj);
 	err = err ? : bpf_object_create_maps(obj);
-	err = err ? : bpf_object__load_progs(obj, extra_log_level);
+	err = err ? : bpf_object_load_progs(obj, extra_log_level);
 	err = err ? : bpf_object_init_prog_arrays(obj);
 	err = err ? : bpf_object_prepare_struct_ops(obj);