[bpf-next,v3,2/2] bpf: Add kernel symbol for struct_ops trampoline

Message ID 20241111121641.2679885-3-xukuohai@huaweicloud.com (mailing list archive)
State Superseded
Delegated to: BPF
Series Add kernel symbol for struct_ops trampoline

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-37 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-21 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 204 this patch: 204
netdev/build_tools success Errors and warnings before: 0 (+0) this patch: 0 (+0)
netdev/cc_maintainers warning 6 maintainers not CCed: kpsingh@kernel.org jolsa@kernel.org song@kernel.org haoluo@google.com john.fastabend@gmail.com sdf@fomichev.me
netdev/build_clang success Errors and warnings before: 255 this patch: 255
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 6971 this patch: 6971
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0

Commit Message

Xu Kuohai Nov. 11, 2024, 12:16 p.m. UTC
From: Xu Kuohai <xukuohai@huawei.com>

Without kernel symbols for struct_ops trampolines, the unwinder may
produce unexpected stacktraces.

For example, the x86 ORC and FP unwinders check whether an IP is in
kernel text by verifying the presence of the IP's kernel symbol. When a
struct_ops trampoline address is encountered, the unwinder stops due
to the absence of a symbol, resulting in an incomplete stacktrace that
consists only of the direct and indirect child functions called from
the trampoline.

The arm64 unwinder is another example. While the arm64 unwinder can
proceed across a struct_ops trampoline address, the corresponding
symbol name is displayed as "unknown", which is confusing.

Thus, add a kernel symbol for each struct_ops trampoline. The symbol
name is bpf__<struct_ops_name>_<member_name>, where <struct_ops_name>
is the type name of the struct_ops, and <member_name> is the name of
the member that the trampoline is linked to (for example,
bpf__tcp_congestion_ops_cong_avoid in the stacktrace below).

Below is a comparison of stacktraces captured on x86 by perf record,
before and after this patch.

Before:
ffffffff8116545d __lock_acquire+0xad ([kernel.kallsyms])
ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms])
ffffffff813088f4 __bpf_prog_enter+0x34 ([kernel.kallsyms])

After:
ffffffff811656bd __lock_acquire+0x30d ([kernel.kallsyms])
ffffffff81167fcc lock_acquire+0xcc ([kernel.kallsyms])
ffffffff81309024 __bpf_prog_enter+0x34 ([kernel.kallsyms])
ffffffffc000d7e9 bpf__tcp_congestion_ops_cong_avoid+0x3e ([kernel.kallsyms])
ffffffff81f250a5 tcp_ack+0x10d5 ([kernel.kallsyms])
ffffffff81f27c66 tcp_rcv_established+0x3b6 ([kernel.kallsyms])
ffffffff81f3ad03 tcp_v4_do_rcv+0x193 ([kernel.kallsyms])
ffffffff81d65a18 __release_sock+0xd8 ([kernel.kallsyms])
ffffffff81d65af4 release_sock+0x34 ([kernel.kallsyms])
ffffffff81f15c4b tcp_sendmsg+0x3b ([kernel.kallsyms])
ffffffff81f663d7 inet_sendmsg+0x47 ([kernel.kallsyms])
ffffffff81d5ab40 sock_write_iter+0x160 ([kernel.kallsyms])
ffffffff8149c67b vfs_write+0x3fb ([kernel.kallsyms])
ffffffff8149caf6 ksys_write+0xc6 ([kernel.kallsyms])
ffffffff8149cb5d __x64_sys_write+0x1d ([kernel.kallsyms])
ffffffff81009200 x64_sys_call+0x1d30 ([kernel.kallsyms])
ffffffff82232d28 do_syscall_64+0x68 ([kernel.kallsyms])
ffffffff8240012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kallsyms])

Note that, in addition to adding the new ksyms member to struct
bpf_struct_ops_map, this patch also removes the unused rcu member from
the structure.

Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS")
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
---
 include/linux/bpf.h         |  3 +-
 kernel/bpf/bpf_struct_ops.c | 81 ++++++++++++++++++++++++++++++++++++-
 kernel/bpf/dispatcher.c     |  3 +-
 kernel/bpf/trampoline.c     |  9 ++++-
 4 files changed, 90 insertions(+), 6 deletions(-)

Comments

Martin KaFai Lau Nov. 11, 2024, 11:04 p.m. UTC | #1
On 11/11/24 4:16 AM, Xu Kuohai wrote:
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index e99fce81e916..d6dd56fc80d8 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
>   
>   struct bpf_struct_ops_map {
>   	struct bpf_map map;
> -	struct rcu_head rcu;

Since it needs a respin (more on that later), it would be useful to split
this cleanup out into a separate patch in the same series.

>   	const struct bpf_struct_ops_desc *st_ops_desc;
>   	/* protect map_update */
>   	struct mutex lock;
> @@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
>   	 * (in kvalue.data).
>   	 */
>   	struct bpf_link **links;
> +	/* ksyms for bpf trampolines */
> +	struct bpf_ksym **ksyms;
>   	u32 funcs_cnt;
>   	u32 image_pages_cnt;
>   	/* image_pages is an array of pages that has all the trampolines
> @@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
>   	return 0;
>   }
>   
> +static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
> +				     void *image, unsigned int size,
> +				     struct bpf_ksym *ksym)
> +{
> +	snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
> +	INIT_LIST_HEAD_RCU(&ksym->lnode);
> +	bpf_image_ksym_init(image, size, ksym);
> +}
> +
> +static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < st_map->funcs_cnt; i++) {
> +		if (!st_map->ksyms[i])
> +			break;
> +		bpf_image_ksym_add(st_map->ksyms[i]);
> +	}
> +}
> +
> +static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < st_map->funcs_cnt; i++) {
> +		if (!st_map->ksyms[i])
> +			break;
> +		bpf_image_ksym_del(st_map->ksyms[i]);
> +	}
> +}
> +
> +static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < st_map->funcs_cnt; i++) {
> +		if (!st_map->ksyms[i])
> +			break;
> +		kfree(st_map->ksyms[i]);
> +		st_map->links[i] = NULL;

s/links/ksyms/

pw-bot: cr

> +	}
> +}
> +
>   static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   					   void *value, u64 flags)
>   {
> @@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   	u32 i, trampoline_start, image_off = 0;
>   	void *cur_image = NULL, *image = NULL;
>   	struct bpf_link **plink;
> +	struct bpf_ksym **pksym;
> +	const char *tname, *mname;
>   
>   	if (flags)
>   		return -EINVAL;
> @@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   	kdata = &kvalue->data;
>   
>   	plink = st_map->links;
> +	pksym = st_map->ksyms;
> +	tname = btf_name_by_offset(st_map->btf, t->name_off);
>   	module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
>   	for_each_member(i, t, member) {
>   		const struct btf_type *mtype, *ptype;
>   		struct bpf_prog *prog;
>   		struct bpf_tramp_link *link;
> +		struct bpf_ksym *ksym;
>   		u32 moff;
>   
>   		moff = __btf_member_bit_offset(t, member) / 8;
> +		mname = btf_name_by_offset(st_map->btf, member->name_off);
>   		ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
>   		if (ptype == module_type) {
>   			if (*(void **)(udata + moff))
> @@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   			      &bpf_struct_ops_link_lops, prog);
>   		*plink++ = &link->link;
>   
> +		ksym = kzalloc(sizeof(*ksym), GFP_USER);

link is also allocated with kzalloc, but probably both the link and ksym
allocations should use bpf_map_kzalloc instead. This switch can be done
for both together later as a follow-up patch.
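
A minimal sketch of that suggested follow-up (illustrative only, not part
of this patch), assuming bpf_map_kzalloc() is used so both allocations get
charged to the map:

	/* hypothetical follow-up: charge both allocations to the map */
	link = bpf_map_kzalloc(&st_map->map, sizeof(*link), GFP_USER);
	...
	ksym = bpf_map_kzalloc(&st_map->map, sizeof(*ksym), GFP_USER);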

> +		if (!ksym) {
> +			bpf_prog_put(prog);

afaik, this bpf_prog_put is not needed. The bpf_link_init above took
ownership of the prog, and bpf_struct_ops_map_put_progs() in the error
path will take care of it.
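
In other words, the allocation failure path could likely be reduced to
(sketch only):

	ksym = kzalloc(sizeof(*ksym), GFP_USER);
	if (!ksym) {
		/* no bpf_prog_put() needed: the link owns the prog and
		 * bpf_struct_ops_map_put_progs() releases it in the
		 * reset_unlock error path
		 */
		err = -ENOMEM;
		goto reset_unlock;
	}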

> +			err = -ENOMEM;
> +			goto reset_unlock;
> +		}
> +		*pksym = ksym;

nit. Follow the *plink++ style above and do the same *pksym++ here.

> +
>   		trampoline_start = image_off;
>   		err = bpf_struct_ops_prepare_trampoline(tlinks, link,
>   						&st_ops->func_models[i],
> @@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   
>   		/* put prog_id to udata */
>   		*(unsigned long *)(udata + moff) = prog->aux->id;
> +
> +		/* init ksym for this trampoline */
> +		bpf_struct_ops_ksym_init(tname, mname,
> +					 image + trampoline_start,
> +					 image_off - trampoline_start,
> +					 *pksym++);

then use "ksym" here.
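
Putting the two nits together, a sketch of the intended shape (illustrative
only, mirroring the existing *plink++ pattern):

	*pksym++ = ksym;
	...
	/* init ksym for this trampoline */
	bpf_struct_ops_ksym_init(tname, mname,
				 image + trampoline_start,
				 image_off - trampoline_start,
				 ksym);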

>   	}
>   
>   	if (st_ops->validate) {
> @@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   	 */
>   
>   reset_unlock:
> +	bpf_struct_ops_map_free_ksyms(st_map);
>   	bpf_struct_ops_map_free_image(st_map);
>   	bpf_struct_ops_map_put_progs(st_map);
>   	memset(uvalue, 0, map->value_size);
> @@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   unlock:
>   	kfree(tlinks);
>   	mutex_unlock(&st_map->lock);
> +	if (!err)
> +		bpf_struct_ops_map_ksyms_add(st_map);
>   	return err;
>   }
>   
> @@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>   
>   	if (st_map->links)
>   		bpf_struct_ops_map_put_progs(st_map);
> +	if (st_map->ksyms)
> +		bpf_struct_ops_map_free_ksyms(st_map);
>   	bpf_map_area_free(st_map->links);
> +	bpf_map_area_free(st_map->ksyms);
>   	bpf_struct_ops_map_free_image(st_map);
>   	bpf_map_area_free(st_map->uvalue);
>   	bpf_map_area_free(st_map);
> @@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
>   	if (btf_is_module(st_map->btf))
>   		module_put(st_map->st_ops_desc->st_ops->owner);
>   
> +	if (st_map->ksyms)

This null test should not be needed.

> +		bpf_struct_ops_map_del_ksyms(st_map);
> +
>   	/* The struct_ops's function may switch to another struct_ops.
>   	 *
>   	 * For example, bpf_tcp_cc_x->init() may switch to
> @@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>   	st_map->links =
>   		bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
>   				   NUMA_NO_NODE);
> -	if (!st_map->uvalue || !st_map->links) {
> +
> +	st_map->ksyms =
> +		bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
> +				   NUMA_NO_NODE);

.map_mem_usage at least needs to include the st_map->ksyms[] pointer
array. funcs_cnt should be used instead of btf_type_vlen(vt) for the
links array in .map_mem_usage as well.
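
Roughly, in bpf_struct_ops_map_mem_usage() that would mean something like
(sketch only; the exact surrounding code may differ):

	usage += (u64)st_map->funcs_cnt * sizeof(struct bpf_link *);	/* links[] */
	usage += (u64)st_map->funcs_cnt * sizeof(struct bpf_ksym *);	/* ksyms[] */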

> +	if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
>   		ret = -ENOMEM;
>   		goto errout_free;
>   	}
Xu Kuohai Nov. 12, 2024, 12:06 p.m. UTC | #2
On 11/12/2024 7:04 AM, Martin KaFai Lau wrote:
> On 11/11/24 4:16 AM, Xu Kuohai wrote:
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index e99fce81e916..d6dd56fc80d8 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
>>   struct bpf_struct_ops_map {
>>       struct bpf_map map;
>> -    struct rcu_head rcu;
> 
> Since it needs a respin (more on that later), it would be useful to split this cleanup out into a separate patch in the same series.
>

OK

>>       const struct bpf_struct_ops_desc *st_ops_desc;
>>       /* protect map_update */
>>       struct mutex lock;
>> @@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
>>        * (in kvalue.data).
>>        */
>>       struct bpf_link **links;
>> +    /* ksyms for bpf trampolines */
>> +    struct bpf_ksym **ksyms;
>>       u32 funcs_cnt;
>>       u32 image_pages_cnt;
>>       /* image_pages is an array of pages that has all the trampolines
>> @@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
>>       return 0;
>>   }
>> +static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
>> +                     void *image, unsigned int size,
>> +                     struct bpf_ksym *ksym)
>> +{
>> +    snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
>> +    INIT_LIST_HEAD_RCU(&ksym->lnode);
>> +    bpf_image_ksym_init(image, size, ksym);
>> +}
>> +
>> +static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
>> +{
>> +    u32 i;
>> +
>> +    for (i = 0; i < st_map->funcs_cnt; i++) {
>> +        if (!st_map->ksyms[i])
>> +            break;
>> +        bpf_image_ksym_add(st_map->ksyms[i]);
>> +    }
>> +}
>> +
>> +static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
>> +{
>> +    u32 i;
>> +
>> +    for (i = 0; i < st_map->funcs_cnt; i++) {
>> +        if (!st_map->ksyms[i])
>> +            break;
>> +        bpf_image_ksym_del(st_map->ksyms[i]);
>> +    }
>> +}
>> +
>> +static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
>> +{
>> +    u32 i;
>> +
>> +    for (i = 0; i < st_map->funcs_cnt; i++) {
>> +        if (!st_map->ksyms[i])
>> +            break;
>> +        kfree(st_map->ksyms[i]);
>> +        st_map->links[i] = NULL;
> 
> s/links/ksyms/
> 
> pw-bot: cr
>

Oops, good catch

>> +    }
>> +}
>> +
>>   static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>>                          void *value, u64 flags)
>>   {
>> @@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>>       u32 i, trampoline_start, image_off = 0;
>>       void *cur_image = NULL, *image = NULL;
>>       struct bpf_link **plink;
>> +    struct bpf_ksym **pksym;
>> +    const char *tname, *mname;
>>       if (flags)
>>           return -EINVAL;
>> @@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>>       kdata = &kvalue->data;
>>       plink = st_map->links;
>> +    pksym = st_map->ksyms;
>> +    tname = btf_name_by_offset(st_map->btf, t->name_off);
>>       module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
>>       for_each_member(i, t, member) {
>>           const struct btf_type *mtype, *ptype;
>>           struct bpf_prog *prog;
>>           struct bpf_tramp_link *link;
>> +        struct bpf_ksym *ksym;
>>           u32 moff;
>>           moff = __btf_member_bit_offset(t, member) / 8;
>> +        mname = btf_name_by_offset(st_map->btf, member->name_off);
>>           ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
>>           if (ptype == module_type) {
>>               if (*(void **)(udata + moff))
>> @@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>>                     &bpf_struct_ops_link_lops, prog);
>>           *plink++ = &link->link;
>> +        ksym = kzalloc(sizeof(*ksym), GFP_USER);
> 
> link is also allocated with kzalloc, but probably both the link and ksym allocations should use bpf_map_kzalloc instead. This switch can be done for both together later as a follow-up patch.
>

OK, will do.

>> +        if (!ksym) {
>> +            bpf_prog_put(prog);
> 
> afaik, this bpf_prog_put is not needed. The bpf_link_init above took ownership of the prog, and bpf_struct_ops_map_put_progs() in the error path will take care of it.
>

Right, good catch

>> +            err = -ENOMEM;
>> +            goto reset_unlock;
>> +        }
>> +        *pksym = ksym;
> 
> nit. Follow the *plink++ style above and do the same *pksym++ here.
>

OK

>> +
>>           trampoline_start = image_off;
>>           err = bpf_struct_ops_prepare_trampoline(tlinks, link,
>>                           &st_ops->func_models[i],
>> @@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>>           /* put prog_id to udata */
>>           *(unsigned long *)(udata + moff) = prog->aux->id;
>> +
>> +        /* init ksym for this trampoline */
>> +        bpf_struct_ops_ksym_init(tname, mname,
>> +                     image + trampoline_start,
>> +                     image_off - trampoline_start,
>> +                     *pksym++);
> 
> then use "ksym" here.
> 
>>       }
>>       if (st_ops->validate) {
>> @@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>>        */
>>   reset_unlock:
>> +    bpf_struct_ops_map_free_ksyms(st_map);
>>       bpf_struct_ops_map_free_image(st_map);
>>       bpf_struct_ops_map_put_progs(st_map);
>>       memset(uvalue, 0, map->value_size);
>> @@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>>   unlock:
>>       kfree(tlinks);
>>       mutex_unlock(&st_map->lock);
>> +    if (!err)
>> +        bpf_struct_ops_map_ksyms_add(st_map);
>>       return err;
>>   }
>> @@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>>       if (st_map->links)
>>           bpf_struct_ops_map_put_progs(st_map);
>> +    if (st_map->ksyms)
>> +        bpf_struct_ops_map_free_ksyms(st_map);
>>       bpf_map_area_free(st_map->links);
>> +    bpf_map_area_free(st_map->ksyms);
>>       bpf_struct_ops_map_free_image(st_map);
>>       bpf_map_area_free(st_map->uvalue);
>>       bpf_map_area_free(st_map);
>> @@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
>>       if (btf_is_module(st_map->btf))
>>           module_put(st_map->st_ops_desc->st_ops->owner);
>> +    if (st_map->ksyms)
> 
> This null test should not be needed.
>

Agree, will remove it.

>> +        bpf_struct_ops_map_del_ksyms(st_map);
>> +
>>       /* The struct_ops's function may switch to another struct_ops.
>>        *
>>        * For example, bpf_tcp_cc_x->init() may switch to
>> @@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>>       st_map->links =
>>           bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
>>                      NUMA_NO_NODE);
>> -    if (!st_map->uvalue || !st_map->links) {
>> +
>> +    st_map->ksyms =
>> +        bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
>> +                   NUMA_NO_NODE);
> 
> .map_mem_usage at least needs to include the st_map->ksyms[] pointer array. funcs_cnt should be used instead of btf_type_vlen(vt) for the links array in .map_mem_usage as well.
>

Right, agree

>> +    if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
>>           ret = -ENOMEM;
>>           goto errout_free;
>>       }

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1b84613b10ac..6fc6398d86c6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1402,7 +1402,8 @@  int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 				struct bpf_prog *to);
 /* Called only from JIT-enabled code, so there's no need for stubs. */
-void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym);
+void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym);
+void bpf_image_ksym_add(struct bpf_ksym *ksym);
 void bpf_image_ksym_del(struct bpf_ksym *ksym);
 void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index e99fce81e916..d6dd56fc80d8 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -23,7 +23,6 @@  struct bpf_struct_ops_value {
 
 struct bpf_struct_ops_map {
 	struct bpf_map map;
-	struct rcu_head rcu;
 	const struct bpf_struct_ops_desc *st_ops_desc;
 	/* protect map_update */
 	struct mutex lock;
@@ -32,6 +31,8 @@  struct bpf_struct_ops_map {
 	 * (in kvalue.data).
 	 */
 	struct bpf_link **links;
+	/* ksyms for bpf trampolines */
+	struct bpf_ksym **ksyms;
 	u32 funcs_cnt;
 	u32 image_pages_cnt;
 	/* image_pages is an array of pages that has all the trampolines
@@ -586,6 +587,49 @@  int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
 	return 0;
 }
 
+static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
+				     void *image, unsigned int size,
+				     struct bpf_ksym *ksym)
+{
+	snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
+	INIT_LIST_HEAD_RCU(&ksym->lnode);
+	bpf_image_ksym_init(image, size, ksym);
+}
+
+static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
+{
+	u32 i;
+
+	for (i = 0; i < st_map->funcs_cnt; i++) {
+		if (!st_map->ksyms[i])
+			break;
+		bpf_image_ksym_add(st_map->ksyms[i]);
+	}
+}
+
+static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
+{
+	u32 i;
+
+	for (i = 0; i < st_map->funcs_cnt; i++) {
+		if (!st_map->ksyms[i])
+			break;
+		bpf_image_ksym_del(st_map->ksyms[i]);
+	}
+}
+
+static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
+{
+	u32 i;
+
+	for (i = 0; i < st_map->funcs_cnt; i++) {
+		if (!st_map->ksyms[i])
+			break;
+		kfree(st_map->ksyms[i]);
+		st_map->links[i] = NULL;
+	}
+}
+
 static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 					   void *value, u64 flags)
 {
@@ -602,6 +646,8 @@  static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	u32 i, trampoline_start, image_off = 0;
 	void *cur_image = NULL, *image = NULL;
 	struct bpf_link **plink;
+	struct bpf_ksym **pksym;
+	const char *tname, *mname;
 
 	if (flags)
 		return -EINVAL;
@@ -641,14 +687,18 @@  static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	kdata = &kvalue->data;
 
 	plink = st_map->links;
+	pksym = st_map->ksyms;
+	tname = btf_name_by_offset(st_map->btf, t->name_off);
 	module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
 	for_each_member(i, t, member) {
 		const struct btf_type *mtype, *ptype;
 		struct bpf_prog *prog;
 		struct bpf_tramp_link *link;
+		struct bpf_ksym *ksym;
 		u32 moff;
 
 		moff = __btf_member_bit_offset(t, member) / 8;
+		mname = btf_name_by_offset(st_map->btf, member->name_off);
 		ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
 		if (ptype == module_type) {
 			if (*(void **)(udata + moff))
@@ -718,6 +768,14 @@  static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 			      &bpf_struct_ops_link_lops, prog);
 		*plink++ = &link->link;
 
+		ksym = kzalloc(sizeof(*ksym), GFP_USER);
+		if (!ksym) {
+			bpf_prog_put(prog);
+			err = -ENOMEM;
+			goto reset_unlock;
+		}
+		*pksym = ksym;
+
 		trampoline_start = image_off;
 		err = bpf_struct_ops_prepare_trampoline(tlinks, link,
 						&st_ops->func_models[i],
@@ -737,6 +795,12 @@  static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 
 		/* put prog_id to udata */
 		*(unsigned long *)(udata + moff) = prog->aux->id;
+
+		/* init ksym for this trampoline */
+		bpf_struct_ops_ksym_init(tname, mname,
+					 image + trampoline_start,
+					 image_off - trampoline_start,
+					 *pksym++);
 	}
 
 	if (st_ops->validate) {
@@ -785,6 +849,7 @@  static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	 */
 
 reset_unlock:
+	bpf_struct_ops_map_free_ksyms(st_map);
 	bpf_struct_ops_map_free_image(st_map);
 	bpf_struct_ops_map_put_progs(st_map);
 	memset(uvalue, 0, map->value_size);
@@ -792,6 +857,8 @@  static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 unlock:
 	kfree(tlinks);
 	mutex_unlock(&st_map->lock);
+	if (!err)
+		bpf_struct_ops_map_ksyms_add(st_map);
 	return err;
 }
 
@@ -851,7 +918,10 @@  static void __bpf_struct_ops_map_free(struct bpf_map *map)
 
 	if (st_map->links)
 		bpf_struct_ops_map_put_progs(st_map);
+	if (st_map->ksyms)
+		bpf_struct_ops_map_free_ksyms(st_map);
 	bpf_map_area_free(st_map->links);
+	bpf_map_area_free(st_map->ksyms);
 	bpf_struct_ops_map_free_image(st_map);
 	bpf_map_area_free(st_map->uvalue);
 	bpf_map_area_free(st_map);
@@ -868,6 +938,9 @@  static void bpf_struct_ops_map_free(struct bpf_map *map)
 	if (btf_is_module(st_map->btf))
 		module_put(st_map->st_ops_desc->st_ops->owner);
 
+	if (st_map->ksyms)
+		bpf_struct_ops_map_del_ksyms(st_map);
+
 	/* The struct_ops's function may switch to another struct_ops.
 	 *
 	 * For example, bpf_tcp_cc_x->init() may switch to
@@ -980,7 +1053,11 @@  static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	st_map->links =
 		bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
 				   NUMA_NO_NODE);
-	if (!st_map->uvalue || !st_map->links) {
+
+	st_map->ksyms =
+		bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
+				   NUMA_NO_NODE);
+	if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
 		ret = -ENOMEM;
 		goto errout_free;
 	}
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index 70fb82bf1637..aad8a11cc7e5 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -154,7 +154,8 @@  void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 			d->image = NULL;
 			goto out;
 		}
-		bpf_image_ksym_add(d->image, PAGE_SIZE, &d->ksym);
+		bpf_image_ksym_init(d->image, PAGE_SIZE, &d->ksym);
+		bpf_image_ksym_add(d->image);
 	}
 
 	prev_num_progs = d->num_progs;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 9f36c049f4c2..c3efca44c8f7 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -115,10 +115,14 @@  bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
 		(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
 }
 
-void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym)
+void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym)
 {
 	ksym->start = (unsigned long) data;
 	ksym->end = ksym->start + size;
+}
+
+void bpf_image_ksym_add(struct bpf_ksym *ksym)
+{
 	bpf_ksym_add(ksym);
 	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
 			   PAGE_SIZE, false, ksym->name);
@@ -377,7 +381,8 @@  static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
 	ksym = &im->ksym;
 	INIT_LIST_HEAD_RCU(&ksym->lnode);
 	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
-	bpf_image_ksym_add(image, size, ksym);
+	bpf_image_ksym_init(image, size, ksym);
+	bpf_image_ksym_add(image);
 	return im;
 
 out_free_image: