[v3,bpf-next,2/5] bpf: pass whole link instead of prog when triggering raw tracepoint

Message ID 20240319233852.1977493-3-andrii@kernel.org (mailing list archive)
State Accepted
Commit d4dfc5700e867b22ab94f960f9a9972696a637d5
Delegated to: BPF
Series BPF raw tracepoint support for BPF cookie

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7482 this patch: 7482
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 12 maintainers not CCed: haoluo@google.com linux-trace-kernel@vger.kernel.org john.fastabend@gmail.com rostedt@goodmis.org mhiramat@kernel.org eddyz87@gmail.com song@kernel.org kpsingh@kernel.org yonghong.song@linux.dev martin.lau@linux.dev jolsa@kernel.org mathieu.desnoyers@efficios.com
netdev/build_clang success Errors and warnings before: 2244 this patch: 2244
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 7870 this patch: 7870
netdev/checkpatch warning CHECK: Please use a blank line after function/struct/union/enum declarations WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary

Commit Message

Andrii Nakryiko March 19, 2024, 11:38 p.m. UTC
Instead of passing prog as an argument to bpf_trace_runX() helpers, which
are called from tracepoint triggering calls, store the BPF link itself
(struct bpf_raw_tp_link for raw tracepoints). This will allow passing
extra information like the BPF cookie into raw tracepoint registration.

Instead of replacing `struct bpf_prog *prog = __data;` with a
corresponding `struct bpf_raw_tp_link *link = __data;` assignment in
`__bpf_trace_##call`, I just passed `__data` through into the underlying
bpf_trace_runX() call. This works well because `void *` is converted
implicitly, and it also avoids naming clashes with arguments coming from
the tracepoint's "proto" list. We could have run into the same problem
with "prog"; we just happened to not have a tracepoint with a "prog"
input argument. We are less lucky with "link", as there are already
tracepoints using "link" as an argument name. So instead of trying to
avoid naming conflicts, let's just remove the intermediate local
variable. It doesn't hurt readability; it's a bit of a maze of calls and
macros either way, and it requires careful reading.

Acked-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf.h          |  5 +++++
 include/linux/trace_events.h | 36 ++++++++++++++++++++----------------
 include/trace/bpf_probe.h    |  3 +--
 kernel/bpf/syscall.c         |  9 ++-------
 kernel/trace/bpf_trace.c     | 18 ++++++++++--------
 5 files changed, 38 insertions(+), 33 deletions(-)
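
To make the effect of this change concrete, below is a rough sketch of what
the generated tracepoint glue and the run helper boil down to after this
patch, for a hypothetical two-argument tracepoint "foo_bar"; the tracepoint
name, the simplified argument casts (the real macro goes through
CAST_TO_U64()/COUNT_ARGS()), and the cookie remark are illustrative
assumptions, not code taken from this series:

/* Roughly what include/trace/bpf_probe.h now generates. __data is the
 * per-attachment pointer registered with the tracepoint; after this patch
 * it is a struct bpf_raw_tp_link *, implicitly converted from void *, so no
 * local variable (and no possible clash with "proto" argument names) is
 * needed.
 */
static notrace void
__bpf_trace_foo_bar(void *__data, const char *name, int val)
{
	bpf_trace_run2(__data, (u64)(unsigned long)name, (u64)val);
}

/* The run helper recovers the program from the link. Because the whole link
 * is now available here, a later patch can hang per-attachment state off
 * struct bpf_raw_tp_link (e.g. a u64 cookie -- field name is an assumption)
 * and read it in this one place without touching the generated glue again.
 */
static __always_inline
void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
{
	struct bpf_prog *prog = link->link.prog;

	cant_sleep();
	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		goto out;
	}
	/* the rest -- bpf_prog_run(prog, args) under rcu_read_lock() --
	 * is unchanged; see kernel/trace/bpf_trace.c */
out:
	this_cpu_dec(*(prog->active));
}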

Comments

Pengfei Xu April 9, 2024, 8:37 a.m. UTC | #1
Hi Andrii Nakryiko,

Greeting!

On 2024-03-19 at 16:38:49 -0700, Andrii Nakryiko wrote:
> Instead of passing prog as an argument to bpf_trace_runX() helpers, which
> are called from tracepoint triggering calls, store the BPF link itself
> (struct bpf_raw_tp_link for raw tracepoints). This will allow passing
> extra information like the BPF cookie into raw tracepoint registration.
>
> Instead of replacing `struct bpf_prog *prog = __data;` with a
> corresponding `struct bpf_raw_tp_link *link = __data;` assignment in
> `__bpf_trace_##call`, I just passed `__data` through into the underlying
> bpf_trace_runX() call. This works well because `void *` is converted
> implicitly, and it also avoids naming clashes with arguments coming from
> the tracepoint's "proto" list. We could have run into the same problem
> with "prog"; we just happened to not have a tracepoint with a "prog"
> input argument. We are less lucky with "link", as there are already
> tracepoints using "link" as an argument name. So instead of trying to
> avoid naming conflicts, let's just remove the intermediate local
> variable. It doesn't hurt readability; it's a bit of a maze of calls and
> macros either way, and it requires careful reading.
> 
> Acked-by: Stanislav Fomichev <sdf@google.com>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  include/linux/bpf.h          |  5 +++++
>  include/linux/trace_events.h | 36 ++++++++++++++++++++----------------
>  include/trace/bpf_probe.h    |  3 +--
>  kernel/bpf/syscall.c         |  9 ++-------
>  kernel/trace/bpf_trace.c     | 18 ++++++++++--------
>  5 files changed, 38 insertions(+), 33 deletions(-)
> 

I used syzkaller to test an Intel internal kernel and found a "KASAN:
slab-use-after-free Read in bpf_trace_run4" problem.

Bisecting pointed to the following commit:
d4dfc5700e86 bpf: pass whole link instead of prog when triggering raw tracepoint

I checked that the commit above is the same as this patch.

All detailed info: https://github.com/xupengfe/syzkaller_logs/tree/main/240409_092216_bpf_trace_run4
Syzkaller repro code: https://github.com/xupengfe/syzkaller_logs/blob/main/240409_092216_bpf_trace_run4/repro.c
Syzkaller syscall repro steps: https://github.com/xupengfe/syzkaller_logs/blob/main/240409_092216_bpf_trace_run4/repro.prog
Kconfig(make olddefconfig): https://github.com/xupengfe/syzkaller_logs/blob/main/240409_092216_bpf_trace_run4/kconfig_origin
Bisect info: https://github.com/xupengfe/syzkaller_logs/blob/main/240409_092216_bpf_trace_run4/bisect_info.log
issue_bzImage: https://github.com/xupengfe/syzkaller_logs/raw/main/240409_092216_bpf_trace_run4/bzImage_v6.9-rc2_next.tar.gz
issue dmesg: https://github.com/xupengfe/syzkaller_logs/blob/main/240409_092216_bpf_trace_run4/5d8569db0cb982d3c630482c285578e98a75fc90_dmesg.log

"
[   24.977435] ==================================================================
[   24.978307] BUG: KASAN: slab-use-after-free in bpf_trace_run4+0x547/0x5e0
[   24.979138] Read of size 8 at addr ffff888015676218 by task rcu_preempt/16
[   24.979936] 
[   24.980152] CPU: 0 PID: 16 Comm: rcu_preempt Not tainted 6.9.0-rc2-5d8569db0cb9+ #1
[   24.981040] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
[   24.982352] Call Trace:
[   24.982672]  <TASK>
[   24.982952]  dump_stack_lvl+0xe9/0x150
[   24.983449]  print_report+0xd0/0x610
[   24.983904]  ? bpf_trace_run4+0x547/0x5e0
[   24.984393]  ? kasan_complete_mode_report_info+0x80/0x200
[   24.985039]  ? bpf_trace_run4+0x547/0x5e0
[   24.985528]  kasan_report+0x9f/0xe0
[   24.985961]  ? bpf_trace_run4+0x547/0x5e0
[   24.986457]  __asan_report_load8_noabort+0x18/0x20
[   24.987055]  bpf_trace_run4+0x547/0x5e0
[   24.987532]  ? __pfx_bpf_trace_run4+0x10/0x10
[   24.988061]  ? __this_cpu_preempt_check+0x20/0x30
[   24.988670]  ? lock_is_held_type+0xe5/0x140
[   24.989185]  ? set_next_entity+0x38c/0x630
[   24.989698]  ? put_prev_entity+0x50/0x1f0
[   24.990199]  __bpf_trace_sched_switch+0x14/0x20
[   24.990776]  __traceiter_sched_switch+0x7a/0xd0
[   24.991293]  __schedule+0xc6d/0x2840
[   24.991721]  ? __pfx___schedule+0x10/0x10
[   24.992170]  ? lock_release+0x3f6/0x790
[   24.992616]  ? __this_cpu_preempt_check+0x20/0x30
[   24.993140]  ? schedule+0x1f3/0x290
[   24.993536]  ? __pfx_lock_release+0x10/0x10
[   24.994003]  ? _raw_spin_unlock_irqrestore+0x39/0x70
[   24.994561]  ? schedule_timeout+0x559/0x940
[   24.995021]  ? __this_cpu_preempt_check+0x20/0x30
[   24.995548]  schedule+0xcf/0x290
[   24.995922]  schedule_timeout+0x55e/0x940
[   24.996369]  ? __pfx_schedule_timeout+0x10/0x10
[   24.996870]  ? prepare_to_swait_event+0xff/0x450
[   24.997401]  ? prepare_to_swait_event+0xc4/0x450
[   24.997916]  ? __this_cpu_preempt_check+0x20/0x30
[   24.998445]  ? __pfx_process_timeout+0x10/0x10
[   24.998971]  ? tcp_get_idx+0xd0/0x270
[   24.999408]  ? prepare_to_swait_event+0xff/0x450
[   24.999934]  rcu_gp_fqs_loop+0x661/0xa70
[   25.000399]  ? __pfx_rcu_gp_fqs_loop+0x10/0x10
[   25.000913]  ? __pfx_rcu_gp_init+0x10/0x10
[   25.001381]  rcu_gp_kthread+0x25e/0x360
[   25.001822]  ? __pfx_rcu_gp_kthread+0x10/0x10
[   25.002324]  ? __sanitizer_cov_trace_const_cmp1+0x1e/0x30
[   25.002966]  ? __kthread_parkme+0x146/0x220
[   25.003472]  ? __pfx_rcu_gp_kthread+0x10/0x10
[   25.003995]  kthread+0x354/0x470
[   25.004400]  ? __pfx_kthread+0x10/0x10
[   25.004862]  ret_from_fork+0x57/0x90
[   25.005315]  ? __pfx_kthread+0x10/0x10
[   25.005778]  ret_from_fork_asm+0x1a/0x30
[   25.006282]  </TASK>
[   25.006560] 
[   25.006773] Allocated by task 732:
[   25.007187]  kasan_save_stack+0x2a/0x50
[   25.007660]  kasan_save_track+0x18/0x40
[   25.008124]  kasan_save_alloc_info+0x3b/0x50
[   25.008649]  __kasan_kmalloc+0x86/0xa0
[   25.009107]  kmalloc_trace+0x1c5/0x3d0
[   25.009599]  bpf_raw_tp_link_attach+0x28e/0x5a0
[   25.010163]  __sys_bpf+0x452/0x5550
[   25.010599]  __x64_sys_bpf+0x7e/0xc0
[   25.011054]  do_syscall_64+0x73/0x150
[   25.011523]  entry_SYSCALL_64_after_hwframe+0x71/0x79
[   25.012156] 
[   25.012360] Freed by task 732:
[   25.012740]  kasan_save_stack+0x2a/0x50
[   25.013211]  kasan_save_track+0x18/0x40
[   25.013689]  kasan_save_free_info+0x3e/0x60
[   25.014198]  __kasan_slab_free+0x107/0x190
[   25.014694]  kfree+0xf3/0x320
[   25.015085]  bpf_raw_tp_link_dealloc+0x1e/0x30
[   25.015632]  bpf_link_free+0x145/0x1b0
[   25.016094]  bpf_link_put_direct+0x45/0x60
[   25.016593]  bpf_link_release+0x40/0x50
[   25.017064]  __fput+0x273/0xb70
[   25.017489]  ____fput+0x1e/0x30
[   25.017890]  task_work_run+0x1a3/0x2d0
[   25.018356]  do_exit+0xad3/0x31b0
[   25.018800]  do_group_exit+0xdf/0x2b0
[   25.019256]  __x64_sys_exit_group+0x47/0x50
[   25.019763]  do_syscall_64+0x73/0x150
[   25.020215]  entry_SYSCALL_64_after_hwframe+0x71/0x79
[   25.020830] 
[   25.021036] The buggy address belongs to the object at ffff888015676200
[   25.021036]  which belongs to the cache kmalloc-128 of size 128
[   25.022465] The buggy address is located 24 bytes inside of
[   25.022465]  freed 128-byte region [ffff888015676200, ffff888015676280)
[   25.023780] 
[   25.023970] The buggy address belongs to the physical page:
[   25.024563] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x15676
[   25.025401] flags: 0xfffffe0000800(slab|node=0|zone=1|lastcpupid=0x3fffff)
[   25.026140] page_type: 0xffffffff()
[   25.026535] raw: 000fffffe0000800 ffff88800a0418c0 dead000000000122 0000000000000000
[   25.027363] raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
[   25.028182] page dumped because: kasan: bad access detected
[   25.028773] 
[   25.028957] Memory state around the buggy address:
[   25.029476]  ffff888015676100: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   25.030244]  ffff888015676180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   25.031018] >ffff888015676200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   25.031780]                             ^
[   25.032221]  ffff888015676280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   25.032992]  ffff888015676300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   25.033769] ==================================================================
[   25.034571] Disabling lock debugging due to kernel taint
"

Could you take a look and see whether this is useful?

Thanks!

---

If you don't need the following environment to reproduce the problem, or if you
already have a working reproduction environment, please ignore the information below.

How to reproduce:
git clone https://gitlab.com/xupengfe/repro_vm_env.git
cd repro_vm_env
tar -xvf repro_vm_env.tar.gz
cd repro_vm_env; ./start3.sh  // it needs qemu-system-x86_64 and I used v7.1.0
  // start3.sh will load the bzImage_2241ab53cbb5cdb08a6b2d4688feb13971058f65 v6.2-rc5 kernel
  // You can change the bzImage_xxx as you want
  // You may need to remove the line "-drive if=pflash,format=raw,readonly=on,file=./OVMF_CODE.fd \" for a different qemu version
You can use the command below to log in; there is no password for root.
ssh -p 10023 root@localhost

After logging in to the VM (virtual machine) successfully, you can transfer the
reproducer binary to the VM as shown below and reproduce the problem in the VM:
gcc -pthread -o repro repro.c
scp -P 10023 repro root@localhost:/root/

Get the bzImage for the target kernel:
Please use the target kconfig and copy it to kernel_src/.config
make olddefconfig
make -jx bzImage           // x should be equal to or less than the number of CPUs your PC has

Point start3.sh above at the resulting bzImage file to load the target kernel in the VM.


Tips:
If you already have qemu-system-x86_64, please ignore the info below.
If you want to install qemu v7.1.0:
git clone https://github.com/qemu/qemu.git
cd qemu
git checkout -f v7.1.0
mkdir build
cd build
yum install -y ninja-build.x86_64
yum -y install libslirp-devel.x86_64
../configure --target-list=x86_64-softmmu --enable-kvm --enable-vnc --enable-gtk --enable-sdl --enable-usb-redir --enable-slirp
make
make install

Best Regards,
Thanks!



[...]
Andrii Nakryiko April 18, 2024, 8:52 p.m. UTC | #2
On Tue, Apr 9, 2024 at 1:41 AM Pengfei Xu <pengfei.xu@intel.com> wrote:
>
> Hi Andrii Nakryiko,
>
> Greeting!
>
> On 2024-03-19 at 16:38:49 -0700, Andrii Nakryiko wrote:
> > Instead of passing prog as an argument to bpf_trace_runX() helpers, which
> > are called from tracepoint triggering calls, store the BPF link itself
> > (struct bpf_raw_tp_link for raw tracepoints). This will allow passing
> > extra information like the BPF cookie into raw tracepoint registration.
> >
> > Instead of replacing `struct bpf_prog *prog = __data;` with a
> > corresponding `struct bpf_raw_tp_link *link = __data;` assignment in
> > `__bpf_trace_##call`, I just passed `__data` through into the underlying
> > bpf_trace_runX() call. This works well because `void *` is converted
> > implicitly, and it also avoids naming clashes with arguments coming from
> > the tracepoint's "proto" list. We could have run into the same problem
> > with "prog"; we just happened to not have a tracepoint with a "prog"
> > input argument. We are less lucky with "link", as there are already
> > tracepoints using "link" as an argument name. So instead of trying to
> > avoid naming conflicts, let's just remove the intermediate local
> > variable. It doesn't hurt readability; it's a bit of a maze of calls and
> > macros either way, and it requires careful reading.
> >
> > Acked-by: Stanislav Fomichev <sdf@google.com>
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  include/linux/bpf.h          |  5 +++++
> >  include/linux/trace_events.h | 36 ++++++++++++++++++++----------------
> >  include/trace/bpf_probe.h    |  3 +--
> >  kernel/bpf/syscall.c         |  9 ++-------
> >  kernel/trace/bpf_trace.c     | 18 ++++++++++--------
> >  5 files changed, 38 insertions(+), 33 deletions(-)
> >
>
> I used syzkaller to test an Intel internal kernel and found a "KASAN:
> slab-use-after-free Read in bpf_trace_run4" problem.
>
> Bisecting pointed to the following commit:
> d4dfc5700e86 bpf: pass whole link instead of prog when triggering raw tracepoint

I think [0] is fixing this problem, it landed into bpf tree

  [0] https://lore.kernel.org/all/20240328052426.3042617-2-andrii@kernel.org/

[...]
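
Regarding the use-after-free reported above: the probe data registered with
the tracepoint is now the raw_tp link itself, so once the link's last file
reference is dropped and the link is kfree()d, a tracepoint invocation still
in flight on another CPU can dereference freed memory inside bpf_trace_runX()
(the KASAN free stack shows exactly that synchronous bpf_raw_tp_link_dealloc()
-> kfree() path). Below is a minimal sketch of the usual remedy -- deferring
the free past an RCU grace period -- under the assumption that struct
bpf_raw_tp_link gains an rcu_head; the field and callback names are
illustrative, and the actual fix referenced in the reply above (which may also
need to wait for tasks-trace RCU for sleepable programs) should be consulted
for the real mechanics:

/* Sketch only: free the link via an RCU callback so that any probe
 * invocation that started before the tracepoint was unregistered has
 * finished before the memory is reused. The rcu field is an assumption,
 * not part of the struct added by this patch.
 */
struct bpf_raw_tp_link {
	struct bpf_link link;
	struct bpf_raw_event_map *btp;
	struct rcu_head rcu;		/* hypothetical */
};

static void bpf_raw_tp_link_free_rcu(struct rcu_head *rcu)
{
	struct bpf_raw_tp_link *raw_tp =
		container_of(rcu, struct bpf_raw_tp_link, rcu);

	kfree(raw_tp);
}

static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
{
	struct bpf_raw_tp_link *raw_tp =
		container_of(link, struct bpf_raw_tp_link, link);

	/* bpf_probe_unregister() already ran in ->release(), but a
	 * concurrently running probe may still hold raw_tp, so don't
	 * kfree() it synchronously. */
	call_rcu(&raw_tp->rcu, bpf_raw_tp_link_free_rcu);
}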

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 17843e66a1d3..2ea8ce59f582 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1607,6 +1607,11 @@  struct bpf_tracing_link {
 	struct bpf_prog *tgt_prog;
 };
 
+struct bpf_raw_tp_link {
+	struct bpf_link link;
+	struct bpf_raw_event_map *btp;
+};
+
 struct bpf_link_primer {
 	struct bpf_link *link;
 	struct file *file;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index d68ff9b1247f..a7fc6fb6de3c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -759,8 +759,11 @@  unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
 int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
 void perf_event_detach_bpf_prog(struct perf_event *event);
 int perf_event_query_prog_array(struct perf_event *event, void __user *info);
-int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
-int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
+
+struct bpf_raw_tp_link;
+int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link);
+int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link);
+
 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
@@ -788,11 +791,12 @@  perf_event_query_prog_array(struct perf_event *event, void __user *info)
 {
 	return -EOPNOTSUPP;
 }
-static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p)
+struct bpf_raw_tp_link;
+static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
 {
 	return -EOPNOTSUPP;
 }
-static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p)
+static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
 {
 	return -EOPNOTSUPP;
 }
@@ -903,31 +907,31 @@  void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
 int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
 void perf_event_free_bpf_prog(struct perf_event *event);
 
-void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
-void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
-void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run1(struct bpf_raw_tp_link *link, u64 arg1);
+void bpf_trace_run2(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2);
+void bpf_trace_run3(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		    u64 arg3);
-void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run4(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		    u64 arg3, u64 arg4);
-void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run5(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		    u64 arg3, u64 arg4, u64 arg5);
-void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run6(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		    u64 arg3, u64 arg4, u64 arg5, u64 arg6);
-void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run7(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run8(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
 		    u64 arg8);
-void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run9(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		    u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
 		    u64 arg8, u64 arg9);
-void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run10(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
 		     u64 arg8, u64 arg9, u64 arg10);
-void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run11(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
 		     u64 arg8, u64 arg9, u64 arg10, u64 arg11);
-void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
+void bpf_trace_run12(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2,
 		     u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
 		     u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
 void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index e609cd7da47e..a2ea11cc912e 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -46,8 +46,7 @@ 
 static notrace void							\
 __bpf_trace_##call(void *__data, proto)					\
 {									\
-	struct bpf_prog *prog = __data;					\
-	CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args));	\
+	CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args));	\
 }
 
 #undef DECLARE_EVENT_CLASS
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ae2ff73bde7e..1cb4c3809af4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3469,17 +3469,12 @@  static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	return err;
 }
 
-struct bpf_raw_tp_link {
-	struct bpf_link link;
-	struct bpf_raw_event_map *btp;
-};
-
 static void bpf_raw_tp_link_release(struct bpf_link *link)
 {
 	struct bpf_raw_tp_link *raw_tp =
 		container_of(link, struct bpf_raw_tp_link, link);
 
-	bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
+	bpf_probe_unregister(raw_tp->btp, raw_tp);
 	bpf_put_raw_tracepoint(raw_tp->btp);
 }
 
@@ -3833,7 +3828,7 @@  static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
 		goto out_put_btp;
 	}
 
-	err = bpf_probe_register(link->btp, prog);
+	err = bpf_probe_register(link->btp, link);
 	if (err) {
 		bpf_link_cleanup(&link_primer);
 		goto out_put_btp;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 30ecf62f8a17..17de91ad4a1f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2366,8 +2366,10 @@  void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
 }
 
 static __always_inline
-void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
+void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
 {
+	struct bpf_prog *prog = link->link.prog;
+
 	cant_sleep();
 	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 		bpf_prog_inc_misses_counter(prog);
@@ -2404,12 +2406,12 @@  void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
 #define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
 
 #define BPF_TRACE_DEFN_x(x)						\
-	void bpf_trace_run##x(struct bpf_prog *prog,			\
+	void bpf_trace_run##x(struct bpf_raw_tp_link *link,		\
 			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
 	{								\
 		u64 args[x];						\
 		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
-		__bpf_trace_run(prog, args);				\
+		__bpf_trace_run(link, args);				\
 	}								\
 	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
 BPF_TRACE_DEFN_x(1);
@@ -2425,9 +2427,10 @@  BPF_TRACE_DEFN_x(10);
 BPF_TRACE_DEFN_x(11);
 BPF_TRACE_DEFN_x(12);
 
-int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
 {
 	struct tracepoint *tp = btp->tp;
+	struct bpf_prog *prog = link->link.prog;
 
 	/*
 	 * check that program doesn't access arguments beyond what's
@@ -2439,13 +2442,12 @@  int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 	if (prog->aux->max_tp_access > btp->writable_size)
 		return -EINVAL;
 
-	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
-						   prog);
+	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
 }
 
-int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
 {
-	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
+	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link);
 }
 
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,