
[bpf-next,v5,2/5] bpf, x86: Create bpf_tramp_run_ctx on the caller thread's stack

Message ID 20220412165555.4146407-3-kuifeng@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Series: Attach a cookie to a tracing program.

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 fail Logs for Kernel LATEST on ubuntu-latest + selftests
bpf/vmtest-bpf-next-VM_Test-2 fail Logs for Kernel LATEST on z15 + selftests
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success
netdev/cover_letter success Series has a cover letter
netdev/patch_count success
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1459 this patch: 1459
netdev/cc_maintainers warning 15 maintainers not CCed: songliubraving@fb.com davem@davemloft.net hpa@zytor.com netdev@vger.kernel.org x86@kernel.org yoshfuji@linux-ipv6.org dsahern@kernel.org dave.hansen@linux.intel.com kafai@fb.com bp@alien8.de john.fastabend@gmail.com yhs@fb.com mingo@redhat.com tglx@linutronix.de kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 173 this patch: 173
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1466 this patch: 1466
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns; WARNING: line length of 86 exceeds 80 columns; WARNING: line length of 87 exceeds 80 columns; WARNING: line length of 95 exceeds 80 columns; WARNING: line length of 96 exceeds 80 columns; WARNING: line length of 97 exceeds 80 columns; WARNING: line length of 98 exceeds 80 columns; WARNING: line length of 99 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kui-Feng Lee April 12, 2022, 4:55 p.m. UTC
BPF trampolines will create a bpf_tramp_run_ctx, a bpf_run_ctx, on the
caller thread's stack and set/reset the current bpf_run_ctx before/after
calling a bpf_prog.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
---
 arch/x86/net/bpf_jit_comp.c | 55 +++++++++++++++++++++++++++++++++++++
 include/linux/bpf.h         | 17 +++++++++---
 kernel/bpf/syscall.c        |  7 +++--
 kernel/bpf/trampoline.c     | 20 +++++++++++---
 4 files changed, 89 insertions(+), 10 deletions(-)
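
At the C level, the sequence the trampoline runs through for each attached
program corresponds roughly to the following sketch (illustrative only:
`prog` and `ctx` are hypothetical stand-ins for the attached bpf_prog and
its arguments; the trampoline actually emits this as machine code in
invoke_bpf_prog()):

	struct bpf_tramp_run_ctx run_ctx;	/* reserved on the trampoline's stack frame */
	u64 start;

	run_ctx.bpf_cookie = 0;	/* cookies are wired up later in this series */
	/* saves the current bpf_run_ctx and installs &run_ctx.run_ctx */
	start = __bpf_prog_enter(prog, &run_ctx);
	if (start)	/* 0 means recursion was detected; skip the program */
		bpf_prog_run(prog, ctx);
	/* restores run_ctx.saved_run_ctx as the current bpf_run_ctx */
	__bpf_prog_exit(prog, start, &run_ctx);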

Comments

Andrii Nakryiko April 13, 2022, 2:55 a.m. UTC | #1
On Tue, Apr 12, 2022 at 9:56 AM Kui-Feng Lee <kuifeng@fb.com> wrote:
>
> BPF trampolines will create a bpf_tramp_run_ctx, a bpf_run_ctx, on the
> caller thread's stack and set/reset the current bpf_run_ctx before/after
> calling a bpf_prog.
>
> Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
> ---
>  arch/x86/net/bpf_jit_comp.c | 55 +++++++++++++++++++++++++++++++++++++
>  include/linux/bpf.h         | 17 +++++++++---
>  kernel/bpf/syscall.c        |  7 +++--
>  kernel/bpf/trampoline.c     | 20 +++++++++++---
>  4 files changed, 89 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 4dcc0b1ac770..0f521be68f7b 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -1766,10 +1766,26 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
>  {
>         u8 *prog = *pprog;
>         u8 *jmp_insn;
> +       int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
>         struct bpf_prog *p = l->link.prog;
>
> +       /* mov rdi, 0 */
> +       emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
> +
> +       /* Prepare struct bpf_tramp_run_ctx.
> +        *
> +        * bpf_tramp_run_ctx is already preserved by
> +        * arch_prepare_bpf_trampoline().
> +        *
> +        * mov QWORD PTR [rsp + ctx_cookie_off], rdi
> +        */
> +       EMIT4(0x48, 0x89, 0x7C, 0x24); EMIT1(ctx_cookie_off);
> +
>         /* arg1: mov rdi, progs[i] */
>         emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> +       /* arg2: mov rsi, rsp (struct bpf_run_ctx *) */
> +       EMIT3(0x48, 0x89, 0xE6);
> +
>         if (emit_call(&prog,
>                       p->aux->sleepable ? __bpf_prog_enter_sleepable :
>                       __bpf_prog_enter, prog))
> @@ -1815,6 +1831,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
>         emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
>         /* arg2: mov rsi, rbx <- start time in nsec */
>         emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> +       /* arg3: mov rdx, rsp (struct bpf_run_ctx *) */
> +       EMIT3(0x48, 0x89, 0xE2);
>         if (emit_call(&prog,
>                       p->aux->sleepable ? __bpf_prog_exit_sleepable :
>                       __bpf_prog_exit, prog))
> @@ -2079,6 +2097,16 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>                 }
>         }
>
> +       if (nr_args < 3 && (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links))
> +               EMIT1(0x52);    /* push rdx */

this nr_args < 3 condition is new, maybe leave a comment on why we
need this? Also instead of repeating this whole (fentry->nr_links ||
... || ...) check, why not move if (nr_args < 3) inside the if right
below?
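
Concretely, that restructuring would read something like this (a sketch of
the suggestion, not the final code):

	if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links) {
		if (nr_args < 3)
			EMIT1(0x52);	/* push rdx */
		/* Prepare struct bpf_tramp_run_ctx.
		 * sub rsp, sizeof(struct bpf_tramp_run_ctx)
		 */
		EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_tramp_run_ctx));
	}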

> +
> +       if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links) {

if (nr_args > 3) here?

> +               /* Prepare struct bpf_tramp_run_ctx.
> +                * sub rsp, sizeof(struct bpf_tramp_run_ctx)
> +                */
> +               EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_tramp_run_ctx));
> +       }
> +
>         if (fentry->nr_links)
>                 if (invoke_bpf(m, &prog, fentry, regs_off,
>                                flags & BPF_TRAMP_F_RET_FENTRY_RET))

[...]

>         if (fmod_ret->nr_links) {
> @@ -2133,6 +2179,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>                         goto cleanup;
>                 }
>
> +       /* pop struct bpf_tramp_run_ctx
> +        * add rsp, sizeof(struct bpf_tramp_run_ctx)
> +        */
> +       if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links)

well, actually, can it ever be that this condition doesn't hold? That
would mean we are generating an empty trampoline for some reason, no? Do
we do that? Checking bpf_trampoline_update() and
bpf_struct_ops_prepare_trampoline(), it doesn't seem like we ever do this.
So it seems like all these checks can be dropped?
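
If so, the reservation could simply be emitted unconditionally, roughly:

	/* A trampoline is only generated when at least one program is
	 * attached, so always reserve struct bpf_tramp_run_ctx:
	 * sub rsp, sizeof(struct bpf_tramp_run_ctx)
	 */
	EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_tramp_run_ctx));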

> +               EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_tramp_run_ctx));
> +
> +       if (nr_args < 3 && (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links))
> +               EMIT1(0x5A); /* pop rdx */

same, move it inside if above?

> +
>         if (flags & BPF_TRAMP_F_RESTORE_REGS)
>                 restore_regs(m, &prog, nr_args, regs_off);
>

[...]
Kui-Feng Lee April 13, 2022, 6:06 p.m. UTC | #2
On Tue, 2022-04-12 at 19:55 -0700, Andrii Nakryiko wrote:
> On Tue, Apr 12, 2022 at 9:56 AM Kui-Feng Lee <kuifeng@fb.com> wrote:
> > 
> > BPF trampolines will create a bpf_tramp_run_ctx, a bpf_run_ctx, on the
> > caller thread's stack and set/reset the current bpf_run_ctx before/after
> > calling a bpf_prog.
> > 
> > Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
> > ---
> >  arch/x86/net/bpf_jit_comp.c | 55
> > +++++++++++++++++++++++++++++++++++++
> >  include/linux/bpf.h         | 17 +++++++++---
> >  kernel/bpf/syscall.c        |  7 +++--
> >  kernel/bpf/trampoline.c     | 20 +++++++++++---
> >  4 files changed, 89 insertions(+), 10 deletions(-)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c
> > b/arch/x86/net/bpf_jit_comp.c
> > index 4dcc0b1ac770..0f521be68f7b 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -1766,10 +1766,26 @@ static int invoke_bpf_prog(const struct
> > btf_func_model *m, u8 **pprog,
> >  {
> >         u8 *prog = *pprog;
> >         u8 *jmp_insn;
> > +       int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx,
> > bpf_cookie);
> >         struct bpf_prog *p = l->link.prog;
> > 
> > +       /* mov rdi, 0 */
> > +       emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
> > +
> > +       /* Prepare struct bpf_tramp_run_ctx.
> > +        *
> > +        * bpf_tramp_run_ctx is already preserved by
> > +        * arch_prepare_bpf_trampoline().
> > +        *
> > +        * mov QWORD PTR [rsp + ctx_cookie_off], rdi
> > +        */
> > +       EMIT4(0x48, 0x89, 0x7C, 0x24); EMIT1(ctx_cookie_off);
> > +
> >         /* arg1: mov rdi, progs[i] */
> >         emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32)
> > (long) p);
> > +       /* arg2: mov rsi, rsp (struct bpf_run_ctx *) */
> > +       EMIT3(0x48, 0x89, 0xE6);
> > +
> >         if (emit_call(&prog,
> >                       p->aux->sleepable ?
> > __bpf_prog_enter_sleepable :
> >                       __bpf_prog_enter, prog))
> > @@ -1815,6 +1831,8 @@ static int invoke_bpf_prog(const struct
> > btf_func_model *m, u8 **pprog,
> >         emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32)
> > (long) p);
> >         /* arg2: mov rsi, rbx <- start time in nsec */
> >         emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> > +       /* arg3: mov rdx, rsp (struct bpf_run_ctx *) */
> > +       EMIT3(0x48, 0x89, 0xE2);
> >         if (emit_call(&prog,
> >                       p->aux->sleepable ? __bpf_prog_exit_sleepable
> > :
> >                       __bpf_prog_exit, prog))
> > @@ -2079,6 +2097,16 @@ int arch_prepare_bpf_trampoline(struct
> > bpf_tramp_image *im, void *image, void *i
> >                 }
> >         }
> > 
> > +       if (nr_args < 3 && (fentry->nr_links || fexit->nr_links ||
> > fmod_ret->nr_links))
> > +               EMIT1(0x52);    /* push rdx */
> 
> this nr_args < 3 condition is new, maybe leave a comment on why we
> need this? Also instead of repeating this whole (fentry->nr_links ||
> ... || ...) check, why not move if (nr_args < 3) inside the if right
> below?
> 
> > +
> > +       if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links) {
> 
> if (nr_args > 3) here?
> 
> > +               /* Prepare struct bpf_tramp_run_ctx.
> > +                * sub rsp, sizeof(struct bpf_tramp_run_ctx)
> > +                */
> > +               EMIT4(0x48, 0x83, 0xEC, sizeof(struct
> > bpf_tramp_run_ctx));
> > +       }
> > +
> >         if (fentry->nr_links)
> >                 if (invoke_bpf(m, &prog, fentry, regs_off,
> >                                flags & BPF_TRAMP_F_RET_FENTRY_RET))
> 
> [...]
> 
> >         if (fmod_ret->nr_links) {
> > @@ -2133,6 +2179,15 @@ int arch_prepare_bpf_trampoline(struct
> > bpf_tramp_image *im, void *image, void *i
> >                         goto cleanup;
> >                 }
> > 
> > +       /* pop struct bpf_tramp_run_ctx
> > +        * add rsp, sizeof(struct bpf_tramp_run_ctx)
> > +        */
> > +       if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links)
> 
> well, actually, can it ever be that this condition doesn't hold? That
> would mean we are generating an empty trampoline for some reason, no? Do
> we do that? Checking bpf_trampoline_update() and
> bpf_struct_ops_prepare_trampoline(), it doesn't seem like we ever do
> this.
> So it seems like all these checks can be dropped?

You are right.  I had added this check to perform the following line
only in some cases, and didn't realize the check was no longer useful
after I changed the way it is done.


> > +               EMIT4(0x48, 0x83, 0xC4, sizeof(struct
> > bpf_tramp_run_ctx));
> > +
> > +       if (nr_args < 3 && (fentry->nr_links || fexit->nr_links ||
> > fmod_ret->nr_links))
> > +               EMIT1(0x5A); /* pop rdx */
> 
> same, move it inside if above?
> 
> > +
> >         if (flags & BPF_TRAMP_F_RESTORE_REGS)
> >                 restore_regs(m, &prog, nr_args, regs_off);
> > 
> 
> [...]
Kui-Feng Lee April 13, 2022, 8:14 p.m. UTC | #3
On Tue, 2022-04-12 at 19:55 -0700, Andrii Nakryiko wrote:
> On Tue, Apr 12, 2022 at 9:56 AM Kui-Feng Lee <kuifeng@fb.com> wrote:
> > 
> > BPF trampolines will create a bpf_tramp_run_ctx, a bpf_run_ctx, on the
> > caller thread's stack and set/reset the current bpf_run_ctx before/after
> > calling a bpf_prog.
> > 
> > Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
> > ---
> >  arch/x86/net/bpf_jit_comp.c | 55
> > +++++++++++++++++++++++++++++++++++++
> >  include/linux/bpf.h         | 17 +++++++++---
> >  kernel/bpf/syscall.c        |  7 +++--
> >  kernel/bpf/trampoline.c     | 20 +++++++++++---
> >  4 files changed, 89 insertions(+), 10 deletions(-)
> > 
> > diff --git a/arch/x86/net/bpf_jit_comp.c
> > b/arch/x86/net/bpf_jit_comp.c
> > index 4dcc0b1ac770..0f521be68f7b 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -1766,10 +1766,26 @@ static int invoke_bpf_prog(const struct
> > btf_func_model *m, u8 **pprog,
> >  {
> >         u8 *prog = *pprog;
> >         u8 *jmp_insn;
> > +       int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx,
> > bpf_cookie);
> >         struct bpf_prog *p = l->link.prog;
> > 
> > +       /* mov rdi, 0 */
> > +       emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
> > +
> > +       /* Prepare struct bpf_tramp_run_ctx.
> > +        *
> > +        * bpf_tramp_run_ctx is already preserved by
> > +        * arch_prepare_bpf_trampoline().
> > +        *
> > +        * mov QWORD PTR [rsp + ctx_cookie_off], rdi
> > +        */
> > +       EMIT4(0x48, 0x89, 0x7C, 0x24); EMIT1(ctx_cookie_off);
> > +
> >         /* arg1: mov rdi, progs[i] */
> >         emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32)
> > (long) p);
> > +       /* arg2: mov rsi, rsp (struct bpf_run_ctx *) */
> > +       EMIT3(0x48, 0x89, 0xE6);
> > +
> >         if (emit_call(&prog,
> >                       p->aux->sleepable ?
> > __bpf_prog_enter_sleepable :
> >                       __bpf_prog_enter, prog))
> > @@ -1815,6 +1831,8 @@ static int invoke_bpf_prog(const struct
> > btf_func_model *m, u8 **pprog,
> >         emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32)
> > (long) p);
> >         /* arg2: mov rsi, rbx <- start time in nsec */
> >         emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> > +       /* arg3: mov rdx, rsp (struct bpf_run_ctx *) */
> > +       EMIT3(0x48, 0x89, 0xE2);
> >         if (emit_call(&prog,
> >                       p->aux->sleepable ? __bpf_prog_exit_sleepable
> > :
> >                       __bpf_prog_exit, prog))
> > @@ -2079,6 +2097,16 @@ int arch_prepare_bpf_trampoline(struct
> > bpf_tramp_image *im, void *image, void *i
> >                 }
> >         }
> > 
> > +       if (nr_args < 3 && (fentry->nr_links || fexit->nr_links ||
> > fmod_ret->nr_links))
> > +               EMIT1(0x52);    /* push rdx */
> 
> this nr_args < 3 condition is new, maybe leave a comment on why we
> need this? Also instead of repeating this whole (fentry->nr_links ||
> ... || ...) check, why not move if (nr_args < 3) inside the if right
> below?

I thought rdx was a nonvolatile (callee-saved) register.  Checking the
ABI again, I see I was wrong.  I am removing this part.
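
For reference, the SysV AMD64 calling convention splits the registers as
follows (general ABI knowledge, not part of the patch):

	/* caller-saved (volatile):  rax, rdi, rsi, rdx, rcx, r8-r11
	 * callee-saved (preserved): rbx, rbp, r12-r15 (and rsp)
	 * rdx carries the third argument, so any callee may clobber it;
	 * the trampoline must spill it if the value is needed afterwards.
	 */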

> 
> > +
> > +       if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links) {
> 
> if (nr_args > 3) here?
> 
> > +               /* Prepare struct bpf_tramp_run_ctx.
> > +                * sub rsp, sizeof(struct bpf_tramp_run_ctx)
> > +                */
> > +               EMIT4(0x48, 0x83, 0xEC, sizeof(struct
> > bpf_tramp_run_ctx));
> > +       }
> > +
> >         if (fentry->nr_links)
> >                 if (invoke_bpf(m, &prog, fentry, regs_off,
> >                                flags & BPF_TRAMP_F_RET_FENTRY_RET))
> 
> [...]
> 
> >         if (fmod_ret->nr_links) {
> > @@ -2133,6 +2179,15 @@ int arch_prepare_bpf_trampoline(struct
> > bpf_tramp_image *im, void *image, void *i
> >                         goto cleanup;
> >                 }
> > 
> > +       /* pop struct bpf_tramp_run_ctx
> > +        * add rsp, sizeof(struct bpf_tramp_run_ctx)
> > +        */
> > +       if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links)
> 
> well, actually, can it ever be that this condition doesn't hold? That
> would mean we are generating an empty trampoline for some reason, no? Do
> we do that? Checking bpf_trampoline_update() and
> bpf_struct_ops_prepare_trampoline(), it doesn't seem like we ever do
> this.
> So it seems like all these checks can be dropped?
> 
> > +               EMIT4(0x48, 0x83, 0xC4, sizeof(struct
> > bpf_tramp_run_ctx));
> > +
> > +       if (nr_args < 3 && (fentry->nr_links || fexit->nr_links ||
> > fmod_ret->nr_links))
> > +               EMIT1(0x5A); /* pop rdx */
> 
> same, move it inside if above?
> 
> > +
> >         if (flags & BPF_TRAMP_F_RESTORE_REGS)
> >                 restore_regs(m, &prog, nr_args, regs_off);
> > 
> 
> [...]

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4dcc0b1ac770..0f521be68f7b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1766,10 +1766,26 @@  static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
+	int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
 	struct bpf_prog *p = l->link.prog;
 
+	/* mov rdi, 0 */
+	emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
+
+	/* Prepare struct bpf_tramp_run_ctx.
+	 *
+	 * bpf_tramp_run_ctx is already preserved by
+	 * arch_prepare_bpf_trampoline().
+	 *
+	 * mov QWORD PTR [rsp + ctx_cookie_off], rdi
+	 */
+	EMIT4(0x48, 0x89, 0x7C, 0x24); EMIT1(ctx_cookie_off);
+
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+	/* arg2: mov rsi, rsp (struct bpf_run_ctx *) */
+	EMIT3(0x48, 0x89, 0xE6);
+
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
 		      __bpf_prog_enter, prog))
@@ -1815,6 +1831,8 @@  static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: mov rsi, rbx <- start time in nsec */
 	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+	/* arg3: mov rdx, rsp (struct bpf_run_ctx *) */
+	EMIT3(0x48, 0x89, 0xE2);
 	if (emit_call(&prog,
 		      p->aux->sleepable ? __bpf_prog_exit_sleepable :
 		      __bpf_prog_exit, prog))
@@ -2079,6 +2097,16 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 		}
 	}
 
+	if (nr_args < 3 && (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links))
+		EMIT1(0x52);	/* push rdx */
+
+	if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links) {
+		/* Prepare struct bpf_tramp_run_ctx.
+		 * sub rsp, sizeof(struct bpf_tramp_run_ctx)
+		 */
+		EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_tramp_run_ctx));
+	}
+
 	if (fentry->nr_links)
 		if (invoke_bpf(m, &prog, fentry, regs_off,
 			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
@@ -2098,6 +2126,15 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		/* pop struct bpf_tramp_run_ctx
+		 * add rsp, sizeof(struct bpf_tramp_run_ctx)
+		 */
+		if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links)
+			EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_tramp_run_ctx));
+
+		if (nr_args < 3 && (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links))
+			EMIT1(0x5A); /* pop rdx */
+
 		restore_regs(m, &prog, nr_args, regs_off);
 
 		/* call original function */
@@ -2110,6 +2147,15 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 		im->ip_after_call = prog;
 		memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
 		prog += X86_PATCH_SIZE;
+
+		if (nr_args < 3 && (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links))
+			EMIT1(0x52);	/* push rdx */
+
+		/* Prepare struct bpf_tramp_run_ctx.
+		 * sub rsp, sizeof(struct bpf_tramp_run_ctx)
+		 */
+		if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links)
+			EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_tramp_run_ctx));
 	}
 
 	if (fmod_ret->nr_links) {
@@ -2133,6 +2179,15 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			goto cleanup;
 		}
 
+	/* pop struct bpf_tramp_run_ctx
+	 * add rsp, sizeof(struct bpf_tramp_run_ctx)
+	 */
+	if (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links)
+		EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_tramp_run_ctx));
+
+	if (nr_args < 3 && (fentry->nr_links || fexit->nr_links || fmod_ret->nr_links))
+		EMIT1(0x5A); /* pop rdx */
+
 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
 		restore_regs(m, &prog, nr_args, regs_off);
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2f7909eb51ec..d87df049e6b1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -681,6 +681,8 @@  struct bpf_tramp_links {
 	int nr_links;
 };
 
+struct bpf_tramp_run_ctx;
+
 /* Different use cases for BPF trampoline:
  * 1. replace nop at the function entry (kprobe equivalent)
  *    flags = BPF_TRAMP_F_RESTORE_REGS
@@ -707,10 +709,11 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
 				struct bpf_tramp_links *tlinks,
 				void *orig_call);
 /* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+				       struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
 
@@ -1304,6 +1307,12 @@  struct bpf_trace_run_ctx {
 	u64 bpf_cookie;
 };
 
+struct bpf_tramp_run_ctx {
+	struct bpf_run_ctx run_ctx;
+	u64 bpf_cookie;
+	struct bpf_run_ctx *saved_run_ctx;
+};
+
 static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
 {
 	struct bpf_run_ctx *old_ctx = NULL;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3078c0c9317f..56e69a582b21 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4775,6 +4775,7 @@  static bool syscall_prog_is_valid_access(int off, int size,
 BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 {
 	struct bpf_prog * __maybe_unused prog;
+	struct bpf_tramp_run_ctx __maybe_unused run_ctx;
 
 	switch (cmd) {
 	case BPF_MAP_CREATE:
@@ -4802,13 +4803,15 @@  BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
 			return -EINVAL;
 		}
 
-		if (!__bpf_prog_enter_sleepable(prog)) {
+		run_ctx.bpf_cookie = 0;
+		run_ctx.saved_run_ctx = NULL;
+		if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) {
 			/* recursion detected */
 			bpf_prog_put(prog);
 			return -EBUSY;
 		}
 		attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
-		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
+		__bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx);
 		bpf_prog_put(prog);
 		return 0;
 #endif
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d5e6bc5517cb..baf1b65d523e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -568,11 +568,14 @@  static void notrace inc_misses_counter(struct bpf_prog *prog)
  * [2..MAX_U64] - execute bpf prog and record execution time.
  *     This is start time.
  */
-u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 	__acquires(RCU)
 {
 	rcu_read_lock();
 	migrate_disable();
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
 	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
 		inc_misses_counter(prog);
 		return 0;
@@ -602,29 +605,38 @@  static void notrace update_prog_stats(struct bpf_prog *prog,
 	}
 }
 
-void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx)
 	__releases(RCU)
 {
+	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
 	update_prog_stats(prog, start);
 	__this_cpu_dec(*(prog->active));
 	migrate_enable();
 	rcu_read_unlock();
 }
 
-u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 {
 	rcu_read_lock_trace();
 	migrate_disable();
 	might_fault();
+
 	if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
 		inc_misses_counter(prog);
 		return 0;
 	}
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
 	return bpf_prog_start_time();
 }
 
-void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
+				       struct bpf_tramp_run_ctx *run_ctx)
 {
+	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
 	update_prog_stats(prog, start);
 	__this_cpu_dec(*(prog->active));
 	migrate_enable();