diff mbox series

[RFC,bpf] selftests/bpf: Curious case of a successful tailcall that returns to caller

Message ID 20220616110252.418333-1-jakub@cloudflare.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series [RFC,bpf] selftests/bpf: Curious case of a successful tailcall that returns to caller | expand

Checks

Context Check Description
bpf/vmtest-bpf-PR fail PR summary
bpf/vmtest-bpf-VM_Test-1 fail Logs for Kernel LATEST on ubuntu-latest with gcc
bpf/vmtest-bpf-VM_Test-2 fail Logs for Kernel LATEST on ubuntu-latest with llvm-15
bpf/vmtest-bpf-VM_Test-3 fail Logs for Kernel LATEST on z15 with gcc
netdev/tree_selection success Clearly marked for bpf, async
netdev/fixes_present fail Series targets non-next tree, but doesn't contain any Fixes tags
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 8 maintainers not CCed: delyank@fb.com songliubraving@fb.com linux-kselftest@vger.kernel.org yhs@fb.com john.fastabend@gmail.com kafai@fb.com shuah@kernel.org kpsingh@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: Missing a blank line after declarations WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Jakub Sitnicki June 16, 2022, 11:02 a.m. UTC
While working aarch64 JIT to allow mixing bpf2bpf calls with tailcalls, I
noticed unexpected tailcall behavior in x86 JIT.

I don't know if it is by design or a bug. The bpf_tail_call helper
documentation says that the user should not expect the control flow to
return to the previous program, if the tail call was successful:

> If the call succeeds, the kernel immediately runs the first
> instruction of the new program. This is not a function call,
> and it never returns to the previous program.

However, when a tailcall happens from a subprogram, that is after a bpf2bpf
call, that is not the case. We return to the caller program because the
stack destruction is too shallow. BPF stack of just the top-most BPF
function gets destroyed.

This in turn allows the return value of the tailcall'ed program to get
overwritten, as the test below test demonstrates. It currently fails on
x86:

test_tailcall_bpf2bpf_7:PASS:open and load 0 nsec
test_tailcall_bpf2bpf_7:PASS:entry prog fd 0 nsec
test_tailcall_bpf2bpf_7:PASS:jmp_table map fd 0 nsec
test_tailcall_bpf2bpf_7:PASS:classifier_0 prog fd 0 nsec
test_tailcall_bpf2bpf_7:PASS:jmp_table map update 0 nsec
test_tailcall_bpf2bpf_7:PASS:entry prog test run 0 nsec
test_tailcall_bpf2bpf_7:FAIL:tailcall retval unexpected tailcall retval: actual 2 != expected 0
test_tailcall_bpf2bpf_7:PASS:bss map fd 0 nsec
test_tailcall_bpf2bpf_7:PASS:bss map lookup 0 nsec
test_tailcall_bpf2bpf_7:PASS:done flag is set 0 nsec

If we step through the program, we can observe the flow as so:

int entry(struct __sk_buff * skb):
bpf_prog_3bb007ac57240471_entry:
; subprog_tail(skb);
   0:   nopl   0x0(%rax,%rax,1)
   5:   xor    %eax,%eax
   7:   push   %rbp
   8:   mov    %rsp,%rbp
   b:   push   %rax
   c:   mov    -0x8(%rbp),%rax
  13:   call   0x0000000000000048 ---------.
; return 2;                                |
  18:   mov    $0x2,%eax <--------------------------------------.
  1d:   leave                              |                    |
  1e:   ret                                |                    |
                                           |                    |
int subprog_tail(struct __sk_buff * skb):  |                    |
bpf_prog_3a140cef239a4b4f_F:               |                    |
; int subprog_tail(struct __sk_buff *skb)  |                    |
   0:   nopl   0x0(%rax,%rax,1) <----------'                    |
   5:   xchg   %ax,%ax                                          |
   7:   push   %rbp                                             |
   8:   mov    %rsp,%rbp                                        |
   b:   push   %rax                                             |
   c:   push   %rbx                                             |
   d:   push   %r13                                             |
   f:   mov    %rdi,%rbx                                        |
; asm volatile("r1 = %[ctx]\n\t"                                |
  12:   movabs $0xffff888104119000,%r13                         |
  1c:   mov    %rbx,%rdi                                        |
  1f:   mov    %r13,%rsi                                        |
  22:   xor    %edx,%edx                                        |
  24:   mov    -0x4(%rbp),%eax                                  |
  2a:   cmp    $0x21,%eax                                       |
  2d:   jae    0x0000000000000046                               |
  2f:   add    $0x1,%eax                                        |
  32:   mov    %eax,-0x4(%rbp)                                  |
  38:   jmp    0x0000000000000046 ---------------------------.  |
  3d:   pop    %r13                                          |  |
  3f:   pop    %rbx                                          |  |
  40:   pop    %rax                                          |  |
  41:   nopl   0x0(%rax,%rax,1)                              |  |
; return 1;                                                  |  |
  46:   pop    %r13                                          |  |
  48:   pop    %rbx                                          |  |
  49:   leave                                                |  |
  4a:   ret                                                  |  |
                                                             |  |
int classifier_0(struct __sk_buff * skb):                    |  |
bpf_prog_6e664b22811ace0d_classifier_0:                      |  |
; done = 1;                                                  |  |
   0:   nopl   0x0(%rax,%rax,1)                              |  |
   5:   xchg   %ax,%ax                                       |  |
   7:   push   %rbp                                          |  |
   8:   mov    %rsp,%rbp                                     |  |
   b:   movabs $0xffffc900000b6000,%rdi <--------------------'  |
  15:   mov    $0x1,%esi                                        |
  1a:   mov    %esi,0x0(%rdi)                                   |
; return 0;                                                     |
  1d:   xor    %eax,%eax                                        |
  1f:   leave                                                   |
  20:   ret ----------------------------------------------------'

My question is - is it a bug or intended behavior that other JITs should
replicate?

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
---
 .../selftests/bpf/prog_tests/tailcalls.c      | 55 +++++++++++++++++++
 .../selftests/bpf/progs/tailcall_bpf2bpf7.c   | 37 +++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf7.c

Comments

Fijalkowski, Maciej June 16, 2022, 3 p.m. UTC | #1
On Thu, Jun 16, 2022 at 01:02:52PM +0200, Jakub Sitnicki wrote:
> While working aarch64 JIT to allow mixing bpf2bpf calls with tailcalls, I
> noticed unexpected tailcall behavior in x86 JIT.
> 
> I don't know if it is by design or a bug. The bpf_tail_call helper
> documentation says that the user should not expect the control flow to
> return to the previous program, if the tail call was successful:
> 
> > If the call succeeds, the kernel immediately runs the first
> > instruction of the new program. This is not a function call,
> > and it never returns to the previous program.
> 
> However, when a tailcall happens from a subprogram, that is after a bpf2bpf
> call, that is not the case. We return to the caller program because the
> stack destruction is too shallow. BPF stack of just the top-most BPF
> function gets destroyed.
> 
> This in turn allows the return value of the tailcall'ed program to get
> overwritten, as the test below test demonstrates. It currently fails on
> x86:

Disclaimer: some time has passed by since I looked into this :P

To me the bug would be if test would have returned 1 in your case. If I
recall correctly that was the design choice, so tailcalls when mixed with
bpf2bpf will consume current stack frame. When tailcall happens from
subprogram then we would return to the caller of this subprog. We added
logic to verifier that checks if this (tc + bpf2bpf) mix wouldn't cause
stack overflow. We even limit the stack frame size to 256 in such case.

Cilium docs explain this:
https://docs.cilium.io/en/latest/bpf/#bpf-to-bpf-calls

> 
> test_tailcall_bpf2bpf_7:PASS:open and load 0 nsec
> test_tailcall_bpf2bpf_7:PASS:entry prog fd 0 nsec
> test_tailcall_bpf2bpf_7:PASS:jmp_table map fd 0 nsec
> test_tailcall_bpf2bpf_7:PASS:classifier_0 prog fd 0 nsec
> test_tailcall_bpf2bpf_7:PASS:jmp_table map update 0 nsec
> test_tailcall_bpf2bpf_7:PASS:entry prog test run 0 nsec
> test_tailcall_bpf2bpf_7:FAIL:tailcall retval unexpected tailcall retval: actual 2 != expected 0
> test_tailcall_bpf2bpf_7:PASS:bss map fd 0 nsec
> test_tailcall_bpf2bpf_7:PASS:bss map lookup 0 nsec
> test_tailcall_bpf2bpf_7:PASS:done flag is set 0 nsec
> 
> If we step through the program, we can observe the flow as so:
> 
> int entry(struct __sk_buff * skb):
> bpf_prog_3bb007ac57240471_entry:
> ; subprog_tail(skb);
>    0:   nopl   0x0(%rax,%rax,1)
>    5:   xor    %eax,%eax
>    7:   push   %rbp
>    8:   mov    %rsp,%rbp
>    b:   push   %rax
>    c:   mov    -0x8(%rbp),%rax
>   13:   call   0x0000000000000048 ---------.
> ; return 2;                                |
>   18:   mov    $0x2,%eax <--------------------------------------.
>   1d:   leave                              |                    |
>   1e:   ret                                |                    |
>                                            |                    |
> int subprog_tail(struct __sk_buff * skb):  |                    |
> bpf_prog_3a140cef239a4b4f_F:               |                    |
> ; int subprog_tail(struct __sk_buff *skb)  |                    |
>    0:   nopl   0x0(%rax,%rax,1) <----------'                    |
>    5:   xchg   %ax,%ax                                          |
>    7:   push   %rbp                                             |
>    8:   mov    %rsp,%rbp                                        |
>    b:   push   %rax                                             |
>    c:   push   %rbx                                             |
>    d:   push   %r13                                             |
>    f:   mov    %rdi,%rbx                                        |
> ; asm volatile("r1 = %[ctx]\n\t"                                |
>   12:   movabs $0xffff888104119000,%r13                         |
>   1c:   mov    %rbx,%rdi                                        |
>   1f:   mov    %r13,%rsi                                        |
>   22:   xor    %edx,%edx                                        |
>   24:   mov    -0x4(%rbp),%eax                                  |
>   2a:   cmp    $0x21,%eax                                       |
>   2d:   jae    0x0000000000000046                               |
>   2f:   add    $0x1,%eax                                        |
>   32:   mov    %eax,-0x4(%rbp)                                  |
>   38:   jmp    0x0000000000000046 ---------------------------.  |
>   3d:   pop    %r13                                          |  |
>   3f:   pop    %rbx                                          |  |
>   40:   pop    %rax                                          |  |
>   41:   nopl   0x0(%rax,%rax,1)                              |  |
> ; return 1;                                                  |  |
>   46:   pop    %r13                                          |  |
>   48:   pop    %rbx                                          |  |
>   49:   leave                                                |  |
>   4a:   ret                                                  |  |
>                                                              |  |
> int classifier_0(struct __sk_buff * skb):                    |  |
> bpf_prog_6e664b22811ace0d_classifier_0:                      |  |
> ; done = 1;                                                  |  |
>    0:   nopl   0x0(%rax,%rax,1)                              |  |
>    5:   xchg   %ax,%ax                                       |  |
>    7:   push   %rbp                                          |  |
>    8:   mov    %rsp,%rbp                                     |  |
>    b:   movabs $0xffffc900000b6000,%rdi <--------------------'  |
>   15:   mov    $0x1,%esi                                        |
>   1a:   mov    %esi,0x0(%rdi)                                   |
> ; return 0;                                                     |
>   1d:   xor    %eax,%eax                                        |
>   1f:   leave                                                   |
>   20:   ret ----------------------------------------------------'
> 
> My question is - is it a bug or intended behavior that other JITs should
> replicate?
> 
> Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
> ---
>  .../selftests/bpf/prog_tests/tailcalls.c      | 55 +++++++++++++++++++
>  .../selftests/bpf/progs/tailcall_bpf2bpf7.c   | 37 +++++++++++++
>  2 files changed, 92 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf7.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
> index c4da87ec3ba4..696c307a1bee 100644
> --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
> +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
> @@ -831,6 +831,59 @@ static void test_tailcall_bpf2bpf_4(bool noise)
>  	bpf_object__close(obj);
>  }
>  
> +#include "tailcall_bpf2bpf7.skel.h"
> +
> +/* The tail call should never return to the previous program, if the
> + * jump was successful.
> + */
> +static void test_tailcall_bpf2bpf_7(void)
> +{
> +	struct tailcall_bpf2bpf7 *obj;
> +	int err, map_fd, prog_fd, main_fd, data_fd, i, val;
> +	LIBBPF_OPTS(bpf_test_run_opts, topts,
> +		.data_in = &pkt_v4,
> +		.data_size_in = sizeof(pkt_v4),
> +		.repeat = 1,
> +	);
> +
> +	obj = tailcall_bpf2bpf7__open_and_load();
> +	if (!ASSERT_OK_PTR(obj, "open and load"))
> +		return;
> +
> +	main_fd = bpf_program__fd(obj->progs.entry);
> +	if (!ASSERT_GE(main_fd, 0, "entry prog fd"))
> +		goto out;
> +
> +	map_fd = bpf_map__fd(obj->maps.jmp_table);
> +	if (!ASSERT_GE(map_fd, 0, "jmp_table map fd"))
> +		goto out;
> +
> +	prog_fd = bpf_program__fd(obj->progs.classifier_0);
> +	if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd"))
> +		goto out;
> +
> +	i = 0;
> +	err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
> +	if (!ASSERT_OK(err, "jmp_table map update"))
> +		goto out;
> +
> +	err = bpf_prog_test_run_opts(main_fd, &topts);
> +	ASSERT_OK(err, "entry prog test run");
> +	ASSERT_EQ(topts.retval, 0, "tailcall retval");
> +
> +	data_fd = bpf_map__fd(obj->maps.bss);
> +	if (!ASSERT_GE(map_fd, 0, "bss map fd"))
> +		goto out;
> +
> +	i = 0;
> +	err = bpf_map_lookup_elem(data_fd, &i, &val);
> +	ASSERT_OK(err, "bss map lookup");
> +	ASSERT_EQ(val, 1, "done flag is set");
> +
> +out:
> +	tailcall_bpf2bpf7__destroy(obj);
> +}
> +
>  void test_tailcalls(void)
>  {
>  	if (test__start_subtest("tailcall_1"))
> @@ -855,4 +908,6 @@ void test_tailcalls(void)
>  		test_tailcall_bpf2bpf_4(false);
>  	if (test__start_subtest("tailcall_bpf2bpf_5"))
>  		test_tailcall_bpf2bpf_4(true);
> +	if (test__start_subtest("tailcall_bpf2bpf_7"))
> +		test_tailcall_bpf2bpf_7();
>  }
> diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf7.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf7.c
> new file mode 100644
> index 000000000000..1be27cfa1702
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf7.c
> @@ -0,0 +1,37 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define __unused __attribute__((always_unused))
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
> +	__uint(max_entries, 1);
> +	__uint(key_size, sizeof(__u32));
> +	__uint(value_size, sizeof(__u32));
> +} jmp_table SEC(".maps");
> +
> +int done = 0;
> +
> +SEC("tc")
> +int classifier_0(struct __sk_buff *skb __unused)
> +{
> +	done = 1;
> +	return 0;
> +}
> +
> +static __noinline
> +int subprog_tail(struct __sk_buff *skb)
> +{
> +	bpf_tail_call_static(skb, &jmp_table, 0);
> +	return 1;
> +}
> +
> +SEC("tc")
> +int entry(struct __sk_buff *skb)
> +{
> +	subprog_tail(skb);
> +	return 2;
> +}
> +
> +char __license[] SEC("license") = "GPL";
> -- 
> 2.35.3
>
Daniel Borkmann June 16, 2022, 3:22 p.m. UTC | #2
On 6/16/22 5:00 PM, Maciej Fijalkowski wrote:
> On Thu, Jun 16, 2022 at 01:02:52PM +0200, Jakub Sitnicki wrote:
>> While working aarch64 JIT to allow mixing bpf2bpf calls with tailcalls, I
>> noticed unexpected tailcall behavior in x86 JIT.
>>
>> I don't know if it is by design or a bug. The bpf_tail_call helper
>> documentation says that the user should not expect the control flow to
>> return to the previous program, if the tail call was successful:
>>
>>> If the call succeeds, the kernel immediately runs the first
>>> instruction of the new program. This is not a function call,
>>> and it never returns to the previous program.
>>
>> However, when a tailcall happens from a subprogram, that is after a bpf2bpf
>> call, that is not the case. We return to the caller program because the
>> stack destruction is too shallow. BPF stack of just the top-most BPF
>> function gets destroyed.
>>
>> This in turn allows the return value of the tailcall'ed program to get
>> overwritten, as the test below test demonstrates. It currently fails on
>> x86:
> 
> Disclaimer: some time has passed by since I looked into this :P
> 
> To me the bug would be if test would have returned 1 in your case. If I
> recall correctly that was the design choice, so tailcalls when mixed with
> bpf2bpf will consume current stack frame. When tailcall happens from
> subprogram then we would return to the caller of this subprog. We added
> logic to verifier that checks if this (tc + bpf2bpf) mix wouldn't cause
> stack overflow. We even limit the stack frame size to 256 in such case.

Yes, that is the desired behavior, so return 2 from your example below looks
correct / expected:

+SEC("tc")
+int classifier_0(struct __sk_buff *skb __unused)
+{
+	done = 1;
+	return 0;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+	bpf_tail_call_static(skb, &jmp_table, 0);
+	return 1;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+	subprog_tail(skb);
+	return 2;
+}

> Cilium docs explain this:
> https://docs.cilium.io/en/latest/bpf/#bpf-to-bpf-calls
Jakub Sitnicki June 16, 2022, 3:24 p.m. UTC | #3
On Thu, Jun 16, 2022 at 05:00 PM +02, Maciej Fijalkowski wrote:
> On Thu, Jun 16, 2022 at 01:02:52PM +0200, Jakub Sitnicki wrote:
>> While working aarch64 JIT to allow mixing bpf2bpf calls with tailcalls, I
>> noticed unexpected tailcall behavior in x86 JIT.
>> 
>> I don't know if it is by design or a bug. The bpf_tail_call helper
>> documentation says that the user should not expect the control flow to
>> return to the previous program, if the tail call was successful:
>> 
>> > If the call succeeds, the kernel immediately runs the first
>> > instruction of the new program. This is not a function call,
>> > and it never returns to the previous program.
>> 
>> However, when a tailcall happens from a subprogram, that is after a bpf2bpf
>> call, that is not the case. We return to the caller program because the
>> stack destruction is too shallow. BPF stack of just the top-most BPF
>> function gets destroyed.
>> 
>> This in turn allows the return value of the tailcall'ed program to get
>> overwritten, as the test below test demonstrates. It currently fails on
>> x86:
>
> Disclaimer: some time has passed by since I looked into this :P
>
> To me the bug would be if test would have returned 1 in your case. If I
> recall correctly that was the design choice, so tailcalls when mixed with
> bpf2bpf will consume current stack frame. When tailcall happens from
> subprogram then we would return to the caller of this subprog. We added
> logic to verifier that checks if this (tc + bpf2bpf) mix wouldn't cause
> stack overflow. We even limit the stack frame size to 256 in such case.
>
> Cilium docs explain this:
> https://docs.cilium.io/en/latest/bpf/#bpf-to-bpf-calls

Thanks for such a quick response.

This answers my question. I should have looked in Cilium docs.

I will see how to work this bit of info into the helper docs.

[...]
Jakub Sitnicki June 16, 2022, 3:28 p.m. UTC | #4
On Thu, Jun 16, 2022 at 05:22 PM +02, Daniel Borkmann wrote:
> On 6/16/22 5:00 PM, Maciej Fijalkowski wrote:
>> On Thu, Jun 16, 2022 at 01:02:52PM +0200, Jakub Sitnicki wrote:
>>> While working aarch64 JIT to allow mixing bpf2bpf calls with tailcalls, I
>>> noticed unexpected tailcall behavior in x86 JIT.
>>>
>>> I don't know if it is by design or a bug. The bpf_tail_call helper
>>> documentation says that the user should not expect the control flow to
>>> return to the previous program, if the tail call was successful:
>>>
>>>> If the call succeeds, the kernel immediately runs the first
>>>> instruction of the new program. This is not a function call,
>>>> and it never returns to the previous program.
>>>
>>> However, when a tailcall happens from a subprogram, that is after a bpf2bpf
>>> call, that is not the case. We return to the caller program because the
>>> stack destruction is too shallow. BPF stack of just the top-most BPF
>>> function gets destroyed.
>>>
>>> This in turn allows the return value of the tailcall'ed program to get
>>> overwritten, as the test below test demonstrates. It currently fails on
>>> x86:
>> Disclaimer: some time has passed by since I looked into this :P
>> To me the bug would be if test would have returned 1 in your case. If I
>> recall correctly that was the design choice, so tailcalls when mixed with
>> bpf2bpf will consume current stack frame. When tailcall happens from
>> subprogram then we would return to the caller of this subprog. We added
>> logic to verifier that checks if this (tc + bpf2bpf) mix wouldn't cause
>> stack overflow. We even limit the stack frame size to 256 in such case.
>
> Yes, that is the desired behavior, so return 2 from your example below looks
> correct / expected:
>
> +SEC("tc")
> +int classifier_0(struct __sk_buff *skb __unused)
> +{
> +	done = 1;
> +	return 0;
> +}
> +
> +static __noinline
> +int subprog_tail(struct __sk_buff *skb)
> +{
> +	bpf_tail_call_static(skb, &jmp_table, 0);
> +	return 1;
> +}
> +
> +SEC("tc")
> +int entry(struct __sk_buff *skb)
> +{
> +	subprog_tail(skb);
> +	return 2;
> +}

Great. Thanks for confirming.

Since I have the test ready, I might as well submit it.
I think the case of ignoring the tailcall result is not covered yet.

Also, this makes changes needed to support bpf2bpf+tailcalls on arm64
simpler. Will post soon.

>
>> Cilium docs explain this:
>> https://docs.cilium.io/en/latest/bpf/#bpf-to-bpf-calls
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index c4da87ec3ba4..696c307a1bee 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -831,6 +831,59 @@  static void test_tailcall_bpf2bpf_4(bool noise)
 	bpf_object__close(obj);
 }
 
+#include "tailcall_bpf2bpf7.skel.h"
+
+/* The tail call should never return to the previous program, if the
+ * jump was successful.
+ */
+static void test_tailcall_bpf2bpf_7(void)
+{
+	struct tailcall_bpf2bpf7 *obj;
+	int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
+
+	obj = tailcall_bpf2bpf7__open_and_load();
+	if (!ASSERT_OK_PTR(obj, "open and load"))
+		return;
+
+	main_fd = bpf_program__fd(obj->progs.entry);
+	if (!ASSERT_GE(main_fd, 0, "entry prog fd"))
+		goto out;
+
+	map_fd = bpf_map__fd(obj->maps.jmp_table);
+	if (!ASSERT_GE(map_fd, 0, "jmp_table map fd"))
+		goto out;
+
+	prog_fd = bpf_program__fd(obj->progs.classifier_0);
+	if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd"))
+		goto out;
+
+	i = 0;
+	err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+	if (!ASSERT_OK(err, "jmp_table map update"))
+		goto out;
+
+	err = bpf_prog_test_run_opts(main_fd, &topts);
+	ASSERT_OK(err, "entry prog test run");
+	ASSERT_EQ(topts.retval, 0, "tailcall retval");
+
+	data_fd = bpf_map__fd(obj->maps.bss);
+	if (!ASSERT_GE(map_fd, 0, "bss map fd"))
+		goto out;
+
+	i = 0;
+	err = bpf_map_lookup_elem(data_fd, &i, &val);
+	ASSERT_OK(err, "bss map lookup");
+	ASSERT_EQ(val, 1, "done flag is set");
+
+out:
+	tailcall_bpf2bpf7__destroy(obj);
+}
+
 void test_tailcalls(void)
 {
 	if (test__start_subtest("tailcall_1"))
@@ -855,4 +908,6 @@  void test_tailcalls(void)
 		test_tailcall_bpf2bpf_4(false);
 	if (test__start_subtest("tailcall_bpf2bpf_5"))
 		test_tailcall_bpf2bpf_4(true);
+	if (test__start_subtest("tailcall_bpf2bpf_7"))
+		test_tailcall_bpf2bpf_7();
 }
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf7.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf7.c
new file mode 100644
index 000000000000..1be27cfa1702
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf7.c
@@ -0,0 +1,37 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define __unused __attribute__((always_unused))
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int done = 0;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb __unused)
+{
+	done = 1;
+	return 0;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+	bpf_tail_call_static(skb, &jmp_table, 0);
+	return 1;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+	subprog_tail(skb);
+	return 2;
+}
+
+char __license[] SEC("license") = "GPL";