diff mbox series

[bpf-next,v2,2/4] bpf, arm64: Fix tailcall infinite loop caused by freplace

Message ID 20240901133856.64367-3-leon.hwang@linux.dev (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series bpf: Fix tailcall infinite loop caused by freplace | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7 this patch: 7
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers fail 1 blamed authors not CCed: martin.lau@linux.dev; 10 maintainers not CCed: sdf@fomichev.me haoluo@google.com linux-arm-kernel@lists.infradead.org jolsa@kernel.org catalin.marinas@arm.com song@kernel.org will@kernel.org kpsingh@kernel.org martin.lau@linux.dev john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 7 this patch: 7
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 115 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18

Commit Message

Leon Hwang Sept. 1, 2024, 1:38 p.m. UTC
Like "bpf, x64: Fix tailcall infinite loop caused by freplace", the same
issue happens on arm64, too.

For example:

tc_bpf2bpf.c:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

__noinline
int subprog_tc(struct __sk_buff *skb)
{
	return skb->len * 2;
}

SEC("tc")
int entry_tc(struct __sk_buff *skb)
{
	return subprog_tc(skb);
}

char __license[] SEC("license") = "GPL";

tailcall_bpf2bpf_hierarchy_freplace.c:

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

int count = 0;

static __noinline
int subprog_tail(struct __sk_buff *skb)
{
	bpf_tail_call_static(skb, &jmp_table, 0);
	return 0;
}

SEC("freplace")
int entry_freplace(struct __sk_buff *skb)
{
	count++;
	subprog_tail(skb);
	subprog_tail(skb);
	return count;
}

char __license[] SEC("license") = "GPL";

The attach target of entry_freplace is subprog_tc, and the tail callee
in subprog_tail is entry_tc.

The result is an infinite loop: entry_tc -> subprog_tc (redirected to
entry_freplace) -> subprog_tail --tailcall-> entry_tc -> ..., because
tail_call_cnt in entry_freplace restarts from zero on every execution of
entry_freplace.

This patch fixes the issue by not touching tail_call_cnt in the prologue
when the program is a subprog or a freplace prog.

However, when a freplace prog attaches to a main prog such as entry_tc, it
does have to initialize tail_call_cnt and tail_call_cnt_ptr, because its
target is a main prog and the target's prologue has not initialized them
before the attach hook.

So, this patch uses the x7 register to tell the freplace prog whether its
attach target is a main prog.

Meanwhile, when tail calling to a freplace prog, the x7 register must be
reset to prevent re-initializing tail_call_cnt in the freplace prog's
prologue.
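
For reference, a minimal sketch of the user-space setup that wires this
reproducer together (attach entry_freplace to subprog_tc, then put entry_tc
into jmp_table). The object file names and error handling below are my own
illustrative assumptions, not taken from the series:

// Hedged sketch; object file names are hypothetical.
#include <bpf/libbpf.h>
#include <bpf/bpf.h>

int setup_repro(void)
{
	struct bpf_object *tgt_obj, *frep_obj;
	struct bpf_program *entry, *freplace;
	int tgt_fd, jmp_fd;
	__u32 key = 0;

	/* Load the tc object that contains entry_tc and subprog_tc. */
	tgt_obj = bpf_object__open_file("tc_bpf2bpf.bpf.o", NULL);
	if (!tgt_obj || bpf_object__load(tgt_obj))
		return -1;
	entry = bpf_object__find_program_by_name(tgt_obj, "entry_tc");
	tgt_fd = bpf_program__fd(entry);

	/* Open the freplace object and point entry_freplace at subprog_tc
	 * before loading it.
	 */
	frep_obj = bpf_object__open_file("tailcall_bpf2bpf_hierarchy_freplace.bpf.o", NULL);
	freplace = bpf_object__find_program_by_name(frep_obj, "entry_freplace");
	bpf_program__set_attach_target(freplace, tgt_fd, "subprog_tc");
	if (bpf_object__load(frep_obj))
		return -1;

	/* Attach entry_freplace, replacing subprog_tc. */
	if (!bpf_program__attach_freplace(freplace, tgt_fd, "subprog_tc"))
		return -1;

	/* Make entry_tc the target of jmp_table[0], closing the loop. */
	jmp_fd = bpf_map__fd(bpf_object__find_map_by_name(frep_obj, "jmp_table"));
	return bpf_map_update_elem(jmp_fd, &key, &tgt_fd, BPF_ANY);
}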

Fixes: 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility")
Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 arch/arm64/net/bpf_jit_comp.c | 44 +++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 5 deletions(-)

Comments

Leon Hwang Sept. 8, 2024, 1:01 p.m. UTC | #1
On 1/9/24 21:38, Leon Hwang wrote:
> Like "bpf, x64: Fix tailcall infinite loop caused by freplace", the same
> issue happens on arm64, too.
> 
> For example:
> 
> tc_bpf2bpf.c:
> 
> // SPDX-License-Identifier: GPL-2.0
> #include <linux/bpf.h>
> #include <bpf/bpf_helpers.h>
> 
> __noinline
> int subprog_tc(struct __sk_buff *skb)
> {
> 	return skb->len * 2;
> }
> 
> SEC("tc")
> int entry_tc(struct __sk_buff *skb)
> {
> 	return subprog(skb);
> }
> 
> char __license[] SEC("license") = "GPL";
> 
> tailcall_bpf2bpf_hierarchy_freplace.c:
> 
> // SPDX-License-Identifier: GPL-2.0
> #include <linux/bpf.h>
> #include <bpf/bpf_helpers.h>
> 
> struct {
> 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
> 	__uint(max_entries, 1);
> 	__uint(key_size, sizeof(__u32));
> 	__uint(value_size, sizeof(__u32));
> } jmp_table SEC(".maps");
> 
> int count = 0;
> 
> static __noinline
> int subprog_tail(struct __sk_buff *skb)
> {
> 	bpf_tail_call_static(skb, &jmp_table, 0);
> 	return 0;
> }
> 
> SEC("freplace")
> int entry_freplace(struct __sk_buff *skb)
> {
> 	count++;
> 	subprog_tail(skb);
> 	subprog_tail(skb);
> 	return count;
> }
> 
> char __license[] SEC("license") = "GPL";
> 
> The attach target of entry_freplace is subprog_tc, and the tail callee
> in subprog_tail is entry_tc.
> 
> Then, the infinite loop will be entry_tc -> entry_tc -> entry_freplace ->
> subprog_tail --tailcall-> entry_tc, because tail_call_cnt in
> entry_freplace will count from zero for every time of entry_freplace
> execution.
> 
> This patch fixes the issue by avoiding touching tail_call_cnt at
> prologue when it's subprog or freplace prog.
> 
> Then, when freplace prog attaches to entry_tc, it has to initialize
> tail_call_cnt and tail_call_cnt_ptr, because its target is main prog and
> its target's prologue hasn't initialize them before the attach hook.
> 
> So, this patch uses x7 register to tell freplace prog that its target
> prog is main prog or not.
> 
> Meanwhile, while tail calling to a freplace prog, it is required to
> reset x7 register to prevent re-initializing tail_call_cnt at freplace
> prog's prologue.
> 
> Fixes: 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility")
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>  arch/arm64/net/bpf_jit_comp.c | 44 +++++++++++++++++++++++++++++++----
>  1 file changed, 39 insertions(+), 5 deletions(-)
> 
Hi Puranjay and Kuohai,

Since introducing arch_bpf_run() is not recommended, this is my approach
to fixing this niche case on arm64.

Do you have any better idea to fix it?

Thanks,
Leon
Xu Kuohai Sept. 9, 2024, 9:02 a.m. UTC | #2
On 9/8/2024 9:01 PM, Leon Hwang wrote:
> 
> 
> On 1/9/24 21:38, Leon Hwang wrote:
>> Like "bpf, x64: Fix tailcall infinite loop caused by freplace", the same
>> issue happens on arm64, too.
>>
>> For example:
>>
>> tc_bpf2bpf.c:
>>
>> // SPDX-License-Identifier: GPL-2.0
>> #include <linux/bpf.h>
>> #include <bpf/bpf_helpers.h>
>>
>> __noinline
>> int subprog_tc(struct __sk_buff *skb)
>> {
>> 	return skb->len * 2;
>> }
>>
>> SEC("tc")
>> int entry_tc(struct __sk_buff *skb)
>> {
>> 	return subprog(skb);
>> }
>>
>> char __license[] SEC("license") = "GPL";
>>
>> tailcall_bpf2bpf_hierarchy_freplace.c:
>>
>> // SPDX-License-Identifier: GPL-2.0
>> #include <linux/bpf.h>
>> #include <bpf/bpf_helpers.h>
>>
>> struct {
>> 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
>> 	__uint(max_entries, 1);
>> 	__uint(key_size, sizeof(__u32));
>> 	__uint(value_size, sizeof(__u32));
>> } jmp_table SEC(".maps");
>>
>> int count = 0;
>>
>> static __noinline
>> int subprog_tail(struct __sk_buff *skb)
>> {
>> 	bpf_tail_call_static(skb, &jmp_table, 0);
>> 	return 0;
>> }
>>
>> SEC("freplace")
>> int entry_freplace(struct __sk_buff *skb)
>> {
>> 	count++;
>> 	subprog_tail(skb);
>> 	subprog_tail(skb);
>> 	return count;
>> }
>>
>> char __license[] SEC("license") = "GPL";
>>
>> The attach target of entry_freplace is subprog_tc, and the tail callee
>> in subprog_tail is entry_tc.
>>
>> Then, the infinite loop will be entry_tc -> entry_tc -> entry_freplace ->
>> subprog_tail --tailcall-> entry_tc, because tail_call_cnt in
>> entry_freplace will count from zero for every time of entry_freplace
>> execution.
>>
>> This patch fixes the issue by avoiding touching tail_call_cnt at
>> prologue when it's subprog or freplace prog.
>>
>> Then, when freplace prog attaches to entry_tc, it has to initialize
>> tail_call_cnt and tail_call_cnt_ptr, because its target is main prog and
>> its target's prologue hasn't initialize them before the attach hook.
>>
>> So, this patch uses x7 register to tell freplace prog that its target
>> prog is main prog or not.
>>
>> Meanwhile, while tail calling to a freplace prog, it is required to
>> reset x7 register to prevent re-initializing tail_call_cnt at freplace
>> prog's prologue.
>>
>> Fixes: 1c123c567fb1 ("bpf: Resolve fext program type when checking map compatibility")
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>>   arch/arm64/net/bpf_jit_comp.c | 44 +++++++++++++++++++++++++++++++----
>>   1 file changed, 39 insertions(+), 5 deletions(-)
>>
> Hi Puranjay and Kuohai,
> 
> As it's not recommended to introduce arch_bpf_run(), this is my approach
> to fix the niche case on arm64.
> 
> Do you have any better idea to fix it?
>

IIUC, the recommended approach is to teach the verifier to reject the
freplace + tailcall combination. If this combination is allowed, we
will face more than just this issue. For example, what happens if
a freplace prog is attached to a tail callee? The freplace prog is not
reachable through the tail call, right?

> Thanks,
> Leon
> 
>
Leon Hwang Sept. 9, 2024, 10:38 a.m. UTC | #3
On 9/9/24 17:02, Xu Kuohai wrote:
> On 9/8/2024 9:01 PM, Leon Hwang wrote:
>>
>>
>> On 1/9/24 21:38, Leon Hwang wrote:
>>> Like "bpf, x64: Fix tailcall infinite loop caused by freplace", the same
>>> issue happens on arm64, too.
>>>
>>> For example:
>>>
>>> tc_bpf2bpf.c:
>>>
>>> // SPDX-License-Identifier: GPL-2.0
>>> #include <linux/bpf.h>
>>> #include <bpf/bpf_helpers.h>
>>>
>>> __noinline
>>> int subprog_tc(struct __sk_buff *skb)
>>> {
>>>     return skb->len * 2;
>>> }
>>>
>>> SEC("tc")
>>> int entry_tc(struct __sk_buff *skb)
>>> {
>>>     return subprog(skb);
>>> }
>>>
>>> char __license[] SEC("license") = "GPL";
>>>
>>> tailcall_bpf2bpf_hierarchy_freplace.c:
>>>
>>> // SPDX-License-Identifier: GPL-2.0
>>> #include <linux/bpf.h>
>>> #include <bpf/bpf_helpers.h>
>>>
>>> struct {
>>>     __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
>>>     __uint(max_entries, 1);
>>>     __uint(key_size, sizeof(__u32));
>>>     __uint(value_size, sizeof(__u32));
>>> } jmp_table SEC(".maps");
>>>
>>> int count = 0;
>>>
>>> static __noinline
>>> int subprog_tail(struct __sk_buff *skb)
>>> {
>>>     bpf_tail_call_static(skb, &jmp_table, 0);
>>>     return 0;
>>> }
>>>
>>> SEC("freplace")
>>> int entry_freplace(struct __sk_buff *skb)
>>> {
>>>     count++;
>>>     subprog_tail(skb);
>>>     subprog_tail(skb);
>>>     return count;
>>> }
>>>
>>> char __license[] SEC("license") = "GPL";
>>>
>>> The attach target of entry_freplace is subprog_tc, and the tail callee
>>> in subprog_tail is entry_tc.
>>>
>>> Then, the infinite loop will be entry_tc -> entry_tc ->
>>> entry_freplace ->
>>> subprog_tail --tailcall-> entry_tc, because tail_call_cnt in
>>> entry_freplace will count from zero for every time of entry_freplace
>>> execution.
>>>
>>> This patch fixes the issue by avoiding touching tail_call_cnt at
>>> prologue when it's subprog or freplace prog.
>>>
>>> Then, when freplace prog attaches to entry_tc, it has to initialize
>>> tail_call_cnt and tail_call_cnt_ptr, because its target is main prog and
>>> its target's prologue hasn't initialize them before the attach hook.
>>>
>>> So, this patch uses x7 register to tell freplace prog that its target
>>> prog is main prog or not.
>>>
>>> Meanwhile, while tail calling to a freplace prog, it is required to
>>> reset x7 register to prevent re-initializing tail_call_cnt at freplace
>>> prog's prologue.
>>>
>>> Fixes: 1c123c567fb1 ("bpf: Resolve fext program type when checking
>>> map compatibility")
>>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>>> ---
>>>   arch/arm64/net/bpf_jit_comp.c | 44 +++++++++++++++++++++++++++++++----
>>>   1 file changed, 39 insertions(+), 5 deletions(-)
>>>
>> Hi Puranjay and Kuohai,
>>
>> As it's not recommended to introduce arch_bpf_run(), this is my approach
>> to fix the niche case on arm64.
>>
>> Do you have any better idea to fix it?
>>
> 
> IIUC, the recommended appraoch is to teach verifier to reject the
> freplace + tailcall combination. If this combiation is allowed, we
> will face more than just this issue. For example, what happens if
> a freplace prog is attached to tail callee? The freplace prog is not
> reachable through the tail call, right?
> 

This series rejects the freplace + tailcall combination partially, see "bpf,
x64: Fix tailcall infinite loop caused by freplace". (Oh, I should split the
rejection into a standalone patch.)
It rejects the case where the freplace prog has a tailcall but its attach
target has none.

As for your example, it depends on the combination (sketched in code below):

                freplace       target    reject?
Has tailcall?     YES            NO        YES
Has tailcall?     YES            YES       NO
Has tailcall?     NO             YES       NO
Has tailcall?     NO             NO        NO

So a freplace prog can always be a tail callee. I haven't seen any bad
case when a freplace prog is the tail callee.
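
To make the rule above concrete, here is a minimal sketch of the check; the
predicate name is mine, not taken from the patch:

#include <stdbool.h>

/* Illustrative only: reject the attachment when the freplace prog uses
 * tail calls but its attach target does not; every other combination in
 * the table above is allowed.
 */
static bool reject_freplace_attach(bool freplace_has_tailcall,
				   bool target_has_tailcall)
{
	return freplace_has_tailcall && !target_has_tailcall;
}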

I'm not planning to disable the freplace + tailcall combination entirely,
because I use this combination in an in-house XDP project at my company.

Thanks,
Leon
Xu Kuohai Sept. 9, 2024, 12:08 p.m. UTC | #4
On 9/9/2024 6:38 PM, Leon Hwang wrote:
> 
> 
> On 9/9/24 17:02, Xu Kuohai wrote:
>> On 9/8/2024 9:01 PM, Leon Hwang wrote:
>>>
>>>
>>> On 1/9/24 21:38, Leon Hwang wrote:
>>>> Like "bpf, x64: Fix tailcall infinite loop caused by freplace", the same
>>>> issue happens on arm64, too.
>>>>
>>>> For example:
>>>>
>>>> tc_bpf2bpf.c:
>>>>
>>>> // SPDX-License-Identifier: GPL-2.0
>>>> #include <linux/bpf.h>
>>>> #include <bpf/bpf_helpers.h>
>>>>
>>>> __noinline
>>>> int subprog_tc(struct __sk_buff *skb)
>>>> {
>>>>      return skb->len * 2;
>>>> }
>>>>
>>>> SEC("tc")
>>>> int entry_tc(struct __sk_buff *skb)
>>>> {
>>>>      return subprog(skb);
>>>> }
>>>>
>>>> char __license[] SEC("license") = "GPL";
>>>>
>>>> tailcall_bpf2bpf_hierarchy_freplace.c:
>>>>
>>>> // SPDX-License-Identifier: GPL-2.0
>>>> #include <linux/bpf.h>
>>>> #include <bpf/bpf_helpers.h>
>>>>
>>>> struct {
>>>>      __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
>>>>      __uint(max_entries, 1);
>>>>      __uint(key_size, sizeof(__u32));
>>>>      __uint(value_size, sizeof(__u32));
>>>> } jmp_table SEC(".maps");
>>>>
>>>> int count = 0;
>>>>
>>>> static __noinline
>>>> int subprog_tail(struct __sk_buff *skb)
>>>> {
>>>>      bpf_tail_call_static(skb, &jmp_table, 0);
>>>>      return 0;
>>>> }
>>>>
>>>> SEC("freplace")
>>>> int entry_freplace(struct __sk_buff *skb)
>>>> {
>>>>      count++;
>>>>      subprog_tail(skb);
>>>>      subprog_tail(skb);
>>>>      return count;
>>>> }
>>>>
>>>> char __license[] SEC("license") = "GPL";
>>>>
>>>> The attach target of entry_freplace is subprog_tc, and the tail callee
>>>> in subprog_tail is entry_tc.
>>>>
>>>> Then, the infinite loop will be entry_tc -> entry_tc ->
>>>> entry_freplace ->
>>>> subprog_tail --tailcall-> entry_tc, because tail_call_cnt in
>>>> entry_freplace will count from zero for every time of entry_freplace
>>>> execution.
>>>>
>>>> This patch fixes the issue by avoiding touching tail_call_cnt at
>>>> prologue when it's subprog or freplace prog.
>>>>
>>>> Then, when freplace prog attaches to entry_tc, it has to initialize
>>>> tail_call_cnt and tail_call_cnt_ptr, because its target is main prog and
>>>> its target's prologue hasn't initialize them before the attach hook.
>>>>
>>>> So, this patch uses x7 register to tell freplace prog that its target
>>>> prog is main prog or not.
>>>>
>>>> Meanwhile, while tail calling to a freplace prog, it is required to
>>>> reset x7 register to prevent re-initializing tail_call_cnt at freplace
>>>> prog's prologue.
>>>>
>>>> Fixes: 1c123c567fb1 ("bpf: Resolve fext program type when checking
>>>> map compatibility")
>>>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>>>> ---
>>>>    arch/arm64/net/bpf_jit_comp.c | 44 +++++++++++++++++++++++++++++++----
>>>>    1 file changed, 39 insertions(+), 5 deletions(-)
>>>>
>>> Hi Puranjay and Kuohai,
>>>
>>> As it's not recommended to introduce arch_bpf_run(), this is my approach
>>> to fix the niche case on arm64.
>>>
>>> Do you have any better idea to fix it?
>>>
>>
>> IIUC, the recommended appraoch is to teach verifier to reject the
>> freplace + tailcall combination. If this combiation is allowed, we
>> will face more than just this issue. For example, what happens if
>> a freplace prog is attached to tail callee? The freplace prog is not
>> reachable through the tail call, right?
>>
> 
> It's to reject the freplace + tailcall combination partially, see "bpf,
> x64: Fix tailcall infinite loop caused by freplace". (Oh, I should
> separate the rejection to a standalone patch.)
> It rejects the case that freplace prog has tailcall and its attach
> target has no tailcall.
> 
> As for your example, it depends on:
> 
>                  freplace       target    reject?
> Has tailcall?     YES            NO        YES
> Has tailcall?     YES            YES       NO
> Has tailcall?     NO             YES       NO
> Has tailcall?     NO             YES       NO
> 
> Then, freplace prog can be tail callee always. I haven't seen any bad
> case when freplace prog is tail callee.
>

Here is a concrete case. prog1 tail calls prog2, and prog2_new is
attached to prog2 via freplace.

SEC("tc")
int prog1(struct __sk_buff *skb)
{
         bpf_tail_call_static(skb, &progs, 0); // tail call prog2
         return 0;
}

SEC("tc")
int prog2(struct __sk_buff *skb)
{
         return 0;
}

SEC("freplace")
int prog2_new(struct __sk_buff *skb) // target is prog2
{
         return 0;
}

In this case, prog2_new is not reachable, since the tail call
target in prog2 is the start address of prog2 + TAIL_CALL_OFFSET,
which is located after the freplace/fentry call site of prog2.

> I'm not planning to disable freplace + tailcall combination totally,
> because I use this combination in an in-house XDP project of my company.
> 
> Thanks,
> Leon
Leon Hwang Sept. 9, 2024, 2:42 p.m. UTC | #5
On 9/9/24 20:08, Xu Kuohai wrote:
> On 9/9/2024 6:38 PM, Leon Hwang wrote:
>>
>>
>> On 9/9/24 17:02, Xu Kuohai wrote:
>>> On 9/8/2024 9:01 PM, Leon Hwang wrote:
>>>>
>>>>
>>>> On 1/9/24 21:38, Leon Hwang wrote:
>>>>> Like "bpf, x64: Fix tailcall infinite loop caused by freplace", the
>>>>> same
>>>>> issue happens on arm64, too.
>>>>>

[...]

>>>>>
>>>> Hi Puranjay and Kuohai,
>>>>
>>>> As it's not recommended to introduce arch_bpf_run(), this is my
>>>> approach
>>>> to fix the niche case on arm64.
>>>>
>>>> Do you have any better idea to fix it?
>>>>
>>>
>>> IIUC, the recommended appraoch is to teach verifier to reject the
>>> freplace + tailcall combination. If this combiation is allowed, we
>>> will face more than just this issue. For example, what happens if
>>> a freplace prog is attached to tail callee? The freplace prog is not
>>> reachable through the tail call, right?
>>>
>>
>> It's to reject the freplace + tailcall combination partially, see "bpf,
>> x64: Fix tailcall infinite loop caused by freplace". (Oh, I should
>> separate the rejection to a standalone patch.)
>> It rejects the case that freplace prog has tailcall and its attach
>> target has no tailcall.
>>
>> As for your example, it depends on:
>>
>>                  freplace       target    reject?
>> Has tailcall?     YES            NO        YES
>> Has tailcall?     YES            YES       NO
>> Has tailcall?     NO             YES       NO
>> Has tailcall?     NO             YES       NO
>>
>> Then, freplace prog can be tail callee always. I haven't seen any bad
>> case when freplace prog is tail callee.
>>
> 
> Here is a concrete case. prog1 tail calls prog2, and prog2_new is
> attached to prog2 via freplace.
> 
> SEC("tc")
> int prog1(struct __sk_buff *skb)
> {
>         bpf_tail_call_static(skb, &progs, 0); // tail call prog2
>         return 0;
> }
> 
> SEC("tc")
> int prog2(struct __sk_buff *skb)
> {
>         return 0;
> }
> 
> SEC("freplace")
> int prog2_new(struct __sk_buff *skb) // target is prog2
> {
>         return 0;
> }
> 
> In this case, prog2_new is not reachable, since the tail call
> target in prog2 is start address of prog2  + TAIL_CALL_OFFSET,
> which locates behind freplace/fentry callsite of prog2.
> 

This is an abnormal use case. We can do nothing about it; e.g. we're
unable to notify the user that prog2_new is not reachable in this case.

Thanks,
Leon
Alexei Starovoitov Sept. 13, 2024, 5:47 p.m. UTC | #6
On Mon, Sep 9, 2024 at 7:42 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
>
>
> On 9/9/24 20:08, Xu Kuohai wrote:
> > On 9/9/2024 6:38 PM, Leon Hwang wrote:
> >>
> >>
> >> On 9/9/24 17:02, Xu Kuohai wrote:
> >>> On 9/8/2024 9:01 PM, Leon Hwang wrote:
> >>>>
> >>>>
> >>>> On 1/9/24 21:38, Leon Hwang wrote:
> >>>>> Like "bpf, x64: Fix tailcall infinite loop caused by freplace", the
> >>>>> same
> >>>>> issue happens on arm64, too.
> >>>>>
>
> [...]
>
> >>>>>
> >>>> Hi Puranjay and Kuohai,
> >>>>
> >>>> As it's not recommended to introduce arch_bpf_run(), this is my
> >>>> approach
> >>>> to fix the niche case on arm64.
> >>>>
> >>>> Do you have any better idea to fix it?
> >>>>
> >>>
> >>> IIUC, the recommended appraoch is to teach verifier to reject the
> >>> freplace + tailcall combination. If this combiation is allowed, we
> >>> will face more than just this issue. For example, what happens if
> >>> a freplace prog is attached to tail callee? The freplace prog is not
> >>> reachable through the tail call, right?
> >>>
> >>
> >> It's to reject the freplace + tailcall combination partially, see "bpf,
> >> x64: Fix tailcall infinite loop caused by freplace". (Oh, I should
> >> separate the rejection to a standalone patch.)
> >> It rejects the case that freplace prog has tailcall and its attach
> >> target has no tailcall.
> >>
> >> As for your example, it depends on:
> >>
> >>                  freplace       target    reject?
> >> Has tailcall?     YES            NO        YES
> >> Has tailcall?     YES            YES       NO
> >> Has tailcall?     NO             YES       NO
> >> Has tailcall?     NO             YES       NO
> >>
> >> Then, freplace prog can be tail callee always. I haven't seen any bad
> >> case when freplace prog is tail callee.
> >>
> >
> > Here is a concrete case. prog1 tail calls prog2, and prog2_new is
> > attached to prog2 via freplace.
> >
> > SEC("tc")
> > int prog1(struct __sk_buff *skb)
> > {
> >         bpf_tail_call_static(skb, &progs, 0); // tail call prog2
> >         return 0;
> > }
> >
> > SEC("tc")
> > int prog2(struct __sk_buff *skb)
> > {
> >         return 0;
> > }
> >
> > SEC("freplace")
> > int prog2_new(struct __sk_buff *skb) // target is prog2
> > {
> >         return 0;
> > }
> >
> > In this case, prog2_new is not reachable, since the tail call
> > target in prog2 is start address of prog2  + TAIL_CALL_OFFSET,
> > which locates behind freplace/fentry callsite of prog2.
> >
>
> This is an abnormal use case. We can do nothing with it, e.g. we're
> unable to notify user that prog2_new is not reachable for this case.

Since it doesn't behave as the user would expect, I think it's better
to disallow such combinations in the verifier.
Either the freplace prog is trying to patch a prog that is already in a
prog_array, in which case the load of the freplace prog can report a
meaningful error into the verifier log, or an already-freplaced prog is
being added to a prog_array.
I think in the latter case a main prog tail calling into this freplace prog
will actually succeed, but it's better to reject the sys_bpf update
command in bpf_fd_array_map_update_elem(), since the being-freplaced prog
is not in the prog_array and won't be called, while the freplace prog in
the prog_array can be called, which is inconsistent. A freplace prog should
act and be called just like its being-freplaced target prog.

I don't think this will break any actual use cases where freplace and
tail calls are used together.
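
To illustrate the proposed restriction from user space (a hedged sketch, not
actual kernel behavior; the function name is mine): installing a loaded
freplace prog into a prog_array would fail at update time instead of
silently creating an inconsistent entry.

#include <bpf/bpf.h>

/* jmp_table_fd is a BPF_MAP_TYPE_PROG_ARRAY fd and freplace_prog_fd is the
 * fd of a loaded BPF_PROG_TYPE_EXT prog. With the proposed check, this
 * update is expected to fail (e.g. with -EINVAL) instead of succeeding.
 */
static int try_install_freplace_as_tail_callee(int jmp_table_fd,
					       int freplace_prog_fd)
{
	__u32 key = 0;

	return bpf_map_update_elem(jmp_table_fd, &key, &freplace_prog_fd,
				   BPF_ANY);
}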
Xu Kuohai Sept. 14, 2024, 9:14 a.m. UTC | #7
On 9/14/2024 1:47 AM, Alexei Starovoitov wrote:
> On Mon, Sep 9, 2024 at 7:42 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>>
>>
>> On 9/9/24 20:08, Xu Kuohai wrote:
>>> On 9/9/2024 6:38 PM, Leon Hwang wrote:
>>>>
>>>>
>>>> On 9/9/24 17:02, Xu Kuohai wrote:
>>>>> On 9/8/2024 9:01 PM, Leon Hwang wrote:
>>>>>>
>>>>>>
>>>>>> On 1/9/24 21:38, Leon Hwang wrote:
>>>>>>> Like "bpf, x64: Fix tailcall infinite loop caused by freplace", the
>>>>>>> same
>>>>>>> issue happens on arm64, too.
>>>>>>>
>>
>> [...]
>>
>>>>>>>
>>>>>> Hi Puranjay and Kuohai,
>>>>>>
>>>>>> As it's not recommended to introduce arch_bpf_run(), this is my
>>>>>> approach
>>>>>> to fix the niche case on arm64.
>>>>>>
>>>>>> Do you have any better idea to fix it?
>>>>>>
>>>>>
>>>>> IIUC, the recommended appraoch is to teach verifier to reject the
>>>>> freplace + tailcall combination. If this combiation is allowed, we
>>>>> will face more than just this issue. For example, what happens if
>>>>> a freplace prog is attached to tail callee? The freplace prog is not
>>>>> reachable through the tail call, right?
>>>>>
>>>>
>>>> It's to reject the freplace + tailcall combination partially, see "bpf,
>>>> x64: Fix tailcall infinite loop caused by freplace". (Oh, I should
>>>> separate the rejection to a standalone patch.)
>>>> It rejects the case that freplace prog has tailcall and its attach
>>>> target has no tailcall.
>>>>
>>>> As for your example, it depends on:
>>>>
>>>>                   freplace       target    reject?
>>>> Has tailcall?     YES            NO        YES
>>>> Has tailcall?     YES            YES       NO
>>>> Has tailcall?     NO             YES       NO
>>>> Has tailcall?     NO             YES       NO
>>>>
>>>> Then, freplace prog can be tail callee always. I haven't seen any bad
>>>> case when freplace prog is tail callee.
>>>>
>>>
>>> Here is a concrete case. prog1 tail calls prog2, and prog2_new is
>>> attached to prog2 via freplace.
>>>
>>> SEC("tc")
>>> int prog1(struct __sk_buff *skb)
>>> {
>>>          bpf_tail_call_static(skb, &progs, 0); // tail call prog2
>>>          return 0;
>>> }
>>>
>>> SEC("tc")
>>> int prog2(struct __sk_buff *skb)
>>> {
>>>          return 0;
>>> }
>>>
>>> SEC("freplace")
>>> int prog2_new(struct __sk_buff *skb) // target is prog2
>>> {
>>>          return 0;
>>> }
>>>
>>> In this case, prog2_new is not reachable, since the tail call
>>> target in prog2 is start address of prog2  + TAIL_CALL_OFFSET,
>>> which locates behind freplace/fentry callsite of prog2.
>>>
>>
>> This is an abnormal use case. We can do nothing with it, e.g. we're
>> unable to notify user that prog2_new is not reachable for this case.
> 
> Since it doesn't behave as the user would expect, I think, it's better
> to disallow such combinations in the verifier.
> Either freplace is trying to patch a prog that is already in prog_array
> then the load of freplace prog can report a meaningful error into the
> verifier log or
> already freplaced prog is being added to prog array.
> I think in this case main prog tail calling into this freplace prog
> will actually succeed, but it's better to reject sys_bpf update
> command in bpf_fd_array_map_update_elem(),
> since being-freplaced prog is not in prog_array and won't be called,
> but freplace prog in prog array can be called which is inconsistent.
> freplace prog should act and be called just like target being-freplaced prog.
> 
> I don't think this will break any actual use cases where freplace and
> tail call are used together.

+1, we should not ignore it silently. If the freplace + tailcall
combination cannot be disallowed completely, we should disallow
this case separately (or maybe we could find an acceptable way to
make it work as expected?). Let me give it a try.
diff mbox series

Patch

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8aa32cb140b9e..cdc12dd83c4be 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -277,6 +277,7 @@  static bool is_lsi_offset(int offset, int scale)
 /* generated main prog prologue:
  *      bti c // if CONFIG_ARM64_BTI_KERNEL
  *      mov x9, lr
+ *      mov x7, 1 // if not-freplace main prog
  *      nop  // POKE_OFFSET
  *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
  *      stp x29, lr, [sp, #-16]!
@@ -288,15 +289,19 @@  static bool is_lsi_offset(int offset, int scale)
  */
 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx)
 {
+	const bool is_ext = ctx->prog->type == BPF_PROG_TYPE_EXT;
 	const bool is_main_prog = !bpf_is_subprog(ctx->prog);
 	const u8 ptr = bpf2a64[TCCNT_PTR];
 
-	if (is_main_prog) {
+	if (is_main_prog && !is_ext) {
 		/* Initialize tail_call_cnt. */
 		emit(A64_PUSH(A64_ZR, ptr, A64_SP), ctx);
 		emit(A64_MOV(1, ptr, A64_SP), ctx);
-	} else
+	} else {
+		/* Keep the same insn layout for freplace main prog. */
 		emit(A64_PUSH(ptr, ptr, A64_SP), ctx);
+		emit(A64_NOP, ctx);
+	}
 }
 
 static void find_used_callee_regs(struct jit_ctx *ctx)
@@ -416,16 +421,20 @@  static void pop_callee_regs(struct jit_ctx *ctx)
 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
 
 /* Offset of nop instruction in bpf prog entry to be poked */
-#define POKE_OFFSET (BTI_INSNS + 1)
+#define POKE_OFFSET (BTI_INSNS + 2)
 
 /* Tail call offset to jump into */
-#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 4)
+#define PROLOGUE_OFFSET (BTI_INSNS + 3 + PAC_INSNS + 4)
 
 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 {
 	const struct bpf_prog *prog = ctx->prog;
+	const bool is_ext = prog->type == BPF_PROG_TYPE_EXT;
 	const bool is_main_prog = !bpf_is_subprog(prog);
+	const u8 r0 = bpf2a64[BPF_REG_0];
 	const u8 fp = bpf2a64[BPF_REG_FP];
+	const u8 ptr = bpf2a64[TCCNT_PTR];
+	const u8 tmp = bpf2a64[TMP_REG_1];
 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
 	const int idx0 = ctx->idx;
 	int cur_offset;
@@ -462,6 +471,10 @@  static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 	emit_bti(A64_BTI_JC, ctx);
 
 	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
+	if (!is_ext)
+		emit(A64_MOVZ(1, r0, is_main_prog, 0), ctx);
+	else
+		emit(A64_NOP, ctx);
 	emit(A64_NOP, ctx);
 
 	if (!prog->aux->exception_cb) {
@@ -485,6 +498,18 @@  static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 			/* BTI landing pad for the tail call, done with a BR */
 			emit_bti(A64_BTI_J, ctx);
 		}
+		/* If freplace's target prog is main prog, it has to make x26 as
+		 * tail_call_cnt_ptr, and then initialize tail_call_cnt via the
+		 * tail_call_cnt_ptr.
+		 */
+		if (is_main_prog && is_ext) {
+			emit(A64_MOVZ(1, tmp, 1, 0), ctx);
+			emit(A64_CMP(1, r0, tmp), ctx);
+			emit(A64_B_(A64_COND_NE, 4), ctx);
+			emit(A64_MOV(1, ptr, A64_SP), ctx);
+			emit(A64_MOVZ(1, r0, 0, 0), ctx);
+			emit(A64_STR64I(r0, ptr, 0), ctx);
+		}
 		push_callee_regs(ctx);
 	} else {
 		/*
@@ -521,6 +546,7 @@  static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 
 static int emit_bpf_tail_call(struct jit_ctx *ctx)
 {
+	const u8 r0 = bpf2a64[BPF_REG_0];
 	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
 	const u8 r2 = bpf2a64[BPF_REG_2];
 	const u8 r3 = bpf2a64[BPF_REG_3];
@@ -579,6 +605,12 @@  static int emit_bpf_tail_call(struct jit_ctx *ctx)
 
 	pop_callee_regs(ctx);
 
+	/* When freplace prog tail calls freplace prog, setting r0 as 0 is to
+	 * prevent re-initializing tail_call_cnt at the prologue of target
+	 * freplace prog.
+	 */
+	emit(A64_MOVZ(1, r0, 0, 0), ctx);
+
 	/* goto *(prog->bpf_func + prologue_offset); */
 	off = offsetof(struct bpf_prog, bpf_func);
 	emit_a64_mov_i64(tmp, off, ctx);
@@ -2189,9 +2221,10 @@  static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 		emit(A64_RET(A64_R(10)), ctx);
 		/* store return value */
 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
-		/* reserve a nop for bpf_tramp_image_put */
+		/* reserve two nops for bpf_tramp_image_put */
 		im->ip_after_call = ctx->ro_image + ctx->idx;
 		emit(A64_NOP, ctx);
+		emit(A64_NOP, ctx);
 	}
 
 	/* update the branches saved in invoke_bpf_mod_ret with cbnz */
@@ -2474,6 +2507,7 @@  int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
 		/* skip to the nop instruction in bpf prog entry:
 		 * bti c // if BTI enabled
 		 * mov x9, x30
+		 * mov x7, 1 // if not-freplace main prog
 		 * nop
 		 */
 		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;