
[bpf-next,1/2] libbpf: fix LDX/STX/ST CO-RE relocation size adjustment logic

Message ID 20250207014809.1573841-1-andrii@kernel.org (mailing list archive)
State Accepted
Commit 06096d19ee3897a7e70922580159607fe315da7a
Delegated to: BPF
Series [bpf-next,1/2] libbpf: fix LDX/STX/ST CO-RE relocation size adjustment logic

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 28 (+1) this patch: 28 (+1)
netdev/cc_maintainers warning 9 maintainers not CCed: kpsingh@kernel.org sdf@fomichev.me jolsa@kernel.org yonghong.song@linux.dev song@kernel.org john.fastabend@gmail.com haoluo@google.com eddyz87@gmail.com martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 2 this patch: 2
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-48 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-1 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-6 success Logs for x86_64-gcc / build-release

Commit Message

Andrii Nakryiko Feb. 7, 2025, 1:48 a.m. UTC
Libbpf has a somewhat obscure feature of automatically adjusting the
"size" of LDX/STX/ST instructions (memory load and store instructions),
based on the originally recorded access size (u8, u16, u32, or u64) and
the actual size of the field on the target kernel. This is meant to
facilitate using BPF CO-RE on 32-bit architectures (pointers are always
64-bit in BPF, but the host kernel's BTF will have them as 32-bit
types), as well as to generally support safe type changes (unsigned
integer type changes can be transparently "relocated").
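
For illustration, a minimal sketch of the kind of access this adjustment
handles (struct and field names below are made up):

    /* BPF-side view of a kernel struct; 'ptr_field' is a pointer, so the
     * BPF program loads it as 8 bytes (BPF_DW). On a 32-bit target kernel
     * the same member is described as 4 bytes in BTF, so libbpf rewrites
     * the LDX size to BPF_W when applying the CO-RE relocation.
     */
    struct kern_struct___local {
            void *ptr_field;
    } __attribute__((preserve_access_index));

    static void *read_ptr(struct kern_struct___local *s)
    {
            return s->ptr_field; /* 64-bit access; may be narrowed */
    }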

One issue that surfaced only now, 5 years after this logic was
implemented, is how this all works when dealing with fields that are
arrays. This isn't all that easy and straightforward to hit (see the
selftests that reproduce this condition), but one of the sched_ext BPF
programs did hit it with an innocent-looking loop.

Long story short, libbpf used to calculate the entire array size,
instead of the array's element size. But it's the element that is
loaded or stored by LDX/STX/ST instructions (1, 2, 4, or 8 bytes), so
that's what libbpf should check. This patch adjusts the logic for
arrays and fixes the issue.
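
As a minimal sketch of the array case (hypothetical struct and field
names, not the actual sched_ext code):

    /* Each iteration loads a single 4-byte element, so the size used to
     * adjust the LDX must be sizeof(__u32), not the 40-byte size of the
     * whole array, which is what libbpf was computing before this fix.
     */
    struct arr_struct___local {
            __u32 vals[10];
    } __attribute__((preserve_access_index));

    static __u32 sum_vals(struct arr_struct___local *s)
    {
            __u32 sum = 0;
            int i;

            for (i = 0; i < 10; i++)
                    sum += s->vals[i];
            return sum;
    }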

Reported-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 tools/lib/bpf/relo_core.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

Comments

Eduard Zingerman Feb. 7, 2025, 9:45 p.m. UTC | #1
On Thu, 2025-02-06 at 17:48 -0800, Andrii Nakryiko wrote:
> Libbpf has a somewhat obscure feature of automatically adjusting the
> "size" of LDX/STX/ST instructions (memory load and store instructions),
> based on the originally recorded access size (u8, u16, u32, or u64) and
> the actual size of the field on the target kernel. This is meant to
> facilitate using BPF CO-RE on 32-bit architectures (pointers are always
> 64-bit in BPF, but the host kernel's BTF will have them as 32-bit
> types), as well as to generally support safe type changes (unsigned
> integer type changes can be transparently "relocated").
> 
> One issue that surfaced only now, 5 years after this logic was
> implemented, is how this all works when dealing with fields that are
> arrays. This isn't all that easy and straightforward to hit (see the
> selftests that reproduce this condition), but one of the sched_ext BPF
> programs did hit it with an innocent-looking loop.
> 
> Long story short, libbpf used to calculate the entire array size,
> instead of the array's element size. But it's the element that is
> loaded or stored by LDX/STX/ST instructions (1, 2, 4, or 8 bytes), so
> that's what libbpf should check. This patch adjusts the logic for
> arrays and fixes the issue.
> 
> Reported-by: Emil Tsalapatis <emil@etsalapatis.com>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---

Do I understand correctly that for nested arrays the relocation size
would be resolved to the innermost element size?
To allow e.g.:

    struct { int a[2][3]; }
    ...
    int *a = __builtin_preserve_access_index(({ in->a; }));
    a[0] = 42;

With the justification that nothing useful could be done with an
'int **a' type when the dimensions are not known?
I guess this makes sense.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>?

>  tools/lib/bpf/relo_core.c | 24 ++++++++++++++++++++----
>  1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
> index 7632e9d41827..2b83c98a1137 100644
> --- a/tools/lib/bpf/relo_core.c
> +++ b/tools/lib/bpf/relo_core.c
> @@ -683,7 +683,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
>  {
>  	const struct bpf_core_accessor *acc;
>  	const struct btf_type *t;
> -	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
> +	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id;
>  	const struct btf_member *m;
>  	const struct btf_type *mt;
>  	bool bitfield;
> @@ -706,8 +706,14 @@ static int bpf_core_calc_field_relo(const char *prog_name,
>  	if (!acc->name) {
>  		if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
>  			*val = spec->bit_offset / 8;
> -			/* remember field size for load/store mem size */
> -			sz = btf__resolve_size(spec->btf, acc->type_id);
> +			/* remember field size for load/store mem size;
> +			 * note, for arrays we care about individual element
> +			 * sizes, not the overall array size
> +			 */
> +			t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id);
> +			while (btf_is_array(t))
> +				t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
> +			sz = btf__resolve_size(spec->btf, elem_id);

Nit: while trying to figure out what this change is about
     I commented out the above hunk and this did not trigger any test failures.

[...]
Andrii Nakryiko Feb. 10, 2025, 8:05 p.m. UTC | #2
On Fri, Feb 7, 2025 at 1:45 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Thu, 2025-02-06 at 17:48 -0800, Andrii Nakryiko wrote:
> > Libbpf has a somewhat obscure feature of automatically adjusting the
> > "size" of LDX/STX/ST instructions (memory load and store instructions),
> > based on the originally recorded access size (u8, u16, u32, or u64) and
> > the actual size of the field on the target kernel. This is meant to
> > facilitate using BPF CO-RE on 32-bit architectures (pointers are always
> > 64-bit in BPF, but the host kernel's BTF will have them as 32-bit
> > types), as well as to generally support safe type changes (unsigned
> > integer type changes can be transparently "relocated").
> >
> > One issue that surfaced only now, 5 years after this logic was
> > implemented, is how this all works when dealing with fields that are
> > arrays. This isn't all that easy and straightforward to hit (see the
> > selftests that reproduce this condition), but one of the sched_ext BPF
> > programs did hit it with an innocent-looking loop.
> >
> > Long story short, libbpf used to calculate the entire array size,
> > instead of the array's element size. But it's the element that is
> > loaded or stored by LDX/STX/ST instructions (1, 2, 4, or 8 bytes), so
> > that's what libbpf should check. This patch adjusts the logic for
> > arrays and fixes the issue.
> >
> > Reported-by: Emil Tsalapatis <emil@etsalapatis.com>
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
>
> Do I understand correctly that for nested arrays the relocation size
> would be resolved to the innermost element size?
> To allow e.g.:
>
>     struct { int a[2][3]; }
>     ...
>     int *a = __builtin_preserve_access_index(({ in->a; }));
>     a[0] = 42;
>
> With the justification that nothing useful could be done with an
> 'int **a' type when the dimensions are not known?
> I guess this makes sense.

Known or not, a multi-dimensional array at the lowest level is still
an array of elements, and it is the elements that will be read (up to
u64), so that's why I'm flattening the array and getting to the actual
item.
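
To make that concrete with the 'int a[2][3]' example above: in BTF that
member is represented as nested arrays, roughly
ARRAY(nelems=2, elem=ARRAY(nelems=3, elem=INT)), so the new loop (the
same logic appears in both hunks of the patch) iterates twice and ends
with elem_id pointing at the 4-byte INT:

    t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id);
    while (btf_is_array(t))
            t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
    sz = btf__resolve_size(spec->btf, elem_id); /* 4 (int), not 24 (int[2][3]) */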

>
> Acked-by: Eduard Zingerman <eddyz87@gmail.com>?
>
> >  tools/lib/bpf/relo_core.c | 24 ++++++++++++++++++++----
> >  1 file changed, 20 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
> > index 7632e9d41827..2b83c98a1137 100644
> > --- a/tools/lib/bpf/relo_core.c
> > +++ b/tools/lib/bpf/relo_core.c
> > @@ -683,7 +683,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
> >  {
> >       const struct bpf_core_accessor *acc;
> >       const struct btf_type *t;
> > -     __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
> > +     __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id;
> >       const struct btf_member *m;
> >       const struct btf_type *mt;
> >       bool bitfield;
> > @@ -706,8 +706,14 @@ static int bpf_core_calc_field_relo(const char *prog_name,
> >       if (!acc->name) {
> >               if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
> >                       *val = spec->bit_offset / 8;
> > -                     /* remember field size for load/store mem size */
> > -                     sz = btf__resolve_size(spec->btf, acc->type_id);
> > +                     /* remember field size for load/store mem size;
> > +                      * note, for arrays we care about individual element
> > +                      * sizes, not the overall array size
> > +                      */
> > +                     t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id);
> > +                     while (btf_is_array(t))
> > +                             t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
> > +                     sz = btf__resolve_size(spec->btf, elem_id);
>
> Nit: while trying to figure out what this change is about
>      I commented out the above hunk and this did not trigger any test failures.

I don't remember exactly under which conditions we'll hit this
branch (something about array element access), but this whole logic
has to stay in sync with the non-array-element CO-RE relocation.

>
> [...]
>
patchwork-bot+netdevbpf@kernel.org Feb. 15, 2025, 4:10 a.m. UTC | #3
Hello:

This series was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:

On Thu,  6 Feb 2025 17:48:08 -0800 you wrote:
> Libbpf has a somewhat obscure feature of automatically adjusting the
> "size" of LDX/STX/ST instructions (memory load and store instructions),
> based on the originally recorded access size (u8, u16, u32, or u64) and
> the actual size of the field on the target kernel. This is meant to
> facilitate using BPF CO-RE on 32-bit architectures (pointers are always
> 64-bit in BPF, but the host kernel's BTF will have them as 32-bit
> types), as well as to generally support safe type changes (unsigned
> integer type changes can be transparently "relocated").
> 
> [...]

Here is the summary with links:
  - [bpf-next,1/2] libbpf: fix LDX/STX/ST CO-RE relocation size adjustment logic
    https://git.kernel.org/bpf/bpf-next/c/06096d19ee38
  - [bpf-next,2/2] selftests/bpf: add test for LDX/STX/ST relocations over array field
    https://git.kernel.org/bpf/bpf-next/c/4eb93fea5919

You are awesome, thank you!

Patch

diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 7632e9d41827..2b83c98a1137 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -683,7 +683,7 @@  static int bpf_core_calc_field_relo(const char *prog_name,
 {
 	const struct bpf_core_accessor *acc;
 	const struct btf_type *t;
-	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id;
 	const struct btf_member *m;
 	const struct btf_type *mt;
 	bool bitfield;
@@ -706,8 +706,14 @@  static int bpf_core_calc_field_relo(const char *prog_name,
 	if (!acc->name) {
 		if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
 			*val = spec->bit_offset / 8;
-			/* remember field size for load/store mem size */
-			sz = btf__resolve_size(spec->btf, acc->type_id);
+			/* remember field size for load/store mem size;
+			 * note, for arrays we care about individual element
+			 * sizes, not the overall array size
+			 */
+			t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id);
+			while (btf_is_array(t))
+				t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
+			sz = btf__resolve_size(spec->btf, elem_id);
 			if (sz < 0)
 				return -EINVAL;
 			*field_sz = sz;
@@ -767,7 +773,17 @@  static int bpf_core_calc_field_relo(const char *prog_name,
 	case BPF_CORE_FIELD_BYTE_OFFSET:
 		*val = byte_off;
 		if (!bitfield) {
-			*field_sz = byte_sz;
+			/* remember field size for load/store mem size;
+			 * note, for arrays we care about individual element
+			 * sizes, not the overall array size
+			 */
+			t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id);
+			while (btf_is_array(t))
+				t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
+			sz = btf__resolve_size(spec->btf, elem_id);
+			if (sz < 0)
+				return -EINVAL;
+			*field_sz = sz;
 			*type_id = field_type_id;
 		}
 		break;