diff mbox series

[bpf-next] selftests/bpf: Make sure libbpf doesn't enforce the signature of a func pointer.

Message ID 20240401223058.1503400-1-thinker.li@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [bpf-next] selftests/bpf: Make sure libbpf doesn't enforce the signature of a func pointer. | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 11 maintainers not CCed: john.fastabend@gmail.com sdf@google.com daniel@iogearbox.net kpsingh@kernel.org yonghong.song@linux.dev shuah@kernel.org linux-kselftest@vger.kernel.org mykolal@fb.com haoluo@google.com jolsa@kernel.org eddyz87@gmail.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 51 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc

Commit Message

Kui-Feng Lee April 1, 2024, 10:30 p.m. UTC
The verifier in the kernel checks the signatures of struct_ops
operators. Libbpf should not verify it in order to allow flexibility in
loading different implementations of an operator with different signatures
to try to comply with the kernel, even if the signature defined in the BPF
programs does not match with the implementations and the kernel.

This feature enables user space applications to manage the variations
between different versions of the kernel by attempting various
implementations of an operator.

This is a follow-up of the commit c911fc61a7ce ("libbpf: Skip zeroed or
null fields if not found in the kernel type.")

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 .../bpf/prog_tests/test_struct_ops_module.c   | 24 +++++++++++++++++++
 .../selftests/bpf/progs/struct_ops_module.c   | 13 ++++++++++
 2 files changed, 37 insertions(+)

Comments

John Fastabend April 2, 2024, 1:43 a.m. UTC | #1
Kui-Feng Lee wrote:
> The verifier in the kernel checks the signatures of struct_ops
> operators. Libbpf should not verify it in order to allow flexibility in
> loading different implementations of an operator with different signatures
> to try to comply with the kernel, even if the signature defined in the BPF
> programs does not match with the implementations and the kernel.
> 
> This feature enables user space applications to manage the variations
> between different versions of the kernel by attempting various
> implementations of an operator.

What is the utility of this? I'm missing what difference it would be
if libbpf rejected vs kernel rejecting it? For backwards compat the
kernel will fail or libbpf might throw an error and user will have to
fixup signature regardless right? Why not get the error as early as
possible.

> 
> This is a follow-up of the commit c911fc61a7ce ("libbpf: Skip zeroed or
> null fields if not found in the kernel type.")
> 
> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
> ---
>  .../bpf/prog_tests/test_struct_ops_module.c   | 24 +++++++++++++++++++
>  .../selftests/bpf/progs/struct_ops_module.c   | 13 ++++++++++
>  2 files changed, 37 insertions(+)
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
> index 098776d00ab4..7cf2b9ddd3e1 100644
> --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
> +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
> @@ -138,11 +138,35 @@ static void test_struct_ops_not_zeroed(void)
>  	struct_ops_module__destroy(skel);
>  }
>  
> +/* The signature of an implementation might not match the signature of the
> + * function pointer prototype defined in the BPF program. This mismatch
> + * should be allowed as long as the behavior of the operator program
> + * adheres to the signature in the kernel. Libbpf should not enforce the
> + * signature; rather, let the kernel verifier handle the enforcement.
> + */
> +static void test_struct_ops_incompatible(void)
> +{
> +	struct struct_ops_module *skel;
> +	struct bpf_link *link;
> +
> +	skel = struct_ops_module__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "open_and_load"))
> +		return;
> +
> +	link = bpf_map__attach_struct_ops(skel->maps.testmod_incompatible);
> +	if (ASSERT_OK_PTR(link, "attach_struct_ops"))
> +		bpf_link__destroy(link);
> +
> +	struct_ops_module__destroy(skel);
> +}
> +
>  void serial_test_struct_ops_module(void)
>  {
>  	if (test__start_subtest("test_struct_ops_load"))
>  		test_struct_ops_load();
>  	if (test__start_subtest("test_struct_ops_not_zeroed"))
>  		test_struct_ops_not_zeroed();
> +	if (test__start_subtest("test_struct_ops_incompatible"))
> +		test_struct_ops_incompatible();
>  }
>  
> diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
> index 86e1e50c5531..63b065dae002 100644
> --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c
> +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
> @@ -68,3 +68,16 @@ struct bpf_testmod_ops___zeroed testmod_zeroed = {
>  	.test_1 = (void *)test_1,
>  	.test_2 = (void *)test_2_v2,
>  };
> +
> +struct bpf_testmod_ops___incompatible {
> +	int (*test_1)(void);
> +	void (*test_2)(int *a);
> +	int data;
> +};
> +
> +SEC(".struct_ops.link")
> +struct bpf_testmod_ops___incompatible testmod_incompatible = {
> +	.test_1 = (void *)test_1,
> +	.test_2 = (void *)test_2,
> +	.data = 3,
> +};
> -- 
> 2.34.1
> 
>
Kui-Feng Lee April 2, 2024, 5 p.m. UTC | #2
On 4/1/24 18:43, John Fastabend wrote:
> Kui-Feng Lee wrote:
>> The verifier in the kernel checks the signatures of struct_ops
>> operators. Libbpf should not verify it in order to allow flexibility in
>> loading different implementations of an operator with different signatures
>> to try to comply with the kernel, even if the signature defined in the BPF
>> programs does not match with the implementations and the kernel.
>>
>> This feature enables user space applications to manage the variations
>> between different versions of the kernel by attempting various
>> implementations of an operator.
> 
> What is the utility of this? I'm missing what difference it would be
> if libbpf rejected vs kernel rejecting it? For backwards compat the
> kernel will fail or libbpf might throw an error and user will have to
> fixup signature regardless right? Why not get the error as early as
> possible.

The check described here is that libbpf compares BTF types of functions
and function pointers in struct_ops types in BPF programs, which may
differ from kernel definitions.

A scenario here is a struct_ops type that includes an operator op_A with
different versions depending on the kernel. All other fields in the
struct_ops type have the same types. The application has only one
definition for this struct_ops type, but the implementation of op_A is
done separately for each version.

The application can try variations by assigning implementations to the
op_A field until one is accepted by the kernel if libbpf doesn’t enforce
signatures. Otherwise, the application has to define this struct_ops
type for each variant if libbpf enforces signatures.

Does that make sense to you?

> 
>>
>> This is a follow-up of the commit c911fc61a7ce ("libbpf: Skip zeroed or
>> null fields if not found in the kernel type.")
>>
>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>> ---
>>   .../bpf/prog_tests/test_struct_ops_module.c   | 24 +++++++++++++++++++
>>   .../selftests/bpf/progs/struct_ops_module.c   | 13 ++++++++++
>>   2 files changed, 37 insertions(+)
>>
>> diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
>> index 098776d00ab4..7cf2b9ddd3e1 100644
>> --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
>> +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
>> @@ -138,11 +138,35 @@ static void test_struct_ops_not_zeroed(void)
>>   	struct_ops_module__destroy(skel);
>>   }
>>   
>> +/* The signature of an implementation might not match the signature of the
>> + * function pointer prototype defined in the BPF program. This mismatch
>> + * should be allowed as long as the behavior of the operator program
>> + * adheres to the signature in the kernel. Libbpf should not enforce the
>> + * signature; rather, let the kernel verifier handle the enforcement.
>> + */
>> +static void test_struct_ops_incompatible(void)
>> +{
>> +	struct struct_ops_module *skel;
>> +	struct bpf_link *link;
>> +
>> +	skel = struct_ops_module__open_and_load();
>> +	if (!ASSERT_OK_PTR(skel, "open_and_load"))
>> +		return;
>> +
>> +	link = bpf_map__attach_struct_ops(skel->maps.testmod_incompatible);
>> +	if (ASSERT_OK_PTR(link, "attach_struct_ops"))
>> +		bpf_link__destroy(link);
>> +
>> +	struct_ops_module__destroy(skel);
>> +}
>> +
>>   void serial_test_struct_ops_module(void)
>>   {
>>   	if (test__start_subtest("test_struct_ops_load"))
>>   		test_struct_ops_load();
>>   	if (test__start_subtest("test_struct_ops_not_zeroed"))
>>   		test_struct_ops_not_zeroed();
>> +	if (test__start_subtest("test_struct_ops_incompatible"))
>> +		test_struct_ops_incompatible();
>>   }
>>   
>> diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
>> index 86e1e50c5531..63b065dae002 100644
>> --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c
>> +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
>> @@ -68,3 +68,16 @@ struct bpf_testmod_ops___zeroed testmod_zeroed = {
>>   	.test_1 = (void *)test_1,
>>   	.test_2 = (void *)test_2_v2,
>>   };
>> +
>> +struct bpf_testmod_ops___incompatible {
>> +	int (*test_1)(void);
>> +	void (*test_2)(int *a);
>> +	int data;
>> +};
>> +
>> +SEC(".struct_ops.link")
>> +struct bpf_testmod_ops___incompatible testmod_incompatible = {
>> +	.test_1 = (void *)test_1,
>> +	.test_2 = (void *)test_2,
>> +	.data = 3,
>> +};
>> -- 
>> 2.34.1
>>
>>
> 
>
Martin KaFai Lau April 3, 2024, 8:52 p.m. UTC | #3
On 4/2/24 10:00 AM, Kui-Feng Lee wrote:
> 
> 
> 
> On 4/1/24 18:43, John Fastabend wrote:
>> Kui-Feng Lee wrote:
>>> The verifier in the kernel checks the signatures of struct_ops
>>> operators. Libbpf should not verify it in order to allow flexibility in

This description probably is not accurate. iirc, the verifier does not check the 
function signature either. The verifier rejects only when the struct_ops prog 
tries to access something invalid. e.g. reading a function argument that does 
not exist in the running kernel.

>>> loading different implementations of an operator with different signatures
>>> to try to comply with the kernel, even if the signature defined in the BPF
>>> programs does not match with the implementations and the kernel.

>>> This feature enables user space applications to manage the variations
>>> between different versions of the kernel by attempting various
>>> implementations of an operator.
>>
>> What is the utility of this? I'm missing what difference it would be
>> if libbpf rejected vs kernel rejecting it? For backwards compat the
>> kernel will fail or libbpf might throw an error and user will have to
>> fixup signature regardless right? Why not get the error as early as
>> possible.
> 
> The check described here is that libbpf compares BTF types of functions
> and function pointers in struct_ops types in BPF programs, which may
> differ from kernel definitions.
> 
> A scenario here is a struct_ops type that includes an operator op_A with
> different versions depending on the kernel. All other fields in the
> struct_ops type have the same types. The application has only one
> definition for this struct_ops type, but the implementation of op_A is
> done separately for each version.
> 
> The application can try variations by assigning implementations to the
> op_A field until one is accepted by the kernel if libbpf doesn’t enforce

It probably would be clearer if the test actually does the retry. e.g. Try to 
load a struct_ops prog which reads an extra arg that is not supported by the 
running kernel and gets rejected by verifier. Then assigns an older struct_ops 
prog to the skel->struct_ops...->fn and loads successfully by the verifier.
Andrii Nakryiko April 3, 2024, 9:15 p.m. UTC | #4
On Wed, Apr 3, 2024 at 1:52 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 4/2/24 10:00 AM, Kui-Feng Lee wrote:
> >
> >
> >
> > On 4/1/24 18:43, John Fastabend wrote:
> >> Kui-Feng Lee wrote:
> >>> The verifier in the kernel checks the signatures of struct_ops
> >>> operators. Libbpf should not verify it in order to allow flexibility in
>
> This description probably is not accurate. iirc, the verifier does not check the
> function signature either. The verifier rejects only when the struct_ops prog
> tries to access something invalid. e.g. reading a function argument that does
> not exist in the running kernel.
>
> >>> loading different implementations of an operator with different signatures
> >>> to try to comply with the kernel, even if the signature defined in the BPF
> >>> programs does not match with the implementations and the kernel.
>
> >>> This feature enables user space applications to manage the variations
> >>> between different versions of the kernel by attempting various
> >>> implementations of an operator.
> >>
> >> What is the utility of this? I'm missing what difference it would be
> >> if libbpf rejected vs kernel rejecting it? For backwards compat the
> >> kernel will fail or libbpf might throw an error and user will have to
> >> fixup signature regardless right? Why not get the error as early as
> >> possible.
> >
> > The check described here is that libbpf compares BTF types of functions
> > and function pointers in struct_ops types in BPF programs, which may
> > differ from kernel definitions.
> >
> > A scenario here is a struct_ops type that includes an operator op_A with
> > different versions depending on the kernel. All other fields in the
> > struct_ops type have the same types. The application has only one
> > definition for this struct_ops type, but the implementation of op_A is
> > done separately for each version.
> >
> > The application can try variations by assigning implementations to the
> > op_A field until one is accepted by the kernel if libbpf doesn’t enforce
>
> It probably would be clearer if the test actually does the retry. e.g. Try to
> load a struct_ops prog which reads an extra arg that is not supported by the
> running kernel and gets rejected by verifier. Then assigns an older struct_ops
> prog to the skel->struct_ops...->fn and loads successfully by the verifier.
>

This is actually a discouraged practice. In practice in production
user-space logic does feature detection (using BTF or whatever else
necessary) and then decides on specific BPF program implementation. So
I wouldn't overstress this approach (trial-and-error one) in tests,
it's a bad and sloppy practice.
Kui-Feng Lee April 3, 2024, 9:28 p.m. UTC | #5
On 4/3/24 13:52, Martin KaFai Lau wrote:
> On 4/2/24 10:00 AM, Kui-Feng Lee wrote:
>>
>>
>>
>> On 4/1/24 18:43, John Fastabend wrote:
>>> Kui-Feng Lee wrote:
>>>> The verifier in the kernel checks the signatures of struct_ops
>>>> operators. Libbpf should not verify it in order to allow flexibility in
> 
> This description probably is not accurate. iirc, the verifier does not 
> check the function signature either. The verifier rejects only when the 
> struct_ops prog tries to access something invalid. e.g. reading a 
> function argument that does not exist in the running kernel.

Yes, kernel checks the behavior of programs. I will change the description.

> 
>>>> loading different implementations of an operator with different 
>>>> signatures
>>>> to try to comply with the kernel, even if the signature defined in 
>>>> the BPF
>>>> programs does not match with the implementations and the kernel.
> 
>>>> This feature enables user space applications to manage the variations
>>>> between different versions of the kernel by attempting various
>>>> implementations of an operator.
>>>
>>> What is the utility of this? I'm missing what difference it would be
>>> if libbpf rejected vs kernel rejecting it? For backwards compat the
>>> kernel will fail or libbpf might throw an error and user will have to
>>> fixup signature regardless right? Why not get the error as early as
>>> possible.
>>
>> The check described here is that libbpf compares BTF types of functions
>> and function pointers in struct_ops types in BPF programs, which may
>> differ from kernel definitions.
>>
>> A scenario here is a struct_ops type that includes an operator op_A with
>> different versions depending on the kernel. All other fields in the
>> struct_ops type have the same types. The application has only one
>> definition for this struct_ops type, but the implementation of op_A is
>> done separately for each version.
>>
>> The application can try variations by assigning implementations to the
>> op_A field until one is accepted by the kernel if libbpf doesn’t enforce
> 
> It probably would be clearer if the test actually does the retry. e.g. 
> Try to load a struct_ops prog which reads an extra arg that is not 
> supported by the running kernel and gets rejected by verifier. Then 
> assigns an older struct_ops prog to the skel->struct_ops...->fn and 
> loads successfully by the verifier.
>
Kui-Feng Lee April 3, 2024, 9:34 p.m. UTC | #6
On 4/3/24 14:15, Andrii Nakryiko wrote:
> On Wed, Apr 3, 2024 at 1:52 PM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>>
>> On 4/2/24 10:00 AM, Kui-Feng Lee wrote:
>>>
>>>
>>>
>>> On 4/1/24 18:43, John Fastabend wrote:
>>>> Kui-Feng Lee wrote:
>>>>> The verifier in the kernel checks the signatures of struct_ops
>>>>> operators. Libbpf should not verify it in order to allow flexibility in
>>
>> This description probably is not accurate. iirc, the verifier does not check the
>> function signature either. The verifier rejects only when the struct_ops prog
>> tries to access something invalid. e.g. reading a function argument that does
>> not exist in the running kernel.
>>
>>>>> loading different implementations of an operator with different signatures
>>>>> to try to comply with the kernel, even if the signature defined in the BPF
>>>>> programs does not match with the implementations and the kernel.
>>
>>>>> This feature enables user space applications to manage the variations
>>>>> between different versions of the kernel by attempting various
>>>>> implementations of an operator.
>>>>
>>>> What is the utility of this? I'm missing what difference it would be
>>>> if libbpf rejected vs kernel rejecting it? For backwards compat the
>>>> kernel will fail or libbpf might throw an error and user will have to
>>>> fixup signature regardless right? Why not get the error as early as
>>>> possible.
>>>
>>> The check described here is that libbpf compares BTF types of functions
>>> and function pointers in struct_ops types in BPF programs, which may
>>> differ from kernel definitions.
>>>
>>> A scenario here is a struct_ops type that includes an operator op_A with
>>> different versions depending on the kernel. All other fields in the
>>> struct_ops type have the same types. The application has only one
>>> definition for this struct_ops type, but the implementation of op_A is
>>> done separately for each version.
>>>
>>> The application can try variations by assigning implementations to the
>>> op_A field until one is accepted by the kernel if libbpf doesn’t enforce
>>
>> It probably would be clearer if the test actually does the retry. e.g. Try to
>> load a struct_ops prog which reads an extra arg that is not supported by the
>> running kernel and gets rejected by verifier. Then assigns an older struct_ops
>> prog to the skel->struct_ops...->fn and loads successfully by the verifier.
>>
> 
> This is actually a discouraged practice. In practice in production
> user-space logic does feature detection (using BTF or whatever else
> necessary) and then decides on specific BPF program implementation. So
> I wouldn't overstress this approach (trial-and-error one) in tests,
> it's a bad and sloppy practice.

It makes sense for me. I will rephrase this paragraph by using "feature
detection" to replace "Try variations...".
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
index 098776d00ab4..7cf2b9ddd3e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -138,11 +138,35 @@  static void test_struct_ops_not_zeroed(void)
 	struct_ops_module__destroy(skel);
 }
 
+/* The signature of an implementation might not match the signature of the
+ * function pointer prototype defined in the BPF program. This mismatch
+ * should be allowed as long as the behavior of the operator program
+ * adheres to the signature in the kernel. Libbpf should not enforce the
+ * signature; rather, let the kernel verifier handle the enforcement.
+ */
+static void test_struct_ops_incompatible(void)
+{
+	struct struct_ops_module *skel;
+	struct bpf_link *link;
+
+	skel = struct_ops_module__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.testmod_incompatible);
+	if (ASSERT_OK_PTR(link, "attach_struct_ops"))
+		bpf_link__destroy(link);
+
+	struct_ops_module__destroy(skel);
+}
+
 void serial_test_struct_ops_module(void)
 {
 	if (test__start_subtest("test_struct_ops_load"))
 		test_struct_ops_load();
 	if (test__start_subtest("test_struct_ops_not_zeroed"))
 		test_struct_ops_not_zeroed();
+	if (test__start_subtest("test_struct_ops_incompatible"))
+		test_struct_ops_incompatible();
 }
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
index 86e1e50c5531..63b065dae002 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_module.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -68,3 +68,16 @@  struct bpf_testmod_ops___zeroed testmod_zeroed = {
 	.test_1 = (void *)test_1,
 	.test_2 = (void *)test_2_v2,
 };
+
+struct bpf_testmod_ops___incompatible {
+	int (*test_1)(void);
+	void (*test_2)(int *a);
+	int data;
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___incompatible testmod_incompatible = {
+	.test_1 = (void *)test_1,
+	.test_2 = (void *)test_2,
+	.data = 3,
+};