diff mbox series

[bpf,3/4] bpf: Ensure unit_size is matched with slab cache object size

Message ID 20230908133923.2675053-4-houtao@huaweicloud.com (mailing list archive)
State Accepted
Commit c930472552022bd09aab3cd946ba3f243070d5c7
Delegated to: BPF
Headers show
Series Fix the unmatched unit_size of bpf_mem_cache | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1345 this patch: 1345
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/build_clang success Errors and warnings before: 1364 this patch: 1364
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1368 this patch: 1368
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 63 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-0 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-5 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-1 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-3 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-4 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-2 success Logs for build for s390x with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-28 success Logs for veristat
bpf/vmtest-bpf-VM_Test-26 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-6 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-10 fail Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-12 fail Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-13 fail Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-14 fail Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-16 fail Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-17 fail Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-18 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-21 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-VM_Test-25 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-11 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-VM_Test-15 fail Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for test_maps on s390x with gcc

Commit Message

Hou Tao Sept. 8, 2023, 1:39 p.m. UTC
From: Hou Tao <houtao1@huawei.com>

Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
bpf_mem_cache is matched with the object_size of underlying slab cache.
If these two sizes are unmatched, print a warning once and return
-EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
the potential issue can be prevented.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

Comments

Nathan Chancellor Sept. 14, 2023, 6:14 p.m. UTC | #1
Hi Hou,

On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
> 
> Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
> bpf_mem_cache is matched with the object_size of underlying slab cache.
> If these two sizes are unmatched, print a warning once and return
> -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
> the potential issue can be prevented.
> 
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> index 90c1ed8210a2..1c22b90e754a 100644
> --- a/kernel/bpf/memalloc.c
> +++ b/kernel/bpf/memalloc.c
> @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
>  	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
>  }
>  
> +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
> +{
> +	struct llist_node *first;
> +	unsigned int obj_size;
> +
> +	first = c->free_llist.first;
> +	if (!first)
> +		return 0;
> +
> +	obj_size = ksize(first);
> +	if (obj_size != c->unit_size) {
> +		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
> +			  idx, obj_size, c->unit_size);
> +		return -EINVAL;
> +	}
> +	return 0;
> +}

I am seeing the warning added by this change as commit c93047255202
("bpf: Ensure unit_size is matched with slab cache object size") when
booting ARCH=riscv defconfig in QEMU. I have seen some discussion on the
mailing list around this, so I apologize if this is a duplicate report
but it sounded like the previously reported instance of this warning was
already resolved by some other changeor supposed to be resolved by [1].
Unfortunately, I tested both current bpf master (currently at
6bd5bcb18f94) with and without that change and I still see the warning
in both cases. The rootfs is available at [2], if it is relevant.

$ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- mrproper defconfig Image

$ qemu-system-riscv64 \
    -display none \
    -nodefaults \
    -bios default \
    -M virt \
    -append earlycon \
    -kernel arch/riscv/boot/Image \
    -initrd riscv-rootfs.cpio \
    -m 512m \
    -serial mon:stdio
...
[    0.000000] Linux version 6.5.0-12679-gc93047255202 (nathan@dev-arch.thelio-3990X) (riscv64-linux-gcc (GCC) 13.2.0, GNU ld (GNU Binutils) 2.41) #1 SMP Thu Sep 14 10:44:41 MST 2023
...
[    0.433002] ------------[ cut here ]------------
[    0.433128] bpf_mem_cache[0]: unexpected object size 128, expect 96
[    0.433585] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:500 bpf_mem_alloc_init+0x348/0x354
[    0.433810] Modules linked in:
[    0.433928] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-12679-gc93047255202 #1
[    0.434025] Hardware name: riscv-virtio,qemu (DT)
[    0.434105] epc : bpf_mem_alloc_init+0x348/0x354
[    0.434140]  ra : bpf_mem_alloc_init+0x348/0x354
[    0.434163] epc : ffffffff80112572 ra : ffffffff80112572 sp : ff2000000000bd30
[    0.434177]  gp : ffffffff81501588 tp : ff600000018d0000 t0 : ffffffff808cd1a0
[    0.434190]  t1 : 0720072007200720 t2 : 635f6d656d5f6670 s0 : ff2000000000bdd0
[    0.434202]  s1 : ffffffff80e17620 a0 : 0000000000000037 a1 : ffffffff814866b8
[    0.434215]  a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000
[    0.434227]  a5 : 0000000000000000 a6 : 0000000000000047 a7 : 0000000000000046
[    0.434239]  s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000
[    0.434251]  s5 : 0000000000000100 s6 : ffffffff815031f8 s7 : ffffffff8153a610
[    0.434264]  s8 : 0000000000000060 s9 : 0000000000000060 s10: 0000000000000000
[    0.434276]  s11: ff6000001ffe5410 t3 : ff60000001858f00 t4 : ff60000001858f00
[    0.434288]  t5 : ff60000001858000 t6 : ff2000000000bb48
[    0.434299] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
[    0.434394] [<ffffffff80112572>] bpf_mem_alloc_init+0x348/0x354
[    0.434492] [<ffffffff80a0f302>] bpf_global_ma_init+0x1c/0x30
[    0.434516] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c
[    0.434526] [<ffffffff80a0105e>] kernel_init_freeable+0x214/0x27e
[    0.434537] [<ffffffff808db4dc>] kernel_init+0x1e/0x10a
[    0.434548] [<ffffffff80003386>] ret_from_fork+0xa/0x1c
[    0.434618] ---[ end trace 0000000000000000 ]---

[1]: https://lore.kernel.org/20230913135943.3137292-1-houtao@huaweicloud.com/
[2]: https://github.com/ClangBuiltLinux/boot-utils/releases

Cheers,
Nathan

# bad: [98897dc735cf6635f0966f76eb0108354168fb15] Add linux-next specific files for 20230914
# good: [aed8aee11130a954356200afa3f1b8753e8a9482] Merge tag 'pmdomain-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm
git bisect start '98897dc735cf6635f0966f76eb0108354168fb15' 'aed8aee11130a954356200afa3f1b8753e8a9482'
# good: [ea1bbd78a48c8b325583e8c0bc2690850cb51807] bcachefs: Fix assorted checkpatch nits
git bisect good ea1bbd78a48c8b325583e8c0bc2690850cb51807
# bad: [9c4e2139cfa15d769eafd51bf3e051293b106986] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
git bisect bad 9c4e2139cfa15d769eafd51bf3e051293b106986
# bad: [4f07b13481ab390108b015da2bc8f560416e48d2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
git bisect bad 4f07b13481ab390108b015da2bc8f560416e48d2
# bad: [bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471] Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc
git bisect bad bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471
# bad: [95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
git bisect bad 95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac
# good: [6836d373943afeeeb8e2989c22aaaa51218a83c6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git
git bisect good 6836d373943afeeeb8e2989c22aaaa51218a83c6
# good: [3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
git bisect good 3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9
# bad: [51d56d51d3881addaea2c7242ae859155ae75607] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
git bisect bad 51d56d51d3881addaea2c7242ae859155ae75607
# bad: [1a49f4195d3498fe458a7f5ff7ec5385da70d92e] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init
git bisect bad 1a49f4195d3498fe458a7f5ff7ec5385da70d92e
# bad: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
git bisect bad c930472552022bd09aab3cd946ba3f243070d5c7
# good: [7182e56411b9a8b76797ed7b6095fc84be76dfb0] selftests/bpf: Add kprobe_multi override test
git bisect good 7182e56411b9a8b76797ed7b6095fc84be76dfb0
# good: [b1d53958b69312e43c118d4093d8f93d3f6f80af] bpf: Don't prefill for unused bpf_mem_cache
git bisect good b1d53958b69312e43c118d4093d8f93d3f6f80af
# first bad commit: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
Hou Tao Sept. 16, 2023, 2:38 a.m. UTC | #2
Hi,

On 9/15/2023 2:14 AM, Nathan Chancellor wrote:
> Hi Hou,
>
> On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote:
>> From: Hou Tao <houtao1@huawei.com>
>>
>> Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
>> bpf_mem_cache is matched with the object_size of underlying slab cache.
>> If these two sizes are unmatched, print a warning once and return
>> -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
>> the potential issue can be prevented.
>>
>> Suggested-by: Alexei Starovoitov <ast@kernel.org>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>>  kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++--
>>  1 file changed, 31 insertions(+), 2 deletions(-)
>>
>> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
>> index 90c1ed8210a2..1c22b90e754a 100644
>> --- a/kernel/bpf/memalloc.c
>> +++ b/kernel/bpf/memalloc.c
>> @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
>>  	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
>>  }
>>  
>> +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
>> +{
>> +	struct llist_node *first;
>> +	unsigned int obj_size;
>> +
>> +	first = c->free_llist.first;
>> +	if (!first)
>> +		return 0;
>> +
>> +	obj_size = ksize(first);
>> +	if (obj_size != c->unit_size) {
>> +		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
>> +			  idx, obj_size, c->unit_size);
>> +		return -EINVAL;
>> +	}
>> +	return 0;
>> +}
> I am seeing the warning added by this change as commit c93047255202
> ("bpf: Ensure unit_size is matched with slab cache object size") when
> booting ARCH=riscv defconfig in QEMU. I have seen some discussion on the
> mailing list around this, so I apologize if this is a duplicate report
> but it sounded like the previously reported instance of this warning was
> already resolved by some other changeor supposed to be resolved by [1].
> Unfortunately, I tested both current bpf master (currently at
> 6bd5bcb18f94) with and without that change and I still see the warning
> in both cases. The rootfs is available at [2], if it is relevant.

Thanks for the report. It seems it is a new problem which should be
fixed by commit d52b59315bf5 ("bpf: Adjust size_index according to the
value of KMALLOC_MIN_SIZE"), but failed to. However I could not
reproduce the problem by using the provided steps. Do you configure any
boot cmd line in your local setup ? I suspect dma_get_cache_alignment()
returns 64 of 1 in your setup, so slab-96 is rounded up to slab-128.
Could you please run the attached debug patch and post its output ?

> $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- mrproper defconfig Image
>
> $ qemu-system-riscv64 \
>     -display none \
>     -nodefaults \
>     -bios default \
>     -M virt \
>     -append earlycon \
>     -kernel arch/riscv/boot/Image \
>     -initrd riscv-rootfs.cpio \
>     -m 512m \
>     -serial mon:stdio
> ...
> [    0.000000] Linux version 6.5.0-12679-gc93047255202 (nathan@dev-arch.thelio-3990X) (riscv64-linux-gcc (GCC) 13.2.0, GNU ld (GNU Binutils) 2.41) #1 SMP Thu Sep 14 10:44:41 MST 2023
> ...
> [    0.433002] ------------[ cut here ]------------
> [    0.433128] bpf_mem_cache[0]: unexpected object size 128, expect 96
> [    0.433585] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:500 bpf_mem_alloc_init+0x348/0x354
> [    0.433810] Modules linked in:
> [    0.433928] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-12679-gc93047255202 #1
> [    0.434025] Hardware name: riscv-virtio,qemu (DT)
> [    0.434105] epc : bpf_mem_alloc_init+0x348/0x354
> [    0.434140]  ra : bpf_mem_alloc_init+0x348/0x354
> [    0.434163] epc : ffffffff80112572 ra : ffffffff80112572 sp : ff2000000000bd30
> [    0.434177]  gp : ffffffff81501588 tp : ff600000018d0000 t0 : ffffffff808cd1a0
> [    0.434190]  t1 : 0720072007200720 t2 : 635f6d656d5f6670 s0 : ff2000000000bdd0
> [    0.434202]  s1 : ffffffff80e17620 a0 : 0000000000000037 a1 : ffffffff814866b8
> [    0.434215]  a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000
> [    0.434227]  a5 : 0000000000000000 a6 : 0000000000000047 a7 : 0000000000000046
> [    0.434239]  s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000
> [    0.434251]  s5 : 0000000000000100 s6 : ffffffff815031f8 s7 : ffffffff8153a610
> [    0.434264]  s8 : 0000000000000060 s9 : 0000000000000060 s10: 0000000000000000
> [    0.434276]  s11: ff6000001ffe5410 t3 : ff60000001858f00 t4 : ff60000001858f00
> [    0.434288]  t5 : ff60000001858000 t6 : ff2000000000bb48
> [    0.434299] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> [    0.434394] [<ffffffff80112572>] bpf_mem_alloc_init+0x348/0x354
> [    0.434492] [<ffffffff80a0f302>] bpf_global_ma_init+0x1c/0x30
> [    0.434516] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c
> [    0.434526] [<ffffffff80a0105e>] kernel_init_freeable+0x214/0x27e
> [    0.434537] [<ffffffff808db4dc>] kernel_init+0x1e/0x10a
> [    0.434548] [<ffffffff80003386>] ret_from_fork+0xa/0x1c
> [    0.434618] ---[ end trace 0000000000000000 ]---
>
> [1]: https://lore.kernel.org/20230913135943.3137292-1-houtao@huaweicloud.com/
> [2]: https://github.com/ClangBuiltLinux/boot-utils/releases
>
> Cheers,
> Nathan
>
> # bad: [98897dc735cf6635f0966f76eb0108354168fb15] Add linux-next specific files for 20230914
> # good: [aed8aee11130a954356200afa3f1b8753e8a9482] Merge tag 'pmdomain-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm
> git bisect start '98897dc735cf6635f0966f76eb0108354168fb15' 'aed8aee11130a954356200afa3f1b8753e8a9482'
> # good: [ea1bbd78a48c8b325583e8c0bc2690850cb51807] bcachefs: Fix assorted checkpatch nits
> git bisect good ea1bbd78a48c8b325583e8c0bc2690850cb51807
> # bad: [9c4e2139cfa15d769eafd51bf3e051293b106986] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
> git bisect bad 9c4e2139cfa15d769eafd51bf3e051293b106986
> # bad: [4f07b13481ab390108b015da2bc8f560416e48d2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
> git bisect bad 4f07b13481ab390108b015da2bc8f560416e48d2
> # bad: [bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471] Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc
> git bisect bad bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471
> # bad: [95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
> git bisect bad 95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac
> # good: [6836d373943afeeeb8e2989c22aaaa51218a83c6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git
> git bisect good 6836d373943afeeeb8e2989c22aaaa51218a83c6
> # good: [3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
> git bisect good 3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9
> # bad: [51d56d51d3881addaea2c7242ae859155ae75607] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
> git bisect bad 51d56d51d3881addaea2c7242ae859155ae75607
> # bad: [1a49f4195d3498fe458a7f5ff7ec5385da70d92e] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init
> git bisect bad 1a49f4195d3498fe458a7f5ff7ec5385da70d92e
> # bad: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
> git bisect bad c930472552022bd09aab3cd946ba3f243070d5c7
> # good: [7182e56411b9a8b76797ed7b6095fc84be76dfb0] selftests/bpf: Add kprobe_multi override test
> git bisect good 7182e56411b9a8b76797ed7b6095fc84be76dfb0
> # good: [b1d53958b69312e43c118d4093d8f93d3f6f80af] bpf: Don't prefill for unused bpf_mem_cache
> git bisect good b1d53958b69312e43c118d4093d8f93d3f6f80af
> # first bad commit: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
From 369f2e59cc6c63c3924f654ba3f8491dba58b87b Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Sat, 16 Sep 2023 10:35:52 +0800
Subject: [PATCH] bpf: Check the return value of dma_get_cache_alignment()

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 kernel/bpf/memalloc.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 1c22b90e754a..c8d2c3097c43 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -6,6 +6,8 @@
 #include <linux/irq_work.h>
 #include <linux/bpf_mem_alloc.h>
 #include <linux/memcontrol.h>
+#include <linux/dma-mapping.h>
+#include <linux/swiotlb.h>
 #include <asm/local.h>
 
 /* Any context (including NMI) BPF specific memory allocator.
@@ -958,6 +960,14 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
 
+static unsigned int __kmalloc_minalign(void)
+{
+	if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
+	    is_swiotlb_allocated())
+		return ARCH_KMALLOC_MINALIGN;
+	return dma_get_cache_alignment();
+}
+
 /* Most of the logic is taken from setup_kmalloc_cache_index_table() */
 static __init int bpf_mem_cache_adjust_size(void)
 {
@@ -992,6 +1002,9 @@ static __init int bpf_mem_cache_adjust_size(void)
 			size_index[(size - 1) / 8] = index;
 	}
 
+	pr_info("==== bpf_mem_cache: KMALLOC_MIN_SIZE %u ARCH_KMALLOC_MINALIGN %u min_align %u\n",
+		KMALLOC_MIN_SIZE, ARCH_KMALLOC_MINALIGN, __kmalloc_minalign());
+
 	return 0;
 }
 subsys_initcall(bpf_mem_cache_adjust_size);
Nathan Chancellor Sept. 19, 2023, 11:46 p.m. UTC | #3
On Sat, Sep 16, 2023 at 10:38:09AM +0800, Hou Tao wrote:
> Hi,
> 
> On 9/15/2023 2:14 AM, Nathan Chancellor wrote:
> > Hi Hou,
> >
> > On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote:
> >> From: Hou Tao <houtao1@huawei.com>
> >>
> >> Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
> >> bpf_mem_cache is matched with the object_size of underlying slab cache.
> >> If these two sizes are unmatched, print a warning once and return
> >> -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
> >> the potential issue can be prevented.
> >>
> >> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> >> Signed-off-by: Hou Tao <houtao1@huawei.com>
> >> ---
> >>  kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++--
> >>  1 file changed, 31 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> >> index 90c1ed8210a2..1c22b90e754a 100644
> >> --- a/kernel/bpf/memalloc.c
> >> +++ b/kernel/bpf/memalloc.c
> >> @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
> >>  	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
> >>  }
> >>  
> >> +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
> >> +{
> >> +	struct llist_node *first;
> >> +	unsigned int obj_size;
> >> +
> >> +	first = c->free_llist.first;
> >> +	if (!first)
> >> +		return 0;
> >> +
> >> +	obj_size = ksize(first);
> >> +	if (obj_size != c->unit_size) {
> >> +		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
> >> +			  idx, obj_size, c->unit_size);
> >> +		return -EINVAL;
> >> +	}
> >> +	return 0;
> >> +}
> > I am seeing the warning added by this change as commit c93047255202
> > ("bpf: Ensure unit_size is matched with slab cache object size") when
> > booting ARCH=riscv defconfig in QEMU. I have seen some discussion on the
> > mailing list around this, so I apologize if this is a duplicate report
> > but it sounded like the previously reported instance of this warning was
> > already resolved by some other changeor supposed to be resolved by [1].
> > Unfortunately, I tested both current bpf master (currently at
> > 6bd5bcb18f94) with and without that change and I still see the warning
> > in both cases. The rootfs is available at [2], if it is relevant.
> 
> Thanks for the report. It seems it is a new problem which should be
> fixed by commit d52b59315bf5 ("bpf: Adjust size_index according to the
> value of KMALLOC_MIN_SIZE"), but failed to. However I could not
> reproduce the problem by using the provided steps. Do you configure any
> boot cmd line in your local setup ? I suspect dma_get_cache_alignment()
> returns 64 of 1 in your setup, so slab-96 is rounded up to slab-128.
> Could you please run the attached debug patch and post its output ?

Apologies for the delay, I have been traveling (and will be again
shortly). It seems like your theory could be accurate, as I see:

[    0.120969] ==== bpf_mem_cache: KMALLOC_MIN_SIZE 8 ARCH_KMALLOC_MINALIGN 8 min_align 64

There are no cmdline arguments aside from 'earlycon'. Could the QEMU
version matter here for reproduction purposes, as I am still able to
reproduce with my commands on next-20230919? I am using 8.1.0.

Cheers,
Nathan

> > $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- mrproper defconfig Image
> >
> > $ qemu-system-riscv64 \
> >     -display none \
> >     -nodefaults \
> >     -bios default \
> >     -M virt \
> >     -append earlycon \
> >     -kernel arch/riscv/boot/Image \
> >     -initrd riscv-rootfs.cpio \
> >     -m 512m \
> >     -serial mon:stdio
> > ...
> > [    0.000000] Linux version 6.5.0-12679-gc93047255202 (nathan@dev-arch.thelio-3990X) (riscv64-linux-gcc (GCC) 13.2.0, GNU ld (GNU Binutils) 2.41) #1 SMP Thu Sep 14 10:44:41 MST 2023
> > ...
> > [    0.433002] ------------[ cut here ]------------
> > [    0.433128] bpf_mem_cache[0]: unexpected object size 128, expect 96
> > [    0.433585] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:500 bpf_mem_alloc_init+0x348/0x354
> > [    0.433810] Modules linked in:
> > [    0.433928] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-12679-gc93047255202 #1
> > [    0.434025] Hardware name: riscv-virtio,qemu (DT)
> > [    0.434105] epc : bpf_mem_alloc_init+0x348/0x354
> > [    0.434140]  ra : bpf_mem_alloc_init+0x348/0x354
> > [    0.434163] epc : ffffffff80112572 ra : ffffffff80112572 sp : ff2000000000bd30
> > [    0.434177]  gp : ffffffff81501588 tp : ff600000018d0000 t0 : ffffffff808cd1a0
> > [    0.434190]  t1 : 0720072007200720 t2 : 635f6d656d5f6670 s0 : ff2000000000bdd0
> > [    0.434202]  s1 : ffffffff80e17620 a0 : 0000000000000037 a1 : ffffffff814866b8
> > [    0.434215]  a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000
> > [    0.434227]  a5 : 0000000000000000 a6 : 0000000000000047 a7 : 0000000000000046
> > [    0.434239]  s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000
> > [    0.434251]  s5 : 0000000000000100 s6 : ffffffff815031f8 s7 : ffffffff8153a610
> > [    0.434264]  s8 : 0000000000000060 s9 : 0000000000000060 s10: 0000000000000000
> > [    0.434276]  s11: ff6000001ffe5410 t3 : ff60000001858f00 t4 : ff60000001858f00
> > [    0.434288]  t5 : ff60000001858000 t6 : ff2000000000bb48
> > [    0.434299] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> > [    0.434394] [<ffffffff80112572>] bpf_mem_alloc_init+0x348/0x354
> > [    0.434492] [<ffffffff80a0f302>] bpf_global_ma_init+0x1c/0x30
> > [    0.434516] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c
> > [    0.434526] [<ffffffff80a0105e>] kernel_init_freeable+0x214/0x27e
> > [    0.434537] [<ffffffff808db4dc>] kernel_init+0x1e/0x10a
> > [    0.434548] [<ffffffff80003386>] ret_from_fork+0xa/0x1c
> > [    0.434618] ---[ end trace 0000000000000000 ]---
> >
> > [1]: https://lore.kernel.org/20230913135943.3137292-1-houtao@huaweicloud.com/
> > [2]: https://github.com/ClangBuiltLinux/boot-utils/releases
> >
> > Cheers,
> > Nathan
> >
> > # bad: [98897dc735cf6635f0966f76eb0108354168fb15] Add linux-next specific files for 20230914
> > # good: [aed8aee11130a954356200afa3f1b8753e8a9482] Merge tag 'pmdomain-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm
> > git bisect start '98897dc735cf6635f0966f76eb0108354168fb15' 'aed8aee11130a954356200afa3f1b8753e8a9482'
> > # good: [ea1bbd78a48c8b325583e8c0bc2690850cb51807] bcachefs: Fix assorted checkpatch nits
> > git bisect good ea1bbd78a48c8b325583e8c0bc2690850cb51807
> > # bad: [9c4e2139cfa15d769eafd51bf3e051293b106986] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
> > git bisect bad 9c4e2139cfa15d769eafd51bf3e051293b106986
> > # bad: [4f07b13481ab390108b015da2bc8f560416e48d2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
> > git bisect bad 4f07b13481ab390108b015da2bc8f560416e48d2
> > # bad: [bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471] Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc
> > git bisect bad bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471
> > # bad: [95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
> > git bisect bad 95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac
> > # good: [6836d373943afeeeb8e2989c22aaaa51218a83c6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git
> > git bisect good 6836d373943afeeeb8e2989c22aaaa51218a83c6
> > # good: [3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
> > git bisect good 3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9
> > # bad: [51d56d51d3881addaea2c7242ae859155ae75607] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
> > git bisect bad 51d56d51d3881addaea2c7242ae859155ae75607
> > # bad: [1a49f4195d3498fe458a7f5ff7ec5385da70d92e] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init
> > git bisect bad 1a49f4195d3498fe458a7f5ff7ec5385da70d92e
> > # bad: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
> > git bisect bad c930472552022bd09aab3cd946ba3f243070d5c7
> > # good: [7182e56411b9a8b76797ed7b6095fc84be76dfb0] selftests/bpf: Add kprobe_multi override test
> > git bisect good 7182e56411b9a8b76797ed7b6095fc84be76dfb0
> > # good: [b1d53958b69312e43c118d4093d8f93d3f6f80af] bpf: Don't prefill for unused bpf_mem_cache
> > git bisect good b1d53958b69312e43c118d4093d8f93d3f6f80af
> > # first bad commit: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
> 

> From 369f2e59cc6c63c3924f654ba3f8491dba58b87b Mon Sep 17 00:00:00 2001
> From: Hou Tao <houtao1@huawei.com>
> Date: Sat, 16 Sep 2023 10:35:52 +0800
> Subject: [PATCH] bpf: Check the return value of dma_get_cache_alignment()
> 
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  kernel/bpf/memalloc.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> index 1c22b90e754a..c8d2c3097c43 100644
> --- a/kernel/bpf/memalloc.c
> +++ b/kernel/bpf/memalloc.c
> @@ -6,6 +6,8 @@
>  #include <linux/irq_work.h>
>  #include <linux/bpf_mem_alloc.h>
>  #include <linux/memcontrol.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/swiotlb.h>
>  #include <asm/local.h>
>  
>  /* Any context (including NMI) BPF specific memory allocator.
> @@ -958,6 +960,14 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
>  	return !ret ? NULL : ret + LLIST_NODE_SZ;
>  }
>  
> +static unsigned int __kmalloc_minalign(void)
> +{
> +	if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
> +	    is_swiotlb_allocated())
> +		return ARCH_KMALLOC_MINALIGN;
> +	return dma_get_cache_alignment();
> +}
> +
>  /* Most of the logic is taken from setup_kmalloc_cache_index_table() */
>  static __init int bpf_mem_cache_adjust_size(void)
>  {
> @@ -992,6 +1002,9 @@ static __init int bpf_mem_cache_adjust_size(void)
>  			size_index[(size - 1) / 8] = index;
>  	}
>  
> +	pr_info("==== bpf_mem_cache: KMALLOC_MIN_SIZE %u ARCH_KMALLOC_MINALIGN %u min_align %u\n",
> +		KMALLOC_MIN_SIZE, ARCH_KMALLOC_MINALIGN, __kmalloc_minalign());
> +
>  	return 0;
>  }
>  subsys_initcall(bpf_mem_cache_adjust_size);
> -- 
> 2.29.2
>
Guenter Roeck Sept. 22, 2023, 6:43 p.m. UTC | #4
Hi,

On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
> 
> Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
> bpf_mem_cache is matched with the object_size of underlying slab cache.
> If these two sizes are unmatched, print a warning once and return
> -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
> the potential issue can be prevented.
> 
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Hou Tao <houtao1@huawei.com>

With this patch in place, I see the following backtrace on riscv systems.

[    2.953088] bpf_mem_cache[0]: unexpected object size 128, expect 96
[    2.953481] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x326/0x32e
[    2.953645] Modules linked in:
[    2.953736] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc2-00244-g27bbf45eae9c #1
[    2.953790] Hardware name: riscv-virtio,qemu (DT)
[    2.953855] epc : bpf_mem_alloc_init+0x326/0x32e
[    2.953891]  ra : bpf_mem_alloc_init+0x326/0x32e
[    2.953909] epc : ffffffff8016cbd2 ra : ffffffff8016cbd2 sp : ff2000000000bd20
[    2.953920]  gp : ffffffff81c39298 tp : ff60000002e80040 t0 : 0000000000000000
[    2.953930]  t1 : ffffffffbbbabbc3 t2 : 635f6d656d5f6670 s0 : ff2000000000bdc0
[    2.953940]  s1 : ffffffff8121c7da a0 : 0000000000000037 a1 : ffffffff81a93048
[    2.953949]  a2 : 0000000000000010 a3 : 0000000000000001 a4 : 0000000000000000
[    2.953959]  a5 : 0000000000000000 a6 : ffffffff81c4fe08 a7 : 0000000000000000
[    2.953968]  s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000
[    2.953977]  s5 : 0000000000000000 s6 : 0000000000000100 s7 : ff5ffffffffd3128
[    2.953986]  s8 : ffffffff81c3d1f8 s9 : 0000000000000060 s10: 0000000000000000
[    2.953996]  s11: 0000000000000060 t3 : 0000000065a61b33 t4 : 0000000000000009
[    2.954005]  t5 : ffffffffde180000 t6 : ff2000000000bb08
[    2.954014] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
[    2.954047] [<ffffffff8016cbd2>] bpf_mem_alloc_init+0x326/0x32e
[    2.954087] [<ffffffff80e11426>] bpf_global_ma_init+0x1c/0x30
[    2.954097] [<ffffffff8000285e>] do_one_initcall+0x5c/0x238
[    2.954105] [<ffffffff80e011ae>] kernel_init_freeable+0x29a/0x30e
[    2.954115] [<ffffffff80c0312c>] kernel_init+0x1e/0x112
[    2.954124] [<ffffffff80003d82>] ret_from_fork+0xa/0x1c

Copying riscv maintainers and mailing list for feedback / comments.

Thanks,
Guenter
Hou Tao Sept. 25, 2023, 8:05 a.m. UTC | #5
Hi,

On 9/20/2023 7:46 AM, Nathan Chancellor wrote:
> On Sat, Sep 16, 2023 at 10:38:09AM +0800, Hou Tao wrote:
>> Hi,
>>
>> On 9/15/2023 2:14 AM, Nathan Chancellor wrote:
>>> Hi Hou,
>>>
>>> On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote:
>>>> From: Hou Tao <houtao1@huawei.com>
>>>>
>>>> Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
>>>> bpf_mem_cache is matched with the object_size of underlying slab cache.
>>>> If these two sizes are unmatched, print a warning once and return
>>>> -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
>>>> the potential issue can be prevented.
>>>>
>>>> Suggested-by: Alexei Starovoitov <ast@kernel.org>
>>>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>>>> ---
>>>>  kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++--
>>>>  1 file changed, 31 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
>>>> index 90c1ed8210a2..1c22b90e754a 100644
>>>> --- a/kernel/bpf/memalloc.c
>>>> +++ b/kernel/bpf/memalloc.c
>>>> @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
>>>>  	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
>>>>  }
>>>>  
>>>> +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
>>>> +{
>>>> +	struct llist_node *first;
>>>> +	unsigned int obj_size;
>>>> +
>>>> +	first = c->free_llist.first;
>>>> +	if (!first)
>>>> +		return 0;
>>>> +
>>>> +	obj_size = ksize(first);
>>>> +	if (obj_size != c->unit_size) {
>>>> +		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
>>>> +			  idx, obj_size, c->unit_size);
>>>> +		return -EINVAL;
>>>> +	}
>>>> +	return 0;
>>>> +}
>>> I am seeing the warning added by this change as commit c93047255202
>>> ("bpf: Ensure unit_size is matched with slab cache object size") when
>>> booting ARCH=riscv defconfig in QEMU. I have seen some discussion on the
>>> mailing list around this, so I apologize if this is a duplicate report
>>> but it sounded like the previously reported instance of this warning was
>>> already resolved by some other changeor supposed to be resolved by [1].
>>> Unfortunately, I tested both current bpf master (currently at
>>> 6bd5bcb18f94) with and without that change and I still see the warning
>>> in both cases. The rootfs is available at [2], if it is relevant.
>> Thanks for the report. It seems it is a new problem which should be
>> fixed by commit d52b59315bf5 ("bpf: Adjust size_index according to the
>> value of KMALLOC_MIN_SIZE"), but failed to. However I could not
>> reproduce the problem by using the provided steps. Do you configure any
>> boot cmd line in your local setup ? I suspect dma_get_cache_alignment()
>> returns 64 of 1 in your setup, so slab-96 is rounded up to slab-128.
>> Could you please run the attached debug patch and post its output ?

Sorry for the delay. I was in off-site training last week.

> Apologies for the delay, I have been traveling (and will be again
> shortly). It seems like your theory could be accurate, as I see:
>
> [    0.120969] ==== bpf_mem_cache: KMALLOC_MIN_SIZE 8 ARCH_KMALLOC_MINALIGN 8 min_align 64
>
> There are no cmdline arguments aside from 'earlycon'. Could the QEMU
> version matter here for reproduction purposes, as I am still able to
> reproduce with my commands on next-20230919? I am using 8.1.0.

The QEMU version matters. I can reproduce the problem by using v8.1.0
but can not reproduce by using v4.2.1. Will post a patch to fix it soon.
>
> Cheers,
> Nathan
>
>>> $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- mrproper defconfig Image
>>>
>>> $ qemu-system-riscv64 \
>>>     -display none \
>>>     -nodefaults \
>>>     -bios default \
>>>     -M virt \
>>>     -append earlycon \
>>>     -kernel arch/riscv/boot/Image \
>>>     -initrd riscv-rootfs.cpio \
>>>     -m 512m \
>>>     -serial mon:stdio
>>> ...
>>> [    0.000000] Linux version 6.5.0-12679-gc93047255202 (nathan@dev-arch.thelio-3990X) (riscv64-linux-gcc (GCC) 13.2.0, GNU ld (GNU Binutils) 2.41) #1 SMP Thu Sep 14 10:44:41 MST 2023
>>> ...
>>> [    0.433002] ------------[ cut here ]------------
>>> [    0.433128] bpf_mem_cache[0]: unexpected object size 128, expect 96
>>> [    0.433585] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:500 bpf_mem_alloc_init+0x348/0x354
>>> [    0.433810] Modules linked in:
>>> [    0.433928] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-12679-gc93047255202 #1
>>> [    0.434025] Hardware name: riscv-virtio,qemu (DT)
>>> [    0.434105] epc : bpf_mem_alloc_init+0x348/0x354
>>> [    0.434140]  ra : bpf_mem_alloc_init+0x348/0x354
>>> [    0.434163] epc : ffffffff80112572 ra : ffffffff80112572 sp : ff2000000000bd30
>>> [    0.434177]  gp : ffffffff81501588 tp : ff600000018d0000 t0 : ffffffff808cd1a0
>>> [    0.434190]  t1 : 0720072007200720 t2 : 635f6d656d5f6670 s0 : ff2000000000bdd0
>>> [    0.434202]  s1 : ffffffff80e17620 a0 : 0000000000000037 a1 : ffffffff814866b8
>>> [    0.434215]  a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000
>>> [    0.434227]  a5 : 0000000000000000 a6 : 0000000000000047 a7 : 0000000000000046
>>> [    0.434239]  s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000
>>> [    0.434251]  s5 : 0000000000000100 s6 : ffffffff815031f8 s7 : ffffffff8153a610
>>> [    0.434264]  s8 : 0000000000000060 s9 : 0000000000000060 s10: 0000000000000000
>>> [    0.434276]  s11: ff6000001ffe5410 t3 : ff60000001858f00 t4 : ff60000001858f00
>>> [    0.434288]  t5 : ff60000001858000 t6 : ff2000000000bb48
>>> [    0.434299] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
>>> [    0.434394] [<ffffffff80112572>] bpf_mem_alloc_init+0x348/0x354
>>> [    0.434492] [<ffffffff80a0f302>] bpf_global_ma_init+0x1c/0x30
>>> [    0.434516] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c
>>> [    0.434526] [<ffffffff80a0105e>] kernel_init_freeable+0x214/0x27e
>>> [    0.434537] [<ffffffff808db4dc>] kernel_init+0x1e/0x10a
>>> [    0.434548] [<ffffffff80003386>] ret_from_fork+0xa/0x1c
>>> [    0.434618] ---[ end trace 0000000000000000 ]---
>>>
>>> [1]: https://lore.kernel.org/20230913135943.3137292-1-houtao@huaweicloud.com/
>>> [2]: https://github.com/ClangBuiltLinux/boot-utils/releases
>>>
>>> Cheers,
>>> Nathan
>>>
>>> # bad: [98897dc735cf6635f0966f76eb0108354168fb15] Add linux-next specific files for 20230914
>>> # good: [aed8aee11130a954356200afa3f1b8753e8a9482] Merge tag 'pmdomain-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm
>>> git bisect start '98897dc735cf6635f0966f76eb0108354168fb15' 'aed8aee11130a954356200afa3f1b8753e8a9482'
>>> # good: [ea1bbd78a48c8b325583e8c0bc2690850cb51807] bcachefs: Fix assorted checkpatch nits
>>> git bisect good ea1bbd78a48c8b325583e8c0bc2690850cb51807
>>> # bad: [9c4e2139cfa15d769eafd51bf3e051293b106986] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
>>> git bisect bad 9c4e2139cfa15d769eafd51bf3e051293b106986
>>> # bad: [4f07b13481ab390108b015da2bc8f560416e48d2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
>>> git bisect bad 4f07b13481ab390108b015da2bc8f560416e48d2
>>> # bad: [bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471] Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc
>>> git bisect bad bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471
>>> # bad: [95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git
>>> git bisect bad 95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac
>>> # good: [6836d373943afeeeb8e2989c22aaaa51218a83c6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git
>>> git bisect good 6836d373943afeeeb8e2989c22aaaa51218a83c6
>>> # good: [3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
>>> git bisect good 3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9
>>> # bad: [51d56d51d3881addaea2c7242ae859155ae75607] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
>>> git bisect bad 51d56d51d3881addaea2c7242ae859155ae75607
>>> # bad: [1a49f4195d3498fe458a7f5ff7ec5385da70d92e] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init
>>> git bisect bad 1a49f4195d3498fe458a7f5ff7ec5385da70d92e
>>> # bad: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
>>> git bisect bad c930472552022bd09aab3cd946ba3f243070d5c7
>>> # good: [7182e56411b9a8b76797ed7b6095fc84be76dfb0] selftests/bpf: Add kprobe_multi override test
>>> git bisect good 7182e56411b9a8b76797ed7b6095fc84be76dfb0
>>> # good: [b1d53958b69312e43c118d4093d8f93d3f6f80af] bpf: Don't prefill for unused bpf_mem_cache
>>> git bisect good b1d53958b69312e43c118d4093d8f93d3f6f80af
>>> # first bad commit: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
>> From 369f2e59cc6c63c3924f654ba3f8491dba58b87b Mon Sep 17 00:00:00 2001
>> From: Hou Tao <houtao1@huawei.com>
>> Date: Sat, 16 Sep 2023 10:35:52 +0800
>> Subject: [PATCH] bpf: Check the return value of dma_get_cache_alignment()
>>
>> Signed-off-by: Hou Tao <houtao1@huawei.com>
>> ---
>>  kernel/bpf/memalloc.c | 13 +++++++++++++
>>  1 file changed, 13 insertions(+)
>>
>> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
>> index 1c22b90e754a..c8d2c3097c43 100644
>> --- a/kernel/bpf/memalloc.c
>> +++ b/kernel/bpf/memalloc.c
>> @@ -6,6 +6,8 @@
>>  #include <linux/irq_work.h>
>>  #include <linux/bpf_mem_alloc.h>
>>  #include <linux/memcontrol.h>
>> +#include <linux/dma-mapping.h>
>> +#include <linux/swiotlb.h>
>>  #include <asm/local.h>
>>  
>>  /* Any context (including NMI) BPF specific memory allocator.
>> @@ -958,6 +960,14 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
>>  	return !ret ? NULL : ret + LLIST_NODE_SZ;
>>  }
>>  
>> +static unsigned int __kmalloc_minalign(void)
>> +{
>> +	if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
>> +	    is_swiotlb_allocated())
>> +		return ARCH_KMALLOC_MINALIGN;
>> +	return dma_get_cache_alignment();
>> +}
>> +
>>  /* Most of the logic is taken from setup_kmalloc_cache_index_table() */
>>  static __init int bpf_mem_cache_adjust_size(void)
>>  {
>> @@ -992,6 +1002,9 @@ static __init int bpf_mem_cache_adjust_size(void)
>>  			size_index[(size - 1) / 8] = index;
>>  	}
>>  
>> +	pr_info("==== bpf_mem_cache: KMALLOC_MIN_SIZE %u ARCH_KMALLOC_MINALIGN %u min_align %u\n",
>> +		KMALLOC_MIN_SIZE, ARCH_KMALLOC_MINALIGN, __kmalloc_minalign());
>> +
>>  	return 0;
>>  }
>>  subsys_initcall(bpf_mem_cache_adjust_size);
>> -- 
>> 2.29.2
>>
> .
Emil Renner Berthing Sept. 29, 2023, 6:51 p.m. UTC | #6
Guenter Roeck wrote:
> Hi,
>
> On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote:
> > From: Hou Tao <houtao1@huawei.com>
> >
> > Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
> > bpf_mem_cache is matched with the object_size of underlying slab cache.
> > If these two sizes are unmatched, print a warning once and return
> > -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
> > the potential issue can be prevented.
> >
> > Suggested-by: Alexei Starovoitov <ast@kernel.org>
> > Signed-off-by: Hou Tao <houtao1@huawei.com>
>
> With this patch in place, I see the following backtrace on riscv systems.
>
> [    2.953088] bpf_mem_cache[0]: unexpected object size 128, expect 96
> [    2.953481] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x326/0x32e
> [    2.953645] Modules linked in:
> [    2.953736] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc2-00244-g27bbf45eae9c #1
> [    2.953790] Hardware name: riscv-virtio,qemu (DT)
> [    2.953855] epc : bpf_mem_alloc_init+0x326/0x32e
> [    2.953891]  ra : bpf_mem_alloc_init+0x326/0x32e
> [    2.953909] epc : ffffffff8016cbd2 ra : ffffffff8016cbd2 sp : ff2000000000bd20
> [    2.953920]  gp : ffffffff81c39298 tp : ff60000002e80040 t0 : 0000000000000000
> [    2.953930]  t1 : ffffffffbbbabbc3 t2 : 635f6d656d5f6670 s0 : ff2000000000bdc0
> [    2.953940]  s1 : ffffffff8121c7da a0 : 0000000000000037 a1 : ffffffff81a93048
> [    2.953949]  a2 : 0000000000000010 a3 : 0000000000000001 a4 : 0000000000000000
> [    2.953959]  a5 : 0000000000000000 a6 : ffffffff81c4fe08 a7 : 0000000000000000
> [    2.953968]  s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000
> [    2.953977]  s5 : 0000000000000000 s6 : 0000000000000100 s7 : ff5ffffffffd3128
> [    2.953986]  s8 : ffffffff81c3d1f8 s9 : 0000000000000060 s10: 0000000000000000
> [    2.953996]  s11: 0000000000000060 t3 : 0000000065a61b33 t4 : 0000000000000009
> [    2.954005]  t5 : ffffffffde180000 t6 : ff2000000000bb08
> [    2.954014] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> [    2.954047] [<ffffffff8016cbd2>] bpf_mem_alloc_init+0x326/0x32e
> [    2.954087] [<ffffffff80e11426>] bpf_global_ma_init+0x1c/0x30
> [    2.954097] [<ffffffff8000285e>] do_one_initcall+0x5c/0x238
> [    2.954105] [<ffffffff80e011ae>] kernel_init_freeable+0x29a/0x30e
> [    2.954115] [<ffffffff80c0312c>] kernel_init+0x1e/0x112
> [    2.954124] [<ffffffff80003d82>] ret_from_fork+0xa/0x1c
>
> Copying riscv maintainers and mailing list for feedback / comments.

If it makes a difference I also see this with 6.6-rc3 on my Nezha board
(Allwinner D1), but not on my VisionFive 2 (JH7110) running the same kernel.

/Emil
Lad, Prabhakar Sept. 29, 2023, 8:23 p.m. UTC | #7
On Fri, Sep 29, 2023 at 7:52 PM Emil Renner Berthing
<emil.renner.berthing@canonical.com> wrote:
>
> Guenter Roeck wrote:
> > Hi,
> >
> > On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote:
> > > From: Hou Tao <houtao1@huawei.com>
> > >
> > > Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
> > > bpf_mem_cache is matched with the object_size of underlying slab cache.
> > > If these two sizes are unmatched, print a warning once and return
> > > -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
> > > the potential issue can be prevented.
> > >
> > > Suggested-by: Alexei Starovoitov <ast@kernel.org>
> > > Signed-off-by: Hou Tao <houtao1@huawei.com>
> >
> > With this patch in place, I see the following backtrace on riscv systems.
> >
> > [    2.953088] bpf_mem_cache[0]: unexpected object size 128, expect 96
> > [    2.953481] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x326/0x32e
> > [    2.953645] Modules linked in:
> > [    2.953736] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc2-00244-g27bbf45eae9c #1
> > [    2.953790] Hardware name: riscv-virtio,qemu (DT)
> > [    2.953855] epc : bpf_mem_alloc_init+0x326/0x32e
> > [    2.953891]  ra : bpf_mem_alloc_init+0x326/0x32e
> > [    2.953909] epc : ffffffff8016cbd2 ra : ffffffff8016cbd2 sp : ff2000000000bd20
> > [    2.953920]  gp : ffffffff81c39298 tp : ff60000002e80040 t0 : 0000000000000000
> > [    2.953930]  t1 : ffffffffbbbabbc3 t2 : 635f6d656d5f6670 s0 : ff2000000000bdc0
> > [    2.953940]  s1 : ffffffff8121c7da a0 : 0000000000000037 a1 : ffffffff81a93048
> > [    2.953949]  a2 : 0000000000000010 a3 : 0000000000000001 a4 : 0000000000000000
> > [    2.953959]  a5 : 0000000000000000 a6 : ffffffff81c4fe08 a7 : 0000000000000000
> > [    2.953968]  s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000
> > [    2.953977]  s5 : 0000000000000000 s6 : 0000000000000100 s7 : ff5ffffffffd3128
> > [    2.953986]  s8 : ffffffff81c3d1f8 s9 : 0000000000000060 s10: 0000000000000000
> > [    2.953996]  s11: 0000000000000060 t3 : 0000000065a61b33 t4 : 0000000000000009
> > [    2.954005]  t5 : ffffffffde180000 t6 : ff2000000000bb08
> > [    2.954014] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> > [    2.954047] [<ffffffff8016cbd2>] bpf_mem_alloc_init+0x326/0x32e
> > [    2.954087] [<ffffffff80e11426>] bpf_global_ma_init+0x1c/0x30
> > [    2.954097] [<ffffffff8000285e>] do_one_initcall+0x5c/0x238
> > [    2.954105] [<ffffffff80e011ae>] kernel_init_freeable+0x29a/0x30e
> > [    2.954115] [<ffffffff80c0312c>] kernel_init+0x1e/0x112
> > [    2.954124] [<ffffffff80003d82>] ret_from_fork+0xa/0x1c
> >
> > Copying riscv maintainers and mailing list for feedback / comments.
>
> If it makes a difference I also see this with 6.6-rc3 on my Nezha board
> (Allwinner D1), but not on my VisionFive 2 (JH7110) running the same kernel.
>

Adding one more RISC-V board (Renesas RZ/Five) to list where I see this issue:
[    0.268936] ------------[ cut here ]------------
[    0.268953] bpf_mem_cache[0]: unexpected object size 128, expect 96
[    0.268993] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507
bpf_mem_alloc_init+0x306/0x30e
[    0.269026] Modules linked in:
[    0.269038] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
6.6.0-rc3-00091-g6acfe6a7c746 #538
[    0.269049] Hardware name: Renesas SMARC EVK based on r9a07g043f01 (DT)
[    0.269054] epc : bpf_mem_alloc_init+0x306/0x30e
[    0.269066]  ra : bpf_mem_alloc_init+0x306/0x30e
[    0.269077] epc : ffffffff8010e7ac ra : ffffffff8010e7ac sp :
ffffffc80000bd30
[    0.269084]  gp : ffffffff81506d08 tp : ffffffd801938000 t0 :
ffffffff81419e40
[    0.269090]  t1 : ffffffffffffffff t2 : 2d2d2d2d2d2d2d2d s0 :
ffffffc80000bdd0
[    0.269096]  s1 : 000000000000000b a0 : 0000000000000037 a1 :
0000000200000020
[    0.269102]  a2 : 0000000000000000 a3 : 0000000000000001 a4 :
0000000000000000
[    0.269107]  a5 : 0000000000000000 a6 : 0000000000000000 a7 :
0000000000000000
[    0.269112]  s2 : 0000000000000000 s3 : 0000000000000000 s4 :
0000000000000100
[    0.269118]  s5 : ffffffff815081f8 s6 : ffffffff8153f610 s7 :
0000000000000060
[    0.269124]  s8 : 0000000000000060 s9 : ffffffd836fd2770 s10:
ffffffff80e18dd8
[    0.269130]  s11: 0000000000000000 t3 : ffffffff8151e174 t4 :
ffffffff8151e174
[    0.269135]  t5 : ffffffff8151e150 t6 : ffffffff8151e1b8
[    0.269140] status: 0000000200000120 badaddr: 0000000000000000
cause: 0000000000000003
[    0.269147] [<ffffffff8010e7ac>] bpf_mem_alloc_init+0x306/0x30e
[    0.269160] [<ffffffff80a0f3e4>] bpf_global_ma_init+0x1c/0x30
[    0.269174] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c
[    0.269186] [<ffffffff80a00ffc>] kernel_init_freeable+0x200/0x26a
[    0.269203] [<ffffffff809195a4>] kernel_init+0x1e/0x10a
[    0.269213] [<ffffffff800035ee>] ret_from_fork+0xa/0x1c
[    0.269224] ---[ end trace 0000000000000000 ]---
[    0.281983] debug_vm_pgtable: [debug_vm_pgtable         ]:
Validating architecture page table helpers

Cheers,
Prabhakar
Alexei Starovoitov Sept. 29, 2023, 9 p.m. UTC | #8
On Fri, Sep 29, 2023 at 1:24 PM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
>
> On Fri, Sep 29, 2023 at 7:52 PM Emil Renner Berthing
> <emil.renner.berthing@canonical.com> wrote:
> >
> > Guenter Roeck wrote:
> > > Hi,
> > >
> > > On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote:
> > > > From: Hou Tao <houtao1@huawei.com>
> > > >
> > > > Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
> > > > bpf_mem_cache is matched with the object_size of underlying slab cache.
> > > > If these two sizes are unmatched, print a warning once and return
> > > > -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
> > > > the potential issue can be prevented.
> > > >
> > > > Suggested-by: Alexei Starovoitov <ast@kernel.org>
> > > > Signed-off-by: Hou Tao <houtao1@huawei.com>
> > >
> > > With this patch in place, I see the following backtrace on riscv systems.
> > >
> > > [    2.953088] bpf_mem_cache[0]: unexpected object size 128, expect 96
> > > [    2.953481] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x326/0x32e
> > > [    2.953645] Modules linked in:
> > > [    2.953736] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc2-00244-g27bbf45eae9c #1
> > > [    2.953790] Hardware name: riscv-virtio,qemu (DT)
> > > [    2.953855] epc : bpf_mem_alloc_init+0x326/0x32e
> > > [    2.953891]  ra : bpf_mem_alloc_init+0x326/0x32e
> > > [    2.953909] epc : ffffffff8016cbd2 ra : ffffffff8016cbd2 sp : ff2000000000bd20
> > > [    2.953920]  gp : ffffffff81c39298 tp : ff60000002e80040 t0 : 0000000000000000
> > > [    2.953930]  t1 : ffffffffbbbabbc3 t2 : 635f6d656d5f6670 s0 : ff2000000000bdc0
> > > [    2.953940]  s1 : ffffffff8121c7da a0 : 0000000000000037 a1 : ffffffff81a93048
> > > [    2.953949]  a2 : 0000000000000010 a3 : 0000000000000001 a4 : 0000000000000000
> > > [    2.953959]  a5 : 0000000000000000 a6 : ffffffff81c4fe08 a7 : 0000000000000000
> > > [    2.953968]  s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000
> > > [    2.953977]  s5 : 0000000000000000 s6 : 0000000000000100 s7 : ff5ffffffffd3128
> > > [    2.953986]  s8 : ffffffff81c3d1f8 s9 : 0000000000000060 s10: 0000000000000000
> > > [    2.953996]  s11: 0000000000000060 t3 : 0000000065a61b33 t4 : 0000000000000009
> > > [    2.954005]  t5 : ffffffffde180000 t6 : ff2000000000bb08
> > > [    2.954014] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003
> > > [    2.954047] [<ffffffff8016cbd2>] bpf_mem_alloc_init+0x326/0x32e
> > > [    2.954087] [<ffffffff80e11426>] bpf_global_ma_init+0x1c/0x30
> > > [    2.954097] [<ffffffff8000285e>] do_one_initcall+0x5c/0x238
> > > [    2.954105] [<ffffffff80e011ae>] kernel_init_freeable+0x29a/0x30e
> > > [    2.954115] [<ffffffff80c0312c>] kernel_init+0x1e/0x112
> > > [    2.954124] [<ffffffff80003d82>] ret_from_fork+0xa/0x1c
> > >
> > > Copying riscv maintainers and mailing list for feedback / comments.
> >
> > If it makes a difference I also see this with 6.6-rc3 on my Nezha board
> > (Allwinner D1), but not on my VisionFive 2 (JH7110) running the same kernel.
> >
>
> Adding one more RISC-V board (Renesas RZ/Five) to list where I see this issue:

Could you please help test the proposed fix:
https://patchwork.kernel.org/project/netdevbpf/patch/20230928101558.2594068-1-houtao@huaweicloud.com/
diff mbox series

Patch

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 90c1ed8210a2..1c22b90e754a 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -486,6 +486,24 @@  static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
 }
 
+static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
+{
+	struct llist_node *first;
+	unsigned int obj_size;
+
+	first = c->free_llist.first;
+	if (!first)
+		return 0;
+
+	obj_size = ksize(first);
+	if (obj_size != c->unit_size) {
+		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
+			  idx, obj_size, c->unit_size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* When size != 0 bpf_mem_cache for each cpu.
  * This is typical bpf hash map use case when all elements have equal size.
  *
@@ -496,10 +514,10 @@  static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 {
 	static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+	int cpu, i, err, unit_size, percpu_size = 0;
 	struct bpf_mem_caches *cc, __percpu *pcc;
 	struct bpf_mem_cache *c, __percpu *pc;
 	struct obj_cgroup *objcg = NULL;
-	int cpu, i, unit_size, percpu_size = 0;
 
 	if (size) {
 		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -537,6 +555,7 @@  int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
 	if (!pcc)
 		return -ENOMEM;
+	err = 0;
 #ifdef CONFIG_MEMCG_KMEM
 	objcg = get_obj_cgroup_from_current();
 #endif
@@ -557,10 +576,20 @@  int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			if (i != bpf_mem_cache_idx(c->unit_size))
 				continue;
 			prefill_mem_cache(c, cpu);
+			err = check_obj_size(c, i);
+			if (err)
+				goto out;
 		}
 	}
+
+out:
 	ma->caches = pcc;
-	return 0;
+	/* refill_work is either zeroed or initialized, so it is safe to
+	 * call irq_work_sync().
+	 */
+	if (err)
+		bpf_mem_alloc_destroy(ma);
+	return err;
 }
 
 static void drain_mem_cache(struct bpf_mem_cache *c)