Message ID | 20230908133923.2675053-4-houtao@huaweicloud.com (mailing list archive) |
---|---|
State | Accepted |
Commit | c930472552022bd09aab3cd946ba3f243070d5c7 |
Delegated to: | BPF |
Headers | show |
Series | Fix the unmatched unit_size of bpf_mem_cache | expand |
Hi Hou, On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote: > From: Hou Tao <houtao1@huawei.com> > > Add extra check in bpf_mem_alloc_init() to ensure the unit_size of > bpf_mem_cache is matched with the object_size of underlying slab cache. > If these two sizes are unmatched, print a warning once and return > -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and > the potential issue can be prevented. > > Suggested-by: Alexei Starovoitov <ast@kernel.org> > Signed-off-by: Hou Tao <houtao1@huawei.com> > --- > kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++-- > 1 file changed, 31 insertions(+), 2 deletions(-) > > diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c > index 90c1ed8210a2..1c22b90e754a 100644 > --- a/kernel/bpf/memalloc.c > +++ b/kernel/bpf/memalloc.c > @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) > alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); > } > > +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) > +{ > + struct llist_node *first; > + unsigned int obj_size; > + > + first = c->free_llist.first; > + if (!first) > + return 0; > + > + obj_size = ksize(first); > + if (obj_size != c->unit_size) { > + WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", > + idx, obj_size, c->unit_size); > + return -EINVAL; > + } > + return 0; > +} I am seeing the warning added by this change as commit c93047255202 ("bpf: Ensure unit_size is matched with slab cache object size") when booting ARCH=riscv defconfig in QEMU. I have seen some discussion on the mailing list around this, so I apologize if this is a duplicate report but it sounded like the previously reported instance of this warning was already resolved by some other changeor supposed to be resolved by [1]. Unfortunately, I tested both current bpf master (currently at 6bd5bcb18f94) with and without that change and I still see the warning in both cases. The rootfs is available at [2], if it is relevant. $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- mrproper defconfig Image $ qemu-system-riscv64 \ -display none \ -nodefaults \ -bios default \ -M virt \ -append earlycon \ -kernel arch/riscv/boot/Image \ -initrd riscv-rootfs.cpio \ -m 512m \ -serial mon:stdio ... [ 0.000000] Linux version 6.5.0-12679-gc93047255202 (nathan@dev-arch.thelio-3990X) (riscv64-linux-gcc (GCC) 13.2.0, GNU ld (GNU Binutils) 2.41) #1 SMP Thu Sep 14 10:44:41 MST 2023 ... [ 0.433002] ------------[ cut here ]------------ [ 0.433128] bpf_mem_cache[0]: unexpected object size 128, expect 96 [ 0.433585] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:500 bpf_mem_alloc_init+0x348/0x354 [ 0.433810] Modules linked in: [ 0.433928] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-12679-gc93047255202 #1 [ 0.434025] Hardware name: riscv-virtio,qemu (DT) [ 0.434105] epc : bpf_mem_alloc_init+0x348/0x354 [ 0.434140] ra : bpf_mem_alloc_init+0x348/0x354 [ 0.434163] epc : ffffffff80112572 ra : ffffffff80112572 sp : ff2000000000bd30 [ 0.434177] gp : ffffffff81501588 tp : ff600000018d0000 t0 : ffffffff808cd1a0 [ 0.434190] t1 : 0720072007200720 t2 : 635f6d656d5f6670 s0 : ff2000000000bdd0 [ 0.434202] s1 : ffffffff80e17620 a0 : 0000000000000037 a1 : ffffffff814866b8 [ 0.434215] a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 [ 0.434227] a5 : 0000000000000000 a6 : 0000000000000047 a7 : 0000000000000046 [ 0.434239] s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000 [ 0.434251] s5 : 0000000000000100 s6 : ffffffff815031f8 s7 : ffffffff8153a610 [ 0.434264] s8 : 0000000000000060 s9 : 0000000000000060 s10: 0000000000000000 [ 0.434276] s11: ff6000001ffe5410 t3 : ff60000001858f00 t4 : ff60000001858f00 [ 0.434288] t5 : ff60000001858000 t6 : ff2000000000bb48 [ 0.434299] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 [ 0.434394] [<ffffffff80112572>] bpf_mem_alloc_init+0x348/0x354 [ 0.434492] [<ffffffff80a0f302>] bpf_global_ma_init+0x1c/0x30 [ 0.434516] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c [ 0.434526] [<ffffffff80a0105e>] kernel_init_freeable+0x214/0x27e [ 0.434537] [<ffffffff808db4dc>] kernel_init+0x1e/0x10a [ 0.434548] [<ffffffff80003386>] ret_from_fork+0xa/0x1c [ 0.434618] ---[ end trace 0000000000000000 ]--- [1]: https://lore.kernel.org/20230913135943.3137292-1-houtao@huaweicloud.com/ [2]: https://github.com/ClangBuiltLinux/boot-utils/releases Cheers, Nathan # bad: [98897dc735cf6635f0966f76eb0108354168fb15] Add linux-next specific files for 20230914 # good: [aed8aee11130a954356200afa3f1b8753e8a9482] Merge tag 'pmdomain-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm git bisect start '98897dc735cf6635f0966f76eb0108354168fb15' 'aed8aee11130a954356200afa3f1b8753e8a9482' # good: [ea1bbd78a48c8b325583e8c0bc2690850cb51807] bcachefs: Fix assorted checkpatch nits git bisect good ea1bbd78a48c8b325583e8c0bc2690850cb51807 # bad: [9c4e2139cfa15d769eafd51bf3e051293b106986] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git git bisect bad 9c4e2139cfa15d769eafd51bf3e051293b106986 # bad: [4f07b13481ab390108b015da2bc8f560416e48d2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git git bisect bad 4f07b13481ab390108b015da2bc8f560416e48d2 # bad: [bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471] Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc git bisect bad bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471 # bad: [95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git git bisect bad 95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac # good: [6836d373943afeeeb8e2989c22aaaa51218a83c6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git git bisect good 6836d373943afeeeb8e2989c22aaaa51218a83c6 # good: [3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git git bisect good 3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9 # bad: [51d56d51d3881addaea2c7242ae859155ae75607] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git git bisect bad 51d56d51d3881addaea2c7242ae859155ae75607 # bad: [1a49f4195d3498fe458a7f5ff7ec5385da70d92e] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init git bisect bad 1a49f4195d3498fe458a7f5ff7ec5385da70d92e # bad: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size git bisect bad c930472552022bd09aab3cd946ba3f243070d5c7 # good: [7182e56411b9a8b76797ed7b6095fc84be76dfb0] selftests/bpf: Add kprobe_multi override test git bisect good 7182e56411b9a8b76797ed7b6095fc84be76dfb0 # good: [b1d53958b69312e43c118d4093d8f93d3f6f80af] bpf: Don't prefill for unused bpf_mem_cache git bisect good b1d53958b69312e43c118d4093d8f93d3f6f80af # first bad commit: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size
Hi, On 9/15/2023 2:14 AM, Nathan Chancellor wrote: > Hi Hou, > > On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote: >> From: Hou Tao <houtao1@huawei.com> >> >> Add extra check in bpf_mem_alloc_init() to ensure the unit_size of >> bpf_mem_cache is matched with the object_size of underlying slab cache. >> If these two sizes are unmatched, print a warning once and return >> -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and >> the potential issue can be prevented. >> >> Suggested-by: Alexei Starovoitov <ast@kernel.org> >> Signed-off-by: Hou Tao <houtao1@huawei.com> >> --- >> kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++-- >> 1 file changed, 31 insertions(+), 2 deletions(-) >> >> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c >> index 90c1ed8210a2..1c22b90e754a 100644 >> --- a/kernel/bpf/memalloc.c >> +++ b/kernel/bpf/memalloc.c >> @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) >> alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); >> } >> >> +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) >> +{ >> + struct llist_node *first; >> + unsigned int obj_size; >> + >> + first = c->free_llist.first; >> + if (!first) >> + return 0; >> + >> + obj_size = ksize(first); >> + if (obj_size != c->unit_size) { >> + WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", >> + idx, obj_size, c->unit_size); >> + return -EINVAL; >> + } >> + return 0; >> +} > I am seeing the warning added by this change as commit c93047255202 > ("bpf: Ensure unit_size is matched with slab cache object size") when > booting ARCH=riscv defconfig in QEMU. I have seen some discussion on the > mailing list around this, so I apologize if this is a duplicate report > but it sounded like the previously reported instance of this warning was > already resolved by some other changeor supposed to be resolved by [1]. > Unfortunately, I tested both current bpf master (currently at > 6bd5bcb18f94) with and without that change and I still see the warning > in both cases. The rootfs is available at [2], if it is relevant. Thanks for the report. It seems it is a new problem which should be fixed by commit d52b59315bf5 ("bpf: Adjust size_index according to the value of KMALLOC_MIN_SIZE"), but failed to. However I could not reproduce the problem by using the provided steps. Do you configure any boot cmd line in your local setup ? I suspect dma_get_cache_alignment() returns 64 of 1 in your setup, so slab-96 is rounded up to slab-128. Could you please run the attached debug patch and post its output ? > $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- mrproper defconfig Image > > $ qemu-system-riscv64 \ > -display none \ > -nodefaults \ > -bios default \ > -M virt \ > -append earlycon \ > -kernel arch/riscv/boot/Image \ > -initrd riscv-rootfs.cpio \ > -m 512m \ > -serial mon:stdio > ... > [ 0.000000] Linux version 6.5.0-12679-gc93047255202 (nathan@dev-arch.thelio-3990X) (riscv64-linux-gcc (GCC) 13.2.0, GNU ld (GNU Binutils) 2.41) #1 SMP Thu Sep 14 10:44:41 MST 2023 > ... > [ 0.433002] ------------[ cut here ]------------ > [ 0.433128] bpf_mem_cache[0]: unexpected object size 128, expect 96 > [ 0.433585] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:500 bpf_mem_alloc_init+0x348/0x354 > [ 0.433810] Modules linked in: > [ 0.433928] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-12679-gc93047255202 #1 > [ 0.434025] Hardware name: riscv-virtio,qemu (DT) > [ 0.434105] epc : bpf_mem_alloc_init+0x348/0x354 > [ 0.434140] ra : bpf_mem_alloc_init+0x348/0x354 > [ 0.434163] epc : ffffffff80112572 ra : ffffffff80112572 sp : ff2000000000bd30 > [ 0.434177] gp : ffffffff81501588 tp : ff600000018d0000 t0 : ffffffff808cd1a0 > [ 0.434190] t1 : 0720072007200720 t2 : 635f6d656d5f6670 s0 : ff2000000000bdd0 > [ 0.434202] s1 : ffffffff80e17620 a0 : 0000000000000037 a1 : ffffffff814866b8 > [ 0.434215] a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 > [ 0.434227] a5 : 0000000000000000 a6 : 0000000000000047 a7 : 0000000000000046 > [ 0.434239] s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000 > [ 0.434251] s5 : 0000000000000100 s6 : ffffffff815031f8 s7 : ffffffff8153a610 > [ 0.434264] s8 : 0000000000000060 s9 : 0000000000000060 s10: 0000000000000000 > [ 0.434276] s11: ff6000001ffe5410 t3 : ff60000001858f00 t4 : ff60000001858f00 > [ 0.434288] t5 : ff60000001858000 t6 : ff2000000000bb48 > [ 0.434299] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 > [ 0.434394] [<ffffffff80112572>] bpf_mem_alloc_init+0x348/0x354 > [ 0.434492] [<ffffffff80a0f302>] bpf_global_ma_init+0x1c/0x30 > [ 0.434516] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c > [ 0.434526] [<ffffffff80a0105e>] kernel_init_freeable+0x214/0x27e > [ 0.434537] [<ffffffff808db4dc>] kernel_init+0x1e/0x10a > [ 0.434548] [<ffffffff80003386>] ret_from_fork+0xa/0x1c > [ 0.434618] ---[ end trace 0000000000000000 ]--- > > [1]: https://lore.kernel.org/20230913135943.3137292-1-houtao@huaweicloud.com/ > [2]: https://github.com/ClangBuiltLinux/boot-utils/releases > > Cheers, > Nathan > > # bad: [98897dc735cf6635f0966f76eb0108354168fb15] Add linux-next specific files for 20230914 > # good: [aed8aee11130a954356200afa3f1b8753e8a9482] Merge tag 'pmdomain-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm > git bisect start '98897dc735cf6635f0966f76eb0108354168fb15' 'aed8aee11130a954356200afa3f1b8753e8a9482' > # good: [ea1bbd78a48c8b325583e8c0bc2690850cb51807] bcachefs: Fix assorted checkpatch nits > git bisect good ea1bbd78a48c8b325583e8c0bc2690850cb51807 > # bad: [9c4e2139cfa15d769eafd51bf3e051293b106986] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git > git bisect bad 9c4e2139cfa15d769eafd51bf3e051293b106986 > # bad: [4f07b13481ab390108b015da2bc8f560416e48d2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git > git bisect bad 4f07b13481ab390108b015da2bc8f560416e48d2 > # bad: [bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471] Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc > git bisect bad bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471 > # bad: [95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git > git bisect bad 95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac > # good: [6836d373943afeeeb8e2989c22aaaa51218a83c6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git > git bisect good 6836d373943afeeeb8e2989c22aaaa51218a83c6 > # good: [3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git > git bisect good 3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9 > # bad: [51d56d51d3881addaea2c7242ae859155ae75607] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git > git bisect bad 51d56d51d3881addaea2c7242ae859155ae75607 > # bad: [1a49f4195d3498fe458a7f5ff7ec5385da70d92e] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init > git bisect bad 1a49f4195d3498fe458a7f5ff7ec5385da70d92e > # bad: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size > git bisect bad c930472552022bd09aab3cd946ba3f243070d5c7 > # good: [7182e56411b9a8b76797ed7b6095fc84be76dfb0] selftests/bpf: Add kprobe_multi override test > git bisect good 7182e56411b9a8b76797ed7b6095fc84be76dfb0 > # good: [b1d53958b69312e43c118d4093d8f93d3f6f80af] bpf: Don't prefill for unused bpf_mem_cache > git bisect good b1d53958b69312e43c118d4093d8f93d3f6f80af > # first bad commit: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size From 369f2e59cc6c63c3924f654ba3f8491dba58b87b Mon Sep 17 00:00:00 2001 From: Hou Tao <houtao1@huawei.com> Date: Sat, 16 Sep 2023 10:35:52 +0800 Subject: [PATCH] bpf: Check the return value of dma_get_cache_alignment() Signed-off-by: Hou Tao <houtao1@huawei.com> --- kernel/bpf/memalloc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 1c22b90e754a..c8d2c3097c43 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -6,6 +6,8 @@ #include <linux/irq_work.h> #include <linux/bpf_mem_alloc.h> #include <linux/memcontrol.h> +#include <linux/dma-mapping.h> +#include <linux/swiotlb.h> #include <asm/local.h> /* Any context (including NMI) BPF specific memory allocator. @@ -958,6 +960,14 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) return !ret ? NULL : ret + LLIST_NODE_SZ; } +static unsigned int __kmalloc_minalign(void) +{ + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && + is_swiotlb_allocated()) + return ARCH_KMALLOC_MINALIGN; + return dma_get_cache_alignment(); +} + /* Most of the logic is taken from setup_kmalloc_cache_index_table() */ static __init int bpf_mem_cache_adjust_size(void) { @@ -992,6 +1002,9 @@ static __init int bpf_mem_cache_adjust_size(void) size_index[(size - 1) / 8] = index; } + pr_info("==== bpf_mem_cache: KMALLOC_MIN_SIZE %u ARCH_KMALLOC_MINALIGN %u min_align %u\n", + KMALLOC_MIN_SIZE, ARCH_KMALLOC_MINALIGN, __kmalloc_minalign()); + return 0; } subsys_initcall(bpf_mem_cache_adjust_size);
On Sat, Sep 16, 2023 at 10:38:09AM +0800, Hou Tao wrote: > Hi, > > On 9/15/2023 2:14 AM, Nathan Chancellor wrote: > > Hi Hou, > > > > On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote: > >> From: Hou Tao <houtao1@huawei.com> > >> > >> Add extra check in bpf_mem_alloc_init() to ensure the unit_size of > >> bpf_mem_cache is matched with the object_size of underlying slab cache. > >> If these two sizes are unmatched, print a warning once and return > >> -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and > >> the potential issue can be prevented. > >> > >> Suggested-by: Alexei Starovoitov <ast@kernel.org> > >> Signed-off-by: Hou Tao <houtao1@huawei.com> > >> --- > >> kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++-- > >> 1 file changed, 31 insertions(+), 2 deletions(-) > >> > >> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c > >> index 90c1ed8210a2..1c22b90e754a 100644 > >> --- a/kernel/bpf/memalloc.c > >> +++ b/kernel/bpf/memalloc.c > >> @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) > >> alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); > >> } > >> > >> +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) > >> +{ > >> + struct llist_node *first; > >> + unsigned int obj_size; > >> + > >> + first = c->free_llist.first; > >> + if (!first) > >> + return 0; > >> + > >> + obj_size = ksize(first); > >> + if (obj_size != c->unit_size) { > >> + WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", > >> + idx, obj_size, c->unit_size); > >> + return -EINVAL; > >> + } > >> + return 0; > >> +} > > I am seeing the warning added by this change as commit c93047255202 > > ("bpf: Ensure unit_size is matched with slab cache object size") when > > booting ARCH=riscv defconfig in QEMU. I have seen some discussion on the > > mailing list around this, so I apologize if this is a duplicate report > > but it sounded like the previously reported instance of this warning was > > already resolved by some other changeor supposed to be resolved by [1]. > > Unfortunately, I tested both current bpf master (currently at > > 6bd5bcb18f94) with and without that change and I still see the warning > > in both cases. The rootfs is available at [2], if it is relevant. > > Thanks for the report. It seems it is a new problem which should be > fixed by commit d52b59315bf5 ("bpf: Adjust size_index according to the > value of KMALLOC_MIN_SIZE"), but failed to. However I could not > reproduce the problem by using the provided steps. Do you configure any > boot cmd line in your local setup ? I suspect dma_get_cache_alignment() > returns 64 of 1 in your setup, so slab-96 is rounded up to slab-128. > Could you please run the attached debug patch and post its output ? Apologies for the delay, I have been traveling (and will be again shortly). It seems like your theory could be accurate, as I see: [ 0.120969] ==== bpf_mem_cache: KMALLOC_MIN_SIZE 8 ARCH_KMALLOC_MINALIGN 8 min_align 64 There are no cmdline arguments aside from 'earlycon'. Could the QEMU version matter here for reproduction purposes, as I am still able to reproduce with my commands on next-20230919? I am using 8.1.0. Cheers, Nathan > > $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- mrproper defconfig Image > > > > $ qemu-system-riscv64 \ > > -display none \ > > -nodefaults \ > > -bios default \ > > -M virt \ > > -append earlycon \ > > -kernel arch/riscv/boot/Image \ > > -initrd riscv-rootfs.cpio \ > > -m 512m \ > > -serial mon:stdio > > ... > > [ 0.000000] Linux version 6.5.0-12679-gc93047255202 (nathan@dev-arch.thelio-3990X) (riscv64-linux-gcc (GCC) 13.2.0, GNU ld (GNU Binutils) 2.41) #1 SMP Thu Sep 14 10:44:41 MST 2023 > > ... > > [ 0.433002] ------------[ cut here ]------------ > > [ 0.433128] bpf_mem_cache[0]: unexpected object size 128, expect 96 > > [ 0.433585] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:500 bpf_mem_alloc_init+0x348/0x354 > > [ 0.433810] Modules linked in: > > [ 0.433928] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-12679-gc93047255202 #1 > > [ 0.434025] Hardware name: riscv-virtio,qemu (DT) > > [ 0.434105] epc : bpf_mem_alloc_init+0x348/0x354 > > [ 0.434140] ra : bpf_mem_alloc_init+0x348/0x354 > > [ 0.434163] epc : ffffffff80112572 ra : ffffffff80112572 sp : ff2000000000bd30 > > [ 0.434177] gp : ffffffff81501588 tp : ff600000018d0000 t0 : ffffffff808cd1a0 > > [ 0.434190] t1 : 0720072007200720 t2 : 635f6d656d5f6670 s0 : ff2000000000bdd0 > > [ 0.434202] s1 : ffffffff80e17620 a0 : 0000000000000037 a1 : ffffffff814866b8 > > [ 0.434215] a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 > > [ 0.434227] a5 : 0000000000000000 a6 : 0000000000000047 a7 : 0000000000000046 > > [ 0.434239] s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000 > > [ 0.434251] s5 : 0000000000000100 s6 : ffffffff815031f8 s7 : ffffffff8153a610 > > [ 0.434264] s8 : 0000000000000060 s9 : 0000000000000060 s10: 0000000000000000 > > [ 0.434276] s11: ff6000001ffe5410 t3 : ff60000001858f00 t4 : ff60000001858f00 > > [ 0.434288] t5 : ff60000001858000 t6 : ff2000000000bb48 > > [ 0.434299] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 > > [ 0.434394] [<ffffffff80112572>] bpf_mem_alloc_init+0x348/0x354 > > [ 0.434492] [<ffffffff80a0f302>] bpf_global_ma_init+0x1c/0x30 > > [ 0.434516] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c > > [ 0.434526] [<ffffffff80a0105e>] kernel_init_freeable+0x214/0x27e > > [ 0.434537] [<ffffffff808db4dc>] kernel_init+0x1e/0x10a > > [ 0.434548] [<ffffffff80003386>] ret_from_fork+0xa/0x1c > > [ 0.434618] ---[ end trace 0000000000000000 ]--- > > > > [1]: https://lore.kernel.org/20230913135943.3137292-1-houtao@huaweicloud.com/ > > [2]: https://github.com/ClangBuiltLinux/boot-utils/releases > > > > Cheers, > > Nathan > > > > # bad: [98897dc735cf6635f0966f76eb0108354168fb15] Add linux-next specific files for 20230914 > > # good: [aed8aee11130a954356200afa3f1b8753e8a9482] Merge tag 'pmdomain-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm > > git bisect start '98897dc735cf6635f0966f76eb0108354168fb15' 'aed8aee11130a954356200afa3f1b8753e8a9482' > > # good: [ea1bbd78a48c8b325583e8c0bc2690850cb51807] bcachefs: Fix assorted checkpatch nits > > git bisect good ea1bbd78a48c8b325583e8c0bc2690850cb51807 > > # bad: [9c4e2139cfa15d769eafd51bf3e051293b106986] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git > > git bisect bad 9c4e2139cfa15d769eafd51bf3e051293b106986 > > # bad: [4f07b13481ab390108b015da2bc8f560416e48d2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git > > git bisect bad 4f07b13481ab390108b015da2bc8f560416e48d2 > > # bad: [bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471] Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc > > git bisect bad bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471 > > # bad: [95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git > > git bisect bad 95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac > > # good: [6836d373943afeeeb8e2989c22aaaa51218a83c6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git > > git bisect good 6836d373943afeeeb8e2989c22aaaa51218a83c6 > > # good: [3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git > > git bisect good 3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9 > > # bad: [51d56d51d3881addaea2c7242ae859155ae75607] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git > > git bisect bad 51d56d51d3881addaea2c7242ae859155ae75607 > > # bad: [1a49f4195d3498fe458a7f5ff7ec5385da70d92e] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init > > git bisect bad 1a49f4195d3498fe458a7f5ff7ec5385da70d92e > > # bad: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size > > git bisect bad c930472552022bd09aab3cd946ba3f243070d5c7 > > # good: [7182e56411b9a8b76797ed7b6095fc84be76dfb0] selftests/bpf: Add kprobe_multi override test > > git bisect good 7182e56411b9a8b76797ed7b6095fc84be76dfb0 > > # good: [b1d53958b69312e43c118d4093d8f93d3f6f80af] bpf: Don't prefill for unused bpf_mem_cache > > git bisect good b1d53958b69312e43c118d4093d8f93d3f6f80af > > # first bad commit: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size > > From 369f2e59cc6c63c3924f654ba3f8491dba58b87b Mon Sep 17 00:00:00 2001 > From: Hou Tao <houtao1@huawei.com> > Date: Sat, 16 Sep 2023 10:35:52 +0800 > Subject: [PATCH] bpf: Check the return value of dma_get_cache_alignment() > > Signed-off-by: Hou Tao <houtao1@huawei.com> > --- > kernel/bpf/memalloc.c | 13 +++++++++++++ > 1 file changed, 13 insertions(+) > > diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c > index 1c22b90e754a..c8d2c3097c43 100644 > --- a/kernel/bpf/memalloc.c > +++ b/kernel/bpf/memalloc.c > @@ -6,6 +6,8 @@ > #include <linux/irq_work.h> > #include <linux/bpf_mem_alloc.h> > #include <linux/memcontrol.h> > +#include <linux/dma-mapping.h> > +#include <linux/swiotlb.h> > #include <asm/local.h> > > /* Any context (including NMI) BPF specific memory allocator. > @@ -958,6 +960,14 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) > return !ret ? NULL : ret + LLIST_NODE_SZ; > } > > +static unsigned int __kmalloc_minalign(void) > +{ > + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && > + is_swiotlb_allocated()) > + return ARCH_KMALLOC_MINALIGN; > + return dma_get_cache_alignment(); > +} > + > /* Most of the logic is taken from setup_kmalloc_cache_index_table() */ > static __init int bpf_mem_cache_adjust_size(void) > { > @@ -992,6 +1002,9 @@ static __init int bpf_mem_cache_adjust_size(void) > size_index[(size - 1) / 8] = index; > } > > + pr_info("==== bpf_mem_cache: KMALLOC_MIN_SIZE %u ARCH_KMALLOC_MINALIGN %u min_align %u\n", > + KMALLOC_MIN_SIZE, ARCH_KMALLOC_MINALIGN, __kmalloc_minalign()); > + > return 0; > } > subsys_initcall(bpf_mem_cache_adjust_size); > -- > 2.29.2 >
Hi, On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote: > From: Hou Tao <houtao1@huawei.com> > > Add extra check in bpf_mem_alloc_init() to ensure the unit_size of > bpf_mem_cache is matched with the object_size of underlying slab cache. > If these two sizes are unmatched, print a warning once and return > -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and > the potential issue can be prevented. > > Suggested-by: Alexei Starovoitov <ast@kernel.org> > Signed-off-by: Hou Tao <houtao1@huawei.com> With this patch in place, I see the following backtrace on riscv systems. [ 2.953088] bpf_mem_cache[0]: unexpected object size 128, expect 96 [ 2.953481] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x326/0x32e [ 2.953645] Modules linked in: [ 2.953736] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc2-00244-g27bbf45eae9c #1 [ 2.953790] Hardware name: riscv-virtio,qemu (DT) [ 2.953855] epc : bpf_mem_alloc_init+0x326/0x32e [ 2.953891] ra : bpf_mem_alloc_init+0x326/0x32e [ 2.953909] epc : ffffffff8016cbd2 ra : ffffffff8016cbd2 sp : ff2000000000bd20 [ 2.953920] gp : ffffffff81c39298 tp : ff60000002e80040 t0 : 0000000000000000 [ 2.953930] t1 : ffffffffbbbabbc3 t2 : 635f6d656d5f6670 s0 : ff2000000000bdc0 [ 2.953940] s1 : ffffffff8121c7da a0 : 0000000000000037 a1 : ffffffff81a93048 [ 2.953949] a2 : 0000000000000010 a3 : 0000000000000001 a4 : 0000000000000000 [ 2.953959] a5 : 0000000000000000 a6 : ffffffff81c4fe08 a7 : 0000000000000000 [ 2.953968] s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000 [ 2.953977] s5 : 0000000000000000 s6 : 0000000000000100 s7 : ff5ffffffffd3128 [ 2.953986] s8 : ffffffff81c3d1f8 s9 : 0000000000000060 s10: 0000000000000000 [ 2.953996] s11: 0000000000000060 t3 : 0000000065a61b33 t4 : 0000000000000009 [ 2.954005] t5 : ffffffffde180000 t6 : ff2000000000bb08 [ 2.954014] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 [ 2.954047] [<ffffffff8016cbd2>] bpf_mem_alloc_init+0x326/0x32e [ 2.954087] [<ffffffff80e11426>] bpf_global_ma_init+0x1c/0x30 [ 2.954097] [<ffffffff8000285e>] do_one_initcall+0x5c/0x238 [ 2.954105] [<ffffffff80e011ae>] kernel_init_freeable+0x29a/0x30e [ 2.954115] [<ffffffff80c0312c>] kernel_init+0x1e/0x112 [ 2.954124] [<ffffffff80003d82>] ret_from_fork+0xa/0x1c Copying riscv maintainers and mailing list for feedback / comments. Thanks, Guenter
Hi, On 9/20/2023 7:46 AM, Nathan Chancellor wrote: > On Sat, Sep 16, 2023 at 10:38:09AM +0800, Hou Tao wrote: >> Hi, >> >> On 9/15/2023 2:14 AM, Nathan Chancellor wrote: >>> Hi Hou, >>> >>> On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote: >>>> From: Hou Tao <houtao1@huawei.com> >>>> >>>> Add extra check in bpf_mem_alloc_init() to ensure the unit_size of >>>> bpf_mem_cache is matched with the object_size of underlying slab cache. >>>> If these two sizes are unmatched, print a warning once and return >>>> -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and >>>> the potential issue can be prevented. >>>> >>>> Suggested-by: Alexei Starovoitov <ast@kernel.org> >>>> Signed-off-by: Hou Tao <houtao1@huawei.com> >>>> --- >>>> kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++-- >>>> 1 file changed, 31 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c >>>> index 90c1ed8210a2..1c22b90e754a 100644 >>>> --- a/kernel/bpf/memalloc.c >>>> +++ b/kernel/bpf/memalloc.c >>>> @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) >>>> alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); >>>> } >>>> >>>> +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) >>>> +{ >>>> + struct llist_node *first; >>>> + unsigned int obj_size; >>>> + >>>> + first = c->free_llist.first; >>>> + if (!first) >>>> + return 0; >>>> + >>>> + obj_size = ksize(first); >>>> + if (obj_size != c->unit_size) { >>>> + WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", >>>> + idx, obj_size, c->unit_size); >>>> + return -EINVAL; >>>> + } >>>> + return 0; >>>> +} >>> I am seeing the warning added by this change as commit c93047255202 >>> ("bpf: Ensure unit_size is matched with slab cache object size") when >>> booting ARCH=riscv defconfig in QEMU. I have seen some discussion on the >>> mailing list around this, so I apologize if this is a duplicate report >>> but it sounded like the previously reported instance of this warning was >>> already resolved by some other changeor supposed to be resolved by [1]. >>> Unfortunately, I tested both current bpf master (currently at >>> 6bd5bcb18f94) with and without that change and I still see the warning >>> in both cases. The rootfs is available at [2], if it is relevant. >> Thanks for the report. It seems it is a new problem which should be >> fixed by commit d52b59315bf5 ("bpf: Adjust size_index according to the >> value of KMALLOC_MIN_SIZE"), but failed to. However I could not >> reproduce the problem by using the provided steps. Do you configure any >> boot cmd line in your local setup ? I suspect dma_get_cache_alignment() >> returns 64 of 1 in your setup, so slab-96 is rounded up to slab-128. >> Could you please run the attached debug patch and post its output ? Sorry for the delay. I was in off-site training last week. > Apologies for the delay, I have been traveling (and will be again > shortly). It seems like your theory could be accurate, as I see: > > [ 0.120969] ==== bpf_mem_cache: KMALLOC_MIN_SIZE 8 ARCH_KMALLOC_MINALIGN 8 min_align 64 > > There are no cmdline arguments aside from 'earlycon'. Could the QEMU > version matter here for reproduction purposes, as I am still able to > reproduce with my commands on next-20230919? I am using 8.1.0. The QEMU version matters. I can reproduce the problem by using v8.1.0 but can not reproduce by using v4.2.1. Will post a patch to fix it soon. > > Cheers, > Nathan > >>> $ make -skj"$(nproc)" ARCH=riscv CROSS_COMPILE=riscv64-linux- mrproper defconfig Image >>> >>> $ qemu-system-riscv64 \ >>> -display none \ >>> -nodefaults \ >>> -bios default \ >>> -M virt \ >>> -append earlycon \ >>> -kernel arch/riscv/boot/Image \ >>> -initrd riscv-rootfs.cpio \ >>> -m 512m \ >>> -serial mon:stdio >>> ... >>> [ 0.000000] Linux version 6.5.0-12679-gc93047255202 (nathan@dev-arch.thelio-3990X) (riscv64-linux-gcc (GCC) 13.2.0, GNU ld (GNU Binutils) 2.41) #1 SMP Thu Sep 14 10:44:41 MST 2023 >>> ... >>> [ 0.433002] ------------[ cut here ]------------ >>> [ 0.433128] bpf_mem_cache[0]: unexpected object size 128, expect 96 >>> [ 0.433585] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:500 bpf_mem_alloc_init+0x348/0x354 >>> [ 0.433810] Modules linked in: >>> [ 0.433928] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-12679-gc93047255202 #1 >>> [ 0.434025] Hardware name: riscv-virtio,qemu (DT) >>> [ 0.434105] epc : bpf_mem_alloc_init+0x348/0x354 >>> [ 0.434140] ra : bpf_mem_alloc_init+0x348/0x354 >>> [ 0.434163] epc : ffffffff80112572 ra : ffffffff80112572 sp : ff2000000000bd30 >>> [ 0.434177] gp : ffffffff81501588 tp : ff600000018d0000 t0 : ffffffff808cd1a0 >>> [ 0.434190] t1 : 0720072007200720 t2 : 635f6d656d5f6670 s0 : ff2000000000bdd0 >>> [ 0.434202] s1 : ffffffff80e17620 a0 : 0000000000000037 a1 : ffffffff814866b8 >>> [ 0.434215] a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 >>> [ 0.434227] a5 : 0000000000000000 a6 : 0000000000000047 a7 : 0000000000000046 >>> [ 0.434239] s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000 >>> [ 0.434251] s5 : 0000000000000100 s6 : ffffffff815031f8 s7 : ffffffff8153a610 >>> [ 0.434264] s8 : 0000000000000060 s9 : 0000000000000060 s10: 0000000000000000 >>> [ 0.434276] s11: ff6000001ffe5410 t3 : ff60000001858f00 t4 : ff60000001858f00 >>> [ 0.434288] t5 : ff60000001858000 t6 : ff2000000000bb48 >>> [ 0.434299] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 >>> [ 0.434394] [<ffffffff80112572>] bpf_mem_alloc_init+0x348/0x354 >>> [ 0.434492] [<ffffffff80a0f302>] bpf_global_ma_init+0x1c/0x30 >>> [ 0.434516] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c >>> [ 0.434526] [<ffffffff80a0105e>] kernel_init_freeable+0x214/0x27e >>> [ 0.434537] [<ffffffff808db4dc>] kernel_init+0x1e/0x10a >>> [ 0.434548] [<ffffffff80003386>] ret_from_fork+0xa/0x1c >>> [ 0.434618] ---[ end trace 0000000000000000 ]--- >>> >>> [1]: https://lore.kernel.org/20230913135943.3137292-1-houtao@huaweicloud.com/ >>> [2]: https://github.com/ClangBuiltLinux/boot-utils/releases >>> >>> Cheers, >>> Nathan >>> >>> # bad: [98897dc735cf6635f0966f76eb0108354168fb15] Add linux-next specific files for 20230914 >>> # good: [aed8aee11130a954356200afa3f1b8753e8a9482] Merge tag 'pmdomain-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm >>> git bisect start '98897dc735cf6635f0966f76eb0108354168fb15' 'aed8aee11130a954356200afa3f1b8753e8a9482' >>> # good: [ea1bbd78a48c8b325583e8c0bc2690850cb51807] bcachefs: Fix assorted checkpatch nits >>> git bisect good ea1bbd78a48c8b325583e8c0bc2690850cb51807 >>> # bad: [9c4e2139cfa15d769eafd51bf3e051293b106986] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git >>> git bisect bad 9c4e2139cfa15d769eafd51bf3e051293b106986 >>> # bad: [4f07b13481ab390108b015da2bc8f560416e48d2] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git >>> git bisect bad 4f07b13481ab390108b015da2bc8f560416e48d2 >>> # bad: [bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471] Merge branch 'for-linux-next-fixes' of git://anongit.freedesktop.org/drm/drm-misc >>> git bisect bad bcfe98207530e1ea0004f4e5dbd6e7e4d9eb2471 >>> # bad: [95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git >>> git bisect bad 95d3e99b1ca8ad3da86c525cc1c00e4ba27592ac >>> # good: [6836d373943afeeeb8e2989c22aaaa51218a83c6] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc.git >>> git bisect good 6836d373943afeeeb8e2989c22aaaa51218a83c6 >>> # good: [3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git >>> git bisect good 3d3e2fb5e45a08a45ae01f0dfaf9621ae0e439f9 >>> # bad: [51d56d51d3881addaea2c7242ae859155ae75607] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git >>> git bisect bad 51d56d51d3881addaea2c7242ae859155ae75607 >>> # bad: [1a49f4195d3498fe458a7f5ff7ec5385da70d92e] bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init >>> git bisect bad 1a49f4195d3498fe458a7f5ff7ec5385da70d92e >>> # bad: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size >>> git bisect bad c930472552022bd09aab3cd946ba3f243070d5c7 >>> # good: [7182e56411b9a8b76797ed7b6095fc84be76dfb0] selftests/bpf: Add kprobe_multi override test >>> git bisect good 7182e56411b9a8b76797ed7b6095fc84be76dfb0 >>> # good: [b1d53958b69312e43c118d4093d8f93d3f6f80af] bpf: Don't prefill for unused bpf_mem_cache >>> git bisect good b1d53958b69312e43c118d4093d8f93d3f6f80af >>> # first bad commit: [c930472552022bd09aab3cd946ba3f243070d5c7] bpf: Ensure unit_size is matched with slab cache object size >> From 369f2e59cc6c63c3924f654ba3f8491dba58b87b Mon Sep 17 00:00:00 2001 >> From: Hou Tao <houtao1@huawei.com> >> Date: Sat, 16 Sep 2023 10:35:52 +0800 >> Subject: [PATCH] bpf: Check the return value of dma_get_cache_alignment() >> >> Signed-off-by: Hou Tao <houtao1@huawei.com> >> --- >> kernel/bpf/memalloc.c | 13 +++++++++++++ >> 1 file changed, 13 insertions(+) >> >> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c >> index 1c22b90e754a..c8d2c3097c43 100644 >> --- a/kernel/bpf/memalloc.c >> +++ b/kernel/bpf/memalloc.c >> @@ -6,6 +6,8 @@ >> #include <linux/irq_work.h> >> #include <linux/bpf_mem_alloc.h> >> #include <linux/memcontrol.h> >> +#include <linux/dma-mapping.h> >> +#include <linux/swiotlb.h> >> #include <asm/local.h> >> >> /* Any context (including NMI) BPF specific memory allocator. >> @@ -958,6 +960,14 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) >> return !ret ? NULL : ret + LLIST_NODE_SZ; >> } >> >> +static unsigned int __kmalloc_minalign(void) >> +{ >> + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && >> + is_swiotlb_allocated()) >> + return ARCH_KMALLOC_MINALIGN; >> + return dma_get_cache_alignment(); >> +} >> + >> /* Most of the logic is taken from setup_kmalloc_cache_index_table() */ >> static __init int bpf_mem_cache_adjust_size(void) >> { >> @@ -992,6 +1002,9 @@ static __init int bpf_mem_cache_adjust_size(void) >> size_index[(size - 1) / 8] = index; >> } >> >> + pr_info("==== bpf_mem_cache: KMALLOC_MIN_SIZE %u ARCH_KMALLOC_MINALIGN %u min_align %u\n", >> + KMALLOC_MIN_SIZE, ARCH_KMALLOC_MINALIGN, __kmalloc_minalign()); >> + >> return 0; >> } >> subsys_initcall(bpf_mem_cache_adjust_size); >> -- >> 2.29.2 >> > .
Guenter Roeck wrote: > Hi, > > On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote: > > From: Hou Tao <houtao1@huawei.com> > > > > Add extra check in bpf_mem_alloc_init() to ensure the unit_size of > > bpf_mem_cache is matched with the object_size of underlying slab cache. > > If these two sizes are unmatched, print a warning once and return > > -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and > > the potential issue can be prevented. > > > > Suggested-by: Alexei Starovoitov <ast@kernel.org> > > Signed-off-by: Hou Tao <houtao1@huawei.com> > > With this patch in place, I see the following backtrace on riscv systems. > > [ 2.953088] bpf_mem_cache[0]: unexpected object size 128, expect 96 > [ 2.953481] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x326/0x32e > [ 2.953645] Modules linked in: > [ 2.953736] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc2-00244-g27bbf45eae9c #1 > [ 2.953790] Hardware name: riscv-virtio,qemu (DT) > [ 2.953855] epc : bpf_mem_alloc_init+0x326/0x32e > [ 2.953891] ra : bpf_mem_alloc_init+0x326/0x32e > [ 2.953909] epc : ffffffff8016cbd2 ra : ffffffff8016cbd2 sp : ff2000000000bd20 > [ 2.953920] gp : ffffffff81c39298 tp : ff60000002e80040 t0 : 0000000000000000 > [ 2.953930] t1 : ffffffffbbbabbc3 t2 : 635f6d656d5f6670 s0 : ff2000000000bdc0 > [ 2.953940] s1 : ffffffff8121c7da a0 : 0000000000000037 a1 : ffffffff81a93048 > [ 2.953949] a2 : 0000000000000010 a3 : 0000000000000001 a4 : 0000000000000000 > [ 2.953959] a5 : 0000000000000000 a6 : ffffffff81c4fe08 a7 : 0000000000000000 > [ 2.953968] s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000 > [ 2.953977] s5 : 0000000000000000 s6 : 0000000000000100 s7 : ff5ffffffffd3128 > [ 2.953986] s8 : ffffffff81c3d1f8 s9 : 0000000000000060 s10: 0000000000000000 > [ 2.953996] s11: 0000000000000060 t3 : 0000000065a61b33 t4 : 0000000000000009 > [ 2.954005] t5 : ffffffffde180000 t6 : ff2000000000bb08 > [ 2.954014] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 > [ 2.954047] [<ffffffff8016cbd2>] bpf_mem_alloc_init+0x326/0x32e > [ 2.954087] [<ffffffff80e11426>] bpf_global_ma_init+0x1c/0x30 > [ 2.954097] [<ffffffff8000285e>] do_one_initcall+0x5c/0x238 > [ 2.954105] [<ffffffff80e011ae>] kernel_init_freeable+0x29a/0x30e > [ 2.954115] [<ffffffff80c0312c>] kernel_init+0x1e/0x112 > [ 2.954124] [<ffffffff80003d82>] ret_from_fork+0xa/0x1c > > Copying riscv maintainers and mailing list for feedback / comments. If it makes a difference I also see this with 6.6-rc3 on my Nezha board (Allwinner D1), but not on my VisionFive 2 (JH7110) running the same kernel. /Emil
On Fri, Sep 29, 2023 at 7:52 PM Emil Renner Berthing <emil.renner.berthing@canonical.com> wrote: > > Guenter Roeck wrote: > > Hi, > > > > On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote: > > > From: Hou Tao <houtao1@huawei.com> > > > > > > Add extra check in bpf_mem_alloc_init() to ensure the unit_size of > > > bpf_mem_cache is matched with the object_size of underlying slab cache. > > > If these two sizes are unmatched, print a warning once and return > > > -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and > > > the potential issue can be prevented. > > > > > > Suggested-by: Alexei Starovoitov <ast@kernel.org> > > > Signed-off-by: Hou Tao <houtao1@huawei.com> > > > > With this patch in place, I see the following backtrace on riscv systems. > > > > [ 2.953088] bpf_mem_cache[0]: unexpected object size 128, expect 96 > > [ 2.953481] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x326/0x32e > > [ 2.953645] Modules linked in: > > [ 2.953736] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc2-00244-g27bbf45eae9c #1 > > [ 2.953790] Hardware name: riscv-virtio,qemu (DT) > > [ 2.953855] epc : bpf_mem_alloc_init+0x326/0x32e > > [ 2.953891] ra : bpf_mem_alloc_init+0x326/0x32e > > [ 2.953909] epc : ffffffff8016cbd2 ra : ffffffff8016cbd2 sp : ff2000000000bd20 > > [ 2.953920] gp : ffffffff81c39298 tp : ff60000002e80040 t0 : 0000000000000000 > > [ 2.953930] t1 : ffffffffbbbabbc3 t2 : 635f6d656d5f6670 s0 : ff2000000000bdc0 > > [ 2.953940] s1 : ffffffff8121c7da a0 : 0000000000000037 a1 : ffffffff81a93048 > > [ 2.953949] a2 : 0000000000000010 a3 : 0000000000000001 a4 : 0000000000000000 > > [ 2.953959] a5 : 0000000000000000 a6 : ffffffff81c4fe08 a7 : 0000000000000000 > > [ 2.953968] s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000 > > [ 2.953977] s5 : 0000000000000000 s6 : 0000000000000100 s7 : ff5ffffffffd3128 > > [ 2.953986] s8 : ffffffff81c3d1f8 s9 : 0000000000000060 s10: 0000000000000000 > > [ 2.953996] s11: 0000000000000060 t3 : 0000000065a61b33 t4 : 0000000000000009 > > [ 2.954005] t5 : ffffffffde180000 t6 : ff2000000000bb08 > > [ 2.954014] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 > > [ 2.954047] [<ffffffff8016cbd2>] bpf_mem_alloc_init+0x326/0x32e > > [ 2.954087] [<ffffffff80e11426>] bpf_global_ma_init+0x1c/0x30 > > [ 2.954097] [<ffffffff8000285e>] do_one_initcall+0x5c/0x238 > > [ 2.954105] [<ffffffff80e011ae>] kernel_init_freeable+0x29a/0x30e > > [ 2.954115] [<ffffffff80c0312c>] kernel_init+0x1e/0x112 > > [ 2.954124] [<ffffffff80003d82>] ret_from_fork+0xa/0x1c > > > > Copying riscv maintainers and mailing list for feedback / comments. > > If it makes a difference I also see this with 6.6-rc3 on my Nezha board > (Allwinner D1), but not on my VisionFive 2 (JH7110) running the same kernel. > Adding one more RISC-V board (Renesas RZ/Five) to list where I see this issue: [ 0.268936] ------------[ cut here ]------------ [ 0.268953] bpf_mem_cache[0]: unexpected object size 128, expect 96 [ 0.268993] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x306/0x30e [ 0.269026] Modules linked in: [ 0.269038] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc3-00091-g6acfe6a7c746 #538 [ 0.269049] Hardware name: Renesas SMARC EVK based on r9a07g043f01 (DT) [ 0.269054] epc : bpf_mem_alloc_init+0x306/0x30e [ 0.269066] ra : bpf_mem_alloc_init+0x306/0x30e [ 0.269077] epc : ffffffff8010e7ac ra : ffffffff8010e7ac sp : ffffffc80000bd30 [ 0.269084] gp : ffffffff81506d08 tp : ffffffd801938000 t0 : ffffffff81419e40 [ 0.269090] t1 : ffffffffffffffff t2 : 2d2d2d2d2d2d2d2d s0 : ffffffc80000bdd0 [ 0.269096] s1 : 000000000000000b a0 : 0000000000000037 a1 : 0000000200000020 [ 0.269102] a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 [ 0.269107] a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000 [ 0.269112] s2 : 0000000000000000 s3 : 0000000000000000 s4 : 0000000000000100 [ 0.269118] s5 : ffffffff815081f8 s6 : ffffffff8153f610 s7 : 0000000000000060 [ 0.269124] s8 : 0000000000000060 s9 : ffffffd836fd2770 s10: ffffffff80e18dd8 [ 0.269130] s11: 0000000000000000 t3 : ffffffff8151e174 t4 : ffffffff8151e174 [ 0.269135] t5 : ffffffff8151e150 t6 : ffffffff8151e1b8 [ 0.269140] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 [ 0.269147] [<ffffffff8010e7ac>] bpf_mem_alloc_init+0x306/0x30e [ 0.269160] [<ffffffff80a0f3e4>] bpf_global_ma_init+0x1c/0x30 [ 0.269174] [<ffffffff8000212c>] do_one_initcall+0x58/0x19c [ 0.269186] [<ffffffff80a00ffc>] kernel_init_freeable+0x200/0x26a [ 0.269203] [<ffffffff809195a4>] kernel_init+0x1e/0x10a [ 0.269213] [<ffffffff800035ee>] ret_from_fork+0xa/0x1c [ 0.269224] ---[ end trace 0000000000000000 ]--- [ 0.281983] debug_vm_pgtable: [debug_vm_pgtable ]: Validating architecture page table helpers Cheers, Prabhakar
On Fri, Sep 29, 2023 at 1:24 PM Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote: > > On Fri, Sep 29, 2023 at 7:52 PM Emil Renner Berthing > <emil.renner.berthing@canonical.com> wrote: > > > > Guenter Roeck wrote: > > > Hi, > > > > > > On Fri, Sep 08, 2023 at 09:39:22PM +0800, Hou Tao wrote: > > > > From: Hou Tao <houtao1@huawei.com> > > > > > > > > Add extra check in bpf_mem_alloc_init() to ensure the unit_size of > > > > bpf_mem_cache is matched with the object_size of underlying slab cache. > > > > If these two sizes are unmatched, print a warning once and return > > > > -EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and > > > > the potential issue can be prevented. > > > > > > > > Suggested-by: Alexei Starovoitov <ast@kernel.org> > > > > Signed-off-by: Hou Tao <houtao1@huawei.com> > > > > > > With this patch in place, I see the following backtrace on riscv systems. > > > > > > [ 2.953088] bpf_mem_cache[0]: unexpected object size 128, expect 96 > > > [ 2.953481] WARNING: CPU: 0 PID: 1 at kernel/bpf/memalloc.c:507 bpf_mem_alloc_init+0x326/0x32e > > > [ 2.953645] Modules linked in: > > > [ 2.953736] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc2-00244-g27bbf45eae9c #1 > > > [ 2.953790] Hardware name: riscv-virtio,qemu (DT) > > > [ 2.953855] epc : bpf_mem_alloc_init+0x326/0x32e > > > [ 2.953891] ra : bpf_mem_alloc_init+0x326/0x32e > > > [ 2.953909] epc : ffffffff8016cbd2 ra : ffffffff8016cbd2 sp : ff2000000000bd20 > > > [ 2.953920] gp : ffffffff81c39298 tp : ff60000002e80040 t0 : 0000000000000000 > > > [ 2.953930] t1 : ffffffffbbbabbc3 t2 : 635f6d656d5f6670 s0 : ff2000000000bdc0 > > > [ 2.953940] s1 : ffffffff8121c7da a0 : 0000000000000037 a1 : ffffffff81a93048 > > > [ 2.953949] a2 : 0000000000000010 a3 : 0000000000000001 a4 : 0000000000000000 > > > [ 2.953959] a5 : 0000000000000000 a6 : ffffffff81c4fe08 a7 : 0000000000000000 > > > [ 2.953968] s2 : 000000000000000b s3 : 0000000000000000 s4 : 0000000000000000 > > > [ 2.953977] s5 : 0000000000000000 s6 : 0000000000000100 s7 : ff5ffffffffd3128 > > > [ 2.953986] s8 : ffffffff81c3d1f8 s9 : 0000000000000060 s10: 0000000000000000 > > > [ 2.953996] s11: 0000000000000060 t3 : 0000000065a61b33 t4 : 0000000000000009 > > > [ 2.954005] t5 : ffffffffde180000 t6 : ff2000000000bb08 > > > [ 2.954014] status: 0000000200000120 badaddr: 0000000000000000 cause: 0000000000000003 > > > [ 2.954047] [<ffffffff8016cbd2>] bpf_mem_alloc_init+0x326/0x32e > > > [ 2.954087] [<ffffffff80e11426>] bpf_global_ma_init+0x1c/0x30 > > > [ 2.954097] [<ffffffff8000285e>] do_one_initcall+0x5c/0x238 > > > [ 2.954105] [<ffffffff80e011ae>] kernel_init_freeable+0x29a/0x30e > > > [ 2.954115] [<ffffffff80c0312c>] kernel_init+0x1e/0x112 > > > [ 2.954124] [<ffffffff80003d82>] ret_from_fork+0xa/0x1c > > > > > > Copying riscv maintainers and mailing list for feedback / comments. > > > > If it makes a difference I also see this with 6.6-rc3 on my Nezha board > > (Allwinner D1), but not on my VisionFive 2 (JH7110) running the same kernel. > > > > Adding one more RISC-V board (Renesas RZ/Five) to list where I see this issue: Could you please help test the proposed fix: https://patchwork.kernel.org/project/netdevbpf/patch/20230928101558.2594068-1-houtao@huaweicloud.com/
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 90c1ed8210a2..1c22b90e754a 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false); } +static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx) +{ + struct llist_node *first; + unsigned int obj_size; + + first = c->free_llist.first; + if (!first) + return 0; + + obj_size = ksize(first); + if (obj_size != c->unit_size) { + WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", + idx, obj_size, c->unit_size); + return -EINVAL; + } + return 0; +} + /* When size != 0 bpf_mem_cache for each cpu. * This is typical bpf hash map use case when all elements have equal size. * @@ -496,10 +514,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; + int cpu, i, err, unit_size, percpu_size = 0; struct bpf_mem_caches *cc, __percpu *pcc; struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; - int cpu, i, unit_size, percpu_size = 0; if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); @@ -537,6 +555,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; + err = 0; #ifdef CONFIG_MEMCG_KMEM objcg = get_obj_cgroup_from_current(); #endif @@ -557,10 +576,20 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) if (i != bpf_mem_cache_idx(c->unit_size)) continue; prefill_mem_cache(c, cpu); + err = check_obj_size(c, i); + if (err) + goto out; } } + +out: ma->caches = pcc; - return 0; + /* refill_work is either zeroed or initialized, so it is safe to + * call irq_work_sync(). + */ + if (err) + bpf_mem_alloc_destroy(ma); + return err; } static void drain_mem_cache(struct bpf_mem_cache *c)