diff mbox series

[bpf-next] bpf: Fix kmemleak warnings for percpu hashmap

Message ID 20250224175514.2207227-1-yonghong.song@linux.dev (mailing list archive)
State Accepted
Commit 11ba7ce076e5903e7bdc1fd1498979c331b3c286
Delegated to: BPF
Headers show
Series [bpf-next] bpf: Fix kmemleak warnings for percpu hashmap | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 8 maintainers not CCed: song@kernel.org kpsingh@kernel.org john.fastabend@gmail.com eddyz87@gmail.com jolsa@kernel.org haoluo@google.com sdf@fomichev.me martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6 this patch: 6
netdev/checkpatch warning WARNING: line length of 93 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-48 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-47 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc

Commit Message

Yonghong Song Feb. 24, 2025, 5:55 p.m. UTC
Vlad Poenaru from Meta reported the following kmemleak issues:

  ...
  unreferenced object 0x606fd7c44ac8 (size 32):
    comm "floodgate_agent", pid 5077, jiffies 4294746072
    hex dump (first 32 bytes on cpu 32):
      00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
      00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    backtrace (crc 0):
      pcpu_alloc_noprof+0x730/0xeb0
      bpf_map_alloc_percpu+0x69/0xc0
      prealloc_init+0x9d/0x1b0
      htab_map_alloc+0x363/0x510
      map_create+0x215/0x3a0
      __sys_bpf+0x16b/0x3e0
      __x64_sys_bpf+0x18/0x20
      do_syscall_64+0x7b/0x150
      entry_SYSCALL_64_after_hwframe+0x4b/0x53
  unreferenced object 0x606fd7c44ae8 (size 32):
    comm "floodgate_agent", pid 5077, jiffies 4294746072
    hex dump (first 32 bytes on cpu 32):
      d3 08 00 00 00 00 00 00 d3 08 00 00 00 00 00 00  ................
      00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    backtrace (crc d197b0fe):
      pcpu_alloc_noprof+0x730/0xeb0
      bpf_map_alloc_percpu+0x69/0xc0
      prealloc_init+0x9d/0x1b0
      htab_map_alloc+0x363/0x510
      map_create+0x215/0x3a0
      __sys_bpf+0x16b/0x3e0
      __x64_sys_bpf+0x18/0x20
      do_syscall_64+0x7b/0x150
      entry_SYSCALL_64_after_hwframe+0x4b/0x53
  ...

Further investigation shows the reason is due to not 8-byte aligned
store of percpu pointer in htab_elem_set_ptr():
  *(void __percpu **)(l->key + key_size) = pptr;

Note that the whole htab_elem alignment is 8 (for x86_64). If the key_size
is 4, that means pptr is stored in a location which is 4 byte aligned but
not 8 byte aligned. In mm/kmemleak.c, scan_block() scans the memory based
on 8 byte stride, so it won't detect above pptr, hence reporting the memory
leak.

In htab_map_alloc(), we already have

        htab->elem_size = sizeof(struct htab_elem) +
                          round_up(htab->map.key_size, 8);
        if (percpu)
                htab->elem_size += sizeof(void *);
        else
                htab->elem_size += round_up(htab->map.value_size, 8);

So storing pptr with 8-byte alignment won't cause any problem and can fix
kmemleak too.

The issue can be reproduced with bpf selftest as well:
  1. Enable CONFIG_DEBUG_KMEMLEAK config
  2. Add a getchar() before skel destroy in test_hash_map() in prog_tests/for_each.c.
     The purpose is to keep map available so kmemleak can be detected.
  3. run './test_progs -t for_each/hash_map &' and a kmemleak should be reported.

     unreferenced object 0x607e08c1fd30 (size 8):
       comm "test_progs", pid 1969, jiffies 4294706961
       hex dump (first 8 bytes on cpu 2):
         03 00 00 00 00 00 00 00                          ........
       backtrace (crc 844a0efa):
         pcpu_alloc_noprof+0xf33/0x14a0
         bpf_map_alloc_percpu+0x9c/0x200
         prealloc_init+0x1e7/0x730
         htab_map_alloc+0x698/0xc70
         map_create+0x489/0xcb0
         __sys_bpf+0x443/0x560
         __x64_sys_bpf+0x7c/0x90
         do_syscall_64+0x58/0xf0
         entry_SYSCALL_64_after_hwframe+0x76/0x7e

cc: Vlad Poenaru <thevlad@meta.com>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 kernel/bpf/hashtab.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Comments

Martin KaFai Lau Feb. 24, 2025, 7:47 p.m. UTC | #1
On 2/24/25 9:55 AM, Yonghong Song wrote:
> Vlad Poenaru from Meta reported the following kmemleak issues:
> 
>    ...
>    unreferenced object 0x606fd7c44ac8 (size 32):
>      comm "floodgate_agent", pid 5077, jiffies 4294746072
>      hex dump (first 32 bytes on cpu 32):
>        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>      backtrace (crc 0):
>        pcpu_alloc_noprof+0x730/0xeb0
>        bpf_map_alloc_percpu+0x69/0xc0
>        prealloc_init+0x9d/0x1b0
>        htab_map_alloc+0x363/0x510
>        map_create+0x215/0x3a0
>        __sys_bpf+0x16b/0x3e0
>        __x64_sys_bpf+0x18/0x20
>        do_syscall_64+0x7b/0x150
>        entry_SYSCALL_64_after_hwframe+0x4b/0x53
>    unreferenced object 0x606fd7c44ae8 (size 32):
>      comm "floodgate_agent", pid 5077, jiffies 4294746072
>      hex dump (first 32 bytes on cpu 32):
>        d3 08 00 00 00 00 00 00 d3 08 00 00 00 00 00 00  ................
>        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>      backtrace (crc d197b0fe):
>        pcpu_alloc_noprof+0x730/0xeb0
>        bpf_map_alloc_percpu+0x69/0xc0
>        prealloc_init+0x9d/0x1b0
>        htab_map_alloc+0x363/0x510
>        map_create+0x215/0x3a0
>        __sys_bpf+0x16b/0x3e0
>        __x64_sys_bpf+0x18/0x20
>        do_syscall_64+0x7b/0x150
>        entry_SYSCALL_64_after_hwframe+0x4b/0x53
>    ...
> 
> Further investigation shows the reason is due to not 8-byte aligned
> store of percpu pointer in htab_elem_set_ptr():
>    *(void __percpu **)(l->key + key_size) = pptr;
> 
> Note that the whole htab_elem alignment is 8 (for x86_64). If the key_size
> is 4, that means pptr is stored in a location which is 4 byte aligned but
> not 8 byte aligned. In mm/kmemleak.c, scan_block() scans the memory based
> on 8 byte stride, so it won't detect above pptr, hence reporting the memory
> leak.
> 
> In htab_map_alloc(), we already have
> 
>          htab->elem_size = sizeof(struct htab_elem) +
>                            round_up(htab->map.key_size, 8);
>          if (percpu)
>                  htab->elem_size += sizeof(void *);
>          else
>                  htab->elem_size += round_up(htab->map.value_size, 8);
> 
> So storing pptr with 8-byte alignment won't cause any problem and can fix
> kmemleak too.

Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
patchwork-bot+netdevbpf@kernel.org Feb. 24, 2025, 8:20 p.m. UTC | #2
Hello:

This patch was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:

On Mon, 24 Feb 2025 09:55:14 -0800 you wrote:
> Vlad Poenaru from Meta reported the following kmemleak issues:
> 
>   ...
>   unreferenced object 0x606fd7c44ac8 (size 32):
>     comm "floodgate_agent", pid 5077, jiffies 4294746072
>     hex dump (first 32 bytes on cpu 32):
>       00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>       00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>     backtrace (crc 0):
>       pcpu_alloc_noprof+0x730/0xeb0
>       bpf_map_alloc_percpu+0x69/0xc0
>       prealloc_init+0x9d/0x1b0
>       htab_map_alloc+0x363/0x510
>       map_create+0x215/0x3a0
>       __sys_bpf+0x16b/0x3e0
>       __x64_sys_bpf+0x18/0x20
>       do_syscall_64+0x7b/0x150
>       entry_SYSCALL_64_after_hwframe+0x4b/0x53
>   unreferenced object 0x606fd7c44ae8 (size 32):
>     comm "floodgate_agent", pid 5077, jiffies 4294746072
>     hex dump (first 32 bytes on cpu 32):
>       d3 08 00 00 00 00 00 00 d3 08 00 00 00 00 00 00  ................
>       00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>     backtrace (crc d197b0fe):
>       pcpu_alloc_noprof+0x730/0xeb0
>       bpf_map_alloc_percpu+0x69/0xc0
>       prealloc_init+0x9d/0x1b0
>       htab_map_alloc+0x363/0x510
>       map_create+0x215/0x3a0
>       __sys_bpf+0x16b/0x3e0
>       __x64_sys_bpf+0x18/0x20
>       do_syscall_64+0x7b/0x150
>       entry_SYSCALL_64_after_hwframe+0x4b/0x53
>   ...
> 
> [...]

Here is the summary with links:
  - [bpf-next] bpf: Fix kmemleak warnings for percpu hashmap
    https://git.kernel.org/bpf/bpf-next/c/11ba7ce076e5

You are awesome, thank you!
diff mbox series

Patch

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4a9eeb7aef85..c308300fc72f 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -198,12 +198,12 @@  static bool htab_is_percpu(const struct bpf_htab *htab)
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
 				     void __percpu *pptr)
 {
-	*(void __percpu **)(l->key + key_size) = pptr;
+	*(void __percpu **)(l->key + roundup(key_size, 8)) = pptr;
 }
 
 static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
 {
-	return *(void __percpu **)(l->key + key_size);
+	return *(void __percpu **)(l->key + roundup(key_size, 8));
 }
 
 static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
@@ -2354,7 +2354,7 @@  static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn
 	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3);
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
-				offsetof(struct htab_elem, key) + map->key_size);
+				offsetof(struct htab_elem, key) + roundup(map->key_size, 8));
 	*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
 	*insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);