
[bpf-next] bpf: Alloc bpf_async_cb by using bpf_global_ma under PREEMPT_RT

Message ID: 20250114081338.2375090-1-houtao@huaweicloud.com
State: New
Delegated to: BPF

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1 this patch: 1
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 3 maintainers not CCed: clrkwllms@kernel.org linux-rt-devel@lists.linux.dev rostedt@goodmis.org
netdev/build_clang success Errors and warnings before: 19 this patch: 19
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 58 this patch: 58
netdev/checkpatch warning WARNING: The commit message has 'Call Trace:', perhaps it also needs a 'Fixes:' tag? WARNING: line length of 81 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 11 this patch: 11
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-19 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / veristat-meta

Commit Message

Hou Tao Jan. 14, 2025, 8:13 a.m. UTC
From: Hou Tao <houtao1@huawei.com>

Under PREEMPT_RT, it is not safe to use GFP_ATOMIC kmalloc while
preemption or IRQs are disabled. The following warning is reported when
running test_progs under PREEMPT_RT:

  BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
  in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 675, name: test_progs
  preempt_count: 1, expected: 0
  RCU nest depth: 0, expected: 0
  2 locks held by test_progs/675:
   #0: ffffffff864b0240 (rcu_read_lock_trace){....}-{0:0}, at: bpf_prog_test_run_syscall+0x2c0/0x830
   #1: ffff8881f4ec40c8 ((&c->lock)){....}-{2:2}, at: ___slab_alloc+0xbc/0x1280
  Preemption disabled at:
  [<ffffffff8175ae2b>] __bpf_async_init+0xbb/0xb10
  CPU: 1 UID: 0 PID: 675 Comm: test_progs Tainted: G           O       6.12.0+ #11
  Tainted: [O]=OOT_MODULE
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...
  Call Trace:
   <TASK>
   dump_stack_lvl+0x57/0x70
   dump_stack+0x10/0x20
   __might_resched+0x337/0x4d0
   rt_spin_lock+0xd4/0x230
   ___slab_alloc+0xbc/0x1280
   __slab_alloc.isra.0+0x5d/0xa0
   __kmalloc_node_noprof+0xf7/0x4f0
   bpf_map_kmalloc_node+0xf5/0x6b0
   __bpf_async_init+0x20e/0xb10
   bpf_timer_init+0x30/0x40
   bpf_prog_c7e2dc9ff3d5ba62_start_cb+0x55/0x85
   bpf_prog_4eb421be69ae82fa_start_timer+0x5d/0x7e
   bpf_prog_test_run_syscall+0x322/0x830
   __sys_bpf+0x135d/0x3ca0
   __x64_sys_bpf+0x75/0xb0
   x64_sys_call+0x1b5/0xa10
   do_syscall_64+0x3b/0xc0
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
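
The splat is a lock-nesting problem that only exists on PREEMPT_RT: there,
spinlock_t is backed by a sleeping rtmutex, and ___slab_alloc() takes such a
lock, while __bpf_async_init() has already disabled preemption via
__bpf_spin_lock_irqsave(). A rough sketch of the offending ordering
(condensed from the trace above, not a literal excerpt of the code):

  __bpf_spin_lock_irqsave(&async->lock);  /* disables preemption */
  /* even with GFP_ATOMIC, a per-cpu cache miss reaches ___slab_alloc(),
   * which takes a spinlock_t -- a sleeping lock on PREEMPT_RT, hence the
   * "BUG: sleeping function called from invalid context" above
   */
  cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
  __bpf_spin_unlock_irqrestore(&async->lock);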

Fix the problem by using bpf_global_ma to allocate bpf_async_cb when
PREEMPT_RT is enabled. The reason for still using kmalloc in the
non-PREEMPT_RT case is that bpf_global_ma doesn't support accounting the
allocated memory to a specific memcg. Also do the memory allocation
before invoking __bpf_spin_lock_irqsave(), to reduce the possibility of
-ENOMEM for bpf_global_ma.
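
In outline, the patch adds bpf_async_alloc()/bpf_async_free()/
bpf_async_free_rcu() helpers that switch on IS_ENABLED(CONFIG_PREEMPT_RT)
and moves the allocation ahead of the lock. A condensed view of the
resulting __bpf_async_init() flow (a summary of the hunks below, not
additional code):

  cb = bpf_async_alloc(map, size);   /* preemption still enabled here */
  if (!cb)
          return -ENOMEM;

  __bpf_spin_lock_irqsave(&async->lock);
  t = async->timer;
  if (t) {                           /* raced: already initialized */
          bpf_async_free(cb);        /* undo the early allocation */
          ret = -EBUSY;
          goto out;
  }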

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 kernel/bpf/helpers.c | 48 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 10 deletions(-)

Patch

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bcda671feafd9..5041f22812936 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1109,12 +1109,14 @@  struct bpf_async_cb {
  * freeing the timers when inner map is replaced or deleted by user space.
  */
 struct bpf_hrtimer {
+	/* cb must be the first member */
 	struct bpf_async_cb cb;
 	struct hrtimer timer;
 	atomic_t cancelling;
 };
 
 struct bpf_work {
+	/* cb must be the first member */
 	struct bpf_async_cb cb;
 	struct work_struct work;
 	struct work_struct delete_work;
@@ -1141,6 +1143,34 @@  enum bpf_async_type {
 
 static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
 
+static void bpf_async_free(struct bpf_async_cb *cb)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		bpf_mem_free(&bpf_global_ma, cb);
+	else
+		kfree(cb);
+}
+
+static void bpf_async_free_rcu(struct bpf_async_cb *cb)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		bpf_mem_free_rcu(&bpf_global_ma, cb);
+	else
+		kfree_rcu(cb, rcu);
+}
+
+static struct bpf_async_cb *bpf_async_alloc(struct bpf_map *map, size_t size)
+{
+	struct bpf_async_cb *cb;
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		cb = bpf_mem_alloc(&bpf_global_ma, size);
+	else
+		/* allocate hrtimer via map_kmalloc to use memcg accounting */
+		cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
+	return cb;
+}
+
 static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
 {
 	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
@@ -1221,7 +1251,7 @@  static void bpf_wq_delete_work(struct work_struct *work)
 
 	cancel_work_sync(&w->work);
 
-	kfree_rcu(w, cb.rcu);
+	bpf_async_free_rcu(&w->cb);
 }
 
 static void bpf_timer_delete_work(struct work_struct *work)
@@ -1236,7 +1266,7 @@  static void bpf_timer_delete_work(struct work_struct *work)
 	 * bpf_timer_cancel_and_free will have been cancelled.
 	 */
 	hrtimer_cancel(&t->timer);
-	kfree_rcu(t, cb.rcu);
+	bpf_async_free_rcu(&t->cb);
 }
 
 static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
@@ -1263,20 +1293,18 @@  static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
 		return -EINVAL;
 	}
 
+	cb = bpf_async_alloc(map, size);
+	if (!cb)
+		return -ENOMEM;
+
 	__bpf_spin_lock_irqsave(&async->lock);
 	t = async->timer;
 	if (t) {
+		bpf_async_free(cb);
 		ret = -EBUSY;
 		goto out;
 	}
 
-	/* allocate hrtimer via map_kmalloc to use memcg accounting */
-	cb = bpf_map_kmalloc_node(map, size, GFP_ATOMIC, map->numa_node);
-	if (!cb) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	switch (type) {
 	case BPF_ASYNC_TYPE_TIMER:
 		clockid = flags & (MAX_CLOCKS - 1);
@@ -1313,7 +1341,7 @@  static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u
 		 * or pinned in bpffs.
 		 */
 		WRITE_ONCE(async->cb, NULL);
-		kfree(cb);
+		bpf_async_free(cb);
 		ret = -EPERM;
 	}
 out: