Context | Check | Description
bpf/vmtest-bpf-next-PR | success | PR summary
bpf/vmtest-bpf-next-VM_Test-1 | success | Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 | success | Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 | success | Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 | success | Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-0 | success | Logs for Lint
bpf/vmtest-bpf-next-VM_Test-14 | success | Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 | success | Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 | success | Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 | success | Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 | pending | Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 | success | Logs for aarch64-gcc / test (sched_ext, false, 360) / sched_ext on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 | success | Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-20 | success | Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-19 | success | Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 | success | Logs for x86_64-gcc / test (sched_ext, false, 360) / sched_ext on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 | success | Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-23 | success | Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 | success | Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 | success | Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-22 | success | Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 | pending | Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 | success | Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-24 | success | Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 | success | Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 | success | Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 | success | Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 | success | Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 | success | Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 | success | Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 | success | Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 | success | Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 | success | Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 | success | Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-37 | success | Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 | success | Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-39 | success | Logs for x86_64-llvm-18 / test (sched_ext, false, 360) / sched_ext on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 | success | Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 | success | Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 | success | Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-44 | success | Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 | success | Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-45 | success | Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-30 | success | Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-31 | success | Logs for x86_64-llvm-17 / test (sched_ext, false, 360) / sched_ext on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-8 | success | Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 | success | Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
@@ -62,9 +62,11 @@ struct bpf_cpu_map_entry {
/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
struct xdp_bulk_queue __percpu *bulkq;
- /* Queue with potential multi-producers, and single-consumer kthread */
+ /* Queue with potential multi-producers, and single-consumer
+ * NAPI-kthread
+ */
struct ptr_ring *queue;
- struct task_struct *kthread;
+ struct napi_struct napi;
struct bpf_cpumap_val value;
struct bpf_prog *prog;
@@ -261,58 +263,42 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
return nframes;
}
-static int cpu_map_kthread_run(void *data)
+static int cpu_map_poll(struct napi_struct *napi, int budget)
{
- struct bpf_cpu_map_entry *rcpu = data;
- unsigned long last_qs = jiffies;
+ struct xdp_cpumap_stats stats = {}; /* zero stats */
+ unsigned int kmem_alloc_drops = 0;
+ struct bpf_cpu_map_entry *rcpu;
+ int done = 0;
+ rcu_read_lock();
+ rcpu = container_of(napi, struct bpf_cpu_map_entry, napi);
complete(&rcpu->kthread_running);
- set_current_state(TASK_INTERRUPTIBLE);
- /* When kthread gives stop order, then rcpu have been disconnected
- * from map, thus no new packets can enter. Remaining in-flight
- * per CPU stored packets are flushed to this queue. Wait honoring
- * kthread_stop signal until queue is empty.
- */
- while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
- struct xdp_cpumap_stats stats = {}; /* zero stats */
- unsigned int kmem_alloc_drops = 0, sched = 0;
+ while (done < budget) {
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
- int i, n, m, nframes, xdp_n;
+ int n, i, m, xdp_n = 0, nframes;
void *frames[CPUMAP_BATCH];
+ struct sk_buff *skb, *tmp;
void *skbs[CPUMAP_BATCH];
LIST_HEAD(list);
- /* Release CPU reschedule checks */
- if (__ptr_ring_empty(rcpu->queue)) {
- set_current_state(TASK_INTERRUPTIBLE);
- /* Recheck to avoid lost wake-up */
- if (__ptr_ring_empty(rcpu->queue)) {
- schedule();
- sched = 1;
- last_qs = jiffies;
- } else {
- __set_current_state(TASK_RUNNING);
- }
- } else {
- rcu_softirq_qs_periodic(last_qs);
- sched = cond_resched();
- }
-
+ if (__ptr_ring_empty(rcpu->queue))
+ break;
/*
* The bpf_cpu_map_entry is single consumer, with this
* kthread CPU pinned. Lockless access to ptr_ring
* consume side valid as no-resize allowed of queue.
*/
- n = __ptr_ring_consume_batched(rcpu->queue, frames,
- CPUMAP_BATCH);
- for (i = 0, xdp_n = 0; i < n; i++) {
+ n = min(budget - done, CPUMAP_BATCH);
+ n = __ptr_ring_consume_batched(rcpu->queue, frames, n);
+ done += n;
+
+ for (i = 0; i < n; i++) {
void *f = frames[i];
struct page *page;
if (unlikely(__ptr_test_bit(0, &f))) {
- struct sk_buff *skb = f;
-
+ skb = f;
__ptr_clear_bit(0, &skb);
list_add_tail(&skb->list, &list);
continue;
@@ -340,12 +326,10 @@ static int cpu_map_kthread_run(void *data)
}
}
- local_bh_disable();
for (i = 0; i < nframes; i++) {
struct xdp_frame *xdpf = frames[i];
- struct sk_buff *skb = skbs[i];
- skb = __xdp_build_skb_from_frame(xdpf, skb,
+ skb = __xdp_build_skb_from_frame(xdpf, skbs[i],
xdpf->dev_rx);
if (!skb) {
xdp_return_frame(xdpf);
@@ -355,18 +339,20 @@ static int cpu_map_kthread_run(void *data)
list_add_tail(&skb->list, &list);
}
- /* Feedback loop via tracepoint.
- * NB: keep before recv to allow measuring enqueue/dequeue latency.
- */
- trace_xdp_cpumap_kthread(rcpu->map_id, n, kmem_alloc_drops,
- sched, &stats);
-
- netif_receive_skb_list(&list);
- local_bh_enable(); /* resched point, may call do_softirq() */
+ list_for_each_entry_safe(skb, tmp, &list, list) {
+ skb_list_del_init(skb);
+ napi_gro_receive(napi, skb);
+ }
}
- __set_current_state(TASK_RUNNING);
- return 0;
+ rcu_read_unlock();
+ /* Feedback loop via tracepoint */
+ trace_xdp_cpumap_kthread(rcpu->map_id, done, kmem_alloc_drops, 0,
+ &stats);
+ if (done < budget)
+ napi_complete(napi);
+
+ return done;
}
static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu,
@@ -434,18 +420,19 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd))
goto free_ptr_ring;
+ napi_init_for_gro(NULL, &rcpu->napi, cpu_map_poll,
+ NAPI_POLL_WEIGHT);
+ set_bit(NAPI_STATE_THREADED, &rcpu->napi.state);
+
/* Setup kthread */
init_completion(&rcpu->kthread_running);
- rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
- "cpumap/%d/map:%d", cpu,
- map->id);
- if (IS_ERR(rcpu->kthread))
+ rcpu->napi.thread = kthread_run_on_cpu(napi_threaded_poll,
+ &rcpu->napi, cpu,
+ "cpumap-napi/%d");
+ if (IS_ERR(rcpu->napi.thread))
goto free_prog;
- /* Make sure kthread runs on a single CPU */
- kthread_bind(rcpu->kthread, cpu);
- wake_up_process(rcpu->kthread);
-
+ napi_schedule(&rcpu->napi);
/* Make sure kthread has been running, so kthread_stop() will not
* stop the kthread prematurely and all pending frames or skbs
* will be handled by the kthread before kthread_stop() returns.
@@ -479,12 +466,8 @@ static void __cpu_map_entry_free(struct work_struct *work)
*/
rcpu = container_of(to_rcu_work(work), struct bpf_cpu_map_entry, free_work);
- /* kthread_stop will wake_up_process and wait for it to complete.
- * cpu_map_kthread_run() makes sure the pointer ring is empty
- * before exiting.
- */
- kthread_stop(rcpu->kthread);
-
+ napi_disable(&rcpu->napi);
+ __netif_napi_del(&rcpu->napi);
if (rcpu->prog)
bpf_prog_put(rcpu->prog);
/* The queue should be empty at this point */
@@ -500,8 +483,8 @@ static void __cpu_map_entry_free(struct work_struct *work)
* __cpu_map_entry_free() in a separate workqueue after waiting for an RCU grace
* period. This means that (a) all pending enqueue and flush operations have
* completed (because of the RCU callback), and (b) we are in a workqueue
- * context where we can stop the kthread and wait for it to exit before freeing
- * everything.
+ * context where we can stop the NAPI-kthread and wait for it to exit before
+ * freeing everything.
*/
static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
u32 key_cpu, struct bpf_cpu_map_entry *rcpu)
@@ -581,9 +564,7 @@ static void cpu_map_free(struct bpf_map *map)
*/
synchronize_rcu();
- /* The only possible user of bpf_cpu_map_entry is
- * cpu_map_kthread_run().
- */
+ /* The only possible user of bpf_cpu_map_entry is the NAPI-kthread. */
for (i = 0; i < cmap->map.max_entries; i++) {
struct bpf_cpu_map_entry *rcpu;
@@ -591,7 +572,7 @@ static void cpu_map_free(struct bpf_map *map)
if (!rcpu)
continue;
- /* Stop kthread and cleanup entry directly */
+ /* Stop NAPI-kthread and cleanup entry directly */
__cpu_map_entry_free(&rcpu->free_work.work);
}
bpf_map_area_free(cmap->cpu_map);
@@ -755,7 +736,7 @@ int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
if (ret < 0)
goto trace;
- wake_up_process(rcpu->kthread);
+ napi_schedule(&rcpu->napi);
trace:
trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu);
return ret;
@@ -767,8 +748,6 @@ void __cpu_map_flush(struct list_head *flush_list)
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
bq_flush_to_queue(bq);
-
- /* If already running, costs spin_lock_irqsave + smb_mb */
- wake_up_process(bq->obj->kthread);
+ napi_schedule(&bq->obj->napi);
}
}
Introduce GRO support to the cpumap codebase by moving the cpu_map_entry kthread to a NAPI-kthread pinned on the selected CPU.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 kernel/bpf/cpumap.c | 125 ++++++++++++++++++++++------------------------------
 1 file changed, 52 insertions(+), 73 deletions(-)
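For readers less familiar with the NAPI side of the conversion, the sketch below shows the general shape of a threaded-NAPI consumer that drains a ptr_ring and hands packets to GRO, which is what the patch above does for cpumap. It is a minimal, illustrative sketch only: struct demo_ctx, demo_napi_poll() and demo_enqueue() are hypothetical names, not part of this patch, and NAPI registration is omitted (the patch relies on a new napi_init_for_gro() helper plus kthread_run_on_cpu(), since cpumap has no backing net_device; a regular driver would use netif_napi_add() and dev_set_threaded() instead).

```c
#include <linux/netdevice.h>
#include <linux/ptr_ring.h>
#include <linux/skbuff.h>

struct demo_ctx {
	struct ptr_ring queue;		/* multi-producer queue */
	struct napi_struct napi;	/* single consumer, polled from its own kthread */
};

/* Poll callback: honor the budget, feed skbs to GRO, and only complete
 * the NAPI instance once the queue has been drained.
 */
static int demo_napi_poll(struct napi_struct *napi, int budget)
{
	struct demo_ctx *ctx = container_of(napi, struct demo_ctx, napi);
	int done = 0;

	while (done < budget) {
		/* lockless consume is safe: NAPI guarantees a single poller */
		struct sk_buff *skb = __ptr_ring_consume(&ctx->queue);

		if (!skb)
			break;
		/* GRO merge point: this is what the cpumap conversion buys */
		napi_gro_receive(napi, skb);
		done++;
	}

	if (done < budget)
		napi_complete_done(napi, done);

	return done;
}

/* Producer side: enqueue and kick the NAPI kthread. Depending on the
 * calling context a _bh/_irq ptr_ring_produce variant may be needed.
 */
static void demo_enqueue(struct demo_ctx *ctx, struct sk_buff *skb)
{
	if (ptr_ring_produce(&ctx->queue, skb))
		kfree_skb(skb);		/* queue full: drop */
	else
		napi_schedule(&ctx->napi);
}
```

The design point mirrored here is that the producer only ever calls napi_schedule(), while all dequeueing, GRO aggregation and completion happen in the pinned poll thread, so no extra locking is required on the consume side.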