Message ID | 20230729095107.1722450-2-houtao@huaweicloud.com (mailing list archive) |
---|---
State | Accepted |
Delegated to: | BPF |
Series | Two fixes for cpu-map |
On 2023/7/29 17:51, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
>
> The following warning was reported when running stress-mode enabled
> xdp_redirect_cpu with some RT threads:
>
> ------------[ cut here ]------------
> WARNING: CPU: 4 PID: 65 at kernel/bpf/cpumap.c:135
> CPU: 4 PID: 65 Comm: kworker/4:1 Not tainted 6.5.0-rc2+ #1
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
> Workqueue: events cpu_map_kthread_stop
> RIP: 0010:put_cpu_map_entry+0xda/0x220
> ......
> Call Trace:
>  <TASK>
>  ? show_regs+0x65/0x70
>  ? __warn+0xa5/0x240
>  ......
>  ? put_cpu_map_entry+0xda/0x220
>  cpu_map_kthread_stop+0x41/0x60
>  process_one_work+0x6b0/0xb80
>  worker_thread+0x96/0x720
>  kthread+0x1a5/0x1f0
>  ret_from_fork+0x3a/0x70
>  ret_from_fork_asm+0x1b/0x30
>  </TASK>
>
> The root cause is the same as in commit 436901649731 ("bpf: cpumap: Fix memory
> leak in cpu_map_update_elem"). The kthread is stopped prematurely by
> kthread_stop() in cpu_map_kthread_stop(), and kthread() doesn't call
> cpu_map_kthread_run() at all, but the XDP program has already queued some
> frames or skbs into the ptr_ring. So when __cpu_map_ring_cleanup() checks
> the ptr_ring, it will find it was not emptied and report a warning.
>
> An alternative fix is to use __cpu_map_ring_cleanup() to drop these
> pending frames or skbs when kthread_stop() returns -EINTR, but it may
> confuse the user, because these frames or skbs have already been handled
> correctly by the XDP program. So instead of dropping these frames or skbs,
> just make sure the per-CPU kthread is running before
> __cpu_map_entry_alloc() returns.
>
> After applying the fix, the error handling for kthread_stop() becomes
> unnecessary because it will always return 0, so just remove it.
>
> Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP")
> Signed-off-by: Hou Tao <houtao1@huawei.com>
> ---
>  kernel/bpf/cpumap.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
> index 0a16e30b16ef..08351e0863e5 100644
> --- a/kernel/bpf/cpumap.c
> +++ b/kernel/bpf/cpumap.c
> @@ -28,6 +28,7 @@
>  #include <linux/sched.h>
>  #include <linux/workqueue.h>
>  #include <linux/kthread.h>
> +#include <linux/completion.h>
>  #include <trace/events/xdp.h>
>  #include <linux/btf_ids.h>
>
> @@ -71,6 +72,7 @@ struct bpf_cpu_map_entry {
>  	struct rcu_head rcu;
>
>  	struct work_struct kthread_stop_wq;
> +	struct completion kthread_running;
>  };
>
>  struct bpf_cpu_map {
> @@ -151,7 +153,6 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
>  static void cpu_map_kthread_stop(struct work_struct *work)
>  {
>  	struct bpf_cpu_map_entry *rcpu;
> -	int err;
>
>  	rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
>
> @@ -161,14 +162,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
>  	rcu_barrier();
>
>  	/* kthread_stop will wake_up_process and wait for it to complete */
> -	err = kthread_stop(rcpu->kthread);
> -	if (err) {
> -		/* kthread_stop may be called before cpu_map_kthread_run
> -		 * is executed, so we need to release the memory related
> -		 * to rcpu.
> -		 */
> -		put_cpu_map_entry(rcpu);
> -	}
> +	kthread_stop(rcpu->kthread);
>  }

Looks good to me. Feel free to add:

Reviewed-by: Pu Lehui <pulehui@huawei.com>

>
>  static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
> @@ -296,11 +290,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
>  	return nframes;
>  }
>
> -
>  static int cpu_map_kthread_run(void *data)
>  {
>  	struct bpf_cpu_map_entry *rcpu = data;
>
> +	complete(&rcpu->kthread_running);
>  	set_current_state(TASK_INTERRUPTIBLE);
>
>  	/* When kthread gives stop order, then rcpu have been disconnected
> @@ -465,6 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
>  		goto free_ptr_ring;
>
>  	/* Setup kthread */
> +	init_completion(&rcpu->kthread_running);
>  	rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
>  					       "cpumap/%d/map:%d", cpu,
>  					       map->id);
> @@ -478,6 +473,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
>  	kthread_bind(rcpu->kthread, cpu);
>  	wake_up_process(rcpu->kthread);
>
> +	/* Make sure kthread has been running, so kthread_stop() will not
> +	 * stop the kthread prematurely and all pending frames or skbs
> +	 * will be handled by the kthread before kthread_stop() returns.
> +	 */
> +	wait_for_completion(&rcpu->kthread_running);
> +
>  	return rcpu;
>
>  free_prog:
On 29/07/2023 11.51, Hou Tao wrote:
> From: Hou Tao <houtao1@huawei.com>
>
> The following warning was reported when running stress-mode enabled
> xdp_redirect_cpu with some RT threads:

Cool stress-mode test that leverages RT to provoke this.

>
> ------------[ cut here ]------------
> WARNING: CPU: 4 PID: 65 at kernel/bpf/cpumap.c:135
> CPU: 4 PID: 65 Comm: kworker/4:1 Not tainted 6.5.0-rc2+ #1

As you mention RT, I want to mention that it is also possible to change
the sched prio on the kthread PID.

> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
> Workqueue: events cpu_map_kthread_stop
> RIP: 0010:put_cpu_map_entry+0xda/0x220
> ......
> Call Trace:
>  <TASK>
>  ? show_regs+0x65/0x70
>  ? __warn+0xa5/0x240
>  ......
>  ? put_cpu_map_entry+0xda/0x220
>  cpu_map_kthread_stop+0x41/0x60
>  process_one_work+0x6b0/0xb80
>  worker_thread+0x96/0x720
>  kthread+0x1a5/0x1f0
>  ret_from_fork+0x3a/0x70
>  ret_from_fork_asm+0x1b/0x30
>  </TASK>
>
> The root cause is the same as in commit 436901649731 ("bpf: cpumap: Fix memory
> leak in cpu_map_update_elem"). The kthread is stopped prematurely by
> kthread_stop() in cpu_map_kthread_stop(), and kthread() doesn't call
> cpu_map_kthread_run() at all, but the XDP program has already queued some
> frames or skbs into the ptr_ring. So when __cpu_map_ring_cleanup() checks
> the ptr_ring, it will find it was not emptied and report a warning.
>
> An alternative fix is to use __cpu_map_ring_cleanup() to drop these
> pending frames or skbs when kthread_stop() returns -EINTR, but it may
> confuse the user, because these frames or skbs have already been handled
> correctly by the XDP program. So instead of dropping these frames or skbs,
> just make sure the per-CPU kthread is running before
> __cpu_map_entry_alloc() returns.
>
> After applying the fix, the error handling for kthread_stop() becomes
> unnecessary because it will always return 0, so just remove it.
>
> Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP")
> Signed-off-by: Hou Tao <houtao1@huawei.com>

Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>

Thanks for catching this!

> ---
>  kernel/bpf/cpumap.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
> index 0a16e30b16ef..08351e0863e5 100644
> --- a/kernel/bpf/cpumap.c
> +++ b/kernel/bpf/cpumap.c
> @@ -28,6 +28,7 @@
>  #include <linux/sched.h>
>  #include <linux/workqueue.h>
>  #include <linux/kthread.h>
> +#include <linux/completion.h>
>  #include <trace/events/xdp.h>
>  #include <linux/btf_ids.h>
>
> @@ -71,6 +72,7 @@ struct bpf_cpu_map_entry {
>  	struct rcu_head rcu;
>
>  	struct work_struct kthread_stop_wq;
> +	struct completion kthread_running;
>  };
>
>  struct bpf_cpu_map {
> @@ -151,7 +153,6 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
>  static void cpu_map_kthread_stop(struct work_struct *work)
>  {
>  	struct bpf_cpu_map_entry *rcpu;
> -	int err;
>
>  	rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
>
> @@ -161,14 +162,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
>  	rcu_barrier();
>
>  	/* kthread_stop will wake_up_process and wait for it to complete */
> -	err = kthread_stop(rcpu->kthread);
> -	if (err) {
> -		/* kthread_stop may be called before cpu_map_kthread_run
> -		 * is executed, so we need to release the memory related
> -		 * to rcpu.
> -		 */
> -		put_cpu_map_entry(rcpu);
> -	}
> +	kthread_stop(rcpu->kthread);
>  }
>
>  static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
> @@ -296,11 +290,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
>  	return nframes;
>  }
>
> -
>  static int cpu_map_kthread_run(void *data)
>  {
>  	struct bpf_cpu_map_entry *rcpu = data;
>
> +	complete(&rcpu->kthread_running);
>  	set_current_state(TASK_INTERRUPTIBLE);
>

The diff is missing the next lines that show this is correct. I checked
this manually, and for other reviewers here are the next lines:

	set_current_state(TASK_INTERRUPTIBLE);

	/* When kthread gives stop order, then rcpu have been disconnected
	 * from map, thus no new packets can enter. Remaining in-flight
	 * per CPU stored packets are flushed to this queue. Wait honoring
	 * kthread_stop signal until queue is empty.
	 */
	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {

The patch is correct in setting complete(&rcpu->kthread_running) before
the while-loop, as the code also checks if the ptr_ring is not empty.

>  	/* When kthread gives stop order, then rcpu have been disconnected
> @@ -465,6 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
>  		goto free_ptr_ring;
>
>  	/* Setup kthread */
> +	init_completion(&rcpu->kthread_running);
>  	rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
>  					       "cpumap/%d/map:%d", cpu,
>  					       map->id);
> @@ -478,6 +473,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
>  	kthread_bind(rcpu->kthread, cpu);
>  	wake_up_process(rcpu->kthread);
>
> +	/* Make sure kthread has been running, so kthread_stop() will not
> +	 * stop the kthread prematurely and all pending frames or skbs
> +	 * will be handled by the kthread before kthread_stop() returns.
> +	 */
> +	wait_for_completion(&rcpu->kthread_running);
> +
>  	return rcpu;
>
>  free_prog:
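On Jesper's side note about scheduling: each cpumap entry's kthread shows up as an ordinary PID (named cpumap/<cpu>/map:<id>), so its policy and priority can be raised from userspace with chrt or the sched_setscheduler() syscall. A hedged userspace sketch, where the SCHED_FIFO priority of 10 is an arbitrary example value:

/* Hypothetical helper: raise a cpumap kthread to SCHED_FIFO.
 * Equivalent to `chrt -f -p 10 <pid>`. Requires CAP_SYS_NICE.
 */
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

int main(int argc, char **argv)
{
	struct sched_param sp = { .sched_priority = 10 }; /* example prio */
	pid_t pid;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <cpumap-kthread-pid>\n", argv[0]);
		return 1;
	}
	pid = (pid_t)atoi(argv[1]);

	if (sched_setscheduler(pid, SCHED_FIFO, &sp)) {
		perror("sched_setscheduler");
		return 1;
	}
	return 0;
}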
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 0a16e30b16ef..08351e0863e5 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -28,6 +28,7 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
+#include <linux/completion.h>
 #include <trace/events/xdp.h>
 #include <linux/btf_ids.h>

@@ -71,6 +72,7 @@ struct bpf_cpu_map_entry {
 	struct rcu_head rcu;

 	struct work_struct kthread_stop_wq;
+	struct completion kthread_running;
 };

 struct bpf_cpu_map {
@@ -151,7 +153,6 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 static void cpu_map_kthread_stop(struct work_struct *work)
 {
 	struct bpf_cpu_map_entry *rcpu;
-	int err;

 	rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);

@@ -161,14 +162,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
 	rcu_barrier();

 	/* kthread_stop will wake_up_process and wait for it to complete */
-	err = kthread_stop(rcpu->kthread);
-	if (err) {
-		/* kthread_stop may be called before cpu_map_kthread_run
-		 * is executed, so we need to release the memory related
-		 * to rcpu.
-		 */
-		put_cpu_map_entry(rcpu);
-	}
+	kthread_stop(rcpu->kthread);
 }

 static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
@@ -296,11 +290,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
 	return nframes;
 }

-
 static int cpu_map_kthread_run(void *data)
 {
 	struct bpf_cpu_map_entry *rcpu = data;

+	complete(&rcpu->kthread_running);
 	set_current_state(TASK_INTERRUPTIBLE);

 	/* When kthread gives stop order, then rcpu have been disconnected
@@ -465,6 +459,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
 		goto free_ptr_ring;

 	/* Setup kthread */
+	init_completion(&rcpu->kthread_running);
 	rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
 					       "cpumap/%d/map:%d", cpu,
 					       map->id);
@@ -478,6 +473,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
 	kthread_bind(rcpu->kthread, cpu);
 	wake_up_process(rcpu->kthread);

+	/* Make sure kthread has been running, so kthread_stop() will not
+	 * stop the kthread prematurely and all pending frames or skbs
+	 * will be handled by the kthread before kthread_stop() returns.
+	 */
+	wait_for_completion(&rcpu->kthread_running);
+
 	return rcpu;

 free_prog:
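For orientation, the path patched above is reached whenever userspace inserts an element into a cpumap: every such update goes through __cpu_map_entry_alloc(), which with this patch returns only after the per-CPU kthread is running. A hypothetical libbpf snippet, with the map name, max_entries, and qsize chosen arbitrarily for illustration:

/* Hypothetical libbpf usage: the bpf_map_update_elem() call below is what
 * spawns "cpumap/0/map:<id>"; with the fix it returns only after that
 * kthread has started executing cpu_map_kthread_run().
 */
#include <stdio.h>
#include <bpf/bpf.h>
#include <linux/bpf.h>

int main(void)
{
	struct bpf_cpumap_val val = { .qsize = 192 }; /* ptr_ring slots */
	__u32 cpu = 0;
	int map_fd;

	map_fd = bpf_map_create(BPF_MAP_TYPE_CPUMAP, "cpumap_demo",
				sizeof(__u32), sizeof(val), 4, NULL);
	if (map_fd < 0) {
		perror("bpf_map_create");
		return 1;
	}

	/* Allocate the per-CPU entry for CPU 0 (no attached prog, qsize
	 * only); this drives __cpu_map_entry_alloc() in the kernel.
	 */
	if (bpf_map_update_elem(map_fd, &cpu, &val, 0)) {
		perror("bpf_map_update_elem");
		return 1;
	}
	return 0;
}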