@@ -365,7 +365,6 @@ static int cpu_map_kthread_run(void *data)
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 		struct xdp_cpumap_stats stats = {}; /* zero stats */
 		unsigned int kmem_alloc_drops = 0, sched = 0;
-		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
 		int i, n, m, nframes, xdp_n;
 		void *frames[CPUMAP_BATCH];
 		void *skbs[CPUMAP_BATCH];
@@ -416,8 +415,10 @@ static int cpu_map_kthread_run(void *data)
 
 		/* Support running another XDP prog on this CPU */
 		nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
+		local_bh_disable();
+
 		if (nframes) {
-			m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
+			m = napi_skb_cache_get_bulk(skbs, nframes);
 			if (unlikely(m == 0)) {
 				for (i = 0; i < nframes; i++)
 					skbs[i] = NULL; /* effect: xdp_return_frame */
@@ -425,7 +426,6 @@ static int cpu_map_kthread_run(void *data)
 			}
 		}
 
-		local_bh_disable();
 		for (i = 0; i < nframes; i++) {
 			struct xdp_frame *xdpf = frames[i];
 			struct sk_buff *skb = skbs[i];
Now that cpumap uses GRO, which drops unused skb heads to the NAPI
cache, use napi_skb_cache_get_bulk() to try to reuse cached entries
and lower the pressure on the MM layer.

When all 8 skbs from one cpumap batch go into one GRO skb (so the
remaining 7 go into the cache), there will now be only 1 skb to
allocate per cycle instead of 8. If there is some other work happening
in between the cycles, even all 8 might be decached each cycle.

This makes the BH-off period per batch slightly longer -- previously,
skb allocation happened in process context.

Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
---
 kernel/bpf/cpumap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
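
As a note for reviewers, the combined effect of the three hunks is
roughly the following. This is a condensed sketch, not the literal file
contents: the kmem_alloc_drops accounting and the tail of the loop are
paraphrased from the surrounding code, and napi_skb_cache_get_bulk() is
the helper added earlier in this series, which expects to run with BHs
disabled:

	nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);

	/* BHs are now switched off before the allocation, not after it,
	 * so that napi_skb_cache_get_bulk() can safely touch the per-CPU
	 * NAPI skb cache that GRO refills with unused heads.
	 */
	local_bh_disable();

	if (nframes) {
		m = napi_skb_cache_get_bulk(skbs, nframes);
		if (unlikely(m == 0)) {
			/* No heads available at all: NULL every slot so the
			 * build loop below does xdp_return_frame() instead
			 * of building an skb.
			 */
			for (i = 0; i < nframes; i++)
				skbs[i] = NULL;
			kmem_alloc_drops += nframes;
		}
	}

	for (i = 0; i < nframes; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct sk_buff *skb = skbs[i];

		/* ... __xdp_build_skb_from_frame() + GRO, still under
		 * BH-off ...
		 */
	}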