@@ -838,9 +838,12 @@ static void __skb2xdp_steal_data(struct sk_buff *skb,
if (local_pp_alloc) {
/* This is the most common case where the skb was reallocated locally in
* veth_convert_skb_to_xdp_buff, and it's safe to use the xdp_mem_pp model.
+ * Since the skb is "reallocated" in the NAPI context of veth, it is possible
+ * to use the NAPI version of the "head stolen" function to optimize the
+ * reuse of skb as well.
*/
xdp->rxq->mem = rq->xdp_mem_pp;
- kfree_skb_partial(skb, true);
+ napi_skb_free_stolen_head(skb);
} else if (!skb->pp_recycle) {
/* We can safely use kfree_skb_partial here because this cannot be an fclone
* skb. Fclone skbs are allocated via __alloc_skb, with their head buffer
@@ -1284,6 +1284,7 @@ void napi_skb_free_stolen_head(struct sk_buff *skb)
}
napi_skb_cache_put(skb);
}
+EXPORT_SYMBOL(napi_skb_free_stolen_head);
void napi_consume_skb(struct sk_buff *skb, int budget)
{
When the skb has been reallocated by veth_convert_skb_to_xdp_buff, we
use the NAPI version of the "head stolen" function so that the skb can
be reused quickly.

The following results show the performance improvement obtained by
reusing the skb in NAPI context, measured with pktgen-generated traffic.

Test environment setup:

     ns1               ns2
    veth0  <-peer->   veth1
    veth2  <-peer->   veth3

Test results:

pktgen -> veth1 -> veth0(XDP_TX) -> veth1(XDP_DROP)
    without reusing skb: 2,033,439
    reusing skb:         2,167,749
    improvement:         ~6%

pktgen -> veth1 -> veth0(XDP_TX) -> veth1(XDP_PASS)
    without reusing skb: 1,585,462
    reusing skb:         1,650,572
    improvement:         ~4%

pktgen -> veth1 -> veth0(XDP_REDIRECT) -> veth2 -> veth3(XDP_DROP)
    without reusing skb: 1,787,342
    reusing skb:         1,848,516
    improvement:         ~3%

pktgen -> veth1 -> veth0(XDP_REDIRECT) -> veth2 -> veth3(XDP_PASS)
    without reusing skb: 1,391,587
    reusing skb:         1,439,866
    improvement:         ~3%

pktgen -> veth1 -> veth0(AF_XDP) -> user space(DROP)
    without reusing skb: 1,811,844
    reusing skb:         1,861,027
    improvement:         ~3%

Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
---
 drivers/net/veth.c | 5 ++++-
 net/core/skbuff.c  | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)