diff mbox series

[net-next,07/12] xsk: add generic XSk &xdp_buff -> skb conversion

Message ID 20241211172649.761483-8-aleksander.lobakin@intel.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series xdp: a fistful of generic changes pt. II | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2 this patch: 2
netdev/build_tools success Errors and warnings before: 0 (+23) this patch: 0 (+23)
netdev/cc_maintainers warning 2 maintainers not CCed: horms@kernel.org hawk@kernel.org
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 234 this patch: 234
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 157 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-12-12--00-00 (tests: 795)

Commit Message

Alexander Lobakin Dec. 11, 2024, 5:26 p.m. UTC
Same as with converting &xdp_buff to skb on Rx, the code which allocates
a new skb and copies the XSk frame there is identical across the
drivers, so make it generic. This includes copying all the frags if they
are present in the original buff.
System percpu Page Pools help here a lot: when available, allocate pages
from there instead of the MM layer. This greatly improves XDP_PASS
performance on XSk: instead of page_alloc() + page_free(), the net core
recycles the same pages, so the only overhead left is memcpy()s.
Note that the passed buff gets freed if the conversion is done w/o any
error, assuming you don't need this buffer after you convert it to an
skb.

Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
 include/net/xdp.h |   1 +
 net/core/xdp.c    | 138 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+)

Comments

Jakub Kicinski Dec. 13, 2024, 2:19 a.m. UTC | #1
On Wed, 11 Dec 2024 18:26:44 +0100 Alexander Lobakin wrote:
> +#else /* !CONFIG_PAGE_POOL */
> +	struct napi_struct *napi;
> +
> +	pp = NULL;
> +	napi = napi_by_id(rxq->napi_id);
> +	if (likely(napi))
> +		skb = napi_alloc_skb(napi, len);
> +	else
> +		skb = __netdev_alloc_skb_ip_align(rxq->dev, len,
> +						  GFP_ATOMIC | __GFP_NOWARN);
> +	if (unlikely(!skb))
> +		return NULL;
> +#endif /* !CONFIG_PAGE_POOL */

What are the chances of having a driver with AF_XDP support 
and without page pool support? I think it's zero :S
Can we kill all the if !CONFIG_PAGE_POOL sections and hide
the entire helper under if CONFIG_PAGE_POLL ?
Alexander Lobakin Dec. 13, 2024, 5:31 p.m. UTC | #2
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 12 Dec 2024 18:19:44 -0800

> On Wed, 11 Dec 2024 18:26:44 +0100 Alexander Lobakin wrote:
>> +#else /* !CONFIG_PAGE_POOL */
>> +	struct napi_struct *napi;
>> +
>> +	pp = NULL;
>> +	napi = napi_by_id(rxq->napi_id);
>> +	if (likely(napi))
>> +		skb = napi_alloc_skb(napi, len);
>> +	else
>> +		skb = __netdev_alloc_skb_ip_align(rxq->dev, len,
>> +						  GFP_ATOMIC | __GFP_NOWARN);
>> +	if (unlikely(!skb))
>> +		return NULL;
>> +#endif /* !CONFIG_PAGE_POOL */
> 
> What are the chances of having a driver with AF_XDP support 
> and without page pool support? I think it's zero :S

Right, as CONFIG_BPF_SYSCALL selects PAGE_POOL if NET.

I think I wrote this when it wasn't true (before Lorenzo introduced
generic page_pools) and then just forgot to change that ._.

> Can we kill all the if !CONFIG_PAGE_POOL sections and hide
> the entire helper under if CONFIG_PAGE_POLL ?

We can. But I think I'd need to introduce a return-NULL wrapper in case
of !PAGE_POOL to satisfy the linker, as lots of drivers build their XSk
code unconditionally.

Thanks,
Olek
Jakub Kicinski Dec. 14, 2024, 2:31 a.m. UTC | #3
On Fri, 13 Dec 2024 18:31:59 +0100 Alexander Lobakin wrote:
> > Can we kill all the if !CONFIG_PAGE_POOL sections and hide
> > the entire helper under if CONFIG_PAGE_POLL ?  
> 
> We can. But I think I'd need to introduce a return-NULL wrapper in case
> of !PAGE_POOL to satisfy the linker, as lots of drivers build their XSk
> code unconditionally.

Oh wow, you're right. Bunch of drivers of a certain vendor still don't
use page pool.. return NULL wrapped SGTM.
diff mbox series

Patch

diff --git a/include/net/xdp.h b/include/net/xdp.h
index aa24fa78cbe6..6da0e746cf75 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -337,6 +337,7 @@  void xdp_warn(const char *msg, const char *func, const int line);
 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
 
 struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
+struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp);
 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
 struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 					   struct sk_buff *skb,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index c4d824cf27da..6e319e00ae78 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -22,6 +22,8 @@ 
 #include <trace/events/xdp.h>
 #include <net/xdp_sock_drv.h>
 
+#include "dev.h"
+
 #define REG_STATE_NEW		0x0
 #define REG_STATE_REGISTERED	0x1
 #define REG_STATE_UNREGISTERED	0x2
@@ -684,6 +686,142 @@  struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp)
 }
 EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
 
+/**
+ * xdp_copy_frags_from_zc - copy frags from XSk buff to skb
+ * @skb: skb to copy frags to
+ * @xdp: XSk &xdp_buff from which the frags will be copied
+ * @pp: &page_pool backing page allocation, if available
+ *
+ * Copy all frags from XSk &xdp_buff to the skb to pass it up the stack.
+ * Allocate a new page / page frag for each frag, copy it and attach to
+ * the skb.
+ *
+ * Return: true on success, false on page allocation fail.
+ */
+static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
+					    const struct xdp_buff *xdp,
+					    struct page_pool *pp)
+{
+	const struct skb_shared_info *xinfo;
+	struct skb_shared_info *sinfo;
+	u32 nr_frags, ts;
+
+	xinfo = xdp_get_shared_info_from_buff(xdp);
+	nr_frags = xinfo->nr_frags;
+	sinfo = skb_shinfo(skb);
+
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+	ts = 0;
+#else
+	ts = xinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz;
+#endif
+
+	for (u32 i = 0; i < nr_frags; i++) {
+		u32 len = skb_frag_size(&xinfo->frags[i]);
+		void *data;
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+		u32 truesize = len;
+
+		data = page_pool_dev_alloc_va(pp, &truesize);
+		ts += truesize;
+#else
+		data = napi_alloc_frag(len);
+#endif
+		if (unlikely(!data))
+			return false;
+
+		memcpy(data, skb_frag_address(&xinfo->frags[i]),
+		       LARGEST_ALIGN(len));
+		__skb_fill_netmem_desc(skb, sinfo->nr_frags++,
+				       virt_to_netmem(data),
+				       offset_in_page(data), len);
+	}
+
+	xdp_update_skb_shared_info(skb, nr_frags, xinfo->xdp_frags_size,
+				   ts, false);
+
+	return true;
+}
+
+/**
+ * xdp_build_skb_from_zc - create an skb from XSk &xdp_buff
+ * @xdp: source XSk buff
+ *
+ * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb
+ * head, new page for the head, copy the data and initialize the skb fields.
+ * If there are frags, allocate new pages for them and copy.
+ * If Page Pool is available, the function allocates memory from the system
+ * percpu pools to try recycling the pages, otherwise it uses the NAPI page
+ * frag caches.
+ * If new skb was built successfully, @xdp is returned to XSk pool's freelist.
+ * On error, it remains untouched and the caller must take care of this.
+ *
+ * Return: new &sk_buff on success, %NULL on error.
+ */
+struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
+{
+	const struct xdp_rxq_info *rxq = xdp->rxq;
+	u32 len = xdp->data_end - xdp->data_meta;
+	struct page_pool *pp;
+	struct sk_buff *skb;
+	int metalen;
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+	u32 truesize;
+	void *data;
+
+	pp = this_cpu_read(system_page_pool);
+	truesize = xdp->frame_sz;
+
+	data = page_pool_dev_alloc_va(pp, &truesize);
+	if (unlikely(!data))
+		return NULL;
+
+	skb = napi_build_skb(data, truesize);
+	if (unlikely(!skb)) {
+		page_pool_free_va(pp, data, true);
+		return NULL;
+	}
+
+	skb_mark_for_recycle(skb);
+	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
+#else /* !CONFIG_PAGE_POOL */
+	struct napi_struct *napi;
+
+	pp = NULL;
+	napi = napi_by_id(rxq->napi_id);
+	if (likely(napi))
+		skb = napi_alloc_skb(napi, len);
+	else
+		skb = __netdev_alloc_skb_ip_align(rxq->dev, len,
+						  GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+#endif /* !CONFIG_PAGE_POOL */
+
+	memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len));
+
+	metalen = xdp->data - xdp->data_meta;
+	if (metalen > 0) {
+		skb_metadata_set(skb, metalen);
+		__skb_pull(skb, metalen);
+	}
+
+	skb_record_rx_queue(skb, rxq->queue_index);
+
+	if (unlikely(xdp_buff_has_frags(xdp)) &&
+	    unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
+		napi_consume_skb(skb, true);
+		return NULL;
+	}
+
+	xsk_buff_free(xdp);
+
+	skb->protocol = eth_type_trans(skb, rxq->dev);
+
+	return skb;
+}
+EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
+
 struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 					   struct sk_buff *skb,
 					   struct net_device *dev)