diff mbox series

[RFC,6/8] skbuff: always try to do page pool frag reference counting

Message ID 20231113130041.58124-7-linyunsheng@huawei.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series A possible proposal for intergating dmabuf to page pool | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6218 this patch: 6218
netdev/cc_maintainers warning 3 maintainers not CCed: horms@kernel.org sd@queasysnail.net anirudh.venkataramanan@intel.com
netdev/build_clang success Errors and warnings before: 2385 this patch: 2385
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6571 this patch: 6571
netdev/checkpatch warning WARNING: Do not crash the kernel unless it is absolutely unavoidable--use WARN_ON_ONCE() plus recovery code (if feasible) instead of BUG() or variants
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc fail Errors and warnings before: 2 this patch: 3
netdev/source_inline success Was 0 now: 0

Commit Message

Yunsheng Lin Nov. 13, 2023, 1 p.m. UTC
As we have unified the frag_count handling in [1], we can
do frag reference counting instead of native page reference
counting whenever possible in net stack, in order to enable
the best possibility of recycling as we assume other
subsystem other than page pool is also hodling on to a page
if page_ref_count(page) != 1, so we stop the recycling and
release the page from page pool.

As the page from the devmem, we reuse most of the fields in
'struct page' for devmem in order to have unified handling
for both normal memory and devmem, but the meta data is not
really tied to mm, so we can't really use page->_refcount
as reference counting for devmem, instead it relies on page
pool's frag reference counting.

After this patch, pp_frag_count is not appropriate name as
we don't ensure only one user holding on to a frag, we may
rename it to frag_refcount in the future to avoid confusion.

1. https://lore.kernel.org/all/20231020095952.11055-1-linyunsheng@huawei.com/

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c       |  2 +-
 drivers/net/ethernet/sun/cassini.c                  |  4 ++--
 drivers/net/veth.c                                  |  2 +-
 include/linux/skbuff.h                              | 12 +++++++++---
 include/net/page_pool/types.h                       | 13 +++++++++++++
 net/core/skbuff.c                                   |  6 +++---
 net/tls/tls_device_fallback.c                       |  2 +-
 7 files changed, 30 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index 6482728794dd..87fda3e9d7d1 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -1658,7 +1658,7 @@  static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb,
 	for (i = 0; i < record->num_frags; i++) {
 		skb_shinfo(nskb)->frags[i] = record->frags[i];
 		/* increase the frag ref count */
-		__skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
+		skb_frag_ref(nskb, i);
 	}
 
 	skb_shinfo(nskb)->nr_frags = record->num_frags;
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index b317b9486455..8b818809c721 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -1999,7 +1999,7 @@  static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 		skb->len      += hlen - swivel;
 
 		skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
-		__skb_frag_ref(frag);
+		__skb_frag_ref(frag, skb->pp_recycle);
 
 		/* any more data? */
 		if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
@@ -2023,7 +2023,7 @@  static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 			frag++;
 
 			skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
-			__skb_frag_ref(frag);
+			__skb_frag_ref(frag, skb->pp_recycle);
 			RX_USED_ADD(page, hlen + cp->crc_size);
 		}
 
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 9980517ed8b0..8ee323a1184b 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -724,7 +724,7 @@  static void veth_xdp_get(struct xdp_buff *xdp)
 		return;
 
 	for (i = 0; i < sinfo->nr_frags; i++)
-		__skb_frag_ref(&sinfo->frags[i]);
+		__skb_frag_ref(&sinfo->frags[i], false);
 }
 
 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1889b0968be0..b77043a2b446 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,6 +37,7 @@ 
 #endif
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
+#include <net/page_pool/types.h>
 
 /**
  * DOC: skb checksums
@@ -3429,9 +3430,14 @@  static inline struct page *skb_frag_page(const skb_frag_t *frag)
  *
  * Takes an additional reference on the paged fragment @frag.
  */
-static inline void __skb_frag_ref(skb_frag_t *frag)
+static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
 {
-	page_ref_inc(skb_frag_page(frag));
+	struct page *page = skb_frag_page(frag);
+
+	if (page_pool_frag_ref(page, recycle))
+		return;
+
+	page_ref_inc(page);
 }
 
 /**
@@ -3443,7 +3449,7 @@  static inline void __skb_frag_ref(skb_frag_t *frag)
  */
 static inline void skb_frag_ref(struct sk_buff *skb, int f)
 {
-	__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
+	__skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
 }
 
 bool napi_pp_put_page(struct page *page, bool napi_safe);
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 52e4cf98ebc6..ea4e654168bd 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -275,6 +275,19 @@  static inline bool is_page_pool_compiled_in(void)
 #endif
 }
 
+static inline bool page_pool_frag_ref(struct page *page, bool recycle)
+{
+	if (recycle && (page->pp_magic & ~0x3UL) == PP_SIGNATURE) {
+		atomic_long_inc(&page->pp_frag_count);
+		return true;
+	}
+
+	BUG_ON((page->pp_magic & ~0x3UL) == PP_SIGNATURE &&
+	       page->pp->mp_ops);
+
+	return false;
+}
+
 /* Caller must provide appropriate safe context, e.g. NAPI. */
 void page_pool_update_nid(struct page_pool *pool, int new_nid);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ada3da4fe221..a33df6e49694 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4046,7 +4046,7 @@  int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 			to++;
 
 		} else {
-			__skb_frag_ref(fragfrom);
+			__skb_frag_ref(fragfrom, skb->pp_recycle);
 			skb_frag_page_copy(fragto, fragfrom);
 			skb_frag_off_copy(fragto, fragfrom);
 			skb_frag_size_set(fragto, todo);
@@ -4695,7 +4695,7 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 			}
 
 			*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
-			__skb_frag_ref(nskb_frag);
+			__skb_frag_ref(nskb_frag, nskb->pp_recycle);
 			size = skb_frag_size(nskb_frag);
 
 			if (pos < offset) {
@@ -5830,7 +5830,7 @@  bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 	 * since we set nr_frags to 0.
 	 */
 	for (i = 0; i < from_shinfo->nr_frags; i++)
-		__skb_frag_ref(&from_shinfo->frags[i]);
+		__skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
 
 	to->truesize += delta;
 	to->len += len;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 4e7228f275fa..d4000b4a1f7d 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -277,7 +277,7 @@  static int fill_sg_in(struct scatterlist *sg_in,
 	for (i = 0; remaining > 0; i++) {
 		skb_frag_t *frag = &record->frags[i];
 
-		__skb_frag_ref(frag);
+		__skb_frag_ref(frag, false);
 		sg_set_page(sg_in + i, skb_frag_page(frag),
 			    skb_frag_size(frag), skb_frag_off(frag));