diff mbox series

[RFC,2/7] skbuff: add interface to manipulate frag count for tx recycling

Message ID 1629257542-36145-3-git-send-email-linyunsheng@huawei.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series add socket to netdev page frag recycling support | expand

Commit Message

Yunsheng Lin Aug. 18, 2021, 3:32 a.m. UTC
As the skb->pp_recycle and page->pp_magic may not be enough
to track if a frag page is from page pool after the calling
of __skb_frag_ref(), mostly because of a data race, see:
commit 2cc3aeb5eccc ("skbuff: Fix a potential race while
recycling page_pool packets").

As the case of tcp, there may be fragmenting, coalescing or
retransmiting case that might lose the track if a frag page
is from page pool or not.

So increment the frag count when __skb_frag_ref() is called,
and use the bit 0 in frag->bv_page to indicate if a page is
from a page pool, which automically pass down to another
frag->bv_page when doing a '*new_frag = *frag' or memcpying
the shinfo.

It seems we could do the trick for rx too if it makes sense.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/skbuff.h  | 43 ++++++++++++++++++++++++++++++++++++++++---
 include/net/page_pool.h |  5 +++++
 2 files changed, 45 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bdb0db..2878d26 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -331,6 +331,11 @@  static inline unsigned int skb_frag_size(const skb_frag_t *frag)
 	return frag->bv_len;
 }
 
+static inline bool skb_frag_is_pp(const skb_frag_t *frag)
+{
+	return (unsigned long)frag->bv_page & 1UL;
+}
+
 /**
  * skb_frag_size_set() - Sets the size of a skb fragment
  * @frag: skb fragment
@@ -2190,6 +2195,21 @@  static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
 		skb->pfmemalloc	= true;
 }
 
+static inline void __skb_fill_pp_page_desc(struct sk_buff *skb, int i,
+					   struct page *page, int off,
+					   int size)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+	frag->bv_page = (struct page *)((unsigned long)page | 0x1UL);
+	frag->bv_offset = off;
+	skb_frag_size_set(frag, size);
+
+	page = compound_head(page);
+	if (page_is_pfmemalloc(page))
+		skb->pfmemalloc = true;
+}
+
 /**
  * skb_fill_page_desc - initialise a paged fragment in an skb
  * @skb: buffer containing fragment to be initialised
@@ -2211,6 +2231,14 @@  static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 	skb_shinfo(skb)->nr_frags = i + 1;
 }
 
+static inline void skb_fill_pp_page_desc(struct sk_buff *skb, int i,
+					 struct page *page, int off,
+					 int size)
+{
+	__skb_fill_pp_page_desc(skb, i, page, off, size);
+	skb_shinfo(skb)->nr_frags = i + 1;
+}
+
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size, unsigned int truesize);
 
@@ -3062,7 +3090,10 @@  static inline void skb_frag_off_copy(skb_frag_t *fragto,
  */
 static inline struct page *skb_frag_page(const skb_frag_t *frag)
 {
-	return frag->bv_page;
+	unsigned long page = (unsigned long)frag->bv_page;
+
+	page &= ~1UL;
+	return (struct page *)page;
 }
 
 /**
@@ -3073,7 +3104,12 @@  static inline struct page *skb_frag_page(const skb_frag_t *frag)
  */
 static inline void __skb_frag_ref(skb_frag_t *frag)
 {
-	get_page(skb_frag_page(frag));
+	struct page *page = skb_frag_page(frag);
+
+	if (skb_frag_is_pp(frag))
+		page_pool_atomic_inc_frag_count(page);
+	else
+		get_page(page);
 }
 
 /**
@@ -3101,7 +3137,8 @@  static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 	struct page *page = skb_frag_page(frag);
 
 #ifdef CONFIG_PAGE_POOL
-	if (recycle && page_pool_return_skb_page(page))
+	if ((recycle || skb_frag_is_pp(frag)) &&
+	    page_pool_return_skb_page(page))
 		return;
 #endif
 	put_page(page);
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 8d4ae4b..86babb2 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -270,6 +270,11 @@  static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
 	return ret;
 }
 
+static void page_pool_atomic_inc_frag_count(struct page *page)
+{
+	atomic_long_inc(&page->pp_frag_count);
+}
+
 static inline bool is_page_pool_compiled_in(void)
 {
 #ifdef CONFIG_PAGE_POOL