@@ -331,6 +331,11 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
return frag->bv_len;
}
+static inline bool skb_frag_is_pp(const skb_frag_t *frag)
+{
+ return (unsigned long)frag->bv_page & 1UL; /* bit 0 of bv_page tags a page_pool-backed frag */
+}
+
/**
* skb_frag_size_set() - Sets the size of a skb fragment
* @frag: skb fragment
@@ -2190,6 +2195,21 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
skb->pfmemalloc = true;
}
+static inline void __skb_fill_pp_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off,
+ int size)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ frag->bv_page = (struct page *)((unsigned long)page | 0x1UL); /* set bit-0 tag: page_pool page */
+ frag->bv_offset = off;
+ skb_frag_size_set(frag, size);
+
+ page = compound_head(page); /* pfmemalloc state is kept on the head page */
+ if (page_is_pfmemalloc(page))
+ skb->pfmemalloc = true;
+}
+
/**
* skb_fill_page_desc - initialise a paged fragment in an skb
* @skb: buffer containing fragment to be initialised
@@ -2211,6 +2231,14 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
skb_shinfo(skb)->nr_frags = i + 1;
}
+static inline void skb_fill_pp_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off,
+ int size)
+{
+ __skb_fill_pp_page_desc(skb, i, page, off, size); /* fills frag i with the pp tag bit set */
+ skb_shinfo(skb)->nr_frags = i + 1;
+}
+
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
int size, unsigned int truesize);
@@ -3062,7 +3090,10 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
*/
static inline struct page *skb_frag_page(const skb_frag_t *frag)
{
- return frag->bv_page;
+ unsigned long page = (unsigned long)frag->bv_page;
+
+ page &= ~1UL; /* strip the bit-0 page_pool tag before handing out the real pointer */
+ return (struct page *)page;
}
/**
@@ -3073,7 +3104,12 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ struct page *page = skb_frag_page(frag);
+
+ if (skb_frag_is_pp(frag))
+ page_pool_atomic_inc_frag_count(page); /* pp pages track extra refs via pp_frag_count */
+ else
+ get_page(page);
}
/**
@@ -3101,7 +3137,8 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
struct page *page = skb_frag_page(frag);
#ifdef CONFIG_PAGE_POOL
- if (recycle && page_pool_return_skb_page(page))
+ if ((recycle || skb_frag_is_pp(frag)) && /* pp-tagged frags recycle even without skb->pp_recycle */
+ page_pool_return_skb_page(page))
return;
#endif
put_page(page);
@@ -270,6 +270,11 @@ static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
return ret;
}
+static inline void page_pool_atomic_inc_frag_count(struct page *page)
+{
+ atomic_long_inc(&page->pp_frag_count); /* pairs with page_pool_atomic_sub_frag_count_return() */
+}
+
static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
skb->pp_recycle and page->pp_magic may not be enough to track whether a frag page came from a page pool after __skb_frag_ref() has been called, mostly because of a data race, see: commit 2cc3aeb5eccc ("skbuff: Fix a potential race while recycling page_pool packets"). In the case of TCP, fragmenting, coalescing or retransmitting may lose track of whether a frag page is from a page pool or not. So increment the frag count when __skb_frag_ref() is called, and use bit 0 in frag->bv_page to indicate that a page is from a page pool; the tag is automatically carried over to another frag->bv_page when doing a '*new_frag = *frag' or when memcpying the shinfo. It seems we could do the same trick for rx too if it makes sense. Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> --- include/linux/skbuff.h | 43 ++++++++++++++++++++++++++++++++++++++++--- include/net/page_pool.h | 5 +++++ 2 files changed, 45 insertions(+), 3 deletions(-)