diff mbox series

[net-next,6/7] net: hns3: optimize the rx page reuse handling process

Message ID 1623825377-41948-7-git-send-email-huangguangbin2@huawei.com (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Headers show
Series net: hns3: updates for -next | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 1 maintainers not CCed: yisen.zhuang@huawei.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 69 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link

Commit Message

Guangbin Huang June 16, 2021, 6:36 a.m. UTC
From: Yunsheng Lin <linyunsheng@huawei.com>

Currently the rx page offset is only reset to zero when all of the
conditions below are satisfied:
1. the rx page is only owned by the driver.
2. the rx page is reusable.
3. the page offset that is about to be given to the stack has
reached the end of the page.

If the page offset is over the hns3_buf_size(), it means the
buffer below the offset of the page is usable when conditions
1 & 2 above are satisfied, so the page offset can be reset to
zero instead of being increased. We may be able to always
reuse the first 4K buffer of a 64K page, which means we can
limit the hot buffer size as much as possible.

The above optimization is a side effect of refactoring the
rx page reuse handling in order to support the rx copybreak.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 44 ++++++++++++-------------
 1 file changed, 22 insertions(+), 22 deletions(-)

Comments

Yunsheng Lin June 16, 2021, 8:47 a.m. UTC | #1
On 2021/6/16 14:36, Guangbin Huang wrote:
> From: Yunsheng Lin <linyunsheng@huawei.com>
> 
> Currently the rx page offset is only reset to zero when all of the
> conditions below are satisfied:
> 1. the rx page is only owned by the driver.
> 2. the rx page is reusable.
> 3. the page offset that is about to be given to the stack has
> reached the end of the page.
> 
> If the page offset is over the hns3_buf_size(), it means the
> buffer below the offset of the page is usable when conditions
> 1 & 2 above are satisfied, so the page offset can be reset to
> zero instead of being increased. We may be able to always
> reuse the first 4K buffer of a 64K page, which means we can
> limit the hot buffer size as much as possible.
> 
> The above optimization is a side effect of refactoring the
> rx page reuse handling in order to support the rx copybreak.
> 
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
> ---
>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 44 ++++++++++++-------------
>  1 file changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> index f60a344a6a9f..98e8a548edb8 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> @@ -3525,7 +3525,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
>  
>  static bool hns3_can_reuse_page(struct hns3_desc_cb *cb)
>  {
> -	return (page_count(cb->priv) - cb->pagecnt_bias) == 1;
> +	return page_count(cb->priv) == cb->pagecnt_bias;
>  }
>  
>  static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
> @@ -3533,40 +3533,40 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
>  				struct hns3_desc_cb *desc_cb)
>  {
>  	struct hns3_desc *desc = &ring->desc[ring->next_to_clean];
> +	u32 frag_offset = desc_cb->page_offset + pull_len;
>  	int size = le16_to_cpu(desc->rx.size);
>  	u32 truesize = hns3_buf_size(ring);
> +	u32 frag_size = size - pull_len;
>  
> -	desc_cb->pagecnt_bias--;
> -	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
> -			size - pull_len, truesize);
> +	/* Avoid re-using remote or pfmem page */
> +	if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
> +		goto out;
>  
> -	/* Avoid re-using remote and pfmemalloc pages, or the stack is still
> -	 * using the page when page_offset rollback to zero, flag default
> -	 * unreuse
> +	/* Stack is not using and current page_offset is non-zero, we can
> +	 * reuse from the zero offset.
>  	 */
> -	if (!dev_page_is_reusable(desc_cb->priv) ||
> -	    (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) {
> -		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
> -		return;
> -	}
> -
> -	/* Move offset up to the next cache line */
> -	desc_cb->page_offset += truesize;
> -
> -	if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) {
> +	if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) {
> +		desc_cb->page_offset = 0;
>  		desc_cb->reuse_flag = 1;
> -	} else if (hns3_can_reuse_page(desc_cb)) {
> +	} else if (desc_cb->page_offset + truesize * 2 <=
> +		   hns3_page_size(ring)) {

The above assumption is wrong: we need to check whether the page
is only owned by the driver at both the beginning and the end of a page,
to make sure there is no reuse conflict between the driver and the stack
when desc_cb->page_offset is rolled back to zero or incremented.

The fix for the above problem is pending internally; it was supposed to be
merged with this patch when upstreaming.

It seems davem has already merged this patch, so we will send out the fix
later. Sorry for the inconvenience.


> +		desc_cb->page_offset += truesize;
>  		desc_cb->reuse_flag = 1;
> -		desc_cb->page_offset = 0;
> -	} else if (desc_cb->pagecnt_bias) {
> -		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
> -		return;
>  	}
>  
> +out:
> +	desc_cb->pagecnt_bias--;
> +
>  	if (unlikely(!desc_cb->pagecnt_bias)) {
>  		page_ref_add(desc_cb->priv, USHRT_MAX);
>  		desc_cb->pagecnt_bias = USHRT_MAX;
>  	}
> +
> +	skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
> +			frag_size, truesize);
> +
> +	if (unlikely(!desc_cb->reuse_flag))
> +		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
>  }
>  
>  static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f60a344a6a9f..98e8a548edb8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3525,7 +3525,7 @@  static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring,
 
 static bool hns3_can_reuse_page(struct hns3_desc_cb *cb)
 {
-	return (page_count(cb->priv) - cb->pagecnt_bias) == 1;
+	return page_count(cb->priv) == cb->pagecnt_bias;
 }
 
 static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
@@ -3533,40 +3533,40 @@  static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
 				struct hns3_desc_cb *desc_cb)
 {
 	struct hns3_desc *desc = &ring->desc[ring->next_to_clean];
+	u32 frag_offset = desc_cb->page_offset + pull_len;
 	int size = le16_to_cpu(desc->rx.size);
 	u32 truesize = hns3_buf_size(ring);
+	u32 frag_size = size - pull_len;
 
-	desc_cb->pagecnt_bias--;
-	skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len,
-			size - pull_len, truesize);
+	/* Avoid re-using remote or pfmem page */
+	if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
+		goto out;
 
-	/* Avoid re-using remote and pfmemalloc pages, or the stack is still
-	 * using the page when page_offset rollback to zero, flag default
-	 * unreuse
+	/* Stack is not using and current page_offset is non-zero, we can
+	 * reuse from the zero offset.
 	 */
-	if (!dev_page_is_reusable(desc_cb->priv) ||
-	    (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) {
-		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
-		return;
-	}
-
-	/* Move offset up to the next cache line */
-	desc_cb->page_offset += truesize;
-
-	if (desc_cb->page_offset + truesize <= hns3_page_size(ring)) {
+	if (desc_cb->page_offset && hns3_can_reuse_page(desc_cb)) {
+		desc_cb->page_offset = 0;
 		desc_cb->reuse_flag = 1;
-	} else if (hns3_can_reuse_page(desc_cb)) {
+	} else if (desc_cb->page_offset + truesize * 2 <=
+		   hns3_page_size(ring)) {
+		desc_cb->page_offset += truesize;
 		desc_cb->reuse_flag = 1;
-		desc_cb->page_offset = 0;
-	} else if (desc_cb->pagecnt_bias) {
-		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
-		return;
 	}
 
+out:
+	desc_cb->pagecnt_bias--;
+
 	if (unlikely(!desc_cb->pagecnt_bias)) {
 		page_ref_add(desc_cb->priv, USHRT_MAX);
 		desc_cb->pagecnt_bias = USHRT_MAX;
 	}
+
+	skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
+			frag_size, truesize);
+
+	if (unlikely(!desc_cb->reuse_flag))
+		__page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias);
 }
 
 static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)