diff mbox series

[net-next,v2,1/2] net: ethernet: mtk_eth_soc: use prefetch methods

Message ID 20240729183038.1959-2-eladwf@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: ethernet: mtk_eth_soc: improve RX performance | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 42 this patch: 42
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 5 maintainers not CCed: ast@kernel.org hawk@kernel.org daniel@iogearbox.net bpf@vger.kernel.org john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 43 this patch: 43
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 50 this patch: 50
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 44 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-07-31--12-00 (tests: 707)

Commit Message

Elad Yifee July 29, 2024, 6:29 p.m. UTC
Utilize kernel prefetch methods for faster cache line access.
This change boosts driver performance,
allowing the CPU to handle about 5% more packets/sec.

Signed-off-by: Elad Yifee <eladwf@gmail.com>
---
Changes in v2:
	- use net_prefetchw as suggested by Joe Damato
	- add (NET_SKB_PAD + eth->ip_align) offset to prefetched data
	- use eth->ip_align instead of NET_IP_ALIGN as it could be 0,
	depending on the platform 
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

Comments

Joe Damato July 30, 2024, 8:59 a.m. UTC | #1
On Mon, Jul 29, 2024 at 09:29:54PM +0300, Elad Yifee wrote:
> Utilize kernel prefetch methods for faster cache line access.
> This change boosts driver performance,
> allowing the CPU to handle about 5% more packets/sec.
> 
> Signed-off-by: Elad Yifee <eladwf@gmail.com>
> ---
> Changes in v2:
> 	- use net_prefetchw as suggested by Joe Damato
> 	- add (NET_SKB_PAD + eth->ip_align) offset to prefetched data
> 	- use eth->ip_align instead of NET_IP_ALIGN as it could be 0,
> 	depending on the platform 
> ---
>  drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 +++++++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> index 16ca427cf4c3..4d0052dbe3f4 100644
> --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c

[...]

> @@ -2143,6 +2147,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>  			dma_unmap_single(eth->dma_dev, ((u64)trxd.rxd1 | addr64),
>  					 ring->buf_size, DMA_FROM_DEVICE);
>  
> +			net_prefetch(data + NET_SKB_PAD + eth->ip_align);
>  			skb = build_skb(data, ring->frag_size);
>  			if (unlikely(!skb)) {
>  				netdev->stats.rx_dropped++;
> @@ -2150,7 +2155,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>  				goto skip_rx;
>  			}
>  
> -			skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
> +			net_prefetchw(skb->data);
> +			skb_reserve(skb, NET_SKB_PAD + eth->ip_align);

Based on the code in mtk_probe, I am guessing that only
MTK_SOC_MT7628 can DMA to unaligned addresses, because for
everything else eth->ip_align would be 0.

Is that right?

I am asking because the documentation in
Documentation/core-api/unaligned-memory-access.rst refers to the
case you mention, NET_IP_ALIGN = 0, suggesting that this is
intentional for performance reasons on powerpc:

  One notable exception here is powerpc which defines NET_IP_ALIGN to
  0 because DMA to unaligned addresses can be very expensive and dwarf
  the cost of unaligned loads.

It goes on to explain that some devices cannot DMA to unaligned
addresses and I assume that for your driver that is everything which
is not MTK_SOC_MT7628 ?
Elad Yifee July 30, 2024, 6:35 p.m. UTC | #2
On Tue, Jul 30, 2024 at 11:59 AM Joe Damato <jdamato@fastly.com> wrote:
>
> Based on the code in mtk_probe, I am guessing that only
> MTK_SOC_MT7628 can DMA to unaligned addresses, because for
> everything else eth->ip_align would be 0.
>
> Is that right?
>
> I am asking because the documentation in
> Documentation/core-api/unaligned-memory-access.rst refers to the
> case you mention, NET_IP_ALIGN = 0, suggesting that this is
> intentional for performance reasons on powerpc:
>
>   One notable exception here is powerpc which defines NET_IP_ALIGN to
>   0 because DMA to unaligned addresses can be very expensive and dwarf
>   the cost of unaligned loads.
>
> It goes on to explain that some devices cannot DMA to unaligned
> addresses and I assume that for your driver that is everything which
> is not MTK_SOC_MT7628 ?

I have no explanation for this partial use of 'eth->ip_align', it
could be a mistake
or maybe I'm missing something.
Perhaps Stefan Roese, who wrote this part, has an explanation.
(adding Stefan to CC)
Stefan Roese Aug. 1, 2024, 7:09 a.m. UTC | #3
On 7/30/24 20:35, Elad Yifee wrote:
> On Tue, Jul 30, 2024 at 11:59 AM Joe Damato <jdamato@fastly.com> wrote:
>>
>> Based on the code in mtk_probe, I am guessing that only
>> MTK_SOC_MT7628 can DMA to unaligned addresses, because for
>> everything else eth->ip_align would be 0.
>>
>> Is that right?
>>
>> I am asking because the documentation in
>> Documentation/core-api/unaligned-memory-access.rst refers to the
>> case you mention, NET_IP_ALIGN = 0, suggesting that this is
>> intentional for performance reasons on powerpc:
>>
>>    One notable exception here is powerpc which defines NET_IP_ALIGN to
>>    0 because DMA to unaligned addresses can be very expensive and dwarf
>>    the cost of unaligned loads.
>>
>> It goes on to explain that some devices cannot DMA to unaligned
>> addresses and I assume that for your driver that is everything which
>> is not MTK_SOC_MT7628 ?
> 
> I have no explanation for this partial use of 'eth->ip_align', it
> could be a mistake
> or maybe I'm missing something.
> Perhaps Stefan Roese, who wrote this part, has an explanation.
> (adding Stefan to CC)

Sorry, I can't answer this w/o digging deeper into this driver and
SoC again. And I didn't use it for a few years now. It might be a
mistake.

Thanks,
Stefan
Joe Damato Aug. 1, 2024, 1:14 p.m. UTC | #4
On Thu, Aug 01, 2024 at 09:09:27AM +0200, Stefan Roese wrote:
> On 7/30/24 20:35, Elad Yifee wrote:
> > On Tue, Jul 30, 2024 at 11:59 AM Joe Damato <jdamato@fastly.com> wrote:
> > > 
> > > Based on the code in mtk_probe, I am guessing that only
> > > MTK_SOC_MT7628 can DMA to unaligned addresses, because for
> > > everything else eth->ip_align would be 0.
> > > 
> > > Is that right?
> > > 
> > > I am asking because the documentation in
> > > Documentation/core-api/unaligned-memory-access.rst refers to the
> > > case you mention, NET_IP_ALIGN = 0, suggesting that this is
> > > intentional for performance reasons on powerpc:
> > > 
> > >    One notable exception here is powerpc which defines NET_IP_ALIGN to
> > >    0 because DMA to unaligned addresses can be very expensive and dwarf
> > >    the cost of unaligned loads.
> > > 
> > > It goes on to explain that some devices cannot DMA to unaligned
> > > addresses and I assume that for your driver that is everything which
> > > is not MTK_SOC_MT7628 ?
> > 
> > I have no explanation for this partial use of 'eth->ip_align', it
> > could be a mistake
> > or maybe I'm missing something.
> > Perhaps Stefan Roese, who wrote this part, has an explanation.
> > (adding Stefan to CC)
> 
> Sorry, I can't answer this w/o digging deeper into this driver and
> SoC again. And I didn't use it for a few years now. It might be a
> mistake.

I asked about it because it was added in v2 of the patch, see the
changelog from the patch:

  - use eth->ip_align instead of NET_IP_ALIGN as it could be 0,
  depending on the platform 

It seemed like from the changelog some one decided adding that made
sense and I was just confirming the reasoning above.
Shengyu Qu Jan. 6, 2025, 2:28 p.m. UTC | #5
Hello,

Sorry to bother, but what happened to this patch? Is it given up or
something?

Best regards,
Shengyu

在 2024/7/30 2:29, Elad Yifee 写道:
> Utilize kernel prefetch methods for faster cache line access.
> This change boosts driver performance,
> allowing the CPU to handle about 5% more packets/sec.
> 
> Signed-off-by: Elad Yifee <eladwf@gmail.com>
> ---
> Changes in v2:
> 	- use net_prefetchw as suggested by Joe Damato
> 	- add (NET_SKB_PAD + eth->ip_align) offset to prefetched data
> 	- use eth->ip_align instead of NET_IP_ALIGN as it could be 0,
> 	depending on the platform
> ---
>   drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 +++++++-
>   1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> index 16ca427cf4c3..4d0052dbe3f4 100644
> --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> @@ -1963,6 +1963,7 @@ static u32 mtk_xdp_run(struct mtk_eth *eth, struct mtk_rx_ring *ring,
>   	if (!prog)
>   		goto out;
>   
> +	net_prefetchw(xdp->data_hard_start);
>   	act = bpf_prog_run_xdp(prog, xdp);
>   	switch (act) {
>   	case XDP_PASS:
> @@ -2038,6 +2039,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>   
>   		idx = NEXT_DESP_IDX(ring->calc_idx, ring->dma_size);
>   		rxd = ring->dma + idx * eth->soc->rx.desc_size;
> +		prefetch(rxd);
>   		data = ring->data[idx];
>   
>   		if (!mtk_rx_get_desc(eth, &trxd, rxd))
> @@ -2105,6 +2107,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>   			if (ret != XDP_PASS)
>   				goto skip_rx;
>   
> +			net_prefetch(xdp.data_meta);
>   			skb = build_skb(data, PAGE_SIZE);
>   			if (unlikely(!skb)) {
>   				page_pool_put_full_page(ring->page_pool,
> @@ -2113,6 +2116,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>   				goto skip_rx;
>   			}
>   
> +			net_prefetchw(skb->data);
>   			skb_reserve(skb, xdp.data - xdp.data_hard_start);
>   			skb_put(skb, xdp.data_end - xdp.data);
>   			skb_mark_for_recycle(skb);
> @@ -2143,6 +2147,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>   			dma_unmap_single(eth->dma_dev, ((u64)trxd.rxd1 | addr64),
>   					 ring->buf_size, DMA_FROM_DEVICE);
>   
> +			net_prefetch(data + NET_SKB_PAD + eth->ip_align);
>   			skb = build_skb(data, ring->frag_size);
>   			if (unlikely(!skb)) {
>   				netdev->stats.rx_dropped++;
> @@ -2150,7 +2155,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
>   				goto skip_rx;
>   			}
>   
> -			skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
> +			net_prefetchw(skb->data);
> +			skb_reserve(skb, NET_SKB_PAD + eth->ip_align);
>   			skb_put(skb, pktlen);
>   		}
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 16ca427cf4c3..4d0052dbe3f4 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1963,6 +1963,7 @@  static u32 mtk_xdp_run(struct mtk_eth *eth, struct mtk_rx_ring *ring,
 	if (!prog)
 		goto out;
 
+	net_prefetchw(xdp->data_hard_start);
 	act = bpf_prog_run_xdp(prog, xdp);
 	switch (act) {
 	case XDP_PASS:
@@ -2038,6 +2039,7 @@  static int mtk_poll_rx(struct napi_struct *napi, int budget,
 
 		idx = NEXT_DESP_IDX(ring->calc_idx, ring->dma_size);
 		rxd = ring->dma + idx * eth->soc->rx.desc_size;
+		prefetch(rxd);
 		data = ring->data[idx];
 
 		if (!mtk_rx_get_desc(eth, &trxd, rxd))
@@ -2105,6 +2107,7 @@  static int mtk_poll_rx(struct napi_struct *napi, int budget,
 			if (ret != XDP_PASS)
 				goto skip_rx;
 
+			net_prefetch(xdp.data_meta);
 			skb = build_skb(data, PAGE_SIZE);
 			if (unlikely(!skb)) {
 				page_pool_put_full_page(ring->page_pool,
@@ -2113,6 +2116,7 @@  static int mtk_poll_rx(struct napi_struct *napi, int budget,
 				goto skip_rx;
 			}
 
+			net_prefetchw(skb->data);
 			skb_reserve(skb, xdp.data - xdp.data_hard_start);
 			skb_put(skb, xdp.data_end - xdp.data);
 			skb_mark_for_recycle(skb);
@@ -2143,6 +2147,7 @@  static int mtk_poll_rx(struct napi_struct *napi, int budget,
 			dma_unmap_single(eth->dma_dev, ((u64)trxd.rxd1 | addr64),
 					 ring->buf_size, DMA_FROM_DEVICE);
 
+			net_prefetch(data + NET_SKB_PAD + eth->ip_align);
 			skb = build_skb(data, ring->frag_size);
 			if (unlikely(!skb)) {
 				netdev->stats.rx_dropped++;
@@ -2150,7 +2155,8 @@  static int mtk_poll_rx(struct napi_struct *napi, int budget,
 				goto skip_rx;
 			}
 
-			skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+			net_prefetchw(skb->data);
+			skb_reserve(skb, NET_SKB_PAD + eth->ip_align);
 			skb_put(skb, pktlen);
 		}