
[RFC,08/12] eth: bnxt: let the page pool manage the DMA mapping

Message ID 20230707183935.997267-9-kuba@kernel.org
State RFC
Delegated to: Netdev Maintainers
Series net: huge page backed page_pool

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1342 this patch: 1342
netdev/cc_maintainers warning 2 maintainers not CCed: pabeni@redhat.com davem@davemloft.net
netdev/build_clang success Errors and warnings before: 1364 this patch: 1364
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1365 this patch: 1365
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 72 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Jakub Kicinski July 7, 2023, 6:39 p.m. UTC
Use the page pool's ability to maintain DMA mappings for us.
This avoids re-mapping recycled pages.

Note that pages in the pool are always mapped DMA_BIDIRECTIONAL,
so we should use that instead of looking at bp->rx_dir.

The syncing is probably wrong, TBH; I haven't studied the page
pool rules, and they always confused me. But for a hack, who cares,
x86 :D

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 ++++++++---------------
 1 file changed, 8 insertions(+), 16 deletions(-)
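
For context, a minimal sketch of a pool created so that page_pool owns the
DMA mapping, as this patch arranges. This is illustrative rather than the
bnxt code; the ring size, device pointer, and NUMA node are placeholders:

	#include <net/page_pool.h>

	struct page_pool_params pp = { 0 };
	struct page_pool *pool;

	/* PP_FLAG_DMA_MAP: the pool maps each page once when the page
	 * enters the pool and keeps that mapping across recycles; the
	 * driver reads it back with page_pool_get_dma_addr() instead of
	 * calling dma_map_page() per buffer.
	 */
	pp.flags     = PP_FLAG_DMA_MAP;
	pp.pool_size = 1024;			/* placeholder ring size */
	pp.nid       = NUMA_NO_NODE;		/* placeholder NUMA node */
	pp.dev       = &pdev->dev;		/* device doing the DMA */
	pp.dma_dir   = DMA_BIDIRECTIONAL;	/* how pool pages get mapped */

	pool = page_pool_create(&pp);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

Pages from page_pool_dev_alloc_pages() then arrive pre-mapped, which is
what lets the driver drop its dma_map_page_attrs() / dma_unmap_page_attrs()
pairs in the diff below.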

Comments

Jesper Dangaard Brouer July 10, 2023, 10:12 a.m. UTC | #1
On 07/07/2023 20.39, Jakub Kicinski wrote:
> Use the page pool's ability to maintain DMA mappings for us.
> This avoids re-mapping recycled pages.
> 

For DMA via IOMMU mappings, using page_pool as this patch does removes the
main bottleneck.  Thus, I suspect this patch will give the biggest
performance boost on its own.

As you have already discovered, the next bottleneck then becomes the
IOMMU's address resolution, which the IOTLB (I/O Translation Lookaside
Buffer) hardware helps speed up.

There are a number of techniques for reducing IOTLB misses.
I recommend reading:
  IOMMU: Strategies for Mitigating the IOTLB Bottleneck
  - https://inria.hal.science/inria-00493752/document


> Note that pages in the pool are always mapped DMA_BIDIRECTIONAL,
> so we should use that instead of looking at bp->rx_dir.
> 
> The syncing is probably wrong, TBH; I haven't studied the page
> pool rules, and they always confused me. But for a hack, who cares,
> x86 :D
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>   drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 ++++++++---------------
>   1 file changed, 8 insertions(+), 16 deletions(-)

Love seeing stats like these, where page_pool reduces lines in drivers.

> 
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index e5b54e6025be..6512514cd498 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -706,12 +706,9 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
>   	if (!page)
>   		return NULL;
>   
> -	*mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
> -				      DMA_ATTR_WEAK_ORDERING);
> -	if (dma_mapping_error(dev, *mapping)) {
> -		page_pool_recycle_direct(rxr->page_pool, page);
> -		return NULL;
> -	}
> +	*mapping = page_pool_get_dma_addr(page);
> +	dma_sync_single_for_device(dev, *mapping, PAGE_SIZE, DMA_BIDIRECTIONAL);
> +

You can keep this as-is, but I just wanted to mention that page_pool
supports doing the "dma_sync_for_device" via PP_FLAG_DMA_SYNC_DEV,
which would remove even more lines from driver code.
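
A minimal sketch of that variant (not part of this patch; the max_len and
offset values are placeholders):

	/* PP_FLAG_DMA_SYNC_DEV: the pool itself calls
	 * dma_sync_single_for_device() over [offset, offset + max_len)
	 * whenever a page is handed (back) to the device, so the driver's
	 * explicit sync above can go away.
	 */
	pp.flags   = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
	pp.dma_dir = DMA_BIDIRECTIONAL;
	pp.max_len = PAGE_SIZE;	/* largest span the device may write */
	pp.offset  = 0;		/* where that span starts in the page */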

>   	return page;
>   }
>   
> @@ -951,6 +948,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
>   					      unsigned int offset_and_len)
>   {
>   	unsigned int len = offset_and_len & 0xffff;
> +	struct device *dev = &bp->pdev->dev;
>   	struct page *page = data;
>   	u16 prod = rxr->rx_prod;
>   	struct sk_buff *skb;
> @@ -962,8 +960,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
>   		return NULL;
>   	}
>   	dma_addr -= bp->rx_dma_offset;
> -	dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
> -			     DMA_ATTR_WEAK_ORDERING);
> +	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
>   	skb = build_skb(page_address(page), PAGE_SIZE);
>   	if (!skb) {
>   		page_pool_recycle_direct(rxr->page_pool, page);
> @@ -984,6 +981,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
>   {
>   	unsigned int payload = offset_and_len >> 16;
>   	unsigned int len = offset_and_len & 0xffff;
> +	struct device *dev = &bp->pdev->dev;
>   	skb_frag_t *frag;
>   	struct page *page = data;
>   	u16 prod = rxr->rx_prod;
> @@ -996,8 +994,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
>   		return NULL;
>   	}
>   	dma_addr -= bp->rx_dma_offset;
> -	dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
> -			     DMA_ATTR_WEAK_ORDERING);
> +	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
>   
>   	if (unlikely(!payload))
>   		payload = eth_get_headlen(bp->dev, data_ptr, len);
> @@ -2943,9 +2940,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
>   		rx_buf->data = NULL;
>   		if (BNXT_RX_PAGE_MODE(bp)) {
>   			mapping -= bp->rx_dma_offset;
> -			dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
> -					     bp->rx_dir,
> -					     DMA_ATTR_WEAK_ORDERING);
>   			page_pool_recycle_direct(rxr->page_pool, data);
>   		} else {
>   			dma_unmap_single_attrs(&pdev->dev, mapping,
> @@ -2967,9 +2961,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
>   			continue;
>   
>   		if (BNXT_RX_PAGE_MODE(bp)) {
> -			dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
> -					     BNXT_RX_PAGE_SIZE, bp->rx_dir,
> -					     DMA_ATTR_WEAK_ORDERING);
>   			rx_agg_buf->page = NULL;
>   			__clear_bit(i, rxr->rx_agg_bmap);
>   
> @@ -3208,6 +3199,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
>   {
>   	struct page_pool_params pp = { 0 };
>   
> +	pp.flags = PP_FLAG_DMA_MAP;
>   	pp.pool_size = bp->rx_ring_size;
>   	pp.nid = dev_to_node(&bp->pdev->dev);
>   	pp.napi = &rxr->bnapi->napi;
Ilias Apalodimas July 26, 2023, 6:56 a.m. UTC | #2
[...]

> > -     *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
> > -                                   DMA_ATTR_WEAK_ORDERING);
> > -     if (dma_mapping_error(dev, *mapping)) {
> > -             page_pool_recycle_direct(rxr->page_pool, page);
> > -             return NULL;
> > -     }
> > +     *mapping = page_pool_get_dma_addr(page);
> > +     dma_sync_single_for_device(dev, *mapping, PAGE_SIZE, DMA_BIDIRECTIONAL);
> > +
>
> You can keep this as-is, but I just wanted to mention that page_pool
> supports doing the "dma_sync_for_device" via PP_FLAG_DMA_SYNC_DEV,
> which would remove even more lines from driver code.

+1 to that.  Also, the direction is stored in pp->dma_dir, so it
should automatically do the right thing.
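
A sketch of the resulting RX pattern (assuming the PP_FLAG_DMA_SYNC_DEV
setup suggested above; this is not what the patch does): the device-side
sync moves into the pool, while the CPU-side sync before reading the
payload stays in the driver, with the direction taken from the pool:

	dma_addr_t dma_addr = page_pool_get_dma_addr(page);

	/* The pool handles sync-for-device on recycle; the driver still
	 * syncs for the CPU before touching the received data.
	 */
	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE,
				page_pool_get_dma_dir(pool));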

Regards
/Ilias

>
> >       return page;
> >   }
> >
> > @@ -951,6 +948,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
> >                                             unsigned int offset_and_len)
> >   {
> >       unsigned int len = offset_and_len & 0xffff;
> > +     struct device *dev = &bp->pdev->dev;
> >       struct page *page = data;
> >       u16 prod = rxr->rx_prod;
> >       struct sk_buff *skb;
> > @@ -962,8 +960,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
> >               return NULL;
> >       }
> >       dma_addr -= bp->rx_dma_offset;
> > -     dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
> > -                          DMA_ATTR_WEAK_ORDERING);
> > +     dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
> >       skb = build_skb(page_address(page), PAGE_SIZE);
> >       if (!skb) {
> >               page_pool_recycle_direct(rxr->page_pool, page);
> > @@ -984,6 +981,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
> >   {
> >       unsigned int payload = offset_and_len >> 16;
> >       unsigned int len = offset_and_len & 0xffff;
> > +     struct device *dev = &bp->pdev->dev;
> >       skb_frag_t *frag;
> >       struct page *page = data;
> >       u16 prod = rxr->rx_prod;
> > @@ -996,8 +994,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
> >               return NULL;
> >       }
> >       dma_addr -= bp->rx_dma_offset;
> > -     dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
> > -                          DMA_ATTR_WEAK_ORDERING);
> > +     dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
> >
> >       if (unlikely(!payload))
> >               payload = eth_get_headlen(bp->dev, data_ptr, len);
> > @@ -2943,9 +2940,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
> >               rx_buf->data = NULL;
> >               if (BNXT_RX_PAGE_MODE(bp)) {
> >                       mapping -= bp->rx_dma_offset;
> > -                     dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
> > -                                          bp->rx_dir,
> > -                                          DMA_ATTR_WEAK_ORDERING);
> >                       page_pool_recycle_direct(rxr->page_pool, data);
> >               } else {
> >                       dma_unmap_single_attrs(&pdev->dev, mapping,
> > @@ -2967,9 +2961,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
> >                       continue;
> >
> >               if (BNXT_RX_PAGE_MODE(bp)) {
> > -                     dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
> > -                                          BNXT_RX_PAGE_SIZE, bp->rx_dir,
> > -                                          DMA_ATTR_WEAK_ORDERING);
> >                       rx_agg_buf->page = NULL;
> >                       __clear_bit(i, rxr->rx_agg_bmap);
> >
> > @@ -3208,6 +3199,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
> >   {
> >       struct page_pool_params pp = { 0 };
> >
> > +     pp.flags = PP_FLAG_DMA_MAP;
> >       pp.pool_size = bp->rx_ring_size;
> >       pp.nid = dev_to_node(&bp->pdev->dev);
> >       pp.napi = &rxr->bnapi->napi;
>

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e5b54e6025be..6512514cd498 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -706,12 +706,9 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
 	if (!page)
 		return NULL;
 
-	*mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
-				      DMA_ATTR_WEAK_ORDERING);
-	if (dma_mapping_error(dev, *mapping)) {
-		page_pool_recycle_direct(rxr->page_pool, page);
-		return NULL;
-	}
+	*mapping = page_pool_get_dma_addr(page);
+	dma_sync_single_for_device(dev, *mapping, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
 	return page;
 }
 
@@ -951,6 +948,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
 					      unsigned int offset_and_len)
 {
 	unsigned int len = offset_and_len & 0xffff;
+	struct device *dev = &bp->pdev->dev;
 	struct page *page = data;
 	u16 prod = rxr->rx_prod;
 	struct sk_buff *skb;
@@ -962,8 +960,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
 		return NULL;
 	}
 	dma_addr -= bp->rx_dma_offset;
-	dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
-			     DMA_ATTR_WEAK_ORDERING);
+	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 	skb = build_skb(page_address(page), PAGE_SIZE);
 	if (!skb) {
 		page_pool_recycle_direct(rxr->page_pool, page);
@@ -984,6 +981,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
 {
 	unsigned int payload = offset_and_len >> 16;
 	unsigned int len = offset_and_len & 0xffff;
+	struct device *dev = &bp->pdev->dev;
 	skb_frag_t *frag;
 	struct page *page = data;
 	u16 prod = rxr->rx_prod;
@@ -996,8 +994,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
 		return NULL;
 	}
 	dma_addr -= bp->rx_dma_offset;
-	dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
-			     DMA_ATTR_WEAK_ORDERING);
+	dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 
 	if (unlikely(!payload))
 		payload = eth_get_headlen(bp->dev, data_ptr, len);
@@ -2943,9 +2940,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
 		rx_buf->data = NULL;
 		if (BNXT_RX_PAGE_MODE(bp)) {
 			mapping -= bp->rx_dma_offset;
-			dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
-					     bp->rx_dir,
-					     DMA_ATTR_WEAK_ORDERING);
 			page_pool_recycle_direct(rxr->page_pool, data);
 		} else {
 			dma_unmap_single_attrs(&pdev->dev, mapping,
@@ -2967,9 +2961,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
 			continue;
 
 		if (BNXT_RX_PAGE_MODE(bp)) {
-			dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
-					     BNXT_RX_PAGE_SIZE, bp->rx_dir,
-					     DMA_ATTR_WEAK_ORDERING);
 			rx_agg_buf->page = NULL;
 			__clear_bit(i, rxr->rx_agg_bmap);
 
@@ -3208,6 +3199,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
 {
 	struct page_pool_params pp = { 0 };
 
+	pp.flags = PP_FLAG_DMA_MAP;
 	pp.pool_size = bp->rx_ring_size;
 	pp.nid = dev_to_node(&bp->pdev->dev);
 	pp.napi = &rxr->bnapi->napi;