Message ID | 20230707183935.997267-9-kuba@kernel.org (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: huge page backed page_pool | expand |
On 07/07/2023 20.39, Jakub Kicinski wrote: > Use the page pool's ability to maintain DMA mappings for us. > This avoids re-mapping recycled pages. > For DMA using IOMMU mappings, using page_pool as this patch does solves the main bottleneck. Thus, I suspect this patch will give the biggest performance boost on its own. As you have already discovered, the next bottleneck then becomes the IOMMU's address resolution, which the IOTLB (I/O Translation Lookaside Buffer) hardware helps speed up. There are a number of techniques for reducing IOTLB misses. I recommend reading: IOMMU: Strategies for Mitigating the IOTLB Bottleneck - https://inria.hal.science/inria-00493752/document > Note that pages in the pool are always mapped DMA_BIDIRECTIONAL, > so we should use that instead of looking at bp->rx_dir. > > The syncing is probably wrong, TBH, I haven't studied the page > pool rules, they always confused me. But for a hack, who cares, > x86 :D > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > --- > drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 ++++++++--------------- > 1 file changed, 8 insertions(+), 16 deletions(-) Love seeing these stats, where page_pool reduces lines in drivers.
> > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c > index e5b54e6025be..6512514cd498 100644 > --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c > +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c > @@ -706,12 +706,9 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, > if (!page) > return NULL; > > - *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir, > - DMA_ATTR_WEAK_ORDERING); > - if (dma_mapping_error(dev, *mapping)) { > - page_pool_recycle_direct(rxr->page_pool, page); > - return NULL; > - } > + *mapping = page_pool_get_dma_addr(page); > + dma_sync_single_for_device(dev, *mapping, PAGE_SIZE, DMA_BIDIRECTIONAL); > + You can keep this as-is, but I just wanted mention that page_pool supports doing the "dma_sync_for_device" via PP_FLAG_DMA_SYNC_DEV. Thus, removing more lines from driver code. > return page; > } > > @@ -951,6 +948,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, > unsigned int offset_and_len) > { > unsigned int len = offset_and_len & 0xffff; > + struct device *dev = &bp->pdev->dev; > struct page *page = data; > u16 prod = rxr->rx_prod; > struct sk_buff *skb; > @@ -962,8 +960,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, > return NULL; > } > dma_addr -= bp->rx_dma_offset; > - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, > - DMA_ATTR_WEAK_ORDERING); > + dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); > skb = build_skb(page_address(page), PAGE_SIZE); > if (!skb) { > page_pool_recycle_direct(rxr->page_pool, page); > @@ -984,6 +981,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, > { > unsigned int payload = offset_and_len >> 16; > unsigned int len = offset_and_len & 0xffff; > + struct device *dev = &bp->pdev->dev; > skb_frag_t *frag; > struct page *page = data; > u16 prod = rxr->rx_prod; > @@ -996,8 +994,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt 
*bp, > return NULL; > } > dma_addr -= bp->rx_dma_offset; > - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, > - DMA_ATTR_WEAK_ORDERING); > + dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); > > if (unlikely(!payload)) > payload = eth_get_headlen(bp->dev, data_ptr, len); > @@ -2943,9 +2940,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) > rx_buf->data = NULL; > if (BNXT_RX_PAGE_MODE(bp)) { > mapping -= bp->rx_dma_offset; > - dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, > - bp->rx_dir, > - DMA_ATTR_WEAK_ORDERING); > page_pool_recycle_direct(rxr->page_pool, data); > } else { > dma_unmap_single_attrs(&pdev->dev, mapping, > @@ -2967,9 +2961,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) > continue; > > if (BNXT_RX_PAGE_MODE(bp)) { > - dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping, > - BNXT_RX_PAGE_SIZE, bp->rx_dir, > - DMA_ATTR_WEAK_ORDERING); > rx_agg_buf->page = NULL; > __clear_bit(i, rxr->rx_agg_bmap); > > @@ -3208,6 +3199,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, > { > struct page_pool_params pp = { 0 }; > > + pp.flags = PP_FLAG_DMA_MAP; > pp.pool_size = bp->rx_ring_size; > pp.nid = dev_to_node(&bp->pdev->dev); > pp.napi = &rxr->bnapi->napi;
[...] > > - *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir, > > - DMA_ATTR_WEAK_ORDERING); > > - if (dma_mapping_error(dev, *mapping)) { > > - page_pool_recycle_direct(rxr->page_pool, page); > > - return NULL; > > - } > > + *mapping = page_pool_get_dma_addr(page); > > + dma_sync_single_for_device(dev, *mapping, PAGE_SIZE, DMA_BIDIRECTIONAL); > > + > > You can keep this as-is, but I just wanted mention that page_pool > supports doing the "dma_sync_for_device" via PP_FLAG_DMA_SYNC_DEV. > Thus, removing more lines from driver code. +1 to that. Also, the direction is stored in pp->dma_dir, so it should automatically do the right thing. Regards /Ilias > > > return page; > > } > > > > @@ -951,6 +948,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, > > unsigned int offset_and_len) > > { > > unsigned int len = offset_and_len & 0xffff; > > + struct device *dev = &bp->pdev->dev; > > struct page *page = data; > > u16 prod = rxr->rx_prod; > > struct sk_buff *skb; > > @@ -962,8 +960,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, > > return NULL; > > } > > dma_addr -= bp->rx_dma_offset; > > - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, > > - DMA_ATTR_WEAK_ORDERING); > > + dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); > > skb = build_skb(page_address(page), PAGE_SIZE); > > if (!skb) { > > page_pool_recycle_direct(rxr->page_pool, page); > > @@ -984,6 +981,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, > > { > > unsigned int payload = offset_and_len >> 16; > > unsigned int len = offset_and_len & 0xffff; > > + struct device *dev = &bp->pdev->dev; > > skb_frag_t *frag; > > struct page *page = data; > > u16 prod = rxr->rx_prod; > > @@ -996,8 +994,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, > > return NULL; > > } > > dma_addr -= bp->rx_dma_offset; > > - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, > > - 
DMA_ATTR_WEAK_ORDERING); > > + dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); > > > > if (unlikely(!payload)) > > payload = eth_get_headlen(bp->dev, data_ptr, len); > > @@ -2943,9 +2940,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) > > rx_buf->data = NULL; > > if (BNXT_RX_PAGE_MODE(bp)) { > > mapping -= bp->rx_dma_offset; > > - dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, > > - bp->rx_dir, > > - DMA_ATTR_WEAK_ORDERING); > > page_pool_recycle_direct(rxr->page_pool, data); > > } else { > > dma_unmap_single_attrs(&pdev->dev, mapping, > > @@ -2967,9 +2961,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) > > continue; > > > > if (BNXT_RX_PAGE_MODE(bp)) { > > - dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping, > > - BNXT_RX_PAGE_SIZE, bp->rx_dir, > > - DMA_ATTR_WEAK_ORDERING); > > rx_agg_buf->page = NULL; > > __clear_bit(i, rxr->rx_agg_bmap); > > > > @@ -3208,6 +3199,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, > > { > > struct page_pool_params pp = { 0 }; > > > > + pp.flags = PP_FLAG_DMA_MAP; > > pp.pool_size = bp->rx_ring_size; > > pp.nid = dev_to_node(&bp->pdev->dev); > > pp.napi = &rxr->bnapi->napi; >
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e5b54e6025be..6512514cd498 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -706,12 +706,9 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, if (!page) return NULL; - *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); - if (dma_mapping_error(dev, *mapping)) { - page_pool_recycle_direct(rxr->page_pool, page); - return NULL; - } + *mapping = page_pool_get_dma_addr(page); + dma_sync_single_for_device(dev, *mapping, PAGE_SIZE, DMA_BIDIRECTIONAL); + return page; } @@ -951,6 +948,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, unsigned int offset_and_len) { unsigned int len = offset_and_len & 0xffff; + struct device *dev = &bp->pdev->dev; struct page *page = data; u16 prod = rxr->rx_prod; struct sk_buff *skb; @@ -962,8 +960,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, return NULL; } dma_addr -= bp->rx_dma_offset; - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); + dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); skb = build_skb(page_address(page), PAGE_SIZE); if (!skb) { page_pool_recycle_direct(rxr->page_pool, page); @@ -984,6 +981,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, { unsigned int payload = offset_and_len >> 16; unsigned int len = offset_and_len & 0xffff; + struct device *dev = &bp->pdev->dev; skb_frag_t *frag; struct page *page = data; u16 prod = rxr->rx_prod; @@ -996,8 +994,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, return NULL; } dma_addr -= bp->rx_dma_offset; - dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); + dma_sync_single_for_cpu(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); if (unlikely(!payload)) payload = 
eth_get_headlen(bp->dev, data_ptr, len); @@ -2943,9 +2940,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) rx_buf->data = NULL; if (BNXT_RX_PAGE_MODE(bp)) { mapping -= bp->rx_dma_offset; - dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, - bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); page_pool_recycle_direct(rxr->page_pool, data); } else { dma_unmap_single_attrs(&pdev->dev, mapping, @@ -2967,9 +2961,6 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr) continue; if (BNXT_RX_PAGE_MODE(bp)) { - dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping, - BNXT_RX_PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); rx_agg_buf->page = NULL; __clear_bit(i, rxr->rx_agg_bmap); @@ -3208,6 +3199,7 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, { struct page_pool_params pp = { 0 }; + pp.flags = PP_FLAG_DMA_MAP; pp.pool_size = bp->rx_ring_size; pp.nid = dev_to_node(&bp->pdev->dev); pp.napi = &rxr->bnapi->napi;
Use the page pool's ability to maintain DMA mappings for us. This avoids re-mapping recycled pages. Note that pages in the pool are always mapped DMA_BIDIRECTIONAL, so we should use that instead of looking at bp->rx_dir. The syncing is probably wrong, TBH, I haven't studied the page pool rules, they always confused me. But for a hack, who cares, x86 :D Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 ++++++++--------------- 1 file changed, 8 insertions(+), 16 deletions(-)