
[net,2/5] mlxsw: pci: Sync Rx buffers for CPU

Message ID 461486fac91755ca4e04c2068c102250026dcd0b.1729866134.git.petrm@nvidia.com (mailing list archive)
State New
Delegated to: Netdev Maintainers
Series mlxsw: Fixes

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 4 this patch: 4
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 54 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-10-26--21-00 (tests: 777)

Commit Message

Petr Machata Oct. 25, 2024, 2:26 p.m. UTC
From: Amit Cohen <amcohen@nvidia.com>

When an Rx packet is received, drivers should sync the pages for the CPU
to ensure the CPU reads the data written by the device and not stale
data from its cache.

Add the missing sync call in the Rx path, syncing the actual data length
for each fragment.

Cc: Jiri Pirko <jiri@resnulli.us>
Fixes: b5b60bb491b2 ("mlxsw: pci: Use page pool for Rx buffers allocation")
Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlxsw/pci.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)
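
The fix follows the usual DMA ownership rule: a buffer the device filled via
DMA has to be synced for the CPU before the CPU parses it. A minimal,
hypothetical sketch of that rule outside of mlxsw (example_rx_poll() and
example_process_packet() are made-up names; only the dma_sync_* calls are the
real API):

	#include <linux/dma-mapping.h>

	/* Hypothetical packet consumer, not part of any real driver. */
	extern void example_process_packet(void *buf, unsigned int len);

	static void example_rx_poll(struct device *dev, dma_addr_t dma_addr,
				    void *buf, unsigned int len)
	{
		/* The device wrote len bytes into buf via DMA.  Hand the
		 * buffer back to the CPU so stale cache lines are not read
		 * instead of the fresh data.
		 */
		dma_sync_single_range_for_cpu(dev, dma_addr, 0, len,
					      DMA_FROM_DEVICE);

		example_process_packet(buf, len);

		/* Give ownership back to the device before reposting the
		 * buffer to the Rx ring.
		 */
		dma_sync_single_range_for_device(dev, dma_addr, 0, len,
						 DMA_FROM_DEVICE);
	}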

Comments

Alexander Lobakin Oct. 25, 2024, 3 p.m. UTC | #1
From: Petr Machata <petrm@nvidia.com>
Date: Fri, 25 Oct 2024 16:26:26 +0200

> From: Amit Cohen <amcohen@nvidia.com>
> 
> When an Rx packet is received, drivers should sync the pages for the CPU
> to ensure the CPU reads the data written by the device and not stale
> data from its cache.

[...]

> -static struct sk_buff *mlxsw_pci_rdq_build_skb(struct page *pages[],
> +static struct sk_buff *mlxsw_pci_rdq_build_skb(struct mlxsw_pci_queue *q,
> +					       struct page *pages[],
>  					       u16 byte_count)
>  {
> +	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
>  	unsigned int linear_data_size;
> +	struct page_pool *page_pool;
>  	struct sk_buff *skb;
>  	int page_index = 0;
>  	bool linear_only;
>  	void *data;
>  
> +	linear_only = byte_count + MLXSW_PCI_RX_BUF_SW_OVERHEAD <= PAGE_SIZE;
> +	linear_data_size = linear_only ? byte_count :
> +					 PAGE_SIZE -
> +					 MLXSW_PCI_RX_BUF_SW_OVERHEAD;

Maybe reformat the line while at it?

	linear_data_size = linear_only ? byte_count :
			   PAGE_SIZE - MLXSW_PCI_RX_BUF_SW_OVERHEAD;

> +
> +	page_pool = cq->u.cq.page_pool;
> +	page_pool_dma_sync_for_cpu(page_pool, pages[page_index],
> +				   MLXSW_PCI_SKB_HEADROOM, linear_data_size);

page_pool_dma_sync_for_cpu() already skips the headroom:

	dma_sync_single_range_for_cpu(pool->p.dev,
				      offset + pool->p.offset, ...

Since your pool->p.offset is MLXSW_PCI_SKB_HEADROOM, I believe you need
to pass 0 here.

> +
>  	data = page_address(pages[page_index]);
>  	net_prefetch(data);

Thanks,
Olek
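
For context, the helper Olek quotes looks roughly like this (paraphrased from
include/net/page_pool/helpers.h around the time of this discussion; the exact
form depends on the kernel version). The point is that the pool-level offset
is added on top of whatever the caller passes:

	static inline void
	page_pool_dma_sync_for_cpu(const struct page_pool *pool,
				   const struct page *page,
				   u32 offset, u32 dma_sync_size)
	{
		/* offset is the caller-supplied value (MLXSW_PCI_SKB_HEADROOM
		 * in the hunk above); pool->p.offset is whatever the driver
		 * set in page_pool_params when the pool was created.
		 */
		dma_sync_single_range_for_cpu(pool->p.dev,
					      page_pool_get_dma_addr(page),
					      offset + pool->p.offset,
					      dma_sync_size,
					      page_pool_get_dma_dir(pool));
	}

So whether the right first-fragment offset is MLXSW_PCI_SKB_HEADROOM or 0
depends entirely on what mlxsw put into p.offset, which is what the reply
below settles.
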
Amit Cohen Oct. 27, 2024, 7:29 a.m. UTC | #2
> -----Original Message-----
> From: Alexander Lobakin <aleksander.lobakin@intel.com>
> Sent: Friday, 25 October 2024 18:00
> To: Petr Machata <petrm@nvidia.com>; Amit Cohen <amcohen@nvidia.com>
> Cc: netdev@vger.kernel.org; Andrew Lunn <andrew+netdev@lunn.ch>; David S. Miller <davem@davemloft.net>; Eric Dumazet
> <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>; Simon Horman <horms@kernel.org>;
> Danielle Ratson <danieller@nvidia.com>; Ido Schimmel <idosch@nvidia.com>; mlxsw <mlxsw@nvidia.com>; Jiri Pirko <jiri@resnulli.us>
> Subject: Re: [PATCH net 2/5] mlxsw: pci: Sync Rx buffers for CPU
> 
> From: Petr Machata <petrm@nvidia.com>
> Date: Fri, 25 Oct 2024 16:26:26 +0200
> 
> > From: Amit Cohen <amcohen@nvidia.com>
> >
> > When an Rx packet is received, drivers should sync the pages for the CPU
> > to ensure the CPU reads the data written by the device and not stale
> > data from its cache.
> 
> [...]
> 
> > -static struct sk_buff *mlxsw_pci_rdq_build_skb(struct page *pages[],
> > +static struct sk_buff *mlxsw_pci_rdq_build_skb(struct mlxsw_pci_queue *q,
> > +					       struct page *pages[],
> >  					       u16 byte_count)
> >  {
> > +	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
> >  	unsigned int linear_data_size;
> > +	struct page_pool *page_pool;
> >  	struct sk_buff *skb;
> >  	int page_index = 0;
> >  	bool linear_only;
> >  	void *data;
> >
> > +	linear_only = byte_count + MLXSW_PCI_RX_BUF_SW_OVERHEAD <= PAGE_SIZE;
> > +	linear_data_size = linear_only ? byte_count :
> > +					 PAGE_SIZE -
> > +					 MLXSW_PCI_RX_BUF_SW_OVERHEAD;
> 
> Maybe reformat the line while at it?
> 
> 	linear_data_size = linear_only ? byte_count :
> 			   PAGE_SIZE - MLXSW_PCI_RX_BUF_SW_OVERHEAD;
> 
> > +
> > +	page_pool = cq->u.cq.page_pool;
> > +	page_pool_dma_sync_for_cpu(page_pool, pages[page_index],
> > +				   MLXSW_PCI_SKB_HEADROOM, linear_data_size);
> 
> page_pool_dma_sync_for_cpu() already skips the headroom:
> 
> 	dma_sync_single_range_for_cpu(pool->p.dev,
> 				      offset + pool->p.offset, ...
> 
> Since your pool->p.offset is MLXSW_PCI_SKB_HEADROOM, I believe you need
> to pass 0 here.

Our pool->p.offset is zero.
We use the page pool to allocate buffers for scatter/gather entries.
Only the first entry reserves headroom for software usage, so we pass the headroom to page_pool_dma_sync_for_cpu() only for the first buffer of a packet.

> 
> > +
> >  	data = page_address(pages[page_index]);
> >  	net_prefetch(data);
> 
> Thanks,
> Olek
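
To restate the layout Amit describes: the pool is created with no pool-level
offset, and only the first scatter/gather page of a packet carries the
software headroom, so only that page gets a non-zero offset in the sync call.
A sketch of the layout (the two constants are paraphrased from
drivers/net/ethernet/mellanox/mlxsw/pci.h and may not match the exact
definitions):

	/* First SG entry (page 0) of a packet:
	 *
	 *   [ MLXSW_PCI_SKB_HEADROOM | linear packet data | skb_shared_info ]
	 *
	 *   -> page_pool_dma_sync_for_cpu(pool, page, MLXSW_PCI_SKB_HEADROOM,
	 *                                 linear_data_size);
	 *
	 * Any further SG entries (pages 1..n) hold payload only:
	 *
	 *   [ packet data ............................................... ]
	 *
	 *   -> page_pool_dma_sync_for_cpu(pool, page, 0, frag_size);
	 */
	#define MLXSW_PCI_SKB_HEADROOM		(NET_SKB_PAD + NET_IP_ALIGN)
	#define MLXSW_PCI_RX_BUF_SW_OVERHEAD	(MLXSW_PCI_SKB_HEADROOM + \
						 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))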

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 060e5b939211..2320a5f323b4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -389,15 +389,27 @@  static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe,
 	dma_unmap_single(&pdev->dev, mapaddr, frag_len, direction);
 }
 
-static struct sk_buff *mlxsw_pci_rdq_build_skb(struct page *pages[],
+static struct sk_buff *mlxsw_pci_rdq_build_skb(struct mlxsw_pci_queue *q,
+					       struct page *pages[],
 					       u16 byte_count)
 {
+	struct mlxsw_pci_queue *cq = q->u.rdq.cq;
 	unsigned int linear_data_size;
+	struct page_pool *page_pool;
 	struct sk_buff *skb;
 	int page_index = 0;
 	bool linear_only;
 	void *data;
 
+	linear_only = byte_count + MLXSW_PCI_RX_BUF_SW_OVERHEAD <= PAGE_SIZE;
+	linear_data_size = linear_only ? byte_count :
+					 PAGE_SIZE -
+					 MLXSW_PCI_RX_BUF_SW_OVERHEAD;
+
+	page_pool = cq->u.cq.page_pool;
+	page_pool_dma_sync_for_cpu(page_pool, pages[page_index],
+				   MLXSW_PCI_SKB_HEADROOM, linear_data_size);
+
 	data = page_address(pages[page_index]);
 	net_prefetch(data);
 
@@ -405,11 +417,6 @@  static struct sk_buff *mlxsw_pci_rdq_build_skb(struct page *pages[],
 	if (unlikely(!skb))
 		return ERR_PTR(-ENOMEM);
 
-	linear_only = byte_count + MLXSW_PCI_RX_BUF_SW_OVERHEAD <= PAGE_SIZE;
-	linear_data_size = linear_only ? byte_count :
-					 PAGE_SIZE -
-					 MLXSW_PCI_RX_BUF_SW_OVERHEAD;
-
 	skb_reserve(skb, MLXSW_PCI_SKB_HEADROOM);
 	skb_put(skb, linear_data_size);
 
@@ -425,6 +432,7 @@  static struct sk_buff *mlxsw_pci_rdq_build_skb(struct page *pages[],
 
 		page = pages[page_index];
 		frag_size = min(byte_count, PAGE_SIZE);
+		page_pool_dma_sync_for_cpu(page_pool, page, 0, frag_size);
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
 				page, 0, frag_size, PAGE_SIZE);
 		byte_count -= frag_size;
@@ -760,7 +768,7 @@  static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 	if (err)
 		goto out;
 
-	skb = mlxsw_pci_rdq_build_skb(pages, byte_count);
+	skb = mlxsw_pci_rdq_build_skb(q, pages, byte_count);
 	if (IS_ERR(skb)) {
 		dev_err_ratelimited(&pdev->dev, "Failed to build skb for RDQ\n");
 		mlxsw_pci_rdq_pages_recycle(q, pages, num_sg_entries);
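
Passing MLXSW_PCI_SKB_HEADROOM for the first fragment is only correct because,
as discussed above, the Rx page pool is created without a pool-level offset.
A hypothetical sketch of such a setup (field values are illustrative, not
mlxsw's actual parameters; rx_desc_count and pdev are assumed to exist):

	#include <net/page_pool/helpers.h>

	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
		.pool_size	= rx_desc_count,	/* assumed ring size */
		.nid		= NUMA_NO_NODE,
		.dev		= &pdev->dev,
		.dma_dir	= DMA_FROM_DEVICE,
		.max_len	= PAGE_SIZE,
		/* .offset left at 0: pool->p.offset contributes nothing to
		 * the sync offset, so the driver must add the headroom
		 * itself, and only for page 0 of a packet.
		 */
	};
	struct page_pool *pool = page_pool_create(&pp_params);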