diff mbox series

[net] net: ethernet: ti: am65-cpsw: Fix multi queue Rx on J7

Message ID 20241029-am65-cpsw-multi-rx-j7-fix-v1-1-426ca805918c@kernel.org (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net] net: ethernet: ti: am65-cpsw: Fix multi queue Rx on J7 | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: jpanis@baylibre.com
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 3 this patch: 3
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 237 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Roger Quadros Oct. 29, 2024, 3:05 p.m. UTC
On J7 platforms, setting up multiple RX flows was failing
as the RX free descriptor ring 0 is shared among all flows
and we did not allocate enough elements in the RX free descriptor
ring 0 to accommodate for all RX flows.

This issue is not present on AM62 as separate pair of
rings are used for free and completion rings for each flow.

Fix this by allocating enough elements for RX free descriptor
ring 0.

However, we can no longer rely on desc_idx (descriptor based
offsets) to identify the pages in the respective flows as
free descriptor ring includes elements for all flows.
To solve this, introduce a new swdata data structure to store
flow_id and page. This can be used to identify which flow (page_pool)
and page the descriptor belonged to when popped out of the
RX rings.

Fixes: da70d184a8c3 ("net: ethernet: ti: am65-cpsw: Introduce multi queue Rx")
Signed-off-by: Roger Quadros <rogerq@kernel.org>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 73 +++++++++++++++-----------------
 drivers/net/ethernet/ti/am65-cpsw-nuss.h |  6 ++-
 2 files changed, 38 insertions(+), 41 deletions(-)


---
base-commit: 42f7652d3eb527d03665b09edac47f85fb600924
change-id: 20241023-am65-cpsw-multi-rx-j7-fix-f9a2149be6dd

Best regards,

Comments

Roger Quadros Oct. 30, 2024, 7:14 a.m. UTC | #1
On 29/10/2024 17:05, Roger Quadros wrote:
> On J7 platforms, setting up multiple RX flows was failing
> as the RX free descriptor ring 0 is shared among all flows
> and we did not allocate enough elements in the RX free descriptor
> ring 0 to accommodate for all RX flows.
> 
> This issue is not present on AM62 as separate pair of
> rings are used for free and completion rings for each flow.
> 
> Fix this by allocating enough elements for RX free descriptor
> ring 0.
> 
> However, we can no longer rely on desc_idx (descriptor based
> offsets) to identify the pages in the respective flows as
> free descriptor ring includes elements for all flows.
> To solve this, introduce a new swdata data structure to store
> flow_id and page. This can be used to identify which flow (page_pool)
> and page the descriptor belonged to when popped out of the
> RX rings.
> 
> Fixes: da70d184a8c3 ("net: ethernet: ti: am65-cpsw: Introduce multi queue Rx")
> Signed-off-by: Roger Quadros <rogerq@kernel.org>
> ---
>  drivers/net/ethernet/ti/am65-cpsw-nuss.c | 73 +++++++++++++++-----------------
>  drivers/net/ethernet/ti/am65-cpsw-nuss.h |  6 ++-
>  2 files changed, 38 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
> index 0520e9f4bea7..4c46574e111c 100644
> --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
> +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
> @@ -339,7 +339,7 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common,
>  	struct device *dev = common->dev;
>  	dma_addr_t desc_dma;
>  	dma_addr_t buf_dma;
> -	void *swdata;
> +	struct am65_cpsw_swdata *swdata;
>  
>  	desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool);
>  	if (!desc_rx) {
> @@ -363,7 +363,8 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common,
>  	cppi5_hdesc_attach_buf(desc_rx, buf_dma, AM65_CPSW_MAX_PACKET_SIZE,
>  			       buf_dma, AM65_CPSW_MAX_PACKET_SIZE);
>  	swdata = cppi5_hdesc_get_swdata(desc_rx);
> -	*((void **)swdata) = page_address(page);
> +	swdata->page = page;
> +	swdata->flow_id = flow_idx;
>  
>  	return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, flow_idx,
>  					desc_rx, desc_dma);
> @@ -519,36 +520,31 @@ static enum am65_cpsw_tx_buf_type am65_cpsw_nuss_buf_type(struct am65_cpsw_tx_ch
>  
>  static inline void am65_cpsw_put_page(struct am65_cpsw_rx_flow *flow,
>  				      struct page *page,
> -				      bool allow_direct,
> -				      int desc_idx)
> +				      bool allow_direct)
>  {
>  	page_pool_put_full_page(flow->page_pool, page, allow_direct);
> -	flow->pages[desc_idx] = NULL;
>  }
>  
>  static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma)
>  {
> -	struct am65_cpsw_rx_flow *flow = data;
> +	struct am65_cpsw_rx_chn *rx_chn = data;
>  	struct cppi5_host_desc_t *desc_rx;
> -	struct am65_cpsw_rx_chn *rx_chn;
> +	struct am65_cpsw_swdata *swdata;
>  	dma_addr_t buf_dma;
>  	u32 buf_dma_len;
> -	void *page_addr;
> -	void **swdata;
> -	int desc_idx;
> +	struct page *page;
> +	u32 flow_id;
>  
> -	rx_chn = &flow->common->rx_chns;
>  	desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma);
>  	swdata = cppi5_hdesc_get_swdata(desc_rx);
> -	page_addr = *swdata;
> +	page = swdata->page;
> +	flow_id = swdata->flow_id;
>  	cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len);
>  	k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma);
>  	dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE);
>  	k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
>  
> -	desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx,
> -					   rx_chn->dsize_log2);
> -	am65_cpsw_put_page(flow, virt_to_page(page_addr), false, desc_idx);
> +	am65_cpsw_put_page(&rx_chn->flows[flow_id], page, false);
>  }
>  
>  static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn,
> @@ -703,14 +699,13 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
>  				ret = -ENOMEM;
>  				goto fail_rx;
>  			}
> -			flow->pages[i] = page;
>  
>  			ret = am65_cpsw_nuss_rx_push(common, page, flow_idx);
>  			if (ret < 0) {
>  				dev_err(common->dev,
>  					"cannot submit page to rx channel flow %d, error %d\n",
>  					flow_idx, ret);
> -				am65_cpsw_put_page(flow, page, false, i);
> +				am65_cpsw_put_page(flow, page, false);
>  				goto fail_rx;
>  			}
>  		}
> @@ -764,8 +759,8 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
>  
>  fail_rx:
>  	for (i = 0; i < common->rx_ch_num_flows; i++)
> -		k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i],
> -					  am65_cpsw_nuss_rx_cleanup, 0);
> +		k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn,
> +					  am65_cpsw_nuss_rx_cleanup, !!i);
>  
>  	am65_cpsw_destroy_xdp_rxqs(common);
>  
> @@ -777,6 +772,7 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
>  	struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns;
>  	struct am65_cpsw_tx_chn *tx_chn = common->tx_chns;
>  	int i;
> +	struct am65_cpsw_rx_flow *flow;

This is also unused. Will drop it in v2.

>  
>  	if (common->usage_count != 1)
>  		return 0;
> @@ -817,11 +813,12 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
>  			dev_err(common->dev, "rx teardown timeout\n");
>  	}
>  
> -	for (i = 0; i < common->rx_ch_num_flows; i++) {
> +	for (i = common->rx_ch_num_flows - 1; i >= 0; i--) {
> +		flow = &rx_chn->flows[i];
>  		napi_disable(&rx_chn->flows[i].napi_rx);
>  		hrtimer_cancel(&rx_chn->flows[i].rx_hrtimer);
> -		k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i],
> -					  am65_cpsw_nuss_rx_cleanup, 0);
> +		k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn,
> +					  am65_cpsw_nuss_rx_cleanup, !!i);
>  	}
>  
>  	k3_udma_glue_disable_rx_chn(rx_chn->rx_chn);
> @@ -1028,7 +1025,7 @@ static int am65_cpsw_xdp_tx_frame(struct net_device *ndev,
>  static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
>  			     struct am65_cpsw_port *port,
>  			     struct xdp_buff *xdp,
> -			     int desc_idx, int cpu, int *len)
> +			     int cpu, int *len)
>  {
>  	struct am65_cpsw_common *common = flow->common;
>  	struct am65_cpsw_ndev_priv *ndev_priv;
> @@ -1101,7 +1098,7 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
>  	}
>  
>  	page = virt_to_head_page(xdp->data);
> -	am65_cpsw_put_page(flow, page, true, desc_idx);
> +	am65_cpsw_put_page(flow, page, true);
>  
>  out:
>  	return ret;
> @@ -1150,6 +1147,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
>  	struct am65_cpsw_ndev_stats *stats;
>  	struct cppi5_host_desc_t *desc_rx;
>  	struct device *dev = common->dev;
> +	struct am65_cpsw_swdata *swdata;
>  	struct page *page, *new_page;
>  	dma_addr_t desc_dma, buf_dma;
>  	struct am65_cpsw_port *port;
> @@ -1159,7 +1157,6 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
>  	struct sk_buff *skb;
>  	struct xdp_buff	xdp;
>  	void *page_addr;
> -	void **swdata;
>  	u32 *psdata;
>  
>  	*xdp_state = AM65_CPSW_XDP_PASS;
> @@ -1182,8 +1179,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
>  		__func__, flow_idx, &desc_dma);
>  
>  	swdata = cppi5_hdesc_get_swdata(desc_rx);
> -	page_addr = *swdata;
> -	page = virt_to_page(page_addr);
> +	page = swdata->page;
> +	page_addr = page_address(page);
>  	cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len);
>  	k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma);
>  	pkt_len = cppi5_hdesc_get_pktlen(desc_rx);
> @@ -1201,7 +1198,6 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
>  
>  	desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx,
>  					   rx_chn->dsize_log2);

This is no longer required so need to drop it.
Will send a v2.

> -
>  	skb = am65_cpsw_build_skb(page_addr, ndev,
>  				  AM65_CPSW_MAX_PACKET_SIZE);
>  	if (unlikely(!skb)) {
> @@ -1213,7 +1209,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
>  		xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]);
>  		xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM,
>  				 pkt_len, false);
> -		*xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, desc_idx,
> +		*xdp_state = am65_cpsw_run_xdp(flow, port, &xdp,
>  					       cpu, &pkt_len);
>  		if (*xdp_state != AM65_CPSW_XDP_PASS)
>  			goto allocate;
> @@ -1247,10 +1243,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
>  		return -ENOMEM;
>  	}
>  
> -	flow->pages[desc_idx] = new_page;
> -
>  	if (netif_dormant(ndev)) {
> -		am65_cpsw_put_page(flow, new_page, true, desc_idx);
> +		am65_cpsw_put_page(flow, new_page, true);
>  		ndev->stats.rx_dropped++;
>  		return 0;
>  	}
> @@ -1258,7 +1252,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
>  requeue:
>  	ret = am65_cpsw_nuss_rx_push(common, new_page, flow_idx);
>  	if (WARN_ON(ret < 0)) {
> -		am65_cpsw_put_page(flow, new_page, true, desc_idx);
> +		am65_cpsw_put_page(flow, new_page, true);
>  		ndev->stats.rx_errors++;
>  		ndev->stats.rx_dropped++;
>  	}
> @@ -2402,10 +2396,6 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
>  	for (i = 0; i < common->rx_ch_num_flows; i++) {
>  		flow = &rx_chn->flows[i];
>  		flow->page_pool = NULL;
> -		flow->pages = devm_kcalloc(dev, AM65_CPSW_MAX_RX_DESC,
> -					   sizeof(*flow->pages), GFP_KERNEL);
> -		if (!flow->pages)
> -			return -ENOMEM;
>  	}
>  
>  	rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, "rx", &rx_cfg);
> @@ -2455,10 +2445,12 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
>  		flow = &rx_chn->flows[i];
>  		flow->id = i;
>  		flow->common = common;
> +		flow->irq = -EINVAL;
>  
>  		rx_flow_cfg.ring_rxfdq0_id = fdqring_id;
>  		rx_flow_cfg.rx_cfg.size = max_desc_num;
> -		rx_flow_cfg.rxfdq_cfg.size = max_desc_num;
> +		/* share same FDQ for all flows */
> +		rx_flow_cfg.rxfdq_cfg.size = max_desc_num * rx_cfg.flow_id_num;
>  		rx_flow_cfg.rxfdq_cfg.mode = common->pdata.fdqring_mode;
>  
>  		ret = k3_udma_glue_rx_flow_init(rx_chn->rx_chn,
> @@ -2496,6 +2488,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
>  		if (ret) {
>  			dev_err(dev, "failure requesting rx %d irq %u, %d\n",
>  				i, flow->irq, ret);
> +			flow->irq = -EINVAL;
>  			goto err;
>  		}
>  	}
> @@ -3349,8 +3342,8 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
>  
>  	for (i = 0; i < common->rx_ch_num_flows; i++)
>  		k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i,
> -					  &rx_chan->flows[i],
> -					  am65_cpsw_nuss_rx_cleanup, 0);
> +					  rx_chan,
> +					  am65_cpsw_nuss_rx_cleanup, !!i);
>  
>  	k3_udma_glue_disable_rx_chn(rx_chan->rx_chn);
>  
> diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
> index dc8d544230dc..92a27ba4c601 100644
> --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
> +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
> @@ -101,10 +101,14 @@ struct am65_cpsw_rx_flow {
>  	struct hrtimer rx_hrtimer;
>  	unsigned long rx_pace_timeout;
>  	struct page_pool *page_pool;
> -	struct page **pages;
>  	char name[32];
>  };
>  
> +struct am65_cpsw_swdata {
> +	u32 flow_id;
> +	struct page *page;
> +};
> +
>  struct am65_cpsw_rx_chn {
>  	struct device *dev;
>  	struct device *dma_dev;
> 
> ---
> base-commit: 42f7652d3eb527d03665b09edac47f85fb600924
> change-id: 20241023-am65-cpsw-multi-rx-j7-fix-f9a2149be6dd
> 
> Best regards,
diff mbox series

Patch

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 0520e9f4bea7..4c46574e111c 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -339,7 +339,7 @@  static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common,
 	struct device *dev = common->dev;
 	dma_addr_t desc_dma;
 	dma_addr_t buf_dma;
-	void *swdata;
+	struct am65_cpsw_swdata *swdata;
 
 	desc_rx = k3_cppi_desc_pool_alloc(rx_chn->desc_pool);
 	if (!desc_rx) {
@@ -363,7 +363,8 @@  static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common,
 	cppi5_hdesc_attach_buf(desc_rx, buf_dma, AM65_CPSW_MAX_PACKET_SIZE,
 			       buf_dma, AM65_CPSW_MAX_PACKET_SIZE);
 	swdata = cppi5_hdesc_get_swdata(desc_rx);
-	*((void **)swdata) = page_address(page);
+	swdata->page = page;
+	swdata->flow_id = flow_idx;
 
 	return k3_udma_glue_push_rx_chn(rx_chn->rx_chn, flow_idx,
 					desc_rx, desc_dma);
@@ -519,36 +520,31 @@  static enum am65_cpsw_tx_buf_type am65_cpsw_nuss_buf_type(struct am65_cpsw_tx_ch
 
 static inline void am65_cpsw_put_page(struct am65_cpsw_rx_flow *flow,
 				      struct page *page,
-				      bool allow_direct,
-				      int desc_idx)
+				      bool allow_direct)
 {
 	page_pool_put_full_page(flow->page_pool, page, allow_direct);
-	flow->pages[desc_idx] = NULL;
 }
 
 static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma)
 {
-	struct am65_cpsw_rx_flow *flow = data;
+	struct am65_cpsw_rx_chn *rx_chn = data;
 	struct cppi5_host_desc_t *desc_rx;
-	struct am65_cpsw_rx_chn *rx_chn;
+	struct am65_cpsw_swdata *swdata;
 	dma_addr_t buf_dma;
 	u32 buf_dma_len;
-	void *page_addr;
-	void **swdata;
-	int desc_idx;
+	struct page *page;
+	u32 flow_id;
 
-	rx_chn = &flow->common->rx_chns;
 	desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma);
 	swdata = cppi5_hdesc_get_swdata(desc_rx);
-	page_addr = *swdata;
+	page = swdata->page;
+	flow_id = swdata->flow_id;
 	cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len);
 	k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma);
 	dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE);
 	k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx);
 
-	desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx,
-					   rx_chn->dsize_log2);
-	am65_cpsw_put_page(flow, virt_to_page(page_addr), false, desc_idx);
+	am65_cpsw_put_page(&rx_chn->flows[flow_id], page, false);
 }
 
 static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn,
@@ -703,14 +699,13 @@  static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
 				ret = -ENOMEM;
 				goto fail_rx;
 			}
-			flow->pages[i] = page;
 
 			ret = am65_cpsw_nuss_rx_push(common, page, flow_idx);
 			if (ret < 0) {
 				dev_err(common->dev,
 					"cannot submit page to rx channel flow %d, error %d\n",
 					flow_idx, ret);
-				am65_cpsw_put_page(flow, page, false, i);
+				am65_cpsw_put_page(flow, page, false);
 				goto fail_rx;
 			}
 		}
@@ -764,8 +759,8 @@  static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
 
 fail_rx:
 	for (i = 0; i < common->rx_ch_num_flows; i++)
-		k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i],
-					  am65_cpsw_nuss_rx_cleanup, 0);
+		k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn,
+					  am65_cpsw_nuss_rx_cleanup, !!i);
 
 	am65_cpsw_destroy_xdp_rxqs(common);
 
@@ -777,6 +772,7 @@  static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
 	struct am65_cpsw_rx_chn *rx_chn = &common->rx_chns;
 	struct am65_cpsw_tx_chn *tx_chn = common->tx_chns;
 	int i;
+	struct am65_cpsw_rx_flow *flow;
 
 	if (common->usage_count != 1)
 		return 0;
@@ -817,11 +813,12 @@  static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common)
 			dev_err(common->dev, "rx teardown timeout\n");
 	}
 
-	for (i = 0; i < common->rx_ch_num_flows; i++) {
+	for (i = common->rx_ch_num_flows - 1; i >= 0; i--) {
+		flow = &rx_chn->flows[i];
 		napi_disable(&rx_chn->flows[i].napi_rx);
 		hrtimer_cancel(&rx_chn->flows[i].rx_hrtimer);
-		k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, &rx_chn->flows[i],
-					  am65_cpsw_nuss_rx_cleanup, 0);
+		k3_udma_glue_reset_rx_chn(rx_chn->rx_chn, i, rx_chn,
+					  am65_cpsw_nuss_rx_cleanup, !!i);
 	}
 
 	k3_udma_glue_disable_rx_chn(rx_chn->rx_chn);
@@ -1028,7 +1025,7 @@  static int am65_cpsw_xdp_tx_frame(struct net_device *ndev,
 static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
 			     struct am65_cpsw_port *port,
 			     struct xdp_buff *xdp,
-			     int desc_idx, int cpu, int *len)
+			     int cpu, int *len)
 {
 	struct am65_cpsw_common *common = flow->common;
 	struct am65_cpsw_ndev_priv *ndev_priv;
@@ -1101,7 +1098,7 @@  static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow,
 	}
 
 	page = virt_to_head_page(xdp->data);
-	am65_cpsw_put_page(flow, page, true, desc_idx);
+	am65_cpsw_put_page(flow, page, true);
 
 out:
 	return ret;
@@ -1150,6 +1147,7 @@  static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
 	struct am65_cpsw_ndev_stats *stats;
 	struct cppi5_host_desc_t *desc_rx;
 	struct device *dev = common->dev;
+	struct am65_cpsw_swdata *swdata;
 	struct page *page, *new_page;
 	dma_addr_t desc_dma, buf_dma;
 	struct am65_cpsw_port *port;
@@ -1159,7 +1157,6 @@  static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
 	struct sk_buff *skb;
 	struct xdp_buff	xdp;
 	void *page_addr;
-	void **swdata;
 	u32 *psdata;
 
 	*xdp_state = AM65_CPSW_XDP_PASS;
@@ -1182,8 +1179,8 @@  static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
 		__func__, flow_idx, &desc_dma);
 
 	swdata = cppi5_hdesc_get_swdata(desc_rx);
-	page_addr = *swdata;
-	page = virt_to_page(page_addr);
+	page = swdata->page;
+	page_addr = page_address(page);
 	cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len);
 	k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma);
 	pkt_len = cppi5_hdesc_get_pktlen(desc_rx);
@@ -1201,7 +1198,6 @@  static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
 
 	desc_idx = am65_cpsw_nuss_desc_idx(rx_chn->desc_pool, desc_rx,
 					   rx_chn->dsize_log2);
-
 	skb = am65_cpsw_build_skb(page_addr, ndev,
 				  AM65_CPSW_MAX_PACKET_SIZE);
 	if (unlikely(!skb)) {
@@ -1213,7 +1209,7 @@  static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
 		xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]);
 		xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM,
 				 pkt_len, false);
-		*xdp_state = am65_cpsw_run_xdp(flow, port, &xdp, desc_idx,
+		*xdp_state = am65_cpsw_run_xdp(flow, port, &xdp,
 					       cpu, &pkt_len);
 		if (*xdp_state != AM65_CPSW_XDP_PASS)
 			goto allocate;
@@ -1247,10 +1243,8 @@  static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
 		return -ENOMEM;
 	}
 
-	flow->pages[desc_idx] = new_page;
-
 	if (netif_dormant(ndev)) {
-		am65_cpsw_put_page(flow, new_page, true, desc_idx);
+		am65_cpsw_put_page(flow, new_page, true);
 		ndev->stats.rx_dropped++;
 		return 0;
 	}
@@ -1258,7 +1252,7 @@  static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow,
 requeue:
 	ret = am65_cpsw_nuss_rx_push(common, new_page, flow_idx);
 	if (WARN_ON(ret < 0)) {
-		am65_cpsw_put_page(flow, new_page, true, desc_idx);
+		am65_cpsw_put_page(flow, new_page, true);
 		ndev->stats.rx_errors++;
 		ndev->stats.rx_dropped++;
 	}
@@ -2402,10 +2396,6 @@  static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
 	for (i = 0; i < common->rx_ch_num_flows; i++) {
 		flow = &rx_chn->flows[i];
 		flow->page_pool = NULL;
-		flow->pages = devm_kcalloc(dev, AM65_CPSW_MAX_RX_DESC,
-					   sizeof(*flow->pages), GFP_KERNEL);
-		if (!flow->pages)
-			return -ENOMEM;
 	}
 
 	rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, "rx", &rx_cfg);
@@ -2455,10 +2445,12 @@  static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
 		flow = &rx_chn->flows[i];
 		flow->id = i;
 		flow->common = common;
+		flow->irq = -EINVAL;
 
 		rx_flow_cfg.ring_rxfdq0_id = fdqring_id;
 		rx_flow_cfg.rx_cfg.size = max_desc_num;
-		rx_flow_cfg.rxfdq_cfg.size = max_desc_num;
+		/* share same FDQ for all flows */
+		rx_flow_cfg.rxfdq_cfg.size = max_desc_num * rx_cfg.flow_id_num;
 		rx_flow_cfg.rxfdq_cfg.mode = common->pdata.fdqring_mode;
 
 		ret = k3_udma_glue_rx_flow_init(rx_chn->rx_chn,
@@ -2496,6 +2488,7 @@  static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
 		if (ret) {
 			dev_err(dev, "failure requesting rx %d irq %u, %d\n",
 				i, flow->irq, ret);
+			flow->irq = -EINVAL;
 			goto err;
 		}
 	}
@@ -3349,8 +3342,8 @@  static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
 
 	for (i = 0; i < common->rx_ch_num_flows; i++)
 		k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i,
-					  &rx_chan->flows[i],
-					  am65_cpsw_nuss_rx_cleanup, 0);
+					  rx_chan,
+					  am65_cpsw_nuss_rx_cleanup, !!i);
 
 	k3_udma_glue_disable_rx_chn(rx_chan->rx_chn);
 
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index dc8d544230dc..92a27ba4c601 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -101,10 +101,14 @@  struct am65_cpsw_rx_flow {
 	struct hrtimer rx_hrtimer;
 	unsigned long rx_pace_timeout;
 	struct page_pool *page_pool;
-	struct page **pages;
 	char name[32];
 };
 
+struct am65_cpsw_swdata {
+	u32 flow_id;
+	struct page *page;
+};
+
 struct am65_cpsw_rx_chn {
 	struct device *dev;
 	struct device *dma_dev;