diff mbox series

[v2,1/1] net: fec: handle page_pool_dev_alloc_pages error

Message ID 20250113154846.1765414-1-kgroeneveld@lenbrook.com (mailing list archive)
State New
Headers show
Series [v2,1/1] net: fec: handle page_pool_dev_alloc_pages error | expand

Commit Message

Kevin Groeneveld Jan. 13, 2025, 3:48 p.m. UTC
The fec_enet_update_cbd function calls page_pool_dev_alloc_pages but did
not handle the case when it returned NULL. There was a WARN_ON(!new_page)
but it would still proceed to use the NULL pointer and then crash.

This case does seem somewhat rare but when the system is under memory
pressure it can happen. One case where I can duplicate this with some
frequency is when writing over a smbd share to a SATA HDD attached to an
imx6q.

Setting /proc/sys/vm/min_free_kbytes to higher values also seems to solve
the problem for my test case. But it still seems wrong that the fec driver
ignores the memory allocation error and can crash.

This commit handles the allocation error by dropping the current packet.

Fixes: 95698ff6177b5 ("net: fec: using page pool to manage RX buffers")
Signed-off-by: Kevin Groeneveld <kgroeneveld@lenbrook.com>
---
v1 -> v2:
- Simplify commit message.
- As suggested by and based on partial patch from Wei Fang, re-write to
  drop packet instead of trying to return from fec_enet_rx_napi early.

 drivers/net/ethernet/freescale/fec_main.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

Comments

Jacob Keller Jan. 13, 2025, 6:55 p.m. UTC | #1
On 1/13/2025 7:48 AM, Kevin Groeneveld wrote:
> The fec_enet_update_cbd function calls page_pool_dev_alloc_pages but did
> not handle the case when it returned NULL. There was a WARN_ON(!new_page)
> but it would still proceed to use the NULL pointer and then crash.
> 
> This case does seem somewhat rare but when the system is under memory
> pressure it can happen. One case where I can duplicate this with some
> frequency is when writing over a smbd share to a SATA HDD attached to an
> imx6q.
> 
> Setting /proc/sys/vm/min_free_kbytes to higher values also seems to solve
> the problem for my test case. But it still seems wrong that the fec driver
> ignores the memory allocation error and can crash.
> 
> This commit handles the allocation error by dropping the current packet.
> 
> Fixes: 95698ff6177b5 ("net: fec: using page pool to manage RX buffers")
> Signed-off-by: Kevin Groeneveld <kgroeneveld@lenbrook.com>
> ---

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Wei Fang Jan. 14, 2025, 1:19 a.m. UTC | #2
> The fec_enet_update_cbd function calls page_pool_dev_alloc_pages but did
> not handle the case when it returned NULL. There was a
> WARN_ON(!new_page)
> but it would still proceed to use the NULL pointer and then crash.
> 
> This case does seem somewhat rare but when the system is under memory
> pressure it can happen. One case where I can duplicate this with some
> frequency is when writing over a smbd share to a SATA HDD attached to an
> imx6q.
> 
> Setting /proc/sys/vm/min_free_kbytes to higher values also seems to solve
> the problem for my test case. But it still seems wrong that the fec driver
> ignores the memory allocation error and can crash.
> 
> This commit handles the allocation error by dropping the current packet.
> 
> Fixes: 95698ff6177b5 ("net: fec: using page pool to manage RX buffers")
> Signed-off-by: Kevin Groeneveld <kgroeneveld@lenbrook.com>
> ---
> v1 -> v2:
> - Simplify commit message.
> - As suggested by and based on partial patch from Wei Fang, re-write to
>   drop packet instead of trying to return from fec_enet_rx_napi early.
> 
>  drivers/net/ethernet/freescale/fec_main.c | 19 ++++++++++++++-----
>  1 file changed, 14 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec_main.c
> b/drivers/net/ethernet/freescale/fec_main.c
> index 1b55047c0237..4566848e1d7c 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -1591,19 +1591,22 @@ static void fec_enet_tx(struct net_device *ndev,
> int budget)
>  		fec_enet_tx_queue(ndev, i, budget);
>  }
> 
> -static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
> +static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
>  				struct bufdesc *bdp, int index)
>  {
>  	struct page *new_page;
>  	dma_addr_t phys_addr;
> 
>  	new_page = page_pool_dev_alloc_pages(rxq->page_pool);
> -	WARN_ON(!new_page);
> -	rxq->rx_skb_info[index].page = new_page;
> +	if (unlikely(!new_page))
> +		return -ENOMEM;
> 
> +	rxq->rx_skb_info[index].page = new_page;
>  	rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM;
>  	phys_addr = page_pool_get_dma_addr(new_page) +
> FEC_ENET_XDP_HEADROOM;
>  	bdp->cbd_bufaddr = cpu_to_fec32(phys_addr);
> +
> +	return 0;
>  }
> 
>  static u32
> @@ -1698,6 +1701,7 @@ fec_enet_rx_queue(struct net_device *ndev, int
> budget, u16 queue_id)
>  	int cpu = smp_processor_id();
>  	struct xdp_buff xdp;
>  	struct page *page;
> +	__fec32 cbd_bufaddr;
>  	u32 sub_len = 4;
> 
>  #if !defined(CONFIG_M5272)
> @@ -1766,12 +1770,17 @@ fec_enet_rx_queue(struct net_device *ndev, int
> budget, u16 queue_id)
> 
>  		index = fec_enet_get_bd_index(bdp, &rxq->bd);
>  		page = rxq->rx_skb_info[index].page;
> +		cbd_bufaddr = bdp->cbd_bufaddr;
> +		if (fec_enet_update_cbd(rxq, bdp, index)) {
> +			ndev->stats.rx_dropped++;
> +			goto rx_processing_done;
> +		}
> +
>  		dma_sync_single_for_cpu(&fep->pdev->dev,
> -					fec32_to_cpu(bdp->cbd_bufaddr),
> +					fec32_to_cpu(cbd_bufaddr),
>  					pkt_len,
>  					DMA_FROM_DEVICE);
>  		prefetch(page_address(page));
> -		fec_enet_update_cbd(rxq, bdp, index);
> 
>  		if (xdp_prog) {
>  			xdp_buff_clear_frags_flag(&xdp);
> --
> 2.43.0

Thanks.

Reviewed-by: Wei Fang <wei.fang@nxp.com>
patchwork-bot+netdevbpf@kernel.org Jan. 15, 2025, 2:50 a.m. UTC | #3
Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Mon, 13 Jan 2025 10:48:45 -0500 you wrote:
> The fec_enet_update_cbd function calls page_pool_dev_alloc_pages but did
> not handle the case when it returned NULL. There was a WARN_ON(!new_page)
> but it would still proceed to use the NULL pointer and then crash.
> 
> This case does seem somewhat rare but when the system is under memory
> pressure it can happen. One case where I can duplicate this with some
> frequency is when writing over a smbd share to a SATA HDD attached to an
> imx6q.
> 
> [...]

Here is the summary with links:
  - [v2,1/1] net: fec: handle page_pool_dev_alloc_pages error
    https://git.kernel.org/netdev/net/c/001ba0902046

You are awesome, thank you!
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 1b55047c0237..4566848e1d7c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1591,19 +1591,22 @@  static void fec_enet_tx(struct net_device *ndev, int budget)
 		fec_enet_tx_queue(ndev, i, budget);
 }
 
-static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
+static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
 				struct bufdesc *bdp, int index)
 {
 	struct page *new_page;
 	dma_addr_t phys_addr;
 
 	new_page = page_pool_dev_alloc_pages(rxq->page_pool);
-	WARN_ON(!new_page);
-	rxq->rx_skb_info[index].page = new_page;
+	if (unlikely(!new_page))
+		return -ENOMEM;
 
+	rxq->rx_skb_info[index].page = new_page;
 	rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM;
 	phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM;
 	bdp->cbd_bufaddr = cpu_to_fec32(phys_addr);
+
+	return 0;
 }
 
 static u32
@@ -1698,6 +1701,7 @@  fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 	int cpu = smp_processor_id();
 	struct xdp_buff xdp;
 	struct page *page;
+	__fec32 cbd_bufaddr;
 	u32 sub_len = 4;
 
 #if !defined(CONFIG_M5272)
@@ -1766,12 +1770,17 @@  fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 
 		index = fec_enet_get_bd_index(bdp, &rxq->bd);
 		page = rxq->rx_skb_info[index].page;
+		cbd_bufaddr = bdp->cbd_bufaddr;
+		if (fec_enet_update_cbd(rxq, bdp, index)) {
+			ndev->stats.rx_dropped++;
+			goto rx_processing_done;
+		}
+
 		dma_sync_single_for_cpu(&fep->pdev->dev,
-					fec32_to_cpu(bdp->cbd_bufaddr),
+					fec32_to_cpu(cbd_bufaddr),
 					pkt_len,
 					DMA_FROM_DEVICE);
 		prefetch(page_address(page));
-		fec_enet_update_cbd(rxq, bdp, index);
 
 		if (xdp_prog) {
 			xdp_buff_clear_frags_flag(&xdp);