diff mbox series

[net] net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup

Message ID 1724149347-14430-1-git-send-email-schakrabarti@linux.microsoft.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net] net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7 this patch: 7
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: shradhagupta@linux.microsoft.com
netdev/build_clang success Errors and warnings before: 16 this patch: 16
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes fail Problems with Fixes tag: 1
netdev/build_allmodconfig_warn success Errors and warnings before: 16 this patch: 16
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 83 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-08-20--15-00 (tests: 712)

Commit Message

Souradeep Chakrabarti Aug. 20, 2024, 10:22 a.m. UTC
Currently napi_disable() gets called during rxq and txq cleanup
even before NAPI is enabled and the hrtimer is initialized. This
causes a kernel panic:

? page_fault_oops+0x136/0x2b0
  ? page_counter_cancel+0x2e/0x80
  ? do_user_addr_fault+0x2f2/0x640
  ? refill_obj_stock+0xc4/0x110
  ? exc_page_fault+0x71/0x160
  ? asm_exc_page_fault+0x27/0x30
  ? __mmdrop+0x10/0x180
  ? __mmdrop+0xec/0x180
  ? hrtimer_active+0xd/0x50
  hrtimer_try_to_cancel+0x2c/0xf0
  hrtimer_cancel+0x15/0x30
  napi_disable+0x65/0x90
  mana_destroy_rxq+0x4c/0x2f0
  mana_create_rxq.isra.0+0x56c/0x6d0
  ? mana_uncfg_vport+0x50/0x50
  mana_alloc_queues+0x21b/0x320
  ? skb_dequeue+0x5f/0x80

Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 41 +++++++++++++------
 1 file changed, 28 insertions(+), 13 deletions(-)

Comments

Shradha Gupta Aug. 20, 2024, 2:31 p.m. UTC | #1
On Tue, Aug 20, 2024 at 03:22:27AM -0700, Souradeep Chakrabarti wrote:
> Currently napi_disable() gets called during rxq and txq cleanup,
> even before napi is enabled and hrtimer is initialized. It causes
> kernel panic.
> 
> ? page_fault_oops+0x136/0x2b0
>   ? page_counter_cancel+0x2e/0x80
>   ? do_user_addr_fault+0x2f2/0x640
>   ? refill_obj_stock+0xc4/0x110
>   ? exc_page_fault+0x71/0x160
>   ? asm_exc_page_fault+0x27/0x30
>   ? __mmdrop+0x10/0x180
>   ? __mmdrop+0xec/0x180
>   ? hrtimer_active+0xd/0x50
>   hrtimer_try_to_cancel+0x2c/0xf0
>   hrtimer_cancel+0x15/0x30
>   napi_disable+0x65/0x90
>   mana_destroy_rxq+0x4c/0x2f0
>   mana_create_rxq.isra.0+0x56c/0x6d0
>   ? mana_uncfg_vport+0x50/0x50
>   mana_alloc_queues+0x21b/0x320
>   ? skb_dequeue+0x5f/0x80
> 
> Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
> 
> Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 41 +++++++++++++------
>  1 file changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 39f56973746d..882b05e087b9 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1862,7 +1862,7 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
>  	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
>  }
>  
> -static void mana_destroy_txq(struct mana_port_context *apc)
> +static void mana_cleanup_napi_txq(struct mana_port_context *apc)
>  {
>  	struct napi_struct *napi;
>  	int i;
> @@ -1875,7 +1875,17 @@ static void mana_destroy_txq(struct mana_port_context *apc)
>  		napi_synchronize(napi);
>  		napi_disable(napi);
>  		netif_napi_del(napi);
> +	}
> +}
> +
> +static void mana_destroy_txq(struct mana_port_context *apc)
> +{
> +	int i;
> +
> +	if (!apc->tx_qp)
> +		return;
>  
> +	for (i = 0; i < apc->num_queues; i++) {
>  		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
>  
>  		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
> @@ -2007,6 +2017,21 @@ static int mana_create_txq(struct mana_port_context *apc,
>  	return err;
>  }
I think a mana_cleanup_napi_txq() call should also be added in the out
path of mana_create_txq(). Consider this case: NAPI enable succeeds for
the first few tx queues, but if queue creation then fails for a later
SQ, we don't clean up the NAPIs of the previously successful ones.
>  
> +static void mana_cleanup_napi_rxq(struct mana_port_context *apc,
> +				  struct mana_rxq *rxq, bool validate_state)
> +{
> +	struct napi_struct *napi;
> +
> +	if (!rxq)
> +		return;
> +
> +	napi = &rxq->rx_cq.napi;
> +	if (validate_state)
> +		napi_synchronize(napi);
> +	napi_disable(napi);
> +	netif_napi_del(napi);
> +}
> +
>  static void mana_destroy_rxq(struct mana_port_context *apc,
>  			     struct mana_rxq *rxq, bool validate_state)
>  
> @@ -2014,24 +2039,14 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
>  	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
>  	struct mana_recv_buf_oob *rx_oob;
>  	struct device *dev = gc->dev;
> -	struct napi_struct *napi;
>  	struct page *page;
>  	int i;
>  
>  	if (!rxq)
>  		return;
>  
> -	napi = &rxq->rx_cq.napi;
> -
> -	if (validate_state)
> -		napi_synchronize(napi);
> -
> -	napi_disable(napi);
> -
>  	xdp_rxq_info_unreg(&rxq->xdp_rxq);
>  
> -	netif_napi_del(napi);
> -
>  	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
>  
>  	mana_deinit_cq(apc, &rxq->rx_cq);
> @@ -2336,11 +2351,11 @@ static void mana_destroy_vport(struct mana_port_context *apc)
>  		rxq = apc->rxqs[rxq_idx];
>  		if (!rxq)
>  			continue;
> -
> +		mana_cleanup_napi_rxq(apc, rxq, true);
>  		mana_destroy_rxq(apc, rxq, true);
>  		apc->rxqs[rxq_idx] = NULL;
>  	}
> -
> +	mana_cleanup_napi_txq(apc);
>  	mana_destroy_txq(apc);
>  	mana_uncfg_vport(apc);
>  
> -- 
> 2.34.1
> 
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39f56973746d..882b05e087b9 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1862,7 +1862,7 @@  static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
 	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
 }
 
-static void mana_destroy_txq(struct mana_port_context *apc)
+static void mana_cleanup_napi_txq(struct mana_port_context *apc)
 {
 	struct napi_struct *napi;
 	int i;
@@ -1875,7 +1875,17 @@  static void mana_destroy_txq(struct mana_port_context *apc)
 		napi_synchronize(napi);
 		napi_disable(napi);
 		netif_napi_del(napi);
+	}
+}
+
+static void mana_destroy_txq(struct mana_port_context *apc)
+{
+	int i;
+
+	if (!apc->tx_qp)
+		return;
 
+	for (i = 0; i < apc->num_queues; i++) {
 		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
 		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -2007,6 +2017,21 @@  static int mana_create_txq(struct mana_port_context *apc,
 	return err;
 }
 
+static void mana_cleanup_napi_rxq(struct mana_port_context *apc,
+				  struct mana_rxq *rxq, bool validate_state)
+{
+	struct napi_struct *napi;
+
+	if (!rxq)
+		return;
+
+	napi = &rxq->rx_cq.napi;
+	if (validate_state)
+		napi_synchronize(napi);
+	napi_disable(napi);
+	netif_napi_del(napi);
+}
+
 static void mana_destroy_rxq(struct mana_port_context *apc,
 			     struct mana_rxq *rxq, bool validate_state)
 
@@ -2014,24 +2039,14 @@  static void mana_destroy_rxq(struct mana_port_context *apc,
 	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
 	struct mana_recv_buf_oob *rx_oob;
 	struct device *dev = gc->dev;
-	struct napi_struct *napi;
 	struct page *page;
 	int i;
 
 	if (!rxq)
 		return;
 
-	napi = &rxq->rx_cq.napi;
-
-	if (validate_state)
-		napi_synchronize(napi);
-
-	napi_disable(napi);
-
 	xdp_rxq_info_unreg(&rxq->xdp_rxq);
 
-	netif_napi_del(napi);
-
 	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
 	mana_deinit_cq(apc, &rxq->rx_cq);
@@ -2336,11 +2351,11 @@  static void mana_destroy_vport(struct mana_port_context *apc)
 		rxq = apc->rxqs[rxq_idx];
 		if (!rxq)
 			continue;
-
+		mana_cleanup_napi_rxq(apc, rxq, true);
 		mana_destroy_rxq(apc, rxq, true);
 		apc->rxqs[rxq_idx] = NULL;
 	}
-
+	mana_cleanup_napi_txq(apc);
 	mana_destroy_txq(apc);
 	mana_uncfg_vport(apc);