diff mbox series

[V3,net] net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup

Message ID 1724920610-15546-1-git-send-email-schakrabarti@linux.microsoft.com (mailing list archive)
State Superseded
Headers show
Series [V3,net] net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup | expand

Commit Message

Souradeep Chakrabarti Aug. 29, 2024, 8:36 a.m. UTC
Currently napi_disable() gets called during rxq and txq cleanup,
even before napi is enabled and the hrtimer is initialized. This
causes a kernel panic.

? page_fault_oops+0x136/0x2b0
  ? page_counter_cancel+0x2e/0x80
  ? do_user_addr_fault+0x2f2/0x640
  ? refill_obj_stock+0xc4/0x110
  ? exc_page_fault+0x71/0x160
  ? asm_exc_page_fault+0x27/0x30
  ? __mmdrop+0x10/0x180
  ? __mmdrop+0xec/0x180
  ? hrtimer_active+0xd/0x50
  hrtimer_try_to_cancel+0x2c/0xf0
  hrtimer_cancel+0x15/0x30
  napi_disable+0x65/0x90
  mana_destroy_rxq+0x4c/0x2f0
  mana_create_rxq.isra.0+0x56c/0x6d0
  ? mana_uncfg_vport+0x50/0x50
  mana_alloc_queues+0x21b/0x320
  ? skb_dequeue+0x5f/0x80

Cc: stable@vger.kernel.org
Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
---
V2 -> V3:
Instead of using a napi-internal attribute, use an atomic
attribute to verify that napi is initialized for a particular txq / rxq.

V1 -> V2:
Addressed the comment on cleaning up napi only for the queues
where queue creation was successful.
---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
 include/net/mana/mana.h                       |  4 +++
 2 files changed, 24 insertions(+), 10 deletions(-)

Comments

Haiyang Zhang Aug. 29, 2024, 3:39 p.m. UTC | #1
> -----Original Message-----
> From: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> Sent: Thursday, August 29, 2024 4:37 AM
> To: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; wei.liu@kernel.org; Dexuan Cui
> <decui@microsoft.com>; davem@davemloft.net; Long Li
> <longli@microsoft.com>; ssengar@linux.microsoft.com; linux-
> hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; linux-rdma@vger.kernel.org
> Cc: Souradeep Chakrabarti <schakrabarti@microsoft.com>; Souradeep
> Chakrabarti <schakrabarti@linux.microsoft.com>; stable@vger.kernel.org
> Subject: [PATCH V3 net] net: mana: Fix error handling in
> mana_create_txq/rxq's NAPI cleanup
> 
> Currently napi_disable() gets called during rxq and txq cleanup,
> even before napi is enabled and hrtimer is initialized. It causes
> kernel panic.
> 
> ? page_fault_oops+0x136/0x2b0
>   ? page_counter_cancel+0x2e/0x80
>   ? do_user_addr_fault+0x2f2/0x640
>   ? refill_obj_stock+0xc4/0x110
>   ? exc_page_fault+0x71/0x160
>   ? asm_exc_page_fault+0x27/0x30
>   ? __mmdrop+0x10/0x180
>   ? __mmdrop+0xec/0x180
>   ? hrtimer_active+0xd/0x50
>   hrtimer_try_to_cancel+0x2c/0xf0
>   hrtimer_cancel+0x15/0x30
>   napi_disable+0x65/0x90
>   mana_destroy_rxq+0x4c/0x2f0
>   mana_create_rxq.isra.0+0x56c/0x6d0
>   ? mana_uncfg_vport+0x50/0x50
>   mana_alloc_queues+0x21b/0x320
>   ? skb_dequeue+0x5f/0x80
> 
> Cc: stable@vger.kernel.org
> Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
> Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
> Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
> ---
> V3 -> V2:
> Instead of using napi internal attribute, using an atomic
> attribute to verify napi is initialized for a particular txq / rxq.
> 
> V2 -> V1:
> Addressed the comment on cleaning up napi for the queues,
> where queue creation was successful.
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
>  include/net/mana/mana.h                       |  4 +++
>  2 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 39f56973746d..bd303c89cfa6 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct
> mana_port_context *apc)
> 
>  	for (i = 0; i < apc->num_queues; i++) {
>  		napi = &apc->tx_qp[i].tx_cq.napi;
> -		napi_synchronize(napi);
> -		napi_disable(napi);
> -		netif_napi_del(napi);
> -
> +		if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
> +			napi_synchronize(napi);
> +			napi_disable(napi);
> +			netif_napi_del(napi);
> +			atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
> +		}
>  		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
> 
>  		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
> @@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context
> *apc,
>  		txq->ndev = net;
>  		txq->net_txq = netdev_get_tx_queue(net, i);
>  		txq->vp_offset = apc->tx_vp_offset;
> +		atomic_set(&txq->napi_initialized, 0);
>  		skb_queue_head_init(&txq->pending_skbs);
> 
>  		memset(&spec, 0, sizeof(spec));
> @@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context
> *apc,
> 
>  		netif_napi_add_tx(net, &cq->napi, mana_poll);
>  		napi_enable(&cq->napi);
> +		atomic_set(&txq->napi_initialized, 1);
> 
>  		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
>  	}
> @@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct
> mana_port_context *apc,
> 
>  	napi = &rxq->rx_cq.napi;
> 
> -	if (validate_state)
> -		napi_synchronize(napi);
> +	if (atomic_read(&rxq->napi_initialized)) {
> 
> -	napi_disable(napi);
> +		if (validate_state)
> +			napi_synchronize(napi);
> 
> -	xdp_rxq_info_unreg(&rxq->xdp_rxq);
> +		napi_disable(napi);
> 
> -	netif_napi_del(napi);
> +		netif_napi_del(napi);
> +		atomic_set(&rxq->napi_initialized, 0);
> +	}
> +
> +	xdp_rxq_info_unreg(&rxq->xdp_rxq);
> 
>  	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
> 
> @@ -2199,6 +2207,7 @@ static struct mana_rxq *mana_create_rxq(struct
> mana_port_context *apc,
>  	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
>  	rxq->rxq_idx = rxq_idx;
>  	rxq->rxobj = INVALID_MANA_HANDLE;
> +	atomic_set(&rxq->napi_initialized, 0);
> 
>  	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
>  			   &rxq->headroom);
> @@ -2286,6 +2295,8 @@ static struct mana_rxq *mana_create_rxq(struct
> mana_port_context *apc,
> 
>  	napi_enable(&cq->napi);
> 
> +	atomic_set(&rxq->napi_initialized, 1);
> +
>  	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
>  out:
>  	if (!err)
> @@ -2336,7 +2347,6 @@ static void mana_destroy_vport(struct
> mana_port_context *apc)
>  		rxq = apc->rxqs[rxq_idx];
>  		if (!rxq)
>  			continue;
> -
>  		mana_destroy_rxq(apc, rxq, true);
>  		apc->rxqs[rxq_idx] = NULL;
>  	}
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index 7caa334f4888..be75abd63dc8 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -98,6 +98,8 @@ struct mana_txq {
> 
>  	atomic_t pending_sends;
> 
> +	atomic_t napi_initialized;
> +
>  	struct mana_stats_tx stats;
>  };
> 
> @@ -335,6 +337,8 @@ struct mana_rxq {
>  	bool xdp_flush;
>  	int xdp_rc; /* XDP redirect return code */
> 
> +	atomic_t napi_initialized;
> +
>  	struct page_pool *page_pool;
> 
>  	/* MUST BE THE LAST MEMBER:
> --
> 2.34.1

I agree with the idea of adding napi_initialized to track the
state.

But the Rx/Tx queue creation/removal are either on the same
thread, or protected by rtnl_lock, correct? So napi_initialized
should be a regular variable, not atomic_t.

Thanks,
- Haiyang
Haiyang Zhang Aug. 29, 2024, 4:55 p.m. UTC | #2
> -----Original Message-----
> From: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> Sent: Thursday, August 29, 2024 4:37 AM
> To: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; wei.liu@kernel.org; Dexuan Cui
> <decui@microsoft.com>; davem@davemloft.net; Long Li
> <longli@microsoft.com>; ssengar@linux.microsoft.com; linux-
> hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; linux-rdma@vger.kernel.org
> Cc: Souradeep Chakrabarti <schakrabarti@microsoft.com>; Souradeep
> Chakrabarti <schakrabarti@linux.microsoft.com>; stable@vger.kernel.org
> Subject: [PATCH V3 net] net: mana: Fix error handling in
> mana_create_txq/rxq's NAPI cleanup
> 
> Currently napi_disable() gets called during rxq and txq cleanup,
> even before napi is enabled and hrtimer is initialized. It causes
> kernel panic.
> 
> ? page_fault_oops+0x136/0x2b0
>   ? page_counter_cancel+0x2e/0x80
>   ? do_user_addr_fault+0x2f2/0x640
>   ? refill_obj_stock+0xc4/0x110
>   ? exc_page_fault+0x71/0x160
>   ? asm_exc_page_fault+0x27/0x30
>   ? __mmdrop+0x10/0x180
>   ? __mmdrop+0xec/0x180
>   ? hrtimer_active+0xd/0x50
>   hrtimer_try_to_cancel+0x2c/0xf0
>   hrtimer_cancel+0x15/0x30
>   napi_disable+0x65/0x90
>   mana_destroy_rxq+0x4c/0x2f0
>   mana_create_rxq.isra.0+0x56c/0x6d0
>   ? mana_uncfg_vport+0x50/0x50
>   mana_alloc_queues+0x21b/0x320
>   ? skb_dequeue+0x5f/0x80
> 
> Cc: stable@vger.kernel.org
> Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
> Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
> Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
> ---
> V3 -> V2:
> Instead of using napi internal attribute, using an atomic
> attribute to verify napi is initialized for a particular txq / rxq.
> 
> V2 -> V1:
> Addressed the comment on cleaning up napi for the queues,
> where queue creation was successful.
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
>  include/net/mana/mana.h                       |  4 +++
>  2 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 39f56973746d..bd303c89cfa6 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct
> mana_port_context *apc)
> 
>  	for (i = 0; i < apc->num_queues; i++) {
>  		napi = &apc->tx_qp[i].tx_cq.napi;
> -		napi_synchronize(napi);
> -		napi_disable(napi);
> -		netif_napi_del(napi);
> -
> +		if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
> +			napi_synchronize(napi);
> +			napi_disable(napi);
> +			netif_napi_del(napi);
> +			atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
> +		}
>  		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
> 
>  		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
> @@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context
> *apc,
>  		txq->ndev = net;
>  		txq->net_txq = netdev_get_tx_queue(net, i);
>  		txq->vp_offset = apc->tx_vp_offset;
> +		atomic_set(&txq->napi_initialized, 0);
>  		skb_queue_head_init(&txq->pending_skbs);
> 
>  		memset(&spec, 0, sizeof(spec));
> @@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context
> *apc,
> 
>  		netif_napi_add_tx(net, &cq->napi, mana_poll);
>  		napi_enable(&cq->napi);
> +		atomic_set(&txq->napi_initialized, 1);
> 
>  		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
>  	}
> @@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct
> mana_port_context *apc,
> 
>  	napi = &rxq->rx_cq.napi;
> 
> -	if (validate_state)
> -		napi_synchronize(napi);
> +	if (atomic_read(&rxq->napi_initialized)) {
> 
> -	napi_disable(napi);
> +		if (validate_state)
> +			napi_synchronize(napi);
> 
> -	xdp_rxq_info_unreg(&rxq->xdp_rxq);
> +		napi_disable(napi);
> 
> -	netif_napi_del(napi);
> +		netif_napi_del(napi);
> +		atomic_set(&rxq->napi_initialized, 0);
> +	}
> +
> +	xdp_rxq_info_unreg(&rxq->xdp_rxq);
> 
>  	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
> 
> @@ -2199,6 +2207,7 @@ static struct mana_rxq *mana_create_rxq(struct
> mana_port_context *apc,
>  	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
>  	rxq->rxq_idx = rxq_idx;
>  	rxq->rxobj = INVALID_MANA_HANDLE;
> +	atomic_set(&rxq->napi_initialized, 0);
> 
>  	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
>  			   &rxq->headroom);
> @@ -2286,6 +2295,8 @@ static struct mana_rxq *mana_create_rxq(struct
> mana_port_context *apc,
> 
>  	napi_enable(&cq->napi);
> 
> +	atomic_set(&rxq->napi_initialized, 1);
> +
>  	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
>  out:
>  	if (!err)
> @@ -2336,7 +2347,6 @@ static void mana_destroy_vport(struct
> mana_port_context *apc)
>  		rxq = apc->rxqs[rxq_idx];
>  		if (!rxq)
>  			continue;
> -
>  		mana_destroy_rxq(apc, rxq, true);

Also, this line removal is not necessary.
Shradha Gupta Aug. 30, 2024, 6:13 a.m. UTC | #3
On Thu, Aug 29, 2024 at 01:36:50AM -0700, Souradeep Chakrabarti wrote:
> Currently napi_disable() gets called during rxq and txq cleanup,
> even before napi is enabled and hrtimer is initialized. It causes
> kernel panic.
> 
> ? page_fault_oops+0x136/0x2b0
>   ? page_counter_cancel+0x2e/0x80
>   ? do_user_addr_fault+0x2f2/0x640
>   ? refill_obj_stock+0xc4/0x110
>   ? exc_page_fault+0x71/0x160
>   ? asm_exc_page_fault+0x27/0x30
>   ? __mmdrop+0x10/0x180
>   ? __mmdrop+0xec/0x180
>   ? hrtimer_active+0xd/0x50
>   hrtimer_try_to_cancel+0x2c/0xf0
>   hrtimer_cancel+0x15/0x30
>   napi_disable+0x65/0x90
>   mana_destroy_rxq+0x4c/0x2f0
>   mana_create_rxq.isra.0+0x56c/0x6d0
>   ? mana_uncfg_vport+0x50/0x50
>   mana_alloc_queues+0x21b/0x320
>   ? skb_dequeue+0x5f/0x80
> 
> Cc: stable@vger.kernel.org
> Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
> Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
> Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
> ---
> V3 -> V2:
> Instead of using napi internal attribute, using an atomic
> attribute to verify napi is initialized for a particular txq / rxq.
> 
> V2 -> V1:
> Addressed the comment on cleaning up napi for the queues,
> where queue creation was successful.
> ---
>  drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
>  include/net/mana/mana.h                       |  4 +++
>  2 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 39f56973746d..bd303c89cfa6 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct mana_port_context *apc)
>  
>  	for (i = 0; i < apc->num_queues; i++) {
>  		napi = &apc->tx_qp[i].tx_cq.napi;
> -		napi_synchronize(napi);
> -		napi_disable(napi);
> -		netif_napi_del(napi);
> -
> +		if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
> +			napi_synchronize(napi);
> +			napi_disable(napi);
> +			netif_napi_del(napi);
> +			atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
> +		}
>  		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
>  
>  		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
> @@ -1931,6 +1933,7 @@ static int mana_create_txq(struct mana_port_context *apc,
>  		txq->ndev = net;
>  		txq->net_txq = netdev_get_tx_queue(net, i);
>  		txq->vp_offset = apc->tx_vp_offset;
> +		atomic_set(&txq->napi_initialized, 0);
>  		skb_queue_head_init(&txq->pending_skbs);
>  
>  		memset(&spec, 0, sizeof(spec));
> @@ -1997,6 +2000,7 @@ static int mana_create_txq(struct mana_port_context *apc,
>  
>  		netif_napi_add_tx(net, &cq->napi, mana_poll);
>  		napi_enable(&cq->napi);
> +		atomic_set(&txq->napi_initialized, 1);
>  
>  		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
>  	}
> @@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
>  
>  	napi = &rxq->rx_cq.napi;
>  
> -	if (validate_state)
> -		napi_synchronize(napi);
> +	if (atomic_read(&rxq->napi_initialized)) {
>  
> -	napi_disable(napi);
> +		if (validate_state)
> +			napi_synchronize(napi);

Is this validate_state flag still needed? The new napi_initialized
variable will make sure that napi_synchronize() is called only for
rxqs whose napi has been enabled.

Regards,
Shradha.

>  
> -	xdp_rxq_info_unreg(&rxq->xdp_rxq);
> +		napi_disable(napi);
>  
> -	netif_napi_del(napi);
> +		netif_napi_del(napi);
> +		atomic_set(&rxq->napi_initialized, 0);
> +	}
> +
> +	xdp_rxq_info_unreg(&rxq->xdp_rxq);
>  
>  	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
>  
> @@ -2199,6 +2207,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
>  	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
>  	rxq->rxq_idx = rxq_idx;
>  	rxq->rxobj = INVALID_MANA_HANDLE;
> +	atomic_set(&rxq->napi_initialized, 0);
>  
>  	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
>  			   &rxq->headroom);
> @@ -2286,6 +2295,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
>  
>  	napi_enable(&cq->napi);
>  
> +	atomic_set(&rxq->napi_initialized, 1);
> +
>  	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
>  out:
>  	if (!err)
> @@ -2336,7 +2347,6 @@ static void mana_destroy_vport(struct mana_port_context *apc)
>  		rxq = apc->rxqs[rxq_idx];
>  		if (!rxq)
>  			continue;
> -
>  		mana_destroy_rxq(apc, rxq, true);
>  		apc->rxqs[rxq_idx] = NULL;
>  	}
> diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
> index 7caa334f4888..be75abd63dc8 100644
> --- a/include/net/mana/mana.h
> +++ b/include/net/mana/mana.h
> @@ -98,6 +98,8 @@ struct mana_txq {
>  
>  	atomic_t pending_sends;
>  
> +	atomic_t napi_initialized;
> +
>  	struct mana_stats_tx stats;
>  };
>  
> @@ -335,6 +337,8 @@ struct mana_rxq {
>  	bool xdp_flush;
>  	int xdp_rc; /* XDP redirect return code */
>  
> +	atomic_t napi_initialized;
> +
>  	struct page_pool *page_pool;
>  
>  	/* MUST BE THE LAST MEMBER:
> -- 
> 2.34.1
> 
>
Haiyang Zhang Aug. 30, 2024, 5:36 p.m. UTC | #4
> -----Original Message-----
> From: Shradha Gupta <shradhagupta@linux.microsoft.com>
> Sent: Friday, August 30, 2024 2:14 AM
> To: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; wei.liu@kernel.org; Dexuan Cui
> <decui@microsoft.com>; davem@davemloft.net; Long Li
> <longli@microsoft.com>; ssengar@linux.microsoft.com; linux-
> hyperv@vger.kernel.org; netdev@vger.kernel.org; linux-
> kernel@vger.kernel.org; linux-rdma@vger.kernel.org; Souradeep Chakrabarti
> <schakrabarti@microsoft.com>; stable@vger.kernel.org
> Subject: Re: [PATCH V3 net] net: mana: Fix error handling in
> mana_create_txq/rxq's NAPI cleanup
> 
> On Thu, Aug 29, 2024 at 01:36:50AM -0700, Souradeep Chakrabarti wrote:
> > Currently napi_disable() gets called during rxq and txq cleanup,
> > even before napi is enabled and hrtimer is initialized. It causes
> > kernel panic.
> >
> > ? page_fault_oops+0x136/0x2b0
> >   ? page_counter_cancel+0x2e/0x80
> >   ? do_user_addr_fault+0x2f2/0x640
> >   ? refill_obj_stock+0xc4/0x110
> >   ? exc_page_fault+0x71/0x160
> >   ? asm_exc_page_fault+0x27/0x30
> >   ? __mmdrop+0x10/0x180
> >   ? __mmdrop+0xec/0x180
> >   ? hrtimer_active+0xd/0x50
> >   hrtimer_try_to_cancel+0x2c/0xf0
> >   hrtimer_cancel+0x15/0x30
> >   napi_disable+0x65/0x90
> >   mana_destroy_rxq+0x4c/0x2f0
> >   mana_create_rxq.isra.0+0x56c/0x6d0
> >   ? mana_uncfg_vport+0x50/0x50
> >   mana_alloc_queues+0x21b/0x320
> >   ? skb_dequeue+0x5f/0x80
> >
> > Cc: stable@vger.kernel.org
> > Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ")
> > Signed-off-by: Souradeep Chakrabarti <schakrabarti@linux.microsoft.com>
> > Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
> > Reviewed-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
> > ---
> > V3 -> V2:
> > Instead of using napi internal attribute, using an atomic
> > attribute to verify napi is initialized for a particular txq / rxq.
> >
> > V2 -> V1:
> > Addressed the comment on cleaning up napi for the queues,
> > where queue creation was successful.
> > ---
> >  drivers/net/ethernet/microsoft/mana/mana_en.c | 30 ++++++++++++-------
> >  include/net/mana/mana.h                       |  4 +++
> >  2 files changed, 24 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > index 39f56973746d..bd303c89cfa6 100644
> > --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> > +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> > @@ -1872,10 +1872,12 @@ static void mana_destroy_txq(struct
> mana_port_context *apc)
> >
> >  	for (i = 0; i < apc->num_queues; i++) {
> >  		napi = &apc->tx_qp[i].tx_cq.napi;
> > -		napi_synchronize(napi);
> > -		napi_disable(napi);
> > -		netif_napi_del(napi);
> > -
> > +		if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
> > +			napi_synchronize(napi);
> > +			napi_disable(napi);
> > +			netif_napi_del(napi);
> > +			atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
> > +		}
> >  		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
> >
> >  		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
> > @@ -1931,6 +1933,7 @@ static int mana_create_txq(struct
> mana_port_context *apc,
> >  		txq->ndev = net;
> >  		txq->net_txq = netdev_get_tx_queue(net, i);
> >  		txq->vp_offset = apc->tx_vp_offset;
> > +		atomic_set(&txq->napi_initialized, 0);
> >  		skb_queue_head_init(&txq->pending_skbs);
> >
> >  		memset(&spec, 0, sizeof(spec));
> > @@ -1997,6 +2000,7 @@ static int mana_create_txq(struct
> mana_port_context *apc,
> >
> >  		netif_napi_add_tx(net, &cq->napi, mana_poll);
> >  		napi_enable(&cq->napi);
> > +		atomic_set(&txq->napi_initialized, 1);
> >
> >  		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
> >  	}
> > @@ -2023,14 +2027,18 @@ static void mana_destroy_rxq(struct
> mana_port_context *apc,
> >
> >  	napi = &rxq->rx_cq.napi;
> >
> > -	if (validate_state)
> > -		napi_synchronize(napi);
> > +	if (atomic_read(&rxq->napi_initialized)) {
> >
> > -	napi_disable(napi);
> > +		if (validate_state)
> > +			napi_synchronize(napi);
> 
> Is this validate_state flag still needed? The new napi_initialized
> variable will make sure the napi_synchronize() is called only for rxqs
> that have napi_enabled.
> 

Yes, the validate_state flag for mana_destroy_rxq() can be combined/renamed.

@Souradeep Chakrabarti, As Jakub suggested in V2, you may:
Rename the parameter validate_state to napi_initialized. The two callers
below can still call like before.
1 mana_en.c mana_create_rxq    2296 mana_destroy_rxq(apc, rxq, false);
2 mana_en.c mana_destroy_vport 2340 mana_destroy_rxq(apc, rxq, true);

So you don't need to add napi_initialized to struct mana_rxq.

In mana_destroy_rxq() you can have code like this
if (napi_initialized) {
        napi = &rxq->rx_cq.napi;

        napi_synchronize(napi);

        napi_disable(napi);

        xdp_rxq_info_unreg(&rxq->xdp_rxq);

        netif_napi_del(napi);
}

For mana_destroy_txq, it looks more complex, so your addition of
napi_initialized to struct mana_txq is good.

Thanks,
- Haiyang
diff mbox series

Patch

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 39f56973746d..bd303c89cfa6 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1872,10 +1872,12 @@  static void mana_destroy_txq(struct mana_port_context *apc)
 
 	for (i = 0; i < apc->num_queues; i++) {
 		napi = &apc->tx_qp[i].tx_cq.napi;
-		napi_synchronize(napi);
-		napi_disable(napi);
-		netif_napi_del(napi);
-
+		if (atomic_read(&apc->tx_qp[i].txq.napi_initialized)) {
+			napi_synchronize(napi);
+			napi_disable(napi);
+			netif_napi_del(napi);
+			atomic_set(&apc->tx_qp[i].txq.napi_initialized, 0);
+		}
 		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
 		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1931,6 +1933,7 @@  static int mana_create_txq(struct mana_port_context *apc,
 		txq->ndev = net;
 		txq->net_txq = netdev_get_tx_queue(net, i);
 		txq->vp_offset = apc->tx_vp_offset;
+		atomic_set(&txq->napi_initialized, 0);
 		skb_queue_head_init(&txq->pending_skbs);
 
 		memset(&spec, 0, sizeof(spec));
@@ -1997,6 +2000,7 @@  static int mana_create_txq(struct mana_port_context *apc,
 
 		netif_napi_add_tx(net, &cq->napi, mana_poll);
 		napi_enable(&cq->napi);
+		atomic_set(&txq->napi_initialized, 1);
 
 		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 	}
@@ -2023,14 +2027,18 @@  static void mana_destroy_rxq(struct mana_port_context *apc,
 
 	napi = &rxq->rx_cq.napi;
 
-	if (validate_state)
-		napi_synchronize(napi);
+	if (atomic_read(&rxq->napi_initialized)) {
 
-	napi_disable(napi);
+		if (validate_state)
+			napi_synchronize(napi);
 
-	xdp_rxq_info_unreg(&rxq->xdp_rxq);
+		napi_disable(napi);
 
-	netif_napi_del(napi);
+		netif_napi_del(napi);
+		atomic_set(&rxq->napi_initialized, 0);
+	}
+
+	xdp_rxq_info_unreg(&rxq->xdp_rxq);
 
 	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
@@ -2199,6 +2207,7 @@  static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
 	rxq->rxq_idx = rxq_idx;
 	rxq->rxobj = INVALID_MANA_HANDLE;
+	atomic_set(&rxq->napi_initialized, 0);
 
 	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
 			   &rxq->headroom);
@@ -2286,6 +2295,8 @@  static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
 	napi_enable(&cq->napi);
 
+	atomic_set(&rxq->napi_initialized, 1);
+
 	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 out:
 	if (!err)
@@ -2336,7 +2347,6 @@  static void mana_destroy_vport(struct mana_port_context *apc)
 		rxq = apc->rxqs[rxq_idx];
 		if (!rxq)
 			continue;
-
 		mana_destroy_rxq(apc, rxq, true);
 		apc->rxqs[rxq_idx] = NULL;
 	}
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 7caa334f4888..be75abd63dc8 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -98,6 +98,8 @@  struct mana_txq {
 
 	atomic_t pending_sends;
 
+	atomic_t napi_initialized;
+
 	struct mana_stats_tx stats;
 };
 
@@ -335,6 +337,8 @@  struct mana_rxq {
 	bool xdp_flush;
 	int xdp_rc; /* XDP redirect return code */
 
+	atomic_t napi_initialized;
+
 	struct page_pool *page_pool;
 
 	/* MUST BE THE LAST MEMBER: