diff mbox series

[net] net: mana: Fix race of mana_hwc_post_rx_wqe and new hwc response

Message ID 1724272949-2044-1-git-send-email-haiyangz@microsoft.com (mailing list archive)
State Not Applicable
Headers show
Series [net] net: mana: Fix race of mana_hwc_post_rx_wqe and new hwc response | expand

Commit Message

Haiyang Zhang Aug. 21, 2024, 8:42 p.m. UTC
The mana_hwc_rx_event_handler() / mana_hwc_handle_resp() calls
complete(&ctx->comp_event) before posting the wqe back. It's
possible that other callers, like mana_create_txq(), start the
next round of mana_hwc_send_request() before the posting of wqe.
And if the HW is fast enough to respond, it can hit no_wqe error
on the HW channel, then the response message is lost. The mana
driver may fail to create queues and open, because of waiting for
the HW response and timed out.
Sample dmesg:
[  528.610840] mana 39d4:00:02.0: HWC: Request timed out!
[  528.614452] mana 39d4:00:02.0: Failed to send mana message: -110, 0x0
[  528.618326] mana 39d4:00:02.0 enP14804s2: Failed to create WQ object: -110

To fix it, move posting of rx wqe before complete(&ctx->comp_event).

Cc: stable@vger.kernel.org
Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 .../net/ethernet/microsoft/mana/hw_channel.c  | 62 ++++++++++---------
 1 file changed, 34 insertions(+), 28 deletions(-)

Comments

Long Li Aug. 22, 2024, 2:21 a.m. UTC | #1
> Subject: [PATCH net] net: mana: Fix race of mana_hwc_post_rx_wqe and new
> hwc response
> 
> The mana_hwc_rx_event_handler() / mana_hwc_handle_resp() calls
> complete(&ctx->comp_event) before posting the wqe back. It's possible that
> other callers, like mana_create_txq(), start the next round of
> mana_hwc_send_request() before the posting of wqe.
> And if the HW is fast enough to respond, it can hit no_wqe error on the HW
> channel, then the response message is lost. The mana driver may fail to create
> queues and open, because of waiting for the HW response and timed out.
> Sample dmesg:
> [  528.610840] mana 39d4:00:02.0: HWC: Request timed out!
> [  528.614452] mana 39d4:00:02.0: Failed to send mana message: -110, 0x0
> [  528.618326] mana 39d4:00:02.0 enP14804s2: Failed to create WQ object: -110
> 
> To fix it, move posting of rx wqe before complete(&ctx->comp_event).
> 
> Cc: stable@vger.kernel.org
> Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network
> Adapter (MANA)")
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

Reviewed-by: Long Li <longli@microsoft.com>

> ---
>  .../net/ethernet/microsoft/mana/hw_channel.c  | 62 ++++++++++---------
>  1 file changed, 34 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> index cafded2f9382..a00f915c5188 100644
> --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> @@ -52,9 +52,33 @@ static int mana_hwc_verify_resp_msg(const struct
> hwc_caller_ctx *caller_ctx,
>  	return 0;
>  }
> 
> +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
> +				struct hwc_work_request *req)
> +{
> +	struct device *dev = hwc_rxq->hwc->dev;
> +	struct gdma_sge *sge;
> +	int err;
> +
> +	sge = &req->sge;
> +	sge->address = (u64)req->buf_sge_addr;
> +	sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
> +	sge->size = req->buf_len;
> +
> +	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
> +	req->wqe_req.sgl = sge;
> +	req->wqe_req.num_sge = 1;
> +	req->wqe_req.client_data_unit = 0;
> +
> +	err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req,
> NULL);
> +	if (err)
> +		dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err);
> +	return err;
> +}
> +
>  static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32
> resp_len,
> -				 const struct gdma_resp_hdr *resp_msg)
> +				 struct hwc_work_request *rx_req)
>  {
> +	const struct gdma_resp_hdr *resp_msg = rx_req->buf_va;
>  	struct hwc_caller_ctx *ctx;
>  	int err;
> 
> @@ -62,6 +86,7 @@ static void mana_hwc_handle_resp(struct
> hw_channel_context *hwc, u32 resp_len,
>  		      hwc->inflight_msg_res.map)) {
>  		dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n",
>  			resp_msg->response.hwc_msg_id);
> +		mana_hwc_post_rx_wqe(hwc->rxq, rx_req);
>  		return;
>  	}
> 
> @@ -75,30 +100,13 @@ static void mana_hwc_handle_resp(struct
> hw_channel_context *hwc, u32 resp_len,
>  	memcpy(ctx->output_buf, resp_msg, resp_len);
>  out:
>  	ctx->error = err;
> -	complete(&ctx->comp_event);
> -}
> -
> -static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
> -				struct hwc_work_request *req)
> -{
> -	struct device *dev = hwc_rxq->hwc->dev;
> -	struct gdma_sge *sge;
> -	int err;
> -
> -	sge = &req->sge;
> -	sge->address = (u64)req->buf_sge_addr;
> -	sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
> -	sge->size = req->buf_len;
> 
> -	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
> -	req->wqe_req.sgl = sge;
> -	req->wqe_req.num_sge = 1;
> -	req->wqe_req.client_data_unit = 0;
> +	/* Must post rx wqe before complete(), otherwise the next rx may
> +	 * hit no_wqe error.
> +	 */
> +	mana_hwc_post_rx_wqe(hwc->rxq, rx_req);
> 
> -	err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req,
> NULL);
> -	if (err)
> -		dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err);
> -	return err;
> +	complete(&ctx->comp_event);
>  }
> 
>  static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
> @@ -235,14 +243,12 @@ static void mana_hwc_rx_event_handler(void *ctx,
> u32 gdma_rxq_id,
>  		return;
>  	}
> 
> -	mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp);
> +	mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req);
> 
> -	/* Do no longer use 'resp', because the buffer is posted to the HW
> -	 * in the below mana_hwc_post_rx_wqe().
> +	/* Can no longer use 'resp', because the buffer is posted to the HW
> +	 * in mana_hwc_handle_resp() above.
>  	 */
>  	resp = NULL;
> -
> -	mana_hwc_post_rx_wqe(hwc_rxq, rx_req);
>  }
> 
>  static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id,
> --
> 2.34.1
Christophe JAILLET Aug. 22, 2024, 5:34 a.m. UTC | #2
Le 21/08/2024 à 22:42, Haiyang Zhang a écrit :
> The mana_hwc_rx_event_handler() / mana_hwc_handle_resp() calls
> complete(&ctx->comp_event) before posting the wqe back. It's
> possible that other callers, like mana_create_txq(), start the
> next round of mana_hwc_send_request() before the posting of wqe.
> And if the HW is fast enough to respond, it can hit no_wqe error
> on the HW channel, then the response message is lost. The mana
> driver may fail to create queues and open, because of waiting for
> the HW response and timed out.
> Sample dmesg:
> [  528.610840] mana 39d4:00:02.0: HWC: Request timed out!
> [  528.614452] mana 39d4:00:02.0: Failed to send mana message: -110, 0x0
> [  528.618326] mana 39d4:00:02.0 enP14804s2: Failed to create WQ object: -110
> 
> To fix it, move posting of rx wqe before complete(&ctx->comp_event).
> 
> Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
> Signed-off-by: Haiyang Zhang <haiyangz-0li6OtcxBFHby3iVrkZq2A@public.gmane.org>
> ---
>   .../net/ethernet/microsoft/mana/hw_channel.c  | 62 ++++++++++---------
>   1 file changed, 34 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> index cafded2f9382..a00f915c5188 100644
> --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> @@ -52,9 +52,33 @@ static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx,
>   	return 0;
>   }
>   
> +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
> +				struct hwc_work_request *req)
> +{
> +	struct device *dev = hwc_rxq->hwc->dev;
> +	struct gdma_sge *sge;
> +	int err;
> +
> +	sge = &req->sge;
> +	sge->address = (u64)req->buf_sge_addr;
> +	sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
> +	sge->size = req->buf_len;
> +
> +	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
> +	req->wqe_req.sgl = sge;
> +	req->wqe_req.num_sge = 1;
> +	req->wqe_req.client_data_unit = 0;

Hi,

unrelated to your patch, but this initialization is useless, it is 
already memset(0)'ed a few lines above.
So why client_data_unit and not some other fields?

> +
> +	err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL);
> +	if (err)
> +		dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err);
> +	return err;
> +}

...

Just my 2c.

CJ
Haiyang Zhang Aug. 22, 2024, 2:30 p.m. UTC | #3
> -----Original Message-----
> From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
> Sent: Thursday, August 22, 2024 1:34 AM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: ast@kernel.org; bpf@vger.kernel.org; daniel@iogearbox.net;
> davem@davemloft.net; Dexuan Cui <decui@microsoft.com>;
> edumazet@google.com; hawk@kernel.org; jesse.brandeburg@intel.com;
> john.fastabend@gmail.com; kuba@kernel.org; KY Srinivasan
> <kys@microsoft.com>; leon@kernel.org; linux-hyperv@vger.kernel.org; linux-
> kernel@vger.kernel.org; linux-rdma@vger.kernel.org; Long Li
> <longli@microsoft.com>; netdev@vger.kernel.org; olaf@aepfle.de;
> pabeni@redhat.com; Paul Rosswurm <paulros@microsoft.com>;
> shradhagupta@linux.microsoft.com; ssengar@linux.microsoft.com;
> stable@vger.kernel.org; stephen@networkplumber.org; tglx@linutronix.de;
> vkuznets@redhat.com; wei.liu@kernel.org
> Subject: Re: [PATCH net] net: mana: Fix race of mana_hwc_post_rx_wqe and
> new hwc response
> 
> Le 21/08/2024 à 22:42, Haiyang Zhang a écrit :
> > The mana_hwc_rx_event_handler() / mana_hwc_handle_resp() calls
> > complete(&ctx->comp_event) before posting the wqe back. It's
> > possible that other callers, like mana_create_txq(), start the
> > next round of mana_hwc_send_request() before the posting of wqe.
> > And if the HW is fast enough to respond, it can hit no_wqe error
> > on the HW channel, then the response message is lost. The mana
> > driver may fail to create queues and open, because of waiting for
> > the HW response and timed out.
> > Sample dmesg:
> > [  528.610840] mana 39d4:00:02.0: HWC: Request timed out!
> > [  528.614452] mana 39d4:00:02.0: Failed to send mana message: -110, 0x0
> > [  528.618326] mana 39d4:00:02.0 enP14804s2: Failed to create WQ object:
> -110
> >
> > To fix it, move posting of rx wqe before complete(&ctx->comp_event).
> >
> > Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> > Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure
> Network Adapter (MANA)")
> > Signed-off-by: Haiyang Zhang <haiyangz-
> 0li6OtcxBFHby3iVrkZq2A@public.gmane.org>
> > ---
> >   .../net/ethernet/microsoft/mana/hw_channel.c  | 62 ++++++++++---------
> >   1 file changed, 34 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> > index cafded2f9382..a00f915c5188 100644
> > --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> > +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> > @@ -52,9 +52,33 @@ static int mana_hwc_verify_resp_msg(const struct
> hwc_caller_ctx *caller_ctx,
> >   	return 0;
> >   }
> >
> > +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
> > +				struct hwc_work_request *req)
> > +{
> > +	struct device *dev = hwc_rxq->hwc->dev;
> > +	struct gdma_sge *sge;
> > +	int err;
> > +
> > +	sge = &req->sge;
> > +	sge->address = (u64)req->buf_sge_addr;
> > +	sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
> > +	sge->size = req->buf_len;
> > +
> > +	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
> > +	req->wqe_req.sgl = sge;
> > +	req->wqe_req.num_sge = 1;
> > +	req->wqe_req.client_data_unit = 0;
> 
> Hi,
> 
> unrelated to your patch, but this initialization is useless, it is
> already memset(0)'ed a few lines above.
> So why client_data_unit and not some other fields?

Agreed. This patch just moves the function for the bug fix.
We will do code cleanups in other patches.

Thanks,
- Haiyang
patchwork-bot+netdevbpf@kernel.org Aug. 23, 2024, 1:30 p.m. UTC | #4
Hello:

This patch was applied to netdev/net.git (main)
by David S. Miller <davem@davemloft.net>:

On Wed, 21 Aug 2024 13:42:29 -0700 you wrote:
> The mana_hwc_rx_event_handler() / mana_hwc_handle_resp() calls
> complete(&ctx->comp_event) before posting the wqe back. It's
> possible that other callers, like mana_create_txq(), start the
> next round of mana_hwc_send_request() before the posting of wqe.
> And if the HW is fast enough to respond, it can hit no_wqe error
> on the HW channel, then the response message is lost. The mana
> driver may fail to create queues and open, because of waiting for
> the HW response and timed out.
> Sample dmesg:
> [  528.610840] mana 39d4:00:02.0: HWC: Request timed out!
> [  528.614452] mana 39d4:00:02.0: Failed to send mana message: -110, 0x0
> [  528.618326] mana 39d4:00:02.0 enP14804s2: Failed to create WQ object: -110
> 
> [...]

Here is the summary with links:
  - [net] net: mana: Fix race of mana_hwc_post_rx_wqe and new hwc response
    https://git.kernel.org/netdev/net/c/8af174ea863c

You are awesome, thank you!
diff mbox series

Patch

diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index cafded2f9382..a00f915c5188 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -52,9 +52,33 @@  static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx,
 	return 0;
 }
 
+static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
+				struct hwc_work_request *req)
+{
+	struct device *dev = hwc_rxq->hwc->dev;
+	struct gdma_sge *sge;
+	int err;
+
+	sge = &req->sge;
+	sge->address = (u64)req->buf_sge_addr;
+	sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
+	sge->size = req->buf_len;
+
+	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
+	req->wqe_req.sgl = sge;
+	req->wqe_req.num_sge = 1;
+	req->wqe_req.client_data_unit = 0;
+
+	err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL);
+	if (err)
+		dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err);
+	return err;
+}
+
 static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
-				 const struct gdma_resp_hdr *resp_msg)
+				 struct hwc_work_request *rx_req)
 {
+	const struct gdma_resp_hdr *resp_msg = rx_req->buf_va;
 	struct hwc_caller_ctx *ctx;
 	int err;
 
@@ -62,6 +86,7 @@  static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
 		      hwc->inflight_msg_res.map)) {
 		dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n",
 			resp_msg->response.hwc_msg_id);
+		mana_hwc_post_rx_wqe(hwc->rxq, rx_req);
 		return;
 	}
 
@@ -75,30 +100,13 @@  static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
 	memcpy(ctx->output_buf, resp_msg, resp_len);
 out:
 	ctx->error = err;
-	complete(&ctx->comp_event);
-}
-
-static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
-				struct hwc_work_request *req)
-{
-	struct device *dev = hwc_rxq->hwc->dev;
-	struct gdma_sge *sge;
-	int err;
-
-	sge = &req->sge;
-	sge->address = (u64)req->buf_sge_addr;
-	sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
-	sge->size = req->buf_len;
 
-	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
-	req->wqe_req.sgl = sge;
-	req->wqe_req.num_sge = 1;
-	req->wqe_req.client_data_unit = 0;
+	/* Must post rx wqe before complete(), otherwise the next rx may
+	 * hit no_wqe error.
+	 */
+	mana_hwc_post_rx_wqe(hwc->rxq, rx_req);
 
-	err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL);
-	if (err)
-		dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err);
-	return err;
+	complete(&ctx->comp_event);
 }
 
 static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
@@ -235,14 +243,12 @@  static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id,
 		return;
 	}
 
-	mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp);
+	mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, rx_req);
 
-	/* Do no longer use 'resp', because the buffer is posted to the HW
-	 * in the below mana_hwc_post_rx_wqe().
+	/* Can no longer use 'resp', because the buffer is posted to the HW
+	 * in mana_hwc_handle_resp() above.
 	 */
 	resp = NULL;
-
-	mana_hwc_post_rx_wqe(hwc_rxq, rx_req);
 }
 
 static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id,