
[net-next,6/7] net/smc: don't req_notify until all CQEs drained

Message ID 20220301094402.14992-7-dust.li@linux.alibaba.com (mailing list archive)
State Not Applicable
Series net/smc: some datapath performance optimizations

Commit Message

Dust Li March 1, 2022, 9:44 a.m. UTC
While we are handling softirq workload, leaving hardirq enabled
means a hardirq may interrupt the softirq routine that is already
running and raise the softirq again. This only wastes CPU cycles
and brings no real gain.

Since IB_CQ_REPORT_MISSED_EVENTS already guarantees that when
ib_req_notify_cq() returns 0 it is safe to wait for the next
event, there is no need to poll the CQ again in this case.

This patch keeps the hardirq (CQ notification) disabled while the
softirq is processing completions, and re-arms the CQ only after
the softirq is done, similar to NAPI.

Co-developed-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
---
 net/smc/smc_wr.c | 49 +++++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 21 deletions(-)
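
The change boils down to the drain-then-re-arm pattern sketched below (a
minimal, standalone illustration, not the patch itself): poll the CQ until it
is drained, only then re-arm it, and go back to polling if
IB_CQ_REPORT_MISSED_EVENTS reports that completions arrived in between.
handle_wc(), MAX_POLL and demo_cq_tasklet() are illustrative placeholders,
not identifiers from net/smc.

/* Minimal sketch of the drain-then-re-arm pattern (placeholder names,
 * not the actual net/smc code).
 */
static void demo_cq_tasklet(struct ib_cq *cq)
{
	struct ib_wc wc[MAX_POLL];
	int i, rc;

again:
	do {
		rc = ib_poll_cq(cq, MAX_POLL, wc);
		for (i = 0; i < rc; i++)
			handle_wc(&wc[i]);
		/* fewer than MAX_POLL completions means the CQ is drained */
	} while (rc >= MAX_POLL);

	/* Re-arm only after draining; a return value > 0 together with
	 * IB_CQ_REPORT_MISSED_EVENTS means completions slipped in between
	 * the last poll and the re-arm, so poll again instead of waiting
	 * for the next interrupt.
	 */
	if (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0)
		goto again;
}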

Comments

Leon Romanovsky March 1, 2022, 10:14 a.m. UTC | #1
On Tue, Mar 01, 2022 at 05:44:01PM +0800, Dust Li wrote:
> While we are handling softirq workload, leaving hardirq enabled
> means a hardirq may interrupt the softirq routine that is already
> running and raise the softirq again. This only wastes CPU cycles
> and brings no real gain.
> 
> Since IB_CQ_REPORT_MISSED_EVENTS already guarantees that when
> ib_req_notify_cq() returns 0 it is safe to wait for the next
> event, there is no need to poll the CQ again in this case.
> 
> This patch keeps the hardirq (CQ notification) disabled while the
> softirq is processing completions, and re-arms the CQ only after
> the softirq is done, similar to NAPI.
> 
> Co-developed-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
> Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
> ---
>  net/smc/smc_wr.c | 49 +++++++++++++++++++++++++++---------------------
>  1 file changed, 28 insertions(+), 21 deletions(-)
> 
> diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
> index 24be1d03fef9..34d616406d51 100644
> --- a/net/smc/smc_wr.c
> +++ b/net/smc/smc_wr.c
> @@ -137,25 +137,28 @@ static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
>  {
>  	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
>  	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
> -	int i = 0, rc;
> -	int polled = 0;
> +	int i, rc;
>  
>  again:
> -	polled++;
>  	do {
>  		memset(&wc, 0, sizeof(wc));
>  		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
> -		if (polled == 1) {
> -			ib_req_notify_cq(dev->roce_cq_send,
> -					 IB_CQ_NEXT_COMP |
> -					 IB_CQ_REPORT_MISSED_EVENTS);
> -		}
> -		if (!rc)
> -			break;
>  		for (i = 0; i < rc; i++)
>  			smc_wr_tx_process_cqe(&wc[i]);
> +		if (rc < SMC_WR_MAX_POLL_CQE)
> +			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
> +			 * drained, no need to poll again. --Guangguan Wang

1. Please remove "--Guangguan Wang".
2. We already discussed that. SMC should be changed to use the RDMA CQ pool
API in drivers/infiniband/core/cq.c.
ib_poll_handler() has a much better implementation (tracing, IRQ
rescheduling, proper error handling) than this SMC variant.

Thanks

> +			 */
> +			break;
>  	} while (rc > 0);
> -	if (polled == 1)
> +
> +	/* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns
> +	 * 0, it is safe to wait for the next event.
> +	 * Else we must poll the CQ again to make sure we won't miss any event
> +	 */
> +	if (ib_req_notify_cq(dev->roce_cq_send,
> +			     IB_CQ_NEXT_COMP |
> +			     IB_CQ_REPORT_MISSED_EVENTS))
>  		goto again;
>  }
>  
> @@ -478,24 +481,28 @@ static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t)
>  {
>  	struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet);
>  	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
> -	int polled = 0;
>  	int rc;
>  
>  again:
> -	polled++;
>  	do {
>  		memset(&wc, 0, sizeof(wc));
>  		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
> -		if (polled == 1) {
> -			ib_req_notify_cq(dev->roce_cq_recv,
> -					 IB_CQ_SOLICITED_MASK
> -					 | IB_CQ_REPORT_MISSED_EVENTS);
> -		}
> -		if (!rc)
> +		if (rc > 0)
> +			smc_wr_rx_process_cqes(&wc[0], rc);
> +		if (rc < SMC_WR_MAX_POLL_CQE)
> +			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
> +			 * drained, no need to poll again. --Guangguan Wang
> +			 */
>  			break;
> -		smc_wr_rx_process_cqes(&wc[0], rc);
>  	} while (rc > 0);
> -	if (polled == 1)
> +
> +	/* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns
> +	 * 0, it is safe to wait for the next event.
> +	 * Else we must poll the CQ again to make sure we won't miss any event
> +	 */
> +	if (ib_req_notify_cq(dev->roce_cq_recv,
> +			     IB_CQ_SOLICITED_MASK |
> +			     IB_CQ_REPORT_MISSED_EVENTS))
>  		goto again;
>  }
>  
> -- 
> 2.19.1.3.ge56e4f7
>
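
For context on the CQ pool suggestion above: with the core CQ API in
drivers/infiniband/core/cq.c the drain/re-arm loop lives in the core
(ib_poll_handler()), and a ULP only allocates a CQ with a poll context and
attaches a per-WR completion callback. The sketch below is a rough,
hypothetical illustration of that model (shown with ib_alloc_cq(); the pool
variants ib_cq_pool_get()/ib_cq_pool_put() use the same callback scheme).
It is not an actual SMC conversion, and all demo_* names are placeholders.

#include <rdma/ib_verbs.h>

struct demo_tx_ctx {
	struct ib_cqe cqe;	/* embedded so done() can use container_of() */
	/* ... per-WR state ... */
};

/* Invoked by the core poll loop for each completion; the ULP keeps no
 * tasklet, drain or re-arm logic of its own.
 */
static void demo_tx_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct demo_tx_ctx *ctx = container_of(wc->wr_cqe,
					       struct demo_tx_ctx, cqe);

	if (wc->status != IB_WC_SUCCESS)
		pr_warn("demo: tx completion failed: %d\n", wc->status);
	/* ... release ctx ... */
}

static struct ib_cq *demo_create_send_cq(struct ib_device *ibdev)
{
	/* The core allocates the CQ and polls it in softirq context;
	 * each send WR must set wr.wr_cqe = &ctx->cqe with
	 * ctx->cqe.done = demo_tx_done before being posted.
	 */
	return ib_alloc_cq(ibdev, NULL, 128 /* nr_cqe */,
			   0 /* comp_vector */, IB_POLL_SOFTIRQ);
}
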
Dust Li March 1, 2022, 10:53 a.m. UTC | #2
On Tue, Mar 01, 2022 at 12:14:15PM +0200, Leon Romanovsky wrote:
>On Tue, Mar 01, 2022 at 05:44:01PM +0800, Dust Li wrote:
>> While we are handling softirq workload, leaving hardirq enabled
>> means a hardirq may interrupt the softirq routine that is already
>> running and raise the softirq again. This only wastes CPU cycles
>> and brings no real gain.
>> 
>> Since IB_CQ_REPORT_MISSED_EVENTS already guarantees that when
>> ib_req_notify_cq() returns 0 it is safe to wait for the next
>> event, there is no need to poll the CQ again in this case.
>> 
>> This patch keeps the hardirq (CQ notification) disabled while the
>> softirq is processing completions, and re-arms the CQ only after
>> the softirq is done, similar to NAPI.
>> 
>> Co-developed-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
>> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
>> Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
>> ---
>>  net/smc/smc_wr.c | 49 +++++++++++++++++++++++++++---------------------
>>  1 file changed, 28 insertions(+), 21 deletions(-)
>> 
>> diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
>> index 24be1d03fef9..34d616406d51 100644
>> --- a/net/smc/smc_wr.c
>> +++ b/net/smc/smc_wr.c
>> @@ -137,25 +137,28 @@ static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
>>  {
>>  	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
>>  	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
>> -	int i = 0, rc;
>> -	int polled = 0;
>> +	int i, rc;
>>  
>>  again:
>> -	polled++;
>>  	do {
>>  		memset(&wc, 0, sizeof(wc));
>>  		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
>> -		if (polled == 1) {
>> -			ib_req_notify_cq(dev->roce_cq_send,
>> -					 IB_CQ_NEXT_COMP |
>> -					 IB_CQ_REPORT_MISSED_EVENTS);
>> -		}
>> -		if (!rc)
>> -			break;
>>  		for (i = 0; i < rc; i++)
>>  			smc_wr_tx_process_cqe(&wc[i]);
>> +		if (rc < SMC_WR_MAX_POLL_CQE)
>> +			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
>> +			 * drained, no need to poll again. --Guangguan Wang
>
>1. Please remove "--Guangguan Wang".
>2. We already discussed that. SMC should be changed to use the RDMA CQ pool
>API in drivers/infiniband/core/cq.c.
>ib_poll_handler() has a much better implementation (tracing, IRQ
>rescheduling, proper error handling) than this SMC variant.

OK, I'll remove this patch in the next version.

Thanks
Karsten Graul March 4, 2022, 8:19 a.m. UTC | #3
On 01/03/2022 11:53, dust.li wrote:
> On Tue, Mar 01, 2022 at 12:14:15PM +0200, Leon Romanovsky wrote:
>> On Tue, Mar 01, 2022 at 05:44:01PM +0800, Dust Li wrote:
>> 1. Please remove "--Guangguan Wang".
>> 2. We already discussed that. SMC should be changed to use the RDMA CQ pool
>> API in drivers/infiniband/core/cq.c.
>> ib_poll_handler() has a much better implementation (tracing, IRQ
>> rescheduling, proper error handling) than this SMC variant.
> 
> OK, I'll remove this patch in the next version.

Looks like this one was accepted already, but per discussion (and I agree with that) -
please revert this patch. Thank you.
Dust Li March 4, 2022, 8:23 a.m. UTC | #4
On Fri, Mar 04, 2022 at 09:19:27AM +0100, Karsten Graul wrote:
>On 01/03/2022 11:53, dust.li wrote:
>> On Tue, Mar 01, 2022 at 12:14:15PM +0200, Leon Romanovsky wrote:
>>> On Tue, Mar 01, 2022 at 05:44:01PM +0800, Dust Li wrote:
>>> 1. Please remove "--Guangguan Wang".
>>> 2. We already discussed that. SMC should be changed to use the RDMA CQ pool
>>> API in drivers/infiniband/core/cq.c.
>>> ib_poll_handler() has a much better implementation (tracing, IRQ
>>> rescheduling, proper error handling) than this SMC variant.
>> 
>> OK, I'll remove this patch in the next version.
>
>Looks like this one was accepted already, but per discussion (and I agree with that) -
>please revert this patch. Thank you.

Yes. No problem, I will send a revert patch.

Thanks

Patch

diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 24be1d03fef9..34d616406d51 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -137,25 +137,28 @@  static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
 {
 	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
 	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
-	int i = 0, rc;
-	int polled = 0;
+	int i, rc;
 
 again:
-	polled++;
 	do {
 		memset(&wc, 0, sizeof(wc));
 		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
-		if (polled == 1) {
-			ib_req_notify_cq(dev->roce_cq_send,
-					 IB_CQ_NEXT_COMP |
-					 IB_CQ_REPORT_MISSED_EVENTS);
-		}
-		if (!rc)
-			break;
 		for (i = 0; i < rc; i++)
 			smc_wr_tx_process_cqe(&wc[i]);
+		if (rc < SMC_WR_MAX_POLL_CQE)
+			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
+			 * drained, no need to poll again. --Guangguan Wang
+			 */
+			break;
 	} while (rc > 0);
-	if (polled == 1)
+
+	/* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns
+	 * 0, it is safe to wait for the next event.
+	 * Else we must poll the CQ again to make sure we won't miss any event
+	 */
+	if (ib_req_notify_cq(dev->roce_cq_send,
+			     IB_CQ_NEXT_COMP |
+			     IB_CQ_REPORT_MISSED_EVENTS))
 		goto again;
 }
 
@@ -478,24 +481,28 @@  static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t)
 {
 	struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet);
 	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
-	int polled = 0;
 	int rc;
 
 again:
-	polled++;
 	do {
 		memset(&wc, 0, sizeof(wc));
 		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
-		if (polled == 1) {
-			ib_req_notify_cq(dev->roce_cq_recv,
-					 IB_CQ_SOLICITED_MASK
-					 | IB_CQ_REPORT_MISSED_EVENTS);
-		}
-		if (!rc)
+		if (rc > 0)
+			smc_wr_rx_process_cqes(&wc[0], rc);
+		if (rc < SMC_WR_MAX_POLL_CQE)
+			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
+			 * drained, no need to poll again. --Guangguan Wang
+			 */
 			break;
-		smc_wr_rx_process_cqes(&wc[0], rc);
 	} while (rc > 0);
-	if (polled == 1)
+
+	/* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns
+	 * 0, it is safe to wait for the next event.
+	 * Else we must poll the CQ again to make sure we won't miss any event
+	 */
+	if (ib_req_notify_cq(dev->roce_cq_recv,
+			     IB_CQ_SOLICITED_MASK |
+			     IB_CQ_REPORT_MISSED_EVENTS))
 		goto again;
 }