Message ID | 20220301094402.14992-7-dust.li@linux.alibaba.com (mailing list archive) |
---|---|
State | Accepted |
Commit | a505cce6f7cfaf2aa2385aab7286063c96444526 |
Delegated to: | Netdev Maintainers |
Series | net/smc: some datapath performance optimizations |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Clearly marked for net-next |
netdev/apply | success | Patch already applied to net-next |
On Tue, Mar 01, 2022 at 05:44:01PM +0800, Dust Li wrote:
> When we are handling softirq workload, enable hardirq may
> again interrupt the current routine of softirq, and then
> try to raise softirq again. This only wastes CPU cycles
> and won't have any real gain.
> 
> Since IB_CQ_REPORT_MISSED_EVENTS already make sure if
> ib_req_notify_cq() returns 0, it is safe to wait for the
> next event, with no need to poll the CQ again in this case.
> 
> This patch disables hardirq during the processing of softirq,
> and re-arm the CQ after softirq is done. Somehow like NAPI.
> 
> Co-developed-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
> Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
> ---
>  net/smc/smc_wr.c | 49 +++++++++++++++++++++++++++---------------------
>  1 file changed, 28 insertions(+), 21 deletions(-)
> 
> diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
> index 24be1d03fef9..34d616406d51 100644
> --- a/net/smc/smc_wr.c
> +++ b/net/smc/smc_wr.c
> @@ -137,25 +137,28 @@ static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
>  {
>  	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
>  	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
> -	int i = 0, rc;
> -	int polled = 0;
> +	int i, rc;
>  
>  again:
> -	polled++;
>  	do {
>  		memset(&wc, 0, sizeof(wc));
>  		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
> -		if (polled == 1) {
> -			ib_req_notify_cq(dev->roce_cq_send,
> -					 IB_CQ_NEXT_COMP |
> -					 IB_CQ_REPORT_MISSED_EVENTS);
> -		}
> -		if (!rc)
> -			break;
>  		for (i = 0; i < rc; i++)
>  			smc_wr_tx_process_cqe(&wc[i]);
> +		if (rc < SMC_WR_MAX_POLL_CQE)
> +			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
> +			 * drained, no need to poll again. --Guangguan Wang

1. Please remove "--Guangguan Wang".
2. We already discussed that. SMC should be changed to use RDMA CQ pool API
drivers/infiniband/core/cq.c.
ib_poll_handler() has much better implementation (tracing, IRQ rescheduling,
proper error handling) than this SMC variant.

Thanks

> +			 */
> +			break;
>  	} while (rc > 0);
> -	if (polled == 1)
> +
> +	/* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns
> +	 * 0, it is safe to wait for the next event.
> +	 * Else we must poll the CQ again to make sure we won't miss any event
> +	 */
> +	if (ib_req_notify_cq(dev->roce_cq_send,
> +			     IB_CQ_NEXT_COMP |
> +			     IB_CQ_REPORT_MISSED_EVENTS))
>  		goto again;
>  }
>  
> @@ -478,24 +481,28 @@ static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t)
>  {
>  	struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet);
>  	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
> -	int polled = 0;
>  	int rc;
>  
>  again:
> -	polled++;
>  	do {
>  		memset(&wc, 0, sizeof(wc));
>  		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
> -		if (polled == 1) {
> -			ib_req_notify_cq(dev->roce_cq_recv,
> -					 IB_CQ_SOLICITED_MASK
> -					 | IB_CQ_REPORT_MISSED_EVENTS);
> -		}
> -		if (!rc)
> +		if (rc > 0)
> +			smc_wr_rx_process_cqes(&wc[0], rc);
> +		if (rc < SMC_WR_MAX_POLL_CQE)
> +			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
> +			 * drained, no need to poll again. --Guangguan Wang
> +			 */
>  			break;
> -		smc_wr_rx_process_cqes(&wc[0], rc);
>  	} while (rc > 0);
> -	if (polled == 1)
> +
> +	/* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns
> +	 * 0, it is safe to wait for the next event.
> +	 * Else we must poll the CQ again to make sure we won't miss any event
> +	 */
> +	if (ib_req_notify_cq(dev->roce_cq_recv,
> +			     IB_CQ_SOLICITED_MASK |
> +			     IB_CQ_REPORT_MISSED_EVENTS))
>  		goto again;
>  }
> 
> -- 
> 2.19.1.3.ge56e4f7
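For context on the API Leon points to: the RDMA core CQ helpers in drivers/infiniband/core/cq.c let a ULP allocate a completion queue with ib_alloc_cq() (or ib_cq_pool_get()), pick a polling context such as IB_POLL_SOFTIRQ, and attach a struct ib_cqe with a done callback to each work request; polling, re-arming and rescheduling are then handled by the core's ib_poll_handler(). The sketch below illustrates that pattern only; the smc_-prefixed names are hypothetical and do not reflect an actual SMC conversion.

```c
#include <rdma/ib_verbs.h>

/* Hypothetical per-WR state; the embedded ib_cqe comes back in wc->wr_cqe. */
struct smc_tx_pend {
	struct ib_cqe cqe;
	/* ... per-request bookkeeping ... */
};

/* Completion callback, run by the core's CQ polling (softirq context when
 * the CQ is allocated with IB_POLL_SOFTIRQ).
 */
static void smc_tx_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smc_tx_pend *pend =
		container_of(wc->wr_cqe, struct smc_tx_pend, cqe);

	if (wc->status != IB_WC_SUCCESS) {
		/* handle the failed send for pend */
	}
	/* complete the pending send ... */
}

static struct ib_cq *smc_create_send_cq(struct ib_device *ibdev, void *priv,
					int nr_cqe)
{
	/* The core polls the CQ, re-arms it with IB_CQ_REPORT_MISSED_EVENTS
	 * and reschedules itself as needed; no driver tasklet required.
	 * Teardown is ib_free_cq().
	 */
	return ib_alloc_cq(ibdev, priv, nr_cqe, 0 /* comp_vector */,
			   IB_POLL_SOFTIRQ);
}
```

With this scheme the sender would set pend->cqe.done = smc_tx_done and send_wr.wr_cqe = &pend->cqe before ib_post_send(), instead of multiplexing completions on wr_id as the existing smc_wr.c code does.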
On Tue, Mar 01, 2022 at 12:14:15PM +0200, Leon Romanovsky wrote:
>On Tue, Mar 01, 2022 at 05:44:01PM +0800, Dust Li wrote:
>> When we are handling softirq workload, enable hardirq may
>> again interrupt the current routine of softirq, and then
>> try to raise softirq again. This only wastes CPU cycles
>> and won't have any real gain.
>>
>> Since IB_CQ_REPORT_MISSED_EVENTS already make sure if
>> ib_req_notify_cq() returns 0, it is safe to wait for the
>> next event, with no need to poll the CQ again in this case.
>>
>> This patch disables hardirq during the processing of softirq,
>> and re-arm the CQ after softirq is done. Somehow like NAPI.
>>
>> Co-developed-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
>> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
>> Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
>> ---
>>  net/smc/smc_wr.c | 49 +++++++++++++++++++++++++++---------------------
>>  1 file changed, 28 insertions(+), 21 deletions(-)
>>
>> diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
>> index 24be1d03fef9..34d616406d51 100644
>> --- a/net/smc/smc_wr.c
>> +++ b/net/smc/smc_wr.c
>> @@ -137,25 +137,28 @@ static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
>>  {
>>  	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
>>  	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
>> -	int i = 0, rc;
>> -	int polled = 0;
>> +	int i, rc;
>>
>>  again:
>> -	polled++;
>>  	do {
>>  		memset(&wc, 0, sizeof(wc));
>>  		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
>> -		if (polled == 1) {
>> -			ib_req_notify_cq(dev->roce_cq_send,
>> -					 IB_CQ_NEXT_COMP |
>> -					 IB_CQ_REPORT_MISSED_EVENTS);
>> -		}
>> -		if (!rc)
>> -			break;
>>  		for (i = 0; i < rc; i++)
>>  			smc_wr_tx_process_cqe(&wc[i]);
>> +		if (rc < SMC_WR_MAX_POLL_CQE)
>> +			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
>> +			 * drained, no need to poll again. --Guangguan Wang
>
>1. Please remove "--Guangguan Wang".
>2. We already discussed that. SMC should be changed to use RDMA CQ pool API
>drivers/infiniband/core/cq.c.
>ib_poll_handler() has much better implementation (tracing, IRQ rescheduling,
>proper error handling) than this SMC variant.

OK, I'll remove this patch in the next version.

Thanks
On 01/03/2022 11:53, dust.li wrote:
> On Tue, Mar 01, 2022 at 12:14:15PM +0200, Leon Romanovsky wrote:
>> On Tue, Mar 01, 2022 at 05:44:01PM +0800, Dust Li wrote:
>> 1. Please remove "--Guangguan Wang".
>> 2. We already discussed that. SMC should be changed to use RDMA CQ pool API
>> drivers/infiniband/core/cq.c.
>> ib_poll_handler() has much better implementation (tracing, IRQ rescheduling,
>> proper error handling) than this SMC variant.
> 
> OK, I'll remove this patch in the next version.

Looks like this one was accepted already, but per discussion (and I agree with that) -
please revert this patch. Thank you.
On Fri, Mar 04, 2022 at 09:19:27AM +0100, Karsten Graul wrote:
>On 01/03/2022 11:53, dust.li wrote:
>> On Tue, Mar 01, 2022 at 12:14:15PM +0200, Leon Romanovsky wrote:
>>> On Tue, Mar 01, 2022 at 05:44:01PM +0800, Dust Li wrote:
>>> 1. Please remove "--Guangguan Wang".
>>> 2. We already discussed that. SMC should be changed to use RDMA CQ pool API
>>> drivers/infiniband/core/cq.c.
>>> ib_poll_handler() has much better implementation (tracing, IRQ rescheduling,
>>> proper error handling) than this SMC variant.
>>
>> OK, I'll remove this patch in the next version.
>
>Looks like this one was accepted already, but per discussion (and I agree with that) -
>please revert this patch. Thank you.

Yes. No problem, I will send a revert patch.

Thanks
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 24be1d03fef9..34d616406d51 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -137,25 +137,28 @@ static void smc_wr_tx_tasklet_fn(struct tasklet_struct *t)
 {
 	struct smc_ib_device *dev = from_tasklet(dev, t, send_tasklet);
 	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
-	int i = 0, rc;
-	int polled = 0;
+	int i, rc;
 
 again:
-	polled++;
 	do {
 		memset(&wc, 0, sizeof(wc));
 		rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
-		if (polled == 1) {
-			ib_req_notify_cq(dev->roce_cq_send,
-					 IB_CQ_NEXT_COMP |
-					 IB_CQ_REPORT_MISSED_EVENTS);
-		}
-		if (!rc)
-			break;
 		for (i = 0; i < rc; i++)
 			smc_wr_tx_process_cqe(&wc[i]);
+		if (rc < SMC_WR_MAX_POLL_CQE)
+			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
+			 * drained, no need to poll again. --Guangguan Wang
+			 */
+			break;
 	} while (rc > 0);
-	if (polled == 1)
+
+	/* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns
+	 * 0, it is safe to wait for the next event.
+	 * Else we must poll the CQ again to make sure we won't miss any event
+	 */
+	if (ib_req_notify_cq(dev->roce_cq_send,
+			     IB_CQ_NEXT_COMP |
+			     IB_CQ_REPORT_MISSED_EVENTS))
 		goto again;
 }
 
@@ -478,24 +481,28 @@ static void smc_wr_rx_tasklet_fn(struct tasklet_struct *t)
 {
 	struct smc_ib_device *dev = from_tasklet(dev, t, recv_tasklet);
 	struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
-	int polled = 0;
 	int rc;
 
 again:
-	polled++;
 	do {
 		memset(&wc, 0, sizeof(wc));
 		rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
-		if (polled == 1) {
-			ib_req_notify_cq(dev->roce_cq_recv,
-					 IB_CQ_SOLICITED_MASK
-					 | IB_CQ_REPORT_MISSED_EVENTS);
-		}
-		if (!rc)
+		if (rc > 0)
+			smc_wr_rx_process_cqes(&wc[0], rc);
+		if (rc < SMC_WR_MAX_POLL_CQE)
+			/* If < SMC_WR_MAX_POLL_CQE, the CQ should have been
+			 * drained, no need to poll again. --Guangguan Wang
+			 */
 			break;
-		smc_wr_rx_process_cqes(&wc[0], rc);
 	} while (rc > 0);
-	if (polled == 1)
+
+	/* IB_CQ_REPORT_MISSED_EVENTS make sure if ib_req_notify_cq() returns
+	 * 0, it is safe to wait for the next event.
+	 * Else we must poll the CQ again to make sure we won't miss any event
+	 */
+	if (ib_req_notify_cq(dev->roce_cq_recv,
+			     IB_CQ_SOLICITED_MASK |
+			     IB_CQ_REPORT_MISSED_EVENTS))
 		goto again;
 }
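As background on the arming contract the accepted patch relies on: with IB_CQ_REPORT_MISSED_EVENTS, ib_req_notify_cq() returns 0 when it is safe to wait for the next completion event, and a positive value when completions may have slipped in since the last poll, in which case the CQ has to be polled again before going idle. A distilled sketch of that drain-then-rearm loop follows; the names are generic placeholders, not the actual net/smc code.

```c
#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

static void handle_cqe(struct ib_wc *wc);	/* hypothetical per-CQE handler */

static void drain_and_rearm(struct ib_cq *cq)
{
	struct ib_wc wc[16];
	int n, i;

again:
	do {
		/* Poll a batch of completions and process them. */
		n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc);
		for (i = 0; i < n; i++)
			handle_cqe(&wc[i]);
		/* A short batch means the CQ is drained; stop polling. */
	} while (n == ARRAY_SIZE(wc));

	/* Re-arm: 0 means it is safe to sleep until the next event;
	 * a positive return means events may have been missed, so poll again.
	 */
	if (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0)
		goto again;
}
```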