diff mbox series

[for-next,04/10] RDMA/hns: Add SRQ asynchronous event support

Message ID 1540624524-118822-5-git-send-email-oulijun@huawei.com (mailing list archive)
State Changes Requested
Delegated to: Jason Gunthorpe
Headers show
Series hns misc updates for 4.20 | expand

Commit Message

Lijun Ou Oct. 27, 2018, 7:15 a.m. UTC
This patch implements the process flow of SRQ asynchronous
event.

Signed-off-by: Lijun Ou <oulijun@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  7 +++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  7 ++++++-
 drivers/infiniband/hw/hns/hns_roce_srq.c    | 23 +++++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

Comments

Jason Gunthorpe Nov. 22, 2018, 6:13 p.m. UTC | #1
On Sat, Oct 27, 2018 at 03:15:18PM +0800, Lijun Ou wrote:
> +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
> +{
> +	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
> +	struct hns_roce_srq *srq;
> +
> +	rcu_read_lock();
> +	srq = radix_tree_lookup(&srq_table->tree,
> +				srqn & (hr_dev->caps.num_srqs - 1));
> +	rcu_read_unlock();
> +	if (srq) {
> +		refcount_inc(&srq->refcount);

This doesn't look right to me..

The refcount_inc should be inside the RCU and it should almost
certainly be refcount_inc_not_zero.

And I don't see any calls to kfree_rcu or synchronize_rcu in this
driver, so I'm very skeptical this code is OK. Please go over the RCU
check list and make sure RCU is used properly. 

Perhaps the spinlock should be held here instead of RCU.

Also.. Use of the radix tree is being phased out with xarray as the
preferred API now. I don't see anything complicated about the use of
the radix tree API here, so could you please respin these patches to use
xarray? It should be straightforward, and I'd prefer not to add new
users while Matthew is trying to get rid of it.

Thanks,
Jason
Leon Romanovsky Nov. 22, 2018, 6:50 p.m. UTC | #2
On Thu, Nov 22, 2018 at 11:13:46AM -0700, Jason Gunthorpe wrote:
> On Sat, Oct 27, 2018 at 03:15:18PM +0800, Lijun Ou wrote:
> > +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
> > +{
> > +	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
> > +	struct hns_roce_srq *srq;
> > +
> > +	rcu_read_lock();
> > +	srq = radix_tree_lookup(&srq_table->tree,
> > +				srqn & (hr_dev->caps.num_srqs - 1));
> > +	rcu_read_unlock();
> > +	if (srq) {
> > +		refcount_inc(&srq->refcount);
>
> This doesn't look right to me..
>
> The refcount_inc should be inside the RCU and it should almost
> certainly be refcount_inc_not_zero.

Are you sure that you can update the variable inside of struct which is
protected by RCU?

Thanks
Matthew Wilcox (Oracle) Nov. 22, 2018, 7:11 p.m. UTC | #3
On Thu, Nov 22, 2018 at 08:50:00PM +0200, Leon Romanovsky wrote:
> On Thu, Nov 22, 2018 at 11:13:46AM -0700, Jason Gunthorpe wrote:
> > On Sat, Oct 27, 2018 at 03:15:18PM +0800, Lijun Ou wrote:
> > > +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
> > > +{
> > > +	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
> > > +	struct hns_roce_srq *srq;
> > > +
> > > +	rcu_read_lock();
> > > +	srq = radix_tree_lookup(&srq_table->tree,
> > > +				srqn & (hr_dev->caps.num_srqs - 1));
> > > +	rcu_read_unlock();
> > > +	if (srq) {
> > > +		refcount_inc(&srq->refcount);
> >
> > This doesn't look right to me..
> >
> > The refcount_inc should be inside the RCU and it should almost
> > certainly be refcount_inc_not_zero.
> 
> Are you sure that you can update the variable inside of struct which is
> protected by RCU?

Yes.  Holding the rcu_read_lock() prevents the structure from being freed
after its refcount hits zero.  refcount_inc_not_zero() will fail if the
structure is waiting to be freed.
Jason Gunthorpe Nov. 22, 2018, 7:13 p.m. UTC | #4
On Thu, Nov 22, 2018 at 08:50:00PM +0200, Leon Romanovsky wrote:
> On Thu, Nov 22, 2018 at 11:13:46AM -0700, Jason Gunthorpe wrote:
> > On Sat, Oct 27, 2018 at 03:15:18PM +0800, Lijun Ou wrote:
> > > +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
> > > +{
> > > +	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
> > > +	struct hns_roce_srq *srq;
> > > +
> > > +	rcu_read_lock();
> > > +	srq = radix_tree_lookup(&srq_table->tree,
> > > +				srqn & (hr_dev->caps.num_srqs - 1));
> > > +	rcu_read_unlock();
> > > +	if (srq) {
> > > +		refcount_inc(&srq->refcount);
> >
> > This doesn't look right to me..
> >
> > The refcount_inc should be inside the RCU and it should almost
> > certainly be refcount_inc_not_zero.
> 
> Are you sure that you can update the variable inside of struct which
> is protected by RCU?

If there is concurrency here then some locking is needed to make sure
srq is valid. Maybe it is not RCU, but then it is unclear why RCU is
being used here..

Most probably the existing spinlock around the radix tree should be
used here, not RCU.

Jason
Leon Romanovsky Nov. 22, 2018, 7:22 p.m. UTC | #5
On Thu, Nov 22, 2018 at 11:11:00AM -0800, Matthew Wilcox wrote:
> On Thu, Nov 22, 2018 at 08:50:00PM +0200, Leon Romanovsky wrote:
> > On Thu, Nov 22, 2018 at 11:13:46AM -0700, Jason Gunthorpe wrote:
> > > On Sat, Oct 27, 2018 at 03:15:18PM +0800, Lijun Ou wrote:
> > > > +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
> > > > +{
> > > > +	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
> > > > +	struct hns_roce_srq *srq;
> > > > +
> > > > +	rcu_read_lock();
> > > > +	srq = radix_tree_lookup(&srq_table->tree,
> > > > +				srqn & (hr_dev->caps.num_srqs - 1));
> > > > +	rcu_read_unlock();
> > > > +	if (srq) {
> > > > +		refcount_inc(&srq->refcount);
> > >
> > > This doesn't look right to me..
> > >
> > > The refcount_inc should be inside the RCU and it should almost
> > > certainly be refcount_inc_not_zero.
> >
> > Are you sure that you can update the variable inside of struct which is
> > protected by RCU?
>
> Yes.  Holding the rcu_read_lock() prevents the structure from being freed
> after its refcount hits zero.  refcount_inc_not_zero() will fail if the
> structure is waiting to be freed.

Matthew,

In your answer, you rely on the assumption that they use refcount for
free paths only. I'm not so sure.

Thanks

>
Lijun Ou Nov. 23, 2018, 9:30 a.m. UTC | #6
在 2018/11/23 2:13, Jason Gunthorpe 写道:
> On Sat, Oct 27, 2018 at 03:15:18PM +0800, Lijun Ou wrote:
>> +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
>> +{
>> +	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
>> +	struct hns_roce_srq *srq;
>> +
>> +	rcu_read_lock();
>> +	srq = radix_tree_lookup(&srq_table->tree,
>> +				srqn & (hr_dev->caps.num_srqs - 1));
>> +	rcu_read_unlock();
>> +	if (srq) {
>> +		refcount_inc(&srq->refcount);
> This doesn't look right to me..
>
> The refcount_inc should be inside the RCU and it should almost
> certainly be refcount_inc_not_zero.
>
> And I don't see any calls to kfree_rcu or synchronize_rcu in this
> driver, so I'm very skeptical this code is OK. Please go over the RCU
> check list and make sure RCU is used properly. 
>
> Perhaps the spinlock should be held here instead of RCU.
OK, I analyzed the RCU usage. The original idea was that it could reduce the locking frequency when reading the SRQ.
The spinlock may be more appropriate here, because it should stay consistent with the
QP event handling. Also, the spinlock is already used in hns_roce_srq_alloc when inserting the SRQ into the radix tree.
> Also.. Use of the radix tree is being phased out with xarray as the
> preferred API now. I don't see anything complicated about the use of
> the radix tree API here, so could you please respin these patches to use
> xarray? It should be straightforward, and I'd prefer not to add new
> users while Matthew is trying to get rid of it.
I have been learning the new xarray API and need a more comprehensive understanding of it.
https://lwn.net/Articles/745073/
I also looked at all the places in the kernel that use xarray, and it is still used relatively little. Maybe xarray will
be considered as a unified replacement for the radix tree API in the hns driver in a follow-up patch in the future.
> Thanks,
> Jason
>
>
Jason Gunthorpe Nov. 23, 2018, 7:16 p.m. UTC | #7
On Fri, Nov 23, 2018 at 05:30:21PM +0800, oulijun wrote:
> 在 2018/11/23 2:13, Jason Gunthorpe 写道:
> > On Sat, Oct 27, 2018 at 03:15:18PM +0800, Lijun Ou wrote:
> >> +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
> >> +{
> >> +	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
> >> +	struct hns_roce_srq *srq;
> >> +
> >> +	rcu_read_lock();
> >> +	srq = radix_tree_lookup(&srq_table->tree,
> >> +				srqn & (hr_dev->caps.num_srqs - 1));
> >> +	rcu_read_unlock();
> >> +	if (srq) {
> >> +		refcount_inc(&srq->refcount);
> > This doesn't look right to me..
> >
> > The refcount_inc should be inside the RCU and it should almost
> > certainly be refcount_inc_not_zero.
> >
> > And I don't see any calls to kfree_rcu or synchronize_rcu in this
> > driver, so I'm very skeptical this code is OK. Please go over the RCU
> > check list and make sure RCU is used properly. 
> >
> > Perhaps the spinlock should be held here instead of RCU.
> ok, I analysis the rcu. The origin idea that it could reduce the frequency with getting lock when read srq
> The spinlock may be  more proper here. because it should keep the same with
> qp event. Also, it use the spin lock in hns_roce_srq_alloc when insert the srq radix tree.
> > Also.. Use of the radix tree is being phased out with xarray as the
> > preferred API now. I don't see anything complicated about the use of
> > the radix tree API here, so could you please respin these patches to use
> > xarray? It should be straightforward, and I'd prefer not to add new
> > users while Matthew is trying to get rid of it.

> I have learning the new API with xarray and need a more
> comprehensive understanding it .  https://lwn.net/Articles/745073/ I
> also found all the places in the kernel that use xarray and it is
> used less. Maybe the xarray will be considered unified insteading of
> radix tree API in the hns driver in the following patch in future.

The usage here is extremely simple, I would like it if you could help
Matthew's conversion by just starting out with xarray.

His conversion patches are here:
http://git.infradead.org/users/willy/linux-dax.git/shortlog/refs/heads/xarray-conv

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 11a9d0c..a662a95 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -648,6 +648,12 @@  struct hns_roce_aeqe {
 		} qp_event;
 
 		struct {
+			__le32 srq;
+			u32 rsv0;
+			u32 rsv1;
+		} srq_event;
+
+		struct {
 			__le32 cq;
 			u32 rsv0;
 			u32 rsv1;
@@ -1136,6 +1142,7 @@  int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
 void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
 int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
 int hns_roce_init(struct hns_roce_dev *hr_dev);
 void hns_roce_exit(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 452167a..e677e8a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -4459,6 +4459,7 @@  static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 	int aeqe_found = 0;
 	int event_type;
 	int sub_type;
+	u32 srqn;
 	u32 qpn;
 	u32 cqn;
 
@@ -4481,6 +4482,9 @@  static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		cqn = roce_get_field(aeqe->event.cq_event.cq,
 				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
 				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+		srqn = roce_get_field(aeqe->event.srq_event.srq,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
 
 		switch (event_type) {
 		case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -4488,13 +4492,14 @@  static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		case HNS_ROCE_EVENT_TYPE_COMM_EST:
 		case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
 		case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
 		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
 			hns_roce_qp_event(hr_dev, qpn, event_type);
 			break;
 		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
-		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
 		case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+			hns_roce_srq_event(hr_dev, srqn, event_type);
 			break;
 		case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 4f1a913..c94a719 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -36,6 +36,29 @@ 
 #include "hns_roce_cmd.h"
 #include "hns_roce_hem.h"
 
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	struct hns_roce_srq *srq;
+
+	rcu_read_lock();
+	srq = radix_tree_lookup(&srq_table->tree,
+				srqn & (hr_dev->caps.num_srqs - 1));
+	rcu_read_unlock();
+	if (srq) {
+		refcount_inc(&srq->refcount);
+	} else {
+		dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+		return;
+	}
+
+	srq->event(srq, event_type);
+
+	if (refcount_dec_and_test(&srq->refcount))
+		complete(&srq->free);
+}
+EXPORT_SYMBOL_GPL(hns_roce_srq_event);
+
 static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
 				  enum hns_roce_event event_type)
 {