diff mbox series

[for-next,03/13] RDMA/rtrs-clt: avoid run destroy_con_cq_qp/create_con_cq_qp in parallel

Message ID 20201012131814.121096-4-jinpu.wang@cloud.ionos.com (mailing list archive)
State Superseded
Headers show
Series rtrs: misc fix and cleanup | expand

Commit Message

Jinpu Wang Oct. 12, 2020, 1:18 p.m. UTC
For redmine #54314, too kworker.

    addr_resolver kworker           reconnect kworker
    rtrs_clt_rdma_cm_handler
    rtrs_rdma_addr_resolved
    create_con_cq_qp: s.dev_ref++
    "s.dev_ref is 1"
                                    wait in create_cm fails with TIMEOUT
                                    destroy_con_cq_qp: --s.dev_ref
                                    "s.dev_ref is 0"
                                    destroy_con_cq_qp: sess->s.dev = NULL
     rtrs_cq_qp_create -> create_qp(con, sess->dev->ib_pd...)
    sess->dev is NULL, panic.

To fix the problem using mutex to serialize create_con_cq_qp
and destroy_con_cq_qp.

redmine: https://redmine.pb.local/issues/54314

Signed-off-by: Jack Wang <jinpu.wang@cloud.ionos.com>
---
 drivers/infiniband/ulp/rtrs/rtrs-clt.c | 15 +++++++++++++--
 drivers/infiniband/ulp/rtrs/rtrs-clt.h |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index fb840b152b37..4677e8ed29ae 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -1499,6 +1499,7 @@  static int create_con(struct rtrs_clt_sess *sess, unsigned int cid)
 	con->c.cid = cid;
 	con->c.sess = &sess->s;
 	atomic_set(&con->io_cnt, 0);
+	mutex_init(&con->con_mutex);
 
 	sess->s.con[cid] = &con->c;
 
@@ -1510,6 +1511,7 @@  static void destroy_con(struct rtrs_clt_con *con)
 	struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess);
 
 	sess->s.con[con->c.cid] = NULL;
+	mutex_destroy(&con->con_mutex);
 	kfree(con);
 }
 
@@ -1520,6 +1522,7 @@  static int create_con_cq_qp(struct rtrs_clt_con *con)
 	int err, cq_vector;
 	struct rtrs_msg_rkey_rsp *rsp;
 
+	lockdep_assert_held(&con->con_mutex);
 	if (con->c.cid == 0) {
 		/*
 		 * One completion for each receive and two for each send
@@ -1593,7 +1596,7 @@  static void destroy_con_cq_qp(struct rtrs_clt_con *con)
 	 * Be careful here: destroy_con_cq_qp() can be called even
 	 * create_con_cq_qp() failed, see comments there.
 	 */
-
+	lockdep_assert_held(&con->con_mutex);
 	rtrs_cq_qp_destroy(&con->c);
 	if (con->rsp_ius) {
 		rtrs_iu_free(con->rsp_ius, DMA_FROM_DEVICE,
@@ -1625,7 +1628,9 @@  static int rtrs_rdma_addr_resolved(struct rtrs_clt_con *con)
 	struct rtrs_sess *s = con->c.sess;
 	int err;
 
+	mutex_lock(&con->con_mutex);
 	err = create_con_cq_qp(con);
+	mutex_unlock(&con->con_mutex);
 	if (err) {
 		rtrs_err(s, "create_con_cq_qp(), err: %d\n", err);
 		return err;
@@ -1938,8 +1943,9 @@  static int create_cm(struct rtrs_clt_con *con)
 
 errr:
 	stop_cm(con);
-	/* Is safe to call destroy if cq_qp is not inited */
+	mutex_lock(&con->con_mutex);
 	destroy_con_cq_qp(con);
+	mutex_unlock(&con->con_mutex);
 destroy_cm:
 	destroy_cm(con);
 
@@ -2046,7 +2052,9 @@  static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_sess *sess)
 		if (!sess->s.con[cid])
 			break;
 		con = to_clt_con(sess->s.con[cid]);
+		mutex_lock(&con->con_mutex);
 		destroy_con_cq_qp(con);
+		mutex_unlock(&con->con_mutex);
 		destroy_cm(con);
 		destroy_con(con);
 	}
@@ -2213,7 +2221,10 @@  static int init_conns(struct rtrs_clt_sess *sess)
 		struct rtrs_clt_con *con = to_clt_con(sess->s.con[cid]);
 
 		stop_cm(con);
+
+		mutex_lock(&con->con_mutex);
 		destroy_con_cq_qp(con);
+		mutex_unlock(&con->con_mutex);
 		destroy_cm(con);
 		destroy_con(con);
 	}
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
index 167acd3c90fc..b8dbd701b3cb 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h
@@ -72,6 +72,7 @@  struct rtrs_clt_con {
 	struct rtrs_iu		*rsp_ius;
 	u32			queue_size;
 	unsigned int		cpu;
+	struct mutex		con_mutex;
 	atomic_t		io_cnt;
 	int			cm_err;
 };