diff mbox

[PATCHv3,rdma-core,1/2] srp_daemon: handle SM lid change

Message ID 6ea765f9-e770-74a3-bbe7-19b7ebc76ebe@suse.com (mailing list archive)
State Accepted
Delegated to: Leon Romanovsky
Headers show

Commit Message

Nicolas Morey-Chaisemartin Dec. 15, 2017, 4:36 p.m. UTC
When srp_daemon is running and the master SM host changes,
 srp_daemon outputs these errors at every scan:
srp_daemon[25394]: No response to inform info registration
srp_daemon[25394]: Fail to register to traps, maybe there is no opensm
 running on fabric or IB port is down

This was introduced by commit 4952e5f ("Fix a memory leak").
A side effect of that commit was that create_ah was only called when the
 port lid changed, which meant register_to_traps used an older, obsolete
 version of sm_lid and failed to connect to it.

This patch fixes this behaviour by checking for both local lid changes and
 SM lid changes, and calling create_ah on any of these events.

Fixes: 4952e5f7df0c (Fix a memory leak)
Signed-off-by: Nicolas Morey-Chaisemartin <NMoreyChaisemartin@suse.com>
Cc: stable@linux-rdma.org # v14, v15, v16
---

Since v2, expand abbrev sha1 of Fixes:... to 12B

 srp_daemon/srp_daemon.c       | 10 ++++++----
 srp_daemon/srp_daemon.h       |  2 +-
 srp_daemon/srp_handle_traps.c | 14 +++++++++++---
 3 files changed, 18 insertions(+), 8 deletions(-)

Comments

Bart Van Assche Dec. 15, 2017, 4:53 p.m. UTC | #1
On Fri, 2017-12-15 at 17:36 +0100, Nicolas Morey-Chaisemartin wrote:
> Since v2, expand abbrev sha1 of Fixes:... to 12B


Hello Nicolas,

Please keep Reviewed-by tags when making small changes like this.

BTW, have you already uploaded these patches to GitHub and sent a pull
request to https://github.com/linux-rdma/rdma-core? That is how rdma-core
patches usually end up upstream. After you have created a pull request, you
will see that several validation checks (Travis CI) are started
automatically.

Bart.
diff mbox

Patch

diff --git a/srp_daemon/srp_daemon.c b/srp_daemon/srp_daemon.c
index cec36db2e0f1..38501886110a 100644
--- a/srp_daemon/srp_daemon.c
+++ b/srp_daemon/srp_daemon.c
@@ -1103,7 +1103,7 @@  static int get_shared_pkeys(struct resources *res,
 	int i, num_pkeys = 0;
 	uint16_t pkey;
 	uint16_t local_port_lid = get_port_lid(res->ud_res->ib_ctx,
-					       config->port_num);
+					       config->port_num, NULL);
 
 	in_mad_buf = malloc(sizeof(struct ib_user_mad) +
 			    node_table_response_size);
@@ -2092,7 +2092,7 @@  int main(int argc, char *argv[])
 {
 	int			ret;
 	struct resources       *res;
-	uint16_t 		lid;
+	uint16_t 		lid, sm_lid;
 	uint16_t 		pkey;
 	union umad_gid 		gid;
 	struct target_details  *target;
@@ -2196,8 +2196,10 @@  catas_start:
 
 			pr_debug("Starting a recalculation\n");
 			port_lid = get_port_lid(res->ud_res->ib_ctx,
-					   config->port_num);
-			if (port_lid != res->ud_res->port_attr.lid) {
+						config->port_num, &sm_lid);
+			if (port_lid != res->ud_res->port_attr.lid ||
+				sm_lid != res->ud_res->port_attr.sm_lid) {
+
 				if (res->ud_res->ah) {
 					ibv_destroy_ah(res->ud_res->ah);
 					res->ud_res->ah = NULL;
diff --git a/srp_daemon/srp_daemon.h b/srp_daemon/srp_daemon.h
index 5d268ed395e1..864b3d42fb46 100644
--- a/srp_daemon/srp_daemon.h
+++ b/srp_daemon/srp_daemon.h
@@ -299,7 +299,7 @@  void *run_thread_listen_to_events(void *res_in);
 int get_node(struct umad_resources *umad_res, uint16_t dlid, uint64_t *guid);
 int create_trap_resources(struct ud_resources *ud_res);
 int register_to_traps(struct resources *res, int subscribe);
-uint16_t get_port_lid(struct ibv_context *ib_ctx, int port_num);
+uint16_t get_port_lid(struct ibv_context *ib_ctx, int port_num, uint16_t *sm_lid);
 int create_ah(struct ud_resources *ud_res);
 void push_gid_to_list(struct sync_resources *res, union umad_gid *gid,
 		      uint16_t pkey);
diff --git a/srp_daemon/srp_handle_traps.c b/srp_daemon/srp_handle_traps.c
index 6b36b15cc84c..8c428756a379 100644
--- a/srp_daemon/srp_handle_traps.c
+++ b/srp_daemon/srp_handle_traps.c
@@ -340,12 +340,20 @@  int ud_resources_create(struct ud_resources *res)
 	return 0;
 }
 
-uint16_t get_port_lid(struct ibv_context *ib_ctx, int port_num)
+uint16_t get_port_lid(struct ibv_context *ib_ctx, int port_num, uint16_t *sm_lid)
 {
 	struct ibv_port_attr port_attr;
+	int ret;
+
+	ret = ibv_query_port(ib_ctx, port_num, &port_attr);
 
-	return ibv_query_port(ib_ctx, port_num, &port_attr) == 0 ?
-		port_attr.lid : 0;
+	if (!ret) {
+		if (sm_lid)
+			*sm_lid = port_attr.sm_lid;
+		return port_attr.lid;
+	}
+
+	return 0;
 }
 
 int create_ah(struct ud_resources *ud_res)