diff mbox series

[11/27] lnet: add 'lock_prim_nid' lnet module parameter

Message ID 1681739243-29375-12-git-send-email-jsimmons@infradead.org (mailing list archive)
State New, archived
Headers show
Series lustre: sync to OpenSFS branch April 17, 2023 | expand

Commit Message

James Simmons April 17, 2023, 1:47 p.m. UTC
From: Serguei Smirnov <ssmirnov@whamcloud.com>

Add the 'lock_prim_nid' lnet module parameter to control
how the primary NID of a Lustre peer is selected.
If set to 1 (default), the NID specified by Lustre when
calling the LNet API is designated as primary for the peer,
allowing for non-blocking discovery in the background.
If set to 0, peer discovery blocks until it is complete,
and the NID listed first in the discovery response is
designated as primary.

WC-bug-id: https://jira.whamcloud.com/browse/LU-14668
Lustre-commit: fc7a0d6013b46ebc1 ("LU-14668 lnet: add 'lock_prim_nid" lnet module parameter")
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50159
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 include/linux/lnet/lib-lnet.h |   1 +
 net/lnet/lnet/api-ni.c        |   5 ++
 net/lnet/lnet/peer.c          | 105 +++++++++++++++++++++++++++---------------
 3 files changed, 73 insertions(+), 38 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/lnet/lib-lnet.h b/include/linux/lnet/lib-lnet.h
index f9f4815..4aa1e5c 100644
--- a/include/linux/lnet/lib-lnet.h
+++ b/include/linux/lnet/lib-lnet.h
@@ -565,6 +565,7 @@  unsigned int lnet_nid_cpt_hash(struct lnet_nid *nid,
 extern int live_router_check_interval;
 extern int dead_router_check_interval;
 extern int portal_rotor;
+extern int lock_prim_nid;
 
 int lnet_lib_init(void);
 void lnet_lib_exit(void);
diff --git a/net/lnet/lnet/api-ni.c b/net/lnet/lnet/api-ni.c
index 9f01dbe..fb596ed 100644
--- a/net/lnet/lnet/api-ni.c
+++ b/net/lnet/lnet/api-ni.c
@@ -208,6 +208,11 @@  static int response_tracking_set(const char *val,
 MODULE_PARM_DESC(lnet_response_tracking,
 		 "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
 
+int lock_prim_nid = 1;
+module_param(lock_prim_nid, int, 0444);
+MODULE_PARM_DESC(lock_prim_nid,
+		 "Whether nid passed down by Lustre is locked as primary");
+
 #define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
 				  (LNET_RETRY_COUNT_DEFAULT + 1))
 unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index ef924ce..f1b0eb0d 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -1346,6 +1346,7 @@  struct lnet_peer_ni *
 	struct lnet_nid pnid = LNET_ANY_NID;
 	bool mr;
 	int i, rc;
+	int flags = lock_prim_nid ? LNET_PEER_LOCK_PRIMARY : 0;
 
 	if (!nids || num_nids < 1)
 		return -EINVAL;
@@ -1368,8 +1369,7 @@  struct lnet_peer_ni *
 		lnet_nid4_to_nid(nids[i], &nid);
 		if (LNET_NID_IS_ANY(&pnid)) {
 			lnet_nid4_to_nid(nids[i], &pnid);
-			rc = lnet_add_peer_ni(&pnid, &LNET_ANY_NID, mr,
-					      LNET_PEER_LOCK_PRIMARY);
+			rc = lnet_add_peer_ni(&pnid, &LNET_ANY_NID, mr, flags);
 			if (rc == -EALREADY) {
 				struct lnet_peer *lp;
 
@@ -1385,12 +1385,10 @@  struct lnet_peer_ni *
 			}
 		} else if (lnet_peer_discovery_disabled) {
 			lnet_nid4_to_nid(nids[i], &nid);
-			rc = lnet_add_peer_ni(&nid, &LNET_ANY_NID, mr,
-					      LNET_PEER_LOCK_PRIMARY);
+			rc = lnet_add_peer_ni(&nid, &LNET_ANY_NID, mr, flags);
 		} else {
 			lnet_nid4_to_nid(nids[i], &nid);
-			rc = lnet_add_peer_ni(&pnid, &nid, mr,
-					      LNET_PEER_LOCK_PRIMARY);
+			rc = lnet_add_peer_ni(&pnid, &nid, mr, flags);
 		}
 
 		if (rc && rc != -EEXIST)
@@ -1432,36 +1430,53 @@  void LNetPrimaryNID(struct lnet_nid *nid)
 	 * down then this discovery can introduce long delays into the mount
 	 * process, so skip it if it isn't necessary.
 	 */
+again:
 	spin_lock(&lp->lp_lock);
-	if (!lnet_peer_discovery_disabled &&
-	    (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) ||
-	     !lnet_peer_is_uptodate_locked(lp))) {
-		/* force a full discovery cycle */
-		lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH |
-				LNET_PEER_LOCK_PRIMARY;
+	if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid)
+		lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+
+	/* DD disabled, nothing to do */
+	if (lnet_peer_discovery_disabled) {
+		*nid = lp->lp_primary_nid;
 		spin_unlock(&lp->lp_lock);
+		goto out_decref;
+	}
 
-		/* start discovery in the background. Messages to that
-		 * peer will not go through until the discovery is
-		 * complete
-		 */
-		rc = lnet_discover_peer_locked(lpni, cpt, false);
-		if (rc)
-			goto out_decref;
-		/* The lpni (or lp) for this NID may have changed and our ref is
-		 * the only thing keeping the old one around. Release the ref
-		 * and lookup the lpni again
-		 */
-		lnet_peer_ni_decref_locked(lpni);
-		lpni = lnet_peer_ni_find_locked(nid);
-		if (!lpni) {
-			rc = -ENOENT;
-			goto out_unlock;
-		}
-		lp = lpni->lpni_peer_net->lpn_peer;
-	} else {
+	/* Peer already up to date, nothing to do */
+	if (lnet_peer_is_uptodate_locked(lp)) {
+		*nid = lp->lp_primary_nid;
 		spin_unlock(&lp->lp_lock);
+		goto out_decref;
 	}
+	spin_unlock(&lp->lp_lock);
+
+	/* If primary nid locking is enabled, discovery is performed
+	 * in the background.
+	 * If primary nid locking is disabled, discovery blocks here.
+	 * Messages to the peer will not go through until the discovery is
+	 * complete.
+	 */
+	if (lock_prim_nid)
+		rc = lnet_discover_peer_locked(lpni, cpt, false);
+	else
+		rc = lnet_discover_peer_locked(lpni, cpt, true);
+	if (rc)
+		goto out_decref;
+
+	/* The lpni (or lp) for this NID may have changed and our ref is
+	 * the only thing keeping the old one around. Release the ref
+	 * and lookup the lpni again
+	 */
+	lnet_peer_ni_decref_locked(lpni);
+	lpni = lnet_peer_ni_find_locked(nid);
+	if (!lpni) {
+		rc = -ENOENT;
+		goto out_unlock;
+	}
+	lp = lpni->lpni_peer_net->lpn_peer;
+
+	if (!lock_prim_nid && !lnet_is_discovery_disabled(lp))
+		goto again;
 	*nid = lp->lp_primary_nid;
 out_decref:
 	lnet_peer_ni_decref_locked(lpni);
@@ -1553,7 +1568,6 @@  struct lnet_peer_net *
 		ptable->pt_peers++;
 	}
 
-
 	/* Update peer state */
 	spin_lock(&lp->lp_lock);
 	if (flags & LNET_PEER_CONFIGURED) {
@@ -1630,10 +1644,8 @@  struct lnet_peer_net *
 				rc = -EPERM;
 			goto out;
 		} else if (lp->lp_state & LNET_PEER_LOCK_PRIMARY) {
-			if (nid_same(&lp->lp_primary_nid, nid)) {
+			if (nid_same(&lp->lp_primary_nid, nid))
 				rc = -EEXIST;
-				goto out;
-			}
 			/* we're trying to recreate an existing peer which
 			 * has already been created and its primary
 			 * locked. This is likely due to two servers
@@ -1641,8 +1653,18 @@  struct lnet_peer_net *
 			 * to that node with the primary NID which was
 			 * first added by Lustre
 			 */
-			rc = -EALREADY;
+			else
+				rc = -EALREADY;
 			goto out;
+		} else if (!(flags & (LNET_PEER_LOCK_PRIMARY | LNET_PEER_CONFIGURED))) {
+			/* if not recreating peer as configured and
+			 * not locking primary nid, no need to
+			 * do anything if primary nid is not being changed
+			 */
+			if (nid_same(&lp->lp_primary_nid, nid)) {
+				rc = -EEXIST;
+				goto out;
+			}
 		}
 		/* Delete and recreate the peer.
 		 * We can get here:
@@ -1952,6 +1974,14 @@  struct lnet_peer_net *
 	lnet_peer_ni_decref_locked(lpni);
 	lp = lpni->lpni_peer_net->lpn_peer;
 
+	/* Peer must have been configured. */
+	if ((flags & LNET_PEER_CONFIGURED) &&
+	    !(lp->lp_state & LNET_PEER_CONFIGURED)) {
+		CDEBUG(D_NET, "peer %s was not configured\n",
+		       libcfs_nidstr(prim_nid));
+		return -ENOENT;
+	}
+
 	/* Primary NID must match */
 	if (!nid_same(&lp->lp_primary_nid, prim_nid)) {
 		CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
@@ -1967,8 +1997,7 @@  struct lnet_peer_net *
 		return -EPERM;
 	}
 
-	if ((flags & LNET_PEER_LOCK_PRIMARY) &&
-	    (lnet_peer_is_uptodate(lp) && (lp->lp_state & LNET_PEER_LOCK_PRIMARY))) {
+	if (lnet_peer_is_uptodate(lp) && !(flags & LNET_PEER_CONFIGURED)) {
 		CDEBUG(D_NET,
 		       "Don't add temporary peer NI for uptodate peer %s\n",
 		       libcfs_nidstr(&lp->lp_primary_nid));