diff mbox series

[05/49] lnet: Prevent discovery on peer marked deletion

Message ID 1618459361-17909-6-git-send-email-jsimmons@infradead.org (mailing list archive)
State New, archived
Headers show
Series lustre: sync to OpenSFS as of March 30 2021 | expand

Commit Message

James Simmons April 15, 2021, 4:01 a.m. UTC
From: Chris Horn <chris.horn@hpe.com>

If a peer has been marked for deletion then we needn't perform any
other discovery operation on it. Integrate this peer state into the
top level of the discovery state machine so that it is checked before
any other state.

HPE-bug-id: LUS-9192
WC-bug-id: https://jira.whamcloud.com/browse/LU-13895
Lustre-commit: aa7de0af6969df77 ("LU-13895 lnet: Prevent discovery on peer marked deletion")
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Reviewed-on: https://review.whamcloud.com/39604
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 net/lnet/lnet/peer.c | 109 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 65 insertions(+), 44 deletions(-)
diff mbox series

Patch

diff --git a/net/lnet/lnet/peer.c b/net/lnet/lnet/peer.c
index 8ee5ec3..48f78ef 100644
--- a/net/lnet/lnet/peer.c
+++ b/net/lnet/lnet/peer.c
@@ -2934,6 +2934,68 @@  static bool lnet_is_nid_in_ping_info(lnet_nid_t nid,
 	return false;
 }
 
+/* Delete a peer that has been marked for deletion. NB: when this peer was added
+ * to the discovery queue a reference was taken that will prevent the peer from
+ * actually being freed by this function. After this function exits the
+ * discovery thread should call lnet_peer_discovery_complete() which will
+ * drop that reference as well as wake any waiters that may also be holding a
+ * ref on the peer
+ */
+static int lnet_peer_deletion(struct lnet_peer *lp)
+__must_hold(&lp->lp_lock)
+{
+	struct list_head rlist;
+	struct lnet_route *route, *tmp;
+	int sensitivity = lp->lp_health_sensitivity;
+
+	INIT_LIST_HEAD(&rlist);
+
+	lp->lp_state &= ~(LNET_PEER_DISCOVERING | LNET_PEER_FORCE_PING |
+			  LNET_PEER_FORCE_PUSH);
+	CDEBUG(D_NET, "peer %s(%p) state %#x\n",
+	       libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state);
+
+	if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
+		return -ESHUTDOWN;
+
+	spin_unlock(&lp->lp_lock);
+
+	mutex_lock(&the_lnet.ln_api_mutex);
+
+	lnet_net_lock(LNET_LOCK_EX);
+	/* remove the peer from the discovery work
+	 * queue if it's on there in preparation
+	 * of deleting it.
+	 */
+	if (!list_empty(&lp->lp_dc_list))
+		list_del(&lp->lp_dc_list);
+	list_for_each_entry_safe(route, tmp,
+				 &lp->lp_routes,
+				 lr_gwlist)
+		lnet_move_route(route, NULL, &rlist);
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	/* lnet_peer_del() deletes all the peer NIs owned by this peer */
+	lnet_peer_del(lp);
+
+	list_for_each_entry_safe(route, tmp,
+				 &rlist, lr_list) {
+		/* re-add these routes */
+		lnet_add_route(route->lr_net,
+			       route->lr_hops,
+			       route->lr_nid,
+			       route->lr_priority,
+			       sensitivity);
+		kfree(route);
+	}
+
+	mutex_unlock(&the_lnet.ln_api_mutex);
+
+	spin_lock(&lp->lp_lock);
+
+	return 0;
+}
+
 /*
  * Update a peer using the data received.
  */
@@ -3504,7 +3566,9 @@  static int lnet_peer_discovery(void *arg)
 			CDEBUG(D_NET, "peer %s(%p) state %#x\n",
 			       libcfs_nid2str(lp->lp_primary_nid), lp,
 			       lp->lp_state);
-			if (lp->lp_state & LNET_PEER_DATA_PRESENT)
+			if (lp->lp_state & LNET_PEER_MARK_DELETION)
+				rc = lnet_peer_deletion(lp);
+			else if (lp->lp_state & LNET_PEER_DATA_PRESENT)
 				rc = lnet_peer_data_present(lp);
 			else if (lp->lp_state & LNET_PEER_PING_FAILED)
 				rc = lnet_peer_ping_failed(lp);
@@ -3536,49 +3600,6 @@  static int lnet_peer_discovery(void *arg)
 				lnet_peer_discovery_complete(lp);
 			if (the_lnet.ln_dc_state == LNET_DC_STATE_STOPPING)
 				break;
-
-			if (lp->lp_state & LNET_PEER_MARK_DELETION) {
-				struct list_head rlist;
-				struct lnet_route *route, *tmp;
-				int sensitivity = lp->lp_health_sensitivity;
-
-				INIT_LIST_HEAD(&rlist);
-
-				/* remove the peer from the discovery work
-				 * queue if it's on there in preparation
-				 * of deleting it.
-				 */
-				if (!list_empty(&lp->lp_dc_list))
-					list_del(&lp->lp_dc_list);
-
-				lnet_net_unlock(LNET_LOCK_EX);
-
-				mutex_lock(&the_lnet.ln_api_mutex);
-
-				lnet_net_lock(LNET_LOCK_EX);
-				list_for_each_entry_safe(route, tmp,
-							 &lp->lp_routes,
-							 lr_gwlist)
-					lnet_move_route(route, NULL, &rlist);
-				lnet_net_unlock(LNET_LOCK_EX);
-
-				/* delete the peer */
-				lnet_peer_del(lp);
-
-				list_for_each_entry_safe(route, tmp,
-							 &rlist, lr_list) {
-					/* re-add these routes */
-					lnet_add_route(route->lr_net,
-						       route->lr_hops,
-						       route->lr_nid,
-						       route->lr_priority,
-						       sensitivity);
-					kfree(route);
-				}
-				mutex_unlock(&the_lnet.ln_api_mutex);
-
-				lnet_net_lock(LNET_LOCK_EX);
-			}
 		}
 
 		lnet_net_unlock(LNET_LOCK_EX);