@@ -987,6 +987,9 @@ struct lnet_peer {
* nets/NIs
*/
u32 lp_send_seq;
+
+ /* timestamp (ns) of primary nid lock */
+ u64 lp_prim_lock_ts;
};
/*
@@ -156,6 +156,7 @@ struct libcfs_ioctl_data {
#define IOC_LIBCFS_GET_CONST_UDSP_INFO _IOWR(IOC_LIBCFS_TYPE, 109, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_RESET_LNET_STATS _IOWR(IOC_LIBCFS_TYPE, 110, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_SET_CONNS_PER_PEER _IOWR(IOC_LIBCFS_TYPE, 111, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_MAX_NR 111
+#define IOC_LIBCFS_SET_PEER _IOWR(IOC_LIBCFS_TYPE, 112, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_MAX_NR 112
#endif /* __LIBCFS_IOCTL_H__ */
@@ -4384,6 +4384,30 @@ LNetCtl(unsigned int cmd, void *arg)
return 0;
}
+ case IOC_LIBCFS_SET_PEER: {
+ struct lnet_ioctl_peer_cfg *cfg = arg;
+ struct lnet_peer *lp;
+
+ if (cfg->prcfg_hdr.ioc_len < sizeof(*cfg))
+ return -EINVAL;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ lnet_nid4_to_nid(cfg->prcfg_prim_nid, &nid);
+ lp = lnet_find_peer(&nid);
+ if (!lp) {
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ return -ENOENT;
+ }
+ spin_lock(&lp->lp_lock);
+ lp->lp_state = cfg->prcfg_state;
+ spin_unlock(&lp->lp_lock);
+ lnet_peer_decref_locked(lp);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+ CDEBUG(D_NET, "Set peer %s state to %u\n",
+ libcfs_nid2str(cfg->prcfg_prim_nid), cfg->prcfg_state);
+ return 0;
+ }
+
case IOC_LIBCFS_SET_CONNS_PER_PEER: {
struct lnet_ioctl_reset_conns_per_peer_cfg *cfg = arg;
int value;
@@ -1429,8 +1429,10 @@ void LNetPrimaryNID(struct lnet_nid *nid)
*/
again:
spin_lock(&lp->lp_lock);
- if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid)
+ if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid) {
lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+ lp->lp_prim_lock_ts = ktime_get_ns();
+ }
/* DD disabled, nothing to do */
if (lnet_peer_discovery_disabled) {
@@ -1577,8 +1579,10 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp,
lnet_peer_clr_non_mr_pref_nids(lp);
}
}
- if (flags & LNET_PEER_LOCK_PRIMARY)
+ if (flags & LNET_PEER_LOCK_PRIMARY) {
lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+ lp->lp_prim_lock_ts = ktime_get_ns();
+ }
spin_unlock(&lp->lp_lock);
lp->lp_nnis++;
@@ -1762,24 +1766,53 @@ lnet_peer_add_nid(struct lnet_peer *lp, struct lnet_nid *nid,
struct lnet_peer *lp2 =
lpni->lpni_peer_net->lpn_peer;
int rtr_refcount = lp2->lp_rtr_refcount;
-
- /* If the new peer that this NID belongs to is
- * a primary NID for another peer which we're
- * suppose to preserve the Primary for then we
- * don't want to mess with it. But the
- * configuration is wrong at this point, so we
- * should flag both of these peers as in a bad
+ unsigned int peer2_state;
+ u64 peer2_prim_lock_ts;
+
+ /* If there's another peer that this NID belongs to
+ * and the primary NID for that peer is locked,
+ * then, unless it is the only NID, we don't want
+ * to mess with it.
+ * But the configuration is wrong at this point,
+ * so we should flag both of these peers as in a bad
* state
*/
- if (lp2->lp_state & LNET_PEER_LOCK_PRIMARY) {
+ spin_lock(&lp2->lp_lock);
+ if (lp2->lp_state & LNET_PEER_LOCK_PRIMARY &&
+ lp2->lp_nnis > 1) {
+ lp2->lp_state |= LNET_PEER_BAD_CONFIG;
+ spin_unlock(&lp2->lp_lock);
spin_lock(&lp->lp_lock);
lp->lp_state |= LNET_PEER_BAD_CONFIG;
spin_unlock(&lp->lp_lock);
- spin_lock(&lp2->lp_lock);
- lp2->lp_state |= LNET_PEER_BAD_CONFIG;
- spin_unlock(&lp2->lp_lock);
+ CERROR("Peer %s NID %s is already locked with peer %s\n",
+ libcfs_nidstr(&lp->lp_primary_nid),
+ libcfs_nidstr(nid),
+ libcfs_nidstr(&lp2->lp_primary_nid));
goto out_free_lpni;
}
+ peer2_state = lp2->lp_state;
+ peer2_prim_lock_ts = lp2->lp_prim_lock_ts;
+ spin_unlock(&lp2->lp_lock);
+
+ /* NID which got locked the earliest should be
+ * kept as primary. In case if the peers were
+ * created by Lustre, this allows the
+ * first listed NID to stay primary as intended
+ * for the purpose of communicating with Lustre
+ * even if peer discovery succeeded using
+ * a different NID of MR peer.
+ */
+ spin_lock(&lp->lp_lock);
+ if (peer2_state & LNET_PEER_LOCK_PRIMARY &&
+ ((lp->lp_state & LNET_PEER_LOCK_PRIMARY &&
+ peer2_prim_lock_ts < lp->lp_prim_lock_ts) ||
+ !(lp->lp_state & LNET_PEER_LOCK_PRIMARY))) {
+ lp->lp_prim_lock_ts = peer2_prim_lock_ts;
+ lp->lp_primary_nid = *nid;
+ lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+ }
+ spin_unlock(&lp->lp_lock);
/* if we're trying to delete a router it means
* we're moving this peer NI to a new peer so must
* transfer router properties to the new peer