From patchwork Sun Oct 7 23:19:37 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: NeilBrown X-Patchwork-Id: 10629813 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id CDD21112B for ; Sun, 7 Oct 2018 23:31:05 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id BEE2828CBF for ; Sun, 7 Oct 2018 23:31:05 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id B2CBA28CC8; Sun, 7 Oct 2018 23:31:05 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-2.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_NONE autolearn=ham version=3.3.1 Received: from pdx1-mailman02.dreamhost.com (pdx1-mailman02.dreamhost.com [64.90.62.194]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 290EA28CBF for ; Sun, 7 Oct 2018 23:31:05 +0000 (UTC) Received: from pdx1-mailman02.dreamhost.com (localhost [IPv6:::1]) by pdx1-mailman02.dreamhost.com (Postfix) with ESMTP id E79A38617B0; Sun, 7 Oct 2018 16:31:04 -0700 (PDT) X-Original-To: lustre-devel@lists.lustre.org Delivered-To: lustre-devel-lustre.org@pdx1-mailman02.dreamhost.com Received: from mx1.suse.de (mx2.suse.de [195.135.220.15]) by pdx1-mailman02.dreamhost.com (Postfix) with ESMTP id 2949621F5E1 for ; Sun, 7 Oct 2018 16:31:03 -0700 (PDT) X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay1.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 1BEDFAE87; Sun, 7 Oct 2018 23:31:02 +0000 (UTC) From: NeilBrown To: Oleg Drokin , Doug Oucharek , James Simmons , Andreas Dilger Date: Mon, 08 Oct 2018 
10:19:37 +1100 Message-ID: <153895437796.16383.5518559009775786439.stgit@noble> In-Reply-To: <153895417139.16383.3791701638653772865.stgit@noble> References: <153895417139.16383.3791701638653772865.stgit@noble> User-Agent: StGit/0.17.1-dirty MIME-Version: 1.0 Subject: [lustre-devel] [PATCH 11/24] lustre: lnet: introduce LNET_PEER_MULTI_RAIL flag bit X-BeenThere: lustre-devel@lists.lustre.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "For discussing Lustre software development." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Amir Shehata , Olaf Weber , Lustre Development List Errors-To: lustre-devel-bounces@lists.lustre.org Sender: "lustre-devel" X-Virus-Scanned: ClamAV using ClamSMTP From: Olaf Weber Add lp_state as a flag word to lnet_peer, and add lp_lock to protect it. This lock needs to be taken whenever the field is updated, because setting or clearing a bit is a read-modify-write cycle. The lp_multi_rail field is removed; its function is replaced by the new LNET_PEER_MULTI_RAIL flag bit. The helper lnet_peer_is_multi_rail() tests the bit. 
WC-bug-id: https://jira.whamcloud.com/browse/LU-9480 Signed-off-by: Olaf Weber Reviewed-on: https://review.whamcloud.com/25781 Reviewed-by: Olaf Weber Reviewed-by: Amir Shehata Tested-by: Amir Shehata Signed-off-by: NeilBrown Reviewed-by: James Simmons --- .../staging/lustre/include/linux/lnet/lib-lnet.h | 6 +++++ .../staging/lustre/include/linux/lnet/lib-types.h | 11 ++++++++-- drivers/staging/lustre/lnet/lnet/lib-move.c | 9 +++++--- drivers/staging/lustre/lnet/lnet/peer.c | 22 +++++++++++++------- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h index fc748ffa251d..75b47628c70e 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h @@ -757,4 +757,10 @@ lnet_peer_set_alive(struct lnet_peer_ni *lp) lnet_notify_locked(lp, 0, 1, lp->lpni_last_alive); } +static inline bool +lnet_peer_is_multi_rail(struct lnet_peer *lp) +{ + return lp->lp_state & LNET_PEER_MULTI_RAIL; +} + #endif diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h index f28fa5342914..602978a1c86e 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h @@ -467,6 +467,8 @@ struct lnet_peer_ni { atomic_t lpni_refcount; /* CPT this peer attached on */ int lpni_cpt; + /* state flags -- protected by lpni_lock */ + unsigned int lpni_state; /* # refs from lnet_route::lr_gateway */ int lpni_rtr_refcount; /* sequence number used to round robin over peer nis within a net */ @@ -497,10 +499,15 @@ struct lnet_peer { /* primary NID of the peer */ lnet_nid_t lp_primary_nid; - /* peer is Multi-Rail enabled peer */ - bool lp_multi_rail; + /* lock protecting peer state flags */ + spinlock_t lp_lock; + + /* peer state flags */ + unsigned int lp_state; }; +#define LNET_PEER_MULTI_RAIL BIT(0) + 
struct lnet_peer_net { /* chain on peer block */ struct list_head lpn_on_peer_list; diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index 59ae8d0649e5..0d0ad30bb164 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -1281,7 +1281,8 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid, return -EHOSTUNREACH; } - if (!peer->lp_multi_rail && lnet_get_num_peer_nis(peer) > 1) { + if (!lnet_peer_is_multi_rail(peer) && + lnet_get_num_peer_nis(peer) > 1) { lnet_net_unlock(cpt); CERROR("peer %s is declared to be non MR capable, yet configured with more than one NID\n", libcfs_nid2str(dst_nid)); @@ -1307,7 +1308,7 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid, if (msg->msg_type == LNET_MSG_REPLY || msg->msg_type == LNET_MSG_ACK || - !peer->lp_multi_rail || + !lnet_peer_is_multi_rail(peer) || best_ni) { /* * for replies we want to respond on the same peer_ni we @@ -1354,7 +1355,7 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid, * then use the best_gw found to send * the message to */ - if (!peer->lp_multi_rail) + if (!lnet_peer_is_multi_rail(peer)) best_lpni = best_gw; else best_lpni = NULL; @@ -1375,7 +1376,7 @@ lnet_select_pathway(lnet_nid_t src_nid, lnet_nid_t dst_nid, * if the peer is not MR capable, then we should always send to it * using the first NI in the NET we determined. 
*/ - if (!peer->lp_multi_rail) { + if (!lnet_peer_is_multi_rail(peer)) { if (!best_lpni) { lnet_net_unlock(cpt); CERROR("no route to %s\n", diff --git a/drivers/staging/lustre/lnet/lnet/peer.c b/drivers/staging/lustre/lnet/lnet/peer.c index 6b7ca5c361b8..cc2b926b76e4 100644 --- a/drivers/staging/lustre/lnet/lnet/peer.c +++ b/drivers/staging/lustre/lnet/lnet/peer.c @@ -182,6 +182,7 @@ lnet_peer_alloc(lnet_nid_t nid) INIT_LIST_HEAD(&lp->lp_on_lnet_peer_list); INIT_LIST_HEAD(&lp->lp_peer_nets); + spin_lock_init(&lp->lp_lock); lp->lp_primary_nid = nid; /* TODO: update flags */ @@ -798,13 +799,15 @@ lnet_peer_add(lnet_nid_t nid, bool mr) * * TODO: update flags if necessary */ - if (mr && !lp->lp_multi_rail) { - lp->lp_multi_rail = true; - } else if (!mr && lp->lp_multi_rail) { + spin_lock(&lp->lp_lock); + if (mr && !(lp->lp_state & LNET_PEER_MULTI_RAIL)) { + lp->lp_state |= LNET_PEER_MULTI_RAIL; + } else if (!mr && (lp->lp_state & LNET_PEER_MULTI_RAIL)) { /* The mr state is sticky. */ - CDEBUG(D_NET, "Cannot clear multi-flag from peer %s\n", + CDEBUG(D_NET, "Cannot clear multi-rail flag from peer %s\n", libcfs_nid2str(nid)); } + spin_unlock(&lp->lp_lock); return 0; } @@ -817,15 +820,18 @@ lnet_peer_add_nid(struct lnet_peer *lp, lnet_nid_t nid, bool mr) LASSERT(lp); LASSERT(nid != LNET_NID_ANY); - if (!mr && !lp->lp_multi_rail) { + spin_lock(&lp->lp_lock); + if (!mr && !(lp->lp_state & LNET_PEER_MULTI_RAIL)) { + spin_unlock(&lp->lp_lock); CERROR("Cannot add nid %s to non-multi-rail peer %s\n", libcfs_nid2str(nid), libcfs_nid2str(lp->lp_primary_nid)); return -EPERM; } - if (!lp->lp_multi_rail) - lp->lp_multi_rail = true; + if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) + lp->lp_state |= LNET_PEER_MULTI_RAIL; + spin_unlock(&lp->lp_lock); lpni = lnet_find_peer_ni_locked(nid); if (!lpni) @@ -1183,7 +1189,7 @@ int lnet_get_peer_info(__u32 idx, lnet_nid_t *primary_nid, lnet_nid_t *nid, return -ENOENT; *primary_nid = lp->lp_primary_nid; - *mr = lp->lp_multi_rail; + *mr = 
lnet_peer_is_multi_rail(lp); *nid = lpni->lpni_nid; snprintf(ni_info.cr_aliveness, LNET_MAX_STR_LEN, "NA"); if (lnet_isrouter(lpni) ||