Message ID | 153628137147.8267.3706504130592682241.stgit@noble (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Beginning of multi-rail support for drivers/staging/lustre | expand |
Reviewed-by: Doug Oucharek <dougso@me.com<mailto:dougso@me.com>> Doug On Sep 6, 2018, at 5:49 PM, NeilBrown <neilb@suse.com<mailto:neilb@suse.com>> wrote: Currently we store the net-interface in the peer, but the peer should identify just the network, not the particular interface. To help track which actual interface is used for each message, store them explicitly. This is part of 8cbb8cd3e771e7f7e0f99cafc19fad32770dc015 LU-7734 lnet: Multi-Rail local NI split and includes commit 63c3e5129873 ("LU-7734 lnet: Fix lnet_msg_free()") Signed-off-by: NeilBrown <neilb@suse.com<mailto:neilb@suse.com>> --- .../staging/lustre/include/linux/lnet/lib-types.h | 3 +++ drivers/staging/lustre/lnet/lnet/lib-move.c | 21 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h index 5f0d4703bf86..16a493529a46 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h @@ -98,6 +98,9 @@ struct lnet_msg { void *msg_private; struct lnet_libmd *msg_md; + /* the NI the message was sent or received over */ + struct lnet_ni *msg_txni; + struct lnet_ni *msg_rxni; unsigned int msg_len; unsigned int msg_wanted; diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index 1c874025fa74..b2a52ddcefcb 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -782,6 +782,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg) { struct lnet_peer *txpeer = msg->msg_txpeer; struct lnet_msg *msg2; + struct lnet_ni *txni = msg->msg_txni; if (msg->msg_txcredit) { struct lnet_ni *ni = txpeer->lp_ni; @@ -829,6 +830,11 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg) } } + if (txni != NULL) { + msg->msg_txni = NULL; + lnet_ni_decref_locked(txni, msg->msg_tx_cpt); + } + if (txpeer) { msg->msg_txpeer = 
NULL; lnet_peer_decref_locked(txpeer); @@ -876,6 +882,7 @@ void lnet_return_rx_credits_locked(struct lnet_msg *msg) { struct lnet_peer *rxpeer = msg->msg_rxpeer; + struct lnet_ni *rxni = msg->msg_rxni; struct lnet_msg *msg2; if (msg->msg_rtrcredit) { @@ -951,6 +958,10 @@ lnet_return_rx_credits_locked(struct lnet_msg *msg) (void)lnet_post_routed_recv_locked(msg2, 1); } } + if (rxni != NULL) { + msg->msg_rxni = NULL; + lnet_ni_decref_locked(rxni, msg->msg_rx_cpt); + } if (rxpeer) { msg->msg_rxpeer = NULL; lnet_peer_decref_locked(rxpeer); @@ -1218,9 +1229,12 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid) LASSERT(!msg->msg_peertxcredit); LASSERT(!msg->msg_txcredit); - LASSERT(!msg->msg_txpeer); + LASSERT(msg->msg_txpeer == NULL); - msg->msg_txpeer = lp; /* msg takes my ref on lp */ + msg->msg_txpeer = lp; /* msg takes my ref on lp */ + /* set the NI for this message */ + msg->msg_txni = src_ni; + lnet_ni_addref_locked(msg->msg_txni, cpt); rc = lnet_post_send_locked(msg, 0); lnet_net_unlock(cpt); @@ -1818,6 +1832,8 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, return 0; goto drop; } + msg->msg_rxni = ni; + lnet_ni_addref_locked(ni, cpt); if (lnet_isrouter(msg->msg_rxpeer)) { lnet_peer_set_alive(msg->msg_rxpeer); @@ -1934,6 +1950,7 @@ lnet_recv_delayed_msg_list(struct list_head *head) LASSERT(msg->msg_rx_delayed); LASSERT(msg->msg_md); LASSERT(msg->msg_rxpeer); + LASSERT(msg->msg_rxni); LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=us-ascii"> </head> <body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class=""> Reviewed-by: Doug Oucharek <<a href="mailto:dougso@me.com" class="">dougso@me.com</a>> <div class=""><br class=""> </div> <div class="">Doug</div> <div class=""><br class=""> <div style=""> 
<blockquote type="cite" class=""> <div class="">On Sep 6, 2018, at 5:49 PM, NeilBrown <<a href="mailto:neilb@suse.com" class="">neilb@suse.com</a>> wrote:</div> <br class="Apple-interchange-newline"> <div class=""> <div class="">Currently we store the net-interface in the peer, but the<br class=""> peer should identify just the network, not the particular interface.<br class=""> To help track which actual interface is used for each<br class=""> message, store them explicitly.<br class=""> <br class=""> This is part of<br class=""> 8cbb8cd3e771e7f7e0f99cafc19fad32770dc015<br class=""> LU-7734 lnet: Multi-Rail local NI split<br class=""> <br class=""> and includes commit 63c3e5129873 ("LU-7734 lnet: Fix lnet_msg_free()")<br class=""> <br class=""> Signed-off-by: NeilBrown <<a href="mailto:neilb@suse.com" class="">neilb@suse.com</a>><br class=""> ---<br class=""> .../staging/lustre/include/linux/lnet/lib-types.h | 3 +++<br class=""> drivers/staging/lustre/lnet/lnet/lib-move.c | 21 ++++++++++++++++++--<br class=""> 2 files changed, 22 insertions(+), 2 deletions(-)<br class=""> <br class=""> diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h<br class=""> index 5f0d4703bf86..16a493529a46 100644<br class=""> --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h<br class=""> +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h<br class=""> @@ -98,6 +98,9 @@ struct lnet_msg {<br class=""> <br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>void<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>*msg_private;<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>struct lnet_libmd<span class="Apple-tab-span" style="white-space:pre"> </span>*msg_md;<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>/* 
the NI the message was sent or received over */<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>struct lnet_ni *msg_txni;<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>struct lnet_ni *msg_rxni;<br class=""> <br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>unsigned int<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span>msg_len;<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>unsigned int<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span>msg_wanted;<br class=""> diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c<br class=""> index 1c874025fa74..b2a52ddcefcb 100644<br class=""> --- a/drivers/staging/lustre/lnet/lnet/lib-move.c<br class=""> +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c<br class=""> @@ -782,6 +782,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg)<br class=""> {<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>struct lnet_peer *txpeer = msg->msg_txpeer;<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>struct lnet_msg *msg2;<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>struct lnet_ni<span class="Apple-tab-span" style="white-space:pre"> </span>*txni = msg->msg_txni;<br class=""> <br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>if (msg->msg_txcredit) {<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>struct lnet_ni *ni = txpeer->lp_ni;<br class=""> @@ -829,6 +830,11 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg)<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>}<br class=""> <span 
class="Apple-tab-span" style="white-space:pre"></span>}<br class=""> <br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>if (txni != NULL) {<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span>msg->msg_txni = NULL;<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span>lnet_ni_decref_locked(txni, msg->msg_tx_cpt);<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>}<br class=""> +<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>if (txpeer) {<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>msg->msg_txpeer = NULL;<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>lnet_peer_decref_locked(txpeer);<br class=""> @@ -876,6 +882,7 @@ void<br class=""> lnet_return_rx_credits_locked(struct lnet_msg *msg)<br class=""> {<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>struct lnet_peer *rxpeer = msg->msg_rxpeer;<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>struct lnet_ni<span class="Apple-tab-span" style="white-space:pre"> </span>*rxni = msg->msg_rxni;<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>struct lnet_msg *msg2;<br class=""> <br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>if (msg->msg_rtrcredit) {<br class=""> @@ -951,6 +958,10 @@ lnet_return_rx_credits_locked(struct lnet_msg *msg)<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>(void)lnet_post_routed_recv_locked(msg2, 1);<br class=""> <span class="Apple-tab-span" 
style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>}<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>}<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>if (rxni != NULL) {<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span>msg->msg_rxni = NULL;<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span>lnet_ni_decref_locked(rxni, msg->msg_rx_cpt);<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>}<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>if (rxpeer) {<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>msg->msg_rxpeer = NULL;<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>lnet_peer_decref_locked(rxpeer);<br class=""> @@ -1218,9 +1229,12 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid)<br class=""> <br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>LASSERT(!msg->msg_peertxcredit);<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>LASSERT(!msg->msg_txcredit);<br class=""> -<span class="Apple-tab-span" style="white-space:pre"> </span>LASSERT(!msg->msg_txpeer);<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>LASSERT(msg->msg_txpeer == NULL);<br class=""> <br class=""> -<span class="Apple-tab-span" style="white-space:pre"> </span>msg->msg_txpeer = lp;<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span> /* msg takes my ref on lp */<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>msg->msg_txpeer = lp; /* msg takes 
my ref on lp */<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>/* set the NI for this message */<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>msg->msg_txni = src_ni;<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>lnet_ni_addref_locked(msg->msg_txni, cpt);<br class=""> <br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>rc = lnet_post_send_locked(msg, 0);<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>lnet_net_unlock(cpt);<br class=""> @@ -1818,6 +1832,8 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid,<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>return 0;<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>goto drop;<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>}<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>msg->msg_rxni = ni;<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span>lnet_ni_addref_locked(ni, cpt);<br class=""> <br class=""> <span class="Apple-tab-span" style="white-space:pre"></span>if (lnet_isrouter(msg->msg_rxpeer)) {<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>lnet_peer_set_alive(msg->msg_rxpeer);<br class=""> @@ -1934,6 +1950,7 @@ lnet_recv_delayed_msg_list(struct list_head *head)<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>LASSERT(msg->msg_rx_delayed);<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>LASSERT(msg->msg_md);<br 
class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>LASSERT(msg->msg_rxpeer);<br class=""> +<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"></span>LASSERT(msg->msg_rxni);<br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);<br class=""> <br class=""> <span class="Apple-tab-span" style="white-space:pre"></span><span class="Apple-tab-span" style="white-space:pre"></span>CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",<br class=""> <br class=""> <br class=""> </div> </div> </blockquote> </div> <br class=""> </div> </body> </html>
> Currently we store the net-interface in the peer, but the > peer should identify just the network, not the particular interface. > To help track which actual interface is used for each > message, store them explicitly. Reviewed-by: James Simmons <jsimmons@infradead.org> The below needs fixing based on response to cover letter. With a combo patch the following works well. Signed-off-by: Amir Shehata <ashehata@whamcloud.com> WC-bug-id: https://jira.whamcloud.com/browse/LU-7734 Reviewed-on: http://review.whamcloud.com/18274 Reviewed-on: http://review.whamcloud.com/20729 Reviewed-by: Doug Oucharek <dougso@me.com> Reviewed-by: Olaf Weber <olaf.weber@hpe.com> Signed-off-by: NeilBrown <neilb@suse.com> > This is part of > 8cbb8cd3e771e7f7e0f99cafc19fad32770dc015 > LU-7734 lnet: Multi-Rail local NI split > > and includes commit 63c3e5129873 ("LU-7734 lnet: Fix lnet_msg_free()") > > Signed-off-by: NeilBrown <neilb@suse.com> > --- > .../staging/lustre/include/linux/lnet/lib-types.h | 3 +++ > drivers/staging/lustre/lnet/lnet/lib-move.c | 21 ++++++++++++++++++-- > 2 files changed, 22 insertions(+), 2 deletions(-) > > diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h > index 5f0d4703bf86..16a493529a46 100644 > --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h > +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h > @@ -98,6 +98,9 @@ struct lnet_msg { > > void *msg_private; > struct lnet_libmd *msg_md; > + /* the NI the message was sent or received over */ > + struct lnet_ni *msg_txni; > + struct lnet_ni *msg_rxni; > > unsigned int msg_len; > unsigned int msg_wanted; > diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c > index 1c874025fa74..b2a52ddcefcb 100644 > --- a/drivers/staging/lustre/lnet/lnet/lib-move.c > +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c > @@ -782,6 +782,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg) > { 
> struct lnet_peer *txpeer = msg->msg_txpeer; > struct lnet_msg *msg2; > + struct lnet_ni *txni = msg->msg_txni; > > if (msg->msg_txcredit) { > struct lnet_ni *ni = txpeer->lp_ni; > @@ -829,6 +830,11 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg) > } > } > > + if (txni != NULL) { > + msg->msg_txni = NULL; > + lnet_ni_decref_locked(txni, msg->msg_tx_cpt); > + } > + > if (txpeer) { > msg->msg_txpeer = NULL; > lnet_peer_decref_locked(txpeer); > @@ -876,6 +882,7 @@ void > lnet_return_rx_credits_locked(struct lnet_msg *msg) > { > struct lnet_peer *rxpeer = msg->msg_rxpeer; > + struct lnet_ni *rxni = msg->msg_rxni; > struct lnet_msg *msg2; > > if (msg->msg_rtrcredit) { > @@ -951,6 +958,10 @@ lnet_return_rx_credits_locked(struct lnet_msg *msg) > (void)lnet_post_routed_recv_locked(msg2, 1); > } > } > + if (rxni != NULL) { > + msg->msg_rxni = NULL; > + lnet_ni_decref_locked(rxni, msg->msg_rx_cpt); > + } > if (rxpeer) { > msg->msg_rxpeer = NULL; > lnet_peer_decref_locked(rxpeer); > @@ -1218,9 +1229,12 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid) > > LASSERT(!msg->msg_peertxcredit); > LASSERT(!msg->msg_txcredit); > - LASSERT(!msg->msg_txpeer); > + LASSERT(msg->msg_txpeer == NULL); > > - msg->msg_txpeer = lp; /* msg takes my ref on lp */ > + msg->msg_txpeer = lp; /* msg takes my ref on lp */ > + /* set the NI for this message */ > + msg->msg_txni = src_ni; > + lnet_ni_addref_locked(msg->msg_txni, cpt); > > rc = lnet_post_send_locked(msg, 0); > lnet_net_unlock(cpt); > @@ -1818,6 +1832,8 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, > return 0; > goto drop; > } > + msg->msg_rxni = ni; > + lnet_ni_addref_locked(ni, cpt); > > if (lnet_isrouter(msg->msg_rxpeer)) { > lnet_peer_set_alive(msg->msg_rxpeer); > @@ -1934,6 +1950,7 @@ lnet_recv_delayed_msg_list(struct list_head *head) > LASSERT(msg->msg_rx_delayed); > LASSERT(msg->msg_md); > LASSERT(msg->msg_rxpeer); > + LASSERT(msg->msg_rxni); > LASSERT(msg->msg_hdr.type 
== LNET_MSG_PUT); > > CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", > > >
> Currently we store the net-interface in the peer, but the > peer should identify just the network, not the particular interface. > To help track which actual interface is used for each > message, store them explicitly. Reviewed-by: James Simmons <jsimmons@infradead.org> The below needs fixing based on response to cover letter. With a combo patch the following works well. Signed-off-by: Amir Shehata <ashehata@whamcloud.com> WC-bug-id: https://jira.whamcloud.com/browse/LU-7734 Reviewed-on: http://review.whamcloud.com/18274 Reviewed-on: http://review.whamcloud.com/20729 Reviewed-by: Doug Oucharek <dougso@me.com> Reviewed-by: Olaf Weber <olaf.weber@hpe.com> Signed-off-by: NeilBrown <neilb@suse.com> > This is part of > 8cbb8cd3e771e7f7e0f99cafc19fad32770dc015 > LU-7734 lnet: Multi-Rail local NI split > > and includes commit 63c3e5129873 ("LU-7734 lnet: Fix lnet_msg_free()") > > Signed-off-by: NeilBrown <neilb@suse.com> > --- > .../staging/lustre/include/linux/lnet/lib-types.h | 3 +++ > drivers/staging/lustre/lnet/lnet/lib-move.c | 21 ++++++++++++++++++-- > 2 files changed, 22 insertions(+), 2 deletions(-) > > diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h > index 5f0d4703bf86..16a493529a46 100644 > --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h > +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h > @@ -98,6 +98,9 @@ struct lnet_msg { > > void *msg_private; > struct lnet_libmd *msg_md; > + /* the NI the message was sent or received over */ > + struct lnet_ni *msg_txni; > + struct lnet_ni *msg_rxni; > > unsigned int msg_len; > unsigned int msg_wanted; > diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c > index 1c874025fa74..b2a52ddcefcb 100644 > --- a/drivers/staging/lustre/lnet/lnet/lib-move.c > +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c > @@ -782,6 +782,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg) > { 
> struct lnet_peer *txpeer = msg->msg_txpeer; > struct lnet_msg *msg2; > + struct lnet_ni *txni = msg->msg_txni; > > if (msg->msg_txcredit) { > struct lnet_ni *ni = txpeer->lp_ni; > @@ -829,6 +830,11 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg) > } > } > > + if (txni != NULL) { > + msg->msg_txni = NULL; > + lnet_ni_decref_locked(txni, msg->msg_tx_cpt); > + } > + > if (txpeer) { > msg->msg_txpeer = NULL; > lnet_peer_decref_locked(txpeer); > @@ -876,6 +882,7 @@ void > lnet_return_rx_credits_locked(struct lnet_msg *msg) > { > struct lnet_peer *rxpeer = msg->msg_rxpeer; > + struct lnet_ni *rxni = msg->msg_rxni; > struct lnet_msg *msg2; > > if (msg->msg_rtrcredit) { > @@ -951,6 +958,10 @@ lnet_return_rx_credits_locked(struct lnet_msg *msg) > (void)lnet_post_routed_recv_locked(msg2, 1); > } > } > + if (rxni != NULL) { > + msg->msg_rxni = NULL; > + lnet_ni_decref_locked(rxni, msg->msg_rx_cpt); > + } > if (rxpeer) { > msg->msg_rxpeer = NULL; > lnet_peer_decref_locked(rxpeer); > @@ -1218,9 +1229,12 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid) > > LASSERT(!msg->msg_peertxcredit); > LASSERT(!msg->msg_txcredit); > - LASSERT(!msg->msg_txpeer); > + LASSERT(msg->msg_txpeer == NULL); > > - msg->msg_txpeer = lp; /* msg takes my ref on lp */ > + msg->msg_txpeer = lp; /* msg takes my ref on lp */ > + /* set the NI for this message */ > + msg->msg_txni = src_ni; > + lnet_ni_addref_locked(msg->msg_txni, cpt); > > rc = lnet_post_send_locked(msg, 0); > lnet_net_unlock(cpt); > @@ -1818,6 +1832,8 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, > return 0; > goto drop; > } > + msg->msg_rxni = ni; > + lnet_ni_addref_locked(ni, cpt); > > if (lnet_isrouter(msg->msg_rxpeer)) { > lnet_peer_set_alive(msg->msg_rxpeer); > @@ -1934,6 +1950,7 @@ lnet_recv_delayed_msg_list(struct list_head *head) > LASSERT(msg->msg_rx_delayed); > LASSERT(msg->msg_md); > LASSERT(msg->msg_rxpeer); > + LASSERT(msg->msg_rxni); > LASSERT(msg->msg_hdr.type 
== LNET_MSG_PUT); > > CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n", > > >
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h index 5f0d4703bf86..16a493529a46 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h @@ -98,6 +98,9 @@ struct lnet_msg { void *msg_private; struct lnet_libmd *msg_md; + /* the NI the message was sent or received over */ + struct lnet_ni *msg_txni; + struct lnet_ni *msg_rxni; unsigned int msg_len; unsigned int msg_wanted; diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index 1c874025fa74..b2a52ddcefcb 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -782,6 +782,7 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg) { struct lnet_peer *txpeer = msg->msg_txpeer; struct lnet_msg *msg2; + struct lnet_ni *txni = msg->msg_txni; if (msg->msg_txcredit) { struct lnet_ni *ni = txpeer->lp_ni; @@ -829,6 +830,11 @@ lnet_return_tx_credits_locked(struct lnet_msg *msg) } } + if (txni != NULL) { + msg->msg_txni = NULL; + lnet_ni_decref_locked(txni, msg->msg_tx_cpt); + } + if (txpeer) { msg->msg_txpeer = NULL; lnet_peer_decref_locked(txpeer); @@ -876,6 +882,7 @@ void lnet_return_rx_credits_locked(struct lnet_msg *msg) { struct lnet_peer *rxpeer = msg->msg_rxpeer; + struct lnet_ni *rxni = msg->msg_rxni; struct lnet_msg *msg2; if (msg->msg_rtrcredit) { @@ -951,6 +958,10 @@ lnet_return_rx_credits_locked(struct lnet_msg *msg) (void)lnet_post_routed_recv_locked(msg2, 1); } } + if (rxni != NULL) { + msg->msg_rxni = NULL; + lnet_ni_decref_locked(rxni, msg->msg_rx_cpt); + } if (rxpeer) { msg->msg_rxpeer = NULL; lnet_peer_decref_locked(rxpeer); @@ -1218,9 +1229,12 @@ lnet_send(lnet_nid_t src_nid, struct lnet_msg *msg, lnet_nid_t rtr_nid) LASSERT(!msg->msg_peertxcredit); LASSERT(!msg->msg_txcredit); - LASSERT(!msg->msg_txpeer); + LASSERT(msg->msg_txpeer == NULL); - msg->msg_txpeer = 
lp; /* msg takes my ref on lp */ + msg->msg_txpeer = lp; /* msg takes my ref on lp */ + /* set the NI for this message */ + msg->msg_txni = src_ni; + lnet_ni_addref_locked(msg->msg_txni, cpt); rc = lnet_post_send_locked(msg, 0); lnet_net_unlock(cpt); @@ -1818,6 +1832,8 @@ lnet_parse(struct lnet_ni *ni, struct lnet_hdr *hdr, lnet_nid_t from_nid, return 0; goto drop; } + msg->msg_rxni = ni; + lnet_ni_addref_locked(ni, cpt); if (lnet_isrouter(msg->msg_rxpeer)) { lnet_peer_set_alive(msg->msg_rxpeer); @@ -1934,6 +1950,7 @@ lnet_recv_delayed_msg_list(struct list_head *head) LASSERT(msg->msg_rx_delayed); LASSERT(msg->msg_md); LASSERT(msg->msg_rxpeer); + LASSERT(msg->msg_rxni); LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d match %llu offset %d length %d.\n",
Currently we store the net-interface in the peer, but the peer should identify just the network, not the particular interface. To help track which actual interface is used for each message, store the transmit and receive interfaces explicitly in the message (msg_txni and msg_rxni). This is part of commit 8cbb8cd3e771e7f7e0f99cafc19fad32770dc015 ("LU-7734 lnet: Multi-Rail local NI split") and includes commit 63c3e5129873 ("LU-7734 lnet: Fix lnet_msg_free()") Signed-off-by: NeilBrown <neilb@suse.com> --- .../staging/lustre/include/linux/lnet/lib-types.h | 3 +++ drivers/staging/lustre/lnet/lnet/lib-move.c | 21 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-)