diff mbox series

[12/34] lnet: split lnet_startup_lndni

Message ID 153628137171.8267.13510813931908233567.stgit@noble (mailing list archive)
State New, archived
Headers show
Series Beginning of multi-rail support for drivers/staging/lustre | expand

Commit Message

NeilBrown Sept. 7, 2018, 12:49 a.m. UTC
Split into
  lnet_startup_lndnet
which starts all nis in a net, and
  lnet_startup_lndni
which starts an individual ni.

lnet_startup_lndni() returns 0 on success, or a negative error code.
The old lnet_startup_lndnis() returned the count of interfaces started.

The new lnet_startup_lndnet() returns the count of started interfaces.

This requires adding lnet_shutdown_lndnet() to handle errors
in lnet_dyn_add_ni(), which now uses the new lnet_startup_lndnet().

We now drop the ln_lnd_mutex near the end of lnet_startup_lndnet(),
and re-claim it for each lnet_startup_lndni().

This is part of
    8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
       LU-7734 lnet: Multi-Rail local NI split

Signed-off-by: NeilBrown <neilb@suse.com>
---
 drivers/staging/lustre/lnet/lnet/api-ni.c |  142 +++++++++++++++++++++++------
 1 file changed, 111 insertions(+), 31 deletions(-)

Comments

Doug Oucharek Sept. 12, 2018, 3:39 a.m. UTC | #1
Reviewed-by: Doug Oucharek <dougso@me.com>

Doug

On 9/6/18, 5:53 PM, "NeilBrown" <neilb@suse.com> wrote:

    Split into
      lnet_startup_lndnet
    which starts all nis in a net, and
      lnet_startup_lndni
    which starts an individual ni.
    
    lnet_startup_lndni() returns 0 on success, or a negative error code.
    The old lnet_startup_lndnis() returned the count of interfaces started.
    
    The new lnet_startup_lndnet() returns the count of started interfaces.
    
    This requires adding lnet_shutdown_lndnet() to handle errors
    in lnet_dyn_add_ni(), which now uses the new lnet_startup_lndnet().
    
    We now drop the ln_lnd_mutex near the end of lnet_startup_lndnet(),
    and re-claim it for each lnet_startup_lndni().
    
    This is part of
        8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
           LU-7734 lnet: Multi-Rail local NI split
    
    Signed-off-by: NeilBrown <neilb@suse.com>
    ---
     drivers/staging/lustre/lnet/lnet/api-ni.c |  142 +++++++++++++++++++++++------
     1 file changed, 111 insertions(+), 31 deletions(-)
    
    diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
    index 53ecfd700db3..8afddf11b5e2 100644
    --- a/drivers/staging/lustre/lnet/lnet/api-ni.c
    +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
    @@ -1239,32 +1239,61 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
     	lnet_net_unlock(LNET_LOCK_EX);
     }
     
    +static void
    +lnet_shutdown_lndnet(struct lnet_net *net)
    +{
    +	struct lnet_ni *ni;
    +
    +	lnet_net_lock(LNET_LOCK_EX);
    +
    +	list_del_init(&net->net_list);
    +
    +	while (!list_empty(&net->net_ni_list)) {
    +		ni = list_entry(net->net_ni_list.next,
    +				struct lnet_ni, ni_netlist);
    +		lnet_net_unlock(LNET_LOCK_EX);
    +		lnet_shutdown_lndni(ni);
    +		lnet_net_lock(LNET_LOCK_EX);
    +	}
    +
    +	/*
    +	 * decrement ref count on lnd only when the entire network goes
    +	 * away
    +	 */
    +	net->net_lnd->lnd_refcount--;
    +
    +	lnet_net_unlock(LNET_LOCK_EX);
    +
    +	lnet_net_free(net);
    +}
    +
     static int
    -lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
    +lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun);
    +
    +static int
    +lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
     {
    -	int rc = -EINVAL;
    -	int lnd_type;
    -	struct lnet_lnd *lnd;
    -	struct lnet_tx_queue *tq;
    -	int i;
    -	u32 seed;
    +	struct lnet_ni		*ni;
    +	__u32			lnd_type;
    +	struct lnet_lnd		*lnd;
    +	int rc;
     
    -	lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
    +	lnd_type = LNET_NETTYP(net->net_id);
     
     	LASSERT(libcfs_isknown_lnd(lnd_type));
     
     	/* Make sure this new NI is unique. */
     	lnet_net_lock(LNET_LOCK_EX);
    -	rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nets);
    +	rc = lnet_net_unique(net->net_id, &the_lnet.ln_nets);
     	lnet_net_unlock(LNET_LOCK_EX);
     	if (!rc) {
     		if (lnd_type == LOLND) {
    -			lnet_ni_free(ni);
    +			lnet_net_free(net);
     			return 0;
     		}
     
     		CERROR("Net %s is not unique\n",
    -		       libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
    +		       libcfs_net2str(net->net_id));
     		rc = -EEXIST;
     		goto failed0;
     	}
    @@ -1291,8 +1320,32 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
     	lnet_net_lock(LNET_LOCK_EX);
     	lnd->lnd_refcount++;
     	lnet_net_unlock(LNET_LOCK_EX);
    +	net->net_lnd = lnd;
    +	mutex_unlock(&the_lnet.ln_lnd_mutex);
     
    -	ni->ni_net->net_lnd = lnd;
    +	ni = list_first_entry(&net->net_ni_list, struct lnet_ni, ni_netlist);
    +
    +	rc = lnet_startup_lndni(ni, tun);
    +	if (rc < 0)
    +		return rc;
    +	return 1;
    +
    +failed0:
    +	lnet_net_free(net);
    +
    +	return rc;
    +}
    +
    +static int
    +lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
    +{
    +	int			rc = -EINVAL;
    +	struct lnet_tx_queue	*tq;
    +	int			i;
    +	struct lnet_net		*net = ni->ni_net;
    +	u32			seed;
    +
    +	mutex_lock(&the_lnet.ln_lnd_mutex);
     
     	if (tun) {
     		memcpy(&ni->ni_lnd_tunables, tun,
    @@ -1300,15 +1353,15 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
     		ni->ni_lnd_tunables_set = true;
     	}
     
    -	rc = lnd->lnd_startup(ni);
    +	rc = net->net_lnd->lnd_startup(ni);
     
     	mutex_unlock(&the_lnet.ln_lnd_mutex);
     
     	if (rc) {
     		LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
    -				   rc, libcfs_lnd2str(lnd->lnd_type));
    +				   rc, libcfs_lnd2str(net->net_lnd->lnd_type));
     		lnet_net_lock(LNET_LOCK_EX);
    -		lnd->lnd_refcount--;
    +		net->net_lnd->lnd_refcount--;
     		lnet_net_unlock(LNET_LOCK_EX);
     		goto failed0;
     	}
    @@ -1324,7 +1377,7 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
     
     	lnet_net_unlock(LNET_LOCK_EX);
     
    -	if (lnd->lnd_type == LOLND) {
    +	if (net->net_lnd->lnd_type == LOLND) {
     		lnet_ni_addref(ni);
     		LASSERT(!the_lnet.ln_loni);
     		the_lnet.ln_loni = ni;
    @@ -1338,7 +1391,7 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
     	if (!ni->ni_net->net_tunables.lct_peer_tx_credits ||
     	    !ni->ni_net->net_tunables.lct_max_tx_credits) {
     		LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
    -				   libcfs_lnd2str(lnd->lnd_type),
    +				   libcfs_lnd2str(net->net_lnd->lnd_type),
     				   !ni->ni_net->net_tunables.lct_peer_tx_credits ?
     				   "" : "per-peer ");
     		/*
    @@ -1375,21 +1428,22 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
     }
     
     static int
    -lnet_startup_lndnis(struct list_head *nilist)
    +lnet_startup_lndnets(struct list_head *netlist)
     {
    -	struct lnet_ni *ni;
    +	struct lnet_net *net;
     	int rc;
     	int ni_count = 0;
     
    -	while (!list_empty(nilist)) {
    -		ni = list_entry(nilist->next, struct lnet_ni, ni_netlist);
    -		list_del(&ni->ni_netlist);
    -		rc = lnet_startup_lndni(ni, NULL);
    +	while (!list_empty(netlist)) {
    +		net = list_entry(netlist->next, struct lnet_net, net_list);
    +		list_del_init(&net->net_list);
    +
    +		rc = lnet_startup_lndnet(net, NULL);
     
     		if (rc < 0)
     			goto failed;
     
    -		ni_count++;
    +		ni_count += rc;
     	}
     
     	return ni_count;
    @@ -1552,7 +1606,7 @@ LNetNIInit(lnet_pid_t requested_pid)
     			goto err_empty_list;
     	}
     
    -	ni_count = lnet_startup_lndnis(&net_head);
    +	ni_count = lnet_startup_lndnets(&net_head);
     	if (ni_count < 0) {
     		rc = ni_count;
     		goto err_empty_list;
    @@ -1831,10 +1885,11 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
     	struct lnet_ping_info *pinfo;
     	struct lnet_handle_md md_handle;
     	struct lnet_net		*net;
    -	struct lnet_ni *ni;
     	struct list_head net_head;
     	struct lnet_remotenet *rnet;
     	int rc;
    +	int			num_acceptor_nets;
    +	__u32			net_type;
     	struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
     
     	INIT_LIST_HEAD(&net_head);
    @@ -1876,22 +1931,47 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
     		goto failed0;
     
     	list_del_init(&net->net_list);
    +
     	if (lnd_tunables)
     		memcpy(&net->net_tunables,
     		       &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
     
    -	ni = list_first_entry(&net->net_ni_list, struct lnet_ni, ni_netlist);
    -	rc = lnet_startup_lndni(ni, (lnd_tunables ?
    +	/*
    +	 * before starting this network get a count of the current TCP
    +	 * networks which require the acceptor thread running. If that
    +	 * count is == 0 before we start up this network, then we'd want to
    +	 * start up the acceptor thread after starting up this network
    +	 */
    +	num_acceptor_nets = lnet_count_acceptor_nets();
    +
    +	/*
    +	 * lnet_startup_lndnet() can deallocate 'net' even if it returns
    +	 * success, because we ended up adding interfaces to an existing
    +	 * network. So grab the net_type now
    +	 */
    +	net_type = LNET_NETTYP(net->net_id);
    +
    +	rc = lnet_startup_lndnet(net, (lnd_tunables ?
     				     &lnd_tunables->lt_tun : NULL));
     	if (rc < 0)
     		goto failed1;
     
    -	if (ni->ni_net->net_lnd->lnd_accept) {
    +	/*
    +	 * Start the acceptor thread if this is the first network
    +	 * being added that requires the thread.
    +	 */
    +	if (net_type == SOCKLND && num_acceptor_nets == 0) {
     		rc = lnet_acceptor_start();
     		if (rc < 0) {
    -			/* shutdown the ni that we just started */
    +			/* shutdown the net that we just started */
     			CERROR("Failed to start up acceptor thread\n");
    -			lnet_shutdown_lndni(ni);
    +			/*
    +			 * Note that if we needed to start the acceptor
    +			 * thread, then 'net' must have been the first TCP
    +			 * network, therefore was unique, and therefore
    +			 * wasn't deallocated by lnet_startup_lndnet()
    +			 */
    +			lnet_shutdown_lndnet(net);
     			goto failed1;
     		}
     	}
diff mbox series

Patch

diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 53ecfd700db3..8afddf11b5e2 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -1239,32 +1239,61 @@  lnet_shutdown_lndni(struct lnet_ni *ni)
 	lnet_net_unlock(LNET_LOCK_EX);
 }
 
+static void
+lnet_shutdown_lndnet(struct lnet_net *net)
+{
+	struct lnet_ni *ni;
+
+	lnet_net_lock(LNET_LOCK_EX);
+
+	list_del_init(&net->net_list);
+
+	while (!list_empty(&net->net_ni_list)) {
+		ni = list_entry(net->net_ni_list.next,
+				struct lnet_ni, ni_netlist);
+		lnet_net_unlock(LNET_LOCK_EX);
+		lnet_shutdown_lndni(ni);
+		lnet_net_lock(LNET_LOCK_EX);
+	}
+
+	/*
+	 * decrement ref count on lnd only when the entire network goes
+	 * away
+	 */
+	net->net_lnd->lnd_refcount--;
+
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	lnet_net_free(net);
+}
+
 static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun);
+
+static int
+lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
 {
-	int rc = -EINVAL;
-	int lnd_type;
-	struct lnet_lnd *lnd;
-	struct lnet_tx_queue *tq;
-	int i;
-	u32 seed;
+	struct lnet_ni		*ni;
+	__u32			lnd_type;
+	struct lnet_lnd		*lnd;
+	int rc;
 
-	lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+	lnd_type = LNET_NETTYP(net->net_id);
 
 	LASSERT(libcfs_isknown_lnd(lnd_type));
 
 	/* Make sure this new NI is unique. */
 	lnet_net_lock(LNET_LOCK_EX);
-	rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nets);
+	rc = lnet_net_unique(net->net_id, &the_lnet.ln_nets);
 	lnet_net_unlock(LNET_LOCK_EX);
 	if (!rc) {
 		if (lnd_type == LOLND) {
-			lnet_ni_free(ni);
+			lnet_net_free(net);
 			return 0;
 		}
 
 		CERROR("Net %s is not unique\n",
-		       libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+		       libcfs_net2str(net->net_id));
 		rc = -EEXIST;
 		goto failed0;
 	}
@@ -1291,8 +1320,32 @@  lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
 	lnet_net_lock(LNET_LOCK_EX);
 	lnd->lnd_refcount++;
 	lnet_net_unlock(LNET_LOCK_EX);
+	net->net_lnd = lnd;
+	mutex_unlock(&the_lnet.ln_lnd_mutex);
 
-	ni->ni_net->net_lnd = lnd;
+	ni = list_first_entry(&net->net_ni_list, struct lnet_ni, ni_netlist);
+
+	rc = lnet_startup_lndni(ni, tun);
+	if (rc < 0)
+		return rc;
+	return 1;
+
+failed0:
+	lnet_net_free(net);
+
+	return rc;
+}
+
+static int
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
+{
+	int			rc = -EINVAL;
+	struct lnet_tx_queue	*tq;
+	int			i;
+	struct lnet_net		*net = ni->ni_net;
+	u32			seed;
+
+	mutex_lock(&the_lnet.ln_lnd_mutex);
 
 	if (tun) {
 		memcpy(&ni->ni_lnd_tunables, tun,
@@ -1300,15 +1353,15 @@  lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
 		ni->ni_lnd_tunables_set = true;
 	}
 
-	rc = lnd->lnd_startup(ni);
+	rc = net->net_lnd->lnd_startup(ni);
 
 	mutex_unlock(&the_lnet.ln_lnd_mutex);
 
 	if (rc) {
 		LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
-				   rc, libcfs_lnd2str(lnd->lnd_type));
+				   rc, libcfs_lnd2str(net->net_lnd->lnd_type));
 		lnet_net_lock(LNET_LOCK_EX);
-		lnd->lnd_refcount--;
+		net->net_lnd->lnd_refcount--;
 		lnet_net_unlock(LNET_LOCK_EX);
 		goto failed0;
 	}
@@ -1324,7 +1377,7 @@  lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
 
 	lnet_net_unlock(LNET_LOCK_EX);
 
-	if (lnd->lnd_type == LOLND) {
+	if (net->net_lnd->lnd_type == LOLND) {
 		lnet_ni_addref(ni);
 		LASSERT(!the_lnet.ln_loni);
 		the_lnet.ln_loni = ni;
@@ -1338,7 +1391,7 @@  lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
 	if (!ni->ni_net->net_tunables.lct_peer_tx_credits ||
 	    !ni->ni_net->net_tunables.lct_max_tx_credits) {
 		LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
-				   libcfs_lnd2str(lnd->lnd_type),
+				   libcfs_lnd2str(net->net_lnd->lnd_type),
 				   !ni->ni_net->net_tunables.lct_peer_tx_credits ?
 				   "" : "per-peer ");
 		/*
@@ -1375,21 +1428,22 @@  lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
 }
 
 static int
-lnet_startup_lndnis(struct list_head *nilist)
+lnet_startup_lndnets(struct list_head *netlist)
 {
-	struct lnet_ni *ni;
+	struct lnet_net *net;
 	int rc;
 	int ni_count = 0;
 
-	while (!list_empty(nilist)) {
-		ni = list_entry(nilist->next, struct lnet_ni, ni_netlist);
-		list_del(&ni->ni_netlist);
-		rc = lnet_startup_lndni(ni, NULL);
+	while (!list_empty(netlist)) {
+		net = list_entry(netlist->next, struct lnet_net, net_list);
+		list_del_init(&net->net_list);
+
+		rc = lnet_startup_lndnet(net, NULL);
 
 		if (rc < 0)
 			goto failed;
 
-		ni_count++;
+		ni_count += rc;
 	}
 
 	return ni_count;
@@ -1552,7 +1606,7 @@  LNetNIInit(lnet_pid_t requested_pid)
 			goto err_empty_list;
 	}
 
-	ni_count = lnet_startup_lndnis(&net_head);
+	ni_count = lnet_startup_lndnets(&net_head);
 	if (ni_count < 0) {
 		rc = ni_count;
 		goto err_empty_list;
@@ -1831,10 +1885,11 @@  lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
 	struct lnet_ping_info *pinfo;
 	struct lnet_handle_md md_handle;
 	struct lnet_net		*net;
-	struct lnet_ni *ni;
 	struct list_head net_head;
 	struct lnet_remotenet *rnet;
 	int rc;
+	int			num_acceptor_nets;
+	__u32			net_type;
 	struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
 
 	INIT_LIST_HEAD(&net_head);
@@ -1876,22 +1931,47 @@  lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
 		goto failed0;
 
 	list_del_init(&net->net_list);
+
 	if (lnd_tunables)
 		memcpy(&net->net_tunables,
 		       &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
 
-	ni = list_first_entry(&net->net_ni_list, struct lnet_ni, ni_netlist);
-	rc = lnet_startup_lndni(ni, (lnd_tunables ?
+	/*
+	 * before starting this network get a count of the current TCP
+	 * networks which require the acceptor thread running. If that
+	 * count is == 0 before we start up this network, then we'd want to
+	 * start up the acceptor thread after starting up this network
+	 */
+	num_acceptor_nets = lnet_count_acceptor_nets();
+
+	/*
+	 * lnet_startup_lndnet() can deallocate 'net' even if it returns
+	 * success, because we ended up adding interfaces to an existing
+	 * network. So grab the net_type now
+	 */
+	net_type = LNET_NETTYP(net->net_id);
+
+	rc = lnet_startup_lndnet(net, (lnd_tunables ?
 				     &lnd_tunables->lt_tun : NULL));
 	if (rc < 0)
 		goto failed1;
 
-	if (ni->ni_net->net_lnd->lnd_accept) {
+	/*
+	 * Start the acceptor thread if this is the first network
+	 * being added that requires the thread.
+	 */
+	if (net_type == SOCKLND && num_acceptor_nets == 0) {
 		rc = lnet_acceptor_start();
 		if (rc < 0) {
-			/* shutdown the ni that we just started */
+			/* shutdown the net that we just started */
 			CERROR("Failed to start up acceptor thread\n");
-			lnet_shutdown_lndni(ni);
+			/*
+			 * Note that if we needed to start the acceptor
+			 * thread, then 'net' must have been the first TCP
+			 * network, therefore was unique, and therefore
+			 * wasn't deallocated by lnet_startup_lndnet()
+			 */
+			lnet_shutdown_lndnet(net);
 			goto failed1;
 		}
 	}