[18/34] lnet: add ni_state
diff mbox series

Message ID 153628137195.8267.16400748098054215181.stgit@noble
State New
Headers show
Series
  • Beginning of multi-rail support for drivers/staging/lustre
Related show

Commit Message

NeilBrown Sept. 7, 2018, 12:49 a.m. UTC
This is barely used.

This is part of
    8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
       LU-7734 lnet: Multi-Rail local NI split

Signed-off-by: NeilBrown <neilb@suse.com>
---
 .../staging/lustre/include/linux/lnet/lib-lnet.h   |    1 +
 .../staging/lustre/include/linux/lnet/lib-types.h  |   16 ++++++++++++++++
 drivers/staging/lustre/lnet/lnet/api-ni.c          |   16 ++++++++++++++++
 drivers/staging/lustre/lnet/lnet/config.c          |    1 +
 4 files changed, 34 insertions(+)

Comments

Doug Oucharek Sept. 12, 2018, 3:59 a.m. UTC | #1
I believe the introduction of this state machine is to help us understand how healthy an NI is, so we can avoid it if it is not healthy and we have other paths which are still OK.

Reviewed-by: Doug Oucharek <dougso@me.com>

Doug

On 9/6/18, 5:54 PM, "NeilBrown" <neilb@suse.com> wrote:

    This is barely used.
    
    This is part of
        8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
           LU-7734 lnet: Multi-Rail local NI split
    
    Signed-off-by: NeilBrown <neilb@suse.com>
    ---
     .../staging/lustre/include/linux/lnet/lib-lnet.h   |    1 +
     .../staging/lustre/include/linux/lnet/lib-types.h  |   16 ++++++++++++++++
     drivers/staging/lustre/lnet/lnet/api-ni.c          |   16 ++++++++++++++++
     drivers/staging/lustre/lnet/lnet/config.c          |    1 +
     4 files changed, 34 insertions(+)
    
    diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
    index faa3f19dd844..54a93235834c 100644
    --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
    +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
    @@ -400,6 +400,7 @@ int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni);
     struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
     struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt);
     struct lnet_ni *lnet_net2ni(__u32 net);
    +bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
     
     extern int portal_rotor;
     
    diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
    index 1d372672e2de..6c34ecf22021 100644
    --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
    +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
    @@ -256,6 +256,19 @@ struct lnet_tx_queue {
     	struct list_head	tq_delayed;	/* delayed TXs */
     };
     
    +enum lnet_ni_state {
    +	/* set when NI block is allocated */
    +	LNET_NI_STATE_INIT = 0,
    +	/* set when NI is started successfully */
    +	LNET_NI_STATE_ACTIVE,
    +	/* set when LND notifies NI failed */
    +	LNET_NI_STATE_FAILED,
    +	/* set when LND notifies NI degraded */
    +	LNET_NI_STATE_DEGRADED,
    +	/* set when shutting down NI */
    +	LNET_NI_STATE_DELETING
    +};
    +
     struct lnet_net {
     	/* chain on the ln_nets */
     	struct list_head	net_list;
    @@ -324,6 +337,9 @@ struct lnet_ni {
     	/* my health status */
     	struct lnet_ni_status	*ni_status;
     
    +	/* NI FSM */
    +	enum lnet_ni_state	ni_state;
    +
     	/* per NI LND tunables */
     	struct lnet_lnd_tunables ni_lnd_tunables;
     
    diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
    index 46c5ca71bc07..618fdf8141f0 100644
    --- a/drivers/staging/lustre/lnet/lnet/api-ni.c
    +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
    @@ -780,6 +780,16 @@ lnet_islocalnet(__u32 net)
     	return !!ni;
     }
     
    +bool
    +lnet_is_ni_healthy_locked(struct lnet_ni *ni)
    +{
    +	if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
    +	    ni->ni_state == LNET_NI_STATE_DEGRADED)
    +		return true;
    +
    +	return false;
    +}
    +
     struct lnet_ni  *
     lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
     {
    @@ -1117,6 +1127,9 @@ lnet_clear_zombies_nis_locked(struct lnet_net *net)
     		ni = list_entry(zombie_list->next,
     				struct lnet_ni, ni_netlist);
     		list_del_init(&ni->ni_netlist);
    +		/* the ni should be in deleting state. If it's not it's
    +		 * a bug */
    +		LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
     		cfs_percpt_for_each(ref, j, ni->ni_refs) {
     			if (!*ref)
     				continue;
    @@ -1163,6 +1176,7 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
     	struct lnet_net *net = ni->ni_net;
     
     	lnet_net_lock(LNET_LOCK_EX);
    +	ni->ni_state = LNET_NI_STATE_DELETING;
     	lnet_ni_unlink_locked(ni);
     	lnet_net_unlock(LNET_LOCK_EX);
     
    @@ -1291,6 +1305,8 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
     
     	lnet_net_unlock(LNET_LOCK_EX);
     
    +	ni->ni_state = LNET_NI_STATE_ACTIVE;
    +
     	if (net->net_lnd->lnd_type == LOLND) {
     		lnet_ni_addref(ni);
     		LASSERT(!the_lnet.ln_loni);
    diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
    index 2588d67fea1b..081812e19b13 100644
    --- a/drivers/staging/lustre/lnet/lnet/config.c
    +++ b/drivers/staging/lustre/lnet/lnet/config.c
    @@ -393,6 +393,7 @@ lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface)
     		ni->ni_net_ns = NULL;
     
     	ni->ni_last_alive = ktime_get_real_seconds();
    +	ni->ni_state = LNET_NI_STATE_INIT;
     	rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
     	if (rc != 0)
     		goto failed;
NeilBrown Sept. 12, 2018, 4:25 a.m. UTC | #2
On Wed, Sep 12 2018, Doug Oucharek wrote:

> I believe the introduction of this state machine is to help us understand how healthy an NI is, so we can avoid it if it is not healthy and we have other paths which are still OK.
>
> Reviewed-by: Doug Oucharek <dougso@me.com>

Thanks.  Now reads:

-----------
lnet: add ni_state

This will be used more in later patches to track how healthy an NI is,
so we can avoid one if it isn't healthy and we have other paths which
are still OK.

Reviewed-by: Doug Oucharek <dougso@me.com>
Signed-off-by: NeilBrown <neilb@suse.com>
------------

I noticed that it was used more in later patches, which is why I didn't
discard the patch.  The original has a "net_state" in lnet_net - I
haven't included that change as net_state is still unused.

Thanks,
NeilBrown


>
> Doug
>
> On 9/6/18, 5:54 PM, "NeilBrown" <neilb@suse.com> wrote:
>
>     This is barely used.
>     
>     This is part of
>         8cbb8cd3e771e7f7e0f99cafc19fad32770dc015
>            LU-7734 lnet: Multi-Rail local NI split
>     
>     Signed-off-by: NeilBrown <neilb@suse.com>
>     ---
>      .../staging/lustre/include/linux/lnet/lib-lnet.h   |    1 +
>      .../staging/lustre/include/linux/lnet/lib-types.h  |   16 ++++++++++++++++
>      drivers/staging/lustre/lnet/lnet/api-ni.c          |   16 ++++++++++++++++
>      drivers/staging/lustre/lnet/lnet/config.c          |    1 +
>      4 files changed, 34 insertions(+)
>     
>     diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
>     index faa3f19dd844..54a93235834c 100644
>     --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
>     +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
>     @@ -400,6 +400,7 @@ int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni);
>      struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
>      struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt);
>      struct lnet_ni *lnet_net2ni(__u32 net);
>     +bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
>      
>      extern int portal_rotor;
>      
>     diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
>     index 1d372672e2de..6c34ecf22021 100644
>     --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
>     +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
>     @@ -256,6 +256,19 @@ struct lnet_tx_queue {
>      	struct list_head	tq_delayed;	/* delayed TXs */
>      };
>      
>     +enum lnet_ni_state {
>     +	/* set when NI block is allocated */
>     +	LNET_NI_STATE_INIT = 0,
>     +	/* set when NI is started successfully */
>     +	LNET_NI_STATE_ACTIVE,
>     +	/* set when LND notifies NI failed */
>     +	LNET_NI_STATE_FAILED,
>     +	/* set when LND notifies NI degraded */
>     +	LNET_NI_STATE_DEGRADED,
>     +	/* set when shutting down NI */
>     +	LNET_NI_STATE_DELETING
>     +};
>     +
>      struct lnet_net {
>      	/* chain on the ln_nets */
>      	struct list_head	net_list;
>     @@ -324,6 +337,9 @@ struct lnet_ni {
>      	/* my health status */
>      	struct lnet_ni_status	*ni_status;
>      
>     +	/* NI FSM */
>     +	enum lnet_ni_state	ni_state;
>     +
>      	/* per NI LND tunables */
>      	struct lnet_lnd_tunables ni_lnd_tunables;
>      
>     diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
>     index 46c5ca71bc07..618fdf8141f0 100644
>     --- a/drivers/staging/lustre/lnet/lnet/api-ni.c
>     +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
>     @@ -780,6 +780,16 @@ lnet_islocalnet(__u32 net)
>      	return !!ni;
>      }
>      
>     +bool
>     +lnet_is_ni_healthy_locked(struct lnet_ni *ni)
>     +{
>     +	if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
>     +	    ni->ni_state == LNET_NI_STATE_DEGRADED)
>     +		return true;
>     +
>     +	return false;
>     +}
>     +
>      struct lnet_ni  *
>      lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
>      {
>     @@ -1117,6 +1127,9 @@ lnet_clear_zombies_nis_locked(struct lnet_net *net)
>      		ni = list_entry(zombie_list->next,
>      				struct lnet_ni, ni_netlist);
>      		list_del_init(&ni->ni_netlist);
>     +		/* the ni should be in deleting state. If it's not it's
>     +		 * a bug */
>     +		LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
>      		cfs_percpt_for_each(ref, j, ni->ni_refs) {
>      			if (!*ref)
>      				continue;
>     @@ -1163,6 +1176,7 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
>      	struct lnet_net *net = ni->ni_net;
>      
>      	lnet_net_lock(LNET_LOCK_EX);
>     +	ni->ni_state = LNET_NI_STATE_DELETING;
>      	lnet_ni_unlink_locked(ni);
>      	lnet_net_unlock(LNET_LOCK_EX);
>      
>     @@ -1291,6 +1305,8 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
>      
>      	lnet_net_unlock(LNET_LOCK_EX);
>      
>     +	ni->ni_state = LNET_NI_STATE_ACTIVE;
>     +
>      	if (net->net_lnd->lnd_type == LOLND) {
>      		lnet_ni_addref(ni);
>      		LASSERT(!the_lnet.ln_loni);
>     diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
>     index 2588d67fea1b..081812e19b13 100644
>     --- a/drivers/staging/lustre/lnet/lnet/config.c
>     +++ b/drivers/staging/lustre/lnet/lnet/config.c
>     @@ -393,6 +393,7 @@ lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface)
>      		ni->ni_net_ns = NULL;
>      
>      	ni->ni_last_alive = ktime_get_real_seconds();
>     +	ni->ni_state = LNET_NI_STATE_INIT;
>      	rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
>      	if (rc != 0)
>      		goto failed;
>     
>     
>

Patch
diff mbox series

diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index faa3f19dd844..54a93235834c 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -400,6 +400,7 @@  int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni);
 struct lnet_ni *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
 struct lnet_ni *lnet_net2ni_locked(__u32 net, int cpt);
 struct lnet_ni *lnet_net2ni(__u32 net);
+bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
 
 extern int portal_rotor;
 
diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h
index 1d372672e2de..6c34ecf22021 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-types.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h
@@ -256,6 +256,19 @@  struct lnet_tx_queue {
 	struct list_head	tq_delayed;	/* delayed TXs */
 };
 
+enum lnet_ni_state {
+	/* set when NI block is allocated */
+	LNET_NI_STATE_INIT = 0,
+	/* set when NI is started successfully */
+	LNET_NI_STATE_ACTIVE,
+	/* set when LND notifies NI failed */
+	LNET_NI_STATE_FAILED,
+	/* set when LND notifies NI degraded */
+	LNET_NI_STATE_DEGRADED,
+	/* set when shutting down NI */
+	LNET_NI_STATE_DELETING
+};
+
 struct lnet_net {
 	/* chain on the ln_nets */
 	struct list_head	net_list;
@@ -324,6 +337,9 @@  struct lnet_ni {
 	/* my health status */
 	struct lnet_ni_status	*ni_status;
 
+	/* NI FSM */
+	enum lnet_ni_state	ni_state;
+
 	/* per NI LND tunables */
 	struct lnet_lnd_tunables ni_lnd_tunables;
 
diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c
index 46c5ca71bc07..618fdf8141f0 100644
--- a/drivers/staging/lustre/lnet/lnet/api-ni.c
+++ b/drivers/staging/lustre/lnet/lnet/api-ni.c
@@ -780,6 +780,16 @@  lnet_islocalnet(__u32 net)
 	return !!ni;
 }
 
+bool
+lnet_is_ni_healthy_locked(struct lnet_ni *ni)
+{
+	if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
+	    ni->ni_state == LNET_NI_STATE_DEGRADED)
+		return true;
+
+	return false;
+}
+
 struct lnet_ni  *
 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
 {
@@ -1117,6 +1127,9 @@  lnet_clear_zombies_nis_locked(struct lnet_net *net)
 		ni = list_entry(zombie_list->next,
 				struct lnet_ni, ni_netlist);
 		list_del_init(&ni->ni_netlist);
+		/* the ni should be in deleting state. If it's not it's
+		 * a bug */
+		LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
 		cfs_percpt_for_each(ref, j, ni->ni_refs) {
 			if (!*ref)
 				continue;
@@ -1163,6 +1176,7 @@  lnet_shutdown_lndni(struct lnet_ni *ni)
 	struct lnet_net *net = ni->ni_net;
 
 	lnet_net_lock(LNET_LOCK_EX);
+	ni->ni_state = LNET_NI_STATE_DELETING;
 	lnet_ni_unlink_locked(ni);
 	lnet_net_unlock(LNET_LOCK_EX);
 
@@ -1291,6 +1305,8 @@  lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
 
 	lnet_net_unlock(LNET_LOCK_EX);
 
+	ni->ni_state = LNET_NI_STATE_ACTIVE;
+
 	if (net->net_lnd->lnd_type == LOLND) {
 		lnet_ni_addref(ni);
 		LASSERT(!the_lnet.ln_loni);
diff --git a/drivers/staging/lustre/lnet/lnet/config.c b/drivers/staging/lustre/lnet/lnet/config.c
index 2588d67fea1b..081812e19b13 100644
--- a/drivers/staging/lustre/lnet/lnet/config.c
+++ b/drivers/staging/lustre/lnet/lnet/config.c
@@ -393,6 +393,7 @@  lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface)
 		ni->ni_net_ns = NULL;
 
 	ni->ni_last_alive = ktime_get_real_seconds();
+	ni->ni_state = LNET_NI_STATE_INIT;
 	rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
 	if (rc != 0)
 		goto failed;