diff mbox series

[v2,03/14] multipathd: sync maps at end of checkerloop

Message ID 20241211225909.298770-4-mwilck@suse.com (mailing list archive)
State Not Applicable, archived
Delegated to: christophe varoqui
Headers show
Series multipathd: More map reload handling, and checkerloop work | expand

Commit Message

Martin Wilck Dec. 11, 2024, 10:58 p.m. UTC
Rather than calling sync_mpp early in the checkerloop and tracking
map synchronization with synced_count, call sync_mpp() in the CHECKER_FINISHED
state, if either at least one path of the map has been checked in the current
iteration, or the sync tick has expired. This avoids potentially deleting
paths from the pathvec through the do_sync_mpp() -> update_multipath_strings()
-> sync_paths -> check_removed_paths() call chain while we're iterating over
the pathvec. Also, the time gap between obtaining path states and syncing
the state with the kernel is smaller this way.

Suggested-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 libmultipath/structs.h     |  2 +-
 libmultipath/structs_vec.c |  1 -
 multipathd/main.c          | 26 +++++++++++---------------
 3 files changed, 12 insertions(+), 17 deletions(-)

Comments

Benjamin Marzinski Dec. 19, 2024, 9:50 p.m. UTC | #1
On Wed, Dec 11, 2024 at 11:58:58PM +0100, Martin Wilck wrote:
> Rather than calling sync_mpp early in the checkerloop and tracking
> map synchronization with synced_count, call sync_mpp() in the CHECKER_FINISHED
> state, if either at least one path of the map has been checked in the current
> iteration, or the sync tick has expired. This avoids potentially deleting
> paths from the pathvec through the do_sync_mpp() -> update_multipath_strings()
> -> sync_paths -> check_removed_paths() call chain while we're iterating over
> the pathvec. Also, the time gap between obtaining path states and syncing
> the state with the kernel is smaller this way.
> 

Sorry for the delayed review. I've been busy lately and there was a lot
to look at with moving the syncs from before we check the paths till the
end. Turns out I'm mostly o.k. with this. Syncing right before we
checked the paths still left a small window where things could change in
the kernel state, and we wouldn't see them before we checked the paths.
It's just a larger window now.

The one place where we won't just pick up the change on the next checker
loop is enable_group(). If the kernel disables a pathgroup, multipathd
would re-enable it if a path in it switched states to PATH_UP. I'm not
totally sure how necessary this is. The kenel has code to re-enable the
pathgroups. But it's been there a while, and perhaps it does avoid an
issue.

The kernel doesn't even necessarily send an event if it disables a
pathgroup (since the pathgroup isn't actually disabled, it's just
bypassed, which means all the other pathgroups are checked first).  So
there is a real chance that since the last path check, the pgp->status
could have changed. By not checking before we check the path, we could
not realize that tht pgp is in PGSTATE_DISABLED when a path switches
states to PATH_UP.  It shouldn't be that hard to check in sync_mpp() if
there are any disabled path_groups the include a path where
pp->is_checked == CHECK_PATH_NEW_UP. So we could move the enable_group()
code to checker_finished() as well and fix this.

-Ben

> Suggested-by: Benjamin Marzinski <bmarzins@redhat.com>
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
>  libmultipath/structs.h     |  2 +-
>  libmultipath/structs_vec.c |  1 -
>  multipathd/main.c          | 26 +++++++++++---------------
>  3 files changed, 12 insertions(+), 17 deletions(-)
> 
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 6a30c59..9d22bdd 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -471,7 +471,7 @@ struct multipath {
>  	int ghost_delay_tick;
>  	int queue_mode;
>  	unsigned int sync_tick;
> -	int synced_count;
> +	int checker_count;
>  	enum prio_update_type prio_update;
>  	uid_t uid;
>  	gid_t gid;
> diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
> index 7a4e3eb..6aa744d 100644
> --- a/libmultipath/structs_vec.c
> +++ b/libmultipath/structs_vec.c
> @@ -530,7 +530,6 @@ update_multipath_table (struct multipath *mpp, vector pathvec, int flags)
>  	conf = get_multipath_config();
>  	mpp->sync_tick = conf->max_checkint;
>  	put_multipath_config(conf);
> -	mpp->synced_count++;
>  
>  	r = libmp_mapinfo(DM_MAP_BY_NAME | MAPINFO_MPATH_ONLY,
>  			  (mapid_t) { .str = mpp->alias },
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 4a28fbb..e4e6bf7 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2470,7 +2470,7 @@ sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks)
>  	if (mpp->sync_tick)
>  		mpp->sync_tick -= (mpp->sync_tick > ticks) ? ticks :
>  				  mpp->sync_tick;
> -	if (mpp->sync_tick)
> +	if (mpp->sync_tick && !mpp->checker_count)
>  		return;
>  
>  	do_sync_mpp(vecs, mpp);
> @@ -2513,12 +2513,6 @@ update_path_state (struct vectors * vecs, struct path * pp)
>  		return handle_path_wwid_change(pp, vecs)? CHECK_PATH_REMOVED :
>  							  CHECK_PATH_SKIPPED;
>  	}
> -	if (pp->mpp->synced_count == 0) {
> -		do_sync_mpp(vecs, pp->mpp);
> -		/* if update_multipath_strings orphaned the path, quit early */
> -		if (!pp->mpp)
> -			return CHECK_PATH_SKIPPED;
> -	}
>  	if ((newstate != PATH_UP && newstate != PATH_GHOST &&
>  	     newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
>  		/* If path state become failed again cancel path delay state */
> @@ -2918,9 +2912,11 @@ check_paths(struct vectors *vecs, unsigned int ticks)
>  	vector_foreach_slot(vecs->pathvec, pp, i) {
>  		if (pp->is_checked != CHECK_PATH_UNCHECKED)
>  			continue;
> -		if (pp->mpp)
> +		if (pp->mpp) {
>  			pp->is_checked = check_path(pp, ticks);
> -		else
> +			if (pp->is_checked == CHECK_PATH_STARTED)
> +				pp->mpp->checker_count++;
> +		} else
>  			pp->is_checked = check_uninitialized_path(pp, ticks);
>  		if (pp->is_checked == CHECK_PATH_STARTED &&
>  		    checker_need_wait(&pp->checker))
> @@ -3014,12 +3010,10 @@ checkerloop (void *ap)
>  			pthread_cleanup_push(cleanup_lock, &vecs->lock);
>  			lock(&vecs->lock);
>  			pthread_testcancel();
> -			vector_foreach_slot(vecs->mpvec, mpp, i)
> -				mpp->synced_count = 0;
>  			if (checker_state == CHECKER_STARTING) {
>  				vector_foreach_slot(vecs->mpvec, mpp, i) {
> -					sync_mpp(vecs, mpp, ticks);
>  					mpp->prio_update = PRIO_UPDATE_NONE;
> +					mpp->checker_count = 0;
>  				}
>  				vector_foreach_slot(vecs->pathvec, pp, i)
>  					pp->is_checked = CHECK_PATH_UNCHECKED;
> @@ -3032,11 +3026,13 @@ checkerloop (void *ap)
>  							     start_time.tv_sec);
>  			if (checker_state == CHECKER_FINISHED) {
>  				vector_foreach_slot(vecs->mpvec, mpp, i) {
> -					if ((update_mpp_prio(mpp) ||
> -					     (mpp->need_reload && mpp->synced_count > 0)) &&
> -					    reload_and_sync_map(mpp, vecs) == 2)
> +					sync_mpp(vecs, mpp, ticks);
> +					if ((update_mpp_prio(mpp) || mpp->need_reload) &&
> +					    reload_and_sync_map(mpp, vecs) == 2) {
>  						/* multipath device deleted */
>  						i--;
> +						continue;
> +					}
>  				}
>  			}
>  			lock_cleanup_pop(vecs->lock);
> -- 
> 2.47.0
Benjamin Marzinski Dec. 19, 2024, 11:04 p.m. UTC | #2
On Wed, Dec 11, 2024 at 11:58:58PM +0100, Martin Wilck wrote:
> Rather than calling sync_mpp early in the checkerloop and tracking
> map synchronization with synced_count, call sync_mpp() in the CHECKER_FINISHED
> state, if either at least one path of the map has been checked in the current
> iteration, or the sync tick has expired. This avoids potentially deleting
> paths from the pathvec through the do_sync_mpp() -> update_multipath_strings()
> -> sync_paths -> check_removed_paths() call chain while we're iterating over
> the pathvec. Also, the time gap between obtaining path states and syncing
> the state with the kernel is smaller this way.
> 
> Suggested-by: Benjamin Marzinski <bmarzins@redhat.com>
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
>  libmultipath/structs.h     |  2 +-
>  libmultipath/structs_vec.c |  1 -
>  multipathd/main.c          | 26 +++++++++++---------------
>  3 files changed, 12 insertions(+), 17 deletions(-)
> 
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 6a30c59..9d22bdd 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -471,7 +471,7 @@ struct multipath {
>  	int ghost_delay_tick;
>  	int queue_mode;
>  	unsigned int sync_tick;
> -	int synced_count;
> +	int checker_count;
>  	enum prio_update_type prio_update;
>  	uid_t uid;
>  	gid_t gid;
> diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
> index 7a4e3eb..6aa744d 100644
> --- a/libmultipath/structs_vec.c
> +++ b/libmultipath/structs_vec.c
> @@ -530,7 +530,6 @@ update_multipath_table (struct multipath *mpp, vector pathvec, int flags)
>  	conf = get_multipath_config();
>  	mpp->sync_tick = conf->max_checkint;
>  	put_multipath_config(conf);
> -	mpp->synced_count++;
>  
>  	r = libmp_mapinfo(DM_MAP_BY_NAME | MAPINFO_MPATH_ONLY,
>  			  (mapid_t) { .str = mpp->alias },
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 4a28fbb..e4e6bf7 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2470,7 +2470,7 @@ sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks)
>  	if (mpp->sync_tick)
>  		mpp->sync_tick -= (mpp->sync_tick > ticks) ? ticks :
>  				  mpp->sync_tick;
> -	if (mpp->sync_tick)
> +	if (mpp->sync_tick && !mpp->checker_count)
>  		return;
>  
>  	do_sync_mpp(vecs, mpp);
> @@ -2513,12 +2513,6 @@ update_path_state (struct vectors * vecs, struct path * pp)
>  		return handle_path_wwid_change(pp, vecs)? CHECK_PATH_REMOVED :
>  							  CHECK_PATH_SKIPPED;
>  	}
> -	if (pp->mpp->synced_count == 0) {
> -		do_sync_mpp(vecs, pp->mpp);
> -		/* if update_multipath_strings orphaned the path, quit early */
> -		if (!pp->mpp)
> -			return CHECK_PATH_SKIPPED;
> -	}
>  	if ((newstate != PATH_UP && newstate != PATH_GHOST &&
>  	     newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
>  		/* If path state become failed again cancel path delay state */
> @@ -2918,9 +2912,11 @@ check_paths(struct vectors *vecs, unsigned int ticks)
>  	vector_foreach_slot(vecs->pathvec, pp, i) {
>  		if (pp->is_checked != CHECK_PATH_UNCHECKED)
>  			continue;
> -		if (pp->mpp)
> +		if (pp->mpp) {
>  			pp->is_checked = check_path(pp, ticks);
> -		else
> +			if (pp->is_checked == CHECK_PATH_STARTED)
> +				pp->mpp->checker_count++;
> +		} else
>  			pp->is_checked = check_uninitialized_path(pp, ticks);
>  		if (pp->is_checked == CHECK_PATH_STARTED &&
>  		    checker_need_wait(&pp->checker))
> @@ -3014,12 +3010,10 @@ checkerloop (void *ap)
>  			pthread_cleanup_push(cleanup_lock, &vecs->lock);
>  			lock(&vecs->lock);
>  			pthread_testcancel();
> -			vector_foreach_slot(vecs->mpvec, mpp, i)
> -				mpp->synced_count = 0;
>  			if (checker_state == CHECKER_STARTING) {
>  				vector_foreach_slot(vecs->mpvec, mpp, i) {
> -					sync_mpp(vecs, mpp, ticks);
>  					mpp->prio_update = PRIO_UPDATE_NONE;
> +					mpp->checker_count = 0;
>  				}
>  				vector_foreach_slot(vecs->pathvec, pp, i)
>  					pp->is_checked = CHECK_PATH_UNCHECKED;
> @@ -3032,11 +3026,13 @@ checkerloop (void *ap)
>  							     start_time.tv_sec);
>  			if (checker_state == CHECKER_FINISHED) {
>  				vector_foreach_slot(vecs->mpvec, mpp, i) {
> -					if ((update_mpp_prio(mpp) ||
> -					     (mpp->need_reload && mpp->synced_count > 0)) &&

When you call reload_and_sync_map(), it will automatically resync the
map via setup_multipath() -> refresh_multipath() ->
update_multipath_strings().

This means that if for some reason multipathd and the kernel disagree
about a map, and reloading it doesn't fix the problem, you will
immediately set mpp->need_reload again. With the old mpp->synced_count
check, you only reload maps with need_reload() when a path is checked.
Without this check, or a (mpp->checker_count > 0) check to replace it,
you will keep reloading these maps every loop, roughly once a second. I
would rather not do this.

If you want to make sure to immediately handle a need_reload that wasn't
triggered by this call to reload_and_sync_map() which was because of an
earlier need_reload, we could make need_reload have three states, to
distinguish between a reload we want done immediately, and one we would
like to wait on because we just did a reload and it didn't fix the
problem. Then we could remember if need_reload was set before calling
reload_and_sync_map(), and if it was, and if it is still set after, we
could switch it to the delayed version.

Or perhaps I'm just being paranoid here. 

-Ben

> -					    reload_and_sync_map(mpp, vecs) == 2)
> +					sync_mpp(vecs, mpp, ticks);
> +					if ((update_mpp_prio(mpp) || mpp->need_reload) &&
> +					    reload_and_sync_map(mpp, vecs) == 2) {
>  						/* multipath device deleted */
>  						i--;
> +						continue;
> +					}
>  				}
>  			}
>  			lock_cleanup_pop(vecs->lock);
> -- 
> 2.47.0
diff mbox series

Patch

diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 6a30c59..9d22bdd 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -471,7 +471,7 @@  struct multipath {
 	int ghost_delay_tick;
 	int queue_mode;
 	unsigned int sync_tick;
-	int synced_count;
+	int checker_count;
 	enum prio_update_type prio_update;
 	uid_t uid;
 	gid_t gid;
diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
index 7a4e3eb..6aa744d 100644
--- a/libmultipath/structs_vec.c
+++ b/libmultipath/structs_vec.c
@@ -530,7 +530,6 @@  update_multipath_table (struct multipath *mpp, vector pathvec, int flags)
 	conf = get_multipath_config();
 	mpp->sync_tick = conf->max_checkint;
 	put_multipath_config(conf);
-	mpp->synced_count++;
 
 	r = libmp_mapinfo(DM_MAP_BY_NAME | MAPINFO_MPATH_ONLY,
 			  (mapid_t) { .str = mpp->alias },
diff --git a/multipathd/main.c b/multipathd/main.c
index 4a28fbb..e4e6bf7 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2470,7 +2470,7 @@  sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks)
 	if (mpp->sync_tick)
 		mpp->sync_tick -= (mpp->sync_tick > ticks) ? ticks :
 				  mpp->sync_tick;
-	if (mpp->sync_tick)
+	if (mpp->sync_tick && !mpp->checker_count)
 		return;
 
 	do_sync_mpp(vecs, mpp);
@@ -2513,12 +2513,6 @@  update_path_state (struct vectors * vecs, struct path * pp)
 		return handle_path_wwid_change(pp, vecs)? CHECK_PATH_REMOVED :
 							  CHECK_PATH_SKIPPED;
 	}
-	if (pp->mpp->synced_count == 0) {
-		do_sync_mpp(vecs, pp->mpp);
-		/* if update_multipath_strings orphaned the path, quit early */
-		if (!pp->mpp)
-			return CHECK_PATH_SKIPPED;
-	}
 	if ((newstate != PATH_UP && newstate != PATH_GHOST &&
 	     newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
 		/* If path state become failed again cancel path delay state */
@@ -2918,9 +2912,11 @@  check_paths(struct vectors *vecs, unsigned int ticks)
 	vector_foreach_slot(vecs->pathvec, pp, i) {
 		if (pp->is_checked != CHECK_PATH_UNCHECKED)
 			continue;
-		if (pp->mpp)
+		if (pp->mpp) {
 			pp->is_checked = check_path(pp, ticks);
-		else
+			if (pp->is_checked == CHECK_PATH_STARTED)
+				pp->mpp->checker_count++;
+		} else
 			pp->is_checked = check_uninitialized_path(pp, ticks);
 		if (pp->is_checked == CHECK_PATH_STARTED &&
 		    checker_need_wait(&pp->checker))
@@ -3014,12 +3010,10 @@  checkerloop (void *ap)
 			pthread_cleanup_push(cleanup_lock, &vecs->lock);
 			lock(&vecs->lock);
 			pthread_testcancel();
-			vector_foreach_slot(vecs->mpvec, mpp, i)
-				mpp->synced_count = 0;
 			if (checker_state == CHECKER_STARTING) {
 				vector_foreach_slot(vecs->mpvec, mpp, i) {
-					sync_mpp(vecs, mpp, ticks);
 					mpp->prio_update = PRIO_UPDATE_NONE;
+					mpp->checker_count = 0;
 				}
 				vector_foreach_slot(vecs->pathvec, pp, i)
 					pp->is_checked = CHECK_PATH_UNCHECKED;
@@ -3032,11 +3026,13 @@  checkerloop (void *ap)
 							     start_time.tv_sec);
 			if (checker_state == CHECKER_FINISHED) {
 				vector_foreach_slot(vecs->mpvec, mpp, i) {
-					if ((update_mpp_prio(mpp) ||
-					     (mpp->need_reload && mpp->synced_count > 0)) &&
-					    reload_and_sync_map(mpp, vecs) == 2)
+					sync_mpp(vecs, mpp, ticks);
+					if ((update_mpp_prio(mpp) || mpp->need_reload) &&
+					    reload_and_sync_map(mpp, vecs) == 2) {
 						/* multipath device deleted */
 						i--;
+						continue;
+					}
 				}
 			}
 			lock_cleanup_pop(vecs->lock);