diff mbox series

[v2,12/22] multipathd: split check_paths into two functions

Message ID 20240912214947.783819-13-bmarzins@redhat.com (mailing list archive)
State Not Applicable, archived
Delegated to: Benjamin Marzinski
Headers show
Series Yet Another path checker refactor | expand

Commit Message

Benjamin Marzinski Sept. 12, 2024, 9:49 p.m. UTC
Instead of checking and updating each path, the checkerloop now starts
the checkers on all the paths in check_paths(), and then goes back and
updates all the paths in update_paths(). Since the async checkers use an
absolute time to wait for before returning PATH_PENDING, only one
checker actually needs to be waited for in update_paths(). The rest will
already have reached their timeout when update_path() is called for
them.

The check_paths() and update_paths() loop over the pathvec instead of
looping through the multipath device paths to avoid having to restart
checking of the device paths when the multipath device needs to be
resynced while updating the paths.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/structs.h |  10 +++-
 multipathd/main.c      | 105 +++++++++++++++++++----------------------
 2 files changed, 57 insertions(+), 58 deletions(-)

Comments

Martin Wilck Oct. 3, 2024, 8:41 p.m. UTC | #1
On Thu, 2024-09-12 at 17:49 -0400, Benjamin Marzinski wrote:
> Instead of checking and updating each path, the checkerloop now
> starts
> the checkers on all the paths in check_paths(), and then goes back
> and
> updates all the paths in update_paths(). Since the async checkers use
> an
> absolute time to wait for before returning PATH_PENDING, only one
> checker actually needs to be waited for in update_paths(). The rest
> will
> already have reached their timeout when update_path() is called for
> them.
> 
> The check_paths() and update_paths() loop over the pathvec instead of
> looping through the multipath device paths to avoid having to restart
> checking of the device paths when the multipath device needs to be
> resynced while updating the paths.
> 
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/structs.h |  10 +++-
>  multipathd/main.c      | 105 +++++++++++++++++++--------------------
> --
>  2 files changed, 57 insertions(+), 58 deletions(-)
> 
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index d8231e95..af8e31e9 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -314,6 +314,14 @@ enum recheck_wwid_states {
>  	RECHECK_WWID_ON = YNU_YES,
>  };
>  
> +enum check_path_states {
> +	CHECK_PATH_UNCHECKED,
> +	CHECK_PATH_STARTED,
> +	CHECK_PATH_CHECKED,
> +	CHECK_PATH_SKIPPED,
> +	CHECK_PATH_REMOVED,
> +};
> +
>  struct vpd_vendor_page {
>  	int pg;
>  	const char *name;
> @@ -395,7 +403,7 @@ struct path {
>  	int fast_io_fail;
>  	unsigned int dev_loss;
>  	int eh_deadline;
> -	bool is_checked;
> +	enum check_path_states is_checked;
>  	bool can_use_env_uid;
>  	unsigned int checker_timeout;
>  	/* configlet pointers */
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 45d40559..9519b6c5 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2389,13 +2389,6 @@ sync_mpp(struct vectors * vecs, struct
> multipath *mpp, unsigned int ticks)
>  	do_sync_mpp(vecs, mpp);
>  }
>  
> -enum check_path_return {
> -	CHECK_PATH_STARTED,
> -	CHECK_PATH_CHECKED,
> -	CHECK_PATH_SKIPPED,
> -	CHECK_PATH_REMOVED,
> -};
> -
>  static int
>  update_path_state (struct vectors * vecs, struct path * pp)
>  {
> @@ -2735,6 +2728,7 @@ check_uninitialized_path(struct path * pp,
> unsigned int ticks)
>  	conf = get_multipath_config();
>  	retrigger_tries = conf->retrigger_tries;
>  	pp->tick = conf->max_checkint;
> +	pp->checkint = conf->checkint;
>  	put_multipath_config(conf);
>  
>  	if (pp->initialized == INIT_MISSING_UDEV) {
> @@ -2778,6 +2772,10 @@ update_uninitialized_path(struct vectors *
> vecs, struct path * pp)
>  	int newstate, ret;
>  	struct config *conf;
>  
> +	if (pp->initialized != INIT_NEW && pp->initialized !=
> INIT_FAILED &&
> +	    pp->initialized != INIT_MISSING_UDEV)
> +		return CHECK_PATH_SKIPPED;
> +
>  	newstate = get_new_state(pp);
>  

It seems to me that this hunk and the previous one belong into 11/22.

Martin
Benjamin Marzinski Oct. 8, 2024, 7:16 p.m. UTC | #2
On Thu, Oct 03, 2024 at 10:41:20PM +0200, Martin Wilck wrote:
> On Thu, 2024-09-12 at 17:49 -0400, Benjamin Marzinski wrote:
> > Instead of checking and updating each path, the checkerloop now
> > starts
> > the checkers on all the paths in check_paths(), and then goes back
> > and
> > updates all the paths in update_paths(). Since the async checkers use
> > an
> > absolute time to wait for before returning PATH_PENDING, only one
> > checker actually needs to be waited for in update_paths(). The rest
> > will
> > already have reached their timeout when update_path() is called for
> > them.
> > 
> > The check_paths() and update_paths() loop over the pathvec instead of
> > looping through the multipath device paths to avoid having to restart
> > checking of the device paths when the multipath device needs to be
> > resynced while updating the paths.
> > 
> > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> > ---
> >  libmultipath/structs.h |  10 +++-
> >  multipathd/main.c      | 105 +++++++++++++++++++--------------------
> > --
> >  2 files changed, 57 insertions(+), 58 deletions(-)
> > 
> > diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> > index d8231e95..af8e31e9 100644
> > --- a/libmultipath/structs.h
> > +++ b/libmultipath/structs.h
> > @@ -314,6 +314,14 @@ enum recheck_wwid_states {
> >  	RECHECK_WWID_ON = YNU_YES,
> >  };
> >  
> > +enum check_path_states {
> > +	CHECK_PATH_UNCHECKED,
> > +	CHECK_PATH_STARTED,
> > +	CHECK_PATH_CHECKED,
> > +	CHECK_PATH_SKIPPED,
> > +	CHECK_PATH_REMOVED,
> > +};
> > +
> >  struct vpd_vendor_page {
> >  	int pg;
> >  	const char *name;
> > @@ -395,7 +403,7 @@ struct path {
> >  	int fast_io_fail;
> >  	unsigned int dev_loss;
> >  	int eh_deadline;
> > -	bool is_checked;
> > +	enum check_path_states is_checked;
> >  	bool can_use_env_uid;
> >  	unsigned int checker_timeout;
> >  	/* configlet pointers */
> > diff --git a/multipathd/main.c b/multipathd/main.c
> > index 45d40559..9519b6c5 100644
> > --- a/multipathd/main.c
> > +++ b/multipathd/main.c
> > @@ -2389,13 +2389,6 @@ sync_mpp(struct vectors * vecs, struct
> > multipath *mpp, unsigned int ticks)
> >  	do_sync_mpp(vecs, mpp);
> >  }
> >  
> > -enum check_path_return {
> > -	CHECK_PATH_STARTED,
> > -	CHECK_PATH_CHECKED,
> > -	CHECK_PATH_SKIPPED,
> > -	CHECK_PATH_REMOVED,
> > -};
> > -
> >  static int
> >  update_path_state (struct vectors * vecs, struct path * pp)
> >  {
> > @@ -2735,6 +2728,7 @@ check_uninitialized_path(struct path * pp,
> > unsigned int ticks)
> >  	conf = get_multipath_config();
> >  	retrigger_tries = conf->retrigger_tries;
> >  	pp->tick = conf->max_checkint;
> > +	pp->checkint = conf->checkint;
> >  	put_multipath_config(conf);
> >  
> >  	if (pp->initialized == INIT_MISSING_UDEV) {
> > @@ -2778,6 +2772,10 @@ update_uninitialized_path(struct vectors *
> > vecs, struct path * pp)
> >  	int newstate, ret;
> >  	struct config *conf;
> >  
> > +	if (pp->initialized != INIT_NEW && pp->initialized !=
> > INIT_FAILED &&
> > +	    pp->initialized != INIT_MISSING_UDEV)
> > +		return CHECK_PATH_SKIPPED;
> > +
> >  	newstate = get_new_state(pp);
> >  
> 
> It seems to me that this hunk and the previous one belong into 11/22.

Setting pp->checkint could certainly go in the earlier patch. It's just
there to match check_path(), although to be honest, I'm still not sure
that there is any way for either check_path() or
check_uninitialized_path() to get called with pp->checkint unset.
 
The checking the path->initialized state in update_uninitialized_path()
makes less sense. That code is already in check_unintialized_path(). In
11/22, update_uninitialized_path() is always called immediately after
check_unintialized_path() if it's called at all, and pp->initialized
never has a chance to change.

In this patch, check_unintialized_path() is called in check_paths(), and
it's possible that path checking takes too long, and we need take a
break and yield to other vecs lock waiters before we call
update_paths(), where update_uninitialized_path() is called. In this
case, it's possible that a uevent got processed and changed
pp->initialized, so we need to recheck it in
update_uninitialized_path().

-Ben

> Martin
diff mbox series

Patch

diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index d8231e95..af8e31e9 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -314,6 +314,14 @@  enum recheck_wwid_states {
 	RECHECK_WWID_ON = YNU_YES,
 };
 
+enum check_path_states {
+	CHECK_PATH_UNCHECKED,
+	CHECK_PATH_STARTED,
+	CHECK_PATH_CHECKED,
+	CHECK_PATH_SKIPPED,
+	CHECK_PATH_REMOVED,
+};
+
 struct vpd_vendor_page {
 	int pg;
 	const char *name;
@@ -395,7 +403,7 @@  struct path {
 	int fast_io_fail;
 	unsigned int dev_loss;
 	int eh_deadline;
-	bool is_checked;
+	enum check_path_states is_checked;
 	bool can_use_env_uid;
 	unsigned int checker_timeout;
 	/* configlet pointers */
diff --git a/multipathd/main.c b/multipathd/main.c
index 45d40559..9519b6c5 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2389,13 +2389,6 @@  sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks)
 	do_sync_mpp(vecs, mpp);
 }
 
-enum check_path_return {
-	CHECK_PATH_STARTED,
-	CHECK_PATH_CHECKED,
-	CHECK_PATH_SKIPPED,
-	CHECK_PATH_REMOVED,
-};
-
 static int
 update_path_state (struct vectors * vecs, struct path * pp)
 {
@@ -2735,6 +2728,7 @@  check_uninitialized_path(struct path * pp, unsigned int ticks)
 	conf = get_multipath_config();
 	retrigger_tries = conf->retrigger_tries;
 	pp->tick = conf->max_checkint;
+	pp->checkint = conf->checkint;
 	put_multipath_config(conf);
 
 	if (pp->initialized == INIT_MISSING_UDEV) {
@@ -2778,6 +2772,10 @@  update_uninitialized_path(struct vectors * vecs, struct path * pp)
 	int newstate, ret;
 	struct config *conf;
 
+	if (pp->initialized != INIT_NEW && pp->initialized != INIT_FAILED &&
+	    pp->initialized != INIT_MISSING_UDEV)
+		return CHECK_PATH_SKIPPED;
+
 	newstate = get_new_state(pp);
 
 	if (!strlen(pp->wwid) &&
@@ -2807,80 +2805,69 @@  update_uninitialized_path(struct vectors * vecs, struct path * pp)
 
 enum checker_state {
 	CHECKER_STARTING,
-	CHECKER_RUNNING,
+	CHECKER_CHECKING_PATHS,
+	CHECKER_UPDATING_PATHS,
 	CHECKER_FINISHED,
 };
 
 static enum checker_state
-check_paths(struct vectors *vecs, unsigned int ticks, int *num_paths_p)
+check_paths(struct vectors *vecs, unsigned int ticks)
 {
 	unsigned int paths_checked = 0;
 	struct timespec diff_time, start_time, end_time;
-	struct multipath *mpp;
 	struct path *pp;
-	int i, rc;
+	int i;
 
 	get_monotonic_time(&start_time);
 
-	vector_foreach_slot(vecs->mpvec, mpp, i) {
-		struct pathgroup *pgp;
-		struct path *pp;
-		int j, k;
-		bool check_for_waiters = false;
-		/* maps can be rechecked, so this is not always 0 */
-		int synced_count = mpp->synced_count;
-
-		vector_foreach_slot (mpp->pg, pgp, j) {
-			vector_foreach_slot (pgp->paths, pp, k) {
-				if (!pp->mpp || pp->is_checked)
-					continue;
-				pp->is_checked = true;
-				rc = check_path(pp, ticks);
-				if (rc == CHECK_PATH_STARTED)
-					rc = update_path(vecs, pp,
-							 start_time.tv_sec);
-				if (rc == CHECK_PATH_CHECKED)
-					(*num_paths_p)++;
-				if (++paths_checked % 128 == 0)
-					check_for_waiters = true;
-				/*
-				 * mpp has been removed or resynced. Path may
-				 * have been removed.
-				 */
-				if (VECTOR_SLOT(vecs->mpvec, i) != mpp ||
-				    synced_count != mpp->synced_count) {
-					i--;
-					goto next_mpp;
-				}
-			}
-		}
-next_mpp:
-		if (check_for_waiters &&
+	vector_foreach_slot(vecs->pathvec, pp, i) {
+		if (pp->is_checked != CHECK_PATH_UNCHECKED)
+			continue;
+		if (pp->mpp)
+			pp->is_checked = check_path(pp, ticks);
+		else
+			pp->is_checked = check_uninitialized_path(pp, ticks);
+		if (++paths_checked % 128 == 0 &&
 		    (lock_has_waiters(&vecs->lock) || waiting_clients())) {
 			get_monotonic_time(&end_time);
 			timespecsub(&end_time, &start_time, &diff_time);
 			if (diff_time.tv_sec > 0)
-				return CHECKER_RUNNING;
+				return CHECKER_CHECKING_PATHS;
 		}
 	}
+	return CHECKER_UPDATING_PATHS;
+}
+
+static enum checker_state
+update_paths(struct vectors *vecs, int *num_paths_p, time_t start_secs)
+{
+	unsigned int paths_checked = 0;
+	struct timespec diff_time, start_time, end_time;
+	struct path *pp;
+	int i, rc;
+
+	get_monotonic_time(&start_time);
+
 	vector_foreach_slot(vecs->pathvec, pp, i) {
-		if (pp->mpp || pp->is_checked)
+		if (pp->is_checked != CHECK_PATH_STARTED)
 			continue;
-		pp->is_checked = true;
-
-		rc = check_uninitialized_path(pp, ticks);
-		if (rc == CHECK_PATH_STARTED)
+		if (pp->mpp)
+			rc = update_path(vecs, pp, start_secs);
+		else
 			rc = update_uninitialized_path(vecs, pp);
 		if (rc == CHECK_PATH_REMOVED)
 			i--;
-		else if (rc == CHECK_PATH_CHECKED)
-			(*num_paths_p)++;
+		else {
+			pp->is_checked = rc;
+			if (rc == CHECK_PATH_CHECKED)
+				(*num_paths_p)++;
+		}
 		if (++paths_checked % 128 == 0 &&
 		    (lock_has_waiters(&vecs->lock) || waiting_clients())) {
 			get_monotonic_time(&end_time);
 			timespecsub(&end_time, &start_time, &diff_time);
 			if (diff_time.tv_sec > 0)
-				return CHECKER_RUNNING;
+				return CHECKER_UPDATING_PATHS;
 		}
 	}
 	return CHECKER_FINISHED;
@@ -2948,10 +2935,14 @@  checkerloop (void *ap)
 				vector_foreach_slot(vecs->mpvec, mpp, i)
 					sync_mpp(vecs, mpp, ticks);
 				vector_foreach_slot(vecs->pathvec, pp, i)
-					pp->is_checked = false;
-				checker_state = CHECKER_RUNNING;
+					pp->is_checked = CHECK_PATH_UNCHECKED;
+				checker_state = CHECKER_CHECKING_PATHS;
 			}
-			checker_state = check_paths(vecs, ticks, &num_paths);
+			if (checker_state == CHECKER_CHECKING_PATHS)
+				checker_state = check_paths(vecs, ticks);
+			if (checker_state == CHECKER_UPDATING_PATHS)
+				checker_state = update_paths(vecs, &num_paths,
+							     start_time.tv_sec);
 			lock_cleanup_pop(vecs->lock);
 			if (checker_state != CHECKER_FINISHED) {
 				/* Yield to waiters */