Message ID | 20240912214947.783819-13-bmarzins@redhat.com (mailing list archive) |
---|---|
State | Not Applicable, archived |
Delegated to: | Benjamin Marzinski |
Headers | show |
Series | Yet Another path checker refactor | expand |
On Thu, 2024-09-12 at 17:49 -0400, Benjamin Marzinski wrote: > Instead of checking and updating each path, the checkerloop now > starts > the checkers on all the paths in check_paths(), and then goes back > and > updates all the paths in update_paths(). Since the async checkers use > an > absolute time to wait for before returning PATH_PENDING, only one > checker actually needs to be waited for in update_paths(). The rest > will > already have reached their timeout when update_path() is called for > them. > > The check_paths() and update_paths() loop over the pathvec instead of > looping through the multipath device paths to avoid having to restart > checking of the device paths when the multipath device needs to be > resynced while updating the paths. > > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> > --- > libmultipath/structs.h | 10 +++- > multipathd/main.c | 105 +++++++++++++++++++-------------------- > -- > 2 files changed, 57 insertions(+), 58 deletions(-) > > diff --git a/libmultipath/structs.h b/libmultipath/structs.h > index d8231e95..af8e31e9 100644 > --- a/libmultipath/structs.h > +++ b/libmultipath/structs.h > @@ -314,6 +314,14 @@ enum recheck_wwid_states { > RECHECK_WWID_ON = YNU_YES, > }; > > +enum check_path_states { > + CHECK_PATH_UNCHECKED, > + CHECK_PATH_STARTED, > + CHECK_PATH_CHECKED, > + CHECK_PATH_SKIPPED, > + CHECK_PATH_REMOVED, > +}; > + > struct vpd_vendor_page { > int pg; > const char *name; > @@ -395,7 +403,7 @@ struct path { > int fast_io_fail; > unsigned int dev_loss; > int eh_deadline; > - bool is_checked; > + enum check_path_states is_checked; > bool can_use_env_uid; > unsigned int checker_timeout; > /* configlet pointers */ > diff --git a/multipathd/main.c b/multipathd/main.c > index 45d40559..9519b6c5 100644 > --- a/multipathd/main.c > +++ b/multipathd/main.c > @@ -2389,13 +2389,6 @@ sync_mpp(struct vectors * vecs, struct > multipath *mpp, unsigned int ticks) > do_sync_mpp(vecs, mpp); > } > > -enum check_path_return { > - CHECK_PATH_STARTED, > - CHECK_PATH_CHECKED, > - CHECK_PATH_SKIPPED, > - CHECK_PATH_REMOVED, > -}; > - > static int > update_path_state (struct vectors * vecs, struct path * pp) > { > @@ -2735,6 +2728,7 @@ check_uninitialized_path(struct path * pp, > unsigned int ticks) > conf = get_multipath_config(); > retrigger_tries = conf->retrigger_tries; > pp->tick = conf->max_checkint; > + pp->checkint = conf->checkint; > put_multipath_config(conf); > > if (pp->initialized == INIT_MISSING_UDEV) { > @@ -2778,6 +2772,10 @@ update_uninitialized_path(struct vectors * > vecs, struct path * pp) > int newstate, ret; > struct config *conf; > > + if (pp->initialized != INIT_NEW && pp->initialized != > INIT_FAILED && > + pp->initialized != INIT_MISSING_UDEV) > + return CHECK_PATH_SKIPPED; > + > newstate = get_new_state(pp); > It seems to me that this hunk and the previous one belong into 11/22. Martin
On Thu, Oct 03, 2024 at 10:41:20PM +0200, Martin Wilck wrote: > On Thu, 2024-09-12 at 17:49 -0400, Benjamin Marzinski wrote: > > Instead of checking and updating each path, the checkerloop now > > starts > > the checkers on all the paths in check_paths(), and then goes back > > and > > updates all the paths in update_paths(). Since the async checkers use > > an > > absolute time to wait for before returning PATH_PENDING, only one > > checker actually needs to be waited for in update_paths(). The rest > > will > > already have reached their timeout when update_path() is called for > > them. > > > > The check_paths() and update_paths() loop over the pathvec instead of > > looping through the multipath device paths to avoid having to restart > > checking of the device paths when the multipath device needs to be > > resynced while updating the paths. > > > > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> > > --- > > libmultipath/structs.h | 10 +++- > > multipathd/main.c | 105 +++++++++++++++++++-------------------- > > -- > > 2 files changed, 57 insertions(+), 58 deletions(-) > > > > diff --git a/libmultipath/structs.h b/libmultipath/structs.h > > index d8231e95..af8e31e9 100644 > > --- a/libmultipath/structs.h > > +++ b/libmultipath/structs.h > > @@ -314,6 +314,14 @@ enum recheck_wwid_states { > > RECHECK_WWID_ON = YNU_YES, > > }; > > > > +enum check_path_states { > > + CHECK_PATH_UNCHECKED, > > + CHECK_PATH_STARTED, > > + CHECK_PATH_CHECKED, > > + CHECK_PATH_SKIPPED, > > + CHECK_PATH_REMOVED, > > +}; > > + > > struct vpd_vendor_page { > > int pg; > > const char *name; > > @@ -395,7 +403,7 @@ struct path { > > int fast_io_fail; > > unsigned int dev_loss; > > int eh_deadline; > > - bool is_checked; > > + enum check_path_states is_checked; > > bool can_use_env_uid; > > unsigned int checker_timeout; > > /* configlet pointers */ > > diff --git a/multipathd/main.c b/multipathd/main.c > > index 45d40559..9519b6c5 100644 > > --- a/multipathd/main.c > > +++ b/multipathd/main.c > > @@ -2389,13 +2389,6 @@ sync_mpp(struct vectors * vecs, struct > > multipath *mpp, unsigned int ticks) > > do_sync_mpp(vecs, mpp); > > } > > > > -enum check_path_return { > > - CHECK_PATH_STARTED, > > - CHECK_PATH_CHECKED, > > - CHECK_PATH_SKIPPED, > > - CHECK_PATH_REMOVED, > > -}; > > - > > static int > > update_path_state (struct vectors * vecs, struct path * pp) > > { > > @@ -2735,6 +2728,7 @@ check_uninitialized_path(struct path * pp, > > unsigned int ticks) > > conf = get_multipath_config(); > > retrigger_tries = conf->retrigger_tries; > > pp->tick = conf->max_checkint; > > + pp->checkint = conf->checkint; > > put_multipath_config(conf); > > > > if (pp->initialized == INIT_MISSING_UDEV) { > > @@ -2778,6 +2772,10 @@ update_uninitialized_path(struct vectors * > > vecs, struct path * pp) > > int newstate, ret; > > struct config *conf; > > > > + if (pp->initialized != INIT_NEW && pp->initialized != > > INIT_FAILED && > > + pp->initialized != INIT_MISSING_UDEV) > > + return CHECK_PATH_SKIPPED; > > + > > newstate = get_new_state(pp); > > > > It seems to me that this hunk and the previous one belong into 11/22. Setting pp->checkint could certainly go in the earlier patch. It's just there to match check_path(), although to be honest, I'm still not sure that there is any way for either check_path() or check_uninitialized_path() to get called with pp->checkint unset. The checking the path->initialized state in update_uninitialized_path() makes less sense. That code is already in check_unintialized_path(). In 11/22, update_uninitialized_path() is always called immediately after check_unintialized_path() if it's called at all, and pp->initialized never has a chance to change. In this patch, check_unintialized_path() is called in check_paths(), and it's possible that path checking takes too long, and we need take a break and yield to other vecs lock waiters before we call update_paths(), where update_uninitialized_path() is called. In this case, it's possible that a uevent got processed and changed pp->initialized, so we need to recheck it in update_uninitialized_path(). -Ben > Martin
diff --git a/libmultipath/structs.h b/libmultipath/structs.h index d8231e95..af8e31e9 100644 --- a/libmultipath/structs.h +++ b/libmultipath/structs.h @@ -314,6 +314,14 @@ enum recheck_wwid_states { RECHECK_WWID_ON = YNU_YES, }; +enum check_path_states { + CHECK_PATH_UNCHECKED, + CHECK_PATH_STARTED, + CHECK_PATH_CHECKED, + CHECK_PATH_SKIPPED, + CHECK_PATH_REMOVED, +}; + struct vpd_vendor_page { int pg; const char *name; @@ -395,7 +403,7 @@ struct path { int fast_io_fail; unsigned int dev_loss; int eh_deadline; - bool is_checked; + enum check_path_states is_checked; bool can_use_env_uid; unsigned int checker_timeout; /* configlet pointers */ diff --git a/multipathd/main.c b/multipathd/main.c index 45d40559..9519b6c5 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -2389,13 +2389,6 @@ sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks) do_sync_mpp(vecs, mpp); } -enum check_path_return { - CHECK_PATH_STARTED, - CHECK_PATH_CHECKED, - CHECK_PATH_SKIPPED, - CHECK_PATH_REMOVED, -}; - static int update_path_state (struct vectors * vecs, struct path * pp) { @@ -2735,6 +2728,7 @@ check_uninitialized_path(struct path * pp, unsigned int ticks) conf = get_multipath_config(); retrigger_tries = conf->retrigger_tries; pp->tick = conf->max_checkint; + pp->checkint = conf->checkint; put_multipath_config(conf); if (pp->initialized == INIT_MISSING_UDEV) { @@ -2778,6 +2772,10 @@ update_uninitialized_path(struct vectors * vecs, struct path * pp) int newstate, ret; struct config *conf; + if (pp->initialized != INIT_NEW && pp->initialized != INIT_FAILED && + pp->initialized != INIT_MISSING_UDEV) + return CHECK_PATH_SKIPPED; + newstate = get_new_state(pp); if (!strlen(pp->wwid) && @@ -2807,80 +2805,69 @@ update_uninitialized_path(struct vectors * vecs, struct path * pp) enum checker_state { CHECKER_STARTING, - CHECKER_RUNNING, + CHECKER_CHECKING_PATHS, + CHECKER_UPDATING_PATHS, CHECKER_FINISHED, }; static enum checker_state -check_paths(struct vectors *vecs, unsigned int ticks, int *num_paths_p) +check_paths(struct vectors *vecs, unsigned int ticks) { unsigned int paths_checked = 0; struct timespec diff_time, start_time, end_time; - struct multipath *mpp; struct path *pp; - int i, rc; + int i; get_monotonic_time(&start_time); - vector_foreach_slot(vecs->mpvec, mpp, i) { - struct pathgroup *pgp; - struct path *pp; - int j, k; - bool check_for_waiters = false; - /* maps can be rechecked, so this is not always 0 */ - int synced_count = mpp->synced_count; - - vector_foreach_slot (mpp->pg, pgp, j) { - vector_foreach_slot (pgp->paths, pp, k) { - if (!pp->mpp || pp->is_checked) - continue; - pp->is_checked = true; - rc = check_path(pp, ticks); - if (rc == CHECK_PATH_STARTED) - rc = update_path(vecs, pp, - start_time.tv_sec); - if (rc == CHECK_PATH_CHECKED) - (*num_paths_p)++; - if (++paths_checked % 128 == 0) - check_for_waiters = true; - /* - * mpp has been removed or resynced. Path may - * have been removed. - */ - if (VECTOR_SLOT(vecs->mpvec, i) != mpp || - synced_count != mpp->synced_count) { - i--; - goto next_mpp; - } - } - } -next_mpp: - if (check_for_waiters && + vector_foreach_slot(vecs->pathvec, pp, i) { + if (pp->is_checked != CHECK_PATH_UNCHECKED) + continue; + if (pp->mpp) + pp->is_checked = check_path(pp, ticks); + else + pp->is_checked = check_uninitialized_path(pp, ticks); + if (++paths_checked % 128 == 0 && (lock_has_waiters(&vecs->lock) || waiting_clients())) { get_monotonic_time(&end_time); timespecsub(&end_time, &start_time, &diff_time); if (diff_time.tv_sec > 0) - return CHECKER_RUNNING; + return CHECKER_CHECKING_PATHS; } } + return CHECKER_UPDATING_PATHS; +} + +static enum checker_state +update_paths(struct vectors *vecs, int *num_paths_p, time_t start_secs) +{ + unsigned int paths_checked = 0; + struct timespec diff_time, start_time, end_time; + struct path *pp; + int i, rc; + + get_monotonic_time(&start_time); + vector_foreach_slot(vecs->pathvec, pp, i) { - if (pp->mpp || pp->is_checked) + if (pp->is_checked != CHECK_PATH_STARTED) continue; - pp->is_checked = true; - - rc = check_uninitialized_path(pp, ticks); - if (rc == CHECK_PATH_STARTED) + if (pp->mpp) + rc = update_path(vecs, pp, start_secs); + else rc = update_uninitialized_path(vecs, pp); if (rc == CHECK_PATH_REMOVED) i--; - else if (rc == CHECK_PATH_CHECKED) - (*num_paths_p)++; + else { + pp->is_checked = rc; + if (rc == CHECK_PATH_CHECKED) + (*num_paths_p)++; + } if (++paths_checked % 128 == 0 && (lock_has_waiters(&vecs->lock) || waiting_clients())) { get_monotonic_time(&end_time); timespecsub(&end_time, &start_time, &diff_time); if (diff_time.tv_sec > 0) - return CHECKER_RUNNING; + return CHECKER_UPDATING_PATHS; } } return CHECKER_FINISHED; @@ -2948,10 +2935,14 @@ checkerloop (void *ap) vector_foreach_slot(vecs->mpvec, mpp, i) sync_mpp(vecs, mpp, ticks); vector_foreach_slot(vecs->pathvec, pp, i) - pp->is_checked = false; - checker_state = CHECKER_RUNNING; + pp->is_checked = CHECK_PATH_UNCHECKED; + checker_state = CHECKER_CHECKING_PATHS; } - checker_state = check_paths(vecs, ticks, &num_paths); + if (checker_state == CHECKER_CHECKING_PATHS) + checker_state = check_paths(vecs, ticks); + if (checker_state == CHECKER_UPDATING_PATHS) + checker_state = update_paths(vecs, &num_paths, + start_time.tv_sec); lock_cleanup_pop(vecs->lock); if (checker_state != CHECKER_FINISHED) { /* Yield to waiters */
Instead of checking and updating each path, the checkerloop now starts the checkers on all the paths in check_paths(), and then goes back and updates all the paths in update_paths(). Since the async checkers use an absolute time to wait for before returning PATH_PENDING, only one checker actually needs to be waited for in update_paths(). The rest will already have reached their timeout when update_path() is called for them. The check_paths() and update_paths() loop over the pathvec instead of looping through the multipath device paths to avoid having to restart checking of the device paths when the multipath device needs to be resynced while updating the paths. Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> --- libmultipath/structs.h | 10 +++- multipathd/main.c | 105 +++++++++++++++++++---------------------- 2 files changed, 57 insertions(+), 58 deletions(-)