Message ID | 20240912214947.783819-15-bmarzins@redhat.com (mailing list archive) |
---|---|
State | Not Applicable, archived |
Delegated to: | Benjamin Marzinski |
Headers | show |
Series | Yet Another path checker refactor | expand |
On Thu, 2024-09-12 at 17:49 -0400, Benjamin Marzinski wrote: > Instead of updating the path priorities and possibly reloading the > multipath device when each path is updated, wait till all paths > have been updated, and then go through the multipath devices updating > the priorities once, reloading if necessary. > > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> > --- > libmultipath/structs.h | 9 +++ > multipathd/main.c | 169 ++++++++++++++++++++++++++------------- > -- > 2 files changed, 118 insertions(+), 60 deletions(-) > > diff --git a/libmultipath/structs.h b/libmultipath/structs.h > index af8e31e9..1f531d30 100644 > --- a/libmultipath/structs.h > +++ b/libmultipath/structs.h > @@ -318,6 +318,7 @@ enum check_path_states { > CHECK_PATH_UNCHECKED, > CHECK_PATH_STARTED, > CHECK_PATH_CHECKED, > + CHECK_PATH_NEW_UP, > CHECK_PATH_SKIPPED, > CHECK_PATH_REMOVED, > }; > @@ -421,6 +422,13 @@ enum prflag_value { > PRFLAG_SET, > }; > > +enum prio_update_type { > + PRIO_UPDATE_NONE, > + PRIO_UPDATE_NORMAL, > + PRIO_UPDATE_NEW_PATH, > + PRIO_UPDATE_MARGINAL, > +}; > + > struct multipath { > char wwid[WWID_SIZE]; > char alias_old[WWID_SIZE]; > @@ -464,6 +472,7 @@ struct multipath { > int queue_mode; > unsigned int sync_tick; > int synced_count; > + enum prio_update_type prio_update; > uid_t uid; > gid_t gid; > mode_t mode; > diff --git a/multipathd/main.c b/multipathd/main.c > index 9519b6c5..3cda3c18 100644 > --- a/multipathd/main.c > +++ b/multipathd/main.c > @@ -1996,15 +1996,13 @@ mpvec_garbage_collector (struct vectors * > vecs) > * best pathgroup, and this is the first path in the pathgroup to > come back > * up, then switch to this pathgroup */ > static int > -followover_should_failback(struct path * pp) > +do_followover_should_failback(struct path * pp) > { > struct pathgroup * pgp; > struct path *pp1; > int i; > > - if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER || > - !pp->mpp->pg || !pp->pgindex || > - pp->pgindex != pp->mpp->bestpg) > + if (!pp->pgindex || pp->pgindex != pp->mpp->bestpg) > return 0; > > pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1); > @@ -2017,6 +2015,26 @@ followover_should_failback(struct path * pp) > return 1; > } > > +static int > +followover_should_failback(struct multipath *mpp) > +{ > + struct path *pp; > + struct pathgroup * pgp; > + int i, j; > + > + if (mpp->pgfailback != -FAILBACK_FOLLOWOVER || !mpp->pg) > + return 0; > + > + vector_foreach_slot (mpp->pg, pgp, i) { > + vector_foreach_slot (pgp->paths, pp, j) { > + if (pp->is_checked == CHECK_PATH_NEW_UP && > + do_followover_should_failback(pp)) > + return 1; > + } > + } > + return 0; > +} > + > static void > missing_uev_wait_tick(struct vectors *vecs) > { > @@ -2132,41 +2150,53 @@ partial_retrigger_tick(vector pathvec) > } > } > > -static int update_prio(struct path *pp, int force_refresh_all) > +static bool update_prio(struct multipath *mpp, bool refresh_all) > { > int oldpriority; > - struct path *pp1; > + struct path *pp; > struct pathgroup * pgp; > - int i, j, changed = 0; > + int i, j; > + bool changed = false; > + bool skipped_path = false; > struct config *conf; > > - oldpriority = pp->priority; > - if (pp->state != PATH_DOWN) { > - conf = get_multipath_config(); > - pthread_cleanup_push(put_multipath_config, conf); > - pathinfo(pp, conf, DI_PRIO); > - pthread_cleanup_pop(1); > + vector_foreach_slot (mpp->pg, pgp, i) { > + vector_foreach_slot (pgp->paths, pp, j) { > + if (pp->state == PATH_DOWN) > + continue; > + if (!refresh_all && > + pp->is_checked != CHECK_PATH_CHECKED) { > + skipped_path = true; > + continue; > + } Nit: My first thought here was that this would skip paths for which pp->is_checked == CHECK_PATH_NEW_UP. Then I realized that if there was a new path up, refresh_all would be set, which is not immediately obvious. Can you add a comment? Martin
On Thu, Oct 03, 2024 at 11:00:20PM +0200, Martin Wilck wrote: > On Thu, 2024-09-12 at 17:49 -0400, Benjamin Marzinski wrote: > > Instead of updating the path priorities and possibly reloading the > > multipath device when each path is updated, wait till all paths > > have been updated, and then go through the multipath devices updating > > the priorities once, reloading if necessary. > > > > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> > > --- > > libmultipath/structs.h | 9 +++ > > multipathd/main.c | 169 ++++++++++++++++++++++++++------------- > > -- > > 2 files changed, 118 insertions(+), 60 deletions(-) > > > > diff --git a/libmultipath/structs.h b/libmultipath/structs.h > > index af8e31e9..1f531d30 100644 > > --- a/libmultipath/structs.h > > +++ b/libmultipath/structs.h > > @@ -318,6 +318,7 @@ enum check_path_states { > > CHECK_PATH_UNCHECKED, > > CHECK_PATH_STARTED, > > CHECK_PATH_CHECKED, > > + CHECK_PATH_NEW_UP, > > CHECK_PATH_SKIPPED, > > CHECK_PATH_REMOVED, > > }; > > @@ -421,6 +422,13 @@ enum prflag_value { > > PRFLAG_SET, > > }; > > > > +enum prio_update_type { > > + PRIO_UPDATE_NONE, > > + PRIO_UPDATE_NORMAL, > > + PRIO_UPDATE_NEW_PATH, > > + PRIO_UPDATE_MARGINAL, > > +}; > > + > > struct multipath { > > char wwid[WWID_SIZE]; > > char alias_old[WWID_SIZE]; > > @@ -464,6 +472,7 @@ struct multipath { > > int queue_mode; > > unsigned int sync_tick; > > int synced_count; > > + enum prio_update_type prio_update; > > uid_t uid; > > gid_t gid; > > mode_t mode; > > diff --git a/multipathd/main.c b/multipathd/main.c > > index 9519b6c5..3cda3c18 100644 > > --- a/multipathd/main.c > > +++ b/multipathd/main.c > > @@ -1996,15 +1996,13 @@ mpvec_garbage_collector (struct vectors * > > vecs) > > * best pathgroup, and this is the first path in the pathgroup to > > come back > > * up, then switch to this pathgroup */ > > static int > > -followover_should_failback(struct path * pp) > > +do_followover_should_failback(struct path * pp) > > { > > struct pathgroup * pgp; > > struct path *pp1; > > int i; > > > > - if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER || > > - !pp->mpp->pg || !pp->pgindex || > > - pp->pgindex != pp->mpp->bestpg) > > + if (!pp->pgindex || pp->pgindex != pp->mpp->bestpg) > > return 0; > > > > pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1); > > @@ -2017,6 +2015,26 @@ followover_should_failback(struct path * pp) > > return 1; > > } > > > > +static int > > +followover_should_failback(struct multipath *mpp) > > +{ > > + struct path *pp; > > + struct pathgroup * pgp; > > + int i, j; > > + > > + if (mpp->pgfailback != -FAILBACK_FOLLOWOVER || !mpp->pg) > > + return 0; > > + > > + vector_foreach_slot (mpp->pg, pgp, i) { > > + vector_foreach_slot (pgp->paths, pp, j) { > > + if (pp->is_checked == CHECK_PATH_NEW_UP && > > + do_followover_should_failback(pp)) > > + return 1; > > + } > > + } > > + return 0; > > +} > > + > > static void > > missing_uev_wait_tick(struct vectors *vecs) > > { > > @@ -2132,41 +2150,53 @@ partial_retrigger_tick(vector pathvec) > > } > > } > > > > -static int update_prio(struct path *pp, int force_refresh_all) > > +static bool update_prio(struct multipath *mpp, bool refresh_all) > > { > > int oldpriority; > > - struct path *pp1; > > + struct path *pp; > > struct pathgroup * pgp; > > - int i, j, changed = 0; > > + int i, j; > > + bool changed = false; > > + bool skipped_path = false; > > struct config *conf; > > > > - oldpriority = pp->priority; > > - if (pp->state != PATH_DOWN) { > > - conf = get_multipath_config(); > > - pthread_cleanup_push(put_multipath_config, conf); > > - pathinfo(pp, conf, DI_PRIO); > > - pthread_cleanup_pop(1); > > + vector_foreach_slot (mpp->pg, pgp, i) { > > + vector_foreach_slot (pgp->paths, pp, j) { > > + if (pp->state == PATH_DOWN) > > + continue; > > + if (!refresh_all && > > + pp->is_checked != CHECK_PATH_CHECKED) { > > + skipped_path = true; > > + continue; > > + } > > Nit: My first thought here was that this would skip paths for which > pp->is_checked == CHECK_PATH_NEW_UP. Then I realized that if there was > a new path up, refresh_all would be set, which is not immediately > obvious. Can you add a comment? Sure. > > Martin
diff --git a/libmultipath/structs.h b/libmultipath/structs.h index af8e31e9..1f531d30 100644 --- a/libmultipath/structs.h +++ b/libmultipath/structs.h @@ -318,6 +318,7 @@ enum check_path_states { CHECK_PATH_UNCHECKED, CHECK_PATH_STARTED, CHECK_PATH_CHECKED, + CHECK_PATH_NEW_UP, CHECK_PATH_SKIPPED, CHECK_PATH_REMOVED, }; @@ -421,6 +422,13 @@ enum prflag_value { PRFLAG_SET, }; +enum prio_update_type { + PRIO_UPDATE_NONE, + PRIO_UPDATE_NORMAL, + PRIO_UPDATE_NEW_PATH, + PRIO_UPDATE_MARGINAL, +}; + struct multipath { char wwid[WWID_SIZE]; char alias_old[WWID_SIZE]; @@ -464,6 +472,7 @@ struct multipath { int queue_mode; unsigned int sync_tick; int synced_count; + enum prio_update_type prio_update; uid_t uid; gid_t gid; mode_t mode; diff --git a/multipathd/main.c b/multipathd/main.c index 9519b6c5..3cda3c18 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -1996,15 +1996,13 @@ mpvec_garbage_collector (struct vectors * vecs) * best pathgroup, and this is the first path in the pathgroup to come back * up, then switch to this pathgroup */ static int -followover_should_failback(struct path * pp) +do_followover_should_failback(struct path * pp) { struct pathgroup * pgp; struct path *pp1; int i; - if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER || - !pp->mpp->pg || !pp->pgindex || - pp->pgindex != pp->mpp->bestpg) + if (!pp->pgindex || pp->pgindex != pp->mpp->bestpg) return 0; pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1); @@ -2017,6 +2015,26 @@ followover_should_failback(struct path * pp) return 1; } +static int +followover_should_failback(struct multipath *mpp) +{ + struct path *pp; + struct pathgroup * pgp; + int i, j; + + if (mpp->pgfailback != -FAILBACK_FOLLOWOVER || !mpp->pg) + return 0; + + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp, j) { + if (pp->is_checked == CHECK_PATH_NEW_UP && + do_followover_should_failback(pp)) + return 1; + } + } + return 0; +} + static void missing_uev_wait_tick(struct vectors *vecs) { @@ -2132,41 +2150,53 @@ partial_retrigger_tick(vector pathvec) } } -static int update_prio(struct path *pp, int force_refresh_all) +static bool update_prio(struct multipath *mpp, bool refresh_all) { int oldpriority; - struct path *pp1; + struct path *pp; struct pathgroup * pgp; - int i, j, changed = 0; + int i, j; + bool changed = false; + bool skipped_path = false; struct config *conf; - oldpriority = pp->priority; - if (pp->state != PATH_DOWN) { - conf = get_multipath_config(); - pthread_cleanup_push(put_multipath_config, conf); - pathinfo(pp, conf, DI_PRIO); - pthread_cleanup_pop(1); + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp, j) { + if (pp->state == PATH_DOWN) + continue; + if (!refresh_all && + pp->is_checked != CHECK_PATH_CHECKED) { + skipped_path = true; + continue; + } + oldpriority = pp->priority; + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + pathinfo(pp, conf, DI_PRIO); + pthread_cleanup_pop(1); + if (pp->priority != oldpriority) + changed = true; + } } - - if (pp->priority != oldpriority) - changed = 1; - else if (!force_refresh_all) - return 0; - - vector_foreach_slot (pp->mpp->pg, pgp, i) { - vector_foreach_slot (pgp->paths, pp1, j) { - if (pp1 == pp || pp1->state == PATH_DOWN) + if (!changed || !skipped_path) + return changed; + /* + * If a path changed priorities, refresh the priorities of any + * paths we skipped + */ + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp, j) { + if (pp->state == PATH_DOWN) + continue; + if (pp->is_checked == CHECK_PATH_CHECKED) continue; - oldpriority = pp1->priority; conf = get_multipath_config(); pthread_cleanup_push(put_multipath_config, conf); - pathinfo(pp1, conf, DI_PRIO); + pathinfo(pp, conf, DI_PRIO); pthread_cleanup_pop(1); - if (pp1->priority != oldpriority) - changed = 1; } } - return changed; + return true; } static int reload_map(struct vectors *vecs, struct multipath *mpp, @@ -2393,14 +2423,12 @@ static int update_path_state (struct vectors * vecs, struct path * pp) { int newstate; - int new_path_up = 0; int chkr_new_path_up = 0; int disable_reinstate = 0; int oldchkrstate = pp->chkrstate; unsigned int checkint, max_checkint; struct config *conf; - int marginal_pathgroups, marginal_changed = 0; - bool need_reload; + int marginal_pathgroups; conf = get_multipath_config(); checkint = conf->checkint; @@ -2462,7 +2490,7 @@ update_path_state (struct vectors * vecs, struct path * pp) } if (!pp->marginal) { pp->marginal = 1; - marginal_changed = 1; + pp->mpp->prio_update = PRIO_UPDATE_MARGINAL; } } else { if (pp->marginal || pp->state == PATH_DELAYED) @@ -2470,7 +2498,7 @@ update_path_state (struct vectors * vecs, struct path * pp) pp->dev); if (marginal_pathgroups && pp->marginal) { pp->marginal = 0; - marginal_changed = 1; + pp->mpp->prio_update = PRIO_UPDATE_MARGINAL; } } } @@ -2537,7 +2565,8 @@ update_path_state (struct vectors * vecs, struct path * pp) */ if (!disable_reinstate) reinstate_path(pp); - new_path_up = 1; + if (pp->mpp->prio_update != PRIO_UPDATE_MARGINAL) + pp->mpp->prio_update = PRIO_UPDATE_NEW_PATH; if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST) chkr_new_path_up = 1; @@ -2588,38 +2617,48 @@ update_path_state (struct vectors * vecs, struct path * pp) LOG_MSG(2, pp); } } - + if (pp->mpp->prio_update == PRIO_UPDATE_NONE && + (newstate == PATH_UP || newstate == PATH_GHOST)) + pp->mpp->prio_update = PRIO_UPDATE_NORMAL; pp->state = newstate; + return chkr_new_path_up ? CHECK_PATH_NEW_UP : CHECK_PATH_CHECKED; +} - if (pp->mpp->wait_for_udev) - return CHECK_PATH_CHECKED; - /* - * path prio refreshing - */ - condlog(4, "path prio refresh"); - - if (marginal_changed) { - update_prio(pp, 1); - reload_and_sync_map(pp->mpp, vecs); - } else if (update_prio(pp, new_path_up) && - pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio && - pp->mpp->pgfailback == -FAILBACK_IMMEDIATE) { +static int +update_mpp_prio(struct vectors *vecs, struct multipath *mpp) +{ + bool need_reload, changed; + enum prio_update_type prio_update = mpp->prio_update; + mpp->prio_update = PRIO_UPDATE_NONE; + + if (mpp->wait_for_udev || prio_update == PRIO_UPDATE_NONE) + return 0; + condlog(4, "prio refresh"); + + changed = update_prio(mpp, prio_update != PRIO_UPDATE_NORMAL); + if (prio_update == PRIO_UPDATE_MARGINAL) + return reload_and_sync_map(mpp, vecs); + if (changed && mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio && + mpp->pgfailback == -FAILBACK_IMMEDIATE) { condlog(2, "%s: path priorities changed. reloading", - pp->mpp->alias); - reload_and_sync_map(pp->mpp, vecs); - } else if (need_switch_pathgroup(pp->mpp, &need_reload)) { - if (pp->mpp->pgfailback > 0 && - (new_path_up || pp->mpp->failback_tick <= 0)) - pp->mpp->failback_tick = pp->mpp->pgfailback + 1; - else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE || - (chkr_new_path_up && followover_should_failback(pp))) { + mpp->alias); + return reload_and_sync_map(mpp, vecs); + } + if (need_switch_pathgroup(mpp, &need_reload)) { + if (mpp->pgfailback > 0 && + (prio_update == PRIO_UPDATE_NEW_PATH || + mpp->failback_tick <= 0)) + mpp->failback_tick = mpp->pgfailback + 1; + else if (mpp->pgfailback == -FAILBACK_IMMEDIATE || + (prio_update == PRIO_UPDATE_NEW_PATH && + followover_should_failback(mpp))) { if (need_reload) - reload_and_sync_map(pp->mpp, vecs); + return reload_and_sync_map(mpp, vecs); else - switch_pathgroup(pp->mpp); + switch_pathgroup(mpp); } } - return CHECK_PATH_CHECKED; + return 0; } static int @@ -2859,7 +2898,7 @@ update_paths(struct vectors *vecs, int *num_paths_p, time_t start_secs) i--; else { pp->is_checked = rc; - if (rc == CHECK_PATH_CHECKED) + if (rc == CHECK_PATH_CHECKED || rc == CHECK_PATH_NEW_UP) (*num_paths_p)++; } if (++paths_checked % 128 == 0 && @@ -2932,8 +2971,10 @@ checkerloop (void *ap) vector_foreach_slot(vecs->mpvec, mpp, i) mpp->synced_count = 0; if (checker_state == CHECKER_STARTING) { - vector_foreach_slot(vecs->mpvec, mpp, i) + vector_foreach_slot(vecs->mpvec, mpp, i) { sync_mpp(vecs, mpp, ticks); + mpp->prio_update = PRIO_UPDATE_NONE; + } vector_foreach_slot(vecs->pathvec, pp, i) pp->is_checked = CHECK_PATH_UNCHECKED; checker_state = CHECKER_CHECKING_PATHS; @@ -2943,6 +2984,14 @@ checkerloop (void *ap) if (checker_state == CHECKER_UPDATING_PATHS) checker_state = update_paths(vecs, &num_paths, start_time.tv_sec); + if (checker_state == CHECKER_FINISHED) { + vector_foreach_slot(vecs->mpvec, mpp, i) { + if (update_mpp_prio(vecs, mpp) == 2) { + /* multipath device deleted */ + i--; + } + } + } lock_cleanup_pop(vecs->lock); if (checker_state != CHECKER_FINISHED) { /* Yield to waiters */
Instead of updating the path priorities and possibly reloading the multipath device when each path is updated, wait till all paths have been updated, and then go through the multipath devices updating the priorities once, reloading if necessary. Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> --- libmultipath/structs.h | 9 +++ multipathd/main.c | 169 ++++++++++++++++++++++++++--------------- 2 files changed, 118 insertions(+), 60 deletions(-)