diff mbox series

[v2,6/6] multipathd: cleanup logging for marginal paths

Message ID 1610677227-23550-7-git-send-email-bmarzins@redhat.com (mailing list archive)
State Not Applicable, archived
Delegated to: Mike Snitzer
Headers show
Series Multipath io_err_stat fixes | expand

Commit Message

Benjamin Marzinski Jan. 15, 2021, 2:20 a.m. UTC
io_err_stat logged at level 2 whenever it enqueued a path to check,
which could happen multiple times while a path was marginal.  On the
other hand if marginal_pathgroups wasn't set, multipathd didn't log when
paths were set to marginal. Now io_err_stat only logs at level 2 when
something unexpected happens, but multipathd will always log when a
path switches its marginal state.

Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
---
 libmultipath/io_err_stat.c |  7 +++----
 multipathd/main.c          | 25 ++++++++++++++-----------
 2 files changed, 17 insertions(+), 15 deletions(-)

Comments

Martin Wilck Jan. 15, 2021, 1:55 p.m. UTC | #1
On Thu, 2021-01-14 at 20:20 -0600, Benjamin Marzinski wrote:
> io_err_stat logged at level 2 whenever it enqueued a path to check,
> which could happen multiple times while a path was marginal.  On the
> other hand if marginal_pathgroups wasn't set, multipathd didn't log
> when
> paths were set to marginal. Now io_err_stat only logs at level 2 when
> something unexpected happens, but multipathd will always log when a
> path switches its marginal state.
> 
> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/io_err_stat.c |  7 +++----
>  multipathd/main.c          | 25 ++++++++++++++-----------
>  2 files changed, 17 insertions(+), 15 deletions(-)
> 
> diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
> index bf78a236..abdd0b4f 100644
> --- a/libmultipath/io_err_stat.c
> +++ b/libmultipath/io_err_stat.c
> @@ -252,7 +252,7 @@ static int enqueue_io_err_stat_by_path(struct
> path *path)
>         vector_set_slot(io_err_pathvec, p);
>         pthread_mutex_unlock(&io_err_pathvec_lock);
>  
> -       io_err_stat_log(2, "%s: enqueue path %s to check",
> +       io_err_stat_log(3, "%s: enqueue path %s to check",
>                         path->mpp->alias, path->dev);
>         return 0;
>  
> @@ -343,7 +343,7 @@ int need_io_err_check(struct path *pp)
>         if (uatomic_read(&io_err_thread_running) == 0)
>                 return 0;
>         if (count_active_paths(pp->mpp) <= 0) {
> -               io_err_stat_log(2, "%s: recover path early", pp-
> >dev);
> +               io_err_stat_log(2, "%s: no paths. recovering early",
> pp->dev);
>                 goto recover;
>         }
>         if (pp->io_err_pathfail_cnt != PATH_IO_ERR_WAITING_TO_CHECK)
> @@ -361,8 +361,7 @@ int need_io_err_check(struct path *pp)
>                  * Or else,  return 1 to set path state to PATH_SHAKY
>                  */
>                 if (r == 1) {
> -                       io_err_stat_log(3, "%s: enqueue fails, to
> recover",
> -                                       pp->dev);
> +                       io_err_stat_log(2, "%s: enqueue failed.
> recovering early", pp->dev);
>                         goto recover;
>                 } else
>                         pp->io_err_pathfail_cnt =
> PATH_IO_ERR_IN_CHECKING;
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 92c45d44..99a89a69 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2132,8 +2132,8 @@ check_path (struct vectors * vecs, struct path
> * pp, unsigned int ticks)
>                 pathinfo(pp, conf, 0);
>                 pthread_cleanup_pop(1);
>                 return 1;
> -       } else if ((newstate != PATH_UP && newstate != PATH_GHOST) &&
> -                       (pp->state == PATH_DELAYED)) {
> +       } else if ((newstate != PATH_UP && newstate != PATH_GHOST &&
> +                   newstate != PATH_PENDING) && (pp->state ==
> PATH_DELAYED)) {

I think this is correct, but it needs to  be mentioned in the commit
message (or go into a separate patch).

Regards,
Martin
Benjamin Marzinski Jan. 18, 2021, 8:32 p.m. UTC | #2
On Fri, Jan 15, 2021 at 01:55:09PM +0000, Martin Wilck wrote:
> On Thu, 2021-01-14 at 20:20 -0600, Benjamin Marzinski wrote:
> > io_err_stat logged at level 2 whenever it enqueued a path to check,
> > which could happen multiple times while a path was marginal.  On the
> > other hand if marginal_pathgroups wasn't set, multipathd didn't log
> > when
> > paths were set to marginal. Now io_err_stat only logs at level 2 when
> > something unexpected happens, but multipathd will always log when a
> > path switches its marginal state.
> > 
> > Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>

> > diff --git a/multipathd/main.c b/multipathd/main.c
> > index 92c45d44..99a89a69 100644
> > --- a/multipathd/main.c
> > +++ b/multipathd/main.c
> > @@ -2132,8 +2132,8 @@ check_path (struct vectors * vecs, struct path
> > * pp, unsigned int ticks)
> >                 pathinfo(pp, conf, 0);
> >                 pthread_cleanup_pop(1);
> >                 return 1;
> > -       } else if ((newstate != PATH_UP && newstate != PATH_GHOST) &&
> > -                       (pp->state == PATH_DELAYED)) {
> > +       } else if ((newstate != PATH_UP && newstate != PATH_GHOST &&
> > +                   newstate != PATH_PENDING) && (pp->state ==
> > PATH_DELAYED)) {
> 
> I think this is correct, but it needs to  be mentioned in the commit
> message (or go into a separate patch).

It needs to go in this patch. Without it, devices in the delayed state
can change to pending, and then back again. The code logs another
message every time this happens. But I can add an explanation to the
commit message.

I'll make a v3 release with just this patch.

-Ben
 
> Regards,
> Martin
> 
> 
> -- 
> Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
> SUSE Software Solutions Germany GmbH
> HRB 36809, AG Nürnberg GF: Felix Imendörffer
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
index bf78a236..abdd0b4f 100644
--- a/libmultipath/io_err_stat.c
+++ b/libmultipath/io_err_stat.c
@@ -252,7 +252,7 @@  static int enqueue_io_err_stat_by_path(struct path *path)
 	vector_set_slot(io_err_pathvec, p);
 	pthread_mutex_unlock(&io_err_pathvec_lock);
 
-	io_err_stat_log(2, "%s: enqueue path %s to check",
+	io_err_stat_log(3, "%s: enqueue path %s to check",
 			path->mpp->alias, path->dev);
 	return 0;
 
@@ -343,7 +343,7 @@  int need_io_err_check(struct path *pp)
 	if (uatomic_read(&io_err_thread_running) == 0)
 		return 0;
 	if (count_active_paths(pp->mpp) <= 0) {
-		io_err_stat_log(2, "%s: recover path early", pp->dev);
+		io_err_stat_log(2, "%s: no paths. recovering early", pp->dev);
 		goto recover;
 	}
 	if (pp->io_err_pathfail_cnt != PATH_IO_ERR_WAITING_TO_CHECK)
@@ -361,8 +361,7 @@  int need_io_err_check(struct path *pp)
 		 * Or else,  return 1 to set path state to PATH_SHAKY
 		 */
 		if (r == 1) {
-			io_err_stat_log(3, "%s: enqueue fails, to recover",
-					pp->dev);
+			io_err_stat_log(2, "%s: enqueue failed. recovering early", pp->dev);
 			goto recover;
 		} else
 			pp->io_err_pathfail_cnt = PATH_IO_ERR_IN_CHECKING;
diff --git a/multipathd/main.c b/multipathd/main.c
index 92c45d44..99a89a69 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2132,8 +2132,8 @@  check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 		pathinfo(pp, conf, 0);
 		pthread_cleanup_pop(1);
 		return 1;
-	} else if ((newstate != PATH_UP && newstate != PATH_GHOST) &&
-			(pp->state == PATH_DELAYED)) {
+	} else if ((newstate != PATH_UP && newstate != PATH_GHOST &&
+		    newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) {
 		/* If path state become failed again cancel path delay state */
 		pp->state = newstate;
 		/*
@@ -2200,8 +2200,9 @@  check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 	if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
 	    (san_path_check_enabled(pp->mpp) ||
 	     marginal_path_check_enabled(pp->mpp))) {
-		int was_marginal = pp->marginal;
 		if (should_skip_path(pp)) {
+			if (!pp->marginal && pp->state != PATH_DELAYED)
+				condlog(2, "%s: path is now marginal", pp->dev);
 			if (!marginal_pathgroups) {
 				if (marginal_path_check_enabled(pp->mpp))
 					/* to reschedule as soon as possible,
@@ -2211,13 +2212,18 @@  check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 				pp->state = PATH_DELAYED;
 				return 1;
 			}
-			if (!was_marginal) {
+			if (!pp->marginal) {
 				pp->marginal = 1;
 				marginal_changed = 1;
 			}
-		} else if (marginal_pathgroups && was_marginal) {
-			pp->marginal = 0;
-			marginal_changed = 1;
+		} else {
+			if (pp->marginal || pp->state == PATH_DELAYED)
+				condlog(2, "%s: path is no longer marginal",
+					pp->dev);
+			if (marginal_pathgroups && pp->marginal) {
+				pp->marginal = 0;
+				marginal_changed = 1;
+			}
 		}
 	}
 
@@ -2343,11 +2349,8 @@  check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 	 */
 	condlog(4, "path prio refresh");
 
-	if (marginal_changed) {
-		condlog(2, "%s: path is %s marginal", pp->dev,
-			(pp->marginal)? "now" : "no longer");
+	if (marginal_changed)
 		reload_and_sync_map(pp->mpp, vecs, 1);
-	}
 	else if (update_prio(pp, new_path_up) &&
 	    (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
 	     pp->mpp->pgfailback == -FAILBACK_IMMEDIATE) {