diff mbox series

multipathd: give up "add missing path" after multiple failures

Message ID 20210329203935.19691-1-mwilck@suse.com (mailing list archive)
State Not Applicable, archived
Delegated to: christophe varoqui
Headers show
Series multipathd: give up "add missing path" after multiple failures | expand

Commit Message

Martin Wilck March 29, 2021, 8:39 p.m. UTC
From: Martin Wilck <mwilck@suse.com>

After b7aae60 ("multipathd: improve "add missing path" handling"),
a path that failed to initialize after multiple udev retriggers
would still be checked in check_path(). However, if a path is up,
has been checked more than once, the failback WWID methods have
been tried, and still there is no usable WWID, we may conclude
that something is fishy and we shouldn't keep trying.

Without this patch, totally WWID-less devices (seen e.g. on ESXi)
will cause a "add missing path" message in every checker iteration.

Fixes: b7aae60 ("multipathd: improve "add missing path" handling")
Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 libmultipath/discovery.c | 16 ++++++++++++++++
 multipathd/main.c        |  6 ++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

Comments

Benjamin Marzinski March 30, 2021, 7:06 a.m. UTC | #1
On Mon, Mar 29, 2021 at 10:39:35PM +0200, mwilck@suse.com wrote:
> From: Martin Wilck <mwilck@suse.com>
> 
> After b7aae60 ("multipathd: improve "add missing path" handling"),
> a path that failed to initialize after multiple udev retriggers
> would still be checked in check_path(). However, if a path is up,
> has been checked more than once, the failback WWID methods have
> been tried, and still there is no usable WWID, we may conclude
> that something is fishy and we shouldn't keep trying.
> 
> Without this patch, totally WWID-less devices (seen e.g. on ESXi)
> will cause a "add missing path" message in every checker iteration.
> 
> Fixes: b7aae60 ("multipathd: improve "add missing path" handling")
> Signed-off-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
> ---
>  libmultipath/discovery.c | 16 ++++++++++++++++
>  multipathd/main.c        |  6 ++++--
>  2 files changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
> index f804414..ec99a7a 100644
> --- a/libmultipath/discovery.c
> +++ b/libmultipath/discovery.c
> @@ -2370,6 +2370,22 @@ int pathinfo(struct path *pp, struct config *conf, int mask)
>  			if (pp->initialized != INIT_FAILED) {
>  				pp->initialized = INIT_MISSING_UDEV;
>  				pp->tick = conf->retrigger_delay;
> +			} else if (pp->retriggers >= conf->retrigger_tries &&
> +				   (pp->state == PATH_UP || pp->state == PATH_GHOST)) {
> +				/*
> +				 * We have failed to read udev info for this path
> +				 * repeatedly. We used the fallback in get_uid()
> +				 * if there was any, and still got no WWID,
> +				 * although the path is allegedly up.
> +				 * It's likely that this path is not fit for
> +				 * multipath use.
> +				 */
> +				char buf[16];
> +
> +				snprint_path(buf, sizeof(buf), "%T", pp, 0);
> +				condlog(1, "%s: no WWID in state \"%s\", giving up",
> +					pp->dev, buf);
> +				return PATHINFO_SKIPPED;
>  			}
>  			return PATHINFO_OK;
>  		}
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 3579bad..102946b 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2218,13 +2218,13 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  				ev_add_path(pp, vecs, 1);
>  				pp->tick = 1;
>  			} else {
> +				if (ret == PATHINFO_SKIPPED)
> +					return -1;
>  				/*
>  				 * We failed multiple times to initialize this
>  				 * path properly. Don't re-check too often.
>  				 */
>  				pp->checkint = max_checkint;
> -				if (ret == PATHINFO_SKIPPED)
> -					return -1;
>  			}
>  		}
>  		return 0;
> @@ -2504,6 +2504,8 @@ checkerloop (void *ap)
>  		vector_foreach_slot (vecs->pathvec, pp, i) {
>  			rc = check_path(vecs, pp, ticks);
>  			if (rc < 0) {
> +				condlog(1, "%s: check_path() failed, removing",
> +					pp->dev);
>  				vector_del_slot(vecs->pathvec, i);
>  				free_path(pp);
>  				i--;
> -- 
> 2.30.1

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index f804414..ec99a7a 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -2370,6 +2370,22 @@  int pathinfo(struct path *pp, struct config *conf, int mask)
 			if (pp->initialized != INIT_FAILED) {
 				pp->initialized = INIT_MISSING_UDEV;
 				pp->tick = conf->retrigger_delay;
+			} else if (pp->retriggers >= conf->retrigger_tries &&
+				   (pp->state == PATH_UP || pp->state == PATH_GHOST)) {
+				/*
+				 * We have failed to read udev info for this path
+				 * repeatedly. We used the fallback in get_uid()
+				 * if there was any, and still got no WWID,
+				 * although the path is allegedly up.
+				 * It's likely that this path is not fit for
+				 * multipath use.
+				 */
+				char buf[16];
+
+				snprint_path(buf, sizeof(buf), "%T", pp, 0);
+				condlog(1, "%s: no WWID in state \"%s\", giving up",
+					pp->dev, buf);
+				return PATHINFO_SKIPPED;
 			}
 			return PATHINFO_OK;
 		}
diff --git a/multipathd/main.c b/multipathd/main.c
index 3579bad..102946b 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2218,13 +2218,13 @@  check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
 				ev_add_path(pp, vecs, 1);
 				pp->tick = 1;
 			} else {
+				if (ret == PATHINFO_SKIPPED)
+					return -1;
 				/*
 				 * We failed multiple times to initialize this
 				 * path properly. Don't re-check too often.
 				 */
 				pp->checkint = max_checkint;
-				if (ret == PATHINFO_SKIPPED)
-					return -1;
 			}
 		}
 		return 0;
@@ -2504,6 +2504,8 @@  checkerloop (void *ap)
 		vector_foreach_slot (vecs->pathvec, pp, i) {
 			rc = check_path(vecs, pp, ticks);
 			if (rc < 0) {
+				condlog(1, "%s: check_path() failed, removing",
+					pp->dev);
 				vector_del_slot(vecs->pathvec, i);
 				free_path(pp);
 				i--;