diff mbox

[v4,10/20] libmultipath: indicate wwid failure in dm_addmap_create()

Message ID: 20180404161627.6244-11-mwilck@suse.com (mailing list archive)
State: Not Applicable, archived
Delegated to: christophe varoqui
Headers show

Commit Message

Martin Wilck April 4, 2018, 4:16 p.m. UTC
dm_addmap_create() is where we actually try to set up a new
multipath map. Depending on the result, mark the wwid as
failed (or not), and re-trigger a uevent if necessary.
If a path changes from multipath to non-multipath, use an "add"
event to make sure LVM2 rules pick it up. Log this event at
level 3 instead of level 4.

Signed-off-by: Martin Wilck <mwilck@suse.com>
---
 libmultipath/configure.c | 37 ++++++++++++++++++++++++++++---------
 libmultipath/configure.h |  2 +-
 libmultipath/devmapper.c |  9 ++++++++-
 libmultipath/structs.h   |  1 +
 multipathd/main.c        |  2 +-
 5 files changed, 39 insertions(+), 12 deletions(-)

Comments

Benjamin Marzinski April 12, 2018, 6:33 p.m. UTC | #1
On Wed, Apr 04, 2018 at 06:16:17PM +0200, Martin Wilck wrote:
> dm_addmap_create() is where we actually try to set up a new
> multipath map. Depending on the result, mark the wwid as
> failed (or not), and re-trigger an uevent if necessary.
> If a path changes from multipath to non-multipath, use an "add"
> event to make sure LVM2 rules pick it up. Increase log level
> of this event to 3.
> 

By only looking at domap, we will miss instances of multipathd failing
to create maps earlier in the process. This isn't necessarily wrong. It
just means that we can't rely on checking
/dev/shm/multipath/failed_wwids to definitively tell us whether
multipathd has tried and failed to create the device.

Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com>
> Signed-off-by: Martin Wilck <mwilck@suse.com>
> ---
>  libmultipath/configure.c | 37 ++++++++++++++++++++++++++++---------
>  libmultipath/configure.h |  2 +-
>  libmultipath/devmapper.c |  9 ++++++++-
>  libmultipath/structs.h   |  1 +
>  multipathd/main.c        |  2 +-
>  5 files changed, 39 insertions(+), 12 deletions(-)
> 
> diff --git a/libmultipath/configure.c b/libmultipath/configure.c
> index c1a50e4..9aa3d21 100644
> --- a/libmultipath/configure.c
> +++ b/libmultipath/configure.c
> @@ -445,11 +445,18 @@ trigger_udev_change(const struct multipath *mpp)
>  }
>  
>  void
> -trigger_paths_udev_change(const struct multipath *mpp)
> +trigger_paths_udev_change(struct multipath *mpp, bool is_mpath)
>  {
>  	struct pathgroup * pgp;
>  	struct path * pp;
>  	int i, j;
> +	/*
> +	 * If a path changes from multipath to non-multipath, we must
> +	 * synthesize an artificial "add" event, otherwise the LVM2 rules
> +	 * (69-lvm2-lvmetad.rules) won't pick it up. Otherwise, we'd just
> +	 * irritate ourselves with an "add", so use "change".
> +	 */
> +	const char *action = is_mpath ? "change" : "add";
>  
>  	if (!mpp || !mpp->pg)
>  		return;
> @@ -468,14 +475,21 @@ trigger_paths_udev_change(const struct multipath *mpp)
>  			 */
>  			env = udev_device_get_property_value(
>  				pp->udev, "DM_MULTIPATH_DEVICE_PATH");
> -			if (env != NULL && !strcmp(env, "1"))
> -					continue;
>  
> -			condlog(4, "triggering change uevent for %s", pp->dev);
> -			sysfs_attr_set_value(pp->udev, "uevent", "change",
> -					     strlen("change"));
> +			if (is_mpath && env != NULL && !strcmp(env, "1"))
> +				continue;
> +			else if (!is_mpath &&
> +				   (env == NULL || !strcmp(env, "0")))
> +				continue;
> +
> +			condlog(3, "triggering %s uevent for %s (is %smultipath member)",
> +				action, pp->dev, is_mpath ? "" : "no ");
> +			sysfs_attr_set_value(pp->udev, "uevent",
> +					     action, strlen(action));
>  		}
>  	}
> +
> +	mpp->needs_paths_uevent = 0;
>  }
>  
>  static int
> @@ -876,8 +890,10 @@ int domap(struct multipath *mpp, char *params, int is_daemon)
>  		 * succeeded
>  		 */
>  		mpp->force_udev_reload = 0;
> -		if (mpp->action == ACT_CREATE && remember_wwid(mpp->wwid) == 1)
> -			trigger_paths_udev_change(mpp);
> +		if (mpp->action == ACT_CREATE &&
> +		    (remember_wwid(mpp->wwid) == 1 ||
> +		     mpp->needs_paths_uevent))
> +			trigger_paths_udev_change(mpp, true);
>  		if (!is_daemon) {
>  			/* multipath client mode */
>  			dm_switchgroup(mpp->alias, mpp->bestpg);
> @@ -902,7 +918,10 @@ int domap(struct multipath *mpp, char *params, int is_daemon)
>  		}
>  		dm_setgeometry(mpp);
>  		return DOMAP_OK;
> -	}
> +	} else if (r == DOMAP_FAIL && mpp->action == ACT_CREATE &&
> +		   mpp->needs_paths_uevent)
> +		trigger_paths_udev_change(mpp, false);
> +
>  	return DOMAP_FAIL;
>  }
>  
> diff --git a/libmultipath/configure.h b/libmultipath/configure.h
> index 545cbc2..8b56d33 100644
> --- a/libmultipath/configure.h
> +++ b/libmultipath/configure.h
> @@ -37,4 +37,4 @@ int get_refwwid (enum mpath_cmds cmd, char * dev, enum devtypes dev_type,
>  		 vector pathvec, char **wwid);
>  int reload_map(struct vectors *vecs, struct multipath *mpp, int refresh, int is_daemon);
>  struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type);
> -void trigger_paths_udev_change(const struct multipath *mpp);
> +void trigger_paths_udev_change(struct multipath *mpp, bool is_mpath);
> diff --git a/libmultipath/devmapper.c b/libmultipath/devmapper.c
> index 2a92105..f2befad 100644
> --- a/libmultipath/devmapper.c
> +++ b/libmultipath/devmapper.c
> @@ -22,6 +22,7 @@
>  #include "devmapper.h"
>  #include "sysfs.h"
>  #include "config.h"
> +#include "wwids.h"
>  
>  #include "log_pthread.h"
>  #include <sys/types.h>
> @@ -415,8 +416,12 @@ int dm_addmap_create (struct multipath *mpp, char * params)
>  		int err;
>  
>  		if (dm_addmap(DM_DEVICE_CREATE, TGT_MPATH, mpp, params, ro,
> -			      udev_flags))
> +			      udev_flags)) {
> +			if (unmark_failed_wwid(mpp->wwid) ==
> +			    WWID_FAILED_CHANGED)
> +				mpp->needs_paths_uevent = 1;
>  			return 1;
> +		}
>  		/*
>  		 * DM_DEVICE_CREATE is actually DM_DEV_CREATE + DM_TABLE_LOAD.
>  		 * Failing the second part leaves an empty map. Clean it up.
> @@ -432,6 +437,8 @@ int dm_addmap_create (struct multipath *mpp, char * params)
>  			break;
>  		}
>  	}
> +	if (mark_failed_wwid(mpp->wwid) == WWID_FAILED_CHANGED)
> +		mpp->needs_paths_uevent = 1;
>  	return 0;
>  }
>  
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index c43f2c3..1d3a34f 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -322,6 +322,7 @@ struct multipath {
>  	int max_sectors_kb;
>  	int force_readonly;
>  	int force_udev_reload;
> +	int needs_paths_uevent;
>  	int ghost_delay;
>  	int ghost_delay_tick;
>  	unsigned int dev_loss;
> diff --git a/multipathd/main.c b/multipathd/main.c
> index e6f4e77..7eeb743 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -2325,7 +2325,7 @@ configure (struct vectors * vecs)
>  	sync_maps_state(mpvec);
>  	vector_foreach_slot(mpvec, mpp, i){
>  		if (remember_wwid(mpp->wwid) == 1)
> -			trigger_paths_udev_change(mpp);
> +			trigger_paths_udev_change(mpp, true);
>  		update_map_pr(mpp);
>  	}
>  
> -- 
> 2.16.1

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Martin Wilck April 12, 2018, 8:16 p.m. UTC | #2
On Thu, 2018-04-12 at 13:33 -0500, Benjamin Marzinski wrote:
> On Wed, Apr 04, 2018 at 06:16:17PM +0200, Martin Wilck wrote:
> > dm_addmap_create() is where we actually try to set up a new
> > multipath map. Depending on the result, mark the wwid as
> > failed (or not), and re-trigger an uevent if necessary.
> > If a path changes from multipath to non-multipath, use an "add"
> > event to make sure LVM2 rules pick it up. Increase log level
> > of this event to 3.
> > 
> 
> By only looking at domap, we will miss instances of multipathd
> failing
> to create maps earlier in the process. This isn't necessarily wrong.
> It
> just means that we can't rely on checking
> /dev/shm/multipath/failed_wwids to definitively tell us whether
> multipathd has tried and failed to create the device.

Sorry, I can't follow you. Where else except in the 
domap->dm_addmap_create->dm_addmap() code path do we create maps?
I'm feeling stupid, I really can't see it.

Martin
Martin Wilck April 12, 2018, 8:22 p.m. UTC | #3
On Thu, 2018-04-12 at 22:16 +0200, Martin Wilck wrote:
> On Thu, 2018-04-12 at 13:33 -0500, Benjamin Marzinski wrote:
> > On Wed, Apr 04, 2018 at 06:16:17PM +0200, Martin Wilck wrote:
> > > dm_addmap_create() is where we actually try to set up a new
> > > multipath map. Depending on the result, mark the wwid as
> > > failed (or not), and re-trigger an uevent if necessary.
> > > If a path changes from multipath to non-multipath, use an "add"
> > > event to make sure LVM2 rules pick it up. Increase log level
> > > of this event to 3.
> > > 
> > 
> > By only looking at domap, we will miss instances of multipathd
> > failing
> > to create maps earlier in the process. This isn't necessarily
> > wrong.
> > It
> > just means that we can't rely on checking
> > /dev/shm/multipath/failed_wwids to definitively tell us whether
> > multipathd has tried and failed to create the device.
> 
> Sorry, I can't follow you. Where else except in the 
> domap->dm_addmap_create->dm_addmap() code path do we create maps?
> I'm feeling stupid, I really can't see it.

If you were referring to other instances of multipathd which have
already terminated (e.g. multipathd which ran in initramfs): these
leave the failed markers under /dev/shm when they quit. That's the
whole point of this patch. A failed marker will only be removed if a)
the system is rebooted, or b) another attempt to create the map
succeeds, or c) a user removes the marker manually.

But I suppose I'm still missing your point.

Cheers,
Martin
Benjamin Marzinski April 12, 2018, 9:32 p.m. UTC | #4
On Thu, Apr 12, 2018 at 10:22:19PM +0200, Martin Wilck wrote:
> On Thu, 2018-04-12 at 22:16 +0200, Martin Wilck wrote:
> > On Thu, 2018-04-12 at 13:33 -0500, Benjamin Marzinski wrote:
> > > On Wed, Apr 04, 2018 at 06:16:17PM +0200, Martin Wilck wrote:
> > > > dm_addmap_create() is where we actually try to set up a new
> > > > multipath map. Depending on the result, mark the wwid as
> > > > failed (or not), and re-trigger an uevent if necessary.
> > > > If a path changes from multipath to non-multipath, use an "add"
> > > > event to make sure LVM2 rules pick it up. Increase log level
> > > > of this event to 3.
> > > > 
> > > 
> > > By only looking at domap, we will miss instances of multipathd
> > > failing
> > > to create maps earlier in the process. This isn't necessarily
> > > wrong.
> > > It
> > > just means that we can't rely on checking
> > > /dev/shm/multipath/failed_wwids to definitively tell us whether
> > > multipathd has tried and failed to create the device.
> > 
> > Sorry, I can't follow you. Where else except in the 
> > domap->dm_addmap_create->dm_addmap() code path do we create maps?
> > I'm feeling stupid, I really can't see it.
> 
> If you were referring to other instances of multipathd which have
> already terminated (e.g. multipathd which ran in initramfs): these
> leave the failed markers under /dev/shm when they quit. That's the
> whole point of this patch. A failed marker will only be removed if a)
> the system is rebooted, or b) another attempt to create the map
> succeeds, or c) a user removes the marker manually.
> 
> But I suppose I'm still missing your point.

Any failure in ev_add_path() before the call to domap() could cause
multipath device creation to fail, without triggering this. For instance
add_map_with_path() or setup_map() can fail for a host of reasons.

> 
> Cheers,
> Martin
> 
> -- 
> Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Martin Wilck April 12, 2018, 10:43 p.m. UTC | #5
On Thu, 2018-04-12 at 16:32 -0500, Benjamin Marzinski wrote:
> On Thu, Apr 12, 2018 at 10:22:19PM +0200, Martin Wilck wrote:
> > 
> > But I suppose I'm still missing your point.
> 
> Any failure in ev_add_path() before the call to domap() could cause
> multipath device creation to fail, without triggering this. For
> instance
> add_map_with_path() or setup_map() can fail for a host of reasons.

OK, got it, finally. The semantics of my failed marker were intended to
be exactly what they are now: we tried DM_DEVICE_CREATE for this WWID,
and failed. Other semantics are certainly possible, but much harder to
define cleanly.

I believe that catches a large portion of the real-world failures. And
in many cases, this failure means that one or more devices are busy.
(Oh, it could also mean that we tried to set 'hwhandler "1 alua"' for a
device that doesn't support ALUA, but I digress ;-)

Martin
diff mbox

Patch

diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index c1a50e4..9aa3d21 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -445,11 +445,18 @@  trigger_udev_change(const struct multipath *mpp)
 }
 
 void
-trigger_paths_udev_change(const struct multipath *mpp)
+trigger_paths_udev_change(struct multipath *mpp, bool is_mpath)
 {
 	struct pathgroup * pgp;
 	struct path * pp;
 	int i, j;
+	/*
+	 * If a path changes from multipath to non-multipath, we must
+	 * synthesize an artificial "add" event, otherwise the LVM2 rules
+	 * (69-lvm2-lvmetad.rules) won't pick it up. Otherwise, we'd just
+	 * irritate ourselves with an "add", so use "change".
+	 */
+	const char *action = is_mpath ? "change" : "add";
 
 	if (!mpp || !mpp->pg)
 		return;
@@ -468,14 +475,21 @@  trigger_paths_udev_change(const struct multipath *mpp)
 			 */
 			env = udev_device_get_property_value(
 				pp->udev, "DM_MULTIPATH_DEVICE_PATH");
-			if (env != NULL && !strcmp(env, "1"))
-					continue;
 
-			condlog(4, "triggering change uevent for %s", pp->dev);
-			sysfs_attr_set_value(pp->udev, "uevent", "change",
-					     strlen("change"));
+			if (is_mpath && env != NULL && !strcmp(env, "1"))
+				continue;
+			else if (!is_mpath &&
+				   (env == NULL || !strcmp(env, "0")))
+				continue;
+
+			condlog(3, "triggering %s uevent for %s (is %smultipath member)",
+				action, pp->dev, is_mpath ? "" : "no ");
+			sysfs_attr_set_value(pp->udev, "uevent",
+					     action, strlen(action));
 		}
 	}
+
+	mpp->needs_paths_uevent = 0;
 }
 
 static int
@@ -876,8 +890,10 @@  int domap(struct multipath *mpp, char *params, int is_daemon)
 		 * succeeded
 		 */
 		mpp->force_udev_reload = 0;
-		if (mpp->action == ACT_CREATE && remember_wwid(mpp->wwid) == 1)
-			trigger_paths_udev_change(mpp);
+		if (mpp->action == ACT_CREATE &&
+		    (remember_wwid(mpp->wwid) == 1 ||
+		     mpp->needs_paths_uevent))
+			trigger_paths_udev_change(mpp, true);
 		if (!is_daemon) {
 			/* multipath client mode */
 			dm_switchgroup(mpp->alias, mpp->bestpg);
@@ -902,7 +918,10 @@  int domap(struct multipath *mpp, char *params, int is_daemon)
 		}
 		dm_setgeometry(mpp);
 		return DOMAP_OK;
-	}
+	} else if (r == DOMAP_FAIL && mpp->action == ACT_CREATE &&
+		   mpp->needs_paths_uevent)
+		trigger_paths_udev_change(mpp, false);
+
 	return DOMAP_FAIL;
 }
 
diff --git a/libmultipath/configure.h b/libmultipath/configure.h
index 545cbc2..8b56d33 100644
--- a/libmultipath/configure.h
+++ b/libmultipath/configure.h
@@ -37,4 +37,4 @@  int get_refwwid (enum mpath_cmds cmd, char * dev, enum devtypes dev_type,
 		 vector pathvec, char **wwid);
 int reload_map(struct vectors *vecs, struct multipath *mpp, int refresh, int is_daemon);
 struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type);
-void trigger_paths_udev_change(const struct multipath *mpp);
+void trigger_paths_udev_change(struct multipath *mpp, bool is_mpath);
diff --git a/libmultipath/devmapper.c b/libmultipath/devmapper.c
index 2a92105..f2befad 100644
--- a/libmultipath/devmapper.c
+++ b/libmultipath/devmapper.c
@@ -22,6 +22,7 @@ 
 #include "devmapper.h"
 #include "sysfs.h"
 #include "config.h"
+#include "wwids.h"
 
 #include "log_pthread.h"
 #include <sys/types.h>
@@ -415,8 +416,12 @@  int dm_addmap_create (struct multipath *mpp, char * params)
 		int err;
 
 		if (dm_addmap(DM_DEVICE_CREATE, TGT_MPATH, mpp, params, ro,
-			      udev_flags))
+			      udev_flags)) {
+			if (unmark_failed_wwid(mpp->wwid) ==
+			    WWID_FAILED_CHANGED)
+				mpp->needs_paths_uevent = 1;
 			return 1;
+		}
 		/*
 		 * DM_DEVICE_CREATE is actually DM_DEV_CREATE + DM_TABLE_LOAD.
 		 * Failing the second part leaves an empty map. Clean it up.
@@ -432,6 +437,8 @@  int dm_addmap_create (struct multipath *mpp, char * params)
 			break;
 		}
 	}
+	if (mark_failed_wwid(mpp->wwid) == WWID_FAILED_CHANGED)
+		mpp->needs_paths_uevent = 1;
 	return 0;
 }
 
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index c43f2c3..1d3a34f 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -322,6 +322,7 @@  struct multipath {
 	int max_sectors_kb;
 	int force_readonly;
 	int force_udev_reload;
+	int needs_paths_uevent;
 	int ghost_delay;
 	int ghost_delay_tick;
 	unsigned int dev_loss;
diff --git a/multipathd/main.c b/multipathd/main.c
index e6f4e77..7eeb743 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -2325,7 +2325,7 @@  configure (struct vectors * vecs)
 	sync_maps_state(mpvec);
 	vector_foreach_slot(mpvec, mpp, i){
 		if (remember_wwid(mpp->wwid) == 1)
-			trigger_paths_udev_change(mpp);
+			trigger_paths_udev_change(mpp, true);
 		update_map_pr(mpp);
 	}