diff mbox

[56/57] multipathd: push down lock in checkerloop()

Message ID 1461755458-29225-57-git-send-email-hare@suse.de (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Hannes Reinecke April 27, 2016, 11:10 a.m. UTC
Instead of grabbing the lock at the start of the checkerloop
and releasing it at the end we should be holding it only
during the time when we actually need it.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 multipathd/main.c | 136 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 87 insertions(+), 49 deletions(-)

Comments

Benjamin Marzinski May 3, 2016, 10:17 p.m. UTC | #1
On Wed, Apr 27, 2016 at 01:10:57PM +0200, Hannes Reinecke wrote:
> Instead of grabbing the lock at the start of the checkerloop
> and releasing it at the end we should be holding it only
> during the time when we actually need it.

I'm pretty sure that this can cause crashes if multipathd reconfigures
when it's just started servicing a uevent. It goes like this. The
uevq_thr thread calls uev_trigger(), which checks the running_state and
sees that it's DAEMON_IDLE.  The uxlsnr_thr thread gets a
reconfiguration request, which eventually calls set_config_state().
set_config_state() sees the state is DAEMON_IDLE, sets it to
DAEMON_CONFIGURE, and returns without waiting.  This wakes up the child
thread, which runs reconfigure().  reconfigure() sets conf to NULL. Then
the uevq_thr thread in uev_trigger() calls filter_devnode()
dereferencing conf->blist_devnode and conf->elist_devnode. conf is NULL,
and thus it crashes.

The short version is that multipathd has been using the vecs lock to
protect conf.  With your change to uev_trigger, you access conf outside
of the vecs lock. This isn't a problem in checkerloop(), since you can't
reconfigure while multipathd is in the DAEMON_RUNNING state, so that is
protecting conf. You need to do the same
set_config_state(DAEMON_RUNNING) in uev_trigger to make this safe.

-Ben

> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  multipathd/main.c | 136 ++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 87 insertions(+), 49 deletions(-)
> 
> diff --git a/multipathd/main.c b/multipathd/main.c
> index 41b5a49..132101f 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -416,7 +416,11 @@ uev_add_map (struct uevent * uev, struct vectors * vecs)
>  			return 1;
>  		}
>  	}
> +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +	lock(vecs->lock);
> +	pthread_testcancel();
>  	rc = ev_add_map(uev->kernel, alias, vecs);
> +	lock_cleanup_pop(vecs->lock);
>  	FREE(alias);
>  	return rc;
>  }
> @@ -512,6 +516,10 @@ uev_remove_map (struct uevent * uev, struct vectors * vecs)
>  		return 0;
>  	}
>  	minor = uevent_get_minor(uev);
> +
> +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +	lock(vecs->lock);
> +	pthread_testcancel();
>  	mpp = find_mp_by_minor(vecs->mpvec, minor);
>  
>  	if (!mpp) {
> @@ -528,10 +536,12 @@ uev_remove_map (struct uevent * uev, struct vectors * vecs)
>  	orphan_paths(vecs->pathvec, mpp);
>  	remove_map_and_stop_waiter(mpp, vecs, 1);
>  out:
> +	lock_cleanup_pop(vecs->lock);
>  	FREE(alias);
>  	return 0;
>  }
>  
> +/* Called from CLI handler */
>  int
>  ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
>  {
> @@ -567,6 +577,9 @@ uev_add_path (struct uevent *uev, struct vectors * vecs)
>  		return 1;
>  	}
>  
> +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +	lock(vecs->lock);
> +	pthread_testcancel();
>  	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
>  	if (pp) {
>  		int r;
> @@ -594,8 +607,10 @@ uev_add_path (struct uevent *uev, struct vectors * vecs)
>  				ret = 1;
>  			}
>  		}
> -		return ret;
>  	}
> +	lock_cleanup_pop(vecs->lock);
> +	if (pp)
> +		return ret;
>  
>  	/*
>  	 * get path vital state
> @@ -608,6 +623,9 @@ uev_add_path (struct uevent *uev, struct vectors * vecs)
>  		condlog(3, "%s: failed to get path info", uev->kernel);
>  		return 1;
>  	}
> +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +	lock(vecs->lock);
> +	pthread_testcancel();
>  	ret = store_path(vecs->pathvec, pp);
>  	if (!ret) {
>  		pp->checkint = conf->checkint;
> @@ -619,7 +637,7 @@ uev_add_path (struct uevent *uev, struct vectors * vecs)
>  		free_path(pp);
>  		ret = 1;
>  	}
> -
> +	lock_cleanup_pop(vecs->lock);
>  	return ret;
>  }
>  
> @@ -687,12 +705,12 @@ rescan:
>  			 */
>  			start_waiter = 1;
>  		}
> -		else
> +		if (!start_waiter)
>  			goto fail; /* leave path added to pathvec */
>  	}
>  
> -	/* persistent reseravtion check*/
> -	mpath_pr_event_handle(pp);	
> +	/* persistent reservation check*/
> +	mpath_pr_event_handle(pp);
>  
>  	/*
>  	 * push the map to the device-mapper
> @@ -720,7 +738,7 @@ retry:
>  		 * deal with asynchronous uevents :((
>  		 */
>  		if (mpp->action == ACT_RELOAD && retries-- > 0) {
> -			condlog(0, "%s: uev_add_path sleep", mpp->alias);
> +			condlog(0, "%s: ev_add_path sleep", mpp->alias);
>  			sleep(1);
>  			update_mpp_paths(mpp, vecs->pathvec);
>  			goto rescan;
> @@ -749,8 +767,7 @@ retry:
>  		condlog(2, "%s [%s]: path added to devmap %s",
>  			pp->dev, pp->dev_t, mpp->alias);
>  		return 0;
> -	}
> -	else
> +	} else
>  		goto fail;
>  
>  fail_map:
> @@ -764,17 +781,22 @@ static int
>  uev_remove_path (struct uevent *uev, struct vectors * vecs)
>  {
>  	struct path *pp;
> +	int ret;
>  
>  	condlog(2, "%s: remove path (uevent)", uev->kernel);
> +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +	lock(vecs->lock);
> +	pthread_testcancel();
>  	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
> -
> +	if (pp)
> +		ret = ev_remove_path(pp, vecs);
> +	lock_cleanup_pop(vecs->lock);
>  	if (!pp) {
>  		/* Not an error; path might have been purged earlier */
>  		condlog(0, "%s: path already removed", uev->kernel);
>  		return 0;
>  	}
> -
> -	return ev_remove_path(pp, vecs);
> +	return ret;
>  }
>  
>  int
> @@ -877,35 +899,50 @@ static int
>  uev_update_path (struct uevent *uev, struct vectors * vecs)
>  {
>  	int ro, retval = 0;
> -	struct path * pp;
> -
> -	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
> -	if (!pp) {
> -		condlog(0, "%s: spurious uevent, path not found",
> -			uev->kernel);
> -		return 1;
> -	}
> -
> -	if (pp->initialized == INIT_REQUESTED_UDEV)
> -		return uev_add_path(uev, vecs);
>  
>  	ro = uevent_get_disk_ro(uev);
>  
>  	if (ro >= 0) {
> +		struct path * pp;
> +		struct multipath *mpp = NULL;
> +
>  		condlog(2, "%s: update path write_protect to '%d' (uevent)",
>  			uev->kernel, ro);
> -		if (pp->mpp) {
> -			if (pp->mpp->wait_for_udev) {
> -				pp->mpp->wait_for_udev = 2;
> -				return 0;
> +		pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +		lock(vecs->lock);
> +		pthread_testcancel();
> +		/*
> +		 * pthread_mutex_lock() and pthread_mutex_unlock()
> +		 * need to be at the same indentation level, hence
> +		 * this slightly convoluted codepath.
> +		 */
> +		pp = find_path_by_dev(vecs->pathvec, uev->kernel);
> +		if (pp) {
> +			if (pp->initialized == INIT_REQUESTED_UDEV) {
> +				retval = 2;
> +			} else {
> +				mpp = pp->mpp;
> +				if (mpp && mpp->wait_for_udev) {
> +					mpp->wait_for_udev = 2;
> +					mpp = NULL;
> +					retval = 0;
> +				}
>  			}
> +			if (mpp) {
> +				retval = reload_map(vecs, mpp, 0);
>  
> -			retval = reload_map(vecs, pp->mpp, 0);
> -
> -			condlog(2, "%s: map %s reloaded (retval %d)",
> -				uev->kernel, pp->mpp->alias, retval);
> +				condlog(2, "%s: map %s reloaded (retval %d)",
> +					uev->kernel, mpp->alias, retval);
> +			}
>  		}
> -
> +		lock_cleanup_pop(vecs->lock);
> +		if (!pp) {
> +			condlog(0, "%s: spurious uevent, path not found",
> +				uev->kernel);
> +			return 1;
> +		}
> +		if (retval == 2)
> +			return uev_add_path(uev, vecs);
>  	}
>  
>  	return retval;
> @@ -1002,10 +1039,6 @@ uev_trigger (struct uevent * uev, void * trigger_data)
>  	if (running_state == DAEMON_SHUTDOWN)
>  		return 0;
>  
> -	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> -	lock(vecs->lock);
> -	pthread_testcancel();
> -
>  	/*
>  	 * device map event
>  	 * Add events are ignored here as the tables
> @@ -1044,7 +1077,6 @@ uev_trigger (struct uevent * uev, void * trigger_data)
>  	}
>  
>  out:
> -	lock_cleanup_pop(vecs->lock);
>  	return r;
>  }
>  
> @@ -1627,17 +1659,6 @@ checkerloop (void *ap)
>  
>  		if (gettimeofday(&start_time, NULL) != 0)
>  			start_time.tv_sec = 0;
> -
> -		rc = set_config_state(DAEMON_RUNNING);
> -		if (rc == ETIMEDOUT) {
> -			condlog(4, "timeout waiting for DAEMON_IDLE");
> -			continue;
> -		}
> -
> -		pthread_cleanup_push(cleanup_lock, &vecs->lock);
> -		lock(vecs->lock);
> -		pthread_testcancel();
> -		strict_timing = conf->strict_timing;
>  		if (start_time.tv_sec && last_time.tv_sec) {
>  			timersub(&start_time, &last_time, &diff_time);
>  			condlog(4, "tick (%lu.%06lu secs)",
> @@ -1653,28 +1674,45 @@ checkerloop (void *ap)
>  		if (use_watchdog)
>  			sd_notify(0, "WATCHDOG=1");
>  #endif
> +		rc = set_config_state(DAEMON_RUNNING);
> +		if (rc == ETIMEDOUT) {
> +			condlog(4, "timeout waiting for DAEMON_IDLE");
> +			continue;
> +		}
> +		strict_timing = conf->strict_timing;
>  		if (vecs->pathvec) {
> +			pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +			lock(vecs->lock);
> +			pthread_testcancel();
>  			vector_foreach_slot (vecs->pathvec, pp, i) {
>  				num_paths += check_path(vecs, pp, ticks);
>  			}
> +			lock_cleanup_pop(vecs->lock);
>  		}
>  		if (vecs->mpvec) {
> +			pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +			lock(vecs->lock);
> +			pthread_testcancel();
>  			defered_failback_tick(vecs->mpvec);
>  			retry_count_tick(vecs->mpvec);
>  			missing_uev_wait_tick(vecs);
> +			lock_cleanup_pop(vecs->lock);
>  		}
>  		if (count)
>  			count--;
>  		else {
> +			pthread_cleanup_push(cleanup_lock, &vecs->lock);
> +			lock(vecs->lock);
> +			pthread_testcancel();
>  			condlog(4, "map garbage collection");
>  			mpvec_garbage_collector(vecs);
>  			count = MAPGCINT;
> +			lock_cleanup_pop(vecs->lock);
>  		}
>  
> -		lock_cleanup_pop(vecs->lock);
>  		diff_time.tv_usec = 0;
>  		if (start_time.tv_sec &&
> -		    gettimeofday(&end_time, NULL)) {
> +		    gettimeofday(&end_time, NULL) == 0) {
>  			timersub(&end_time, &start_time, &diff_time);
>  			if (num_paths) {
>  				condlog(3, "checked %d path%s in %lu.%06lu secs",
> -- 
> 2.6.6

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Benjamin Marzinski May 3, 2016, 11:24 p.m. UTC | #2
On Tue, May 03, 2016 at 05:17:34PM -0500, Benjamin Marzinski wrote:
> On Wed, Apr 27, 2016 at 01:10:57PM +0200, Hannes Reinecke wrote:
> > Instead of grabbing the lock at the start of the checkerloop
> > and releasing it at the end we should be holding it only
> > during the time when we actually need it.
> 
> I'm pretty sure that this can cause crashes if multipathd reconfigures
> when it's just started servicing a uevent. It goes like this. The
> uevq_thr thread calls uev_trigger(), which checks the running_state and
> sees that it's DAEMON_IDLE.  The uxlsnr_thr thread gets a
> reconfiguration request, which eventually calls set_config_state().
> set_config_state() sees the state is DAEMON_IDLE, sets it to
> DAEMON_CONFIGURE, and returns without waiting.  This wakes up the child
> thread, which runs reconfigure().  reconfigure() sets conf to NULL. Then
> the uevq_thr thread in uev_trigger() calls filter_devnode()
> dereferencing conf->blist_devnode and conf->elist_devnode. conf is NULL,
> and thus it crashes.
> 
> The short version is that multipathd has been using the vecs lock to
> protect conf.  With your change to uev_trigger, you access conf outside
> of the vecs lock. This isn't a problem in checkerloop(), since you can't
> reconfigure while multipathd is in the DAEMON_RUNNING state, so that is
> protecting conf. You need to do the same
> set_config_state(DAEMON_RUNNING) in uev_trigger to make this safe.

Or not. ev_add_map calls set_config_state(DAEMON_CONFIGURE), so this
would deadlock if you called  set_config_state(DAEMON_RUNNING) in
uev_trigger. At any rate, either reconfigures need to be blocked while
running uev_trigger, or we need some sort of reference counting on the
config structure so that we don't free it when we do a reconfigure,
until everyone has switched to the new structure.

-Ben
> 
> -Ben
> 
> > 
> > Signed-off-by: Hannes Reinecke <hare@suse.de>
> > ---
> >  multipathd/main.c | 136 ++++++++++++++++++++++++++++++++++--------------------
> >  1 file changed, 87 insertions(+), 49 deletions(-)
> > 
> > diff --git a/multipathd/main.c b/multipathd/main.c
> > index 41b5a49..132101f 100644
> > --- a/multipathd/main.c
> > +++ b/multipathd/main.c
> > @@ -416,7 +416,11 @@ uev_add_map (struct uevent * uev, struct vectors * vecs)
> >  			return 1;
> >  		}
> >  	}
> > +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +	lock(vecs->lock);
> > +	pthread_testcancel();
> >  	rc = ev_add_map(uev->kernel, alias, vecs);
> > +	lock_cleanup_pop(vecs->lock);
> >  	FREE(alias);
> >  	return rc;
> >  }
> > @@ -512,6 +516,10 @@ uev_remove_map (struct uevent * uev, struct vectors * vecs)
> >  		return 0;
> >  	}
> >  	minor = uevent_get_minor(uev);
> > +
> > +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +	lock(vecs->lock);
> > +	pthread_testcancel();
> >  	mpp = find_mp_by_minor(vecs->mpvec, minor);
> >  
> >  	if (!mpp) {
> > @@ -528,10 +536,12 @@ uev_remove_map (struct uevent * uev, struct vectors * vecs)
> >  	orphan_paths(vecs->pathvec, mpp);
> >  	remove_map_and_stop_waiter(mpp, vecs, 1);
> >  out:
> > +	lock_cleanup_pop(vecs->lock);
> >  	FREE(alias);
> >  	return 0;
> >  }
> >  
> > +/* Called from CLI handler */
> >  int
> >  ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
> >  {
> > @@ -567,6 +577,9 @@ uev_add_path (struct uevent *uev, struct vectors * vecs)
> >  		return 1;
> >  	}
> >  
> > +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +	lock(vecs->lock);
> > +	pthread_testcancel();
> >  	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
> >  	if (pp) {
> >  		int r;
> > @@ -594,8 +607,10 @@ uev_add_path (struct uevent *uev, struct vectors * vecs)
> >  				ret = 1;
> >  			}
> >  		}
> > -		return ret;
> >  	}
> > +	lock_cleanup_pop(vecs->lock);
> > +	if (pp)
> > +		return ret;
> >  
> >  	/*
> >  	 * get path vital state
> > @@ -608,6 +623,9 @@ uev_add_path (struct uevent *uev, struct vectors * vecs)
> >  		condlog(3, "%s: failed to get path info", uev->kernel);
> >  		return 1;
> >  	}
> > +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +	lock(vecs->lock);
> > +	pthread_testcancel();
> >  	ret = store_path(vecs->pathvec, pp);
> >  	if (!ret) {
> >  		pp->checkint = conf->checkint;
> > @@ -619,7 +637,7 @@ uev_add_path (struct uevent *uev, struct vectors * vecs)
> >  		free_path(pp);
> >  		ret = 1;
> >  	}
> > -
> > +	lock_cleanup_pop(vecs->lock);
> >  	return ret;
> >  }
> >  
> > @@ -687,12 +705,12 @@ rescan:
> >  			 */
> >  			start_waiter = 1;
> >  		}
> > -		else
> > +		if (!start_waiter)
> >  			goto fail; /* leave path added to pathvec */
> >  	}
> >  
> > -	/* persistent reseravtion check*/
> > -	mpath_pr_event_handle(pp);	
> > +	/* persistent reservation check*/
> > +	mpath_pr_event_handle(pp);
> >  
> >  	/*
> >  	 * push the map to the device-mapper
> > @@ -720,7 +738,7 @@ retry:
> >  		 * deal with asynchronous uevents :((
> >  		 */
> >  		if (mpp->action == ACT_RELOAD && retries-- > 0) {
> > -			condlog(0, "%s: uev_add_path sleep", mpp->alias);
> > +			condlog(0, "%s: ev_add_path sleep", mpp->alias);
> >  			sleep(1);
> >  			update_mpp_paths(mpp, vecs->pathvec);
> >  			goto rescan;
> > @@ -749,8 +767,7 @@ retry:
> >  		condlog(2, "%s [%s]: path added to devmap %s",
> >  			pp->dev, pp->dev_t, mpp->alias);
> >  		return 0;
> > -	}
> > -	else
> > +	} else
> >  		goto fail;
> >  
> >  fail_map:
> > @@ -764,17 +781,22 @@ static int
> >  uev_remove_path (struct uevent *uev, struct vectors * vecs)
> >  {
> >  	struct path *pp;
> > +	int ret;
> >  
> >  	condlog(2, "%s: remove path (uevent)", uev->kernel);
> > +	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +	lock(vecs->lock);
> > +	pthread_testcancel();
> >  	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
> > -
> > +	if (pp)
> > +		ret = ev_remove_path(pp, vecs);
> > +	lock_cleanup_pop(vecs->lock);
> >  	if (!pp) {
> >  		/* Not an error; path might have been purged earlier */
> >  		condlog(0, "%s: path already removed", uev->kernel);
> >  		return 0;
> >  	}
> > -
> > -	return ev_remove_path(pp, vecs);
> > +	return ret;
> >  }
> >  
> >  int
> > @@ -877,35 +899,50 @@ static int
> >  uev_update_path (struct uevent *uev, struct vectors * vecs)
> >  {
> >  	int ro, retval = 0;
> > -	struct path * pp;
> > -
> > -	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
> > -	if (!pp) {
> > -		condlog(0, "%s: spurious uevent, path not found",
> > -			uev->kernel);
> > -		return 1;
> > -	}
> > -
> > -	if (pp->initialized == INIT_REQUESTED_UDEV)
> > -		return uev_add_path(uev, vecs);
> >  
> >  	ro = uevent_get_disk_ro(uev);
> >  
> >  	if (ro >= 0) {
> > +		struct path * pp;
> > +		struct multipath *mpp = NULL;
> > +
> >  		condlog(2, "%s: update path write_protect to '%d' (uevent)",
> >  			uev->kernel, ro);
> > -		if (pp->mpp) {
> > -			if (pp->mpp->wait_for_udev) {
> > -				pp->mpp->wait_for_udev = 2;
> > -				return 0;
> > +		pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +		lock(vecs->lock);
> > +		pthread_testcancel();
> > +		/*
> > +		 * pthread_mutex_lock() and pthread_mutex_unlock()
> > +		 * need to be at the same indentation level, hence
> > +		 * this slightly convoluted codepath.
> > +		 */
> > +		pp = find_path_by_dev(vecs->pathvec, uev->kernel);
> > +		if (pp) {
> > +			if (pp->initialized == INIT_REQUESTED_UDEV) {
> > +				retval = 2;
> > +			} else {
> > +				mpp = pp->mpp;
> > +				if (mpp && mpp->wait_for_udev) {
> > +					mpp->wait_for_udev = 2;
> > +					mpp = NULL;
> > +					retval = 0;
> > +				}
> >  			}
> > +			if (mpp) {
> > +				retval = reload_map(vecs, mpp, 0);
> >  
> > -			retval = reload_map(vecs, pp->mpp, 0);
> > -
> > -			condlog(2, "%s: map %s reloaded (retval %d)",
> > -				uev->kernel, pp->mpp->alias, retval);
> > +				condlog(2, "%s: map %s reloaded (retval %d)",
> > +					uev->kernel, mpp->alias, retval);
> > +			}
> >  		}
> > -
> > +		lock_cleanup_pop(vecs->lock);
> > +		if (!pp) {
> > +			condlog(0, "%s: spurious uevent, path not found",
> > +				uev->kernel);
> > +			return 1;
> > +		}
> > +		if (retval == 2)
> > +			return uev_add_path(uev, vecs);
> >  	}
> >  
> >  	return retval;
> > @@ -1002,10 +1039,6 @@ uev_trigger (struct uevent * uev, void * trigger_data)
> >  	if (running_state == DAEMON_SHUTDOWN)
> >  		return 0;
> >  
> > -	pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > -	lock(vecs->lock);
> > -	pthread_testcancel();
> > -
> >  	/*
> >  	 * device map event
> >  	 * Add events are ignored here as the tables
> > @@ -1044,7 +1077,6 @@ uev_trigger (struct uevent * uev, void * trigger_data)
> >  	}
> >  
> >  out:
> > -	lock_cleanup_pop(vecs->lock);
> >  	return r;
> >  }
> >  
> > @@ -1627,17 +1659,6 @@ checkerloop (void *ap)
> >  
> >  		if (gettimeofday(&start_time, NULL) != 0)
> >  			start_time.tv_sec = 0;
> > -
> > -		rc = set_config_state(DAEMON_RUNNING);
> > -		if (rc == ETIMEDOUT) {
> > -			condlog(4, "timeout waiting for DAEMON_IDLE");
> > -			continue;
> > -		}
> > -
> > -		pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > -		lock(vecs->lock);
> > -		pthread_testcancel();
> > -		strict_timing = conf->strict_timing;
> >  		if (start_time.tv_sec && last_time.tv_sec) {
> >  			timersub(&start_time, &last_time, &diff_time);
> >  			condlog(4, "tick (%lu.%06lu secs)",
> > @@ -1653,28 +1674,45 @@ checkerloop (void *ap)
> >  		if (use_watchdog)
> >  			sd_notify(0, "WATCHDOG=1");
> >  #endif
> > +		rc = set_config_state(DAEMON_RUNNING);
> > +		if (rc == ETIMEDOUT) {
> > +			condlog(4, "timeout waiting for DAEMON_IDLE");
> > +			continue;
> > +		}
> > +		strict_timing = conf->strict_timing;
> >  		if (vecs->pathvec) {
> > +			pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +			lock(vecs->lock);
> > +			pthread_testcancel();
> >  			vector_foreach_slot (vecs->pathvec, pp, i) {
> >  				num_paths += check_path(vecs, pp, ticks);
> >  			}
> > +			lock_cleanup_pop(vecs->lock);
> >  		}
> >  		if (vecs->mpvec) {
> > +			pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +			lock(vecs->lock);
> > +			pthread_testcancel();
> >  			defered_failback_tick(vecs->mpvec);
> >  			retry_count_tick(vecs->mpvec);
> >  			missing_uev_wait_tick(vecs);
> > +			lock_cleanup_pop(vecs->lock);
> >  		}
> >  		if (count)
> >  			count--;
> >  		else {
> > +			pthread_cleanup_push(cleanup_lock, &vecs->lock);
> > +			lock(vecs->lock);
> > +			pthread_testcancel();
> >  			condlog(4, "map garbage collection");
> >  			mpvec_garbage_collector(vecs);
> >  			count = MAPGCINT;
> > +			lock_cleanup_pop(vecs->lock);
> >  		}
> >  
> > -		lock_cleanup_pop(vecs->lock);
> >  		diff_time.tv_usec = 0;
> >  		if (start_time.tv_sec &&
> > -		    gettimeofday(&end_time, NULL)) {
> > +		    gettimeofday(&end_time, NULL) == 0) {
> >  			timersub(&end_time, &start_time, &diff_time);
> >  			if (num_paths) {
> >  				condlog(3, "checked %d path%s in %lu.%06lu secs",
> > -- 
> > 2.6.6
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Hannes Reinecke May 4, 2016, 6:39 a.m. UTC | #3
On 05/04/2016 12:17 AM, Benjamin Marzinski wrote:
> On Wed, Apr 27, 2016 at 01:10:57PM +0200, Hannes Reinecke wrote:
>> Instead of grabbing the lock at the start of the checkerloop
>> and releasing it at the end we should be holding it only
>> during the time when we actually need it.
> 
> I'm pretty sure that this can cause crashes if multipathd reconfigures
> when it's just started servicing a uevent. It goes like this. The
> uevq_thr thread calls uev_trigger(), which checks the running_state and
> sees that it's DAEMON_IDLE.  The uxlsnr_thr thread gets a
> reconfiguration request, which eventually calls set_config_state().
> set_config_state() sees the state is DAEMON_IDLE, sets it to
> DAEMON_CONFIGURE, and returns without waiting.  This wakes up the child
> thread, which runs reconfigure().  reconfigure() sets conf to NULL. Then
> the uevq_thr thread in uev_trigger() calls filter_devnode()
> dereferencing conf->blist_devnode and conf->elist_devnode. conf is NULL,
> and thus it crashes.
> 
> The short version is that multipathd has been using the vecs lock to
> protect conf.  With your change to uev_trigger, you access conf outside
> of the vecs lock. This isn't a problem in checkerloop(), since you can't
> reconfigure while multipathd is in the DAEMON_RUNNING state, so that is
> protecting conf. You need to do the same
> set_config_state(DAEMON_RUNNING) in uev_trigger to make this safe.
> 
I have now moved the call to 'filter_devnode' into the individual
functions, so that it will be called with the vector lock held.

Cheers,

Hannes
Hannes Reinecke May 4, 2016, 6:40 a.m. UTC | #4
On 05/04/2016 01:24 AM, Benjamin Marzinski wrote:
> On Tue, May 03, 2016 at 05:17:34PM -0500, Benjamin Marzinski wrote:
>> On Wed, Apr 27, 2016 at 01:10:57PM +0200, Hannes Reinecke wrote:
>>> Instead of grabbing the lock at the start of the checkerloop
>>> and releasing it at the end we should be holding it only
>>> during the time when we actually need it.
>>
>> I'm pretty sure that this can cause crashes if multipathd reconfigures
>> when it's just started servicing a uevent. It goes like this. The
>> uevq_thr thread calls uev_trigger(), which checks the running_state and
>> sees that it's DAEMON_IDLE.  The uxlsnr_thr thread gets a
>> reconfiguration request, which eventually calls set_config_state().
>> set_config_state() sees the state is DAEMON_IDLE, sets it to
>> DAEMON_CONFIGURE, and returns without waiting.  This wakes up the child
>> thread, which runs reconfigure().  reconfigure() sets conf to NULL. Then
>> the uevq_thr thread in uev_trigger() calls filter_devnode()
>> dereferencing conf->blist_devnode and conf->elist_devnode. conf is NULL,
>> and thus it crashes.
>>
>> The short version is that multipathd has been using the vecs lock to
>> protect conf.  With your change to uev_trigger, you access conf outside
>> of the vecs lock. This isn't a problem in checkerloop(), since you can't
>> reconfigure while multipathd is in the DAEMON_RUNNING state, so that is
>> protecting conf. You need to do the same
>> set_config_state(DAEMON_RUNNING) in uev_trigger to make this safe.
> 
> Or not. ev_add_map calls set_config_state(DAEMON_CONFIGURE), so this
> would deadlock if you called  set_config_state(DAEMON_RUNNING) in
> uev_trigger. At any rate, either reconfigures need to be blocked while
> running uev_trigger, or we need some sort of reference counting on the
> config structure so that we don't free it when we do a reconfigure,
> until everyone has switched to the new structure.
> 
See my previous reply. We can easily move the call to
filter_devnode() so that it'll be called with the vector lock held.

Cheers,

Hannes
diff mbox

Patch

diff --git a/multipathd/main.c b/multipathd/main.c
index 41b5a49..132101f 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -416,7 +416,11 @@  uev_add_map (struct uevent * uev, struct vectors * vecs)
 			return 1;
 		}
 	}
+	pthread_cleanup_push(cleanup_lock, &vecs->lock);
+	lock(vecs->lock);
+	pthread_testcancel();
 	rc = ev_add_map(uev->kernel, alias, vecs);
+	lock_cleanup_pop(vecs->lock);
 	FREE(alias);
 	return rc;
 }
@@ -512,6 +516,10 @@  uev_remove_map (struct uevent * uev, struct vectors * vecs)
 		return 0;
 	}
 	minor = uevent_get_minor(uev);
+
+	pthread_cleanup_push(cleanup_lock, &vecs->lock);
+	lock(vecs->lock);
+	pthread_testcancel();
 	mpp = find_mp_by_minor(vecs->mpvec, minor);
 
 	if (!mpp) {
@@ -528,10 +536,12 @@  uev_remove_map (struct uevent * uev, struct vectors * vecs)
 	orphan_paths(vecs->pathvec, mpp);
 	remove_map_and_stop_waiter(mpp, vecs, 1);
 out:
+	lock_cleanup_pop(vecs->lock);
 	FREE(alias);
 	return 0;
 }
 
+/* Called from CLI handler */
 int
 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
 {
@@ -567,6 +577,9 @@  uev_add_path (struct uevent *uev, struct vectors * vecs)
 		return 1;
 	}
 
+	pthread_cleanup_push(cleanup_lock, &vecs->lock);
+	lock(vecs->lock);
+	pthread_testcancel();
 	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
 	if (pp) {
 		int r;
@@ -594,8 +607,10 @@  uev_add_path (struct uevent *uev, struct vectors * vecs)
 				ret = 1;
 			}
 		}
-		return ret;
 	}
+	lock_cleanup_pop(vecs->lock);
+	if (pp)
+		return ret;
 
 	/*
 	 * get path vital state
@@ -608,6 +623,9 @@  uev_add_path (struct uevent *uev, struct vectors * vecs)
 		condlog(3, "%s: failed to get path info", uev->kernel);
 		return 1;
 	}
+	pthread_cleanup_push(cleanup_lock, &vecs->lock);
+	lock(vecs->lock);
+	pthread_testcancel();
 	ret = store_path(vecs->pathvec, pp);
 	if (!ret) {
 		pp->checkint = conf->checkint;
@@ -619,7 +637,7 @@  uev_add_path (struct uevent *uev, struct vectors * vecs)
 		free_path(pp);
 		ret = 1;
 	}
-
+	lock_cleanup_pop(vecs->lock);
 	return ret;
 }
 
@@ -687,12 +705,12 @@  rescan:
 			 */
 			start_waiter = 1;
 		}
-		else
+		if (!start_waiter)
 			goto fail; /* leave path added to pathvec */
 	}
 
-	/* persistent reseravtion check*/
-	mpath_pr_event_handle(pp);	
+	/* persistent reservation check*/
+	mpath_pr_event_handle(pp);
 
 	/*
 	 * push the map to the device-mapper
@@ -720,7 +738,7 @@  retry:
 		 * deal with asynchronous uevents :((
 		 */
 		if (mpp->action == ACT_RELOAD && retries-- > 0) {
-			condlog(0, "%s: uev_add_path sleep", mpp->alias);
+			condlog(0, "%s: ev_add_path sleep", mpp->alias);
 			sleep(1);
 			update_mpp_paths(mpp, vecs->pathvec);
 			goto rescan;
@@ -749,8 +767,7 @@  retry:
 		condlog(2, "%s [%s]: path added to devmap %s",
 			pp->dev, pp->dev_t, mpp->alias);
 		return 0;
-	}
-	else
+	} else
 		goto fail;
 
 fail_map:
@@ -764,17 +781,22 @@  static int
 uev_remove_path (struct uevent *uev, struct vectors * vecs)
 {
 	struct path *pp;
+	int ret;
 
 	condlog(2, "%s: remove path (uevent)", uev->kernel);
+	pthread_cleanup_push(cleanup_lock, &vecs->lock);
+	lock(vecs->lock);
+	pthread_testcancel();
 	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
-
+	if (pp)
+		ret = ev_remove_path(pp, vecs);
+	lock_cleanup_pop(vecs->lock);
 	if (!pp) {
 		/* Not an error; path might have been purged earlier */
 		condlog(0, "%s: path already removed", uev->kernel);
 		return 0;
 	}
-
-	return ev_remove_path(pp, vecs);
+	return ret;
 }
 
 int
@@ -877,35 +899,50 @@  static int
 uev_update_path (struct uevent *uev, struct vectors * vecs)
 {
 	int ro, retval = 0;
-	struct path * pp;
-
-	pp = find_path_by_dev(vecs->pathvec, uev->kernel);
-	if (!pp) {
-		condlog(0, "%s: spurious uevent, path not found",
-			uev->kernel);
-		return 1;
-	}
-
-	if (pp->initialized == INIT_REQUESTED_UDEV)
-		return uev_add_path(uev, vecs);
 
 	ro = uevent_get_disk_ro(uev);
 
 	if (ro >= 0) {
+		struct path * pp;
+		struct multipath *mpp = NULL;
+
 		condlog(2, "%s: update path write_protect to '%d' (uevent)",
 			uev->kernel, ro);
-		if (pp->mpp) {
-			if (pp->mpp->wait_for_udev) {
-				pp->mpp->wait_for_udev = 2;
-				return 0;
+		pthread_cleanup_push(cleanup_lock, &vecs->lock);
+		lock(vecs->lock);
+		pthread_testcancel();
+		/*
+		 * pthread_mutex_lock() and pthread_mutex_unlock()
+		 * need to be at the same indentation level, hence
+		 * this slightly convoluted codepath.
+		 */
+		pp = find_path_by_dev(vecs->pathvec, uev->kernel);
+		if (pp) {
+			if (pp->initialized == INIT_REQUESTED_UDEV) {
+				retval = 2;
+			} else {
+				mpp = pp->mpp;
+				if (mpp && mpp->wait_for_udev) {
+					mpp->wait_for_udev = 2;
+					mpp = NULL;
+					retval = 0;
+				}
 			}
+			if (mpp) {
+				retval = reload_map(vecs, mpp, 0);
 
-			retval = reload_map(vecs, pp->mpp, 0);
-
-			condlog(2, "%s: map %s reloaded (retval %d)",
-				uev->kernel, pp->mpp->alias, retval);
+				condlog(2, "%s: map %s reloaded (retval %d)",
+					uev->kernel, mpp->alias, retval);
+			}
 		}
-
+		lock_cleanup_pop(vecs->lock);
+		if (!pp) {
+			condlog(0, "%s: spurious uevent, path not found",
+				uev->kernel);
+			return 1;
+		}
+		if (retval == 2)
+			return uev_add_path(uev, vecs);
 	}
 
 	return retval;
@@ -1002,10 +1039,6 @@  uev_trigger (struct uevent * uev, void * trigger_data)
 	if (running_state == DAEMON_SHUTDOWN)
 		return 0;
 
-	pthread_cleanup_push(cleanup_lock, &vecs->lock);
-	lock(vecs->lock);
-	pthread_testcancel();
-
 	/*
 	 * device map event
 	 * Add events are ignored here as the tables
@@ -1044,7 +1077,6 @@  uev_trigger (struct uevent * uev, void * trigger_data)
 	}
 
 out:
-	lock_cleanup_pop(vecs->lock);
 	return r;
 }
 
@@ -1627,17 +1659,6 @@  checkerloop (void *ap)
 
 		if (gettimeofday(&start_time, NULL) != 0)
 			start_time.tv_sec = 0;
-
-		rc = set_config_state(DAEMON_RUNNING);
-		if (rc == ETIMEDOUT) {
-			condlog(4, "timeout waiting for DAEMON_IDLE");
-			continue;
-		}
-
-		pthread_cleanup_push(cleanup_lock, &vecs->lock);
-		lock(vecs->lock);
-		pthread_testcancel();
-		strict_timing = conf->strict_timing;
 		if (start_time.tv_sec && last_time.tv_sec) {
 			timersub(&start_time, &last_time, &diff_time);
 			condlog(4, "tick (%lu.%06lu secs)",
@@ -1653,28 +1674,45 @@  checkerloop (void *ap)
 		if (use_watchdog)
 			sd_notify(0, "WATCHDOG=1");
 #endif
+		rc = set_config_state(DAEMON_RUNNING);
+		if (rc == ETIMEDOUT) {
+			condlog(4, "timeout waiting for DAEMON_IDLE");
+			continue;
+		}
+		strict_timing = conf->strict_timing;
 		if (vecs->pathvec) {
+			pthread_cleanup_push(cleanup_lock, &vecs->lock);
+			lock(vecs->lock);
+			pthread_testcancel();
 			vector_foreach_slot (vecs->pathvec, pp, i) {
 				num_paths += check_path(vecs, pp, ticks);
 			}
+			lock_cleanup_pop(vecs->lock);
 		}
 		if (vecs->mpvec) {
+			pthread_cleanup_push(cleanup_lock, &vecs->lock);
+			lock(vecs->lock);
+			pthread_testcancel();
 			defered_failback_tick(vecs->mpvec);
 			retry_count_tick(vecs->mpvec);
 			missing_uev_wait_tick(vecs);
+			lock_cleanup_pop(vecs->lock);
 		}
 		if (count)
 			count--;
 		else {
+			pthread_cleanup_push(cleanup_lock, &vecs->lock);
+			lock(vecs->lock);
+			pthread_testcancel();
 			condlog(4, "map garbage collection");
 			mpvec_garbage_collector(vecs);
 			count = MAPGCINT;
+			lock_cleanup_pop(vecs->lock);
 		}
 
-		lock_cleanup_pop(vecs->lock);
 		diff_time.tv_usec = 0;
 		if (start_time.tv_sec &&
-		    gettimeofday(&end_time, NULL)) {
+		    gettimeofday(&end_time, NULL) == 0) {
 			timersub(&end_time, &start_time, &diff_time);
 			if (num_paths) {
 				condlog(3, "checked %d path%s in %lu.%06lu secs",