diff mbox

Handle multipath paths in a path group properly during pg_init

Message ID 1244246693.28800.23.camel@chandra-ubuntu (mailing list archive)
State Accepted, archived
Delegated to: Alasdair Kergon
Headers show

Commit Message

Chandra Seetharaman June 6, 2009, 12:04 a.m. UTC
Found a race condition where pg_init_in_progress being incremented when
queue_work() returns without queuing the work.

Changed the code to increment pg_init_in_progress only when queue_work
returns success.

Tested in 2.6.30-rc8

---------------------------------------------------
From: Chandra Seetharaman <sekharan@us.ibm.com>

Fixed a problem affecting reinstatement of passive paths and another
problem with io lockup during device offline/online.

Before we moved the hardware handler from dm to SCSI, it performed a pg_init
for a path group and didn't maintain any state about each path in hardware
handler code.

But in SCSI dh, such state is now maintained, as we want to fail I/O early on a
path if it is not the active path.

All the hardware handlers have a state now and set to active or some form of
inactive.  They have prep_fn() which uses this state to fail the I/O without
it ever being sent to the device.

So in effect when dm-multipath calls scsi_dh_activate(), activate is
sent to only one path and the "state" of that path is changed appropriately
to "active" while other paths in the same path group are never changed
as they never got an "activate".

In order make sure all the paths in a path group gets their state set
properly when a pg_init happens, we need to call scsi_dh_activate() on
all paths in a path group.

Doing this at the hardware handler layer is not a good option as we
want the multipath layer to define the relationship between path and path
groups and not the hardware handler.

Attached patch sends an "activate" on each path in a path group when a
path group is switched. It also sends an activate when a path is reinstated.

Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>

---
 drivers/md/dm-mpath.c |   63 +++++++++++++++++++++-----------------------------
 1 file changed, 26 insertions(+), 37 deletions(-)





--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Comments

babu moger June 15, 2009, 10:16 p.m. UTC | #1
I have seen this race condition earlier on my system. Problem is resolved after testing with this patch. Thanks.    
--Babu Moger 

> -----Original Message-----
> From: Chandra Seetharaman [mailto:sekharan@us.ibm.com]
> Sent: Friday, June 05, 2009 7:05 PM
> To: device-mapper development
> Cc: Alasdair G Kergon; Moger, Babu
> Subject: Re: [dm-devel] [PATCH] Handle multipath paths in a path group
> properly during pg_init
> 
> Found a race condition where pg_init_in_progress being incremented when
> queue_work() returns without queuing the work.
> 
> Changed the code to increment pg_init_in_progress only when queue_work
> returns success.
> 
> Tested in 2.6.30-rc8
> 
> ---------------------------------------------------
> From: Chandra Seetharaman <sekharan@us.ibm.com>
> 
> Fixed a problem affecting reinstatement of passive paths and another
> problem with io lockup during device offline/online.
> 
> Before we moved the hardware handler from dm to SCSI, it performed a
> pg_init
> for a path group and didn't maintain any state about each path in hardware
> handler code.
> 
> But in SCSI dh, such state is now maintained, as we want to fail I/O early
> on a
> path if it is not the active path.
> 
> All the hardware handlers have a state now and set to active or some form
> of
> inactive.  They have prep_fn() which uses this state to fail the I/O
> without
> it ever being sent to the device.
> 
> So in effect when dm-multipath calls scsi_dh_activate(), activate is
> sent to only one path and the "state" of that path is changed
> appropriately
> to "active" while other paths in the same path group are never changed
> as they never got an "activate".
> 
> In order make sure all the paths in a path group gets their state set
> properly when a pg_init happens, we need to call scsi_dh_activate() on
> all paths in a path group.
> 
> Doing this at the hardware handler layer is not a good option as we
> want the multipath layer to define the relationship between path and path
> groups and not the hardware handler.
> 
> Attached patch sends an "activate" on each path in a path group when a
> path group is switched. It also sends an activate when a path is
> reinstated.
> 
> Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com>
> Signed-off-by: Alasdair G Kergon <agk@redhat.com>
> 
> ---
>  drivers/md/dm-mpath.c |   63 +++++++++++++++++++++-----------------------
> ------
>  1 file changed, 26 insertions(+), 37 deletions(-)
> 
> Index: linux-2.6.29/drivers/md/dm-mpath.c
> ===================================================================
> --- linux-2.6.29.orig/drivers/md/dm-mpath.c
> +++ linux-2.6.29/drivers/md/dm-mpath.c
> @@ -35,6 +35,7 @@ struct pgpath {
> 
>  	struct dm_path path;
>  	struct work_struct deactivate_path;
> +	struct work_struct activate_path;
>  };
> 
>  #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
> @@ -64,8 +65,6 @@ struct multipath {
>  	spinlock_t lock;
> 
>  	const char *hw_handler_name;
> -	struct work_struct activate_path;
> -	struct pgpath *pgpath_to_activate;
>  	unsigned nr_priority_groups;
>  	struct list_head priority_groups;
>  	unsigned pg_init_required;	/* pg_init needs calling? */
> @@ -128,6 +127,7 @@ static struct pgpath *alloc_pgpath(void)
>  	if (pgpath) {
>  		pgpath->is_active = 1;
>  		INIT_WORK(&pgpath->deactivate_path, deactivate_path);
> +		INIT_WORK(&pgpath->activate_path, activate_path);
>  	}
> 
>  	return pgpath;
> @@ -160,7 +160,6 @@ static struct priority_group *alloc_prio
> 
>  static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
>  {
> -	unsigned long flags;
>  	struct pgpath *pgpath, *tmp;
>  	struct multipath *m = ti->private;
> 
> @@ -169,10 +168,6 @@ static void free_pgpaths(struct list_hea
>  		if (m->hw_handler_name)
>  			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
>  		dm_put_device(ti, pgpath->path.dev);
> -		spin_lock_irqsave(&m->lock, flags);
> -		if (m->pgpath_to_activate == pgpath)
> -			m->pgpath_to_activate = NULL;
> -		spin_unlock_irqrestore(&m->lock, flags);
>  		free_pgpath(pgpath);
>  	}
>  }
> @@ -202,7 +197,6 @@ static struct multipath *alloc_multipath
>  		m->queue_io = 1;
>  		INIT_WORK(&m->process_queued_ios, process_queued_ios);
>  		INIT_WORK(&m->trigger_event, trigger_event);
> -		INIT_WORK(&m->activate_path, activate_path);
>  		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
>  		if (!m->mpio_pool) {
>  			kfree(m);
> @@ -427,8 +421,8 @@ static void process_queued_ios(struct wo
>  {
>  	struct multipath *m =
>  		container_of(work, struct multipath, process_queued_ios);
> -	struct pgpath *pgpath = NULL;
> -	unsigned init_required = 0, must_queue = 1;
> +	struct pgpath *pgpath = NULL, *tmp;
> +	unsigned must_queue = 1;
>  	unsigned long flags;
> 
>  	spin_lock_irqsave(&m->lock, flags);
> @@ -446,19 +440,15 @@ static void process_queued_ios(struct wo
>  		must_queue = 0;
> 
>  	if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
> -		m->pgpath_to_activate = pgpath;
>  		m->pg_init_count++;
>  		m->pg_init_required = 0;
> -		m->pg_init_in_progress = 1;
> -		init_required = 1;
> +		list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
> +			if (queue_work(kmpath_handlerd, &tmp->activate_path))
> +				m->pg_init_in_progress++;
> +		}
>  	}
> -
>  out:
>  	spin_unlock_irqrestore(&m->lock, flags);
> -
> -	if (init_required)
> -		queue_work(kmpath_handlerd, &m->activate_path);
> -
>  	if (!must_queue)
>  		dispatch_queued_ios(m);
>  }
> @@ -924,9 +914,13 @@ static int reinstate_path(struct pgpath
> 
>  	pgpath->is_active = 1;
> 
> -	m->current_pgpath = NULL;
> -	if (!m->nr_valid_paths++ && m->queue_size)
> +	if (!m->nr_valid_paths++ && m->queue_size) {
> +		m->current_pgpath = NULL;
>  		queue_work(kmultipathd, &m->process_queued_ios);
> +	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
> +		if (queue_work(kmpath_handlerd, &pgpath->activate_path))
> +			m->pg_init_in_progress++;
> +	}
> 
>  	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
>  		      pgpath->path.dev->name, m->nr_valid_paths);
> @@ -1102,35 +1096,30 @@ static void pg_init_done(struct dm_path
> 
>  	spin_lock_irqsave(&m->lock, flags);
>  	if (errors) {
> -		DMERR("Could not failover device. Error %d.", errors);
> -		m->current_pgpath = NULL;
> -		m->current_pg = NULL;
> +		if (pgpath == m->current_pgpath) {
> +			DMERR("Could not failover device. Error %d.", errors);
> +			m->current_pgpath = NULL;
> +			m->current_pg = NULL;
> +		}
>  	} else if (!m->pg_init_required) {
>  		m->queue_io = 0;
>  		pg->bypassed = 0;
>  	}
> 
> -	m->pg_init_in_progress = 0;
> -	queue_work(kmultipathd, &m->process_queued_ios);
> +	m->pg_init_in_progress--;
> +	if (!m->pg_init_in_progress)
> +		queue_work(kmultipathd, &m->process_queued_ios);
>  	spin_unlock_irqrestore(&m->lock, flags);
>  }
> 
>  static void activate_path(struct work_struct *work)
>  {
>  	int ret;
> -	struct multipath *m =
> -		container_of(work, struct multipath, activate_path);
> -	struct dm_path *path;
> -	unsigned long flags;
> +	struct pgpath *pgpath =
> +		container_of(work, struct pgpath, activate_path);
> 
> -	spin_lock_irqsave(&m->lock, flags);
> -	path = &m->pgpath_to_activate->path;
> -	m->pgpath_to_activate = NULL;
> -	spin_unlock_irqrestore(&m->lock, flags);
> -	if (!path)
> -		return;
> -	ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
> -	pg_init_done(path, ret);
> +	ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
> +	pg_init_done(&pgpath->path, ret);
>  }
> 
>  /*
> 
> 
> 


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox

Patch

Index: linux-2.6.29/drivers/md/dm-mpath.c
===================================================================
--- linux-2.6.29.orig/drivers/md/dm-mpath.c
+++ linux-2.6.29/drivers/md/dm-mpath.c
@@ -35,6 +35,7 @@  struct pgpath {
 
 	struct dm_path path;
 	struct work_struct deactivate_path;
+	struct work_struct activate_path;
 };
 
 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -64,8 +65,6 @@  struct multipath {
 	spinlock_t lock;
 
 	const char *hw_handler_name;
-	struct work_struct activate_path;
-	struct pgpath *pgpath_to_activate;
 	unsigned nr_priority_groups;
 	struct list_head priority_groups;
 	unsigned pg_init_required;	/* pg_init needs calling? */
@@ -128,6 +127,7 @@  static struct pgpath *alloc_pgpath(void)
 	if (pgpath) {
 		pgpath->is_active = 1;
 		INIT_WORK(&pgpath->deactivate_path, deactivate_path);
+		INIT_WORK(&pgpath->activate_path, activate_path);
 	}
 
 	return pgpath;
@@ -160,7 +160,6 @@  static struct priority_group *alloc_prio
 
 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
 {
-	unsigned long flags;
 	struct pgpath *pgpath, *tmp;
 	struct multipath *m = ti->private;
 
@@ -169,10 +168,6 @@  static void free_pgpaths(struct list_hea
 		if (m->hw_handler_name)
 			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
 		dm_put_device(ti, pgpath->path.dev);
-		spin_lock_irqsave(&m->lock, flags);
-		if (m->pgpath_to_activate == pgpath)
-			m->pgpath_to_activate = NULL;
-		spin_unlock_irqrestore(&m->lock, flags);
 		free_pgpath(pgpath);
 	}
 }
@@ -202,7 +197,6 @@  static struct multipath *alloc_multipath
 		m->queue_io = 1;
 		INIT_WORK(&m->process_queued_ios, process_queued_ios);
 		INIT_WORK(&m->trigger_event, trigger_event);
-		INIT_WORK(&m->activate_path, activate_path);
 		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
 		if (!m->mpio_pool) {
 			kfree(m);
@@ -427,8 +421,8 @@  static void process_queued_ios(struct wo
 {
 	struct multipath *m =
 		container_of(work, struct multipath, process_queued_ios);
-	struct pgpath *pgpath = NULL;
-	unsigned init_required = 0, must_queue = 1;
+	struct pgpath *pgpath = NULL, *tmp;
+	unsigned must_queue = 1;
 	unsigned long flags;
 
 	spin_lock_irqsave(&m->lock, flags);
@@ -446,19 +440,15 @@  static void process_queued_ios(struct wo
 		must_queue = 0;
 
 	if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
-		m->pgpath_to_activate = pgpath;
 		m->pg_init_count++;
 		m->pg_init_required = 0;
-		m->pg_init_in_progress = 1;
-		init_required = 1;
+		list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
+			if (queue_work(kmpath_handlerd, &tmp->activate_path))
+				m->pg_init_in_progress++;
+		}
 	}
-
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
-
-	if (init_required)
-		queue_work(kmpath_handlerd, &m->activate_path);
-
 	if (!must_queue)
 		dispatch_queued_ios(m);
 }
@@ -924,9 +914,13 @@  static int reinstate_path(struct pgpath 
 
 	pgpath->is_active = 1;
 
-	m->current_pgpath = NULL;
-	if (!m->nr_valid_paths++ && m->queue_size)
+	if (!m->nr_valid_paths++ && m->queue_size) {
+		m->current_pgpath = NULL;
 		queue_work(kmultipathd, &m->process_queued_ios);
+	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
+		if (queue_work(kmpath_handlerd, &pgpath->activate_path))
+			m->pg_init_in_progress++;
+	}
 
 	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
 		      pgpath->path.dev->name, m->nr_valid_paths);
@@ -1102,35 +1096,30 @@  static void pg_init_done(struct dm_path 
 
 	spin_lock_irqsave(&m->lock, flags);
 	if (errors) {
-		DMERR("Could not failover device. Error %d.", errors);
-		m->current_pgpath = NULL;
-		m->current_pg = NULL;
+		if (pgpath == m->current_pgpath) {
+			DMERR("Could not failover device. Error %d.", errors);
+			m->current_pgpath = NULL;
+			m->current_pg = NULL;
+		}
 	} else if (!m->pg_init_required) {
 		m->queue_io = 0;
 		pg->bypassed = 0;
 	}
 
-	m->pg_init_in_progress = 0;
-	queue_work(kmultipathd, &m->process_queued_ios);
+	m->pg_init_in_progress--;
+	if (!m->pg_init_in_progress)
+		queue_work(kmultipathd, &m->process_queued_ios);
 	spin_unlock_irqrestore(&m->lock, flags);
 }
 
 static void activate_path(struct work_struct *work)
 {
 	int ret;
-	struct multipath *m =
-		container_of(work, struct multipath, activate_path);
-	struct dm_path *path;
-	unsigned long flags;
+	struct pgpath *pgpath =
+		container_of(work, struct pgpath, activate_path);
 
-	spin_lock_irqsave(&m->lock, flags);
-	path = &m->pgpath_to_activate->path;
-	m->pgpath_to_activate = NULL;
-	spin_unlock_irqrestore(&m->lock, flags);
-	if (!path)
-		return;
-	ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
-	pg_init_done(path, ret);
+	ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
+	pg_init_done(&pgpath->path, ret);
 }
 
 /*