Message ID | 20200113224127.3367484-1-krisman@collabora.com (mailing list archive) |
---|---|
State | Accepted, archived |
Delegated to: | Mike Snitzer |
Headers | show |
Series | [v2] dm mpath: Add timeout mechanism for queue_if_no_path | expand |
On Mon, Jan 13 2020 at 5:41P -0500, Gabriel Krisman Bertazi <krisman@collabora.com> wrote: > From: Anatol Pomazau <anatol@google.com> > > Add a configurable timeout mechanism to disable queue_if_no_path without > assistance from multipathd. In reality, this reimplements the > no_path_retry mechanism from multipathd in kernel space, which is > interesting to prevent processes from hanging indefinitely in cases > where the daemon might be unable to respond, after a failure or for > whatever reason. > > Despite replicating the policy configuration on kernel space, it is > quite an important case to prevent IOs from hanging forever, waiting for > userspace to behave correctly. > > v2: > - Use a module parameter instead of configuring per table > - Simplify code > > Co-developed-by: Frank Mayhar <fmayhar@google.com> > Signed-off-by: Frank Mayhar <fmayhar@google.com> > Co-developed-by: Bharath Ravi <rbharath@google.com> > Signed-off-by: Bharath Ravi <rbharath@google.com> > Co-developed-by: Khazhismel Kumykov <khazhy@google.com> > Signed-off-by: Khazhismel Kumykov <khazhy@google.com> > Signed-off-by: Anatol Pomazau <anatol@google.com> > Co-developed-by: Gabriel Krisman Bertazi <krisman@collabora.com> > Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com> All these tags seem rather excessive (especially in that you aren't cc most of them on the submission). Please review/scrub. Just seems odd that so many had a hand in this relatively small patch. The Signed-off-by for anatol@google.com seems wrong, or did Anatol shephard this patch at some point? Or did you mean Reviewed-by? Something else? Anyway, if in the end you hold these tags to reflect the development evolution of this patch then so be it ;) I've reviewed the changes and found various things I felt were worthwhile to modify. Summary: - included missing <linux/timer.h> - added MODULE_PARM_DESC - moved new functions to reduce needed forward declarations - tweaked queue_if_no_path_timeout_work's DMWARN message - added lockdep_assert_held to enable_nopath_timeout - renamed static queue_if_no_path_timeout to queue_if_no_path_timeout_secs - use READ_ONCE when accessing queue_if_no_path_timeout_secs Think that was it. Please review/apply//test and if you're OK with the end result I'll get it staged for 5.6 inclusion. Thanks, Mike diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index b09e3f925f47..2bc18c9c3abc 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -20,6 +20,7 @@ #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/timer.h> #include <linux/workqueue.h> #include <linux/delay.h> #include <scsi/scsi_dh.h> @@ -31,6 +32,8 @@ #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) #define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0 +static unsigned long queue_if_no_path_timeout_secs = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT; + /* Path properties */ struct pgpath { struct list_head list; @@ -104,10 +107,6 @@ struct dm_mpath_io { size_t nr_bytes; }; -static unsigned long queue_if_no_path_timeout = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT; -module_param_named(queue_if_no_path_timeout_secs, - queue_if_no_path_timeout, ulong, 0644); - typedef int (*action_fn) (struct pgpath *pgpath); static struct workqueue_struct *kmultipathd, *kmpath_handlerd; @@ -115,10 +114,7 @@ static void trigger_event(struct work_struct *work); static void activate_or_offline_path(struct pgpath *pgpath); static void activate_path_work(struct work_struct *work); static void process_queued_bios(struct work_struct *work); - static void queue_if_no_path_timeout_work(struct timer_list *t); -static void enable_nopath_timeout(struct multipath *m); -static void disable_nopath_timeout(struct multipath *m); /*----------------------------------------------- * Multipath state flags. @@ -730,6 +726,43 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, return 0; } +/* + * If the queue_if_no_path timeout fires, turn off queue_if_no_path and + * process any queued I/O. + */ +static void queue_if_no_path_timeout_work(struct timer_list *t) +{ + struct multipath *m = from_timer(m, t, nopath_timer); + struct mapped_device *md = dm_table_get_md(m->ti->table); + + DMWARN("queue_if_no_path timeout on %s, failing queued IO", dm_device_name(md)); + queue_if_no_path(m, false, false); +} + +/* + * Enable the queue_if_no_path timeout if necessary. + * Called with m->lock held. + */ +static void enable_nopath_timeout(struct multipath *m) +{ + unsigned long queue_if_no_path_timeout = + READ_ONCE(queue_if_no_path_timeout_secs) * HZ; + + lockdep_assert_held(&m->lock); + + if (queue_if_no_path_timeout > 0 && + atomic_read(&m->nr_valid_paths) == 0 && + test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + mod_timer(&m->nopath_timer, + jiffies + queue_if_no_path_timeout); + } +} + +static void disable_nopath_timeout(struct multipath *m) +{ + del_timer_sync(&m->nopath_timer); +} + /* * An event is triggered whenever a path is taken out of use. * Includes path failure and PG bypass. @@ -1231,20 +1264,6 @@ static void multipath_dtr(struct dm_target *ti) free_multipath(m); } -/* - * If the queue_if_no_path timeout fires, turn off queue_if_no_path and - * process any queued I/O. - */ -static void queue_if_no_path_timeout_work(struct timer_list *t) -{ - struct multipath *m = from_timer(m, t, nopath_timer); - struct mapped_device *md = dm_table_get_md((m)->ti->table); - - DMWARN("queue_if_no_path timeout on %s", dm_device_name(md)); - - queue_if_no_path(m, false, false); -} - /* * Take a path out of use. */ @@ -1352,25 +1371,6 @@ static int action_dev(struct multipath *m, struct dm_dev *dev, return r; } -/* - * Enable the queue_if_no_path timeout if necessary. Called with m->lock - * held. - */ -static void enable_nopath_timeout(struct multipath *m) -{ - if (queue_if_no_path_timeout > 0 && - atomic_read(&m->nr_valid_paths) == 0 && - test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - mod_timer(&m->nopath_timer, - jiffies + queue_if_no_path_timeout * HZ); - } -} - -static void disable_nopath_timeout(struct multipath *m) -{ - del_timer_sync(&m->nopath_timer); -} - /* * Temporarily try to avoid having to use the specified PG */ @@ -2127,6 +2127,10 @@ static void __exit dm_multipath_exit(void) module_init(dm_multipath_init); module_exit(dm_multipath_exit); +module_param_named(queue_if_no_path_timeout_secs, + queue_if_no_path_timeout_secs, ulong, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds"); + MODULE_DESCRIPTION(DM_NAME " multipath target"); MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); MODULE_LICENSE("GPL"); -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
Mike Snitzer <snitzer@redhat.com> writes: > On Mon, Jan 13 2020 at 5:41P -0500, > Gabriel Krisman Bertazi <krisman@collabora.com> wrote: > >> From: Anatol Pomazau <anatol@google.com> >> >> Add a configurable timeout mechanism to disable queue_if_no_path without >> assistance from multipathd. In reality, this reimplements the >> no_path_retry mechanism from multipathd in kernel space, which is >> interesting to prevent processes from hanging indefinitely in cases >> where the daemon might be unable to respond, after a failure or for >> whatever reason. >> >> Despite replicating the policy configuration on kernel space, it is >> quite an important case to prevent IOs from hanging forever, waiting for >> userspace to behave correctly. >> >> v2: >> - Use a module parameter instead of configuring per table >> - Simplify code >> >> Co-developed-by: Frank Mayhar <fmayhar@google.com> >> Signed-off-by: Frank Mayhar <fmayhar@google.com> >> Co-developed-by: Bharath Ravi <rbharath@google.com> >> Signed-off-by: Bharath Ravi <rbharath@google.com> >> Co-developed-by: Khazhismel Kumykov <khazhy@google.com> >> Signed-off-by: Khazhismel Kumykov <khazhy@google.com> >> Signed-off-by: Anatol Pomazau <anatol@google.com> >> Co-developed-by: Gabriel Krisman Bertazi <krisman@collabora.com> >> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com> > > All these tags seem rather excessive (especially in that you aren't cc > most of them on the submission). Please review/scrub. Just seems odd > that so many had a hand in this relatively small patch. Hey, I removed some of the Cc's because those emails addresses were bouncing. Still, I kept the lines for credits. I got the bounces when sending v1. > The Signed-off-by for anatol@google.com seems wrong, or did Anatol > shephard this patch at some point? Or did you mean Reviewed-by? > Something else? Anatol was the main author, as listed in the From: header. This seems correct with regard to the ordering rules of Documentation/process/submitting-patches.rst, in particular the second example, (Example of a patch submitted by a Co-developed-by: author::). Am I missing something? > > Anyway, if in the end you hold these tags to reflect the development > evolution of this patch then so be it ;) > > I've reviewed the changes and found various things I felt were > worthwhile to modify. Summary: > > - included missing <linux/timer.h> > - added MODULE_PARM_DESC > - moved new functions to reduce needed forward declarations > - tweaked queue_if_no_path_timeout_work's DMWARN message > - added lockdep_assert_held to enable_nopath_timeout > - renamed static queue_if_no_path_timeout to queue_if_no_path_timeout_secs > - use READ_ONCE when accessing queue_if_no_path_timeout_secs > The changes you made look good to me and your version of the patch passes my testcase.
On Tue, Jan 14 2020 at 11:31am -0500, Gabriel Krisman Bertazi <krisman@collabora.com> wrote: > Mike Snitzer <snitzer@redhat.com> writes: > > > On Mon, Jan 13 2020 at 5:41P -0500, > > Gabriel Krisman Bertazi <krisman@collabora.com> wrote: > > > >> From: Anatol Pomazau <anatol@google.com> > >> > >> Add a configurable timeout mechanism to disable queue_if_no_path without > >> assistance from multipathd. In reality, this reimplements the > >> no_path_retry mechanism from multipathd in kernel space, which is > >> interesting to prevent processes from hanging indefinitely in cases > >> where the daemon might be unable to respond, after a failure or for > >> whatever reason. > >> > >> Despite replicating the policy configuration on kernel space, it is > >> quite an important case to prevent IOs from hanging forever, waiting for > >> userspace to behave correctly. > >> > >> v2: > >> - Use a module parameter instead of configuring per table > >> - Simplify code > >> > >> Co-developed-by: Frank Mayhar <fmayhar@google.com> > >> Signed-off-by: Frank Mayhar <fmayhar@google.com> > >> Co-developed-by: Bharath Ravi <rbharath@google.com> > >> Signed-off-by: Bharath Ravi <rbharath@google.com> > >> Co-developed-by: Khazhismel Kumykov <khazhy@google.com> > >> Signed-off-by: Khazhismel Kumykov <khazhy@google.com> > >> Signed-off-by: Anatol Pomazau <anatol@google.com> > >> Co-developed-by: Gabriel Krisman Bertazi <krisman@collabora.com> > >> Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com> > > > > All these tags seem rather excessive (especially in that you aren't cc > > most of them on the submission). Please review/scrub. Just seems odd > > that so many had a hand in this relatively small patch. > > Hey, > > I removed some of the Cc's because those emails addresses were bouncing. > Still, I kept the lines for credits. I got the bounces when sending v1. Hmm, if their emails bounce, not a lot of point detailing them. > > The Signed-off-by for anatol@google.com seems wrong, or did Anatol > > shephard this patch at some point? Or did you mean Reviewed-by? > > Something else? > > Anatol was the main author, as listed in the From: header. This > seems correct with regard to the ordering rules of > Documentation/process/submitting-patches.rst, in particular the second > example, (Example of a patch submitted by a Co-developed-by: author::). > > Am I missing something? No, I missed that Anatol is main author. I'd have noticed once I applied the patch via 'git am' but... > > > > > Anyway, if in the end you hold these tags to reflect the development > > evolution of this patch then so be it ;) > > > > I've reviewed the changes and found various things I felt were > > worthwhile to modify. Summary: > > > > - included missing <linux/timer.h> > > - added MODULE_PARM_DESC > > - moved new functions to reduce needed forward declarations > > - tweaked queue_if_no_path_timeout_work's DMWARN message > > - added lockdep_assert_held to enable_nopath_timeout > > - renamed static queue_if_no_path_timeout to queue_if_no_path_timeout_secs > > - use READ_ONCE when accessing queue_if_no_path_timeout_secs > > > > The changes you made look good to me and your version of the patch > passes my testcase. OK, thanks. Mike -- dm-devel mailing list dm-devel@redhat.com https://www.redhat.com/mailman/listinfo/dm-devel
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e0c32793c248..52d90900e85b 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -29,6 +29,7 @@ #define DM_MSG_PREFIX "multipath" #define DM_PG_INIT_DELAY_MSECS 2000 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) +#define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0 /* Path properties */ struct pgpath { @@ -91,6 +92,8 @@ struct multipath { struct work_struct process_queued_bios; struct bio_list queued_bios; + + struct timer_list nopath_timer; /* Timeout for queue_if_no_path */ }; /* @@ -101,6 +104,10 @@ struct dm_mpath_io { size_t nr_bytes; }; +static unsigned long queue_if_no_path_timeout = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT; +module_param_named(queue_if_no_path_timeout_secs, + queue_if_no_path_timeout, ulong, 0644); + typedef int (*action_fn) (struct pgpath *pgpath); static struct workqueue_struct *kmultipathd, *kmpath_handlerd; @@ -109,6 +116,10 @@ static void activate_or_offline_path(struct pgpath *pgpath); static void activate_path_work(struct work_struct *work); static void process_queued_bios(struct work_struct *work); +static void queue_if_no_path_timeout_work(struct timer_list *t); +static void enable_nopath_timeout(struct multipath *m); +static void disable_nopath_timeout(struct multipath *m); + /*----------------------------------------------- * Multipath state flags. *-----------------------------------------------*/ @@ -195,6 +206,8 @@ static struct multipath *alloc_multipath(struct dm_target *ti) m->ti = ti; ti->private = m; + + timer_setup(&m->nopath_timer, queue_if_no_path_timeout_work, 0); } return m; @@ -1090,6 +1103,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) struct dm_arg_set as; unsigned pg_count = 0; unsigned next_pg_num; + unsigned long flags; as.argc = argc; as.argv = argv; @@ -1154,6 +1168,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + spin_lock_irqsave(&m->lock, flags); + enable_nopath_timeout(m); + spin_unlock_irqrestore(&m->lock, flags); + ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; @@ -1208,10 +1226,25 @@ static void multipath_dtr(struct dm_target *ti) { struct multipath *m = ti->private; + disable_nopath_timeout(m); flush_multipath_work(m); free_multipath(m); } +/* + * If the queue_if_no_path timeout fires, turn off queue_if_no_path and + * process any queued I/O. + */ +static void queue_if_no_path_timeout_work(struct timer_list *t) +{ + struct multipath *m = from_timer(m, t, nopath_timer); + struct mapped_device *md = dm_table_get_md((m)->ti->table); + + DMWARN("queue_if_no_path timeout on %s", dm_device_name(md)); + + queue_if_no_path(m, false, false); +} + /* * Take a path out of use. */ @@ -1241,6 +1274,8 @@ static int fail_path(struct pgpath *pgpath) schedule_work(&m->trigger_event); + enable_nopath_timeout(m); + out: spin_unlock_irqrestore(&m->lock, flags); @@ -1291,6 +1326,9 @@ static int reinstate_path(struct pgpath *pgpath) process_queued_io_list(m); } + if (pgpath->is_active) + disable_nopath_timeout(m); + return r; } @@ -1314,6 +1352,25 @@ static int action_dev(struct multipath *m, struct dm_dev *dev, return r; } +/* + * Enable the queue_if_no_path timeout if necessary. Called with m->lock + * held. + */ +static void enable_nopath_timeout(struct multipath *m) +{ + if (queue_if_no_path_timeout > 0 && + atomic_read(&m->nr_valid_paths) == 0 && + test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + mod_timer(&m->nopath_timer, + jiffies + queue_if_no_path_timeout * HZ); + } +} + +static void disable_nopath_timeout(struct multipath *m) +{ + del_timer_sync(&m->nopath_timer); +} + /* * Temporarily try to avoid having to use the specified PG */ @@ -1789,6 +1846,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, struct dm_dev *dev; struct multipath *m = ti->private; action_fn action; + unsigned long flags; mutex_lock(&m->work_mutex); @@ -1800,9 +1858,13 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, if (argc == 1) { if (!strcasecmp(argv[0], "queue_if_no_path")) { r = queue_if_no_path(m, true, false); + spin_lock_irqsave(&m->lock, flags); + enable_nopath_timeout(m); + spin_unlock_irqrestore(&m->lock, flags); goto out; } else if (!strcasecmp(argv[0], "fail_if_no_path")) { r = queue_if_no_path(m, false, false); + disable_nopath_timeout(m); goto out; } }