
dm-mq and end_clone_request()

Message ID 20160801175948.GA6685@redhat.com (mailing list archive)
State Not Applicable, archived

Commit Message

Mike Snitzer Aug. 1, 2016, 5:59 p.m. UTC
With this debug patch (see the Patch section below) applied on top of v4.7:


I got:

Aug  1 13:41:48 client kernel: blk_update_request: I/O error, dev sdbf, sector 34376
Aug  1 13:41:48 client kernel: blk_update_request: I/O error, dev sdbf, sector 183608
Aug  1 13:41:48 client kernel: device-mapper: multipath: Failing path 67:144.
Aug  1 13:41:48 client kernel: blk_update_request: I/O error, dev sdbf, sector 237352
Aug  1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=1
Aug  1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=1
Aug  1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=1
Aug  1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=1
Aug  1 13:41:48 client kernel: device-mapper: multipath: must_push_back: queue_if_no_path=0 suspend_active=1 suspending=0
Aug  1 13:41:48 client kernel: ------------[ cut here ]------------
Aug  1 13:41:48 client kernel: WARNING: CPU: 10 PID: 6445 at drivers/md/dm-mpath.c:474 __multipath_map.isra.11+0xb7/0x240 [dm_multipath]
Aug  1 13:41:48 client kernel: Modules linked in: dm_round_robin(O) xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter rpcrdma ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_srp scsi_transport_srp ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel glue_helper
Aug  1 13:41:48 client kernel: lrw gf128mul ablk_helper cryptd ipmi_ssif iTCO_wdt iTCO_vendor_support sg ipmi_si hpilo pcspkr ipmi_msghandler acpi_power_meter hpwdt lpc_ich shpchp i7core_edac mfd_core edac_core pcc_cpufreq acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc dm_multipath(O) ip_tables xfs libcrc32c mlx5_ib ib_core sd_mod radeon i2c_algo_bit drm_kms_helper syscopyarea sysfillrect mlx5_core sysimgblt fb_sys_fops ttm drm crc32c_intel serio_raw bnx2 hpsa i2c_core ptp pps_core scsi_transport_sas dm_mod(O) [last unloaded: dm_log]
Aug  1 13:41:48 client kernel: CPU: 10 PID: 6445 Comm: kworker/10:1H Tainted: G          IO    4.7.0.snitm #1
Aug  1 13:41:48 client kernel: Hardware name: HP ProLiant DL380 G7, BIOS P67 08/16/2015
Aug  1 13:41:48 client kernel: Workqueue: kblockd blk_mq_requeue_work
Aug  1 13:41:48 client kernel: 0000000000000286 000000008737cd27 ffff880607ecfbb8 ffffffff8134259f
Aug  1 13:41:48 client kernel: 0000000000000000 0000000000000000 ffff880607ecfbf8 ffffffff81088fb1
Aug  1 13:41:48 client kernel: 000001da00000000 ffff880bd0393100 0000000000000000 0000000000000000
Aug  1 13:41:48 client kernel: Call Trace:
Aug  1 13:41:48 client kernel: [<ffffffff8134259f>] dump_stack+0x63/0x84
Aug  1 13:41:48 client kernel: [<ffffffff81088fb1>] __warn+0xd1/0xf0
Aug  1 13:41:48 client kernel: [<ffffffff810890ed>] warn_slowpath_null+0x1d/0x20
Aug  1 13:41:48 client kernel: [<ffffffffa0104e27>] __multipath_map.isra.11+0xb7/0x240 [dm_multipath]
Aug  1 13:41:48 client kernel: [<ffffffffa0104fca>] multipath_clone_and_map+0x1a/0x20 [dm_multipath]
Aug  1 13:41:48 client kernel: [<ffffffffa0002242>] map_request+0xd2/0x240 [dm_mod]
Aug  1 13:41:48 client kernel: [<ffffffffa000407e>] dm_mq_queue_rq+0x7e/0x110 [dm_mod]
Aug  1 13:41:48 client kernel: [<ffffffff81320d32>] __blk_mq_run_hw_queue+0x1f2/0x370
Aug  1 13:41:48 client kernel: [<ffffffff81320b25>] blk_mq_run_hw_queue+0x95/0xb0
Aug  1 13:41:48 client kernel: [<ffffffff81322438>] ? blk_mq_insert_request+0x88/0xc0
Aug  1 13:41:48 client kernel: [<ffffffff81321ba5>] blk_mq_start_hw_queue+0x15/0x20
Aug  1 13:41:48 client kernel: [<ffffffff81321be2>] blk_mq_start_hw_queues+0x32/0x50
Aug  1 13:41:48 client kernel: [<ffffffff813229e5>] blk_mq_requeue_work+0x115/0x140
Aug  1 13:41:48 client kernel: [<ffffffff810a1e72>] process_one_work+0x152/0x400
Aug  1 13:41:48 client kernel: [<ffffffff810a2765>] worker_thread+0x125/0x4b0
Aug  1 13:41:48 client kernel: [<ffffffff810a2640>] ? rescuer_thread+0x380/0x380
Aug  1 13:41:48 client kernel: [<ffffffff810a8298>] kthread+0xd8/0xf0
Aug  1 13:41:48 client kernel: [<ffffffff816ba17f>] ret_from_fork+0x1f/0x40
Aug  1 13:41:48 client kernel: [<ffffffff810a81c0>] ? kthread_park+0x60/0x60
Aug  1 13:41:48 client kernel: ---[ end trace 1be159facc3adabe ]---
Aug  1 13:41:48 client kernel: blk_update_request: I/O error, dev dm-28, sector 267144
Aug  1 13:41:48 client multipathd: 360001ff0b035d0000000001a8d8a001b: load table [0 62277025792 multipath 1 queue_if_no_path 0 0 0]
Aug  1 13:41:48 client multipathd: 360001ff0b035d0000000001a8d8a001b: Entering recovery mode: max_retries=12

This says to me that must_push_back is returning false because
dm_noflush_suspending() is false.  When this happens -EIO will escape up
the IO stack.

And this confirms that must_push_back() calling dm_noflush_suspending()
is quite suspect given queue_if_no_path was configured: we should
_always_ push back if no paths are available.
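
For reference, the v4.7 check in question (it is also visible as the
#if 0 block in the debug patch below) is effectively:

/*
 * v4.7 drivers/md/dm-mpath.c: must_push_back() returns true only if
 * queue_if_no_path is set, or if queue_if_no_path differs from its
 * saved value _and_ the target is in a noflush suspend.  With
 * queue_if_no_path cleared for the suspend and dm_noflush_suspending()
 * returning false, both halves are false and __multipath_map() falls
 * through to -EIO.
 */
static int must_push_back(struct multipath *m)
{
	return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
		((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
		  test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
		 dm_noflush_suspending(m->ti)));
}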

I'll dig deeper to really understand _why_ must_push_back() is coded
like it is.  There is a deep historic reason but hell if I can recall
what it is...

Comments

Bart Van Assche Aug. 1, 2016, 6:55 p.m. UTC | #1
On 08/01/2016 10:59 AM, Mike Snitzer wrote:
> This says to me that must_push_back is returning false because
> dm_noflush_suspending() is false.  When this happens -EIO will escape up
> the IO stack.
>
> And this confirms that must_push_back() calling dm_noflush_suspending()
> is quite suspect given queue_if_no_path was configured: we should
> _always_ pushback if no paths are available.
>
> I'll dig deeper on really understanding _why_ must_push_back() is coded
> like it is.

Hello Mike,

Earlier I had reported that I observe this behavior with 
CONFIG_DM_MQ_DEFAULT=y after the first simulated cable pull. I have been 
able to reproduce this behavior with CONFIG_DM_MQ_DEFAULT=n but it takes 
a large number of iterations to trigger this behavior. The output that 
appears on my setup in the kernel log with a bunch of printk()'s added 
in the dm-mpath driver for CONFIG_DM_MQ_DEFAULT=n is as follows (mpath 
254:0 and /dev/mapper/mpathbe refer to the same multipath device):

[  314.755582] mpath 254:0: queue_if_no_path 0 -> 1
[  314.770571] executing DM ioctl DEV_SUSPEND on mpathbe
[  314.770622] mpath 254:0: queue_if_no_path 1 -> 0
[  314.770657] __multipath_map(): (a) returning -5
[  314.770657] map_request(): clone_and_map_rq() returned -5
[  314.770658] dm_complete_request: error = -5
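
(The "__multipath_map(): (a) returning -5" line presumably comes from a
print at the point where __multipath_map() gives up with -EIO once
must_push_back() returns false; a hypothetical placement -- not the
actual debug diff -- would be:)

	if (!pgpath) {
		if (!must_push_back(m)) {
			/* hypothetical debug print location; -5 == -EIO */
			DMERR_LIMIT("__multipath_map(): (a) returning -5");
			r = -EIO;	/* Failed */
		}
		return r;
	}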

Bart.

Mike Snitzer Aug. 1, 2016, 7:15 p.m. UTC | #2
On Mon, Aug 01 2016 at  2:55pm -0400,
Bart Van Assche <bart.vanassche@sandisk.com> wrote:

> On 08/01/2016 10:59 AM, Mike Snitzer wrote:
> >This says to me that must_push_back is returning false because
> >dm_noflush_suspending() is false.  When this happens -EIO will escape up
> >the IO stack.
> >
> >And this confirms that must_push_back() calling dm_noflush_suspending()
> >is quite suspect given queue_if_no_path was configured: we should
> >_always_ pushback if no paths are available.
> >
> >I'll dig deeper on really understanding _why_ must_push_back() is coded
> >like it is.
> 
> Hello Mike,
> 
> Earlier I had reported that I observe this behavior with
> CONFIG_DM_MQ_DEFAULT=y after the first simulated cable pull. I have
> been able to reproduce this behavior with CONFIG_DM_MQ_DEFAULT=n but
> it takes a large number of iterations to trigger this behavior. The
> output that appears on my setup in the kernel log with a bunch of
> printk()'s added in the dm-mpath driver for CONFIG_DM_MQ_DEFAULT=n
> is as follows (mpath 254:0 and /dev/mapper/mpathbe refer to the same
> multipath device):
> 
> [  314.755582] mpath 254:0: queue_if_no_path 0 -> 1
> [  314.770571] executing DM ioctl DEV_SUSPEND on mpathbe
> [  314.770622] mpath 254:0: queue_if_no_path 1 -> 0
> [  314.770657] __multipath_map(): (a) returning -5
> [  314.770657] map_request(): clone_and_map_rq() returned -5
> [  314.770658] dm_complete_request: error = -5

OK, that makes no sense at all (never mind that your trace really isn't
useful; my debug patch is more so).

The old .request_fn code checks !blk_queue_stopped() in dm_request_fn().
When a dm-mpath device is suspended the queue gets stopped (same goes
for dm-mq).  So once a request-based DM device is suspended the .map_rq
and .clone_and_map_rq hooks shouldn't get called.
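
Roughly, the guard in question (a simplified sketch of the v4.7
dm_request_fn() loop, with the dequeue/dispatch details elided):

static void dm_request_fn(struct request_queue *q)
{
	struct request *rq;

	/*
	 * Only pull requests off the queue while it is not stopped, so a
	 * suspended (stopped) dm-mpath queue never reaches .clone_and_map_rq.
	 */
	while (!blk_queue_stopped(q)) {
		rq = blk_peek_request(q);
		if (!rq)
			return;
		/* ... dequeue rq and hand it off to map_request() ... */
	}
}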

BUT the difference between .request_fn and dm-mq is that dm-mq doesn't
ever check if the queue is stopped.  We rely on blk-core to ensure that
IO is not submitted (via dm_mq_queue_rq) while a device is suspended
(and because the queue is stopped it shouldn't).
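
To make the asymmetry concrete, a check of that sort in dm_mq_queue_rq()
might look like the sketch below (purely illustrative, not the actual
dm-mq code, and not necessarily a fix for the requeue-work path in the
trace above):

static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	/*
	 * Hypothetical guard mirroring the old !blk_queue_stopped() check;
	 * today dm-mq simply relies on the stopped queue never being run
	 * while the device is suspended.
	 */
	if (blk_queue_stopped(rq->q))
		return BLK_MQ_RQ_QUEUE_BUSY;	/* let blk-mq requeue it */

	/* ... existing mapping via map_request() ... */
	return BLK_MQ_RQ_QUEUE_OK;
}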

Now you're saying both cases aren't working, which is really confusing.
Basically the request-based DM core should protect against requests
being mapped while a device is suspended... yet there seems to be
mounting evidence that this isn't the case.

Patch

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 52baf8a..22baf29 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -433,10 +433,22 @@  failed:
  */
 static int must_push_back(struct multipath *m)
 {
+	bool queue_if_no_path = test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
+	bool suspend_active = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
+			       test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
+	bool suspending = (suspend_active && dm_noflush_suspending(m->ti));
+
+#if 0
 	return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
 		((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
 		  test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
 		 dm_noflush_suspending(m->ti)));
+#else
+	if (!queue_if_no_path || !suspending)
+		DMERR_LIMIT("%s: queue_if_no_path=%d suspend_active=%d suspending=%d",
+			    __func__, queue_if_no_path, suspend_active, suspending);
+	return (queue_if_no_path || suspending);
+#endif
 }
 
 /*
@@ -459,7 +471,7 @@  static int __multipath_map(struct dm_target *ti, struct request *clone,
 		pgpath = choose_pgpath(m, nr_bytes);
 
 	if (!pgpath) {
-		if (!must_push_back(m))
+		if (WARN_ON_ONCE(!must_push_back(m)))
 			r = -EIO;	/* Failed */
 		return r;
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
@@ -1347,7 +1359,7 @@  static int do_end_io(struct multipath *m, struct request *clone,
 
 	if (!atomic_read(&m->nr_valid_paths)) {
 		if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-			if (!must_push_back(m))
+			if (WARN_ON_ONCE(!must_push_back(m)))
 				r = -EIO;
 		} else {
 			if (error == -EBADE)