diff mbox series

[V3,14/14] blk-mq: add helpers for treating kdump kernel

Message ID 20230808104239.146085-15-ming.lei@redhat.com (mailing list archive)
State New, archived
Headers show
Series blk-mq: fix wrong queue mapping for kdump kernel | expand

Commit Message

Ming Lei Aug. 8, 2023, 10:42 a.m. UTC
Clean up code a bit by adding helpers for treating kdump kernel
specially.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

Comments

kernel test robot Aug. 10, 2023, 8 a.m. UTC | #1
Hello,

kernel test robot noticed "WARNING:at_drivers/block/null_blk/main.c:#null_map_queues" on:

commit: 8ec7debf0b62ddf5f62e18b886925462215ab98b ("[PATCH V3 14/14] blk-mq: add helpers for treating kdump kernel")
url: https://github.com/intel-lab-lkp/linux/commits/Ming-Lei/blk-mq-add-blk_mq_max_nr_hw_queues/20230809-003555
base: https://git.kernel.org/cgit/linux/kernel/git/mkp/scsi.git for-next
patch link: https://lore.kernel.org/all/20230808104239.146085-15-ming.lei@redhat.com/
patch subject: [PATCH V3 14/14] blk-mq: add helpers for treating kdump kernel

in testcase: boot

compiler: gcc-12
test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 4G

(please refer to attached dmesg/kmsg for entire log/backtrace)


+-----------------------------------------------------------+------------+------------+
|                                                           | 27da637b41 | 8ec7debf0b |
+-----------------------------------------------------------+------------+------------+
| boot_successes                                            | 12         | 0          |
| boot_failures                                             | 0          | 12         |
| WARNING:at_drivers/block/null_blk/main.c:#null_map_queues | 0          | 12         |
| EIP:null_map_queues                                       | 0          | 12         |
| BUG:kernel_NULL_pointer_dereference,address               | 0          | 12         |
| Oops:#[##]                                                | 0          | 12         |
| EIP:group_cpus_evenly                                     | 0          | 12         |
| Kernel_panic-not_syncing:Fatal_exception                  | 0          | 12         |
+-----------------------------------------------------------+------------+------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202308101503.67a2d533-oliver.sang@intel.com



[    7.742766][    T1] null_blk: tag set has unexpected nr_hw_queues: 1
[    7.744704][    T1] ------------[ cut here ]------------
[    7.745825][    T1] WARNING: CPU: 0 PID: 1 at drivers/block/null_blk/main.c:1615 null_map_queues+0x56/0xdb
[    7.748029][    T1] Modules linked in:
[    7.748923][    T1] CPU: 0 PID: 1 Comm: swapper Tainted: G                T  6.5.0-rc1-00100-g8ec7debf0b62 #10 05d847e43b9b6f584ad59352c89de6920a7d94da
[    7.751662][    T1] EIP: null_map_queues+0x56/0xdb
[    7.753366][    T1] Code: b0 b4 01 00 00 8d 0c 37 39 ca 74 29 8b b8 b0 01 00 00 8b b0 b8 01 00 00 8d 04 37 39 c2 74 16 52 68 6d 34 9b 42 e8 f8 89 84 ff <0f> 0b 31 f6 5a bf 01 00 00 00 59 8d 43 04 31 c9 31 d2 89 45 f0 3b
[    7.757272][    T1] EAX: 00000030 EBX: 46131e1c ECX: 00000000 EDX: 00000000
[    7.758794][    T1] ESI: 00000001 EDI: 00000001 EBP: 40343e88 ESP: 40343e68
[    7.760257][    T1] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00010246
[    7.761810][    T1] CR0: 80050033 CR2: ffd99000 CR3: 0343e000 CR4: 000406d0
[    7.763254][    T1] Call Trace:
[    7.763953][    T1]  ? show_regs+0x60/0x70
[    7.764872][    T1]  ? null_map_queues+0x56/0xdb
[    7.765908][    T1]  ? __warn+0x8c/0x10a
[    7.766754][    T1]  ? report_bug+0xdd/0x13e
[    7.767719][    T1]  ? null_map_queues+0x56/0xdb
[    7.768721][    T1]  ? exc_overflow+0x41/0x41
[    7.769668][    T1]  ? handle_bug+0x2b/0x53
[    7.770532][    T1]  ? exc_invalid_op+0x24/0x6a
[    7.771501][    T1]  ? handle_exception+0x11d/0x11d
[    7.772572][    T1]  ? lockdep_next_lockchain+0x18/0x2b
[    7.773697][    T1]  ? exc_overflow+0x41/0x41
[    7.774604][    T1]  ? null_map_queues+0x56/0xdb
[    7.775593][    T1]  ? exc_overflow+0x41/0x41
[    7.776561][    T1]  ? null_map_queues+0x56/0xdb
[    7.777534][    T1]  ? kmalloc_array_node+0x19/0x28
[    7.778828][    T1]  ? blk_mq_update_queue_map+0x57/0x7e
[    7.779975][    T1]  ? blk_mq_alloc_tag_set+0x1eb/0x353
[    7.781115][    T1]  ? null_init_tag_set+0xd7/0xe6
[    7.782155][    T1]  ? null_add_dev+0x1d9/0x5ef
[    7.783128][    T1]  ? null_alloc_dev+0x7a/0x1c2
[    7.784144][    T1]  ? null_init+0x26d/0x36b
[    7.785126][    T1]  ? do_one_initcall+0x77/0x1b9
[    7.786097][    T1]  ? virtio_blk_init+0xbf/0xbf
[    7.787076][    T1]  ? do_initcalls+0x176/0x1ba
[    7.788080][    T1]  ? kernel_init_freeable+0xe9/0x13c
[    7.792635][    T1]  ? rest_init+0x11d/0x11d
[    7.793557][    T1]  ? kernel_init+0x12/0xf7
[    7.794469][    T1]  ? ret_from_fork+0x1c/0x30
[    7.795465][    T1] irq event stamp: 321017
[    7.796352][    T1] hardirqs last  enabled at (321027): [<41095389>] __up_console_sem+0x59/0x71
[    7.798190][    T1] hardirqs last disabled at (321036): [<41095370>] __up_console_sem+0x40/0x71
[    7.800028][    T1] softirqs last  enabled at (320984): [<41f25ca9>] __do_softirq+0x279/0x2b7
[    7.801854][    T1] softirqs last disabled at (320975): [<4101cf25>] call_on_stack+0x40/0x50
[    7.803562][    T1] ---[ end trace 0000000000000000 ]---
[    7.804950][    T1] BUG: kernel NULL pointer dereference, address: 00000010
[    7.805688][    T1] #PF: supervisor write access in kernel mode
[    7.805688][    T1] #PF: error_code(0x0002) - not-present page
[    7.805688][    T1] *pde = 00000000
[    7.805688][    T1] Oops: 0002 [#1] PREEMPT
[    7.805688][    T1] CPU: 0 PID: 1 Comm: swapper Tainted: G        W       T  6.5.0-rc1-00100-g8ec7debf0b62 #10 05d847e43b9b6f584ad59352c89de6920a7d94da
[    7.805688][    T1] EIP: group_cpus_evenly+0x24/0x2e
[    7.805688][    T1] Code: c9 e9 a1 91 8d 00 55 89 e5 ba 04 00 00 00 f7 e2 70 0e ba c0 0d 00 00 e8 2e 87 b4 ff 85 c0 75 04 31 c0 eb 08 8b 15 a8 1b 34 43 <89> 10 5d 31 d2 e9 73 91 8d 00 55 89 e5 56 53 8b 1d 84 5f 32 43 85
[    7.805688][    T1] EAX: 00000010 EBX: 46131e38 ECX: 00000000 EDX: 00000001
[    7.805688][    T1] ESI: 00000000 EDI: 00000001 EBP: 40343e58 ESP: 40343e58
[    7.805688][    T1] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00010202
[    7.805688][    T1] CR0: 80050033 CR2: 00000010 CR3: 0343e000 CR4: 000406d0
[    7.805688][    T1] Call Trace:
[    7.805688][    T1]  ? show_regs+0x60/0x70
[    7.805688][    T1]  ? __die_body+0x13/0x52
[    7.805688][    T1]  ? __die+0x22/0x2c
[    7.805688][    T1]  ? page_fault_oops+0x4c/0x7f
[    7.805688][    T1]  ? kernelmode_fixup_or_oops+0x8b/0x9d
[    7.805688][    T1]  ? __bad_area_nosemaphore+0x40/0x16c
[    7.805688][    T1]  ? bad_area_nosemaphore+0xa/0x17
[    7.805688][    T1]  ? do_user_addr_fault+0xdd/0x396
[    7.805688][    T1]  ? trace_irq_disable+0x3b/0x4e
[    7.805688][    T1]  ? exc_page_fault+0xf6/0x120
[    7.805688][    T1]  ? pvclock_clocksource_read_nowd+0x167/0x167
[    7.805688][    T1]  ? handle_exception+0x11d/0x11d
[    7.805688][    T1]  ? pvclock_clocksource_read_nowd+0x167/0x167
[    7.805688][    T1]  ? group_cpus_evenly+0x24/0x2e
[    7.805688][    T1]  ? pvclock_clocksource_read_nowd+0x167/0x167
[    7.805688][    T1]  ? group_cpus_evenly+0x24/0x2e
[    7.805688][    T1]  ? blk_mq_map_queues+0xf/0x46
[    7.805688][    T1]  ? null_map_queues+0xbc/0xdb
[    7.805688][    T1]  ? blk_mq_update_queue_map+0x57/0x7e
[    7.805688][    T1]  ? blk_mq_alloc_tag_set+0x1eb/0x353
[    7.805688][    T1]  ? null_init_tag_set+0xd7/0xe6
[    7.805688][    T1]  ? null_add_dev+0x1d9/0x5ef
[    7.805688][    T1]  ? null_alloc_dev+0x7a/0x1c2
[    7.805688][    T1]  ? null_init+0x26d/0x36b
[    7.805688][    T1]  ? do_one_initcall+0x77/0x1b9
[    7.805688][    T1]  ? virtio_blk_init+0xbf/0xbf
[    7.805688][    T1]  ? do_initcalls+0x176/0x1ba
[    7.805688][    T1]  ? kernel_init_freeable+0xe9/0x13c
[    7.805688][    T1]  ? rest_init+0x11d/0x11d
[    7.805688][    T1]  ? kernel_init+0x12/0xf7
[    7.805688][    T1]  ? ret_from_fork+0x1c/0x30
[    7.805688][    T1] Modules linked in:
[    7.805688][    T1] CR2: 0000000000000010
[    7.805688][    T1] ---[ end trace 0000000000000000 ]---
[    7.805688][    T1] EIP: group_cpus_evenly+0x24/0x2e
[    7.805688][    T1] Code: c9 e9 a1 91 8d 00 55 89 e5 ba 04 00 00 00 f7 e2 70 0e ba c0 0d 00 00 e8 2e 87 b4 ff 85 c0 75 04 31 c0 eb 08 8b 15 a8 1b 34 43 <89> 10 5d 31 d2 e9 73 91 8d 00 55 89 e5 56 53 8b 1d 84 5f 32 43 85
[    7.805688][    T1] EAX: 00000010 EBX: 46131e38 ECX: 00000000 EDX: 00000001
[    7.805688][    T1] ESI: 00000000 EDI: 00000001 EBP: 40343e58 ESP: 40343e58
[    7.805688][    T1] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00010202
[    7.805688][    T1] CR0: 80050033 CR2: 00000010 CR3: 0343e000 CR4: 000406d0
[    7.805688][    T1] Kernel panic - not syncing: Fatal exception
[    7.805688][    T1] Kernel Offset: disabled



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20230810/202308101503.67a2d533-oliver.sang@intel.com
diff mbox series

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 617d6f849a7b..afa51df2f0d3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -147,6 +147,8 @@  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);
  * driver has to take blk-mq max supported nr_hw_queues into account
  * when figuring out nr_hw_queues from hardware info, for avoiding
  * inconsistency between driver and blk-mq.
+ *
+ * Limit to single queue in case of kdump kernel
  */
 unsigned int blk_mq_max_nr_hw_queues(void)
 {
@@ -4370,7 +4372,7 @@  static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
 	if (set->nr_maps == 1)
 		set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues;
 
-	if (set->ops->map_queues && !is_kdump_kernel()) {
+	if (set->ops->map_queues) {
 		int i;
 
 		/*
@@ -4420,6 +4422,22 @@  static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
 	return 0;
 }
 
+/* Limit to single map in case of kdump kernel */
+static unsigned int blk_mq_max_nr_maps(void)
+{
+	if (is_kdump_kernel())
+		return 1;
+	return HCTX_MAX_TYPES;
+}
+
+/* Limit to 64 in case of kdump kernel */
+static unsigned int blk_mq_max_depth(void)
+{
+	if (is_kdump_kernel())
+		return 64;
+	return BLK_MQ_MAX_DEPTH;
+}
+
 /*
  * Alloc a tag set to be associated with one or more request queues.
  * May fail with EINVAL for various error conditions. May adjust the
@@ -4456,16 +4474,13 @@  int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	else if (set->nr_maps > HCTX_MAX_TYPES)
 		return -EINVAL;
 
-	/*
-	 * If a crashdump is active, then we are potentially in a very
-	 * memory constrained environment. Limit us to 1 queue and
-	 * 64 tags to prevent using too much memory.
-	 */
-	if (is_kdump_kernel()) {
-		set->nr_hw_queues = 1;
-		set->nr_maps = 1;
-		set->queue_depth = min(64U, set->queue_depth);
-	}
+	if (set->nr_hw_queues > blk_mq_max_nr_hw_queues())
+		set->nr_hw_queues = blk_mq_max_nr_hw_queues();
+	if (set->nr_maps > blk_mq_max_nr_maps())
+		set->nr_maps = blk_mq_max_nr_maps();
+	if (set->queue_depth > blk_mq_max_depth())
+		set->queue_depth = blk_mq_max_depth();
+
 	/*
 	 * There is no use for more h/w queues than cpus if we just have
 	 * a single map
@@ -4495,7 +4510,7 @@  int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 						  GFP_KERNEL, set->numa_node);
 		if (!set->map[i].mq_map)
 			goto out_free_mq_map;
-		set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
+		set->map[i].nr_queues = set->nr_hw_queues;
 	}
 
 	blk_mq_update_queue_map(set);