Message ID | 20230808104239.146085-15-ming.lei@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | blk-mq: fix wrong queue mapping for kdump kernel | expand |
Hello,

kernel test robot noticed "WARNING:at_drivers/block/null_blk/main.c:#null_map_queues" on:

commit: 8ec7debf0b62ddf5f62e18b886925462215ab98b ("[PATCH V3 14/14] blk-mq: add helpers for treating kdump kernel")
url: https://github.com/intel-lab-lkp/linux/commits/Ming-Lei/blk-mq-add-blk_mq_max_nr_hw_queues/20230809-003555
base: https://git.kernel.org/cgit/linux/kernel/git/mkp/scsi.git for-next
patch link: https://lore.kernel.org/all/20230808104239.146085-15-ming.lei@redhat.com/
patch subject: [PATCH V3 14/14] blk-mq: add helpers for treating kdump kernel

in testcase: boot

compiler: gcc-12
test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 4G

(please refer to attached dmesg/kmsg for entire log/backtrace)

+-----------------------------------------------------------+------------+------------+
|                                                           | 27da637b41 | 8ec7debf0b |
+-----------------------------------------------------------+------------+------------+
| boot_successes                                            | 12         | 0          |
| boot_failures                                             | 0          | 12         |
| WARNING:at_drivers/block/null_blk/main.c:#null_map_queues | 0          | 12         |
| EIP:null_map_queues                                       | 0          | 12         |
| BUG:kernel_NULL_pointer_dereference,address               | 0          | 12         |
| Oops:#[##]                                                | 0          | 12         |
| EIP:group_cpus_evenly                                     | 0          | 12         |
| Kernel_panic-not_syncing:Fatal_exception                  | 0          | 12         |
+-----------------------------------------------------------+------------+------------+

If you fix the issue in a separate patch/commit (i.e.
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202308101503.67a2d533-oliver.sang@intel.com [ 7.742766][ T1] null_blk: tag set has unexpected nr_hw_queues: 1 [ 7.744704][ T1] ------------[ cut here ]------------ [ 7.745825][ T1] WARNING: CPU: 0 PID: 1 at drivers/block/null_blk/main.c:1615 null_map_queues+0x56/0xdb [ 7.748029][ T1] Modules linked in: [ 7.748923][ T1] CPU: 0 PID: 1 Comm: swapper Tainted: G T 6.5.0-rc1-00100-g8ec7debf0b62 #10 05d847e43b9b6f584ad59352c89de6920a7d94da [ 7.751662][ T1] EIP: null_map_queues+0x56/0xdb [ 7.753366][ T1] Code: b0 b4 01 00 00 8d 0c 37 39 ca 74 29 8b b8 b0 01 00 00 8b b0 b8 01 00 00 8d 04 37 39 c2 74 16 52 68 6d 34 9b 42 e8 f8 89 84 ff <0f> 0b 31 f6 5a bf 01 00 00 00 59 8d 43 04 31 c9 31 d2 89 45 f0 3b [ 7.757272][ T1] EAX: 00000030 EBX: 46131e1c ECX: 00000000 EDX: 00000000 [ 7.758794][ T1] ESI: 00000001 EDI: 00000001 EBP: 40343e88 ESP: 40343e68 [ 7.760257][ T1] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00010246 [ 7.761810][ T1] CR0: 80050033 CR2: ffd99000 CR3: 0343e000 CR4: 000406d0 [ 7.763254][ T1] Call Trace: [ 7.763953][ T1] ? show_regs+0x60/0x70 [ 7.764872][ T1] ? null_map_queues+0x56/0xdb [ 7.765908][ T1] ? __warn+0x8c/0x10a [ 7.766754][ T1] ? report_bug+0xdd/0x13e [ 7.767719][ T1] ? null_map_queues+0x56/0xdb [ 7.768721][ T1] ? exc_overflow+0x41/0x41 [ 7.769668][ T1] ? handle_bug+0x2b/0x53 [ 7.770532][ T1] ? exc_invalid_op+0x24/0x6a [ 7.771501][ T1] ? handle_exception+0x11d/0x11d [ 7.772572][ T1] ? lockdep_next_lockchain+0x18/0x2b [ 7.773697][ T1] ? exc_overflow+0x41/0x41 [ 7.774604][ T1] ? null_map_queues+0x56/0xdb [ 7.775593][ T1] ? exc_overflow+0x41/0x41 [ 7.776561][ T1] ? null_map_queues+0x56/0xdb [ 7.777534][ T1] ? kmalloc_array_node+0x19/0x28 [ 7.778828][ T1] ? blk_mq_update_queue_map+0x57/0x7e [ 7.779975][ T1] ? blk_mq_alloc_tag_set+0x1eb/0x353 [ 7.781115][ T1] ? 
null_init_tag_set+0xd7/0xe6 [ 7.782155][ T1] ? null_add_dev+0x1d9/0x5ef [ 7.783128][ T1] ? null_alloc_dev+0x7a/0x1c2 [ 7.784144][ T1] ? null_init+0x26d/0x36b [ 7.785126][ T1] ? do_one_initcall+0x77/0x1b9 [ 7.786097][ T1] ? virtio_blk_init+0xbf/0xbf [ 7.787076][ T1] ? do_initcalls+0x176/0x1ba [ 7.788080][ T1] ? kernel_init_freeable+0xe9/0x13c [ 7.792635][ T1] ? rest_init+0x11d/0x11d [ 7.793557][ T1] ? kernel_init+0x12/0xf7 [ 7.794469][ T1] ? ret_from_fork+0x1c/0x30 [ 7.795465][ T1] irq event stamp: 321017 [ 7.796352][ T1] hardirqs last enabled at (321027): [<41095389>] __up_console_sem+0x59/0x71 [ 7.798190][ T1] hardirqs last disabled at (321036): [<41095370>] __up_console_sem+0x40/0x71 [ 7.800028][ T1] softirqs last enabled at (320984): [<41f25ca9>] __do_softirq+0x279/0x2b7 [ 7.801854][ T1] softirqs last disabled at (320975): [<4101cf25>] call_on_stack+0x40/0x50 [ 7.803562][ T1] ---[ end trace 0000000000000000 ]--- [ 7.804950][ T1] BUG: kernel NULL pointer dereference, address: 00000010 [ 7.805688][ T1] #PF: supervisor write access in kernel mode [ 7.805688][ T1] #PF: error_code(0x0002) - not-present page [ 7.805688][ T1] *pde = 00000000 [ 7.805688][ T1] Oops: 0002 [#1] PREEMPT [ 7.805688][ T1] CPU: 0 PID: 1 Comm: swapper Tainted: G W T 6.5.0-rc1-00100-g8ec7debf0b62 #10 05d847e43b9b6f584ad59352c89de6920a7d94da [ 7.805688][ T1] EIP: group_cpus_evenly+0x24/0x2e [ 7.805688][ T1] Code: c9 e9 a1 91 8d 00 55 89 e5 ba 04 00 00 00 f7 e2 70 0e ba c0 0d 00 00 e8 2e 87 b4 ff 85 c0 75 04 31 c0 eb 08 8b 15 a8 1b 34 43 <89> 10 5d 31 d2 e9 73 91 8d 00 55 89 e5 56 53 8b 1d 84 5f 32 43 85 [ 7.805688][ T1] EAX: 00000010 EBX: 46131e38 ECX: 00000000 EDX: 00000001 [ 7.805688][ T1] ESI: 00000000 EDI: 00000001 EBP: 40343e58 ESP: 40343e58 [ 7.805688][ T1] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00010202 [ 7.805688][ T1] CR0: 80050033 CR2: 00000010 CR3: 0343e000 CR4: 000406d0 [ 7.805688][ T1] Call Trace: [ 7.805688][ T1] ? show_regs+0x60/0x70 [ 7.805688][ T1] ? 
__die_body+0x13/0x52 [ 7.805688][ T1] ? __die+0x22/0x2c [ 7.805688][ T1] ? page_fault_oops+0x4c/0x7f [ 7.805688][ T1] ? kernelmode_fixup_or_oops+0x8b/0x9d [ 7.805688][ T1] ? __bad_area_nosemaphore+0x40/0x16c [ 7.805688][ T1] ? bad_area_nosemaphore+0xa/0x17 [ 7.805688][ T1] ? do_user_addr_fault+0xdd/0x396 [ 7.805688][ T1] ? trace_irq_disable+0x3b/0x4e [ 7.805688][ T1] ? exc_page_fault+0xf6/0x120 [ 7.805688][ T1] ? pvclock_clocksource_read_nowd+0x167/0x167 [ 7.805688][ T1] ? handle_exception+0x11d/0x11d [ 7.805688][ T1] ? pvclock_clocksource_read_nowd+0x167/0x167 [ 7.805688][ T1] ? group_cpus_evenly+0x24/0x2e [ 7.805688][ T1] ? pvclock_clocksource_read_nowd+0x167/0x167 [ 7.805688][ T1] ? group_cpus_evenly+0x24/0x2e [ 7.805688][ T1] ? blk_mq_map_queues+0xf/0x46 [ 7.805688][ T1] ? null_map_queues+0xbc/0xdb [ 7.805688][ T1] ? blk_mq_update_queue_map+0x57/0x7e [ 7.805688][ T1] ? blk_mq_alloc_tag_set+0x1eb/0x353 [ 7.805688][ T1] ? null_init_tag_set+0xd7/0xe6 [ 7.805688][ T1] ? null_add_dev+0x1d9/0x5ef [ 7.805688][ T1] ? null_alloc_dev+0x7a/0x1c2 [ 7.805688][ T1] ? null_init+0x26d/0x36b [ 7.805688][ T1] ? do_one_initcall+0x77/0x1b9 [ 7.805688][ T1] ? virtio_blk_init+0xbf/0xbf [ 7.805688][ T1] ? do_initcalls+0x176/0x1ba [ 7.805688][ T1] ? kernel_init_freeable+0xe9/0x13c [ 7.805688][ T1] ? rest_init+0x11d/0x11d [ 7.805688][ T1] ? kernel_init+0x12/0xf7 [ 7.805688][ T1] ? 
ret_from_fork+0x1c/0x30 [ 7.805688][ T1] Modules linked in: [ 7.805688][ T1] CR2: 0000000000000010 [ 7.805688][ T1] ---[ end trace 0000000000000000 ]--- [ 7.805688][ T1] EIP: group_cpus_evenly+0x24/0x2e [ 7.805688][ T1] Code: c9 e9 a1 91 8d 00 55 89 e5 ba 04 00 00 00 f7 e2 70 0e ba c0 0d 00 00 e8 2e 87 b4 ff 85 c0 75 04 31 c0 eb 08 8b 15 a8 1b 34 43 <89> 10 5d 31 d2 e9 73 91 8d 00 55 89 e5 56 53 8b 1d 84 5f 32 43 85 [ 7.805688][ T1] EAX: 00000010 EBX: 46131e38 ECX: 00000000 EDX: 00000001 [ 7.805688][ T1] ESI: 00000000 EDI: 00000001 EBP: 40343e58 ESP: 40343e58 [ 7.805688][ T1] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00010202 [ 7.805688][ T1] CR0: 80050033 CR2: 00000010 CR3: 0343e000 CR4: 000406d0 [ 7.805688][ T1] Kernel panic - not syncing: Fatal exception [ 7.805688][ T1] Kernel Offset: disabled The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20230810/202308101503.67a2d533-oliver.sang@intel.com
diff --git a/block/blk-mq.c b/block/blk-mq.c index 617d6f849a7b..afa51df2f0d3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -147,6 +147,8 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); * driver has to take blk-mq max supported nr_hw_queues into account * when figuring out nr_hw_queues from hardware info, for avoiding * inconsistency between driver and blk-mq. + * + * Limit to single queue in case of kdump kernel */ unsigned int blk_mq_max_nr_hw_queues(void) { @@ -4370,7 +4372,7 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set) if (set->nr_maps == 1) set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; - if (set->ops->map_queues && !is_kdump_kernel()) { + if (set->ops->map_queues) { int i; /* @@ -4420,6 +4422,22 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, return 0; } +/* Limit to single map in case of kdump kernel */ +static unsigned int blk_mq_max_nr_maps(void) +{ + if (is_kdump_kernel()) + return 1; + return HCTX_MAX_TYPES; +} + +/* Limit to 64 in case of kdump kernel */ +static unsigned int blk_mq_max_depth(void) +{ + if (is_kdump_kernel()) + return 64; + return BLK_MQ_MAX_DEPTH; +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -4456,16 +4474,13 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) else if (set->nr_maps > HCTX_MAX_TYPES) return -EINVAL; - /* - * If a crashdump is active, then we are potentially in a very - * memory constrained environment. Limit us to 1 queue and - * 64 tags to prevent using too much memory. 
- */ - if (is_kdump_kernel()) { - set->nr_hw_queues = 1; - set->nr_maps = 1; - set->queue_depth = min(64U, set->queue_depth); - } + if (set->nr_hw_queues > blk_mq_max_nr_hw_queues()) + set->nr_hw_queues = blk_mq_max_nr_hw_queues(); + if (set->nr_maps > blk_mq_max_nr_maps()) + set->nr_maps = blk_mq_max_nr_maps(); + if (set->queue_depth > blk_mq_max_depth()) + set->queue_depth = blk_mq_max_depth(); + /* * There is no use for more h/w queues than cpus if we just have * a single map @@ -4495,7 +4510,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) GFP_KERNEL, set->numa_node); if (!set->map[i].mq_map) goto out_free_mq_map; - set->map[i].nr_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues; + set->map[i].nr_queues = set->nr_hw_queues; } blk_mq_update_queue_map(set);
Clean up the code a bit by adding helpers that treat the kdump kernel specially.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)