Message ID | 20200515014153.2403464-2-ming.lei@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | blk-mq: improvement CPU hotplug(simplified version) | expand |
On Fri, May 15, 2020 at 09:41:48AM +0800, Ming Lei wrote: > blk_mq_alloc_request_hctx() asks blk-mq to allocate request from > specified hctx, which is usually bound with fixed cpu mapping, and > request is supposed to be allocated on CPU in hctx->cpumask. > > So use smp_call_function_any() to allocate request on the cpu in > hctx->cpumask for blk_mq_alloc_request_hctx(). > > Dedclare blk_mq_get_request() beforehand because the following patches > reuses __blk_mq_alloc_request for blk_mq_get_request(). > > Prepare for improving cpu hotplug support. With your series applied the kernel instantly panics when creating a nvme-loop controller: [ 27.189993] nvmet: creating controller 1 for subsystem testnqn for NQN hostnqn. [ 27.199370] nvme nvme0: creating 4 I/O queues. [ 27.202650] BUG: kernel NULL pointer dereference, address: 0000000000000128 [ 27.205004] #PF: supervisor read access in kernel mode [ 27.206382] #PF: error_code(0x0000) - not-present page [ 27.207741] PGD 800000012dfc9067 P4D 800000012dfc9067 PUD 12dfae067 PMD 0 [ 27.209326] Oops: 0000 [#1] PREEMPT SMP PTI [ 27.210214] CPU: 2 PID: 3786 Comm: bash Not tainted 5.7.0-rc2+ #44 [ 27.211511] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 [ 27.213626] RIP: 0010:smp_call_function_any+0x34/0xf0 [ 27.214736] Code: 41 54 49 89 f4 55 48 89 fd bf 01 00 00 00 e8 33 23 f8 ff 48 c7 c7 3b 0b 12 d [ 27.218079] RSP: 0018:ffffc900003d3b90 EFLAGS: 00010202 [ 27.219340] RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000001 [ 27.221199] RDX: 0000000000000000 RSI: ffffffff8198c920 RDI: ffffffff83120b3b [ 27.222517] RBP: 0000000000000128 R08: 0000000000000002 R09: 0000000000020022 [ 27.223780] R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff8198c920 [ 27.225002] R13: ffffc900003d3bb8 R14: 0000000000000001 R15: ffff88812b488008 [ 27.226195] FS: 00007fca23cae740(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 [ 27.227520] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 
[ 27.228378] CR2: 0000000000000128 CR3: 000000012eba6000 CR4: 00000000000006e0 [ 27.229444] Call Trace: [ 27.229810] blk_mq_alloc_request_hctx+0xe7/0x140 [ 27.230487] nvme_alloc_request+0x2d/0x70 [ 27.231053] __nvme_submit_sync_cmd+0x4a/0x1f0 [ 27.231720] ? mark_held_locks+0x49/0x70 [ 27.232279] ? __slab_alloc.isra.0.constprop.0+0x63/0x80 [ 27.233038] ? nvmf_connect_io_queue+0x85/0x180 [ 27.233687] nvmf_connect_io_queue+0x12d/0x180 [ 27.234296] ? cpumask_next_and+0x19/0x20 [ 27.234848] ? nvme_loop_connect_io_queues+0x4c/0x60 [ 27.235718] ? blk_mq_init_queue_data+0x36/0x60 [ 27.236598] nvme_loop_connect_io_queues+0x4c/0x60 [ 27.237379] nvme_loop_create_ctrl+0x2f0/0x450 [ 27.238016] nvmf_dev_write+0x7e3/0xb2f [ 27.238541] ? find_held_lock+0x2b/0x80 [ 27.239080] ? do_user_addr_fault+0x205/0x480 [ 27.239763] vfs_write+0xb4/0x1a0 [ 27.240302] ksys_write+0x63/0xe0 [ 27.240845] do_syscall_64+0x4b/0x1e0 [ 27.241446] entry_SYSCALL_64_after_hwframe+0x49/0xb3 [ 27.242253] RIP: 0033:0x7fca233a2134 [ 27.242821] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 5 [ 27.245798] RSP: 002b:00007ffe430d43e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 27.247255] RAX: ffffffffffffffda RBX: 000000000000002b RCX: 00007fca233a2134 [ 27.248525] RDX: 000000000000002b RSI: 000000000119f408 RDI: 0000000000000001 [ 27.249806] RBP: 000000000119f408 R08: 000000000000000a R09: 00000000011d8988 [ 27.251238] R10: 000000000000000a R11: 0000000000000246 R12: 00007fca2366f760 [ 27.252195] R13: 000000000000002b R14: 00007fca2366a760 R15: 000000000000002b [ 27.253157] Modules linked in: [ 27.253603] CR2: 0000000000000128 [ 27.254056] ---[ end trace 75ba575e2625a1c6 ]--- [ 27.254684] RIP: 0010:smp_call_function_any+0x34/0xf0 [ 27.255398] Code: 41 54 49 89 f4 55 48 89 fd bf 01 00 00 00 e8 33 23 f8 ff 48 c7 c7 3b 0b 12 d [ 27.258140] RSP: 0018:ffffc900003d3b90 EFLAGS: 00010202 [ 27.258966] RAX: 0000000000000002 RBX: 0000000000000000 RCX: 0000000000000001 [ 27.260075] RDX: 
0000000000000000 RSI: ffffffff8198c920 RDI: ffffffff83120b3b [ 27.261166] RBP: 0000000000000128 R08: 0000000000000002 R09: 0000000000020022 [ 27.262278] R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff8198c920 [ 27.263219] R13: ffffc900003d3bb8 R14: 0000000000000001 R15: ffff88812b488008 [ 27.264139] FS: 00007fca23cae740(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 [ 27.265175] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 27.265893] CR2: 0000000000000128 CR3: 000000012eba6000 CR4: 00000000000006e0 [ 27.266805] Kernel panic - not syncing: Fatal exception [ 27.267640] Kernel Offset: disabled [ 27.268087] ---[ end Kernel panic - not syncing: Fatal exception ]---
diff --git a/block/blk-mq.c b/block/blk-mq.c index 9ee695bdf873..e2e1b6808b32 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -40,6 +40,10 @@ #include "blk-mq-sched.h" #include "blk-rq-qos.h" +static struct request *blk_mq_get_request(struct request_queue *q, + struct bio *bio, + struct blk_mq_alloc_data *data); + static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); @@ -330,6 +334,19 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, return rq; } +struct blk_mq_smp_call_info { + struct request_queue *q; + struct blk_mq_alloc_data *data; + struct request *rq; +}; + +static void __blk_mq_alloc_request(void *alloc_info) +{ + struct blk_mq_smp_call_info *info = alloc_info; + + info->rq = blk_mq_get_request(info->q, NULL, info->data); +} + static struct request *blk_mq_get_request(struct request_queue *q, struct bio *bio, struct blk_mq_alloc_data *data) @@ -424,8 +441,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx) { struct blk_mq_alloc_data alloc_data = { .flags = flags, .cmd_flags = op }; - struct request *rq; - unsigned int cpu; + struct blk_mq_smp_call_info info = {.q = q, .data = &alloc_data}; int ret; /* @@ -448,21 +464,22 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, * Check if the hardware context is actually mapped to anything. * If not tell the caller that it should skip this queue. 
*/ - alloc_data.hctx = q->queue_hw_ctx[hctx_idx]; - if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) { + if (!blk_mq_hw_queue_mapped(q->queue_hw_ctx[hctx_idx])) { blk_queue_exit(q); return ERR_PTR(-EXDEV); } - cpu = cpumask_first_and(alloc_data.hctx->cpumask, cpu_online_mask); - alloc_data.ctx = __blk_mq_get_ctx(q, cpu); - rq = blk_mq_get_request(q, NULL, &alloc_data); + ret = smp_call_function_any(alloc_data.hctx->cpumask, + __blk_mq_alloc_request, &info, 1); blk_queue_exit(q); - if (!rq) + if (ret) + return ERR_PTR(ret); + + if (!info.rq) return ERR_PTR(-EWOULDBLOCK); - return rq; + return info.rq; } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
blk_mq_alloc_request_hctx() asks blk-mq to allocate a request from the specified hctx, which is usually bound to a fixed cpu mapping, and the request is supposed to be allocated on a CPU in hctx->cpumask. So use smp_call_function_any() to allocate the request on a cpu in hctx->cpumask for blk_mq_alloc_request_hctx(). Declare blk_mq_get_request() beforehand because the following patches reuse __blk_mq_alloc_request for blk_mq_get_request(). Prepare for improving cpu hotplug support. Cc: Bart Van Assche <bvanassche@acm.org> Cc: Hannes Reinecke <hare@suse.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: John Garry <john.garry@huawei.com> Signed-off-by: Ming Lei <ming.lei@redhat.com> --- block/blk-mq.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-)