[V11,11/12] blk-mq: re-submit IO in case that hctx is inactive
diff mbox series

Message ID 20200513034803.1844579-12-ming.lei@redhat.com
State New
Headers show
Series
  • blk-mq: improvement CPU hotplug
Related show

Commit Message

Ming Lei May 13, 2020, 3:48 a.m. UTC
When all CPUs in one hctx are offline and this hctx becomes inactive, we
shouldn't run this hw queue for completing requests any more.

So allocate a request from one live hctx, and clone & resubmit the request,
whether it comes from the sw queue or the scheduler queue.

Cc: John Garry <john.garry@huawei.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 111 insertions(+), 5 deletions(-)

Comments

John Garry May 13, 2020, 9:21 a.m. UTC | #1
On 13/05/2020 04:48, Ming Lei wrote:
> +static void blk_mq_resubmit_rq(struct request *rq)
> +{
> +	struct request *nrq;
> +	unsigned int flags = 0;
> +	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
> +	struct blk_mq_tags *tags = rq->q->elevator ? hctx->sched_tags :
> +		hctx->tags;
> +	bool reserved = blk_mq_tag_is_reserved(tags, rq->internal_tag);
> +
> +	if (rq->rq_flags & RQF_PREEMPT)
> +		flags |= BLK_MQ_REQ_PREEMPT;
> +	if (reserved)
> +		flags |= BLK_MQ_REQ_RESERVED;
> +	/*
> +	 * Queue freezing might be in-progress, and wait freeze can't be
> +	 * done now because we have request not completed yet, so mark this
> +	 * allocation as BLK_MQ_REQ_FORCE for avoiding this allocation &
> +	 * freeze hung forever.
> +	 */
> +	flags |= BLK_MQ_REQ_FORCE;
> +

So setting this flag triggers this WARN:

[  101.308666] Modules linked in:
[  101.311710] CPU: 23 PID: 1491 Comm: bash Not tainted 
5.7.0-rc2-00106-g63430d85fea8 #337
[  101.319698] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon 
D05 IT21 Nemo 2.0 RC0 04/18/2018
[  101.328816] pstate: 60000005 (nZCv daif -PAN -UAO)
[  101.333593] pc : blk_get_request+0xa4/0xac
[  101.337676] lr : blk_get_request+0xa4/0xac
[  101.341758] sp : ffff800021773aa0
[  101.345059] x29: ffff800021773aa0 x28: 0000000000000004
[  101.350357] x27: 0000000000000004 x26: 0000000000000004
[  101.355655] x25: 0000000000000000 x24: ffff800010414b20
[  101.360953] x23: 0000000000000008 x22: ffff001fb0ecf900
[  101.366251] x21: ffff001fb0a42f40 x20: 0000000000004000
[  101.371549] x19: 0000000000000010 x18: 0000000000000000
[  101.376846] x17: 0000000000000000 x16: 0000000000000000
[  101.382144] x15: 0000000000000000 x14: 0000000000000000
[  101.387441] x13: 0000000000000000 x12: 0000000000000000
[  101.392739] x11: 000000000000064f x10: 0000000000000008
[  101.398036] x9 : ffff8000118f1c28 x8 : 2074736575716572
[  101.403334] x7 : 5f7465675f6b6c62 x6 : ffff041febee21d0
[  101.408632] x5 : 0000000000000000 x4 : 0000000000000000
[  101.413930] x3 : 0000000000000000 x2 : ffff041febee9080
[  101.419229] x1 : 0000000100000000 x0 : 000000000000001a
[  101.424527] Call trace:
[  101.426961]  blk_get_request+0xa4/0xac
[  101.430698]  blk_mq_hctx_deactivate+0x270/0x3e4
[  101.435215]  blk_mq_hctx_notify_dead+0x198/0x1b4
[  101.439821]  cpuhp_invoke_callback+0x170/0x1e0
[  101.444253]  _cpu_down+0x100/0x238
[  101.447642]  cpu_down+0x40/0x68
[  101.450770]  cpu_device_down+0x14/0x1c
[  101.454508]  cpu_subsys_offline+0xc/0x14
[  101.458417]  device_offline+0x98/0xc4
[  101.462065]  online_store+0x3c/0x88
[  101.465541]  dev_attr_store+0x14/0x24
[  101.469192]  sysfs_kf_write+0x44/0x4c
[  101.472840]  kernfs_fop_write+0xfc/0x208
[  101.476751]  __vfs_write+0x18/0x3c
[  101.480140]  vfs_write+0xb4/0x1b8
[  101.483442]  ksys_write+0x4c/0xac
[  101.486743]  __arm64_sys_write+0x1c/0x24
[  101.490654]  el0_svc_common.constprop.3+0xb8/0x170
[  101.495431]  do_el0_svc+0x70/0x88
[  101.498734]  el0_sync_handler+0xf0/0x12c
[  101.502643]  el0_sync+0x140/0x180
[  101.505944] ---[ end trace 137fed615521bd97 ]--

(the series tests ok, apart from that)

Thanks,
John

> +	/* avoid allocation failure by clearing NOWAIT */
> +	nrq = blk_get_request(rq->q, rq->cmd_flags & ~REQ_NOWAIT, flags);
> +	if (!nrq)
> +		return;
Christoph Hellwig May 13, 2020, 12:21 p.m. UTC | #2
Use of the BLK_MQ_REQ_FORCE is pretty bogus here..

> +	if (rq->rq_flags & RQF_PREEMPT)
> +		flags |= BLK_MQ_REQ_PREEMPT;
> +	if (reserved)
> +		flags |= BLK_MQ_REQ_RESERVED;
> +	/*
> +	 * Queue freezing might be in-progress, and wait freeze can't be
> +	 * done now because we have request not completed yet, so mark this
> +	 * allocation as BLK_MQ_REQ_FORCE for avoiding this allocation &
> +	 * freeze hung forever.
> +	 */
> +	flags |= BLK_MQ_REQ_FORCE;
> +
> +	/* avoid allocation failure by clearing NOWAIT */
> +	nrq = blk_get_request(rq->q, rq->cmd_flags & ~REQ_NOWAIT, flags);
> +	if (!nrq)
> +		return;

blk_get_request returns an ERR_PTR.

But I'd rather avoid the magic new BLK_MQ_REQ_FORCE hack when we can
just open code it and document what is going on:

static struct blk_mq_tags *blk_mq_rq_tags(struct request *rq)
{
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

	if (rq->q->elevator)
		return hctx->sched_tags;
	return hctx->tags;
}

static void blk_mq_resubmit_rq(struct request *rq)
{
	struct blk_mq_alloc_data alloc_data = {
		.cmd_flags	= rq->cmd_flags & ~REQ_NOWAIT;
	};
	struct request *nrq;

	if (rq->rq_flags & RQF_PREEMPT)
		alloc_data.flags |= BLK_MQ_REQ_PREEMPT;
	if (blk_mq_tag_is_reserved(blk_mq_rq_tags(rq), rq->internal_tag))
		alloc_data.flags |= BLK_MQ_REQ_RESERVED;

	/*
	 * We must still be able to finish a resubmission due to a hotplug
	 * even if a queue freeze is in progress.
	 */
	percpu_ref_get(&q->q_usage_counter);
	nrq = blk_mq_get_request(rq->q, NULL, &alloc_data);
	blk_queue_exit(q);

	if (!nrq)
		return; // XXX: warn?
	if (nrq->q->mq_ops->initialize_rq_fn)
		rq->mq_ops->initialize_rq_fn(nrq);

	blk_rq_copy_request(nrq, rq);
	...
Bart Van Assche May 13, 2020, 3:03 p.m. UTC | #3
On 2020-05-13 05:21, Christoph Hellwig wrote:
> Use of the BLK_MQ_REQ_FORCE is pretty bogus here..
> 
>> +	if (rq->rq_flags & RQF_PREEMPT)
>> +		flags |= BLK_MQ_REQ_PREEMPT;
>> +	if (reserved)
>> +		flags |= BLK_MQ_REQ_RESERVED;
>> +	/*
>> +	 * Queue freezing might be in-progress, and wait freeze can't be
>> +	 * done now because we have request not completed yet, so mark this
>> +	 * allocation as BLK_MQ_REQ_FORCE for avoiding this allocation &
>> +	 * freeze hung forever.
>> +	 */
>> +	flags |= BLK_MQ_REQ_FORCE;
>> +
>> +	/* avoid allocation failure by clearing NOWAIT */
>> +	nrq = blk_get_request(rq->q, rq->cmd_flags & ~REQ_NOWAIT, flags);
>> +	if (!nrq)
>> +		return;
> 
> blk_get_request returns an ERR_PTR.
> 
> But I'd rather avoid the magic new BLK_MQ_REQ_FORCE hack when we can
> just open code it and document what is going on:
> 
> static struct blk_mq_tags *blk_mq_rq_tags(struct request *rq)
> {
> 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
> 
> 	if (rq->q->elevator)
> 		return hctx->sched_tags;
> 	return hctx->tags;
> }
> 
> static void blk_mq_resubmit_rq(struct request *rq)
> {
> 	struct blk_mq_alloc_data alloc_data = {
> 		.cmd_flags	= rq->cmd_flags & ~REQ_NOWAIT;
> 	};
> 	struct request *nrq;
> 
> 	if (rq->rq_flags & RQF_PREEMPT)
> 		alloc_data.flags |= BLK_MQ_REQ_PREEMPT;
> 	if (blk_mq_tag_is_reserved(blk_mq_rq_tags(rq), rq->internal_tag))
> 		alloc_data.flags |= BLK_MQ_REQ_RESERVED;
> 
> 	/*
> 	 * We must still be able to finish a resubmission due to a hotplug
> 	 * even if a queue freeze is in progress.
> 	 */
> 	percpu_ref_get(&q->q_usage_counter);
> 	nrq = blk_mq_get_request(rq->q, NULL, &alloc_data);
> 	blk_queue_exit(q);
> 
> 	if (!nrq)
> 		return; // XXX: warn?
> 	if (nrq->q->mq_ops->initialize_rq_fn)
> 		rq->mq_ops->initialize_rq_fn(nrq);
> 
> 	blk_rq_copy_request(nrq, rq);
> 	...

I don't like this because the above code allows allocation of requests
and tags while a request queue is frozen. I'm concerned that this will
break code that assumes that no tags are allocated while a request queue
is frozen. If a request queue has a single hardware queue with 64 tags,
if the above code allocates tag 40 and if blk_mq_tag_update_depth()
reduces the queue depth to 32, will nrq become a dangling pointer?

Thanks,

Bart.
Ming Lei May 14, 2020, 12:40 a.m. UTC | #4
On Wed, May 13, 2020 at 02:21:47PM +0200, Christoph Hellwig wrote:
> Use of the BLK_MQ_REQ_FORCE is pretty bogus here..
> 
> > +	if (rq->rq_flags & RQF_PREEMPT)
> > +		flags |= BLK_MQ_REQ_PREEMPT;
> > +	if (reserved)
> > +		flags |= BLK_MQ_REQ_RESERVED;
> > +	/*
> > +	 * Queue freezing might be in-progress, and wait freeze can't be
> > +	 * done now because we have request not completed yet, so mark this
> > +	 * allocation as BLK_MQ_REQ_FORCE for avoiding this allocation &
> > +	 * freeze hung forever.
> > +	 */
> > +	flags |= BLK_MQ_REQ_FORCE;
> > +
> > +	/* avoid allocation failure by clearing NOWAIT */
> > +	nrq = blk_get_request(rq->q, rq->cmd_flags & ~REQ_NOWAIT, flags);
> > +	if (!nrq)
> > +		return;
> 
> blk_get_request returns an ERR_PTR.
> 
> But I'd rather avoid the magic new BLK_MQ_REQ_FORCE hack when we can
> just open code it and document what is going on:

BLK_MQ_REQ_FORCE is actually not a hack, there are other use cases
which need that too, see commit log of patch 10/12.

> 
> static struct blk_mq_tags *blk_mq_rq_tags(struct request *rq)
> {
> 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
> 
> 	if (rq->q->elevator)
> 		return hctx->sched_tags;
> 	return hctx->tags;
> }
> 
> static void blk_mq_resubmit_rq(struct request *rq)
> {
> 	struct blk_mq_alloc_data alloc_data = {
> 		.cmd_flags	= rq->cmd_flags & ~REQ_NOWAIT;
> 	};
> 	struct request *nrq;
> 
> 	if (rq->rq_flags & RQF_PREEMPT)
> 		alloc_data.flags |= BLK_MQ_REQ_PREEMPT;
> 	if (blk_mq_tag_is_reserved(blk_mq_rq_tags(rq), rq->internal_tag))
> 		alloc_data.flags |= BLK_MQ_REQ_RESERVED;
> 
> 	/*
> 	 * We must still be able to finish a resubmission due to a hotplug
> > 	 * even if a queue freeze is in progress.
> 	 */
> 	percpu_ref_get(&q->q_usage_counter);
> 	nrq = blk_mq_get_request(rq->q, NULL, &alloc_data);
> 	blk_queue_exit(q);

This way works too.

> 
> 	if (!nrq)
> 		return; // XXX: warn?

It isn't possible because we clear the NOWAIT flag.


Thanks, 
Ming
Ming Lei May 14, 2020, 12:45 a.m. UTC | #5
On Wed, May 13, 2020 at 08:03:13AM -0700, Bart Van Assche wrote:
> On 2020-05-13 05:21, Christoph Hellwig wrote:
> > Use of the BLK_MQ_REQ_FORCE is pretty bogus here..
> > 
> >> +	if (rq->rq_flags & RQF_PREEMPT)
> >> +		flags |= BLK_MQ_REQ_PREEMPT;
> >> +	if (reserved)
> >> +		flags |= BLK_MQ_REQ_RESERVED;
> >> +	/*
> >> +	 * Queue freezing might be in-progress, and wait freeze can't be
> >> +	 * done now because we have request not completed yet, so mark this
> >> +	 * allocation as BLK_MQ_REQ_FORCE for avoiding this allocation &
> >> +	 * freeze hung forever.
> >> +	 */
> >> +	flags |= BLK_MQ_REQ_FORCE;
> >> +
> >> +	/* avoid allocation failure by clearing NOWAIT */
> >> +	nrq = blk_get_request(rq->q, rq->cmd_flags & ~REQ_NOWAIT, flags);
> >> +	if (!nrq)
> >> +		return;
> > 
> > blk_get_request returns an ERR_PTR.
> > 
> > But I'd rather avoid the magic new BLK_MQ_REQ_FORCE hack when we can
> > just open code it and document what is going on:
> > 
> > static struct blk_mq_tags *blk_mq_rq_tags(struct request *rq)
> > {
> > 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
> > 
> > 	if (rq->q->elevator)
> > 		return hctx->sched_tags;
> > 	return hctx->tags;
> > }
> > 
> > static void blk_mq_resubmit_rq(struct request *rq)
> > {
> > 	struct blk_mq_alloc_data alloc_data = {
> > 		.cmd_flags	= rq->cmd_flags & ~REQ_NOWAIT;
> > 	};
> > 	struct request *nrq;
> > 
> > 	if (rq->rq_flags & RQF_PREEMPT)
> > 		alloc_data.flags |= BLK_MQ_REQ_PREEMPT;
> > 	if (blk_mq_tag_is_reserved(blk_mq_rq_tags(rq), rq->internal_tag))
> > 		alloc_data.flags |= BLK_MQ_REQ_RESERVED;
> > 
> > 	/*
> > 	 * We must still be able to finish a resubmission due to a hotplug
> > > 	 * even if a queue freeze is in progress.
> > 	 */
> > 	percpu_ref_get(&q->q_usage_counter);
> > 	nrq = blk_mq_get_request(rq->q, NULL, &alloc_data);
> > 	blk_queue_exit(q);
> > 
> > 	if (!nrq)
> > 		return; // XXX: warn?
> > 	if (nrq->q->mq_ops->initialize_rq_fn)
> > 		rq->mq_ops->initialize_rq_fn(nrq);
> > 
> > 	blk_rq_copy_request(nrq, rq);
> > 	...
> 
> I don't like this because the above code allows allocation of requests
> and tags while a request queue is frozen. I'm concerned that this will
> break code that assumes that no tags are allocated while a request queue
> is frozen. If a request queue has a single hardware queue with 64 tags,

The above code path will never be called for a single hw queue.

> if the above code allocates tag 40 and if blk_mq_tag_update_depth()
> reduces the queue depth to 32, will nrq become a dangling pointer?

allocation for nrq is just like any other normal allocation, and if
it doesn't work with blk_mq_tag_update_depth(), it must be a more
generic issue rather than one related to this specific use case.

The only difference is that 'nrq' will be allocated from a new active
hctx, so the two requests can co-exist and we needn't worry about deadlock.


thanks,
Ming

Patch
diff mbox series

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7c640482fb24..c9a3e48a1ebc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2402,6 +2402,109 @@  static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node)
 	return 0;
 }
 
+static void blk_mq_resubmit_end_rq(struct request *rq, blk_status_t error)
+{
+	struct request *orig_rq = rq->end_io_data;
+
+	blk_mq_cleanup_rq(orig_rq);
+	blk_mq_end_request(orig_rq, error);
+
+	blk_put_request(rq);
+}
+
+static void blk_mq_resubmit_rq(struct request *rq)
+{
+	struct request *nrq;
+	unsigned int flags = 0;
+	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+	struct blk_mq_tags *tags = rq->q->elevator ? hctx->sched_tags :
+		hctx->tags;
+	bool reserved = blk_mq_tag_is_reserved(tags, rq->internal_tag);
+
+	if (rq->rq_flags & RQF_PREEMPT)
+		flags |= BLK_MQ_REQ_PREEMPT;
+	if (reserved)
+		flags |= BLK_MQ_REQ_RESERVED;
+	/*
+	 * Queue freezing might be in-progress, and wait freeze can't be
+	 * done now because we have request not completed yet, so mark this
+	 * allocation as BLK_MQ_REQ_FORCE for avoiding this allocation &
+	 * freeze hung forever.
+	 */
+	flags |= BLK_MQ_REQ_FORCE;
+
+	/* avoid allocation failure by clearing NOWAIT */
+	nrq = blk_get_request(rq->q, rq->cmd_flags & ~REQ_NOWAIT, flags);
+	if (!nrq)
+		return;
+
+	blk_rq_copy_request(nrq, rq);
+
+	nrq->timeout = rq->timeout;
+	nrq->rq_disk = rq->rq_disk;
+	nrq->part = rq->part;
+
+	memcpy(blk_mq_rq_to_pdu(nrq), blk_mq_rq_to_pdu(rq),
+			rq->q->tag_set->cmd_size);
+
+	nrq->end_io = blk_mq_resubmit_end_rq;
+	nrq->end_io_data = rq;
+	nrq->bio = rq->bio;
+	nrq->biotail = rq->biotail;
+
+	/* bios ownership has been transfered to new request */
+	rq->bio = rq->biotail = NULL;
+	rq->__data_len = 0;
+
+	if (blk_insert_cloned_request(nrq->q, nrq) != BLK_STS_OK)
+		blk_mq_request_bypass_insert(nrq, false, true);
+}
+
+static void blk_mq_hctx_deactivate(struct blk_mq_hw_ctx *hctx)
+{
+	LIST_HEAD(sched);
+	LIST_HEAD(re_submit);
+	LIST_HEAD(flush_in);
+	LIST_HEAD(flush_out);
+	struct request *rq, *nxt;
+	struct elevator_queue *e = hctx->queue->elevator;
+
+	if (!e) {
+		blk_mq_flush_busy_ctxs(hctx, &re_submit);
+	} else {
+		while ((rq = e->type->ops.dispatch_request(hctx))) {
+			if (rq->mq_hctx != hctx)
+				list_add(&rq->queuelist, &sched);
+			else
+				list_add(&rq->queuelist, &re_submit);
+		}
+	}
+	while (!list_empty(&sched)) {
+		rq = list_first_entry(&sched, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		blk_mq_sched_insert_request(rq, true, true, true);
+	}
+
+	/* requests in dispatch list have to be re-submitted too */
+	spin_lock(&hctx->lock);
+	list_splice_tail_init(&hctx->dispatch, &re_submit);
+	spin_unlock(&hctx->lock);
+
+	/* blk_end_flush_machinery will cover flush request */
+	list_for_each_entry_safe(rq, nxt, &re_submit, queuelist) {
+		if (rq->rq_flags & RQF_FLUSH_SEQ)
+			list_move(&rq->queuelist, &flush_in);
+	}
+	blk_end_flush_machinery(hctx, &flush_in, &flush_out);
+	list_splice_tail(&flush_out, &re_submit);
+
+	while (!list_empty(&re_submit)) {
+		rq = list_first_entry(&re_submit, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		blk_mq_resubmit_rq(rq);
+	}
+}
+
 static void blk_mq_hctx_handle_dead_cpu(struct blk_mq_hw_ctx *hctx,
 		unsigned int cpu)
 {
@@ -2430,17 +2533,20 @@  static void blk_mq_hctx_handle_dead_cpu(struct blk_mq_hw_ctx *hctx,
 }
 
 /*
- * 'cpu' is going away. splice any existing rq_list entries from this
- * software queue to the hw queue dispatch list, and ensure that it
- * gets run.
+ * @cpu has gone away. If this hctx is inactive, we can't dispatch request
+ * to the hctx any more, so clone and re-submit requests from this hctx
  */
 static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 {
 	struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
 			struct blk_mq_hw_ctx, cpuhp_dead);
 
-	if (cpumask_test_cpu(cpu, hctx->cpumask))
-		blk_mq_hctx_handle_dead_cpu(hctx, cpu);
+	if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+		if (test_bit(BLK_MQ_S_INACTIVE, &hctx->state))
+			blk_mq_hctx_deactivate(hctx);
+		else
+			blk_mq_hctx_handle_dead_cpu(hctx, cpu);
+	}
 	return 0;
 }