@@ -2382,6 +2382,116 @@ static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node)
return 0;
}
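+
+/*
+ * End the original request once the resubmitted clone completes, then
+ * release the clone itself.
+ */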
+static void blk_mq_resubmit_end_rq(struct request *rq, blk_status_t error)
+{
+ struct request *orig_rq = rq->end_io_data;
+
+ blk_mq_cleanup_rq(orig_rq);
+ blk_mq_end_request(orig_rq, error);
+
+ blk_put_request(rq);
+}
+
+static void blk_mq_resubmit_rq(struct request *rq)
+{
+ struct request *nrq;
+ unsigned int flags = 0;
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+ struct blk_mq_tags *tags = rq->q->elevator ? hctx->sched_tags :
+ hctx->tags;
+ bool reserved = blk_mq_tag_is_reserved(tags, rq->internal_tag);
+
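+ /* preserve the preempt/reserved attributes for the clone's allocation */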
+ if (rq->rq_flags & RQF_PREEMPT)
+ flags |= BLK_MQ_REQ_PREEMPT;
+ if (reserved)
+ flags |= BLK_MQ_REQ_RESERVED;
+
+ /* clear NOWAIT so the allocation blocks instead of failing */
+ nrq = blk_get_request(rq->q, rq->cmd_flags & ~REQ_NOWAIT, flags);
+ if (IS_ERR(nrq))
+ return;
+
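+ /* copy generic request fields and the driver-specific payload */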
+ blk_rq_copy_request(nrq, rq);
+
+ nrq->timeout = rq->timeout;
+ nrq->rq_disk = rq->rq_disk;
+ nrq->part = rq->part;
+
+ memcpy(blk_mq_rq_to_pdu(nrq), blk_mq_rq_to_pdu(rq),
+ rq->q->tag_set->cmd_size);
+
+ nrq->end_io = blk_mq_resubmit_end_rq;
+ nrq->end_io_data = rq;
+ nrq->bio = rq->bio;
+ nrq->biotail = rq->biotail;
+
+ /* ownership of the bios has been transferred to the new request */
+ rq->bio = rq->biotail = NULL;
+ rq->__data_len = 0;
+
+ if (blk_insert_cloned_request(nrq->q, nrq) != BLK_STS_OK)
+ blk_mq_request_bypass_insert(nrq, false, true);
+}
+
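+/*
+ * The hctx is inactive: drain requests from the scheduler, the sw queues
+ * and the dispatch list, end any partial flush sequence, and re-submit
+ * everything as cloned requests on a live hctx.
+ */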
+static void blk_mq_hctx_deactivate(struct blk_mq_hw_ctx *hctx)
+{
+ LIST_HEAD(sched);
+ LIST_HEAD(re_submit);
+ LIST_HEAD(flush_in);
+ LIST_HEAD(flush_out);
+ struct request *rq, *nxt;
+ struct elevator_queue *e = hctx->queue->elevator;
+
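+ /* collect this hctx's requests from the sw queues or the scheduler */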
+ if (!e) {
+ blk_mq_flush_busy_ctxs(hctx, &re_submit);
+ } else {
+ while ((rq = e->type->ops.dispatch_request(hctx))) {
+ if (rq->mq_hctx != hctx)
+ list_add(&rq->queuelist, &sched);
+ else
+ list_add(&rq->queuelist, &re_submit);
+ }
+ }
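+ /* requests mapped to other hctxs can go back to the scheduler directly */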
+ while (!list_empty(&sched)) {
+ rq = list_first_entry(&sched, struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ blk_mq_sched_insert_request(rq, true, true, true);
+ }
+
+ /* requests in the dispatch list have to be re-submitted too */
+ spin_lock(&hctx->lock);
+ list_splice_tail_init(&hctx->dispatch, &re_submit);
+ spin_unlock(&hctx->lock);
+
+ /* requests in a flush sequence are handled by blk_end_flush_machinery() */
+ list_for_each_entry_safe(rq, nxt, &re_submit, queuelist) {
+ if (rq->rq_flags & RQF_FLUSH_SEQ)
+ list_move(&rq->queuelist, &flush_in);
+ }
+ blk_end_flush_machinery(hctx, &flush_in, &flush_out);
+ list_splice_tail(&flush_out, &re_submit);
+
+ while (!list_empty(&re_submit)) {
+ rq = list_first_entry(&re_submit, struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ blk_mq_resubmit_rq(rq);
+ }
+}
+
static void blk_mq_hctx_handle_dead_cpu(struct blk_mq_hw_ctx *hctx,
unsigned int cpu)
{
@@ -2410,17 +2520,20 @@ static void blk_mq_hctx_handle_dead_cpu(struct blk_mq_hw_ctx *hctx,
}
/*
- * 'cpu' is going away. splice any existing rq_list entries from this
- * software queue to the hw queue dispatch list, and ensure that it
- * gets run.
+ * @cpu has gone away. If this hctx is inactive, we can't dispatch requests
+ * to it any more, so clone and re-submit the requests from this hctx.
*/
static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
{
struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
struct blk_mq_hw_ctx, cpuhp_dead);
- if (cpumask_test_cpu(cpu, hctx->cpumask))
- blk_mq_hctx_handle_dead_cpu(hctx, cpu);
+ if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+ if (test_bit(BLK_MQ_S_INACTIVE, &hctx->state))
+ blk_mq_hctx_deactivate(hctx);
+ else
+ blk_mq_hctx_handle_dead_cpu(hctx, cpu);
+ }
return 0;
}