[V2,RESEND,5/5] blk-mq: handle requests dispatched from IO scheduler in case that hctx is dead

Message ID 20191006024516.19996-6-ming.lei@redhat.com
State New, archived
Series blk-mq: improvement on handling IO during CPU hotplug

Commit Message

Ming Lei Oct. 6, 2019, 2:45 a.m. UTC
If a hctx becomes dead, all IO requests queued for this hctx have to be
re-submitted, so also cover the requests held in the scheduler queue.

Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Keith Busch <keith.busch@intel.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)
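
To make the new control flow easy to follow outside the kernel tree, here is
a minimal stand-alone C model of the drain-and-partition loop this patch
adds. Everything in it (the request type, the dispatch_request() stub, the
hand-rolled lists) is an illustrative stand-in for the kernel's own
structures, not real blk-mq code:

#include <stdio.h>

/* Toy stand-in for struct request; target_hctx models rq->mq_hctx and
 * next models rq->queuelist. */
struct request {
	int target_hctx;
	struct request *next;
};

/* Models e->type->ops.dispatch_request(hctx): pop one request from the
 * scheduler queue, or return NULL once it is drained. */
static struct request *dispatch_request(struct request **sched_queue)
{
	struct request *rq = *sched_queue;

	if (rq)
		*sched_queue = rq->next;
	return rq;
}

int main(void)
{
	struct request rqs[4] = {
		{ .target_hctx = 0 }, { .target_hctx = 1 },
		{ .target_hctx = 0 }, { .target_hctx = 2 },
	};
	struct request *sched_queue = NULL, *rq;
	struct request *tmp = NULL;		/* re-dispatch on the dead hctx */
	struct request *sched_tmp = NULL;	/* re-insert into the scheduler */
	int dead_hctx = 0;
	int i;

	/* Fill the scheduler queue. */
	for (i = 3; i >= 0; i--) {
		rqs[i].next = sched_queue;
		sched_queue = &rqs[i];
	}

	/* The patch's loop: drain the scheduler queue of the dead hctx and
	 * partition requests by the hctx they map to. */
	while ((rq = dispatch_request(&sched_queue))) {
		if (rq->target_hctx != dead_hctx) {
			/* kernel: re-queued via blk_mq_sched_insert_request() */
			rq->next = sched_tmp;
			sched_tmp = rq;
		} else {
			/* kernel: re-dispatched along with the sw-queue requests */
			rq->next = tmp;
			tmp = rq;
		}
	}

	for (rq = tmp; rq; rq = rq->next)
		printf("re-dispatch: request for dead hctx %d\n", rq->target_hctx);
	for (rq = sched_tmp; rq; rq = rq->next)
		printf("re-insert: request for live hctx %d\n", rq->target_hctx);
	return 0;
}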

Comments

Hannes Reinecke Oct. 7, 2019, 6:28 a.m. UTC | #1
On 10/6/19 4:45 AM, Ming Lei wrote:
> If a hctx becomes dead, all IO requests queued for this hctx have to be
> re-submitted, so also cover the requests held in the scheduler queue.
> 
> [...]
> 
Reviewed-by: Hannes Reinecke <hare@suse.com>

Cheers,

Hannes

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0b35fdbd1f17..94fd47cef1bc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2313,6 +2313,7 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 	enum hctx_type type;
 	bool hctx_dead;
 	struct request *rq;
+	struct elevator_queue *e;
 
 	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
 	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
@@ -2323,12 +2324,31 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 	hctx_dead = cpumask_first_and(hctx->cpumask, cpu_online_mask) >=
 		nr_cpu_ids;
 
-	spin_lock(&ctx->lock);
-	if (!list_empty(&ctx->rq_lists[type])) {
-		list_splice_init(&ctx->rq_lists[type], &tmp);
-		blk_mq_hctx_clear_pending(hctx, ctx);
+	e = hctx->queue->elevator;
+	if (!e) {
+		spin_lock(&ctx->lock);
+		if (!list_empty(&ctx->rq_lists[type])) {
+			list_splice_init(&ctx->rq_lists[type], &tmp);
+			blk_mq_hctx_clear_pending(hctx, ctx);
+		}
+		spin_unlock(&ctx->lock);
+	} else if (hctx_dead) {
+		LIST_HEAD(sched_tmp);
+
+		while ((rq = e->type->ops.dispatch_request(hctx))) {
+			if (rq->mq_hctx != hctx)
+				list_add(&rq->queuelist, &sched_tmp);
+			else
+				list_add(&rq->queuelist, &tmp);
+		}
+
+		while (!list_empty(&sched_tmp)) {
+			rq = list_entry(sched_tmp.next, struct request,
+					queuelist);
+			list_del_init(&rq->queuelist);
+			blk_mq_sched_insert_request(rq, true, true, true);
+		}
 	}
-	spin_unlock(&ctx->lock);
 
 	if (list_empty(&tmp))
 		return 0;
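
For context on when blk_mq_hctx_notify_dead() runs: blk-mq registers it as a
multi-instance CPU hotplug "dead" callback, so it is invoked once per
registered hctx after a CPU goes offline. The sketch below shows that wiring
as it looks in mainline blk-mq of this era; the exact call sites can differ
between kernel versions, so treat it as an illustration rather than part of
this patch.

/* One-time setup (in blk-mq's init path): tie the CPUHP_BLK_MQ_DEAD state
 * to the teardown callback modified by this patch. */
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
			blk_mq_hctx_notify_dead);

/* Per-hctx (when a hardware context is set up): register this hctx's
 * hlist_node on that state, so the callback fires for it after each
 * CPU death. */
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);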