@@ -179,7 +179,6 @@ static const char *const hctx_state_name[] = {
HCTX_STATE_NAME(STOPPED),
HCTX_STATE_NAME(TAG_ACTIVE),
HCTX_STATE_NAME(SCHED_RESTART),
- HCTX_STATE_NAME(TAG_WAITING),
HCTX_STATE_NAME(START_ON_RUN),
};
#undef HCTX_STATE_NAME
@@ -1005,42 +1005,53 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int fla
hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
- list_del(&wait->entry);
- clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
+ list_del_init(&wait->entry);
blk_mq_run_hw_queue(hctx, true);
return 1;
}
-static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
+static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx **hctx,
+ struct request *rq)
{
+ wait_queue_entry_t *wait = &(*hctx)->dispatch_wait;
struct sbq_wait_state *ws;
- /*
- * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.
- * The thread which wins the race to grab this bit adds the hardware
- * queue to the wait queue.
- */
- if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||
- test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))
+ if (!list_empty_careful(&wait->entry))
return false;
- init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
- ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
+ spin_lock(&(*hctx)->lock);
+ if (!list_empty(&wait->entry)) {
+ spin_unlock(&(*hctx)->lock);
+ return false;
+ }
+
+ init_waitqueue_func_entry(wait, blk_mq_dispatch_wake);
+ ws = bt_wait_ptr(&(*hctx)->tags->bitmap_tags, *hctx);
+ add_wait_queue(&ws->wait, wait);
+ spin_unlock(&(*hctx)->lock);
/*
- * As soon as this returns, it's no longer safe to fiddle with
- * hctx->dispatch_wait, since a completion can wake up the wait queue
- * and unlock the bit.
+ * It's possible that a tag was freed in the window between the
+ * allocation failure and adding the hardware queue to the wait
+ * queue. If we can get a tag now, remove ourselves from the
+ * wait queue to ensure someone else gets the wakeup.
*/
- add_wait_queue(&ws->wait, &hctx->dispatch_wait);
- return true;
+ if (blk_mq_get_driver_tag(rq, hctx, false)) {
+ spin_lock_irq(&ws->wait.lock);
+ list_del_init(&wait->entry);
+ spin_unlock_irq(&ws->wait.lock);
+ return true;
+ }
+
+ return false;
}
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
- bool got_budget)
+ bool got_budget)
{
struct blk_mq_hw_ctx *hctx;
struct request *rq, *nxt;
+ bool failed_tag = false;
int errors, queued;
if (list_empty(list))
@@ -1062,20 +1073,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
* The initial allocation attempt failed, so we need to
* rerun the hardware queue when a tag is freed.
*/
- if (!blk_mq_dispatch_wait_add(hctx)) {
- if (got_budget)
- blk_mq_put_dispatch_budget(hctx);
- break;
- }
-
- /*
- * It's possible that a tag was freed in the window
- * between the allocation failure and adding the
- * hardware queue to the wait queue.
- */
- if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+ if (!blk_mq_dispatch_wait_add(&hctx, rq)) {
if (got_budget)
blk_mq_put_dispatch_budget(hctx);
+ failed_tag = true;
break;
}
}
@@ -1140,10 +1141,11 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
* it is no longer set that means that it was cleared by another
* thread and hence that a queue rerun is needed.
*
- * If TAG_WAITING is set that means that an I/O scheduler has
- * been configured and another thread is waiting for a driver
- * tag. To guarantee fairness, do not rerun this hardware queue
- * but let the other thread grab the driver tag.
+ * If 'failed_tag' is set, that means that we failed getting
+ * a driver tag with an I/O scheduler attached. Ensure that
+ * we run the queue AFTER adding our entries back to the list,
+ * so we don't stall if the queue run and wakeup happened
+ * before we got here.
*
* If no I/O scheduler has been configured it is possible that
* the hardware queue got stopped and restarted before requests
@@ -1155,8 +1157,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
* returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
* and dm-rq.
*/
- if (!blk_mq_sched_needs_restart(hctx) &&
- !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
+ if (!blk_mq_sched_needs_restart(hctx) || failed_tag)
blk_mq_run_hw_queue(hctx, true);
}
@@ -181,8 +181,7 @@ enum {
BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1,
BLK_MQ_S_SCHED_RESTART = 2,
- BLK_MQ_S_TAG_WAITING = 3,
- BLK_MQ_S_START_ON_RUN = 4,
+ BLK_MQ_S_START_ON_RUN = 3,
BLK_MQ_MAX_DEPTH = 10240,