diff mbox

[V13,04/10] mmc: block: Add CQE support

Message ID 1509715220-31885-5-git-send-email-adrian.hunter@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Adrian Hunter Nov. 3, 2017, 1:20 p.m. UTC
Add CQE support to the block driver, including:
    - optionally using DCMD for flush requests
    - "manually" issuing discard requests
    - issuing read / write requests to the CQE
    - supporting block-layer timeouts
    - handling recovery
    - supporting re-tuning

CQE offers 25% - 50% better random multi-threaded I/O.  There is a slight
(e.g. 2%) drop in sequential read speed but no observable change to sequential
write.

CQE automatically sends the commands to complete requests.  However it only
supports reads / writes and so-called "direct commands" (DCMD).  Furthermore
DCMD is limited to one command at a time, but discards require 3 commands.
That makes issuing discards through CQE very awkward, but some CQE's don't
support DCMD anyway.  So for discards, the existing non-CQE approach is
taken, where the mmc core code issues the 3 commands one at a time i.e.
mmc_erase(). Where DCMD is used, is for issuing flushes.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/mmc/core/block.c | 150 +++++++++++++++++++++++++++++++++++++++++++-
 drivers/mmc/core/block.h |   2 +
 drivers/mmc/core/queue.c | 158 +++++++++++++++++++++++++++++++++++++++++++++--
 drivers/mmc/core/queue.h |  18 ++++++
 4 files changed, 322 insertions(+), 6 deletions(-)

Comments

Linus Walleij Nov. 8, 2017, 9 a.m. UTC | #1
On Fri, Nov 3, 2017 at 2:20 PM, Adrian Hunter <adrian.hunter@intel.com> wrote:

> @@ -2188,11 +2327,18 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req)
>                         return MMC_REQ_FAILED_TO_START;
>                 }
>                 return MMC_REQ_FINISHED;
> +       case MMC_ISSUE_DCMD:
>         case MMC_ISSUE_ASYNC:
>                 switch (req_op(req)) {
> +               case REQ_OP_FLUSH:
> +                       ret = mmc_blk_cqe_issue_flush(mq, req);
> +                       break;
>                 case REQ_OP_READ:
>                 case REQ_OP_WRITE:
> -                       ret = mmc_blk_mq_issue_rw_rq(mq, req);
> +                       if (mq->use_cqe)
> +                               ret = mmc_blk_cqe_issue_rw_rq(mq, req);
> +                       else
> +                               ret = mmc_blk_mq_issue_rw_rq(mq, req);
>                         break;
>                 default:
>                         WARN_ON_ONCE(1);

This and other bits gives me the feeling CQE is now actually ONLY
working on the MQ path.

That is good. We only add new functionality on the MQ path,
yay!

But this fact (only abailable iff MQ==true) should at least be
mentioned in the commit message I think?

So why not ditch the old block layer or at least make MQ default?

When you keep it like this people have to reconfigure
their kernel to enable MQ before they see the benefits of MQ+CQE
combined, I think that should rather be the default experience.

Yours,
Linus Walleij
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adrian Hunter Nov. 8, 2017, 1:20 p.m. UTC | #2
On 08/11/17 11:00, Linus Walleij wrote:
> On Fri, Nov 3, 2017 at 2:20 PM, Adrian Hunter <adrian.hunter@intel.com> wrote:
> 
>> @@ -2188,11 +2327,18 @@ enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req)
>>                         return MMC_REQ_FAILED_TO_START;
>>                 }
>>                 return MMC_REQ_FINISHED;
>> +       case MMC_ISSUE_DCMD:
>>         case MMC_ISSUE_ASYNC:
>>                 switch (req_op(req)) {
>> +               case REQ_OP_FLUSH:
>> +                       ret = mmc_blk_cqe_issue_flush(mq, req);
>> +                       break;
>>                 case REQ_OP_READ:
>>                 case REQ_OP_WRITE:
>> -                       ret = mmc_blk_mq_issue_rw_rq(mq, req);
>> +                       if (mq->use_cqe)
>> +                               ret = mmc_blk_cqe_issue_rw_rq(mq, req);
>> +                       else
>> +                               ret = mmc_blk_mq_issue_rw_rq(mq, req);
>>                         break;
>>                 default:
>>                         WARN_ON_ONCE(1);
> 
> This and other bits gives me the feeling CQE is now actually ONLY
> working on the MQ path.

I was not allowed to support non-mq.

> 
> That is good. We only add new functionality on the MQ path,
> yay!
> 
> But this fact (only abailable iff MQ==true) should at least be
> mentioned in the commit message I think?

Why?  CQE is MQ only.

> 
> So why not ditch the old block layer or at least make MQ default?

CQE is MQ only.

> 
> When you keep it like this people have to reconfigure
> their kernel to enable MQ before they see the benefits of MQ+CQE
> combined, I think that should rather be the default experience.

Not at all.  I guess you are confusing the legacy mmc with CQE.  CQE is not
a layer on top of legacy mmc.  It is an alternative to legacy mmc.  CQE
does not sit on top of the legacy mmc blk-mq support.  You don't have to
enable legacy mmc blk-mq support to use CQE.
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Linus Walleij Nov. 9, 2017, 12:04 p.m. UTC | #3
On Wed, Nov 8, 2017 at 2:20 PM, Adrian Hunter <adrian.hunter@intel.com> wrote:
> On 08/11/17 11:00, Linus Walleij wrote:

>> This and other bits gives me the feeling CQE is now actually ONLY
>> working on the MQ path.
>
> I was not allowed to support non-mq.

Fair enough.

>> That is good. We only add new functionality on the MQ path,
>> yay!
>>
>> But this fact (only abailable iff MQ==true) should at least be
>> mentioned in the commit message I think?
>
> Why?  CQE is MQ only.

So if you read what I say, I think the commit message should
say that CQE is MQ only so that people know that CQE is
MQ only.

>> So why not ditch the old block layer or at least make MQ default?
>
> CQE is MQ only.

Yeah? So why keep it around for everything else?

>> When you keep it like this people have to reconfigure
>> their kernel to enable MQ before they see the benefits of MQ+CQE
>> combined, I think that should rather be the default experience.
>
> Not at all.  I guess you are confusing the legacy mmc with CQE.  CQE is not
> a layer on top of legacy mmc.  It is an alternative to legacy mmc.  CQE
> does not sit on top of the legacy mmc blk-mq support.  You don't have to
> enable legacy mmc blk-mq support to use CQE.

Now I am confused. I can't parse the last sentence. There is no
such thing as legcay blk-mq?

Yours,
Linus Walleij
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adrian Hunter Nov. 9, 2017, 12:39 p.m. UTC | #4
On 09/11/17 14:04, Linus Walleij wrote:
> On Wed, Nov 8, 2017 at 2:20 PM, Adrian Hunter <adrian.hunter@intel.com> wrote:
>> On 08/11/17 11:00, Linus Walleij wrote:
> 
>>> This and other bits gives me the feeling CQE is now actually ONLY
>>> working on the MQ path.
>>
>> I was not allowed to support non-mq.
> 
> Fair enough.
> 
>>> That is good. We only add new functionality on the MQ path,
>>> yay!
>>>
>>> But this fact (only abailable iff MQ==true) should at least be
>>> mentioned in the commit message I think?
>>
>> Why?  CQE is MQ only.
> 
> So if you read what I say, I think the commit message should
> say that CQE is MQ only so that people know that CQE is
> MQ only.

Alright

> 
>>> So why not ditch the old block layer or at least make MQ default?
>>
>> CQE is MQ only.
> 
> Yeah? So why keep it around for everything else?

Never said we should keep it around.  As soon as blk-mq is ready and tested,
delete it.

> 
>>> When you keep it like this people have to reconfigure
>>> their kernel to enable MQ before they see the benefits of MQ+CQE
>>> combined, I think that should rather be the default experience.
>>
>> Not at all.  I guess you are confusing the legacy mmc with CQE.  CQE is not
>> a layer on top of legacy mmc.  It is an alternative to legacy mmc.  CQE
>> does not sit on top of the legacy mmc blk-mq support.  You don't have to
>> enable legacy mmc blk-mq support to use CQE.
> 
> Now I am confused. I can't parse the last sentence. There is no
> such thing as legcay blk-mq?

Don't need non-CQE mmc blk-mq support for CQE support.
--
To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index e2838ff4738e..e8be17152884 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -112,6 +112,7 @@  struct mmc_blk_data {
 #define MMC_BLK_WRITE		BIT(1)
 #define MMC_BLK_DISCARD		BIT(2)
 #define MMC_BLK_SECDISCARD	BIT(3)
+#define MMC_BLK_CQE_RECOVERY	BIT(4)
 
 	/*
 	 * Only set in main mmc_blk_data associated
@@ -1717,6 +1718,138 @@  static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq,
 		*do_data_tag_p = do_data_tag;
 }
 
+#define MMC_CQE_RETRIES 2
+
+static void mmc_blk_cqe_complete_rq(struct mmc_queue *mq, struct request *req)
+{
+	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
+	struct mmc_request *mrq = &mqrq->brq.mrq;
+	struct request_queue *q = req->q;
+	struct mmc_host *host = mq->card->host;
+	unsigned long flags;
+	bool put_card;
+	int err;
+
+	mmc_cqe_post_req(host, mrq);
+
+	if (mrq->cmd && mrq->cmd->error)
+		err = mrq->cmd->error;
+	else if (mrq->data && mrq->data->error)
+		err = mrq->data->error;
+	else
+		err = 0;
+
+	if (err) {
+		if (mqrq->retries++ < MMC_CQE_RETRIES)
+			blk_mq_requeue_request(req, true);
+		else
+			blk_mq_end_request(req, BLK_STS_IOERR);
+	} else if (mrq->data) {
+		if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered))
+			blk_mq_requeue_request(req, true);
+		else
+			__blk_mq_end_request(req, BLK_STS_OK);
+	} else {
+		blk_mq_end_request(req, BLK_STS_OK);
+	}
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	mq->in_flight[mmc_issue_type(mq, req)] -= 1;
+
+	put_card = mmc_tot_in_flight(mq) == 0;
+
+	mmc_cqe_check_busy(mq);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	if (!mq->cqe_busy)
+		blk_mq_run_hw_queues(q, true);
+
+	if (put_card)
+		mmc_put_card(mq->card, &mq->ctx);
+}
+
+void mmc_blk_cqe_recovery(struct mmc_queue *mq)
+{
+	struct mmc_card *card = mq->card;
+	struct mmc_host *host = card->host;
+	int err;
+
+	pr_debug("%s: CQE recovery start\n", mmc_hostname(host));
+
+	err = mmc_cqe_recovery(host);
+	if (err)
+		mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY);
+	else
+		mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY);
+
+	pr_debug("%s: CQE recovery done\n", mmc_hostname(host));
+}
+
+static void mmc_blk_cqe_req_done(struct mmc_request *mrq)
+{
+	struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req,
+						  brq.mrq);
+	struct request *req = mmc_queue_req_to_req(mqrq);
+	struct request_queue *q = req->q;
+	struct mmc_queue *mq = q->queuedata;
+
+	/*
+	 * Block layer timeouts race with completions which means the normal
+	 * completion path cannot be used during recovery.
+	 */
+	if (mq->in_recovery)
+		mmc_blk_cqe_complete_rq(mq, req);
+	else
+		blk_mq_complete_request(req);
+}
+
+static int mmc_blk_cqe_start_req(struct mmc_host *host, struct mmc_request *mrq)
+{
+	mrq->done		= mmc_blk_cqe_req_done;
+	mrq->recovery_notifier	= mmc_cqe_recovery_notifier;
+
+	return mmc_cqe_start_req(host, mrq);
+}
+
+static struct mmc_request *mmc_blk_cqe_prep_dcmd(struct mmc_queue_req *mqrq,
+						 struct request *req)
+{
+	struct mmc_blk_request *brq = &mqrq->brq;
+
+	memset(brq, 0, sizeof(*brq));
+
+	brq->mrq.cmd = &brq->cmd;
+	brq->mrq.tag = req->tag;
+
+	return &brq->mrq;
+}
+
+static int mmc_blk_cqe_issue_flush(struct mmc_queue *mq, struct request *req)
+{
+	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
+	struct mmc_request *mrq = mmc_blk_cqe_prep_dcmd(mqrq, req);
+
+	mrq->cmd->opcode = MMC_SWITCH;
+	mrq->cmd->arg = (MMC_SWITCH_MODE_WRITE_BYTE << 24) |
+			(EXT_CSD_FLUSH_CACHE << 16) |
+			(1 << 8) |
+			EXT_CSD_CMD_SET_NORMAL;
+	mrq->cmd->flags = MMC_CMD_AC | MMC_RSP_R1B;
+
+	return mmc_blk_cqe_start_req(mq->card->host, mrq);
+}
+
+static int mmc_blk_cqe_issue_rw_rq(struct mmc_queue *mq, struct request *req)
+{
+	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
+
+	mmc_blk_data_prep(mq, mqrq, 0, NULL, NULL);
+
+	return mmc_blk_cqe_start_req(mq->card->host, &mqrq->brq.mrq);
+}
+
 static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 			       struct mmc_card *card,
 			       int disable_multi,
@@ -1991,7 +2124,10 @@  void mmc_blk_mq_complete(struct request *req)
 {
 	struct mmc_queue *mq = req->q->queuedata;
 
-	mmc_blk_mq_complete_rq(mq, req);
+	if (mq->use_cqe)
+		mmc_blk_cqe_complete_rq(mq, req);
+	else
+		mmc_blk_mq_complete_rq(mq, req);
 }
 
 static void mmc_blk_mq_poll_completion(struct mmc_queue *mq,
@@ -2150,6 +2286,9 @@  static int mmc_blk_mq_issue_rw_rq(struct mmc_queue *mq,
 
 static int mmc_blk_wait_for_idle(struct mmc_queue *mq, struct mmc_host *host)
 {
+	if (mq->use_cqe)
+		return host->cqe_ops->cqe_wait_for_idle(host);
+
 	return mmc_blk_rw_wait(mq, NULL);
 }
 
@@ -2188,11 +2327,18 @@  enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req)
 			return MMC_REQ_FAILED_TO_START;
 		}
 		return MMC_REQ_FINISHED;
+	case MMC_ISSUE_DCMD:
 	case MMC_ISSUE_ASYNC:
 		switch (req_op(req)) {
+		case REQ_OP_FLUSH:
+			ret = mmc_blk_cqe_issue_flush(mq, req);
+			break;
 		case REQ_OP_READ:
 		case REQ_OP_WRITE:
-			ret = mmc_blk_mq_issue_rw_rq(mq, req);
+			if (mq->use_cqe)
+				ret = mmc_blk_cqe_issue_rw_rq(mq, req);
+			else
+				ret = mmc_blk_mq_issue_rw_rq(mq, req);
 			break;
 		default:
 			WARN_ON_ONCE(1);
diff --git a/drivers/mmc/core/block.h b/drivers/mmc/core/block.h
index c62e3f3d3a3a..6c0e98c1af71 100644
--- a/drivers/mmc/core/block.h
+++ b/drivers/mmc/core/block.h
@@ -6,6 +6,8 @@ 
 
 void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req);
 
+void mmc_blk_cqe_recovery(struct mmc_queue *mq);
+
 enum mmc_issued;
 
 enum mmc_issued mmc_blk_mq_issue_rq(struct mmc_queue *mq, struct request *req);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index a9c2351a9b29..971f97698866 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -40,18 +40,142 @@  static int mmc_prep_request(struct request_queue *q, struct request *req)
 	return BLKPREP_OK;
 }
 
+static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq)
+{
+	/* Allow only 1 DCMD at a time */
+	return mq->in_flight[MMC_ISSUE_DCMD];
+}
+
+void mmc_cqe_check_busy(struct mmc_queue *mq)
+{
+	if ((mq->cqe_busy & MMC_CQE_DCMD_BUSY) && !mmc_cqe_dcmd_busy(mq))
+		mq->cqe_busy &= ~MMC_CQE_DCMD_BUSY;
+
+	mq->cqe_busy &= ~MMC_CQE_QUEUE_FULL;
+}
+
+static inline bool mmc_cqe_can_dcmd(struct mmc_host *host)
+{
+	return host->caps2 & MMC_CAP2_CQE_DCMD;
+}
+
+enum mmc_issue_type mmc_cqe_issue_type(struct mmc_host *host,
+				       struct request *req)
+{
+	switch (req_op(req)) {
+	case REQ_OP_DRV_IN:
+	case REQ_OP_DRV_OUT:
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+		return MMC_ISSUE_SYNC;
+	case REQ_OP_FLUSH:
+		return mmc_cqe_can_dcmd(host) ? MMC_ISSUE_DCMD : MMC_ISSUE_SYNC;
+	default:
+		return MMC_ISSUE_ASYNC;
+	}
+}
+
 enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req)
 {
+	struct mmc_host *host = mq->card->host;
+
+	if (mq->use_cqe)
+		return mmc_cqe_issue_type(host, req);
+
 	if (req_op(req) == REQ_OP_READ || req_op(req) == REQ_OP_WRITE)
 		return MMC_ISSUE_ASYNC;
 
 	return MMC_ISSUE_SYNC;
 }
 
+static void __mmc_cqe_recovery_notifier(struct mmc_queue *mq)
+{
+	if (!mq->recovery_needed) {
+		mq->recovery_needed = true;
+		schedule_work(&mq->recovery_work);
+	}
+}
+
+void mmc_cqe_recovery_notifier(struct mmc_request *mrq)
+{
+	struct mmc_queue_req *mqrq = container_of(mrq, struct mmc_queue_req,
+						  brq.mrq);
+	struct request *req = mmc_queue_req_to_req(mqrq);
+	struct request_queue *q = req->q;
+	struct mmc_queue *mq = q->queuedata;
+	unsigned long flags;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+	__mmc_cqe_recovery_notifier(mq);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
+{
+	struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
+	struct mmc_request *mrq = &mqrq->brq.mrq;
+	struct mmc_queue *mq = req->q->queuedata;
+	struct mmc_host *host = mq->card->host;
+	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
+	bool recovery_needed = false;
+
+	switch (issue_type) {
+	case MMC_ISSUE_ASYNC:
+	case MMC_ISSUE_DCMD:
+		if (host->cqe_ops->cqe_timeout(host, mrq, &recovery_needed)) {
+			if (recovery_needed)
+				__mmc_cqe_recovery_notifier(mq);
+			return BLK_EH_RESET_TIMER;
+		}
+		/* No timeout */
+		return BLK_EH_HANDLED;
+	default:
+		/* Timeout is handled by mmc core */
+		return BLK_EH_RESET_TIMER;
+	}
+}
+
 static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req,
 						 bool reserved)
 {
-	return BLK_EH_RESET_TIMER;
+	struct request_queue *q = req->q;
+	struct mmc_queue *mq = q->queuedata;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	if (mq->recovery_needed || !mq->use_cqe)
+		ret = BLK_EH_RESET_TIMER;
+	else
+		ret = mmc_cqe_timed_out(req);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return ret;
+}
+
+static void mmc_mq_recovery_handler(struct work_struct *work)
+{
+	struct mmc_queue *mq = container_of(work, struct mmc_queue,
+					    recovery_work);
+	struct request_queue *q = mq->queue;
+
+	mmc_get_card(mq->card, &mq->ctx);
+
+	mq->in_recovery = true;
+
+	mmc_blk_cqe_recovery(mq);
+
+	mq->in_recovery = false;
+
+	spin_lock_irq(q->queue_lock);
+	mq->recovery_needed = false;
+	spin_unlock_irq(q->queue_lock);
+
+	mmc_put_card(mq->card, &mq->ctx);
+
+	blk_mq_run_hw_queues(q, true);
 }
 
 static int mmc_queue_thread(void *d)
@@ -219,9 +343,10 @@  static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct request_queue *q = req->q;
 	struct mmc_queue *mq = q->queuedata;
 	struct mmc_card *card = mq->card;
+	struct mmc_host *host = card->host;
 	enum mmc_issue_type issue_type;
 	enum mmc_issued issued;
-	bool get_card;
+	bool get_card, cqe_retune_ok;
 	int ret;
 
 	if (mmc_card_removed(mq->card)) {
@@ -233,7 +358,19 @@  static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	spin_lock_irq(q->queue_lock);
 
+	if (mq->recovery_needed) {
+		spin_unlock_irq(q->queue_lock);
+		return BLK_STS_RESOURCE;
+	}
+
 	switch (issue_type) {
+	case MMC_ISSUE_DCMD:
+		if (mmc_cqe_dcmd_busy(mq)) {
+			mq->cqe_busy |= MMC_CQE_DCMD_BUSY;
+			spin_unlock_irq(q->queue_lock);
+			return BLK_STS_RESOURCE;
+		}
+		break;
 	case MMC_ISSUE_ASYNC:
 		break;
 	default:
@@ -247,6 +384,7 @@  static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	mq->in_flight[issue_type] += 1;
 	get_card = mmc_tot_in_flight(mq) == 1;
+	cqe_retune_ok = mmc_cqe_qcnt(mq) == 1;
 
 	spin_unlock_irq(q->queue_lock);
 
@@ -258,6 +396,11 @@  static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (get_card)
 		mmc_get_card(card, &mq->ctx);
 
+	if (mq->use_cqe) {
+		host->retune_now = host->need_retune && cqe_retune_ok &&
+				   !host->hold_retune;
+	}
+
 	blk_mq_start_request(req);
 
 	issued = mmc_blk_mq_issue_rq(mq, req);
@@ -319,6 +462,7 @@  static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
 	/* Initialize thread_sem even if it is not used */
 	sema_init(&mq->thread_sem, 1);
 
+	INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler);
 	INIT_WORK(&mq->complete_work, mmc_blk_mq_complete_work);
 
 	mutex_init(&mq->complete_lock);
@@ -367,10 +511,14 @@  static int mmc_mq_init_queue(struct mmc_queue *mq, int q_depth,
 static int mmc_mq_init(struct mmc_queue *mq, struct mmc_card *card,
 			 spinlock_t *lock)
 {
+	struct mmc_host *host = card->host;
 	int q_depth;
 	int ret;
 
-	q_depth = MMC_QUEUE_DEPTH;
+	if (mq->use_cqe)
+		q_depth = min_t(int, card->ext_csd.cmdq_depth, host->cqe_qdepth);
+	else
+		q_depth = MMC_QUEUE_DEPTH;
 
 	ret = mmc_mq_init_queue(mq, q_depth, &mmc_mq_ops, lock);
 	if (ret)
@@ -400,7 +548,9 @@  int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 
 	mq->card = card;
 
-	if (mmc_host_use_blk_mq(host))
+	mq->use_cqe = host->cqe_enabled;
+
+	if (mq->use_cqe || mmc_host_use_blk_mq(host))
 		return mmc_mq_init(mq, card, lock);
 
 	mq->queue = blk_alloc_queue(GFP_KERNEL);
diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h
index a5424ee100ef..f05b5a9d2f87 100644
--- a/drivers/mmc/core/queue.h
+++ b/drivers/mmc/core/queue.h
@@ -16,6 +16,7 @@  enum mmc_issued {
 
 enum mmc_issue_type {
 	MMC_ISSUE_SYNC,
+	MMC_ISSUE_DCMD,
 	MMC_ISSUE_ASYNC,
 	MMC_ISSUE_MAX,
 };
@@ -91,8 +92,15 @@  struct mmc_queue {
 	int			qcnt;
 
 	int			in_flight[MMC_ISSUE_MAX];
+	unsigned int		cqe_busy;
+#define MMC_CQE_DCMD_BUSY	BIT(0)
+#define MMC_CQE_QUEUE_FULL	BIT(1)
+	bool			use_cqe;
+	bool			recovery_needed;
+	bool			in_recovery;
 	bool			rw_wait;
 	bool			waiting;
+	struct work_struct	recovery_work;
 	wait_queue_head_t	wait;
 	struct request		*complete_req;
 	struct mutex		complete_lock;
@@ -107,11 +115,21 @@  extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
 extern unsigned int mmc_queue_map_sg(struct mmc_queue *,
 				     struct mmc_queue_req *);
 
+void mmc_cqe_check_busy(struct mmc_queue *mq);
+void mmc_cqe_recovery_notifier(struct mmc_request *mrq);
+
 enum mmc_issue_type mmc_issue_type(struct mmc_queue *mq, struct request *req);
 
 static inline int mmc_tot_in_flight(struct mmc_queue *mq)
 {
 	return mq->in_flight[MMC_ISSUE_SYNC] +
+	       mq->in_flight[MMC_ISSUE_DCMD] +
+	       mq->in_flight[MMC_ISSUE_ASYNC];
+}
+
+static inline int mmc_cqe_qcnt(struct mmc_queue *mq)
+{
+	return mq->in_flight[MMC_ISSUE_DCMD] +
 	       mq->in_flight[MMC_ISSUE_ASYNC];
 }