
[RFC,07/10] mmc: cmdq: support for command queue enabled host

Message ID 1465995674-15816-8-git-send-email-riteshh@codeaurora.org (mailing list archive)
State New, archived

Commit Message

Ritesh Harjani June 15, 2016, 1:01 p.m. UTC
From: Venkat Gopalakrishnan <venkatg@codeaurora.org>

This patch adds CMDQ support for command-queue compatible
hosts.

Command queue is added in the eMMC-5.1 specification. This
enables the controller to process up to 32 requests at
a time.

Signed-off-by: Asutosh Das <asutoshd@codeaurora.org>
Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Signed-off-by: Konstantin Dorfman <kdorfman@codeaurora.org>
Signed-off-by: Venkat Gopalakrishnan <venkatg@codeaurora.org>
[subhashj@codeaurora.org: fixed trivial merge conflicts]
Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
[riteshh@codeaurora.org: fixed merge conflicts]
Signed-off-by: Ritesh Harjani <riteshh@codeaurora.org>
---
 drivers/mmc/host/Kconfig    |  13 +
 drivers/mmc/host/Makefile   |   1 +
 drivers/mmc/host/cmdq_hci.c | 656 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/mmc/host/cmdq_hci.h | 211 ++++++++++++++
 include/linux/mmc/host.h    |  12 +
 5 files changed, 893 insertions(+)
 create mode 100644 drivers/mmc/host/cmdq_hci.c
 create mode 100644 drivers/mmc/host/cmdq_hci.h

Comments

Shawn Lin June 17, 2016, 8:45 a.m. UTC | #1
On 2016/6/15 21:01, Ritesh Harjani wrote:
> From: Venkat Gopalakrishnan <venkatg@codeaurora.org>
>
> This patch adds CMDQ support for command-queue compatible
> hosts.
>
> Command queue is added in the eMMC-5.1 specification. This
> enables the controller to process up to 32 requests at
> a time.
>
> Signed-off-by: Asutosh Das <asutoshd@codeaurora.org>
> Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
> Signed-off-by: Konstantin Dorfman <kdorfman@codeaurora.org>
> Signed-off-by: Venkat Gopalakrishnan <venkatg@codeaurora.org>
> [subhashj@codeaurora.org: fixed trivial merge conflicts]
> Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
> [riteshh@codeaurora.org: fixed merge conflicts]
> Signed-off-by: Ritesh Harjani <riteshh@codeaurora.org>
> ---
>  drivers/mmc/host/Kconfig    |  13 +
>  drivers/mmc/host/Makefile   |   1 +
>  drivers/mmc/host/cmdq_hci.c | 656 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/mmc/host/cmdq_hci.h | 211 ++++++++++++++
>  include/linux/mmc/host.h    |  12 +
>  5 files changed, 893 insertions(+)
>  create mode 100644 drivers/mmc/host/cmdq_hci.c
>  create mode 100644 drivers/mmc/host/cmdq_hci.h
>
> diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
> index e657af0..f1b5433 100644
> --- a/drivers/mmc/host/Kconfig
> +++ b/drivers/mmc/host/Kconfig
> @@ -774,6 +774,19 @@ config MMC_SUNXI
>  	  This selects support for the SD/MMC Host Controller on
>  	  Allwinner sunxi SoCs.
>
> +config MMC_CQ_HCI
> +	tristate "Command Queue Support"
> +	depends on HAS_DMA
> +	help
> +	  This selects the Command Queue Host Controller Interface (CQHCI)
> +	  support present in host controllers of Qualcomm Technologies, Inc
> +	  amongst others.
> +	  This controller supports eMMC devices with command queue support.
> +
> +	  If you have a controller with this interface, say Y or M here.
> +
> +	  If unsure, say N.
> +

well, my 5.1 controller supports HW cmdq, but I think it would be better
to enable it by default when MMC_CAP2_CMD_QUEUE is found.
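
Something along these lines in the host driver probe would do it (a
rough sketch using the helpers this patch adds; error handling and the
dma64 decision are elided, and MMC_CAP2_CMD_QUEUE comes from earlier
patches in this series):

	if (mmc->caps2 & MMC_CAP2_CMD_QUEUE) {
		struct cmdq_host *cq_host = cmdq_pltfm_init(pdev);

		if (!IS_ERR(cq_host))
			err = cmdq_init(cq_host, mmc, dma64);
	}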

>  config MMC_TOSHIBA_PCI
>  	tristate "Toshiba Type A SD/MMC Card Interface Driver"
>  	depends on PCI
> diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
> index af918d2..3715f73 100644
> --- a/drivers/mmc/host/Makefile
> +++ b/drivers/mmc/host/Makefile
> @@ -76,6 +76,7 @@ obj-$(CONFIG_MMC_SDHCI_IPROC)		+= sdhci-iproc.o
>  obj-$(CONFIG_MMC_SDHCI_MSM)		+= sdhci-msm.o
>  obj-$(CONFIG_MMC_SDHCI_ST)		+= sdhci-st.o
>  obj-$(CONFIG_MMC_SDHCI_MICROCHIP_PIC32)	+= sdhci-pic32.o
> +obj-$(CONFIG_MMC_CQ_HCI)		+= cmdq_hci.o
>
>  ifeq ($(CONFIG_CB710_DEBUG),y)
>  	CFLAGS-cb710-mmc	+= -DDEBUG
> diff --git a/drivers/mmc/host/cmdq_hci.c b/drivers/mmc/host/cmdq_hci.c
> new file mode 100644
> index 0000000..68c8e03
> --- /dev/null
> +++ b/drivers/mmc/host/cmdq_hci.c
> @@ -0,0 +1,656 @@
> +/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/delay.h>
> +#include <linux/highmem.h>
> +#include <linux/io.h>
> +#include <linux/module.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/slab.h>
> +#include <linux/scatterlist.h>
> +#include <linux/platform_device.h>
> +#include <linux/blkdev.h>
> +
> +#include <linux/mmc/mmc.h>
> +#include <linux/mmc/host.h>
> +#include <linux/mmc/card.h>
> +
> +#include "cmdq_hci.h"
> +
> +#define DCMD_SLOT 31
> +#define NUM_SLOTS 32
> +
> +static inline u8 *get_desc(struct cmdq_host *cq_host, u8 tag)
> +{
> +	return cq_host->desc_base + (tag * cq_host->slot_sz);
> +}
> +
> +static inline u8 *get_link_desc(struct cmdq_host *cq_host, u8 tag)
> +{
> +	u8 *desc = get_desc(cq_host, tag);
> +
> +	return desc + cq_host->task_desc_len;
> +}
> +
> +static inline dma_addr_t get_trans_desc_dma(struct cmdq_host *cq_host, u8 tag)
> +{
> +	return cq_host->trans_desc_dma_base +
> +		(cq_host->mmc->max_segs * tag *
> +		 cq_host->trans_desc_len);
> +}
> +
> +static inline u8 *get_trans_desc(struct cmdq_host *cq_host, u8 tag)
> +{
> +	return cq_host->trans_desc_base +
> +		(cq_host->trans_desc_len * cq_host->mmc->max_segs * tag);
> +}
> +
> +static void setup_trans_desc(struct cmdq_host *cq_host, u8 tag)
> +{
> +	u8 *link_temp;
> +	dma_addr_t trans_temp;
> +
> +	link_temp = get_link_desc(cq_host, tag);
> +	trans_temp = get_trans_desc_dma(cq_host, tag);
> +
> +	memset(link_temp, 0, cq_host->link_desc_len);
> +	if (cq_host->link_desc_len > 8)
> +		*(link_temp + 8) = 0;
> +
> +	if (tag == DCMD_SLOT) {
> +		*link_temp = VALID(0) | ACT(0) | END(1);
> +		return;
> +	}
> +
> +	*link_temp = VALID(1) | ACT(0x6) | END(0);
> +
> +	if (cq_host->dma64) {
> +		__le64 *data_addr = (__le64 __force *)(link_temp + 4);
> +		data_addr[0] = cpu_to_le64(trans_temp);
> +	} else {
> +		__le32 *data_addr = (__le32 __force *)(link_temp + 4);
> +		data_addr[0] = cpu_to_le32(trans_temp);
> +	}
> +}
> +
> +static void cmdq_clear_set_irqs(struct cmdq_host *cq_host, u32 clear, u32 set)
> +{
> +	u32 ier;
> +
> +	ier = cmdq_readl(cq_host, CQISTE);
> +	ier &= ~clear;
> +	ier |= set;
> +	cmdq_writel(cq_host, ier, CQISTE);
> +	cmdq_writel(cq_host, ier, CQISGE);
> +	/* ensure the writes are done */
> +	mb();
> +}
> +
> +
> +#define DRV_NAME "cmdq-host"
> +
> +static void cmdq_dump_debug_ram(struct cmdq_host *cq_host)
> +{
> +	int i = 0;
> +
> +	pr_err("---- Debug RAM dump ----\n");
> +	pr_err(DRV_NAME ": Debug RAM wrap-around: 0x%08x | Debug RAM overlap: 0x%08x\n",
> +	       cmdq_readl(cq_host, CQ_CMD_DBG_RAM_WA),
> +	       cmdq_readl(cq_host, CQ_CMD_DBG_RAM_OL));

well, this is beyond the scope of the standard cmdq engine.
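
If the debug RAM is vendor-specific, one option (sketch only) would be
to keep this dump out of the core and let the vendor driver hook it up
via the dump_vendor_regs() op this patch already provides:

	/* hypothetical vendor host driver code */
	static void xyz_dump_vendor_regs(struct mmc_host *mmc)
	{
		/* dump CQ_CMD_DBG_RAM and friends here */
	}

	static const struct cmdq_host_ops xyz_cmdq_ops = {
		.dump_vendor_regs = xyz_dump_vendor_regs,
	};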

> +
> +	while (i < 16) {
> +		pr_err(DRV_NAME ": Debug RAM dump [%d]: 0x%08x\n", i,
> +		       cmdq_readl(cq_host, CQ_CMD_DBG_RAM + (0x4 * i)));
> +		i++;
> +	}
> +	pr_err("-------------------------\n");
> +}
> +
> +static void cmdq_dumpregs(struct cmdq_host *cq_host)
> +{
> +	struct mmc_host *mmc = cq_host->mmc;
> +
> +	pr_info(DRV_NAME ": ========== REGISTER DUMP (%s)==========\n",
> +		mmc_hostname(mmc));
> +
> +	pr_info(DRV_NAME ": Caps: 0x%08x	  | Version:  0x%08x\n",
> +		cmdq_readl(cq_host, CQCAP),
> +		cmdq_readl(cq_host, CQVER));
> +	pr_info(DRV_NAME ": Queuing config: 0x%08x | Queue Ctrl:  0x%08x\n",
> +		cmdq_readl(cq_host, CQCFG),
> +		cmdq_readl(cq_host, CQCTL));
> +	pr_info(DRV_NAME ": Int stat: 0x%08x	  | Int enab:  0x%08x\n",
> +		cmdq_readl(cq_host, CQIS),
> +		cmdq_readl(cq_host, CQISTE));
> +	pr_info(DRV_NAME ": Int sig: 0x%08x	  | Int Coal:  0x%08x\n",
> +		cmdq_readl(cq_host, CQISGE),
> +		cmdq_readl(cq_host, CQIC));
> +	pr_info(DRV_NAME ": TDL base: 0x%08x	  | TDL up32:  0x%08x\n",
> +		cmdq_readl(cq_host, CQTDLBA),
> +		cmdq_readl(cq_host, CQTDLBAU));
> +	pr_info(DRV_NAME ": Doorbell: 0x%08x	  | Comp Notif:  0x%08x\n",
> +		cmdq_readl(cq_host, CQTDBR),
> +		cmdq_readl(cq_host, CQTCN));
> +	pr_info(DRV_NAME ": Dev queue: 0x%08x	  | Dev Pend:  0x%08x\n",
> +		cmdq_readl(cq_host, CQDQS),
> +		cmdq_readl(cq_host, CQDPT));
> +	pr_info(DRV_NAME ": Task clr: 0x%08x	  | Send stat 1:  0x%08x\n",
> +		cmdq_readl(cq_host, CQTCLR),
> +		cmdq_readl(cq_host, CQSSC1));
> +	pr_info(DRV_NAME ": Send stat 2: 0x%08x	  | DCMD resp:  0x%08x\n",
> +		cmdq_readl(cq_host, CQSSC2),
> +		cmdq_readl(cq_host, CQCRDCT));
> +	pr_info(DRV_NAME ": Resp err mask: 0x%08x | Task err:  0x%08x\n",
> +		cmdq_readl(cq_host, CQRMEM),
> +		cmdq_readl(cq_host, CQTERRI));
> +	pr_info(DRV_NAME ": Resp idx 0x%08x	  | Resp arg:  0x%08x\n",
> +		cmdq_readl(cq_host, CQCRI),
> +		cmdq_readl(cq_host, CQCRA));
> +	pr_info(DRV_NAME ": ===========================================\n");
> +
> +	cmdq_dump_debug_ram(cq_host);
> +	if (cq_host->ops->dump_vendor_regs)
> +		cq_host->ops->dump_vendor_regs(mmc);
> +}
> +
> +/**
> + * The allocated descriptor table for task, link & transfer descriptors
> + * looks like:
> + * |----------|
> + * |task desc |  |->|----------|
> + * |----------|  |  |trans desc|
> + * |link desc-|->|  |----------|
> + * |----------|          .
> + *      .                .
> + *  no. of slots      max-segs
> + *      .           |----------|
> + * |----------|
> + * The idea here is to create the [task+trans] table and mark & point the
> + * link desc to the transfer desc table on a per slot basis.
> + */
> +static int cmdq_host_alloc_tdl(struct cmdq_host *cq_host)
> +{
> +
> +	size_t desc_size;
> +	size_t data_size;
> +	int i = 0;
> +
> +	/* task descriptor can be 64/128 bit irrespective of arch */
> +	if (cq_host->caps & CMDQ_TASK_DESC_SZ_128) {
> +		cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) |
> +			       CQ_TASK_DESC_SZ, CQCFG);
> +		cq_host->task_desc_len = 16;
> +	} else {
> +		cq_host->task_desc_len = 8;
> +	}
> +
> +	/*
> +	 * 96 bits length of transfer desc instead of 128 bits which means
> +	 * ADMA would expect next valid descriptor at the 96th bit
> +	 * or 128th bit
> +	 */
> +	if (cq_host->dma64) {
> +		if (cq_host->quirks & CMDQ_QUIRK_SHORT_TXFR_DESC_SZ)
> +			cq_host->trans_desc_len = 12;
> +		else
> +			cq_host->trans_desc_len = 16;
> +		cq_host->link_desc_len = 16;
> +	} else {
> +		cq_host->trans_desc_len = 8;
> +		cq_host->link_desc_len = 8;
> +	}
> +
> +	/* total size of a slot: 1 task & 1 transfer (link) */
> +	cq_host->slot_sz = cq_host->task_desc_len + cq_host->link_desc_len;
> +
> +	desc_size = cq_host->slot_sz * cq_host->num_slots;
> +
> +	data_size = cq_host->trans_desc_len * cq_host->mmc->max_segs *
> +		(cq_host->num_slots - 1);
> +
> +	pr_info("%s: desc_size: %d data_sz: %d slot-sz: %d\n", __func__,
> +		(int)desc_size, (int)data_size, cq_host->slot_sz);
> +
> +	/*
> +	 * allocate a dma-mapped chunk of memory for the descriptors
> +	 * allocate a dma-mapped chunk of memory for link descriptors
> +	 * setup each link-desc memory offset per slot-number to
> +	 * the descriptor table.
> +	 */
> +	cq_host->desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc),
> +						 desc_size,
> +						 &cq_host->desc_dma_base,
> +						 GFP_KERNEL);
> +	cq_host->trans_desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc),
> +					      data_size,
> +					      &cq_host->trans_desc_dma_base,
> +					      GFP_KERNEL);
> +	if (!cq_host->desc_base || !cq_host->trans_desc_base)
> +		return -ENOMEM;
> +
> +	pr_info("desc-base: 0x%p trans-base: 0x%p\n desc_dma 0x%llx trans_dma: 0x%llx\n",
> +		 cq_host->desc_base, cq_host->trans_desc_base,
> +		(unsigned long long)cq_host->desc_dma_base,
> +		(unsigned long long) cq_host->trans_desc_dma_base);
> +
> +	for (; i < (cq_host->num_slots); i++)
> +		setup_trans_desc(cq_host, i);
> +
> +	return 0;
> +}
> +
> +static int cmdq_enable(struct mmc_host *mmc)
> +{
> +	int err = 0;
> +	u32 cqcfg;
> +	bool dcmd_enable;
> +	struct cmdq_host *cq_host = mmc_cmdq_private(mmc);
> +
> +	if (!cq_host || !mmc->card || !mmc_card_cmdq(mmc->card)) {
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (cq_host->enabled)
> +		goto out;
> +
> +	cqcfg = cmdq_readl(cq_host, CQCFG);
> +	if (cqcfg & 0x1) {
> +		pr_info("%s: %s: cq_host is already enabled\n",
> +				mmc_hostname(mmc), __func__);
> +		WARN_ON(1);
> +		goto out;
> +	}
> +
> +	if (cq_host->quirks & CMDQ_QUIRK_NO_DCMD)

Why do you need it? DCMD should certainly be supported.
Have you found any vendor's CQE that doesn't support
it when fetching the TD in slot#31?

> +		dcmd_enable = false;
> +	else
> +		dcmd_enable = true;
> +
> +	cqcfg = ((cq_host->caps & CMDQ_TASK_DESC_SZ_128 ? CQ_TASK_DESC_SZ : 0) |
> +			(dcmd_enable ? CQ_DCMD : 0));
> +
> +	cmdq_writel(cq_host, cqcfg, CQCFG);
> +	/* enable CQ_HOST */
> +	cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) | CQ_ENABLE,
> +		    CQCFG);
> +
> +	if (!cq_host->desc_base ||
> +			!cq_host->trans_desc_base) {
> +		err = cmdq_host_alloc_tdl(cq_host);
> +		if (err)
> +			goto out;
> +		cmdq_writel(cq_host, lower_32_bits(cq_host->desc_dma_base),
> +				CQTDLBA);
> +		cmdq_writel(cq_host, upper_32_bits(cq_host->desc_dma_base),
> +				CQTDLBAU);
> +		cmdq_dumpregs(cq_host);
> +	}
> +
> +	/*
> +	 * disable all vendor interrupts
> +	 * enable CMDQ interrupts
> +	 * enable the vendor error interrupts
> +	 */
> +	if (cq_host->ops->clear_set_irqs)
> +		cq_host->ops->clear_set_irqs(mmc, true);
> +
> +	cmdq_clear_set_irqs(cq_host, 0x0, CQ_INT_ALL);
> +
> +	/* cq_host would use this rca to address the card */
> +	cmdq_writel(cq_host, mmc->card->rca, CQSSC2);
> +
> +	/* send QSR at lesser intervals than the default */
> +	cmdq_writel(cq_host, cmdq_readl(cq_host, CQSSC1) | SEND_QSR_INTERVAL,
> +				CQSSC1);
> +
> +	/* ensure the writes are done before enabling CQE */
> +	mb();
> +
> +	cq_host->enabled = true;
> +
> +	if (cq_host->ops->set_block_size)
> +		cq_host->ops->set_block_size(cq_host->mmc);
> +
> +	if (cq_host->ops->set_data_timeout)
> +		cq_host->ops->set_data_timeout(mmc, 0xf);
> +
> +	if (cq_host->ops->clear_set_dumpregs)
> +		cq_host->ops->clear_set_dumpregs(mmc, 1);
> +
> +out:
> +	return err;
> +}
> +
> +static void cmdq_disable(struct mmc_host *mmc, bool soft)
> +{
> +	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
> +
> +	if (soft) {
> +		cmdq_writel(cq_host, cmdq_readl(
> +				    cq_host, CQCFG) & ~(CQ_ENABLE),
> +			    CQCFG);
> +	}
> +

No need { }
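
i.e., something like:

	static void cmdq_disable(struct mmc_host *mmc, bool soft)
	{
		struct cmdq_host *cq_host = mmc_cmdq_private(mmc);

		if (soft)
			cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) &
				    ~CQ_ENABLE, CQCFG);

		cq_host->enabled = false;
	}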

> +	cq_host->enabled = false;
> +}
> +
> +static void cmdq_prep_task_desc(struct mmc_request *mrq,
> +					u64 *data, bool intr, bool qbr)
> +{
> +	struct mmc_cmdq_req *cmdq_req = mrq->cmdq_req;
> +	u32 req_flags = cmdq_req->cmdq_req_flags;
> +
> +	pr_debug("%s: %s: data-tag: 0x%08x - dir: %d - prio: %d - cnt: 0x%08x -	addr: 0x%llx\n",
> +		 mmc_hostname(mrq->host), __func__,
> +		 !!(req_flags & DAT_TAG), !!(req_flags & DIR),
> +		 !!(req_flags & PRIO), cmdq_req->data.blocks,
> +		 (u64)mrq->cmdq_req->blk_addr);
> +
> +	*data = VALID(1) |
> +		END(1) |
> +		INT(intr) |
> +		ACT(0x5) |
> +		FORCED_PROG(!!(req_flags & FORCED_PRG)) |
> +		CONTEXT(mrq->cmdq_req->ctx_id) |
> +		DATA_TAG(!!(req_flags & DAT_TAG)) |
> +		DATA_DIR(!!(req_flags & DIR)) |
> +		PRIORITY(!!(req_flags & PRIO)) |
> +		QBAR(qbr) |
> +		REL_WRITE(!!(req_flags & REL_WR)) |
> +		BLK_COUNT(mrq->cmdq_req->data.blocks) |
> +		BLK_ADDR((u64)mrq->cmdq_req->blk_addr);
> +}
> +
> +static int cmdq_dma_map(struct mmc_host *host, struct mmc_request *mrq)
> +{
> +	int sg_count;
> +	struct mmc_data *data = mrq->data;
> +
> +	if (!data)
> +		return -EINVAL;
> +
> +	sg_count = dma_map_sg(mmc_dev(host), data->sg,
> +			      data->sg_len,
> +			      (data->flags & MMC_DATA_WRITE) ?
> +			      DMA_TO_DEVICE : DMA_FROM_DEVICE);
> +	if (!sg_count) {
> +		pr_err("%s: sg-len: %d\n", __func__, data->sg_len);
> +		return -ENOMEM;
> +	}
> +
> +	return sg_count;
> +}
> +
> +static void cmdq_set_tran_desc(u8 *desc,
> +				 dma_addr_t addr, int len, bool end)
> +{
> +	__le64 *dataddr = (__le64 __force *)(desc + 4);
> +	__le32 *attr = (__le32 __force *)desc;
> +
> +	*attr = (VALID(1) |
> +		 END(end ? 1 : 0) |
> +		 INT(0) |
> +		 ACT(0x4) |
> +		 DAT_LENGTH(len));
> +
> +	dataddr[0] = cpu_to_le64(addr);
> +}
> +
> +static int cmdq_prep_tran_desc(struct mmc_request *mrq,
> +			       struct cmdq_host *cq_host, int tag)
> +{
> +	struct mmc_data *data = mrq->data;
> +	int i, sg_count, len;
> +	bool end = false;
> +	dma_addr_t addr;
> +	u8 *desc;
> +	struct scatterlist *sg;
> +
> +	sg_count = cmdq_dma_map(mrq->host, mrq);
> +	if (sg_count < 0) {
> +		pr_err("%s: %s: unable to map sg lists, %d\n",
> +				mmc_hostname(mrq->host), __func__, sg_count);
> +		return sg_count;
> +	}
> +
> +	desc = get_trans_desc(cq_host, tag);
> +	memset(desc, 0, cq_host->trans_desc_len * cq_host->mmc->max_segs);
> +
> +	for_each_sg(data->sg, sg, sg_count, i) {
> +		addr = sg_dma_address(sg);
> +		len = sg_dma_len(sg);
> +
> +		if ((i+1) == sg_count)
> +			end = true;
> +		cmdq_set_tran_desc(desc, addr, len, end);
> +		desc += cq_host->trans_desc_len;
> +	}
> +
> +	pr_debug("%s: req: 0x%p tag: %d calc_trans_des: 0x%p sg-cnt: %d\n",
> +		__func__, mrq->req, tag, desc, sg_count);
> +
> +	return 0;
> +}
> +
> +static void cmdq_prep_dcmd_desc(struct mmc_host *mmc,
> +				   struct mmc_request *mrq)
> +{
> +	u64 *task_desc = NULL;
> +	u64 data = 0;
> +	u8 resp_type;
> +	u8 *desc;
> +	__le64 *dataddr;
> +	struct cmdq_host *cq_host = mmc_cmdq_private(mmc);
> +	u8 timing;
> +
> +	if (!(mrq->cmd->flags & MMC_RSP_PRESENT)) {
> +		resp_type = 0x0;
> +		timing = 0x1;
> +	} else {
> +		if (mrq->cmd->flags & MMC_RSP_R1B) {
> +			resp_type = 0x3;
> +			timing = 0x0;
> +		} else {
> +			resp_type = 0x2;
> +			timing = 0x1;
> +		}
> +	}
> +
> +	task_desc = (__le64 __force *)get_desc(cq_host, cq_host->dcmd_slot);
> +	memset(task_desc, 0, cq_host->task_desc_len);
> +	data |= (VALID(1) |
> +		 END(1) |
> +		 INT(1) |
> +		 QBAR(1) |
> +		 ACT(0x5) |
> +		 CMD_INDEX(mrq->cmd->opcode) |
> +		 CMD_TIMING(timing) | RESP_TYPE(resp_type));
> +	*task_desc |= data;
> +	desc = (u8 *)task_desc;
> +	pr_debug("cmdq: dcmd: cmd: %d timing: %d resp: %d\n",
> +		mrq->cmd->opcode, timing, resp_type);
> +	dataddr = (__le64 __force *)(desc + 4);
> +	dataddr[0] = cpu_to_le64((u64)mrq->cmd->arg);
> +
> +}
> +
> +static int cmdq_request(struct mmc_host *mmc, struct mmc_request *mrq)
> +{
> +	int err;
> +	u64 data = 0;
> +	u64 *task_desc = NULL;
> +	u32 tag = mrq->cmdq_req->tag;
> +	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
> +
> +	if (!cq_host->enabled) {
> +		pr_err("%s: CMDQ host not enabled yet !!!\n",
> +		       mmc_hostname(mmc));
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (mrq->cmdq_req->cmdq_req_flags & DCMD) {
> +		cmdq_prep_dcmd_desc(mmc, mrq);
> +		cq_host->mrq_slot[DCMD_SLOT] = mrq;
> +		cmdq_writel(cq_host, 1 << DCMD_SLOT, CQTDBR);
> +		return 0;
> +	}
> +
> +	task_desc = (__le64 __force *)get_desc(cq_host, tag);
> +
> +	cmdq_prep_task_desc(mrq, &data, 1,
> +			    (mrq->cmdq_req->cmdq_req_flags & QBR));
> +	*task_desc = cpu_to_le64(data);
> +
> +	err = cmdq_prep_tran_desc(mrq, cq_host, tag);
> +	if (err) {
> +		pr_err("%s: %s: failed to setup tx desc: %d\n",
> +		       mmc_hostname(mmc), __func__, err);
> +		return err;
> +	}
> +
> +	BUG_ON(cmdq_readl(cq_host, CQTDBR) & (1 << tag));
> +
> +	cq_host->mrq_slot[tag] = mrq;
> +	if (cq_host->ops->set_tranfer_params)
> +		cq_host->ops->set_tranfer_params(mmc);
> +
> +	cmdq_writel(cq_host, 1 << tag, CQTDBR);
> +
> +out:
> +	return err;
> +}
> +
> +static void cmdq_finish_data(struct mmc_host *mmc, unsigned int tag)
> +{
> +	struct mmc_request *mrq;
> +	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
> +
> +	mrq = cq_host->mrq_slot[tag];
> +	mrq->done(mrq);
> +}
> +
> +irqreturn_t cmdq_irq(struct mmc_host *mmc, u32 intmask)
> +{
> +	u32 status;
> +	unsigned long tag = 0, comp_status;
> +	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
> +
> +	status = cmdq_readl(cq_host, CQIS);
> +	cmdq_writel(cq_host, status, CQIS);
> +

I don't think it's good to ignore the TERR here when finding
an invalid task desc.
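
Even just logging it in cmdq_irq() would help debugging (a sketch; real
recovery would still need to decode the CQTERRI fields):

	if (status & CQIS_RED) {
		pr_err("%s: RED error, CQTERRI: 0x%08x\n",
		       mmc_hostname(mmc), cmdq_readl(cq_host, CQTERRI));
		cmdq_dumpregs(cq_host);
	}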


> +	if (status & CQIS_TCC) {
> +		/* read QCTCN and complete the request */
> +		comp_status = cmdq_readl(cq_host, CQTCN);
> +		if (!comp_status)
> +			goto out;
> +
> +		for_each_set_bit(tag, &comp_status, cq_host->num_slots) {
> +			/* complete the corresponding mrq */
> +			pr_debug("%s: completing tag -> %lu\n",
> +				 mmc_hostname(mmc), tag);
> +			cmdq_finish_data(mmc, tag);
> +		}
> +		cmdq_writel(cq_host, comp_status, CQTCN);
> +	}
> +
> +	if (status & CQIS_RED) {
> +		/* task response has an error */
> +		pr_err("%s: RED error %d !!!\n", mmc_hostname(mmc), status);
> +		cmdq_dumpregs(cq_host);
> +	}
> +
> +out:
> +	return IRQ_HANDLED;
> +}
> +EXPORT_SYMBOL(cmdq_irq);
> +
> +static void cmdq_post_req(struct mmc_host *host, struct mmc_request *mrq,
> +			  int err)
> +{
> +	struct mmc_data *data = mrq->data;
> +
> +	if (data) {
> +		data->error = err;
> +		dma_unmap_sg(mmc_dev(host), data->sg, data->sg_len,
> +			     (data->flags & MMC_DATA_READ) ?
> +			     DMA_FROM_DEVICE : DMA_TO_DEVICE);
> +		if (err)
> +			data->bytes_xfered = 0;
> +		else
> +			data->bytes_xfered = blk_rq_bytes(mrq->req);
> +	}
> +}
> +
> +static const struct mmc_cmdq_host_ops cmdq_host_ops = {
> +	.enable = cmdq_enable,
> +	.disable = cmdq_disable,
> +	.request = cmdq_request,
> +	.post_req = cmdq_post_req,
> +};
> +
> +struct cmdq_host *cmdq_pltfm_init(struct platform_device *pdev)
> +{
> +	struct cmdq_host *cq_host;
> +	struct resource *cmdq_memres = NULL;
> +
> +	/* check and setup CMDQ interface */
> +	cmdq_memres = platform_get_resource_byname(pdev, IORESOURCE_MEM,
> +						   "cmdq_mem");

My cmdq engine is appended after the mmio of sdhci, so may I map the
whole region when probing sdhci and pass the offset of the CMDQ engine
to the hci?
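
Something like the below would avoid the second ioremap in that case
(hypothetical; host->ioaddr is the sdhci mapping and cqe_reg_offset
would be a vendor-specific constant):

	cq_host = kzalloc(sizeof(*cq_host), GFP_KERNEL);
	if (!cq_host)
		return ERR_PTR(-ENOMEM);
	cq_host->mmio = host->ioaddr + cqe_reg_offset;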

> +	if (!cmdq_memres) {
> +		dev_dbg(&pdev->dev, "CMDQ not supported\n");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	cq_host = kzalloc(sizeof(*cq_host), GFP_KERNEL);
> +	if (!cq_host) {
> +		dev_err(&pdev->dev, "failed to allocate memory for CMDQ\n");
> +		return ERR_PTR(-ENOMEM);
> +	}
> +	cq_host->mmio = devm_ioremap(&pdev->dev,
> +				     cmdq_memres->start,
> +				     resource_size(cmdq_memres));
> +	if (!cq_host->mmio) {
> +		dev_err(&pdev->dev, "failed to remap cmdq regs\n");
> +		kfree(cq_host);
> +		return ERR_PTR(-EBUSY);
> +	}
> +	dev_dbg(&pdev->dev, "CMDQ ioremap: done\n");
> +
> +	return cq_host;
> +}
> +EXPORT_SYMBOL(cmdq_pltfm_init);
> +
> +int cmdq_init(struct cmdq_host *cq_host, struct mmc_host *mmc,
> +	      bool dma64)
> +{
> +	int err = 0;
> +
> +	cq_host->dma64 = dma64;
> +	cq_host->mmc = mmc;
> +	cq_host->mmc->cmdq_private = cq_host;
> +
> +	cq_host->num_slots = NUM_SLOTS;
> +	cq_host->dcmd_slot = DCMD_SLOT;
> +
> +	mmc->cmdq_ops = &cmdq_host_ops;
> +
> +	cq_host->mrq_slot = kzalloc(sizeof(cq_host->mrq_slot) *
> +				    cq_host->num_slots, GFP_KERNEL);
> +	if (!cq_host->mrq_slot)
> +		return -ENOMEM;
> +
> +	init_completion(&cq_host->halt_comp);
> +	return err;
> +}
> +EXPORT_SYMBOL(cmdq_init);
> diff --git a/drivers/mmc/host/cmdq_hci.h b/drivers/mmc/host/cmdq_hci.h
> new file mode 100644
> index 0000000..e7f5a15
> --- /dev/null
> +++ b/drivers/mmc/host/cmdq_hci.h
> @@ -0,0 +1,211 @@
> +/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +#ifndef LINUX_MMC_CQ_HCI_H
> +#define LINUX_MMC_CQ_HCI_H
> +#include <linux/mmc/core.h>
> +
> +/* registers */
> +/* version */
> +#define CQVER		0x00
> +/* capabilities */
> +#define CQCAP		0x04
> +/* configuration */
> +#define CQCFG		0x08
> +#define CQ_DCMD		0x00001000
> +#define CQ_TASK_DESC_SZ 0x00000100
> +#define CQ_ENABLE	0x00000001
> +
> +/* control */
> +#define CQCTL		0x0C
> +#define CLEAR_ALL_TASKS 0x00000100
> +#define HALT		0x00000001
> +
> +/* interrupt status */
> +#define CQIS		0x10
> +#define CQIS_HAC	(1 << 0)
> +#define CQIS_TCC	(1 << 1)
> +#define CQIS_RED	(1 << 2)
> +#define CQIS_TCL	(1 << 3)
> +

okay, I checked all these definitions and they seem
ok, but I think it's better to use BIT(x) instead of
(1 << x). Also, spaces and tabs are mixed here.
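
i.e.:

	#define CQIS_HAC	BIT(0)
	#define CQIS_TCC	BIT(1)
	#define CQIS_RED	BIT(2)
	#define CQIS_TCL	BIT(3)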

> +/* interrupt status enable */
> +#define CQISTE		0x14
> +
> +/* interrupt signal enable */
> +#define CQISGE		0x18
> +
> +/* interrupt coalescing */
> +#define CQIC		0x1C
> +#define CQIC_ENABLE	(1 << 31)
> +#define CQIC_RESET	(1 << 16)
> +#define CQIC_ICCTHWEN	(1 << 15)
> +#define CQIC_ICCTH(x)	((x & 0x1F) << 8)
> +#define CQIC_ICTOVALWEN (1 << 7)
> +#define CQIC_ICTOVAL(x) (x & 0x7F)
> +
> +/* task list base address */
> +#define CQTDLBA		0x20
> +
> +/* task list base address upper */
> +#define CQTDLBAU	0x24
> +
> +/* door-bell */
> +#define CQTDBR		0x28
> +
> +/* task completion notification */
> +#define CQTCN		0x2C
> +
> +/* device queue status */
> +#define CQDQS		0x30
> +
> +/* device pending tasks */
> +#define CQDPT		0x34
> +
> +/* task clear */
> +#define CQTCLR		0x38
> +
> +/* send status config 1 */
> +#define CQSSC1		0x40
> +/*
> + * Value n means CQE would send CMD13 during the transfer of data block
> + * BLOCK_CNT-n
> + */
> +#define SEND_QSR_INTERVAL 0x70000
> +
> +/* send status config 2 */
> +#define CQSSC2		0x44
> +
> +/* response for dcmd */
> +#define CQCRDCT		0x48
> +
> +/* response mode error mask */
> +#define CQRMEM		0x50
> +
> +/* task error info */
> +#define CQTERRI		0x54
> +
> +/* command response index */
> +#define CQCRI		0x58
> +
> +/* command response argument */
> +#define CQCRA		0x5C
> +
> +#define CQ_INT_ALL	0xF
> +#define CQIC_DEFAULT_ICCTH 31
> +#define CQIC_DEFAULT_ICTOVAL 1
> +
> +#define CQ_CMD_DBG_RAM	0x158
> +#define CQ_CMD_DBG_RAM_WA 0x198
> +#define CQ_CMD_DBG_RAM_OL 0x19C
> +
> +/* attribute fields */
> +#define VALID(x)	((x & 1) << 0)
> +#define END(x)		((x & 1) << 1)
> +#define INT(x)		((x & 1) << 2)
> +#define ACT(x)		((x & 0x7) << 3)
> +
> +/* data command task descriptor fields */
> +#define FORCED_PROG(x)	((x & 1) << 6)
> +#define CONTEXT(x)	((x & 0xF) << 7)
> +#define DATA_TAG(x)	((x & 1) << 11)
> +#define DATA_DIR(x)	((x & 1) << 12)
> +#define PRIORITY(x)	((x & 1) << 13)
> +#define QBAR(x)		((x & 1) << 14)
> +#define REL_WRITE(x)	((x & 1) << 15)
> +#define BLK_COUNT(x)	((x & 0xFFFF) << 16)
> +#define BLK_ADDR(x)	((x & 0xFFFFFFFF) << 32)
> +
> +/* direct command task descriptor fields */
> +#define CMD_INDEX(x)	((x & 0x3F) << 16)
> +#define CMD_TIMING(x)	((x & 1) << 22)
> +#define RESP_TYPE(x)	((x & 0x3) << 23)
> +
> +/* transfer descriptor fields */
> +#define DAT_LENGTH(x)	((x & 0xFFFF) << 16)
> +#define DAT_ADDR_LO(x)	((x & 0xFFFFFFFF) << 32)
> +#define DAT_ADDR_HI(x)	((x & 0xFFFFFFFF) << 0)
> +
> +struct cmdq_host {
> +	const struct cmdq_host_ops *ops;
> +	void __iomem *mmio;
> +	struct mmc_host *mmc;
> +
> +	/* 64 bit DMA */
> +	bool dma64;
> +	int num_slots;
> +
> +	u32 dcmd_slot;
> +	u32 caps;
> +#define CMDQ_TASK_DESC_SZ_128 0x1
> +
> +	u32 quirks;
> +#define CMDQ_QUIRK_SHORT_TXFR_DESC_SZ 0x1
> +#define CMDQ_QUIRK_NO_DCMD	0x2
> +
> +	bool enabled;
> +	bool halted;
> +	bool init_done;
> +
> +	u8 *desc_base;
> +
> +	/* total descriptor size */
> +	u8 slot_sz;
> +
> +	/* 64/128 bit depends on CQCFG */
> +	u8 task_desc_len;
> +
> +	/* 64 bit on 32-bit arch, 128 bit on 64-bit */
> +	u8 link_desc_len;
> +
> +	u8 *trans_desc_base;
> +	/* same length as transfer descriptor */
> +	u8 trans_desc_len;
> +
> +	dma_addr_t desc_dma_base;
> +	dma_addr_t trans_desc_dma_base;
> +
> +	struct completion halt_comp;
> +	struct mmc_request **mrq_slot;
> +	void *private;
> +};
> +
> +struct cmdq_host_ops {
> +	void (*set_tranfer_params)(struct mmc_host *mmc);
> +	void (*set_data_timeout)(struct mmc_host *mmc, u32 val);
> +	void (*clear_set_irqs)(struct mmc_host *mmc, bool clear);
> +	void (*set_block_size)(struct mmc_host *mmc);
> +	void (*dump_vendor_regs)(struct mmc_host *mmc);
> +	void (*write_l)(struct cmdq_host *host, u32 val, int reg);
> +	u32 (*read_l)(struct cmdq_host *host, int reg);
> +	void (*clear_set_dumpregs)(struct mmc_host *mmc, bool set);
> +};
> +
> +static inline void cmdq_writel(struct cmdq_host *host, u32 val, int reg)
> +{
> +	if (unlikely(host->ops->write_l))
> +		host->ops->write_l(host, val, reg);
> +	else
> +		writel_relaxed(val, host->mmio + reg);
> +}
> +
> +static inline u32 cmdq_readl(struct cmdq_host *host, int reg)
> +{
> +	if (unlikely(host->ops->read_l))
> +		return host->ops->read_l(host, reg);
> +	else
> +		return readl_relaxed(host->mmio + reg);
> +}
> +
> +extern irqreturn_t cmdq_irq(struct mmc_host *mmc, u32 intmask);
> +extern int cmdq_init(struct cmdq_host *cq_host, struct mmc_host *mmc,
> +		     bool dma64);
> +extern struct cmdq_host *cmdq_pltfm_init(struct platform_device *pdev);
> +#endif
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index dfe094a..135a83e 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -420,6 +420,13 @@ struct mmc_host {
>  	u32			dsr;	/* optional driver stage (DSR) value */
>
>  	struct mmc_cmdq_context_info	cmdq_ctx;
> +	/*
> +	 * several cmdq supporting host controllers are extensions
> +	 * of legacy controllers. This variable can be used to store
> +	 * a reference to the cmdq extension of the existing host
> +	 * controller.
> +	 */
> +	void *cmdq_private;
>  	unsigned long		private[0] ____cacheline_aligned;
>  };
>
> @@ -434,6 +441,11 @@ static inline void *mmc_priv(struct mmc_host *host)
>  	return (void *)host->private;
>  }
>
> +static inline void *mmc_cmdq_private(struct mmc_host *host)
> +{
> +	return host->cmdq_private;
> +}
> +
>  #define mmc_host_is_spi(host)	((host)->caps & MMC_CAP_SPI)
>
>  #define mmc_dev(x)	((x)->parent)
>
Ritesh Harjani June 27, 2016, 6:43 a.m. UTC | #2
Hi,


On 6/17/2016 2:15 PM, Shawn Lin wrote:
> On 2016/6/15 21:01, Ritesh Harjani wrote:
>> From: Venkat Gopalakrishnan <venkatg@codeaurora.org>
>>
>> This patch adds CMDQ support for command-queue compatible
>> hosts.
>>
>> Command queue is added in the eMMC-5.1 specification. This
>> enables the controller to process up to 32 requests at
>> a time.
>>
>> Signed-off-by: Asutosh Das <asutoshd@codeaurora.org>
>> Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
>> Signed-off-by: Konstantin Dorfman <kdorfman@codeaurora.org>
>> Signed-off-by: Venkat Gopalakrishnan <venkatg@codeaurora.org>
>> [subhashj@codeaurora.org: fixed trivial merge conflicts]
>> Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
>> [riteshh@codeaurora.org: fixed merge conflicts]
>> Signed-off-by: Ritesh Harjani <riteshh@codeaurora.org>
>> ---
>>  drivers/mmc/host/Kconfig    |  13 +
>>  drivers/mmc/host/Makefile   |   1 +
>>  drivers/mmc/host/cmdq_hci.c | 656
>> ++++++++++++++++++++++++++++++++++++++++++++
>>  drivers/mmc/host/cmdq_hci.h | 211 ++++++++++++++
>>  include/linux/mmc/host.h    |  12 +
>>  5 files changed, 893 insertions(+)
>>  create mode 100644 drivers/mmc/host/cmdq_hci.c
>>  create mode 100644 drivers/mmc/host/cmdq_hci.h
>>
>> diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
>> index e657af0..f1b5433 100644
>> --- a/drivers/mmc/host/Kconfig
>> +++ b/drivers/mmc/host/Kconfig
>> @@ -774,6 +774,19 @@ config MMC_SUNXI
>>        This selects support for the SD/MMC Host Controller on
>>        Allwinner sunxi SoCs.
>>
>> +config MMC_CQ_HCI
>> +    tristate "Command Queue Support"
>> +    depends on HAS_DMA
>> +    help
>> +      This selects the Command Queue Host Controller Interface (CQHCI)
>> +      support present in host controllers of Qualcomm Technologies, Inc
>> +      amongst others.
>> +      This controller supports eMMC devices with command queue support.
>> +
>> +      If you have a controller with this interface, say Y or M here.
>> +
>> +      If unsure, say N.
>> +
>
> well, my 5.1 controller supports HW cmdq, but I think it would be better
That's good to know.

> to enable it by default when MMC_CAP2_CMD_QUEUE is found.
I think we can still keep this config so that cmdq_hci can be disabled
from being compiled, no?

>
>>  config MMC_TOSHIBA_PCI
>>      tristate "Toshiba Type A SD/MMC Card Interface Driver"
>>      depends on PCI
>> diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
>> index af918d2..3715f73 100644
>> --- a/drivers/mmc/host/Makefile
>> +++ b/drivers/mmc/host/Makefile
>> @@ -76,6 +76,7 @@ obj-$(CONFIG_MMC_SDHCI_IPROC)        += sdhci-iproc.o
>>  obj-$(CONFIG_MMC_SDHCI_MSM)        += sdhci-msm.o
>>  obj-$(CONFIG_MMC_SDHCI_ST)        += sdhci-st.o
>>  obj-$(CONFIG_MMC_SDHCI_MICROCHIP_PIC32)    += sdhci-pic32.o
>> +obj-$(CONFIG_MMC_CQ_HCI)        += cmdq_hci.o
>>
>>  ifeq ($(CONFIG_CB710_DEBUG),y)
>>      CFLAGS-cb710-mmc    += -DDEBUG
>> diff --git a/drivers/mmc/host/cmdq_hci.c b/drivers/mmc/host/cmdq_hci.c
>> new file mode 100644
>> index 0000000..68c8e03
>> --- /dev/null
>> +++ b/drivers/mmc/host/cmdq_hci.c
>> @@ -0,0 +1,656 @@
>> +/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 and
>> + * only version 2 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include <linux/delay.h>
>> +#include <linux/highmem.h>
>> +#include <linux/io.h>
>> +#include <linux/module.h>
>> +#include <linux/dma-mapping.h>
>> +#include <linux/slab.h>
>> +#include <linux/scatterlist.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/blkdev.h>
>> +
>> +#include <linux/mmc/mmc.h>
>> +#include <linux/mmc/host.h>
>> +#include <linux/mmc/card.h>
>> +
>> +#include "cmdq_hci.h"
>> +
>> +#define DCMD_SLOT 31
>> +#define NUM_SLOTS 32
>> +
>> +static inline u8 *get_desc(struct cmdq_host *cq_host, u8 tag)
>> +{
>> +    return cq_host->desc_base + (tag * cq_host->slot_sz);
>> +}
>> +
>> +static inline u8 *get_link_desc(struct cmdq_host *cq_host, u8 tag)
>> +{
>> +    u8 *desc = get_desc(cq_host, tag);
>> +
>> +    return desc + cq_host->task_desc_len;
>> +}
>> +
>> +static inline dma_addr_t get_trans_desc_dma(struct cmdq_host
>> *cq_host, u8 tag)
>> +{
>> +    return cq_host->trans_desc_dma_base +
>> +        (cq_host->mmc->max_segs * tag *
>> +         cq_host->trans_desc_len);
>> +}
>> +
>> +static inline u8 *get_trans_desc(struct cmdq_host *cq_host, u8 tag)
>> +{
>> +    return cq_host->trans_desc_base +
>> +        (cq_host->trans_desc_len * cq_host->mmc->max_segs * tag);
>> +}
>> +
>> +static void setup_trans_desc(struct cmdq_host *cq_host, u8 tag)
>> +{
>> +    u8 *link_temp;
>> +    dma_addr_t trans_temp;
>> +
>> +    link_temp = get_link_desc(cq_host, tag);
>> +    trans_temp = get_trans_desc_dma(cq_host, tag);
>> +
>> +    memset(link_temp, 0, cq_host->link_desc_len);
>> +    if (cq_host->link_desc_len > 8)
>> +        *(link_temp + 8) = 0;
>> +
>> +    if (tag == DCMD_SLOT) {
>> +        *link_temp = VALID(0) | ACT(0) | END(1);
>> +        return;
>> +    }
>> +
>> +    *link_temp = VALID(1) | ACT(0x6) | END(0);
>> +
>> +    if (cq_host->dma64) {
>> +        __le64 *data_addr = (__le64 __force *)(link_temp + 4);
>> +        data_addr[0] = cpu_to_le64(trans_temp);
>> +    } else {
>> +        __le32 *data_addr = (__le32 __force *)(link_temp + 4);
>> +        data_addr[0] = cpu_to_le32(trans_temp);
>> +    }
>> +}
>> +
>> +static void cmdq_clear_set_irqs(struct cmdq_host *cq_host, u32 clear,
>> u32 set)
>> +{
>> +    u32 ier;
>> +
>> +    ier = cmdq_readl(cq_host, CQISTE);
>> +    ier &= ~clear;
>> +    ier |= set;
>> +    cmdq_writel(cq_host, ier, CQISTE);
>> +    cmdq_writel(cq_host, ier, CQISGE);
>> +    /* ensure the writes are done */
>> +    mb();
>> +}
>> +
>> +
>> +#define DRV_NAME "cmdq-host"
>> +
>> +static void cmdq_dump_debug_ram(struct cmdq_host *cq_host)
>> +{
>> +    int i = 0;
>> +
>> +    pr_err("---- Debug RAM dump ----\n");
>> +    pr_err(DRV_NAME ": Debug RAM wrap-around: 0x%08x | Debug RAM
>> overlap: 0x%08x\n",
>> +           cmdq_readl(cq_host, CQ_CMD_DBG_RAM_WA),
>> +           cmdq_readl(cq_host, CQ_CMD_DBG_RAM_OL));
>
> well, this is beyond the scope of the standard cmdq engine.
Done. Will check this part.

>
>> +
>> +    while (i < 16) {
>> +        pr_err(DRV_NAME ": Debug RAM dump [%d]: 0x%08x\n", i,
>> +               cmdq_readl(cq_host, CQ_CMD_DBG_RAM + (0x4 * i)));
>> +        i++;
>> +    }
>> +    pr_err("-------------------------\n");
>> +}
>> +
>> +static void cmdq_dumpregs(struct cmdq_host *cq_host)
>> +{
>> +    struct mmc_host *mmc = cq_host->mmc;
>> +
>> +    pr_info(DRV_NAME ": ========== REGISTER DUMP (%s)==========\n",
>> +        mmc_hostname(mmc));
>> +
>> +    pr_info(DRV_NAME ": Caps: 0x%08x      | Version:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQCAP),
>> +        cmdq_readl(cq_host, CQVER));
>> +    pr_info(DRV_NAME ": Queuing config: 0x%08x | Queue Ctrl:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQCFG),
>> +        cmdq_readl(cq_host, CQCTL));
>> +    pr_info(DRV_NAME ": Int stat: 0x%08x      | Int enab:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQIS),
>> +        cmdq_readl(cq_host, CQISTE));
>> +    pr_info(DRV_NAME ": Int sig: 0x%08x      | Int Coal:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQISGE),
>> +        cmdq_readl(cq_host, CQIC));
>> +    pr_info(DRV_NAME ": TDL base: 0x%08x      | TDL up32:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQTDLBA),
>> +        cmdq_readl(cq_host, CQTDLBAU));
>> +    pr_info(DRV_NAME ": Doorbell: 0x%08x      | Comp Notif:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQTDBR),
>> +        cmdq_readl(cq_host, CQTCN));
>> +    pr_info(DRV_NAME ": Dev queue: 0x%08x      | Dev Pend:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQDQS),
>> +        cmdq_readl(cq_host, CQDPT));
>> +    pr_info(DRV_NAME ": Task clr: 0x%08x      | Send stat 1:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQTCLR),
>> +        cmdq_readl(cq_host, CQSSC1));
>> +    pr_info(DRV_NAME ": Send stat 2: 0x%08x      | DCMD resp:
>> 0x%08x\n",
>> +        cmdq_readl(cq_host, CQSSC2),
>> +        cmdq_readl(cq_host, CQCRDCT));
>> +    pr_info(DRV_NAME ": Resp err mask: 0x%08x | Task err:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQRMEM),
>> +        cmdq_readl(cq_host, CQTERRI));
>> +    pr_info(DRV_NAME ": Resp idx 0x%08x      | Resp arg:  0x%08x\n",
>> +        cmdq_readl(cq_host, CQCRI),
>> +        cmdq_readl(cq_host, CQCRA));
>> +    pr_info(DRV_NAME ": ===========================================\n");
>> +
>> +    cmdq_dump_debug_ram(cq_host);
>> +    if (cq_host->ops->dump_vendor_regs)
>> +        cq_host->ops->dump_vendor_regs(mmc);
>> +}
>> +
>> +/**
>> + * The allocated descriptor table for task, link & transfer descriptors
>> + * looks like:
>> + * |----------|
>> + * |task desc |  |->|----------|
>> + * |----------|  |  |trans desc|
>> + * |link desc-|->|  |----------|
>> + * |----------|          .
>> + *      .                .
>> + *  no. of slots      max-segs
>> + *      .           |----------|
>> + * |----------|
>> + * The idea here is to create the [task+trans] table and mark & point
>> the
>> + * link desc to the transfer desc table on a per slot basis.
>> + */
>> +static int cmdq_host_alloc_tdl(struct cmdq_host *cq_host)
>> +{
>> +
>> +    size_t desc_size;
>> +    size_t data_size;
>> +    int i = 0;
>> +
>> +    /* task descriptor can be 64/128 bit irrespective of arch */
>> +    if (cq_host->caps & CMDQ_TASK_DESC_SZ_128) {
>> +        cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) |
>> +                   CQ_TASK_DESC_SZ, CQCFG);
>> +        cq_host->task_desc_len = 16;
>> +    } else {
>> +        cq_host->task_desc_len = 8;
>> +    }
>> +
>> +    /*
>> +     * 96 bits length of transfer desc instead of 128 bits which means
>> +     * ADMA would expect next valid descriptor at the 96th bit
>> +     * or 128th bit
>> +     */
>> +    if (cq_host->dma64) {
>> +        if (cq_host->quirks & CMDQ_QUIRK_SHORT_TXFR_DESC_SZ)
>> +            cq_host->trans_desc_len = 12;
>> +        else
>> +            cq_host->trans_desc_len = 16;
>> +        cq_host->link_desc_len = 16;
>> +    } else {
>> +        cq_host->trans_desc_len = 8;
>> +        cq_host->link_desc_len = 8;
>> +    }
>> +
>> +    /* total size of a slot: 1 task & 1 transfer (link) */
>> +    cq_host->slot_sz = cq_host->task_desc_len + cq_host->link_desc_len;
>> +
>> +    desc_size = cq_host->slot_sz * cq_host->num_slots;
>> +
>> +    data_size = cq_host->trans_desc_len * cq_host->mmc->max_segs *
>> +        (cq_host->num_slots - 1);
>> +
>> +    pr_info("%s: desc_size: %d data_sz: %d slot-sz: %d\n", __func__,
>> +        (int)desc_size, (int)data_size, cq_host->slot_sz);
>> +
>> +    /*
>> +     * allocate a dma-mapped chunk of memory for the descriptors
>> +     * allocate a dma-mapped chunk of memory for link descriptors
>> +     * setup each link-desc memory offset per slot-number to
>> +     * the descriptor table.
>> +     */
>> +    cq_host->desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc),
>> +                         desc_size,
>> +                         &cq_host->desc_dma_base,
>> +                         GFP_KERNEL);
>> +    cq_host->trans_desc_base =
>> dmam_alloc_coherent(mmc_dev(cq_host->mmc),
>> +                          data_size,
>> +                          &cq_host->trans_desc_dma_base,
>> +                          GFP_KERNEL);
>> +    if (!cq_host->desc_base || !cq_host->trans_desc_base)
>> +        return -ENOMEM;
>> +
>> +    pr_info("desc-base: 0x%p trans-base: 0x%p\n desc_dma 0x%llx
>> trans_dma: 0x%llx\n",
>> +         cq_host->desc_base, cq_host->trans_desc_base,
>> +        (unsigned long long)cq_host->desc_dma_base,
>> +        (unsigned long long) cq_host->trans_desc_dma_base);
>> +
>> +    for (; i < (cq_host->num_slots); i++)
>> +        setup_trans_desc(cq_host, i);
>> +
>> +    return 0;
>> +}
>> +
>> +static int cmdq_enable(struct mmc_host *mmc)
>> +{
>> +    int err = 0;
>> +    u32 cqcfg;
>> +    bool dcmd_enable;
>> +    struct cmdq_host *cq_host = mmc_cmdq_private(mmc);
>> +
>> +    if (!cq_host || !mmc->card || !mmc_card_cmdq(mmc->card)) {
>> +        err = -EINVAL;
>> +        goto out;
>> +    }
>> +
>> +    if (cq_host->enabled)
>> +        goto out;
>> +
>> +    cqcfg = cmdq_readl(cq_host, CQCFG);
>> +    if (cqcfg & 0x1) {
>> +        pr_info("%s: %s: cq_host is already enabled\n",
>> +                mmc_hostname(mmc), __func__);
>> +        WARN_ON(1);
>> +        goto out;
>> +    }
>> +
>> +    if (cq_host->quirks & CMDQ_QUIRK_NO_DCMD)
>
> Why do you need it? DCMD should certainly be supported.
> Have you found any vendor's CQE that doesn't support
> it when fetching the TD in slot#31?
No, we haven't seen one yet. From the spec's perspective,
this slot can be used for either a DCMD or a data transfer
task descriptor. So to facilitate this, we added a quirk
to disable/enable DCMD.
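
As an illustration (sketch only), a vendor driver that doesn't want
DCMD could set the quirk before cmdq_enable() runs:

	/* hypothetical vendor init code */
	cq_host->quirks |= CMDQ_QUIRK_NO_DCMD;

cmdq_enable() then leaves CQ_DCMD clear in CQCFG, so per the spec slot
31 can hold an ordinary data transfer task descriptor.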

>
>> +        dcmd_enable = false;
>> +    else
>> +        dcmd_enable = true;
>> +
>> +    cqcfg = ((cq_host->caps & CMDQ_TASK_DESC_SZ_128 ? CQ_TASK_DESC_SZ
>> : 0) |
>> +            (dcmd_enable ? CQ_DCMD : 0));
>> +
>> +    cmdq_writel(cq_host, cqcfg, CQCFG);
>> +    /* enable CQ_HOST */
>> +    cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) | CQ_ENABLE,
>> +            CQCFG);
>> +
>> +    if (!cq_host->desc_base ||
>> +            !cq_host->trans_desc_base) {
>> +        err = cmdq_host_alloc_tdl(cq_host);
>> +        if (err)
>> +            goto out;
>> +        cmdq_writel(cq_host, lower_32_bits(cq_host->desc_dma_base),
>> +                CQTDLBA);
>> +        cmdq_writel(cq_host, upper_32_bits(cq_host->desc_dma_base),
>> +                CQTDLBAU);
>> +        cmdq_dumpregs(cq_host);
>> +    }
>> +
>> +    /*
>> +     * disable all vendor interrupts
>> +     * enable CMDQ interrupts
>> +     * enable the vendor error interrupts
>> +     */
>> +    if (cq_host->ops->clear_set_irqs)
>> +        cq_host->ops->clear_set_irqs(mmc, true);
>> +
>> +    cmdq_clear_set_irqs(cq_host, 0x0, CQ_INT_ALL);
>> +
>> +    /* cq_host would use this rca to address the card */
>> +    cmdq_writel(cq_host, mmc->card->rca, CQSSC2);
>> +
>> +    /* send QSR at lesser intervals than the default */
>> +    cmdq_writel(cq_host, cmdq_readl(cq_host, CQSSC1) |
>> SEND_QSR_INTERVAL,
>> +                CQSSC1);
>> +
>> +    /* ensure the writes are done before enabling CQE */
>> +    mb();
>> +
>> +    cq_host->enabled = true;
>> +
>> +    if (cq_host->ops->set_block_size)
>> +        cq_host->ops->set_block_size(cq_host->mmc);
>> +
>> +    if (cq_host->ops->set_data_timeout)
>> +        cq_host->ops->set_data_timeout(mmc, 0xf);
>> +
>> +    if (cq_host->ops->clear_set_dumpregs)
>> +        cq_host->ops->clear_set_dumpregs(mmc, 1);
>> +
>> +out:
>> +    return err;
>> +}
>> +
>> +static void cmdq_disable(struct mmc_host *mmc, bool soft)
>> +{
>> +    struct cmdq_host *cq_host = (struct cmdq_host
>> *)mmc_cmdq_private(mmc);
>> +
>> +    if (soft) {
>> +        cmdq_writel(cq_host, cmdq_readl(
>> +                    cq_host, CQCFG) & ~(CQ_ENABLE),
>> +                CQCFG);
>> +    }
>> +
>
> No need { }
Done.

>
>> +    cq_host->enabled = false;
>> +}
>> +
>> +static void cmdq_prep_task_desc(struct mmc_request *mrq,
>> +                    u64 *data, bool intr, bool qbr)
>> +{
>> +    struct mmc_cmdq_req *cmdq_req = mrq->cmdq_req;
>> +    u32 req_flags = cmdq_req->cmdq_req_flags;
>> +
>> +    pr_debug("%s: %s: data-tag: 0x%08x - dir: %d - prio: %d - cnt:
>> 0x%08x -    addr: 0x%llx\n",
>> +         mmc_hostname(mrq->host), __func__,
>> +         !!(req_flags & DAT_TAG), !!(req_flags & DIR),
>> +         !!(req_flags & PRIO), cmdq_req->data.blocks,
>> +         (u64)mrq->cmdq_req->blk_addr);
>> +
>> +    *data = VALID(1) |
>> +        END(1) |
>> +        INT(intr) |
>> +        ACT(0x5) |
>> +        FORCED_PROG(!!(req_flags & FORCED_PRG)) |
>> +        CONTEXT(mrq->cmdq_req->ctx_id) |
>> +        DATA_TAG(!!(req_flags & DAT_TAG)) |
>> +        DATA_DIR(!!(req_flags & DIR)) |
>> +        PRIORITY(!!(req_flags & PRIO)) |
>> +        QBAR(qbr) |
>> +        REL_WRITE(!!(req_flags & REL_WR)) |
>> +        BLK_COUNT(mrq->cmdq_req->data.blocks) |
>> +        BLK_ADDR((u64)mrq->cmdq_req->blk_addr);
>> +}
>> +
>> +static int cmdq_dma_map(struct mmc_host *host, struct mmc_request *mrq)
>> +{
>> +    int sg_count;
>> +    struct mmc_data *data = mrq->data;
>> +
>> +    if (!data)
>> +        return -EINVAL;
>> +
>> +    sg_count = dma_map_sg(mmc_dev(host), data->sg,
>> +                  data->sg_len,
>> +                  (data->flags & MMC_DATA_WRITE) ?
>> +                  DMA_TO_DEVICE : DMA_FROM_DEVICE);
>> +    if (!sg_count) {
>> +        pr_err("%s: sg-len: %d\n", __func__, data->sg_len);
>> +        return -ENOMEM;
>> +    }
>> +
>> +    return sg_count;
>> +}
>> +
>> +static void cmdq_set_tran_desc(u8 *desc,
>> +                 dma_addr_t addr, int len, bool end)
>> +{
>> +    __le64 *dataddr = (__le64 __force *)(desc + 4);
>> +    __le32 *attr = (__le32 __force *)desc;
>> +
>> +    *attr = (VALID(1) |
>> +         END(end ? 1 : 0) |
>> +         INT(0) |
>> +         ACT(0x4) |
>> +         DAT_LENGTH(len));
>> +
>> +    dataddr[0] = cpu_to_le64(addr);
>> +}
>> +
>> +static int cmdq_prep_tran_desc(struct mmc_request *mrq,
>> +                   struct cmdq_host *cq_host, int tag)
>> +{
>> +    struct mmc_data *data = mrq->data;
>> +    int i, sg_count, len;
>> +    bool end = false;
>> +    dma_addr_t addr;
>> +    u8 *desc;
>> +    struct scatterlist *sg;
>> +
>> +    sg_count = cmdq_dma_map(mrq->host, mrq);
>> +    if (sg_count < 0) {
>> +        pr_err("%s: %s: unable to map sg lists, %d\n",
>> +                mmc_hostname(mrq->host), __func__, sg_count);
>> +        return sg_count;
>> +    }
>> +
>> +    desc = get_trans_desc(cq_host, tag);
>> +    memset(desc, 0, cq_host->trans_desc_len * cq_host->mmc->max_segs);
>> +
>> +    for_each_sg(data->sg, sg, sg_count, i) {
>> +        addr = sg_dma_address(sg);
>> +        len = sg_dma_len(sg);
>> +
>> +        if ((i+1) == sg_count)
>> +            end = true;
>> +        cmdq_set_tran_desc(desc, addr, len, end);
>> +        desc += cq_host->trans_desc_len;
>> +    }
>> +
>> +    pr_debug("%s: req: 0x%p tag: %d calc_trans_des: 0x%p sg-cnt: %d\n",
>> +        __func__, mrq->req, tag, desc, sg_count);
>> +
>> +    return 0;
>> +}
>> +
>> +static void cmdq_prep_dcmd_desc(struct mmc_host *mmc,
>> +                   struct mmc_request *mrq)
>> +{
>> +    u64 *task_desc = NULL;
>> +    u64 data = 0;
>> +    u8 resp_type;
>> +    u8 *desc;
>> +    __le64 *dataddr;
>> +    struct cmdq_host *cq_host = mmc_cmdq_private(mmc);
>> +    u8 timing;
>> +
>> +    if (!(mrq->cmd->flags & MMC_RSP_PRESENT)) {
>> +        resp_type = 0x0;
>> +        timing = 0x1;
>> +    } else {
>> +        if (mrq->cmd->flags & MMC_RSP_R1B) {
>> +            resp_type = 0x3;
>> +            timing = 0x0;
>> +        } else {
>> +            resp_type = 0x2;
>> +            timing = 0x1;
>> +        }
>> +    }
>> +
>> +    task_desc = (__le64 __force *)get_desc(cq_host, cq_host->dcmd_slot);
>> +    memset(task_desc, 0, cq_host->task_desc_len);
>> +    data |= (VALID(1) |
>> +         END(1) |
>> +         INT(1) |
>> +         QBAR(1) |
>> +         ACT(0x5) |
>> +         CMD_INDEX(mrq->cmd->opcode) |
>> +         CMD_TIMING(timing) | RESP_TYPE(resp_type));
>> +    *task_desc |= data;
>> +    desc = (u8 *)task_desc;
>> +    pr_debug("cmdq: dcmd: cmd: %d timing: %d resp: %d\n",
>> +        mrq->cmd->opcode, timing, resp_type);
>> +    dataddr = (__le64 __force *)(desc + 4);
>> +    dataddr[0] = cpu_to_le64((u64)mrq->cmd->arg);
>> +
>> +}
>> +
>> +static int cmdq_request(struct mmc_host *mmc, struct mmc_request *mrq)
>> +{
>> +    int err;
>> +    u64 data = 0;
>> +    u64 *task_desc = NULL;
>> +    u32 tag = mrq->cmdq_req->tag;
>> +    struct cmdq_host *cq_host = (struct cmdq_host
>> *)mmc_cmdq_private(mmc);
>> +
>> +    if (!cq_host->enabled) {
>> +        pr_err("%s: CMDQ host not enabled yet !!!\n",
>> +               mmc_hostname(mmc));
>> +        err = -EINVAL;
>> +        goto out;
>> +    }
>> +
>> +    if (mrq->cmdq_req->cmdq_req_flags & DCMD) {
>> +        cmdq_prep_dcmd_desc(mmc, mrq);
>> +        cq_host->mrq_slot[DCMD_SLOT] = mrq;
>> +        cmdq_writel(cq_host, 1 << DCMD_SLOT, CQTDBR);
>> +        return 0;
>> +    }
>> +
>> +    task_desc = (__le64 __force *)get_desc(cq_host, tag);
>> +
>> +    cmdq_prep_task_desc(mrq, &data, 1,
>> +                (mrq->cmdq_req->cmdq_req_flags & QBR));
>> +    *task_desc = cpu_to_le64(data);
>> +
>> +    err = cmdq_prep_tran_desc(mrq, cq_host, tag);
>> +    if (err) {
>> +        pr_err("%s: %s: failed to setup tx desc: %d\n",
>> +               mmc_hostname(mmc), __func__, err);
>> +        return err;
>> +    }
>> +
>> +    BUG_ON(cmdq_readl(cq_host, CQTDBR) & (1 << tag));
>> +
>> +    cq_host->mrq_slot[tag] = mrq;
>> +    if (cq_host->ops->set_tranfer_params)
>> +        cq_host->ops->set_tranfer_params(mmc);
>> +
>> +    cmdq_writel(cq_host, 1 << tag, CQTDBR);
>> +
>> +out:
>> +    return err;
>> +}
>> +
>> +static void cmdq_finish_data(struct mmc_host *mmc, unsigned int tag)
>> +{
>> +    struct mmc_request *mrq;
>> +    struct cmdq_host *cq_host = (struct cmdq_host
>> *)mmc_cmdq_private(mmc);
>> +
>> +    mrq = cq_host->mrq_slot[tag];
>> +    mrq->done(mrq);
>> +}
>> +
>> +irqreturn_t cmdq_irq(struct mmc_host *mmc, u32 intmask)
>> +{
>> +    u32 status;
>> +    unsigned long tag = 0, comp_status;
>> +    struct cmdq_host *cq_host = (struct cmdq_host
>> *)mmc_cmdq_private(mmc);
>> +
>> +    status = cmdq_readl(cq_host, CQIS);
>> +    cmdq_writel(cq_host, status, CQIS);
>> +
>
> I don't think it's good to ignore the TERR here when finding
> an invalid task desc.
No, we are not ignoring it. CQTERRI is only updated
in case of errors and for error recovery procedures.
In later patch revisions, we use CQTERRI to handle errors.


>
>
>> +    if (status & CQIS_TCC) {
>> +        /* read QCTCN and complete the request */
>> +        comp_status = cmdq_readl(cq_host, CQTCN);
>> +        if (!comp_status)
>> +            goto out;
>> +
>> +        for_each_set_bit(tag, &comp_status, cq_host->num_slots) {
>> +            /* complete the corresponding mrq */
>> +            pr_debug("%s: completing tag -> %lu\n",
>> +                 mmc_hostname(mmc), tag);
>> +            cmdq_finish_data(mmc, tag);
>> +        }
>> +        cmdq_writel(cq_host, comp_status, CQTCN);
>> +    }
>> +
>> +    if (status & CQIS_RED) {
>> +        /* task response has an error */
>> +        pr_err("%s: RED error %d !!!\n", mmc_hostname(mmc), status);
>> +        cmdq_dumpregs(cq_host);
>> +    }
>> +
>> +out:
>> +    return IRQ_HANDLED;
>> +}
>> +EXPORT_SYMBOL(cmdq_irq);
>> +
>> +static void cmdq_post_req(struct mmc_host *host, struct mmc_request
>> *mrq,
>> +              int err)
>> +{
>> +    struct mmc_data *data = mrq->data;
>> +
>> +    if (data) {
>> +        data->error = err;
>> +        dma_unmap_sg(mmc_dev(host), data->sg, data->sg_len,
>> +                 (data->flags & MMC_DATA_READ) ?
>> +                 DMA_FROM_DEVICE : DMA_TO_DEVICE);
>> +        if (err)
>> +            data->bytes_xfered = 0;
>> +        else
>> +            data->bytes_xfered = blk_rq_bytes(mrq->req);
>> +    }
>> +}
>> +
>> +static const struct mmc_cmdq_host_ops cmdq_host_ops = {
>> +    .enable = cmdq_enable,
>> +    .disable = cmdq_disable,
>> +    .request = cmdq_request,
>> +    .post_req = cmdq_post_req,
>> +};
>> +
>> +struct cmdq_host *cmdq_pltfm_init(struct platform_device *pdev)
>> +{
>> +    struct cmdq_host *cq_host;
>> +    struct resource *cmdq_memres = NULL;
>> +
>> +    /* check and setup CMDQ interface */
>> +    cmdq_memres = platform_get_resource_byname(pdev, IORESOURCE_MEM,
>> +                           "cmdq_mem");
>
> My cmdq engine is appended after the mmio of sdhci, so may I map the
> whole region when probing sdhci and pass the offset of the CMDQ engine
> to the hci?
It would be best to have the "cmdq_mem" I/O resource specified as per
the standard implementation (cmdq_hci), as mentioned above.


>
>> +    if (!cmdq_memres) {
>> +        dev_dbg(&pdev->dev, "CMDQ not supported\n");
>> +        return ERR_PTR(-EINVAL);
>> +    }
>> +
>> +    cq_host = kzalloc(sizeof(*cq_host), GFP_KERNEL);
>> +    if (!cq_host) {
>> +        dev_err(&pdev->dev, "failed to allocate memory for CMDQ\n");
>> +        return ERR_PTR(-ENOMEM);
>> +    }
>> +    cq_host->mmio = devm_ioremap(&pdev->dev,
>> +                     cmdq_memres->start,
>> +                     resource_size(cmdq_memres));
>> +    if (!cq_host->mmio) {
>> +        dev_err(&pdev->dev, "failed to remap cmdq regs\n");
>> +        kfree(cq_host);
>> +        return ERR_PTR(-EBUSY);
>> +    }
>> +    dev_dbg(&pdev->dev, "CMDQ ioremap: done\n");
>> +
>> +    return cq_host;
>> +}
>> +EXPORT_SYMBOL(cmdq_pltfm_init);
>> +
>> +int cmdq_init(struct cmdq_host *cq_host, struct mmc_host *mmc,
>> +          bool dma64)
>> +{
>> +    int err = 0;
>> +
>> +    cq_host->dma64 = dma64;
>> +    cq_host->mmc = mmc;
>> +    cq_host->mmc->cmdq_private = cq_host;
>> +
>> +    cq_host->num_slots = NUM_SLOTS;
>> +    cq_host->dcmd_slot = DCMD_SLOT;
>> +
>> +    mmc->cmdq_ops = &cmdq_host_ops;
>> +
>> +    cq_host->mrq_slot = kzalloc(sizeof(cq_host->mrq_slot) *
>> +                    cq_host->num_slots, GFP_KERNEL);
>> +    if (!cq_host->mrq_slot)
>> +        return -ENOMEM;
>> +
>> +    init_completion(&cq_host->halt_comp);
>> +    return err;
>> +}
>> +EXPORT_SYMBOL(cmdq_init);
>> diff --git a/drivers/mmc/host/cmdq_hci.h b/drivers/mmc/host/cmdq_hci.h
>> new file mode 100644
>> index 0000000..e7f5a15
>> --- /dev/null
>> +++ b/drivers/mmc/host/cmdq_hci.h
>> @@ -0,0 +1,211 @@
>> +/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 and
>> + * only version 2 as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +#ifndef LINUX_MMC_CQ_HCI_H
>> +#define LINUX_MMC_CQ_HCI_H
>> +#include <linux/mmc/core.h>
>> +
>> +/* registers */
>> +/* version */
>> +#define CQVER        0x00
>> +/* capabilities */
>> +#define CQCAP        0x04
>> +/* configuration */
>> +#define CQCFG        0x08
>> +#define CQ_DCMD        0x00001000
>> +#define CQ_TASK_DESC_SZ 0x00000100
>> +#define CQ_ENABLE    0x00000001
>> +
>> +/* control */
>> +#define CQCTL        0x0C
>> +#define CLEAR_ALL_TASKS 0x00000100
>> +#define HALT        0x00000001
>> +
>> +/* interrupt status */
>> +#define CQIS        0x10
>> +#define CQIS_HAC    (1 << 0)
>> +#define CQIS_TCC    (1 << 1)
>> +#define CQIS_RED    (1 << 2)
>> +#define CQIS_TCL    (1 << 3)
>> +
>
> Okay, I checked all these definitions and they seem
> fine, but I think it's better to use BIT(x) instead of
> (1 << x). Also, spaces and tabs are mixed here.
Done.
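i.e. with BIT() from <linux/bitops.h> these would read:

#define CQIS_HAC	BIT(0)
#define CQIS_TCC	BIT(1)
#define CQIS_RED	BIT(2)
#define CQIS_TCL	BIT(3)

and likewise for the CQIC bits.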

>
>> +/* interrupt status enable */
>> +#define CQISTE        0x14
>> +
>> +/* interrupt signal enable */
>> +#define CQISGE        0x18
>> +
>> +/* interrupt coalescing */
>> +#define CQIC        0x1C
>> +#define CQIC_ENABLE    (1 << 31)
>> +#define CQIC_RESET    (1 << 16)
>> +#define CQIC_ICCTHWEN    (1 << 15)
>> +#define CQIC_ICCTH(x)    ((x & 0x1F) << 8)
>> +#define CQIC_ICTOVALWEN (1 << 7)
>> +#define CQIC_ICTOVAL(x) (x & 0x7F)
>> +
>> +/* task list base address */
>> +#define CQTDLBA        0x20
>> +
>> +/* task list base address upper */
>> +#define CQTDLBAU    0x24
>> +
>> +/* door-bell */
>> +#define CQTDBR        0x28
>> +
>> +/* task completion notification */
>> +#define CQTCN        0x2C
>> +
>> +/* device queue status */
>> +#define CQDQS        0x30
>> +
>> +/* device pending tasks */
>> +#define CQDPT        0x34
>> +
>> +/* task clear */
>> +#define CQTCLR        0x38
>> +
>> +/* send status config 1 */
>> +#define CQSSC1        0x40
>> +/*
>> + * Value n means CQE would send CMD13 during the transfer of data block
>> + * BLOCK_CNT-n
>> + */
>> +#define SEND_QSR_INTERVAL 0x70000
>> +
>> +/* send status config 2 */
>> +#define CQSSC2        0x44
>> +
>> +/* response for dcmd */
>> +#define CQCRDCT        0x48
>> +
>> +/* response mode error mask */
>> +#define CQRMEM        0x50
>> +
>> +/* task error info */
>> +#define CQTERRI        0x54
>> +
>> +/* command response index */
>> +#define CQCRI        0x58
>> +
>> +/* command response argument */
>> +#define CQCRA        0x5C
>> +
>> +#define CQ_INT_ALL    0xF
>> +#define CQIC_DEFAULT_ICCTH 31
>> +#define CQIC_DEFAULT_ICTOVAL 1
>> +
>> +#define CQ_CMD_DBG_RAM    0x158
>> +#define CQ_CMD_DBG_RAM_WA 0x198
>> +#define CQ_CMD_DBG_RAM_OL 0x19C
>> +
>> +/* attribute fields */
>> +#define VALID(x)    ((x & 1) << 0)
>> +#define END(x)        ((x & 1) << 1)
>> +#define INT(x)        ((x & 1) << 2)
>> +#define ACT(x)        ((x & 0x7) << 3)
>> +
>> +/* data command task descriptor fields */
>> +#define FORCED_PROG(x)    ((x & 1) << 6)
>> +#define CONTEXT(x)    ((x & 0xF) << 7)
>> +#define DATA_TAG(x)    ((x & 1) << 11)
>> +#define DATA_DIR(x)    ((x & 1) << 12)
>> +#define PRIORITY(x)    ((x & 1) << 13)
>> +#define QBAR(x)        ((x & 1) << 14)
>> +#define REL_WRITE(x)    ((x & 1) << 15)
>> +#define BLK_COUNT(x)    ((x & 0xFFFF) << 16)
>> +#define BLK_ADDR(x)    ((x & 0xFFFFFFFF) << 32)
>> +
>> +/* direct command task descriptor fields */
>> +#define CMD_INDEX(x)    ((x & 0x3F) << 16)
>> +#define CMD_TIMING(x)    ((x & 1) << 22)
>> +#define RESP_TYPE(x)    ((x & 0x3) << 23)
>> +
>> +/* transfer descriptor fields */
>> +#define DAT_LENGTH(x)    ((x & 0xFFFF) << 16)
>> +#define DAT_ADDR_LO(x)    ((x & 0xFFFFFFFF) << 32)
>> +#define DAT_ADDR_HI(x)    ((x & 0xFFFFFFFF) << 0)
>> +
>> +struct cmdq_host {
>> +    const struct cmdq_host_ops *ops;
>> +    void __iomem *mmio;
>> +    struct mmc_host *mmc;
>> +
>> +    /* 64 bit DMA */
>> +    bool dma64;
>> +    int num_slots;
>> +
>> +    u32 dcmd_slot;
>> +    u32 caps;
>> +#define CMDQ_TASK_DESC_SZ_128 0x1
>> +
>> +    u32 quirks;
>> +#define CMDQ_QUIRK_SHORT_TXFR_DESC_SZ 0x1
>> +#define CMDQ_QUIRK_NO_DCMD    0x2
>> +
>> +    bool enabled;
>> +    bool halted;
>> +    bool init_done;
>> +
>> +    u8 *desc_base;
>> +
>> +    /* total descriptor size */
>> +    u8 slot_sz;
>> +
>> +    /* 64/128 bit depends on CQCFG */
>> +    u8 task_desc_len;
>> +
>> +    /* 64 bit on 32-bit arch, 128 bit on 64-bit */
>> +    u8 link_desc_len;
>> +
>> +    u8 *trans_desc_base;
>> +    /* same length as transfer descriptor */
>> +    u8 trans_desc_len;
>> +
>> +    dma_addr_t desc_dma_base;
>> +    dma_addr_t trans_desc_dma_base;
>> +
>> +    struct completion halt_comp;
>> +    struct mmc_request **mrq_slot;
>> +    void *private;
>> +};
>> +
>> +struct cmdq_host_ops {
>> +    void (*set_tranfer_params)(struct mmc_host *mmc);
>> +    void (*set_data_timeout)(struct mmc_host *mmc, u32 val);
>> +    void (*clear_set_irqs)(struct mmc_host *mmc, bool clear);
>> +    void (*set_block_size)(struct mmc_host *mmc);
>> +    void (*dump_vendor_regs)(struct mmc_host *mmc);
>> +    void (*write_l)(struct cmdq_host *host, u32 val, int reg);
>> +    u32 (*read_l)(struct cmdq_host *host, int reg);
>> +    void (*clear_set_dumpregs)(struct mmc_host *mmc, bool set);
>> +};
>> +
>> +static inline void cmdq_writel(struct cmdq_host *host, u32 val, int reg)
>> +{
>> +    if (unlikely(host->ops->write_l))
>> +        host->ops->write_l(host, val, reg);
>> +    else
>> +        writel_relaxed(val, host->mmio + reg);
>> +}
>> +
>> +static inline u32 cmdq_readl(struct cmdq_host *host, int reg)
>> +{
>> +    if (unlikely(host->ops->read_l))
>> +        return host->ops->read_l(host, reg);
>> +    else
>> +        return readl_relaxed(host->mmio + reg);
>> +}
>> +
>> +extern irqreturn_t cmdq_irq(struct mmc_host *mmc, u32 intmask);
>> +extern int cmdq_init(struct cmdq_host *cq_host, struct mmc_host *mmc,
>> +             bool dma64);
>> +extern struct cmdq_host *cmdq_pltfm_init(struct platform_device *pdev);
>> +#endif
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>> index dfe094a..135a83e 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -420,6 +420,13 @@ struct mmc_host {
>>      u32            dsr;    /* optional driver stage (DSR) value */
>>
>>      struct mmc_cmdq_context_info    cmdq_ctx;
>> +    /*
>> +     * several cmdq supporting host controllers are extensions
>> +     * of legacy controllers. This variable can be used to store
>> +     * a reference to the cmdq extension of the existing host
>> +     * controller.
>> +     */
>> +    void *cmdq_private;
>>      unsigned long        private[0] ____cacheline_aligned;
>>  };
>>
>> @@ -434,6 +441,11 @@ static inline void *mmc_priv(struct mmc_host *host)
>>      return (void *)host->private;
>>  }
>>
>> +static inline void *mmc_cmdq_private(struct mmc_host *host)
>> +{
>> +    return host->cmdq_private;
>> +}
>> +
>>  #define mmc_host_is_spi(host)    ((host)->caps & MMC_CAP_SPI)
>>
>>  #define mmc_dev(x)    ((x)->parent)
>>
>
>

--
BR
Ritesh

Patch

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index e657af0..f1b5433 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -774,6 +774,19 @@  config MMC_SUNXI
 	  This selects support for the SD/MMC Host Controller on
 	  Allwinner sunxi SoCs.
 
+config MMC_CQ_HCI
+	tristate "Command Queue Support"
+	depends on HAS_DMA
+	help
+	  This selects the Command Queue Host Controller Interface (CQHCI)
+	  support present in host controllers of Qualcomm Technologies, Inc
+	  amongst others.
+	  This controller supports eMMC devices with command queue support.
+
+	  If you have a controller with this interface, say Y or M here.
+
+	  If unsure, say N.
+
 config MMC_TOSHIBA_PCI
 	tristate "Toshiba Type A SD/MMC Card Interface Driver"
 	depends on PCI
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index af918d2..3715f73 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -76,6 +76,7 @@  obj-$(CONFIG_MMC_SDHCI_IPROC)		+= sdhci-iproc.o
 obj-$(CONFIG_MMC_SDHCI_MSM)		+= sdhci-msm.o
 obj-$(CONFIG_MMC_SDHCI_ST)		+= sdhci-st.o
 obj-$(CONFIG_MMC_SDHCI_MICROCHIP_PIC32)	+= sdhci-pic32.o
+obj-$(CONFIG_MMC_CQ_HCI)		+= cmdq_hci.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
 	CFLAGS-cb710-mmc	+= -DDEBUG
diff --git a/drivers/mmc/host/cmdq_hci.c b/drivers/mmc/host/cmdq_hci.c
new file mode 100644
index 0000000..68c8e03
--- /dev/null
+++ b/drivers/mmc/host/cmdq_hci.c
@@ -0,0 +1,656 @@ 
+/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/platform_device.h>
+#include <linux/blkdev.h>
+
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+
+#include "cmdq_hci.h"
+
+#define DCMD_SLOT 31
+#define NUM_SLOTS 32
+
+static inline u8 *get_desc(struct cmdq_host *cq_host, u8 tag)
+{
+	return cq_host->desc_base + (tag * cq_host->slot_sz);
+}
+
+static inline u8 *get_link_desc(struct cmdq_host *cq_host, u8 tag)
+{
+	u8 *desc = get_desc(cq_host, tag);
+
+	return desc + cq_host->task_desc_len;
+}
+
+static inline dma_addr_t get_trans_desc_dma(struct cmdq_host *cq_host, u8 tag)
+{
+	return cq_host->trans_desc_dma_base +
+		(cq_host->mmc->max_segs * tag *
+		 cq_host->trans_desc_len);
+}
+
+static inline u8 *get_trans_desc(struct cmdq_host *cq_host, u8 tag)
+{
+	return cq_host->trans_desc_base +
+		(cq_host->trans_desc_len * cq_host->mmc->max_segs * tag);
+}
+
+static void setup_trans_desc(struct cmdq_host *cq_host, u8 tag)
+{
+	u8 *link_temp;
+	dma_addr_t trans_temp;
+
+	link_temp = get_link_desc(cq_host, tag);
+	trans_temp = get_trans_desc_dma(cq_host, tag);
+
+	memset(link_temp, 0, cq_host->link_desc_len);
+	if (cq_host->link_desc_len > 8)
+		*(link_temp + 8) = 0;
+
+	if (tag == DCMD_SLOT) {
+		*link_temp = VALID(0) | ACT(0) | END(1);
+		return;
+	}
+
+	*link_temp = VALID(1) | ACT(0x6) | END(0);
+
+	if (cq_host->dma64) {
+		__le64 *data_addr = (__le64 __force *)(link_temp + 4);
+		data_addr[0] = cpu_to_le64(trans_temp);
+	} else {
+		__le32 *data_addr = (__le32 __force *)(link_temp + 4);
+		data_addr[0] = cpu_to_le32(trans_temp);
+	}
+}
+
+static void cmdq_clear_set_irqs(struct cmdq_host *cq_host, u32 clear, u32 set)
+{
+	u32 ier;
+
+	ier = cmdq_readl(cq_host, CQISTE);
+	ier &= ~clear;
+	ier |= set;
+	cmdq_writel(cq_host, ier, CQISTE);
+	cmdq_writel(cq_host, ier, CQISGE);
+	/* ensure the writes are done */
+	mb();
+}
+
+
+#define DRV_NAME "cmdq-host"
+
+static void cmdq_dump_debug_ram(struct cmdq_host *cq_host)
+{
+	int i = 0;
+
+	pr_err("---- Debug RAM dump ----\n");
+	pr_err(DRV_NAME ": Debug RAM wrap-around: 0x%08x | Debug RAM overlap: 0x%08x\n",
+	       cmdq_readl(cq_host, CQ_CMD_DBG_RAM_WA),
+	       cmdq_readl(cq_host, CQ_CMD_DBG_RAM_OL));
+
+	while (i < 16) {
+		pr_err(DRV_NAME ": Debug RAM dump [%d]: 0x%08x\n", i,
+		       cmdq_readl(cq_host, CQ_CMD_DBG_RAM + (0x4 * i)));
+		i++;
+	}
+	pr_err("-------------------------\n");
+}
+
+static void cmdq_dumpregs(struct cmdq_host *cq_host)
+{
+	struct mmc_host *mmc = cq_host->mmc;
+
+	pr_info(DRV_NAME ": ========== REGISTER DUMP (%s)==========\n",
+		mmc_hostname(mmc));
+
+	pr_info(DRV_NAME ": Caps: 0x%08x	  | Version:  0x%08x\n",
+		cmdq_readl(cq_host, CQCAP),
+		cmdq_readl(cq_host, CQVER));
+	pr_info(DRV_NAME ": Queing config: 0x%08x | Queue Ctrl:  0x%08x\n",
+		cmdq_readl(cq_host, CQCFG),
+		cmdq_readl(cq_host, CQCTL));
+	pr_info(DRV_NAME ": Int stat: 0x%08x	  | Int enab:  0x%08x\n",
+		cmdq_readl(cq_host, CQIS),
+		cmdq_readl(cq_host, CQISTE));
+	pr_info(DRV_NAME ": Int sig: 0x%08x	  | Int Coal:  0x%08x\n",
+		cmdq_readl(cq_host, CQISGE),
+		cmdq_readl(cq_host, CQIC));
+	pr_info(DRV_NAME ": TDL base: 0x%08x	  | TDL up32:  0x%08x\n",
+		cmdq_readl(cq_host, CQTDLBA),
+		cmdq_readl(cq_host, CQTDLBAU));
+	pr_info(DRV_NAME ": Doorbell: 0x%08x	  | Comp Notif:  0x%08x\n",
+		cmdq_readl(cq_host, CQTDBR),
+		cmdq_readl(cq_host, CQTCN));
+	pr_info(DRV_NAME ": Dev queue: 0x%08x	  | Dev Pend:  0x%08x\n",
+		cmdq_readl(cq_host, CQDQS),
+		cmdq_readl(cq_host, CQDPT));
+	pr_info(DRV_NAME ": Task clr: 0x%08x	  | Send stat 1:  0x%08x\n",
+		cmdq_readl(cq_host, CQTCLR),
+		cmdq_readl(cq_host, CQSSC1));
+	pr_info(DRV_NAME ": Send stat 2: 0x%08x	  | DCMD resp:  0x%08x\n",
+		cmdq_readl(cq_host, CQSSC2),
+		cmdq_readl(cq_host, CQCRDCT));
+	pr_info(DRV_NAME ": Resp err mask: 0x%08x | Task err:  0x%08x\n",
+		cmdq_readl(cq_host, CQRMEM),
+		cmdq_readl(cq_host, CQTERRI));
+	pr_info(DRV_NAME ": Resp idx 0x%08x	  | Resp arg:  0x%08x\n",
+		cmdq_readl(cq_host, CQCRI),
+		cmdq_readl(cq_host, CQCRA));
+	pr_info(DRV_NAME ": ===========================================\n");
+
+	cmdq_dump_debug_ram(cq_host);
+	if (cq_host->ops->dump_vendor_regs)
+		cq_host->ops->dump_vendor_regs(mmc);
+}
+
+/**
+ * The allocated descriptor table for task, link & transfer descriptors
+ * looks like:
+ * |----------|
+ * |task desc |  |->|----------|
+ * |----------|  |  |trans desc|
+ * |link desc-|->|  |----------|
+ * |----------|          .
+ *      .                .
+ *  no. of slots      max-segs
+ *      .           |----------|
+ * |----------|
+ * The idea here is to create the [task+trans] table and mark & point the
+ * link desc to the transfer desc table on a per slot basis.
+ */
+static int cmdq_host_alloc_tdl(struct cmdq_host *cq_host)
+{
+
+	size_t desc_size;
+	size_t data_size;
+	int i = 0;
+
+	/* task descriptor can be 64/128 bit irrespective of arch */
+	if (cq_host->caps & CMDQ_TASK_DESC_SZ_128) {
+		cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) |
+			       CQ_TASK_DESC_SZ, CQCFG);
+		cq_host->task_desc_len = 16;
+	} else {
+		cq_host->task_desc_len = 8;
+	}
+
+	/*
+	 * 96 bits length of transfer desc instead of 128 bits which means
+	 * ADMA would expect next valid descriptor at the 96th bit
+	 * or 128th bit
+	 */
+	if (cq_host->dma64) {
+		if (cq_host->quirks & CMDQ_QUIRK_SHORT_TXFR_DESC_SZ)
+			cq_host->trans_desc_len = 12;
+		else
+			cq_host->trans_desc_len = 16;
+		cq_host->link_desc_len = 16;
+	} else {
+		cq_host->trans_desc_len = 8;
+		cq_host->link_desc_len = 8;
+	}
+
+	/* total size of a slot: 1 task & 1 transfer (link) */
+	cq_host->slot_sz = cq_host->task_desc_len + cq_host->link_desc_len;
+
+	desc_size = cq_host->slot_sz * cq_host->num_slots;
+
+	data_size = cq_host->trans_desc_len * cq_host->mmc->max_segs *
+		(cq_host->num_slots - 1);
+
+	pr_info("%s: desc_size: %d data_sz: %d slot-sz: %d\n", __func__,
+		(int)desc_size, (int)data_size, cq_host->slot_sz);
+
+	/*
+	 * allocate a dma-mapped chunk of memory for the descriptors
+	 * allocate a dma-mapped chunk of memory for link descriptors
+	 * setup each link-desc memory offset per slot-number to
+	 * the descriptor table.
+	 */
+	cq_host->desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc),
+						 desc_size,
+						 &cq_host->desc_dma_base,
+						 GFP_KERNEL);
+	cq_host->trans_desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc),
+					      data_size,
+					      &cq_host->trans_desc_dma_base,
+					      GFP_KERNEL);
+	if (!cq_host->desc_base || !cq_host->trans_desc_base)
+		return -ENOMEM;
+
+	pr_info("desc-base: 0x%p trans-base: 0x%p\n desc_dma 0x%llx trans_dma: 0x%llx\n",
+		 cq_host->desc_base, cq_host->trans_desc_base,
+		(unsigned long long)cq_host->desc_dma_base,
+		(unsigned long long) cq_host->trans_desc_dma_base);
+
+	for (i = 0; i < cq_host->num_slots; i++)
+		setup_trans_desc(cq_host, i);
+
+	return 0;
+}
+
+static int cmdq_enable(struct mmc_host *mmc)
+{
+	int err = 0;
+	u32 cqcfg;
+	bool dcmd_enable;
+	struct cmdq_host *cq_host = mmc_cmdq_private(mmc);
+
+	if (!cq_host || !mmc->card || !mmc_card_cmdq(mmc->card)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (cq_host->enabled)
+		goto out;
+
+	cqcfg = cmdq_readl(cq_host, CQCFG);
+	if (cqcfg & CQ_ENABLE) {
+		pr_info("%s: %s: cq_host is already enabled\n",
+				mmc_hostname(mmc), __func__);
+		WARN_ON(1);
+		goto out;
+	}
+
+	if (cq_host->quirks & CMDQ_QUIRK_NO_DCMD)
+		dcmd_enable = false;
+	else
+		dcmd_enable = true;
+
+	cqcfg = ((cq_host->caps & CMDQ_TASK_DESC_SZ_128 ? CQ_TASK_DESC_SZ : 0) |
+			(dcmd_enable ? CQ_DCMD : 0));
+
+	cmdq_writel(cq_host, cqcfg, CQCFG);
+	/* enable CQ_HOST */
+	cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) | CQ_ENABLE,
+		    CQCFG);
+
+	if (!cq_host->desc_base ||
+			!cq_host->trans_desc_base) {
+		err = cmdq_host_alloc_tdl(cq_host);
+		if (err)
+			goto out;
+		cmdq_writel(cq_host, lower_32_bits(cq_host->desc_dma_base),
+				CQTDLBA);
+		cmdq_writel(cq_host, upper_32_bits(cq_host->desc_dma_base),
+				CQTDLBAU);
+		cmdq_dumpregs(cq_host);
+	}
+
+	/*
+	 * disable all vendor interrupts
+	 * enable CMDQ interrupts
+	 * enable the vendor error interrupts
+	 */
+	if (cq_host->ops->clear_set_irqs)
+		cq_host->ops->clear_set_irqs(mmc, true);
+
+	cmdq_clear_set_irqs(cq_host, 0x0, CQ_INT_ALL);
+
+	/* cq_host would use this rca to address the card */
+	cmdq_writel(cq_host, mmc->card->rca, CQSSC2);
+
+	/* send QSR at lesser intervals than the default */
+	cmdq_writel(cq_host, cmdq_readl(cq_host, CQSSC1) | SEND_QSR_INTERVAL,
+				CQSSC1);
+
+	/* ensure the writes are done before enabling CQE */
+	mb();
+
+	cq_host->enabled = true;
+
+	if (cq_host->ops->set_block_size)
+		cq_host->ops->set_block_size(cq_host->mmc);
+
+	if (cq_host->ops->set_data_timeout)
+		cq_host->ops->set_data_timeout(mmc, 0xf);
+
+	if (cq_host->ops->clear_set_dumpregs)
+		cq_host->ops->clear_set_dumpregs(mmc, 1);
+
+out:
+	return err;
+}
+
+static void cmdq_disable(struct mmc_host *mmc, bool soft)
+{
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+
+	if (soft) {
+		cmdq_writel(cq_host, cmdq_readl(
+				    cq_host, CQCFG) & ~(CQ_ENABLE),
+			    CQCFG);
+	}
+
+	cq_host->enabled = false;
+}
+
+static void cmdq_prep_task_desc(struct mmc_request *mrq,
+					u64 *data, bool intr, bool qbr)
+{
+	struct mmc_cmdq_req *cmdq_req = mrq->cmdq_req;
+	u32 req_flags = cmdq_req->cmdq_req_flags;
+
+	pr_debug("%s: %s: data-tag: 0x%08x - dir: %d - prio: %d - cnt: 0x%08x -	addr: 0x%llx\n",
+		 mmc_hostname(mrq->host), __func__,
+		 !!(req_flags & DAT_TAG), !!(req_flags & DIR),
+		 !!(req_flags & PRIO), cmdq_req->data.blocks,
+		 (u64)mrq->cmdq_req->blk_addr);
+
+	*data = VALID(1) |
+		END(1) |
+		INT(intr) |
+		ACT(0x5) |
+		FORCED_PROG(!!(req_flags & FORCED_PRG)) |
+		CONTEXT(mrq->cmdq_req->ctx_id) |
+		DATA_TAG(!!(req_flags & DAT_TAG)) |
+		DATA_DIR(!!(req_flags & DIR)) |
+		PRIORITY(!!(req_flags & PRIO)) |
+		QBAR(qbr) |
+		REL_WRITE(!!(req_flags & REL_WR)) |
+		BLK_COUNT(mrq->cmdq_req->data.blocks) |
+		BLK_ADDR((u64)mrq->cmdq_req->blk_addr);
+}
+
+static int cmdq_dma_map(struct mmc_host *host, struct mmc_request *mrq)
+{
+	int sg_count;
+	struct mmc_data *data = mrq->data;
+
+	if (!data)
+		return -EINVAL;
+
+	sg_count = dma_map_sg(mmc_dev(host), data->sg,
+			      data->sg_len,
+			      (data->flags & MMC_DATA_WRITE) ?
+			      DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	if (!sg_count) {
+		pr_err("%s: sg-len: %d\n", __func__, data->sg_len);
+		return -ENOMEM;
+	}
+
+	return sg_count;
+}
+
+static void cmdq_set_tran_desc(u8 *desc,
+				 dma_addr_t addr, int len, bool end)
+{
+	__le64 *dataddr = (__le64 __force *)(desc + 4);
+	__le32 *attr = (__le32 __force *)desc;
+
+	*attr = (VALID(1) |
+		 END(end ? 1 : 0) |
+		 INT(0) |
+		 ACT(0x4) |
+		 DAT_LENGTH(len));
+
+	dataddr[0] = cpu_to_le64(addr);
+}
+
+static int cmdq_prep_tran_desc(struct mmc_request *mrq,
+			       struct cmdq_host *cq_host, int tag)
+{
+	struct mmc_data *data = mrq->data;
+	int i, sg_count, len;
+	bool end = false;
+	dma_addr_t addr;
+	u8 *desc;
+	struct scatterlist *sg;
+
+	sg_count = cmdq_dma_map(mrq->host, mrq);
+	if (sg_count < 0) {
+		pr_err("%s: %s: unable to map sg lists, %d\n",
+				mmc_hostname(mrq->host), __func__, sg_count);
+		return sg_count;
+	}
+
+	desc = get_trans_desc(cq_host, tag);
+	memset(desc, 0, cq_host->trans_desc_len * cq_host->mmc->max_segs);
+
+	for_each_sg(data->sg, sg, sg_count, i) {
+		addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+
+		if ((i+1) == sg_count)
+			end = true;
+		cmdq_set_tran_desc(desc, addr, len, end);
+		desc += cq_host->trans_desc_len;
+	}
+
+	pr_debug("%s: req: 0x%p tag: %d calc_trans_des: 0x%p sg-cnt: %d\n",
+		__func__, mrq->req, tag, desc, sg_count);
+
+	return 0;
+}
+
+static void cmdq_prep_dcmd_desc(struct mmc_host *mmc,
+				   struct mmc_request *mrq)
+{
+	__le64 *task_desc = NULL;
+	u64 data = 0;
+	u8 resp_type;
+	u8 *desc;
+	__le64 *dataddr;
+	struct cmdq_host *cq_host = mmc_cmdq_private(mmc);
+	u8 timing;
+
+	if (!(mrq->cmd->flags & MMC_RSP_PRESENT)) {
+		resp_type = 0x0;
+		timing = 0x1;
+	} else {
+		if (mrq->cmd->flags & MMC_RSP_R1B) {
+			resp_type = 0x3;
+			timing = 0x0;
+		} else {
+			resp_type = 0x2;
+			timing = 0x1;
+		}
+	}
+
+	task_desc = (__le64 __force *)get_desc(cq_host, cq_host->dcmd_slot);
+	memset(task_desc, 0, cq_host->task_desc_len);
+	data |= (VALID(1) |
+		 END(1) |
+		 INT(1) |
+		 QBAR(1) |
+		 ACT(0x5) |
+		 CMD_INDEX(mrq->cmd->opcode) |
+		 CMD_TIMING(timing) | RESP_TYPE(resp_type));
+	*task_desc = cpu_to_le64(data);
+	desc = (u8 *)task_desc;
+	pr_debug("cmdq: dcmd: cmd: %d timing: %d resp: %d\n",
+		mrq->cmd->opcode, timing, resp_type);
+	dataddr = (__le64 __force *)(desc + 4);
+	dataddr[0] = cpu_to_le64((u64)mrq->cmd->arg);
+
+}
+
+static int cmdq_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	int err;
+	u64 data = 0;
+	__le64 *task_desc = NULL;
+	u32 tag = mrq->cmdq_req->tag;
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+
+	if (!cq_host->enabled) {
+		pr_err("%s: CMDQ host not enabled yet !!!\n",
+		       mmc_hostname(mmc));
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (mrq->cmdq_req->cmdq_req_flags & DCMD) {
+		cmdq_prep_dcmd_desc(mmc, mrq);
+		cq_host->mrq_slot[DCMD_SLOT] = mrq;
+		cmdq_writel(cq_host, 1 << DCMD_SLOT, CQTDBR);
+		return 0;
+	}
+
+	task_desc = (__le64 __force *)get_desc(cq_host, tag);
+
+	cmdq_prep_task_desc(mrq, &data, 1,
+			    (mrq->cmdq_req->cmdq_req_flags & QBR));
+	*task_desc = cpu_to_le64(data);
+
+	err = cmdq_prep_tran_desc(mrq, cq_host, tag);
+	if (err) {
+		pr_err("%s: %s: failed to setup tx desc: %d\n",
+		       mmc_hostname(mmc), __func__, err);
+		return err;
+	}
+
+	BUG_ON(cmdq_readl(cq_host, CQTDBR) & (1 << tag));
+
+	cq_host->mrq_slot[tag] = mrq;
+	if (cq_host->ops->set_tranfer_params)
+		cq_host->ops->set_tranfer_params(mmc);
+
+	cmdq_writel(cq_host, 1 << tag, CQTDBR);
+
+out:
+	return err;
+}
+
+static void cmdq_finish_data(struct mmc_host *mmc, unsigned int tag)
+{
+	struct mmc_request *mrq;
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+
+	mrq = cq_host->mrq_slot[tag];
+	mrq->done(mrq);
+}
+
+irqreturn_t cmdq_irq(struct mmc_host *mmc, u32 intmask)
+{
+	u32 status;
+	unsigned long tag = 0, comp_status;
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+
+	status = cmdq_readl(cq_host, CQIS);
+	cmdq_writel(cq_host, status, CQIS);
+
+	if (status & CQIS_TCC) {
+		/* read QCTCN and complete the request */
+		comp_status = cmdq_readl(cq_host, CQTCN);
+		if (!comp_status)
+			goto out;
+
+		for_each_set_bit(tag, &comp_status, cq_host->num_slots) {
+			/* complete the corresponding mrq */
+			pr_debug("%s: completing tag -> %lu\n",
+				 mmc_hostname(mmc), tag);
+			cmdq_finish_data(mmc, tag);
+		}
+		cmdq_writel(cq_host, comp_status, CQTCN);
+	}
+
+	if (status & CQIS_RED) {
+		/* task response has an error */
+		pr_err("%s: RED error %d !!!\n", mmc_hostname(mmc), status);
+		cmdq_dumpregs(cq_host);
+	}
+
+out:
+	return IRQ_HANDLED;
+}
+EXPORT_SYMBOL(cmdq_irq);
+
+static void cmdq_post_req(struct mmc_host *host, struct mmc_request *mrq,
+			  int err)
+{
+	struct mmc_data *data = mrq->data;
+
+	if (data) {
+		data->error = err;
+		dma_unmap_sg(mmc_dev(host), data->sg, data->sg_len,
+			     (data->flags & MMC_DATA_READ) ?
+			     DMA_FROM_DEVICE : DMA_TO_DEVICE);
+		if (err)
+			data->bytes_xfered = 0;
+		else
+			data->bytes_xfered = blk_rq_bytes(mrq->req);
+	}
+}
+
+static const struct mmc_cmdq_host_ops cmdq_host_ops = {
+	.enable = cmdq_enable,
+	.disable = cmdq_disable,
+	.request = cmdq_request,
+	.post_req = cmdq_post_req,
+};
+
+struct cmdq_host *cmdq_pltfm_init(struct platform_device *pdev)
+{
+	struct cmdq_host *cq_host;
+	struct resource *cmdq_memres = NULL;
+
+	/* check and setup CMDQ interface */
+	cmdq_memres = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						   "cmdq_mem");
+	if (!cmdq_memres) {
+		dev_dbg(&pdev->dev, "CMDQ not supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	cq_host = kzalloc(sizeof(*cq_host), GFP_KERNEL);
+	if (!cq_host) {
+		dev_err(&pdev->dev, "failed to allocate memory for CMDQ\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	cq_host->mmio = devm_ioremap(&pdev->dev,
+				     cmdq_memres->start,
+				     resource_size(cmdq_memres));
+	if (!cq_host->mmio) {
+		dev_err(&pdev->dev, "failed to remap cmdq regs\n");
+		kfree(cq_host);
+		return ERR_PTR(-EBUSY);
+	}
+	dev_dbg(&pdev->dev, "CMDQ ioremap: done\n");
+
+	return cq_host;
+}
+EXPORT_SYMBOL(cmdq_pltfm_init);
+
+int cmdq_init(struct cmdq_host *cq_host, struct mmc_host *mmc,
+	      bool dma64)
+{
+	int err = 0;
+
+	cq_host->dma64 = dma64;
+	cq_host->mmc = mmc;
+	cq_host->mmc->cmdq_private = cq_host;
+
+	cq_host->num_slots = NUM_SLOTS;
+	cq_host->dcmd_slot = DCMD_SLOT;
+
+	mmc->cmdq_ops = &cmdq_host_ops;
+
+	cq_host->mrq_slot = kcalloc(cq_host->num_slots,
+				    sizeof(*cq_host->mrq_slot), GFP_KERNEL);
+	if (!cq_host->mrq_slot)
+		return -ENOMEM;
+
+	init_completion(&cq_host->halt_comp);
+	return err;
+}
+EXPORT_SYMBOL(cmdq_init);
diff --git a/drivers/mmc/host/cmdq_hci.h b/drivers/mmc/host/cmdq_hci.h
new file mode 100644
index 0000000..e7f5a15
--- /dev/null
+++ b/drivers/mmc/host/cmdq_hci.h
@@ -0,0 +1,211 @@ 
+/* Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef LINUX_MMC_CQ_HCI_H
+#define LINUX_MMC_CQ_HCI_H
+#include <linux/mmc/core.h>
+
+/* registers */
+/* version */
+#define CQVER		0x00
+/* capabilities */
+#define CQCAP		0x04
+/* configuration */
+#define CQCFG		0x08
+#define CQ_DCMD		0x00001000
+#define CQ_TASK_DESC_SZ 0x00000100
+#define CQ_ENABLE	0x00000001
+
+/* control */
+#define CQCTL		0x0C
+#define CLEAR_ALL_TASKS 0x00000100
+#define HALT		0x00000001
+
+/* interrupt status */
+#define CQIS		0x10
+#define CQIS_HAC	(1 << 0)
+#define CQIS_TCC	(1 << 1)
+#define CQIS_RED	(1 << 2)
+#define CQIS_TCL	(1 << 3)
+
+/* interrupt status enable */
+#define CQISTE		0x14
+
+/* interrupt signal enable */
+#define CQISGE		0x18
+
+/* interrupt coalescing */
+#define CQIC		0x1C
+#define CQIC_ENABLE	(1 << 31)
+#define CQIC_RESET	(1 << 16)
+#define CQIC_ICCTHWEN	(1 << 15)
+#define CQIC_ICCTH(x)	((x & 0x1F) << 8)
+#define CQIC_ICTOVALWEN (1 << 7)
+#define CQIC_ICTOVAL(x) (x & 0x7F)
+
+/* task list base address */
+#define CQTDLBA		0x20
+
+/* task list base address upper */
+#define CQTDLBAU	0x24
+
+/* door-bell */
+#define CQTDBR		0x28
+
+/* task completion notification */
+#define CQTCN		0x2C
+
+/* device queue status */
+#define CQDQS		0x30
+
+/* device pending tasks */
+#define CQDPT		0x34
+
+/* task clear */
+#define CQTCLR		0x38
+
+/* send status config 1 */
+#define CQSSC1		0x40
+/*
+ * Value n means CQE would send CMD13 during the transfer of data block
+ * BLOCK_CNT-n
+ */
+#define SEND_QSR_INTERVAL 0x70000
+
+/* send status config 2 */
+#define CQSSC2		0x44
+
+/* response for dcmd */
+#define CQCRDCT		0x48
+
+/* response mode error mask */
+#define CQRMEM		0x50
+
+/* task error info */
+#define CQTERRI		0x54
+
+/* command response index */
+#define CQCRI		0x58
+
+/* command response argument */
+#define CQCRA		0x5C
+
+#define CQ_INT_ALL	0xF
+#define CQIC_DEFAULT_ICCTH 31
+#define CQIC_DEFAULT_ICTOVAL 1
+
+#define CQ_CMD_DBG_RAM	0x158
+#define CQ_CMD_DBG_RAM_WA 0x198
+#define CQ_CMD_DBG_RAM_OL 0x19C
+
+/* attribute fields */
+#define VALID(x)	((x & 1) << 0)
+#define END(x)		((x & 1) << 1)
+#define INT(x)		((x & 1) << 2)
+#define ACT(x)		((x & 0x7) << 3)
+
+/* data command task descriptor fields */
+#define FORCED_PROG(x)	((x & 1) << 6)
+#define CONTEXT(x)	((x & 0xF) << 7)
+#define DATA_TAG(x)	((x & 1) << 11)
+#define DATA_DIR(x)	((x & 1) << 12)
+#define PRIORITY(x)	((x & 1) << 13)
+#define QBAR(x)		((x & 1) << 14)
+#define REL_WRITE(x)	((x & 1) << 15)
+#define BLK_COUNT(x)	((x & 0xFFFF) << 16)
+#define BLK_ADDR(x)	((x & 0xFFFFFFFF) << 32)
+
+/* direct command task descriptor fields */
+#define CMD_INDEX(x)	((x & 0x3F) << 16)
+#define CMD_TIMING(x)	((x & 1) << 22)
+#define RESP_TYPE(x)	((x & 0x3) << 23)
+
+/* transfer descriptor fields */
+#define DAT_LENGTH(x)	((x & 0xFFFF) << 16)
+#define DAT_ADDR_LO(x)	((x & 0xFFFFFFFF) << 32)
+#define DAT_ADDR_HI(x)	((x & 0xFFFFFFFF) << 0)
+
+struct cmdq_host {
+	const struct cmdq_host_ops *ops;
+	void __iomem *mmio;
+	struct mmc_host *mmc;
+
+	/* 64 bit DMA */
+	bool dma64;
+	int num_slots;
+
+	u32 dcmd_slot;
+	u32 caps;
+#define CMDQ_TASK_DESC_SZ_128 0x1
+
+	u32 quirks;
+#define CMDQ_QUIRK_SHORT_TXFR_DESC_SZ 0x1
+#define CMDQ_QUIRK_NO_DCMD	0x2
+
+	bool enabled;
+	bool halted;
+	bool init_done;
+
+	u8 *desc_base;
+
+	/* total descriptor size */
+	u8 slot_sz;
+
+	/* 64/128 bit depends on CQCFG */
+	u8 task_desc_len;
+
+	/* 64 bit on 32-bit arch, 128 bit on 64-bit */
+	u8 link_desc_len;
+
+	u8 *trans_desc_base;
+	/* same length as transfer descriptor */
+	u8 trans_desc_len;
+
+	dma_addr_t desc_dma_base;
+	dma_addr_t trans_desc_dma_base;
+
+	struct completion halt_comp;
+	struct mmc_request **mrq_slot;
+	void *private;
+};
+
+struct cmdq_host_ops {
+	void (*set_tranfer_params)(struct mmc_host *mmc);
+	void (*set_data_timeout)(struct mmc_host *mmc, u32 val);
+	void (*clear_set_irqs)(struct mmc_host *mmc, bool clear);
+	void (*set_block_size)(struct mmc_host *mmc);
+	void (*dump_vendor_regs)(struct mmc_host *mmc);
+	void (*write_l)(struct cmdq_host *host, u32 val, int reg);
+	u32 (*read_l)(struct cmdq_host *host, int reg);
+	void (*clear_set_dumpregs)(struct mmc_host *mmc, bool set);
+};
+
+static inline void cmdq_writel(struct cmdq_host *host, u32 val, int reg)
+{
+	if (unlikely(host->ops->write_l))
+		host->ops->write_l(host, val, reg);
+	else
+		writel_relaxed(val, host->mmio + reg);
+}
+
+static inline u32 cmdq_readl(struct cmdq_host *host, int reg)
+{
+	if (unlikely(host->ops->read_l))
+		return host->ops->read_l(host, reg);
+	else
+		return readl_relaxed(host->mmio + reg);
+}
+
+extern irqreturn_t cmdq_irq(struct mmc_host *mmc, u32 intmask);
+extern int cmdq_init(struct cmdq_host *cq_host, struct mmc_host *mmc,
+		     bool dma64);
+extern struct cmdq_host *cmdq_pltfm_init(struct platform_device *pdev);
+#endif
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index dfe094a..135a83e 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -420,6 +420,13 @@  struct mmc_host {
 	u32			dsr;	/* optional driver stage (DSR) value */
 
 	struct mmc_cmdq_context_info	cmdq_ctx;
+	/*
+	 * Several cmdq-supporting host controllers are extensions
+	 * of legacy controllers. This field can be used to store
+	 * a reference to the cmdq extension of the existing host
+	 * controller.
+	 */
+	void *cmdq_private;
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
@@ -434,6 +441,11 @@  static inline void *mmc_priv(struct mmc_host *host)
 	return (void *)host->private;
 }
 
+static inline void *mmc_cmdq_private(struct mmc_host *host)
+{
+	return host->cmdq_private;
+}
+
 #define mmc_host_is_spi(host)	((host)->caps & MMC_CAP_SPI)
 
 #define mmc_dev(x)	((x)->parent)
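
For context, a minimal sketch (hypothetical foo_* names, not part of this
patch) of how a host driver would be expected to consume the exported
interface:

/* Hypothetical glue, illustration only. */
static const struct cmdq_host_ops foo_cmdq_ops = {
	/*
	 * All hooks are optional, but cq_host->ops itself must be set:
	 * cmdq_readl()/cmdq_writel() dereference it unconditionally.
	 */
};

static int foo_sdhci_setup_cmdq(struct platform_device *pdev,
				struct mmc_host *mmc)
{
	struct cmdq_host *cq_host;

	cq_host = cmdq_pltfm_init(pdev);	/* maps the "cmdq_mem" resource */
	if (IS_ERR(cq_host))
		return PTR_ERR(cq_host);

	cq_host->ops = &foo_cmdq_ops;
	/* dma64 selects the 16-byte link/transfer descriptor layout */
	return cmdq_init(cq_host, mmc, sizeof(dma_addr_t) > 4);
}

The host driver's own interrupt handler then forwards command-queue
interrupts via cmdq_irq(mmc, intmask); note that the intmask argument is
currently unused, since cmdq_irq() reads and clears CQIS itself.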