
[v2,1/3] scsi: ufs: core: Introduce mcq ops to config cqid

Message ID 20230530023227.16653-2-powen.kao@mediatek.com (mailing list archive)
State New, archived
Series Add MCQ support for MTK platform

Commit Message

Po-Wen Kao May 30, 2023, 2:32 a.m. UTC
From: Peter Wang <peter.wang@mediatek.com>

MCQ SQ/CQ mapping is not limited to one-to-one; it can also be
many-to-one. Allow the host driver to change the mapping by assigning
a cqid for each hardware queue.

Signed-off-by: Po-Wen Kao <powen.kao@mediatek.com>
Signed-off-by: Peter Wang <peter.wang@mediatek.com>
---
 drivers/ufs/core/ufs-mcq.c     |  2 +-
 drivers/ufs/core/ufshcd-priv.h |  8 ++++++++
 drivers/ufs/core/ufshcd.c      | 11 +++++++++++
 include/ufs/ufshcd.h           |  3 +++
 4 files changed, 23 insertions(+), 1 deletion(-)

Comments

AngeloGioacchino Del Regno May 30, 2023, 7:20 a.m. UTC | #1
On 30/05/23 04:32, Po-Wen Kao wrote:
> From: Peter Wang <peter.wang@mediatek.com>
> 
> MCQ SQ/CQ mapping is not limited to one-to-one; it can also be
> many-to-one. Allow the host driver to change the mapping by assigning
> a cqid for each hardware queue.
> 
> Signed-off-by: Po-Wen Kao <powen.kao@mediatek.com>
> Signed-off-by: Peter Wang <peter.wang@mediatek.com>
> ---
>   drivers/ufs/core/ufs-mcq.c     |  2 +-
>   drivers/ufs/core/ufshcd-priv.h |  8 ++++++++
>   drivers/ufs/core/ufshcd.c      | 11 +++++++++++
>   include/ufs/ufshcd.h           |  3 +++
>   4 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
> index 51b3c6ae781d..1ba9c395c6b0 100644
> --- a/drivers/ufs/core/ufs-mcq.c
> +++ b/drivers/ufs/core/ufs-mcq.c
> @@ -368,7 +368,7 @@ void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba)
>   		 * Submission Queue Attribute
>   		 */
>   		ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize |
> -			      (i << QUEUE_ID_OFFSET),
> +			      (hwq->cqid << QUEUE_ID_OFFSET),
>   			      MCQ_CFG_n(REG_SQATTR, i));
>   	}
>   }
> diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
> index d53b93c21a0c..2de068b96c71 100644
> --- a/drivers/ufs/core/ufshcd-priv.h
> +++ b/drivers/ufs/core/ufshcd-priv.h
> @@ -287,6 +287,14 @@ static inline int ufshcd_mcq_vops_config_esi(struct ufs_hba *hba)
>   	return -EOPNOTSUPP;
>   }
>   
> +static inline int ufshcd_mcq_vops_config_cqid(struct ufs_hba *hba)
> +{
> +	if (hba->vops && hba->vops->config_cqid)
> +		return hba->vops->config_cqid(hba);
> +
> +	return -EOPNOTSUPP;
> +}
> +
>   extern const struct ufs_pm_lvl_states ufs_pm_lvl_states[];
>   
>   /**
> diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
> index 4ec8dacb447c..fad9ff4469b0 100644
> --- a/drivers/ufs/core/ufshcd.c
> +++ b/drivers/ufs/core/ufshcd.c
> @@ -8488,11 +8488,22 @@ static int ufshcd_alloc_mcq(struct ufs_hba *hba)
>   static void ufshcd_config_mcq(struct ufs_hba *hba)
>   {
>   	int ret;
> +	struct ufs_hw_queue *hwq;
> +	int i;
>   
>   	ret = ufshcd_mcq_vops_config_esi(hba);
>   	dev_info(hba->dev, "ESI %sconfigured\n", ret ? "is not " : "");
>   
>   	ufshcd_enable_intr(hba, UFSHCD_ENABLE_MCQ_INTRS);
> +
> +	ret = ufshcd_mcq_vops_config_cqid(hba);
> +	if (ret) {

If your return value here is not -EOPNOTSUPP you may want to perform some
different action... and perhaps ufshcd_config_mcq() should be changed to
return a failure.
Should also be trivial to do so, since this function is called 3 times in
total, and only in ufshcd_device_init(), which is already returning int.

So, I would say....

static int ufshcd_config_mcq(struct ufs_hba *hba)
{

..... code .....

	ret = ufshcd_mcq_vops_config_cqid(hba);
	if (ret) {
		if (ret != -EOPNOTSUPP)
			return ret;

		/* No special configuration, go for 1:1 mapping */
		for (i = 0; ....)
			....
	}

Regards,
Angelo
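
For reference, a minimal sketch of what this suggestion could look like once
filled in; the -EOPNOTSUPP handling, the fallback loop and the return-value
plumbing are assumptions based on the discussion, not code from the posted
series:

static int ufshcd_config_mcq(struct ufs_hba *hba)
{
	struct ufs_hw_queue *hwq;
	int ret, i;

	ret = ufshcd_mcq_vops_config_esi(hba);
	dev_info(hba->dev, "ESI %sconfigured\n", ret ? "is not " : "");

	ufshcd_enable_intr(hba, UFSHCD_ENABLE_MCQ_INTRS);

	ret = ufshcd_mcq_vops_config_cqid(hba);
	if (ret) {
		/* A real error from the vop is propagated to the caller. */
		if (ret != -EOPNOTSUPP)
			return ret;

		/* No vendor-specific configuration: fall back to 1:1 SQ/CQ mapping. */
		for (i = 0; i < hba->nr_hw_queues; i++) {
			hwq = &hba->uhq[i];
			hwq->cqid = i;
		}
	}

	ufshcd_mcq_make_queues_operational(hba);
	ufshcd_mcq_config_mac(hba, hba->nutrs);

	return 0;
}

The call sites in ufshcd_device_init() would then also need to check the
return value.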
Po-Wen Kao May 30, 2023, 9:42 a.m. UTC | #2
On Tue, 2023-05-30 at 09:20 +0200, AngeloGioacchino Del Regno wrote:
> 
> If your return value here is not -EOPNOTSUPP you may want to perform some
> different action... and perhaps ufshcd_config_mcq() should be changed to
> return a failure.
> Should also be trivial to do so, since this function is called 3 times in
> total, and only in ufshcd_device_init(), which is already returning int.
> 
> So, I would say....
> 
> static int ufshcd_config_mcq(struct ufs_hba *hba)
> {
> 
> ..... code .....
> 
> 	ret = ufshcd_mcq_vops_config_cqid(hba);
> 	if (ret) {
> 		if (ret != -EOPNOTSUPP)
> 			return ret;
> 
> 		/* No special configuration, go for 1:1 mapping */
> 		for (i = 0; ....)
> 			....
> 	}
> 
> Regards,
> Angelo
> 
> 

Hi Angelo,

That's a good point.
Can we update this later in another patch to take care of
ufshcd_config_mcq()'s return value, since this patch focuses on
introducing the new vop?

Best,
Po-Wen
Bart Van Assche May 30, 2023, 11:54 p.m. UTC | #3
On 5/29/23 19:32, Po-Wen Kao wrote:
> MCQ SQ/CQ mapping is not limited to one-to-one; it can also be
> many-to-one. Allow the host driver to change the mapping by assigning
> a cqid for each hardware queue.

What use case do you have in mind for associating multiple submission 
queues with a single completion queue?

No matter what the use case is, I think that which submission queues are 
associated with a completion queue is independent of the host driver and 
hence that such logic should exist in the UFS core instead of in a host 
driver.

Bart.
Stanley Jhu May 31, 2023, 1:54 a.m. UTC | #4
On Wed, May 31, 2023 at 7:54 AM Bart Van Assche <bvanassche@acm.org> wrote:
>
> On 5/29/23 19:32, Po-Wen Kao wrote:
> > MCQ SQ/CQ mapping is not limited to one-to-one; it can also be
> > many-to-one. Allow the host driver to change the mapping by assigning
> > a cqid for each hardware queue.
>
> What use case do you have in mind for associating multiple submission
> queues with a single completion queue?
>

According to the specification or the original concept of MCQ, the
bindings between SQ and CQ cannot be fixed to a single style.
In addition, some benchmark data shows that the performance can be
improved by using fewer CQs to aggregate the interrupt handling of
completion requests.
Therefore, we would like to introduce a vop to allow the host to
configure it accordingly.

Stanley
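
To make the aggregation idea concrete, a hypothetical many-to-one assignment
(not part of this series; the helper name and the nr_cqs parameter are
assumed) could look like:

static void example_aggregate_cq_mapping(struct ufs_hba *hba, u32 nr_cqs)
{
	int i;

	/* Distribute the submission queues over nr_cqs completion queues. */
	for (i = 0; i < hba->nr_hw_queues; i++)
		hba->uhq[i].cqid = i % nr_cqs;
}

With nr_cqs == 1, all completions are funneled into a single CQ, which is the
aggregated-interrupt case the benchmark refers to.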
Bart Van Assche May 31, 2023, 12:48 p.m. UTC | #5
On 5/30/23 18:54, Stanley Chu wrote:
> In addition, some benchmark data shows that the performance can be
> improved by using fewer CQs to aggregate the interrupt handling of
> completion requests.

What has been measured? IOPS only or both IOPS and latency?

How big is the difference? A few percent or more?

For which number of SQs and which number of CQs has performance data 
been measured?

Would the following work instead of introducing a new vop?
- Introduce a new capability flag, e.g. UFSHCD_CAP_SINGLE_CQ.
- Set that flag from inside ufs_mtk_init().
- Modify the UFS core driver such that the number of completion queues
   depends on the UFSHCD_CAP_SINGLE_CQ flag.

> Therefore, we would like to introduce a vop to allow the host to
> configure it accordingly.

We do not accept new vops upstream without a user. Where is the 
implementation of the new .config_cqid() callback?

Thanks,

Bart.
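
For illustration, the capability-flag approach could be wired up in the core
roughly as follows; UFSHCD_CAP_SINGLE_CQ is only a proposed flag that does not
exist in the tree, so both the flag and the helper are assumptions:

static void ufshcd_mcq_assign_cqids(struct ufs_hba *hba)
{
	int i;

	/* Single CQ when the host driver sets the capability, otherwise 1:1. */
	for (i = 0; i < hba->nr_hw_queues; i++)
		hba->uhq[i].cqid = (hba->caps & UFSHCD_CAP_SINGLE_CQ) ? 0 : i;
}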
Po-Wen Kao June 1, 2023, 10:25 a.m. UTC | #6
On Wed, 2023-05-31 at 05:48 -0700, Bart Van Assche wrote:
>  On 5/30/23 18:54, Stanley Chu wrote:
> > In addition, some benchmark data shows that the performance can be
> > improved by using fewer CQs to aggregate the interrupt handling of
> > completion requests.
> 
> What has been measured? IOPS only or both IOPS and latency?
> 
> How big is the difference? A few percent or more?
> For which number of SQs and which number of CQs has performance data 
> been measured?
> 
Comparing the 8-8 mapping to the 8-1 mapping shows an improvement of a few percent.

> 
> Would the following work instead of introducing a new vop?
> - Introduce a new capability flag, e.g. UFSHCD_CAP_SINGLE_CQ.
> - Set that flag from inside ufs_mtk_init().
> - Modify the UFS core driver such that the number of completion queues
>    depends on the UFSHCD_CAP_SINGLE_CQ flag.
> 
According to the spec, the driver is free to assign any SQ-to-CQ mapping. I am
not sure it is ideal to constrain the mapping to one specific kind.

> > Therefore, we would like to introduce a vop to allow the host to
> > configure it accordingly.
> 
> We do not accept new vops upstream without a user. Where is the 
> implementation of the new .config_cqid() callback?
> 

Yes, please refer to
"[PATCH v2 3/3] scsi: ufs: ufs-mediatek: Add MCQ support for MTK
platform"

+static int ufs_mtk_config_cqid(struct ufs_hba *hba)
+{
+       struct ufs_hw_queue *hwq;
+       int i;
+
+       for (i = 0; i < hba->nr_hw_queues; i++) {
+               hwq = &hba->uhq[i];
+               hwq->cqid = 3;
+       }
+
+       return 0;
+}


Po-Wen
Bart Van Assche June 1, 2023, 2:23 p.m. UTC | #7
On 6/1/23 03:25, Powen Kao (高伯文) wrote:
> According to the spec, the driver is free to assign any SQ-to-CQ mapping. I am
> not sure it is ideal to constrain the mapping to one specific kind.

As has been made clear several times recently during discussions around
LSF/MM topics, implementing all features of a standard is *not* one of the
goals of the Linux kernel. Whether a feature is defined in the NVMe, a SCSI
or the UFS standard, we (Linux kernel community) only support those features
that we consider useful and that can be implemented with a reasonable effort.
An example of a feature that probably will never be supported by the Linux
kernel is the "domains and realms" functionality from ZBC-2.

> Yes, please refer to
> "[PATCH v2 3/3] scsi: ufs: ufs-mediatek: Add MCQ support for MTK
> platform"
> 
> +static int ufs_mtk_config_cqid(struct ufs_hba *hba)
> +{
> +       struct ufs_hw_queue *hwq;
> +       int i;
> +
> +       for (i = 0; i < hba->nr_hw_queues; i++) {
> +               hwq = &hba->uhq[i];
> +               hwq->cqid = 3;
> +       }
> +
> +       return 0;
> +}

Thanks, I had overlooked this. Do you agree that the above shows that the
flag I proposed in my previous email (UFSHCD_CAP_SINGLE_CQ) is sufficient
to support the MediaTek use case? I want to keep the SQ-CQ association code
in the UFS driver core because the next step will probably be to switch from
one CQ per SQ to one CQ per CPU core for UFS controllers that support
multiple completion interrupts.

Thanks,

Bart.
Bart Van Assche June 1, 2023, 8:54 p.m. UTC | #8
On 6/1/23 07:41, Peter Wang (王信友) wrote:
> On Thu, 2023-06-01 at 07:23 -0700, Bart Van Assche wrote:
>> Thanks, I had overlooked this. Do you agree that the above shows that the
>> flag I proposed in my previous email (UFSHCD_CAP_SINGLE_CQ) is sufficient
>> to support the MediaTek use case? I want to keep the SQ-CQ association code
>> in the UFS driver core because the next step will probably be to switch from
>> one CQ per SQ to one CQ per CPU core for UFS controllers that support
>> multiple completion interrupts.
>
> If the UFS device speed is getting higher and higher, one CQ may not be sufficient.
> 
> So, UFSHCD_CAP_SINGLE_CQ is not flexible for us because we may want to map to two CQs.

Hi Peter,

Let's take a step back. The MediaTek UFSHCI 4.0 host controller only 
supports a single completion interrupt. A significant disadvantage of 
the single completion interrupt approach is that all completion 
interrupts are processed by the same CPU core. This is known to cause 
problems on Android. If sufficient time is spent in an interrupt 
handler, threads that run on the same CPU core as the interrupt handler 
may get scheduled too late. This can result in e.g. audio glitches 
noticeable by humans. Hardware designers told me that the area occupied 
by a single interrupt line is small. So I think it is fair to say that 
the (nonstandard!) approach of only supporting a single completion 
interrupt in an UFSHCI 4.0 controller is not a good choice.

The UFS driver already supports multiple hardware queue types 
(HCTX_TYPE_DEFAULT, READ and POLL). An interesting optimization would be 
to combine the completion queues for at least the DEFAULT and READ queue 
types. Introducing a vop to configure the completion queue ID would make 
it almost impossible to implement this optimization in a generic way.

Asking to add a vop that improves performance by only a few percent for
a *nonstandard* controller, and that at the same time makes it very hard
to optimize the driver for standards-compliant controllers, is something
that I consider unreasonable.

Bart.

Patch

diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 51b3c6ae781d..1ba9c395c6b0 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -368,7 +368,7 @@  void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba)
 		 * Submission Queue Attribute
 		 */
 		ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize |
-			      (i << QUEUE_ID_OFFSET),
+			      (hwq->cqid << QUEUE_ID_OFFSET),
 			      MCQ_CFG_n(REG_SQATTR, i));
 	}
 }
diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
index d53b93c21a0c..2de068b96c71 100644
--- a/drivers/ufs/core/ufshcd-priv.h
+++ b/drivers/ufs/core/ufshcd-priv.h
@@ -287,6 +287,14 @@  static inline int ufshcd_mcq_vops_config_esi(struct ufs_hba *hba)
 	return -EOPNOTSUPP;
 }
 
+static inline int ufshcd_mcq_vops_config_cqid(struct ufs_hba *hba)
+{
+	if (hba->vops && hba->vops->config_cqid)
+		return hba->vops->config_cqid(hba);
+
+	return -EOPNOTSUPP;
+}
+
 extern const struct ufs_pm_lvl_states ufs_pm_lvl_states[];
 
 /**
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 4ec8dacb447c..fad9ff4469b0 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8488,11 +8488,22 @@  static int ufshcd_alloc_mcq(struct ufs_hba *hba)
 static void ufshcd_config_mcq(struct ufs_hba *hba)
 {
 	int ret;
+	struct ufs_hw_queue *hwq;
+	int i;
 
 	ret = ufshcd_mcq_vops_config_esi(hba);
 	dev_info(hba->dev, "ESI %sconfigured\n", ret ? "is not " : "");
 
 	ufshcd_enable_intr(hba, UFSHCD_ENABLE_MCQ_INTRS);
+
+	ret = ufshcd_mcq_vops_config_cqid(hba);
+	if (ret) {
+		for (i = 0; i < hba->nr_hw_queues; i++) {
+			hwq = &hba->uhq[i];
+			hwq->cqid = i;
+		}
+	}
+
 	ufshcd_mcq_make_queues_operational(hba);
 	ufshcd_mcq_config_mac(hba, hba->nutrs);
 
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index df1d04f7a542..d5b16f968d7f 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -307,6 +307,7 @@  struct ufs_pwr_mode_info {
  * @op_runtime_config: called to config Operation and runtime regs Pointers
  * @get_outstanding_cqs: called to get outstanding completion queues
  * @config_esi: called to config Event Specific Interrupt
+ * @config_cqid: called to config cqid for each sq
  */
 struct ufs_hba_variant_ops {
 	const char *name;
@@ -352,6 +353,7 @@  struct ufs_hba_variant_ops {
 	int	(*get_outstanding_cqs)(struct ufs_hba *hba,
 				       unsigned long *ocqs);
 	int	(*config_esi)(struct ufs_hba *hba);
+	int	(*config_cqid)(struct ufs_hba *hba);
 };
 
 /* clock gating state  */
@@ -1100,6 +1102,7 @@  struct ufs_hw_queue {
 	dma_addr_t cqe_dma_addr;
 	u32 max_entries;
 	u32 id;
+	u32 cqid;
 	u32 sq_tail_slot;
 	spinlock_t sq_lock;
 	u32 cq_tail_slot;