[v2,11/20] scsi: ufs: Switch to scsi_(get|put)_internal_cmd()

Message ID 20211119195743.2817-12-bvanassche@acm.org (mailing list archive)
State Superseded
Series UFS patches for kernel v5.17

Commit Message

Bart Van Assche Nov. 19, 2021, 7:57 p.m. UTC
The only functional change in this patch is the addition of a
blk_mq_start_request() call in ufshcd_issue_devman_upiu_cmd().

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/ufs/ufshcd.c | 46 +++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 16 deletions(-)

Comments

Bean Huo Nov. 23, 2021, 12:20 p.m. UTC | #1
On Fri, 2021-11-19 at 11:57 -0800, Bart Van Assche wrote:
> The only functional change in this patch is the addition of a
> blk_mq_start_request() call in ufshcd_issue_devman_upiu_cmd().
> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 46 +++++++++++++++++++++++++--------------
>  1 file changed, 30 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index fced4528ee90..dfa5f127342b 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -2933,6 +2933,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>  {
>  	struct request_queue *q = hba->cmd_queue;
>  	DECLARE_COMPLETION_ONSTACK(wait);
> +	struct scsi_cmnd *scmd;
>  	struct request *req;
>  	struct ufshcd_lrb *lrbp;
>  	int err;
> @@ -2945,15 +2946,18 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>  	 * Even though we use wait_event() which sleeps indefinitely,
>  	 * the maximum wait time is bounded by SCSI request timeout.
>  	 */
> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
> -	if (IS_ERR(req)) {
> -		err = PTR_ERR(req);
> +	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
> +	if (IS_ERR(scmd)) {
> +		err = PTR_ERR(scmd);
>  		goto out_unlock;
>  	}
> +	req = scsi_cmd_to_rq(scmd);
>  	tag = req->tag;
>  	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
> -	/* Set the timeout such that the SCSI error handler is not activated. */
> -	req->timeout = msecs_to_jiffies(2 * timeout);
> +	/*
> +	 * Start the request such that blk_mq_tag_idle() is called when the
> +	 * device management request finishes.
> +	 */
>  	blk_mq_start_request(req);
>  
>  	lrbp = &hba->lrb[tag];
> @@ -2972,7 +2976,8 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>  				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
>  
>  out:
> -	blk_mq_free_request(req);
> +	scsi_put_internal_cmd(scmd);
> +
>  out_unlock:
>  	up_read(&hba->clk_scaling_lock);
>  	return err;
> @@ -6573,17 +6578,16 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>  	struct request_queue *q = hba->tmf_queue;
>  	struct Scsi_Host *host = hba->host;
>  	DECLARE_COMPLETION_ONSTACK(wait);
> +	struct scsi_cmnd *scmd;
>  	struct request *req;
>  	unsigned long flags;
>  	int task_tag, err;
>  
> -	/*
> -	 * blk_mq_alloc_request() is used here only to get a free tag.
> -	 */
> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
> -	if (IS_ERR(req))
> -		return PTR_ERR(req);
> +	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
> +	if (IS_ERR(scmd))
> +		return PTR_ERR(scmd);
>  
> +	req = scsi_cmd_to_rq(scmd);
>  	req->end_io_data = &wait;
>  	ufshcd_hold(hba, false);
>  
> @@ -6636,7 +6640,8 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
>  
>  	ufshcd_release(hba);
> -	blk_mq_free_request(req);
> +
> +	scsi_put_internal_cmd(scmd);
>  
>  	return err;
>  }
> @@ -6714,6 +6719,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
>  {
>  	struct request_queue *q = hba->cmd_queue;
>  	DECLARE_COMPLETION_ONSTACK(wait);
> +	struct scsi_cmnd *scmd;
>  	struct request *req;
>  	struct ufshcd_lrb *lrbp;
>  	int err = 0;
> @@ -6722,13 +6728,19 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
>  
>  	down_read(&hba->clk_scaling_lock);
>  
> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
> -	if (IS_ERR(req)) {
> -		err = PTR_ERR(req);
> +	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
> +	if (IS_ERR(scmd)) {
> +		err = PTR_ERR(scmd);
>  		goto out_unlock;
>  	}
> +	req = scsi_cmd_to_rq(scmd);
>  	tag = req->tag;
>  	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
> +	/*
> +	 * Start the request such that blk_mq_tag_idle() is called when the
> +	 * device management request finishes.
> +	 */
> +	blk_mq_start_request(req);

Bart,

Calling blk_mq_start_request() will emit the trace print for the block
request being issued, but we do not have its paired completion trace print.
In addition, blk_mq_tag_idle() will not be called after the device
management request is completed; it will be called only after the timer
expires.

I remember that we used to not allow this kind of LLD internal command
to be attached to the block layer. I now find that to be the correct way.

Bean
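
For reference, the tag-idling behavior Bean describes is driven by the block
layer's timeout worker: blk_mq_tag_idle() is only invoked when the periodic
timeout work runs and finds no pending requests. A simplified sketch,
approximating block/blk-mq.c of this era (not verbatim kernel code):

/*
 * Simplified sketch of blk_mq_timeout_work(): tags are only marked idle
 * when the timeout timer fires and no in-flight requests remain, which
 * is why idling happens after the timer expires rather than at request
 * completion.
 */
static void blk_mq_timeout_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, timeout_work);
	unsigned long next = 0;
	struct blk_mq_hw_ctx *hctx;
	int i;

	/* Compute the next expiry time across all in-flight requests. */
	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);

	if (next != 0) {
		mod_timer(&q->timeout, next);	/* still busy: re-arm */
	} else {
		queue_for_each_hw_ctx(q, hctx, i)
			if (blk_mq_hw_queue_mapped(hctx))
				blk_mq_tag_idle(hctx);
	}
}
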
Bart Van Assche Nov. 23, 2021, 5:54 p.m. UTC | #2
On 11/23/21 4:20 AM, Bean Huo wrote:
> On Fri, 2021-11-19 at 11:57 -0800, Bart Van Assche wrote:
>> @@ -6722,13 +6728,19 @@ static int
>> ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
>>   
>>   	down_read(&hba->clk_scaling_lock);
>>   
>> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
>> -	if (IS_ERR(req)) {
>> -		err = PTR_ERR(req);
>> +	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
>> +	if (IS_ERR(scmd)) {
>> +		err = PTR_ERR(scmd);
>>   		goto out_unlock;
>>   	}
>> +	req = scsi_cmd_to_rq(scmd);
>>   	tag = req->tag;
>>   	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
>> +	/*
>> +	 * Start the request such that blk_mq_tag_idle() is called when
>> the
>> +	 * device management request finishes.
>> +	 */
>> +	blk_mq_start_request(req);
> 
> Bart,
> 
> Calling blk_mq_start_request() will emit the trace print for the block
> request being issued, but we do not have its paired completion trace print.
> In addition, blk_mq_tag_idle() will not be called after the device
> management request is completed; it will be called only after the timer
> expires.
> 
> I remember that we used to not allow this kind of LLD internal command
> to be attached to the block layer. I now find that to be the correct way.

Hi Bean,

As you may remember commit d0b2b70eb12e ("scsi: ufs: core: Increase the
usable queue depth") introduced a blk_mq_start_request() call in
ufshcd_exec_dev_cmd() to restore the queue depth from 16 to 32. I think
we need the same fix in ufshcd_issue_devman_upiu_cmd(). How about modifying
patch 1/20 of this series such that tracing is skipped for internal
requests? Would that address your concern?

Thanks,

Bart.
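
A hypothetical illustration of the "skip tracing for internal requests" idea
from the message above; this is not code from the posted series, and
blk_mq_start_request() is shown in abbreviated form:

/*
 * Hypothetical sketch (not part of the posted series): emit the issue
 * tracepoint only for non-passthrough requests, so that internal
 * device management commands do not produce unpaired issue events.
 */
void blk_mq_start_request(struct request *rq)
{
	if (!blk_rq_is_passthrough(rq))
		trace_block_rq_issue(rq);

	/* ... remainder of blk_mq_start_request() unchanged ... */
}
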
Bart Van Assche Nov. 23, 2021, 7:41 p.m. UTC | #3
On 11/23/21 4:20 AM, Bean Huo wrote:
> Calling blk_mq_start_request() will emit the trace print for the block
> request being issued, but we do not have its paired completion trace print.
> In addition, blk_mq_tag_idle() will not be called after the device
> management request is completed; it will be called only after the timer
> expires.
> 
> I remember that we used to not allow this kind of LLD internal command
> to be attached to the block layer. I now find that to be the correct way.

Hi Bean,

How about modifying the block layer such that blk_mq_tag_busy() is not
called for requests with operation type REQ_OP_DRV_*? I think that will
allow us to leave out the blk_mq_start_request() calls from the UFS driver.
These are the changes I currently have in mind (on top of this patch
series):

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ab34c4f20da..a7090b509f2d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -433,6 +433,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,

  static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
  {
+	const bool is_passthrough = blk_op_is_passthrough(data->cmd_flags);
  	struct request_queue *q = data->q;
  	u64 alloc_time_ns = 0;
  	struct request *rq;
@@ -455,8 +456,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
  		 * dispatch list. Don't include reserved tags in the
  		 * limiting, as it isn't useful.
  		 */
-		if (!op_is_flush(data->cmd_flags) &&
-		    !blk_op_is_passthrough(data->cmd_flags) &&
+		if (!op_is_flush(data->cmd_flags) && !is_passthrough &&
  		    e->type->ops.limit_depth &&
  		    !(data->flags & BLK_MQ_REQ_RESERVED))
  			e->type->ops.limit_depth(data->cmd_flags, data);
@@ -465,7 +465,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
  retry:
  	data->ctx = blk_mq_get_ctx(q);
  	data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
-	if (!(data->rq_flags & RQF_ELV))
+	if (!(data->rq_flags & RQF_ELV) && !is_passthrough)
  		blk_mq_tag_busy(data->hctx);

  	/*
@@ -575,10 +575,10 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
  	cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
  	data.ctx = __blk_mq_get_ctx(q, cpu);

-	if (!q->elevator)
-		blk_mq_tag_busy(data.hctx);
-	else
+	if (q->elevator)
  		data.rq_flags |= RQF_ELV;
+	else if (!blk_op_is_passthrough(data.cmd_flags))
+		blk_mq_tag_busy(data.hctx);

  	ret = -EWOULDBLOCK;
  	tag = blk_mq_get_tag(&data);
@@ -1369,7 +1369,8 @@ static bool __blk_mq_alloc_driver_tag(struct request *rq)
  	unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
  	int tag;

-	blk_mq_tag_busy(rq->mq_hctx);
+	if (!blk_rq_is_passthrough(rq))
+		blk_mq_tag_busy(rq->mq_hctx);

  	if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
  		bt = &rq->mq_hctx->tags->breserved_tags;
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index fcecbc4ee81b..2c9e9c79ca34 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1360,25 +1360,6 @@ static int ufshcd_devfreq_target(struct device *dev,
  	return ret;
  }

-static bool ufshcd_is_busy(struct request *req, void *priv, bool reserved)
-{
-	int *busy = priv;
-
-	WARN_ON_ONCE(reserved);
-	(*busy)++;
-	return false;
-}
-
-/* Whether or not any tag is in use by a request that is in progress. */
-static bool ufshcd_any_tag_in_use(struct ufs_hba *hba)
-{
-	struct request_queue *q = hba->host->internal_queue;
-	int busy = 0;
-
-	blk_mq_tagset_busy_iter(q->tag_set, ufshcd_is_busy, &busy);
-	return busy;
-}
-
  static int ufshcd_devfreq_get_dev_status(struct device *dev,
  		struct devfreq_dev_status *stat)
  {
@@ -1778,7 +1759,7 @@ static void ufshcd_gate_work(struct work_struct *work)

  	if (hba->clk_gating.active_reqs
  		|| hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL
-		|| ufshcd_any_tag_in_use(hba) || hba->outstanding_tasks
+		|| hba->outstanding_reqs || hba->outstanding_tasks
  		|| hba->active_uic_cmd || hba->uic_async_done)
  		goto rel_lock;

@@ -2996,12 +2977,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
  	req = scsi_cmd_to_rq(scmd);
  	tag = req->tag;
  	WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag);
-	/*
-	 * Start the request such that blk_mq_tag_idle() is called when the
-	 * device management request finishes.
-	 */
-	blk_mq_start_request(req);
-
  	lrbp = &hba->lrb[tag];
  	WARN_ON(lrbp->cmd);
  	err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag);
@@ -6792,12 +6767,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
  	req = scsi_cmd_to_rq(scmd);
  	tag = req->tag;
  	WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag);
-	/*
-	 * Start the request such that blk_mq_tag_idle() is called when the
-	 * device management request finishes.
-	 */
-	blk_mq_start_request(req);
-
  	lrbp = &hba->lrb[tag];
  	WARN_ON(lrbp->cmd);
  	lrbp->cmd = NULL;

Thanks,

Bart.
Adrian Hunter Nov. 24, 2021, 11:02 a.m. UTC | #4
On 19/11/2021 21:57, Bart Van Assche wrote:
> The only functional change in this patch is the addition of a
> blk_mq_start_request() call in ufshcd_issue_devman_upiu_cmd().
> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 46 +++++++++++++++++++++++++--------------
>  1 file changed, 30 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index fced4528ee90..dfa5f127342b 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -2933,6 +2933,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>  {
>  	struct request_queue *q = hba->cmd_queue;
>  	DECLARE_COMPLETION_ONSTACK(wait);
> +	struct scsi_cmnd *scmd;
>  	struct request *req;
>  	struct ufshcd_lrb *lrbp;
>  	int err;
> @@ -2945,15 +2946,18 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>  	 * Even though we use wait_event() which sleeps indefinitely,
>  	 * the maximum wait time is bounded by SCSI request timeout.
>  	 */
> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
> -	if (IS_ERR(req)) {
> -		err = PTR_ERR(req);
> +	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);

We do not need the block layer or SCSI commands here, which raises the
question: why involve them at all?

For example, the following is much simpler and seems to work:


 drivers/scsi/ufs/ufshcd.c | 52 +++++++--------------------------------
 drivers/scsi/ufs/ufshcd.h |  1 +
 2 files changed, 10 insertions(+), 43 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index d125d8792ace5..bdfac3e9991ee 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -128,8 +128,9 @@ EXPORT_SYMBOL_GPL(ufshcd_dump_regs);
 enum {
 	UFSHCD_MAX_CHANNEL	= 0,
 	UFSHCD_MAX_ID		= 1,
-	UFSHCD_CMD_PER_LUN	= 32,
-	UFSHCD_CAN_QUEUE	= 32,
+	UFSHCD_NUM_RESERVED	= 1,
+	UFSHCD_CMD_PER_LUN	= 32 - UFSHCD_NUM_RESERVED,
+	UFSHCD_CAN_QUEUE	= 32 - UFSHCD_NUM_RESERVED,
 };
 
 static const char *const ufshcd_state_name[] = {
@@ -2189,6 +2190,7 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba)
 	hba->nutrs = (hba->capabilities & MASK_TRANSFER_REQUESTS_SLOTS) + 1;
 	hba->nutmrs =
 	((hba->capabilities & MASK_TASK_MANAGEMENT_REQUEST_SLOTS) >> 16) + 1;
+	hba->reserved_slot = hba->nutrs - 1;
 
 	/* Read crypto capabilities */
 	err = ufshcd_hba_init_crypto_capabilities(hba);
@@ -2931,31 +2933,13 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
 static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 		enum dev_cmd_type cmd_type, int timeout)
 {
-	struct request_queue *q = hba->cmd_queue;
 	DECLARE_COMPLETION_ONSTACK(wait);
-	struct request *req;
 	struct ufshcd_lrb *lrbp;
 	int err;
-	int tag;
+	int tag = hba->reserved_slot;
 
 	down_read(&hba->clk_scaling_lock);
 
-	/*
-	 * Get free slot, sleep if slots are unavailable.
-	 * Even though we use wait_event() which sleeps indefinitely,
-	 * the maximum wait time is bounded by SCSI request timeout.
-	 */
-	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out_unlock;
-	}
-	tag = req->tag;
-	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
-	/* Set the timeout such that the SCSI error handler is not activated. */
-	req->timeout = msecs_to_jiffies(2 * timeout);
-	blk_mq_start_request(req);
-
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag);
@@ -2970,10 +2954,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout);
 	ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP,
 				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
-
 out:
-	blk_mq_free_request(req);
-out_unlock:
 	up_read(&hba->clk_scaling_lock);
 	return err;
 }
@@ -4955,11 +4936,7 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
  */
 static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth)
 {
-	struct ufs_hba *hba = shost_priv(sdev->host);
-
-	if (depth > hba->nutrs)
-		depth = hba->nutrs;
-	return scsi_change_queue_depth(sdev, depth);
+	return scsi_change_queue_depth(sdev, min(depth, sdev->host->can_queue));
 }
 
 static void ufshcd_hpb_destroy(struct ufs_hba *hba, struct scsi_device *sdev)
@@ -6706,24 +6683,14 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 					enum dev_cmd_type cmd_type,
 					enum query_opcode desc_op)
 {
-	struct request_queue *q = hba->cmd_queue;
 	DECLARE_COMPLETION_ONSTACK(wait);
-	struct request *req;
 	struct ufshcd_lrb *lrbp;
 	int err = 0;
-	int tag;
+	int tag = hba->reserved_slot;
 	u8 upiu_flags;
 
 	down_read(&hba->clk_scaling_lock);
 
-	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out_unlock;
-	}
-	tag = req->tag;
-	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
-
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	lrbp->cmd = NULL;
@@ -6791,7 +6758,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP,
 				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
 
-out_unlock:
 	up_read(&hba->clk_scaling_lock);
 	return err;
 }
@@ -9516,8 +9482,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	/* Configure LRB */
 	ufshcd_host_memory_configure(hba);
 
-	host->can_queue = hba->nutrs;
-	host->cmd_per_lun = hba->nutrs;
+	host->can_queue = hba->nutrs - UFSHCD_NUM_RESERVED;
+	host->cmd_per_lun = hba->nutrs - UFSHCD_NUM_RESERVED;
 	host->max_id = UFSHCD_MAX_ID;
 	host->max_lun = UFS_MAX_LUNS;
 	host->max_channel = UFSHCD_MAX_CHANNEL;
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index e9bc07c69a801..1addb2c906bae 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -836,6 +836,7 @@ struct ufs_hba {
 	u32 capabilities;
 	int nutrs;
 	int nutmrs;
+	int reserved_slot; /* Protected by dev_cmd.lock */
 	u32 ufs_version;
 	const struct ufs_hba_variant_ops *vops;
 	struct ufs_hba_variant_params *vps;
Adrian Hunter Nov. 24, 2021, 11:15 a.m. UTC | #5
On 24/11/2021 13:02, Adrian Hunter wrote:
> On 19/11/2021 21:57, Bart Van Assche wrote:
>> The only functional change in this patch is the addition of a
>> blk_mq_start_request() call in ufshcd_issue_devman_upiu_cmd().
>>
>> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
>> ---
>>  drivers/scsi/ufs/ufshcd.c | 46 +++++++++++++++++++++++++--------------
>>  1 file changed, 30 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>> index fced4528ee90..dfa5f127342b 100644
>> --- a/drivers/scsi/ufs/ufshcd.c
>> +++ b/drivers/scsi/ufs/ufshcd.c
>> @@ -2933,6 +2933,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>>  {
>>  	struct request_queue *q = hba->cmd_queue;
>>  	DECLARE_COMPLETION_ONSTACK(wait);
>> +	struct scsi_cmnd *scmd;
>>  	struct request *req;
>>  	struct ufshcd_lrb *lrbp;
>>  	int err;
>> @@ -2945,15 +2946,18 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>>  	 * Even though we use wait_event() which sleeps indefinitely,
>>  	 * the maximum wait time is bounded by SCSI request timeout.
>>  	 */
>> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
>> -	if (IS_ERR(req)) {
>> -		err = PTR_ERR(req);
>> +	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
> 
> We do not need the block layer or SCSI commands here, which raises the
> question: why involve them at all?
> 
> For example, the following is much simpler and seems to work:

Applied on top of patches 7-10; note that patch 10 unintentionally removed
a blk_mq_free_request() call that would otherwise have been removed here.

> [ ... ]
Bean Huo Nov. 24, 2021, 6:18 p.m. UTC | #6
On Tue, 2021-11-23 at 11:41 -0800, Bart Van Assche wrote:
> On 11/23/21 4:20 AM, Bean Huo wrote:
> > Calling blk_mq_start_request() will emit the trace print for the block
> > request being issued, but we do not have its paired completion trace
> > print.
> > In addition, blk_mq_tag_idle() will not be called after the device
> > management request is completed; it will be called only after the timer
> > expires.
> > 
> > I remember that we used to not allow this kind of LLD internal command
> > to be attached to the block layer. I now find that to be the correct
> > way.
> 
> Hi Bean,
> 
> How about modifying the block layer such that blk_mq_tag_busy() is not
> called for requests with operation type REQ_OP_DRV_*? I think that will
> allow us to leave out the blk_mq_start_request() calls from the UFS driver.
> These are the changes I currently have in mind (on top of this patch
> series):
> 

Hi Bart,

Yes, the patch below can solve these two problems, but you need to
change block layer code. Why do we have to go through the block layer to
get this tag, and what is the benefit? This is a device management
request. As for the patch recommended by Adrian, I think that is the
right way.


Kind regards,
Bean

> [ ... ]
Bart Van Assche Nov. 29, 2021, 7:32 p.m. UTC | #7
On 11/24/21 3:02 AM, Adrian Hunter wrote:
> On 19/11/2021 21:57, Bart Van Assche wrote:
>> The only functional change in this patch is the addition of a
>> blk_mq_start_request() call in ufshcd_issue_devman_upiu_cmd().
>>
>> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
>> ---
>>   drivers/scsi/ufs/ufshcd.c | 46 +++++++++++++++++++++++++--------------
>>   1 file changed, 30 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>> index fced4528ee90..dfa5f127342b 100644
>> --- a/drivers/scsi/ufs/ufshcd.c
>> +++ b/drivers/scsi/ufs/ufshcd.c
>> @@ -2933,6 +2933,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>>   {
>>   	struct request_queue *q = hba->cmd_queue;
>>   	DECLARE_COMPLETION_ONSTACK(wait);
>> +	struct scsi_cmnd *scmd;
>>   	struct request *req;
>>   	struct ufshcd_lrb *lrbp;
>>   	int err;
>> @@ -2945,15 +2946,18 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>>   	 * Even though we use wait_event() which sleeps indefinitely,
>>   	 * the maximum wait time is bounded by SCSI request timeout.
>>   	 */
>> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
>> -	if (IS_ERR(req)) {
>> -		err = PTR_ERR(req);
>> +	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
> 
> We do not need the block layer or SCSI commands here, which raises the
> question: why involve them at all?
> 
> For example, the following is much simpler and seems to work:
> [ ... ]

That patch bypasses the block layer for device management commands. So that
patch breaks a very basic assumption on which the block layer has been built,
namely that the block layer core knows whether or not any requests are ongoing.
That patch breaks at least the following functionality:
* Run-time power management. blk_pre_runtime_suspend() checks whether
   q_usage_counter is zero before initiating runtime power management.
   q_usage_counter is increased by blk_mq_alloc_request() and decreased by
   blk_mq_free_request(). I don't think it is safe to change the power state
   while a device management request is in progress.
* The code in blk_cleanup_queue() that waits for pending requests to finish
   before resources associated with the request queue are freed.
   ufshcd_remove() calls blk_cleanup_queue(hba->cmd_queue) and hence waits until
   pending device management commands have finished. That would no longer be the
   case if the block layer is bypassed to submit device management commands.

Bart.
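
Regarding the q_usage_counter point above, a simplified sketch of the
runtime-suspend gate, approximating block/blk-pm.c (the real
blk_pre_runtime_suspend() also checks q->dev and updates q->rpm_status):

/*
 * Simplified sketch of blk_pre_runtime_suspend(): runtime suspend only
 * proceeds when no request holds a q_usage_counter reference. Each
 * blk_mq_alloc_request() takes such a reference and blk_mq_free_request()
 * drops it, so bypassing request allocation would hide in-flight device
 * management commands from runtime PM.
 */
int blk_pre_runtime_suspend(struct request_queue *q)
{
	int ret = -EBUSY;

	blk_set_pm_only(q);		/* block new non-PM submitters */
	blk_freeze_queue_start(q);	/* switch q_usage_counter to atomic mode */
	percpu_ref_switch_to_atomic_sync(&q->q_usage_counter);
	if (percpu_ref_is_zero(&q->q_usage_counter))
		ret = 0;		/* nothing in flight: safe to suspend */
	percpu_ref_resurrect(&q->q_usage_counter);
	if (ret)
		blk_clear_pm_only(q);
	return ret;
}
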
Adrian Hunter Nov. 30, 2021, 6:41 a.m. UTC | #8
On 29/11/2021 21:32, Bart Van Assche wrote:
> On 11/24/21 3:02 AM, Adrian Hunter wrote:
>> On 19/11/2021 21:57, Bart Van Assche wrote:
>>> The only functional change in this patch is the addition of a
>>> blk_mq_start_request() call in ufshcd_issue_devman_upiu_cmd().
>>>
>>> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
>>> ---
>>>   drivers/scsi/ufs/ufshcd.c | 46 +++++++++++++++++++++++++--------------
>>>   1 file changed, 30 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>> index fced4528ee90..dfa5f127342b 100644
>>> --- a/drivers/scsi/ufs/ufshcd.c
>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>> @@ -2933,6 +2933,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>>>   {
>>>       struct request_queue *q = hba->cmd_queue;
>>>       DECLARE_COMPLETION_ONSTACK(wait);
>>> +    struct scsi_cmnd *scmd;
>>>       struct request *req;
>>>       struct ufshcd_lrb *lrbp;
>>>       int err;
>>> @@ -2945,15 +2946,18 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>>>        * Even though we use wait_event() which sleeps indefinitely,
>>>        * the maximum wait time is bounded by SCSI request timeout.
>>>        */
>>> -    req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
>>> -    if (IS_ERR(req)) {
>>> -        err = PTR_ERR(req);
>>> +    scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
>>
>> We do not need the block layer or SCSI commands here, which raises the
>> question: why involve them at all?
>>
>> For example, the following is much simpler and seems to work:
>> [ ... ]
> 
> That patch bypasses the block layer for device management commands. So that
> patch breaks a very basic assumption on which the block layer has been built,
> namely that the block layer core knows whether or not any requests are ongoing.
> That patch breaks at least the following functionality:
> * Run-time power management. blk_pre_runtime_suspend() checks whether
>   q_usage_counter is zero before initiating runtime power management.
>   q_usage_counter is increased by blk_mq_alloc_request() and decreased by
>   blk_mq_free_request(). I don't think it is safe to change the power state
>   while a device management request is in progress.

hba->cmd_queue does not have runtime PM.

I suspect making it do runtime PM might open up deadlock issues similar to
the issues that were seen with sending a clear-WLUN-UA command from the
UFS error handler.

> * The code in blk_cleanup_queue() that waits for pending requests to finish
>   before resources associated with the request queue are freed.
>   ufshcd_remove() calls blk_cleanup_queue(hba->cmd_queue) and hence waits until
>   pending device management commands have finished. That would no longer be the
>   case if the block layer is bypassed to submit device management commands.

cmd_queue is used only by the UFS driver, so if the driver is racing with
itself at "remove", then that should be fixed. The risk is not that the UFS
driver might use requests, but that it might still be operating when hba or
other resources get freed.

So the question remains: for device commands we do not need the block
layer or SCSI commands, so why involve them at all?
Bart Van Assche Nov. 30, 2021, 5:51 p.m. UTC | #9
On 11/29/21 10:41 PM, Adrian Hunter wrote:
> On 29/11/2021 21:32, Bart Van Assche wrote:
>> * The code in blk_cleanup_queue() that waits for pending requests to finish
>>    before resources associated with the request queue are freed.
>>    ufshcd_remove() calls blk_cleanup_queue(hba->cmd_queue) and hence waits until
>>    pending device management commands have finished. That would no longer be the
>>    case if the block layer is bypassed to submit device management commands.
> 
> cmd_queue is used only by the UFS driver, so if the driver is racing with
> itself at "remove", then that should be fixed. The risk is not that the UFS
> driver might use requests, but that it might still be operating when hba or
> other resources get freed.
> 
> So the question remains, for device commands, we do not need the block
> layer, nor SCSI commands which still begs the question, why involve them
> at all?

By using the block layer request allocation functions, the block layer guarantees
that each tag is in use in only one context. When bypassing the block layer, code
would have to be inserted in ufshcd_exec_dev_cmd() and ufshcd_issue_devman_upiu_cmd()
to serialize these functions. In other words, we would be duplicating existing
functionality if we bypass the block layer. The recommended approach in the Linux
kernel is not to duplicate existing functionality.

Thanks,

Bart.
Adrian Hunter Nov. 30, 2021, 7:15 p.m. UTC | #10
On 30/11/2021 19:51, Bart Van Assche wrote:
> On 11/29/21 10:41 PM, Adrian Hunter wrote:
>> On 29/11/2021 21:32, Bart Van Assche wrote:
>>> * The code in blk_cleanup_queue() that waits for pending requests to finish
>>>    before resources associated with the request queue are freed.
>>>    ufshcd_remove() calls blk_cleanup_queue(hba->cmd_queue) and hence waits until
>>>    pending device management commands have finished. That would no longer be the
>>>    case if the block layer is bypassed to submit device management commands.
>>
>> cmd_queue is used only by the UFS driver, so if the driver is racing with
>> itself at "remove", then that should be fixed. The risk is not that the UFS
>> driver might use requests, but that it might still be operating when hba or
>> other resources get freed.
>>
>> So the question remains, for device commands, we do not need the block
>> layer, nor SCSI commands which still begs the question, why involve them
>> at all?
> 
> By using the block layer request allocation functions, the block layer guarantees
> that each tag is in use in only one context. When bypassing the block layer, code
> would have to be inserted in ufshcd_exec_dev_cmd() and ufshcd_issue_devman_upiu_cmd()
> to serialize these functions.

They already are serialized, but you are essentially saying the functionality
being duplicated is just a lock.  What you are proposing seems awfully
complicated just to get the functionality of a lock.

> In other words, we would be duplicating existing
> functionality if we bypass the block layer. The recommended approach in the Linux
> kernel is not to duplicate existing functionality.

More accurately, the functionality would not be being used at all, so not
really any duplication.
Bart Van Assche Nov. 30, 2021, 7:21 p.m. UTC | #11
On 11/30/21 11:15 AM, Adrian Hunter wrote:
> On 30/11/2021 19:51, Bart Van Assche wrote:
>> By using the block layer request allocation functions, the block layer guarantees
>> that each tag is in use in only one context. When bypassing the block layer, code
>> would have to be inserted in ufshcd_exec_dev_cmd() and ufshcd_issue_devman_upiu_cmd()
>> to serialize these functions.
> 
> They already are serialized, but you are essentially saying the functionality
> being duplicated is just a lock.  What you are proposing seems awfully
> complicated just to get the functionality of a lock.

Are you perhaps referring to hba->dev_cmd.lock? I had overlooked that lock
when I wrote my previous email. I will take a closer look at the reserved
slot approach.

Thanks,

Bart.
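
For context, a minimal sketch of the serialization referred to above: every
device management command already runs under hba->dev_cmd.lock, so with the
reserved slot only one command can own that tag at a time. Abbreviated from
the ufshcd query path of this era; not a complete function:

/*
 * Sketch: hba->dev_cmd.lock serializes all device management commands,
 * so a single reserved tag has exactly one user at a time.
 */
static int ufshcd_query_flag_sketch(struct ufs_hba *hba,
				    enum query_opcode opcode,
				    enum flag_idn idn, u8 index,
				    bool *flag_res)
{
	int err;

	mutex_lock(&hba->dev_cmd.lock);		/* serializes all dev cmds */
	/* ... compose the query request in hba->dev_cmd ... */
	err = ufshcd_exec_dev_cmd(hba, DEV_CMD_TYPE_QUERY,
				  QUERY_REQ_TIMEOUT);
	mutex_unlock(&hba->dev_cmd.lock);

	return err;
}
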

Patch

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index fced4528ee90..dfa5f127342b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2933,6 +2933,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 {
 	struct request_queue *q = hba->cmd_queue;
 	DECLARE_COMPLETION_ONSTACK(wait);
+	struct scsi_cmnd *scmd;
 	struct request *req;
 	struct ufshcd_lrb *lrbp;
 	int err;
@@ -2945,15 +2946,18 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	 * Even though we use wait_event() which sleeps indefinitely,
 	 * the maximum wait time is bounded by SCSI request timeout.
 	 */
-	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
+	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
+	if (IS_ERR(scmd)) {
+		err = PTR_ERR(scmd);
 		goto out_unlock;
 	}
+	req = scsi_cmd_to_rq(scmd);
 	tag = req->tag;
 	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
-	/* Set the timeout such that the SCSI error handler is not activated. */
-	req->timeout = msecs_to_jiffies(2 * timeout);
+	/*
+	 * Start the request such that blk_mq_tag_idle() is called when the
+	 * device management request finishes.
+	 */
 	blk_mq_start_request(req);
 
 	lrbp = &hba->lrb[tag];
@@ -2972,7 +2976,8 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
 
 out:
-	blk_mq_free_request(req);
+	scsi_put_internal_cmd(scmd);
+
 out_unlock:
 	up_read(&hba->clk_scaling_lock);
 	return err;
@@ -6573,17 +6578,16 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	struct request_queue *q = hba->tmf_queue;
 	struct Scsi_Host *host = hba->host;
 	DECLARE_COMPLETION_ONSTACK(wait);
+	struct scsi_cmnd *scmd;
 	struct request *req;
 	unsigned long flags;
 	int task_tag, err;
 
-	/*
-	 * blk_mq_alloc_request() is used here only to get a free tag.
-	 */
-	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
+	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
+	if (IS_ERR(scmd))
+		return PTR_ERR(scmd);
 
+	req = scsi_cmd_to_rq(scmd);
 	req->end_io_data = &wait;
 	ufshcd_hold(hba, false);
 
@@ -6636,7 +6640,8 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	ufshcd_release(hba);
-	blk_mq_free_request(req);
+
+	scsi_put_internal_cmd(scmd);
 
 	return err;
 }
@@ -6714,6 +6719,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 {
 	struct request_queue *q = hba->cmd_queue;
 	DECLARE_COMPLETION_ONSTACK(wait);
+	struct scsi_cmnd *scmd;
 	struct request *req;
 	struct ufshcd_lrb *lrbp;
 	int err = 0;
@@ -6722,13 +6728,19 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 
 	down_read(&hba->clk_scaling_lock);
 
-	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
+	scmd = scsi_get_internal_cmd(q, DMA_TO_DEVICE, 0);
+	if (IS_ERR(scmd)) {
+		err = PTR_ERR(scmd);
 		goto out_unlock;
 	}
+	req = scsi_cmd_to_rq(scmd);
 	tag = req->tag;
 	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
+	/*
+	 * Start the request such that blk_mq_tag_idle() is called when the
+	 * device management request finishes.
+	 */
+	blk_mq_start_request(req);
 
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
@@ -6797,6 +6809,8 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP,
 				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
 
+	scsi_put_internal_cmd(scmd);
+
 out_unlock:
 	up_read(&hba->clk_scaling_lock);
 	return err;