| Message ID | 5a52b255001e994d0a65be9b1d61fe69f2a12f6c.1681764704.git.quic_nguyenb@quicinc.com |
|---|---|
| State | Superseded |
| Series | ufs: core: mcq: Add ufshcd_abort() and error handler support in MCQ mode |
On 4/17/23 14:05, Bao D. Nguyen wrote:
> +	/* MCQ mode */
> +	if (is_mcq_enabled(hba))
> +		return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);

The above code will trigger an overflow if lrbp->task_tag >= 8 * sizeof(unsigned long). That's not acceptable.

>  static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>  {
> +	struct ufshcd_lrb *lrbp;
> +	u32 hwq_num, utag;
> +	int tag;
> +
>  	/* Resetting interrupt aggregation counters first and reading the
>  	 * DOOR_BELL afterward allows us to handle all the completed requests.
>  	 * In order to prevent other interrupts starvation the DB is read once
> @@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>  	 * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
>  	 * do not want polling to trigger spurious interrupt complaints.
>  	 */
> -	ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
> +	if (!is_mcq_enabled(hba)) {
> +		ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
> +		goto out;
> +	}
> +
> +	/* MCQ mode */
> +	for (tag = 0; tag < hba->nutrs; tag++) {
> +		lrbp = &hba->lrb[tag];
> +		if (lrbp->cmd) {
> +			utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
> +			hwq_num = blk_mq_unique_tag_to_hwq(utag);
> +			ufshcd_poll(hba->host, hwq_num);
> +		}
> +	}

Is my understanding correct that ufshcd_transfer_req_compl() is only called from single doorbell code paths and hence that the above change is not necessary?

> +	if (is_mcq_enabled(hba)) {
> +		struct ufshcd_lrb *lrbp;
> +		int tag;
> +
> +		for (tag = 0; tag < hba->nutrs; tag++) {
> +			lrbp = &hba->lrb[tag];
> +			if (lrbp->cmd) {
> +				ret = ufshcd_try_to_abort_task(hba, tag);
> +				dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
> +					hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
> +					ret ? "failed" : "succeeded");
> +			}
> +			if (ret) {
> +				needs_reset = true;
> +				goto out;
> +			}
> +		}
> +	} else {
> +		/* Clear pending transfer requests */
> +		for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
> +			ret = ufshcd_try_to_abort_task(hba, tag);
> +			dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
> +				hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
> +				ret ? "failed" : "succeeded");
> +			if (ret) {
> +				needs_reset = true;
> +				goto out;
> +			}
>  		}
>  	}

Please introduce helper functions for the MCQ and SDB code paths such that the nesting level of the above code is reduced.

Thanks,

Bart.
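For illustration, a minimal sketch of the helper-function split requested above, keeping the loop bodies as posted. The names ufshcd_abort_one_tag(), ufshcd_abort_all_mcq() and ufshcd_abort_all_sdb() are hypothetical and not part of the patch:

```c
/* Hypothetical sketch: split ufshcd_abort_all() into per-mode helpers
 * so each loop sits at a single nesting level. */
static bool ufshcd_abort_one_tag(struct ufs_hba *hba, int tag)
{
	int ret = ufshcd_try_to_abort_task(hba, tag);

	dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
		hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
		ret ? "failed" : "succeeded");
	return ret == 0;
}

/* MCQ mode: walk every slot and abort each outstanding command. */
static bool ufshcd_abort_all_mcq(struct ufs_hba *hba)
{
	int tag;

	for (tag = 0; tag < hba->nutrs; tag++) {
		if (hba->lrb[tag].cmd && !ufshcd_abort_one_tag(hba, tag))
			return false;
	}
	return true;
}

/* SDB mode: iterate the outstanding_reqs bitmap as before. */
static bool ufshcd_abort_all_sdb(struct ufs_hba *hba)
{
	int tag;

	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
		if (!ufshcd_abort_one_tag(hba, tag))
			return false;
	}
	return true;
}
```

With helpers along these lines, ufshcd_abort_all() reduces to a single is_mcq_enabled() dispatch followed by the task-management loop.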
On 4/25/2023 5:21 PM, Bart Van Assche wrote:
> On 4/17/23 14:05, Bao D. Nguyen wrote:
>> +	/* MCQ mode */
>> +	if (is_mcq_enabled(hba))
>> +		return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);
>
> The above code will trigger an overflow if lrbp->task_tag >= 8 * sizeof(unsigned long). That's not acceptable.

ufshcd_clear_cmds() uses a bitmap, and there are multiple places in the UFS code that have this limitation if the queue depth grows beyond 64. I am thinking:
1. Current UFS controllers on the market probably support a queue depth of 64 or less, so this may not be a problem today if the host controller capability is set to a queue depth of 64, but it can become a problem in multiple places in the code later.
2. In MCQ mode, we can pass a tag number into ufshcd_clear_cmds(), while in SDB mode we pass the tag's bit mask as before.
3. Use sbitmap to support a large queue depth?
Thanks for any suggestions.

>>  static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>>  {
>> +	struct ufshcd_lrb *lrbp;
>> +	u32 hwq_num, utag;
>> +	int tag;
>> +
>>  	/* Resetting interrupt aggregation counters first and reading the
>>  	 * DOOR_BELL afterward allows us to handle all the completed requests.
>>  	 * In order to prevent other interrupts starvation the DB is read once
>> @@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>>  	 * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
>>  	 * do not want polling to trigger spurious interrupt complaints.
>>  	 */
>> -	ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
>> +	if (!is_mcq_enabled(hba)) {
>> +		ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
>> +		goto out;
>> +	}
>> +
>> +	/* MCQ mode */
>> +	for (tag = 0; tag < hba->nutrs; tag++) {
>> +		lrbp = &hba->lrb[tag];
>> +		if (lrbp->cmd) {
>> +			utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
>> +			hwq_num = blk_mq_unique_tag_to_hwq(utag);
>> +			ufshcd_poll(hba->host, hwq_num);
>> +		}
>> +	}
>
> Is my understanding correct that ufshcd_transfer_req_compl() is only called from single doorbell code paths and hence that the above change is not necessary?

ufshcd_transfer_req_compl() can be invoked in MCQ mode, for example from ufshcd_err_handler():
ufshcd_err_handler() --> ufshcd_complete_requests() --> ufshcd_transfer_req_compl()

>> +	if (is_mcq_enabled(hba)) {
>> +		struct ufshcd_lrb *lrbp;
>> +		int tag;
>> +
>> +		for (tag = 0; tag < hba->nutrs; tag++) {
>> +			lrbp = &hba->lrb[tag];
>> +			if (lrbp->cmd) {
>> +				ret = ufshcd_try_to_abort_task(hba, tag);
>> +				dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
>> +					hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
>> +					ret ? "failed" : "succeeded");
>> +			}
>> +			if (ret) {
>> +				needs_reset = true;
>> +				goto out;
>> +			}
>> +		}
>> +	} else {
>> +		/* Clear pending transfer requests */
>> +		for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
>> +			ret = ufshcd_try_to_abort_task(hba, tag);
>> +			dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
>> +				hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
>> +				ret ? "failed" : "succeeded");
>> +			if (ret) {
>> +				needs_reset = true;
>> +				goto out;
>> +			}
>>  		}
>>  	}
>
> Please introduce helper functions for the MCQ and SDB code paths such that the nesting level of the above code is reduced.

Sure. I will change.

> Thanks,
>
> Bart.
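As a sketch of option 2 above: a hypothetical single-tag variant, here called ufshcd_clear_cmd(), would take the task tag directly in MCQ mode so no `1UL << tag` shift can overflow, while SDB mode keeps the bit-mask form, which is bounded by the doorbell register width. Neither the helper nor ufshcd_clear_cmd() exists in the posted patch:

```c
/*
 * Hypothetical sketch of option 2: MCQ passes the tag itself, SDB
 * keeps the bit mask. ufshcd_clear_cmd() stands in for a reworked
 * single-tag interface and is not part of the posted patch.
 */
static int ufshcd_clear_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
{
	if (is_mcq_enabled(hba))
		return ufshcd_clear_cmd(hba, lrbp->task_tag);

	/* SDB mode: the tag is bounded by the doorbell register, so the
	 * shift cannot overflow. */
	return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);
}
```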
On 5/3/23 21:18, Bao D. Nguyen wrote:
> On 4/25/2023 5:21 PM, Bart Van Assche wrote:
>> On 4/17/23 14:05, Bao D. Nguyen wrote:
>>> +	/* MCQ mode */
>>> +	if (is_mcq_enabled(hba))
>>> +		return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);
>>
>> The above code will trigger an overflow if lrbp->task_tag >= 8 * sizeof(unsigned long). That's not acceptable.
> ufshcd_clear_cmds() uses a bitmap, and there are multiple places in the UFS code that have this limitation if the queue depth grows beyond 64. I am thinking:
> 1. Current UFS controllers on the market probably support a queue depth of 64 or less, so this may not be a problem today if the host controller capability is set to a queue depth of 64, but it can become a problem in multiple places in the code later.
> 2. In MCQ mode, we can pass a tag number into ufshcd_clear_cmds(), while in SDB mode we pass the tag's bit mask as before.
> 3. Use sbitmap to support a large queue depth?
> Thanks for any suggestions.

The UFS driver is the only block driver I know of that tracks which commands are pending in a bitmap. Please pass the lrbp pointer or the task_tag directly to ufshcd_clear_cmds() instead of passing a bitmap to that function. Please also introduce a loop in ufshcd_eh_device_reset_handler() around the ufshcd_clear_cmds() call instead of passing a bitmap to ufshcd_clear_cmds().

>>>  static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>>>  {
>>> +	struct ufshcd_lrb *lrbp;
>>> +	u32 hwq_num, utag;
>>> +	int tag;
>>> +
>>>  	/* Resetting interrupt aggregation counters first and reading the
>>>  	 * DOOR_BELL afterward allows us to handle all the completed requests.
>>>  	 * In order to prevent other interrupts starvation the DB is read once
>>> @@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>>>  	 * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
>>>  	 * do not want polling to trigger spurious interrupt complaints.
>>>  	 */
>>> -	ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
>>> +	if (!is_mcq_enabled(hba)) {
>>> +		ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
>>> +		goto out;
>>> +	}
>>> +
>>> +	/* MCQ mode */
>>> +	for (tag = 0; tag < hba->nutrs; tag++) {
>>> +		lrbp = &hba->lrb[tag];
>>> +		if (lrbp->cmd) {
>>> +			utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
>>> +			hwq_num = blk_mq_unique_tag_to_hwq(utag);
>>> +			ufshcd_poll(hba->host, hwq_num);
>>> +		}
>>> +	}
>>
>> Is my understanding correct that ufshcd_transfer_req_compl() is only called from single doorbell code paths and hence that the above change is not necessary?
> ufshcd_transfer_req_compl() can be invoked in MCQ mode, for example from ufshcd_err_handler():
> ufshcd_err_handler() --> ufshcd_complete_requests() --> ufshcd_transfer_req_compl()

Since there are multiple statements in ufshcd_transfer_req_compl() that assume SDB mode (resetting SDB interrupt aggregation and calling ufshcd_poll()), please move the is_mcq_enabled() test from ufshcd_transfer_req_compl() into the callers of that function.

Thanks,

Bart.
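A minimal sketch of the per-LUN loop suggested above for ufshcd_eh_device_reset_handler(), again assuming a hypothetical single-tag ufshcd_clear_cmd(hba, task_tag) in place of the bitmap interface:

```c
/*
 * Hypothetical sketch: clear each pending command of the target LUN
 * one tag at a time instead of building a bitmap first.
 */
static int ufshcd_clear_lun_cmds(struct ufs_hba *hba, u8 lun)
{
	struct ufshcd_lrb *lrbp;
	u32 pos;
	int err = 0;

	for (pos = 0; pos < hba->nutrs; pos++) {
		lrbp = &hba->lrb[pos];
		if (!lrbp->cmd || lrbp->lun != lun)
			continue;

		/* ufshcd_clear_cmd() is the assumed single-tag variant. */
		err = ufshcd_clear_cmd(hba, pos);
		if (err)
			break;
	}
	return err;
}
```

This shape also keeps the MCQ completion polling (ufshcd_mcq_poll_cqe_lock() per hardware queue) separable from the clearing itself, in line with moving the is_mcq_enabled() test into the callers.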
```diff
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index fef1907..e947f7f 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3127,6 +3127,12 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
 		err = -ETIMEDOUT;
 		dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
 			__func__, lrbp->task_tag);
+
+		/* MCQ mode */
+		if (is_mcq_enabled(hba))
+			return ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag);
+
+		/* SDB mode */
 		if (ufshcd_clear_cmds(hba, 1UL << lrbp->task_tag) == 0) {
 			/* successfully cleared the command, retry if needed */
 			err = -EAGAIN;
@@ -5562,6 +5568,10 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
  */
 static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
 {
+	struct ufshcd_lrb *lrbp;
+	u32 hwq_num, utag;
+	int tag;
+
 	/* Resetting interrupt aggregation counters first and reading the
 	 * DOOR_BELL afterward allows us to handle all the completed requests.
 	 * In order to prevent other interrupts starvation the DB is read once
@@ -5580,7 +5590,22 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
 	 * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
 	 * do not want polling to trigger spurious interrupt complaints.
 	 */
-	ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
+	if (!is_mcq_enabled(hba)) {
+		ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
+		goto out;
+	}
+
+	/* MCQ mode */
+	for (tag = 0; tag < hba->nutrs; tag++) {
+		lrbp = &hba->lrb[tag];
+		if (lrbp->cmd) {
+			utag = blk_mq_unique_tag(scsi_cmd_to_rq(lrbp->cmd));
+			hwq_num = blk_mq_unique_tag_to_hwq(utag);
+			ufshcd_poll(hba->host, hwq_num);
+		}
+	}
+
+out:
 	return IRQ_HANDLED;
 }
 
@@ -6359,18 +6384,36 @@ static bool ufshcd_abort_all(struct ufs_hba *hba)
 	bool needs_reset = false;
 	int tag, ret;
 
-	/* Clear pending transfer requests */
-	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
-		ret = ufshcd_try_to_abort_task(hba, tag);
-		dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
-			hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
-			ret ? "failed" : "succeeded");
-		if (ret) {
-			needs_reset = true;
-			goto out;
+	if (is_mcq_enabled(hba)) {
+		struct ufshcd_lrb *lrbp;
+		int tag;
+
+		for (tag = 0; tag < hba->nutrs; tag++) {
+			lrbp = &hba->lrb[tag];
+			if (lrbp->cmd) {
+				ret = ufshcd_try_to_abort_task(hba, tag);
+				dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
+					hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
+					ret ? "failed" : "succeeded");
+			}
+			if (ret) {
+				needs_reset = true;
+				goto out;
+			}
+		}
+	} else {
+		/* Clear pending transfer requests */
+		for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
+			ret = ufshcd_try_to_abort_task(hba, tag);
+			dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
+				hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
+				ret ? "failed" : "succeeded");
+			if (ret) {
+				needs_reset = true;
+				goto out;
+			}
 		}
 	}
-
 	/* Clear pending task management requests */
 	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
 		if (ufshcd_clear_tm_cmd(hba, tag)) {
@@ -7302,6 +7345,8 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 	unsigned long flags, pending_reqs = 0, not_cleared = 0;
 	struct Scsi_Host *host;
 	struct ufs_hba *hba;
+	struct ufs_hw_queue *hwq;
+	struct ufshcd_lrb *lrbp;
 	u32 pos;
 	int err;
 	u8 resp = 0xF, lun;
@@ -7317,6 +7362,19 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 		goto out;
 	}
 
+	if (is_mcq_enabled(hba)) {
+		for (pos = 0; pos < hba->nutrs; pos++) {
+			lrbp = &hba->lrb[pos];
+			if (lrbp->cmd && lrbp->lun == lun) {
+				ufshcd_clear_cmds(hba, 1UL << pos);
+				hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(lrbp->cmd));
+				ufshcd_mcq_poll_cqe_lock(hba, hwq);
+			}
+		}
+		err = 0;
+		goto out;
+	}
+
 	/* clear the commands that were pending for corresponding LUN */
 	spin_lock_irqsave(&hba->outstanding_lock, flags);
 	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
```
Add support for error handling for MCQ mode.

Signed-off-by: Bao D. Nguyen <quic_nguyenb@quicinc.com>
---
 drivers/ufs/core/ufshcd.c | 80 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 69 insertions(+), 11 deletions(-)