Message ID | 20191112173743.141503-5-bvanassche@acm.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Simplify and optimize the UFS driver | expand |
On 2019-11-13 01:37, Bart Van Assche wrote: > Scaling the clock is only safe while no commands are in progress. Use > blk_mq_{un,}freeze_queue() to block submission of new commands and to > wait for ongoing commands to complete. Remove "_scsi" from the name of > the functions that block and unblock requests since these functions > now block both SCSI commands and non-SCSI commands. > > Cc: Bean Huo <beanhuo@micron.com> > Cc: Avri Altman <avri.altman@wdc.com> > Cc: Stanley Chu <stanley.chu@mediatek.com> > Cc: Tomas Winkler <tomas.winkler@intel.com> > Signed-off-by: Bart Van Assche <bvanassche@acm.org> > --- > drivers/scsi/ufs/ufshcd.c | 141 +++++++++++++------------------------- > drivers/scsi/ufs/ufshcd.h | 3 - > 2 files changed, 46 insertions(+), 98 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index 810b997f55fc..717ba24a2d40 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -290,16 +290,50 @@ static inline void ufshcd_disable_irq(struct > ufs_hba *hba) > } > } > > -static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba) > +static void ufshcd_unblock_requests(struct ufs_hba *hba) > { > - if (atomic_dec_and_test(&hba->scsi_block_reqs_cnt)) > - scsi_unblock_requests(hba->host); > + struct scsi_device *sdev; > + > + blk_mq_unfreeze_queue(hba->tmf_queue); > + blk_mq_unfreeze_queue(hba->cmd_queue); > + shost_for_each_device(sdev, hba->host) > + blk_mq_unfreeze_queue(sdev->request_queue); > } > > -static void ufshcd_scsi_block_requests(struct ufs_hba *hba) > +static int ufshcd_block_requests(struct ufs_hba *hba, unsigned long > timeout) > { > - if (atomic_inc_return(&hba->scsi_block_reqs_cnt) == 1) > - scsi_block_requests(hba->host); > + struct scsi_device *sdev; > + unsigned long deadline = jiffies + timeout; > + > + if (timeout == ULONG_MAX) { > + shost_for_each_device(sdev, hba->host) > + blk_mq_freeze_queue(sdev->request_queue); > + blk_mq_freeze_queue(hba->cmd_queue); > + 
blk_mq_freeze_queue(hba->tmf_queue); > + return 0; > + } > + > + shost_for_each_device(sdev, hba->host) > + blk_freeze_queue_start(sdev->request_queue); > + blk_freeze_queue_start(hba->cmd_queue); > + blk_freeze_queue_start(hba->tmf_queue); > + shost_for_each_device(sdev, hba->host) { > + if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue, > + max_t(long, 0, deadline - jiffies)) <= 0) { > + goto err; > + } > + } > + if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue, > + max_t(long, 0, deadline - jiffies)) <= 0) > + goto err; > + if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, > + max_t(long, 0, deadline - jiffies)) <= 0) > + goto err; > + return 0; > + > +err: > + ufshcd_unblock_requests(hba); > + return -ETIMEDOUT; > } > > static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned > int tag, > @@ -971,65 +1005,6 @@ static bool > ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba, > return false; > } > > -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, > - u64 wait_timeout_us) > -{ > - unsigned long flags; > - int ret = 0; > - u32 tm_doorbell; > - u32 tr_doorbell; > - bool timeout = false, do_last_check = false; > - ktime_t start; > - > - ufshcd_hold(hba, false); > - spin_lock_irqsave(hba->host->host_lock, flags); > - /* > - * Wait for all the outstanding tasks/transfer requests. > - * Verify by checking the doorbell registers are clear. 
> - */ > - start = ktime_get(); > - do { > - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) { > - ret = -EBUSY; > - goto out; > - } > - > - tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); > - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); > - if (!tm_doorbell && !tr_doorbell) { > - timeout = false; > - break; > - } else if (do_last_check) { > - break; > - } > - > - spin_unlock_irqrestore(hba->host->host_lock, flags); > - schedule(); > - if (ktime_to_us(ktime_sub(ktime_get(), start)) > > - wait_timeout_us) { > - timeout = true; > - /* > - * We might have scheduled out for long time so make > - * sure to check if doorbells are cleared by this time > - * or not. > - */ > - do_last_check = true; > - } > - spin_lock_irqsave(hba->host->host_lock, flags); > - } while (tm_doorbell || tr_doorbell); > - > - if (timeout) { > - dev_err(hba->dev, > - "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n", > - __func__, tm_doorbell, tr_doorbell); > - ret = -EBUSY; > - } > -out: > - spin_unlock_irqrestore(hba->host->host_lock, flags); > - ufshcd_release(hba); > - return ret; > -} > - > /** > * ufshcd_scale_gear - scale up/down UFS gear > * @hba: per adapter instance > @@ -1079,27 +1054,16 @@ static int ufshcd_scale_gear(struct ufs_hba > *hba, bool scale_up) > > static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) > { > - #define DOORBELL_CLR_TOUT_US (1000 * 1000) /* 1 sec */ > - int ret = 0; > /* > * make sure that there are no outstanding requests when > * clock scaling is in progress > */ > - ufshcd_scsi_block_requests(hba); > - down_write(&hba->clk_scaling_lock); > - if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { > - ret = -EBUSY; > - up_write(&hba->clk_scaling_lock); > - ufshcd_scsi_unblock_requests(hba); > - } > - > - return ret; > + return ufshcd_block_requests(hba, HZ); > } > > static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba) > { > - up_write(&hba->clk_scaling_lock); > - 
ufshcd_scsi_unblock_requests(hba); > + ufshcd_unblock_requests(hba); > } > > /** > @@ -1471,7 +1435,7 @@ static void ufshcd_ungate_work(struct work_struct > *work) > hba->clk_gating.is_suspended = false; > } > unblock_reqs: > - ufshcd_scsi_unblock_requests(hba); > + ufshcd_unblock_requests(hba); > } > > /** > @@ -1528,7 +1492,7 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) > */ > /* fallthrough */ > case CLKS_OFF: > - ufshcd_scsi_block_requests(hba); > + ufshcd_block_requests(hba, ULONG_MAX); Hi Bart, ufshcd_hold(async == true) is used in ufshcd_queuecommand() path because ufshcd_queuecommand() can be entered under atomic contexts. Thus ufshcd_block_requests() here has the same risk causing scheduling while atomic. FYI, it is not easy to hit above scenario in small scale of test. Best Regards, Can Guo. > hba->clk_gating.state = REQ_CLKS_ON; > trace_ufshcd_clk_gating(dev_name(hba->dev), > hba->clk_gating.state); > @@ -2395,9 +2359,6 @@ static int ufshcd_queuecommand(struct Scsi_Host > *host, struct scsi_cmnd *cmd) > BUG(); > } > > - if (!down_read_trylock(&hba->clk_scaling_lock)) > - return SCSI_MLQUEUE_HOST_BUSY; > - > spin_lock_irqsave(hba->host->host_lock, flags); > switch (hba->ufshcd_state) { > case UFSHCD_STATE_OPERATIONAL: > @@ -2463,7 +2424,6 @@ static int ufshcd_queuecommand(struct Scsi_Host > *host, struct scsi_cmnd *cmd) > out_unlock: > spin_unlock_irqrestore(hba->host->host_lock, flags); > out: > - up_read(&hba->clk_scaling_lock); > return err; > } > > @@ -2617,8 +2577,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba > *hba, > struct completion wait; > unsigned long flags; > > - down_read(&hba->clk_scaling_lock); > - > /* > * Get free slot, sleep if slots are unavailable. 
> * Even though we use wait_event() which sleeps indefinitely, > @@ -2654,7 +2612,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba > *hba, > > out_put_tag: > blk_put_request(req); > - up_read(&hba->clk_scaling_lock); > return err; > } > > @@ -5342,7 +5299,7 @@ static void ufshcd_err_handler(struct work_struct > *work) > spin_unlock_irqrestore(hba->host->host_lock, flags); > if (starget) > scsi_target_unblock(&starget->dev, SDEV_RUNNING); > - ufshcd_scsi_unblock_requests(hba); > + scsi_unblock_requests(hba->host); > ufshcd_release(hba); > pm_runtime_put_sync(hba->dev); > } > @@ -5466,7 +5423,7 @@ static void ufshcd_check_errors(struct ufs_hba > *hba) > /* handle fatal errors only when link is functional */ > if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) { > /* block commands from scsi mid-layer */ > - ufshcd_scsi_block_requests(hba); > + scsi_block_requests(hba->host); > > hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED; > > @@ -5772,8 +5729,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct > ufs_hba *hba, > unsigned long flags; > u32 upiu_flags; > > - down_read(&hba->clk_scaling_lock); > - > req = blk_get_request(q, REQ_OP_DRV_OUT, 0); > if (IS_ERR(req)) > return PTR_ERR(req); > @@ -5853,7 +5808,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct > ufs_hba *hba, > } > > blk_put_request(req); > - up_read(&hba->clk_scaling_lock); > return err; > } > > @@ -8322,8 +8276,6 @@ int ufshcd_init(struct ufs_hba *hba, void > __iomem *mmio_base, unsigned int irq) > /* Initialize mutex for device management commands */ > mutex_init(&hba->dev_cmd.lock); > > - init_rwsem(&hba->clk_scaling_lock); > - > ufshcd_init_clk_gating(hba); > > ufshcd_init_clk_scaling(hba); > @@ -8410,7 +8362,6 @@ int ufshcd_init(struct ufs_hba *hba, void > __iomem *mmio_base, unsigned int irq) > > /* Hold auto suspend until async scan completes */ > pm_runtime_get_sync(dev); > - atomic_set(&hba->scsi_block_reqs_cnt, 0); > /* > * We are assuming that device wasn't put in sleep/power-down > * state 
exclusively during the boot stage before kernel. > diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h > index 5865e16f53a6..c44bfcdb26a3 100644 > --- a/drivers/scsi/ufs/ufshcd.h > +++ b/drivers/scsi/ufs/ufshcd.h > @@ -520,7 +520,6 @@ struct ufs_stats { > * @urgent_bkops_lvl: keeps track of urgent bkops level for device > * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level > for > * device is known or not. > - * @scsi_block_reqs_cnt: reference counting for scsi block requests > */ > struct ufs_hba { > void __iomem *mmio_base; > @@ -724,9 +723,7 @@ struct ufs_hba { > enum bkops_status urgent_bkops_lvl; > bool is_urgent_bkops_lvl_checked; > > - struct rw_semaphore clk_scaling_lock; > struct ufs_desc_size desc_size; > - atomic_t scsi_block_reqs_cnt; > > struct device bsg_dev; > struct request_queue *bsg_queue;
On 11/12/19 4:11 PM, cang@codeaurora.org wrote: > On 2019-11-13 01:37, Bart Van Assche wrote: >> @@ -1528,7 +1492,7 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) >> */ >> /* fallthrough */ >> case CLKS_OFF: >> - ufshcd_scsi_block_requests(hba); >> + ufshcd_block_requests(hba, ULONG_MAX); > > ufshcd_hold(async == true) is used in ufshcd_queuecommand() path because > ufshcd_queuecommand() can be entered under atomic contexts. > Thus ufshcd_block_requests() here has the same risk causing scheduling > while atomic. > FYI, it is not easy to hit above scenario in small scale of test. Hi Bean, How about replacing patch 4/4 with the attached patch? Thanks, Bart.
Hi, Bart I think, you are asking for comment from Can. As for me, attached patch is better. Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell register, Now using block layer blk_mq_{un,}freeze_queue(), looks good. I tested your V5 patches, didn't find problem yet. Since my available platform doesn't support dynamic clk scaling, I think, now seems only Qcom UFS controllers support this feature. So we need Can Guo to finally confirm it. Thanks, //Bean > -----Original Message----- > From: Bart Van Assche <bvanassche@acm.org> > Sent: Wednesday, November 13, 2019 1:56 AM > To: cang@codeaurora.org > Cc: Martin K . Petersen <martin.petersen@oracle.com>; James E . J . Bottomley > <jejb@linux.vnet.ibm.com>; Bean Huo (beanhuo) <beanhuo@micron.com>; Avri > Altman <avri.altman@wdc.com>; Asutosh Das <asutoshd@codeaurora.org>; > Vignesh Raghavendra <vigneshr@ti.com>; linux-scsi@vger.kernel.org; Stanley > Chu <stanley.chu@mediatek.com>; Tomas Winkler <tomas.winkler@intel.com> > Subject: [EXT] Re: [PATCH v5 4/4] ufs: Simplify the clock scaling mechanism > implementation > > On 11/12/19 4:11 PM, cang@codeaurora.org wrote: > > On 2019-11-13 01:37, Bart Van Assche wrote: > >> @@ -1528,7 +1492,7 @@ int ufshcd_hold(struct ufs_hba *hba, bool > >> async) > >> */ > >> /* fallthrough */ > >> case CLKS_OFF: > >> - ufshcd_scsi_block_requests(hba); > >> + ufshcd_block_requests(hba, ULONG_MAX); > > > > ufshcd_hold(async == true) is used in ufshcd_queuecommand() path > > because > > ufshcd_queuecommand() can be entered under atomic contexts. > > Thus ufshcd_block_requests() here has the same risk causing scheduling > > while atomic. > > FYI, it is not easy to hit above scenario in small scale of test. > > Hi Bean, > > How about replacing patch 4/4 with the attached patch? > > Thanks, > > Bart.
On 11/13/19 8:03 AM, Bean Huo (beanhuo) wrote: > I think, you are asking for comment from Can. As for me, attached patch is better. > Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell register, Now > using block layer blk_mq_{un,}freeze_queue(), looks good. I tested your V5 patches, > didn't find problem yet. > > Since my available platform doesn't support dynamic clk scaling, I think, now seems only > Qcom UFS controllers support this feature. So we need Can Guo to finally confirm it. Hi Can, Do you agree with this patch series if patch 4 is replaced by the patch attached to my previous e-mail? The entire (revised) series is available at https://github.com/bvanassche/linux/tree/ufs-for-next. Thanks, Bart.
On 2019-11-15 00:11, Bart Van Assche wrote: > On 11/13/19 8:03 AM, Bean Huo (beanhuo) wrote: >> I think, you are asking for comment from Can. As for me, attached >> patch is better. >> Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell >> register, Now >> using block layer blk_mq_{un,}freeze_queue(), looks good. I tested >> your V5 patches, >> didn't find problem yet. >> >> Since my available platform doesn't support dynamic clk scaling, I >> think, now seems only >> Qcom UFS controllers support this feature. So we need Can Guo to >> finally confirm it. > > Hi Can, > > Do you agree with this patch series if patch 4 is replaced by the > patch attached to my previous e-mail? The entire (revised) series is > available at https://github.com/bvanassche/linux/tree/ufs-for-next. > > Thanks, > > Bart. Hi Bart, After ufshcd_clock_scaling_prepare() returns(no error), all request queues are frozen. If failure happens(say power mode change command fails) after this point and error handler kicks off, we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back to functionality. However, as the hba->cmd_queue is frozen, dev commands cannot be sent, the error handler shall fail. Thanks, Can Guo.
On 11/14/19 10:01 PM, Can Guo wrote: > After ufshcd_clock_scaling_prepare() returns(no error), all request queues are frozen. If failure > happens(say power mode change command fails) after this point and error handler kicks off, > we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back to functionality. > However, as the hba->cmd_queue is frozen, dev commands cannot be sent, the error handler shall fail. Hi Can, I think that's easy to address. How about replacing patch 4/4 with the patch below? Thanks, Bart. Subject: [PATCH] ufs: Simplify the clock scaling mechanism implementation Scaling the clock is only safe while no commands are in progress. Use blk_mq_{un,}freeze_queue() to block submission of new commands and to wait for ongoing commands to complete. This patch removes a semaphore down and up operation pair from the hot path. Cc: Bean Huo <beanhuo@micron.com> Cc: Avri Altman <avri.altman@wdc.com> Cc: Stanley Chu <stanley.chu@mediatek.com> Cc: Tomas Winkler <tomas.winkler@intel.com> Signed-off-by: Bart Van Assche <bvanassche@acm.org> --- drivers/scsi/ufs/ufshcd.c | 115 +++++++++++--------------------------- drivers/scsi/ufs/ufshcd.h | 1 - 2 files changed, 33 insertions(+), 83 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 5314e8bfeeb6..343f9b746b70 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -971,65 +971,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba, return false; } -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, - u64 wait_timeout_us) -{ - unsigned long flags; - int ret = 0; - u32 tm_doorbell; - u32 tr_doorbell; - bool timeout = false, do_last_check = false; - ktime_t start; - - ufshcd_hold(hba, false); - spin_lock_irqsave(hba->host->host_lock, flags); - /* - * Wait for all the outstanding tasks/transfer requests. - * Verify by checking the doorbell registers are clear. 
- */ - start = ktime_get(); - do { - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) { - ret = -EBUSY; - goto out; - } - - tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); - if (!tm_doorbell && !tr_doorbell) { - timeout = false; - break; - } else if (do_last_check) { - break; - } - - spin_unlock_irqrestore(hba->host->host_lock, flags); - schedule(); - if (ktime_to_us(ktime_sub(ktime_get(), start)) > - wait_timeout_us) { - timeout = true; - /* - * We might have scheduled out for long time so make - * sure to check if doorbells are cleared by this time - * or not. - */ - do_last_check = true; - } - spin_lock_irqsave(hba->host->host_lock, flags); - } while (tm_doorbell || tr_doorbell); - - if (timeout) { - dev_err(hba->dev, - "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n", - __func__, tm_doorbell, tr_doorbell); - ret = -EBUSY; - } -out: - spin_unlock_irqrestore(hba->host->host_lock, flags); - ufshcd_release(hba); - return ret; -} - /** * ufshcd_scale_gear - scale up/down UFS gear * @hba: per adapter instance @@ -1079,27 +1020,49 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up) static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) { - #define DOORBELL_CLR_TOUT_US (1000 * 1000) /* 1 sec */ - int ret = 0; + unsigned long deadline = jiffies + HZ; + struct scsi_device *sdev; + int res = 0; + /* * make sure that there are no outstanding requests when * clock scaling is in progress */ - ufshcd_scsi_block_requests(hba); - down_write(&hba->clk_scaling_lock); - if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { - ret = -EBUSY; - up_write(&hba->clk_scaling_lock); - ufshcd_scsi_unblock_requests(hba); + shost_for_each_device(sdev, hba->host) + blk_freeze_queue_start(sdev->request_queue); + blk_freeze_queue_start(hba->cmd_queue); + blk_freeze_queue_start(hba->tmf_queue); + shost_for_each_device(sdev, hba->host) { + if 
(blk_mq_freeze_queue_wait_timeout(sdev->request_queue, + max_t(long, 0, deadline - jiffies)) <= 0) { + goto err; + } } + if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue, + max_t(long, 0, deadline - jiffies)) <= 0) + goto err; + if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, + max_t(long, 0, deadline - jiffies)) <= 0) + goto err; - return ret; +out: + blk_mq_unfreeze_queue(hba->tmf_queue); + blk_mq_unfreeze_queue(hba->cmd_queue); + return res; + +err: + shost_for_each_device(sdev, hba->host) + blk_mq_unfreeze_queue(sdev->request_queue); + res = -ETIMEDOUT; + goto out; } static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba) { - up_write(&hba->clk_scaling_lock); - ufshcd_scsi_unblock_requests(hba); + struct scsi_device *sdev; + + shost_for_each_device(sdev, hba->host) + blk_mq_unfreeze_queue(sdev->request_queue); } /** @@ -2394,9 +2357,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) BUG(); } - if (!down_read_trylock(&hba->clk_scaling_lock)) - return SCSI_MLQUEUE_HOST_BUSY; - spin_lock_irqsave(hba->host->host_lock, flags); switch (hba->ufshcd_state) { case UFSHCD_STATE_OPERATIONAL: @@ -2462,7 +2422,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) out_unlock: spin_unlock_irqrestore(hba->host->host_lock, flags); out: - up_read(&hba->clk_scaling_lock); return err; } @@ -2616,8 +2575,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, struct completion wait; unsigned long flags; - down_read(&hba->clk_scaling_lock); - /* * Get free slot, sleep if slots are unavailable. 
* Even though we use wait_event() which sleeps indefinitely, @@ -2653,7 +2610,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, out_put_tag: blk_put_request(req); - up_read(&hba->clk_scaling_lock); return err; } @@ -5771,8 +5727,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, unsigned long flags; u32 upiu_flags; - down_read(&hba->clk_scaling_lock); - req = blk_get_request(q, REQ_OP_DRV_OUT, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -5854,7 +5808,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, } blk_put_request(req); - up_read(&hba->clk_scaling_lock); return err; } @@ -8323,8 +8276,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Initialize mutex for device management commands */ mutex_init(&hba->dev_cmd.lock); - init_rwsem(&hba->clk_scaling_lock); - ufshcd_init_clk_gating(hba); ufshcd_init_clk_scaling(hba); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 5865e16f53a6..5ebb920ae874 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -724,7 +724,6 @@ struct ufs_hba { enum bkops_status urgent_bkops_lvl; bool is_urgent_bkops_lvl_checked; - struct rw_semaphore clk_scaling_lock; struct ufs_desc_size desc_size; atomic_t scsi_block_reqs_cnt;
On 2019-11-16 00:23, Bart Van Assche wrote: > On 11/14/19 10:01 PM, Can Guo wrote: >> After ufshcd_clock_scaling_prepare() returns(no error), all request >> queues are frozen. If failure >> happens(say power mode change command fails) after this point and >> error handler kicks off, >> we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back >> to functionality. >> However, as the hba->cmd_queue is frozen, dev commands cannot be sent, >> the error handler shall fail. > > Hi Can, > > I think that's easy to address. How about replacing patch 4/4 with the > patch below? > > Thanks, > > Bart. > > > Subject: [PATCH] ufs: Simplify the clock scaling mechanism > implementation > > Scaling the clock is only safe while no commands are in progress. Use > blk_mq_{un,}freeze_queue() to block submission of new commands and to > wait for ongoing commands to complete. This patch removes a semaphore > down and up operation pair from the hot path. > > Cc: Bean Huo <beanhuo@micron.com> > Cc: Avri Altman <avri.altman@wdc.com> > Cc: Stanley Chu <stanley.chu@mediatek.com> > Cc: Tomas Winkler <tomas.winkler@intel.com> > Signed-off-by: Bart Van Assche <bvanassche@acm.org> > --- > drivers/scsi/ufs/ufshcd.c | 115 +++++++++++--------------------------- > drivers/scsi/ufs/ufshcd.h | 1 - > 2 files changed, 33 insertions(+), 83 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index 5314e8bfeeb6..343f9b746b70 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -971,65 +971,6 @@ static bool > ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba, > return false; > } > > -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, > - u64 wait_timeout_us) > -{ > - unsigned long flags; > - int ret = 0; > - u32 tm_doorbell; > - u32 tr_doorbell; > - bool timeout = false, do_last_check = false; > - ktime_t start; > - > - ufshcd_hold(hba, false); > - spin_lock_irqsave(hba->host->host_lock, flags); > - /* > - * Wait for all 
the outstanding tasks/transfer requests. > - * Verify by checking the doorbell registers are clear. > - */ > - start = ktime_get(); > - do { > - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) { > - ret = -EBUSY; > - goto out; > - } > - > - tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); > - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); > - if (!tm_doorbell && !tr_doorbell) { > - timeout = false; > - break; > - } else if (do_last_check) { > - break; > - } > - > - spin_unlock_irqrestore(hba->host->host_lock, flags); > - schedule(); > - if (ktime_to_us(ktime_sub(ktime_get(), start)) > > - wait_timeout_us) { > - timeout = true; > - /* > - * We might have scheduled out for long time so make > - * sure to check if doorbells are cleared by this time > - * or not. > - */ > - do_last_check = true; > - } > - spin_lock_irqsave(hba->host->host_lock, flags); > - } while (tm_doorbell || tr_doorbell); > - > - if (timeout) { > - dev_err(hba->dev, > - "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n", > - __func__, tm_doorbell, tr_doorbell); > - ret = -EBUSY; > - } > -out: > - spin_unlock_irqrestore(hba->host->host_lock, flags); > - ufshcd_release(hba); > - return ret; > -} > - > /** > * ufshcd_scale_gear - scale up/down UFS gear > * @hba: per adapter instance > @@ -1079,27 +1020,49 @@ static int ufshcd_scale_gear(struct ufs_hba > *hba, bool scale_up) > > static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) > { > - #define DOORBELL_CLR_TOUT_US (1000 * 1000) /* 1 sec */ > - int ret = 0; > + unsigned long deadline = jiffies + HZ; > + struct scsi_device *sdev; > + int res = 0; > + > /* > * make sure that there are no outstanding requests when > * clock scaling is in progress > */ > - ufshcd_scsi_block_requests(hba); > - down_write(&hba->clk_scaling_lock); > - if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { > - ret = -EBUSY; > - up_write(&hba->clk_scaling_lock); > - ufshcd_scsi_unblock_requests(hba); > + 
shost_for_each_device(sdev, hba->host) > + blk_freeze_queue_start(sdev->request_queue); > + blk_freeze_queue_start(hba->cmd_queue); > + blk_freeze_queue_start(hba->tmf_queue); > + shost_for_each_device(sdev, hba->host) { > + if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue, > + max_t(long, 0, deadline - jiffies)) <= 0) { > + goto err; > + } > } > + if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue, > + max_t(long, 0, deadline - jiffies)) <= 0) > + goto err; > + if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, > + max_t(long, 0, deadline - jiffies)) <= 0) > + goto err; > > - return ret; > +out: > + blk_mq_unfreeze_queue(hba->tmf_queue); > + blk_mq_unfreeze_queue(hba->cmd_queue); Hi Bart, After this point, dev commands can be sent on hba->cmd_queue. But there are multiple entries which users can send dev commands through in ufs-sysfs.c. So the clock scaling sequence lost its protection from being disturbed. Best Regards, Can Guo. > + return res; > + > +err: > + shost_for_each_device(sdev, hba->host) > + blk_mq_unfreeze_queue(sdev->request_queue); > + res = -ETIMEDOUT; > + goto out; > } > > static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba) > { > - up_write(&hba->clk_scaling_lock); > - ufshcd_scsi_unblock_requests(hba); > + struct scsi_device *sdev; > + > + shost_for_each_device(sdev, hba->host) > + blk_mq_unfreeze_queue(sdev->request_queue); > } > > /** > @@ -2394,9 +2357,6 @@ static int ufshcd_queuecommand(struct Scsi_Host > *host, struct scsi_cmnd *cmd) > BUG(); > } > > - if (!down_read_trylock(&hba->clk_scaling_lock)) > - return SCSI_MLQUEUE_HOST_BUSY; > - > spin_lock_irqsave(hba->host->host_lock, flags); > switch (hba->ufshcd_state) { > case UFSHCD_STATE_OPERATIONAL: > @@ -2462,7 +2422,6 @@ static int ufshcd_queuecommand(struct Scsi_Host > *host, struct scsi_cmnd *cmd) > out_unlock: > spin_unlock_irqrestore(hba->host->host_lock, flags); > out: > - up_read(&hba->clk_scaling_lock); > return err; > } > > @@ -2616,8 +2575,6 @@ static int 
ufshcd_exec_dev_cmd(struct ufs_hba > *hba, > struct completion wait; > unsigned long flags; > > - down_read(&hba->clk_scaling_lock); > - > /* > * Get free slot, sleep if slots are unavailable. > * Even though we use wait_event() which sleeps indefinitely, > @@ -2653,7 +2610,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba > *hba, > > out_put_tag: > blk_put_request(req); > - up_read(&hba->clk_scaling_lock); > return err; > } > > @@ -5771,8 +5727,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct > ufs_hba *hba, > unsigned long flags; > u32 upiu_flags; > > - down_read(&hba->clk_scaling_lock); > - > req = blk_get_request(q, REQ_OP_DRV_OUT, 0); > if (IS_ERR(req)) > return PTR_ERR(req); > @@ -5854,7 +5808,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct > ufs_hba *hba, > } > > blk_put_request(req); > - up_read(&hba->clk_scaling_lock); > return err; > } > > @@ -8323,8 +8276,6 @@ int ufshcd_init(struct ufs_hba *hba, void > __iomem *mmio_base, unsigned int irq) > /* Initialize mutex for device management commands */ > mutex_init(&hba->dev_cmd.lock); > > - init_rwsem(&hba->clk_scaling_lock); > - > ufshcd_init_clk_gating(hba); > > ufshcd_init_clk_scaling(hba); > diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h > index 5865e16f53a6..5ebb920ae874 100644 > --- a/drivers/scsi/ufs/ufshcd.h > +++ b/drivers/scsi/ufs/ufshcd.h > @@ -724,7 +724,6 @@ struct ufs_hba { > enum bkops_status urgent_bkops_lvl; > bool is_urgent_bkops_lvl_checked; > > - struct rw_semaphore clk_scaling_lock; > struct ufs_desc_size desc_size; > atomic_t scsi_block_reqs_cnt;
On 11/17/19 7:49 PM, cang@codeaurora.org wrote: > After this point, dev commands can be sent on hba->cmd_queue. > But there are multiple entries which users can send dev commands through in ufs-sysfs.c. > So the clock scaling sequence lost its protection from being disturbed. Hi Can, My understanding of the current implementation (Martin's 5.5/scsi-queue branch) is that hba->clk_scaling_lock serializes clock scaling and submission of SCSI and device commands but not the submission of TMFs. I think that the following call chain allows user space to submit TMFs while clock scaling is in progress: submit UPIU_TRANSACTION_TASK_REQ on an UFS BSG queue -> ufs_bsg_request() -> ufshcd_exec_raw_upiu_cmd(msgcode = UPIU_TRANSACTION_TASK_REQ) -> ufshcd_exec_raw_upiu_cmd() -> __ufshcd_issue_tm_cmd() Is that a feature or is that rather unintended behavior? Anyway, can you have a look at the patch below and verify whether it preserves existing behavior? Thank you, Bart. Subject: [PATCH] ufs: Simplify the clock scaling mechanism implementation Scaling the clock is only safe while no commands are in progress. The current clock scaling implementation uses hba->clk_scaling_lock to serialize clock scaling against the following three functions: * ufshcd_queuecommand() (uses sdev->request_queue) * ufshcd_exec_dev_cmd() (uses hba->cmd_queue) * ufshcd_issue_devman_upiu_cmd() (uses hba->cmd_queue) __ufshcd_issue_tm_cmd(), which uses hba->tmf_queue, is not serialized against clock scaling. Use blk_mq_{un,}freeze_queue() to block submission of new commands and to wait for ongoing commands to complete. This patch removes a semaphore down and up operation pair from the hot path. 
Cc: Bean Huo <beanhuo@micron.com> Cc: Avri Altman <avri.altman@wdc.com> Cc: Stanley Chu <stanley.chu@mediatek.com> Cc: Tomas Winkler <tomas.winkler@intel.com> Signed-off-by: Bart Van Assche <bvanassche@acm.org> --- drivers/scsi/ufs/ufshcd.c | 116 +++++++++++--------------------------- drivers/scsi/ufs/ufshcd.h | 1 - 2 files changed, 34 insertions(+), 83 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 5314e8bfeeb6..46950cc87dc5 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -971,65 +971,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba, return false; } -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, - u64 wait_timeout_us) -{ - unsigned long flags; - int ret = 0; - u32 tm_doorbell; - u32 tr_doorbell; - bool timeout = false, do_last_check = false; - ktime_t start; - - ufshcd_hold(hba, false); - spin_lock_irqsave(hba->host->host_lock, flags); - /* - * Wait for all the outstanding tasks/transfer requests. - * Verify by checking the doorbell registers are clear. - */ - start = ktime_get(); - do { - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) { - ret = -EBUSY; - goto out; - } - - tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); - if (!tm_doorbell && !tr_doorbell) { - timeout = false; - break; - } else if (do_last_check) { - break; - } - - spin_unlock_irqrestore(hba->host->host_lock, flags); - schedule(); - if (ktime_to_us(ktime_sub(ktime_get(), start)) > - wait_timeout_us) { - timeout = true; - /* - * We might have scheduled out for long time so make - * sure to check if doorbells are cleared by this time - * or not. 
- */ - do_last_check = true; - } - spin_lock_irqsave(hba->host->host_lock, flags); - } while (tm_doorbell || tr_doorbell); - - if (timeout) { - dev_err(hba->dev, - "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n", - __func__, tm_doorbell, tr_doorbell); - ret = -EBUSY; - } -out: - spin_unlock_irqrestore(hba->host->host_lock, flags); - ufshcd_release(hba); - return ret; -} - /** * ufshcd_scale_gear - scale up/down UFS gear * @hba: per adapter instance @@ -1079,27 +1020,50 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up) static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) { - #define DOORBELL_CLR_TOUT_US (1000 * 1000) /* 1 sec */ - int ret = 0; + unsigned long deadline = jiffies + HZ; + struct scsi_device *sdev; + int res = 0; + /* * make sure that there are no outstanding requests when * clock scaling is in progress */ - ufshcd_scsi_block_requests(hba); - down_write(&hba->clk_scaling_lock); - if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { - ret = -EBUSY; - up_write(&hba->clk_scaling_lock); - ufshcd_scsi_unblock_requests(hba); + shost_for_each_device(sdev, hba->host) + blk_freeze_queue_start(sdev->request_queue); + blk_freeze_queue_start(hba->cmd_queue); + blk_freeze_queue_start(hba->tmf_queue); + shost_for_each_device(sdev, hba->host) { + if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue, + max_t(long, 0, deadline - jiffies)) <= 0) { + goto err; + } } + if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue, + max_t(long, 0, deadline - jiffies)) <= 0) + goto err; + if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, + max_t(long, 0, deadline - jiffies)) <= 0) + goto err; - return ret; +out: + blk_mq_unfreeze_queue(hba->tmf_queue); + return res; + +err: + blk_mq_unfreeze_queue(hba->cmd_queue); + shost_for_each_device(sdev, hba->host) + blk_mq_unfreeze_queue(sdev->request_queue); + res = -ETIMEDOUT; + goto out; } static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba) { - 
up_write(&hba->clk_scaling_lock); - ufshcd_scsi_unblock_requests(hba); + struct scsi_device *sdev; + + blk_mq_unfreeze_queue(hba->cmd_queue); + shost_for_each_device(sdev, hba->host) + blk_mq_unfreeze_queue(sdev->request_queue); } /** @@ -2394,9 +2358,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) BUG(); } - if (!down_read_trylock(&hba->clk_scaling_lock)) - return SCSI_MLQUEUE_HOST_BUSY; - spin_lock_irqsave(hba->host->host_lock, flags); switch (hba->ufshcd_state) { case UFSHCD_STATE_OPERATIONAL: @@ -2462,7 +2423,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) out_unlock: spin_unlock_irqrestore(hba->host->host_lock, flags); out: - up_read(&hba->clk_scaling_lock); return err; } @@ -2616,8 +2576,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, struct completion wait; unsigned long flags; - down_read(&hba->clk_scaling_lock); - /* * Get free slot, sleep if slots are unavailable. * Even though we use wait_event() which sleeps indefinitely, @@ -2653,7 +2611,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, out_put_tag: blk_put_request(req); - up_read(&hba->clk_scaling_lock); return err; } @@ -5771,8 +5728,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, unsigned long flags; u32 upiu_flags; - down_read(&hba->clk_scaling_lock); - req = blk_get_request(q, REQ_OP_DRV_OUT, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -5854,7 +5809,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, } blk_put_request(req); - up_read(&hba->clk_scaling_lock); return err; } @@ -8323,8 +8277,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Initialize mutex for device management commands */ mutex_init(&hba->dev_cmd.lock); - init_rwsem(&hba->clk_scaling_lock); - ufshcd_init_clk_gating(hba); ufshcd_init_clk_scaling(hba); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 5865e16f53a6..5ebb920ae874 100644 --- 
a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -724,7 +724,6 @@ struct ufs_hba { enum bkops_status urgent_bkops_lvl; bool is_urgent_bkops_lvl_checked; - struct rw_semaphore clk_scaling_lock; struct ufs_desc_size desc_size; atomic_t scsi_block_reqs_cnt;
On 11/14/19 10:01 PM, Can Guo wrote: > On 2019-11-15 00:11, Bart Van Assche wrote: >> On 11/13/19 8:03 AM, Bean Huo (beanhuo) wrote: >>> I think, you are asking for comment from Can. As for me, attached >>> patch is better. >>> Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell >>> register, Now >>> using block layer blk_mq_{un,}freeze_queue(), looks good. I tested >>> your V5 patches, >>> didn't find problem yet. >>> >>> Since my available platform doesn't support dynamic clk scaling, I >>> think, now seems only >>> Qcom UFS controllers support this feature. So we need Can Guo to >>> finally confirm it. >> >> Do you agree with this patch series if patch 4 is replaced by the >> patch attached to my previous e-mail? The entire (revised) series is >> available at https://github.com/bvanassche/linux/tree/ufs-for-next. > > After ufshcd_clock_scaling_prepare() returns(no error), all request > queues are frozen. If failure > happens(say power mode change command fails) after this point and error > handler kicks off, > we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back to > functionality. > However, as the hba->cmd_queue is frozen, dev commands cannot be sent, > the error handler shall fail. Hi Can, My understanding of the current UFS driver code is that ufshcd_clock_scaling_prepare() waits for ongoing commands to finish but not for SCSI error handling to finish. Would you agree with changing that behavior such that ufshcd_clock_scaling_prepare() returns an error code if SCSI error handling is in progress? Do you agree that once that change has been made that it is fine to invoke blk_freeze_queue_start() for all three types of block layer request queues (SCSI commands, device management commands and TMFs)? Do you agree that this would fix the issue that it is possible today to submit TMFs from user space through the BSG queue while clock scaling is in progress? Thanks, Bart.
On 2019-11-19 02:13, Bart Van Assche wrote: > On 11/14/19 10:01 PM, Can Guo wrote: >> On 2019-11-15 00:11, Bart Van Assche wrote: >>> On 11/13/19 8:03 AM, Bean Huo (beanhuo) wrote: >>>> I think, you are asking for comment from Can. As for me, attached >>>> patch is better. >>>> Removing ufshcd_wait_for_doorbell_clr(), instead of reading doorbell >>>> register, Now >>>> using block layer blk_mq_{un,}freeze_queue(), looks good. I tested >>>> your V5 patches, >>>> didn't find problem yet. >>>> >>>> Since my available platform doesn't support dynamic clk scaling, I >>>> think, now seems only >>>> Qcom UFS controllers support this feature. So we need Can Guo to >>>> finally confirm it. >>> >>> Do you agree with this patch series if patch 4 is replaced by the >>> patch attached to my previous e-mail? The entire (revised) series is >>> available at https://github.com/bvanassche/linux/tree/ufs-for-next. >> >> After ufshcd_clock_scaling_prepare() returns(no error), all request >> queues are frozen. If failure >> happens(say power mode change command fails) after this point and >> error handler kicks off, >> we need to send dev commands(in ufshcd_probe_hba()) to bring UFS back >> to functionality. >> However, as the hba->cmd_queue is frozen, dev commands cannot be sent, >> the error handler shall fail. > > Hi Can, > > My understanding of the current UFS driver code is that > ufshcd_clock_scaling_prepare() waits for ongoing commands to finish > but not for SCSI error handling to finish. Would you agree with > changing that behavior such that ufshcd_clock_scaling_prepare() > returns an error > code if SCSI error handling is in progress? Do you agree that once > that change has been made that it is fine to invoke > blk_freeze_queue_start() for all three types of block layer request > queues (SCSI commands, device management commands and TMFs)? 
Do you > agree that this would fix the issue that it is possible today to > submit TMFs from user space using through the BSG queue while clock > scaling is in progress? > > Thanks, > > Bart. Hi Bart, Your understanding is correct. > Would you agree with changing that behavior such that > ufshcd_clock_scaling_prepare() > returns an error code if SCSI error handling is in progress? I believe we already have the check of ufshcd_eh_in_progress() in ufshcd_devfreq_target() before calling ufshcd_devfreq_scale(). > Do you agree that once that change has been made that it is fine to > invoke > blk_freeze_queue_start() for all three types of block layer request > queues (SCSI commands, device management commands and TMFs)? I agree. As err handling work always runs in ASYNC way in current code base, it is fine to freeze all the queues. If there is err handling work, scheduled during scaling, would eventually be unblocked when hba->cmd_queue is unfrozen after scaling returns or fails. Sorry for making you confused. My previous comment is based on that if we need to do hibern8 enter/exit in vendor ops (see https://lore.kernel.org/patchwork/patch/1143579/), and if hibern8 enter/exit fails during scaling, we need to call ufshcd_link_recovery() to recover everything in SYNC way. And as hba->cmd_queue is frozen, ufshcd_link_recovery() would hang when it needs to send device management commands. In this case, we need to make sure hba->cmd_queue is NOT frozen. So it seems contradictory. FYI, even with the patch https://lore.kernel.org/patchwork/patch/1143579/, current implementation also has this problem. And we have another change to address it by making change to ufshcd_exec_dev_cmd() like below. 
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c2a2ff2..81a000e 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4068,6 +4068,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, int tag; struct completion wait; unsigned long flags; + bool has_read_lock = false; @@ -4075,8 +4076,10 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, + if (!ufshcd_eh_in_progress(hba)) { down_read(&hba->lock); + has_read_lock = true; + } /* * Get free slot, sleep if slots are unavailable. @@ -4110,7 +4113,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, out_put_tag: ufshcd_put_dev_cmd_tag(hba, tag); wake_up(&hba->dev_cmd.tag_wq); + if (has_read_lock) up_read(&hba->lock); return err; } Aside the functionality, we need to take this change carefully as it may relate with performance. With the old implementation, when devfreq decides to scale up, we can make sure that all requests sent to UFS device after this point will go through the highest UFS Gear. Scaling up will happen right after doorbell is cleared, not even need to wait till the requests are freed from block layer. And 1 sec is long enough to clean out the doorbell by HW. In the new implementation of ufshcd_clock_scaling_prepare(), after blk_freeze_queue_start(), we call blk_mq_freeze_queue_wait_timeout() to wait for 1 sec. In addition to those requests which have already been queued to HW doorbell, more requests will be dispatched within 1 sec, through the lowest Gear. My first impression is that it might be a bit lazy and I am not sure how much it may affect the benchmarks. And if the request queues are heavily loaded(many bios are waiting for a free tag to become a request), is 1 sec long enough to freeze all these request queues? If no, performance would be affected if scaling up fails frequently. 
As per my understanding, the request queue will become frozen only after all the existing requests and bios(which are already waiting for a free tag on the queue) are completed and freed from block layer. Please correct me if I am wrong. While, in the old implementation, when devfreq decides to scale up, scaling can always happen for majority chances, except for those unexpected HW issues. Best Regards, Can Guo
On 11/18/19 9:33 PM, cang@codeaurora.org wrote: > Aside the functionality, we need to take this change carefully as it may > relate with performance. > With the old implementation, when devfreq decides to scale up, we can > make sure that all requests > sent to UFS device after this point will go through the highest UFS > Gear. Scaling up will happen > right after doorbell is cleared, not even need to wait till the requests > are freed from block layer. > And 1 sec is long enough to clean out the doorbell by HW. > > In the new implementation of ufshcd_clock_scaling_prepare(), after > blk_freeze_queue_start(), we call > blk_mq_freeze_queue_wait_timeout() to wait for 1 sec. In addition to > those requests which have already > been queued to HW doorbell, more requests will be dispatched within 1 > sec, through the lowest Gear. > My first impression is that it might be a bit lazy and I am not sure how > much it may affect the benchmarks. > > And if the request queues are heavily loaded(many bios are waiting for a > free tag to become a request), > is 1 sec long enough to freeze all these request queues? If no, > performance would be affected if scaling up > fails frequently. > As per my understanding, the request queue will become frozen only after > all the existing requests and > bios(which are already waiting for a free tag on the queue) to be > completed and freed from block layer. > Please correct me if I am wrong. > > While, in the old implementation, when devfreq decides to scale up, > scaling can always > happen for majority chances, except for those unexpected HW issues. Hi Can, I agree with the description above of the current behavior of the UFS driver. But I do not agree with the interpretation of the behavior of the changes I proposed. 
The implementation in the block layer of request queue freezing is as follows: void blk_freeze_queue_start(struct request_queue *q) { mutex_lock(&q->mq_freeze_lock); if (++q->mq_freeze_depth == 1) { percpu_ref_kill(&q->q_usage_counter); mutex_unlock(&q->mq_freeze_lock); if (queue_is_mq(q)) blk_mq_run_hw_queues(q, false); } else { mutex_unlock(&q->mq_freeze_lock); } } As long as calls from other threads to blk_mq_freeze_queue()/blk_mq_unfreeze_queue() are balanced, after blk_freeze_queue_start() returns it is guaranteed that q->q_usage_counter is in __PERCPU_REF_DEAD mode. Calls to blk_get_request() etc. block in that mode until the request queue is unfrozen. See also blk_queue_enter(). In other words, calling blk_freeze_queue() from inside ufshcd_clock_scaling_prepare() should preserve the current behavior, namely that ufshcd_clock_scaling_prepare() waits for ongoing requests to finish and also that submission of new requests is postponed until clock scaling has finished. Do you agree with this analysis? Thanks, Bart.
On 2019-11-20 07:16, Bart Van Assche wrote: > On 11/18/19 9:33 PM, cang@codeaurora.org wrote: >> Aside the functionality, we need to take this change carefully as it >> may relate with performance. >> With the old implementation, when devfreq decides to scale up, we can >> make sure that all requests >> sent to UFS device after this point will go through the highest UFS >> Gear. Scaling up will happen >> right after doorbell is cleared, not even need to wait till the >> requests are freed from block layer. >> And 1 sec is long enough to clean out the doorbell by HW. >> >> In the new implementation of ufshcd_clock_scaling_prepare(), after >> blk_freeze_queue_start(), we call >> blk_mq_freeze_queue_wait_timeout() to wait for 1 sec. In addition to >> those requests which have already >> been queued to HW doorbell, more requests will be dispatched within 1 >> sec, through the lowest Gear. >> My first impression is that it might be a bit lazy and I am not sure >> how much it may affect the benchmarks. >> >> And if the request queues are heavily loaded(many bios are waiting for >> a free tag to become a request), >> is 1 sec long enough to freeze all these request queues? If no, >> performance would be affected if scaling up >> fails frequently. >> As per my understanding, the request queue will become frozen only >> after all the existing requests and >> bios(which are already waiting for a free tag on the queue) to be >> completed and freed from block layer. >> Please correct me if I am wrong. >> >> While, in the old implementation, when devfreq decides to scale up, >> scaling can always >> happen for majority chances, except for those unexpected HW issues. > > Hi Can, > > I agree with the description above of the current behavior of the UFS > driver. But I do not agree with the interpretation of the behavior of > the changes I proposed. 
The implementation in the block layer of > request queue freezing is as follows: > > void blk_freeze_queue_start(struct request_queue *q) > { > mutex_lock(&q->mq_freeze_lock); > if (++q->mq_freeze_depth == 1) { > percpu_ref_kill(&q->q_usage_counter); > mutex_unlock(&q->mq_freeze_lock); > if (queue_is_mq(q)) > blk_mq_run_hw_queues(q, false); > } else { > mutex_unlock(&q->mq_freeze_lock); > } > } > > As long as calls from other threads to > blk_mq_freeze_queue()/blk_mq_unfreeze_queue() are balanced, after > blk_freeze_queue_start() returns it is guaranteed that > q->q_usage_counter is in __PERCPU_REF_DEAD mode. Calls to > blk_get_request() etc. block in that mode until the request queue is > unfrozen. See also blk_queue_enter(). > > In other words, calling blk_freeze_queue() from inside > ufshcd_clock_scaling_prepare() should preserve the current behavior, > namely that ufshcd_clock_scaling_prepare() waits for ongoing requests > to finish and also that submission of new requests is postponed until > clock scaling has finished. Do you agree with this analysis? > > Thanks, > > Bart. Hi Bart, I am still not quite clear about the blk_freeze_queue() part. >> In the new implementation of ufshcd_clock_scaling_prepare(), after >> blk_freeze_queue_start(), we call >> blk_mq_freeze_queue_wait_timeout() to wait for 1 sec. In addition to >> those requests which have already >> been queued to HW doorbell, more requests will be dispatched within 1 >> sec, through the lowest Gear. >> My first impression is that it might be a bit lazy and I am not sure >> how much it may affect the benchmarks. First of all, reg above lines, do you agree that there can be latency in scaling up/down comparing with the old implementation? I can understand that after queue is frozen, all calls to blk_get_request() are blocked, no submission can be made after this point, due to blk_queue_enter() shall wait on q->mq_freeze_wq. 
<--code snippet--> wait_event(q->mq_freeze_wq, (atomic_read(&q->mq_freeze_depth) == 0 && <--code snippet--> >> And if the request queues are heavily loaded(many bios are waiting for >> a free tag to become a request), >> is 1 sec long enough to freeze all these request queue? But here I meant those bios, before we call blk_freeze_queue_start(), sent through submit_bio() calls which have already entered blk_mq_get_request() and already returned from the blk_queue_enter_live(). These bios are waiting for a free tag (waiting on func blk_mq_get_tag() when queue is full). Shall the request queue be frozen before these bios are handled? void blk_mq_freeze_queue_wait(struct request_queue *q) { wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); } EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); Per my understanding, these bios have already increased &q->q_usage_counter. And the &q->q_usage_counter will only become 0 when all of the requests in the queue and these bios(after they get free tags and turned into requests) are completed from block layer, meaning after blk_queue_exit() is called in __blk_mq_free_request() for all of them. Please correct me if I am wrong. Thanks, Can Guo.
On 11/19/19 7:38 PM, cang@codeaurora.org wrote: > On 2019-11-20 07:16, Bart Van Assche wrote: >> On 11/18/19 9:33 PM, cang@codeaurora.org wrote: [ ... ] > > I am still not quite clear about the blk_freeze_queue() part. > >>> In the new implementation of ufshcd_clock_scaling_prepare(), after >>> blk_freeze_queue_start(), we call blk_mq_freeze_queue_wait_timeout() >>> to wait for 1 sec. In addition to those requests which have already >>> been queued to HW doorbell, more requests will be dispatched within 1 >>> sec, through the lowest Gear. My first impression is that it might be >>> a bit lazy and I am not sure how much it may affect the benchmarks. > > First of all, reg above lines, do you agree that there can be latency in > scaling up/down > comparing with the old implementation? > > I can understand that after queue is frozen, all calls to blk_get_request() > are blocked, no submission can be made after this point, due to > blk_queue_enter() shall wait on q->mq_freeze_wq. > > <--code snippet--> > wait_event(q->mq_freeze_wq, > (atomic_read(&q->mq_freeze_depth) == 0 && > <--code snippet--> > >>> And if the request queues are heavily loaded(many bios are waiting >>> for a free tag to become a request), >>> is 1 sec long enough to freeze all these request queue? > > But here I meant those bios, before we call blk_freeze_queue_start(), > sent through > submit_bio() calls which have already entered blk_mq_get_request() and > already > returned from the blk_queue_enter_live(). These bios are waiting for a > free tag > (waiting on func blk_mq_get_tag() when queue is full). > Shall the request queue be frozen before these bios are handled? > > void blk_mq_freeze_queue_wait(struct request_queue *q) > { > wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); > } > EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait); > > Per my understanding, these bios have already increased > &q->q_usage_counter. 
> And the &q->q_usage_counter will only become 0 when all of the requests > in the > queue and these bios(after they get free tags and turned into requests) are > completed from block layer, meaning after blk_queue_exit() is called in > __blk_mq_free_request() for all of them. Please correct me if I am wrong. Hi Can, Please have another look at the request queue freezing mechanism in the block layer. If blk_queue_enter() sleeps in wait_event() that implies either that the percpu_ref_tryget_live() call failed or that the percpu_ref_tryget_live() succeeded and that it was followed by a percpu_ref_put() call. Ignoring concurrent q_usage_counter changes, in both cases q->q_usage_counter will have the same value as before blk_queue_enter() started. In other words, there shouldn't be a latency difference between the current and the proposed approach since neither approach waits for completion of bios or SCSI commands that are queued after clock scaling started. Thanks, Bart.
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 810b997f55fc..717ba24a2d40 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -290,16 +290,50 @@ static inline void ufshcd_disable_irq(struct ufs_hba *hba) } } -static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba) +static void ufshcd_unblock_requests(struct ufs_hba *hba) { - if (atomic_dec_and_test(&hba->scsi_block_reqs_cnt)) - scsi_unblock_requests(hba->host); + struct scsi_device *sdev; + + blk_mq_unfreeze_queue(hba->tmf_queue); + blk_mq_unfreeze_queue(hba->cmd_queue); + shost_for_each_device(sdev, hba->host) + blk_mq_unfreeze_queue(sdev->request_queue); } -static void ufshcd_scsi_block_requests(struct ufs_hba *hba) +static int ufshcd_block_requests(struct ufs_hba *hba, unsigned long timeout) { - if (atomic_inc_return(&hba->scsi_block_reqs_cnt) == 1) - scsi_block_requests(hba->host); + struct scsi_device *sdev; + unsigned long deadline = jiffies + timeout; + + if (timeout == ULONG_MAX) { + shost_for_each_device(sdev, hba->host) + blk_mq_freeze_queue(sdev->request_queue); + blk_mq_freeze_queue(hba->cmd_queue); + blk_mq_freeze_queue(hba->tmf_queue); + return 0; + } + + shost_for_each_device(sdev, hba->host) + blk_freeze_queue_start(sdev->request_queue); + blk_freeze_queue_start(hba->cmd_queue); + blk_freeze_queue_start(hba->tmf_queue); + shost_for_each_device(sdev, hba->host) { + if (blk_mq_freeze_queue_wait_timeout(sdev->request_queue, + max_t(long, 0, deadline - jiffies)) <= 0) { + goto err; + } + } + if (blk_mq_freeze_queue_wait_timeout(hba->cmd_queue, + max_t(long, 0, deadline - jiffies)) <= 0) + goto err; + if (blk_mq_freeze_queue_wait_timeout(hba->tmf_queue, + max_t(long, 0, deadline - jiffies)) <= 0) + goto err; + return 0; + +err: + ufshcd_unblock_requests(hba); + return -ETIMEDOUT; } static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag, @@ -971,65 +1005,6 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba, 
return false; } -static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, - u64 wait_timeout_us) -{ - unsigned long flags; - int ret = 0; - u32 tm_doorbell; - u32 tr_doorbell; - bool timeout = false, do_last_check = false; - ktime_t start; - - ufshcd_hold(hba, false); - spin_lock_irqsave(hba->host->host_lock, flags); - /* - * Wait for all the outstanding tasks/transfer requests. - * Verify by checking the doorbell registers are clear. - */ - start = ktime_get(); - do { - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) { - ret = -EBUSY; - goto out; - } - - tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); - if (!tm_doorbell && !tr_doorbell) { - timeout = false; - break; - } else if (do_last_check) { - break; - } - - spin_unlock_irqrestore(hba->host->host_lock, flags); - schedule(); - if (ktime_to_us(ktime_sub(ktime_get(), start)) > - wait_timeout_us) { - timeout = true; - /* - * We might have scheduled out for long time so make - * sure to check if doorbells are cleared by this time - * or not. 
- */ - do_last_check = true; - } - spin_lock_irqsave(hba->host->host_lock, flags); - } while (tm_doorbell || tr_doorbell); - - if (timeout) { - dev_err(hba->dev, - "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n", - __func__, tm_doorbell, tr_doorbell); - ret = -EBUSY; - } -out: - spin_unlock_irqrestore(hba->host->host_lock, flags); - ufshcd_release(hba); - return ret; -} - /** * ufshcd_scale_gear - scale up/down UFS gear * @hba: per adapter instance @@ -1079,27 +1054,16 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up) static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) { - #define DOORBELL_CLR_TOUT_US (1000 * 1000) /* 1 sec */ - int ret = 0; /* * make sure that there are no outstanding requests when * clock scaling is in progress */ - ufshcd_scsi_block_requests(hba); - down_write(&hba->clk_scaling_lock); - if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { - ret = -EBUSY; - up_write(&hba->clk_scaling_lock); - ufshcd_scsi_unblock_requests(hba); - } - - return ret; + return ufshcd_block_requests(hba, HZ); } static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba) { - up_write(&hba->clk_scaling_lock); - ufshcd_scsi_unblock_requests(hba); + ufshcd_unblock_requests(hba); } /** @@ -1471,7 +1435,7 @@ static void ufshcd_ungate_work(struct work_struct *work) hba->clk_gating.is_suspended = false; } unblock_reqs: - ufshcd_scsi_unblock_requests(hba); + ufshcd_unblock_requests(hba); } /** @@ -1528,7 +1492,7 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) */ /* fallthrough */ case CLKS_OFF: - ufshcd_scsi_block_requests(hba); + ufshcd_block_requests(hba, ULONG_MAX); hba->clk_gating.state = REQ_CLKS_ON; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); @@ -2395,9 +2359,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) BUG(); } - if (!down_read_trylock(&hba->clk_scaling_lock)) - return SCSI_MLQUEUE_HOST_BUSY; - spin_lock_irqsave(hba->host->host_lock, flags); switch 
(hba->ufshcd_state) { case UFSHCD_STATE_OPERATIONAL: @@ -2463,7 +2424,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) out_unlock: spin_unlock_irqrestore(hba->host->host_lock, flags); out: - up_read(&hba->clk_scaling_lock); return err; } @@ -2617,8 +2577,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, struct completion wait; unsigned long flags; - down_read(&hba->clk_scaling_lock); - /* * Get free slot, sleep if slots are unavailable. * Even though we use wait_event() which sleeps indefinitely, @@ -2654,7 +2612,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, out_put_tag: blk_put_request(req); - up_read(&hba->clk_scaling_lock); return err; } @@ -5342,7 +5299,7 @@ static void ufshcd_err_handler(struct work_struct *work) spin_unlock_irqrestore(hba->host->host_lock, flags); if (starget) scsi_target_unblock(&starget->dev, SDEV_RUNNING); - ufshcd_scsi_unblock_requests(hba); + scsi_unblock_requests(hba->host); ufshcd_release(hba); pm_runtime_put_sync(hba->dev); } @@ -5466,7 +5423,7 @@ static void ufshcd_check_errors(struct ufs_hba *hba) /* handle fatal errors only when link is functional */ if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) { /* block commands from scsi mid-layer */ - ufshcd_scsi_block_requests(hba); + scsi_block_requests(hba->host); hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED; @@ -5772,8 +5729,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, unsigned long flags; u32 upiu_flags; - down_read(&hba->clk_scaling_lock); - req = blk_get_request(q, REQ_OP_DRV_OUT, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -5853,7 +5808,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, } blk_put_request(req); - up_read(&hba->clk_scaling_lock); return err; } @@ -8322,8 +8276,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Initialize mutex for device management commands */ mutex_init(&hba->dev_cmd.lock); - init_rwsem(&hba->clk_scaling_lock); - 
ufshcd_init_clk_gating(hba); ufshcd_init_clk_scaling(hba); @@ -8410,7 +8362,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Hold auto suspend until async scan completes */ pm_runtime_get_sync(dev); - atomic_set(&hba->scsi_block_reqs_cnt, 0); /* * We are assuming that device wasn't put in sleep/power-down * state exclusively during the boot stage before kernel. diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 5865e16f53a6..c44bfcdb26a3 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -520,7 +520,6 @@ struct ufs_stats { * @urgent_bkops_lvl: keeps track of urgent bkops level for device * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for * device is known or not. - * @scsi_block_reqs_cnt: reference counting for scsi block requests */ struct ufs_hba { void __iomem *mmio_base; @@ -724,9 +723,7 @@ struct ufs_hba { enum bkops_status urgent_bkops_lvl; bool is_urgent_bkops_lvl_checked; - struct rw_semaphore clk_scaling_lock; struct ufs_desc_size desc_size; - atomic_t scsi_block_reqs_cnt; struct device bsg_dev; struct request_queue *bsg_queue;
Scaling the clock is only safe while no commands are in progress. Use blk_mq_{un,}freeze_queue() to block submission of new commands and to wait for ongoing commands to complete. Remove "_scsi" from the name of the functions that block and unblock requests since these functions now block both SCSI commands and non-SCSI commands. Cc: Bean Huo <beanhuo@micron.com> Cc: Avri Altman <avri.altman@wdc.com> Cc: Stanley Chu <stanley.chu@mediatek.com> Cc: Tomas Winkler <tomas.winkler@intel.com> Signed-off-by: Bart Van Assche <bvanassche@acm.org> --- drivers/scsi/ufs/ufshcd.c | 141 +++++++++++++------------------------- drivers/scsi/ufs/ufshcd.h | 3 - 2 files changed, 46 insertions(+), 98 deletions(-)