Message ID | 20210129053042epcms2p538e7fa396c3c2104594c44e48be53eb8@epcms2p5 (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | scsi: ufs: Add Host Performance Booster Support | expand |
On 2021-01-29 13:30, Daejun Park wrote: > This patch changes the read I/O to the HPB read I/O. > > If the logical address of the read I/O belongs to active sub-region, > the > HPB driver modifies the read I/O command to HPB read. It modifies the > UPIU > command of UFS instead of modifying the existing SCSI command. > > In the HPB version 1.0, the maximum read I/O size that can be converted > to > HPB read is 4KB. > > The dirty map of the active sub-region prevents an incorrect HPB read > that > has stale physical page number which is updated by previous write I/O. > > Reviewed-by: Can Guo <cang@codeaurora.org> > Reviewed-by: Bart Van Assche <bvanassche@acm.org> > Acked-by: Avri Altman <Avri.Altman@wdc.com> > Tested-by: Bean Huo <beanhuo@micron.com> > Signed-off-by: Daejun Park <daejun7.park@samsung.com> > --- > drivers/scsi/ufs/ufshcd.c | 2 + > drivers/scsi/ufs/ufshpb.c | 234 ++++++++++++++++++++++++++++++++++++++ > drivers/scsi/ufs/ufshpb.h | 2 + > 3 files changed, 238 insertions(+) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index 52e48de8d27c..37cb343e9ec1 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -2653,6 +2653,8 @@ static int ufshcd_queuecommand(struct Scsi_Host > *host, struct scsi_cmnd *cmd) > > lrbp->req_abort_skip = false; > > + ufshpb_prep(hba, lrbp); > + > ufshcd_comp_scsi_upiu(hba, lrbp); > > err = ufshcd_map_sg(hba, lrbp); > diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c > index 48edfdd0f606..73e7b3ed04a4 100644 > --- a/drivers/scsi/ufs/ufshpb.c > +++ b/drivers/scsi/ufs/ufshpb.c > @@ -31,6 +31,29 @@ bool ufshpb_is_allowed(struct ufs_hba *hba) > return !(hba->ufshpb_dev.hpb_disabled); > } > > +static int ufshpb_is_valid_srgn(struct ufshpb_region *rgn, > + struct ufshpb_subregion *srgn) > +{ > + return rgn->rgn_state != HPB_RGN_INACTIVE && > + srgn->srgn_state == HPB_SRGN_VALID; > +} > + > +static bool ufshpb_is_read_cmd(struct scsi_cmnd *cmd) > +{ > + return req_op(cmd->request) == REQ_OP_READ; > +} > + > +static bool ufshpb_is_write_or_discard_cmd(struct scsi_cmnd *cmd) > +{ > + return op_is_write(req_op(cmd->request)) || > + op_is_discard(req_op(cmd->request)); > +} > + > +static bool ufshpb_is_support_chunk(int transfer_len) > +{ > + return transfer_len <= HPB_MULTI_CHUNK_HIGH; > +} > + > static bool ufshpb_is_general_lun(int lun) > { > return lun < UFS_UPIU_MAX_UNIT_NUM_ID; > @@ -98,6 +121,217 @@ static void ufshpb_set_state(struct ufshpb_lu > *hpb, int state) > atomic_set(&hpb->hpb_state, state); > } > > +static void ufshpb_set_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx, > + int srgn_idx, int srgn_offset, int cnt) > +{ > + struct ufshpb_region *rgn; > + struct ufshpb_subregion *srgn; > + int set_bit_len; > + int bitmap_len = hpb->entries_per_srgn; > + > +next_srgn: > + rgn = hpb->rgn_tbl + rgn_idx; > + srgn = rgn->srgn_tbl + srgn_idx; > + > + if ((srgn_offset + cnt) > bitmap_len) > + set_bit_len = bitmap_len - srgn_offset; > + else > + set_bit_len = cnt; > + > + if (rgn->rgn_state != HPB_RGN_INACTIVE && > + srgn->srgn_state == HPB_SRGN_VALID) > + bitmap_set(srgn->mctx->ppn_dirty, srgn_offset, set_bit_len); > + > + srgn_offset = 0; > + if (++srgn_idx == hpb->srgns_per_rgn) { > + srgn_idx = 0; > + rgn_idx++; > + } > + > + cnt -= set_bit_len; > + if (cnt > 0) > + goto next_srgn; > + > + WARN_ON(cnt < 0); > +} > + > +static bool ufshpb_test_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx, > + int srgn_idx, int srgn_offset, int cnt) > +{ > + struct ufshpb_region *rgn; > + struct ufshpb_subregion *srgn; > + int bitmap_len = hpb->entries_per_srgn; > + int bit_len; > + > +next_srgn: > + rgn = hpb->rgn_tbl + rgn_idx; > + srgn = rgn->srgn_tbl + srgn_idx; > + > + if (!ufshpb_is_valid_srgn(rgn, srgn)) > + return true; > + > + /* > + * If the region state is active, mctx must be allocated. > + * In this case, check whether the region is evicted or > + * mctx allcation fail. > + */ > + WARN_ON(!srgn->mctx); > + > + if ((srgn_offset + cnt) > bitmap_len) > + bit_len = bitmap_len - srgn_offset; > + else > + bit_len = cnt; > + > + if (find_next_bit(srgn->mctx->ppn_dirty, > + bit_len, srgn_offset) >= srgn_offset) > + return true; > + > + srgn_offset = 0; > + if (++srgn_idx == hpb->srgns_per_rgn) { > + srgn_idx = 0; > + rgn_idx++; > + } > + > + cnt -= bit_len; > + if (cnt > 0) > + goto next_srgn; > + > + return false; > +} > + > +static u64 ufshpb_get_ppn(struct ufshpb_lu *hpb, > + struct ufshpb_map_ctx *mctx, int pos, int *error) > +{ > + u64 *ppn_table; > + struct page *page; > + int index, offset; > + > + index = pos / (PAGE_SIZE / HPB_ENTRY_SIZE); > + offset = pos % (PAGE_SIZE / HPB_ENTRY_SIZE); > + > + page = mctx->m_page[index]; > + if (unlikely(!page)) { > + *error = -ENOMEM; > + dev_err(&hpb->sdev_ufs_lu->sdev_dev, > + "error. cannot find page in mctx\n"); > + return 0; > + } > + > + ppn_table = page_address(page); > + if (unlikely(!ppn_table)) { > + *error = -ENOMEM; > + dev_err(&hpb->sdev_ufs_lu->sdev_dev, > + "error. cannot get ppn_table\n"); > + return 0; > + } > + > + return ppn_table[offset]; > +} > + > +static void > +ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, int > *rgn_idx, > + int *srgn_idx, int *offset) > +{ > + int rgn_offset; > + > + *rgn_idx = lpn >> hpb->entries_per_rgn_shift; > + rgn_offset = lpn & hpb->entries_per_rgn_mask; > + *srgn_idx = rgn_offset >> hpb->entries_per_srgn_shift; > + *offset = rgn_offset & hpb->entries_per_srgn_mask; > +} > + > +static void > +ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct ufshcd_lrb > *lrbp, > + u32 lpn, u64 ppn, unsigned int transfer_len) > +{ > + unsigned char *cdb = lrbp->cmd->cmnd; > + > + cdb[0] = UFSHPB_READ; > + > + put_unaligned_be64(ppn, &cdb[6]); You are assuming the HPB entries read out by "HPB Read Buffer" cmd are in Little Endian, which is why you are using put_unaligned_be64 here. However, this assumption is not right for all the other flash vendors - HPB entries read out by "HPB Read Buffer" cmd may come in Big Endian, if so, their random read performance are screwed. Actually, I have seen at least two flash vendors acting so. I had to modify this line to get the code work properly on my setups. Meanwhile, in your cover letter, you mentioned that the performance data is collected on a UFS2.1 device. Please re-collect the data on a real UFS3.1 device and let me know the part number. Otherwise, the data is not quite convincing to us. Regards, Can Guo. > + cdb[14] = transfer_len; > + > + lrbp->cmd->cmd_len = UFS_CDB_SIZE; > +} > + > +/* > + * This function will set up HPB read command using host-side L2P map > data. > + * In HPB v1.0, maximum size of HPB read command is 4KB. > + */ > +void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) > +{ > + struct ufshpb_lu *hpb; > + struct ufshpb_region *rgn; > + struct ufshpb_subregion *srgn; > + struct scsi_cmnd *cmd = lrbp->cmd; > + u32 lpn; > + u64 ppn; > + unsigned long flags; > + int transfer_len, rgn_idx, srgn_idx, srgn_offset; > + int err = 0; > + > + hpb = ufshpb_get_hpb_data(cmd->device); > + if (!hpb) > + return; > + > + if (ufshpb_get_state(hpb) != HPB_PRESENT) { > + dev_notice(&hpb->sdev_ufs_lu->sdev_dev, > + "%s: ufshpb state is not PRESENT", __func__); > + return; > + } > + > + if (!ufshpb_is_write_or_discard_cmd(cmd) && > + !ufshpb_is_read_cmd(cmd)) > + return; > + > + transfer_len = sectors_to_logical(cmd->device, > blk_rq_sectors(cmd->request)); > + if (unlikely(!transfer_len)) > + return; > + > + lpn = sectors_to_logical(cmd->device, blk_rq_pos(cmd->request)); > + ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset); > + rgn = hpb->rgn_tbl + rgn_idx; > + srgn = rgn->srgn_tbl + srgn_idx; > + > + /* If command type is WRITE or DISCARD, set bitmap as drity */ > + if (ufshpb_is_write_or_discard_cmd(cmd)) { > + spin_lock_irqsave(&hpb->rgn_state_lock, flags); > + ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset, > + transfer_len); > + spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); > + return; > + } > + > + if (!ufshpb_is_support_chunk(transfer_len)) > + return; > + > + spin_lock_irqsave(&hpb->rgn_state_lock, flags); > + if (ufshpb_test_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset, > + transfer_len)) { > + hpb->stats.miss_cnt++; > + spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); > + return; > + } > + > + ppn = ufshpb_get_ppn(hpb, srgn->mctx, srgn_offset, &err); > + spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); > + if (unlikely(err)) { > + /* > + * In this case, the region state is active, > + * but the ppn table is not allocated. > + * Make sure that ppn table must be allocated on > + * active state. > + */ > + WARN_ON(true); > + dev_err(hba->dev, "ufshpb_get_ppn failed. err %d\n", err); > + return; > + } > + > + ufshpb_set_hpb_read_to_upiu(hpb, lrbp, lpn, ppn, transfer_len); > + > + hpb->stats.hit_cnt++; > +} > + > static struct ufshpb_req *ufshpb_get_map_req(struct ufshpb_lu *hpb, > struct ufshpb_subregion *srgn) > { > diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h > index e40b016971ac..2c43a03b66b6 100644 > --- a/drivers/scsi/ufs/ufshpb.h > +++ b/drivers/scsi/ufs/ufshpb.h > @@ -198,6 +198,7 @@ struct ufs_hba; > struct ufshcd_lrb; > > #ifndef CONFIG_SCSI_UFS_HPB > +static void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) > {} > static void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb > *lrbp) {} > static void ufshpb_resume(struct ufs_hba *hba) {} > static void ufshpb_suspend(struct ufs_hba *hba) {} > @@ -211,6 +212,7 @@ static bool ufshpb_is_allowed(struct ufs_hba *hba) > { return false; } > static void ufshpb_get_geo_info(struct ufs_hba *hba, u8 *geo_buf) {} > static void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf) {} > #else > +void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp); > void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp); > void ufshpb_resume(struct ufs_hba *hba); > void ufshpb_suspend(struct ufs_hba *hba);
On Fri, 2021-02-05 at 11:29 +0800, Can Guo wrote: > > + *rgn_idx = lpn >> hpb->entries_per_rgn_shift; > > + rgn_offset = lpn & hpb->entries_per_rgn_mask; > > + *srgn_idx = rgn_offset >> hpb->entries_per_srgn_shift; > > + *offset = rgn_offset & hpb->entries_per_srgn_mask; > > +} > > + > > +static void > > +ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct > > ufshcd_lrb > > *lrbp, > > + u32 lpn, u64 ppn, unsigned int > > transfer_len) > > +{ > > + unsigned char *cdb = lrbp->cmd->cmnd; > > + > > + cdb[0] = UFSHPB_READ; > > + > > + put_unaligned_be64(ppn, &cdb[6]); > > You are assuming the HPB entries read out by "HPB Read Buffer" cmd > are > in Little > Endian, which is why you are using put_unaligned_be64 here. However, > this assumption > is not right for all the other flash vendors - HPB entries read out > by > "HPB Read Buffer" > cmd may come in Big Endian, if so, their random read performance are > screwed. For this question, it is very hard to make a correct format since the Spec doesn't give a clear definition. Should we have a default format, if there is conflict, and then add quirk or add a vendor-specific table? Hi Avri Do you have a good idea? Bean
> > > + put_unaligned_be64(ppn, &cdb[6]); > > > > You are assuming the HPB entries read out by "HPB Read Buffer" cmd > > are > > in Little > > Endian, which is why you are using put_unaligned_be64 here. However, > > this assumption > > is not right for all the other flash vendors - HPB entries read out > > by > > "HPB Read Buffer" > > cmd may come in Big Endian, if so, their random read performance are > > screwed. > > For this question, it is very hard to make a correct format since the > Spec doesn't give a clear definition. Should we have a default format, > if there is conflict, and then add quirk or add a vendor-specific > table? > > Hi Avri > Do you have a good idea? I don't know. Better let Daejun answer this. This was working for me for both Galaxy S20 (Exynos) as well as Xiaomi Mi10 (8250). Thanks, Avri
On Fri, 2021-02-05 at 14:06 +0000, Avri Altman wrote: > > > > + put_unaligned_be64(ppn, &cdb[6]); > > > > > > You are assuming the HPB entries read out by "HPB Read Buffer" > > > cmd > > > are > > > in Little > > > Endian, which is why you are using put_unaligned_be64 here. > > > However, > > > this assumption > > > is not right for all the other flash vendors - HPB entries read > > > out > > > by > > > "HPB Read Buffer" > > > cmd may come in Big Endian, if so, their random read performance > > > are > > > screwed. > > > > For this question, it is very hard to make a correct format since > > the > > Spec doesn't give a clear definition. Should we have a default > > format, > > if there is conflict, and then add quirk or add a vendor-specific > > table? > > > > Hi Avri > > Do you have a good idea? > > I don't know. Better let Daejun answer this. > This was working for me for both Galaxy S20 (Exynos) as well as > Xiaomi Mi10 (8250). > Thanks, I tested Daejun's patchset before, it is also ok (I don't know which version patchset). maybe we can keep current implementation as default, then if there is conflict, and submit the quirk. Thanks, Bean > Thanks, > Avri
On 2021-02-05 23:08, Bean Huo wrote: > On Fri, 2021-02-05 at 14:06 +0000, Avri Altman wrote: >> > > > + put_unaligned_be64(ppn, &cdb[6]); >> > > >> > > You are assuming the HPB entries read out by "HPB Read Buffer" >> > > cmd >> > > are >> > > in Little >> > > Endian, which is why you are using put_unaligned_be64 here. >> > > However, >> > > this assumption >> > > is not right for all the other flash vendors - HPB entries read >> > > out >> > > by >> > > "HPB Read Buffer" >> > > cmd may come in Big Endian, if so, their random read performance >> > > are >> > > screwed. >> > >> > For this question, it is very hard to make a correct format since >> > the >> > Spec doesn't give a clear definition. Should we have a default >> > format, >> > if there is conflict, and then add quirk or add a vendor-specific >> > table? >> > >> > Hi Avri >> > Do you have a good idea? >> >> I don't know. Better let Daejun answer this. >> This was working for me for both Galaxy S20 (Exynos) as well as >> Xiaomi Mi10 (8250). >> > > Thanks, I tested Daejun's patchset before, it is also ok (I don't know > which version patchset). maybe we can keep current implementation as > default, then if there is conflict, and submit the quirk. > Yeah, you've tested it, are you sure that Micron's UFS devices are OK with this specific code line? Micron UFS FW team has confirmed that Micron's HPB entries read out by "HPB Buffer Read" cmd are in big-endian byte ordering. If Micron FW team is right, I am pretty sure that you would have seen random read performance regression on Micron UFS devices caused by invalid HPB entry format in HPB Read cmd UPIU (which leads to L2P cache miss in device side all the time) during your test. Can Guo. > Thanks, > Bean > >> Thanks, >> Avri
On Sun, 2021-02-07 at 15:36 +0800, Can Guo wrote: > > > > Thanks, I tested Daejun's patchset before, it is also ok (I don't > > know > > which version patchset). maybe we can keep current implementation > > as > > default, then if there is conflict, and submit the quirk. > > > > Yeah, you've tested it, are you sure that Micron's UFS devices are OK > with this specific code line? > > Micron UFS FW team has confirmed that Micron's HPB entries read out > by > "HPB Buffer Read" cmd are in big-endian byte ordering. Aha, I think you didn't check with right person :), ping me, let me tell you this confusing story. and see my HPB patch, I didn't the same with here: https://patchwork.kernel.org/project/linux-scsi/patch/20200504142032.16619-6-beanhuo@micron.com/ Bean
On Fri, 2021-02-05 at 11:29 +0800, Can Guo wrote: > > + return ppn_table[offset]; > > +} > > + > > +static void > > +ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, > > int > > *rgn_idx, > > + int *srgn_idx, int *offset) > > +{ > > + int rgn_offset; > > + > > + *rgn_idx = lpn >> hpb->entries_per_rgn_shift; > > + rgn_offset = lpn & hpb->entries_per_rgn_mask; > > + *srgn_idx = rgn_offset >> hpb->entries_per_srgn_shift; > > + *offset = rgn_offset & hpb->entries_per_srgn_mask; > > +} > > + > > +static void > > +ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct > > ufshcd_lrb > > *lrbp, > > + u32 lpn, u64 ppn, unsigned int > > transfer_len) > > +{ > > + unsigned char *cdb = lrbp->cmd->cmnd; > > + > > + cdb[0] = UFSHPB_READ; > > + > > + put_unaligned_be64(ppn, &cdb[6]); > > You are assuming the HPB entries read out by "HPB Read Buffer" cmd > are > in Little > Endian, which is why you are using put_unaligned_be64 here. > Actaully, here uses put_unaligned_be64 is no problem. SCSI command should be big-endian filled. I Think the problem is that geting ppn from HPB cache in ufshpb_get_ppn(). ... e0000001f: 12 34 56 78 90 fa de ef ... + +static u64 ufshpb_get_ppn(struct ufshpb_lu *hpb, + struct ufshpb_map_ctx *mctx, int pos, int *error) +{ + u64 *ppn_table; // It s a 64 bits pointer + struct page *page; + int index, offset; + + index = pos / (PAGE_SIZE / HPB_ENTRY_SIZE); + offset = pos % (PAGE_SIZE / HPB_ENTRY_SIZE); + + page = mctx->m_page[index]; + if (unlikely(!page)) { + *error = -ENOMEM; + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "error. cannot find page in mctx\n"); + return 0; + } + + ppn_table = page_address(page); + if (unlikely(!ppn_table)) { + *error = -ENOMEM; + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "error. cannot get ppn_table\n"); + return 0; + } + + return ppn_table[offset]; +} > this assumption > is not right for all the other flash vendors - HPB entries read out > by > "HPB Read Buffer" > cmd may come in Big Endian, if so, their random read performance are > screwed. > Actually, I have seen at least two flash vendors acting so. I had to > modify this line > to get the code work properly on my setups. > > Meanwhile, in your cover letter, you mentioned that the performance > data > is collected > on a UFS2.1 device. Please re-collect the data on a real UFS3.1 > device > and let me > know the part number. Otherwise, the data is not quite convincing to > us. > > Regards, > Can Guo.
On 2021-02-08 16:16, Bean Huo wrote: > On Fri, 2021-02-05 at 11:29 +0800, Can Guo wrote: >> > + return ppn_table[offset]; >> > +} >> > + >> > +static void >> > +ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, >> > int >> > *rgn_idx, >> > + int *srgn_idx, int *offset) >> > +{ >> > + int rgn_offset; >> > + >> > + *rgn_idx = lpn >> hpb->entries_per_rgn_shift; >> > + rgn_offset = lpn & hpb->entries_per_rgn_mask; >> > + *srgn_idx = rgn_offset >> hpb->entries_per_srgn_shift; >> > + *offset = rgn_offset & hpb->entries_per_srgn_mask; >> > +} >> > + >> > +static void >> > +ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct >> > ufshcd_lrb >> > *lrbp, >> > + u32 lpn, u64 ppn, unsigned int >> > transfer_len) >> > +{ >> > + unsigned char *cdb = lrbp->cmd->cmnd; >> > + >> > + cdb[0] = UFSHPB_READ; >> > + >> > + put_unaligned_be64(ppn, &cdb[6]); >> >> You are assuming the HPB entries read out by "HPB Read Buffer" cmd >> are >> in Little >> Endian, which is why you are using put_unaligned_be64 here. >> > > > Actaully, here uses put_unaligned_be64 is no problem. SCSI command > should be big-endian filled. I Think the problem is that geting ppn > from HPB cache in ufshpb_get_ppn(). > whatever... > ... > e0000001f: 12 34 56 78 90 fa de ef > ... > > + > +static u64 ufshpb_get_ppn(struct ufshpb_lu *hpb, > + struct ufshpb_map_ctx *mctx, int pos, int > *error) > +{ > + u64 *ppn_table; // It s a 64 bits pointer > + struct page *page; > + int index, offset; > + > + index = pos / (PAGE_SIZE / HPB_ENTRY_SIZE); > + offset = pos % (PAGE_SIZE / HPB_ENTRY_SIZE); > + > + page = mctx->m_page[index]; > + if (unlikely(!page)) { > + *error = -ENOMEM; > + dev_err(&hpb->sdev_ufs_lu->sdev_dev, > + "error. cannot find page in mctx\n"); > + return 0; > + } > + > + ppn_table = page_address(page); > + if (unlikely(!ppn_table)) { > + *error = -ENOMEM; > + dev_err(&hpb->sdev_ufs_lu->sdev_dev, > + "error. cannot get ppn_table\n"); > + return 0; > + } > + > + return ppn_table[offset]; > +} > > > > >> this assumption >> is not right for all the other flash vendors - HPB entries read out >> by >> "HPB Read Buffer" >> cmd may come in Big Endian, if so, their random read performance are >> screwed. >> Actually, I have seen at least two flash vendors acting so. I had to >> modify this line >> to get the code work properly on my setups. >> >> Meanwhile, in your cover letter, you mentioned that the performance >> data >> is collected >> on a UFS2.1 device. Please re-collect the data on a real UFS3.1 >> device >> and let me >> know the part number. Otherwise, the data is not quite convincing to >> us. >> >> Regards, >> Can Guo.
> > > > > > + put_unaligned_be64(ppn, &cdb[6]); > > > > > > You are assuming the HPB entries read out by "HPB Read Buffer" cmd > > > are > > > in Little > > > Endian, which is why you are using put_unaligned_be64 here. However, > > > this assumption > > > is not right for all the other flash vendors - HPB entries read out > > > by > > > "HPB Read Buffer" > > > cmd may come in Big Endian, if so, their random read performance are > > > screwed. > > > > For this question, it is very hard to make a correct format since the > > Spec doesn't give a clear definition. Should we have a default format, > > if there is conflict, and then add quirk or add a vendor-specific > > table? > > > > Hi Avri > > Do you have a good idea? > I don't know. Better let Daejun answer this. > This was working for me for both Galaxy S20 (Exynos) as well as Xiaomi Mi10 > (8250). As for the endianity issue - I don't think that any fix is needed in the hpb driver. It is readily seen that the ppn from get_ppn, and the one in the upiu cdb (upiu trace) are identical. Therefore, if an issue exist, it is IMHO a device issue. kworker/u16:10-315 [001] d..2 62.283264: ufshpb_get_ppn: Avri ppn 480d2f8244c21abd kworker/u16:10-315 [001] d..2 62.283336: ufshcd_upiu: v:1.10 send: T:62283314922, HDR:014000000000000000000000, CDB:8800002ddaac480d2f8244c21abd0100, D: Again, verified on both gs20 (exynos) and mi10 (8250). Thanks, Avri
On Tue, 2021-02-09 at 13:25 +0000, Avri Altman wrote: > > > > > > > > > + put_unaligned_be64(ppn, &cdb[6]); > > > > > > > > You are assuming the HPB entries read out by "HPB Read Buffer" > > > > cmd > > > > are > > > > in Little > > > > Endian, which is why you are using put_unaligned_be64 here. > > > > However, > > > > this assumption > > > > is not right for all the other flash vendors - HPB entries read > > > > out > > > > by > > > > "HPB Read Buffer" > > > > cmd may come in Big Endian, if so, their random read > > > > performance are > > > > screwed. > > > > > > For this question, it is very hard to make a correct format since > > > the > > > Spec doesn't give a clear definition. Should we have a default > > > format, > > > if there is conflict, and then add quirk or add a vendor-specific > > > table? > > > > > > Hi Avri > > > Do you have a good idea? > > > > I don't know. Better let Daejun answer this. > > This was working for me for both Galaxy S20 (Exynos) as well as > > Xiaomi Mi10 > > (8250). > > As for the endianity issue - > I don't think that any fix is needed in the hpb driver. > It is readily seen that the ppn from get_ppn, and the one in the upiu > cdb (upiu trace) are identical. > Therefore, if an issue exist, it is IMHO a device issue. > > kworker/u16:10-315 [001] d..2 62.283264: ufshpb_get_ppn: Avri > ppn 480d2f8244c21abd > kworker/u16:10-315 [001] d..2 62.283336: ufshcd_upiu: v:1.10 > send: T:62283314922, HDR:014000000000000000000000, > CDB:8800002ddaac480d2f8244c21abd0100, D: > > Again, verified on both gs20 (exynos) and mi10 (8250). > Thanks, > Avri Hi Avri, Your testing method is no problem, the current problem is in function ufshpb_get_ppn(). +static u64 ufshpb_get_ppn(struct ufshpb_lu *hpb, + struct ufshpb_map_ctx *mctx, int pos, int *error) +{ + u64 *ppn_table; + struct page *page; + int index, offset; + + index = pos / (PAGE_SIZE / HPB_ENTRY_SIZE); + offset = pos % (PAGE_SIZE / HPB_ENTRY_SIZE); + + page = mctx->m_page[index]; + if (unlikely(!page)) { + *error = -ENOMEM; + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "error. cannot find page in mctx\n"); + return 0; + } + + ppn_table = page_address(page); + if (unlikely(!ppn_table)) { + *error = -ENOMEM; + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "error. cannot get ppn_table\n"); + return 0; + } + + return ppn_table[offset]; +} Say, the UFS device outputs the L2P entry in big-endian, which means the most significant byte of an L2P entry will be output firstly, then the less significant byte..., let's take an example of one L2P entry: 0x 12 34 56 78 90 12 34 56 0x12 is the most significant byte, will be store in the lowest address in the L2P cache. eg, F0000008: 1234 5678 9012 3456 In the ARM based system, If we use "return ppn_table[offset]", the original L2P entry 0x1234 5678 9012 3456, will be converted to 0x5634 1290 7856 3412. then use put_unaligned_be64(), UFS receive unexpected L2P entry(L2P entry miss). If the UFS output L2P entry in the big-endian, this is a problem. For the UFS outputs L2P entry in little-endian, no problem, Because of the L2P entry in the memory: F0000008: 5634 1290 7856 3412 After return ppn_table[offset], L2P entry will be correct L2P entry: 0x1234567890123456. then use put_unaligned_be64(), UFS can receive expected L2P etnry(L2P entry hit). we need to figure out which way is the JEDEC recommended L2P entry output endianness. otherwise, two methods co-exist in HPB driver, there will confuse customer. If you have a look at the JEDEC HPB 2.0, seems the big-endian is correct way. This need you and Daejun to double check inside your company. thanks, Bean
On 2021-02-09 22:21, Bean Huo wrote: > On Tue, 2021-02-09 at 13:25 +0000, Avri Altman wrote: >> > >> > >> > > > > + put_unaligned_be64(ppn, &cdb[6]); >> > > > >> > > > You are assuming the HPB entries read out by "HPB Read Buffer" >> > > > cmd >> > > > are >> > > > in Little >> > > > Endian, which is why you are using put_unaligned_be64 here. >> > > > However, >> > > > this assumption >> > > > is not right for all the other flash vendors - HPB entries read >> > > > out >> > > > by >> > > > "HPB Read Buffer" >> > > > cmd may come in Big Endian, if so, their random read >> > > > performance are >> > > > screwed. >> > > >> > > For this question, it is very hard to make a correct format since >> > > the >> > > Spec doesn't give a clear definition. Should we have a default >> > > format, >> > > if there is conflict, and then add quirk or add a vendor-specific >> > > table? >> > > >> > > Hi Avri >> > > Do you have a good idea? >> > >> > I don't know. Better let Daejun answer this. >> > This was working for me for both Galaxy S20 (Exynos) as well as >> > Xiaomi Mi10 >> > (8250). >> >> As for the endianity issue - >> I don't think that any fix is needed in the hpb driver. >> It is readily seen that the ppn from get_ppn, and the one in the upiu >> cdb (upiu trace) are identical. >> Therefore, if an issue exist, it is IMHO a device issue. >> >> kworker/u16:10-315 [001] d..2 62.283264: ufshpb_get_ppn: Avri >> ppn 480d2f8244c21abd >> kworker/u16:10-315 [001] d..2 62.283336: ufshcd_upiu: v:1.10 >> send: T:62283314922, HDR:014000000000000000000000, >> CDB:8800002ddaac480d2f8244c21abd0100, D: >> >> Again, verified on both gs20 (exynos) and mi10 (8250). >> Thanks, >> Avri > > > Hi Avri, > Your testing method is no problem, the current problem is in function > ufshpb_get_ppn(). > > > +static u64 ufshpb_get_ppn(struct ufshpb_lu *hpb, > + struct ufshpb_map_ctx *mctx, int pos, int > *error) > +{ > + u64 *ppn_table; > + struct page *page; > + int index, offset; > + > + index = pos / (PAGE_SIZE / HPB_ENTRY_SIZE); > + offset = pos % (PAGE_SIZE / HPB_ENTRY_SIZE); > + > + page = mctx->m_page[index]; > + if (unlikely(!page)) { > + *error = -ENOMEM; > + dev_err(&hpb->sdev_ufs_lu->sdev_dev, > + "error. cannot find page in mctx\n"); > + return 0; > + } > + > + ppn_table = page_address(page); > + if (unlikely(!ppn_table)) { > + *error = -ENOMEM; > + dev_err(&hpb->sdev_ufs_lu->sdev_dev, > + "error. cannot get ppn_table\n"); > + return 0; > + } > + > + return ppn_table[offset]; > +} > > > Say, the UFS device outputs the L2P entry in big-endian, which means > the most significant byte of an L2P entry will be output firstly, then > the less significant byte..., let's take an example of one L2P entry: > > 0x 12 34 56 78 90 12 34 56 > > 0x12 is the most significant byte, will be store in the lowest address > in the L2P cache. > > eg, > > F0000008: 1234 5678 9012 3456 > > In the ARM based system, If we use "return ppn_table[offset]", > the original L2P entry 0x1234 5678 9012 3456, will be converted to > 0x5634 1290 7856 3412. then use put_unaligned_be64(), UFS receive > unexpected L2P entry(L2P entry miss). > > If the UFS output L2P entry in the big-endian, this is a problem. > > > For the UFS outputs L2P entry in little-endian, no problem, > > Because of the L2P entry in the memory: > > F0000008: 5634 1290 7856 3412 > > After return ppn_table[offset], L2P entry will be correct L2P entry: > > 0x1234567890123456. then use put_unaligned_be64(), UFS can receive > expected L2P etnry(L2P entry hit). > > > we need to figure out which way is the JEDEC recommended L2P entry > output endianness. otherwise, two methods co-exist in HPB driver, there > will confuse customer. > If you have a look at the JEDEC HPB 2.0, seems the big-endian is > correct way. This need you and Daejun to double check inside your > company. > Bean is right, finally you know what I was saying... We need to fix it before move on - all the UFS3.1 HPB parts which I tested over the last few weeks are screwed due to this... I don't care where/how you want to get it fixed in next version. In my case, which may not be a valid fix, I simply hack the code as below and it works for me. - put_unaligned_be64(ppn, &cdb[6]); + memcpy(&cdb[6], &ppn, sizeof(u64)); Thanks, Can Guo. > thanks, > Bean
> +static bool ufshpb_test_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx, > + int srgn_idx, int srgn_offset, int cnt) > +{ > + struct ufshpb_region *rgn; > + struct ufshpb_subregion *srgn; > + int bitmap_len = hpb->entries_per_srgn; > + int bit_len; > + > +next_srgn: > + rgn = hpb->rgn_tbl + rgn_idx; > + srgn = rgn->srgn_tbl + srgn_idx; > + > + if (!ufshpb_is_valid_srgn(rgn, srgn)) > + return true; The subregion is changing its state to issued, only in ufshpb_issue_map_req. Although you already know that those physical addresses are no longer valid in ufshpb_update_active_info. Can we mark that this subregion is no longer valid earlier, so not to keep sending faulty HPB-READ to that subregion? Thanks, Avri
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 52e48de8d27c..37cb343e9ec1 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -2653,6 +2653,8 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->req_abort_skip = false; + ufshpb_prep(hba, lrbp); + ufshcd_comp_scsi_upiu(hba, lrbp); err = ufshcd_map_sg(hba, lrbp); diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index 48edfdd0f606..73e7b3ed04a4 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -31,6 +31,29 @@ bool ufshpb_is_allowed(struct ufs_hba *hba) return !(hba->ufshpb_dev.hpb_disabled); } +static int ufshpb_is_valid_srgn(struct ufshpb_region *rgn, + struct ufshpb_subregion *srgn) +{ + return rgn->rgn_state != HPB_RGN_INACTIVE && + srgn->srgn_state == HPB_SRGN_VALID; +} + +static bool ufshpb_is_read_cmd(struct scsi_cmnd *cmd) +{ + return req_op(cmd->request) == REQ_OP_READ; +} + +static bool ufshpb_is_write_or_discard_cmd(struct scsi_cmnd *cmd) +{ + return op_is_write(req_op(cmd->request)) || + op_is_discard(req_op(cmd->request)); +} + +static bool ufshpb_is_support_chunk(int transfer_len) +{ + return transfer_len <= HPB_MULTI_CHUNK_HIGH; +} + static bool ufshpb_is_general_lun(int lun) { return lun < UFS_UPIU_MAX_UNIT_NUM_ID; @@ -98,6 +121,217 @@ static void ufshpb_set_state(struct ufshpb_lu *hpb, int state) atomic_set(&hpb->hpb_state, state); } +static void ufshpb_set_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx, + int srgn_idx, int srgn_offset, int cnt) +{ + struct ufshpb_region *rgn; + struct ufshpb_subregion *srgn; + int set_bit_len; + int bitmap_len = hpb->entries_per_srgn; + +next_srgn: + rgn = hpb->rgn_tbl + rgn_idx; + srgn = rgn->srgn_tbl + srgn_idx; + + if ((srgn_offset + cnt) > bitmap_len) + set_bit_len = bitmap_len - srgn_offset; + else + set_bit_len = cnt; + + if (rgn->rgn_state != HPB_RGN_INACTIVE && + srgn->srgn_state == HPB_SRGN_VALID) + bitmap_set(srgn->mctx->ppn_dirty, srgn_offset, set_bit_len); + + srgn_offset = 0; + if (++srgn_idx == hpb->srgns_per_rgn) { + srgn_idx = 0; + rgn_idx++; + } + + cnt -= set_bit_len; + if (cnt > 0) + goto next_srgn; + + WARN_ON(cnt < 0); +} + +static bool ufshpb_test_ppn_dirty(struct ufshpb_lu *hpb, int rgn_idx, + int srgn_idx, int srgn_offset, int cnt) +{ + struct ufshpb_region *rgn; + struct ufshpb_subregion *srgn; + int bitmap_len = hpb->entries_per_srgn; + int bit_len; + +next_srgn: + rgn = hpb->rgn_tbl + rgn_idx; + srgn = rgn->srgn_tbl + srgn_idx; + + if (!ufshpb_is_valid_srgn(rgn, srgn)) + return true; + + /* + * If the region state is active, mctx must be allocated. + * In this case, check whether the region is evicted or + * mctx allcation fail. + */ + WARN_ON(!srgn->mctx); + + if ((srgn_offset + cnt) > bitmap_len) + bit_len = bitmap_len - srgn_offset; + else + bit_len = cnt; + + if (find_next_bit(srgn->mctx->ppn_dirty, + bit_len, srgn_offset) >= srgn_offset) + return true; + + srgn_offset = 0; + if (++srgn_idx == hpb->srgns_per_rgn) { + srgn_idx = 0; + rgn_idx++; + } + + cnt -= bit_len; + if (cnt > 0) + goto next_srgn; + + return false; +} + +static u64 ufshpb_get_ppn(struct ufshpb_lu *hpb, + struct ufshpb_map_ctx *mctx, int pos, int *error) +{ + u64 *ppn_table; + struct page *page; + int index, offset; + + index = pos / (PAGE_SIZE / HPB_ENTRY_SIZE); + offset = pos % (PAGE_SIZE / HPB_ENTRY_SIZE); + + page = mctx->m_page[index]; + if (unlikely(!page)) { + *error = -ENOMEM; + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "error. cannot find page in mctx\n"); + return 0; + } + + ppn_table = page_address(page); + if (unlikely(!ppn_table)) { + *error = -ENOMEM; + dev_err(&hpb->sdev_ufs_lu->sdev_dev, + "error. cannot get ppn_table\n"); + return 0; + } + + return ppn_table[offset]; +} + +static void +ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, int *rgn_idx, + int *srgn_idx, int *offset) +{ + int rgn_offset; + + *rgn_idx = lpn >> hpb->entries_per_rgn_shift; + rgn_offset = lpn & hpb->entries_per_rgn_mask; + *srgn_idx = rgn_offset >> hpb->entries_per_srgn_shift; + *offset = rgn_offset & hpb->entries_per_srgn_mask; +} + +static void +ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct ufshcd_lrb *lrbp, + u32 lpn, u64 ppn, unsigned int transfer_len) +{ + unsigned char *cdb = lrbp->cmd->cmnd; + + cdb[0] = UFSHPB_READ; + + put_unaligned_be64(ppn, &cdb[6]); + cdb[14] = transfer_len; + + lrbp->cmd->cmd_len = UFS_CDB_SIZE; +} + +/* + * This function will set up HPB read command using host-side L2P map data. + * In HPB v1.0, maximum size of HPB read command is 4KB. + */ +void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) +{ + struct ufshpb_lu *hpb; + struct ufshpb_region *rgn; + struct ufshpb_subregion *srgn; + struct scsi_cmnd *cmd = lrbp->cmd; + u32 lpn; + u64 ppn; + unsigned long flags; + int transfer_len, rgn_idx, srgn_idx, srgn_offset; + int err = 0; + + hpb = ufshpb_get_hpb_data(cmd->device); + if (!hpb) + return; + + if (ufshpb_get_state(hpb) != HPB_PRESENT) { + dev_notice(&hpb->sdev_ufs_lu->sdev_dev, + "%s: ufshpb state is not PRESENT", __func__); + return; + } + + if (!ufshpb_is_write_or_discard_cmd(cmd) && + !ufshpb_is_read_cmd(cmd)) + return; + + transfer_len = sectors_to_logical(cmd->device, blk_rq_sectors(cmd->request)); + if (unlikely(!transfer_len)) + return; + + lpn = sectors_to_logical(cmd->device, blk_rq_pos(cmd->request)); + ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset); + rgn = hpb->rgn_tbl + rgn_idx; + srgn = rgn->srgn_tbl + srgn_idx; + + /* If command type is WRITE or DISCARD, set bitmap as drity */ + if (ufshpb_is_write_or_discard_cmd(cmd)) { + spin_lock_irqsave(&hpb->rgn_state_lock, flags); + ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset, + transfer_len); + spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); + return; + } + + if (!ufshpb_is_support_chunk(transfer_len)) + return; + + spin_lock_irqsave(&hpb->rgn_state_lock, flags); + if (ufshpb_test_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset, + transfer_len)) { + hpb->stats.miss_cnt++; + spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); + return; + } + + ppn = ufshpb_get_ppn(hpb, srgn->mctx, srgn_offset, &err); + spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); + if (unlikely(err)) { + /* + * In this case, the region state is active, + * but the ppn table is not allocated. + * Make sure that ppn table must be allocated on + * active state. + */ + WARN_ON(true); + dev_err(hba->dev, "ufshpb_get_ppn failed. err %d\n", err); + return; + } + + ufshpb_set_hpb_read_to_upiu(hpb, lrbp, lpn, ppn, transfer_len); + + hpb->stats.hit_cnt++; +} + static struct ufshpb_req *ufshpb_get_map_req(struct ufshpb_lu *hpb, struct ufshpb_subregion *srgn) { diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h index e40b016971ac..2c43a03b66b6 100644 --- a/drivers/scsi/ufs/ufshpb.h +++ b/drivers/scsi/ufs/ufshpb.h @@ -198,6 +198,7 @@ struct ufs_hba; struct ufshcd_lrb; #ifndef CONFIG_SCSI_UFS_HPB +static void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) {} static void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) {} static void ufshpb_resume(struct ufs_hba *hba) {} static void ufshpb_suspend(struct ufs_hba *hba) {} @@ -211,6 +212,7 @@ static bool ufshpb_is_allowed(struct ufs_hba *hba) { return false; } static void ufshpb_get_geo_info(struct ufs_hba *hba, u8 *geo_buf) {} static void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf) {} #else +void ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp); void ufshpb_rsp_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp); void ufshpb_resume(struct ufs_hba *hba); void ufshpb_suspend(struct ufs_hba *hba);