diff mbox

[1/2] mmc: mediatek: Use data tune for CMD line tune

Message ID 1484215490-7494-2-git-send-email-yong.mao@mediatek.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yong Mao Jan. 12, 2017, 10:04 a.m. UTC
From: yong mao <yong.mao@mediatek.com>

CMD response CRC error may cause cannot boot up
Change to use data tune for CMD line
Separate cmd internal delay for HS200/HS400 mode

Signed-off-by: Yong Mao <yong.mao@mediatek.com>
Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
---
 arch/arm64/boot/dts/mediatek/mt8173-evb.dts |    3 +
 drivers/mmc/host/mtk-sd.c                   |  169 +++++++++++++++++++++++----
 2 files changed, 149 insertions(+), 23 deletions(-)

Comments

Ulf Hansson Jan. 12, 2017, 10:39 a.m. UTC | #1
On 12 January 2017 at 11:04, Yong Mao <yong.mao@mediatek.com> wrote:
> From: yong mao <yong.mao@mediatek.com>
>
> CMD response CRC error may cause cannot boot up
> Change to use data tune for CMD line
> Separate cmd internal delay for HS200/HS400 mode

Please try to work a little bit on improving the change log. Moreover
as this is a fix for a regression (it seems like so?), please try to
make that clear.

>
> Signed-off-by: Yong Mao <yong.mao@mediatek.com>
> Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
> ---
>  arch/arm64/boot/dts/mediatek/mt8173-evb.dts |    3 +

Changes to the DTS files should be a separate change. Please split it
into its own patch.

>  drivers/mmc/host/mtk-sd.c                   |  169 +++++++++++++++++++++++----
>  2 files changed, 149 insertions(+), 23 deletions(-)
>
> diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
> index 0ecaad4..29c3100 100644
> --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
> +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
> @@ -134,6 +134,9 @@
>         bus-width = <8>;
>         max-frequency = <50000000>;
>         cap-mmc-highspeed;
> +       hs200-cmd-int-delay = <26>;
> +       hs400-cmd-int-delay = <14>;
> +       cmd-resp-sel = <0>; /* 0: rising, 1: falling */
>         vmmc-supply = <&mt6397_vemc_3v3_reg>;
>         vqmmc-supply = <&mt6397_vio18_reg>;
>         non-removable;
> diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
> index 80ba034..93eb395 100644
> --- a/drivers/mmc/host/mtk-sd.c
> +++ b/drivers/mmc/host/mtk-sd.c
> @@ -75,6 +75,7 @@
>  #define MSDC_PATCH_BIT1  0xb4
>  #define MSDC_PAD_TUNE    0xec
>  #define PAD_DS_TUNE      0x188
> +#define PAD_CMD_TUNE     0x18c
>  #define EMMC50_CFG0      0x208
>
>  /*--------------------------------------------------------------------------*/
> @@ -210,12 +211,17 @@
>  #define MSDC_PATCH_BIT_SPCPUSH    (0x1 << 29)  /* RW */
>  #define MSDC_PATCH_BIT_DECRCTMO   (0x1 << 30)  /* RW */
>
> -#define MSDC_PAD_TUNE_DATRRDLY   (0x1f <<  8)  /* RW */
> -#define MSDC_PAD_TUNE_CMDRDLY    (0x1f << 16)  /* RW */
> +#define MSDC_PAD_TUNE_DATWRDLY    (0x1f <<  0)  /* RW */
> +#define MSDC_PAD_TUNE_DATRRDLY    (0x1f <<  8) /* RW */
> +#define MSDC_PAD_TUNE_CMDRDLY     (0x1f << 16)  /* RW */
> +#define MSDC_PAD_TUNE_CMDRRDLY    (0x1f << 22)  /* RW */

Is there a white space change somewhere here? I don't see any changes
to MSDC_PAD_TUNE_DATRRDLY and MSDC_PAD_TUNE_CMDRDLY.

> +#define MSDC_PAD_TUNE_CLKTDLY     (0x1f << 27)  /* RW */
>
> -#define PAD_DS_TUNE_DLY1         (0x1f << 2)   /* RW */
> -#define PAD_DS_TUNE_DLY2         (0x1f << 7)   /* RW */
> -#define PAD_DS_TUNE_DLY3         (0x1f << 12)  /* RW */
> +#define PAD_DS_TUNE_DLY1          (0x1f << 2)   /* RW */
> +#define PAD_DS_TUNE_DLY2          (0x1f << 7)   /* RW */
> +#define PAD_DS_TUNE_DLY3          (0x1f << 12)  /* RW */
> +

Ditto.

> +#define PAD_CMD_TUNE_RX_DLY3      (0x1f << 1)   /* RW */
>
>  #define EMMC50_CFG_PADCMD_LATCHCK (0x1 << 0)   /* RW */
>  #define EMMC50_CFG_CRCSTS_EDGE    (0x1 << 3)   /* RW */
> @@ -236,7 +242,9 @@
>  #define CMD_TIMEOUT         (HZ/10 * 5)        /* 100ms x5 */
>  #define DAT_TIMEOUT         (HZ    * 5)        /* 1000ms x5 */
>
> -#define PAD_DELAY_MAX  32 /* PAD delay cells */
> +#define PAD_DELAY_MAX            32 /* PAD delay cells */

Ditto.

> +#define ENOUGH_MARGIN_MIN        12 /* Enough Margin */
> +#define PREFER_START_POS_MAX     4  /* Prefer start position */
>  /*--------------------------------------------------------------------------*/
>  /* Descriptor Structure                                                     */
>  /*--------------------------------------------------------------------------*/
> @@ -284,12 +292,14 @@ struct msdc_save_para {
>         u32 patch_bit0;
>         u32 patch_bit1;
>         u32 pad_ds_tune;
> +       u32 pad_cmd_tune;
>         u32 emmc50_cfg0;
>  };
>
>  struct msdc_tune_para {
>         u32 iocon;
>         u32 pad_tune;
> +       u32 pad_cmd_tune;
>  };
>
>  struct msdc_delay_phase {
> @@ -331,6 +341,9 @@ struct msdc_host {
>         unsigned char timing;
>         bool vqmmc_enabled;
>         u32 hs400_ds_delay;
> +       u32 hs200_cmd_int_delay; /* cmd internal delay for HS200/SDR104 */
> +       u32 hs400_cmd_int_delay; /* cmd internal delay for HS400 */
> +       u32 hs200_cmd_resp_sel; /* cmd response sample selection */
>         bool hs400_mode;        /* current eMMC will run at hs400 mode */
>         struct msdc_save_para save_para; /* used when gate HCLK */
>         struct msdc_tune_para def_tune_para; /* default tune setting */
> @@ -596,12 +609,21 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
>          */
>         if (host->sclk <= 52000000) {
>                 writel(host->def_tune_para.iocon, host->base + MSDC_IOCON);
> -               writel(host->def_tune_para.pad_tune, host->base + MSDC_PAD_TUNE);
> +               writel(host->def_tune_para.pad_tune,
> +                      host->base + MSDC_PAD_TUNE);

Please don't change code just because you feel like doing it. This is
a completely unessarry change and it makes it harder for me to review.

Can you go thorugh the complete patch and make sure to undo all
similar changes, there are more of them.

>         } else {
> -               writel(host->saved_tune_para.iocon, host->base + MSDC_IOCON);
> -               writel(host->saved_tune_para.pad_tune, host->base + MSDC_PAD_TUNE);
> +               writel(host->saved_tune_para.iocon,
> +                      host->base + MSDC_IOCON);
> +               writel(host->saved_tune_para.pad_tune,
> +                      host->base + MSDC_PAD_TUNE);
> +               writel(host->saved_tune_para.pad_cmd_tune,
> +                      host->base + PAD_CMD_TUNE);
>         }
>
> +       if (timing == MMC_TIMING_MMC_HS400)
> +               sdr_set_field(host->base + PAD_CMD_TUNE,
> +                             MSDC_PAD_TUNE_CMDRRDLY,
> +                             host->hs400_cmd_int_delay);
>         dev_dbg(host->dev, "sclk: %d, timing: %d\n", host->sclk, timing);
>  }
>
> @@ -1302,7 +1324,8 @@ static struct msdc_delay_phase get_best_delay(struct msdc_host *host, u32 delay)
>                         len_final = len;
>                 }
>                 start += len ? len : 1;
> -               if (len >= 8 && start_final < 4)
> +               if (len >= ENOUGH_MARGIN_MIN &&
> +                   start_final < PREFER_START_POS_MAX)
>                         break;
>         }
>
> @@ -1325,48 +1348,128 @@ static int msdc_tune_response(struct mmc_host *mmc, u32 opcode)
>         struct msdc_host *host = mmc_priv(mmc);
>         u32 rise_delay = 0, fall_delay = 0;
>         struct msdc_delay_phase final_rise_delay, final_fall_delay = { 0,};
> +       struct msdc_delay_phase internal_delay_phase;
>         u8 final_delay, final_maxlen;
> +       u32 internal_delay = 0;
>         int cmd_err;
> -       int i;
> +       int i, j;
>
> +       if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
> +           mmc->ios.timing == MMC_TIMING_UHS_SDR104)
> +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> +                             MSDC_PAD_TUNE_CMDRRDLY,
> +                             host->hs200_cmd_int_delay);
>         sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
>         for (i = 0 ; i < PAD_DELAY_MAX; i++) {
>                 sdr_set_field(host->base + MSDC_PAD_TUNE,
>                               MSDC_PAD_TUNE_CMDRDLY, i);
> -               mmc_send_tuning(mmc, opcode, &cmd_err);
> -               if (!cmd_err)
> -                       rise_delay |= (1 << i);
> +               for (j = 0; j < 3; j++) {

Any reason to why looping three times makes sense? Maybe add a comment?

> +                       mmc_send_tuning(mmc, opcode, &cmd_err);
> +                       if (!cmd_err) {
> +                               rise_delay |= (1 << i);
> +                       } else {
> +                               rise_delay &= ~(1 << i);
> +                               break;
> +                       }
> +               }
>         }
>         final_rise_delay = get_best_delay(host, rise_delay);
>         /* if rising edge has enough margin, then do not scan falling edge */
> -       if (final_rise_delay.maxlen >= 10 ||
> -           (final_rise_delay.start == 0 && final_rise_delay.maxlen >= 4))
> +       if (final_rise_delay.maxlen >= ENOUGH_MARGIN_MIN &&
> +           final_rise_delay.start < PREFER_START_POS_MAX)

This looks like clean-ups, as you are converting from magic numbers to
defines. Please make this kind of changes separately.

>                 goto skip_fall;
>
>         sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
>         for (i = 0; i < PAD_DELAY_MAX; i++) {
>                 sdr_set_field(host->base + MSDC_PAD_TUNE,
>                               MSDC_PAD_TUNE_CMDRDLY, i);
> -               mmc_send_tuning(mmc, opcode, &cmd_err);
> -               if (!cmd_err)
> -                       fall_delay |= (1 << i);
> +               for (j = 0; j < 3; j++) {

3?

> +                       mmc_send_tuning(mmc, opcode, &cmd_err);
> +                       if (!cmd_err) {
> +                               fall_delay |= (1 << i);
> +                       } else {
> +                               fall_delay &= ~(1 << i);
> +                               break;
> +                       };
> +               }
>         }
>         final_fall_delay = get_best_delay(host, fall_delay);
>
>  skip_fall:
>         final_maxlen = max(final_rise_delay.maxlen, final_fall_delay.maxlen);
> +       if (final_fall_delay.maxlen >= ENOUGH_MARGIN_MIN &&
> +           final_fall_delay.start < PREFER_START_POS_MAX)
> +               final_maxlen = final_fall_delay.maxlen;
>         if (final_maxlen == final_rise_delay.maxlen) {
>                 sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> -               sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRDLY,
> +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> +                             MSDC_PAD_TUNE_CMDRDLY,
>                               final_rise_delay.final_phase);
>                 final_delay = final_rise_delay.final_phase;
>         } else {
>                 sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> -               sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRDLY,
> +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> +                             MSDC_PAD_TUNE_CMDRDLY,
>                               final_fall_delay.final_phase);
>                 final_delay = final_fall_delay.final_phase;
>         }
> +       if (host->hs200_cmd_int_delay)
> +               goto skip_internal;
>
> +       for (i = 0; i < PAD_DELAY_MAX; i++) {
> +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> +                             MSDC_PAD_TUNE_CMDRRDLY, i);
> +               mmc_send_tuning(mmc, opcode, &cmd_err);
> +               if (!cmd_err)
> +                       internal_delay |= (1 << i);
> +       }
> +       dev_info(host->dev, "Final internal delay: 0x%x\n", internal_delay);

I don't think dev_info() is what you want, right? Perhaps dev_dbg(),
anything at all.

> +       internal_delay_phase = get_best_delay(host, internal_delay);
> +       sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRRDLY,
> +                     internal_delay_phase.final_phase);
> +skip_internal:
> +       dev_info(host->dev, "Final cmd pad delay: %x\n", final_delay);

I don't think dev_info() is what you want, right? Perhaps dev_dbg(),
anything at all.

> +       return final_delay == 0xff ? -EIO : 0;
> +}
> +
> +static int hs400_tune_response(struct mmc_host *mmc, u32 opcode)
> +{
> +       struct msdc_host *host = mmc_priv(mmc);
> +       u32 cmd_delay = 0;
> +       struct msdc_delay_phase final_cmd_delay = { 0,};
> +       u8 final_delay;
> +       int cmd_err;
> +       int i, j;
> +
> +       /* select EMMC50 PAD CMD tune */
> +       sdr_set_bits(host->base + PAD_CMD_TUNE, BIT(0));
> +
> +       if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
> +           mmc->ios.timing == MMC_TIMING_UHS_SDR104)
> +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> +                             MSDC_PAD_TUNE_CMDRRDLY,
> +                             host->hs200_cmd_int_delay);
> +       sdr_set_field(host->base + MSDC_IOCON, MSDC_IOCON_RSPL,
> +                     host->hs200_cmd_resp_sel);
> +       for (i = 0 ; i < PAD_DELAY_MAX; i++) {
> +               sdr_set_field(host->base + PAD_CMD_TUNE,
> +                             PAD_CMD_TUNE_RX_DLY3, i);
> +               for (j = 0; j < 3; j++) {

3?

> +                       mmc_send_tuning(mmc, opcode, &cmd_err);
> +                       if (!cmd_err) {
> +                               cmd_delay |= (1 << i);
> +                       } else {
> +                               cmd_delay &= ~(1 << i);
> +                               break;
> +                       }
> +               }
> +       }
> +       final_cmd_delay = get_best_delay(host, cmd_delay);
> +       sdr_set_field(host->base + PAD_CMD_TUNE, PAD_CMD_TUNE_RX_DLY3,
> +                     final_cmd_delay.final_phase);
> +       final_delay = final_cmd_delay.final_phase;
> +
> +       dev_info(host->dev, "Final cmd pad delay: %x\n", final_delay);

dev_info() -> dev_dbg() or remove it.

>         return final_delay == 0xff ? -EIO : 0;
>  }
>
> @@ -1389,7 +1492,7 @@ static int msdc_tune_data(struct mmc_host *mmc, u32 opcode)
>         }
>         final_rise_delay = get_best_delay(host, rise_delay);
>         /* if rising edge has enough margin, then do not scan falling edge */
> -       if (final_rise_delay.maxlen >= 10 ||
> +       if (final_rise_delay.maxlen >= ENOUGH_MARGIN_MIN ||

Clean up. Move to separate change.

>             (final_rise_delay.start == 0 && final_rise_delay.maxlen >= 4))
>                 goto skip_fall;
>
> @@ -1422,6 +1525,7 @@ static int msdc_tune_data(struct mmc_host *mmc, u32 opcode)
>                 final_delay = final_fall_delay.final_phase;
>         }
>
> +       dev_info(host->dev, "Final data pad delay: %x\n", final_delay);

dev_info() -> dev_dbg() or remove it.

>         return final_delay == 0xff ? -EIO : 0;
>  }
>
> @@ -1430,10 +1534,13 @@ static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode)
>         struct msdc_host *host = mmc_priv(mmc);
>         int ret;
>
> +       if (host->hs400_mode)
> +               ret = hs400_tune_response(mmc, opcode);
> +       else
>         ret = msdc_tune_response(mmc, opcode);

Because of the new else clause, seems like above needs an intendation.

>         if (ret == -EIO) {
>                 dev_err(host->dev, "Tune response fail!\n");
> -               return ret;
> +               goto out;

Not needed, remove the label and this change.

>         }
>         if (host->hs400_mode == false) {
>                 ret = msdc_tune_data(mmc, opcode);
> @@ -1443,6 +1550,8 @@ static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode)
>
>         host->saved_tune_para.iocon = readl(host->base + MSDC_IOCON);
>         host->saved_tune_para.pad_tune = readl(host->base + MSDC_PAD_TUNE);
> +       host->saved_tune_para.pad_cmd_tune = readl(host->base + PAD_CMD_TUNE);
> +out:
>         return ret;
>  }
>
> @@ -1553,6 +1662,18 @@ static int msdc_drv_probe(struct platform_device *pdev)
>                 dev_dbg(&pdev->dev, "hs400-ds-delay: %x\n",
>                         host->hs400_ds_delay);
>
> +       if (!of_property_read_u32(pdev->dev.of_node, "hs200-cmd-int-delay",
> +                                 &host->hs200_cmd_int_delay))
> +               dev_dbg(&pdev->dev, "host->hs200-cmd-int-delay: %x\n",
> +                       host->hs200_cmd_int_delay);
> +       if (!of_property_read_u32(pdev->dev.of_node, "hs400-cmd-int-delay",
> +                                 &host->hs400_cmd_int_delay))
> +               dev_dbg(&pdev->dev, "host->hs400-cmd-int-delay: %x\n",
> +                       host->hs400_cmd_int_delay);
> +       if (!of_property_read_u32(pdev->dev.of_node, "cmd-resp-sel",
> +                                 &host->hs200_cmd_resp_sel))
> +               dev_dbg(&pdev->dev, "host->hs200_cmd-resp-sel: %x\n",
> +                       host->hs200_cmd_resp_sel);

I suggest you take the oppotunity to move the MTK DTS parsing into its
own function, include the existing parsing of the "hs400-ds-delay".
This improve the readablitity of the code.

>         host->dev = &pdev->dev;
>         host->mmc = mmc;
>         host->src_clk_freq = clk_get_rate(host->src_clk);
> @@ -1663,6 +1784,7 @@ static void msdc_save_reg(struct msdc_host *host)
>         host->save_para.patch_bit0 = readl(host->base + MSDC_PATCH_BIT);
>         host->save_para.patch_bit1 = readl(host->base + MSDC_PATCH_BIT1);
>         host->save_para.pad_ds_tune = readl(host->base + PAD_DS_TUNE);
> +       host->save_para.pad_cmd_tune = readl(host->base + PAD_CMD_TUNE);
>         host->save_para.emmc50_cfg0 = readl(host->base + EMMC50_CFG0);
>  }
>
> @@ -1675,6 +1797,7 @@ static void msdc_restore_reg(struct msdc_host *host)
>         writel(host->save_para.patch_bit0, host->base + MSDC_PATCH_BIT);
>         writel(host->save_para.patch_bit1, host->base + MSDC_PATCH_BIT1);
>         writel(host->save_para.pad_ds_tune, host->base + PAD_DS_TUNE);
> +       writel(host->save_para.pad_cmd_tune, host->base + PAD_CMD_TUNE);
>         writel(host->save_para.emmc50_cfg0, host->base + EMMC50_CFG0);
>  }
>
> --
> 1.7.9.5
>

Kind regards
Uffe
Yong Mao Jan. 13, 2017, 10:17 a.m. UTC | #2
On Thu, 2017-01-12 at 11:39 +0100, Ulf Hansson wrote:
> On 12 January 2017 at 11:04, Yong Mao <yong.mao@mediatek.com> wrote:
> > From: yong mao <yong.mao@mediatek.com>
> >
> > CMD response CRC error may cause cannot boot up
> > Change to use data tune for CMD line
> > Separate cmd internal delay for HS200/HS400 mode
> 
> Please try to work a little bit on improving the change log. Moreover
> as this is a fix for a regression (it seems like so?), please try to
> make that clear.
This change can fix CMD respose CRC issue in our platform.
I will try to make it clear in next version.

> 
> >
> > Signed-off-by: Yong Mao <yong.mao@mediatek.com>
> > Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
> > ---
> >  arch/arm64/boot/dts/mediatek/mt8173-evb.dts |    3 +
> 
> Changes to the DTS files should be a separate change. Please split it
> into its own patch.
> 
> >  drivers/mmc/host/mtk-sd.c                   |  169 +++++++++++++++++++++++----
> >  2 files changed, 149 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
> > index 0ecaad4..29c3100 100644
> > --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
> > +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
> > @@ -134,6 +134,9 @@
> >         bus-width = <8>;
> >         max-frequency = <50000000>;
> >         cap-mmc-highspeed;
> > +       hs200-cmd-int-delay = <26>;
> > +       hs400-cmd-int-delay = <14>;
> > +       cmd-resp-sel = <0>; /* 0: rising, 1: falling */
> >         vmmc-supply = <&mt6397_vemc_3v3_reg>;
> >         vqmmc-supply = <&mt6397_vio18_reg>;
> >         non-removable;
> > diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
> > index 80ba034..93eb395 100644
> > --- a/drivers/mmc/host/mtk-sd.c
> > +++ b/drivers/mmc/host/mtk-sd.c
> > @@ -75,6 +75,7 @@
> >  #define MSDC_PATCH_BIT1  0xb4
> >  #define MSDC_PAD_TUNE    0xec
> >  #define PAD_DS_TUNE      0x188
> > +#define PAD_CMD_TUNE     0x18c
> >  #define EMMC50_CFG0      0x208
> >
> >  /*--------------------------------------------------------------------------*/
> > @@ -210,12 +211,17 @@
> >  #define MSDC_PATCH_BIT_SPCPUSH    (0x1 << 29)  /* RW */
> >  #define MSDC_PATCH_BIT_DECRCTMO   (0x1 << 30)  /* RW */
> >
> > -#define MSDC_PAD_TUNE_DATRRDLY   (0x1f <<  8)  /* RW */
> > -#define MSDC_PAD_TUNE_CMDRDLY    (0x1f << 16)  /* RW */
> > +#define MSDC_PAD_TUNE_DATWRDLY    (0x1f <<  0)  /* RW */
> > +#define MSDC_PAD_TUNE_DATRRDLY    (0x1f <<  8) /* RW */
> > +#define MSDC_PAD_TUNE_CMDRDLY     (0x1f << 16)  /* RW */
> > +#define MSDC_PAD_TUNE_CMDRRDLY    (0x1f << 22)  /* RW */
> 
> Is there a white space change somewhere here? I don't see any changes
> to MSDC_PAD_TUNE_DATRRDLY and MSDC_PAD_TUNE_CMDRDLY.
> 
Sorry. I will fix in next version.

> > +#define MSDC_PAD_TUNE_CLKTDLY     (0x1f << 27)  /* RW */
> >
> > -#define PAD_DS_TUNE_DLY1         (0x1f << 2)   /* RW */
> > -#define PAD_DS_TUNE_DLY2         (0x1f << 7)   /* RW */
> > -#define PAD_DS_TUNE_DLY3         (0x1f << 12)  /* RW */
> > +#define PAD_DS_TUNE_DLY1          (0x1f << 2)   /* RW */
> > +#define PAD_DS_TUNE_DLY2          (0x1f << 7)   /* RW */
> > +#define PAD_DS_TUNE_DLY3          (0x1f << 12)  /* RW */
> > +
> 
> Ditto.
Ditto.

> 
> > +#define PAD_CMD_TUNE_RX_DLY3      (0x1f << 1)   /* RW */
> >
> >  #define EMMC50_CFG_PADCMD_LATCHCK (0x1 << 0)   /* RW */
> >  #define EMMC50_CFG_CRCSTS_EDGE    (0x1 << 3)   /* RW */
> > @@ -236,7 +242,9 @@
> >  #define CMD_TIMEOUT         (HZ/10 * 5)        /* 100ms x5 */
> >  #define DAT_TIMEOUT         (HZ    * 5)        /* 1000ms x5 */
> >
> > -#define PAD_DELAY_MAX  32 /* PAD delay cells */
> > +#define PAD_DELAY_MAX            32 /* PAD delay cells */
> 
> Ditto.
> 
Ditto.

> > +#define ENOUGH_MARGIN_MIN        12 /* Enough Margin */
> > +#define PREFER_START_POS_MAX     4  /* Prefer start position */
> >  /*--------------------------------------------------------------------------*/
> >  /* Descriptor Structure                                                     */
> >  /*--------------------------------------------------------------------------*/
> > @@ -284,12 +292,14 @@ struct msdc_save_para {
> >         u32 patch_bit0;
> >         u32 patch_bit1;
> >         u32 pad_ds_tune;
> > +       u32 pad_cmd_tune;
> >         u32 emmc50_cfg0;
> >  };
> >
> >  struct msdc_tune_para {
> >         u32 iocon;
> >         u32 pad_tune;
> > +       u32 pad_cmd_tune;
> >  };
> >
> >  struct msdc_delay_phase {
> > @@ -331,6 +341,9 @@ struct msdc_host {
> >         unsigned char timing;
> >         bool vqmmc_enabled;
> >         u32 hs400_ds_delay;
> > +       u32 hs200_cmd_int_delay; /* cmd internal delay for HS200/SDR104 */
> > +       u32 hs400_cmd_int_delay; /* cmd internal delay for HS400 */
> > +       u32 hs200_cmd_resp_sel; /* cmd response sample selection */
> >         bool hs400_mode;        /* current eMMC will run at hs400 mode */
> >         struct msdc_save_para save_para; /* used when gate HCLK */
> >         struct msdc_tune_para def_tune_para; /* default tune setting */
> > @@ -596,12 +609,21 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
> >          */
> >         if (host->sclk <= 52000000) {
> >                 writel(host->def_tune_para.iocon, host->base + MSDC_IOCON);
> > -               writel(host->def_tune_para.pad_tune, host->base + MSDC_PAD_TUNE);
> > +               writel(host->def_tune_para.pad_tune,
> > +                      host->base + MSDC_PAD_TUNE);
> 
> Please don't change code just because you feel like doing it. This is
> a completely unessarry change and it makes it harder for me to review.
> 
> Can you go thorugh the complete patch and make sure to undo all
> similar changes, there are more of them.
> 
Sorry. I will fix in next version.

> >         } else {
> > -               writel(host->saved_tune_para.iocon, host->base + MSDC_IOCON);
> > -               writel(host->saved_tune_para.pad_tune, host->base + MSDC_PAD_TUNE);
> > +               writel(host->saved_tune_para.iocon,
> > +                      host->base + MSDC_IOCON);
> > +               writel(host->saved_tune_para.pad_tune,
> > +                      host->base + MSDC_PAD_TUNE);
> > +               writel(host->saved_tune_para.pad_cmd_tune,
> > +                      host->base + PAD_CMD_TUNE);
> >         }
> >
> > +       if (timing == MMC_TIMING_MMC_HS400)
> > +               sdr_set_field(host->base + PAD_CMD_TUNE,
> > +                             MSDC_PAD_TUNE_CMDRRDLY,
> > +                             host->hs400_cmd_int_delay);
> >         dev_dbg(host->dev, "sclk: %d, timing: %d\n", host->sclk, timing);
> >  }
> >
> > @@ -1302,7 +1324,8 @@ static struct msdc_delay_phase get_best_delay(struct msdc_host *host, u32 delay)
> >                         len_final = len;
> >                 }
> >                 start += len ? len : 1;
> > -               if (len >= 8 && start_final < 4)
> > +               if (len >= ENOUGH_MARGIN_MIN &&
> > +                   start_final < PREFER_START_POS_MAX)
> >                         break;
> >         }
> >
> > @@ -1325,48 +1348,128 @@ static int msdc_tune_response(struct mmc_host *mmc, u32 opcode)
> >         struct msdc_host *host = mmc_priv(mmc);
> >         u32 rise_delay = 0, fall_delay = 0;
> >         struct msdc_delay_phase final_rise_delay, final_fall_delay = { 0,};
> > +       struct msdc_delay_phase internal_delay_phase;
> >         u8 final_delay, final_maxlen;
> > +       u32 internal_delay = 0;
> >         int cmd_err;
> > -       int i;
> > +       int i, j;
> >
> > +       if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
> > +           mmc->ios.timing == MMC_TIMING_UHS_SDR104)
> > +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> > +                             MSDC_PAD_TUNE_CMDRRDLY,
> > +                             host->hs200_cmd_int_delay);
> >         sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> >         for (i = 0 ; i < PAD_DELAY_MAX; i++) {
> >                 sdr_set_field(host->base + MSDC_PAD_TUNE,
> >                               MSDC_PAD_TUNE_CMDRDLY, i);
> > -               mmc_send_tuning(mmc, opcode, &cmd_err);
> > -               if (!cmd_err)
> > -                       rise_delay |= (1 << i);
> > +               for (j = 0; j < 3; j++) {
> 
> Any reason to why looping three times makes sense? Maybe add a comment?
> 
Using the same parameters, it may sometimes pass the test,
but sometimes it may fail. To make sure the parameters is 
more stable, we test 3 times for on set of parameters.

Anyway, I will add comment here in next version.

> > +                       mmc_send_tuning(mmc, opcode, &cmd_err);
> > +                       if (!cmd_err) {
> > +                               rise_delay |= (1 << i);
> > +                       } else {
> > +                               rise_delay &= ~(1 << i);
> > +                               break;
> > +                       }
> > +               }
> >         }
> >         final_rise_delay = get_best_delay(host, rise_delay);
> >         /* if rising edge has enough margin, then do not scan falling edge */
> > -       if (final_rise_delay.maxlen >= 10 ||
> > -           (final_rise_delay.start == 0 && final_rise_delay.maxlen >= 4))
> > +       if (final_rise_delay.maxlen >= ENOUGH_MARGIN_MIN &&
> > +           final_rise_delay.start < PREFER_START_POS_MAX)
> 
> This looks like clean-ups, as you are converting from magic numbers to
> defines. Please make this kind of changes separately.
I will drop this change in next version.

> 
> >                 goto skip_fall;
> >
> >         sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> >         for (i = 0; i < PAD_DELAY_MAX; i++) {
> >                 sdr_set_field(host->base + MSDC_PAD_TUNE,
> >                               MSDC_PAD_TUNE_CMDRDLY, i);
> > -               mmc_send_tuning(mmc, opcode, &cmd_err);
> > -               if (!cmd_err)
> > -                       fall_delay |= (1 << i);
> > +               for (j = 0; j < 3; j++) {
> 
> 3?
> 
> > +                       mmc_send_tuning(mmc, opcode, &cmd_err);
> > +                       if (!cmd_err) {
> > +                               fall_delay |= (1 << i);
> > +                       } else {
> > +                               fall_delay &= ~(1 << i);
> > +                               break;
> > +                       };
> > +               }
> >         }
> >         final_fall_delay = get_best_delay(host, fall_delay);
> >
> >  skip_fall:
> >         final_maxlen = max(final_rise_delay.maxlen, final_fall_delay.maxlen);
> > +       if (final_fall_delay.maxlen >= ENOUGH_MARGIN_MIN &&
> > +           final_fall_delay.start < PREFER_START_POS_MAX)
> > +               final_maxlen = final_fall_delay.maxlen;
> >         if (final_maxlen == final_rise_delay.maxlen) {
> >                 sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> > -               sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRDLY,
> > +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> > +                             MSDC_PAD_TUNE_CMDRDLY,
> >                               final_rise_delay.final_phase);
> >                 final_delay = final_rise_delay.final_phase;
> >         } else {
> >                 sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
> > -               sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRDLY,
> > +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> > +                             MSDC_PAD_TUNE_CMDRDLY,
> >                               final_fall_delay.final_phase);
> >                 final_delay = final_fall_delay.final_phase;
> >         }
> > +       if (host->hs200_cmd_int_delay)
> > +               goto skip_internal;
> >
> > +       for (i = 0; i < PAD_DELAY_MAX; i++) {
> > +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> > +                             MSDC_PAD_TUNE_CMDRRDLY, i);
> > +               mmc_send_tuning(mmc, opcode, &cmd_err);
> > +               if (!cmd_err)
> > +                       internal_delay |= (1 << i);
> > +       }
> > +       dev_info(host->dev, "Final internal delay: 0x%x\n", internal_delay);
> 
> I don't think dev_info() is what you want, right? Perhaps dev_dbg(),
> anything at all.
> 
Yes. We should use dev_dbg() here. Will be fixed in next version.

> > +       internal_delay_phase = get_best_delay(host, internal_delay);
> > +       sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRRDLY,
> > +                     internal_delay_phase.final_phase);
> > +skip_internal:
> > +       dev_info(host->dev, "Final cmd pad delay: %x\n", final_delay);
> 
> I don't think dev_info() is what you want, right? Perhaps dev_dbg(),
> anything at all.
> 
Yes. We should use dev_dbg() here. Will be fixed in next version.

> > +       return final_delay == 0xff ? -EIO : 0;
> > +}
> > +
> > +static int hs400_tune_response(struct mmc_host *mmc, u32 opcode)
> > +{
> > +       struct msdc_host *host = mmc_priv(mmc);
> > +       u32 cmd_delay = 0;
> > +       struct msdc_delay_phase final_cmd_delay = { 0,};
> > +       u8 final_delay;
> > +       int cmd_err;
> > +       int i, j;
> > +
> > +       /* select EMMC50 PAD CMD tune */
> > +       sdr_set_bits(host->base + PAD_CMD_TUNE, BIT(0));
> > +
> > +       if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
> > +           mmc->ios.timing == MMC_TIMING_UHS_SDR104)
> > +               sdr_set_field(host->base + MSDC_PAD_TUNE,
> > +                             MSDC_PAD_TUNE_CMDRRDLY,
> > +                             host->hs200_cmd_int_delay);
> > +       sdr_set_field(host->base + MSDC_IOCON, MSDC_IOCON_RSPL,
> > +                     host->hs200_cmd_resp_sel);
> > +       for (i = 0 ; i < PAD_DELAY_MAX; i++) {
> > +               sdr_set_field(host->base + PAD_CMD_TUNE,
> > +                             PAD_CMD_TUNE_RX_DLY3, i);
> > +               for (j = 0; j < 3; j++) {
> 
> 3?
> 
> > +                       mmc_send_tuning(mmc, opcode, &cmd_err);
> > +                       if (!cmd_err) {
> > +                               cmd_delay |= (1 << i);
> > +                       } else {
> > +                               cmd_delay &= ~(1 << i);
> > +                               break;
> > +                       }
> > +               }
> > +       }
> > +       final_cmd_delay = get_best_delay(host, cmd_delay);
> > +       sdr_set_field(host->base + PAD_CMD_TUNE, PAD_CMD_TUNE_RX_DLY3,
> > +                     final_cmd_delay.final_phase);
> > +       final_delay = final_cmd_delay.final_phase;
> > +
> > +       dev_info(host->dev, "Final cmd pad delay: %x\n", final_delay);
> 
> dev_info() -> dev_dbg() or remove it.
We will use dev_dbg() instead of dev_info(). Will be fixed in next
version.

> 
> >         return final_delay == 0xff ? -EIO : 0;
> >  }
> >
> > @@ -1389,7 +1492,7 @@ static int msdc_tune_data(struct mmc_host *mmc, u32 opcode)
> >         }
> >         final_rise_delay = get_best_delay(host, rise_delay);
> >         /* if rising edge has enough margin, then do not scan falling edge */
> > -       if (final_rise_delay.maxlen >= 10 ||
> > +       if (final_rise_delay.maxlen >= ENOUGH_MARGIN_MIN ||
> 
> Clean up. Move to separate change.
> 
Will drop it in current change.

> >             (final_rise_delay.start == 0 && final_rise_delay.maxlen >= 4))
> >                 goto skip_fall;
> >
> > @@ -1422,6 +1525,7 @@ static int msdc_tune_data(struct mmc_host *mmc, u32 opcode)
> >                 final_delay = final_fall_delay.final_phase;
> >         }
> >
> > +       dev_info(host->dev, "Final data pad delay: %x\n", final_delay);
> 
> dev_info() -> dev_dbg() or remove it.
> 
We will use dev_dbg() instead of dev_info(). Will be fixed in next
version.

> >         return final_delay == 0xff ? -EIO : 0;
> >  }
> >
> > @@ -1430,10 +1534,13 @@ static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode)
> >         struct msdc_host *host = mmc_priv(mmc);
> >         int ret;
> >
> > +       if (host->hs400_mode)
> > +               ret = hs400_tune_response(mmc, opcode);
> > +       else
> >         ret = msdc_tune_response(mmc, opcode);
> 
> Because of the new else clause, seems like above needs an intendation.
> 
Sorry. Will be fixed in next version.

> >         if (ret == -EIO) {
> >                 dev_err(host->dev, "Tune response fail!\n");
> > -               return ret;
> > +               goto out;
> 
> Not needed, remove the label and this change.
> 
Will drop it in this change. 

> >         }
> >         if (host->hs400_mode == false) {
> >                 ret = msdc_tune_data(mmc, opcode);
> > @@ -1443,6 +1550,8 @@ static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode)
> >
> >         host->saved_tune_para.iocon = readl(host->base + MSDC_IOCON);
> >         host->saved_tune_para.pad_tune = readl(host->base + MSDC_PAD_TUNE);
> > +       host->saved_tune_para.pad_cmd_tune = readl(host->base + PAD_CMD_TUNE);
> > +out:
> >         return ret;
> >  }
> >
> > @@ -1553,6 +1662,18 @@ static int msdc_drv_probe(struct platform_device *pdev)
> >                 dev_dbg(&pdev->dev, "hs400-ds-delay: %x\n",
> >                         host->hs400_ds_delay);
> >
> > +       if (!of_property_read_u32(pdev->dev.of_node, "hs200-cmd-int-delay",
> > +                                 &host->hs200_cmd_int_delay))
> > +               dev_dbg(&pdev->dev, "host->hs200-cmd-int-delay: %x\n",
> > +                       host->hs200_cmd_int_delay);
> > +       if (!of_property_read_u32(pdev->dev.of_node, "hs400-cmd-int-delay",
> > +                                 &host->hs400_cmd_int_delay))
> > +               dev_dbg(&pdev->dev, "host->hs400-cmd-int-delay: %x\n",
> > +                       host->hs400_cmd_int_delay);
> > +       if (!of_property_read_u32(pdev->dev.of_node, "cmd-resp-sel",
> > +                                 &host->hs200_cmd_resp_sel))
> > +               dev_dbg(&pdev->dev, "host->hs200_cmd-resp-sel: %x\n",
> > +                       host->hs200_cmd_resp_sel);
> 
> I suggest you take the oppotunity to move the MTK DTS parsing into its
> own function, include the existing parsing of the "hs400-ds-delay".
> This improve the readablitity of the code.
> 
Thanks. We will move all of MTK DTS parsing into msdc_of_property_parse
function.

> >         host->dev = &pdev->dev;
> >         host->mmc = mmc;
> >         host->src_clk_freq = clk_get_rate(host->src_clk);
> > @@ -1663,6 +1784,7 @@ static void msdc_save_reg(struct msdc_host *host)
> >         host->save_para.patch_bit0 = readl(host->base + MSDC_PATCH_BIT);
> >         host->save_para.patch_bit1 = readl(host->base + MSDC_PATCH_BIT1);
> >         host->save_para.pad_ds_tune = readl(host->base + PAD_DS_TUNE);
> > +       host->save_para.pad_cmd_tune = readl(host->base + PAD_CMD_TUNE);
> >         host->save_para.emmc50_cfg0 = readl(host->base + EMMC50_CFG0);
> >  }
> >
> > @@ -1675,6 +1797,7 @@ static void msdc_restore_reg(struct msdc_host *host)
> >         writel(host->save_para.patch_bit0, host->base + MSDC_PATCH_BIT);
> >         writel(host->save_para.patch_bit1, host->base + MSDC_PATCH_BIT1);
> >         writel(host->save_para.pad_ds_tune, host->base + PAD_DS_TUNE);
> > +       writel(host->save_para.pad_cmd_tune, host->base + PAD_CMD_TUNE);
> >         writel(host->save_para.emmc50_cfg0, host->base + EMMC50_CFG0);
> >  }
> >
> > --
> > 1.7.9.5
> >
> 
> Kind regards
> Uffe
Best Regards,
Yong Mao.
diff mbox

Patch

diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 0ecaad4..29c3100 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -134,6 +134,9 @@ 
 	bus-width = <8>;
 	max-frequency = <50000000>;
 	cap-mmc-highspeed;
+	hs200-cmd-int-delay = <26>;
+	hs400-cmd-int-delay = <14>;
+	cmd-resp-sel = <0>; /* 0: rising, 1: falling */
 	vmmc-supply = <&mt6397_vemc_3v3_reg>;
 	vqmmc-supply = <&mt6397_vio18_reg>;
 	non-removable;
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 80ba034..93eb395 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -75,6 +75,7 @@ 
 #define MSDC_PATCH_BIT1  0xb4
 #define MSDC_PAD_TUNE    0xec
 #define PAD_DS_TUNE      0x188
+#define PAD_CMD_TUNE     0x18c
 #define EMMC50_CFG0      0x208
 
 /*--------------------------------------------------------------------------*/
@@ -210,12 +211,17 @@ 
 #define MSDC_PATCH_BIT_SPCPUSH    (0x1 << 29)	/* RW */
 #define MSDC_PATCH_BIT_DECRCTMO   (0x1 << 30)	/* RW */
 
-#define MSDC_PAD_TUNE_DATRRDLY	  (0x1f <<  8)	/* RW */
-#define MSDC_PAD_TUNE_CMDRDLY	  (0x1f << 16)  /* RW */
+#define MSDC_PAD_TUNE_DATWRDLY    (0x1f <<  0)  /* RW */
+#define MSDC_PAD_TUNE_DATRRDLY    (0x1f <<  8)	/* RW */
+#define MSDC_PAD_TUNE_CMDRDLY     (0x1f << 16)  /* RW */
+#define MSDC_PAD_TUNE_CMDRRDLY    (0x1f << 22)  /* RW */
+#define MSDC_PAD_TUNE_CLKTDLY     (0x1f << 27)  /* RW */
 
-#define PAD_DS_TUNE_DLY1	  (0x1f << 2)   /* RW */
-#define PAD_DS_TUNE_DLY2	  (0x1f << 7)   /* RW */
-#define PAD_DS_TUNE_DLY3	  (0x1f << 12)  /* RW */
+#define PAD_DS_TUNE_DLY1          (0x1f << 2)   /* RW */
+#define PAD_DS_TUNE_DLY2          (0x1f << 7)   /* RW */
+#define PAD_DS_TUNE_DLY3          (0x1f << 12)  /* RW */
+
+#define PAD_CMD_TUNE_RX_DLY3      (0x1f << 1)   /* RW */
 
 #define EMMC50_CFG_PADCMD_LATCHCK (0x1 << 0)   /* RW */
 #define EMMC50_CFG_CRCSTS_EDGE    (0x1 << 3)   /* RW */
@@ -236,7 +242,9 @@ 
 #define CMD_TIMEOUT         (HZ/10 * 5)	/* 100ms x5 */
 #define DAT_TIMEOUT         (HZ    * 5)	/* 1000ms x5 */
 
-#define PAD_DELAY_MAX	32 /* PAD delay cells */
+#define PAD_DELAY_MAX            32 /* PAD delay cells */
+#define ENOUGH_MARGIN_MIN        12 /* Enough Margin */
+#define PREFER_START_POS_MAX     4  /* Prefer start position */
 /*--------------------------------------------------------------------------*/
 /* Descriptor Structure                                                     */
 /*--------------------------------------------------------------------------*/
@@ -284,12 +292,14 @@  struct msdc_save_para {
 	u32 patch_bit0;
 	u32 patch_bit1;
 	u32 pad_ds_tune;
+	u32 pad_cmd_tune;
 	u32 emmc50_cfg0;
 };
 
 struct msdc_tune_para {
 	u32 iocon;
 	u32 pad_tune;
+	u32 pad_cmd_tune;
 };
 
 struct msdc_delay_phase {
@@ -331,6 +341,9 @@  struct msdc_host {
 	unsigned char timing;
 	bool vqmmc_enabled;
 	u32 hs400_ds_delay;
+	u32 hs200_cmd_int_delay; /* cmd internal delay for HS200/SDR104 */
+	u32 hs400_cmd_int_delay; /* cmd internal delay for HS400 */
+	u32 hs200_cmd_resp_sel; /* cmd response sample selection */
 	bool hs400_mode;	/* current eMMC will run at hs400 mode */
 	struct msdc_save_para save_para; /* used when gate HCLK */
 	struct msdc_tune_para def_tune_para; /* default tune setting */
@@ -596,12 +609,21 @@  static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz)
 	 */
 	if (host->sclk <= 52000000) {
 		writel(host->def_tune_para.iocon, host->base + MSDC_IOCON);
-		writel(host->def_tune_para.pad_tune, host->base + MSDC_PAD_TUNE);
+		writel(host->def_tune_para.pad_tune,
+		       host->base + MSDC_PAD_TUNE);
 	} else {
-		writel(host->saved_tune_para.iocon, host->base + MSDC_IOCON);
-		writel(host->saved_tune_para.pad_tune, host->base + MSDC_PAD_TUNE);
+		writel(host->saved_tune_para.iocon,
+		       host->base + MSDC_IOCON);
+		writel(host->saved_tune_para.pad_tune,
+		       host->base + MSDC_PAD_TUNE);
+		writel(host->saved_tune_para.pad_cmd_tune,
+		       host->base + PAD_CMD_TUNE);
 	}
 
+	if (timing == MMC_TIMING_MMC_HS400)
+		sdr_set_field(host->base + PAD_CMD_TUNE,
+			      MSDC_PAD_TUNE_CMDRRDLY,
+			      host->hs400_cmd_int_delay);
 	dev_dbg(host->dev, "sclk: %d, timing: %d\n", host->sclk, timing);
 }
 
@@ -1302,7 +1324,8 @@  static struct msdc_delay_phase get_best_delay(struct msdc_host *host, u32 delay)
 			len_final = len;
 		}
 		start += len ? len : 1;
-		if (len >= 8 && start_final < 4)
+		if (len >= ENOUGH_MARGIN_MIN &&
+		    start_final < PREFER_START_POS_MAX)
 			break;
 	}
 
@@ -1325,48 +1348,128 @@  static int msdc_tune_response(struct mmc_host *mmc, u32 opcode)
 	struct msdc_host *host = mmc_priv(mmc);
 	u32 rise_delay = 0, fall_delay = 0;
 	struct msdc_delay_phase final_rise_delay, final_fall_delay = { 0,};
+	struct msdc_delay_phase internal_delay_phase;
 	u8 final_delay, final_maxlen;
+	u32 internal_delay = 0;
 	int cmd_err;
-	int i;
+	int i, j;
 
+	if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
+	    mmc->ios.timing == MMC_TIMING_UHS_SDR104)
+		sdr_set_field(host->base + MSDC_PAD_TUNE,
+			      MSDC_PAD_TUNE_CMDRRDLY,
+			      host->hs200_cmd_int_delay);
 	sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
 	for (i = 0 ; i < PAD_DELAY_MAX; i++) {
 		sdr_set_field(host->base + MSDC_PAD_TUNE,
 			      MSDC_PAD_TUNE_CMDRDLY, i);
-		mmc_send_tuning(mmc, opcode, &cmd_err);
-		if (!cmd_err)
-			rise_delay |= (1 << i);
+		for (j = 0; j < 3; j++) {
+			mmc_send_tuning(mmc, opcode, &cmd_err);
+			if (!cmd_err) {
+				rise_delay |= (1 << i);
+			} else {
+				rise_delay &= ~(1 << i);
+				break;
+			}
+		}
 	}
 	final_rise_delay = get_best_delay(host, rise_delay);
 	/* if rising edge has enough margin, then do not scan falling edge */
-	if (final_rise_delay.maxlen >= 10 ||
-	    (final_rise_delay.start == 0 && final_rise_delay.maxlen >= 4))
+	if (final_rise_delay.maxlen >= ENOUGH_MARGIN_MIN &&
+	    final_rise_delay.start < PREFER_START_POS_MAX)
 		goto skip_fall;
 
 	sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
 	for (i = 0; i < PAD_DELAY_MAX; i++) {
 		sdr_set_field(host->base + MSDC_PAD_TUNE,
 			      MSDC_PAD_TUNE_CMDRDLY, i);
-		mmc_send_tuning(mmc, opcode, &cmd_err);
-		if (!cmd_err)
-			fall_delay |= (1 << i);
+		for (j = 0; j < 3; j++) {
+			mmc_send_tuning(mmc, opcode, &cmd_err);
+			if (!cmd_err) {
+				fall_delay |= (1 << i);
+			} else {
+				fall_delay &= ~(1 << i);
+				break;
+			};
+		}
 	}
 	final_fall_delay = get_best_delay(host, fall_delay);
 
 skip_fall:
 	final_maxlen = max(final_rise_delay.maxlen, final_fall_delay.maxlen);
+	if (final_fall_delay.maxlen >= ENOUGH_MARGIN_MIN &&
+	    final_fall_delay.start < PREFER_START_POS_MAX)
+		final_maxlen = final_fall_delay.maxlen;
 	if (final_maxlen == final_rise_delay.maxlen) {
 		sdr_clr_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
-		sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRDLY,
+		sdr_set_field(host->base + MSDC_PAD_TUNE,
+			      MSDC_PAD_TUNE_CMDRDLY,
 			      final_rise_delay.final_phase);
 		final_delay = final_rise_delay.final_phase;
 	} else {
 		sdr_set_bits(host->base + MSDC_IOCON, MSDC_IOCON_RSPL);
-		sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRDLY,
+		sdr_set_field(host->base + MSDC_PAD_TUNE,
+			      MSDC_PAD_TUNE_CMDRDLY,
 			      final_fall_delay.final_phase);
 		final_delay = final_fall_delay.final_phase;
 	}
+	if (host->hs200_cmd_int_delay)
+		goto skip_internal;
 
+	for (i = 0; i < PAD_DELAY_MAX; i++) {
+		sdr_set_field(host->base + MSDC_PAD_TUNE,
+			      MSDC_PAD_TUNE_CMDRRDLY, i);
+		mmc_send_tuning(mmc, opcode, &cmd_err);
+		if (!cmd_err)
+			internal_delay |= (1 << i);
+	}
+	dev_info(host->dev, "Final internal delay: 0x%x\n", internal_delay);
+	internal_delay_phase = get_best_delay(host, internal_delay);
+	sdr_set_field(host->base + MSDC_PAD_TUNE, MSDC_PAD_TUNE_CMDRRDLY,
+		      internal_delay_phase.final_phase);
+skip_internal:
+	dev_info(host->dev, "Final cmd pad delay: %x\n", final_delay);
+	return final_delay == 0xff ? -EIO : 0;
+}
+
+static int hs400_tune_response(struct mmc_host *mmc, u32 opcode)
+{
+	struct msdc_host *host = mmc_priv(mmc);
+	u32 cmd_delay = 0;
+	struct msdc_delay_phase final_cmd_delay = { 0,};
+	u8 final_delay;
+	int cmd_err;
+	int i, j;
+
+	/* select EMMC50 PAD CMD tune */
+	sdr_set_bits(host->base + PAD_CMD_TUNE, BIT(0));
+
+	if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
+	    mmc->ios.timing == MMC_TIMING_UHS_SDR104)
+		sdr_set_field(host->base + MSDC_PAD_TUNE,
+			      MSDC_PAD_TUNE_CMDRRDLY,
+			      host->hs200_cmd_int_delay);
+	sdr_set_field(host->base + MSDC_IOCON, MSDC_IOCON_RSPL,
+		      host->hs200_cmd_resp_sel);
+	for (i = 0 ; i < PAD_DELAY_MAX; i++) {
+		sdr_set_field(host->base + PAD_CMD_TUNE,
+			      PAD_CMD_TUNE_RX_DLY3, i);
+		for (j = 0; j < 3; j++) {
+			mmc_send_tuning(mmc, opcode, &cmd_err);
+			if (!cmd_err) {
+				cmd_delay |= (1 << i);
+			} else {
+				cmd_delay &= ~(1 << i);
+				break;
+			}
+		}
+	}
+	final_cmd_delay = get_best_delay(host, cmd_delay);
+	sdr_set_field(host->base + PAD_CMD_TUNE, PAD_CMD_TUNE_RX_DLY3,
+		      final_cmd_delay.final_phase);
+	final_delay = final_cmd_delay.final_phase;
+
+	dev_info(host->dev, "Final cmd pad delay: %x\n", final_delay);
 	return final_delay == 0xff ? -EIO : 0;
 }
 
@@ -1389,7 +1492,7 @@  static int msdc_tune_data(struct mmc_host *mmc, u32 opcode)
 	}
 	final_rise_delay = get_best_delay(host, rise_delay);
 	/* if rising edge has enough margin, then do not scan falling edge */
-	if (final_rise_delay.maxlen >= 10 ||
+	if (final_rise_delay.maxlen >= ENOUGH_MARGIN_MIN ||
 	    (final_rise_delay.start == 0 && final_rise_delay.maxlen >= 4))
 		goto skip_fall;
 
@@ -1422,6 +1525,7 @@  static int msdc_tune_data(struct mmc_host *mmc, u32 opcode)
 		final_delay = final_fall_delay.final_phase;
 	}
 
+	dev_info(host->dev, "Final data pad delay: %x\n", final_delay);
 	return final_delay == 0xff ? -EIO : 0;
 }
 
@@ -1430,10 +1534,13 @@  static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	struct msdc_host *host = mmc_priv(mmc);
 	int ret;
 
+	if (host->hs400_mode)
+		ret = hs400_tune_response(mmc, opcode);
+	else
 	ret = msdc_tune_response(mmc, opcode);
 	if (ret == -EIO) {
 		dev_err(host->dev, "Tune response fail!\n");
-		return ret;
+		goto out;
 	}
 	if (host->hs400_mode == false) {
 		ret = msdc_tune_data(mmc, opcode);
@@ -1443,6 +1550,8 @@  static int msdc_execute_tuning(struct mmc_host *mmc, u32 opcode)
 
 	host->saved_tune_para.iocon = readl(host->base + MSDC_IOCON);
 	host->saved_tune_para.pad_tune = readl(host->base + MSDC_PAD_TUNE);
+	host->saved_tune_para.pad_cmd_tune = readl(host->base + PAD_CMD_TUNE);
+out:
 	return ret;
 }
 
@@ -1553,6 +1662,18 @@  static int msdc_drv_probe(struct platform_device *pdev)
 		dev_dbg(&pdev->dev, "hs400-ds-delay: %x\n",
 			host->hs400_ds_delay);
 
+	if (!of_property_read_u32(pdev->dev.of_node, "hs200-cmd-int-delay",
+				  &host->hs200_cmd_int_delay))
+		dev_dbg(&pdev->dev, "host->hs200-cmd-int-delay: %x\n",
+			host->hs200_cmd_int_delay);
+	if (!of_property_read_u32(pdev->dev.of_node, "hs400-cmd-int-delay",
+				  &host->hs400_cmd_int_delay))
+		dev_dbg(&pdev->dev, "host->hs400-cmd-int-delay: %x\n",
+			host->hs400_cmd_int_delay);
+	if (!of_property_read_u32(pdev->dev.of_node, "cmd-resp-sel",
+				  &host->hs200_cmd_resp_sel))
+		dev_dbg(&pdev->dev, "host->hs200_cmd-resp-sel: %x\n",
+			host->hs200_cmd_resp_sel);
 	host->dev = &pdev->dev;
 	host->mmc = mmc;
 	host->src_clk_freq = clk_get_rate(host->src_clk);
@@ -1663,6 +1784,7 @@  static void msdc_save_reg(struct msdc_host *host)
 	host->save_para.patch_bit0 = readl(host->base + MSDC_PATCH_BIT);
 	host->save_para.patch_bit1 = readl(host->base + MSDC_PATCH_BIT1);
 	host->save_para.pad_ds_tune = readl(host->base + PAD_DS_TUNE);
+	host->save_para.pad_cmd_tune = readl(host->base + PAD_CMD_TUNE);
 	host->save_para.emmc50_cfg0 = readl(host->base + EMMC50_CFG0);
 }
 
@@ -1675,6 +1797,7 @@  static void msdc_restore_reg(struct msdc_host *host)
 	writel(host->save_para.patch_bit0, host->base + MSDC_PATCH_BIT);
 	writel(host->save_para.patch_bit1, host->base + MSDC_PATCH_BIT1);
 	writel(host->save_para.pad_ds_tune, host->base + PAD_DS_TUNE);
+	writel(host->save_para.pad_cmd_tune, host->base + PAD_CMD_TUNE);
 	writel(host->save_para.emmc50_cfg0, host->base + EMMC50_CFG0);
 }