diff mbox

mmc: mediatek: fix request blocked by cancel_delayed_work

Message ID 1460963609-16179-1-git-send-email-chaotian.jing@mediatek.com (mailing list archive)
State New, archived
Headers show

Commit Message

Chaotian Jing April 18, 2016, 7:13 a.m. UTC
there are 2 points will cause could not call mmc_request_done()
and eventually cause the caller thread blocked.

A. if card was busy, cancel_delayed_work() will return false because
the delay work has not been scheduled, in this case, need put
mod_delayed_work() in front of msdc_cmd_is_ready()

B. if a request really need more than 5s(Some Sandisk TF card), it will
use cancel_delayed_work() to cancel itself, and also return false, so use
in_interrupt() to avoid this case

Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
---
 drivers/mmc/host/mtk-sd.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

Comments

Ulf Hansson April 22, 2016, 12:24 p.m. UTC | #1
On 18 April 2016 at 09:13, Chaotian Jing <chaotian.jing@mediatek.com> wrote:
> there are 2 points will cause could not call mmc_request_done()
> and eventually cause the caller thread blocked.
>
> A. if card was busy, cancel_delayed_work() will return false because
> the delay work has not been scheduled, in this case, need put
> mod_delayed_work() in front of msdc_cmd_is_ready()
>
> B. if a request really need more than 5s(Some Sandisk TF card), it will
> use cancel_delayed_work() to cancel itself, and also return false, so use
> in_interrupt() to avoid this case
>
> Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
> ---
>  drivers/mmc/host/mtk-sd.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
> index b17f30d..1511b1b 100644
> --- a/drivers/mmc/host/mtk-sd.c
> +++ b/drivers/mmc/host/mtk-sd.c
> @@ -724,7 +724,7 @@ static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
>         bool ret;
>
>         ret = cancel_delayed_work(&host->req_timeout);
> -       if (!ret) {
> +       if (!ret && in_interrupt()) {
>                 /* delay work already running */
>                 return;
>         }
> @@ -824,7 +824,12 @@ static inline bool msdc_cmd_is_ready(struct msdc_host *host,
>         }
>
>         if (mmc_resp_type(cmd) == MMC_RSP_R1B || cmd->data) {
> -               tmo = jiffies + msecs_to_jiffies(20);
> +               /*
> +                * 2550ms is from EXT_CSD[248], after switch to hs200,
> +                * using CMD13 to polling card status, it will get response
> +                * of 0x800, but EMMC still pull-low DAT0.
> +                */

Seems like you are solving a eMMC specific issue on your driver?

Perhaps we should try to use a card quirk instead?


> +               tmo = jiffies + msecs_to_jiffies(2550);
>                 /* R1B or with data, should check SDCBUSY */
>                 while ((readl(host->base + SDC_STS) & SDC_STS_SDCBUSY) &&
>                                 time_before(jiffies, tmo))
> @@ -847,6 +852,7 @@ static void msdc_start_command(struct msdc_host *host,
>         WARN_ON(host->cmd);
>         host->cmd = cmd;
>
> +       mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT);
>         if (!msdc_cmd_is_ready(host, mrq, cmd))
>                 return;
>
> @@ -858,7 +864,6 @@ static void msdc_start_command(struct msdc_host *host,
>
>         cmd->error = 0;
>         rawcmd = msdc_cmd_prepare_raw_cmd(host, mrq, cmd);
> -       mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT);
>
>         sdr_set_bits(host->base + MSDC_INTEN, cmd_ints_mask);
>         writel(cmd->arg, host->base + SDC_ARG);
> --
> 1.8.1.1.dirty
>

Kind regards
Uffe
Chaotian Jing April 23, 2016, 9:43 a.m. UTC | #2
Hi,
On Fri, 2016-04-22 at 14:24 +0200, Ulf Hansson wrote:
> On 18 April 2016 at 09:13, Chaotian Jing <chaotian.jing@mediatek.com> wrote:
> > there are 2 points will cause could not call mmc_request_done()
> > and eventually cause the caller thread blocked.
> >
> > A. if card was busy, cancel_delayed_work() will return false because
> > the delay work has not been scheduled, in this case, need put
> > mod_delayed_work() in front of msdc_cmd_is_ready()
> >
> > B. if a request really need more than 5s(Some Sandisk TF card), it will
> > use cancel_delayed_work() to cancel itself, and also return false, so use
> > in_interrupt() to avoid this case
> >
> > Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
> > ---
> >  drivers/mmc/host/mtk-sd.c | 11 ++++++++---
> >  1 file changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
> > index b17f30d..1511b1b 100644
> > --- a/drivers/mmc/host/mtk-sd.c
> > +++ b/drivers/mmc/host/mtk-sd.c
> > @@ -724,7 +724,7 @@ static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
> >         bool ret;
> >
> >         ret = cancel_delayed_work(&host->req_timeout);
> > -       if (!ret) {
> > +       if (!ret && in_interrupt()) {
> >                 /* delay work already running */
> >                 return;
> >         }
> > @@ -824,7 +824,12 @@ static inline bool msdc_cmd_is_ready(struct msdc_host *host,
> >         }
> >
> >         if (mmc_resp_type(cmd) == MMC_RSP_R1B || cmd->data) {
> > -               tmo = jiffies + msecs_to_jiffies(20);
> > +               /*
> > +                * 2550ms is from EXT_CSD[248], after switch to hs200,
> > +                * using CMD13 to polling card status, it will get response
> > +                * of 0x800, but EMMC still pull-low DAT0.
> > +                */
> 
> Seems like you are solving a eMMC specific issue on your driver?
> 
> Perhaps we should try to use a card quirk instead?

Actually, this is a Bug of __mmc_switch(), Per JEDEC Spec, while switch
speed mode, should not use CMD13 to get card status, as it's response
cannot reflect that if card was busy now, for this CMD6 switch HS200
case, I tried some Samsung/Sandisk/KSI eMMC, issue CMD13 will always get
0x800, even eMMC has already changed to transfer state and DAT0 is high,
the response of CMD13 is also 0x800, and will never be 0x900.
So, in __mmc_switch(), it's a bug to use CMD13 to know that if card has
already changed to transfer state.
But, Our host do not support MMC_CAP_WAIT_WHILE_BUSY, that's why we hit
this issue.

May you give some advice for this ?
Thx!
> 
> 
> > +               tmo = jiffies + msecs_to_jiffies(2550);
> >                 /* R1B or with data, should check SDCBUSY */
> >                 while ((readl(host->base + SDC_STS) & SDC_STS_SDCBUSY) &&
> >                                 time_before(jiffies, tmo))
> > @@ -847,6 +852,7 @@ static void msdc_start_command(struct msdc_host *host,
> >         WARN_ON(host->cmd);
> >         host->cmd = cmd;
> >
> > +       mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT);
> >         if (!msdc_cmd_is_ready(host, mrq, cmd))
> >                 return;
> >
> > @@ -858,7 +864,6 @@ static void msdc_start_command(struct msdc_host *host,
> >
> >         cmd->error = 0;
> >         rawcmd = msdc_cmd_prepare_raw_cmd(host, mrq, cmd);
> > -       mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT);
> >
> >         sdr_set_bits(host->base + MSDC_INTEN, cmd_ints_mask);
> >         writel(cmd->arg, host->base + SDC_ARG);
> > --
> > 1.8.1.1.dirty
> >
> 
> Kind regards
> Uffe
Ulf Hansson April 27, 2016, 10:02 a.m. UTC | #3
On 23 April 2016 at 11:43, Chaotian Jing <chaotian.jing@mediatek.com> wrote:
> Hi,
> On Fri, 2016-04-22 at 14:24 +0200, Ulf Hansson wrote:
>> On 18 April 2016 at 09:13, Chaotian Jing <chaotian.jing@mediatek.com> wrote:
>> > there are 2 points will cause could not call mmc_request_done()
>> > and eventually cause the caller thread blocked.
>> >
>> > A. if card was busy, cancel_delayed_work() will return false because
>> > the delay work has not been scheduled, in this case, need put
>> > mod_delayed_work() in front of msdc_cmd_is_ready()
>> >
>> > B. if a request really need more than 5s(Some Sandisk TF card), it will
>> > use cancel_delayed_work() to cancel itself, and also return false, so use
>> > in_interrupt() to avoid this case
>> >
>> > Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
>> > ---
>> >  drivers/mmc/host/mtk-sd.c | 11 ++++++++---
>> >  1 file changed, 8 insertions(+), 3 deletions(-)
>> >
>> > diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
>> > index b17f30d..1511b1b 100644
>> > --- a/drivers/mmc/host/mtk-sd.c
>> > +++ b/drivers/mmc/host/mtk-sd.c
>> > @@ -724,7 +724,7 @@ static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
>> >         bool ret;
>> >
>> >         ret = cancel_delayed_work(&host->req_timeout);
>> > -       if (!ret) {
>> > +       if (!ret && in_interrupt()) {
>> >                 /* delay work already running */
>> >                 return;
>> >         }
>> > @@ -824,7 +824,12 @@ static inline bool msdc_cmd_is_ready(struct msdc_host *host,
>> >         }
>> >
>> >         if (mmc_resp_type(cmd) == MMC_RSP_R1B || cmd->data) {
>> > -               tmo = jiffies + msecs_to_jiffies(20);
>> > +               /*
>> > +                * 2550ms is from EXT_CSD[248], after switch to hs200,
>> > +                * using CMD13 to polling card status, it will get response
>> > +                * of 0x800, but EMMC still pull-low DAT0.
>> > +                */
>>
>> Seems like you are solving a eMMC specific issue on your driver?
>>
>> Perhaps we should try to use a card quirk instead?
>
> Actually, this is a Bug of __mmc_switch(), Per JEDEC Spec, while switch
> speed mode, should not use CMD13 to get card status, as it's response
> cannot reflect that if card was busy now, for this CMD6 switch HS200

There is a statement applicable to all HS modes, which says it's *not
recommended* but *if* used, CRC errors shall be ignored.

That's what we have been doing so far, but perhaps that isn't good
enough for HS200/400.

> case, I tried some Samsung/Sandisk/KSI eMMC, issue CMD13 will always get
> 0x800, even eMMC has already changed to transfer state and DAT0 is high,
> the response of CMD13 is also 0x800, and will never be 0x900.

What do you mean by never? I assume it would when you extend the timeout?

Does your host driver make sure to ignore CRC errors in this case?
Just to be sure, that isn't the problem.

> So, in __mmc_switch(), it's a bug to use CMD13 to know that if card has
> already changed to transfer state.

Whether it's a bug or not, it seems like we have eMMC cards that we
have issues to support because of the way we have interpreted the
spec. So let's try to fix them!

> But, Our host do not support MMC_CAP_WAIT_WHILE_BUSY, that's why we hit
> this issue.

Okay, I see.

Let's try to change the behaviour in __mmc_switch() to prevent it to
send CMD13 before the cards stops signal busy on DAT0, when switching
to HS200/HS400 mode.

What I have in mind is:

1.
When the host controller doesn't support MMC_CAP_WAIT_WHILE_BUSY, we
would then to wait for a fixed timeout, before we send CMD13. In this
case, do you know if the "generic_cmd6_time" is working for your eMMC
devices that you had issues with?

2.
In additional to the above solution, we can for those hosts that
supports the ->card_busy() ops, but not MMC_CAP_WAIT_WHILE_BUSY,
invoke the ->card_busy() in a polling manner. Of course the above
timeout should also be considered as we need to stop polling at some
point.

So I noticed the Mediatek mmc host driver supports the ->card_busy()
ops. So I think you can try 1) first, then extend it to 2).

Does it makes sense?

[...]

Kind regards
Uffe
Chaotian Jing April 27, 2016, 12:06 p.m. UTC | #4
Hi,
On Wed, 2016-04-27 at 12:02 +0200, Ulf Hansson wrote:
> On 23 April 2016 at 11:43, Chaotian Jing <chaotian.jing@mediatek.com> wrote:
> > Hi,
> > On Fri, 2016-04-22 at 14:24 +0200, Ulf Hansson wrote:
> >> On 18 April 2016 at 09:13, Chaotian Jing <chaotian.jing@mediatek.com> wrote:
> >> > there are 2 points will cause could not call mmc_request_done()
> >> > and eventually cause the caller thread blocked.
> >> >
> >> > A. if card was busy, cancel_delayed_work() will return false because
> >> > the delay work has not been scheduled, in this case, need put
> >> > mod_delayed_work() in front of msdc_cmd_is_ready()
> >> >
> >> > B. if a request really need more than 5s(Some Sandisk TF card), it will
> >> > use cancel_delayed_work() to cancel itself, and also return false, so use
> >> > in_interrupt() to avoid this case
> >> >
> >> > Signed-off-by: Chaotian Jing <chaotian.jing@mediatek.com>
> >> > ---
> >> >  drivers/mmc/host/mtk-sd.c | 11 ++++++++---
> >> >  1 file changed, 8 insertions(+), 3 deletions(-)
> >> >
> >> > diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
> >> > index b17f30d..1511b1b 100644
> >> > --- a/drivers/mmc/host/mtk-sd.c
> >> > +++ b/drivers/mmc/host/mtk-sd.c
> >> > @@ -724,7 +724,7 @@ static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
> >> >         bool ret;
> >> >
> >> >         ret = cancel_delayed_work(&host->req_timeout);
> >> > -       if (!ret) {
> >> > +       if (!ret && in_interrupt()) {
> >> >                 /* delay work already running */
> >> >                 return;
> >> >         }
> >> > @@ -824,7 +824,12 @@ static inline bool msdc_cmd_is_ready(struct msdc_host *host,
> >> >         }
> >> >
> >> >         if (mmc_resp_type(cmd) == MMC_RSP_R1B || cmd->data) {
> >> > -               tmo = jiffies + msecs_to_jiffies(20);
> >> > +               /*
> >> > +                * 2550ms is from EXT_CSD[248], after switch to hs200,
> >> > +                * using CMD13 to polling card status, it will get response
> >> > +                * of 0x800, but EMMC still pull-low DAT0.
> >> > +                */
> >>
> >> Seems like you are solving a eMMC specific issue on your driver?
> >>
> >> Perhaps we should try to use a card quirk instead?
> >
> > Actually, this is a Bug of __mmc_switch(), Per JEDEC Spec, while switch
> > speed mode, should not use CMD13 to get card status, as it's response
> > cannot reflect that if card was busy now, for this CMD6 switch HS200
> 
> There is a statement applicable to all HS modes, which says it's *not
> recommended* but *if* used, CRC errors shall be ignored.
> 
Yes, but if we ignore the CRC error check, how do we know current
response is correct or incorrect ? if card is still busy but we get the
response of 0x900 caused by latching error, then it still consider that
card has already changed to trans state.
So, it's better to never use CMD13 when change the speed mode.
> That's what we have been doing so far, but perhaps that isn't good
> enough for HS200/400.
> 
> > case, I tried some Samsung/Sandisk/KSI eMMC, issue CMD13 will always get
> > 0x800, even eMMC has already changed to transfer state and DAT0 is high,
> > the response of CMD13 is also 0x800, and will never be 0x900.
> 
> What do you mean by never? I assume it would when you extend the timeout?
> 
> Does your host driver make sure to ignore CRC errors in this case?
> Just to be sure, that isn't the problem.
> 
> > So, in __mmc_switch(), it's a bug to use CMD13 to know that if card has
> > already changed to transfer state.
> 
> Whether it's a bug or not, it seems like we have eMMC cards that we
> have issues to support because of the way we have interpreted the
> spec. So let's try to fix them!
> 
> > But, Our host do not support MMC_CAP_WAIT_WHILE_BUSY, that's why we hit
> > this issue.
> 
> Okay, I see.
> 
> Let's try to change the behaviour in __mmc_switch() to prevent it to
> send CMD13 before the cards stops signal busy on DAT0, when switching
> to HS200/HS400 mode.
> What I have in mind is:
> 
> 1.
> When the host controller doesn't support MMC_CAP_WAIT_WHILE_BUSY, we
> would then to wait for a fixed timeout, before we send CMD13. In this
> case, do you know if the "generic_cmd6_time" is working for your eMMC
> devices that you had issues with?
> 
In general, generic_cmd6_time is enough, unless the card has firmware
issue itself,
> 2.
> In additional to the above solution, we can for those hosts that
> supports the ->card_busy() ops, but not MMC_CAP_WAIT_WHILE_BUSY,
> invoke the ->card_busy() in a polling manner. Of course the above
> timeout should also be considered as we need to stop polling at some
> point.
> 
this is what I want to do it, use the ops->card_busy() to check DAT0
status. if a host do not support MMC_CAP_WAIT_WHILE_BUSY and there is
no ops->card_busy(), then the only way is wait a fixed
timeout(generic_cmd6_time).

> So I noticed the Mediatek mmc host driver supports the ->card_busy()
> ops. So I think you can try 1) first, then extend it to 2).
> 
> Does it makes sense?
> 
> [...]
> 
> Kind regards
> Uffe
Ulf Hansson April 27, 2016, 12:33 p.m. UTC | #5
[...]

>> > Actually, this is a Bug of __mmc_switch(), Per JEDEC Spec, while switch
>> > speed mode, should not use CMD13 to get card status, as it's response
>> > cannot reflect that if card was busy now, for this CMD6 switch HS200
>>
>> There is a statement applicable to all HS modes, which says it's *not
>> recommended* but *if* used, CRC errors shall be ignored.
>>
> Yes, but if we ignore the CRC error check, how do we know current
> response is correct or incorrect ? if card is still busy but we get the
> response of 0x900 caused by latching error, then it still consider that
> card has already changed to trans state.

That's true. So perhaps we shouldn't ignore CRC errors, but instead
treat them as card is still busy and send a new CMD13?

Could you please try that and see how it works?

> So, it's better to never use CMD13 when change the speed mode.

Yes, that might be the case. My concern is only that we should avoid
adding unnecessary time-out for cards that is already working, as it
might effects the total initialization time of the card.

On the other hand, perhaps these card are working more because of
"luck" and thus the method to switch to HS mode are fragile and needs
to be fixed. :-)

>> That's what we have been doing so far, but perhaps that isn't good
>> enough for HS200/400.
>>
>> > case, I tried some Samsung/Sandisk/KSI eMMC, issue CMD13 will always get
>> > 0x800, even eMMC has already changed to transfer state and DAT0 is high,
>> > the response of CMD13 is also 0x800, and will never be 0x900.
>>
>> What do you mean by never? I assume it would when you extend the timeout?
>>
>> Does your host driver make sure to ignore CRC errors in this case?
>> Just to be sure, that isn't the problem.
>>
>> > So, in __mmc_switch(), it's a bug to use CMD13 to know that if card has
>> > already changed to transfer state.
>>
>> Whether it's a bug or not, it seems like we have eMMC cards that we
>> have issues to support because of the way we have interpreted the
>> spec. So let's try to fix them!
>>
>> > But, Our host do not support MMC_CAP_WAIT_WHILE_BUSY, that's why we hit
>> > this issue.
>>
>> Okay, I see.
>>
>> Let's try to change the behaviour in __mmc_switch() to prevent it to
>> send CMD13 before the cards stops signal busy on DAT0, when switching
>> to HS200/HS400 mode.
>> What I have in mind is:
>>
>> 1.
>> When the host controller doesn't support MMC_CAP_WAIT_WHILE_BUSY, we
>> would then to wait for a fixed timeout, before we send CMD13. In this
>> case, do you know if the "generic_cmd6_time" is working for your eMMC
>> devices that you had issues with?
>>
> In general, generic_cmd6_time is enough, unless the card has firmware
> issue itself,

Okay, so let's use that timeout value!

If another value is needed for a specific card, that would have to be
handled through a card quirk.

>> 2.
>> In additional to the above solution, we can for those hosts that
>> supports the ->card_busy() ops, but not MMC_CAP_WAIT_WHILE_BUSY,
>> invoke the ->card_busy() in a polling manner. Of course the above
>> timeout should also be considered as we need to stop polling at some
>> point.
>>
> this is what I want to do it, use the ops->card_busy() to check DAT0
> status. if a host do not support MMC_CAP_WAIT_WHILE_BUSY and there is
> no ops->card_busy(), then the only way is wait a fixed
> timeout(generic_cmd6_time).

Okay, great!

[...]

Kind regards
Uffe
diff mbox

Patch

diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index b17f30d..1511b1b 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -724,7 +724,7 @@  static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
 	bool ret;
 
 	ret = cancel_delayed_work(&host->req_timeout);
-	if (!ret) {
+	if (!ret && in_interrupt()) {
 		/* delay work already running */
 		return;
 	}
@@ -824,7 +824,12 @@  static inline bool msdc_cmd_is_ready(struct msdc_host *host,
 	}
 
 	if (mmc_resp_type(cmd) == MMC_RSP_R1B || cmd->data) {
-		tmo = jiffies + msecs_to_jiffies(20);
+		/*
+		 * 2550ms is from EXT_CSD[248], after switch to hs200,
+		 * using CMD13 to polling card status, it will get response
+		 * of 0x800, but EMMC still pull-low DAT0.
+		 */
+		tmo = jiffies + msecs_to_jiffies(2550);
 		/* R1B or with data, should check SDCBUSY */
 		while ((readl(host->base + SDC_STS) & SDC_STS_SDCBUSY) &&
 				time_before(jiffies, tmo))
@@ -847,6 +852,7 @@  static void msdc_start_command(struct msdc_host *host,
 	WARN_ON(host->cmd);
 	host->cmd = cmd;
 
+	mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT);
 	if (!msdc_cmd_is_ready(host, mrq, cmd))
 		return;
 
@@ -858,7 +864,6 @@  static void msdc_start_command(struct msdc_host *host,
 
 	cmd->error = 0;
 	rawcmd = msdc_cmd_prepare_raw_cmd(host, mrq, cmd);
-	mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT);
 
 	sdr_set_bits(host->base + MSDC_INTEN, cmd_ints_mask);
 	writel(cmd->arg, host->base + SDC_ARG);