diff mbox series

[2/2] mmc: sdhci_am654: Add tuning debug prints

Message ID 20240815201542.421653-3-jm@ti.com (mailing list archive)
State New
Headers show
Series Add retry tuning sequence | expand

Commit Message

Judith Mendez Aug. 15, 2024, 8:15 p.m. UTC
Add debug prints to tuning algorithm for debugging.

Signed-off-by: Judith Mendez <jm@ti.com>
---
 drivers/mmc/host/sdhci_am654.c | 5 +++++
 1 file changed, 5 insertions(+)

Comments

Ulf Hansson Aug. 20, 2024, 11:33 a.m. UTC | #1
On Thu, 15 Aug 2024 at 22:15, Judith Mendez <jm@ti.com> wrote:
>
> Add debug prints to tuning algorithm for debugging.
>
> Signed-off-by: Judith Mendez <jm@ti.com>
> ---
>  drivers/mmc/host/sdhci_am654.c | 5 +++++
>  1 file changed, 5 insertions(+)
>
> diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
> index c3d485bd4d553..a909f8de0eabe 100644
> --- a/drivers/mmc/host/sdhci_am654.c
> +++ b/drivers/mmc/host/sdhci_am654.c
> @@ -457,11 +457,13 @@ static u32 sdhci_am654_calculate_itap(struct sdhci_host *host, struct window
>
>         if (!num_fails) {
>                 /* Retry tuning */
> +               dev_err(dev, "No failing region found, retry tuning\n");

A dev_err seems to be too heavy, but I am not sure at what frequency
this could occur?

Why isn't a dev_dbg sufficient?

>                 return -1;
>         }
>
>         if (fail_window->length == ITAPDLY_LENGTH) {
>                 /* Retry tuning */
> +               dev_err(dev, "No passing ITAPDLY, retry tuning\n");

Ditto.

>                 return -1;
>         }
>
> @@ -505,6 +507,7 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
>         struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
>         unsigned char timing = host->mmc->ios.timing;
>         struct window fail_window[ITAPDLY_LENGTH];
> +       struct device *dev = mmc_dev(host->mmc);
>         u8 curr_pass, itap;
>         u8 fail_index = 0;
>         u8 prev_pass = 1;
> @@ -542,12 +545,14 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
>
>         if (ret >= 0) {
>                 itap = ret;
> +               dev_dbg(dev, "Final ITAPDLY=%d\n", itap);
>                 sdhci_am654_write_itapdly(sdhci_am654, itap, sdhci_am654->itap_del_ena[timing]);
>         } else {
>                 if (sdhci_am654->tuning_loop < RETRY_TUNING_MAX) {
>                         sdhci_am654->tuning_loop++;
>                         sdhci_am654_platform_execute_tuning(host, opcode);
>                 } else {
> +                       dev_err(dev, "Failed to find ITAPDLY, fail tuning\n");

The commit message only talks about debug messages, but this is an
error message. Perhaps update the commit message a bit?

>                         return -1;
>                 }
>         }
> --
> 2.46.0
>

Kind regards
Uffe
Judith Mendez Aug. 20, 2024, 2:40 p.m. UTC | #2
Hi Ulf Hansson,

On 8/20/24 6:33 AM, Ulf Hansson wrote:
> On Thu, 15 Aug 2024 at 22:15, Judith Mendez <jm@ti.com> wrote:
>>
>> Add debug prints to tuning algorithm for debugging.
>>
>> Signed-off-by: Judith Mendez <jm@ti.com>
>> ---
>>   drivers/mmc/host/sdhci_am654.c | 5 +++++
>>   1 file changed, 5 insertions(+)
>>
>> diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
>> index c3d485bd4d553..a909f8de0eabe 100644
>> --- a/drivers/mmc/host/sdhci_am654.c
>> +++ b/drivers/mmc/host/sdhci_am654.c
>> @@ -457,11 +457,13 @@ static u32 sdhci_am654_calculate_itap(struct sdhci_host *host, struct window
>>
>>          if (!num_fails) {
>>                  /* Retry tuning */
>> +               dev_err(dev, "No failing region found, retry tuning\n");
> 
> A dev_err seems to be too heavy, but I am not sure at what frequency
> this could occur?

Having no failing region is what we call a corner case, it rarely 
happens. The one case where it did happen, it took a good amount
of time to discover there were no failing regions found. The tuning
algorithm had to be looped 3 times before finding a failing itapdly.

> 
> Why isn't a dev_dbg sufficient?

I thought about using dev_dbg, but based on some feedback after coming
upon this issue on a board bring up case, we think it would help
enormously if we make it as obvious as possible when no failing region
is found.

The one case where this came up, the dev_err print would only print 3
times... Now this is only one case and we are not aware of any more
cases like this, also we cannot replicate on TI EVM's.

> 
>>                  return -1;
>>          }
>>
>>          if (fail_window->length == ITAPDLY_LENGTH) {
>>                  /* Retry tuning */
>> +               dev_err(dev, "No passing ITAPDLY, retry tuning\n");
> 
> Ditto.

Same idea as above..

But with this print, the maximum amount of prints that could be printed
is 20, is this too many prints in your opinion?


> 
>>                  return -1;
>>          }
>>
>> @@ -505,6 +507,7 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
>>          struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
>>          unsigned char timing = host->mmc->ios.timing;
>>          struct window fail_window[ITAPDLY_LENGTH];
>> +       struct device *dev = mmc_dev(host->mmc);
>>          u8 curr_pass, itap;
>>          u8 fail_index = 0;
>>          u8 prev_pass = 1;
>> @@ -542,12 +545,14 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
>>
>>          if (ret >= 0) {
>>                  itap = ret;
>> +               dev_dbg(dev, "Final ITAPDLY=%d\n", itap);
>>                  sdhci_am654_write_itapdly(sdhci_am654, itap, sdhci_am654->itap_del_ena[timing]);
>>          } else {
>>                  if (sdhci_am654->tuning_loop < RETRY_TUNING_MAX) {
>>                          sdhci_am654->tuning_loop++;
>>                          sdhci_am654_platform_execute_tuning(host, opcode);
>>                  } else {
>> +                       dev_err(dev, "Failed to find ITAPDLY, fail tuning\n");
> 
> The commit message only talks about debug messages, but this is an
> error message. Perhaps update the commit message a bit?

Sure will do, after we conclude the discussion above and in v2.

Thanks so much for reviewing.

~ Judith

> 
>>                          return -1;
>>                  }
>>          }
>> --
>> 2.46.0
>>
> 
> Kind regards
> Uffe
Ulf Hansson Aug. 20, 2024, 3:03 p.m. UTC | #3
On Tue, 20 Aug 2024 at 16:41, Judith Mendez <jm@ti.com> wrote:
>
> Hi Ulf Hansson,
>
> On 8/20/24 6:33 AM, Ulf Hansson wrote:
> > On Thu, 15 Aug 2024 at 22:15, Judith Mendez <jm@ti.com> wrote:
> >>
> >> Add debug prints to tuning algorithm for debugging.
> >>
> >> Signed-off-by: Judith Mendez <jm@ti.com>
> >> ---
> >>   drivers/mmc/host/sdhci_am654.c | 5 +++++
> >>   1 file changed, 5 insertions(+)
> >>
> >> diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
> >> index c3d485bd4d553..a909f8de0eabe 100644
> >> --- a/drivers/mmc/host/sdhci_am654.c
> >> +++ b/drivers/mmc/host/sdhci_am654.c
> >> @@ -457,11 +457,13 @@ static u32 sdhci_am654_calculate_itap(struct sdhci_host *host, struct window
> >>
> >>          if (!num_fails) {
> >>                  /* Retry tuning */
> >> +               dev_err(dev, "No failing region found, retry tuning\n");
> >
> > A dev_err seems to be too heavy, but I am not sure at what frequency
> > this could occur?
>
> Having no failing region is what we call a corner case, it rarely
> happens. The one case where it did happen, it took a good amount
> of time to discover there were no failing regions found. The tuning
> algorithm had to be looped 3 times before finding a failing itapdly.
>
> >
> > Why isn't a dev_dbg sufficient?
>
> I thought about using dev_dbg, but based on some feedback after coming
> upon this issue on a board bring up case, we think it would help
> enormously if we make it as obvious as possible when no failing region
> is found.
>
> The one case where this came up, the dev_err print would only print 3
> times... Now this is only one case and we are not aware of any more
> cases like this, also we cannot replicate on TI EVM's.

What happens if/when we fail here? Do we fail to detect the card or do
we end up running it in some degraded mode?

If the latter a dev_warn, the former a dev_err(). Does that make sense?

>
> >
> >>                  return -1;
> >>          }
> >>
> >>          if (fail_window->length == ITAPDLY_LENGTH) {
> >>                  /* Retry tuning */
> >> +               dev_err(dev, "No passing ITAPDLY, retry tuning\n");
> >
> > Ditto.
>
> Same idea as above..
>
> But with this print, the maximum amount of prints that could be printed
> is 20, is this too many prints in your opinion?

This sounds like dev_dbg to me. We are not really failing, as we are
making a re-try and will most likely succeed then, right?

>
>
> >
> >>                  return -1;
> >>          }
> >>
> >> @@ -505,6 +507,7 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
> >>          struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
> >>          unsigned char timing = host->mmc->ios.timing;
> >>          struct window fail_window[ITAPDLY_LENGTH];
> >> +       struct device *dev = mmc_dev(host->mmc);
> >>          u8 curr_pass, itap;
> >>          u8 fail_index = 0;
> >>          u8 prev_pass = 1;
> >> @@ -542,12 +545,14 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
> >>
> >>          if (ret >= 0) {
> >>                  itap = ret;
> >> +               dev_dbg(dev, "Final ITAPDLY=%d\n", itap);
> >>                  sdhci_am654_write_itapdly(sdhci_am654, itap, sdhci_am654->itap_del_ena[timing]);
> >>          } else {
> >>                  if (sdhci_am654->tuning_loop < RETRY_TUNING_MAX) {
> >>                          sdhci_am654->tuning_loop++;
> >>                          sdhci_am654_platform_execute_tuning(host, opcode);
> >>                  } else {
> >> +                       dev_err(dev, "Failed to find ITAPDLY, fail tuning\n");
> >
> > The commit message only talks about debug messages, but this is an
> > error message. Perhaps update the commit message a bit?
>
> Sure will do, after we conclude the discussion above and in v2.
>
> Thanks so much for reviewing.
>

Kind regards
Uffe
Judith Mendez Aug. 20, 2024, 8:17 p.m. UTC | #4
Hi Ulf Hansson,

On 8/20/24 10:03 AM, Ulf Hansson wrote:
> On Tue, 20 Aug 2024 at 16:41, Judith Mendez <jm@ti.com> wrote:
>>
>> Hi Ulf Hansson,
>>
>> On 8/20/24 6:33 AM, Ulf Hansson wrote:
>>> On Thu, 15 Aug 2024 at 22:15, Judith Mendez <jm@ti.com> wrote:
>>>>
>>>> Add debug prints to tuning algorithm for debugging.
>>>>
>>>> Signed-off-by: Judith Mendez <jm@ti.com>
>>>> ---
>>>>    drivers/mmc/host/sdhci_am654.c | 5 +++++
>>>>    1 file changed, 5 insertions(+)
>>>>
>>>> diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
>>>> index c3d485bd4d553..a909f8de0eabe 100644
>>>> --- a/drivers/mmc/host/sdhci_am654.c
>>>> +++ b/drivers/mmc/host/sdhci_am654.c
>>>> @@ -457,11 +457,13 @@ static u32 sdhci_am654_calculate_itap(struct sdhci_host *host, struct window
>>>>
>>>>           if (!num_fails) {
>>>>                   /* Retry tuning */
>>>> +               dev_err(dev, "No failing region found, retry tuning\n");
>>>
>>> A dev_err seems to be too heavy, but I am not sure at what frequency
>>> this could occur?
>>
>> Having no failing region is what we call a corner case, it rarely
>> happens. The one case where it did happen, it took a good amount
>> of time to discover there were no failing regions found. The tuning
>> algorithm had to be looped 3 times before finding a failing itapdly.
>>
>>>
>>> Why isn't a dev_dbg sufficient?
>>
>> I thought about using dev_dbg, but based on some feedback after coming
>> upon this issue on a board bring up case, we think it would help
>> enormously if we make it as obvious as possible when no failing region
>> is found.
>>
>> The one case where this came up, the dev_err print would only print 3
>> times... Now this is only one case and we are not aware of any more
>> cases like this, also we cannot replicate on TI EVM's.
> 
> What happens if/when we fail here? Do we fail to detect the card or do
> we end up running it in some degraded mode?
> 
> If the latter a dev_warn, the former a dev_err(). Does that make sense?

What was happening was, we enumerated to HS200, but then cqe error was
triggered and this re-triggers the tuning algo repeatedly until the
failing region was found.

After thinking about this some more, I think I will switch to dev_dbg,
if we fail to find failing region and we re-run the tuning algorithm,
don't want to confuse people by printing error logs if we are re-running
the tuning algo anyways. Thanks so much for your feedback!

~ Judith


> 
>>
>>>
>>>>                   return -1;
>>>>           }
>>>>
>>>>           if (fail_window->length == ITAPDLY_LENGTH) {
>>>>                   /* Retry tuning */
>>>> +               dev_err(dev, "No passing ITAPDLY, retry tuning\n");
>>>
>>> Ditto.
>>
>> Same idea as above..
>>
>> But with this print, the maximum amount of prints that could be printed
>> is 20, is this too many prints in your opinion?
> 
> This sounds like dev_dbg to me. We are not really failing, as we are
> making a re-try and will most likely succeed then, right?
> 
>>
>>
>>>
>>>>                   return -1;
>>>>           }
>>>>
>>>> @@ -505,6 +507,7 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
>>>>           struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
>>>>           unsigned char timing = host->mmc->ios.timing;
>>>>           struct window fail_window[ITAPDLY_LENGTH];
>>>> +       struct device *dev = mmc_dev(host->mmc);
>>>>           u8 curr_pass, itap;
>>>>           u8 fail_index = 0;
>>>>           u8 prev_pass = 1;
>>>> @@ -542,12 +545,14 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
>>>>
>>>>           if (ret >= 0) {
>>>>                   itap = ret;
>>>> +               dev_dbg(dev, "Final ITAPDLY=%d\n", itap);
>>>>                   sdhci_am654_write_itapdly(sdhci_am654, itap, sdhci_am654->itap_del_ena[timing]);
>>>>           } else {
>>>>                   if (sdhci_am654->tuning_loop < RETRY_TUNING_MAX) {
>>>>                           sdhci_am654->tuning_loop++;
>>>>                           sdhci_am654_platform_execute_tuning(host, opcode);
>>>>                   } else {
>>>> +                       dev_err(dev, "Failed to find ITAPDLY, fail tuning\n");
>>>
>>> The commit message only talks about debug messages, but this is an
>>> error message. Perhaps update the commit message a bit?
>>
>> Sure will do, after we conclude the discussion above and in v2.
>>
>> Thanks so much for reviewing.
>>
> 
> Kind regards
> Uffe
Judith Mendez Aug. 20, 2024, 8:18 p.m. UTC | #5
On 8/20/24 10:03 AM, Ulf Hansson wrote:
> On Tue, 20 Aug 2024 at 16:41, Judith Mendez <jm@ti.com> wrote:
>>
>> Hi Ulf Hansson,
>>
>> On 8/20/24 6:33 AM, Ulf Hansson wrote:
>>> On Thu, 15 Aug 2024 at 22:15, Judith Mendez <jm@ti.com> wrote:
>>>>
>>>> Add debug prints to tuning algorithm for debugging.
>>>>
>>>> Signed-off-by: Judith Mendez <jm@ti.com>
>>>> ---
>>>>    drivers/mmc/host/sdhci_am654.c | 5 +++++
>>>>    1 file changed, 5 insertions(+)
>>>>
>>>> diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
>>>> index c3d485bd4d553..a909f8de0eabe 100644
>>>> --- a/drivers/mmc/host/sdhci_am654.c
>>>> +++ b/drivers/mmc/host/sdhci_am654.c
>>>> @@ -457,11 +457,13 @@ static u32 sdhci_am654_calculate_itap(struct sdhci_host *host, struct window
>>>>
>>>>           if (!num_fails) {
>>>>                   /* Retry tuning */
>>>> +               dev_err(dev, "No failing region found, retry tuning\n");
>>>
>>> A dev_err seems to be too heavy, but I am not sure at what frequency
>>> this could occur?
>>
>> Having no failing region is what we call a corner case, it rarely
>> happens. The one case where it did happen, it took a good amount
>> of time to discover there were no failing regions found. The tuning
>> algorithm had to be looped 3 times before finding a failing itapdly.
>>
>>>
>>> Why isn't a dev_dbg sufficient?
>>
>> I thought about using dev_dbg, but based on some feedback after coming
>> upon this issue on a board bring up case, we think it would help
>> enormously if we make it as obvious as possible when no failing region
>> is found.
>>
>> The one case where this came up, the dev_err print would only print 3
>> times... Now this is only one case and we are not aware of any more
>> cases like this, also we cannot replicate on TI EVM's.
> 
> What happens if/when we fail here? Do we fail to detect the card or do
> we end up running it in some degraded mode?
> 
> If the latter a dev_warn, the former a dev_err(). Does that make sense?
> 
>>
>>>
>>>>                   return -1;
>>>>           }
>>>>
>>>>           if (fail_window->length == ITAPDLY_LENGTH) {
>>>>                   /* Retry tuning */
>>>> +               dev_err(dev, "No passing ITAPDLY, retry tuning\n");
>>>
>>> Ditto.
>>
>> Same idea as above..
>>
>> But with this print, the maximum amount of prints that could be printed
>> is 20, is this too many prints in your opinion?
> 
> This sounds like dev_dbg to me. We are not really failing, as we are
> making a re-try and will most likely succeed then, right?

Yes absolutely right, will fix for v2, thanks.

~ Judith

> 
>>
>>
>>>
>>>>                   return -1;
>>>>           }
>>>>
>>>> @@ -505,6 +507,7 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
>>>>           struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
>>>>           unsigned char timing = host->mmc->ios.timing;
>>>>           struct window fail_window[ITAPDLY_LENGTH];
>>>> +       struct device *dev = mmc_dev(host->mmc);
>>>>           u8 curr_pass, itap;
>>>>           u8 fail_index = 0;
>>>>           u8 prev_pass = 1;
>>>> @@ -542,12 +545,14 @@ static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
>>>>
>>>>           if (ret >= 0) {
>>>>                   itap = ret;
>>>> +               dev_dbg(dev, "Final ITAPDLY=%d\n", itap);
>>>>                   sdhci_am654_write_itapdly(sdhci_am654, itap, sdhci_am654->itap_del_ena[timing]);
>>>>           } else {
>>>>                   if (sdhci_am654->tuning_loop < RETRY_TUNING_MAX) {
>>>>                           sdhci_am654->tuning_loop++;
>>>>                           sdhci_am654_platform_execute_tuning(host, opcode);
>>>>                   } else {
>>>> +                       dev_err(dev, "Failed to find ITAPDLY, fail tuning\n");
>>>
>>> The commit message only talks about debug messages, but this is an
>>> error message. Perhaps update the commit message a bit?
>>
>> Sure will do, after we conclude the discussion above and in v2.
>>
>> Thanks so much for reviewing.
>>
> 
> Kind regards
> Uffe
diff mbox series

Patch

diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
index c3d485bd4d553..a909f8de0eabe 100644
--- a/drivers/mmc/host/sdhci_am654.c
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -457,11 +457,13 @@  static u32 sdhci_am654_calculate_itap(struct sdhci_host *host, struct window
 
 	if (!num_fails) {
 		/* Retry tuning */
+		dev_err(dev, "No failing region found, retry tuning\n");
 		return -1;
 	}
 
 	if (fail_window->length == ITAPDLY_LENGTH) {
 		/* Retry tuning */
+		dev_err(dev, "No passing ITAPDLY, retry tuning\n");
 		return -1;
 	}
 
@@ -505,6 +507,7 @@  static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
 	struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host);
 	unsigned char timing = host->mmc->ios.timing;
 	struct window fail_window[ITAPDLY_LENGTH];
+	struct device *dev = mmc_dev(host->mmc);
 	u8 curr_pass, itap;
 	u8 fail_index = 0;
 	u8 prev_pass = 1;
@@ -542,12 +545,14 @@  static int sdhci_am654_platform_execute_tuning(struct sdhci_host *host,
 
 	if (ret >= 0) {
 		itap = ret;
+		dev_dbg(dev, "Final ITAPDLY=%d\n", itap);
 		sdhci_am654_write_itapdly(sdhci_am654, itap, sdhci_am654->itap_del_ena[timing]);
 	} else {
 		if (sdhci_am654->tuning_loop < RETRY_TUNING_MAX) {
 			sdhci_am654->tuning_loop++;
 			sdhci_am654_platform_execute_tuning(host, opcode);
 		} else {
+			dev_err(dev, "Failed to find ITAPDLY, fail tuning\n");
 			return -1;
 		}
 	}