diff mbox series

[2/2] usb: host: ehci-platform: add a quirk to avoid stuck

Message ID 1579258447-28135-3-git-send-email-yoshihiro.shimoda.uh@renesas.com (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show
Series usb: host: ehci-platform: add a quirk to avoid stuck | expand

Commit Message

Yoshihiro Shimoda Jan. 17, 2020, 10:54 a.m. UTC
Since EHCI/OHCI controllers on R-Car Gen3 SoCs are possible to
be getting stuck very rarely after a full/low usb device was
disconnected. To detect/recover from such a situation, the controllers
require a special way which poll the EHCI PORTSC register and changes
the OHCI functional state.

So, this patch adds a polling timer into the ehci-platform driver,
and if the ehci driver detects the issue by the EHCI PORTSC register,
the ehci driver removes a companion device (= the OHCI controller)
to change the OHCI functional state to USB Reset once. And then,
the ehci driver adds the companion device again.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
---
 drivers/usb/host/ehci-platform.c | 104 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

Comments

Alan Stern Jan. 17, 2020, 4:26 p.m. UTC | #1
On Fri, 17 Jan 2020, Yoshihiro Shimoda wrote:

> Since EHCI/OHCI controllers on R-Car Gen3 SoCs are possible to
> be getting stuck very rarely after a full/low usb device was
> disconnected. To detect/recover from such a situation, the controllers
> require a special way which poll the EHCI PORTSC register and changes
> the OHCI functional state.
> 
> So, this patch adds a polling timer into the ehci-platform driver,
> and if the ehci driver detects the issue by the EHCI PORTSC register,
> the ehci driver removes a companion device (= the OHCI controller)
> to change the OHCI functional state to USB Reset once. And then,
> the ehci driver adds the companion device again.
> 
> Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>

The programming in this patch could be improved in several ways.

> ---
>  drivers/usb/host/ehci-platform.c | 104 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 104 insertions(+)
> 
> diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
> index 769749c..fc6bb06 100644
> --- a/drivers/usb/host/ehci-platform.c
> +++ b/drivers/usb/host/ehci-platform.c
> @@ -29,6 +29,7 @@
>  #include <linux/of.h>
>  #include <linux/platform_device.h>
>  #include <linux/reset.h>
> +#include <linux/timer.h>
>  #include <linux/usb.h>
>  #include <linux/usb/hcd.h>
>  #include <linux/usb/ehci_pdriver.h>
> @@ -44,6 +45,9 @@ struct ehci_platform_priv {
>  	struct clk *clks[EHCI_MAX_CLKS];
>  	struct reset_control *rsts;
>  	bool reset_on_resume;
> +	bool quirk_poll;
> +	struct timer_list poll_timer;
> +	struct work_struct poll_work;
>  };
>  
>  static const char hcd_name[] = "ehci-platform";
> @@ -118,6 +122,88 @@ static struct usb_ehci_pdata ehci_platform_defaults = {
>  	.power_off =		ehci_platform_power_off,
>  };
>  
> +static bool ehci_platform_quirk_poll_check_condition(struct ehci_hcd *ehci)

There should be a kerneldoc section above this line, explaining what 
the function does and why it is needed.  Otherwise people reading this 
code for the first time will have no idea what is going on.

You don't really need the "ehci_platform_" at the start of the function 
name, because this is a static function.

Also, "quirk_poll_check_condition" suggests that this is the _only_ 
condition that a quirk might need to poll for.  What if another similar 
quirk arises in the future?  The function name should indicate 
something about what condition is being checked.

> +{
> +	u32 port_status = ehci_readl(ehci, &ehci->regs->port_status[0]);
> +
> +	if (!(port_status & PORT_OWNER) &&	/* PO == 0b */
> +	    port_status & PORT_POWER &&		/* PP == 1b */
> +	    !(port_status & PORT_CONNECT) &&	/* CCS == 0b */
> +	    port_status & GENMASK(11, 10))	/* LS != 00b */

The comments are unnecessary.  Anyone reading the code will realize 
that !(port_status & PORT_OWNER) means that the PO value is 0b, and so 
on.

Also, I think the code would be a little clearer if all the tests were 
inside parentheses, not just the negated tests.

The GENMASK stuff is very obscure.  You could define a PORT_LS_MASK
macro in include/linux/usb/ehci_defs.h to be (3<<10), and make the
test:

	(port_status & PORT_LS_MASK)

> +		return true;
> +
> +	return false;
> +}
> +
> +static void ehci_platform_quirk_poll_rebind_companion(struct ehci_hcd *ehci)
> +{
> +	struct device *companion_dev;
> +	struct usb_hcd *hcd = ehci_to_hcd(ehci);
> +
> +	companion_dev = usb_of_get_companion_dev(hcd->self.controller);
> +	if (!companion_dev)
> +		return;
> +
> +	device_release_driver(companion_dev);
> +	if (device_attach(companion_dev) < 0)
> +		ehci_err(ehci, "%s: failed\n", __func__);
> +
> +	put_device(companion_dev);
> +}
> +
> +static void ehci_platform_quirk_poll_start_timer(struct ehci_platform_priv *p)
> +{
> +	mod_timer(&p->poll_timer, jiffies + msecs_to_jiffies(1000));
> +}

Does this really need to be in a separate function?  Why not include it
inline wherever it is used?

Also, instead of msecs_to_jiffies(1000) you can just write HZ.

> +
> +static void ehci_platform_quirk_poll_work(struct work_struct *work)
> +{
> +	struct ehci_platform_priv *priv =
> +		container_of(work, struct ehci_platform_priv, poll_work);
> +	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
> +					     priv);
> +	int i;
> +
> +	usleep_range(4000, 8000);

You have just waited 1000 ms for the timer.  Why will sleeping an
additional 4 - 8 ms make any difference?

> +
> +	for (i = 0; i < 2; i++) {
> +		udelay(10);
> +		if (!ehci_platform_quirk_poll_check_condition(ehci))
> +			goto out;
> +	}

This will be clearer if you expand the loop and add a comment:

	/* Make sure the condition persists for at least 10 us */
	if (!ehci_platform_quirk_poll_check_condition(ehci))
		return;
	udelay(10);
	if (!ehci_platform_quirk_poll_check_condition(ehci))
		return;

> +
> +	ehci_dbg(ehci, "%s: detected getting stuck. rebind now!\n", __func__);
> +	ehci_platform_quirk_poll_rebind_companion(ehci);
> +
> +out:
> +	ehci_platform_quirk_poll_start_timer(priv);

You don't need to restart the timer here ...

> +}
> +
> +static void ehci_platform_quirk_poll_timer(struct timer_list *t)
> +{
> +	struct ehci_platform_priv *priv = from_timer(priv, t, poll_timer);
> +	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
> +					     priv);
> +
> +	if (ehci_platform_quirk_poll_check_condition(ehci))
> +		schedule_work(&priv->poll_work);
> +	else

... if you simply remove this "else" line.

> +		ehci_platform_quirk_poll_start_timer(priv);

Also, it would be a lot cleaner if you run all the check_condition
stuff inside the timer routine here, and use the work routine only for
rebind_companion.

Alan Stern
Yoshihiro Shimoda Jan. 20, 2020, 9:33 a.m. UTC | #2
Hi Alan,

Thank you for your review!

> From: Alan Stern, Sent: Saturday, January 18, 2020 1:27 AM
> 
> On Fri, 17 Jan 2020, Yoshihiro Shimoda wrote:
> 
> > Since EHCI/OHCI controllers on R-Car Gen3 SoCs are possible to
> > be getting stuck very rarely after a full/low usb device was
> > disconnected. To detect/recover from such a situation, the controllers
> > require a special way which poll the EHCI PORTSC register and changes
> > the OHCI functional state.
> >
> > So, this patch adds a polling timer into the ehci-platform driver,
> > and if the ehci driver detects the issue by the EHCI PORTSC register,
> > the ehci driver removes a companion device (= the OHCI controller)
> > to change the OHCI functional state to USB Reset once. And then,
> > the ehci driver adds the companion device again.
> >
> > Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
> 
> The programming in this patch could be improved in several ways.
> 
> > ---
> >  drivers/usb/host/ehci-platform.c | 104 +++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 104 insertions(+)
> >
> > diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
> > index 769749c..fc6bb06 100644
> > --- a/drivers/usb/host/ehci-platform.c
> > +++ b/drivers/usb/host/ehci-platform.c
> > @@ -29,6 +29,7 @@
> >  #include <linux/of.h>
> >  #include <linux/platform_device.h>
> >  #include <linux/reset.h>
> > +#include <linux/timer.h>
> >  #include <linux/usb.h>
> >  #include <linux/usb/hcd.h>
> >  #include <linux/usb/ehci_pdriver.h>
> > @@ -44,6 +45,9 @@ struct ehci_platform_priv {
> >  	struct clk *clks[EHCI_MAX_CLKS];
> >  	struct reset_control *rsts;
> >  	bool reset_on_resume;
> > +	bool quirk_poll;
> > +	struct timer_list poll_timer;
> > +	struct work_struct poll_work;
> >  };
> >
> >  static const char hcd_name[] = "ehci-platform";
> > @@ -118,6 +122,88 @@ static struct usb_ehci_pdata ehci_platform_defaults = {
> >  	.power_off =		ehci_platform_power_off,
> >  };
> >
> > +static bool ehci_platform_quirk_poll_check_condition(struct ehci_hcd *ehci)
> 
> There should be a kerneldoc section above this line, explaining what
> the function does and why it is needed.  Otherwise people reading this
> code for the first time will have no idea what is going on.

I got it. I'll add a kerneldoc section.

> You don't really need the "ehci_platform_" at the start of the function
> name, because this is a static function.
> 
> Also, "quirk_poll_check_condition" suggests that this is the _only_
> condition that a quirk might need to poll for.  What if another similar
> quirk arises in the future?  The function name should indicate
> something about what condition is being checked.

I got it. I'll change the function name to quirk_poll_stuck_connection().

> > +{
> > +	u32 port_status = ehci_readl(ehci, &ehci->regs->port_status[0]);
> > +
> > +	if (!(port_status & PORT_OWNER) &&	/* PO == 0b */
> > +	    port_status & PORT_POWER &&		/* PP == 1b */
> > +	    !(port_status & PORT_CONNECT) &&	/* CCS == 0b */
> > +	    port_status & GENMASK(11, 10))	/* LS != 00b */
> 
> The comments are unnecessary.  Anyone reading the code will realize
> that !(port_status & PORT_OWNER) means that the PO value is 0b, and so
> on.
> 
> Also, I think the code would be a little clearer if all the tests were
> inside parentheses, not just the negated tests.
> 
> The GENMASK stuff is very obscure.  You could define a PORT_LS_MASK
> macro in include/linux/usb/ehci_defs.h to be (3<<10), and make the
> test:
> 
> 	(port_status & PORT_LS_MASK)

I got it. I'll fix them.

> > +		return true;
> > +
> > +	return false;
> > +}
> > +
> > +static void ehci_platform_quirk_poll_rebind_companion(struct ehci_hcd *ehci)
> > +{
> > +	struct device *companion_dev;
> > +	struct usb_hcd *hcd = ehci_to_hcd(ehci);
> > +
> > +	companion_dev = usb_of_get_companion_dev(hcd->self.controller);
> > +	if (!companion_dev)
> > +		return;
> > +
> > +	device_release_driver(companion_dev);
> > +	if (device_attach(companion_dev) < 0)
> > +		ehci_err(ehci, "%s: failed\n", __func__);
> > +
> > +	put_device(companion_dev);
> > +}
> > +
> > +static void ehci_platform_quirk_poll_start_timer(struct ehci_platform_priv *p)
> > +{
> > +	mod_timer(&p->poll_timer, jiffies + msecs_to_jiffies(1000));
> > +}
> 
> Does this really need to be in a separate function?  Why not include it
> inline wherever it is used?
> 
> Also, instead of msecs_to_jiffies(1000) you can just write HZ.

I intended to avoid using magical number "1000", but using HZ and including it
inline are better. I'll fix it.

> > +
> > +static void ehci_platform_quirk_poll_work(struct work_struct *work)
> > +{
> > +	struct ehci_platform_priv *priv =
> > +		container_of(work, struct ehci_platform_priv, poll_work);
> > +	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
> > +					     priv);
> > +	int i;
> > +
> > +	usleep_range(4000, 8000);
> 
> You have just waited 1000 ms for the timer.  Why will sleeping an
> additional 4 - 8 ms make any difference?

This sleeping can avoid a misdetection between this work function and
reconnection. If user reconnects the usb within 4 ms, the PORTSC
condition is possible to be the same as the issue's condition.
I think I should have described this information into the code.

However, if I used schedule_delayed_work() instead, we can remove
the usleep_range().

> > +
> > +	for (i = 0; i < 2; i++) {
> > +		udelay(10);
> > +		if (!ehci_platform_quirk_poll_check_condition(ehci))
> > +			goto out;
> > +	}
> 
> This will be clearer if you expand the loop and add a comment:
> 
> 	/* Make sure the condition persists for at least 10 us */
> 	if (!ehci_platform_quirk_poll_check_condition(ehci))
> 		return;
> 	udelay(10);
> 	if (!ehci_platform_quirk_poll_check_condition(ehci))
> 		return;

I think so. So, I'll fix it.

> > +
> > +	ehci_dbg(ehci, "%s: detected getting stuck. rebind now!\n", __func__);
> > +	ehci_platform_quirk_poll_rebind_companion(ehci);
> > +
> > +out:
> > +	ehci_platform_quirk_poll_start_timer(priv);
> 
> You don't need to restart the timer here ...
> 
> > +}
> > +
> > +static void ehci_platform_quirk_poll_timer(struct timer_list *t)
> > +{
> > +	struct ehci_platform_priv *priv = from_timer(priv, t, poll_timer);
> > +	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
> > +					     priv);
> > +
> > +	if (ehci_platform_quirk_poll_check_condition(ehci))
> > +		schedule_work(&priv->poll_work);
> > +	else
> 
> ... if you simply remove this "else" line.

I think so. I was worried about a race condition between this timer function
and the work function. But, this will not happen because the timer is every 1 s.

> > +		ehci_platform_quirk_poll_start_timer(priv);
> 
> Also, it would be a lot cleaner if you run all the check_condition
> stuff inside the timer routine here, and use the work routine only for
> rebind_companion.

If using mdelay(4) in the timer routine here can be acceptable,
it would be a lot cleaner. But, Documentation/timers/timers-howto.rst
suggests to avoid using mdelay() in atomic.

Best regards.
Yoshihiro Shimoda

> Alan Stern
Alan Stern Jan. 20, 2020, 3:12 p.m. UTC | #3
On Mon, 20 Jan 2020, Yoshihiro Shimoda wrote:

> > > +static void ehci_platform_quirk_poll_work(struct work_struct *work)
> > > +{
> > > +	struct ehci_platform_priv *priv =
> > > +		container_of(work, struct ehci_platform_priv, poll_work);
> > > +	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
> > > +					     priv);
> > > +	int i;
> > > +
> > > +	usleep_range(4000, 8000);
> > 
> > You have just waited 1000 ms for the timer.  Why will sleeping an
> > additional 4 - 8 ms make any difference?
> 
> This sleeping can avoid a misdetection between this work function and
> reconnection. If user reconnects the usb within 4 ms, the PORTSC
> condition is possible to be the same as the issue's condition.
> I think I should have described this information into the code.
> 
> However, if I used schedule_delayed_work() instead, we can remove
> the usleep_range().

Why not just make the timer delay be 1004 or 1008 ms instead of adding
this extra delay here?

Alan Stern
Yoshihiro Shimoda Jan. 21, 2020, 1:37 a.m. UTC | #4
Hi Alan,

> From: Alan Stern, Sent: Tuesday, January 21, 2020 12:12 AM
> 
> On Mon, 20 Jan 2020, Yoshihiro Shimoda wrote:
> 
> > > > +static void ehci_platform_quirk_poll_work(struct work_struct *work)
> > > > +{
> > > > +	struct ehci_platform_priv *priv =
> > > > +		container_of(work, struct ehci_platform_priv, poll_work);
> > > > +	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
> > > > +					     priv);
> > > > +	int i;
> > > > +
> > > > +	usleep_range(4000, 8000);
> > >
> > > You have just waited 1000 ms for the timer.  Why will sleeping an
> > > additional 4 - 8 ms make any difference?
> >
> > This sleeping can avoid a misdetection between this work function and
> > reconnection. If user reconnects the usb within 4 ms, the PORTSC
> > condition is possible to be the same as the issue's condition.
> > I think I should have described this information into the code.
> >
> > However, if I used schedule_delayed_work() instead, we can remove
> > the usleep_range().
> 
> Why not just make the timer delay be 1004 or 1008 ms instead of adding
> this extra delay here?

My concern is a race condition when the issue doesn't happen. If
the workaround code has an extra delay, we can detect misdetection like below.
This is related to the EHCI/OHCI controllers on R-Car Gen3 SoCs though,
updating the CCS status is possible to be delayed. To be clear of the reason,
I should have described this CCS status behavior too.

Timer routine		workqueue		EHCI PORTSC	USB connection
								disconnect
						CCS=0		connect (within 4 ms)
condition = true (misdetection)			CCS=0
			usleep_range(4000,8000)	CCS=1
			condition = false

Best regards,
Yoshihiro Shimoda

> Alan Stern
Alan Stern Jan. 21, 2020, 3:09 p.m. UTC | #5
On Tue, 21 Jan 2020, Yoshihiro Shimoda wrote:

> Hi Alan,
> 
> > From: Alan Stern, Sent: Tuesday, January 21, 2020 12:12 AM
> > 
> > On Mon, 20 Jan 2020, Yoshihiro Shimoda wrote:
> > 
> > > > > +static void ehci_platform_quirk_poll_work(struct work_struct *work)
> > > > > +{
> > > > > +	struct ehci_platform_priv *priv =
> > > > > +		container_of(work, struct ehci_platform_priv, poll_work);
> > > > > +	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
> > > > > +					     priv);
> > > > > +	int i;
> > > > > +
> > > > > +	usleep_range(4000, 8000);
> > > >
> > > > You have just waited 1000 ms for the timer.  Why will sleeping an
> > > > additional 4 - 8 ms make any difference?
> > >
> > > This sleeping can avoid a misdetection between this work function and
> > > reconnection. If user reconnects the usb within 4 ms, the PORTSC
> > > condition is possible to be the same as the issue's condition.
> > > I think I should have described this information into the code.
> > >
> > > However, if I used schedule_delayed_work() instead, we can remove
> > > the usleep_range().
> > 
> > Why not just make the timer delay be 1004 or 1008 ms instead of adding
> > this extra delay here?
> 
> My concern is a race condition when the issue doesn't happen. If
> the workaround code has an extra delay, we can detect misdetection like below.
> This is related to the EHCI/OHCI controllers on R-Car Gen3 SoCs though,
> updating the CCS status is possible to be delayed. To be clear of the reason,
> I should have described this CCS status behavior too.
> 
> Timer routine		workqueue		EHCI PORTSC	USB connection
> 								disconnect
> 						CCS=0		connect (within 4 ms)
> condition = true (misdetection)			CCS=0
> 			usleep_range(4000,8000)	CCS=1
> 			condition = false

Okay, now I understand.  I misread the code in the original patch.
But now it looks like the code does roughly this:

Timer routine:	if (ehci_platform_quirk_poll_check_condition(ehci))
			schedule_work();

Work routine:	usleep_range(4000, 8000);
		udelay(10);
		if (!ehci_platform_quirk_poll_check_condition(ehci))
			return;
		udelay(10);
		if (!ehci_platform_quirk_poll_check_condition(ehci))
			return;
		ehci_platform_quirk_poll_rebind_companion(ehci);

So there are three calls to quirk_poll_check_condition, with 4 - 8 ms
between the first and second, and 10 us between the second and third.
Do you really need to have this combination of a long delay followed by
a short delay?  Wouldn't two check_condition calls with only one delay
be good enough?

Alan Stern
Yoshihiro Shimoda Jan. 22, 2020, 11:05 a.m. UTC | #6
Hi Alan,

> From: Alan Stern, Sent: Wednesday, January 22, 2020 12:10 AM
<snip>
> On Tue, 21 Jan 2020, Yoshihiro Shimoda wrote:
> 
> > Hi Alan,
> >
> > > From: Alan Stern, Sent: Tuesday, January 21, 2020 12:12 AM
> > >
> > > On Mon, 20 Jan 2020, Yoshihiro Shimoda wrote:
> > >
> > > > > > +static void ehci_platform_quirk_poll_work(struct work_struct *work)
> > > > > > +{
> > > > > > +	struct ehci_platform_priv *priv =
> > > > > > +		container_of(work, struct ehci_platform_priv, poll_work);
> > > > > > +	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
> > > > > > +					     priv);
> > > > > > +	int i;
> > > > > > +
> > > > > > +	usleep_range(4000, 8000);
> > > > >
> > > > > You have just waited 1000 ms for the timer.  Why will sleeping an
> > > > > additional 4 - 8 ms make any difference?
> > > >
> > > > This sleeping can avoid a misdetection between this work function and
> > > > reconnection. If user reconnects the usb within 4 ms, the PORTSC
> > > > condition is possible to be the same as the issue's condition.
> > > > I think I should have described this information into the code.
> > > >
> > > > However, if I used schedule_delayed_work() instead, we can remove
> > > > the usleep_range().
> > >
> > > Why not just make the timer delay be 1004 or 1008 ms instead of adding
> > > this extra delay here?
> >
> > My concern is a race condition when the issue doesn't happen. If
> > the workaround code has an extra delay, we can detect misdetection like below.
> > This is related to the EHCI/OHCI controllers on R-Car Gen3 SoCs though,
> > updating the CCS status is possible to be delayed. To be clear of the reason,
> > I should have described this CCS status behavior too.
> >
> > Timer routine		workqueue		EHCI PORTSC	USB connection
> > 								disconnect
> > 						CCS=0		connect (within 4 ms)
> > condition = true (misdetection)			CCS=0
> > 			usleep_range(4000,8000)	CCS=1
> > 			condition = false
> 
> Okay, now I understand.  I misread the code in the original patch.
> But now it looks like the code does roughly this:
> 
> Timer routine:	if (ehci_platform_quirk_poll_check_condition(ehci))
> 			schedule_work();
> 
> Work routine:	usleep_range(4000, 8000);
> 		udelay(10);
> 		if (!ehci_platform_quirk_poll_check_condition(ehci))
> 			return;
> 		udelay(10);
> 		if (!ehci_platform_quirk_poll_check_condition(ehci))
> 			return;
> 		ehci_platform_quirk_poll_rebind_companion(ehci);
> 
> So there are three calls to quirk_poll_check_condition, with 4 - 8 ms
> between the first and second, and 10 us between the second and third.
> Do you really need to have this combination of a long delay followed by
> a short delay?  Wouldn't two check_condition calls with only one delay
> be good enough?

I had implemented this code by using hardware team's suggestion without
any doubt. So, I asked hardware team about this combination of delays.
The hardware team said this combination can reduce misdetection ratio
from noise and so on. They also said we can wait single 5 ms instead
this combination (but this cannot reduce misdetection ratio).

So, now I'm thinking that the following process (single wait) is
enough and it can improve readability. But, what do you think?

Timer routine:	if (ehci_platform_quirk_poll_check_condition(ehci))
 			schedule_delayed_work(5 ms);

Delayed work routine:
		if (!ehci_platform_quirk_poll_check_condition(ehci))
 			return;
 		ehci_platform_quirk_poll_rebind_companion(ehci);

Best regards,
Yoshihiro Shimoda

> Alan Stern
Alan Stern Jan. 22, 2020, 2:58 p.m. UTC | #7
On Wed, 22 Jan 2020, Yoshihiro Shimoda wrote:

> > Okay, now I understand.  I misread the code in the original patch.
> > But now it looks like the code does roughly this:
> > 
> > Timer routine:	if (ehci_platform_quirk_poll_check_condition(ehci))
> > 			schedule_work();
> > 
> > Work routine:	usleep_range(4000, 8000);
> > 		udelay(10);
> > 		if (!ehci_platform_quirk_poll_check_condition(ehci))
> > 			return;
> > 		udelay(10);
> > 		if (!ehci_platform_quirk_poll_check_condition(ehci))
> > 			return;
> > 		ehci_platform_quirk_poll_rebind_companion(ehci);
> > 
> > So there are three calls to quirk_poll_check_condition, with 4 - 8 ms
> > between the first and second, and 10 us between the second and third.
> > Do you really need to have this combination of a long delay followed by
> > a short delay?  Wouldn't two check_condition calls with only one delay
> > be good enough?
> 
> I had implemented this code by using hardware team's suggestion without
> any doubt. So, I asked hardware team about this combination of delays.
> The hardware team said this combination can reduce misdetection ratio
> from noise and so on. They also said we can wait single 5 ms instead
> this combination (but this cannot reduce misdetection ratio).

Sure, the more times you delay and recheck, the better the error rate.  
But you don't want to go too far.

> So, now I'm thinking that the following process (single wait) is
> enough and it can improve readability. But, what do you think?
> 
> Timer routine:	if (ehci_platform_quirk_poll_check_condition(ehci))
>  			schedule_delayed_work(5 ms);
> 
> Delayed work routine:
> 		if (!ehci_platform_quirk_poll_check_condition(ehci))
>  			return;
>  		ehci_platform_quirk_poll_rebind_companion(ehci);

That looks good to me.

Alan Stern
Yoshihiro Shimoda Jan. 23, 2020, 12:05 p.m. UTC | #8
Hi Alan,

> From: Alan Stern, Sent: Wednesday, January 22, 2020 11:59 PM
> 
> On Wed, 22 Jan 2020, Yoshihiro Shimoda wrote:
> 
> > > Okay, now I understand.  I misread the code in the original patch.
> > > But now it looks like the code does roughly this:
> > >
> > > Timer routine:	if (ehci_platform_quirk_poll_check_condition(ehci))
> > > 			schedule_work();
> > >
> > > Work routine:	usleep_range(4000, 8000);
> > > 		udelay(10);
> > > 		if (!ehci_platform_quirk_poll_check_condition(ehci))
> > > 			return;
> > > 		udelay(10);
> > > 		if (!ehci_platform_quirk_poll_check_condition(ehci))
> > > 			return;
> > > 		ehci_platform_quirk_poll_rebind_companion(ehci);
> > >
> > > So there are three calls to quirk_poll_check_condition, with 4 - 8 ms
> > > between the first and second, and 10 us between the second and third.
> > > Do you really need to have this combination of a long delay followed by
> > > a short delay?  Wouldn't two check_condition calls with only one delay
> > > be good enough?
> >
> > I had implemented this code by using hardware team's suggestion without
> > any doubt. So, I asked hardware team about this combination of delays.
> > The hardware team said this combination can reduce misdetection ratio
> > from noise and so on. They also said we can wait single 5 ms instead
> > this combination (but this cannot reduce misdetection ratio).
> 
> Sure, the more times you delay and recheck, the better the error rate.
> But you don't want to go too far.

You're correct. However, my mind is changed a little...
I would like to remain rechecking as the same as before.

> > So, now I'm thinking that the following process (single wait) is
> > enough and it can improve readability. But, what do you think?
> >
> > Timer routine:	if (ehci_platform_quirk_poll_check_condition(ehci))
> >  			schedule_delayed_work(5 ms);
> >
> > Delayed work routine:
> > 		if (!ehci_platform_quirk_poll_check_condition(ehci))
> >  			return;
> >  		ehci_platform_quirk_poll_rebind_companion(ehci);
> 
> That looks good to me.

Thank you for your feedback! I'll submit v2 patch soon.

Best regards,
Yoshihiro Shimoda

> Alan Stern
diff mbox series

Patch

diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
index 769749c..fc6bb06 100644
--- a/drivers/usb/host/ehci-platform.c
+++ b/drivers/usb/host/ehci-platform.c
@@ -29,6 +29,7 @@ 
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
+#include <linux/timer.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
 #include <linux/usb/ehci_pdriver.h>
@@ -44,6 +45,9 @@  struct ehci_platform_priv {
 	struct clk *clks[EHCI_MAX_CLKS];
 	struct reset_control *rsts;
 	bool reset_on_resume;
+	bool quirk_poll;
+	struct timer_list poll_timer;
+	struct work_struct poll_work;
 };
 
 static const char hcd_name[] = "ehci-platform";
@@ -118,6 +122,88 @@  static struct usb_ehci_pdata ehci_platform_defaults = {
 	.power_off =		ehci_platform_power_off,
 };
 
+static bool ehci_platform_quirk_poll_check_condition(struct ehci_hcd *ehci)
+{
+	u32 port_status = ehci_readl(ehci, &ehci->regs->port_status[0]);
+
+	if (!(port_status & PORT_OWNER) &&	/* PO == 0b */
+	    port_status & PORT_POWER &&		/* PP == 1b */
+	    !(port_status & PORT_CONNECT) &&	/* CCS == 0b */
+	    port_status & GENMASK(11, 10))	/* LS != 00b */
+		return true;
+
+	return false;
+}
+
+static void ehci_platform_quirk_poll_rebind_companion(struct ehci_hcd *ehci)
+{
+	struct device *companion_dev;
+	struct usb_hcd *hcd = ehci_to_hcd(ehci);
+
+	companion_dev = usb_of_get_companion_dev(hcd->self.controller);
+	if (!companion_dev)
+		return;
+
+	device_release_driver(companion_dev);
+	if (device_attach(companion_dev) < 0)
+		ehci_err(ehci, "%s: failed\n", __func__);
+
+	put_device(companion_dev);
+}
+
+static void ehci_platform_quirk_poll_start_timer(struct ehci_platform_priv *p)
+{
+	mod_timer(&p->poll_timer, jiffies + msecs_to_jiffies(1000));
+}
+
+static void ehci_platform_quirk_poll_work(struct work_struct *work)
+{
+	struct ehci_platform_priv *priv =
+		container_of(work, struct ehci_platform_priv, poll_work);
+	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
+					     priv);
+	int i;
+
+	usleep_range(4000, 8000);
+
+	for (i = 0; i < 2; i++) {
+		udelay(10);
+		if (!ehci_platform_quirk_poll_check_condition(ehci))
+			goto out;
+	}
+
+	ehci_dbg(ehci, "%s: detected getting stuck. rebind now!\n", __func__);
+	ehci_platform_quirk_poll_rebind_companion(ehci);
+
+out:
+	ehci_platform_quirk_poll_start_timer(priv);
+}
+
+static void ehci_platform_quirk_poll_timer(struct timer_list *t)
+{
+	struct ehci_platform_priv *priv = from_timer(priv, t, poll_timer);
+	struct ehci_hcd *ehci = container_of((void *)priv, struct ehci_hcd,
+					     priv);
+
+	if (ehci_platform_quirk_poll_check_condition(ehci))
+		schedule_work(&priv->poll_work);
+	else
+		ehci_platform_quirk_poll_start_timer(priv);
+}
+
+static void ehci_platform_quirk_poll_init(struct ehci_platform_priv *priv)
+{
+	INIT_WORK(&priv->poll_work, ehci_platform_quirk_poll_work);
+	timer_setup(&priv->poll_timer, ehci_platform_quirk_poll_timer, 0);
+	ehci_platform_quirk_poll_start_timer(priv);
+}
+
+static void ehci_platform_quirk_poll_end(struct ehci_platform_priv *priv)
+{
+	del_timer_sync(&priv->poll_timer);
+	flush_work(&priv->poll_work);
+}
+
 static int ehci_platform_probe(struct platform_device *dev)
 {
 	struct usb_hcd *hcd;
@@ -176,6 +262,10 @@  static int ehci_platform_probe(struct platform_device *dev)
 					  "has-transaction-translator"))
 			hcd->has_tt = 1;
 
+		if (of_property_read_bool(dev->dev.of_node,
+					  "needs-polling-to-avoid-stuck"))
+			priv->quirk_poll = true;
+
 		for (clk = 0; clk < EHCI_MAX_CLKS; clk++) {
 			priv->clks[clk] = of_clk_get(dev->dev.of_node, clk);
 			if (IS_ERR(priv->clks[clk])) {
@@ -247,6 +337,9 @@  static int ehci_platform_probe(struct platform_device *dev)
 	device_enable_async_suspend(hcd->self.controller);
 	platform_set_drvdata(dev, hcd);
 
+	if (priv->quirk_poll)
+		ehci_platform_quirk_poll_init(priv);
+
 	return err;
 
 err_power:
@@ -273,6 +366,9 @@  static int ehci_platform_remove(struct platform_device *dev)
 	struct ehci_platform_priv *priv = hcd_to_ehci_priv(hcd);
 	int clk;
 
+	if (priv->quirk_poll)
+		ehci_platform_quirk_poll_end(priv);
+
 	usb_remove_hcd(hcd);
 
 	if (pdata->power_off)
@@ -297,9 +393,13 @@  static int ehci_platform_suspend(struct device *dev)
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
 	struct usb_ehci_pdata *pdata = dev_get_platdata(dev);
 	struct platform_device *pdev = to_platform_device(dev);
+	struct ehci_platform_priv *priv = hcd_to_ehci_priv(hcd);
 	bool do_wakeup = device_may_wakeup(dev);
 	int ret;
 
+	if (priv->quirk_poll)
+		ehci_platform_quirk_poll_end(priv);
+
 	ret = ehci_suspend(hcd, do_wakeup);
 	if (ret)
 		return ret;
@@ -331,6 +431,10 @@  static int ehci_platform_resume(struct device *dev)
 	}
 
 	ehci_resume(hcd, priv->reset_on_resume);
+
+	if (priv->quirk_poll)
+		ehci_platform_quirk_poll_init(priv);
+
 	return 0;
 }
 #endif /* CONFIG_PM_SLEEP */